summaryrefslogtreecommitdiffstats
path: root/tools/testing/selftests
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests')
-rw-r--r--tools/testing/selftests/.gitignore2
-rw-r--r--tools/testing/selftests/Makefile65
-rw-r--r--tools/testing/selftests/arm64/Makefile66
-rw-r--r--tools/testing/selftests/arm64/README25
-rw-r--r--tools/testing/selftests/arm64/signal/.gitignore3
-rw-r--r--tools/testing/selftests/arm64/signal/Makefile32
-rw-r--r--tools/testing/selftests/arm64/signal/README59
-rw-r--r--tools/testing/selftests/arm64/signal/signals.S64
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals.c29
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals.h100
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals_utils.c328
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals_utils.h120
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c52
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c77
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c46
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c50
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c37
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c50
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_compat_toggle.c31
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_daif_bits.c35
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1h.c15
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1t.c15
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2h.c15
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2t.c15
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3h.c15
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3t.c15
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_template.h28
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/testcases.c196
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/testcases.h104
-rw-r--r--tools/testing/selftests/arm64/tags/.gitignore1
-rw-r--r--tools/testing/selftests/arm64/tags/Makefile7
-rwxr-xr-xtools/testing/selftests/arm64/tags/run_tags_test.sh12
-rw-r--r--tools/testing/selftests/arm64/tags/tags_test.c31
-rw-r--r--tools/testing/selftests/bpf/.gitignore19
-rw-r--r--tools/testing/selftests/bpf/Makefile507
-rw-r--r--tools/testing/selftests/bpf/bpf_endian.h58
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h504
-rw-r--r--tools/testing/selftests/bpf/bpf_legacy.h39
-rw-r--r--tools/testing/selftests/bpf/bpf_tcp_helpers.h235
-rw-r--r--tools/testing/selftests/bpf/bpf_trace_helpers.h120
-rw-r--r--tools/testing/selftests/bpf/bpf_util.h2
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c4
-rw-r--r--tools/testing/selftests/bpf/config1
-rw-r--r--tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c129
-rw-r--r--tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c283
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c127
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c26
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c212
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c95
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dump.c130
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c111
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c285
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c148
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_extern.c169
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_reloc.c582
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cpu_mask.c78
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_fexit.c55
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_test.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c131
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_stress.c76
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_test.c64
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector.c268
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c140
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c85
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_data.c20
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfree_skb.c154
-rw-r--r--tools/testing/selftests/bpf/prog_tests/l4lb_all.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_lock.c38
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mmap.c206
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_buffer.c29
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pinning.c224
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pkt_access.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pkt_md_access.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/probe_user.c76
-rw-r--r--tools/testing/selftests/bpf/prog_tests/queue_stack_map.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rdonly_maps.c92
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reference_tracking.c38
-rw-r--r--tools/testing/selftests/bpf/prog_tests/section_names.c (renamed from tools/testing/selftests/bpf/test_section_names.c)90
-rw-r--r--tools/testing/selftests/bpf/prog_tests/select_reuseport.c (renamed from tools/testing/selftests/bpf/test_select_reuseport.c)550
-rw-r--r--tools/testing/selftests/bpf/prog_tests/send_signal.c156
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skb_ctx.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skeleton.c63
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c74
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt.c (renamed from tools/testing/selftests/bpf/test_sockopt.c)50
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c235
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_multi.c (renamed from tools/testing/selftests/bpf/test_sockopt_multi.c)62
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_sk.c (renamed from tools/testing/selftests/bpf/test_sockopt_sk.c)83
-rw-r--r--tools/testing/selftests/bpf/prog_tests/spinlock.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c82
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c95
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map.c17
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tailcalls.c487
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_estats.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_rtt.c (renamed from tools/testing/selftests/bpf/test_tcp_rtt.c)128
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_global_funcs.c82
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_overhead.c148
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trampoline_count.c112
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c65
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_noinline.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_perf.c25
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_cubic.c544
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp.c216
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_flow.c64
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_arrays.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_dim.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_val_sz.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___equiv_zero_sz_arr.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_zero_sz_arr.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_non_array.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_shallow.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_small.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___fixed_arr.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___bit_sz_change.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___bitfield_vs_int.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___err_too_big_bitfield.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___just_big_enough.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___minimal.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_flavors.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_flavors__err_wrong_name.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_ints.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_ints___bool.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_ints___reverse_sign.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_misc.c5
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_mods.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_mods___mod_swap.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_mods___typedefs.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_nesting.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___anon_embed.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___dup_compat_types.c5
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_container.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_field.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_dup_incompat_types.c4
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_container.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_field.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_nonstruct_container.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_partial_match_dups.c4
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_too_deep.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___extra_nesting.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___struct_union_mixup.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_primitives.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_enum_def.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_func_proto.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_ptr_type.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_enum.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_int.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_ptr.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_ptr_as_arr.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_ptr_as_arr___diff_sz.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_size.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_sz.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c5
-rw-r--r--tools/testing/selftests/bpf/progs/connect4_prog.c4
-rw-r--r--tools/testing/selftests/bpf/progs/connect6_prog.c4
-rw-r--r--tools/testing/selftests/bpf/progs/core_reloc_types.h806
-rw-r--r--tools/testing/selftests/bpf/progs/dev_cgroup.c2
-rw-r--r--tools/testing/selftests/bpf/progs/fentry_test.c57
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c154
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c27
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_test.c58
-rw-r--r--tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c2
-rw-r--r--tools/testing/selftests/bpf/progs/kfree_skb.c153
-rw-r--r--tools/testing/selftests/bpf/progs/loop1.c3
-rw-r--r--tools/testing/selftests/bpf/progs/loop2.c3
-rw-r--r--tools/testing/selftests/bpf/progs/loop3.c3
-rw-r--r--tools/testing/selftests/bpf/progs/loop4.c18
-rw-r--r--tools/testing/selftests/bpf/progs/loop5.c32
-rw-r--r--tools/testing/selftests/bpf/progs/netcnt_prog.c2
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf.h78
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf_global.c5
-rw-r--r--tools/testing/selftests/bpf/progs/sample_map_ret0.c2
-rw-r--r--tools/testing/selftests/bpf/progs/sendmsg4_prog.c4
-rw-r--r--tools/testing/selftests/bpf/progs/sendmsg6_prog.c4
-rw-r--r--tools/testing/selftests/bpf/progs/socket_cookie_prog.c4
-rw-r--r--tools/testing/selftests/bpf/progs/sockmap_parse_prog.c4
-rw-r--r--tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c4
-rw-r--r--tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c4
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_inherit.c97
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_multi.c2
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_sk.c37
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta.h43
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall1.c48
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall2.c59
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall3.c31
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall4.c33
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall5.c40
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_rtt.c15
-rw-r--r--tools/testing/selftests/bpf/progs/test_adjust_tail.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_attach_probe.c37
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_haskv.c7
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_newkv.c7
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_nokv.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_extern.c62
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c58
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_direct.c63
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c57
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_existence.c79
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c65
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_ints.c47
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c96
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_misc.c60
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_mods.c65
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c49
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_primitives.c46
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_ptr_as_arr.c33
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_size.c51
-rw-r--r--tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_data.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func1.c45
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func2.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func3.c65
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func4.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func5.c31
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func6.c31
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func7.c18
-rw-r--r--tools/testing/selftests/bpf/progs/test_l4lb.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_l4lb_noinline.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_lwt_seg6local.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_map_in_map.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_map_lock.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_mmap.c45
-rw-r--r--tools/testing/selftests/bpf/progs/test_obj_id.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_overhead.c45
-rw-r--r--tools/testing/selftests/bpf/progs/test_perf_buffer.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_pinning.c31
-rw-r--r--tools/testing/selftests/bpf/progs/test_pinning_invalid.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_pkt_access.c76
-rw-r--r--tools/testing/selftests/bpf/progs/test_pkt_md_access.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_probe_user.c27
-rw-r--r--tools/testing/selftests/bpf/progs/test_queue_stack_map.h (renamed from tools/testing/selftests/bpf/test_queue_stack_map.h)2
-rw-r--r--tools/testing/selftests/bpf/progs/test_rdonly_maps.c83
-rw-r--r--tools/testing/selftests/bpf/progs/test_seg6_loop.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c12
-rw-r--r--tools/testing/selftests/bpf/progs/test_send_signal_kern.c55
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c22
-rw-r--r--tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_skb_ctx.c9
-rw-r--r--tools/testing/selftests/bpf/progs/test_skeleton.c46
-rw-r--r--tools/testing/selftests/bpf/progs/test_sock_fields_kern.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_spin_lock.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_stacktrace_map.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_loop1.c7
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_loop2.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_prog.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_edt.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_tunnel.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c52
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_estats.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_tracepoint.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_trampoline_count.c21
-rw-r--r--tools/testing/selftests/bpf/progs/test_tunnel_kern.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_verif_scale1.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_verif_scale2.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_verif_scale3.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c44
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_loop.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_meta.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_noinline.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_redirect.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_vlan.c4
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_dummy.c2
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_redirect_map.c2
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_tx.c2
-rw-r--r--tools/testing/selftests/bpf/progs/xdping_kern.c4
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_build.sh147
-rw-r--r--tools/testing/selftests/bpf/test_btf.c4
-rw-r--r--tools/testing/selftests/bpf/test_btf_dump.c143
-rw-r--r--tools/testing/selftests/bpf/test_cgroup_attach.c571
-rw-r--r--tools/testing/selftests/bpf/test_cgroup_storage.c6
-rw-r--r--tools/testing/selftests/bpf/test_cpp.cpp30
-rwxr-xr-xtools/testing/selftests/bpf/test_flow_dissector.sh65
-rwxr-xr-xtools/testing/selftests/bpf/test_ftrace.sh39
-rw-r--r--tools/testing/selftests/bpf/test_hashmap.c2
-rwxr-xr-xtools/testing/selftests/bpf/test_libbpf.sh43
-rw-r--r--tools/testing/selftests/bpf/test_libbpf_open.c144
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_ip_encap.sh6
-rw-r--r--tools/testing/selftests/bpf/test_maps.c28
-rwxr-xr-xtools/testing/selftests/bpf/test_offload.py27
-rw-r--r--tools/testing/selftests/bpf/test_progs.c539
-rw-r--r--tools/testing/selftests/bpf/test_progs.h73
-rw-r--r--tools/testing/selftests/bpf/test_skb_cgroup_id_user.c2
-rw-r--r--tools/testing/selftests/bpf/test_sock.c7
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c54
-rw-r--r--tools/testing/selftests/bpf/test_sockmap_kern.h4
-rw-r--r--tools/testing/selftests/bpf/test_stub.c4
-rw-r--r--tools/testing/selftests/bpf/test_sysctl.c171
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_edt.sh2
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_tunnel.sh5
-rwxr-xr-xtools/testing/selftests/bpf/test_tcp_check_syncookie.sh3
-rw-r--r--tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c61
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf.h1
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf_user.c25
-rw-r--r--tools/testing/selftests/bpf/test_tcpnotify_user.c90
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c123
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c125
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.h11
-rw-r--r--tools/testing/selftests/bpf/verifier/event_output.c94
-rw-r--r--tools/testing/selftests/bpf/verifier/jmp32.c83
-rw-r--r--tools/testing/selftests/bpf/verifier/loops1.c17
-rw-r--r--tools/testing/selftests/bpf/verifier/precise.c194
-rw-r--r--tools/testing/selftests/bpf/verifier/ref_tracking.c6
-rw-r--r--tools/testing/selftests/bpf/verifier/runtime_jit.c151
-rw-r--r--tools/testing/selftests/bpf/xdping.c2
-rw-r--r--tools/testing/selftests/breakpoints/breakpoint_test_arm64.c2
-rw-r--r--tools/testing/selftests/cgroup/Makefile4
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.c42
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.h6
-rw-r--r--tools/testing/selftests/cgroup/test_core.c146
-rw-r--r--tools/testing/selftests/cgroup/test_freezer.c57
-rwxr-xr-xtools/testing/selftests/cgroup/test_stress.sh4
-rwxr-xr-xtools/testing/selftests/cgroup/with_stress.sh101
-rw-r--r--tools/testing/selftests/clone3/.gitignore3
-rw-r--r--tools/testing/selftests/clone3/Makefile6
-rw-r--r--tools/testing/selftests/clone3/clone3.c202
-rw-r--r--tools/testing/selftests/clone3/clone3_clear_sighand.c129
-rw-r--r--tools/testing/selftests/clone3/clone3_selftests.h63
-rw-r--r--tools/testing/selftests/clone3/clone3_set_tid.c397
-rw-r--r--tools/testing/selftests/dmabuf-heaps/Makefile6
-rw-r--r--tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c396
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh129
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh437
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh675
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh557
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh265
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh330
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/fib.sh256
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh176
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh6
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh24
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh70
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh8
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_ets.sh67
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh9
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh9
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh9
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum-2/mirror_gre_scale.sh16
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh54
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum-2/router_scale.sh18
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh20
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh7
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/vxlan.sh8
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink.sh415
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink_in_netns.sh72
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh364
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/fib.sh341
-rw-r--r--tools/testing/selftests/filesystems/epoll/.gitignore1
-rw-r--r--tools/testing/selftests/filesystems/epoll/Makefile7
-rw-r--r--tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c3074
-rwxr-xr-xtools/testing/selftests/firmware/fw_filesystem.sh57
-rwxr-xr-xtools/testing/selftests/firmware/fw_lib.sh17
-rw-r--r--tools/testing/selftests/ftrace/settings1
-rw-r--r--tools/testing/selftests/ftrace/test.d/direct/ftrace-direct.tc69
-rw-r--r--tools/testing/selftests/ftrace/test.d/direct/kprobe-direct.tc84
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc5
-rw-r--r--tools/testing/selftests/ftrace/test.d/functions7
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc16
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc35
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc16
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc6
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc4
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc4
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-syntax-errors.tc32
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc2
-rwxr-xr-xtools/testing/selftests/gen_kselftest_tar.sh21
-rw-r--r--tools/testing/selftests/ir/ir_loopback.c6
-rwxr-xr-xtools/testing/selftests/kexec/test_kexec_file_load.sh38
-rwxr-xr-xtools/testing/selftests/kselftest/module.sh (renamed from tools/testing/selftests/kselftest_module.sh)2
-rwxr-xr-xtools/testing/selftests/kselftest/prefix.pl1
-rw-r--r--tools/testing/selftests/kselftest/runner.sh39
-rwxr-xr-xtools/testing/selftests/kselftest_install.sh24
-rw-r--r--tools/testing/selftests/kvm/.gitignore3
-rw-r--r--tools/testing/selftests/kvm/Makefile20
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c140
-rw-r--r--tools/testing/selftests/kvm/include/evmcs.h2
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h32
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h777
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/svm.h297
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/svm_util.h38
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/vmx.h24
-rw-r--r--tools/testing/selftests/kvm/kvm_create_max_vcpus.c7
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/processor.c3
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/ucall.c112
-rw-r--r--tools/testing/selftests/kvm/lib/assert.c4
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c105
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util_internal.h3
-rw-r--r--tools/testing/selftests/kvm/lib/s390x/ucall.c56
-rw-r--r--tools/testing/selftests/kvm/lib/ucall.c157
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c116
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/svm.c161
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/ucall.c56
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/vmx.c245
-rw-r--r--tools/testing/selftests/kvm/s390x/memop.c166
-rw-r--r--tools/testing/selftests/kvm/s390x/resets.c197
-rw-r--r--tools/testing/selftests/kvm/s390x/sync_regs_test.c43
-rw-r--r--tools/testing/selftests/kvm/x86_64/evmcs_test.c15
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c39
-rw-r--r--tools/testing/selftests/kvm/x86_64/platform_info_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c79
-rw-r--r--tools/testing/selftests/kvm/x86_64/sync_regs_test.c21
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c6
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c158
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c45
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c8
-rw-r--r--tools/testing/selftests/kvm/x86_64/xss_msr_test.c76
-rwxr-xr-xtools/testing/selftests/lib/bitmap.sh2
-rwxr-xr-xtools/testing/selftests/lib/prime_numbers.sh2
-rwxr-xr-xtools/testing/selftests/lib/printf.sh2
-rwxr-xr-xtools/testing/selftests/lib/strscpy.sh2
-rw-r--r--tools/testing/selftests/livepatch/Makefile4
-rw-r--r--tools/testing/selftests/livepatch/README2
-rw-r--r--tools/testing/selftests/livepatch/config2
-rw-r--r--tools/testing/selftests/livepatch/functions.sh48
-rw-r--r--tools/testing/selftests/livepatch/settings1
-rwxr-xr-xtools/testing/selftests/livepatch/test-callbacks.sh2
-rwxr-xr-xtools/testing/selftests/livepatch/test-ftrace.sh65
-rwxr-xr-xtools/testing/selftests/livepatch/test-livepatch.sh2
-rwxr-xr-xtools/testing/selftests/livepatch/test-shadow-vars.sh2
-rwxr-xr-xtools/testing/selftests/livepatch/test-state.sh179
-rw-r--r--tools/testing/selftests/lkdtm/Makefile12
-rw-r--r--tools/testing/selftests/lkdtm/config1
-rwxr-xr-xtools/testing/selftests/lkdtm/run.sh92
-rw-r--r--tools/testing/selftests/lkdtm/tests.txt71
-rw-r--r--tools/testing/selftests/membarrier/.gitignore3
-rw-r--r--tools/testing/selftests/membarrier/Makefile5
-rw-r--r--tools/testing/selftests/membarrier/membarrier_test_impl.h (renamed from tools/testing/selftests/membarrier/membarrier_test.c)40
-rw-r--r--tools/testing/selftests/membarrier/membarrier_test_multi_thread.c73
-rw-r--r--tools/testing/selftests/membarrier/membarrier_test_single_thread.c24
-rw-r--r--tools/testing/selftests/memfd/memfd_test.c36
-rw-r--r--tools/testing/selftests/net/.gitignore2
-rw-r--r--tools/testing/selftests/net/Makefile6
-rwxr-xr-xtools/testing/selftests/net/altnames.sh75
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh3890
-rwxr-xr-xtools/testing/selftests/net/fib_nexthop_multiprefix.sh6
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh38
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh92
-rw-r--r--tools/testing/selftests/net/fin_ack_lat.c151
-rwxr-xr-xtools/testing/selftests/net/fin_ack_lat.sh35
-rw-r--r--tools/testing/selftests/net/forwarding/devlink_lib.sh244
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool.sh318
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool_lib.sh69
-rw-r--r--tools/testing/selftests/net/forwarding/fib_offload_lib.sh873
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh139
-rwxr-xr-xtools/testing/selftests/net/forwarding/loopback.sh8
-rwxr-xr-xtools/testing/selftests/net/forwarding/router.sh189
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_vlan.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_ets.sh44
-rw-r--r--tools/testing/selftests/net/forwarding/sch_ets_core.sh300
-rw-r--r--tools/testing/selftests/net/forwarding/sch_ets_tests.sh227
-rw-r--r--tools/testing/selftests/net/forwarding/sch_tbf_core.sh233
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_tbf_ets.sh6
-rw-r--r--tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh39
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_tbf_prio.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_tbf_root.sh33
-rw-r--r--tools/testing/selftests/net/forwarding/tc_common.sh53
-rwxr-xr-xtools/testing/selftests/net/l2tp.sh382
-rw-r--r--tools/testing/selftests/net/mptcp/.gitignore2
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile13
-rw-r--r--tools/testing/selftests/net/mptcp/config4
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c841
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh595
-rw-r--r--tools/testing/selftests/net/mptcp/settings1
-rw-r--r--tools/testing/selftests/net/nettest.c1813
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh11
-rw-r--r--tools/testing/selftests/net/reuseport_dualstack.c3
-rw-r--r--tools/testing/selftests/net/so_txtime.c88
-rwxr-xr-xtools/testing/selftests/net/so_txtime.sh9
-rw-r--r--tools/testing/selftests/net/tcp_mmap.c73
-rw-r--r--tools/testing/selftests/net/tls.c148
-rwxr-xr-xtools/testing/selftests/net/traceroute.sh322
-rw-r--r--tools/testing/selftests/net/udpgso.c19
-rw-r--r--tools/testing/selftests/net/udpgso_bench_tx.c3
-rwxr-xr-xtools/testing/selftests/net/xfrm_policy.sh7
-rw-r--r--tools/testing/selftests/netfilter/Makefile3
-rwxr-xr-xtools/testing/selftests/netfilter/ipvs.sh228
-rwxr-xr-xtools/testing/selftests/netfilter/nft_concat_range.sh1481
-rwxr-xr-xtools/testing/selftests/netfilter/nft_flowtable.sh39
-rwxr-xr-xtools/testing/selftests/netfilter/nft_nat.sh332
-rw-r--r--tools/testing/selftests/openat2/.gitignore1
-rw-r--r--tools/testing/selftests/openat2/Makefile8
-rw-r--r--tools/testing/selftests/openat2/helpers.c109
-rw-r--r--tools/testing/selftests/openat2/helpers.h106
-rw-r--r--tools/testing/selftests/openat2/openat2_test.c312
-rw-r--r--tools/testing/selftests/openat2/rename_attack_test.c160
-rw-r--r--tools/testing/selftests/openat2/resolve_test.c523
-rw-r--r--tools/testing/selftests/pidfd/.gitignore3
-rw-r--r--tools/testing/selftests/pidfd/Makefile4
-rw-r--r--tools/testing/selftests/pidfd/pidfd.h39
-rw-r--r--tools/testing/selftests/pidfd/pidfd_fdinfo_test.c296
-rw-r--r--tools/testing/selftests/pidfd/pidfd_getfd_test.c249
-rw-r--r--tools/testing/selftests/pidfd/pidfd_open_test.c5
-rw-r--r--tools/testing/selftests/pidfd/pidfd_poll_test.c117
-rw-r--r--tools/testing/selftests/pidfd/pidfd_test.c14
-rw-r--r--tools/testing/selftests/pidfd/pidfd_wait.c271
-rw-r--r--tools/testing/selftests/powerpc/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/copyloops/.gitignore9
-rw-r--r--tools/testing/selftests/powerpc/copyloops/Makefile7
-rw-r--r--tools/testing/selftests/powerpc/copyloops/asm/export.h1
l---------tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S1
-rw-r--r--tools/testing/selftests/powerpc/eeh/Makefile9
-rwxr-xr-xtools/testing/selftests/powerpc/eeh/eeh-basic.sh82
-rwxr-xr-xtools/testing/selftests/powerpc/eeh/eeh-functions.sh80
-rw-r--r--tools/testing/selftests/powerpc/include/utils.h1
-rw-r--r--tools/testing/selftests/powerpc/mm/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/mm/Makefile5
-rw-r--r--tools/testing/selftests/powerpc/mm/bad_accesses.c171
-rw-r--r--tools/testing/selftests/powerpc/mm/tlbie_test.c734
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/Makefile7
-rw-r--r--tools/testing/selftests/powerpc/ptrace/.gitignore3
-rw-r--r--tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c119
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c580
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c2
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c4
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c2
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c4
-rw-r--r--tools/testing/selftests/powerpc/security/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/security/Makefile5
-rw-r--r--tools/testing/selftests/powerpc/security/branch_loops.S82
-rw-r--r--tools/testing/selftests/powerpc/security/spectre_v2.c218
-rw-r--r--tools/testing/selftests/powerpc/signal/sigfuz.c2
-rw-r--r--tools/testing/selftests/powerpc/stringloops/.gitignore5
-rw-r--r--tools/testing/selftests/powerpc/tm/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/tm/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-poison.c179
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c49
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c59
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c74
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c130
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c4
-rw-r--r--tools/testing/selftests/powerpc/tm/tm.h3
-rw-r--r--tools/testing/selftests/powerpc/utils.c20
-rw-r--r--tools/testing/selftests/proc/proc-self-map-files-002.c6
-rw-r--r--tools/testing/selftests/ptp/testptp.c53
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/cpus2use.sh11
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/jitter.sh30
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh3
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh15
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh30
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/mkinitrd.sh55
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS033
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE023
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE043
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE063
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE083
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE093
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL3
-rw-r--r--tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt1
-rw-r--r--tools/testing/selftests/rseq/param_test.c18
-rw-r--r--tools/testing/selftests/rseq/rseq.h12
-rw-r--r--tools/testing/selftests/rseq/settings1
-rw-r--r--tools/testing/selftests/rtc/settings1
-rw-r--r--tools/testing/selftests/safesetid/Makefile5
-rw-r--r--tools/testing/selftests/safesetid/safesetid-test.c15
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c138
-rw-r--r--tools/testing/selftests/size/get_size.c24
-rw-r--r--tools/testing/selftests/sync/sync.c6
-rw-r--r--tools/testing/selftests/tc-testing/README4
-rw-r--r--tools/testing/selftests/tc-testing/config10
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py2
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py22
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/csum.json24
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/ct.json96
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/gact.json24
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json24
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/mpls.json145
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json749
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json24
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json24
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/basic.json376
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/concurrency.json18
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json391
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/tests.json22
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/u32.json205
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/ets.json940
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json304
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json50
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json128
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.py12
-rw-r--r--tools/testing/selftests/tc-testing/tdc_config.py1
-rw-r--r--tools/testing/selftests/timens/.gitignore8
-rw-r--r--tools/testing/selftests/timens/Makefile7
-rw-r--r--tools/testing/selftests/timens/clock_nanosleep.c149
-rw-r--r--tools/testing/selftests/timens/config1
-rw-r--r--tools/testing/selftests/timens/exec.c94
-rw-r--r--tools/testing/selftests/timens/gettime_perf.c95
-rw-r--r--tools/testing/selftests/timens/log.h26
-rw-r--r--tools/testing/selftests/timens/procfs.c144
-rw-r--r--tools/testing/selftests/timens/timens.c190
-rw-r--r--tools/testing/selftests/timens/timens.h100
-rw-r--r--tools/testing/selftests/timens/timer.c122
-rw-r--r--tools/testing/selftests/timens/timerfd.c128
-rw-r--r--tools/testing/selftests/tpm2/Makefile1
-rwxr-xr-xtools/testing/selftests/tpm2/test_smoke.sh6
-rw-r--r--tools/testing/selftests/tpm2/tpm2.py19
-rw-r--r--tools/testing/selftests/tpm2/tpm2_tests.py13
-rw-r--r--tools/testing/selftests/vm/Makefile5
-rw-r--r--tools/testing/selftests/vm/config1
-rw-r--r--tools/testing/selftests/vm/gup_benchmark.c10
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests10
-rw-r--r--tools/testing/selftests/watchdog/watchdog-test.c66
-rwxr-xr-xtools/testing/selftests/wireguard/netns.sh550
-rw-r--r--tools/testing/selftests/wireguard/qemu/.gitignore2
-rw-r--r--tools/testing/selftests/wireguard/qemu/Makefile387
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/aarch64.config5
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config6
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/arm.config9
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/armeb.config10
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/i686.config5
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/m68k.config9
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mips.config11
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mips64.config14
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mips64el.config15
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mipsel.config12
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/powerpc.config10
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config12
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/x86_64.config5
-rw-r--r--tools/testing/selftests/wireguard/qemu/debug.config66
-rw-r--r--tools/testing/selftests/wireguard/qemu/init.c285
-rw-r--r--tools/testing/selftests/wireguard/qemu/kernel.config88
-rw-r--r--tools/testing/selftests/x86/Makefile4
-rw-r--r--tools/testing/selftests/x86/ioperm.c16
-rw-r--r--tools/testing/selftests/x86/iopl.c129
-rw-r--r--tools/testing/selftests/x86/mov_ss_trap.c3
-rw-r--r--tools/testing/selftests/x86/mpx-debug.h15
-rw-r--r--tools/testing/selftests/x86/mpx-dig.c497
-rw-r--r--tools/testing/selftests/x86/mpx-hw.h124
-rw-r--r--tools/testing/selftests/x86/mpx-mini-test.c1613
-rw-r--r--tools/testing/selftests/x86/mpx-mm.h10
-rw-r--r--tools/testing/selftests/x86/sigreturn.c13
-rw-r--r--tools/testing/selftests/x86/single_step_syscall.c94
-rw-r--r--tools/testing/selftests/x86/syscall_numbering.c89
653 files changed, 51131 insertions, 7778 deletions
diff --git a/tools/testing/selftests/.gitignore b/tools/testing/selftests/.gitignore
index 8059ce834247..61df01cdf0b2 100644
--- a/tools/testing/selftests/.gitignore
+++ b/tools/testing/selftests/.gitignore
@@ -2,3 +2,5 @@ gpiogpio-event-mon
gpiogpio-hammer
gpioinclude/
gpiolsgpio
+tpm2/SpaceTest.log
+tpm2/*.pyc
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 25b43a8c2b15..63430e2664c2 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -1,9 +1,11 @@
# SPDX-License-Identifier: GPL-2.0
TARGETS = android
+TARGETS += arm64
TARGETS += bpf
TARGETS += breakpoints
TARGETS += capabilities
TARGETS += cgroup
+TARGETS += clone3
TARGETS += cpufreq
TARGETS += cpu-hotplug
TARGETS += drivers/dma-buf
@@ -11,6 +13,7 @@ TARGETS += efivarfs
TARGETS += exec
TARGETS += filesystems
TARGETS += filesystems/binderfs
+TARGETS += filesystems/epoll
TARGETS += firmware
TARGETS += ftrace
TARGETS += futex
@@ -23,12 +26,14 @@ TARGETS += kexec
TARGETS += kvm
TARGETS += lib
TARGETS += livepatch
+TARGETS += lkdtm
TARGETS += membarrier
TARGETS += memfd
TARGETS += memory-hotplug
TARGETS += mount
TARGETS += mqueue
TARGETS += net
+TARGETS += net/mptcp
TARGETS += netfilter
TARGETS += networking/timestamping
TARGETS += nsfs
@@ -37,6 +42,7 @@ TARGETS += powerpc
TARGETS += proc
TARGETS += pstore
TARGETS += ptrace
+TARGETS += openat2
TARGETS += rseq
TARGETS += rtc
TARGETS += seccomp
@@ -47,6 +53,7 @@ TARGETS += splice
TARGETS += static_keys
TARGETS += sync
TARGETS += sysctl
+TARGETS += timens
ifneq (1, $(quicktest))
TARGETS += timers
endif
@@ -63,6 +70,13 @@ TARGETS += zram
TARGETS_HOTPLUG = cpu-hotplug
TARGETS_HOTPLUG += memory-hotplug
+# User can optionally provide a TARGETS skiplist.
+SKIP_TARGETS ?=
+ifneq ($(SKIP_TARGETS),)
+ TMP := $(filter-out $(SKIP_TARGETS), $(TARGETS))
+ override TARGETS := $(TMP)
+endif
+
# Clear LDFLAGS and MAKEFLAGS if called from main
# Makefile to avoid test build failures when test
# Makefile doesn't have explicit build rules.
@@ -126,66 +140,74 @@ endif
# in the default INSTALL_HDR_PATH usr/include.
khdr:
ifeq (1,$(DEFAULT_INSTALL_HDR_PATH))
- make --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install
+ $(MAKE) --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install
else
- make --no-builtin-rules INSTALL_HDR_PATH=$$BUILD/usr \
+ $(MAKE) --no-builtin-rules INSTALL_HDR_PATH=$$BUILD/usr \
ARCH=$(ARCH) -C $(top_srcdir) headers_install
endif
all: khdr
- @for TARGET in $(TARGETS); do \
- BUILD_TARGET=$$BUILD/$$TARGET; \
- mkdir $$BUILD_TARGET -p; \
- make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
- done;
+ @ret=1; \
+ for TARGET in $(TARGETS); do \
+ BUILD_TARGET=$$BUILD/$$TARGET; \
+ mkdir $$BUILD_TARGET -p; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET; \
+ ret=$$((ret * $$?)); \
+ done; exit $$ret;
run_tests: all
@for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
- make OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
done;
hotplug:
@for TARGET in $(TARGETS_HOTPLUG); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
- make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET;\
done;
run_hotplug: hotplug
@for TARGET in $(TARGETS_HOTPLUG); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
- make OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\
done;
clean_hotplug:
@for TARGET in $(TARGETS_HOTPLUG); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
- make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
done;
run_pstore_crash:
- make -C pstore run_crash
+ $(MAKE) -C pstore run_crash
# Use $BUILD as the default install root. $BUILD points to the
# right output location for the following cases:
# 1. output_dir=kernel_src
# 2. a separate output directory is specified using O= KBUILD_OUTPUT
# 3. a separate output directory is specified using KBUILD_OUTPUT
+# Avoid conflict with INSTALL_PATH set by the main Makefile
#
-INSTALL_PATH ?= $(BUILD)/install
-INSTALL_PATH := $(abspath $(INSTALL_PATH))
+KSFT_INSTALL_PATH ?= $(BUILD)/kselftest_install
+KSFT_INSTALL_PATH := $(abspath $(KSFT_INSTALL_PATH))
+# Avoid changing the rest of the logic here and lib.mk.
+INSTALL_PATH := $(KSFT_INSTALL_PATH)
ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh
install: all
ifdef INSTALL_PATH
@# Ask all targets to install their files
mkdir -p $(INSTALL_PATH)/kselftest
+ install -m 744 kselftest/module.sh $(INSTALL_PATH)/kselftest/
install -m 744 kselftest/runner.sh $(INSTALL_PATH)/kselftest/
install -m 744 kselftest/prefix.pl $(INSTALL_PATH)/kselftest/
- @for TARGET in $(TARGETS); do \
+ @ret=1; \
+ for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
- make OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \
- done;
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \
+ ret=$$((ret * $$?)); \
+ done; exit $$ret;
@# Ask all targets to emit their test scripts
echo "#!/bin/sh" > $(ALL_SCRIPT)
@@ -198,12 +220,17 @@ ifdef INSTALL_PATH
echo " cat /dev/null > \$$logfile" >> $(ALL_SCRIPT)
echo "fi" >> $(ALL_SCRIPT)
+ @# While building run_kselftest.sh skip also non-existent TARGET dirs:
+ @# they could be the result of a build failure and should NOT be
+ @# included in the generated runlist.
for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
+ [ ! -d $(INSTALL_PATH)/$$TARGET ] && echo "Skipping non-existent dir: $$TARGET" && continue; \
echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \
echo "cd $$TARGET" >> $(ALL_SCRIPT); \
echo -n "run_many" >> $(ALL_SCRIPT); \
- make -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
+ echo -n "Emit Tests for $$TARGET\n"; \
+ $(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
echo "" >> $(ALL_SCRIPT); \
echo "cd \$$ROOT" >> $(ALL_SCRIPT); \
done;
@@ -216,7 +243,7 @@ endif
clean:
@for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
- make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
done;
.PHONY: khdr all run_tests hotplug run_hotplug clean_hotplug run_pstore_crash install clean
diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile
new file mode 100644
index 000000000000..93b567d23c8b
--- /dev/null
+++ b/tools/testing/selftests/arm64/Makefile
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# When ARCH not overridden for crosscompiling, lookup machine
+ARCH ?= $(shell uname -m 2>/dev/null || echo not)
+
+ifneq (,$(filter $(ARCH),aarch64 arm64))
+ARM64_SUBTARGETS ?= tags signal
+else
+ARM64_SUBTARGETS :=
+endif
+
+CFLAGS := -Wall -O2 -g
+
+# A proper top_srcdir is needed by KSFT(lib.mk)
+top_srcdir = $(realpath ../../../../)
+
+# Additional include paths needed by kselftest.h and local headers
+CFLAGS += -I$(top_srcdir)/tools/testing/selftests/
+
+# Guessing where the Kernel headers could have been installed
+# depending on ENV config
+ifeq ($(KBUILD_OUTPUT),)
+khdr_dir = $(top_srcdir)/usr/include
+else
+# the KSFT preferred location when KBUILD_OUTPUT is set
+khdr_dir = $(KBUILD_OUTPUT)/kselftest/usr/include
+endif
+
+CFLAGS += -I$(khdr_dir)
+
+export CFLAGS
+export top_srcdir
+
+all:
+ @for DIR in $(ARM64_SUBTARGETS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ mkdir -p $$BUILD_TARGET; \
+ make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
+ done
+
+install: all
+ @for DIR in $(ARM64_SUBTARGETS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
+ done
+
+run_tests: all
+ @for DIR in $(ARM64_SUBTARGETS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
+ done
+
+# Avoid any output on non arm64 on emit_tests
+emit_tests: all
+ @for DIR in $(ARM64_SUBTARGETS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
+ done
+
+clean:
+ @for DIR in $(ARM64_SUBTARGETS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
+ done
+
+.PHONY: all clean install run_tests emit_tests
diff --git a/tools/testing/selftests/arm64/README b/tools/testing/selftests/arm64/README
new file mode 100644
index 000000000000..a1badd882102
--- /dev/null
+++ b/tools/testing/selftests/arm64/README
@@ -0,0 +1,25 @@
+KSelfTest ARM64
+===============
+
+- These tests are arm64 specific and so not built or run but just skipped
+ completely when env-variable ARCH is found to be different than 'arm64'
+ and `uname -m` reports other than 'aarch64'.
+
+- Holding true the above, ARM64 KSFT tests can be run within the KSelfTest
+ framework using standard Linux top-level-makefile targets:
+
+ $ make TARGETS=arm64 kselftest-clean
+ $ make TARGETS=arm64 kselftest
+
+ or
+
+ $ make -C tools/testing/selftests TARGETS=arm64 \
+ INSTALL_PATH=<your-installation-path> install
+
+ or, alternatively, only specific arm64/ subtargets can be picked:
+
+ $ make -C tools/testing/selftests TARGETS=arm64 ARM64_SUBTARGETS="tags signal" \
+ INSTALL_PATH=<your-installation-path> install
+
+ Further details on building and running KFST can be found in:
+ Documentation/dev-tools/kselftest.rst
diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore
new file mode 100644
index 000000000000..3c5b4e8ff894
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/.gitignore
@@ -0,0 +1,3 @@
+mangle_*
+fake_sigreturn_*
+!*.[ch]
diff --git a/tools/testing/selftests/arm64/signal/Makefile b/tools/testing/selftests/arm64/signal/Makefile
new file mode 100644
index 000000000000..b497cfea4643
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/Makefile
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2019 ARM Limited
+
+# Additional include paths needed by kselftest.h and local headers
+CFLAGS += -D_GNU_SOURCE -std=gnu99 -I.
+
+SRCS := $(filter-out testcases/testcases.c,$(wildcard testcases/*.c))
+PROGS := $(patsubst %.c,%,$(SRCS))
+
+# Generated binaries to be installed by top KSFT script
+TEST_GEN_PROGS := $(notdir $(PROGS))
+
+# Get Kernel headers installed and use them.
+KSFT_KHDR_INSTALL := 1
+
+# Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list
+# to account for any OUTPUT target-dirs optionally provided by
+# the toplevel makefile
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): $(PROGS)
+ cp $(PROGS) $(OUTPUT)/
+
+clean:
+ $(CLEAN)
+ rm -f $(PROGS)
+
+# Common test-unit targets to build common-layout test-cases executables
+# Needs secondary expansion to properly include the testcase c-file in pre-reqs
+.SECONDEXPANSION:
+$(PROGS): test_signals.c test_signals_utils.c testcases/testcases.c signals.S $$@.c test_signals.h test_signals_utils.h testcases/testcases.h
+ $(CC) $(CFLAGS) $^ -o $@
diff --git a/tools/testing/selftests/arm64/signal/README b/tools/testing/selftests/arm64/signal/README
new file mode 100644
index 000000000000..967a531b245c
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/README
@@ -0,0 +1,59 @@
+KSelfTest arm64/signal/
+=======================
+
+Signals Tests
++++++++++++++
+
+- Tests are built around a common main compilation unit: such shared main
+ enforces a standard sequence of operations needed to perform a single
+ signal-test (setup/trigger/run/result/cleanup)
+
+- The above mentioned ops are configurable on a test-by-test basis: each test
+ is described (and configured) using the descriptor signals.h::struct tdescr
+
+- Each signal testcase is compiled into its own executable: a separate
+ executable is used for each test since many tests complete successfully
+ by receiving some kind of fatal signal from the Kernel, so it's safer
+ to run each test unit in its own standalone process, so as to start each
+ test from a clean slate.
+
+- New tests can be simply defined in testcases/ dir providing a proper struct
+ tdescr overriding all the defaults we wish to change (as of now providing a
+ custom run method is mandatory though)
+
+- Signals' test-cases hereafter defined belong currently to two
+ principal families:
+
+ - 'mangle_' tests: a real signal (SIGUSR1) is raised and used as a trigger
+ and then the test case code modifies the signal frame from inside the
+ signal handler itself.
+
+ - 'fake_sigreturn_' tests: a brand new custom artificial sigframe structure
+ is placed on the stack and a sigreturn syscall is called to simulate a
+ real signal return. This kind of tests does not use a trigger usually and
+ they are just fired using some simple included assembly trampoline code.
+
+ - Most of these tests are successfully passing if the process gets killed by
+ some fatal signal: usually SIGSEGV or SIGBUS. Since while writing this
+ kind of tests it is extremely easy in fact to end-up injecting other
+ unrelated SEGV bugs in the testcases, it becomes extremely tricky to
+ be really sure that the tests are really addressing what they are meant
+ to address and they are not instead falling apart due to unplanned bugs
+ in the test code.
+ In order to alleviate the misery of the life of such test-developer, a few
+ helpers are provided:
+
+ - a couple of ASSERT_BAD/GOOD_CONTEXT() macros to easily parse a ucontext_t
+ and verify if it is indeed GOOD or BAD (depending on what we were
+ expecting), using the same logic/perspective as in the arm64 Kernel signals
+ routines.
+
+ - a sanity mechanism to be used in 'fake_sigreturn_'-alike tests: enabled by
+ default it takes care to verify that the test-execution had at least
+ successfully progressed up to the stage of triggering the fake sigreturn
+ call.
+
+ In both cases test results are expected in terms of:
+ - some fatal signal sent by the Kernel to the test process
+ or
+ - analyzing some final regs state
diff --git a/tools/testing/selftests/arm64/signal/signals.S b/tools/testing/selftests/arm64/signal/signals.S
new file mode 100644
index 000000000000..9f8c1aefc3b9
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/signals.S
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019 ARM Limited */
+
+#include <asm/unistd.h>
+
+.section .rodata, "a"
+call_fmt:
+ .asciz "Calling sigreturn with fake sigframe sized:%zd at SP @%08lX\n"
+
+.text
+
+.globl fake_sigreturn
+
+/* fake_sigreturn x0:&sigframe, x1:sigframe_size, x2:misalign_bytes */
+fake_sigreturn:
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+
+ mov x20, x0
+ mov x21, x1
+ mov x22, x2
+
+ /* create space on the stack for fake sigframe 16 bytes-aligned */
+ add x0, x21, x22
+ add x0, x0, #15
+ bic x0, x0, #15 /* round_up(sigframe_size + misalign_bytes, 16) */
+ sub sp, sp, x0
+ add x23, sp, x22 /* new sigframe base with misaligment if any */
+
+ ldr x0, =call_fmt
+ mov x1, x21
+ mov x2, x23
+ bl printf
+
+ /* memcpy the provided content, while still keeping SP aligned */
+ mov x0, x23
+ mov x1, x20
+ mov x2, x21
+ bl memcpy
+
+ /*
+ * Here saving a last minute SP to current->token acts as a marker:
+ * if we got here, we are successfully faking a sigreturn; in other
+ * words we are sure no bad fatal signal has been raised till now
+ * for unrelated reasons, so we should consider the possibly observed
+ * fatal signal like SEGV coming from Kernel restore_sigframe() and
+ * triggered as expected from our test-case.
+ * For simplicity this assumes that current field 'token' is laid out
+ * as first in struct tdescr
+ */
+ ldr x0, current
+ str x23, [x0]
+ /* finally move SP to misaligned address...if any requested */
+ mov sp, x23
+
+ mov x8, #__NR_rt_sigreturn
+ svc #0
+
+ /*
+ * Above sigreturn should not return...looping here leads to a timeout
+ * and ensure proper and clean test failure, instead of jumping around
+ * on a potentially corrupted stack.
+ */
+ b .
diff --git a/tools/testing/selftests/arm64/signal/test_signals.c b/tools/testing/selftests/arm64/signal/test_signals.c
new file mode 100644
index 000000000000..416b1ff43199
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/test_signals.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Generic test wrapper for arm64 signal tests.
+ *
+ * Each test provides its own tde struct tdescr descriptor to link with
+ * this wrapper. Framework provides common helpers.
+ */
+#include <kselftest.h>
+
+#include "test_signals.h"
+#include "test_signals_utils.h"
+
+struct tdescr *current;
+
+int main(int argc, char *argv[])
+{
+ current = &tde;
+
+ ksft_print_msg("%s :: %s\n", current->name, current->descr);
+ if (test_setup(current) && test_init(current)) {
+ test_run(current);
+ test_cleanup(current);
+ }
+ test_result(current);
+
+ return current->result;
+}
diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h
new file mode 100644
index 000000000000..f96baf1cef1a
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/test_signals.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019 ARM Limited */
+
+#ifndef __TEST_SIGNALS_H__
+#define __TEST_SIGNALS_H__
+
+#include <signal.h>
+#include <stdbool.h>
+#include <ucontext.h>
+
+/*
+ * Using ARCH specific and sanitized Kernel headers installed by KSFT
+ * framework since we asked for it by setting flag KSFT_KHDR_INSTALL
+ * in our Makefile.
+ */
+#include <asm/ptrace.h>
+#include <asm/hwcap.h>
+
+#define __stringify_1(x...) #x
+#define __stringify(x...) __stringify_1(x)
+
+#define get_regval(regname, out) \
+{ \
+ asm volatile("mrs %0, " __stringify(regname) \
+ : "=r" (out) \
+ : \
+ : "memory"); \
+}
+
+/*
+ * Feature flags used in tdescr.feats_required to specify
+ * any feature by the test
+ */
+enum {
+ FSSBS_BIT,
+ FMAX_END
+};
+
+#define FEAT_SSBS (1UL << FSSBS_BIT)
+
+/*
+ * A descriptor used to describe and configure a test case.
+ * Fields with a non-trivial meaning are described inline in the following.
+ */
+struct tdescr {
+ /* KEEP THIS FIELD FIRST for easier lookup from assembly */
+ void *token;
+ /* when disabled token based sanity checking is skipped in handler */
+ bool sanity_disabled;
+ /* just a name for the test-case; manadatory field */
+ char *name;
+ char *descr;
+ unsigned long feats_required;
+ /* bitmask of effectively supported feats: populated at run-time */
+ unsigned long feats_supported;
+ bool initialized;
+ unsigned int minsigstksz;
+ /* signum used as a test trigger. Zero if no trigger-signal is used */
+ int sig_trig;
+ /*
+ * signum considered as a successful test completion.
+ * Zero when no signal is expected on success
+ */
+ int sig_ok;
+ /* signum expected on unsupported CPU features. */
+ int sig_unsupp;
+ /* a timeout in second for test completion */
+ unsigned int timeout;
+ bool triggered;
+ bool pass;
+ unsigned int result;
+ /* optional sa_flags for the installed handler */
+ int sa_flags;
+ ucontext_t saved_uc;
+ /* used by get_current_ctx() */
+ size_t live_sz;
+ ucontext_t *live_uc;
+ volatile sig_atomic_t live_uc_valid;
+ /* optional test private data */
+ void *priv;
+
+ /* a custom setup: called alternatively to default_setup */
+ int (*setup)(struct tdescr *td);
+ /* a custom init: called by default test init after test_setup */
+ bool (*init)(struct tdescr *td);
+ /* a custom cleanup function called before test exits */
+ void (*cleanup)(struct tdescr *td);
+ /* an optional function to be used as a trigger for starting test */
+ int (*trigger)(struct tdescr *td);
+ /*
+ * the actual test-core: invoked differently depending on the
+ * presence of the trigger function above; this is mandatory
+ */
+ int (*run)(struct tdescr *td, siginfo_t *si, ucontext_t *uc);
+ /* an optional function for custom results' processing */
+ void (*check_result)(struct tdescr *td);
+};
+
+extern struct tdescr tde;
+#endif
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c
new file mode 100644
index 000000000000..2de6e5ed5e25
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c
@@ -0,0 +1,328 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019 ARM Limited */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <sys/auxv.h>
+#include <linux/auxvec.h>
+#include <ucontext.h>
+
+#include <asm/unistd.h>
+
+#include <kselftest.h>
+
+#include "test_signals.h"
+#include "test_signals_utils.h"
+#include "testcases/testcases.h"
+
+
+extern struct tdescr *current;
+
+static int sig_copyctx = SIGTRAP;
+
+static char const *const feats_names[FMAX_END] = {
+ " SSBS ",
+};
+
+#define MAX_FEATS_SZ 128
+static char feats_string[MAX_FEATS_SZ];
+
+static inline char *feats_to_string(unsigned long feats)
+{
+ size_t flen = MAX_FEATS_SZ - 1;
+
+ for (int i = 0; i < FMAX_END; i++) {
+ if (feats & (1UL << i)) {
+ size_t tlen = strlen(feats_names[i]);
+
+ assert(flen > tlen);
+ flen -= tlen;
+ strncat(feats_string, feats_names[i], flen);
+ }
+ }
+
+ return feats_string;
+}
+
+static void unblock_signal(int signum)
+{
+ sigset_t sset;
+
+ sigemptyset(&sset);
+ sigaddset(&sset, signum);
+ sigprocmask(SIG_UNBLOCK, &sset, NULL);
+}
+
+static void default_result(struct tdescr *td, bool force_exit)
+{
+ if (td->result == KSFT_SKIP) {
+ fprintf(stderr, "==>> completed. SKIP.\n");
+ } else if (td->pass) {
+ fprintf(stderr, "==>> completed. PASS(1)\n");
+ td->result = KSFT_PASS;
+ } else {
+ fprintf(stdout, "==>> completed. FAIL(0)\n");
+ td->result = KSFT_FAIL;
+ }
+
+ if (force_exit)
+ exit(td->result);
+}
+
+/*
+ * The following handle_signal_* helpers are used by main default_handler
+ * and are meant to return true when signal is handled successfully:
+ * when false is returned instead, it means that the signal was somehow
+ * unexpected in that context and it was NOT handled; default_handler will
+ * take care of such unexpected situations.
+ */
+
+static bool handle_signal_unsupported(struct tdescr *td,
+ siginfo_t *si, void *uc)
+{
+ if (feats_ok(td))
+ return false;
+
+ /* Mangling PC to avoid loops on original SIGILL */
+ ((ucontext_t *)uc)->uc_mcontext.pc += 4;
+
+ if (!td->initialized) {
+ fprintf(stderr,
+ "Got SIG_UNSUPP @test_init. Ignore.\n");
+ } else {
+ fprintf(stderr,
+ "-- RX SIG_UNSUPP on unsupported feat...OK\n");
+ td->pass = 1;
+ default_result(current, 1);
+ }
+
+ return true;
+}
+
+static bool handle_signal_trigger(struct tdescr *td,
+ siginfo_t *si, void *uc)
+{
+ td->triggered = 1;
+ /* ->run was asserted NON-NULL in test_setup() already */
+ td->run(td, si, uc);
+
+ return true;
+}
+
+static bool handle_signal_ok(struct tdescr *td,
+ siginfo_t *si, void *uc)
+{
+ /*
+ * it's a bug in the test code when this assert fail:
+ * if sig_trig was defined, it must have been used before getting here.
+ */
+ assert(!td->sig_trig || td->triggered);
+ fprintf(stderr,
+ "SIG_OK -- SP:0x%llX si_addr@:%p si_code:%d token@:%p offset:%ld\n",
+ ((ucontext_t *)uc)->uc_mcontext.sp,
+ si->si_addr, si->si_code, td->token, td->token - si->si_addr);
+ /*
+ * fake_sigreturn tests, which have sanity_enabled=1, set, at the very
+ * last time, the token field to the SP address used to place the fake
+ * sigframe: so token==0 means we never made it to the end,
+ * segfaulting well-before, and the test is possibly broken.
+ */
+ if (!td->sanity_disabled && !td->token) {
+ fprintf(stdout,
+ "current->token ZEROED...test is probably broken!\n");
+ abort();
+ }
+ /*
+ * Trying to narrow down the SEGV to the ones generated by Kernel itself
+ * via arm64_notify_segfault(). This is a best-effort check anyway, and
+ * the si_code check may need to change if this aspect of the kernel
+ * ABI changes.
+ */
+ if (td->sig_ok == SIGSEGV && si->si_code != SEGV_ACCERR) {
+ fprintf(stdout,
+ "si_code != SEGV_ACCERR...test is probably broken!\n");
+ abort();
+ }
+ td->pass = 1;
+ /*
+ * Some tests can lead to SEGV loops: in such a case we want to
+ * terminate immediately exiting straight away; some others are not
+ * supposed to outlive the signal handler code, due to the content of
+ * the fake sigframe which caused the signal itself.
+ */
+ default_result(current, 1);
+
+ return true;
+}
+
+static bool handle_signal_copyctx(struct tdescr *td,
+ siginfo_t *si, void *uc)
+{
+ /* Mangling PC to avoid loops on original BRK instr */
+ ((ucontext_t *)uc)->uc_mcontext.pc += 4;
+ memcpy(td->live_uc, uc, td->live_sz);
+ ASSERT_GOOD_CONTEXT(td->live_uc);
+ td->live_uc_valid = 1;
+ fprintf(stderr,
+ "GOOD CONTEXT grabbed from sig_copyctx handler\n");
+
+ return true;
+}
+
+static void default_handler(int signum, siginfo_t *si, void *uc)
+{
+ if (current->sig_unsupp && signum == current->sig_unsupp &&
+ handle_signal_unsupported(current, si, uc)) {
+ fprintf(stderr, "Handled SIG_UNSUPP\n");
+ } else if (current->sig_trig && signum == current->sig_trig &&
+ handle_signal_trigger(current, si, uc)) {
+ fprintf(stderr, "Handled SIG_TRIG\n");
+ } else if (current->sig_ok && signum == current->sig_ok &&
+ handle_signal_ok(current, si, uc)) {
+ fprintf(stderr, "Handled SIG_OK\n");
+ } else if (signum == sig_copyctx && current->live_uc &&
+ handle_signal_copyctx(current, si, uc)) {
+ fprintf(stderr, "Handled SIG_COPYCTX\n");
+ } else {
+ if (signum == SIGALRM && current->timeout) {
+ fprintf(stderr, "-- Timeout !\n");
+ } else {
+ fprintf(stderr,
+ "-- RX UNEXPECTED SIGNAL: %d\n", signum);
+ }
+ default_result(current, 1);
+ }
+}
+
+static int default_setup(struct tdescr *td)
+{
+ struct sigaction sa;
+
+ sa.sa_sigaction = default_handler;
+ sa.sa_flags = SA_SIGINFO | SA_RESTART;
+ sa.sa_flags |= td->sa_flags;
+ sigemptyset(&sa.sa_mask);
+ /* uncatchable signals naturally skipped ... */
+ for (int sig = 1; sig < 32; sig++)
+ sigaction(sig, &sa, NULL);
+ /*
+ * RT Signals default disposition is Term but they cannot be
+ * generated by the Kernel in response to our tests; so just catch
+ * them all and report them as UNEXPECTED signals.
+ */
+ for (int sig = SIGRTMIN; sig <= SIGRTMAX; sig++)
+ sigaction(sig, &sa, NULL);
+
+ /* just in case...unblock explicitly all we need */
+ if (td->sig_trig)
+ unblock_signal(td->sig_trig);
+ if (td->sig_ok)
+ unblock_signal(td->sig_ok);
+ if (td->sig_unsupp)
+ unblock_signal(td->sig_unsupp);
+
+ if (td->timeout) {
+ unblock_signal(SIGALRM);
+ alarm(td->timeout);
+ }
+ fprintf(stderr, "Registered handlers for all signals.\n");
+
+ return 1;
+}
+
+static inline int default_trigger(struct tdescr *td)
+{
+ return !raise(td->sig_trig);
+}
+
+int test_init(struct tdescr *td)
+{
+ if (td->sig_trig == sig_copyctx) {
+ fprintf(stdout,
+ "Signal %d is RESERVED, cannot be used as a trigger. Aborting\n",
+ sig_copyctx);
+ return 0;
+ }
+ /* just in case */
+ unblock_signal(sig_copyctx);
+
+ td->minsigstksz = getauxval(AT_MINSIGSTKSZ);
+ if (!td->minsigstksz)
+ td->minsigstksz = MINSIGSTKSZ;
+ fprintf(stderr, "Detected MINSTKSIGSZ:%d\n", td->minsigstksz);
+
+ if (td->feats_required) {
+ td->feats_supported = 0;
+ /*
+ * Checking for CPU required features using both the
+ * auxval and the arm64 MRS Emulation to read sysregs.
+ */
+ if (getauxval(AT_HWCAP) & HWCAP_SSBS)
+ td->feats_supported |= FEAT_SSBS;
+ if (feats_ok(td))
+ fprintf(stderr,
+ "Required Features: [%s] supported\n",
+ feats_to_string(td->feats_required &
+ td->feats_supported));
+ else
+ fprintf(stderr,
+ "Required Features: [%s] NOT supported\n",
+ feats_to_string(td->feats_required &
+ ~td->feats_supported));
+ }
+
+ /* Perform test specific additional initialization */
+ if (td->init && !td->init(td)) {
+ fprintf(stderr, "FAILED Testcase initialization.\n");
+ return 0;
+ }
+ td->initialized = 1;
+ fprintf(stderr, "Testcase initialized.\n");
+
+ return 1;
+}
+
+int test_setup(struct tdescr *td)
+{
+ /* assert core invariants symptom of a rotten testcase */
+ assert(current);
+ assert(td);
+ assert(td->name);
+ assert(td->run);
+
+ /* Default result is FAIL if test setup fails */
+ td->result = KSFT_FAIL;
+ if (td->setup)
+ return td->setup(td);
+ else
+ return default_setup(td);
+}
+
+int test_run(struct tdescr *td)
+{
+ if (td->sig_trig) {
+ if (td->trigger)
+ return td->trigger(td);
+ else
+ return default_trigger(td);
+ } else {
+ return td->run(td, NULL, NULL);
+ }
+}
+
+void test_result(struct tdescr *td)
+{
+ if (td->initialized && td->result != KSFT_SKIP && td->check_result)
+ td->check_result(td);
+ default_result(td, 0);
+}
+
+void test_cleanup(struct tdescr *td)
+{
+ if (td->cleanup)
+ td->cleanup(td);
+}
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h
new file mode 100644
index 000000000000..6772b5c8d274
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019 ARM Limited */
+
+#ifndef __TEST_SIGNALS_UTILS_H__
+#define __TEST_SIGNALS_UTILS_H__
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "test_signals.h"
+
+int test_init(struct tdescr *td);
+int test_setup(struct tdescr *td);
+void test_cleanup(struct tdescr *td);
+int test_run(struct tdescr *td);
+void test_result(struct tdescr *td);
+
+static inline bool feats_ok(struct tdescr *td)
+{
+ return (td->feats_required & td->feats_supported) == td->feats_required;
+}
+
+/*
+ * Obtaining a valid and full-blown ucontext_t from userspace is tricky:
+ * libc getcontext does() not save all the regs and messes with some of
+ * them (pstate value in particular is not reliable).
+ *
+ * Here we use a service signal to grab the ucontext_t from inside a
+ * dedicated signal handler, since there, it is populated by Kernel
+ * itself in setup_sigframe(). The grabbed context is then stored and
+ * made available in td->live_uc.
+ *
+ * As service-signal is used a SIGTRAP induced by a 'brk' instruction,
+ * because here we have to avoid syscalls to trigger the signal since
+ * they would cause any SVE sigframe content (if any) to be removed.
+ *
+ * Anyway this function really serves a dual purpose:
+ *
+ * 1. grab a valid sigcontext into td->live_uc for result analysis: in
+ * such case it returns 1.
+ *
+ * 2. detect if, somehow, a previously grabbed live_uc context has been
+ * used actively with a sigreturn: in such a case the execution would have
+ * magically resumed in the middle of this function itself (seen_already==1):
+ * in such a case return 0, since in fact we have not just simply grabbed
+ * the context.
+ *
+ * This latter case is useful to detect when a fake_sigreturn test-case has
+ * unexpectedly survived without hitting a SEGV.
+ *
+ * Note that the case of runtime dynamically sized sigframes (like in SVE
+ * context) is still NOT addressed: sigframe size is supposed to be fixed
+ * at sizeof(ucontext_t).
+ */
+static __always_inline bool get_current_context(struct tdescr *td,
+ ucontext_t *dest_uc)
+{
+ static volatile bool seen_already;
+
+ assert(td && dest_uc);
+ /* it's a genuine invocation..reinit */
+ seen_already = 0;
+ td->live_uc_valid = 0;
+ td->live_sz = sizeof(*dest_uc);
+ memset(dest_uc, 0x00, td->live_sz);
+ td->live_uc = dest_uc;
+ /*
+ * Grab ucontext_t triggering a SIGTRAP.
+ *
+ * Note that:
+ * - live_uc_valid is declared volatile sig_atomic_t in
+ * struct tdescr since it will be changed inside the
+ * sig_copyctx handler
+ * - the additional 'memory' clobber is there to avoid possible
+ * compiler's assumption on live_uc_valid and the content
+ * pointed by dest_uc, which are all changed inside the signal
+ * handler
+ * - BRK causes a debug exception which is handled by the Kernel
+ * and finally causes the SIGTRAP signal to be delivered to this
+ * test thread. Since such delivery happens on the ret_to_user()
+ * /do_notify_resume() debug exception return-path, we are sure
+ * that the registered SIGTRAP handler has been run to completion
+ * before the execution path is restored here: as a consequence
+ * we can be sure that the volatile sig_atomic_t live_uc_valid
+ * carries a meaningful result. Being in a single thread context
+ * we'll also be sure that any access to memory modified by the
+ * handler (namely ucontext_t) will be visible once returned.
+ * - note that since we are using a breakpoint instruction here
+ * to cause a SIGTRAP, the ucontext_t grabbed from the signal
+ * handler would naturally contain a PC pointing exactly to this
+ * BRK line, which means that, on return from the signal handler,
+ * or if we place the ucontext_t on the stack to fake a sigreturn,
+ * we'll end up in an infinite loop of BRK-SIGTRAP-handler.
+ * For this reason we take care to artificially move forward the
+ * PC to the next instruction while inside the signal handler.
+ */
+ asm volatile ("brk #666"
+ : "+m" (*dest_uc)
+ :
+ : "memory");
+
+ /*
+ * If we get here with seen_already==1 it implies the td->live_uc
+ * context has been used to get back here....this probably means
+ * a test has failed to cause a SEGV...anyway live_uc does not
+ * point to a just acquired copy of ucontext_t...so return 0
+ */
+ if (seen_already) {
+ fprintf(stdout,
+ "Unexpected successful sigreturn detected: live_uc is stale !\n");
+ return 0;
+ }
+ seen_already = 1;
+
+ return td->live_uc_valid;
+}
+
+int fake_sigreturn(void *sigframe, size_t sz, int misalign_bytes);
+#endif
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c
new file mode 100644
index 000000000000..8dc600a7d4fd
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Place a fake sigframe on the stack including a BAD Unknown magic
+ * record: on sigreturn Kernel must spot this attempt and the test
+ * case is expected to be terminated via SEGV.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+
+static int fake_sigreturn_bad_magic_run(struct tdescr *td,
+ siginfo_t *si, ucontext_t *uc)
+{
+ struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head;
+
+ /* just to fill the ucontext_t with something real */
+ if (!get_current_context(td, &sf.uc))
+ return 1;
+
+ /* need at least 2*HDR_SZ space: KSFT_BAD_MAGIC + terminator. */
+ head = get_starting_head(shead, HDR_SZ * 2, GET_SF_RESV_SIZE(sf), NULL);
+ if (!head)
+ return 0;
+
+ /*
+ * use a well known NON existent bad magic...something
+ * we should pretty sure won't be ever defined in Kernel
+ */
+ head->magic = KSFT_BAD_MAGIC;
+ head->size = HDR_SZ;
+ write_terminator_record(GET_RESV_NEXT_HEAD(head));
+
+ ASSERT_BAD_CONTEXT(&sf.uc);
+ fake_sigreturn(&sf, sizeof(sf), 0);
+
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "FAKE_SIGRETURN_BAD_MAGIC",
+ .descr = "Trigger a sigreturn with a sigframe with a bad magic",
+ .sig_ok = SIGSEGV,
+ .timeout = 3,
+ .run = fake_sigreturn_bad_magic_run,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c
new file mode 100644
index 000000000000..b3c362100666
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Place a fake sigframe on the stack including a bad record overflowing
+ * the __reserved space: on sigreturn Kernel must spot this attempt and
+ * the test case is expected to be terminated via SEGV.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+
+#define MIN_SZ_ALIGN 16
+
+static int fake_sigreturn_bad_size_run(struct tdescr *td,
+ siginfo_t *si, ucontext_t *uc)
+{
+ size_t resv_sz, need_sz, offset;
+ struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head;
+
+ /* just to fill the ucontext_t with something real */
+ if (!get_current_context(td, &sf.uc))
+ return 1;
+
+ resv_sz = GET_SF_RESV_SIZE(sf);
+ /* at least HDR_SZ + bad sized esr_context needed */
+ need_sz = sizeof(struct esr_context) + HDR_SZ;
+ head = get_starting_head(shead, need_sz, resv_sz, &offset);
+ if (!head)
+ return 0;
+
+ /*
+ * Use an esr_context to build a fake header with a
+ * size greater then the free __reserved area minus HDR_SZ;
+ * using ESR_MAGIC here since it is not checked for size nor
+ * is limited to one instance.
+ *
+ * At first inject an additional normal esr_context
+ */
+ head->magic = ESR_MAGIC;
+ head->size = sizeof(struct esr_context);
+ /* and terminate properly */
+ write_terminator_record(GET_RESV_NEXT_HEAD(head));
+ ASSERT_GOOD_CONTEXT(&sf.uc);
+
+ /*
+ * now mess with fake esr_context size: leaving less space than
+ * needed while keeping size value 16-aligned
+ *
+ * It must trigger a SEGV from Kernel on:
+ *
+ * resv_sz - offset < sizeof(*head)
+ */
+ /* at first set the maximum good 16-aligned size */
+ head->size = (resv_sz - offset - need_sz + MIN_SZ_ALIGN) & ~0xfUL;
+ /* plus a bit more of 16-aligned sized stuff */
+ head->size += MIN_SZ_ALIGN;
+ /* and terminate properly */
+ write_terminator_record(GET_RESV_NEXT_HEAD(head));
+ ASSERT_BAD_CONTEXT(&sf.uc);
+ fake_sigreturn(&sf, sizeof(sf), 0);
+
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "FAKE_SIGRETURN_BAD_SIZE",
+ .descr = "Triggers a sigreturn with a overrun __reserved area",
+ .sig_ok = SIGSEGV,
+ .timeout = 3,
+ .run = fake_sigreturn_bad_size_run,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c
new file mode 100644
index 000000000000..a44b88bfc81a
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Place a fake sigframe on the stack including a badly sized terminator
+ * record: on sigreturn Kernel must spot this attempt and the test case
+ * is expected to be terminated via SEGV.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+
+static int fake_sigreturn_bad_size_for_magic0_run(struct tdescr *td,
+ siginfo_t *si, ucontext_t *uc)
+{
+ struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head;
+
+ /* just to fill the ucontext_t with something real */
+ if (!get_current_context(td, &sf.uc))
+ return 1;
+
+ /* at least HDR_SZ for the badly sized terminator. */
+ head = get_starting_head(shead, HDR_SZ, GET_SF_RESV_SIZE(sf), NULL);
+ if (!head)
+ return 0;
+
+ head->magic = 0;
+ head->size = HDR_SZ;
+ ASSERT_BAD_CONTEXT(&sf.uc);
+ fake_sigreturn(&sf, sizeof(sf), 0);
+
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "FAKE_SIGRETURN_BAD_SIZE_FOR_TERMINATOR",
+ .descr = "Trigger a sigreturn using non-zero size terminator",
+ .sig_ok = SIGSEGV,
+ .timeout = 3,
+ .run = fake_sigreturn_bad_size_for_magic0_run,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c
new file mode 100644
index 000000000000..afe8915f0998
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Place a fake sigframe on the stack including an additional FPSIMD
+ * record: on sigreturn Kernel must spot this attempt and the test
+ * case is expected to be terminated via SEGV.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+
+static int fake_sigreturn_duplicated_fpsimd_run(struct tdescr *td,
+ siginfo_t *si, ucontext_t *uc)
+{
+ struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head;
+
+ /* just to fill the ucontext_t with something real */
+ if (!get_current_context(td, &sf.uc))
+ return 1;
+
+ head = get_starting_head(shead, sizeof(struct fpsimd_context) + HDR_SZ,
+ GET_SF_RESV_SIZE(sf), NULL);
+ if (!head)
+ return 0;
+
+ /* Add a spurious fpsimd_context */
+ head->magic = FPSIMD_MAGIC;
+ head->size = sizeof(struct fpsimd_context);
+ /* and terminate */
+ write_terminator_record(GET_RESV_NEXT_HEAD(head));
+
+ ASSERT_BAD_CONTEXT(&sf.uc);
+ fake_sigreturn(&sf, sizeof(sf), 0);
+
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "FAKE_SIGRETURN_DUPLICATED_FPSIMD",
+ .descr = "Triggers a sigreturn including two fpsimd_context",
+ .sig_ok = SIGSEGV,
+ .timeout = 3,
+ .run = fake_sigreturn_duplicated_fpsimd_run,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c
new file mode 100644
index 000000000000..1e089e66f9f3
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Place a fake sigframe on the stack at a misaligned SP: on sigreturn
+ * Kernel must spot this attempt and the test case is expected to be
+ * terminated via SEGV.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+
+static int fake_sigreturn_misaligned_run(struct tdescr *td,
+ siginfo_t *si, ucontext_t *uc)
+{
+ /* just to fill the ucontext_t with something real */
+ if (!get_current_context(td, &sf.uc))
+ return 1;
+
+ /* Forcing sigframe on misaligned SP (16 + 3) */
+ fake_sigreturn(&sf, sizeof(sf), 3);
+
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "FAKE_SIGRETURN_MISALIGNED_SP",
+ .descr = "Triggers a sigreturn with a misaligned sigframe",
+ .sig_ok = SIGSEGV,
+ .timeout = 3,
+ .run = fake_sigreturn_misaligned_run,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c
new file mode 100644
index 000000000000..08ecd8073a1a
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Place a fake sigframe on the stack missing the mandatory FPSIMD
+ * record: on sigreturn Kernel must spot this attempt and the test
+ * case is expected to be terminated via SEGV.
+ */
+
+#include <stdio.h>
+#include <signal.h>
+#include <ucontext.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+
+static int fake_sigreturn_missing_fpsimd_run(struct tdescr *td,
+ siginfo_t *si, ucontext_t *uc)
+{
+ size_t resv_sz, offset;
+ struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+
+ /* just to fill the ucontext_t with something real */
+ if (!get_current_context(td, &sf.uc))
+ return 1;
+
+ resv_sz = GET_SF_RESV_SIZE(sf);
+ head = get_header(head, FPSIMD_MAGIC, resv_sz, &offset);
+ if (head && resv_sz - offset >= HDR_SZ) {
+ fprintf(stderr, "Mangling template header. Spare space:%zd\n",
+ resv_sz - offset);
+ /* Just overwrite fpsmid_context */
+ write_terminator_record(head);
+
+ ASSERT_BAD_CONTEXT(&sf.uc);
+ fake_sigreturn(&sf, sizeof(sf), 0);
+ }
+
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "FAKE_SIGRETURN_MISSING_FPSIMD",
+ .descr = "Triggers a sigreturn with a missing fpsimd_context",
+ .sig_ok = SIGSEGV,
+ .timeout = 3,
+ .run = fake_sigreturn_missing_fpsimd_run,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_compat_toggle.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_compat_toggle.c
new file mode 100644
index 000000000000..2cb118b0ba05
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_compat_toggle.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Try to mangle the ucontext from inside a signal handler, toggling
+ * the execution state bit: this attempt must be spotted by Kernel and
+ * the test case is expected to be terminated via SEGV.
+ */
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static int mangle_invalid_pstate_run(struct tdescr *td, siginfo_t *si,
+ ucontext_t *uc)
+{
+ ASSERT_GOOD_CONTEXT(uc);
+
+ /* This config should trigger a SIGSEGV by Kernel */
+ uc->uc_mcontext.pstate ^= PSR_MODE32_BIT;
+
+ return 1;
+}
+
+struct tdescr tde = {
+ .sanity_disabled = true,
+ .name = "MANGLE_PSTATE_INVALID_STATE_TOGGLE",
+ .descr = "Mangling uc_mcontext with INVALID STATE_TOGGLE",
+ .sig_trig = SIGUSR1,
+ .sig_ok = SIGSEGV,
+ .run = mangle_invalid_pstate_run,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_daif_bits.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_daif_bits.c
new file mode 100644
index 000000000000..434b82597007
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_daif_bits.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Try to mangle the ucontext from inside a signal handler, mangling the
+ * DAIF bits in an illegal manner: this attempt must be spotted by Kernel
+ * and the test case is expected to be terminated via SEGV.
+ *
+ */
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static int mangle_invalid_pstate_run(struct tdescr *td, siginfo_t *si,
+ ucontext_t *uc)
+{
+ ASSERT_GOOD_CONTEXT(uc);
+
+ /*
+ * This config should trigger a SIGSEGV by Kernel when it checks
+ * the sigframe consistency in valid_user_regs() routine.
+ */
+ uc->uc_mcontext.pstate |= PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT;
+
+ return 1;
+}
+
+struct tdescr tde = {
+ .sanity_disabled = true,
+ .name = "MANGLE_PSTATE_INVALID_DAIF_BITS",
+ .descr = "Mangling uc_mcontext with INVALID DAIF_BITS",
+ .sig_trig = SIGUSR1,
+ .sig_ok = SIGSEGV,
+ .run = mangle_invalid_pstate_run,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1h.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1h.c
new file mode 100644
index 000000000000..95f821abdf46
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1h.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Try to mangle the ucontext from inside a signal handler, toggling
+ * the mode bit to escalate exception level: this attempt must be spotted
+ * by Kernel and the test case is expected to be termninated via SEGV.
+ */
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+#include "mangle_pstate_invalid_mode_template.h"
+
+DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(1h);
diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1t.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1t.c
new file mode 100644
index 000000000000..cc222d8a618a
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1t.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Try to mangle the ucontext from inside a signal handler, toggling
+ * the mode bit to escalate exception level: this attempt must be spotted
+ * by Kernel and the test case is expected to be termninated via SEGV.
+ */
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+#include "mangle_pstate_invalid_mode_template.h"
+
+DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(1t);
diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2h.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2h.c
new file mode 100644
index 000000000000..2188add7d28c
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2h.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Try to mangle the ucontext from inside a signal handler, toggling
+ * the mode bit to escalate exception level: this attempt must be spotted
+ * by Kernel and the test case is expected to be termninated via SEGV.
+ */
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+#include "mangle_pstate_invalid_mode_template.h"
+
+DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(2h);
diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2t.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2t.c
new file mode 100644
index 000000000000..df32dd5a479c
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2t.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Try to mangle the ucontext from inside a signal handler, toggling
+ * the mode bit to escalate exception level: this attempt must be spotted
+ * by Kernel and the test case is expected to be termninated via SEGV.
+ */
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+#include "mangle_pstate_invalid_mode_template.h"
+
+DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(2t);
diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3h.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3h.c
new file mode 100644
index 000000000000..9e6829b7e5db
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3h.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Try to mangle the ucontext from inside a signal handler, toggling
+ * the mode bit to escalate exception level: this attempt must be spotted
+ * by Kernel and the test case is expected to be termninated via SEGV.
+ */
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+#include "mangle_pstate_invalid_mode_template.h"
+
+DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(3h);
diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3t.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3t.c
new file mode 100644
index 000000000000..5685a4f10d06
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3t.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Try to mangle the ucontext from inside a signal handler, toggling
+ * the mode bit to escalate exception level: this attempt must be spotted
+ * by Kernel and the test case is expected to be termninated via SEGV.
+ */
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+#include "mangle_pstate_invalid_mode_template.h"
+
+DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(3t);
diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_template.h b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_template.h
new file mode 100644
index 000000000000..f5bf1804d858
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_template.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Utility macro to ease definition of testcases toggling mode EL
+ */
+
+#define DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(_mode) \
+ \
+static int mangle_invalid_pstate_run(struct tdescr *td, siginfo_t *si, \
+ ucontext_t *uc) \
+{ \
+ ASSERT_GOOD_CONTEXT(uc); \
+ \
+ uc->uc_mcontext.pstate &= ~PSR_MODE_MASK; \
+ uc->uc_mcontext.pstate |= PSR_MODE_EL ## _mode; \
+ \
+ return 1; \
+} \
+ \
+struct tdescr tde = { \
+ .sanity_disabled = true, \
+ .name = "MANGLE_PSTATE_INVALID_MODE_EL"#_mode, \
+ .descr = "Mangling uc_mcontext INVALID MODE EL"#_mode, \
+ .sig_trig = SIGUSR1, \
+ .sig_ok = SIGSEGV, \
+ .run = mangle_invalid_pstate_run, \
+}
diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c
new file mode 100644
index 000000000000..61ebcdf63831
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019 ARM Limited */
+#include "testcases.h"
+
+struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic,
+ size_t resv_sz, size_t *offset)
+{
+ size_t offs = 0;
+ struct _aarch64_ctx *found = NULL;
+
+ if (!head || resv_sz < HDR_SZ)
+ return found;
+
+ while (offs <= resv_sz - HDR_SZ &&
+ head->magic != magic && head->magic) {
+ offs += head->size;
+ head = GET_RESV_NEXT_HEAD(head);
+ }
+ if (head->magic == magic) {
+ found = head;
+ if (offset)
+ *offset = offs;
+ }
+
+ return found;
+}
+
+bool validate_extra_context(struct extra_context *extra, char **err)
+{
+ struct _aarch64_ctx *term;
+
+ if (!extra || !err)
+ return false;
+
+ fprintf(stderr, "Validating EXTRA...\n");
+ term = GET_RESV_NEXT_HEAD(extra);
+ if (!term || term->magic || term->size) {
+ *err = "Missing terminator after EXTRA context";
+ return false;
+ }
+ if (extra->datap & 0x0fUL)
+ *err = "Extra DATAP misaligned";
+ else if (extra->size & 0x0fUL)
+ *err = "Extra SIZE misaligned";
+ else if (extra->datap != (uint64_t)term + sizeof(*term))
+ *err = "Extra DATAP misplaced (not contiguous)";
+ if (*err)
+ return false;
+
+ return true;
+}
+
+bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
+{
+ bool terminated = false;
+ size_t offs = 0;
+ int flags = 0;
+ struct extra_context *extra = NULL;
+ struct _aarch64_ctx *head =
+ (struct _aarch64_ctx *)uc->uc_mcontext.__reserved;
+
+ if (!err)
+ return false;
+ /* Walk till the end terminator verifying __reserved contents */
+ while (head && !terminated && offs < resv_sz) {
+ if ((uint64_t)head & 0x0fUL) {
+ *err = "Misaligned HEAD";
+ return false;
+ }
+
+ switch (head->magic) {
+ case 0:
+ if (head->size)
+ *err = "Bad size for terminator";
+ else
+ terminated = true;
+ break;
+ case FPSIMD_MAGIC:
+ if (flags & FPSIMD_CTX)
+ *err = "Multiple FPSIMD_MAGIC";
+ else if (head->size !=
+ sizeof(struct fpsimd_context))
+ *err = "Bad size for fpsimd_context";
+ flags |= FPSIMD_CTX;
+ break;
+ case ESR_MAGIC:
+ if (head->size != sizeof(struct esr_context))
+ *err = "Bad size for esr_context";
+ break;
+ case SVE_MAGIC:
+ if (flags & SVE_CTX)
+ *err = "Multiple SVE_MAGIC";
+ else if (head->size !=
+ sizeof(struct sve_context))
+ *err = "Bad size for sve_context";
+ flags |= SVE_CTX;
+ break;
+ case EXTRA_MAGIC:
+ if (flags & EXTRA_CTX)
+ *err = "Multiple EXTRA_MAGIC";
+ else if (head->size !=
+ sizeof(struct extra_context))
+ *err = "Bad size for extra_context";
+ flags |= EXTRA_CTX;
+ extra = (struct extra_context *)head;
+ break;
+ case KSFT_BAD_MAGIC:
+ /*
+ * This is a BAD magic header defined
+ * artificially by a testcase and surely
+ * unknown to the Kernel parse_user_sigframe().
+ * It MUST cause a Kernel induced SEGV
+ */
+ *err = "BAD MAGIC !";
+ break;
+ default:
+ /*
+ * A still unknown Magic: potentially freshly added
+ * to the Kernel code and still unknown to the
+ * tests.
+ */
+ fprintf(stdout,
+ "SKIP Unknown MAGIC: 0x%X - Is KSFT arm64/signal up to date ?\n",
+ head->magic);
+ break;
+ }
+
+ if (*err)
+ return false;
+
+ offs += head->size;
+ if (resv_sz < offs + sizeof(*head)) {
+ *err = "HEAD Overrun";
+ return false;
+ }
+
+ if (flags & EXTRA_CTX)
+ if (!validate_extra_context(extra, err))
+ return false;
+
+ head = GET_RESV_NEXT_HEAD(head);
+ }
+
+ if (terminated && !(flags & FPSIMD_CTX)) {
+ *err = "Missing FPSIMD";
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * This function walks through the records inside the provided reserved area
+ * trying to find enough space to fit @need_sz bytes: if not enough space is
+ * available and an extra_context record is present, it throws away the
+ * extra_context record.
+ *
+ * It returns a pointer to a new header where it is possible to start storing
+ * our need_sz bytes.
+ *
+ * @shead: points to the start of reserved area
+ * @need_sz: needed bytes
+ * @resv_sz: reserved area size in bytes
+ * @offset: if not null, this will be filled with the offset of the return
+ * head pointer from @shead
+ *
+ * @return: pointer to a new head where to start storing need_sz bytes, or
+ * NULL if space could not be made available.
+ */
+struct _aarch64_ctx *get_starting_head(struct _aarch64_ctx *shead,
+ size_t need_sz, size_t resv_sz,
+ size_t *offset)
+{
+ size_t offs = 0;
+ struct _aarch64_ctx *head;
+
+ head = get_terminator(shead, resv_sz, &offs);
+ /* not found a terminator...no need to update offset if any */
+ if (!head)
+ return head;
+ if (resv_sz - offs < need_sz) {
+ fprintf(stderr, "Low on space:%zd. Discarding extra_context.\n",
+ resv_sz - offs);
+ head = get_header(shead, EXTRA_MAGIC, resv_sz, &offs);
+ if (!head || resv_sz - offs < need_sz) {
+ fprintf(stderr,
+ "Failed to reclaim space on sigframe.\n");
+ return NULL;
+ }
+ }
+
+ fprintf(stderr, "Available space:%zd\n", resv_sz - offs);
+ if (offset)
+ *offset = offs;
+ return head;
+}
diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h
new file mode 100644
index 000000000000..ad884c135314
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019 ARM Limited */
+#ifndef __TESTCASES_H__
+#define __TESTCASES_H__
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <signal.h>
+
+/* Architecture specific sigframe definitions */
+#include <asm/sigcontext.h>
+
+#define FPSIMD_CTX (1 << 0)
+#define SVE_CTX (1 << 1)
+#define EXTRA_CTX (1 << 2)
+
+#define KSFT_BAD_MAGIC 0xdeadbeef
+
+#define HDR_SZ \
+ sizeof(struct _aarch64_ctx)
+
+#define GET_SF_RESV_HEAD(sf) \
+ (struct _aarch64_ctx *)(&(sf).uc.uc_mcontext.__reserved)
+
+#define GET_SF_RESV_SIZE(sf) \
+ sizeof((sf).uc.uc_mcontext.__reserved)
+
+#define GET_UCP_RESV_SIZE(ucp) \
+ sizeof((ucp)->uc_mcontext.__reserved)
+
+#define ASSERT_BAD_CONTEXT(uc) do { \
+ char *err = NULL; \
+ if (!validate_reserved((uc), GET_UCP_RESV_SIZE((uc)), &err)) { \
+ if (err) \
+ fprintf(stderr, \
+ "Using badly built context - ERR: %s\n",\
+ err); \
+ } else { \
+ abort(); \
+ } \
+} while (0)
+
+#define ASSERT_GOOD_CONTEXT(uc) do { \
+ char *err = NULL; \
+ if (!validate_reserved((uc), GET_UCP_RESV_SIZE((uc)), &err)) { \
+ if (err) \
+ fprintf(stderr, \
+ "Detected BAD context - ERR: %s\n", err);\
+ abort(); \
+ } else { \
+ fprintf(stderr, "uc context validated.\n"); \
+ } \
+} while (0)
+
+/*
+ * A simple record-walker for __reserved area: it walks through assuming
+ * only to find a proper struct __aarch64_ctx header descriptor.
+ *
+ * Instead it makes no assumptions on the content and ordering of the
+ * records, any needed bounds checking must be enforced by the caller
+ * if wanted: this way can be used by caller on any maliciously built bad
+ * contexts.
+ *
+ * head->size accounts both for payload and header _aarch64_ctx size !
+ */
+#define GET_RESV_NEXT_HEAD(h) \
+ (struct _aarch64_ctx *)((char *)(h) + (h)->size)
+
+struct fake_sigframe {
+ siginfo_t info;
+ ucontext_t uc;
+};
+
+
+bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err);
+
+bool validate_extra_context(struct extra_context *extra, char **err);
+
+struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic,
+ size_t resv_sz, size_t *offset);
+
+static inline struct _aarch64_ctx *get_terminator(struct _aarch64_ctx *head,
+ size_t resv_sz,
+ size_t *offset)
+{
+ return get_header(head, 0, resv_sz, offset);
+}
+
+static inline void write_terminator_record(struct _aarch64_ctx *tail)
+{
+ if (tail) {
+ tail->magic = 0;
+ tail->size = 0;
+ }
+}
+
+struct _aarch64_ctx *get_starting_head(struct _aarch64_ctx *shead,
+ size_t need_sz, size_t resv_sz,
+ size_t *offset);
+#endif
diff --git a/tools/testing/selftests/arm64/tags/.gitignore b/tools/testing/selftests/arm64/tags/.gitignore
new file mode 100644
index 000000000000..e8fae8d61ed6
--- /dev/null
+++ b/tools/testing/selftests/arm64/tags/.gitignore
@@ -0,0 +1 @@
+tags_test
diff --git a/tools/testing/selftests/arm64/tags/Makefile b/tools/testing/selftests/arm64/tags/Makefile
new file mode 100644
index 000000000000..41cb75070511
--- /dev/null
+++ b/tools/testing/selftests/arm64/tags/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += -I../../../../../usr/include/
+TEST_GEN_PROGS := tags_test
+TEST_PROGS := run_tags_test.sh
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/arm64/tags/run_tags_test.sh b/tools/testing/selftests/arm64/tags/run_tags_test.sh
new file mode 100755
index 000000000000..745f11379930
--- /dev/null
+++ b/tools/testing/selftests/arm64/tags/run_tags_test.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+echo "--------------------"
+echo "running tags test"
+echo "--------------------"
+./tags_test
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+else
+ echo "[PASS]"
+fi
diff --git a/tools/testing/selftests/arm64/tags/tags_test.c b/tools/testing/selftests/arm64/tags/tags_test.c
new file mode 100644
index 000000000000..5701163460ef
--- /dev/null
+++ b/tools/testing/selftests/arm64/tags/tags_test.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/prctl.h>
+#include <sys/utsname.h>
+
+#define SHIFT_TAG(tag) ((uint64_t)(tag) << 56)
+#define SET_TAG(ptr, tag) (((uint64_t)(ptr) & ~SHIFT_TAG(0xff)) | \
+ SHIFT_TAG(tag))
+
+int main(void)
+{
+ static int tbi_enabled = 0;
+ unsigned long tag = 0;
+ struct utsname *ptr;
+ int err;
+
+ if (prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, 0, 0, 0) == 0)
+ tbi_enabled = 1;
+ ptr = (struct utsname *)malloc(sizeof(*ptr));
+ if (tbi_enabled)
+ tag = 0x42;
+ ptr = (struct utsname *)SET_TAG(ptr, tag);
+ err = uname(ptr);
+ free(ptr);
+
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 90f70d2c7c22..ec464859c6b6 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -7,11 +7,10 @@ FEATURE-DUMP.libbpf
fixdep
test_align
test_dev_cgroup
-test_progs
+/test_progs*
test_tcpbpf_user
test_verifier_log
feature
-test_libbpf_open
test_sock
test_sock_addr
test_sock_fields
@@ -22,24 +21,20 @@ test_lirc_mode2_user
get_cgroup_id_user
test_skb_cgroup_id_user
test_socket_cookie
-test_cgroup_attach
test_cgroup_storage
-test_select_reuseport
test_flow_dissector
flow_dissector_load
test_netcnt
-test_section_names
test_tcpnotify_user
test_libbpf
test_tcp_check_syncookie_user
test_sysctl
-alu32
-libbpf.pc
-libbpf.so.*
test_hashmap
test_btf_dump
xdping
-test_sockopt
-test_sockopt_sk
-test_sockopt_multi
-test_tcp_rtt
+test_cpp
+*.skel.h
+/no_alu32
+/bpf_gcc
+/tools
+
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index c085964e1d05..257a1aaaa37d 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -2,10 +2,14 @@
include ../../../../scripts/Kbuild.include
include ../../../scripts/Makefile.arch
-LIBDIR := ../../../lib
+CURDIR := $(abspath .)
+TOOLSDIR := $(abspath ../../..)
+LIBDIR := $(TOOLSDIR)/lib
BPFDIR := $(LIBDIR)/bpf
-APIDIR := ../../../include/uapi
-GENDIR := ../../../../include/generated
+TOOLSINCDIR := $(TOOLSDIR)/include
+BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool
+APIDIR := $(TOOLSINCDIR)/uapi
+GENDIR := $(abspath ../../../../include/generated)
GENHDR := $(GENDIR)/autoconf.h
ifneq ($(wildcard $(GENHDR)),)
@@ -15,38 +19,32 @@ endif
CLANG ?= clang
LLC ?= llc
LLVM_OBJCOPY ?= llvm-objcopy
-LLVM_READELF ?= llvm-readelf
-BTF_PAHOLE ?= pahole
-CFLAGS += -g -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include \
- -Dbpf_prog_load=bpf_prog_test_load \
+BPF_GCC ?= $(shell command -v bpf-gcc;)
+CFLAGS += -g -Wall -O2 $(GENFLAGS) -I$(CURDIR) -I$(APIDIR) \
+ -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) -I$(TOOLSINCDIR) \
+ -Dbpf_prog_load=bpf_prog_test_load \
-Dbpf_load_program=bpf_test_load_program
-LDLIBS += -lcap -lelf -lrt -lpthread
+LDLIBS += -lcap -lelf -lz -lrt -lpthread
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \
- test_cgroup_storage test_select_reuseport test_section_names \
+ test_cgroup_storage \
test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
- test_btf_dump test_cgroup_attach xdping test_sockopt test_sockopt_sk \
- test_sockopt_multi test_tcp_rtt
-
-BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
-TEST_GEN_FILES = $(BPF_OBJ_FILES)
-
-# Also test sub-register code-gen if LLVM has eBPF v3 processor support which
-# contains both ALU32 and JMP32 instructions.
-SUBREG_CODEGEN := $(shell echo "int cal(int a) { return a > 0; }" | \
- $(CLANG) -target bpf -O2 -emit-llvm -S -x c - -o - | \
- $(LLC) -mattr=+alu32 -mcpu=v3 2>&1 | \
- grep 'if w')
-ifneq ($(SUBREG_CODEGEN),)
-TEST_GEN_FILES += $(patsubst %.o,alu32/%.o, $(BPF_OBJ_FILES))
+ test_progs-no_alu32
+
+# Also test bpf-gcc, if present
+ifneq ($(BPF_GCC),)
+TEST_GEN_PROGS += test_progs-bpf_gcc
endif
+TEST_GEN_FILES =
+TEST_FILES = test_lwt_ip_encap.o \
+ test_tc_edt.o
+
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
- test_libbpf.sh \
test_xdp_redirect.sh \
test_xdp_meta.sh \
test_xdp_veth.sh \
@@ -63,35 +61,83 @@ TEST_PROGS := test_kmod.sh \
test_tcp_check_syncookie.sh \
test_tc_tunnel.sh \
test_tc_edt.sh \
- test_xdping.sh
+ test_xdping.sh \
+ test_bpftool_build.sh
TEST_PROGS_EXTENDED := with_addr.sh \
with_tunnels.sh \
tcp_client.py \
- tcp_server.py
+ tcp_server.py \
+ test_xdp_vlan.sh
# Compile but not part of 'make run_tests'
-TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user \
+TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
- test_lirc_mode2_user
-
-include ../lib.mk
-
-# NOTE: $(OUTPUT) won't get default value if used before lib.mk
-TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
-all: $(TEST_CUSTOM_PROGS)
-
-$(OUTPUT)/urandom_read: $(OUTPUT)/%: %.c
- $(CC) -o $@ $< -Wl,--build-id
-
-$(OUTPUT)/test_stub.o: test_stub.c
- $(CC) $(TEST_PROGS_CFLAGS) $(CFLAGS) -c -o $@ $<
+ test_lirc_mode2_user xdping test_cpp runqslower
+
+TEST_CUSTOM_PROGS = urandom_read
+
+# Emit succinct information message describing current building step
+# $1 - generic step name (e.g., CC, LINK, etc);
+# $2 - optional "flavor" specifier; if provided, will be emitted as [flavor];
+# $3 - target (assumed to be file); only file name will be emitted;
+# $4 - optional extra arg, emitted as-is, if provided.
+ifeq ($(V),1)
+Q =
+msg =
+else
+Q = @
+msg = @printf ' %-8s%s %s%s\n' "$(1)" "$(if $(2), [$(2)])" "$(notdir $(3))" "$(if $(4), $(4))";
+MAKEFLAGS += --no-print-directory
+submake_extras := feature_display=0
+endif
-BPFOBJ := $(OUTPUT)/libbpf.a
+# override lib.mk's default rules
+OVERRIDE_TARGETS := 1
+override define CLEAN
+ $(call msg,CLEAN)
+ $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
+endef
-$(TEST_GEN_PROGS): $(OUTPUT)/test_stub.o $(BPFOBJ)
+include ../lib.mk
-$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(OUTPUT)/libbpf.a
+SCRATCH_DIR := $(OUTPUT)/tools
+BUILD_DIR := $(SCRATCH_DIR)/build
+INCLUDE_DIR := $(SCRATCH_DIR)/include
+BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
+
+# Define simple and short `make test_progs`, `make test_sysctl`, etc targets
+# to build individual tests.
+# NOTE: Semicolon at the end is critical to override lib.mk's default static
+# rule for binaries.
+$(notdir $(TEST_GEN_PROGS) \
+ $(TEST_PROGS) \
+ $(TEST_PROGS_EXTENDED) \
+ $(TEST_GEN_PROGS_EXTENDED) \
+ $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ;
+
+$(OUTPUT)/%:%.c
+ $(call msg,BINARY,,$@)
+ $(LINK.c) $^ $(LDLIBS) -o $@
+
+$(OUTPUT)/urandom_read: urandom_read.c
+ $(call msg,BINARY,,$@)
+ $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id
+
+$(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ)
+ $(call msg,CC,,$@)
+ $(CC) -c $(CFLAGS) -o $@ $<
+
+VMLINUX_BTF_PATHS := $(abspath ../../../../vmlinux) \
+ /sys/kernel/btf/vmlinux \
+ /boot/vmlinux-$(shell uname -r)
+VMLINUX_BTF:= $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))
+$(OUTPUT)/runqslower: $(BPFOBJ)
+ $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \
+ OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \
+ BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR)
+
+$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)
$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
$(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c
@@ -101,35 +147,29 @@ $(OUTPUT)/test_socket_cookie: cgroup_helpers.c
$(OUTPUT)/test_sockmap: cgroup_helpers.c
$(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c
$(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c
-$(OUTPUT)/test_progs: trace_helpers.c
$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
$(OUTPUT)/test_netcnt: cgroup_helpers.c
$(OUTPUT)/test_sock_fields: cgroup_helpers.c
$(OUTPUT)/test_sysctl: cgroup_helpers.c
-$(OUTPUT)/test_cgroup_attach: cgroup_helpers.c
-$(OUTPUT)/test_sockopt: cgroup_helpers.c
-$(OUTPUT)/test_sockopt_sk: cgroup_helpers.c
-$(OUTPUT)/test_sockopt_multi: cgroup_helpers.c
-$(OUTPUT)/test_tcp_rtt: cgroup_helpers.c
-.PHONY: force
-
-# force a rebuild of BPFOBJ when its dependencies are updated
-force:
-
-$(BPFOBJ): force
- $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/
-
-PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
-
-# Let newer LLVM versions transparently probe the kernel for availability
-# of full BPF instruction set.
-ifeq ($(PROBE),)
- CPU ?= probe
-else
- CPU ?= generic
-endif
+DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool
+BPFTOOL ?= $(DEFAULT_BPFTOOL)
+$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
+ $(BPFOBJ) | $(BUILD_DIR)/bpftool
+ $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
+ OUTPUT=$(BUILD_DIR)/bpftool/ \
+ prefix= DESTDIR=$(SCRATCH_DIR)/ install
+
+$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
+ ../../../include/uapi/linux/bpf.h \
+ | $(INCLUDE_DIR) $(BUILD_DIR)/libbpf
+ $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
+ DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
+
+$(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(INCLUDE_DIR):
+ $(call msg,MKDIR,,$@)
+ mkdir -p $@
# Get Clang's default includes on this system, as opposed to those seen by
# '-target bpf'. This fixes "missing" files on some architectures/distros,
@@ -137,157 +177,222 @@ endif
#
# Use '-idirafter': Don't interfere with include mechanics except where the
# build would have failed anyways.
-CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \
+define get_sys_includes
+$(shell $(1) -v -E - </dev/null 2>&1 \
| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
+endef
-CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
- $(CLANG_SYS_INCLUDES) \
- -Wno-compare-distinct-pointer-types \
- -D__TARGET_ARCH_$(SRCARCH)
+# Determine target endianness.
+IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \
+ grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__')
+MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
-$(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
-$(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
+CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
+BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
+ -I$(INCLUDE_DIR) -I$(CURDIR) -I$(CURDIR)/include/uapi \
+ -I$(APIDIR) -I$(abspath $(OUTPUT)/../usr/include)
-$(OUTPUT)/test_queue_map.o: test_queue_stack_map.h
-$(OUTPUT)/test_stack_map.o: test_queue_stack_map.h
+CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
+ -Wno-compare-distinct-pointer-types
-$(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h
-$(OUTPUT)/test_progs.o: flow_dissector_load.h
-
-BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
-BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
-BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
-BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \
- $(CLANG) -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \
- $(LLVM_READELF) -S ./llvm_btf_verify.o | grep BTF; \
- /bin/rm -f ./llvm_btf_verify.o)
-
-ifneq ($(BTF_LLVM_PROBE),)
- CLANG_FLAGS += -g
-else
-ifneq ($(BTF_LLC_PROBE),)
-ifneq ($(BTF_PAHOLE_PROBE),)
-ifneq ($(BTF_OBJCOPY_PROBE),)
- CLANG_FLAGS += -g
- LLC_FLAGS += -mattr=dwarfris
- DWARF2BTF = y
-endif
-endif
-endif
-endif
-
-TEST_PROGS_CFLAGS := -I. -I$(OUTPUT)
-TEST_MAPS_CFLAGS := -I. -I$(OUTPUT)
-TEST_VERIFIER_CFLAGS := -I. -I$(OUTPUT) -Iverifier
+$(OUTPUT)/test_l4lb_noinline.o: BPF_CFLAGS += -fno-inline
+$(OUTPUT)/test_xdp_noinline.o: BPF_CFLAGS += -fno-inline
-ifneq ($(SUBREG_CODEGEN),)
-ALU32_BUILD_DIR = $(OUTPUT)/alu32
-TEST_CUSTOM_PROGS += $(ALU32_BUILD_DIR)/test_progs_32
-$(ALU32_BUILD_DIR):
- mkdir -p $@
+$(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h
-$(ALU32_BUILD_DIR)/urandom_read: $(OUTPUT)/urandom_read | $(ALU32_BUILD_DIR)
- cp $< $@
-
-$(ALU32_BUILD_DIR)/test_progs_32: test_progs.c $(OUTPUT)/libbpf.a\
- $(ALU32_BUILD_DIR)/urandom_read \
- | $(ALU32_BUILD_DIR)
- $(CC) $(TEST_PROGS_CFLAGS) $(CFLAGS) \
- -o $(ALU32_BUILD_DIR)/test_progs_32 \
- test_progs.c test_stub.c trace_helpers.c prog_tests/*.c \
- $(OUTPUT)/libbpf.a $(LDLIBS)
-
-$(ALU32_BUILD_DIR)/test_progs_32: $(PROG_TESTS_H)
-$(ALU32_BUILD_DIR)/test_progs_32: prog_tests/*.c
-
-$(ALU32_BUILD_DIR)/%.o: progs/%.c $(ALU32_BUILD_DIR)/test_progs_32 \
- | $(ALU32_BUILD_DIR)
- ($(CLANG) $(CLANG_FLAGS) -O2 -target bpf -emit-llvm -c $< -o - || \
- echo "clang failed") | \
- $(LLC) -march=bpf -mattr=+alu32 -mcpu=$(CPU) $(LLC_FLAGS) \
- -filetype=obj -o $@
-ifeq ($(DWARF2BTF),y)
- $(BTF_PAHOLE) -J $@
-endif
+# Build BPF object using Clang
+# $1 - input .c file
+# $2 - output .o file
+# $3 - CFLAGS
+# $4 - LDFLAGS
+define CLANG_BPF_BUILD_RULE
+ $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
+ ($(CLANG) $3 -O2 -target bpf -emit-llvm \
+ -c $1 -o - || echo "BPF obj compilation failed") | \
+ $(LLC) -mattr=dwarfris -march=bpf -mcpu=probe $4 -filetype=obj -o $2
+endef
+# Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32
+define CLANG_NOALU32_BPF_BUILD_RULE
+ $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
+ ($(CLANG) $3 -O2 -target bpf -emit-llvm \
+ -c $1 -o - || echo "BPF obj compilation failed") | \
+ $(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2
+endef
+# Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC
+define CLANG_NATIVE_BPF_BUILD_RULE
+ $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
+ ($(CLANG) $3 -O2 -emit-llvm \
+ -c $1 -o - || echo "BPF obj compilation failed") | \
+ $(LLC) -march=bpf -mcpu=probe $4 -filetype=obj -o $2
+endef
+# Build BPF object using GCC
+define GCC_BPF_BUILD_RULE
+ $(call msg,GCC-BPF,$(TRUNNER_BINARY),$2)
+ $(BPF_GCC) $3 $4 -O2 -c $1 -o $2
+endef
+
+SKEL_BLACKLIST := btf__% test_pinning_invalid.c
+
+# Set up extra TRUNNER_XXX "temporary" variables in the environment (relies on
+# $eval()) and pass control to DEFINE_TEST_RUNNER_RULES.
+# Parameters:
+# $1 - test runner base binary name (e.g., test_progs)
+# $2 - test runner extra "flavor" (e.g., no_alu32, gcc-bpf, etc)
+define DEFINE_TEST_RUNNER
+
+TRUNNER_OUTPUT := $(OUTPUT)$(if $2,/)$2
+TRUNNER_BINARY := $1$(if $2,-)$2
+TRUNNER_TEST_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.test.o, \
+ $$(notdir $$(wildcard $(TRUNNER_TESTS_DIR)/*.c)))
+TRUNNER_EXTRA_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, \
+ $$(filter %.c,$(TRUNNER_EXTRA_SOURCES)))
+TRUNNER_EXTRA_HDRS := $$(filter %.h,$(TRUNNER_EXTRA_SOURCES))
+TRUNNER_TESTS_HDR := $(TRUNNER_TESTS_DIR)/tests.h
+TRUNNER_BPF_SRCS := $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c))
+TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS))
+TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \
+ $$(filter-out $(SKEL_BLACKLIST), \
+ $$(TRUNNER_BPF_SRCS)))
+
+# Evaluate rules now with extra TRUNNER_XXX variables above already defined
+$$(eval $$(call DEFINE_TEST_RUNNER_RULES,$1,$2))
+
+endef
+
+# Using TRUNNER_XXX variables, provided by callers of DEFINE_TEST_RUNNER and
+# set up by DEFINE_TEST_RUNNER itself, create test runner build rules with:
+# $1 - test runner base binary name (e.g., test_progs)
+# $2 - test runner extra "flavor" (e.g., no_alu32, gcc-bpf, etc)
+define DEFINE_TEST_RUNNER_RULES
+
+ifeq ($($(TRUNNER_OUTPUT)-dir),)
+$(TRUNNER_OUTPUT)-dir := y
+$(TRUNNER_OUTPUT):
+ $$(call msg,MKDIR,,$$@)
+ mkdir -p $$@
endif
-# Have one program compiled without "-target bpf" to test whether libbpf loads
-# it successfully
-$(OUTPUT)/test_xdp.o: progs/test_xdp.c
- ($(CLANG) $(CLANG_FLAGS) -O2 -emit-llvm -c $< -o - || \
- echo "clang failed") | \
- $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
-ifeq ($(DWARF2BTF),y)
- $(BTF_PAHOLE) -J $@
+# ensure we set up BPF objects generation rule just once for a given
+# input/output directory combination
+ifeq ($($(TRUNNER_BPF_PROGS_DIR)$(if $2,-)$2-bpfobjs),)
+$(TRUNNER_BPF_PROGS_DIR)$(if $2,-)$2-bpfobjs := y
+$(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \
+ $(TRUNNER_BPF_PROGS_DIR)/%.c \
+ $(TRUNNER_BPF_PROGS_DIR)/*.h \
+ $$(BPFOBJ) | $(TRUNNER_OUTPUT)
+ $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
+ $(TRUNNER_BPF_CFLAGS), \
+ $(TRUNNER_BPF_LDFLAGS))
+
+$(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \
+ $(TRUNNER_OUTPUT)/%.o \
+ | $(BPFTOOL) $(TRUNNER_OUTPUT)
+ $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
+ $$(BPFTOOL) gen skeleton $$< > $$@
endif
-$(OUTPUT)/%.o: progs/%.c
- ($(CLANG) $(CLANG_FLAGS) -O2 -target bpf -emit-llvm -c $< -o - || \
- echo "clang failed") | \
- $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
-ifeq ($(DWARF2BTF),y)
- $(BTF_PAHOLE) -J $@
+# ensure we set up tests.h header generation rule just once
+ifeq ($($(TRUNNER_TESTS_DIR)-tests-hdr),)
+$(TRUNNER_TESTS_DIR)-tests-hdr := y
+$(TRUNNER_TESTS_HDR): $(TRUNNER_TESTS_DIR)/*.c
+ $$(call msg,TEST-HDR,$(TRUNNER_BINARY),$$@)
+ $$(shell ( cd $(TRUNNER_TESTS_DIR); \
+ echo '/* Generated header, do not edit */'; \
+ ls *.c 2> /dev/null | \
+ sed -e 's@\([^\.]*\)\.c@DEFINE_TEST(\1)@'; \
+ ) > $$@)
endif
-PROG_TESTS_DIR = $(OUTPUT)/prog_tests
-$(PROG_TESTS_DIR):
- mkdir -p $@
-PROG_TESTS_H := $(PROG_TESTS_DIR)/tests.h
-PROG_TESTS_FILES := $(wildcard prog_tests/*.c)
-test_progs.c: $(PROG_TESTS_H)
-$(OUTPUT)/test_progs: CFLAGS += $(TEST_PROGS_CFLAGS)
-$(OUTPUT)/test_progs: test_progs.c $(PROG_TESTS_H) $(PROG_TESTS_FILES)
-$(PROG_TESTS_H): $(PROG_TESTS_FILES) | $(PROG_TESTS_DIR)
- $(shell ( cd prog_tests/; \
- echo '/* Generated header, do not edit */'; \
- echo '#ifdef DECLARE'; \
- ls *.c 2> /dev/null | \
- sed -e 's@\([^\.]*\)\.c@extern void test_\1(void);@'; \
- echo '#endif'; \
- echo '#ifdef CALL'; \
- ls *.c 2> /dev/null | \
- sed -e 's@\([^\.]*\)\.c@test_\1();@'; \
- echo '#endif' \
- ) > $(PROG_TESTS_H))
+# compile individual test files
+# Note: we cd into output directory to ensure embedded BPF object is found
+$(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \
+ $(TRUNNER_TESTS_DIR)/%.c \
+ $(TRUNNER_EXTRA_HDRS) \
+ $(TRUNNER_BPF_OBJS) \
+ $(TRUNNER_BPF_SKELS) \
+ $$(BPFOBJ) | $(TRUNNER_OUTPUT)
+ $$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@)
+ cd $$(@D) && $$(CC) $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
+
+$(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
+ %.c \
+ $(TRUNNER_EXTRA_HDRS) \
+ $(TRUNNER_TESTS_HDR) \
+ $$(BPFOBJ) | $(TRUNNER_OUTPUT)
+ $$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@)
+ $$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
+
+# only copy extra resources if in flavored build
+$(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT)
+ifneq ($2,)
+ $$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES))
+ cp -a $$^ $(TRUNNER_OUTPUT)/
+endif
-MAP_TESTS_DIR = $(OUTPUT)/map_tests
-$(MAP_TESTS_DIR):
- mkdir -p $@
-MAP_TESTS_H := $(MAP_TESTS_DIR)/tests.h
-MAP_TESTS_FILES := $(wildcard map_tests/*.c)
-test_maps.c: $(MAP_TESTS_H)
-$(OUTPUT)/test_maps: CFLAGS += $(TEST_MAPS_CFLAGS)
-$(OUTPUT)/test_maps: test_maps.c $(MAP_TESTS_H) $(MAP_TESTS_FILES)
-$(MAP_TESTS_H): $(MAP_TESTS_FILES) | $(MAP_TESTS_DIR)
- $(shell ( cd map_tests/; \
- echo '/* Generated header, do not edit */'; \
- echo '#ifdef DECLARE'; \
- ls *.c 2> /dev/null | \
- sed -e 's@\([^\.]*\)\.c@extern void test_\1(void);@'; \
- echo '#endif'; \
- echo '#ifdef CALL'; \
- ls *.c 2> /dev/null | \
- sed -e 's@\([^\.]*\)\.c@test_\1();@'; \
- echo '#endif' \
- ) > $(MAP_TESTS_H))
+$(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
+ $(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \
+ | $(TRUNNER_BINARY)-extras
+ $$(call msg,BINARY,,$$@)
+ $$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
+
+endef
+
+# Define test_progs test runner.
+TRUNNER_TESTS_DIR := prog_tests
+TRUNNER_BPF_PROGS_DIR := progs
+TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \
+ flow_dissector_load.h
+TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \
+ $(wildcard progs/btf_dump_test_case_*.c)
+TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
+TRUNNER_BPF_LDFLAGS := -mattr=+alu32
+$(eval $(call DEFINE_TEST_RUNNER,test_progs))
+
+# Define test_progs-no_alu32 test runner.
+TRUNNER_BPF_BUILD_RULE := CLANG_NOALU32_BPF_BUILD_RULE
+TRUNNER_BPF_LDFLAGS :=
+$(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32))
+
+# Define test_progs BPF-GCC-flavored test runner.
+ifneq ($(BPF_GCC),)
+TRUNNER_BPF_BUILD_RULE := GCC_BPF_BUILD_RULE
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(call get_sys_includes,gcc)
+TRUNNER_BPF_LDFLAGS :=
+$(eval $(call DEFINE_TEST_RUNNER,test_progs,bpf_gcc))
+endif
-VERIFIER_TESTS_DIR = $(OUTPUT)/verifier
-$(VERIFIER_TESTS_DIR):
- mkdir -p $@
-VERIFIER_TESTS_H := $(VERIFIER_TESTS_DIR)/tests.h
-VERIFIER_TEST_FILES := $(wildcard verifier/*.c)
-test_verifier.c: $(VERIFIER_TESTS_H)
-$(OUTPUT)/test_verifier: CFLAGS += $(TEST_VERIFIER_CFLAGS)
-$(OUTPUT)/test_verifier: test_verifier.c $(VERIFIER_TESTS_H)
-$(VERIFIER_TESTS_H): $(VERIFIER_TEST_FILES) | $(VERIFIER_TESTS_DIR)
+# Define test_maps test runner.
+TRUNNER_TESTS_DIR := map_tests
+TRUNNER_BPF_PROGS_DIR := progs
+TRUNNER_EXTRA_SOURCES := test_maps.c
+TRUNNER_EXTRA_FILES :=
+TRUNNER_BPF_BUILD_RULE := $$(error no BPF objects should be built)
+TRUNNER_BPF_CFLAGS :=
+TRUNNER_BPF_LDFLAGS :=
+$(eval $(call DEFINE_TEST_RUNNER,test_maps))
+
+# Define test_verifier test runner.
+# It is much simpler than test_maps/test_progs and sufficiently different from
+# them (e.g., test.h is using completely pattern), that it's worth just
+# explicitly defining all the rules explicitly.
+verifier/tests.h: verifier/*.c
$(shell ( cd verifier/; \
echo '/* Generated header, do not edit */'; \
echo '#ifdef FILL_ARRAY'; \
- ls *.c 2> /dev/null | \
- sed -e 's@\(.*\)@#include \"\1\"@'; \
+ ls *.c 2> /dev/null | sed -e 's@\(.*\)@#include \"\1\"@'; \
echo '#endif' \
- ) > $(VERIFIER_TESTS_H))
-
-EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(ALU32_BUILD_DIR) \
- $(VERIFIER_TESTS_H) $(PROG_TESTS_H) $(MAP_TESTS_H) \
- feature
+ ) > verifier/tests.h)
+$(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
+ $(call msg,BINARY,,$@)
+ $(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
+
+# Make sure we are able to include and link libbpf against c++.
+$(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
+ $(call msg,CXX,,$@)
+ $(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@
+
+EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) \
+ prog_tests/tests.h map_tests/tests.h verifier/tests.h \
+ feature \
+ $(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc)
diff --git a/tools/testing/selftests/bpf/bpf_endian.h b/tools/testing/selftests/bpf/bpf_endian.h
deleted file mode 100644
index 05f036df8a4c..000000000000
--- a/tools/testing/selftests/bpf/bpf_endian.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __BPF_ENDIAN__
-#define __BPF_ENDIAN__
-
-#include <linux/stddef.h>
-#include <linux/swab.h>
-
-/* LLVM's BPF target selects the endianness of the CPU
- * it compiles on, or the user specifies (bpfel/bpfeb),
- * respectively. The used __BYTE_ORDER__ is defined by
- * the compiler, we cannot rely on __BYTE_ORDER from
- * libc headers, since it doesn't reflect the actual
- * requested byte order.
- *
- * Note, LLVM's BPF target has different __builtin_bswapX()
- * semantics. It does map to BPF_ALU | BPF_END | BPF_TO_BE
- * in bpfel and bpfeb case, which means below, that we map
- * to cpu_to_be16(). We could use it unconditionally in BPF
- * case, but better not rely on it, so that this header here
- * can be used from application and BPF program side, which
- * use different targets.
- */
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-# define __bpf_ntohs(x) __builtin_bswap16(x)
-# define __bpf_htons(x) __builtin_bswap16(x)
-# define __bpf_constant_ntohs(x) ___constant_swab16(x)
-# define __bpf_constant_htons(x) ___constant_swab16(x)
-# define __bpf_ntohl(x) __builtin_bswap32(x)
-# define __bpf_htonl(x) __builtin_bswap32(x)
-# define __bpf_constant_ntohl(x) ___constant_swab32(x)
-# define __bpf_constant_htonl(x) ___constant_swab32(x)
-#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-# define __bpf_ntohs(x) (x)
-# define __bpf_htons(x) (x)
-# define __bpf_constant_ntohs(x) (x)
-# define __bpf_constant_htons(x) (x)
-# define __bpf_ntohl(x) (x)
-# define __bpf_htonl(x) (x)
-# define __bpf_constant_ntohl(x) (x)
-# define __bpf_constant_htonl(x) (x)
-#else
-# error "Fix your compiler's __BYTE_ORDER__?!"
-#endif
-
-#define bpf_htons(x) \
- (__builtin_constant_p(x) ? \
- __bpf_constant_htons(x) : __bpf_htons(x))
-#define bpf_ntohs(x) \
- (__builtin_constant_p(x) ? \
- __bpf_constant_ntohs(x) : __bpf_ntohs(x))
-#define bpf_htonl(x) \
- (__builtin_constant_p(x) ? \
- __bpf_constant_htonl(x) : __bpf_htonl(x))
-#define bpf_ntohl(x) \
- (__builtin_constant_p(x) ? \
- __bpf_constant_ntohl(x) : __bpf_ntohl(x))
-
-#endif /* __BPF_ENDIAN__ */
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
deleted file mode 100644
index f804f210244e..000000000000
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ /dev/null
@@ -1,504 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __BPF_HELPERS_H
-#define __BPF_HELPERS_H
-
-/* helper macro to place programs, maps, license in
- * different sections in elf_bpf file. Section names
- * are interpreted by elf_bpf loader
- */
-#define SEC(NAME) __attribute__((section(NAME), used))
-
-#define __uint(name, val) int (*name)[val]
-#define __type(name, val) val *name
-
-/* helper macro to print out debug messages */
-#define bpf_printk(fmt, ...) \
-({ \
- char ____fmt[] = fmt; \
- bpf_trace_printk(____fmt, sizeof(____fmt), \
- ##__VA_ARGS__); \
-})
-
-/* helper functions called from eBPF programs written in C */
-static void *(*bpf_map_lookup_elem)(void *map, const void *key) =
- (void *) BPF_FUNC_map_lookup_elem;
-static int (*bpf_map_update_elem)(void *map, const void *key, const void *value,
- unsigned long long flags) =
- (void *) BPF_FUNC_map_update_elem;
-static int (*bpf_map_delete_elem)(void *map, const void *key) =
- (void *) BPF_FUNC_map_delete_elem;
-static int (*bpf_map_push_elem)(void *map, const void *value,
- unsigned long long flags) =
- (void *) BPF_FUNC_map_push_elem;
-static int (*bpf_map_pop_elem)(void *map, void *value) =
- (void *) BPF_FUNC_map_pop_elem;
-static int (*bpf_map_peek_elem)(void *map, void *value) =
- (void *) BPF_FUNC_map_peek_elem;
-static int (*bpf_probe_read)(void *dst, int size, const void *unsafe_ptr) =
- (void *) BPF_FUNC_probe_read;
-static unsigned long long (*bpf_ktime_get_ns)(void) =
- (void *) BPF_FUNC_ktime_get_ns;
-static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
- (void *) BPF_FUNC_trace_printk;
-static void (*bpf_tail_call)(void *ctx, void *map, int index) =
- (void *) BPF_FUNC_tail_call;
-static unsigned long long (*bpf_get_smp_processor_id)(void) =
- (void *) BPF_FUNC_get_smp_processor_id;
-static unsigned long long (*bpf_get_current_pid_tgid)(void) =
- (void *) BPF_FUNC_get_current_pid_tgid;
-static unsigned long long (*bpf_get_current_uid_gid)(void) =
- (void *) BPF_FUNC_get_current_uid_gid;
-static int (*bpf_get_current_comm)(void *buf, int buf_size) =
- (void *) BPF_FUNC_get_current_comm;
-static unsigned long long (*bpf_perf_event_read)(void *map,
- unsigned long long flags) =
- (void *) BPF_FUNC_perf_event_read;
-static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
- (void *) BPF_FUNC_clone_redirect;
-static int (*bpf_redirect)(int ifindex, int flags) =
- (void *) BPF_FUNC_redirect;
-static int (*bpf_redirect_map)(void *map, int key, int flags) =
- (void *) BPF_FUNC_redirect_map;
-static int (*bpf_perf_event_output)(void *ctx, void *map,
- unsigned long long flags, void *data,
- int size) =
- (void *) BPF_FUNC_perf_event_output;
-static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =
- (void *) BPF_FUNC_get_stackid;
-static int (*bpf_probe_write_user)(void *dst, const void *src, int size) =
- (void *) BPF_FUNC_probe_write_user;
-static int (*bpf_current_task_under_cgroup)(void *map, int index) =
- (void *) BPF_FUNC_current_task_under_cgroup;
-static int (*bpf_skb_get_tunnel_key)(void *ctx, void *key, int size, int flags) =
- (void *) BPF_FUNC_skb_get_tunnel_key;
-static int (*bpf_skb_set_tunnel_key)(void *ctx, void *key, int size, int flags) =
- (void *) BPF_FUNC_skb_set_tunnel_key;
-static int (*bpf_skb_get_tunnel_opt)(void *ctx, void *md, int size) =
- (void *) BPF_FUNC_skb_get_tunnel_opt;
-static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) =
- (void *) BPF_FUNC_skb_set_tunnel_opt;
-static unsigned long long (*bpf_get_prandom_u32)(void) =
- (void *) BPF_FUNC_get_prandom_u32;
-static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
- (void *) BPF_FUNC_xdp_adjust_head;
-static int (*bpf_xdp_adjust_meta)(void *ctx, int offset) =
- (void *) BPF_FUNC_xdp_adjust_meta;
-static int (*bpf_get_socket_cookie)(void *ctx) =
- (void *) BPF_FUNC_get_socket_cookie;
-static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
- int optlen) =
- (void *) BPF_FUNC_setsockopt;
-static int (*bpf_getsockopt)(void *ctx, int level, int optname, void *optval,
- int optlen) =
- (void *) BPF_FUNC_getsockopt;
-static int (*bpf_sock_ops_cb_flags_set)(void *ctx, int flags) =
- (void *) BPF_FUNC_sock_ops_cb_flags_set;
-static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) =
- (void *) BPF_FUNC_sk_redirect_map;
-static int (*bpf_sk_redirect_hash)(void *ctx, void *map, void *key, int flags) =
- (void *) BPF_FUNC_sk_redirect_hash;
-static int (*bpf_sock_map_update)(void *map, void *key, void *value,
- unsigned long long flags) =
- (void *) BPF_FUNC_sock_map_update;
-static int (*bpf_sock_hash_update)(void *map, void *key, void *value,
- unsigned long long flags) =
- (void *) BPF_FUNC_sock_hash_update;
-static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags,
- void *buf, unsigned int buf_size) =
- (void *) BPF_FUNC_perf_event_read_value;
-static int (*bpf_perf_prog_read_value)(void *ctx, void *buf,
- unsigned int buf_size) =
- (void *) BPF_FUNC_perf_prog_read_value;
-static int (*bpf_override_return)(void *ctx, unsigned long rc) =
- (void *) BPF_FUNC_override_return;
-static int (*bpf_msg_redirect_map)(void *ctx, void *map, int key, int flags) =
- (void *) BPF_FUNC_msg_redirect_map;
-static int (*bpf_msg_redirect_hash)(void *ctx,
- void *map, void *key, int flags) =
- (void *) BPF_FUNC_msg_redirect_hash;
-static int (*bpf_msg_apply_bytes)(void *ctx, int len) =
- (void *) BPF_FUNC_msg_apply_bytes;
-static int (*bpf_msg_cork_bytes)(void *ctx, int len) =
- (void *) BPF_FUNC_msg_cork_bytes;
-static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
- (void *) BPF_FUNC_msg_pull_data;
-static int (*bpf_msg_push_data)(void *ctx, int start, int end, int flags) =
- (void *) BPF_FUNC_msg_push_data;
-static int (*bpf_msg_pop_data)(void *ctx, int start, int cut, int flags) =
- (void *) BPF_FUNC_msg_pop_data;
-static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
- (void *) BPF_FUNC_bind;
-static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
- (void *) BPF_FUNC_xdp_adjust_tail;
-static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state,
- int size, int flags) =
- (void *) BPF_FUNC_skb_get_xfrm_state;
-static int (*bpf_sk_select_reuseport)(void *ctx, void *map, void *key, __u32 flags) =
- (void *) BPF_FUNC_sk_select_reuseport;
-static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) =
- (void *) BPF_FUNC_get_stack;
-static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params,
- int plen, __u32 flags) =
- (void *) BPF_FUNC_fib_lookup;
-static int (*bpf_lwt_push_encap)(void *ctx, unsigned int type, void *hdr,
- unsigned int len) =
- (void *) BPF_FUNC_lwt_push_encap;
-static int (*bpf_lwt_seg6_store_bytes)(void *ctx, unsigned int offset,
- void *from, unsigned int len) =
- (void *) BPF_FUNC_lwt_seg6_store_bytes;
-static int (*bpf_lwt_seg6_action)(void *ctx, unsigned int action, void *param,
- unsigned int param_len) =
- (void *) BPF_FUNC_lwt_seg6_action;
-static int (*bpf_lwt_seg6_adjust_srh)(void *ctx, unsigned int offset,
- unsigned int len) =
- (void *) BPF_FUNC_lwt_seg6_adjust_srh;
-static int (*bpf_rc_repeat)(void *ctx) =
- (void *) BPF_FUNC_rc_repeat;
-static int (*bpf_rc_keydown)(void *ctx, unsigned int protocol,
- unsigned long long scancode, unsigned int toggle) =
- (void *) BPF_FUNC_rc_keydown;
-static unsigned long long (*bpf_get_current_cgroup_id)(void) =
- (void *) BPF_FUNC_get_current_cgroup_id;
-static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) =
- (void *) BPF_FUNC_get_local_storage;
-static unsigned long long (*bpf_skb_cgroup_id)(void *ctx) =
- (void *) BPF_FUNC_skb_cgroup_id;
-static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) =
- (void *) BPF_FUNC_skb_ancestor_cgroup_id;
-static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx,
- struct bpf_sock_tuple *tuple,
- int size, unsigned long long netns_id,
- unsigned long long flags) =
- (void *) BPF_FUNC_sk_lookup_tcp;
-static struct bpf_sock *(*bpf_skc_lookup_tcp)(void *ctx,
- struct bpf_sock_tuple *tuple,
- int size, unsigned long long netns_id,
- unsigned long long flags) =
- (void *) BPF_FUNC_skc_lookup_tcp;
-static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx,
- struct bpf_sock_tuple *tuple,
- int size, unsigned long long netns_id,
- unsigned long long flags) =
- (void *) BPF_FUNC_sk_lookup_udp;
-static int (*bpf_sk_release)(struct bpf_sock *sk) =
- (void *) BPF_FUNC_sk_release;
-static int (*bpf_skb_vlan_push)(void *ctx, __be16 vlan_proto, __u16 vlan_tci) =
- (void *) BPF_FUNC_skb_vlan_push;
-static int (*bpf_skb_vlan_pop)(void *ctx) =
- (void *) BPF_FUNC_skb_vlan_pop;
-static int (*bpf_rc_pointer_rel)(void *ctx, int rel_x, int rel_y) =
- (void *) BPF_FUNC_rc_pointer_rel;
-static void (*bpf_spin_lock)(struct bpf_spin_lock *lock) =
- (void *) BPF_FUNC_spin_lock;
-static void (*bpf_spin_unlock)(struct bpf_spin_lock *lock) =
- (void *) BPF_FUNC_spin_unlock;
-static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) =
- (void *) BPF_FUNC_sk_fullsock;
-static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) =
- (void *) BPF_FUNC_tcp_sock;
-static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) =
- (void *) BPF_FUNC_get_listener_sock;
-static int (*bpf_skb_ecn_set_ce)(void *ctx) =
- (void *) BPF_FUNC_skb_ecn_set_ce;
-static int (*bpf_tcp_check_syncookie)(struct bpf_sock *sk,
- void *ip, int ip_len, void *tcp, int tcp_len) =
- (void *) BPF_FUNC_tcp_check_syncookie;
-static int (*bpf_sysctl_get_name)(void *ctx, char *buf,
- unsigned long long buf_len,
- unsigned long long flags) =
- (void *) BPF_FUNC_sysctl_get_name;
-static int (*bpf_sysctl_get_current_value)(void *ctx, char *buf,
- unsigned long long buf_len) =
- (void *) BPF_FUNC_sysctl_get_current_value;
-static int (*bpf_sysctl_get_new_value)(void *ctx, char *buf,
- unsigned long long buf_len) =
- (void *) BPF_FUNC_sysctl_get_new_value;
-static int (*bpf_sysctl_set_new_value)(void *ctx, const char *buf,
- unsigned long long buf_len) =
- (void *) BPF_FUNC_sysctl_set_new_value;
-static int (*bpf_strtol)(const char *buf, unsigned long long buf_len,
- unsigned long long flags, long *res) =
- (void *) BPF_FUNC_strtol;
-static int (*bpf_strtoul)(const char *buf, unsigned long long buf_len,
- unsigned long long flags, unsigned long *res) =
- (void *) BPF_FUNC_strtoul;
-static void *(*bpf_sk_storage_get)(void *map, struct bpf_sock *sk,
- void *value, __u64 flags) =
- (void *) BPF_FUNC_sk_storage_get;
-static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) =
- (void *)BPF_FUNC_sk_storage_delete;
-static int (*bpf_send_signal)(unsigned sig) = (void *)BPF_FUNC_send_signal;
-
-/* llvm builtin functions that eBPF C program may use to
- * emit BPF_LD_ABS and BPF_LD_IND instructions
- */
-struct sk_buff;
-unsigned long long load_byte(void *skb,
- unsigned long long off) asm("llvm.bpf.load.byte");
-unsigned long long load_half(void *skb,
- unsigned long long off) asm("llvm.bpf.load.half");
-unsigned long long load_word(void *skb,
- unsigned long long off) asm("llvm.bpf.load.word");
-
-/* a helper structure used by eBPF C program
- * to describe map attributes to elf_bpf loader
- */
-struct bpf_map_def {
- unsigned int type;
- unsigned int key_size;
- unsigned int value_size;
- unsigned int max_entries;
- unsigned int map_flags;
- unsigned int inner_map_idx;
- unsigned int numa_node;
-};
-
-#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val) \
- struct ____btf_map_##name { \
- type_key key; \
- type_val value; \
- }; \
- struct ____btf_map_##name \
- __attribute__ ((section(".maps." #name), used)) \
- ____btf_map_##name = { }
-
-static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) =
- (void *) BPF_FUNC_skb_load_bytes;
-static int (*bpf_skb_load_bytes_relative)(void *ctx, int off, void *to, int len, __u32 start_header) =
- (void *) BPF_FUNC_skb_load_bytes_relative;
-static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) =
- (void *) BPF_FUNC_skb_store_bytes;
-static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) =
- (void *) BPF_FUNC_l3_csum_replace;
-static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
- (void *) BPF_FUNC_l4_csum_replace;
-static int (*bpf_csum_diff)(void *from, int from_size, void *to, int to_size, int seed) =
- (void *) BPF_FUNC_csum_diff;
-static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
- (void *) BPF_FUNC_skb_under_cgroup;
-static int (*bpf_skb_change_head)(void *, int len, int flags) =
- (void *) BPF_FUNC_skb_change_head;
-static int (*bpf_skb_pull_data)(void *, int len) =
- (void *) BPF_FUNC_skb_pull_data;
-static unsigned int (*bpf_get_cgroup_classid)(void *ctx) =
- (void *) BPF_FUNC_get_cgroup_classid;
-static unsigned int (*bpf_get_route_realm)(void *ctx) =
- (void *) BPF_FUNC_get_route_realm;
-static int (*bpf_skb_change_proto)(void *ctx, __be16 proto, __u64 flags) =
- (void *) BPF_FUNC_skb_change_proto;
-static int (*bpf_skb_change_type)(void *ctx, __u32 type) =
- (void *) BPF_FUNC_skb_change_type;
-static unsigned int (*bpf_get_hash_recalc)(void *ctx) =
- (void *) BPF_FUNC_get_hash_recalc;
-static unsigned long long (*bpf_get_current_task)(void) =
- (void *) BPF_FUNC_get_current_task;
-static int (*bpf_skb_change_tail)(void *ctx, __u32 len, __u64 flags) =
- (void *) BPF_FUNC_skb_change_tail;
-static long long (*bpf_csum_update)(void *ctx, __u32 csum) =
- (void *) BPF_FUNC_csum_update;
-static void (*bpf_set_hash_invalid)(void *ctx) =
- (void *) BPF_FUNC_set_hash_invalid;
-static int (*bpf_get_numa_node_id)(void) =
- (void *) BPF_FUNC_get_numa_node_id;
-static int (*bpf_probe_read_str)(void *ctx, __u32 size,
- const void *unsafe_ptr) =
- (void *) BPF_FUNC_probe_read_str;
-static unsigned int (*bpf_get_socket_uid)(void *ctx) =
- (void *) BPF_FUNC_get_socket_uid;
-static unsigned int (*bpf_set_hash)(void *ctx, __u32 hash) =
- (void *) BPF_FUNC_set_hash;
-static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode,
- unsigned long long flags) =
- (void *) BPF_FUNC_skb_adjust_room;
-
-/* Scan the ARCH passed in from ARCH env variable (see Makefile) */
-#if defined(__TARGET_ARCH_x86)
- #define bpf_target_x86
- #define bpf_target_defined
-#elif defined(__TARGET_ARCH_s390)
- #define bpf_target_s390
- #define bpf_target_defined
-#elif defined(__TARGET_ARCH_arm)
- #define bpf_target_arm
- #define bpf_target_defined
-#elif defined(__TARGET_ARCH_arm64)
- #define bpf_target_arm64
- #define bpf_target_defined
-#elif defined(__TARGET_ARCH_mips)
- #define bpf_target_mips
- #define bpf_target_defined
-#elif defined(__TARGET_ARCH_powerpc)
- #define bpf_target_powerpc
- #define bpf_target_defined
-#elif defined(__TARGET_ARCH_sparc)
- #define bpf_target_sparc
- #define bpf_target_defined
-#else
- #undef bpf_target_defined
-#endif
-
-/* Fall back to what the compiler says */
-#ifndef bpf_target_defined
-#if defined(__x86_64__)
- #define bpf_target_x86
-#elif defined(__s390__)
- #define bpf_target_s390
-#elif defined(__arm__)
- #define bpf_target_arm
-#elif defined(__aarch64__)
- #define bpf_target_arm64
-#elif defined(__mips__)
- #define bpf_target_mips
-#elif defined(__powerpc__)
- #define bpf_target_powerpc
-#elif defined(__sparc__)
- #define bpf_target_sparc
-#endif
-#endif
-
-#if defined(bpf_target_x86)
-
-#ifdef __KERNEL__
-#define PT_REGS_PARM1(x) ((x)->di)
-#define PT_REGS_PARM2(x) ((x)->si)
-#define PT_REGS_PARM3(x) ((x)->dx)
-#define PT_REGS_PARM4(x) ((x)->cx)
-#define PT_REGS_PARM5(x) ((x)->r8)
-#define PT_REGS_RET(x) ((x)->sp)
-#define PT_REGS_FP(x) ((x)->bp)
-#define PT_REGS_RC(x) ((x)->ax)
-#define PT_REGS_SP(x) ((x)->sp)
-#define PT_REGS_IP(x) ((x)->ip)
-#else
-#ifdef __i386__
-/* i386 kernel is built with -mregparm=3 */
-#define PT_REGS_PARM1(x) ((x)->eax)
-#define PT_REGS_PARM2(x) ((x)->edx)
-#define PT_REGS_PARM3(x) ((x)->ecx)
-#define PT_REGS_PARM4(x) 0
-#define PT_REGS_PARM5(x) 0
-#define PT_REGS_RET(x) ((x)->esp)
-#define PT_REGS_FP(x) ((x)->ebp)
-#define PT_REGS_RC(x) ((x)->eax)
-#define PT_REGS_SP(x) ((x)->esp)
-#define PT_REGS_IP(x) ((x)->eip)
-#else
-#define PT_REGS_PARM1(x) ((x)->rdi)
-#define PT_REGS_PARM2(x) ((x)->rsi)
-#define PT_REGS_PARM3(x) ((x)->rdx)
-#define PT_REGS_PARM4(x) ((x)->rcx)
-#define PT_REGS_PARM5(x) ((x)->r8)
-#define PT_REGS_RET(x) ((x)->rsp)
-#define PT_REGS_FP(x) ((x)->rbp)
-#define PT_REGS_RC(x) ((x)->rax)
-#define PT_REGS_SP(x) ((x)->rsp)
-#define PT_REGS_IP(x) ((x)->rip)
-#endif
-#endif
-
-#elif defined(bpf_target_s390)
-
-/* s390 provides user_pt_regs instead of struct pt_regs to userspace */
-struct pt_regs;
-#define PT_REGS_S390 const volatile user_pt_regs
-#define PT_REGS_PARM1(x) (((PT_REGS_S390 *)(x))->gprs[2])
-#define PT_REGS_PARM2(x) (((PT_REGS_S390 *)(x))->gprs[3])
-#define PT_REGS_PARM3(x) (((PT_REGS_S390 *)(x))->gprs[4])
-#define PT_REGS_PARM4(x) (((PT_REGS_S390 *)(x))->gprs[5])
-#define PT_REGS_PARM5(x) (((PT_REGS_S390 *)(x))->gprs[6])
-#define PT_REGS_RET(x) (((PT_REGS_S390 *)(x))->gprs[14])
-/* Works only with CONFIG_FRAME_POINTER */
-#define PT_REGS_FP(x) (((PT_REGS_S390 *)(x))->gprs[11])
-#define PT_REGS_RC(x) (((PT_REGS_S390 *)(x))->gprs[2])
-#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15])
-#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr)
-
-#elif defined(bpf_target_arm)
-
-#define PT_REGS_PARM1(x) ((x)->uregs[0])
-#define PT_REGS_PARM2(x) ((x)->uregs[1])
-#define PT_REGS_PARM3(x) ((x)->uregs[2])
-#define PT_REGS_PARM4(x) ((x)->uregs[3])
-#define PT_REGS_PARM5(x) ((x)->uregs[4])
-#define PT_REGS_RET(x) ((x)->uregs[14])
-#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */
-#define PT_REGS_RC(x) ((x)->uregs[0])
-#define PT_REGS_SP(x) ((x)->uregs[13])
-#define PT_REGS_IP(x) ((x)->uregs[12])
-
-#elif defined(bpf_target_arm64)
-
-/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */
-struct pt_regs;
-#define PT_REGS_ARM64 const volatile struct user_pt_regs
-#define PT_REGS_PARM1(x) (((PT_REGS_ARM64 *)(x))->regs[0])
-#define PT_REGS_PARM2(x) (((PT_REGS_ARM64 *)(x))->regs[1])
-#define PT_REGS_PARM3(x) (((PT_REGS_ARM64 *)(x))->regs[2])
-#define PT_REGS_PARM4(x) (((PT_REGS_ARM64 *)(x))->regs[3])
-#define PT_REGS_PARM5(x) (((PT_REGS_ARM64 *)(x))->regs[4])
-#define PT_REGS_RET(x) (((PT_REGS_ARM64 *)(x))->regs[30])
-/* Works only with CONFIG_FRAME_POINTER */
-#define PT_REGS_FP(x) (((PT_REGS_ARM64 *)(x))->regs[29])
-#define PT_REGS_RC(x) (((PT_REGS_ARM64 *)(x))->regs[0])
-#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp)
-#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc)
-
-#elif defined(bpf_target_mips)
-
-#define PT_REGS_PARM1(x) ((x)->regs[4])
-#define PT_REGS_PARM2(x) ((x)->regs[5])
-#define PT_REGS_PARM3(x) ((x)->regs[6])
-#define PT_REGS_PARM4(x) ((x)->regs[7])
-#define PT_REGS_PARM5(x) ((x)->regs[8])
-#define PT_REGS_RET(x) ((x)->regs[31])
-#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */
-#define PT_REGS_RC(x) ((x)->regs[1])
-#define PT_REGS_SP(x) ((x)->regs[29])
-#define PT_REGS_IP(x) ((x)->cp0_epc)
-
-#elif defined(bpf_target_powerpc)
-
-#define PT_REGS_PARM1(x) ((x)->gpr[3])
-#define PT_REGS_PARM2(x) ((x)->gpr[4])
-#define PT_REGS_PARM3(x) ((x)->gpr[5])
-#define PT_REGS_PARM4(x) ((x)->gpr[6])
-#define PT_REGS_PARM5(x) ((x)->gpr[7])
-#define PT_REGS_RC(x) ((x)->gpr[3])
-#define PT_REGS_SP(x) ((x)->sp)
-#define PT_REGS_IP(x) ((x)->nip)
-
-#elif defined(bpf_target_sparc)
-
-#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0])
-#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1])
-#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2])
-#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3])
-#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4])
-#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7])
-#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0])
-#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP])
-
-/* Should this also be a bpf_target check for the sparc case? */
-#if defined(__arch64__)
-#define PT_REGS_IP(x) ((x)->tpc)
-#else
-#define PT_REGS_IP(x) ((x)->pc)
-#endif
-
-#endif
-
-#if defined(bpf_target_powerpc)
-#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; })
-#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP
-#elif defined(bpf_target_sparc)
-#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); })
-#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP
-#else
-#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ \
- bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
-#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ \
- bpf_probe_read(&(ip), sizeof(ip), \
- (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
-#endif
-
-#endif
diff --git a/tools/testing/selftests/bpf/bpf_legacy.h b/tools/testing/selftests/bpf/bpf_legacy.h
new file mode 100644
index 000000000000..6f8988738bc1
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_legacy.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_LEGACY__
+#define __BPF_LEGACY__
+
+/*
+ * legacy bpf_map_def with extra fields supported only by bpf_load(), do not
+ * use outside of samples/bpf
+ */
+struct bpf_map_def_legacy {
+ unsigned int type;
+ unsigned int key_size;
+ unsigned int value_size;
+ unsigned int max_entries;
+ unsigned int map_flags;
+ unsigned int inner_map_idx;
+ unsigned int numa_node;
+};
+
+#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val) \
+ struct ____btf_map_##name { \
+ type_key key; \
+ type_val value; \
+ }; \
+ struct ____btf_map_##name \
+ __attribute__ ((section(".maps." #name), used)) \
+ ____btf_map_##name = { }
+
+/* llvm builtin functions that eBPF C program may use to
+ * emit BPF_LD_ABS and BPF_LD_IND instructions
+ */
+unsigned long long load_byte(void *skb,
+ unsigned long long off) asm("llvm.bpf.load.byte");
+unsigned long long load_half(void *skb,
+ unsigned long long off) asm("llvm.bpf.load.half");
+unsigned long long load_word(void *skb,
+ unsigned long long off) asm("llvm.bpf.load.word");
+
+#endif
+
diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
new file mode 100644
index 000000000000..8f21965ffc6c
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -0,0 +1,235 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_TCP_HELPERS_H
+#define __BPF_TCP_HELPERS_H
+
+#include <stdbool.h>
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_trace_helpers.h"
+
+#define BPF_STRUCT_OPS(name, args...) \
+SEC("struct_ops/"#name) \
+BPF_PROG(name, args)
+
+#define tcp_jiffies32 ((__u32)bpf_jiffies64())
+
+struct sock_common {
+ unsigned char skc_state;
+} __attribute__((preserve_access_index));
+
+enum sk_pacing {
+ SK_PACING_NONE = 0,
+ SK_PACING_NEEDED = 1,
+ SK_PACING_FQ = 2,
+};
+
+struct sock {
+ struct sock_common __sk_common;
+ unsigned long sk_pacing_rate;
+ __u32 sk_pacing_status; /* see enum sk_pacing */
+} __attribute__((preserve_access_index));
+
+struct inet_sock {
+ struct sock sk;
+} __attribute__((preserve_access_index));
+
+struct inet_connection_sock {
+ struct inet_sock icsk_inet;
+ __u8 icsk_ca_state:6,
+ icsk_ca_setsockopt:1,
+ icsk_ca_dst_locked:1;
+ struct {
+ __u8 pending;
+ } icsk_ack;
+ __u64 icsk_ca_priv[104 / sizeof(__u64)];
+} __attribute__((preserve_access_index));
+
+struct tcp_sock {
+ struct inet_connection_sock inet_conn;
+
+ __u32 rcv_nxt;
+ __u32 snd_nxt;
+ __u32 snd_una;
+ __u8 ecn_flags;
+ __u32 delivered;
+ __u32 delivered_ce;
+ __u32 snd_cwnd;
+ __u32 snd_cwnd_cnt;
+ __u32 snd_cwnd_clamp;
+ __u32 snd_ssthresh;
+ __u8 syn_data:1, /* SYN includes data */
+ syn_fastopen:1, /* SYN includes Fast Open option */
+ syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
+ syn_fastopen_ch:1, /* Active TFO re-enabling probe */
+ syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
+ save_syn:1, /* Save headers of SYN packet */
+ is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
+ syn_smc:1; /* SYN includes SMC */
+ __u32 max_packets_out;
+ __u32 lsndtime;
+ __u32 prior_cwnd;
+ __u64 tcp_mstamp; /* most recent packet received/sent */
+} __attribute__((preserve_access_index));
+
+static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk)
+{
+ return (struct inet_connection_sock *)sk;
+}
+
+static __always_inline void *inet_csk_ca(const struct sock *sk)
+{
+ return (void *)inet_csk(sk)->icsk_ca_priv;
+}
+
+static __always_inline struct tcp_sock *tcp_sk(const struct sock *sk)
+{
+ return (struct tcp_sock *)sk;
+}
+
+static __always_inline bool before(__u32 seq1, __u32 seq2)
+{
+ return (__s32)(seq1-seq2) < 0;
+}
+#define after(seq2, seq1) before(seq1, seq2)
+
+#define TCP_ECN_OK 1
+#define TCP_ECN_QUEUE_CWR 2
+#define TCP_ECN_DEMAND_CWR 4
+#define TCP_ECN_SEEN 8
+
+enum inet_csk_ack_state_t {
+ ICSK_ACK_SCHED = 1,
+ ICSK_ACK_TIMER = 2,
+ ICSK_ACK_PUSHED = 4,
+ ICSK_ACK_PUSHED2 = 8,
+ ICSK_ACK_NOW = 16 /* Send the next ACK immediately (once) */
+};
+
+enum tcp_ca_event {
+ CA_EVENT_TX_START = 0,
+ CA_EVENT_CWND_RESTART = 1,
+ CA_EVENT_COMPLETE_CWR = 2,
+ CA_EVENT_LOSS = 3,
+ CA_EVENT_ECN_NO_CE = 4,
+ CA_EVENT_ECN_IS_CE = 5,
+};
+
+enum tcp_ca_state {
+ TCP_CA_Open = 0,
+ TCP_CA_Disorder = 1,
+ TCP_CA_CWR = 2,
+ TCP_CA_Recovery = 3,
+ TCP_CA_Loss = 4
+};
+
+struct ack_sample {
+ __u32 pkts_acked;
+ __s32 rtt_us;
+ __u32 in_flight;
+} __attribute__((preserve_access_index));
+
+struct rate_sample {
+ __u64 prior_mstamp; /* starting timestamp for interval */
+ __u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
+ __s32 delivered; /* number of packets delivered over interval */
+ long interval_us; /* time for tp->delivered to incr "delivered" */
+ __u32 snd_interval_us; /* snd interval for delivered packets */
+ __u32 rcv_interval_us; /* rcv interval for delivered packets */
+ long rtt_us; /* RTT of last (S)ACKed packet (or -1) */
+ int losses; /* number of packets marked lost upon ACK */
+ __u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */
+ __u32 prior_in_flight; /* in flight before this ACK */
+ bool is_app_limited; /* is sample from packet with bubble in pipe? */
+ bool is_retrans; /* is sample from retransmission? */
+ bool is_ack_delayed; /* is this (likely) a delayed ACK? */
+} __attribute__((preserve_access_index));
+
+#define TCP_CA_NAME_MAX 16
+#define TCP_CONG_NEEDS_ECN 0x2
+
+struct tcp_congestion_ops {
+ char name[TCP_CA_NAME_MAX];
+ __u32 flags;
+
+ /* initialize private data (optional) */
+ void (*init)(struct sock *sk);
+ /* cleanup private data (optional) */
+ void (*release)(struct sock *sk);
+
+ /* return slow start threshold (required) */
+ __u32 (*ssthresh)(struct sock *sk);
+ /* do new cwnd calculation (required) */
+ void (*cong_avoid)(struct sock *sk, __u32 ack, __u32 acked);
+ /* call before changing ca_state (optional) */
+ void (*set_state)(struct sock *sk, __u8 new_state);
+ /* call when cwnd event occurs (optional) */
+ void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
+ /* call when ack arrives (optional) */
+ void (*in_ack_event)(struct sock *sk, __u32 flags);
+ /* new value of cwnd after loss (required) */
+ __u32 (*undo_cwnd)(struct sock *sk);
+ /* hook for packet ack accounting (optional) */
+ void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
+ /* override sysctl_tcp_min_tso_segs */
+ __u32 (*min_tso_segs)(struct sock *sk);
+ /* returns the multiplier used in tcp_sndbuf_expand (optional) */
+ __u32 (*sndbuf_expand)(struct sock *sk);
+ /* call when packets are delivered to update cwnd and pacing rate,
+ * after all the ca_state processing. (optional)
+ */
+ void (*cong_control)(struct sock *sk, const struct rate_sample *rs);
+};
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#define min_not_zero(x, y) ({ \
+ typeof(x) __x = (x); \
+ typeof(y) __y = (y); \
+ __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
+
+static __always_inline __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked)
+{
+ __u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh);
+
+ acked -= cwnd - tp->snd_cwnd;
+ tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
+
+ return acked;
+}
+
+static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp)
+{
+ return tp->snd_cwnd < tp->snd_ssthresh;
+}
+
+static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+
+ /* If in slow start, ensure cwnd grows to twice what was ACKed. */
+ if (tcp_in_slow_start(tp))
+ return tp->snd_cwnd < 2 * tp->max_packets_out;
+
+ return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
+}
+
+static __always_inline void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked)
+{
+ /* If credits accumulated at a higher w, apply them gently now. */
+ if (tp->snd_cwnd_cnt >= w) {
+ tp->snd_cwnd_cnt = 0;
+ tp->snd_cwnd++;
+ }
+
+ tp->snd_cwnd_cnt += acked;
+ if (tp->snd_cwnd_cnt >= w) {
+ __u32 delta = tp->snd_cwnd_cnt / w;
+
+ tp->snd_cwnd_cnt -= delta * w;
+ tp->snd_cwnd += delta;
+ }
+ tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp);
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/bpf_trace_helpers.h b/tools/testing/selftests/bpf/bpf_trace_helpers.h
new file mode 100644
index 000000000000..c6f1354d93fb
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_trace_helpers.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_TRACE_HELPERS_H
+#define __BPF_TRACE_HELPERS_H
+
+#include <bpf/bpf_helpers.h>
+
+#define ___bpf_concat(a, b) a ## b
+#define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
+#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
+#define ___bpf_narg(...) \
+ ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+#define ___bpf_empty(...) \
+ ___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0)
+
+#define ___bpf_ctx_cast0() ctx
+#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0]
+#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1]
+#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2]
+#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3]
+#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4]
+#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5]
+#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6]
+#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7]
+#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8]
+#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9]
+#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10]
+#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11]
+#define ___bpf_ctx_cast(args...) \
+ ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args)
+
+/*
+ * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and
+ * similar kinds of BPF programs, that accept input arguments as a single
+ * pointer to untyped u64 array, where each u64 can actually be a typed
+ * pointer or integer of different size. Instead of requring user to write
+ * manual casts and work with array elements by index, BPF_PROG macro
+ * allows user to declare a list of named and typed input arguments in the
+ * same syntax as for normal C function. All the casting is hidden and
+ * performed transparently, while user code can just assume working with
+ * function arguments of specified type and name.
+ *
+ * Original raw context argument is preserved as well as 'ctx' argument.
+ * This is useful when using BPF helpers that expect original context
+ * as one of the parameters (e.g., for bpf_perf_event_output()).
+ */
+#define BPF_PROG(name, args...) \
+name(unsigned long long *ctx); \
+static __always_inline typeof(name(0)) \
+____##name(unsigned long long *ctx, ##args); \
+typeof(name(0)) name(unsigned long long *ctx) \
+{ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ return ____##name(___bpf_ctx_cast(args)); \
+ _Pragma("GCC diagnostic pop") \
+} \
+static __always_inline typeof(name(0)) \
+____##name(unsigned long long *ctx, ##args)
+
+struct pt_regs;
+
+#define ___bpf_kprobe_args0() ctx
+#define ___bpf_kprobe_args1(x) \
+ ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx)
+#define ___bpf_kprobe_args2(x, args...) \
+ ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx)
+#define ___bpf_kprobe_args3(x, args...) \
+ ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx)
+#define ___bpf_kprobe_args4(x, args...) \
+ ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx)
+#define ___bpf_kprobe_args5(x, args...) \
+ ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx)
+#define ___bpf_kprobe_args(args...) \
+ ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args)
+
+/*
+ * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for
+ * tp_btf/fentry/fexit BPF programs. It hides the underlying platform-specific
+ * low-level way of getting kprobe input arguments from struct pt_regs, and
+ * provides a familiar typed and named function arguments syntax and
+ * semantics of accessing kprobe input paremeters.
+ *
+ * Original struct pt_regs* context is preserved as 'ctx' argument. This might
+ * be necessary when using BPF helpers like bpf_perf_event_output().
+ */
+#define BPF_KPROBE(name, args...) \
+name(struct pt_regs *ctx); \
+static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\
+typeof(name(0)) name(struct pt_regs *ctx) \
+{ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ return ____##name(___bpf_kprobe_args(args)); \
+ _Pragma("GCC diagnostic pop") \
+} \
+static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
+
+#define ___bpf_kretprobe_args0() ctx
+#define ___bpf_kretprobe_argsN(x, args...) \
+ ___bpf_kprobe_args(args), (void *)PT_REGS_RET(ctx)
+#define ___bpf_kretprobe_args(args...) \
+ ___bpf_apply(___bpf_kretprobe_args, ___bpf_empty(args))(args)
+
+/*
+ * BPF_KRETPROBE is similar to BPF_KPROBE, except, in addition to listing all
+ * input kprobe arguments, one last extra argument has to be specified, which
+ * captures kprobe return value.
+ */
+#define BPF_KRETPROBE(name, args...) \
+name(struct pt_regs *ctx); \
+static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\
+typeof(name(0)) name(struct pt_regs *ctx) \
+{ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ return ____##name(___bpf_kretprobe_args(args)); \
+ _Pragma("GCC diagnostic pop") \
+} \
+static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
+#endif
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index ec219f84e041..a3352a64c067 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -6,7 +6,7 @@
#include <stdlib.h>
#include <string.h>
#include <errno.h>
-#include <libbpf.h> /* libbpf_num_possible_cpus */
+#include <bpf/libbpf.h> /* libbpf_num_possible_cpus */
static inline unsigned int bpf_num_possible_cpus(void)
{
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index e95c33e333a4..0fb910df5387 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -41,7 +41,7 @@
*
* If successful, 0 is returned.
*/
-int enable_all_controllers(char *cgroup_path)
+static int enable_all_controllers(char *cgroup_path)
{
char path[PATH_MAX + 1];
char buf[PATH_MAX];
@@ -98,7 +98,7 @@ int enable_all_controllers(char *cgroup_path)
*/
int setup_cgroup_environment(void)
{
- char cgroup_workdir[PATH_MAX + 1];
+ char cgroup_workdir[PATH_MAX - 24];
format_cgroup_path(cgroup_workdir, "");
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index f7a0744db31e..5dc109f4c097 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -34,3 +34,4 @@ CONFIG_NET_MPLS_GSO=m
CONFIG_MPLS_ROUTING=m
CONFIG_MPLS_IPTUNNEL=m
CONFIG_IPV6_SIT=m
+CONFIG_BPF_JIT=y
diff --git a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
new file mode 100644
index 000000000000..f0a64d8ac59a
--- /dev/null
+++ b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include <test_maps.h>
+
+static void map_batch_update(int map_fd, __u32 max_entries, int *keys,
+ int *values)
+{
+ int i, err;
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ for (i = 0; i < max_entries; i++) {
+ keys[i] = i;
+ values[i] = i + 1;
+ }
+
+ err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts);
+ CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno));
+}
+
+static void map_batch_verify(int *visited, __u32 max_entries,
+ int *keys, int *values)
+{
+ int i;
+
+ memset(visited, 0, max_entries * sizeof(*visited));
+ for (i = 0; i < max_entries; i++) {
+ CHECK(keys[i] + 1 != values[i], "key/value checking",
+ "error: i %d key %d value %d\n", i, keys[i], values[i]);
+ visited[i] = 1;
+ }
+ for (i = 0; i < max_entries; i++) {
+ CHECK(visited[i] != 1, "visited checking",
+ "error: keys array at index %d missing\n", i);
+ }
+}
+
+void test_array_map_batch_ops(void)
+{
+ struct bpf_create_map_attr xattr = {
+ .name = "array_map",
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ };
+ int map_fd, *keys, *values, *visited;
+ __u32 count, total, total_success;
+ const __u32 max_entries = 10;
+ bool nospace_err;
+ __u64 batch = 0;
+ int err, step;
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ xattr.max_entries = max_entries;
+ map_fd = bpf_create_map_xattr(&xattr);
+ CHECK(map_fd == -1,
+ "bpf_create_map_xattr()", "error:%s\n", strerror(errno));
+
+ keys = malloc(max_entries * sizeof(int));
+ values = malloc(max_entries * sizeof(int));
+ visited = malloc(max_entries * sizeof(int));
+ CHECK(!keys || !values || !visited, "malloc()", "error:%s\n",
+ strerror(errno));
+
+ /* populate elements to the map */
+ map_batch_update(map_fd, max_entries, keys, values);
+
+ /* test 1: lookup in a loop with various steps. */
+ total_success = 0;
+ for (step = 1; step < max_entries; step++) {
+ map_batch_update(map_fd, max_entries, keys, values);
+ map_batch_verify(visited, max_entries, keys, values);
+ memset(keys, 0, max_entries * sizeof(*keys));
+ memset(values, 0, max_entries * sizeof(*values));
+ batch = 0;
+ total = 0;
+ /* iteratively lookup/delete elements with 'step'
+ * elements each.
+ */
+ count = step;
+ nospace_err = false;
+ while (true) {
+ err = bpf_map_lookup_batch(map_fd,
+ total ? &batch : NULL, &batch,
+ keys + total,
+ values + total,
+ &count, &opts);
+
+ CHECK((err && errno != ENOENT), "lookup with steps",
+ "error: %s\n", strerror(errno));
+
+ total += count;
+ if (err)
+ break;
+
+ }
+
+ if (nospace_err == true)
+ continue;
+
+ CHECK(total != max_entries, "lookup with steps",
+ "total = %u, max_entries = %u\n", total, max_entries);
+
+ map_batch_verify(visited, max_entries, keys, values);
+
+ total_success++;
+ }
+
+ CHECK(total_success == 0, "check total_success",
+ "unexpected failure\n");
+
+ printf("%s:PASS\n", __func__);
+
+ free(keys);
+ free(values);
+ free(visited);
+}
diff --git a/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c
new file mode 100644
index 000000000000..976bf415fbdd
--- /dev/null
+++ b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include <bpf_util.h>
+#include <test_maps.h>
+
+static void map_batch_update(int map_fd, __u32 max_entries, int *keys,
+ void *values, bool is_pcpu)
+{
+ typedef BPF_DECLARE_PERCPU(int, value);
+ value *v = NULL;
+ int i, j, err;
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ if (is_pcpu)
+ v = (value *)values;
+
+ for (i = 0; i < max_entries; i++) {
+ keys[i] = i + 1;
+ if (is_pcpu)
+ for (j = 0; j < bpf_num_possible_cpus(); j++)
+ bpf_percpu(v[i], j) = i + 2 + j;
+ else
+ ((int *)values)[i] = i + 2;
+ }
+
+ err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts);
+ CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno));
+}
+
+static void map_batch_verify(int *visited, __u32 max_entries,
+ int *keys, void *values, bool is_pcpu)
+{
+ typedef BPF_DECLARE_PERCPU(int, value);
+ value *v = NULL;
+ int i, j;
+
+ if (is_pcpu)
+ v = (value *)values;
+
+ memset(visited, 0, max_entries * sizeof(*visited));
+ for (i = 0; i < max_entries; i++) {
+
+ if (is_pcpu) {
+ for (j = 0; j < bpf_num_possible_cpus(); j++) {
+ CHECK(keys[i] + 1 + j != bpf_percpu(v[i], j),
+ "key/value checking",
+ "error: i %d j %d key %d value %d\n",
+ i, j, keys[i], bpf_percpu(v[i], j));
+ }
+ } else {
+ CHECK(keys[i] + 1 != ((int *)values)[i],
+ "key/value checking",
+ "error: i %d key %d value %d\n", i, keys[i],
+ ((int *)values)[i]);
+ }
+
+ visited[i] = 1;
+
+ }
+ for (i = 0; i < max_entries; i++) {
+ CHECK(visited[i] != 1, "visited checking",
+ "error: keys array at index %d missing\n", i);
+ }
+}
+
+void __test_map_lookup_and_delete_batch(bool is_pcpu)
+{
+ __u32 batch, count, total, total_success;
+ typedef BPF_DECLARE_PERCPU(int, value);
+ int map_fd, *keys, *visited, key;
+ const __u32 max_entries = 10;
+ value pcpu_values[max_entries];
+ int err, step, value_size;
+ bool nospace_err;
+ void *values;
+ struct bpf_create_map_attr xattr = {
+ .name = "hash_map",
+ .map_type = is_pcpu ? BPF_MAP_TYPE_PERCPU_HASH :
+ BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ };
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ xattr.max_entries = max_entries;
+ map_fd = bpf_create_map_xattr(&xattr);
+ CHECK(map_fd == -1,
+ "bpf_create_map_xattr()", "error:%s\n", strerror(errno));
+
+ value_size = is_pcpu ? sizeof(value) : sizeof(int);
+ keys = malloc(max_entries * sizeof(int));
+ if (is_pcpu)
+ values = pcpu_values;
+ else
+ values = malloc(max_entries * sizeof(int));
+ visited = malloc(max_entries * sizeof(int));
+ CHECK(!keys || !values || !visited, "malloc()",
+ "error:%s\n", strerror(errno));
+
+ /* test 1: lookup/delete an empty hash table, -ENOENT */
+ count = max_entries;
+ err = bpf_map_lookup_and_delete_batch(map_fd, NULL, &batch, keys,
+ values, &count, &opts);
+ CHECK((err && errno != ENOENT), "empty map",
+ "error: %s\n", strerror(errno));
+
+ /* populate elements to the map */
+ map_batch_update(map_fd, max_entries, keys, values, is_pcpu);
+
+ /* test 2: lookup/delete with count = 0, success */
+ count = 0;
+ err = bpf_map_lookup_and_delete_batch(map_fd, NULL, &batch, keys,
+ values, &count, &opts);
+ CHECK(err, "count = 0", "error: %s\n", strerror(errno));
+
+ /* test 3: lookup/delete with count = max_entries, success */
+ memset(keys, 0, max_entries * sizeof(*keys));
+ memset(values, 0, max_entries * value_size);
+ count = max_entries;
+ err = bpf_map_lookup_and_delete_batch(map_fd, NULL, &batch, keys,
+ values, &count, &opts);
+ CHECK((err && errno != ENOENT), "count = max_entries",
+ "error: %s\n", strerror(errno));
+ CHECK(count != max_entries, "count = max_entries",
+ "count = %u, max_entries = %u\n", count, max_entries);
+ map_batch_verify(visited, max_entries, keys, values, is_pcpu);
+
+ /* bpf_map_get_next_key() should return -ENOENT for an empty map. */
+ err = bpf_map_get_next_key(map_fd, NULL, &key);
+ CHECK(!err, "bpf_map_get_next_key()", "error: %s\n", strerror(errno));
+
+ /* test 4: lookup/delete in a loop with various steps. */
+ total_success = 0;
+ for (step = 1; step < max_entries; step++) {
+ map_batch_update(map_fd, max_entries, keys, values, is_pcpu);
+ memset(keys, 0, max_entries * sizeof(*keys));
+ memset(values, 0, max_entries * value_size);
+ total = 0;
+ /* iteratively lookup/delete elements with 'step'
+ * elements each
+ */
+ count = step;
+ nospace_err = false;
+ while (true) {
+ err = bpf_map_lookup_batch(map_fd,
+ total ? &batch : NULL,
+ &batch, keys + total,
+ values +
+ total * value_size,
+ &count, &opts);
+ /* It is possible that we are failing due to buffer size
+ * not big enough. In such cases, let us just exit and
+ * go with large steps. Not that a buffer size with
+ * max_entries should always work.
+ */
+ if (err && errno == ENOSPC) {
+ nospace_err = true;
+ break;
+ }
+
+ CHECK((err && errno != ENOENT), "lookup with steps",
+ "error: %s\n", strerror(errno));
+
+ total += count;
+ if (err)
+ break;
+
+ }
+ if (nospace_err == true)
+ continue;
+
+ CHECK(total != max_entries, "lookup with steps",
+ "total = %u, max_entries = %u\n", total, max_entries);
+ map_batch_verify(visited, max_entries, keys, values, is_pcpu);
+
+ total = 0;
+ count = step;
+ while (total < max_entries) {
+ if (max_entries - total < step)
+ count = max_entries - total;
+ err = bpf_map_delete_batch(map_fd,
+ keys + total,
+ &count, &opts);
+ CHECK((err && errno != ENOENT), "delete batch",
+ "error: %s\n", strerror(errno));
+ total += count;
+ if (err)
+ break;
+ }
+ CHECK(total != max_entries, "delete with steps",
+ "total = %u, max_entries = %u\n", total, max_entries);
+
+ /* check map is empty, errono == ENOENT */
+ err = bpf_map_get_next_key(map_fd, NULL, &key);
+ CHECK(!err || errno != ENOENT, "bpf_map_get_next_key()",
+ "error: %s\n", strerror(errno));
+
+ /* iteratively lookup/delete elements with 'step'
+ * elements each
+ */
+ map_batch_update(map_fd, max_entries, keys, values, is_pcpu);
+ memset(keys, 0, max_entries * sizeof(*keys));
+ memset(values, 0, max_entries * value_size);
+ total = 0;
+ count = step;
+ nospace_err = false;
+ while (true) {
+ err = bpf_map_lookup_and_delete_batch(map_fd,
+ total ? &batch : NULL,
+ &batch, keys + total,
+ values +
+ total * value_size,
+ &count, &opts);
+ /* It is possible that we are failing due to buffer size
+ * not big enough. In such cases, let us just exit and
+ * go with large steps. Not that a buffer size with
+ * max_entries should always work.
+ */
+ if (err && errno == ENOSPC) {
+ nospace_err = true;
+ break;
+ }
+
+ CHECK((err && errno != ENOENT), "lookup with steps",
+ "error: %s\n", strerror(errno));
+
+ total += count;
+ if (err)
+ break;
+ }
+
+ if (nospace_err == true)
+ continue;
+
+ CHECK(total != max_entries, "lookup/delete with steps",
+ "total = %u, max_entries = %u\n", total, max_entries);
+
+ map_batch_verify(visited, max_entries, keys, values, is_pcpu);
+ err = bpf_map_get_next_key(map_fd, NULL, &key);
+ CHECK(!err, "bpf_map_get_next_key()", "error: %s\n",
+ strerror(errno));
+
+ total_success++;
+ }
+
+ CHECK(total_success == 0, "check total_success",
+ "unexpected failure\n");
+ free(keys);
+ free(visited);
+ if (!is_pcpu)
+ free(values);
+}
+
+void htab_map_batch_ops(void)
+{
+ __test_map_lookup_and_delete_batch(false);
+ printf("test_%s:PASS\n", __func__);
+}
+
+void htab_percpu_map_batch_ops(void)
+{
+ __test_map_lookup_and_delete_batch(true);
+ printf("test_%s:PASS\n", __func__);
+}
+
+void test_htab_map_batch_ops(void)
+{
+ htab_map_batch_ops();
+ htab_percpu_map_batch_ops();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index 5ecc267d98b0..a0ee87c8e1ea 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -1,8 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include "test_attach_probe.skel.h"
ssize_t get_base_addr() {
- size_t start;
+ size_t start, offset;
char buf[256];
FILE *f;
@@ -10,10 +11,11 @@ ssize_t get_base_addr() {
if (!f)
return -errno;
- while (fscanf(f, "%zx-%*x %s %*s\n", &start, buf) == 2) {
+ while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n",
+ &start, buf, &offset) == 3) {
if (strcmp(buf, "r-xp") == 0) {
fclose(f);
- return start;
+ return start - offset;
}
}
@@ -23,22 +25,10 @@ ssize_t get_base_addr() {
void test_attach_probe(void)
{
- const char *kprobe_name = "kprobe/sys_nanosleep";
- const char *kretprobe_name = "kretprobe/sys_nanosleep";
- const char *uprobe_name = "uprobe/trigger_func";
- const char *uretprobe_name = "uretprobe/trigger_func";
- const int kprobe_idx = 0, kretprobe_idx = 1;
- const int uprobe_idx = 2, uretprobe_idx = 3;
- const char *file = "./test_attach_probe.o";
- struct bpf_program *kprobe_prog, *kretprobe_prog;
- struct bpf_program *uprobe_prog, *uretprobe_prog;
- struct bpf_object *obj;
- int err, prog_fd, duration = 0, res;
- struct bpf_link *kprobe_link = NULL;
- struct bpf_link *kretprobe_link = NULL;
- struct bpf_link *uprobe_link = NULL;
- struct bpf_link *uretprobe_link = NULL;
- int results_map_fd;
+ int duration = 0;
+ struct bpf_link *kprobe_link, *kretprobe_link;
+ struct bpf_link *uprobe_link, *uretprobe_link;
+ struct test_attach_probe* skel;
size_t uprobe_offset;
ssize_t base_addr;
@@ -48,113 +38,68 @@ void test_attach_probe(void)
return;
uprobe_offset = (size_t)&get_base_addr - base_addr;
- /* load programs */
- err = bpf_prog_load(file, BPF_PROG_TYPE_KPROBE, &obj, &prog_fd);
- if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno))
+ skel = test_attach_probe__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
return;
-
- kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name);
- if (CHECK(!kprobe_prog, "find_probe",
- "prog '%s' not found\n", kprobe_name))
- goto cleanup;
- kretprobe_prog = bpf_object__find_program_by_title(obj, kretprobe_name);
- if (CHECK(!kretprobe_prog, "find_probe",
- "prog '%s' not found\n", kretprobe_name))
- goto cleanup;
- uprobe_prog = bpf_object__find_program_by_title(obj, uprobe_name);
- if (CHECK(!uprobe_prog, "find_probe",
- "prog '%s' not found\n", uprobe_name))
- goto cleanup;
- uretprobe_prog = bpf_object__find_program_by_title(obj, uretprobe_name);
- if (CHECK(!uretprobe_prog, "find_probe",
- "prog '%s' not found\n", uretprobe_name))
- goto cleanup;
-
- /* load maps */
- results_map_fd = bpf_find_map(__func__, obj, "results_map");
- if (CHECK(results_map_fd < 0, "find_results_map",
- "err %d\n", results_map_fd))
+ if (CHECK(!skel->bss, "check_bss", ".bss wasn't mmap()-ed\n"))
goto cleanup;
- kprobe_link = bpf_program__attach_kprobe(kprobe_prog,
+ kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe,
false /* retprobe */,
SYS_NANOSLEEP_KPROBE_NAME);
if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
- "err %ld\n", PTR_ERR(kprobe_link))) {
- kprobe_link = NULL;
+ "err %ld\n", PTR_ERR(kprobe_link)))
goto cleanup;
- }
- kretprobe_link = bpf_program__attach_kprobe(kretprobe_prog,
+ skel->links.handle_kprobe = kprobe_link;
+
+ kretprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kretprobe,
true /* retprobe */,
SYS_NANOSLEEP_KPROBE_NAME);
if (CHECK(IS_ERR(kretprobe_link), "attach_kretprobe",
- "err %ld\n", PTR_ERR(kretprobe_link))) {
- kretprobe_link = NULL;
+ "err %ld\n", PTR_ERR(kretprobe_link)))
goto cleanup;
- }
- uprobe_link = bpf_program__attach_uprobe(uprobe_prog,
+ skel->links.handle_kretprobe = kretprobe_link;
+
+ uprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uprobe,
false /* retprobe */,
0 /* self pid */,
"/proc/self/exe",
uprobe_offset);
if (CHECK(IS_ERR(uprobe_link), "attach_uprobe",
- "err %ld\n", PTR_ERR(uprobe_link))) {
- uprobe_link = NULL;
+ "err %ld\n", PTR_ERR(uprobe_link)))
goto cleanup;
- }
- uretprobe_link = bpf_program__attach_uprobe(uretprobe_prog,
+ skel->links.handle_uprobe = uprobe_link;
+
+ uretprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uretprobe,
true /* retprobe */,
-1 /* any pid */,
"/proc/self/exe",
uprobe_offset);
if (CHECK(IS_ERR(uretprobe_link), "attach_uretprobe",
- "err %ld\n", PTR_ERR(uretprobe_link))) {
- uretprobe_link = NULL;
+ "err %ld\n", PTR_ERR(uretprobe_link)))
goto cleanup;
- }
+ skel->links.handle_uretprobe = uretprobe_link;
/* trigger & validate kprobe && kretprobe */
usleep(1);
- err = bpf_map_lookup_elem(results_map_fd, &kprobe_idx, &res);
- if (CHECK(err, "get_kprobe_res",
- "failed to get kprobe res: %d\n", err))
+ if (CHECK(skel->bss->kprobe_res != 1, "check_kprobe_res",
+ "wrong kprobe res: %d\n", skel->bss->kprobe_res))
goto cleanup;
- if (CHECK(res != kprobe_idx + 1, "check_kprobe_res",
- "wrong kprobe res: %d\n", res))
- goto cleanup;
-
- err = bpf_map_lookup_elem(results_map_fd, &kretprobe_idx, &res);
- if (CHECK(err, "get_kretprobe_res",
- "failed to get kretprobe res: %d\n", err))
- goto cleanup;
- if (CHECK(res != kretprobe_idx + 1, "check_kretprobe_res",
- "wrong kretprobe res: %d\n", res))
+ if (CHECK(skel->bss->kretprobe_res != 2, "check_kretprobe_res",
+ "wrong kretprobe res: %d\n", skel->bss->kretprobe_res))
goto cleanup;
/* trigger & validate uprobe & uretprobe */
get_base_addr();
- err = bpf_map_lookup_elem(results_map_fd, &uprobe_idx, &res);
- if (CHECK(err, "get_uprobe_res",
- "failed to get uprobe res: %d\n", err))
- goto cleanup;
- if (CHECK(res != uprobe_idx + 1, "check_uprobe_res",
- "wrong uprobe res: %d\n", res))
- goto cleanup;
-
- err = bpf_map_lookup_elem(results_map_fd, &uretprobe_idx, &res);
- if (CHECK(err, "get_uretprobe_res",
- "failed to get uretprobe res: %d\n", err))
+ if (CHECK(skel->bss->uprobe_res != 3, "check_uprobe_res",
+ "wrong uprobe res: %d\n", skel->bss->uprobe_res))
goto cleanup;
- if (CHECK(res != uretprobe_idx + 1, "check_uretprobe_res",
- "wrong uretprobe res: %d\n", res))
+ if (CHECK(skel->bss->uretprobe_res != 4, "check_uretprobe_res",
+ "wrong uretprobe res: %d\n", skel->bss->uretprobe_res))
goto cleanup;
cleanup:
- bpf_link__destroy(kprobe_link);
- bpf_link__destroy(kretprobe_link);
- bpf_link__destroy(uprobe_link);
- bpf_link__destroy(uretprobe_link);
- bpf_object__close(obj);
+ test_attach_probe__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
index cb827383db4d..f10029821e16 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
@@ -48,16 +48,17 @@ void test_bpf_obj_id(void)
/* test_obj_id.o is a dumb prog. It should never fail
* to load.
*/
- if (err)
- error_cnt++;
- assert(!err);
+ if (CHECK_FAIL(err))
+ continue;
/* Insert a magic value to the map */
map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id");
- assert(map_fds[i] >= 0);
+ if (CHECK_FAIL(map_fds[i] < 0))
+ goto done;
err = bpf_map_update_elem(map_fds[i], &array_key,
&array_magic_value, 0);
- assert(!err);
+ if (CHECK_FAIL(err))
+ goto done;
/* Check getting map info */
info_len = sizeof(struct bpf_map_info) * 2;
@@ -96,9 +97,11 @@ void test_bpf_obj_id(void)
prog_infos[i].map_ids = ptr_to_u64(map_ids + i);
prog_infos[i].nr_map_ids = 2;
err = clock_gettime(CLOCK_REALTIME, &real_time_ts);
- assert(!err);
+ if (CHECK_FAIL(err))
+ goto done;
err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts);
- assert(!err);
+ if (CHECK_FAIL(err))
+ goto done;
err = bpf_obj_get_info_by_fd(prog_fds[i], &prog_infos[i],
&info_len);
load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec)
@@ -106,8 +109,8 @@ void test_bpf_obj_id(void)
if (CHECK(err ||
prog_infos[i].type != BPF_PROG_TYPE_SOCKET_FILTER ||
info_len != sizeof(struct bpf_prog_info) ||
- (jit_enabled && !prog_infos[i].jited_prog_len) ||
- (jit_enabled &&
+ (env.jit_enabled && !prog_infos[i].jited_prog_len) ||
+ (env.jit_enabled &&
!memcmp(jited_insns, zeros, sizeof(zeros))) ||
!prog_infos[i].xlated_prog_len ||
!memcmp(xlated_insns, zeros, sizeof(zeros)) ||
@@ -121,7 +124,7 @@ void test_bpf_obj_id(void)
err, errno, i,
prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
info_len, sizeof(struct bpf_prog_info),
- jit_enabled,
+ env.jit_enabled,
prog_infos[i].jited_prog_len,
prog_infos[i].xlated_prog_len,
!!memcmp(jited_insns, zeros, sizeof(zeros)),
@@ -224,7 +227,8 @@ void test_bpf_obj_id(void)
nr_id_found++;
err = bpf_map_lookup_elem(map_fd, &array_key, &array_value);
- assert(!err);
+ if (CHECK_FAIL(err))
+ goto done;
err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
CHECK(err || info_len != sizeof(struct bpf_map_info) ||
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
new file mode 100644
index 000000000000..8482bbc67eec
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <linux/err.h>
+#include <test_progs.h>
+#include "bpf_dctcp.skel.h"
+#include "bpf_cubic.skel.h"
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+static const unsigned int total_bytes = 10 * 1024 * 1024;
+static const struct timeval timeo_sec = { .tv_sec = 10 };
+static const size_t timeo_optlen = sizeof(timeo_sec);
+static int stop, duration;
+
+static int settimeo(int fd)
+{
+ int err;
+
+ err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
+ timeo_optlen);
+ if (CHECK(err == -1, "setsockopt(fd, SO_RCVTIMEO)", "errno:%d\n",
+ errno))
+ return -1;
+
+ err = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo_sec,
+ timeo_optlen);
+ if (CHECK(err == -1, "setsockopt(fd, SO_SNDTIMEO)", "errno:%d\n",
+ errno))
+ return -1;
+
+ return 0;
+}
+
+static int settcpca(int fd, const char *tcp_ca)
+{
+ int err;
+
+ err = setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, tcp_ca, strlen(tcp_ca));
+ if (CHECK(err == -1, "setsockopt(fd, TCP_CONGESTION)", "errno:%d\n",
+ errno))
+ return -1;
+
+ return 0;
+}
+
+static void *server(void *arg)
+{
+ int lfd = (int)(long)arg, err = 0, fd;
+ ssize_t nr_sent = 0, bytes = 0;
+ char batch[1500];
+
+ fd = accept(lfd, NULL, NULL);
+ while (fd == -1) {
+ if (errno == EINTR)
+ continue;
+ err = -errno;
+ goto done;
+ }
+
+ if (settimeo(fd)) {
+ err = -errno;
+ goto done;
+ }
+
+ while (bytes < total_bytes && !READ_ONCE(stop)) {
+ nr_sent = send(fd, &batch,
+ min(total_bytes - bytes, sizeof(batch)), 0);
+ if (nr_sent == -1 && errno == EINTR)
+ continue;
+ if (nr_sent == -1) {
+ err = -errno;
+ break;
+ }
+ bytes += nr_sent;
+ }
+
+ CHECK(bytes != total_bytes, "send", "%zd != %u nr_sent:%zd errno:%d\n",
+ bytes, total_bytes, nr_sent, errno);
+
+done:
+ if (fd != -1)
+ close(fd);
+ if (err) {
+ WRITE_ONCE(stop, 1);
+ return ERR_PTR(err);
+ }
+ return NULL;
+}
+
+static void do_test(const char *tcp_ca)
+{
+ struct sockaddr_in6 sa6 = {};
+ ssize_t nr_recv = 0, bytes = 0;
+ int lfd = -1, fd = -1;
+ pthread_t srv_thread;
+ socklen_t addrlen = sizeof(sa6);
+ void *thread_ret;
+ char batch[1500];
+ int err;
+
+ WRITE_ONCE(stop, 0);
+
+ lfd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (CHECK(lfd == -1, "socket", "errno:%d\n", errno))
+ return;
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (CHECK(fd == -1, "socket", "errno:%d\n", errno)) {
+ close(lfd);
+ return;
+ }
+
+ if (settcpca(lfd, tcp_ca) || settcpca(fd, tcp_ca) ||
+ settimeo(lfd) || settimeo(fd))
+ goto done;
+
+ /* bind, listen and start server thread to accept */
+ sa6.sin6_family = AF_INET6;
+ sa6.sin6_addr = in6addr_loopback;
+ err = bind(lfd, (struct sockaddr *)&sa6, addrlen);
+ if (CHECK(err == -1, "bind", "errno:%d\n", errno))
+ goto done;
+ err = getsockname(lfd, (struct sockaddr *)&sa6, &addrlen);
+ if (CHECK(err == -1, "getsockname", "errno:%d\n", errno))
+ goto done;
+ err = listen(lfd, 1);
+ if (CHECK(err == -1, "listen", "errno:%d\n", errno))
+ goto done;
+ err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd);
+ if (CHECK(err != 0, "pthread_create", "err:%d\n", err))
+ goto done;
+
+ /* connect to server */
+ err = connect(fd, (struct sockaddr *)&sa6, addrlen);
+ if (CHECK(err == -1, "connect", "errno:%d\n", errno))
+ goto wait_thread;
+
+ /* recv total_bytes */
+ while (bytes < total_bytes && !READ_ONCE(stop)) {
+ nr_recv = recv(fd, &batch,
+ min(total_bytes - bytes, sizeof(batch)), 0);
+ if (nr_recv == -1 && errno == EINTR)
+ continue;
+ if (nr_recv == -1)
+ break;
+ bytes += nr_recv;
+ }
+
+ CHECK(bytes != total_bytes, "recv", "%zd != %u nr_recv:%zd errno:%d\n",
+ bytes, total_bytes, nr_recv, errno);
+
+wait_thread:
+ WRITE_ONCE(stop, 1);
+ pthread_join(srv_thread, &thread_ret);
+ CHECK(IS_ERR(thread_ret), "pthread_join", "thread_ret:%ld",
+ PTR_ERR(thread_ret));
+done:
+ close(lfd);
+ close(fd);
+}
+
+static void test_cubic(void)
+{
+ struct bpf_cubic *cubic_skel;
+ struct bpf_link *link;
+
+ cubic_skel = bpf_cubic__open_and_load();
+ if (CHECK(!cubic_skel, "bpf_cubic__open_and_load", "failed\n"))
+ return;
+
+ link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic);
+ if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n",
+ PTR_ERR(link))) {
+ bpf_cubic__destroy(cubic_skel);
+ return;
+ }
+
+ do_test("bpf_cubic");
+
+ bpf_link__destroy(link);
+ bpf_cubic__destroy(cubic_skel);
+}
+
+static void test_dctcp(void)
+{
+ struct bpf_dctcp *dctcp_skel;
+ struct bpf_link *link;
+
+ dctcp_skel = bpf_dctcp__open_and_load();
+ if (CHECK(!dctcp_skel, "bpf_dctcp__open_and_load", "failed\n"))
+ return;
+
+ link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
+ if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n",
+ PTR_ERR(link))) {
+ bpf_dctcp__destroy(dctcp_skel);
+ return;
+ }
+
+ do_test("bpf_dctcp");
+
+ bpf_link__destroy(link);
+ bpf_dctcp__destroy(dctcp_skel);
+}
+
+void test_bpf_tcp_ca(void)
+{
+ if (test__start_subtest("dctcp"))
+ test_dctcp();
+ if (test__start_subtest("cubic"))
+ test_cubic();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index e1b55261526f..e9f2f12ba06b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -4,14 +4,19 @@
static int libbpf_debug_print(enum libbpf_print_level level,
const char *format, va_list args)
{
- if (level != LIBBPF_DEBUG)
- return vfprintf(stderr, format, args);
+ if (level != LIBBPF_DEBUG) {
+ vprintf(format, args);
+ return 0;
+ }
if (!strstr(format, "verifier log"))
return 0;
- return vfprintf(stderr, "%s", args);
+ vprintf("%s", args);
+ return 0;
}
+extern int extra_prog_load_log_flags;
+
static int check_load(const char *file, enum bpf_prog_type type)
{
struct bpf_prog_load_attr attr;
@@ -21,23 +26,34 @@ static int check_load(const char *file, enum bpf_prog_type type)
memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
attr.file = file;
attr.prog_type = type;
- attr.log_level = 4;
+ attr.log_level = 4 | extra_prog_load_log_flags;
attr.prog_flags = BPF_F_TEST_RND_HI32;
err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
bpf_object__close(obj);
- if (err)
- error_cnt++;
return err;
}
+struct scale_test_def {
+ const char *file;
+ enum bpf_prog_type attach_type;
+ bool fails;
+};
+
void test_bpf_verif_scale(void)
{
- const char *sched_cls[] = {
- "./test_verif_scale1.o", "./test_verif_scale2.o", "./test_verif_scale3.o",
- };
- const char *raw_tp[] = {
+ struct scale_test_def tests[] = {
+ { "loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT, true /* fails */ },
+
+ { "test_verif_scale1.o", BPF_PROG_TYPE_SCHED_CLS },
+ { "test_verif_scale2.o", BPF_PROG_TYPE_SCHED_CLS },
+ { "test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS },
+
+ { "pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
+
/* full unroll by llvm */
- "./pyperf50.o", "./pyperf100.o", "./pyperf180.o",
+ { "pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
+ { "pyperf100.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
+ { "pyperf180.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
/* partial unroll. llvm will unroll loop ~150 times.
* C loop count -> 600.
@@ -45,7 +61,7 @@ void test_bpf_verif_scale(void)
* 16k insns in loop body.
* Total of 5 such loops. Total program size ~82k insns.
*/
- "./pyperf600.o",
+ { "pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
/* no unroll at all.
* C loop count -> 600.
@@ -53,48 +69,47 @@ void test_bpf_verif_scale(void)
* ~110 insns in loop body.
* Total of 5 such loops. Total program size ~1500 insns.
*/
- "./pyperf600_nounroll.o",
+ { "pyperf600_nounroll.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
- "./loop1.o", "./loop2.o",
+ { "loop1.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
+ { "loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
+ { "loop4.o", BPF_PROG_TYPE_SCHED_CLS },
+ { "loop5.o", BPF_PROG_TYPE_SCHED_CLS },
/* partial unroll. 19k insn in a loop.
* Total program size 20.8k insn.
* ~350k processed_insns
*/
- "./strobemeta.o",
+ { "strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
/* no unroll, tiny loops */
- "./strobemeta_nounroll1.o",
- "./strobemeta_nounroll2.o",
- };
- const char *cg_sysctl[] = {
- "./test_sysctl_loop1.o", "./test_sysctl_loop2.o",
- };
- int err, i;
+ { "strobemeta_nounroll1.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
+ { "strobemeta_nounroll2.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
- if (verifier_stats)
- libbpf_set_print(libbpf_debug_print);
+ { "test_sysctl_loop1.o", BPF_PROG_TYPE_CGROUP_SYSCTL },
+ { "test_sysctl_loop2.o", BPF_PROG_TYPE_CGROUP_SYSCTL },
- err = check_load("./loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT);
- printf("test_scale:loop3:%s\n", err ? (error_cnt--, "OK") : "FAIL");
+ { "test_xdp_loop.o", BPF_PROG_TYPE_XDP },
+ { "test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL },
+ };
+ libbpf_print_fn_t old_print_fn = NULL;
+ int err, i;
- for (i = 0; i < ARRAY_SIZE(sched_cls); i++) {
- err = check_load(sched_cls[i], BPF_PROG_TYPE_SCHED_CLS);
- printf("test_scale:%s:%s\n", sched_cls[i], err ? "FAIL" : "OK");
+ if (env.verifier_stats) {
+ test__force_log();
+ old_print_fn = libbpf_set_print(libbpf_debug_print);
}
- for (i = 0; i < ARRAY_SIZE(raw_tp); i++) {
- err = check_load(raw_tp[i], BPF_PROG_TYPE_RAW_TRACEPOINT);
- printf("test_scale:%s:%s\n", raw_tp[i], err ? "FAIL" : "OK");
- }
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ const struct scale_test_def *test = &tests[i];
+
+ if (!test__start_subtest(test->file))
+ continue;
- for (i = 0; i < ARRAY_SIZE(cg_sysctl); i++) {
- err = check_load(cg_sysctl[i], BPF_PROG_TYPE_CGROUP_SYSCTL);
- printf("test_scale:%s:%s\n", cg_sysctl[i], err ? "FAIL" : "OK");
+ err = check_load(test->file, test->attach_type);
+ CHECK_FAIL(err && !test->fails);
}
- err = check_load("./test_xdp_loop.o", BPF_PROG_TYPE_XDP);
- printf("test_scale:test_xdp_loop:%s\n", err ? "FAIL" : "OK");
- err = check_load("./test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL);
- printf("test_scale:test_seg6_loop:%s\n", err ? "FAIL" : "OK");
+ if (env.verifier_stats)
+ libbpf_set_print(old_print_fn);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
new file mode 100644
index 000000000000..7390d3061065
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+static int duration = 0;
+
+void btf_dump_printf(void *ctx, const char *fmt, va_list args)
+{
+ vfprintf(ctx, fmt, args);
+}
+
+static struct btf_dump_test_case {
+ const char *name;
+ const char *file;
+ struct btf_dump_opts opts;
+} btf_dump_test_cases[] = {
+ {"btf_dump: syntax", "btf_dump_test_case_syntax", {}},
+ {"btf_dump: ordering", "btf_dump_test_case_ordering", {}},
+ {"btf_dump: padding", "btf_dump_test_case_padding", {}},
+ {"btf_dump: packing", "btf_dump_test_case_packing", {}},
+ {"btf_dump: bitfields", "btf_dump_test_case_bitfields", {}},
+ {"btf_dump: multidim", "btf_dump_test_case_multidim", {}},
+ {"btf_dump: namespacing", "btf_dump_test_case_namespacing", {}},
+};
+
+static int btf_dump_all_types(const struct btf *btf,
+ const struct btf_dump_opts *opts)
+{
+ size_t type_cnt = btf__get_nr_types(btf);
+ struct btf_dump *d;
+ int err = 0, id;
+
+ d = btf_dump__new(btf, NULL, opts, btf_dump_printf);
+ if (IS_ERR(d))
+ return PTR_ERR(d);
+
+ for (id = 1; id <= type_cnt; id++) {
+ err = btf_dump__dump_type(d, id);
+ if (err)
+ goto done;
+ }
+
+done:
+ btf_dump__free(d);
+ return err;
+}
+
+static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
+{
+ char test_file[256], out_file[256], diff_cmd[1024];
+ struct btf *btf = NULL;
+ int err = 0, fd = -1;
+ FILE *f = NULL;
+
+ snprintf(test_file, sizeof(test_file), "%s.o", t->file);
+
+ btf = btf__parse_elf(test_file, NULL);
+ if (CHECK(IS_ERR(btf), "btf_parse_elf",
+ "failed to load test BTF: %ld\n", PTR_ERR(btf))) {
+ err = -PTR_ERR(btf);
+ btf = NULL;
+ goto done;
+ }
+
+ snprintf(out_file, sizeof(out_file), "/tmp/%s.output.XXXXXX", t->file);
+ fd = mkstemp(out_file);
+ if (CHECK(fd < 0, "create_tmp", "failed to create file: %d\n", fd)) {
+ err = fd;
+ goto done;
+ }
+ f = fdopen(fd, "w");
+ if (CHECK(f == NULL, "open_tmp", "failed to open file: %s(%d)\n",
+ strerror(errno), errno)) {
+ close(fd);
+ goto done;
+ }
+
+ t->opts.ctx = f;
+ err = btf_dump_all_types(btf, &t->opts);
+ fclose(f);
+ close(fd);
+ if (CHECK(err, "btf_dump", "failure during C dumping: %d\n", err)) {
+ goto done;
+ }
+
+ snprintf(test_file, sizeof(test_file), "progs/%s.c", t->file);
+ if (access(test_file, R_OK) == -1)
+ /*
+ * When the test is run with O=, kselftest copies TEST_FILES
+ * without preserving the directory structure.
+ */
+ snprintf(test_file, sizeof(test_file), "%s.c", t->file);
+ /*
+ * Diff test output and expected test output, contained between
+ * START-EXPECTED-OUTPUT and END-EXPECTED-OUTPUT lines in test case.
+ * For expected output lines, everything before '*' is stripped out.
+ * Also lines containing comment start and comment end markers are
+ * ignored.
+ */
+ snprintf(diff_cmd, sizeof(diff_cmd),
+ "awk '/START-EXPECTED-OUTPUT/{out=1;next} "
+ "/END-EXPECTED-OUTPUT/{out=0} "
+ "/\\/\\*|\\*\\//{next} " /* ignore comment start/end lines */
+ "out {sub(/^[ \\t]*\\*/, \"\"); print}' '%s' | diff -u - '%s'",
+ test_file, out_file);
+ err = system(diff_cmd);
+ if (CHECK(err, "diff",
+ "differing test output, output=%s, err=%d, diff cmd:\n%s\n",
+ out_file, err, diff_cmd))
+ goto done;
+
+ remove(out_file);
+
+done:
+ btf__free(btf);
+ return err;
+}
+
+void test_btf_dump() {
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(btf_dump_test_cases); i++) {
+ struct btf_dump_test_case *t = &btf_dump_test_cases[i];
+
+ if (!test__start_subtest(t->name))
+ continue;
+
+ test_btf_dump_case(i, &btf_dump_test_cases[i]);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
new file mode 100644
index 000000000000..5b13f2c6c402
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#include "cgroup_helpers.h"
+
+#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+static int prog_load(void)
+{
+ struct bpf_insn prog[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = 1 */
+ BPF_EXIT_INSN(),
+ };
+ size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+
+ return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+ prog, insns_cnt, "GPL", 0,
+ bpf_log_buf, BPF_LOG_BUF_SIZE);
+}
+
+void test_cgroup_attach_autodetach(void)
+{
+ __u32 duration = 0, prog_cnt = 4, attach_flags;
+ int allow_prog[2] = {-1};
+ __u32 prog_ids[2] = {0};
+ void *ptr = NULL;
+ int cg = 0, i;
+ int attempts;
+
+ for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
+ allow_prog[i] = prog_load();
+ if (CHECK(allow_prog[i] < 0, "prog_load",
+ "verifier output:\n%s\n-------\n", bpf_log_buf))
+ goto err;
+ }
+
+ if (CHECK_FAIL(setup_cgroup_environment()))
+ goto err;
+
+ /* create a cgroup, attach two programs and remember their ids */
+ cg = create_and_get_cgroup("/cg_autodetach");
+ if (CHECK_FAIL(cg < 0))
+ goto err;
+
+ if (CHECK_FAIL(join_cgroup("/cg_autodetach")))
+ goto err;
+
+ for (i = 0; i < ARRAY_SIZE(allow_prog); i++)
+ if (CHECK(bpf_prog_attach(allow_prog[i], cg,
+ BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_MULTI),
+ "prog_attach", "prog[%d], errno=%d\n", i, errno))
+ goto err;
+
+ /* make sure that programs are attached and run some traffic */
+ if (CHECK(bpf_prog_query(cg, BPF_CGROUP_INET_EGRESS, 0, &attach_flags,
+ prog_ids, &prog_cnt),
+ "prog_query", "errno=%d\n", errno))
+ goto err;
+ if (CHECK_FAIL(system(PING_CMD)))
+ goto err;
+
+ /* allocate some memory (4Mb) to pin the original cgroup */
+ ptr = malloc(4 * (1 << 20));
+ if (CHECK_FAIL(!ptr))
+ goto err;
+
+ /* close programs and cgroup fd */
+ for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
+ close(allow_prog[i]);
+ allow_prog[i] = -1;
+ }
+
+ close(cg);
+ cg = 0;
+
+ /* leave the cgroup and remove it. don't detach programs */
+ cleanup_cgroup_environment();
+
+ /* wait for the asynchronous auto-detachment.
+ * wait for no more than 5 sec and give up.
+ */
+ for (i = 0; i < ARRAY_SIZE(prog_ids); i++) {
+ for (attempts = 5; attempts >= 0; attempts--) {
+ int fd = bpf_prog_get_fd_by_id(prog_ids[i]);
+
+ if (fd < 0)
+ break;
+
+ /* don't leave the fd open */
+ close(fd);
+
+ if (CHECK_FAIL(!attempts))
+ goto err;
+
+ sleep(1);
+ }
+ }
+
+err:
+ for (i = 0; i < ARRAY_SIZE(allow_prog); i++)
+ if (allow_prog[i] >= 0)
+ close(allow_prog[i]);
+ if (cg)
+ close(cg);
+ free(ptr);
+ cleanup_cgroup_environment();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
new file mode 100644
index 000000000000..2ff21dbce179
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#include "cgroup_helpers.h"
+
+#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+static int map_fd = -1;
+
+static int prog_load_cnt(int verdict, int val)
+{
+ int cgroup_storage_fd, percpu_cgroup_storage_fd;
+
+ if (map_fd < 0)
+ map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
+ if (map_fd < 0) {
+ printf("failed to create map '%s'\n", strerror(errno));
+ return -1;
+ }
+
+ cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
+ sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
+ if (cgroup_storage_fd < 0) {
+ printf("failed to create map '%s'\n", strerror(errno));
+ return -1;
+ }
+
+ percpu_cgroup_storage_fd = bpf_create_map(
+ BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+ sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
+ if (percpu_cgroup_storage_fd < 0) {
+ printf("failed to create map '%s'\n", strerror(errno));
+ return -1;
+ }
+
+ struct bpf_insn prog[] = {
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+ BPF_LD_MAP_FD(BPF_REG_1, map_fd),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+ BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
+ BPF_MOV64_IMM(BPF_REG_1, val),
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0),
+
+ BPF_LD_MAP_FD(BPF_REG_1, percpu_cgroup_storage_fd),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1),
+ BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0),
+
+ BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
+ BPF_EXIT_INSN(),
+ };
+ size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+ int ret;
+
+ ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+ prog, insns_cnt, "GPL", 0,
+ bpf_log_buf, BPF_LOG_BUF_SIZE);
+
+ close(cgroup_storage_fd);
+ return ret;
+}
+
+void test_cgroup_attach_multi(void)
+{
+ __u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id;
+ int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0;
+ DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, attach_opts);
+ int allow_prog[7] = {-1};
+ unsigned long long value;
+ __u32 duration = 0;
+ int i = 0;
+
+ for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
+ allow_prog[i] = prog_load_cnt(1, 1 << i);
+ if (CHECK(allow_prog[i] < 0, "prog_load",
+ "verifier output:\n%s\n-------\n", bpf_log_buf))
+ goto err;
+ }
+
+ if (CHECK_FAIL(setup_cgroup_environment()))
+ goto err;
+
+ cg1 = create_and_get_cgroup("/cg1");
+ if (CHECK_FAIL(cg1 < 0))
+ goto err;
+ cg2 = create_and_get_cgroup("/cg1/cg2");
+ if (CHECK_FAIL(cg2 < 0))
+ goto err;
+ cg3 = create_and_get_cgroup("/cg1/cg2/cg3");
+ if (CHECK_FAIL(cg3 < 0))
+ goto err;
+ cg4 = create_and_get_cgroup("/cg1/cg2/cg3/cg4");
+ if (CHECK_FAIL(cg4 < 0))
+ goto err;
+ cg5 = create_and_get_cgroup("/cg1/cg2/cg3/cg4/cg5");
+ if (CHECK_FAIL(cg5 < 0))
+ goto err;
+
+ if (CHECK_FAIL(join_cgroup("/cg1/cg2/cg3/cg4/cg5")))
+ goto err;
+
+ if (CHECK(bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_MULTI),
+ "prog0_attach_to_cg1_multi", "errno=%d\n", errno))
+ goto err;
+
+ if (CHECK(!bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_MULTI),
+ "fail_same_prog_attach_to_cg1", "unexpected success\n"))
+ goto err;
+
+ if (CHECK(bpf_prog_attach(allow_prog[1], cg1, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_MULTI),
+ "prog1_attach_to_cg1_multi", "errno=%d\n", errno))
+ goto err;
+
+ if (CHECK(bpf_prog_attach(allow_prog[2], cg2, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE),
+ "prog2_attach_to_cg2_override", "errno=%d\n", errno))
+ goto err;
+
+ if (CHECK(bpf_prog_attach(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_MULTI),
+ "prog3_attach_to_cg3_multi", "errno=%d\n", errno))
+ goto err;
+
+ if (CHECK(bpf_prog_attach(allow_prog[4], cg4, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE),
+ "prog4_attach_to_cg4_override", "errno=%d\n", errno))
+ goto err;
+
+ if (CHECK(bpf_prog_attach(allow_prog[5], cg5, BPF_CGROUP_INET_EGRESS, 0),
+ "prog5_attach_to_cg5_none", "errno=%d\n", errno))
+ goto err;
+
+ CHECK_FAIL(system(PING_CMD));
+ CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value));
+ CHECK_FAIL(value != 1 + 2 + 8 + 32);
+
+ /* query the number of effective progs in cg5 */
+ CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS,
+ BPF_F_QUERY_EFFECTIVE, NULL, NULL, &prog_cnt));
+ CHECK_FAIL(prog_cnt != 4);
+ /* retrieve prog_ids of effective progs in cg5 */
+ CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS,
+ BPF_F_QUERY_EFFECTIVE, &attach_flags,
+ prog_ids, &prog_cnt));
+ CHECK_FAIL(prog_cnt != 4);
+ CHECK_FAIL(attach_flags != 0);
+ saved_prog_id = prog_ids[0];
+ /* check enospc handling */
+ prog_ids[0] = 0;
+ prog_cnt = 2;
+ CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS,
+ BPF_F_QUERY_EFFECTIVE, &attach_flags,
+ prog_ids, &prog_cnt) != -1);
+ CHECK_FAIL(errno != ENOSPC);
+ CHECK_FAIL(prog_cnt != 4);
+ /* check that prog_ids are returned even when buffer is too small */
+ CHECK_FAIL(prog_ids[0] != saved_prog_id);
+ /* retrieve prog_id of single attached prog in cg5 */
+ prog_ids[0] = 0;
+ CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0, NULL,
+ prog_ids, &prog_cnt));
+ CHECK_FAIL(prog_cnt != 1);
+ CHECK_FAIL(prog_ids[0] != saved_prog_id);
+
+ /* detach bottom program and ping again */
+ if (CHECK(bpf_prog_detach2(-1, cg5, BPF_CGROUP_INET_EGRESS),
+ "prog_detach_from_cg5", "errno=%d\n", errno))
+ goto err;
+
+ value = 0;
+ CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0));
+ CHECK_FAIL(system(PING_CMD));
+ CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value));
+ CHECK_FAIL(value != 1 + 2 + 8 + 16);
+
+ /* test replace */
+
+ attach_opts.flags = BPF_F_ALLOW_OVERRIDE | BPF_F_REPLACE;
+ attach_opts.replace_prog_fd = allow_prog[0];
+ if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+ BPF_CGROUP_INET_EGRESS, &attach_opts),
+ "fail_prog_replace_override", "unexpected success\n"))
+ goto err;
+ CHECK_FAIL(errno != EINVAL);
+
+ attach_opts.flags = BPF_F_REPLACE;
+ if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+ BPF_CGROUP_INET_EGRESS, &attach_opts),
+ "fail_prog_replace_no_multi", "unexpected success\n"))
+ goto err;
+ CHECK_FAIL(errno != EINVAL);
+
+ attach_opts.flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE;
+ attach_opts.replace_prog_fd = -1;
+ if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+ BPF_CGROUP_INET_EGRESS, &attach_opts),
+ "fail_prog_replace_bad_fd", "unexpected success\n"))
+ goto err;
+ CHECK_FAIL(errno != EBADF);
+
+ /* replacing a program that is not attached to cgroup should fail */
+ attach_opts.replace_prog_fd = allow_prog[3];
+ if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+ BPF_CGROUP_INET_EGRESS, &attach_opts),
+ "fail_prog_replace_no_ent", "unexpected success\n"))
+ goto err;
+ CHECK_FAIL(errno != ENOENT);
+
+ /* replace 1st from the top program */
+ attach_opts.replace_prog_fd = allow_prog[0];
+ if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1,
+ BPF_CGROUP_INET_EGRESS, &attach_opts),
+ "prog_replace", "errno=%d\n", errno))
+ goto err;
+
+ value = 0;
+ CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0));
+ CHECK_FAIL(system(PING_CMD));
+ CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value));
+ CHECK_FAIL(value != 64 + 2 + 8 + 16);
+
+ /* detach 3rd from bottom program and ping again */
+ if (CHECK(!bpf_prog_detach2(0, cg3, BPF_CGROUP_INET_EGRESS),
+ "fail_prog_detach_from_cg3", "unexpected success\n"))
+ goto err;
+
+ if (CHECK(bpf_prog_detach2(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS),
+ "prog3_detach_from_cg3", "errno=%d\n", errno))
+ goto err;
+
+ value = 0;
+ CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0));
+ CHECK_FAIL(system(PING_CMD));
+ CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value));
+ CHECK_FAIL(value != 64 + 2 + 16);
+
+ /* detach 2nd from bottom program and ping again */
+ if (CHECK(bpf_prog_detach2(-1, cg4, BPF_CGROUP_INET_EGRESS),
+ "prog_detach_from_cg4", "errno=%d\n", errno))
+ goto err;
+
+ value = 0;
+ CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0));
+ CHECK_FAIL(system(PING_CMD));
+ CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value));
+ CHECK_FAIL(value != 64 + 2 + 4);
+
+ prog_cnt = 4;
+ CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS,
+ BPF_F_QUERY_EFFECTIVE, &attach_flags,
+ prog_ids, &prog_cnt));
+ CHECK_FAIL(prog_cnt != 3);
+ CHECK_FAIL(attach_flags != 0);
+ CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0, NULL,
+ prog_ids, &prog_cnt));
+ CHECK_FAIL(prog_cnt != 0);
+
+err:
+ for (i = 0; i < ARRAY_SIZE(allow_prog); i++)
+ if (allow_prog[i] >= 0)
+ close(allow_prog[i]);
+ close(cg1);
+ close(cg2);
+ close(cg3);
+ close(cg4);
+ close(cg5);
+ cleanup_cgroup_environment();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
new file mode 100644
index 000000000000..9d8cb48b99de
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#include "cgroup_helpers.h"
+
+#define FOO "/foo"
+#define BAR "/foo/bar/"
+#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+static int prog_load(int verdict)
+{
+ struct bpf_insn prog[] = {
+ BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
+ BPF_EXIT_INSN(),
+ };
+ size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+
+ return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+ prog, insns_cnt, "GPL", 0,
+ bpf_log_buf, BPF_LOG_BUF_SIZE);
+}
+
+void test_cgroup_attach_override(void)
+{
+ int drop_prog = -1, allow_prog = -1, foo = -1, bar = -1;
+ __u32 duration = 0;
+
+ allow_prog = prog_load(1);
+ if (CHECK(allow_prog < 0, "prog_load_allow",
+ "verifier output:\n%s\n-------\n", bpf_log_buf))
+ goto err;
+
+ drop_prog = prog_load(0);
+ if (CHECK(drop_prog < 0, "prog_load_drop",
+ "verifier output:\n%s\n-------\n", bpf_log_buf))
+ goto err;
+
+ foo = test__join_cgroup(FOO);
+ if (CHECK(foo < 0, "cgroup_join_foo", "cgroup setup failed\n"))
+ goto err;
+
+ if (CHECK(bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE),
+ "prog_attach_drop_foo_override",
+ "attach prog to %s failed, errno=%d\n", FOO, errno))
+ goto err;
+
+ if (CHECK(!system(PING_CMD), "ping_fail",
+ "ping unexpectedly succeeded\n"))
+ goto err;
+
+ bar = test__join_cgroup(BAR);
+ if (CHECK(bar < 0, "cgroup_join_bar", "cgroup setup failed\n"))
+ goto err;
+
+ if (CHECK(!system(PING_CMD), "ping_fail",
+ "ping unexpectedly succeeded\n"))
+ goto err;
+
+ if (CHECK(bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE),
+ "prog_attach_allow_bar_override",
+ "attach prog to %s failed, errno=%d\n", BAR, errno))
+ goto err;
+
+ if (CHECK(system(PING_CMD), "ping_ok", "ping failed\n"))
+ goto err;
+
+ if (CHECK(bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS),
+ "prog_detach_bar",
+ "detach prog from %s failed, errno=%d\n", BAR, errno))
+ goto err;
+
+ if (CHECK(!system(PING_CMD), "ping_fail",
+ "ping unexpectedly succeeded\n"))
+ goto err;
+
+ if (CHECK(bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE),
+ "prog_attach_allow_bar_override",
+ "attach prog to %s failed, errno=%d\n", BAR, errno))
+ goto err;
+
+ if (CHECK(bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS),
+ "prog_detach_foo",
+ "detach prog from %s failed, errno=%d\n", FOO, errno))
+ goto err;
+
+ if (CHECK(system(PING_CMD), "ping_ok", "ping failed\n"))
+ goto err;
+
+ if (CHECK(bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE),
+ "prog_attach_allow_bar_override",
+ "attach prog to %s failed, errno=%d\n", BAR, errno))
+ goto err;
+
+ if (CHECK(!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0),
+ "fail_prog_attach_allow_bar_none",
+ "attach prog to %s unexpectedly succeeded\n", BAR))
+ goto err;
+
+ if (CHECK(bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS),
+ "prog_detach_bar",
+ "detach prog from %s failed, errno=%d\n", BAR, errno))
+ goto err;
+
+ if (CHECK(!bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS),
+ "fail_prog_detach_foo",
+ "double detach from %s unexpectedly succeeded\n", FOO))
+ goto err;
+
+ if (CHECK(bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 0),
+ "prog_attach_allow_foo_none",
+ "attach prog to %s failed, errno=%d\n", FOO, errno))
+ goto err;
+
+ if (CHECK(!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0),
+ "fail_prog_attach_allow_bar_none",
+ "attach prog to %s unexpectedly succeeded\n", BAR))
+ goto err;
+
+ if (CHECK(!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE),
+ "fail_prog_attach_allow_bar_override",
+ "attach prog to %s unexpectedly succeeded\n", BAR))
+ goto err;
+
+ if (CHECK(!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE),
+ "fail_prog_attach_allow_foo_override",
+ "attach prog to %s unexpectedly succeeded\n", FOO))
+ goto err;
+
+ if (CHECK(bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 0),
+ "prog_attach_drop_foo_none",
+ "attach prog to %s failed, errno=%d\n", FOO, errno))
+ goto err;
+
+err:
+ close(foo);
+ close(bar);
+ close(allow_prog);
+ close(drop_prog);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_extern.c b/tools/testing/selftests/bpf/prog_tests/core_extern.c
new file mode 100644
index 000000000000..b093787e9448
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/core_extern.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <test_progs.h>
+#include <sys/mman.h>
+#include <sys/utsname.h>
+#include <linux/version.h>
+#include "test_core_extern.skel.h"
+
+static uint32_t get_kernel_version(void)
+{
+ uint32_t major, minor, patch;
+ struct utsname info;
+
+ uname(&info);
+ if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
+ return 0;
+ return KERNEL_VERSION(major, minor, patch);
+}
+
+#define CFG "CONFIG_BPF_SYSCALL=n\n"
+
+static struct test_case {
+ const char *name;
+ const char *cfg;
+ bool fails;
+ struct test_core_extern__data data;
+} test_cases[] = {
+ { .name = "default search path", .data = { .bpf_syscall = true } },
+ {
+ .name = "custom values",
+ .cfg = "CONFIG_BPF_SYSCALL=n\n"
+ "CONFIG_TRISTATE=m\n"
+ "CONFIG_BOOL=y\n"
+ "CONFIG_CHAR=100\n"
+ "CONFIG_USHORT=30000\n"
+ "CONFIG_INT=123456\n"
+ "CONFIG_ULONG=0xDEADBEEFC0DE\n"
+ "CONFIG_STR=\"abracad\"\n"
+ "CONFIG_MISSING=0",
+ .data = {
+ .bpf_syscall = false,
+ .tristate_val = TRI_MODULE,
+ .bool_val = true,
+ .char_val = 100,
+ .ushort_val = 30000,
+ .int_val = 123456,
+ .ulong_val = 0xDEADBEEFC0DE,
+ .str_val = "abracad",
+ },
+ },
+ /* TRISTATE */
+ { .name = "tristate (y)", .cfg = CFG"CONFIG_TRISTATE=y\n",
+ .data = { .tristate_val = TRI_YES } },
+ { .name = "tristate (n)", .cfg = CFG"CONFIG_TRISTATE=n\n",
+ .data = { .tristate_val = TRI_NO } },
+ { .name = "tristate (m)", .cfg = CFG"CONFIG_TRISTATE=m\n",
+ .data = { .tristate_val = TRI_MODULE } },
+ { .name = "tristate (int)", .fails = 1, .cfg = CFG"CONFIG_TRISTATE=1" },
+ { .name = "tristate (bad)", .fails = 1, .cfg = CFG"CONFIG_TRISTATE=M" },
+ /* BOOL */
+ { .name = "bool (y)", .cfg = CFG"CONFIG_BOOL=y\n",
+ .data = { .bool_val = true } },
+ { .name = "bool (n)", .cfg = CFG"CONFIG_BOOL=n\n",
+ .data = { .bool_val = false } },
+ { .name = "bool (tristate)", .fails = 1, .cfg = CFG"CONFIG_BOOL=m" },
+ { .name = "bool (int)", .fails = 1, .cfg = CFG"CONFIG_BOOL=1" },
+ /* CHAR */
+ { .name = "char (tristate)", .cfg = CFG"CONFIG_CHAR=m\n",
+ .data = { .char_val = 'm' } },
+ { .name = "char (bad)", .fails = 1, .cfg = CFG"CONFIG_CHAR=q\n" },
+ { .name = "char (empty)", .fails = 1, .cfg = CFG"CONFIG_CHAR=\n" },
+ { .name = "char (str)", .fails = 1, .cfg = CFG"CONFIG_CHAR=\"y\"\n" },
+ /* STRING */
+ { .name = "str (empty)", .cfg = CFG"CONFIG_STR=\"\"\n",
+ .data = { .str_val = "\0\0\0\0\0\0\0" } },
+ { .name = "str (padded)", .cfg = CFG"CONFIG_STR=\"abra\"\n",
+ .data = { .str_val = "abra\0\0\0" } },
+ { .name = "str (too long)", .cfg = CFG"CONFIG_STR=\"abracada\"\n",
+ .data = { .str_val = "abracad" } },
+ { .name = "str (no value)", .fails = 1, .cfg = CFG"CONFIG_STR=\n" },
+ { .name = "str (bad value)", .fails = 1, .cfg = CFG"CONFIG_STR=bla\n" },
+ /* INTEGERS */
+ {
+ .name = "integer forms",
+ .cfg = CFG
+ "CONFIG_CHAR=0xA\n"
+ "CONFIG_USHORT=0462\n"
+ "CONFIG_INT=-100\n"
+ "CONFIG_ULONG=+1000000000000",
+ .data = {
+ .char_val = 0xA,
+ .ushort_val = 0462,
+ .int_val = -100,
+ .ulong_val = 1000000000000,
+ },
+ },
+ { .name = "int (bad)", .fails = 1, .cfg = CFG"CONFIG_INT=abc" },
+ { .name = "int (str)", .fails = 1, .cfg = CFG"CONFIG_INT=\"abc\"" },
+ { .name = "int (empty)", .fails = 1, .cfg = CFG"CONFIG_INT=" },
+ { .name = "int (mixed)", .fails = 1, .cfg = CFG"CONFIG_INT=123abc" },
+ { .name = "int (max)", .cfg = CFG"CONFIG_INT=2147483647",
+ .data = { .int_val = 2147483647 } },
+ { .name = "int (min)", .cfg = CFG"CONFIG_INT=-2147483648",
+ .data = { .int_val = -2147483648 } },
+ { .name = "int (max+1)", .fails = 1, .cfg = CFG"CONFIG_INT=2147483648" },
+ { .name = "int (min-1)", .fails = 1, .cfg = CFG"CONFIG_INT=-2147483649" },
+ { .name = "ushort (max)", .cfg = CFG"CONFIG_USHORT=65535",
+ .data = { .ushort_val = 65535 } },
+ { .name = "ushort (min)", .cfg = CFG"CONFIG_USHORT=0",
+ .data = { .ushort_val = 0 } },
+ { .name = "ushort (max+1)", .fails = 1, .cfg = CFG"CONFIG_USHORT=65536" },
+ { .name = "ushort (min-1)", .fails = 1, .cfg = CFG"CONFIG_USHORT=-1" },
+ { .name = "u64 (max)", .cfg = CFG"CONFIG_ULONG=0xffffffffffffffff",
+ .data = { .ulong_val = 0xffffffffffffffff } },
+ { .name = "u64 (min)", .cfg = CFG"CONFIG_ULONG=0",
+ .data = { .ulong_val = 0 } },
+ { .name = "u64 (max+1)", .fails = 1, .cfg = CFG"CONFIG_ULONG=0x10000000000000000" },
+};
+
+void test_core_extern(void)
+{
+ const uint32_t kern_ver = get_kernel_version();
+ int err, duration = 0, i, j;
+ struct test_core_extern *skel = NULL;
+ uint64_t *got, *exp;
+ int n = sizeof(*skel->data) / sizeof(uint64_t);
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ struct test_case *t = &test_cases[i];
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
+ .kconfig = t->cfg,
+ );
+
+ if (!test__start_subtest(t->name))
+ continue;
+
+ skel = test_core_extern__open_opts(&opts);
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
+ goto cleanup;
+ err = test_core_extern__load(skel);
+ if (t->fails) {
+ CHECK(!err, "skel_load",
+ "shouldn't succeed open/load of skeleton\n");
+ goto cleanup;
+ } else if (CHECK(err, "skel_load",
+ "failed to open/load skeleton\n")) {
+ goto cleanup;
+ }
+ err = test_core_extern__attach(skel);
+ if (CHECK(err, "attach_raw_tp", "failed attach: %d\n", err))
+ goto cleanup;
+
+ usleep(1);
+
+ t->data.kern_ver = kern_ver;
+ t->data.missing_val = 0xDEADC0DE;
+ got = (uint64_t *)skel->data;
+ exp = (uint64_t *)&t->data;
+ for (j = 0; j < n; j++) {
+ CHECK(got[j] != exp[j], "check_res",
+ "result #%d: expected %lx, but got %lx\n",
+ j, exp[j], got[j]);
+ }
+cleanup:
+ test_core_extern__destroy(skel);
+ skel = NULL;
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
new file mode 100644
index 000000000000..31e177adbdf1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -0,0 +1,582 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "progs/core_reloc_types.h"
+#include <sys/mman.h>
+#include <sys/syscall.h>
+
+#define STRUCT_TO_CHAR_PTR(struct_name) (const char *)&(struct struct_name)
+
+#define FLAVORS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \
+ .a = 42, \
+ .b = 0xc001, \
+ .c = 0xbeef, \
+}
+
+#define FLAVORS_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_flavors.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".o" \
+
+#define FLAVORS_CASE(name) { \
+ FLAVORS_CASE_COMMON(name), \
+ .input = FLAVORS_DATA(core_reloc_##name), \
+ .input_len = sizeof(struct core_reloc_##name), \
+ .output = FLAVORS_DATA(core_reloc_flavors), \
+ .output_len = sizeof(struct core_reloc_flavors), \
+}
+
+#define FLAVORS_ERR_CASE(name) { \
+ FLAVORS_CASE_COMMON(name), \
+ .fails = true, \
+}
+
+#define NESTING_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \
+ .a = { .a = { .a = 42 } }, \
+ .b = { .b = { .b = 0xc001 } }, \
+}
+
+#define NESTING_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_nesting.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".o"
+
+#define NESTING_CASE(name) { \
+ NESTING_CASE_COMMON(name), \
+ .input = NESTING_DATA(core_reloc_##name), \
+ .input_len = sizeof(struct core_reloc_##name), \
+ .output = NESTING_DATA(core_reloc_nesting), \
+ .output_len = sizeof(struct core_reloc_nesting) \
+}
+
+#define NESTING_ERR_CASE(name) { \
+ NESTING_CASE_COMMON(name), \
+ .fails = true, \
+}
+
+#define ARRAYS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \
+ .a = { [2] = 1 }, \
+ .b = { [1] = { [2] = { [3] = 2 } } }, \
+ .c = { [1] = { .c = 3 } }, \
+ .d = { [0] = { [0] = { .d = 4 } } }, \
+}
+
+#define ARRAYS_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_arrays.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".o"
+
+#define ARRAYS_CASE(name) { \
+ ARRAYS_CASE_COMMON(name), \
+ .input = ARRAYS_DATA(core_reloc_##name), \
+ .input_len = sizeof(struct core_reloc_##name), \
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_arrays_output) { \
+ .a2 = 1, \
+ .b123 = 2, \
+ .c1c = 3, \
+ .d00d = 4, \
+ .f10c = 0, \
+ }, \
+ .output_len = sizeof(struct core_reloc_arrays_output) \
+}
+
+#define ARRAYS_ERR_CASE(name) { \
+ ARRAYS_CASE_COMMON(name), \
+ .fails = true, \
+}
+
+#define PRIMITIVES_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \
+ .a = 1, \
+ .b = 2, \
+ .c = 3, \
+ .d = (void *)4, \
+ .f = (void *)5, \
+}
+
+#define PRIMITIVES_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_primitives.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".o"
+
+#define PRIMITIVES_CASE(name) { \
+ PRIMITIVES_CASE_COMMON(name), \
+ .input = PRIMITIVES_DATA(core_reloc_##name), \
+ .input_len = sizeof(struct core_reloc_##name), \
+ .output = PRIMITIVES_DATA(core_reloc_primitives), \
+ .output_len = sizeof(struct core_reloc_primitives), \
+}
+
+#define PRIMITIVES_ERR_CASE(name) { \
+ PRIMITIVES_CASE_COMMON(name), \
+ .fails = true, \
+}
+
+#define MODS_CASE(name) { \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_mods.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".o", \
+ .input = STRUCT_TO_CHAR_PTR(core_reloc_##name) { \
+ .a = 1, \
+ .b = 2, \
+ .c = (void *)3, \
+ .d = (void *)4, \
+ .e = { [2] = 5 }, \
+ .f = { [1] = 6 }, \
+ .g = { .x = 7 }, \
+ .h = { .y = 8 }, \
+ }, \
+ .input_len = sizeof(struct core_reloc_##name), \
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_mods_output) { \
+ .a = 1, .b = 2, .c = 3, .d = 4, \
+ .e = 5, .f = 6, .g = 7, .h = 8, \
+ }, \
+ .output_len = sizeof(struct core_reloc_mods_output), \
+}
+
+#define PTR_AS_ARR_CASE(name) { \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_ptr_as_arr.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".o", \
+ .input = (const char *)&(struct core_reloc_##name []){ \
+ { .a = 1 }, \
+ { .a = 2 }, \
+ { .a = 3 }, \
+ }, \
+ .input_len = 3 * sizeof(struct core_reloc_##name), \
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_ptr_as_arr) { \
+ .a = 3, \
+ }, \
+ .output_len = sizeof(struct core_reloc_ptr_as_arr), \
+}
+
+#define INTS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \
+ .u8_field = 1, \
+ .s8_field = 2, \
+ .u16_field = 3, \
+ .s16_field = 4, \
+ .u32_field = 5, \
+ .s32_field = 6, \
+ .u64_field = 7, \
+ .s64_field = 8, \
+}
+
+#define INTS_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_ints.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".o"
+
+#define INTS_CASE(name) { \
+ INTS_CASE_COMMON(name), \
+ .input = INTS_DATA(core_reloc_##name), \
+ .input_len = sizeof(struct core_reloc_##name), \
+ .output = INTS_DATA(core_reloc_ints), \
+ .output_len = sizeof(struct core_reloc_ints), \
+}
+
+#define INTS_ERR_CASE(name) { \
+ INTS_CASE_COMMON(name), \
+ .fails = true, \
+}
+
+#define EXISTENCE_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_existence.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".o", \
+ .relaxed_core_relocs = true
+
+#define EXISTENCE_ERR_CASE(name) { \
+ EXISTENCE_CASE_COMMON(name), \
+ .fails = true, \
+}
+
+#define BITFIELDS_CASE_COMMON(objfile, test_name_prefix, name) \
+ .case_name = test_name_prefix#name, \
+ .bpf_obj_file = objfile, \
+ .btf_src_file = "btf__core_reloc_" #name ".o"
+
+#define BITFIELDS_CASE(name, ...) { \
+ BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.o", \
+ "direct:", name), \
+ .input = STRUCT_TO_CHAR_PTR(core_reloc_##name) __VA_ARGS__, \
+ .input_len = sizeof(struct core_reloc_##name), \
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output) \
+ __VA_ARGS__, \
+ .output_len = sizeof(struct core_reloc_bitfields_output), \
+}, { \
+ BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \
+ "probed:", name), \
+ .input = STRUCT_TO_CHAR_PTR(core_reloc_##name) __VA_ARGS__, \
+ .input_len = sizeof(struct core_reloc_##name), \
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output) \
+ __VA_ARGS__, \
+ .output_len = sizeof(struct core_reloc_bitfields_output), \
+ .direct_raw_tp = true, \
+}
+
+
+#define BITFIELDS_ERR_CASE(name) { \
+ BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.o", \
+ "probed:", name), \
+ .fails = true, \
+}, { \
+ BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \
+ "direct:", name), \
+ .direct_raw_tp = true, \
+ .fails = true, \
+}
+
+#define SIZE_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_size.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".o", \
+ .relaxed_core_relocs = true
+
+#define SIZE_OUTPUT_DATA(type) \
+ STRUCT_TO_CHAR_PTR(core_reloc_size_output) { \
+ .int_sz = sizeof(((type *)0)->int_field), \
+ .struct_sz = sizeof(((type *)0)->struct_field), \
+ .union_sz = sizeof(((type *)0)->union_field), \
+ .arr_sz = sizeof(((type *)0)->arr_field), \
+ .arr_elem_sz = sizeof(((type *)0)->arr_field[0]), \
+ .ptr_sz = sizeof(((type *)0)->ptr_field), \
+ .enum_sz = sizeof(((type *)0)->enum_field), \
+ }
+
+#define SIZE_CASE(name) { \
+ SIZE_CASE_COMMON(name), \
+ .input_len = 0, \
+ .output = SIZE_OUTPUT_DATA(struct core_reloc_##name), \
+ .output_len = sizeof(struct core_reloc_size_output), \
+}
+
+#define SIZE_ERR_CASE(name) { \
+ SIZE_CASE_COMMON(name), \
+ .fails = true, \
+}
+
+struct core_reloc_test_case {
+ const char *case_name;
+ const char *bpf_obj_file;
+ const char *btf_src_file;
+ const char *input;
+ int input_len;
+ const char *output;
+ int output_len;
+ bool fails;
+ bool relaxed_core_relocs;
+ bool direct_raw_tp;
+};
+
+static struct core_reloc_test_case test_cases[] = {
+ /* validate we can find kernel image and use its BTF for relocs */
+ {
+ .case_name = "kernel",
+ .bpf_obj_file = "test_core_reloc_kernel.o",
+ .btf_src_file = NULL, /* load from /lib/modules/$(uname -r) */
+ .input = "",
+ .input_len = 0,
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_kernel_output) {
+ .valid = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, },
+ .comm = "test_progs",
+ .comm_len = sizeof("test_progs"),
+ },
+ .output_len = sizeof(struct core_reloc_kernel_output),
+ },
+
+ /* validate BPF program can use multiple flavors to match against
+ * single target BTF type
+ */
+ FLAVORS_CASE(flavors),
+
+ FLAVORS_ERR_CASE(flavors__err_wrong_name),
+
+ /* various struct/enum nesting and resolution scenarios */
+ NESTING_CASE(nesting),
+ NESTING_CASE(nesting___anon_embed),
+ NESTING_CASE(nesting___struct_union_mixup),
+ NESTING_CASE(nesting___extra_nesting),
+ NESTING_CASE(nesting___dup_compat_types),
+
+ NESTING_ERR_CASE(nesting___err_missing_field),
+ NESTING_ERR_CASE(nesting___err_array_field),
+ NESTING_ERR_CASE(nesting___err_missing_container),
+ NESTING_ERR_CASE(nesting___err_nonstruct_container),
+ NESTING_ERR_CASE(nesting___err_array_container),
+ NESTING_ERR_CASE(nesting___err_dup_incompat_types),
+ NESTING_ERR_CASE(nesting___err_partial_match_dups),
+ NESTING_ERR_CASE(nesting___err_too_deep),
+
+ /* various array access relocation scenarios */
+ ARRAYS_CASE(arrays),
+ ARRAYS_CASE(arrays___diff_arr_dim),
+ ARRAYS_CASE(arrays___diff_arr_val_sz),
+ ARRAYS_CASE(arrays___equiv_zero_sz_arr),
+ ARRAYS_CASE(arrays___fixed_arr),
+
+ ARRAYS_ERR_CASE(arrays___err_too_small),
+ ARRAYS_ERR_CASE(arrays___err_too_shallow),
+ ARRAYS_ERR_CASE(arrays___err_non_array),
+ ARRAYS_ERR_CASE(arrays___err_wrong_val_type1),
+ ARRAYS_ERR_CASE(arrays___err_wrong_val_type2),
+ ARRAYS_ERR_CASE(arrays___err_bad_zero_sz_arr),
+
+ /* enum/ptr/int handling scenarios */
+ PRIMITIVES_CASE(primitives),
+ PRIMITIVES_CASE(primitives___diff_enum_def),
+ PRIMITIVES_CASE(primitives___diff_func_proto),
+ PRIMITIVES_CASE(primitives___diff_ptr_type),
+
+ PRIMITIVES_ERR_CASE(primitives___err_non_enum),
+ PRIMITIVES_ERR_CASE(primitives___err_non_int),
+ PRIMITIVES_ERR_CASE(primitives___err_non_ptr),
+
+ /* const/volatile/restrict and typedefs scenarios */
+ MODS_CASE(mods),
+ MODS_CASE(mods___mod_swap),
+ MODS_CASE(mods___typedefs),
+
+ /* handling "ptr is an array" semantics */
+ PTR_AS_ARR_CASE(ptr_as_arr),
+ PTR_AS_ARR_CASE(ptr_as_arr___diff_sz),
+
+ /* int signedness/sizing/bitfield handling */
+ INTS_CASE(ints),
+ INTS_CASE(ints___bool),
+ INTS_CASE(ints___reverse_sign),
+
+ /* validate edge cases of capturing relocations */
+ {
+ .case_name = "misc",
+ .bpf_obj_file = "test_core_reloc_misc.o",
+ .btf_src_file = "btf__core_reloc_misc.o",
+ .input = (const char *)&(struct core_reloc_misc_extensible[]){
+ { .a = 1 },
+ { .a = 2 }, /* not read */
+ { .a = 3 },
+ },
+ .input_len = 4 * sizeof(int),
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_misc_output) {
+ .a = 1,
+ .b = 1,
+ .c = 0, /* BUG in clang, should be 3 */
+ },
+ .output_len = sizeof(struct core_reloc_misc_output),
+ },
+
+ /* validate field existence checks */
+ {
+ EXISTENCE_CASE_COMMON(existence),
+ .input = STRUCT_TO_CHAR_PTR(core_reloc_existence) {
+ .a = 1,
+ .b = 2,
+ .c = 3,
+ .arr = { 4 },
+ .s = { .x = 5 },
+ },
+ .input_len = sizeof(struct core_reloc_existence),
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_existence_output) {
+ .a_exists = 1,
+ .b_exists = 1,
+ .c_exists = 1,
+ .arr_exists = 1,
+ .s_exists = 1,
+ .a_value = 1,
+ .b_value = 2,
+ .c_value = 3,
+ .arr_value = 4,
+ .s_value = 5,
+ },
+ .output_len = sizeof(struct core_reloc_existence_output),
+ },
+ {
+ EXISTENCE_CASE_COMMON(existence___minimal),
+ .input = STRUCT_TO_CHAR_PTR(core_reloc_existence___minimal) {
+ .a = 42,
+ },
+ .input_len = sizeof(struct core_reloc_existence),
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_existence_output) {
+ .a_exists = 1,
+ .b_exists = 0,
+ .c_exists = 0,
+ .arr_exists = 0,
+ .s_exists = 0,
+ .a_value = 42,
+ .b_value = 0xff000002u,
+ .c_value = 0xff000003u,
+ .arr_value = 0xff000004u,
+ .s_value = 0xff000005u,
+ },
+ .output_len = sizeof(struct core_reloc_existence_output),
+ },
+
+ EXISTENCE_ERR_CASE(existence__err_int_sz),
+ EXISTENCE_ERR_CASE(existence__err_int_type),
+ EXISTENCE_ERR_CASE(existence__err_int_kind),
+ EXISTENCE_ERR_CASE(existence__err_arr_kind),
+ EXISTENCE_ERR_CASE(existence__err_arr_value_type),
+ EXISTENCE_ERR_CASE(existence__err_struct_type),
+
+ /* bitfield relocation checks */
+ BITFIELDS_CASE(bitfields, {
+ .ub1 = 1,
+ .ub2 = 2,
+ .ub7 = 96,
+ .sb4 = -7,
+ .sb20 = -0x76543,
+ .u32 = 0x80000000,
+ .s32 = -0x76543210,
+ }),
+ BITFIELDS_CASE(bitfields___bit_sz_change, {
+ .ub1 = 6,
+ .ub2 = 0xABCDE,
+ .ub7 = 1,
+ .sb4 = -1,
+ .sb20 = -0x17654321,
+ .u32 = 0xBEEF,
+ .s32 = -0x3FEDCBA987654321,
+ }),
+ BITFIELDS_CASE(bitfields___bitfield_vs_int, {
+ .ub1 = 0xFEDCBA9876543210,
+ .ub2 = 0xA6,
+ .ub7 = -0x7EDCBA987654321,
+ .sb4 = -0x6123456789ABCDE,
+ .sb20 = 0xD00D,
+ .u32 = -0x76543,
+ .s32 = 0x0ADEADBEEFBADB0B,
+ }),
+ BITFIELDS_CASE(bitfields___just_big_enough, {
+ .ub1 = 0xF,
+ .ub2 = 0x0812345678FEDCBA,
+ }),
+ BITFIELDS_ERR_CASE(bitfields___err_too_big_bitfield),
+
+ /* size relocation checks */
+ SIZE_CASE(size),
+ SIZE_CASE(size___diff_sz),
+};
+
+struct data {
+ char in[256];
+ char out[256];
+ uint64_t my_pid_tgid;
+};
+
+static size_t roundup_page(size_t sz)
+{
+ long page_size = sysconf(_SC_PAGE_SIZE);
+ return (sz + page_size - 1) / page_size * page_size;
+}
+
+void test_core_reloc(void)
+{
+ const size_t mmap_sz = roundup_page(sizeof(struct data));
+ struct bpf_object_load_attr load_attr = {};
+ struct core_reloc_test_case *test_case;
+ const char *tp_name, *probe_name;
+ int err, duration = 0, i, equal;
+ struct bpf_link *link = NULL;
+ struct bpf_map *data_map;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ uint64_t my_pid_tgid;
+ struct data *data;
+ void *mmap_data = NULL;
+
+ my_pid_tgid = getpid() | ((uint64_t)syscall(SYS_gettid) << 32);
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ test_case = &test_cases[i];
+ if (!test__start_subtest(test_case->case_name))
+ continue;
+
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
+ .relaxed_core_relocs = test_case->relaxed_core_relocs,
+ );
+
+ obj = bpf_object__open_file(test_case->bpf_obj_file, &opts);
+ if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n",
+ test_case->bpf_obj_file, PTR_ERR(obj)))
+ continue;
+
+ /* for typed raw tracepoints, NULL should be specified */
+ if (test_case->direct_raw_tp) {
+ probe_name = "tp_btf/sys_enter";
+ tp_name = NULL;
+ } else {
+ probe_name = "raw_tracepoint/sys_enter";
+ tp_name = "sys_enter";
+ }
+
+ prog = bpf_object__find_program_by_title(obj, probe_name);
+ if (CHECK(!prog, "find_probe",
+ "prog '%s' not found\n", probe_name))
+ goto cleanup;
+
+ load_attr.obj = obj;
+ load_attr.log_level = 0;
+ load_attr.target_btf_path = test_case->btf_src_file;
+ err = bpf_object__load_xattr(&load_attr);
+ if (test_case->fails) {
+ CHECK(!err, "obj_load_fail",
+ "should fail to load prog '%s'\n", probe_name);
+ goto cleanup;
+ } else {
+ if (CHECK(err, "obj_load",
+ "failed to load prog '%s': %d\n",
+ probe_name, err))
+ goto cleanup;
+ }
+
+ data_map = bpf_object__find_map_by_name(obj, "test_cor.bss");
+ if (CHECK(!data_map, "find_data_map", "data map not found\n"))
+ goto cleanup;
+
+ mmap_data = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED, bpf_map__fd(data_map), 0);
+ if (CHECK(mmap_data == MAP_FAILED, "mmap",
+ ".bss mmap failed: %d", errno)) {
+ mmap_data = NULL;
+ goto cleanup;
+ }
+ data = mmap_data;
+
+ memset(mmap_data, 0, sizeof(*data));
+ memcpy(data->in, test_case->input, test_case->input_len);
+ data->my_pid_tgid = my_pid_tgid;
+
+ link = bpf_program__attach_raw_tracepoint(prog, tp_name);
+ if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n",
+ PTR_ERR(link)))
+ goto cleanup;
+
+ /* trigger test run */
+ usleep(1);
+
+ equal = memcmp(data->out, test_case->output,
+ test_case->output_len) == 0;
+ if (CHECK(!equal, "check_result",
+ "input/output data don't match\n")) {
+ int j;
+
+ for (j = 0; j < test_case->input_len; j++) {
+ printf("input byte #%d: 0x%02hhx\n",
+ j, test_case->input[j]);
+ }
+ for (j = 0; j < test_case->output_len; j++) {
+ printf("output byte #%d: EXP 0x%02hhx GOT 0x%02hhx\n",
+ j, test_case->output[j], data->out[j]);
+ }
+ goto cleanup;
+ }
+
+cleanup:
+ if (mmap_data) {
+ CHECK_FAIL(munmap(mmap_data, mmap_sz));
+ mmap_data = NULL;
+ }
+ if (!IS_ERR_OR_NULL(link)) {
+ bpf_link__destroy(link);
+ link = NULL;
+ }
+ bpf_object__close(obj);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cpu_mask.c b/tools/testing/selftests/bpf/prog_tests/cpu_mask.c
new file mode 100644
index 000000000000..f7c7e25232be
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cpu_mask.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "bpf/libbpf_internal.h"
+
+static int duration = 0;
+
+static void validate_mask(int case_nr, const char *exp, bool *mask, int n)
+{
+ int i;
+
+ for (i = 0; exp[i]; i++) {
+ if (exp[i] == '1') {
+ if (CHECK(i + 1 > n, "mask_short",
+ "case #%d: mask too short, got n=%d, need at least %d\n",
+ case_nr, n, i + 1))
+ return;
+ CHECK(!mask[i], "cpu_not_set",
+ "case #%d: mask differs, expected cpu#%d SET\n",
+ case_nr, i);
+ } else {
+ CHECK(i < n && mask[i], "cpu_set",
+ "case #%d: mask differs, expected cpu#%d UNSET\n",
+ case_nr, i);
+ }
+ }
+ CHECK(i < n, "mask_long",
+ "case #%d: mask too long, got n=%d, expected at most %d\n",
+ case_nr, n, i);
+}
+
+static struct {
+ const char *cpu_mask;
+ const char *expect;
+ bool fails;
+} test_cases[] = {
+ { "0\n", "1", false },
+ { "0,2\n", "101", false },
+ { "0-2\n", "111", false },
+ { "0-2,3-4\n", "11111", false },
+ { "0", "1", false },
+ { "0-2", "111", false },
+ { "0,2", "101", false },
+ { "0,1-3", "1111", false },
+ { "0,1,2,3", "1111", false },
+ { "0,2-3,5", "101101", false },
+ { "3-3", "0001", false },
+ { "2-4,6,9-10", "00111010011", false },
+ /* failure cases */
+ { "", "", true },
+ { "0-", "", true },
+ { "0 ", "", true },
+ { "0_1", "", true },
+ { "1-0", "", true },
+ { "-1", "", true },
+};
+
+void test_cpu_mask()
+{
+ int i, err, n;
+ bool *mask;
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ mask = NULL;
+ err = parse_cpu_mask_str(test_cases[i].cpu_mask, &mask, &n);
+ if (test_cases[i].fails) {
+ CHECK(!err, "should_fail",
+ "case #%d: parsing should fail!\n", i + 1);
+ } else {
+ if (CHECK(err, "parse_err",
+ "case #%d: cpu mask parsing failed: %d\n",
+ i + 1, err))
+ continue;
+ validate_mask(i + 1, test_cases[i].expect, mask, n);
+ }
+ free(mask);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
new file mode 100644
index 000000000000..235ac4f67f5b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+#include <test_progs.h>
+#include "test_pkt_access.skel.h"
+#include "fentry_test.skel.h"
+#include "fexit_test.skel.h"
+
+void test_fentry_fexit(void)
+{
+ struct test_pkt_access *pkt_skel = NULL;
+ struct fentry_test *fentry_skel = NULL;
+ struct fexit_test *fexit_skel = NULL;
+ __u64 *fentry_res, *fexit_res;
+ __u32 duration = 0, retval;
+ int err, pkt_fd, i;
+
+ pkt_skel = test_pkt_access__open_and_load();
+ if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n"))
+ return;
+ fentry_skel = fentry_test__open_and_load();
+ if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
+ goto close_prog;
+ fexit_skel = fexit_test__open_and_load();
+ if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n"))
+ goto close_prog;
+
+ err = fentry_test__attach(fentry_skel);
+ if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err))
+ goto close_prog;
+ err = fexit_test__attach(fexit_skel);
+ if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
+ goto close_prog;
+
+ pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access);
+ err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ NULL, NULL, &retval, &duration);
+ CHECK(err || retval, "ipv6",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, retval, duration);
+
+ fentry_res = (__u64 *)fentry_skel->bss;
+ fexit_res = (__u64 *)fexit_skel->bss;
+ printf("%lld\n", fentry_skel->bss->test1_result);
+ for (i = 0; i < 6; i++) {
+ CHECK(fentry_res[i] != 1, "result",
+ "fentry_test%d failed err %lld\n", i + 1, fentry_res[i]);
+ CHECK(fexit_res[i] != 1, "result",
+ "fexit_test%d failed err %lld\n", i + 1, fexit_res[i]);
+ }
+
+close_prog:
+ test_pkt_access__destroy(pkt_skel);
+ fentry_test__destroy(fentry_skel);
+ fexit_test__destroy(fexit_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
new file mode 100644
index 000000000000..5cc06021f27d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+#include <test_progs.h>
+#include "test_pkt_access.skel.h"
+#include "fentry_test.skel.h"
+
+void test_fentry_test(void)
+{
+ struct test_pkt_access *pkt_skel = NULL;
+ struct fentry_test *fentry_skel = NULL;
+ int err, pkt_fd, i;
+ __u32 duration = 0, retval;
+ __u64 *result;
+
+ pkt_skel = test_pkt_access__open_and_load();
+ if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n"))
+ return;
+ fentry_skel = fentry_test__open_and_load();
+ if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
+ goto cleanup;
+
+ err = fentry_test__attach(fentry_skel);
+ if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err))
+ goto cleanup;
+
+ pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access);
+ err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ NULL, NULL, &retval, &duration);
+ CHECK(err || retval, "ipv6",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, retval, duration);
+
+ result = (__u64 *)fentry_skel->bss;
+ for (i = 0; i < 6; i++) {
+ if (CHECK(result[i] != 1, "result",
+ "fentry_test%d failed err %lld\n", i + 1, result[i]))
+ goto cleanup;
+ }
+
+cleanup:
+ fentry_test__destroy(fentry_skel);
+ test_pkt_access__destroy(pkt_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
new file mode 100644
index 000000000000..cde463af7071
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+#include <test_progs.h>
+
+static void test_fexit_bpf2bpf_common(const char *obj_file,
+ const char *target_obj_file,
+ int prog_cnt,
+ const char **prog_name)
+{
+ struct bpf_object *obj = NULL, *pkt_obj;
+ int err, pkt_fd, i;
+ struct bpf_link **link = NULL;
+ struct bpf_program **prog = NULL;
+ __u32 duration = 0, retval;
+ struct bpf_map *data_map;
+ const int zero = 0;
+ u64 *result = NULL;
+
+ err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
+ &pkt_obj, &pkt_fd);
+ if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
+ return;
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
+ .attach_prog_fd = pkt_fd,
+ );
+
+ link = calloc(sizeof(struct bpf_link *), prog_cnt);
+ prog = calloc(sizeof(struct bpf_program *), prog_cnt);
+ result = malloc((prog_cnt + 32 /* spare */) * sizeof(u64));
+ if (CHECK(!link || !prog || !result, "alloc_memory",
+ "failed to alloc memory"))
+ goto close_prog;
+
+ obj = bpf_object__open_file(obj_file, &opts);
+ if (CHECK(IS_ERR_OR_NULL(obj), "obj_open",
+ "failed to open fexit_bpf2bpf: %ld\n",
+ PTR_ERR(obj)))
+ goto close_prog;
+
+ err = bpf_object__load(obj);
+ if (CHECK(err, "obj_load", "err %d\n", err))
+ goto close_prog;
+
+ for (i = 0; i < prog_cnt; i++) {
+ prog[i] = bpf_object__find_program_by_title(obj, prog_name[i]);
+ if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name[i]))
+ goto close_prog;
+ link[i] = bpf_program__attach_trace(prog[i]);
+ if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
+ goto close_prog;
+ }
+ data_map = bpf_object__find_map_by_name(obj, "fexit_bp.bss");
+ if (CHECK(!data_map, "find_data_map", "data map not found\n"))
+ goto close_prog;
+
+ err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ NULL, NULL, &retval, &duration);
+ CHECK(err || retval, "ipv6",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, retval, duration);
+
+ err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, result);
+ if (CHECK(err, "get_result",
+ "failed to get output data: %d\n", err))
+ goto close_prog;
+
+ for (i = 0; i < prog_cnt; i++)
+ if (CHECK(result[i] != 1, "result", "fexit_bpf2bpf failed err %ld\n",
+ result[i]))
+ goto close_prog;
+
+close_prog:
+ for (i = 0; i < prog_cnt; i++)
+ if (!IS_ERR_OR_NULL(link[i]))
+ bpf_link__destroy(link[i]);
+ if (!IS_ERR_OR_NULL(obj))
+ bpf_object__close(obj);
+ bpf_object__close(pkt_obj);
+ free(link);
+ free(prog);
+ free(result);
+}
+
+static void test_target_no_callees(void)
+{
+ const char *prog_name[] = {
+ "fexit/test_pkt_md_access",
+ };
+ test_fexit_bpf2bpf_common("./fexit_bpf2bpf_simple.o",
+ "./test_pkt_md_access.o",
+ ARRAY_SIZE(prog_name),
+ prog_name);
+}
+
+static void test_target_yes_callees(void)
+{
+ const char *prog_name[] = {
+ "fexit/test_pkt_access",
+ "fexit/test_pkt_access_subprog1",
+ "fexit/test_pkt_access_subprog2",
+ "fexit/test_pkt_access_subprog3",
+ };
+ test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o",
+ "./test_pkt_access.o",
+ ARRAY_SIZE(prog_name),
+ prog_name);
+}
+
+static void test_func_replace(void)
+{
+ const char *prog_name[] = {
+ "fexit/test_pkt_access",
+ "fexit/test_pkt_access_subprog1",
+ "fexit/test_pkt_access_subprog2",
+ "fexit/test_pkt_access_subprog3",
+ "freplace/get_skb_len",
+ "freplace/get_skb_ifindex",
+ "freplace/get_constant",
+ };
+ test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o",
+ "./test_pkt_access.o",
+ ARRAY_SIZE(prog_name),
+ prog_name);
+}
+
+void test_fexit_bpf2bpf(void)
+{
+ test_target_no_callees();
+ test_target_yes_callees();
+ test_func_replace();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
new file mode 100644
index 000000000000..3b9dbf7433f0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+#include <test_progs.h>
+
+/* x86-64 fits 55 JITed and 43 interpreted progs into half page */
+#define CNT 40
+
+void test_fexit_stress(void)
+{
+ char test_skb[128] = {};
+ int fexit_fd[CNT] = {};
+ int link_fd[CNT] = {};
+ __u32 duration = 0;
+ char error[4096];
+ __u32 prog_ret;
+ int err, i, filter_fd;
+
+ const struct bpf_insn trace_program[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+
+ struct bpf_load_program_attr load_attr = {
+ .prog_type = BPF_PROG_TYPE_TRACING,
+ .license = "GPL",
+ .insns = trace_program,
+ .insns_cnt = sizeof(trace_program) / sizeof(struct bpf_insn),
+ .expected_attach_type = BPF_TRACE_FEXIT,
+ };
+
+ const struct bpf_insn skb_program[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+
+ struct bpf_load_program_attr skb_load_attr = {
+ .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
+ .license = "GPL",
+ .insns = skb_program,
+ .insns_cnt = sizeof(skb_program) / sizeof(struct bpf_insn),
+ };
+
+ err = libbpf_find_vmlinux_btf_id("bpf_fentry_test1",
+ load_attr.expected_attach_type);
+ if (CHECK(err <= 0, "find_vmlinux_btf_id", "failed: %d\n", err))
+ goto out;
+ load_attr.attach_btf_id = err;
+
+ for (i = 0; i < CNT; i++) {
+ fexit_fd[i] = bpf_load_program_xattr(&load_attr, error, sizeof(error));
+ if (CHECK(fexit_fd[i] < 0, "fexit loaded",
+ "failed: %d errno %d\n", fexit_fd[i], errno))
+ goto out;
+ link_fd[i] = bpf_raw_tracepoint_open(NULL, fexit_fd[i]);
+ if (CHECK(link_fd[i] < 0, "fexit attach failed",
+ "prog %d failed: %d err %d\n", i, link_fd[i], errno))
+ goto out;
+ }
+
+ filter_fd = bpf_load_program_xattr(&skb_load_attr, error, sizeof(error));
+ if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n",
+ filter_fd, errno))
+ goto out;
+
+ err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0,
+ 0, &prog_ret, 0);
+ close(filter_fd);
+ CHECK_FAIL(err);
+out:
+ for (i = 0; i < CNT; i++) {
+ if (link_fd[i])
+ close(link_fd[i]);
+ if (fexit_fd[i])
+ close(fexit_fd[i]);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
new file mode 100644
index 000000000000..d2c3655dd7a3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+#include <test_progs.h>
+
+void test_fexit_test(void)
+{
+ struct bpf_prog_load_attr attr = {
+ .file = "./fexit_test.o",
+ };
+
+ char prog_name[] = "fexit/bpf_fentry_testX";
+ struct bpf_object *obj = NULL, *pkt_obj;
+ int err, pkt_fd, kfree_skb_fd, i;
+ struct bpf_link *link[6] = {};
+ struct bpf_program *prog[6];
+ __u32 duration = 0, retval;
+ struct bpf_map *data_map;
+ const int zero = 0;
+ u64 result[6];
+
+ err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS,
+ &pkt_obj, &pkt_fd);
+ if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
+ return;
+ err = bpf_prog_load_xattr(&attr, &obj, &kfree_skb_fd);
+ if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno))
+ goto close_prog;
+
+ for (i = 0; i < 6; i++) {
+ prog_name[sizeof(prog_name) - 2] = '1' + i;
+ prog[i] = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name))
+ goto close_prog;
+ link[i] = bpf_program__attach_trace(prog[i]);
+ if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
+ goto close_prog;
+ }
+ data_map = bpf_object__find_map_by_name(obj, "fexit_te.bss");
+ if (CHECK(!data_map, "find_data_map", "data map not found\n"))
+ goto close_prog;
+
+ err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ NULL, NULL, &retval, &duration);
+ CHECK(err || retval, "ipv6",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, retval, duration);
+
+ err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &result);
+ if (CHECK(err, "get_result",
+ "failed to get output data: %d\n", err))
+ goto close_prog;
+
+ for (i = 0; i < 6; i++)
+ if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n",
+ i + 1, result[i]))
+ goto close_prog;
+
+close_prog:
+ for (i = 0; i < 6; i++)
+ if (!IS_ERR_OR_NULL(link[i]))
+ bpf_link__destroy(link[i]);
+ bpf_object__close(obj);
+ bpf_object__close(pkt_obj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index c938283ac232..92563898867c 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -5,6 +5,10 @@
#include <linux/if_tun.h>
#include <sys/uio.h>
+#ifndef IP_MF
+#define IP_MF 0x2000
+#endif
+
#define CHECK_FLOW_KEYS(desc, got, expected) \
CHECK_ATTR(memcmp(&got, &expected, sizeof(got)) != 0, \
desc, \
@@ -16,6 +20,7 @@
"is_encap=%u/%u " \
"ip_proto=0x%x/0x%x " \
"n_proto=0x%x/0x%x " \
+ "flow_label=0x%x/0x%x " \
"sport=%u/%u " \
"dport=%u/%u\n", \
got.nhoff, expected.nhoff, \
@@ -26,6 +31,7 @@
got.is_encap, expected.is_encap, \
got.ip_proto, expected.ip_proto, \
got.n_proto, expected.n_proto, \
+ got.flow_label, expected.flow_label, \
got.sport, expected.sport, \
got.dport, expected.dport)
@@ -35,6 +41,13 @@ struct ipv4_pkt {
struct tcphdr tcp;
} __packed;
+struct ipip_pkt {
+ struct ethhdr eth;
+ struct iphdr iph;
+ struct iphdr iph_inner;
+ struct tcphdr tcp;
+} __packed;
+
struct svlan_ipv4_pkt {
struct ethhdr eth;
__u16 vlan_tci;
@@ -49,6 +62,18 @@ struct ipv6_pkt {
struct tcphdr tcp;
} __packed;
+struct ipv6_frag_pkt {
+ struct ethhdr eth;
+ struct ipv6hdr iph;
+ struct frag_hdr {
+ __u8 nexthdr;
+ __u8 reserved;
+ __be16 frag_off;
+ __be32 identification;
+ } ipf;
+ struct tcphdr tcp;
+} __packed;
+
struct dvlan_ipv6_pkt {
struct ethhdr eth;
__u16 vlan_tci;
@@ -64,10 +89,13 @@ struct test {
union {
struct ipv4_pkt ipv4;
struct svlan_ipv4_pkt svlan_ipv4;
+ struct ipip_pkt ipip;
struct ipv6_pkt ipv6;
+ struct ipv6_frag_pkt ipv6_frag;
struct dvlan_ipv6_pkt dvlan_ipv6;
} pkt;
struct bpf_flow_keys keys;
+ __u32 flags;
};
#define VLAN_HLEN 4
@@ -81,6 +109,8 @@ struct test tests[] = {
.iph.protocol = IPPROTO_TCP,
.iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
.tcp.doff = 5,
+ .tcp.source = 80,
+ .tcp.dest = 8080,
},
.keys = {
.nhoff = ETH_HLEN,
@@ -88,6 +118,8 @@ struct test tests[] = {
.addr_proto = ETH_P_IP,
.ip_proto = IPPROTO_TCP,
.n_proto = __bpf_constant_htons(ETH_P_IP),
+ .sport = 80,
+ .dport = 8080,
},
},
{
@@ -97,6 +129,8 @@ struct test tests[] = {
.iph.nexthdr = IPPROTO_TCP,
.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
.tcp.doff = 5,
+ .tcp.source = 80,
+ .tcp.dest = 8080,
},
.keys = {
.nhoff = ETH_HLEN,
@@ -104,6 +138,8 @@ struct test tests[] = {
.addr_proto = ETH_P_IPV6,
.ip_proto = IPPROTO_TCP,
.n_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .sport = 80,
+ .dport = 8080,
},
},
{
@@ -115,6 +151,8 @@ struct test tests[] = {
.iph.protocol = IPPROTO_TCP,
.iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
.tcp.doff = 5,
+ .tcp.source = 80,
+ .tcp.dest = 8080,
},
.keys = {
.nhoff = ETH_HLEN + VLAN_HLEN,
@@ -122,6 +160,8 @@ struct test tests[] = {
.addr_proto = ETH_P_IP,
.ip_proto = IPPROTO_TCP,
.n_proto = __bpf_constant_htons(ETH_P_IP),
+ .sport = 80,
+ .dport = 8080,
},
},
{
@@ -133,6 +173,8 @@ struct test tests[] = {
.iph.nexthdr = IPPROTO_TCP,
.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
.tcp.doff = 5,
+ .tcp.source = 80,
+ .tcp.dest = 8080,
},
.keys = {
.nhoff = ETH_HLEN + VLAN_HLEN * 2,
@@ -141,8 +183,205 @@ struct test tests[] = {
.addr_proto = ETH_P_IPV6,
.ip_proto = IPPROTO_TCP,
.n_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .sport = 80,
+ .dport = 8080,
+ },
+ },
+ {
+ .name = "ipv4-frag",
+ .pkt.ipv4 = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IP),
+ .iph.ihl = 5,
+ .iph.protocol = IPPROTO_TCP,
+ .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
+ .iph.frag_off = __bpf_constant_htons(IP_MF),
+ .tcp.doff = 5,
+ .tcp.source = 80,
+ .tcp.dest = 8080,
+ },
+ .keys = {
+ .flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG,
+ .nhoff = ETH_HLEN,
+ .thoff = ETH_HLEN + sizeof(struct iphdr),
+ .addr_proto = ETH_P_IP,
+ .ip_proto = IPPROTO_TCP,
+ .n_proto = __bpf_constant_htons(ETH_P_IP),
+ .is_frag = true,
+ .is_first_frag = true,
+ .sport = 80,
+ .dport = 8080,
+ },
+ .flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG,
+ },
+ {
+ .name = "ipv4-no-frag",
+ .pkt.ipv4 = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IP),
+ .iph.ihl = 5,
+ .iph.protocol = IPPROTO_TCP,
+ .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
+ .iph.frag_off = __bpf_constant_htons(IP_MF),
+ .tcp.doff = 5,
+ .tcp.source = 80,
+ .tcp.dest = 8080,
+ },
+ .keys = {
+ .nhoff = ETH_HLEN,
+ .thoff = ETH_HLEN + sizeof(struct iphdr),
+ .addr_proto = ETH_P_IP,
+ .ip_proto = IPPROTO_TCP,
+ .n_proto = __bpf_constant_htons(ETH_P_IP),
+ .is_frag = true,
+ .is_first_frag = true,
+ },
+ },
+ {
+ .name = "ipv6-frag",
+ .pkt.ipv6_frag = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .iph.nexthdr = IPPROTO_FRAGMENT,
+ .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+ .ipf.nexthdr = IPPROTO_TCP,
+ .tcp.doff = 5,
+ .tcp.source = 80,
+ .tcp.dest = 8080,
+ },
+ .keys = {
+ .flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG,
+ .nhoff = ETH_HLEN,
+ .thoff = ETH_HLEN + sizeof(struct ipv6hdr) +
+ sizeof(struct frag_hdr),
+ .addr_proto = ETH_P_IPV6,
+ .ip_proto = IPPROTO_TCP,
+ .n_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .is_frag = true,
+ .is_first_frag = true,
+ .sport = 80,
+ .dport = 8080,
+ },
+ .flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG,
+ },
+ {
+ .name = "ipv6-no-frag",
+ .pkt.ipv6_frag = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .iph.nexthdr = IPPROTO_FRAGMENT,
+ .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+ .ipf.nexthdr = IPPROTO_TCP,
+ .tcp.doff = 5,
+ .tcp.source = 80,
+ .tcp.dest = 8080,
+ },
+ .keys = {
+ .nhoff = ETH_HLEN,
+ .thoff = ETH_HLEN + sizeof(struct ipv6hdr) +
+ sizeof(struct frag_hdr),
+ .addr_proto = ETH_P_IPV6,
+ .ip_proto = IPPROTO_TCP,
+ .n_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .is_frag = true,
+ .is_first_frag = true,
+ },
+ },
+ {
+ .name = "ipv6-flow-label",
+ .pkt.ipv6 = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .iph.nexthdr = IPPROTO_TCP,
+ .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+ .iph.flow_lbl = { 0xb, 0xee, 0xef },
+ .tcp.doff = 5,
+ .tcp.source = 80,
+ .tcp.dest = 8080,
+ },
+ .keys = {
+ .nhoff = ETH_HLEN,
+ .thoff = ETH_HLEN + sizeof(struct ipv6hdr),
+ .addr_proto = ETH_P_IPV6,
+ .ip_proto = IPPROTO_TCP,
+ .n_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .sport = 80,
+ .dport = 8080,
+ .flow_label = __bpf_constant_htonl(0xbeeef),
},
},
+ {
+ .name = "ipv6-no-flow-label",
+ .pkt.ipv6 = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .iph.nexthdr = IPPROTO_TCP,
+ .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+ .iph.flow_lbl = { 0xb, 0xee, 0xef },
+ .tcp.doff = 5,
+ .tcp.source = 80,
+ .tcp.dest = 8080,
+ },
+ .keys = {
+ .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL,
+ .nhoff = ETH_HLEN,
+ .thoff = ETH_HLEN + sizeof(struct ipv6hdr),
+ .addr_proto = ETH_P_IPV6,
+ .ip_proto = IPPROTO_TCP,
+ .n_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .flow_label = __bpf_constant_htonl(0xbeeef),
+ },
+ .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL,
+ },
+ {
+ .name = "ipip-encap",
+ .pkt.ipip = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IP),
+ .iph.ihl = 5,
+ .iph.protocol = IPPROTO_IPIP,
+ .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
+ .iph_inner.ihl = 5,
+ .iph_inner.protocol = IPPROTO_TCP,
+ .iph_inner.tot_len =
+ __bpf_constant_htons(MAGIC_BYTES) -
+ sizeof(struct iphdr),
+ .tcp.doff = 5,
+ .tcp.source = 80,
+ .tcp.dest = 8080,
+ },
+ .keys = {
+ .nhoff = ETH_HLEN,
+ .thoff = ETH_HLEN + sizeof(struct iphdr) +
+ sizeof(struct iphdr),
+ .addr_proto = ETH_P_IP,
+ .ip_proto = IPPROTO_TCP,
+ .n_proto = __bpf_constant_htons(ETH_P_IP),
+ .is_encap = true,
+ .sport = 80,
+ .dport = 8080,
+ },
+ },
+ {
+ .name = "ipip-no-encap",
+ .pkt.ipip = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IP),
+ .iph.ihl = 5,
+ .iph.protocol = IPPROTO_IPIP,
+ .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
+ .iph_inner.ihl = 5,
+ .iph_inner.protocol = IPPROTO_TCP,
+ .iph_inner.tot_len =
+ __bpf_constant_htons(MAGIC_BYTES) -
+ sizeof(struct iphdr),
+ .tcp.doff = 5,
+ .tcp.source = 80,
+ .tcp.dest = 8080,
+ },
+ .keys = {
+ .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP,
+ .nhoff = ETH_HLEN,
+ .thoff = ETH_HLEN + sizeof(struct iphdr),
+ .addr_proto = ETH_P_IP,
+ .ip_proto = IPPROTO_IPIP,
+ .n_proto = __bpf_constant_htons(ETH_P_IP),
+ .is_encap = true,
+ },
+ .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP,
+ },
};
static int create_tap(const char *ifname)
@@ -212,10 +451,8 @@ void test_flow_dissector(void)
err = bpf_flow_load(&obj, "./bpf_flow.o", "flow_dissector",
"jmp_table", "last_dissection", &prog_fd, &keys_fd);
- if (err) {
- error_cnt++;
+ if (CHECK_FAIL(err))
return;
- }
for (i = 0; i < ARRAY_SIZE(tests); i++) {
struct bpf_flow_keys flow_keys;
@@ -225,6 +462,13 @@ void test_flow_dissector(void)
.data_size_in = sizeof(tests[i].pkt),
.data_out = &flow_keys,
};
+ static struct bpf_flow_keys ctx = {};
+
+ if (tests[i].flags) {
+ tattr.ctx_in = &ctx;
+ tattr.ctx_size_in = sizeof(ctx);
+ ctx.flags = tests[i].flags;
+ }
err = bpf_prog_test_run_xattr(&tattr);
CHECK_ATTR(tattr.data_size_out != sizeof(flow_keys) ||
@@ -251,9 +495,20 @@ void test_flow_dissector(void)
CHECK(err, "ifup", "err %d errno %d\n", err, errno);
for (i = 0; i < ARRAY_SIZE(tests); i++) {
- struct bpf_flow_keys flow_keys = {};
+ /* Keep in sync with 'flags' from eth_get_headlen. */
+ __u32 eth_get_headlen_flags =
+ BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
struct bpf_prog_test_run_attr tattr = {};
- __u32 key = 0;
+ struct bpf_flow_keys flow_keys = {};
+ __u32 key = (__u32)(tests[i].keys.sport) << 16 |
+ tests[i].keys.dport;
+
+ /* For skb-less case we can't pass input flags; run
+ * only the tests that have a matching set of flags.
+ */
+
+ if (tests[i].flags != eth_get_headlen_flags)
+ continue;
err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt));
CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno);
@@ -263,6 +518,9 @@ void test_flow_dissector(void)
CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err);
CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
+
+ err = bpf_map_delete_elem(keys_fd, &key);
+ CHECK_ATTR(err, tests[i].name, "bpf_map_delete_elem %d\n", err);
}
bpf_prog_detach(prog_fd, BPF_FLOW_DISSECTOR);
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
new file mode 100644
index 000000000000..1f51ba66b98b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that the flow_dissector program can be updated with a single
+ * syscall by attaching a new program that replaces the existing one.
+ *
+ * Corner case - the same program cannot be attached twice.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <unistd.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
+
+#include "test_progs.h"
+
+static bool is_attached(int netns)
+{
+ __u32 cnt;
+ int err;
+
+ err = bpf_prog_query(netns, BPF_FLOW_DISSECTOR, 0, NULL, NULL, &cnt);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_query");
+ return true; /* fail-safe */
+ }
+
+ return cnt > 0;
+}
+
+static int load_prog(void)
+{
+ struct bpf_insn prog[] = {
+ BPF_MOV64_IMM(BPF_REG_0, BPF_OK),
+ BPF_EXIT_INSN(),
+ };
+ int fd;
+
+ fd = bpf_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog,
+ ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+ if (CHECK_FAIL(fd < 0))
+ perror("bpf_load_program");
+
+ return fd;
+}
+
+static void do_flow_dissector_reattach(void)
+{
+ int prog_fd[2] = { -1, -1 };
+ int err;
+
+ prog_fd[0] = load_prog();
+ if (prog_fd[0] < 0)
+ return;
+
+ prog_fd[1] = load_prog();
+ if (prog_fd[1] < 0)
+ goto out_close;
+
+ err = bpf_prog_attach(prog_fd[0], 0, BPF_FLOW_DISSECTOR, 0);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_attach-0");
+ goto out_close;
+ }
+
+ /* Expect success when attaching a different program */
+ err = bpf_prog_attach(prog_fd[1], 0, BPF_FLOW_DISSECTOR, 0);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_attach-1");
+ goto out_detach;
+ }
+
+ /* Expect failure when attaching the same program twice */
+ err = bpf_prog_attach(prog_fd[1], 0, BPF_FLOW_DISSECTOR, 0);
+ if (CHECK_FAIL(!err || errno != EINVAL))
+ perror("bpf_prog_attach-2");
+
+out_detach:
+ err = bpf_prog_detach(0, BPF_FLOW_DISSECTOR);
+ if (CHECK_FAIL(err))
+ perror("bpf_prog_detach");
+
+out_close:
+ close(prog_fd[1]);
+ close(prog_fd[0]);
+}
+
+void test_flow_dissector_reattach(void)
+{
+ int init_net, self_net, err;
+
+ self_net = open("/proc/self/ns/net", O_RDONLY);
+ if (CHECK_FAIL(self_net < 0)) {
+ perror("open(/proc/self/ns/net");
+ return;
+ }
+
+ init_net = open("/proc/1/ns/net", O_RDONLY);
+ if (CHECK_FAIL(init_net < 0)) {
+ perror("open(/proc/1/ns/net)");
+ goto out_close;
+ }
+
+ err = setns(init_net, CLONE_NEWNET);
+ if (CHECK_FAIL(err)) {
+ perror("setns(/proc/1/ns/net)");
+ goto out_close;
+ }
+
+ if (is_attached(init_net)) {
+ test__skip();
+ printf("Can't test with flow dissector attached to init_net\n");
+ goto out_setns;
+ }
+
+ /* First run tests in root network namespace */
+ do_flow_dissector_reattach();
+
+ /* Then repeat tests in a non-root namespace */
+ err = unshare(CLONE_NEWNET);
+ if (CHECK_FAIL(err)) {
+ perror("unshare(CLONE_NEWNET)");
+ goto out_setns;
+ }
+ do_flow_dissector_reattach();
+
+out_setns:
+ /* Move back to netns we started in. */
+ err = setns(self_net, CLONE_NEWNET);
+ if (CHECK_FAIL(err))
+ perror("setns(/proc/self/ns/net)");
+
+out_close:
+ close(init_net);
+ close(self_net);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
index c2a0a9d5591b..eba9a970703b 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
@@ -1,8 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <sys/socket.h>
#include <test_progs.h>
#define MAX_CNT_RAWTP 10ull
#define MAX_STACK_RAWTP 100
+
+static int duration = 0;
+
struct get_stack_trace_t {
int pid;
int kern_stack_size;
@@ -13,7 +20,7 @@ struct get_stack_trace_t {
struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];
};
-static int get_stack_print_output(void *data, int size)
+static void get_stack_print_output(void *ctx, int cpu, void *data, __u32 size)
{
bool good_kern_stack = false, good_user_stack = false;
const char *nonjit_func = "___bpf_prog_run";
@@ -34,7 +41,7 @@ static int get_stack_print_output(void *data, int size)
* just assume it is good if the stack is not empty.
* This could be improved in the future.
*/
- if (jit_enabled) {
+ if (env.jit_enabled) {
found = num_stack > 0;
} else {
for (i = 0; i < num_stack; i++) {
@@ -51,7 +58,7 @@ static int get_stack_print_output(void *data, int size)
}
} else {
num_stack = e->kern_stack_size / sizeof(__u64);
- if (jit_enabled) {
+ if (env.jit_enabled) {
good_kern_stack = num_stack > 0;
} else {
for (i = 0; i < num_stack; i++) {
@@ -65,75 +72,73 @@ static int get_stack_print_output(void *data, int size)
if (e->user_stack_size > 0 && e->user_stack_buildid_size > 0)
good_user_stack = true;
}
- if (!good_kern_stack || !good_user_stack)
- return LIBBPF_PERF_EVENT_ERROR;
- if (cnt == MAX_CNT_RAWTP)
- return LIBBPF_PERF_EVENT_DONE;
-
- return LIBBPF_PERF_EVENT_CONT;
+ if (!good_kern_stack)
+ CHECK(!good_kern_stack, "kern_stack", "corrupted kernel stack\n");
+ if (!good_user_stack)
+ CHECK(!good_user_stack, "user_stack", "corrupted user stack\n");
}
void test_get_stack_raw_tp(void)
{
const char *file = "./test_get_stack_rawtp.o";
- int i, efd, err, prog_fd, pmu_fd, perfmap_fd;
- struct perf_event_attr attr = {};
+ const char *prog_name = "raw_tracepoint/sys_enter";
+ int i, err, prog_fd, exp_cnt = MAX_CNT_RAWTP;
+ struct perf_buffer_opts pb_opts = {};
+ struct perf_buffer *pb = NULL;
+ struct bpf_link *link = NULL;
struct timespec tv = {0, 10};
- __u32 key = 0, duration = 0;
+ struct bpf_program *prog;
struct bpf_object *obj;
+ struct bpf_map *map;
+ cpu_set_t cpu_set;
err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
return;
- efd = bpf_raw_tracepoint_open("sys_enter", prog_fd);
- if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name))
goto close_prog;
- perfmap_fd = bpf_find_map(__func__, obj, "perfmap");
- if (CHECK(perfmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
- perfmap_fd, errno))
+ map = bpf_object__find_map_by_name(obj, "perfmap");
+ if (CHECK(!map, "bpf_find_map", "not found\n"))
goto close_prog;
err = load_kallsyms();
if (CHECK(err < 0, "load_kallsyms", "err %d errno %d\n", err, errno))
goto close_prog;
- attr.sample_type = PERF_SAMPLE_RAW;
- attr.type = PERF_TYPE_SOFTWARE;
- attr.config = PERF_COUNT_SW_BPF_OUTPUT;
- pmu_fd = syscall(__NR_perf_event_open, &attr, getpid()/*pid*/, -1/*cpu*/,
- -1/*group_fd*/, 0);
- if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd,
- errno))
+ CPU_ZERO(&cpu_set);
+ CPU_SET(0, &cpu_set);
+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+ if (CHECK(err, "set_affinity", "err %d, errno %d\n", err, errno))
goto close_prog;
- err = bpf_map_update_elem(perfmap_fd, &key, &pmu_fd, BPF_ANY);
- if (CHECK(err < 0, "bpf_map_update_elem", "err %d errno %d\n", err,
- errno))
+ link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
+ if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
goto close_prog;
- err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
- if (CHECK(err < 0, "ioctl PERF_EVENT_IOC_ENABLE", "err %d errno %d\n",
- err, errno))
- goto close_prog;
-
- err = perf_event_mmap(pmu_fd);
- if (CHECK(err < 0, "perf_event_mmap", "err %d errno %d\n", err, errno))
+ pb_opts.sample_cb = get_stack_print_output;
+ pb = perf_buffer__new(bpf_map__fd(map), 8, &pb_opts);
+ if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
goto close_prog;
/* trigger some syscall action */
for (i = 0; i < MAX_CNT_RAWTP; i++)
nanosleep(&tv, NULL);
- err = perf_event_poller(pmu_fd, get_stack_print_output);
- if (CHECK(err < 0, "perf_event_poller", "err %d errno %d\n", err, errno))
- goto close_prog;
+ while (exp_cnt > 0) {
+ err = perf_buffer__poll(pb, 100);
+ if (err < 0 && CHECK(err < 0, "pb__poll", "err %d\n", err))
+ goto close_prog;
+ exp_cnt -= err;
+ }
- goto close_prog_noerr;
close_prog:
- error_cnt++;
-close_prog_noerr:
+ if (!IS_ERR_OR_NULL(link))
+ bpf_link__destroy(link);
+ if (!IS_ERR_OR_NULL(pb))
+ perf_buffer__free(pb);
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c
index d011079fb0bf..c680926fce73 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data.c
@@ -7,10 +7,8 @@ static void test_global_data_number(struct bpf_object *obj, __u32 duration)
uint64_t num;
map_fd = bpf_find_map(__func__, obj, "result_number");
- if (map_fd < 0) {
- error_cnt++;
+ if (CHECK_FAIL(map_fd < 0))
return;
- }
struct {
char *name;
@@ -44,10 +42,8 @@ static void test_global_data_string(struct bpf_object *obj, __u32 duration)
char str[32];
map_fd = bpf_find_map(__func__, obj, "result_string");
- if (map_fd < 0) {
- error_cnt++;
+ if (CHECK_FAIL(map_fd < 0))
return;
- }
struct {
char *name;
@@ -81,10 +77,8 @@ static void test_global_data_struct(struct bpf_object *obj, __u32 duration)
struct foo val;
map_fd = bpf_find_map(__func__, obj, "result_struct");
- if (map_fd < 0) {
- error_cnt++;
+ if (CHECK_FAIL(map_fd < 0))
return;
- }
struct {
char *name;
@@ -112,16 +106,12 @@ static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration)
__u8 *buff;
map = bpf_object__find_map_by_name(obj, "test_glo.rodata");
- if (!map || !bpf_map__is_internal(map)) {
- error_cnt++;
+ if (CHECK_FAIL(!map || !bpf_map__is_internal(map)))
return;
- }
map_fd = bpf_map__fd(map);
- if (map_fd < 0) {
- error_cnt++;
+ if (CHECK_FAIL(map_fd < 0))
return;
- }
buff = malloc(bpf_map__def(map)->value_size);
if (buff)
diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
new file mode 100644
index 000000000000..7507c8f689bc
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+struct meta {
+ int ifindex;
+ __u32 cb32_0;
+ __u8 cb8_0;
+};
+
+static union {
+ __u32 cb32[5];
+ __u8 cb8[20];
+} cb = {
+ .cb32[0] = 0x81828384,
+};
+
+static void on_sample(void *ctx, int cpu, void *data, __u32 size)
+{
+ struct meta *meta = (struct meta *)data;
+ struct ipv6_packet *pkt_v6 = data + sizeof(*meta);
+ int duration = 0;
+
+ if (CHECK(size != 72 + sizeof(*meta), "check_size", "size %u != %zu\n",
+ size, 72 + sizeof(*meta)))
+ return;
+ if (CHECK(meta->ifindex != 1, "check_meta_ifindex",
+ "meta->ifindex = %d\n", meta->ifindex))
+ /* spurious kfree_skb not on loopback device */
+ return;
+ if (CHECK(meta->cb8_0 != cb.cb8[0], "check_cb8_0", "cb8_0 %x != %x\n",
+ meta->cb8_0, cb.cb8[0]))
+ return;
+ if (CHECK(meta->cb32_0 != cb.cb32[0], "check_cb32_0",
+ "cb32_0 %x != %x\n",
+ meta->cb32_0, cb.cb32[0]))
+ return;
+ if (CHECK(pkt_v6->eth.h_proto != 0xdd86, "check_eth",
+ "h_proto %x\n", pkt_v6->eth.h_proto))
+ return;
+ if (CHECK(pkt_v6->iph.nexthdr != 6, "check_ip",
+ "iph.nexthdr %x\n", pkt_v6->iph.nexthdr))
+ return;
+ if (CHECK(pkt_v6->tcp.doff != 5, "check_tcp",
+ "tcp.doff %x\n", pkt_v6->tcp.doff))
+ return;
+
+ *(bool *)ctx = true;
+}
+
+void test_kfree_skb(void)
+{
+ struct __sk_buff skb = {};
+ struct bpf_prog_test_run_attr tattr = {
+ .data_in = &pkt_v6,
+ .data_size_in = sizeof(pkt_v6),
+ .ctx_in = &skb,
+ .ctx_size_in = sizeof(skb),
+ };
+ struct bpf_prog_load_attr attr = {
+ .file = "./kfree_skb.o",
+ };
+
+ struct bpf_link *link = NULL, *link_fentry = NULL, *link_fexit = NULL;
+ struct bpf_map *perf_buf_map, *global_data;
+ struct bpf_program *prog, *fentry, *fexit;
+ struct bpf_object *obj, *obj2 = NULL;
+ struct perf_buffer_opts pb_opts = {};
+ struct perf_buffer *pb = NULL;
+ int err, kfree_skb_fd;
+ bool passed = false;
+ __u32 duration = 0;
+ const int zero = 0;
+ bool test_ok[2];
+
+ err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &tattr.prog_fd);
+ if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
+ return;
+
+ err = bpf_prog_load_xattr(&attr, &obj2, &kfree_skb_fd);
+ if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
+ goto close_prog;
+
+ prog = bpf_object__find_program_by_title(obj2, "tp_btf/kfree_skb");
+ if (CHECK(!prog, "find_prog", "prog kfree_skb not found\n"))
+ goto close_prog;
+ fentry = bpf_object__find_program_by_title(obj2, "fentry/eth_type_trans");
+ if (CHECK(!fentry, "find_prog", "prog eth_type_trans not found\n"))
+ goto close_prog;
+ fexit = bpf_object__find_program_by_title(obj2, "fexit/eth_type_trans");
+ if (CHECK(!fexit, "find_prog", "prog eth_type_trans not found\n"))
+ goto close_prog;
+
+ global_data = bpf_object__find_map_by_name(obj2, "kfree_sk.bss");
+ if (CHECK(!global_data, "find global data", "not found\n"))
+ goto close_prog;
+
+ link = bpf_program__attach_raw_tracepoint(prog, NULL);
+ if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+ goto close_prog;
+ link_fentry = bpf_program__attach_trace(fentry);
+ if (CHECK(IS_ERR(link_fentry), "attach fentry", "err %ld\n",
+ PTR_ERR(link_fentry)))
+ goto close_prog;
+ link_fexit = bpf_program__attach_trace(fexit);
+ if (CHECK(IS_ERR(link_fexit), "attach fexit", "err %ld\n",
+ PTR_ERR(link_fexit)))
+ goto close_prog;
+
+ perf_buf_map = bpf_object__find_map_by_name(obj2, "perf_buf_map");
+ if (CHECK(!perf_buf_map, "find_perf_buf_map", "not found\n"))
+ goto close_prog;
+
+ /* set up perf buffer */
+ pb_opts.sample_cb = on_sample;
+ pb_opts.ctx = &passed;
+ pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts);
+ if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+ goto close_prog;
+
+ memcpy(skb.cb, &cb, sizeof(cb));
+ err = bpf_prog_test_run_xattr(&tattr);
+ duration = tattr.duration;
+ CHECK(err || tattr.retval, "ipv6",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, tattr.retval, duration);
+
+ /* read perf buffer */
+ err = perf_buffer__poll(pb, 100);
+ if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
+ goto close_prog;
+
+ /* make sure kfree_skb program was triggered
+ * and it sent expected skb into ring buffer
+ */
+ CHECK_FAIL(!passed);
+
+ err = bpf_map_lookup_elem(bpf_map__fd(global_data), &zero, test_ok);
+ if (CHECK(err, "get_result",
+ "failed to get output data: %d\n", err))
+ goto close_prog;
+
+ CHECK_FAIL(!test_ok[0] || !test_ok[1]);
+close_prog:
+ perf_buffer__free(pb);
+ if (!IS_ERR_OR_NULL(link))
+ bpf_link__destroy(link);
+ if (!IS_ERR_OR_NULL(link_fentry))
+ bpf_link__destroy(link_fentry);
+ if (!IS_ERR_OR_NULL(link_fexit))
+ bpf_link__destroy(link_fexit);
+ bpf_object__close(obj);
+ bpf_object__close(obj2);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
index 20ddca830e68..eaf64595be88 100644
--- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
+++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
@@ -30,10 +30,8 @@ static void test_l4lb(const char *file)
u32 *magic = (u32 *)buf;
err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
- if (err) {
- error_cnt++;
+ if (CHECK_FAIL(err))
return;
- }
map_fd = bpf_find_map(__func__, obj, "vip_map");
if (map_fd < 0)
@@ -72,10 +70,9 @@ static void test_l4lb(const char *file)
bytes += stats[i].bytes;
pkts += stats[i].pkts;
}
- if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) {
- error_cnt++;
+ if (CHECK_FAIL(bytes != MAGIC_BYTES * NUM_ITER * 2 ||
+ pkts != NUM_ITER * 2))
printf("test_l4lb:FAIL:stats %lld %lld\n", bytes, pkts);
- }
out:
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c
index ee99368c595c..8f91f1881d11 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c
@@ -8,14 +8,12 @@ static void *parallel_map_access(void *arg)
for (i = 0; i < 10000; i++) {
err = bpf_map_lookup_elem_flags(map_fd, &key, vars, BPF_F_LOCK);
- if (err) {
+ if (CHECK_FAIL(err)) {
printf("lookup failed\n");
- error_cnt++;
goto out;
}
- if (vars[0] != 0) {
+ if (CHECK_FAIL(vars[0] != 0)) {
printf("lookup #%d var[0]=%d\n", i, vars[0]);
- error_cnt++;
goto out;
}
rnd = vars[1];
@@ -24,7 +22,7 @@ static void *parallel_map_access(void *arg)
continue;
printf("lookup #%d var[1]=%d var[%d]=%d\n",
i, rnd, j, vars[j]);
- error_cnt++;
+ CHECK_FAIL(vars[j] != rnd);
goto out;
}
}
@@ -42,34 +40,36 @@ void test_map_lock(void)
void *ret;
err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
- if (err) {
+ if (CHECK_FAIL(err)) {
printf("test_map_lock:bpf_prog_load errno %d\n", errno);
goto close_prog;
}
map_fd[0] = bpf_find_map(__func__, obj, "hash_map");
- if (map_fd[0] < 0)
+ if (CHECK_FAIL(map_fd[0] < 0))
goto close_prog;
map_fd[1] = bpf_find_map(__func__, obj, "array_map");
- if (map_fd[1] < 0)
+ if (CHECK_FAIL(map_fd[1] < 0))
goto close_prog;
bpf_map_update_elem(map_fd[0], &key, vars, BPF_F_LOCK);
for (i = 0; i < 4; i++)
- assert(pthread_create(&thread_id[i], NULL,
- &spin_lock_thread, &prog_fd) == 0);
+ if (CHECK_FAIL(pthread_create(&thread_id[i], NULL,
+ &spin_lock_thread, &prog_fd)))
+ goto close_prog;
for (i = 4; i < 6; i++)
- assert(pthread_create(&thread_id[i], NULL,
- &parallel_map_access, &map_fd[i - 4]) == 0);
+ if (CHECK_FAIL(pthread_create(&thread_id[i], NULL,
+ &parallel_map_access,
+ &map_fd[i - 4])))
+ goto close_prog;
for (i = 0; i < 4; i++)
- assert(pthread_join(thread_id[i], &ret) == 0 &&
- ret == (void *)&prog_fd);
+ if (CHECK_FAIL(pthread_join(thread_id[i], &ret) ||
+ ret != (void *)&prog_fd))
+ goto close_prog;
for (i = 4; i < 6; i++)
- assert(pthread_join(thread_id[i], &ret) == 0 &&
- ret == (void *)&map_fd[i - 4]);
- goto close_prog_noerr;
+ if (CHECK_FAIL(pthread_join(thread_id[i], &ret) ||
+ ret != (void *)&map_fd[i - 4]))
+ goto close_prog;
close_prog:
- error_cnt++;
-close_prog_noerr:
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c
new file mode 100644
index 000000000000..16a814eb4d64
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/mmap.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <sys/mman.h>
+#include "test_mmap.skel.h"
+
+struct map_data {
+ __u64 val[512 * 4];
+};
+
+static size_t roundup_page(size_t sz)
+{
+ long page_size = sysconf(_SC_PAGE_SIZE);
+ return (sz + page_size - 1) / page_size * page_size;
+}
+
+void test_mmap(void)
+{
+ const size_t bss_sz = roundup_page(sizeof(struct test_mmap__bss));
+ const size_t map_sz = roundup_page(sizeof(struct map_data));
+ const int zero = 0, one = 1, two = 2, far = 1500;
+ const long page_size = sysconf(_SC_PAGE_SIZE);
+ int err, duration = 0, i, data_map_fd;
+ struct bpf_map *data_map, *bss_map;
+ void *bss_mmaped = NULL, *map_mmaped = NULL, *tmp1, *tmp2;
+ struct test_mmap__bss *bss_data;
+ struct map_data *map_data;
+ struct test_mmap *skel;
+ __u64 val = 0;
+
+
+ skel = test_mmap__open_and_load();
+ if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
+ return;
+
+ bss_map = skel->maps.bss;
+ data_map = skel->maps.data_map;
+ data_map_fd = bpf_map__fd(data_map);
+
+ bss_mmaped = mmap(NULL, bss_sz, PROT_READ | PROT_WRITE, MAP_SHARED,
+ bpf_map__fd(bss_map), 0);
+ if (CHECK(bss_mmaped == MAP_FAILED, "bss_mmap",
+ ".bss mmap failed: %d\n", errno)) {
+ bss_mmaped = NULL;
+ goto cleanup;
+ }
+ /* map as R/W first */
+ map_mmaped = mmap(NULL, map_sz, PROT_READ | PROT_WRITE, MAP_SHARED,
+ data_map_fd, 0);
+ if (CHECK(map_mmaped == MAP_FAILED, "data_mmap",
+ "data_map mmap failed: %d\n", errno)) {
+ map_mmaped = NULL;
+ goto cleanup;
+ }
+
+ bss_data = bss_mmaped;
+ map_data = map_mmaped;
+
+ CHECK_FAIL(bss_data->in_val);
+ CHECK_FAIL(bss_data->out_val);
+ CHECK_FAIL(skel->bss->in_val);
+ CHECK_FAIL(skel->bss->out_val);
+ CHECK_FAIL(map_data->val[0]);
+ CHECK_FAIL(map_data->val[1]);
+ CHECK_FAIL(map_data->val[2]);
+ CHECK_FAIL(map_data->val[far]);
+
+ err = test_mmap__attach(skel);
+ if (CHECK(err, "attach_raw_tp", "err %d\n", err))
+ goto cleanup;
+
+ bss_data->in_val = 123;
+ val = 111;
+ CHECK_FAIL(bpf_map_update_elem(data_map_fd, &zero, &val, 0));
+
+ usleep(1);
+
+ CHECK_FAIL(bss_data->in_val != 123);
+ CHECK_FAIL(bss_data->out_val != 123);
+ CHECK_FAIL(skel->bss->in_val != 123);
+ CHECK_FAIL(skel->bss->out_val != 123);
+ CHECK_FAIL(map_data->val[0] != 111);
+ CHECK_FAIL(map_data->val[1] != 222);
+ CHECK_FAIL(map_data->val[2] != 123);
+ CHECK_FAIL(map_data->val[far] != 3 * 123);
+
+ CHECK_FAIL(bpf_map_lookup_elem(data_map_fd, &zero, &val));
+ CHECK_FAIL(val != 111);
+ CHECK_FAIL(bpf_map_lookup_elem(data_map_fd, &one, &val));
+ CHECK_FAIL(val != 222);
+ CHECK_FAIL(bpf_map_lookup_elem(data_map_fd, &two, &val));
+ CHECK_FAIL(val != 123);
+ CHECK_FAIL(bpf_map_lookup_elem(data_map_fd, &far, &val));
+ CHECK_FAIL(val != 3 * 123);
+
+ /* data_map freeze should fail due to R/W mmap() */
+ err = bpf_map_freeze(data_map_fd);
+ if (CHECK(!err || errno != EBUSY, "no_freeze",
+ "data_map freeze succeeded: err=%d, errno=%d\n", err, errno))
+ goto cleanup;
+
+ /* unmap R/W mapping */
+ err = munmap(map_mmaped, map_sz);
+ map_mmaped = NULL;
+ if (CHECK(err, "data_map_munmap", "data_map munmap failed: %d\n", errno))
+ goto cleanup;
+
+ /* re-map as R/O now */
+ map_mmaped = mmap(NULL, map_sz, PROT_READ, MAP_SHARED, data_map_fd, 0);
+ if (CHECK(map_mmaped == MAP_FAILED, "data_mmap",
+ "data_map R/O mmap failed: %d\n", errno)) {
+ map_mmaped = NULL;
+ goto cleanup;
+ }
+ map_data = map_mmaped;
+
+ /* map/unmap in a loop to test ref counting */
+ for (i = 0; i < 10; i++) {
+ int flags = i % 2 ? PROT_READ : PROT_WRITE;
+ void *p;
+
+ p = mmap(NULL, map_sz, flags, MAP_SHARED, data_map_fd, 0);
+ if (CHECK_FAIL(p == MAP_FAILED))
+ goto cleanup;
+ err = munmap(p, map_sz);
+ if (CHECK_FAIL(err))
+ goto cleanup;
+ }
+
+ /* data_map freeze should now succeed due to no R/W mapping */
+ err = bpf_map_freeze(data_map_fd);
+ if (CHECK(err, "freeze", "data_map freeze failed: err=%d, errno=%d\n",
+ err, errno))
+ goto cleanup;
+
+ /* mapping as R/W now should fail */
+ tmp1 = mmap(NULL, map_sz, PROT_READ | PROT_WRITE, MAP_SHARED,
+ data_map_fd, 0);
+ if (CHECK(tmp1 != MAP_FAILED, "data_mmap", "mmap succeeded\n")) {
+ munmap(tmp1, map_sz);
+ goto cleanup;
+ }
+
+ bss_data->in_val = 321;
+ usleep(1);
+ CHECK_FAIL(bss_data->in_val != 321);
+ CHECK_FAIL(bss_data->out_val != 321);
+ CHECK_FAIL(skel->bss->in_val != 321);
+ CHECK_FAIL(skel->bss->out_val != 321);
+ CHECK_FAIL(map_data->val[0] != 111);
+ CHECK_FAIL(map_data->val[1] != 222);
+ CHECK_FAIL(map_data->val[2] != 321);
+ CHECK_FAIL(map_data->val[far] != 3 * 321);
+
+ /* check some more advanced mmap() manipulations */
+
+ /* map all but last page: pages 1-3 mapped */
+ tmp1 = mmap(NULL, 3 * page_size, PROT_READ, MAP_SHARED,
+ data_map_fd, 0);
+ if (CHECK(tmp1 == MAP_FAILED, "adv_mmap1", "errno %d\n", errno))
+ goto cleanup;
+
+ /* unmap second page: pages 1, 3 mapped */
+ err = munmap(tmp1 + page_size, page_size);
+ if (CHECK(err, "adv_mmap2", "errno %d\n", errno)) {
+ munmap(tmp1, map_sz);
+ goto cleanup;
+ }
+
+ /* map page 2 back */
+ tmp2 = mmap(tmp1 + page_size, page_size, PROT_READ,
+ MAP_SHARED | MAP_FIXED, data_map_fd, 0);
+ if (CHECK(tmp2 == MAP_FAILED, "adv_mmap3", "errno %d\n", errno)) {
+ munmap(tmp1, page_size);
+ munmap(tmp1 + 2*page_size, page_size);
+ goto cleanup;
+ }
+ CHECK(tmp1 + page_size != tmp2, "adv_mmap4",
+ "tmp1: %p, tmp2: %p\n", tmp1, tmp2);
+
+ /* re-map all 4 pages */
+ tmp2 = mmap(tmp1, 4 * page_size, PROT_READ, MAP_SHARED | MAP_FIXED,
+ data_map_fd, 0);
+ if (CHECK(tmp2 == MAP_FAILED, "adv_mmap5", "errno %d\n", errno)) {
+ munmap(tmp1, 3 * page_size); /* unmap page 1 */
+ goto cleanup;
+ }
+ CHECK(tmp1 != tmp2, "adv_mmap6", "tmp1: %p, tmp2: %p\n", tmp1, tmp2);
+
+ map_data = tmp2;
+ CHECK_FAIL(bss_data->in_val != 321);
+ CHECK_FAIL(bss_data->out_val != 321);
+ CHECK_FAIL(skel->bss->in_val != 321);
+ CHECK_FAIL(skel->bss->out_val != 321);
+ CHECK_FAIL(map_data->val[0] != 111);
+ CHECK_FAIL(map_data->val[1] != 222);
+ CHECK_FAIL(map_data->val[2] != 321);
+ CHECK_FAIL(map_data->val[far] != 3 * 321);
+
+ munmap(tmp2, 4 * page_size);
+cleanup:
+ if (bss_mmaped)
+ CHECK_FAIL(munmap(bss_mmaped, bss_sz));
+ if (map_mmaped)
+ CHECK_FAIL(munmap(map_mmaped, map_sz));
+ test_mmap__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
index 3003fddc0613..1450ea2dd4cc 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
@@ -4,6 +4,7 @@
#include <sched.h>
#include <sys/socket.h>
#include <test_progs.h>
+#include "bpf/libbpf_internal.h"
static void on_sample(void *ctx, int cpu, void *data, __u32 size)
{
@@ -19,7 +20,7 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size)
void test_perf_buffer(void)
{
- int err, prog_fd, nr_cpus, i, duration = 0;
+ int err, prog_fd, on_len, nr_on_cpus = 0, nr_cpus, i, duration = 0;
const char *prog_name = "kprobe/sys_nanosleep";
const char *file = "./test_perf_buffer.o";
struct perf_buffer_opts pb_opts = {};
@@ -29,15 +30,27 @@ void test_perf_buffer(void)
struct bpf_object *obj;
struct perf_buffer *pb;
struct bpf_link *link;
+ bool *online;
nr_cpus = libbpf_num_possible_cpus();
if (CHECK(nr_cpus < 0, "nr_cpus", "err %d\n", nr_cpus))
return;
+ err = parse_cpu_mask_file("/sys/devices/system/cpu/online",
+ &online, &on_len);
+ if (CHECK(err, "nr_on_cpus", "err %d\n", err))
+ return;
+
+ for (i = 0; i < on_len; i++)
+ if (online[i])
+ nr_on_cpus++;
+
/* load program */
err = bpf_prog_load(file, BPF_PROG_TYPE_KPROBE, &obj, &prog_fd);
- if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno))
- return;
+ if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) {
+ obj = NULL;
+ goto out_close;
+ }
prog = bpf_object__find_program_by_title(obj, prog_name);
if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name))
@@ -64,6 +77,11 @@ void test_perf_buffer(void)
/* trigger kprobe on every CPU */
CPU_ZERO(&cpu_seen);
for (i = 0; i < nr_cpus; i++) {
+ if (i >= on_len || !online[i]) {
+ printf("skipping offline CPU #%d\n", i);
+ continue;
+ }
+
CPU_ZERO(&cpu_set);
CPU_SET(i, &cpu_set);
@@ -81,8 +99,8 @@ void test_perf_buffer(void)
if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
goto out_free_pb;
- if (CHECK(CPU_COUNT(&cpu_seen) != nr_cpus, "seen_cpu_cnt",
- "expect %d, seen %d\n", nr_cpus, CPU_COUNT(&cpu_seen)))
+ if (CHECK(CPU_COUNT(&cpu_seen) != nr_on_cpus, "seen_cpu_cnt",
+ "expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen)))
goto out_free_pb;
out_free_pb:
@@ -91,4 +109,5 @@ out_detach:
bpf_link__destroy(link);
out_close:
bpf_object__close(obj);
+ free(online);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning.c b/tools/testing/selftests/bpf/prog_tests/pinning.c
new file mode 100644
index 000000000000..041952524c55
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/pinning.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <test_progs.h>
+
+__u32 get_map_id(struct bpf_object *obj, const char *name)
+{
+ struct bpf_map_info map_info = {};
+ __u32 map_info_len, duration = 0;
+ struct bpf_map *map;
+ int err;
+
+ map_info_len = sizeof(map_info);
+
+ map = bpf_object__find_map_by_name(obj, name);
+ if (CHECK(!map, "find map", "NULL map"))
+ return 0;
+
+ err = bpf_obj_get_info_by_fd(bpf_map__fd(map),
+ &map_info, &map_info_len);
+ CHECK(err, "get map info", "err %d errno %d", err, errno);
+ return map_info.id;
+}
+
+void test_pinning(void)
+{
+ const char *file_invalid = "./test_pinning_invalid.o";
+ const char *custpinpath = "/sys/fs/bpf/custom/pinmap";
+ const char *nopinpath = "/sys/fs/bpf/nopinmap";
+ const char *nopinpath2 = "/sys/fs/bpf/nopinmap2";
+ const char *custpath = "/sys/fs/bpf/custom";
+ const char *pinpath = "/sys/fs/bpf/pinmap";
+ const char *file = "./test_pinning.o";
+ __u32 map_id, map_id2, duration = 0;
+ struct stat statbuf = {};
+ struct bpf_object *obj;
+ struct bpf_map *map;
+ int err;
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
+ .pin_root_path = custpath,
+ );
+
+ /* check that opening fails with invalid pinning value in map def */
+ obj = bpf_object__open_file(file_invalid, NULL);
+ err = libbpf_get_error(obj);
+ if (CHECK(err != -EINVAL, "invalid open", "err %d errno %d\n", err, errno)) {
+ obj = NULL;
+ goto out;
+ }
+
+ /* open the valid object file */
+ obj = bpf_object__open_file(file, NULL);
+ err = libbpf_get_error(obj);
+ if (CHECK(err, "default open", "err %d errno %d\n", err, errno)) {
+ obj = NULL;
+ goto out;
+ }
+
+ err = bpf_object__load(obj);
+ if (CHECK(err, "default load", "err %d errno %d\n", err, errno))
+ goto out;
+
+ /* check that pinmap was pinned */
+ err = stat(pinpath, &statbuf);
+ if (CHECK(err, "stat pinpath", "err %d errno %d\n", err, errno))
+ goto out;
+
+ /* check that nopinmap was *not* pinned */
+ err = stat(nopinpath, &statbuf);
+ if (CHECK(!err || errno != ENOENT, "stat nopinpath",
+ "err %d errno %d\n", err, errno))
+ goto out;
+
+ /* check that nopinmap2 was *not* pinned */
+ err = stat(nopinpath2, &statbuf);
+ if (CHECK(!err || errno != ENOENT, "stat nopinpath2",
+ "err %d errno %d\n", err, errno))
+ goto out;
+
+ map_id = get_map_id(obj, "pinmap");
+ if (!map_id)
+ goto out;
+
+ bpf_object__close(obj);
+
+ obj = bpf_object__open_file(file, NULL);
+ if (CHECK_FAIL(libbpf_get_error(obj))) {
+ obj = NULL;
+ goto out;
+ }
+
+ err = bpf_object__load(obj);
+ if (CHECK(err, "default load", "err %d errno %d\n", err, errno))
+ goto out;
+
+ /* check that same map ID was reused for second load */
+ map_id2 = get_map_id(obj, "pinmap");
+ if (CHECK(map_id != map_id2, "check reuse",
+ "err %d errno %d id %d id2 %d\n", err, errno, map_id, map_id2))
+ goto out;
+
+ /* should be no-op to re-pin same map */
+ map = bpf_object__find_map_by_name(obj, "pinmap");
+ if (CHECK(!map, "find map", "NULL map"))
+ goto out;
+
+ err = bpf_map__pin(map, NULL);
+ if (CHECK(err, "re-pin map", "err %d errno %d\n", err, errno))
+ goto out;
+
+ /* but error to pin at different location */
+ err = bpf_map__pin(map, "/sys/fs/bpf/other");
+ if (CHECK(!err, "pin map different", "err %d errno %d\n", err, errno))
+ goto out;
+
+ /* unpin maps with a pin_path set */
+ err = bpf_object__unpin_maps(obj, NULL);
+ if (CHECK(err, "unpin maps", "err %d errno %d\n", err, errno))
+ goto out;
+
+ /* and re-pin them... */
+ err = bpf_object__pin_maps(obj, NULL);
+ if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
+ goto out;
+
+ /* set pinning path of other map and re-pin all */
+ map = bpf_object__find_map_by_name(obj, "nopinmap");
+ if (CHECK(!map, "find map", "NULL map"))
+ goto out;
+
+ err = bpf_map__set_pin_path(map, custpinpath);
+ if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno))
+ goto out;
+
+ /* should only pin the one unpinned map */
+ err = bpf_object__pin_maps(obj, NULL);
+ if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
+ goto out;
+
+ /* check that nopinmap was pinned at the custom path */
+ err = stat(custpinpath, &statbuf);
+ if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno))
+ goto out;
+
+ /* remove the custom pin path to re-test it with auto-pinning below */
+ err = unlink(custpinpath);
+ if (CHECK(err, "unlink custpinpath", "err %d errno %d\n", err, errno))
+ goto out;
+
+ err = rmdir(custpath);
+ if (CHECK(err, "rmdir custpindir", "err %d errno %d\n", err, errno))
+ goto out;
+
+ bpf_object__close(obj);
+
+ /* open the valid object file again */
+ obj = bpf_object__open_file(file, NULL);
+ err = libbpf_get_error(obj);
+ if (CHECK(err, "default open", "err %d errno %d\n", err, errno)) {
+ obj = NULL;
+ goto out;
+ }
+
+ /* set pin paths so that nopinmap2 will attempt to reuse the map at
+ * pinpath (which will fail), but not before pinmap has already been
+ * reused
+ */
+ bpf_object__for_each_map(map, obj) {
+ if (!strcmp(bpf_map__name(map), "nopinmap"))
+ err = bpf_map__set_pin_path(map, nopinpath2);
+ else if (!strcmp(bpf_map__name(map), "nopinmap2"))
+ err = bpf_map__set_pin_path(map, pinpath);
+ else
+ continue;
+
+ if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno))
+ goto out;
+ }
+
+ /* should fail because of map parameter mismatch */
+ err = bpf_object__load(obj);
+ if (CHECK(err != -EINVAL, "param mismatch load", "err %d errno %d\n", err, errno))
+ goto out;
+
+ /* nopinmap2 should have been pinned and cleaned up again */
+ err = stat(nopinpath2, &statbuf);
+ if (CHECK(!err || errno != ENOENT, "stat nopinpath2",
+ "err %d errno %d\n", err, errno))
+ goto out;
+
+ /* pinmap should still be there */
+ err = stat(pinpath, &statbuf);
+ if (CHECK(err, "stat pinpath", "err %d errno %d\n", err, errno))
+ goto out;
+
+ bpf_object__close(obj);
+
+ /* test auto-pinning at custom path with open opt */
+ obj = bpf_object__open_file(file, &opts);
+ if (CHECK_FAIL(libbpf_get_error(obj))) {
+ obj = NULL;
+ goto out;
+ }
+
+ err = bpf_object__load(obj);
+ if (CHECK(err, "custom load", "err %d errno %d\n", err, errno))
+ goto out;
+
+ /* check that pinmap was pinned at the custom path */
+ err = stat(custpinpath, &statbuf);
+ if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno))
+ goto out;
+
+out:
+ unlink(pinpath);
+ unlink(nopinpath);
+ unlink(nopinpath2);
+ unlink(custpinpath);
+ rmdir(custpath);
+ if (obj)
+ bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
index 4ecfd721a044..a2537dfa899c 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
@@ -9,10 +9,8 @@ void test_pkt_access(void)
int err, prog_fd;
err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
- if (err) {
- error_cnt++;
+ if (CHECK_FAIL(err))
return;
- }
err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4),
NULL, NULL, &retval, &duration);
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
index ac0d43435806..5f7aea605019 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
@@ -9,10 +9,8 @@ void test_pkt_md_access(void)
int err, prog_fd;
err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
- if (err) {
- error_cnt++;
+ if (CHECK_FAIL(err))
return;
- }
err = bpf_prog_test_run(prog_fd, 10, &pkt_v4, sizeof(pkt_v4),
NULL, NULL, &retval, &duration);
diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c
new file mode 100644
index 000000000000..7aecfd9e87d1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+void test_probe_user(void)
+{
+ const char *prog_name = "kprobe/__sys_connect";
+ const char *obj_file = "./test_probe_user.o";
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, );
+ int err, results_map_fd, sock_fd, duration = 0;
+ struct sockaddr curr, orig, tmp;
+ struct sockaddr_in *in = (struct sockaddr_in *)&curr;
+ struct bpf_link *kprobe_link = NULL;
+ struct bpf_program *kprobe_prog;
+ struct bpf_object *obj;
+ static const int zero = 0;
+
+ obj = bpf_object__open_file(obj_file, &opts);
+ if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+ return;
+
+ kprobe_prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK(!kprobe_prog, "find_probe",
+ "prog '%s' not found\n", prog_name))
+ goto cleanup;
+
+ err = bpf_object__load(obj);
+ if (CHECK(err, "obj_load", "err %d\n", err))
+ goto cleanup;
+
+ results_map_fd = bpf_find_map(__func__, obj, "test_pro.bss");
+ if (CHECK(results_map_fd < 0, "find_bss_map",
+ "err %d\n", results_map_fd))
+ goto cleanup;
+
+ kprobe_link = bpf_program__attach(kprobe_prog);
+ if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
+ "err %ld\n", PTR_ERR(kprobe_link))) {
+ kprobe_link = NULL;
+ goto cleanup;
+ }
+
+ memset(&curr, 0, sizeof(curr));
+ in->sin_family = AF_INET;
+ in->sin_port = htons(5555);
+ in->sin_addr.s_addr = inet_addr("255.255.255.255");
+ memcpy(&orig, &curr, sizeof(curr));
+
+ sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (CHECK(sock_fd < 0, "create_sock_fd", "err %d\n", sock_fd))
+ goto cleanup;
+
+ connect(sock_fd, &curr, sizeof(curr));
+ close(sock_fd);
+
+ err = bpf_map_lookup_elem(results_map_fd, &zero, &tmp);
+ if (CHECK(err, "get_kprobe_res",
+ "failed to get kprobe res: %d\n", err))
+ goto cleanup;
+
+ in = (struct sockaddr_in *)&tmp;
+ if (CHECK(memcmp(&tmp, &orig, sizeof(orig)), "check_kprobe_res",
+ "wrong kprobe res from probe read: %s:%u\n",
+ inet_ntoa(in->sin_addr), ntohs(in->sin_port)))
+ goto cleanup;
+
+ memset(&tmp, 0xab, sizeof(tmp));
+
+ in = (struct sockaddr_in *)&curr;
+ if (CHECK(memcmp(&curr, &tmp, sizeof(tmp)), "check_kprobe_res",
+ "wrong kprobe res from probe write: %s:%u\n",
+ inet_ntoa(in->sin_addr), ntohs(in->sin_port)))
+ goto cleanup;
+cleanup:
+ bpf_link__destroy(kprobe_link);
+ bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
index e60cd5ff1f55..faccc66f4e39 100644
--- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
@@ -27,10 +27,8 @@ static void test_queue_stack_map_by_type(int type)
return;
err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
- if (err) {
- error_cnt++;
+ if (CHECK_FAIL(err))
return;
- }
map_in_fd = bpf_find_map(__func__, obj, "map_in");
if (map_in_fd < 0)
@@ -43,10 +41,8 @@ static void test_queue_stack_map_by_type(int type)
/* Push 32 elements to the input map */
for (i = 0; i < MAP_SIZE; i++) {
err = bpf_map_update_elem(map_in_fd, NULL, &vals[i], 0);
- if (err) {
- error_cnt++;
+ if (CHECK_FAIL(err))
goto out;
- }
}
/* The eBPF program pushes iph.saddr in the output map,
diff --git a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
new file mode 100644
index 000000000000..563e12120e77
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+struct bss {
+ unsigned did_run;
+ unsigned iters;
+ unsigned sum;
+};
+
+struct rdonly_map_subtest {
+ const char *subtest_name;
+ const char *prog_name;
+ unsigned exp_iters;
+ unsigned exp_sum;
+};
+
+void test_rdonly_maps(void)
+{
+ const char *file = "test_rdonly_maps.o";
+ struct rdonly_map_subtest subtests[] = {
+ { "skip loop", "skip_loop", 0, 0 },
+ { "part loop", "part_loop", 3, 2 + 3 + 4 },
+ { "full loop", "full_loop", 4, 2 + 3 + 4 + 5 },
+ };
+ int i, err, zero = 0, duration = 0;
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_map *bss_map;
+ struct bpf_object *obj;
+ struct bss bss;
+
+ obj = bpf_object__open_file(file, NULL);
+ if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj)))
+ return;
+
+ err = bpf_object__load(obj);
+ if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno))
+ goto cleanup;
+
+ bss_map = bpf_object__find_map_by_name(obj, "test_rdo.bss");
+ if (CHECK(!bss_map, "find_bss_map", "failed\n"))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(subtests); i++) {
+ const struct rdonly_map_subtest *t = &subtests[i];
+
+ if (!test__start_subtest(t->subtest_name))
+ continue;
+
+ prog = bpf_object__find_program_by_name(obj, t->prog_name);
+ if (CHECK(!prog, "find_prog", "prog '%s' not found\n",
+ t->prog_name))
+ goto cleanup;
+
+ memset(&bss, 0, sizeof(bss));
+ err = bpf_map_update_elem(bpf_map__fd(bss_map), &zero, &bss, 0);
+ if (CHECK(err, "set_bss", "failed to set bss data: %d\n", err))
+ goto cleanup;
+
+ link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
+ if (CHECK(IS_ERR(link), "attach_prog", "prog '%s', err %ld\n",
+ t->prog_name, PTR_ERR(link))) {
+ link = NULL;
+ goto cleanup;
+ }
+
+ /* trigger probe */
+ usleep(1);
+
+ bpf_link__destroy(link);
+ link = NULL;
+
+ err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &zero, &bss);
+ if (CHECK(err, "get_bss", "failed to get bss data: %d\n", err))
+ goto cleanup;
+ if (CHECK(bss.did_run == 0, "check_run",
+ "prog '%s' didn't run?\n", t->prog_name))
+ goto cleanup;
+ if (CHECK(bss.iters != t->exp_iters, "check_iters",
+ "prog '%s' iters: %d, expected: %d\n",
+ t->prog_name, bss.iters, t->exp_iters))
+ goto cleanup;
+ if (CHECK(bss.sum != t->exp_sum, "check_sum",
+ "prog '%s' sum: %d, expected: %d\n",
+ t->prog_name, bss.sum, t->exp_sum))
+ goto cleanup;
+ }
+
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
index 5633be43828f..fc0d7f4f02cf 100644
--- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
+++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
@@ -1,28 +1,27 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
-static int libbpf_debug_print(enum libbpf_print_level level,
- const char *format, va_list args)
-{
- if (level == LIBBPF_DEBUG)
- return 0;
-
- return vfprintf(stderr, format, args);
-}
-
void test_reference_tracking(void)
{
- const char *file = "./test_sk_lookup_kern.o";
+ const char *file = "test_sk_lookup_kern.o";
+ const char *obj_name = "ref_track";
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts,
+ .object_name = obj_name,
+ .relaxed_maps = true,
+ );
struct bpf_object *obj;
struct bpf_program *prog;
__u32 duration = 0;
int err = 0;
- obj = bpf_object__open(file);
- if (IS_ERR(obj)) {
- error_cnt++;
+ obj = bpf_object__open_file(file, &open_opts);
+ if (CHECK_FAIL(IS_ERR(obj)))
return;
- }
+
+ if (CHECK(strcmp(bpf_object__name(obj), obj_name), "obj_name",
+ "wrong obj name '%s', expected '%s'\n",
+ bpf_object__name(obj), obj_name))
+ goto cleanup;
bpf_object__for_each_program(prog, obj) {
const char *title;
@@ -32,17 +31,22 @@ void test_reference_tracking(void)
if (strstr(title, ".text") != NULL)
continue;
- bpf_program__set_type(prog, BPF_PROG_TYPE_SCHED_CLS);
+ if (!test__start_subtest(title))
+ continue;
/* Expect verifier failure if test name has 'fail' */
if (strstr(title, "fail") != NULL) {
- libbpf_set_print(NULL);
+ libbpf_print_fn_t old_print_fn;
+
+ old_print_fn = libbpf_set_print(NULL);
err = !bpf_program__load(prog, "GPL", 0);
- libbpf_set_print(libbpf_debug_print);
+ libbpf_set_print(old_print_fn);
} else {
err = bpf_program__load(prog, "GPL", 0);
}
CHECK(err, title, "\n");
}
+
+cleanup:
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/test_section_names.c b/tools/testing/selftests/bpf/prog_tests/section_names.c
index 29833aeaf0de..9d9351dc2ded 100644
--- a/tools/testing/selftests/bpf/test_section_names.c
+++ b/tools/testing/selftests/bpf/prog_tests/section_names.c
@@ -1,10 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2018 Facebook
+#include <test_progs.h>
-#include <err.h>
-#include <bpf/libbpf.h>
-
-#include "bpf_util.h"
+static int duration = 0;
struct sec_name_test {
const char sec_name[32];
@@ -20,19 +18,23 @@ struct sec_name_test {
};
static struct sec_name_test tests[] = {
- {"InvAliD", {-EINVAL, 0, 0}, {-EINVAL, 0} },
- {"cgroup", {-EINVAL, 0, 0}, {-EINVAL, 0} },
+ {"InvAliD", {-ESRCH, 0, 0}, {-EINVAL, 0} },
+ {"cgroup", {-ESRCH, 0, 0}, {-EINVAL, 0} },
{"socket", {0, BPF_PROG_TYPE_SOCKET_FILTER, 0}, {-EINVAL, 0} },
{"kprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} },
+ {"uprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} },
{"kretprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} },
+ {"uretprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} },
{"classifier", {0, BPF_PROG_TYPE_SCHED_CLS, 0}, {-EINVAL, 0} },
{"action", {0, BPF_PROG_TYPE_SCHED_ACT, 0}, {-EINVAL, 0} },
{"tracepoint/", {0, BPF_PROG_TYPE_TRACEPOINT, 0}, {-EINVAL, 0} },
+ {"tp/", {0, BPF_PROG_TYPE_TRACEPOINT, 0}, {-EINVAL, 0} },
{
"raw_tracepoint/",
{0, BPF_PROG_TYPE_RAW_TRACEPOINT, 0},
{-EINVAL, 0},
},
+ {"raw_tp/", {0, BPF_PROG_TYPE_RAW_TRACEPOINT, 0}, {-EINVAL, 0} },
{"xdp", {0, BPF_PROG_TYPE_XDP, 0}, {-EINVAL, 0} },
{"perf_event", {0, BPF_PROG_TYPE_PERF_EVENT, 0}, {-EINVAL, 0} },
{"lwt_in", {0, BPF_PROG_TYPE_LWT_IN, 0}, {-EINVAL, 0} },
@@ -146,7 +148,7 @@ static struct sec_name_test tests[] = {
},
};
-static int test_prog_type_by_name(const struct sec_name_test *test)
+static void test_prog_type_by_name(const struct sec_name_test *test)
{
enum bpf_attach_type expected_attach_type;
enum bpf_prog_type prog_type;
@@ -155,79 +157,47 @@ static int test_prog_type_by_name(const struct sec_name_test *test)
rc = libbpf_prog_type_by_name(test->sec_name, &prog_type,
&expected_attach_type);
- if (rc != test->expected_load.rc) {
- warnx("prog: unexpected rc=%d for %s", rc, test->sec_name);
- return -1;
- }
+ CHECK(rc != test->expected_load.rc, "check_code",
+ "prog: unexpected rc=%d for %s", rc, test->sec_name);
if (rc)
- return 0;
-
- if (prog_type != test->expected_load.prog_type) {
- warnx("prog: unexpected prog_type=%d for %s", prog_type,
- test->sec_name);
- return -1;
- }
+ return;
- if (expected_attach_type != test->expected_load.expected_attach_type) {
- warnx("prog: unexpected expected_attach_type=%d for %s",
- expected_attach_type, test->sec_name);
- return -1;
- }
+ CHECK(prog_type != test->expected_load.prog_type, "check_prog_type",
+ "prog: unexpected prog_type=%d for %s",
+ prog_type, test->sec_name);
- return 0;
+ CHECK(expected_attach_type != test->expected_load.expected_attach_type,
+ "check_attach_type", "prog: unexpected expected_attach_type=%d for %s",
+ expected_attach_type, test->sec_name);
}
-static int test_attach_type_by_name(const struct sec_name_test *test)
+static void test_attach_type_by_name(const struct sec_name_test *test)
{
enum bpf_attach_type attach_type;
int rc;
rc = libbpf_attach_type_by_name(test->sec_name, &attach_type);
- if (rc != test->expected_attach.rc) {
- warnx("attach: unexpected rc=%d for %s", rc, test->sec_name);
- return -1;
- }
+ CHECK(rc != test->expected_attach.rc, "check_ret",
+ "attach: unexpected rc=%d for %s", rc, test->sec_name);
if (rc)
- return 0;
-
- if (attach_type != test->expected_attach.attach_type) {
- warnx("attach: unexpected attach_type=%d for %s", attach_type,
- test->sec_name);
- return -1;
- }
+ return;
- return 0;
+ CHECK(attach_type != test->expected_attach.attach_type,
+ "check_attach_type", "attach: unexpected attach_type=%d for %s",
+ attach_type, test->sec_name);
}
-static int run_test_case(const struct sec_name_test *test)
+void test_section_names(void)
{
- if (test_prog_type_by_name(test))
- return -1;
- if (test_attach_type_by_name(test))
- return -1;
- return 0;
-}
-
-static int run_tests(void)
-{
- int passes = 0;
- int fails = 0;
int i;
for (i = 0; i < ARRAY_SIZE(tests); ++i) {
- if (run_test_case(&tests[i]))
- ++fails;
- else
- ++passes;
- }
- printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
- return fails ? -1 : 0;
-}
+ struct sec_name_test *test = &tests[i];
-int main(int argc, char **argv)
-{
- return run_tests();
+ test_prog_type_by_name(test);
+ test_attach_type_by_name(test);
+ }
}
diff --git a/tools/testing/selftests/bpf/test_select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
index 7566c13eb51a..098bcae5f827 100644
--- a/tools/testing/selftests/bpf/test_select_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
@@ -20,8 +20,11 @@
#include <bpf/libbpf.h>
#include "bpf_rlimit.h"
#include "bpf_util.h"
+
+#include "test_progs.h"
#include "test_select_reuseport_common.h"
+#define MAX_TEST_NAME 80
#define MIN_TCPHDR_LEN 20
#define UDPHDR_LEN 8
@@ -30,13 +33,13 @@
#define REUSEPORT_ARRAY_SIZE 32
static int result_map, tmp_index_ovr_map, linum_map, data_check_map;
-static enum result expected_results[NR_RESULTS];
+static __u32 expected_results[NR_RESULTS];
static int sk_fds[REUSEPORT_ARRAY_SIZE];
-static int reuseport_array, outer_map;
+static int reuseport_array = -1, outer_map = -1;
static int select_by_skb_data_prog;
-static int saved_tcp_syncookie;
+static int saved_tcp_syncookie = -1;
static struct bpf_object *obj;
-static int saved_tcp_fo;
+static int saved_tcp_fo = -1;
static __u32 index_zero;
static int epfd;
@@ -46,16 +49,21 @@ static union sa46 {
sa_family_t family;
} srv_sa;
-#define CHECK(condition, tag, format...) ({ \
- int __ret = !!(condition); \
- if (__ret) { \
- printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \
- printf(format); \
- exit(-1); \
+#define RET_IF(condition, tag, format...) ({ \
+ if (CHECK_FAIL(condition)) { \
+ printf(tag " " format); \
+ return; \
+ } \
+})
+
+#define RET_ERR(condition, tag, format...) ({ \
+ if (CHECK_FAIL(condition)) { \
+ printf(tag " " format); \
+ return -1; \
} \
})
-static void create_maps(void)
+static int create_maps(void)
{
struct bpf_create_map_attr attr = {};
@@ -67,8 +75,8 @@ static void create_maps(void)
attr.max_entries = REUSEPORT_ARRAY_SIZE;
reuseport_array = bpf_create_map_xattr(&attr);
- CHECK(reuseport_array == -1, "creating reuseport_array",
- "reuseport_array:%d errno:%d\n", reuseport_array, errno);
+ RET_ERR(reuseport_array == -1, "creating reuseport_array",
+ "reuseport_array:%d errno:%d\n", reuseport_array, errno);
/* Creating outer_map */
attr.name = "outer_map";
@@ -78,63 +86,61 @@ static void create_maps(void)
attr.max_entries = 1;
attr.inner_map_fd = reuseport_array;
outer_map = bpf_create_map_xattr(&attr);
- CHECK(outer_map == -1, "creating outer_map",
- "outer_map:%d errno:%d\n", outer_map, errno);
+ RET_ERR(outer_map == -1, "creating outer_map",
+ "outer_map:%d errno:%d\n", outer_map, errno);
+
+ return 0;
}
-static void prepare_bpf_obj(void)
+static int prepare_bpf_obj(void)
{
struct bpf_program *prog;
struct bpf_map *map;
int err;
- struct bpf_object_open_attr attr = {
- .file = "test_select_reuseport_kern.o",
- .prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
- };
-
- obj = bpf_object__open_xattr(&attr);
- CHECK(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o",
- "obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj));
- prog = bpf_program__next(NULL, obj);
- CHECK(!prog, "get first bpf_program", "!prog\n");
- bpf_program__set_type(prog, attr.prog_type);
+ obj = bpf_object__open("test_select_reuseport_kern.o");
+ RET_ERR(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o",
+ "obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj));
map = bpf_object__find_map_by_name(obj, "outer_map");
- CHECK(!map, "find outer_map", "!map\n");
+ RET_ERR(!map, "find outer_map", "!map\n");
err = bpf_map__reuse_fd(map, outer_map);
- CHECK(err, "reuse outer_map", "err:%d\n", err);
+ RET_ERR(err, "reuse outer_map", "err:%d\n", err);
err = bpf_object__load(obj);
- CHECK(err, "load bpf_object", "err:%d\n", err);
+ RET_ERR(err, "load bpf_object", "err:%d\n", err);
+ prog = bpf_program__next(NULL, obj);
+ RET_ERR(!prog, "get first bpf_program", "!prog\n");
select_by_skb_data_prog = bpf_program__fd(prog);
- CHECK(select_by_skb_data_prog == -1, "get prog fd",
- "select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
+ RET_ERR(select_by_skb_data_prog == -1, "get prog fd",
+ "select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
map = bpf_object__find_map_by_name(obj, "result_map");
- CHECK(!map, "find result_map", "!map\n");
+ RET_ERR(!map, "find result_map", "!map\n");
result_map = bpf_map__fd(map);
- CHECK(result_map == -1, "get result_map fd",
- "result_map:%d\n", result_map);
+ RET_ERR(result_map == -1, "get result_map fd",
+ "result_map:%d\n", result_map);
map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map");
- CHECK(!map, "find tmp_index_ovr_map", "!map\n");
+ RET_ERR(!map, "find tmp_index_ovr_map\n", "!map");
tmp_index_ovr_map = bpf_map__fd(map);
- CHECK(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd",
- "tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
+ RET_ERR(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd",
+ "tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
map = bpf_object__find_map_by_name(obj, "linum_map");
- CHECK(!map, "find linum_map", "!map\n");
+ RET_ERR(!map, "find linum_map", "!map\n");
linum_map = bpf_map__fd(map);
- CHECK(linum_map == -1, "get linum_map fd",
- "linum_map:%d\n", linum_map);
+ RET_ERR(linum_map == -1, "get linum_map fd",
+ "linum_map:%d\n", linum_map);
map = bpf_object__find_map_by_name(obj, "data_check_map");
- CHECK(!map, "find data_check_map", "!map\n");
+ RET_ERR(!map, "find data_check_map", "!map\n");
data_check_map = bpf_map__fd(map);
- CHECK(data_check_map == -1, "get data_check_map fd",
- "data_check_map:%d\n", data_check_map);
+ RET_ERR(data_check_map == -1, "get data_check_map fd",
+ "data_check_map:%d\n", data_check_map);
+
+ return 0;
}
static void sa46_init_loopback(union sa46 *sa, sa_family_t family)
@@ -163,65 +169,73 @@ static int read_int_sysctl(const char *sysctl)
int fd, ret;
fd = open(sysctl, 0);
- CHECK(fd == -1, "open(sysctl)", "sysctl:%s fd:%d errno:%d\n",
- sysctl, fd, errno);
+ RET_ERR(fd == -1, "open(sysctl)",
+ "sysctl:%s fd:%d errno:%d\n", sysctl, fd, errno);
ret = read(fd, buf, sizeof(buf));
- CHECK(ret <= 0, "read(sysctl)", "sysctl:%s ret:%d errno:%d\n",
- sysctl, ret, errno);
- close(fd);
+ RET_ERR(ret <= 0, "read(sysctl)",
+ "sysctl:%s ret:%d errno:%d\n", sysctl, ret, errno);
+ close(fd);
return atoi(buf);
}
-static void write_int_sysctl(const char *sysctl, int v)
+static int write_int_sysctl(const char *sysctl, int v)
{
int fd, ret, size;
char buf[16];
fd = open(sysctl, O_RDWR);
- CHECK(fd == -1, "open(sysctl)", "sysctl:%s fd:%d errno:%d\n",
- sysctl, fd, errno);
+ RET_ERR(fd == -1, "open(sysctl)",
+ "sysctl:%s fd:%d errno:%d\n", sysctl, fd, errno);
size = snprintf(buf, sizeof(buf), "%d", v);
ret = write(fd, buf, size);
- CHECK(ret != size, "write(sysctl)",
- "sysctl:%s ret:%d size:%d errno:%d\n", sysctl, ret, size, errno);
+ RET_ERR(ret != size, "write(sysctl)",
+ "sysctl:%s ret:%d size:%d errno:%d\n",
+ sysctl, ret, size, errno);
+
close(fd);
+ return 0;
}
static void restore_sysctls(void)
{
- write_int_sysctl(TCP_FO_SYSCTL, saved_tcp_fo);
- write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, saved_tcp_syncookie);
+ if (saved_tcp_fo != -1)
+ write_int_sysctl(TCP_FO_SYSCTL, saved_tcp_fo);
+ if (saved_tcp_syncookie != -1)
+ write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, saved_tcp_syncookie);
}
-static void enable_fastopen(void)
+static int enable_fastopen(void)
{
int fo;
fo = read_int_sysctl(TCP_FO_SYSCTL);
- write_int_sysctl(TCP_FO_SYSCTL, fo | 7);
+ if (fo < 0)
+ return -1;
+
+ return write_int_sysctl(TCP_FO_SYSCTL, fo | 7);
}
-static void enable_syncookie(void)
+static int enable_syncookie(void)
{
- write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 2);
+ return write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 2);
}
-static void disable_syncookie(void)
+static int disable_syncookie(void)
{
- write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 0);
+ return write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 0);
}
-static __u32 get_linum(void)
+static long get_linum(void)
{
__u32 linum;
int err;
err = bpf_map_lookup_elem(linum_map, &index_zero, &linum);
- CHECK(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n",
- err, errno);
+ RET_ERR(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n",
+ err, errno);
return linum;
}
@@ -237,12 +251,12 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
addrlen = sizeof(cli_sa);
err = getsockname(cli_fd, (struct sockaddr *)&cli_sa,
&addrlen);
- CHECK(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n",
- err, errno);
+ RET_IF(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n",
+ err, errno);
err = bpf_map_lookup_elem(data_check_map, &index_zero, &result);
- CHECK(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
- err, errno);
+ RET_IF(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
+ err, errno);
if (type == SOCK_STREAM) {
expected.len = MIN_TCPHDR_LEN;
@@ -284,22 +298,42 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
printf("expected: (0x%x, %u, %u)\n",
expected.eth_protocol, expected.ip_protocol,
expected.bind_inany);
- CHECK(1, "data_check result != expected",
- "bpf_prog_linum:%u\n", get_linum());
+ RET_IF(1, "data_check result != expected",
+ "bpf_prog_linum:%ld\n", get_linum());
}
- CHECK(!result.hash, "data_check result.hash empty",
- "result.hash:%u", result.hash);
+ RET_IF(!result.hash, "data_check result.hash empty",
+ "result.hash:%u", result.hash);
expected.len += cmd ? sizeof(*cmd) : 0;
if (type == SOCK_STREAM)
- CHECK(expected.len > result.len, "expected.len > result.len",
- "expected.len:%u result.len:%u bpf_prog_linum:%u\n",
- expected.len, result.len, get_linum());
+ RET_IF(expected.len > result.len, "expected.len > result.len",
+ "expected.len:%u result.len:%u bpf_prog_linum:%ld\n",
+ expected.len, result.len, get_linum());
else
- CHECK(expected.len != result.len, "expected.len != result.len",
- "expected.len:%u result.len:%u bpf_prog_linum:%u\n",
- expected.len, result.len, get_linum());
+ RET_IF(expected.len != result.len, "expected.len != result.len",
+ "expected.len:%u result.len:%u bpf_prog_linum:%ld\n",
+ expected.len, result.len, get_linum());
+}
+
+static const char *result_to_str(enum result res)
+{
+ switch (res) {
+ case DROP_ERR_INNER_MAP:
+ return "DROP_ERR_INNER_MAP";
+ case DROP_ERR_SKB_DATA:
+ return "DROP_ERR_SKB_DATA";
+ case DROP_ERR_SK_SELECT_REUSEPORT:
+ return "DROP_ERR_SK_SELECT_REUSEPORT";
+ case DROP_MISC:
+ return "DROP_MISC";
+ case PASS:
+ return "PASS";
+ case PASS_ERR_SK_SELECT_REUSEPORT:
+ return "PASS_ERR_SK_SELECT_REUSEPORT";
+ default:
+ return "UNKNOWN";
+ }
}
static void check_results(void)
@@ -310,8 +344,8 @@ static void check_results(void)
for (i = 0; i < NR_RESULTS; i++) {
err = bpf_map_lookup_elem(result_map, &i, &results[i]);
- CHECK(err == -1, "lookup_elem(result_map)",
- "i:%u err:%d errno:%d\n", i, err, errno);
+ RET_IF(err == -1, "lookup_elem(result_map)",
+ "i:%u err:%d errno:%d\n", i, err, errno);
}
for (i = 0; i < NR_RESULTS; i++) {
@@ -337,10 +371,10 @@ static void check_results(void)
printf(", %u", expected_results[i]);
printf("]\n");
- CHECK(expected_results[broken] != results[broken],
- "unexpected result",
- "expected_results[%u] != results[%u] bpf_prog_linum:%u\n",
- broken, broken, get_linum());
+ printf("mismatch on %s (bpf_prog_linum:%ld)\n", result_to_str(broken),
+ get_linum());
+
+ CHECK_FAIL(true);
}
static int send_data(int type, sa_family_t family, void *data, size_t len,
@@ -350,17 +384,17 @@ static int send_data(int type, sa_family_t family, void *data, size_t len,
int fd, err;
fd = socket(family, type, 0);
- CHECK(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno);
+ RET_ERR(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno);
sa46_init_loopback(&cli_sa, family);
err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa));
- CHECK(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno);
+ RET_ERR(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno);
err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa,
sizeof(srv_sa));
- CHECK(err != len && expected >= PASS,
- "sendto()", "family:%u err:%d errno:%d expected:%d\n",
- family, err, errno, expected);
+ RET_ERR(err != len && expected >= PASS,
+ "sendto()", "family:%u err:%d errno:%d expected:%d\n",
+ family, err, errno, expected);
return fd;
}
@@ -375,47 +409,49 @@ static void do_test(int type, sa_family_t family, struct cmd *cmd,
cli_fd = send_data(type, family, cmd, cmd ? sizeof(*cmd) : 0,
expected);
+ if (cli_fd < 0)
+ return;
nev = epoll_wait(epfd, &ev, 1, expected >= PASS ? 5 : 0);
- CHECK((nev <= 0 && expected >= PASS) ||
- (nev > 0 && expected < PASS),
- "nev <> expected",
- "nev:%d expected:%d type:%d family:%d data:(%d, %d)\n",
- nev, expected, type, family,
- cmd ? cmd->reuseport_index : -1,
- cmd ? cmd->pass_on_failure : -1);
+ RET_IF((nev <= 0 && expected >= PASS) ||
+ (nev > 0 && expected < PASS),
+ "nev <> expected",
+ "nev:%d expected:%d type:%d family:%d data:(%d, %d)\n",
+ nev, expected, type, family,
+ cmd ? cmd->reuseport_index : -1,
+ cmd ? cmd->pass_on_failure : -1);
check_results();
check_data(type, family, cmd, cli_fd);
if (expected < PASS)
return;
- CHECK(expected != PASS_ERR_SK_SELECT_REUSEPORT &&
- cmd->reuseport_index != ev.data.u32,
- "check cmd->reuseport_index",
- "cmd:(%u, %u) ev.data.u32:%u\n",
- cmd->pass_on_failure, cmd->reuseport_index, ev.data.u32);
+ RET_IF(expected != PASS_ERR_SK_SELECT_REUSEPORT &&
+ cmd->reuseport_index != ev.data.u32,
+ "check cmd->reuseport_index",
+ "cmd:(%u, %u) ev.data.u32:%u\n",
+ cmd->pass_on_failure, cmd->reuseport_index, ev.data.u32);
srv_fd = sk_fds[ev.data.u32];
if (type == SOCK_STREAM) {
int new_fd = accept(srv_fd, NULL, 0);
- CHECK(new_fd == -1, "accept(srv_fd)",
- "ev.data.u32:%u new_fd:%d errno:%d\n",
- ev.data.u32, new_fd, errno);
+ RET_IF(new_fd == -1, "accept(srv_fd)",
+ "ev.data.u32:%u new_fd:%d errno:%d\n",
+ ev.data.u32, new_fd, errno);
nread = recv(new_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
- CHECK(nread != sizeof(rcv_cmd),
- "recv(new_fd)",
- "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
- ev.data.u32, nread, sizeof(rcv_cmd), errno);
+ RET_IF(nread != sizeof(rcv_cmd),
+ "recv(new_fd)",
+ "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
+ ev.data.u32, nread, sizeof(rcv_cmd), errno);
close(new_fd);
} else {
nread = recv(srv_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
- CHECK(nread != sizeof(rcv_cmd),
- "recv(sk_fds)",
- "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
- ev.data.u32, nread, sizeof(rcv_cmd), errno);
+ RET_IF(nread != sizeof(rcv_cmd),
+ "recv(sk_fds)",
+ "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
+ ev.data.u32, nread, sizeof(rcv_cmd), errno);
}
close(cli_fd);
@@ -428,18 +464,14 @@ static void test_err_inner_map(int type, sa_family_t family)
.pass_on_failure = 0,
};
- printf("%s: ", __func__);
expected_results[DROP_ERR_INNER_MAP]++;
do_test(type, family, &cmd, DROP_ERR_INNER_MAP);
- printf("OK\n");
}
static void test_err_skb_data(int type, sa_family_t family)
{
- printf("%s: ", __func__);
expected_results[DROP_ERR_SKB_DATA]++;
do_test(type, family, NULL, DROP_ERR_SKB_DATA);
- printf("OK\n");
}
static void test_err_sk_select_port(int type, sa_family_t family)
@@ -449,10 +481,8 @@ static void test_err_sk_select_port(int type, sa_family_t family)
.pass_on_failure = 0,
};
- printf("%s: ", __func__);
expected_results[DROP_ERR_SK_SELECT_REUSEPORT]++;
do_test(type, family, &cmd, DROP_ERR_SK_SELECT_REUSEPORT);
- printf("OK\n");
}
static void test_pass(int type, sa_family_t family)
@@ -460,14 +490,12 @@ static void test_pass(int type, sa_family_t family)
struct cmd cmd;
int i;
- printf("%s: ", __func__);
cmd.pass_on_failure = 0;
for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
expected_results[PASS]++;
cmd.reuseport_index = i;
do_test(type, family, &cmd, PASS);
}
- printf("OK\n");
}
static void test_syncookie(int type, sa_family_t family)
@@ -481,7 +509,6 @@ static void test_syncookie(int type, sa_family_t family)
if (type != SOCK_STREAM)
return;
- printf("%s: ", __func__);
/*
* +1 for TCP-SYN and
* +1 for the TCP-ACK (ack the syncookie)
@@ -497,17 +524,16 @@ static void test_syncookie(int type, sa_family_t family)
*/
err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero,
&tmp_index, BPF_ANY);
- CHECK(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)",
- "err:%d errno:%d\n", err, errno);
+ RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)",
+ "err:%d errno:%d\n", err, errno);
do_test(type, family, &cmd, PASS);
err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero,
&tmp_index);
- CHECK(err == -1 || tmp_index != -1,
- "lookup_elem(tmp_index_ovr_map)",
- "err:%d errno:%d tmp_index:%d\n",
- err, errno, tmp_index);
+ RET_IF(err == -1 || tmp_index != -1,
+ "lookup_elem(tmp_index_ovr_map)",
+ "err:%d errno:%d tmp_index:%d\n",
+ err, errno, tmp_index);
disable_syncookie();
- printf("OK\n");
}
static void test_pass_on_err(int type, sa_family_t family)
@@ -517,10 +543,8 @@ static void test_pass_on_err(int type, sa_family_t family)
.pass_on_failure = 1,
};
- printf("%s: ", __func__);
expected_results[PASS_ERR_SK_SELECT_REUSEPORT] += 1;
do_test(type, family, &cmd, PASS_ERR_SK_SELECT_REUSEPORT);
- printf("OK\n");
}
static void test_detach_bpf(int type, sa_family_t family)
@@ -532,46 +556,47 @@ static void test_detach_bpf(int type, sa_family_t family)
struct cmd cmd = {};
int optvalue = 0;
- printf("%s: ", __func__);
err = setsockopt(sk_fds[0], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
&optvalue, sizeof(optvalue));
- CHECK(err == -1, "setsockopt(SO_DETACH_REUSEPORT_BPF)",
- "err:%d errno:%d\n", err, errno);
+ RET_IF(err == -1, "setsockopt(SO_DETACH_REUSEPORT_BPF)",
+ "err:%d errno:%d\n", err, errno);
err = setsockopt(sk_fds[1], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
&optvalue, sizeof(optvalue));
- CHECK(err == 0 || errno != ENOENT, "setsockopt(SO_DETACH_REUSEPORT_BPF)",
- "err:%d errno:%d\n", err, errno);
+ RET_IF(err == 0 || errno != ENOENT,
+ "setsockopt(SO_DETACH_REUSEPORT_BPF)",
+ "err:%d errno:%d\n", err, errno);
for (i = 0; i < NR_RESULTS; i++) {
err = bpf_map_lookup_elem(result_map, &i, &tmp);
- CHECK(err == -1, "lookup_elem(result_map)",
- "i:%u err:%d errno:%d\n", i, err, errno);
+ RET_IF(err == -1, "lookup_elem(result_map)",
+ "i:%u err:%d errno:%d\n", i, err, errno);
nr_run_before += tmp;
}
cli_fd = send_data(type, family, &cmd, sizeof(cmd), PASS);
+ if (cli_fd < 0)
+ return;
nev = epoll_wait(epfd, &ev, 1, 5);
- CHECK(nev <= 0, "nev <= 0",
- "nev:%d expected:1 type:%d family:%d data:(0, 0)\n",
- nev, type, family);
+ RET_IF(nev <= 0, "nev <= 0",
+ "nev:%d expected:1 type:%d family:%d data:(0, 0)\n",
+ nev, type, family);
for (i = 0; i < NR_RESULTS; i++) {
err = bpf_map_lookup_elem(result_map, &i, &tmp);
- CHECK(err == -1, "lookup_elem(result_map)",
- "i:%u err:%d errno:%d\n", i, err, errno);
+ RET_IF(err == -1, "lookup_elem(result_map)",
+ "i:%u err:%d errno:%d\n", i, err, errno);
nr_run_after += tmp;
}
- CHECK(nr_run_before != nr_run_after,
- "nr_run_before != nr_run_after",
- "nr_run_before:%u nr_run_after:%u\n",
- nr_run_before, nr_run_after);
+ RET_IF(nr_run_before != nr_run_after,
+ "nr_run_before != nr_run_after",
+ "nr_run_before:%u nr_run_after:%u\n",
+ nr_run_before, nr_run_after);
- printf("OK\n");
close(cli_fd);
#else
- printf("%s: SKIP\n", __func__);
+ test__skip();
#endif
}
@@ -594,149 +619,220 @@ static void prepare_sk_fds(int type, sa_family_t family, bool inany)
*/
for (i = first; i >= 0; i--) {
sk_fds[i] = socket(family, type, 0);
- CHECK(sk_fds[i] == -1, "socket()", "sk_fds[%d]:%d errno:%d\n",
- i, sk_fds[i], errno);
+ RET_IF(sk_fds[i] == -1, "socket()", "sk_fds[%d]:%d errno:%d\n",
+ i, sk_fds[i], errno);
err = setsockopt(sk_fds[i], SOL_SOCKET, SO_REUSEPORT,
&optval, sizeof(optval));
- CHECK(err == -1, "setsockopt(SO_REUSEPORT)",
- "sk_fds[%d] err:%d errno:%d\n",
- i, err, errno);
+ RET_IF(err == -1, "setsockopt(SO_REUSEPORT)",
+ "sk_fds[%d] err:%d errno:%d\n",
+ i, err, errno);
if (i == first) {
err = setsockopt(sk_fds[i], SOL_SOCKET,
SO_ATTACH_REUSEPORT_EBPF,
&select_by_skb_data_prog,
sizeof(select_by_skb_data_prog));
- CHECK(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
- "err:%d errno:%d\n", err, errno);
+ RET_IF(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
+ "err:%d errno:%d\n", err, errno);
}
err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen);
- CHECK(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n",
- i, err, errno);
+ RET_IF(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n",
+ i, err, errno);
if (type == SOCK_STREAM) {
err = listen(sk_fds[i], 10);
- CHECK(err == -1, "listen()",
- "sk_fds[%d] err:%d errno:%d\n",
- i, err, errno);
+ RET_IF(err == -1, "listen()",
+ "sk_fds[%d] err:%d errno:%d\n",
+ i, err, errno);
}
err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i],
BPF_NOEXIST);
- CHECK(err == -1, "update_elem(reuseport_array)",
- "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
+ RET_IF(err == -1, "update_elem(reuseport_array)",
+ "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
if (i == first) {
socklen_t addrlen = sizeof(srv_sa);
err = getsockname(sk_fds[i], (struct sockaddr *)&srv_sa,
&addrlen);
- CHECK(err == -1, "getsockname()",
- "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
+ RET_IF(err == -1, "getsockname()",
+ "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
}
}
epfd = epoll_create(1);
- CHECK(epfd == -1, "epoll_create(1)",
- "epfd:%d errno:%d\n", epfd, errno);
+ RET_IF(epfd == -1, "epoll_create(1)",
+ "epfd:%d errno:%d\n", epfd, errno);
ev.events = EPOLLIN;
for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
ev.data.u32 = i;
err = epoll_ctl(epfd, EPOLL_CTL_ADD, sk_fds[i], &ev);
- CHECK(err, "epoll_ctl(EPOLL_CTL_ADD)", "sk_fds[%d]\n", i);
+ RET_IF(err, "epoll_ctl(EPOLL_CTL_ADD)", "sk_fds[%d]\n", i);
}
}
-static void setup_per_test(int type, unsigned short family, bool inany)
+static void setup_per_test(int type, sa_family_t family, bool inany,
+ bool no_inner_map)
{
int ovr = -1, err;
prepare_sk_fds(type, family, inany);
err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr,
BPF_ANY);
- CHECK(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)",
- "err:%d errno:%d\n", err, errno);
+ RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)",
+ "err:%d errno:%d\n", err, errno);
+
+ /* Install reuseport_array to outer_map? */
+ if (no_inner_map)
+ return;
+
+ err = bpf_map_update_elem(outer_map, &index_zero, &reuseport_array,
+ BPF_ANY);
+ RET_IF(err == -1, "update_elem(outer_map, 0, reuseport_array)",
+ "err:%d errno:%d\n", err, errno);
}
-static void cleanup_per_test(void)
+static void cleanup_per_test(bool no_inner_map)
{
- int i, err;
+ int i, err, zero = 0;
+
+ memset(expected_results, 0, sizeof(expected_results));
+
+ for (i = 0; i < NR_RESULTS; i++) {
+ err = bpf_map_update_elem(result_map, &i, &zero, BPF_ANY);
+ RET_IF(err, "reset elem in result_map",
+ "i:%u err:%d errno:%d\n", i, err, errno);
+ }
+
+ err = bpf_map_update_elem(linum_map, &zero, &zero, BPF_ANY);
+ RET_IF(err, "reset line number in linum_map", "err:%d errno:%d\n",
+ err, errno);
for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++)
close(sk_fds[i]);
close(epfd);
+ /* Delete reuseport_array from outer_map? */
+ if (no_inner_map)
+ return;
+
err = bpf_map_delete_elem(outer_map, &index_zero);
- CHECK(err == -1, "delete_elem(outer_map)",
- "err:%d errno:%d\n", err, errno);
+ RET_IF(err == -1, "delete_elem(outer_map)",
+ "err:%d errno:%d\n", err, errno);
}
static void cleanup(void)
{
- close(outer_map);
- close(reuseport_array);
- bpf_object__close(obj);
+ if (outer_map != -1)
+ close(outer_map);
+ if (reuseport_array != -1)
+ close(reuseport_array);
+ if (obj)
+ bpf_object__close(obj);
}
-static void test_all(void)
+static const char *family_str(sa_family_t family)
{
- /* Extra SOCK_STREAM to test bind_inany==true */
- const int types[] = { SOCK_STREAM, SOCK_DGRAM, SOCK_STREAM };
- const char * const type_strings[] = { "TCP", "UDP", "TCP" };
- const char * const family_strings[] = { "IPv6", "IPv4" };
- const unsigned short families[] = { AF_INET6, AF_INET };
- const bool bind_inany[] = { false, false, true };
- int t, f, err;
-
- for (f = 0; f < ARRAY_SIZE(families); f++) {
- unsigned short family = families[f];
-
- for (t = 0; t < ARRAY_SIZE(types); t++) {
- bool inany = bind_inany[t];
- int type = types[t];
-
- printf("######## %s/%s %s ########\n",
- family_strings[f], type_strings[t],
- inany ? " INANY " : "LOOPBACK");
-
- setup_per_test(type, family, inany);
-
- test_err_inner_map(type, family);
-
- /* Install reuseport_array to the outer_map */
- err = bpf_map_update_elem(outer_map, &index_zero,
- &reuseport_array, BPF_ANY);
- CHECK(err == -1, "update_elem(outer_map)",
- "err:%d errno:%d\n", err, errno);
-
- test_err_skb_data(type, family);
- test_err_sk_select_port(type, family);
- test_pass(type, family);
- test_syncookie(type, family);
- test_pass_on_err(type, family);
- /* Must be the last test */
- test_detach_bpf(type, family);
-
- cleanup_per_test();
- printf("\n");
- }
+ switch (family) {
+ case AF_INET:
+ return "IPv4";
+ case AF_INET6:
+ return "IPv6";
+ default:
+ return "unknown";
+ }
+}
+
+static const char *sotype_str(int sotype)
+{
+ switch (sotype) {
+ case SOCK_STREAM:
+ return "TCP";
+ case SOCK_DGRAM:
+ return "UDP";
+ default:
+ return "unknown";
}
}
-int main(int argc, const char **argv)
+#define TEST_INIT(fn, ...) { fn, #fn, __VA_ARGS__ }
+
+static void test_config(int sotype, sa_family_t family, bool inany)
{
- create_maps();
- prepare_bpf_obj();
+ const struct test {
+ void (*fn)(int sotype, sa_family_t family);
+ const char *name;
+ bool no_inner_map;
+ } tests[] = {
+ TEST_INIT(test_err_inner_map, true /* no_inner_map */),
+ TEST_INIT(test_err_skb_data),
+ TEST_INIT(test_err_sk_select_port),
+ TEST_INIT(test_pass),
+ TEST_INIT(test_syncookie),
+ TEST_INIT(test_pass_on_err),
+ TEST_INIT(test_detach_bpf),
+ };
+ char s[MAX_TEST_NAME];
+ const struct test *t;
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ snprintf(s, sizeof(s), "%s/%s %s %s",
+ family_str(family), sotype_str(sotype),
+ inany ? "INANY" : "LOOPBACK", t->name);
+
+ if (!test__start_subtest(s))
+ continue;
+
+ setup_per_test(sotype, family, inany, t->no_inner_map);
+ t->fn(sotype, family);
+ cleanup_per_test(t->no_inner_map);
+ }
+}
+
+#define BIND_INANY true
+
+static void test_all(void)
+{
+ const struct config {
+ int sotype;
+ sa_family_t family;
+ bool inany;
+ } configs[] = {
+ { SOCK_STREAM, AF_INET },
+ { SOCK_STREAM, AF_INET, BIND_INANY },
+ { SOCK_STREAM, AF_INET6 },
+ { SOCK_STREAM, AF_INET6, BIND_INANY },
+ { SOCK_DGRAM, AF_INET },
+ { SOCK_DGRAM, AF_INET6 },
+ };
+ const struct config *c;
+
+ for (c = configs; c < configs + ARRAY_SIZE(configs); c++)
+ test_config(c->sotype, c->family, c->inany);
+}
+
+void test_select_reuseport(void)
+{
+ if (create_maps())
+ goto out;
+ if (prepare_bpf_obj())
+ goto out;
+
saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL);
saved_tcp_syncookie = read_int_sysctl(TCP_SYNCOOKIE_SYSCTL);
- enable_fastopen();
- disable_syncookie();
- atexit(restore_sysctls);
+ if (saved_tcp_syncookie < 0 || saved_tcp_syncookie < 0)
+ goto out;
- test_all();
+ if (enable_fastopen())
+ goto out;
+ if (disable_syncookie())
+ goto out;
+ test_all();
+out:
cleanup();
- return 0;
+ restore_sysctls();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index 54218ee3c004..504abb7bfb95 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include "test_send_signal_kern.skel.h"
static volatile int sigusr1_received = 0;
@@ -8,28 +9,26 @@ static void sigusr1_handler(int signum)
sigusr1_received++;
}
-static int test_send_signal_common(struct perf_event_attr *attr,
- int prog_type,
+static void test_send_signal_common(struct perf_event_attr *attr,
+ bool signal_thread,
const char *test_name)
{
- int err = -1, pmu_fd, prog_fd, info_map_fd, status_map_fd;
- const char *file = "./test_send_signal_kern.o";
- struct bpf_object *obj = NULL;
+ struct test_send_signal_kern *skel;
int pipe_c2p[2], pipe_p2c[2];
- __u32 key = 0, duration = 0;
+ int err = -1, pmu_fd = -1;
+ __u32 duration = 0;
char buf[256];
pid_t pid;
- __u64 val;
if (CHECK(pipe(pipe_c2p), test_name,
"pipe pipe_c2p error: %s\n", strerror(errno)))
- goto no_fork_done;
+ return;
if (CHECK(pipe(pipe_p2c), test_name,
"pipe pipe_p2c error: %s\n", strerror(errno))) {
close(pipe_c2p[0]);
close(pipe_c2p[1]);
- goto no_fork_done;
+ return;
}
pid = fork();
@@ -38,7 +37,7 @@ static int test_send_signal_common(struct perf_event_attr *attr,
close(pipe_c2p[1]);
close(pipe_p2c[0]);
close(pipe_p2c[1]);
- goto no_fork_done;
+ return;
}
if (pid == 0) {
@@ -73,45 +72,39 @@ static int test_send_signal_common(struct perf_event_attr *attr,
close(pipe_c2p[1]); /* close write */
close(pipe_p2c[0]); /* close read */
- err = bpf_prog_load(file, prog_type, &obj, &prog_fd);
- if (CHECK(err < 0, test_name, "bpf_prog_load error: %s\n",
- strerror(errno)))
- goto prog_load_failure;
-
- pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1,
- -1 /* group id */, 0 /* flags */);
- if (CHECK(pmu_fd < 0, test_name, "perf_event_open error: %s\n",
- strerror(errno))) {
- err = -1;
- goto close_prog;
- }
-
- err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
- if (CHECK(err < 0, test_name, "ioctl perf_event_ioc_enable error: %s\n",
- strerror(errno)))
- goto disable_pmu;
-
- err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
- if (CHECK(err < 0, test_name, "ioctl perf_event_ioc_set_bpf error: %s\n",
- strerror(errno)))
- goto disable_pmu;
+ skel = test_send_signal_kern__open_and_load();
+ if (CHECK(!skel, "skel_open_and_load", "skeleton open_and_load failed\n"))
+ goto skel_open_load_failure;
- err = -1;
- info_map_fd = bpf_object__find_map_fd_by_name(obj, "info_map");
- if (CHECK(info_map_fd < 0, test_name, "find map %s error\n", "info_map"))
- goto disable_pmu;
+ if (!attr) {
+ err = test_send_signal_kern__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed\n")) {
+ err = -1;
+ goto destroy_skel;
+ }
+ } else {
+ pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1,
+ -1 /* group id */, 0 /* flags */);
+ if (CHECK(pmu_fd < 0, test_name, "perf_event_open error: %s\n",
+ strerror(errno))) {
+ err = -1;
+ goto destroy_skel;
+ }
- status_map_fd = bpf_object__find_map_fd_by_name(obj, "status_map");
- if (CHECK(status_map_fd < 0, test_name, "find map %s error\n", "status_map"))
- goto disable_pmu;
+ skel->links.send_signal_perf =
+ bpf_program__attach_perf_event(skel->progs.send_signal_perf, pmu_fd);
+ if (CHECK(IS_ERR(skel->links.send_signal_perf), "attach_perf_event",
+ "err %ld\n", PTR_ERR(skel->links.send_signal_perf)))
+ goto disable_pmu;
+ }
/* wait until child signal handler installed */
read(pipe_c2p[0], buf, 1);
/* trigger the bpf send_signal */
- key = 0;
- val = (((__u64)(SIGUSR1)) << 32) | pid;
- bpf_map_update_elem(info_map_fd, &key, &val, 0);
+ skel->bss->pid = pid;
+ skel->bss->sig = SIGUSR1;
+ skel->bss->signal_thread = signal_thread;
/* notify child that bpf program can send_signal now */
write(pipe_p2c[1], buf, 1);
@@ -125,55 +118,27 @@ static int test_send_signal_common(struct perf_event_attr *attr,
goto disable_pmu;
}
- err = CHECK(buf[0] != '2', test_name, "incorrect result\n");
+ CHECK(buf[0] != '2', test_name, "incorrect result\n");
/* notify child safe to exit */
write(pipe_p2c[1], buf, 1);
disable_pmu:
close(pmu_fd);
-close_prog:
- bpf_object__close(obj);
-prog_load_failure:
+destroy_skel:
+ test_send_signal_kern__destroy(skel);
+skel_open_load_failure:
close(pipe_c2p[0]);
close(pipe_p2c[1]);
wait(NULL);
-no_fork_done:
- return err;
}
-static int test_send_signal_tracepoint(void)
+static void test_send_signal_tracepoint(bool signal_thread)
{
- const char *id_path = "/sys/kernel/debug/tracing/events/syscalls/sys_enter_nanosleep/id";
- struct perf_event_attr attr = {
- .type = PERF_TYPE_TRACEPOINT,
- .sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN,
- .sample_period = 1,
- .wakeup_events = 1,
- };
- __u32 duration = 0;
- int bytes, efd;
- char buf[256];
-
- efd = open(id_path, O_RDONLY, 0);
- if (CHECK(efd < 0, "tracepoint",
- "open syscalls/sys_enter_nanosleep/id failure: %s\n",
- strerror(errno)))
- return -1;
-
- bytes = read(efd, buf, sizeof(buf));
- close(efd);
- if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "tracepoint",
- "read syscalls/sys_enter_nanosleep/id failure: %s\n",
- strerror(errno)))
- return -1;
-
- attr.config = strtol(buf, NULL, 0);
-
- return test_send_signal_common(&attr, BPF_PROG_TYPE_TRACEPOINT, "tracepoint");
+ test_send_signal_common(NULL, signal_thread, "tracepoint");
}
-static int test_send_signal_perf(void)
+static void test_send_signal_perf(bool signal_thread)
{
struct perf_event_attr attr = {
.sample_period = 1,
@@ -181,15 +146,13 @@ static int test_send_signal_perf(void)
.config = PERF_COUNT_SW_CPU_CLOCK,
};
- return test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT,
- "perf_sw_event");
+ test_send_signal_common(&attr, signal_thread, "perf_sw_event");
}
-static int test_send_signal_nmi(void)
+static void test_send_signal_nmi(bool signal_thread)
{
struct perf_event_attr attr = {
- .sample_freq = 50,
- .freq = 1,
+ .sample_period = 1,
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
};
@@ -203,27 +166,30 @@ static int test_send_signal_nmi(void)
if (pmu_fd == -1) {
if (errno == ENOENT) {
printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n",
- __func__);
- return 0;
+ __func__);
+ test__skip();
+ return;
}
/* Let the test fail with a more informative message */
} else {
close(pmu_fd);
}
- return test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT,
- "perf_hw_event");
+ test_send_signal_common(&attr, signal_thread, "perf_hw_event");
}
void test_send_signal(void)
{
- int ret = 0;
-
- ret |= test_send_signal_tracepoint();
- ret |= test_send_signal_perf();
- ret |= test_send_signal_nmi();
- if (!ret)
- printf("test_send_signal:OK\n");
- else
- printf("test_send_signal:FAIL\n");
+ if (test__start_subtest("send_signal_tracepoint"))
+ test_send_signal_tracepoint(false);
+ if (test__start_subtest("send_signal_perf"))
+ test_send_signal_perf(false);
+ if (test__start_subtest("send_signal_nmi"))
+ test_send_signal_nmi(false);
+ if (test__start_subtest("send_signal_tracepoint_thread"))
+ test_send_signal_tracepoint(true);
+ if (test__start_subtest("send_signal_perf_thread"))
+ test_send_signal_perf(true);
+ if (test__start_subtest("send_signal_nmi_thread"))
+ test_send_signal_nmi(true);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
index e95baa32e277..c6d6b685a946 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -10,6 +10,10 @@ void test_skb_ctx(void)
.cb[3] = 4,
.cb[4] = 5,
.priority = 6,
+ .tstamp = 7,
+ .wire_len = 100,
+ .gso_segs = 8,
+ .mark = 9,
};
struct bpf_prog_test_run_attr tattr = {
.data_in = &pkt_v4,
@@ -86,4 +90,12 @@ void test_skb_ctx(void)
"ctx_out_priority",
"skb->priority == %d, expected %d\n",
skb.priority, 7);
+ CHECK_ATTR(skb.tstamp != 8,
+ "ctx_out_tstamp",
+ "skb->tstamp == %lld, expected %d\n",
+ skb.tstamp, 8);
+ CHECK_ATTR(skb.mark != 10,
+ "ctx_out_mark",
+ "skb->mark == %u, expected %d\n",
+ skb.mark, 10);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c
new file mode 100644
index 000000000000..9264a2736018
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <test_progs.h>
+
+struct s {
+ int a;
+ long long b;
+} __attribute__((packed));
+
+#include "test_skeleton.skel.h"
+
+void test_skeleton(void)
+{
+ int duration = 0, err;
+ struct test_skeleton* skel;
+ struct test_skeleton__bss *bss;
+ struct test_skeleton__kconfig *kcfg;
+
+ skel = test_skeleton__open();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+
+ if (CHECK(skel->kconfig, "skel_kconfig", "kconfig is mmaped()!\n"))
+ goto cleanup;
+
+ err = test_skeleton__load(skel);
+ if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
+ goto cleanup;
+
+ bss = skel->bss;
+ bss->in1 = 1;
+ bss->in2 = 2;
+ bss->in3 = 3;
+ bss->in4 = 4;
+ bss->in5.a = 5;
+ bss->in5.b = 6;
+ kcfg = skel->kconfig;
+
+ err = test_skeleton__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ usleep(1);
+
+ CHECK(bss->out1 != 1, "res1", "got %d != exp %d\n", bss->out1, 1);
+ CHECK(bss->out2 != 2, "res2", "got %lld != exp %d\n", bss->out2, 2);
+ CHECK(bss->out3 != 3, "res3", "got %d != exp %d\n", (int)bss->out3, 3);
+ CHECK(bss->out4 != 4, "res4", "got %lld != exp %d\n", bss->out4, 4);
+ CHECK(bss->handler_out5.a != 5, "res5", "got %d != exp %d\n",
+ bss->handler_out5.a, 5);
+ CHECK(bss->handler_out5.b != 6, "res6", "got %lld != exp %d\n",
+ bss->handler_out5.b, 6);
+
+ CHECK(bss->bpf_syscall != kcfg->CONFIG_BPF_SYSCALL, "ext1",
+ "got %d != exp %d\n", bss->bpf_syscall, kcfg->CONFIG_BPF_SYSCALL);
+ CHECK(bss->kern_ver != kcfg->LINUX_KERNEL_VERSION, "ext2",
+ "got %d != exp %d\n", bss->kern_ver, kcfg->LINUX_KERNEL_VERSION);
+
+cleanup:
+ test_skeleton__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
new file mode 100644
index 000000000000..07f5b462c2ef
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Cloudflare
+
+#include "test_progs.h"
+
+static int connected_socket_v4(void)
+{
+ struct sockaddr_in addr = {
+ .sin_family = AF_INET,
+ .sin_port = htons(80),
+ .sin_addr = { inet_addr("127.0.0.1") },
+ };
+ socklen_t len = sizeof(addr);
+ int s, repair, err;
+
+ s = socket(AF_INET, SOCK_STREAM, 0);
+ if (CHECK_FAIL(s == -1))
+ goto error;
+
+ repair = TCP_REPAIR_ON;
+ err = setsockopt(s, SOL_TCP, TCP_REPAIR, &repair, sizeof(repair));
+ if (CHECK_FAIL(err))
+ goto error;
+
+ err = connect(s, (struct sockaddr *)&addr, len);
+ if (CHECK_FAIL(err))
+ goto error;
+
+ repair = TCP_REPAIR_OFF_NO_WP;
+ err = setsockopt(s, SOL_TCP, TCP_REPAIR, &repair, sizeof(repair));
+ if (CHECK_FAIL(err))
+ goto error;
+
+ return s;
+error:
+ perror(__func__);
+ close(s);
+ return -1;
+}
+
+/* Create a map, populate it with one socket, and free the map. */
+static void test_sockmap_create_update_free(enum bpf_map_type map_type)
+{
+ const int zero = 0;
+ int s, map, err;
+
+ s = connected_socket_v4();
+ if (CHECK_FAIL(s == -1))
+ return;
+
+ map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
+ if (CHECK_FAIL(map == -1)) {
+ perror("bpf_create_map");
+ goto out;
+ }
+
+ err = bpf_map_update_elem(map, &zero, &s, BPF_NOEXIST);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_map_update");
+ goto out;
+ }
+
+out:
+ close(map);
+ close(s);
+}
+
+void test_sockmap_basic(void)
+{
+ if (test__start_subtest("sockmap create_update_free"))
+ test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKMAP);
+ if (test__start_subtest("sockhash create_update_free"))
+ test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKHASH);
+}
diff --git a/tools/testing/selftests/bpf/test_sockopt.c b/tools/testing/selftests/bpf/prog_tests/sockopt.c
index 23bd0819382d..3e8517a8395a 100644
--- a/tools/testing/selftests/bpf/test_sockopt.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt.c
@@ -1,22 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
-
-#include <errno.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-
-#include <linux/filter.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "bpf_rlimit.h"
-#include "bpf_util.h"
+#include <test_progs.h>
#include "cgroup_helpers.h"
-#define CG_PATH "/sockopt"
-
static char bpf_log_buf[4096];
static bool verbose;
@@ -983,39 +968,18 @@ close_prog_fd:
return ret;
}
-int main(int args, char **argv)
+void test_sockopt(void)
{
- int err = EXIT_FAILURE, error_cnt = 0;
int cgroup_fd, i;
- if (setup_cgroup_environment())
- goto cleanup_obj;
-
- cgroup_fd = create_and_get_cgroup(CG_PATH);
- if (cgroup_fd < 0)
- goto cleanup_cgroup_env;
-
- if (join_cgroup(CG_PATH))
- goto cleanup_cgroup;
+ cgroup_fd = test__join_cgroup("/sockopt");
+ if (CHECK_FAIL(cgroup_fd < 0))
+ return;
for (i = 0; i < ARRAY_SIZE(tests); i++) {
- int err = run_test(cgroup_fd, &tests[i]);
-
- if (err)
- error_cnt++;
-
- printf("#%d %s: %s\n", i, err ? "FAIL" : "PASS",
- tests[i].descr);
+ test__start_subtest(tests[i].descr);
+ CHECK_FAIL(run_test(cgroup_fd, &tests[i]));
}
- printf("Summary: %ld PASSED, %d FAILED\n",
- ARRAY_SIZE(tests) - error_cnt, error_cnt);
- err = error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
-
-cleanup_cgroup:
close(cgroup_fd);
-cleanup_cgroup_env:
- cleanup_cgroup_environment();
-cleanup_obj:
- return err;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
new file mode 100644
index 000000000000..8547ecbdc61f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+
+#define SOL_CUSTOM 0xdeadbeef
+#define CUSTOM_INHERIT1 0
+#define CUSTOM_INHERIT2 1
+#define CUSTOM_LISTENER 2
+
+static int connect_to_server(int server_fd)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ int fd;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (fd < 0) {
+ log_err("Failed to create client socket");
+ return -1;
+ }
+
+ if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+ log_err("Failed to get server addr");
+ goto out;
+ }
+
+ if (connect(fd, (const struct sockaddr *)&addr, len) < 0) {
+ log_err("Fail to connect to server");
+ goto out;
+ }
+
+ return fd;
+
+out:
+ close(fd);
+ return -1;
+}
+
+static int verify_sockopt(int fd, int optname, const char *msg, char expected)
+{
+ socklen_t optlen = 1;
+ char buf = 0;
+ int err;
+
+ err = getsockopt(fd, SOL_CUSTOM, optname, &buf, &optlen);
+ if (err) {
+ log_err("%s: failed to call getsockopt", msg);
+ return 1;
+ }
+
+ printf("%s %d: got=0x%x ? expected=0x%x\n", msg, optname, buf, expected);
+
+ if (buf != expected) {
+ log_err("%s: unexpected getsockopt value %d != %d", msg,
+ buf, expected);
+ return 1;
+ }
+
+ return 0;
+}
+
+static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER;
+
+static void *server_thread(void *arg)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ int fd = *(int *)arg;
+ int client_fd;
+ int err = 0;
+
+ err = listen(fd, 1);
+
+ pthread_mutex_lock(&server_started_mtx);
+ pthread_cond_signal(&server_started);
+ pthread_mutex_unlock(&server_started_mtx);
+
+ if (CHECK_FAIL(err < 0)) {
+ perror("Failed to listed on socket");
+ return NULL;
+ }
+
+ err += verify_sockopt(fd, CUSTOM_INHERIT1, "listen", 1);
+ err += verify_sockopt(fd, CUSTOM_INHERIT2, "listen", 1);
+ err += verify_sockopt(fd, CUSTOM_LISTENER, "listen", 1);
+
+ client_fd = accept(fd, (struct sockaddr *)&addr, &len);
+ if (CHECK_FAIL(client_fd < 0)) {
+ perror("Failed to accept client");
+ return NULL;
+ }
+
+ err += verify_sockopt(client_fd, CUSTOM_INHERIT1, "accept", 1);
+ err += verify_sockopt(client_fd, CUSTOM_INHERIT2, "accept", 1);
+ err += verify_sockopt(client_fd, CUSTOM_LISTENER, "accept", 0);
+
+ close(client_fd);
+
+ return (void *)(long)err;
+}
+
+static int start_server(void)
+{
+ struct sockaddr_in addr = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ };
+ char buf;
+ int err;
+ int fd;
+ int i;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (fd < 0) {
+ log_err("Failed to create server socket");
+ return -1;
+ }
+
+ for (i = CUSTOM_INHERIT1; i <= CUSTOM_LISTENER; i++) {
+ buf = 0x01;
+ err = setsockopt(fd, SOL_CUSTOM, i, &buf, 1);
+ if (err) {
+ log_err("Failed to call setsockopt(%d)", i);
+ close(fd);
+ return -1;
+ }
+ }
+
+ if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) {
+ log_err("Failed to bind socket");
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
+{
+ enum bpf_attach_type attach_type;
+ enum bpf_prog_type prog_type;
+ struct bpf_program *prog;
+ int err;
+
+ err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
+ if (err) {
+ log_err("Failed to deduct types for %s BPF program", title);
+ return -1;
+ }
+
+ prog = bpf_object__find_program_by_title(obj, title);
+ if (!prog) {
+ log_err("Failed to find %s BPF program", title);
+ return -1;
+ }
+
+ err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
+ attach_type, 0);
+ if (err) {
+ log_err("Failed to attach %s BPF program", title);
+ return -1;
+ }
+
+ return 0;
+}
+
+static void run_test(int cgroup_fd)
+{
+ struct bpf_prog_load_attr attr = {
+ .file = "./sockopt_inherit.o",
+ };
+ int server_fd = -1, client_fd;
+ struct bpf_object *obj;
+ void *server_err;
+ pthread_t tid;
+ int ignored;
+ int err;
+
+ err = bpf_prog_load_xattr(&attr, &obj, &ignored);
+ if (CHECK_FAIL(err))
+ return;
+
+ err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt");
+ if (CHECK_FAIL(err))
+ goto close_bpf_object;
+
+ err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt");
+ if (CHECK_FAIL(err))
+ goto close_bpf_object;
+
+ server_fd = start_server();
+ if (CHECK_FAIL(server_fd < 0))
+ goto close_bpf_object;
+
+ if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
+ (void *)&server_fd)))
+ goto close_server_fd;
+
+ pthread_mutex_lock(&server_started_mtx);
+ pthread_cond_wait(&server_started, &server_started_mtx);
+ pthread_mutex_unlock(&server_started_mtx);
+
+ client_fd = connect_to_server(server_fd);
+ if (CHECK_FAIL(client_fd < 0))
+ goto close_server_fd;
+
+ CHECK_FAIL(verify_sockopt(client_fd, CUSTOM_INHERIT1, "connect", 0));
+ CHECK_FAIL(verify_sockopt(client_fd, CUSTOM_INHERIT2, "connect", 0));
+ CHECK_FAIL(verify_sockopt(client_fd, CUSTOM_LISTENER, "connect", 0));
+
+ pthread_join(tid, &server_err);
+
+ err = (int)(long)server_err;
+ CHECK_FAIL(err);
+
+ close(client_fd);
+
+close_server_fd:
+ close(server_fd);
+close_bpf_object:
+ bpf_object__close(obj);
+}
+
+void test_sockopt_inherit(void)
+{
+ int cgroup_fd;
+
+ cgroup_fd = test__join_cgroup("/sockopt_inherit");
+ if (CHECK_FAIL(cgroup_fd < 0))
+ return;
+
+ run_test(cgroup_fd);
+ close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/test_sockopt_multi.c b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
index 4be3441db867..29188d6f5c8d 100644
--- a/tools/testing/selftests/bpf/test_sockopt_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
@@ -1,19 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-
-#include <error.h>
-#include <errno.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-
-#include <linux/filter.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "bpf_rlimit.h"
-#include "bpf_util.h"
+#include <test_progs.h>
#include "cgroup_helpers.h"
static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
@@ -308,7 +294,7 @@ detach:
return err;
}
-int main(int argc, char **argv)
+void test_sockopt_multi(void)
{
struct bpf_prog_load_attr attr = {
.file = "./sockopt_multi.o",
@@ -319,56 +305,28 @@ int main(int argc, char **argv)
int err = -1;
int ignored;
- if (setup_cgroup_environment()) {
- log_err("Failed to setup cgroup environment\n");
- goto out;
- }
-
- cg_parent = create_and_get_cgroup("/parent");
- if (cg_parent < 0) {
- log_err("Failed to create cgroup /parent\n");
- goto out;
- }
-
- cg_child = create_and_get_cgroup("/parent/child");
- if (cg_child < 0) {
- log_err("Failed to create cgroup /parent/child\n");
+ cg_parent = test__join_cgroup("/parent");
+ if (CHECK_FAIL(cg_parent < 0))
goto out;
- }
- if (join_cgroup("/parent/child")) {
- log_err("Failed to join cgroup /parent/child\n");
+ cg_child = test__join_cgroup("/parent/child");
+ if (CHECK_FAIL(cg_child < 0))
goto out;
- }
err = bpf_prog_load_xattr(&attr, &obj, &ignored);
- if (err) {
- log_err("Failed to load BPF object");
+ if (CHECK_FAIL(err))
goto out;
- }
sock_fd = socket(AF_INET, SOCK_STREAM, 0);
- if (sock_fd < 0) {
- log_err("Failed to create socket");
+ if (CHECK_FAIL(sock_fd < 0))
goto out;
- }
- if (run_getsockopt_test(obj, cg_parent, cg_child, sock_fd))
- err = -1;
- printf("test_sockopt_multi: getsockopt %s\n",
- err ? "FAILED" : "PASSED");
-
- if (run_setsockopt_test(obj, cg_parent, cg_child, sock_fd))
- err = -1;
- printf("test_sockopt_multi: setsockopt %s\n",
- err ? "FAILED" : "PASSED");
+ CHECK_FAIL(run_getsockopt_test(obj, cg_parent, cg_child, sock_fd));
+ CHECK_FAIL(run_setsockopt_test(obj, cg_parent, cg_child, sock_fd));
out:
close(sock_fd);
bpf_object__close(obj);
close(cg_child);
close(cg_parent);
-
- printf("test_sockopt_multi: %s\n", err ? "FAILED" : "PASSED");
- return err ? EXIT_FAILURE : EXIT_SUCCESS;
}
diff --git a/tools/testing/selftests/bpf/test_sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
index 036b652e5ca9..2061a6beac0f 100644
--- a/tools/testing/selftests/bpf/test_sockopt_sk.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
@@ -1,22 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
-
-#include <errno.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-
-#include <linux/filter.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "bpf_rlimit.h"
-#include "bpf_util.h"
+#include <test_progs.h>
#include "cgroup_helpers.h"
-#define CG_PATH "/sockopt"
-
#define SOL_CUSTOM 0xdeadbeef
static int getsetsockopt(void)
@@ -25,6 +10,7 @@ static int getsetsockopt(void)
union {
char u8[4];
__u32 u32;
+ char cc[16]; /* TCP_CA_NAME_MAX */
} buf = {};
socklen_t optlen;
@@ -115,6 +101,29 @@ static int getsetsockopt(void)
goto err;
}
+ /* TCP_CONGESTION can extend the string */
+
+ strcpy(buf.cc, "nv");
+ err = setsockopt(fd, SOL_TCP, TCP_CONGESTION, &buf, strlen("nv"));
+ if (err) {
+ log_err("Failed to call setsockopt(TCP_CONGESTION)");
+ goto err;
+ }
+
+
+ optlen = sizeof(buf.cc);
+ err = getsockopt(fd, SOL_TCP, TCP_CONGESTION, &buf, &optlen);
+ if (err) {
+ log_err("Failed to call getsockopt(TCP_CONGESTION)");
+ goto err;
+ }
+
+ if (strcmp(buf.cc, "cubic") != 0) {
+ log_err("Unexpected getsockopt(TCP_CONGESTION) %s != %s",
+ buf.cc, "cubic");
+ goto err;
+ }
+
close(fd);
return 0;
err:
@@ -151,7 +160,7 @@ static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
return 0;
}
-static int run_test(int cgroup_fd)
+static void run_test(int cgroup_fd)
{
struct bpf_prog_load_attr attr = {
.file = "./sockopt_sk.o",
@@ -161,51 +170,31 @@ static int run_test(int cgroup_fd)
int err;
err = bpf_prog_load_xattr(&attr, &obj, &ignored);
- if (err) {
- log_err("Failed to load BPF object");
- return -1;
- }
+ if (CHECK_FAIL(err))
+ return;
err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt");
- if (err)
+ if (CHECK_FAIL(err))
goto close_bpf_object;
err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt");
- if (err)
+ if (CHECK_FAIL(err))
goto close_bpf_object;
- err = getsetsockopt();
+ CHECK_FAIL(getsetsockopt());
close_bpf_object:
bpf_object__close(obj);
- return err;
}
-int main(int args, char **argv)
+void test_sockopt_sk(void)
{
int cgroup_fd;
- int err = EXIT_SUCCESS;
-
- if (setup_cgroup_environment())
- goto cleanup_obj;
-
- cgroup_fd = create_and_get_cgroup(CG_PATH);
- if (cgroup_fd < 0)
- goto cleanup_cgroup_env;
-
- if (join_cgroup(CG_PATH))
- goto cleanup_cgroup;
-
- if (run_test(cgroup_fd))
- err = EXIT_FAILURE;
- printf("test_sockopt_sk: %s\n",
- err == EXIT_SUCCESS ? "PASSED" : "FAILED");
+ cgroup_fd = test__join_cgroup("/sockopt_sk");
+ if (CHECK_FAIL(cgroup_fd < 0))
+ return;
-cleanup_cgroup:
+ run_test(cgroup_fd);
close(cgroup_fd);
-cleanup_cgroup_env:
- cleanup_cgroup_environment();
-cleanup_obj:
- return err;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c
index 114ebe6a438e..1ae00cd3174e 100644
--- a/tools/testing/selftests/bpf/prog_tests/spinlock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c
@@ -11,19 +11,19 @@ void test_spinlock(void)
void *ret;
err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
- if (err) {
+ if (CHECK_FAIL(err)) {
printf("test_spin_lock:bpf_prog_load errno %d\n", errno);
goto close_prog;
}
for (i = 0; i < 4; i++)
- assert(pthread_create(&thread_id[i], NULL,
- &spin_lock_thread, &prog_fd) == 0);
+ if (CHECK_FAIL(pthread_create(&thread_id[i], NULL,
+ &spin_lock_thread, &prog_fd)))
+ goto close_prog;
+
for (i = 0; i < 4; i++)
- assert(pthread_join(thread_id[i], &ret) == 0 &&
- ret == (void *)&prog_fd);
- goto close_prog_noerr;
+ if (CHECK_FAIL(pthread_join(thread_id[i], &ret) ||
+ ret != (void *)&prog_fd))
+ goto close_prog;
close_prog:
- error_cnt++;
-close_prog_noerr:
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
index ac44fda84833..e8399ae50e77 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
@@ -1,16 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include "test_stacktrace_build_id.skel.h"
void test_stacktrace_build_id(void)
{
+
int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
- const char *prog_name = "tracepoint/random/urandom_read";
- const char *file = "./test_stacktrace_build_id.o";
- int err, prog_fd, stack_trace_len;
+ struct test_stacktrace_build_id *skel;
+ int err, stack_trace_len;
__u32 key, previous_key, val, duration = 0;
- struct bpf_program *prog;
- struct bpf_object *obj;
- struct bpf_link *link = NULL;
char buf[256];
int i, j;
struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
@@ -18,42 +16,24 @@ void test_stacktrace_build_id(void)
int retry = 1;
retry:
- err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
- if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+ skel = test_stacktrace_build_id__open_and_load();
+ if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
return;
- prog = bpf_object__find_program_by_title(obj, prog_name);
- if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
- goto close_prog;
-
- link = bpf_program__attach_tracepoint(prog, "random", "urandom_read");
- if (CHECK(IS_ERR(link), "attach_tp", "err %ld\n", PTR_ERR(link)))
- goto close_prog;
+ err = test_stacktrace_build_id__attach(skel);
+ if (CHECK(err, "attach_tp", "err %d\n", err))
+ goto cleanup;
/* find map fds */
- control_map_fd = bpf_find_map(__func__, obj, "control_map");
- if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
- "err %d errno %d\n", err, errno))
- goto disable_pmu;
-
- stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
- if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
- "err %d errno %d\n", err, errno))
- goto disable_pmu;
-
- stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
- if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
- err, errno))
- goto disable_pmu;
-
- stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
- if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
- "err %d errno %d\n", err, errno))
- goto disable_pmu;
-
- assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
- == 0);
- assert(system("./urandom_read") == 0);
+ control_map_fd = bpf_map__fd(skel->maps.control_map);
+ stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap);
+ stackmap_fd = bpf_map__fd(skel->maps.stackmap);
+ stack_amap_fd = bpf_map__fd(skel->maps.stack_amap);
+
+ if (CHECK_FAIL(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")))
+ goto cleanup;
+ if (CHECK_FAIL(system("./urandom_read")))
+ goto cleanup;
/* disable stack trace collection */
key = 0;
val = 1;
@@ -65,23 +45,23 @@ retry:
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
+ goto cleanup;
err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
+ goto cleanup;
err = extract_build_id(buf, 256);
if (CHECK(err, "get build_id with readelf",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
+ goto cleanup;
err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
if (CHECK(err, "get_next_key from stackmap",
"err %d, errno %d\n", err, errno))
- goto disable_pmu;
+ goto cleanup;
do {
char build_id[64];
@@ -89,7 +69,7 @@ retry:
err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
if (CHECK(err, "lookup_elem from stackmap",
"err %d, errno %d\n", err, errno))
- goto disable_pmu;
+ goto cleanup;
for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
id_offs[i].offset != 0) {
@@ -107,8 +87,7 @@ retry:
* try it one more time.
*/
if (build_id_matches < 1 && retry--) {
- bpf_link__destroy(link);
- bpf_object__close(obj);
+ test_stacktrace_build_id__destroy(skel);
printf("%s:WARN:Didn't find expected build ID from the map, retrying\n",
__func__);
goto retry;
@@ -116,17 +95,14 @@ retry:
if (CHECK(build_id_matches < 1, "build id match",
"Didn't find expected build ID from the map\n"))
- goto disable_pmu;
+ goto cleanup;
- stack_trace_len = PERF_MAX_STACK_DEPTH
- * sizeof(struct bpf_stack_build_id);
+ stack_trace_len = PERF_MAX_STACK_DEPTH *
+ sizeof(struct bpf_stack_build_id);
err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
"err %d errno %d\n", err, errno);
-disable_pmu:
- bpf_link__destroy(link);
-
-close_prog:
- bpf_object__close(obj);
+cleanup:
+ test_stacktrace_build_id__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index 9557b7dfb782..f002e3090d92 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include "test_stacktrace_build_id.skel.h"
static __u64 read_perf_max_sample_freq(void)
{
@@ -16,19 +17,15 @@ static __u64 read_perf_max_sample_freq(void)
void test_stacktrace_build_id_nmi(void)
{
- int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
- const char *prog_name = "tracepoint/random/urandom_read";
- const char *file = "./test_stacktrace_build_id.o";
- int err, pmu_fd, prog_fd;
+ int control_map_fd, stackid_hmap_fd, stackmap_fd;
+ struct test_stacktrace_build_id *skel;
+ int err, pmu_fd;
struct perf_event_attr attr = {
.freq = 1,
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
};
__u32 key, previous_key, val, duration = 0;
- struct bpf_program *prog;
- struct bpf_object *obj;
- struct bpf_link *link;
char buf[256];
int i, j;
struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
@@ -38,53 +35,46 @@ void test_stacktrace_build_id_nmi(void)
attr.sample_freq = read_perf_max_sample_freq();
retry:
- err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd);
- if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+ skel = test_stacktrace_build_id__open();
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
return;
- prog = bpf_object__find_program_by_title(obj, prog_name);
- if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
- goto close_prog;
+ /* override program type */
+ bpf_program__set_perf_event(skel->progs.oncpu);
+
+ err = test_stacktrace_build_id__load(skel);
+ if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err))
+ goto cleanup;
pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
0 /* cpu 0 */, -1 /* group id */,
0 /* flags */);
- if (CHECK(pmu_fd < 0, "perf_event_open",
- "err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n",
+ if (pmu_fd < 0 && errno == ENOENT) {
+ printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__);
+ test__skip();
+ goto cleanup;
+ }
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
pmu_fd, errno))
- goto close_prog;
+ goto cleanup;
- link = bpf_program__attach_perf_event(prog, pmu_fd);
- if (CHECK(IS_ERR(link), "attach_perf_event",
- "err %ld\n", PTR_ERR(link))) {
+ skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
+ pmu_fd);
+ if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event",
+ "err %ld\n", PTR_ERR(skel->links.oncpu))) {
close(pmu_fd);
- goto close_prog;
+ goto cleanup;
}
/* find map fds */
- control_map_fd = bpf_find_map(__func__, obj, "control_map");
- if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
- "err %d errno %d\n", err, errno))
- goto disable_pmu;
-
- stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
- if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
- "err %d errno %d\n", err, errno))
- goto disable_pmu;
-
- stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
- if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
- err, errno))
- goto disable_pmu;
-
- stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
- if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
- "err %d errno %d\n", err, errno))
- goto disable_pmu;
-
- assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
- == 0);
- assert(system("taskset 0x1 ./urandom_read 100000") == 0);
+ control_map_fd = bpf_map__fd(skel->maps.control_map);
+ stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap);
+ stackmap_fd = bpf_map__fd(skel->maps.stackmap);
+
+ if (CHECK_FAIL(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")))
+ goto cleanup;
+ if (CHECK_FAIL(system("taskset 0x1 ./urandom_read 100000")))
+ goto cleanup;
/* disable stack trace collection */
key = 0;
val = 1;
@@ -96,23 +86,23 @@ retry:
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
+ goto cleanup;
err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
+ goto cleanup;
err = extract_build_id(buf, 256);
if (CHECK(err, "get build_id with readelf",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
+ goto cleanup;
err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
if (CHECK(err, "get_next_key from stackmap",
"err %d, errno %d\n", err, errno))
- goto disable_pmu;
+ goto cleanup;
do {
char build_id[64];
@@ -120,7 +110,7 @@ retry:
err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
if (CHECK(err, "lookup_elem from stackmap",
"err %d, errno %d\n", err, errno))
- goto disable_pmu;
+ goto cleanup;
for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
id_offs[i].offset != 0) {
@@ -138,8 +128,7 @@ retry:
* try it one more time.
*/
if (build_id_matches < 1 && retry--) {
- bpf_link__destroy(link);
- bpf_object__close(obj);
+ test_stacktrace_build_id__destroy(skel);
printf("%s:WARN:Didn't find expected build ID from the map, retrying\n",
__func__);
goto retry;
@@ -147,7 +136,7 @@ retry:
if (CHECK(build_id_matches < 1, "build id match",
"Didn't find expected build ID from the map\n"))
- goto disable_pmu;
+ goto cleanup;
/*
* We intentionally skip compare_stack_ips(). This is because we
@@ -156,8 +145,6 @@ retry:
* BPF_STACK_BUILD_ID_IP;
*/
-disable_pmu:
- bpf_link__destroy(link);
-close_prog:
- bpf_object__close(obj);
+cleanup:
+ test_stacktrace_build_id__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
index fc539335c5b3..37269d23df93 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
@@ -26,19 +26,19 @@ void test_stacktrace_map(void)
/* find map fds */
control_map_fd = bpf_find_map(__func__, obj, "control_map");
- if (control_map_fd < 0)
+ if (CHECK_FAIL(control_map_fd < 0))
goto disable_pmu;
stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
- if (stackid_hmap_fd < 0)
+ if (CHECK_FAIL(stackid_hmap_fd < 0))
goto disable_pmu;
stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
- if (stackmap_fd < 0)
+ if (CHECK_FAIL(stackmap_fd < 0))
goto disable_pmu;
stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
- if (stack_amap_fd < 0)
+ if (CHECK_FAIL(stack_amap_fd < 0))
goto disable_pmu;
/* give some time for bpf program run */
@@ -55,23 +55,20 @@ void test_stacktrace_map(void)
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
"err %d errno %d\n", err, errno))
- goto disable_pmu_noerr;
+ goto disable_pmu;
err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
"err %d errno %d\n", err, errno))
- goto disable_pmu_noerr;
+ goto disable_pmu;
stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
"err %d errno %d\n", err, errno))
- goto disable_pmu_noerr;
+ goto disable_pmu;
- goto disable_pmu_noerr;
disable_pmu:
- error_cnt++;
-disable_pmu_noerr:
bpf_link__destroy(link);
close_prog:
bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
index fbfa8e76cf63..404a5498e1a3 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
@@ -26,15 +26,15 @@ void test_stacktrace_map_raw_tp(void)
/* find map fds */
control_map_fd = bpf_find_map(__func__, obj, "control_map");
- if (control_map_fd < 0)
+ if (CHECK_FAIL(control_map_fd < 0))
goto close_prog;
stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
- if (stackid_hmap_fd < 0)
+ if (CHECK_FAIL(stackid_hmap_fd < 0))
goto close_prog;
stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
- if (stackmap_fd < 0)
+ if (CHECK_FAIL(stackmap_fd < 0))
goto close_prog;
/* give some time for bpf program run */
@@ -58,10 +58,7 @@ void test_stacktrace_map_raw_tp(void)
"err %d errno %d\n", err, errno))
goto close_prog;
- goto close_prog_noerr;
close_prog:
- error_cnt++;
-close_prog_noerr:
if (!IS_ERR_OR_NULL(link))
bpf_link__destroy(link);
bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
new file mode 100644
index 000000000000..bb8fe646dd9f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -0,0 +1,487 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+/* test_tailcall_1 checks basic functionality by patching multiple locations
+ * in a single program for a single tail call slot with nop->jmp, jmp->nop
+ * and jmp->jmp rewrites. Also checks for nop->nop.
+ */
+static void test_tailcall_1(void)
+{
+ int err, map_fd, prog_fd, main_fd, i, j;
+ struct bpf_map *prog_array;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ __u32 retval, duration;
+ char prog_name[32];
+ char buff[128] = {};
+
+ err = bpf_prog_load("tailcall1.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != i, "tailcall",
+ "err %d errno %d retval %d\n", err, errno, retval);
+
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ j = bpf_map__def(prog_array)->max_entries - 1 - i;
+ snprintf(prog_name, sizeof(prog_name), "classifier/%i", j);
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ j = bpf_map__def(prog_array)->max_entries - 1 - i;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != j, "tailcall",
+ "err %d errno %d retval %d\n", err, errno, retval);
+
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err >= 0 || errno != ENOENT))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 3, "tailcall",
+ "err %d errno %d retval %d\n", err, errno, retval);
+ }
+
+out:
+ bpf_object__close(obj);
+}
+
+/* test_tailcall_2 checks that patching multiple programs for a single
+ * tail call slot works. It also jumps through several programs and tests
+ * the tail call limit counter.
+ */
+static void test_tailcall_2(void)
+{
+ int err, map_fd, prog_fd, main_fd, i;
+ struct bpf_map *prog_array;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ __u32 retval, duration;
+ char prog_name[32];
+ char buff[128] = {};
+
+ err = bpf_prog_load("tailcall2.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 2, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ i = 2;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ i = 0;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+out:
+ bpf_object__close(obj);
+}
+
+/* test_tailcall_3 checks that the count value of the tail call limit
+ * enforcement matches with expectations.
+ */
+static void test_tailcall_3(void)
+{
+ int err, map_fd, prog_fd, main_fd, data_fd, i, val;
+ struct bpf_map *prog_array, *data_map;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ __u32 retval, duration;
+ char buff[128] = {};
+
+ err = bpf_prog_load("tailcall3.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier/0");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
+ if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
+ return;
+
+ data_fd = bpf_map__fd(data_map);
+ if (CHECK_FAIL(map_fd < 0))
+ return;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ CHECK(err || val != 33, "tailcall count", "err %d errno %d count %d\n",
+ err, errno, val);
+
+ i = 0;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+out:
+ bpf_object__close(obj);
+}
+
+/* test_tailcall_4 checks that the kernel properly selects indirect jump
+ * for the case where the key is not known. Latter is passed via global
+ * data to select different targets we can compare return value of.
+ */
+static void test_tailcall_4(void)
+{
+ int err, map_fd, prog_fd, main_fd, data_fd, i;
+ struct bpf_map *prog_array, *data_map;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ __u32 retval, duration;
+ static const int zero = 0;
+ char buff[128] = {};
+ char prog_name[32];
+
+ err = bpf_prog_load("tailcall4.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
+ if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
+ return;
+
+ data_fd = bpf_map__fd(data_map);
+ if (CHECK_FAIL(map_fd < 0))
+ return;
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != i, "tailcall",
+ "err %d errno %d retval %d\n", err, errno, retval);
+ }
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 3, "tailcall",
+ "err %d errno %d retval %d\n", err, errno, retval);
+ }
+out:
+ bpf_object__close(obj);
+}
+
+/* test_tailcall_5 probes similarly to test_tailcall_4 that the kernel generates
+ * an indirect jump when the keys are const but different from different branches.
+ */
+static void test_tailcall_5(void)
+{
+ int err, map_fd, prog_fd, main_fd, data_fd, i, key[] = { 1111, 1234, 5678 };
+ struct bpf_map *prog_array, *data_map;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ __u32 retval, duration;
+ static const int zero = 0;
+ char buff[128] = {};
+ char prog_name[32];
+
+ err = bpf_prog_load("tailcall5.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
+ if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
+ return;
+
+ data_fd = bpf_map__fd(data_map);
+ if (CHECK_FAIL(map_fd < 0))
+ return;
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != i, "tailcall",
+ "err %d errno %d retval %d\n", err, errno, retval);
+ }
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 3, "tailcall",
+ "err %d errno %d retval %d\n", err, errno, retval);
+ }
+out:
+ bpf_object__close(obj);
+}
+
+void test_tailcalls(void)
+{
+ if (test__start_subtest("tailcall_1"))
+ test_tailcall_1();
+ if (test__start_subtest("tailcall_2"))
+ test_tailcall_2();
+ if (test__start_subtest("tailcall_3"))
+ test_tailcall_3();
+ if (test__start_subtest("tailcall_4"))
+ test_tailcall_4();
+ if (test__start_subtest("tailcall_5"))
+ test_tailcall_5();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c b/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c
index 958a3d88de99..1bdc1d86a50c 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c
@@ -70,9 +70,6 @@ void test_task_fd_query_rawtp(void)
if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
goto close_prog;
- goto close_prog_noerr;
close_prog:
- error_cnt++;
-close_prog_noerr:
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
index f9b70e81682b..3f131b8fe328 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
@@ -62,14 +62,9 @@ static void test_task_fd_query_tp_core(const char *probe_name,
fd_type, buf))
goto close_pmu;
- close(pmu_fd);
- goto close_prog_noerr;
-
close_pmu:
close(pmu_fd);
close_prog:
- error_cnt++;
-close_prog_noerr:
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
index bb8759d69099..594307dffd13 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
@@ -10,10 +10,8 @@ void test_tcp_estats(void)
err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
CHECK(err, "", "err %d errno %d\n", err, errno);
- if (err) {
- error_cnt++;
+ if (err)
return;
- }
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/test_tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
index 90c3862f74a8..f4cd60d6fba2 100644
--- a/tools/testing/selftests/bpf/test_tcp_rtt.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
@@ -1,23 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
-#include <error.h>
-#include <errno.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <pthread.h>
-
-#include <linux/filter.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "bpf_rlimit.h"
-#include "bpf_util.h"
+#include <test_progs.h>
#include "cgroup_helpers.h"
-#define CG_PATH "/tcp_rtt"
-
struct tcp_rtt_storage {
__u32 invoked;
__u32 dsack_dups;
@@ -30,8 +14,32 @@ static void send_byte(int fd)
{
char b = 0x55;
- if (write(fd, &b, sizeof(b)) != 1)
- error(1, errno, "Failed to send single byte");
+ if (CHECK_FAIL(write(fd, &b, sizeof(b)) != 1))
+ perror("Failed to send single byte");
+}
+
+static int wait_for_ack(int fd, int retries)
+{
+ struct tcp_info info;
+ socklen_t optlen;
+ int i, err;
+
+ for (i = 0; i < retries; i++) {
+ optlen = sizeof(info);
+ err = getsockopt(fd, SOL_TCP, TCP_INFO, &info, &optlen);
+ if (err < 0) {
+ log_err("Failed to lookup TCP stats");
+ return err;
+ }
+
+ if (info.tcpi_unacked == 0)
+ return 0;
+
+ usleep(10);
+ }
+
+ log_err("Did not receive ACK");
+ return -1;
}
static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked,
@@ -41,8 +49,10 @@ static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked,
int err = 0;
struct tcp_rtt_storage val;
- if (bpf_map_lookup_elem(map_fd, &client_fd, &val) < 0)
- error(1, errno, "Failed to read socket storage");
+ if (CHECK_FAIL(bpf_map_lookup_elem(map_fd, &client_fd, &val) < 0)) {
+ perror("Failed to read socket storage");
+ return -1;
+ }
if (val.invoked != invoked) {
log_err("%s: unexpected bpf_tcp_sock.invoked %d != %d",
@@ -149,6 +159,11 @@ static int run_test(int cgroup_fd, int server_fd)
/*icsk_retransmits=*/0);
send_byte(client_fd);
+ if (wait_for_ack(client_fd, 100) < 0) {
+ err = -1;
+ goto close_client_fd;
+ }
+
err += verify_sk(map_fd, client_fd, "first payload byte",
/*invoked=*/2,
@@ -157,6 +172,7 @@ static int run_test(int cgroup_fd, int server_fd)
/*delivered_ce=*/0,
/*icsk_retransmits=*/0);
+close_client_fd:
close(client_fd);
close_bpf_object:
@@ -187,68 +203,72 @@ static int start_server(void)
return fd;
}
+static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER;
+
static void *server_thread(void *arg)
{
struct sockaddr_storage addr;
socklen_t len = sizeof(addr);
int fd = *(int *)arg;
int client_fd;
+ int err;
+
+ err = listen(fd, 1);
+
+ pthread_mutex_lock(&server_started_mtx);
+ pthread_cond_signal(&server_started);
+ pthread_mutex_unlock(&server_started_mtx);
- if (listen(fd, 1) < 0)
- error(1, errno, "Failed to listed on socket");
+ if (CHECK_FAIL(err < 0)) {
+ perror("Failed to listed on socket");
+ return NULL;
+ }
client_fd = accept(fd, (struct sockaddr *)&addr, &len);
- if (client_fd < 0)
- error(1, errno, "Failed to accept client");
+ if (CHECK_FAIL(client_fd < 0)) {
+ perror("Failed to accept client");
+ return NULL;
+ }
/* Wait for the next connection (that never arrives)
* to keep this thread alive to prevent calling
* close() on client_fd.
*/
- if (accept(fd, (struct sockaddr *)&addr, &len) >= 0)
- error(1, errno, "Unexpected success in second accept");
+ if (CHECK_FAIL(accept(fd, (struct sockaddr *)&addr, &len) >= 0)) {
+ perror("Unexpected success in second accept");
+ return NULL;
+ }
close(client_fd);
return NULL;
}
-int main(int args, char **argv)
+void test_tcp_rtt(void)
{
int server_fd, cgroup_fd;
- int err = EXIT_SUCCESS;
pthread_t tid;
- if (setup_cgroup_environment())
- goto cleanup_obj;
-
- cgroup_fd = create_and_get_cgroup(CG_PATH);
- if (cgroup_fd < 0)
- goto cleanup_cgroup_env;
-
- if (join_cgroup(CG_PATH))
- goto cleanup_cgroup;
+ cgroup_fd = test__join_cgroup("/tcp_rtt");
+ if (CHECK_FAIL(cgroup_fd < 0))
+ return;
server_fd = start_server();
- if (server_fd < 0) {
- err = EXIT_FAILURE;
- goto cleanup_cgroup;
- }
+ if (CHECK_FAIL(server_fd < 0))
+ goto close_cgroup_fd;
- pthread_create(&tid, NULL, server_thread, (void *)&server_fd);
+ if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
+ (void *)&server_fd)))
+ goto close_server_fd;
- if (run_test(cgroup_fd, server_fd))
- err = EXIT_FAILURE;
+ pthread_mutex_lock(&server_started_mtx);
+ pthread_cond_wait(&server_started, &server_started_mtx);
+ pthread_mutex_unlock(&server_started_mtx);
+ CHECK_FAIL(run_test(cgroup_fd, server_fd));
+close_server_fd:
close(server_fd);
-
- printf("test_sockopt_sk: %s\n",
- err == EXIT_SUCCESS ? "PASSED" : "FAILED");
-
-cleanup_cgroup:
+close_cgroup_fd:
close(cgroup_fd);
-cleanup_cgroup_env:
- cleanup_cgroup_environment();
-cleanup_obj:
- return err;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
new file mode 100644
index 000000000000..25b068591e9a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+
+const char *err_str;
+bool found;
+
+static int libbpf_debug_print(enum libbpf_print_level level,
+ const char *format, va_list args)
+{
+ char *log_buf;
+
+ if (level != LIBBPF_WARN ||
+ strcmp(format, "libbpf: \n%s\n")) {
+ vprintf(format, args);
+ return 0;
+ }
+
+ log_buf = va_arg(args, char *);
+ if (!log_buf)
+ goto out;
+ if (strstr(log_buf, err_str) == 0)
+ found = true;
+out:
+ printf(format, log_buf);
+ return 0;
+}
+
+extern int extra_prog_load_log_flags;
+
+static int check_load(const char *file)
+{
+ struct bpf_prog_load_attr attr;
+ struct bpf_object *obj = NULL;
+ int err, prog_fd;
+
+ memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
+ attr.file = file;
+ attr.prog_type = BPF_PROG_TYPE_UNSPEC;
+ attr.log_level = extra_prog_load_log_flags;
+ attr.prog_flags = BPF_F_TEST_RND_HI32;
+ found = false;
+ err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
+ bpf_object__close(obj);
+ return err;
+}
+
+struct test_def {
+ const char *file;
+ const char *err_str;
+};
+
+void test_test_global_funcs(void)
+{
+ struct test_def tests[] = {
+ { "test_global_func1.o", "combined stack size of 4 calls is 544" },
+ { "test_global_func2.o" },
+ { "test_global_func3.o" , "the call stack of 8 frames" },
+ { "test_global_func4.o" },
+ { "test_global_func5.o" , "expected pointer to ctx, but got PTR" },
+ { "test_global_func6.o" , "modified ctx ptr R2" },
+ { "test_global_func7.o" , "foo() doesn't return scalar" },
+ };
+ libbpf_print_fn_t old_print_fn = NULL;
+ int err, i, duration = 0;
+
+ old_print_fn = libbpf_set_print(libbpf_debug_print);
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ const struct test_def *test = &tests[i];
+
+ if (!test__start_subtest(test->file))
+ continue;
+
+ err_str = test->err_str;
+ err = check_load(test->file);
+ CHECK_FAIL(!!err ^ !!err_str);
+ if (err_str)
+ CHECK(found, "", "expected string '%s'", err_str);
+ }
+ libbpf_set_print(old_print_fn);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
new file mode 100644
index 000000000000..465b371a561d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2019 Facebook */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <sys/prctl.h>
+#include <test_progs.h>
+
+#define MAX_CNT 100000
+
+static __u64 time_get_ns(void)
+{
+ struct timespec ts;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return ts.tv_sec * 1000000000ull + ts.tv_nsec;
+}
+
+static int test_task_rename(const char *prog)
+{
+ int i, fd, duration = 0, err;
+ char buf[] = "test_overhead";
+ __u64 start_time;
+
+ fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
+ if (CHECK(fd < 0, "open /proc", "err %d", errno))
+ return -1;
+ start_time = time_get_ns();
+ for (i = 0; i < MAX_CNT; i++) {
+ err = write(fd, buf, sizeof(buf));
+ if (err < 0) {
+ CHECK(err < 0, "task rename", "err %d", errno);
+ close(fd);
+ return -1;
+ }
+ }
+ printf("task_rename %s\t%lluK events per sec\n", prog,
+ MAX_CNT * 1000000ll / (time_get_ns() - start_time));
+ close(fd);
+ return 0;
+}
+
+static void test_run(const char *prog)
+{
+ test_task_rename(prog);
+}
+
+static void setaffinity(void)
+{
+ cpu_set_t cpuset;
+ int cpu = 0;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+ sched_setaffinity(0, sizeof(cpuset), &cpuset);
+}
+
+void test_test_overhead(void)
+{
+ const char *kprobe_name = "kprobe/__set_task_comm";
+ const char *kretprobe_name = "kretprobe/__set_task_comm";
+ const char *raw_tp_name = "raw_tp/task_rename";
+ const char *fentry_name = "fentry/__set_task_comm";
+ const char *fexit_name = "fexit/__set_task_comm";
+ const char *kprobe_func = "__set_task_comm";
+ struct bpf_program *kprobe_prog, *kretprobe_prog, *raw_tp_prog;
+ struct bpf_program *fentry_prog, *fexit_prog;
+ struct bpf_object *obj;
+ struct bpf_link *link;
+ int err, duration = 0;
+ char comm[16] = {};
+
+ if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L)))
+ return;
+
+ obj = bpf_object__open_file("./test_overhead.o", NULL);
+ if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+ return;
+
+ kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name);
+ if (CHECK(!kprobe_prog, "find_probe",
+ "prog '%s' not found\n", kprobe_name))
+ goto cleanup;
+ kretprobe_prog = bpf_object__find_program_by_title(obj, kretprobe_name);
+ if (CHECK(!kretprobe_prog, "find_probe",
+ "prog '%s' not found\n", kretprobe_name))
+ goto cleanup;
+ raw_tp_prog = bpf_object__find_program_by_title(obj, raw_tp_name);
+ if (CHECK(!raw_tp_prog, "find_probe",
+ "prog '%s' not found\n", raw_tp_name))
+ goto cleanup;
+ fentry_prog = bpf_object__find_program_by_title(obj, fentry_name);
+ if (CHECK(!fentry_prog, "find_probe",
+ "prog '%s' not found\n", fentry_name))
+ goto cleanup;
+ fexit_prog = bpf_object__find_program_by_title(obj, fexit_name);
+ if (CHECK(!fexit_prog, "find_probe",
+ "prog '%s' not found\n", fexit_name))
+ goto cleanup;
+
+ err = bpf_object__load(obj);
+ if (CHECK(err, "obj_load", "err %d\n", err))
+ goto cleanup;
+
+ setaffinity();
+
+ /* base line run */
+ test_run("base");
+
+ /* attach kprobe */
+ link = bpf_program__attach_kprobe(kprobe_prog, false /* retprobe */,
+ kprobe_func);
+ if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n", PTR_ERR(link)))
+ goto cleanup;
+ test_run("kprobe");
+ bpf_link__destroy(link);
+
+ /* attach kretprobe */
+ link = bpf_program__attach_kprobe(kretprobe_prog, true /* retprobe */,
+ kprobe_func);
+ if (CHECK(IS_ERR(link), "attach kretprobe", "err %ld\n", PTR_ERR(link)))
+ goto cleanup;
+ test_run("kretprobe");
+ bpf_link__destroy(link);
+
+ /* attach raw_tp */
+ link = bpf_program__attach_raw_tracepoint(raw_tp_prog, "task_rename");
+ if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link)))
+ goto cleanup;
+ test_run("raw_tp");
+ bpf_link__destroy(link);
+
+ /* attach fentry */
+ link = bpf_program__attach_trace(fentry_prog);
+ if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link)))
+ goto cleanup;
+ test_run("fentry");
+ bpf_link__destroy(link);
+
+ /* attach fexit */
+ link = bpf_program__attach_trace(fexit_prog);
+ if (CHECK(IS_ERR(link), "attach fexit", "err %ld\n", PTR_ERR(link)))
+ goto cleanup;
+ test_run("fexit");
+ bpf_link__destroy(link);
+cleanup:
+ prctl(PR_SET_NAME, comm, 0L, 0L, 0L);
+ bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
new file mode 100644
index 000000000000..1f6ccdaed1ac
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE
+#include <sched.h>
+#include <sys/prctl.h>
+#include <test_progs.h>
+
+#define MAX_TRAMP_PROGS 40
+
+struct inst {
+ struct bpf_object *obj;
+ struct bpf_link *link_fentry;
+ struct bpf_link *link_fexit;
+};
+
+static int test_task_rename(void)
+{
+ int fd, duration = 0, err;
+ char buf[] = "test_overhead";
+
+ fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
+ if (CHECK(fd < 0, "open /proc", "err %d", errno))
+ return -1;
+ err = write(fd, buf, sizeof(buf));
+ if (err < 0) {
+ CHECK(err < 0, "task rename", "err %d", errno);
+ close(fd);
+ return -1;
+ }
+ close(fd);
+ return 0;
+}
+
+static struct bpf_link *load(struct bpf_object *obj, const char *name)
+{
+ struct bpf_program *prog;
+ int duration = 0;
+
+ prog = bpf_object__find_program_by_title(obj, name);
+ if (CHECK(!prog, "find_probe", "prog '%s' not found\n", name))
+ return ERR_PTR(-EINVAL);
+ return bpf_program__attach_trace(prog);
+}
+
+void test_trampoline_count(void)
+{
+ const char *fentry_name = "fentry/__set_task_comm";
+ const char *fexit_name = "fexit/__set_task_comm";
+ const char *object = "test_trampoline_count.o";
+ struct inst inst[MAX_TRAMP_PROGS] = {};
+ int err, i = 0, duration = 0;
+ struct bpf_object *obj;
+ struct bpf_link *link;
+ char comm[16] = {};
+
+ /* attach 'allowed' 40 trampoline programs */
+ for (i = 0; i < MAX_TRAMP_PROGS; i++) {
+ obj = bpf_object__open_file(object, NULL);
+ if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+ goto cleanup;
+
+ err = bpf_object__load(obj);
+ if (CHECK(err, "obj_load", "err %d\n", err))
+ goto cleanup;
+ inst[i].obj = obj;
+
+ if (rand() % 2) {
+ link = load(obj, fentry_name);
+ if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link)))
+ goto cleanup;
+ inst[i].link_fentry = link;
+ } else {
+ link = load(obj, fexit_name);
+ if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link)))
+ goto cleanup;
+ inst[i].link_fexit = link;
+ }
+ }
+
+ /* and try 1 extra.. */
+ obj = bpf_object__open_file(object, NULL);
+ if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+ goto cleanup;
+
+ err = bpf_object__load(obj);
+ if (CHECK(err, "obj_load", "err %d\n", err))
+ goto cleanup_extra;
+
+ /* ..that needs to fail */
+ link = load(obj, fentry_name);
+ if (CHECK(!IS_ERR(link), "cannot attach over the limit", "err %ld\n", PTR_ERR(link))) {
+ bpf_link__destroy(link);
+ goto cleanup_extra;
+ }
+
+ /* with E2BIG error */
+ CHECK(PTR_ERR(link) != -E2BIG, "proper error check", "err %ld\n", PTR_ERR(link));
+
+ /* and finaly execute the probe */
+ if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L)))
+ goto cleanup_extra;
+ CHECK_FAIL(test_task_rename());
+ CHECK_FAIL(prctl(PR_SET_NAME, comm, 0L, 0L, 0L));
+
+cleanup_extra:
+ bpf_object__close(obj);
+cleanup:
+ while (--i) {
+ bpf_link__destroy(inst[i].link_fentry);
+ bpf_link__destroy(inst[i].link_fexit);
+ bpf_object__close(inst[i].obj);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp.c b/tools/testing/selftests/bpf/prog_tests/xdp.c
index a74167289545..dcb5ecac778e 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp.c
@@ -16,10 +16,8 @@ void test_xdp(void)
int err, prog_fd, map_fd;
err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
- if (err) {
- error_cnt++;
+ if (CHECK_FAIL(err))
return;
- }
map_fd = bpf_find_map(__func__, obj, "vip2tnl");
if (map_fd < 0)
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index 922aa0a19764..3744196d7cba 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -10,10 +10,8 @@ void test_xdp_adjust_tail(void)
int err, prog_fd;
err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
- if (err) {
- error_cnt++;
+ if (CHECK_FAIL(err))
return;
- }
err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
new file mode 100644
index 000000000000..6b56bdc73ebc
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <net/if.h>
+#include "test_xdp.skel.h"
+#include "test_xdp_bpf2bpf.skel.h"
+
+void test_xdp_bpf2bpf(void)
+{
+ __u32 duration = 0, retval, size;
+ char buf[128];
+ int err, pkt_fd, map_fd;
+ struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
+ struct iptnl_info value4 = {.family = AF_INET};
+ struct test_xdp *pkt_skel = NULL;
+ struct test_xdp_bpf2bpf *ftrace_skel = NULL;
+ struct vip key4 = {.protocol = 6, .family = AF_INET};
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
+
+ /* Load XDP program to introspect */
+ pkt_skel = test_xdp__open_and_load();
+ if (CHECK(!pkt_skel, "pkt_skel_load", "test_xdp skeleton failed\n"))
+ return;
+
+ pkt_fd = bpf_program__fd(pkt_skel->progs._xdp_tx_iptunnel);
+
+ map_fd = bpf_map__fd(pkt_skel->maps.vip2tnl);
+ bpf_map_update_elem(map_fd, &key4, &value4, 0);
+
+ /* Load trace program */
+ opts.attach_prog_fd = pkt_fd,
+ ftrace_skel = test_xdp_bpf2bpf__open_opts(&opts);
+ if (CHECK(!ftrace_skel, "__open", "ftrace skeleton failed\n"))
+ goto out;
+
+ err = test_xdp_bpf2bpf__load(ftrace_skel);
+ if (CHECK(err, "__load", "ftrace skeleton failed\n"))
+ goto out;
+
+ err = test_xdp_bpf2bpf__attach(ftrace_skel);
+ if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err))
+ goto out;
+
+ /* Run test program */
+ err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ buf, &size, &retval, &duration);
+
+ if (CHECK(err || retval != XDP_TX || size != 74 ||
+ iph->protocol != IPPROTO_IPIP, "ipv4",
+ "err %d errno %d retval %d size %d\n",
+ err, errno, retval, size))
+ goto out;
+
+ /* Verify test results */
+ if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
+ "result", "fentry failed err %llu\n",
+ ftrace_skel->bss->test_result_fentry))
+ goto out;
+
+ CHECK(ftrace_skel->bss->test_result_fexit != XDP_TX, "result",
+ "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit);
+
+out:
+ test_xdp__destroy(pkt_skel);
+ test_xdp_bpf2bpf__destroy(ftrace_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
index 09e6b46f5515..c9404e6b226e 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
@@ -31,10 +31,8 @@ void test_xdp_noinline(void)
u32 *magic = (u32 *)buf;
err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
- if (err) {
- error_cnt++;
+ if (CHECK_FAIL(err))
return;
- }
map_fd = bpf_find_map(__func__, obj, "vip_map");
if (map_fd < 0)
@@ -73,9 +71,10 @@ void test_xdp_noinline(void)
bytes += stats[i].bytes;
pkts += stats[i].pkts;
}
- if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) {
- error_cnt++;
- printf("test_xdp_noinline:FAIL:stats %lld %lld\n", bytes, pkts);
+ if (CHECK_FAIL(bytes != MAGIC_BYTES * NUM_ITER * 2 ||
+ pkts != NUM_ITER * 2)) {
+ printf("test_xdp_noinline:FAIL:stats %lld %lld\n",
+ bytes, pkts);
}
out:
bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_perf.c b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c
new file mode 100644
index 000000000000..7185bee16fe4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+void test_xdp_perf(void)
+{
+ const char *file = "./xdp_dummy.o";
+ __u32 duration, retval, size;
+ struct bpf_object *obj;
+ char in[128], out[128];
+ int err, prog_fd;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ err = bpf_prog_test_run(prog_fd, 1000000, &in[0], 128,
+ out, &size, &retval, &duration);
+
+ CHECK(err || retval != XDP_PASS || size != 128,
+ "xdp-perf",
+ "err %d errno %d retval %d size %d\n",
+ err, errno, retval, size);
+
+ bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c
new file mode 100644
index 000000000000..7897c8f4d363
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c
@@ -0,0 +1,544 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* WARNING: This implemenation is not necessarily the same
+ * as the tcp_cubic.c. The purpose is mainly for testing
+ * the kernel BPF logic.
+ *
+ * Highlights:
+ * 1. CONFIG_HZ .kconfig map is used.
+ * 2. In bictcp_update(), calculation is changed to use usec
+ * resolution (i.e. USEC_PER_JIFFY) instead of using jiffies.
+ * Thus, usecs_to_jiffies() is not used in the bpf_cubic.c.
+ * 3. In bitctcp_update() [under tcp_friendliness], the original
+ * "while (ca->ack_cnt > delta)" loop is changed to the equivalent
+ * "ca->ack_cnt / delta" operation.
+ */
+
+#include <linux/bpf.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
+
+#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation
+ * max_cwnd = snd_cwnd * beta
+ */
+#define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */
+
+/* Two methods of hybrid slow start */
+#define HYSTART_ACK_TRAIN 0x1
+#define HYSTART_DELAY 0x2
+
+/* Number of delay samples for detecting the increase of delay */
+#define HYSTART_MIN_SAMPLES 8
+#define HYSTART_DELAY_MIN (4000U) /* 4ms */
+#define HYSTART_DELAY_MAX (16000U) /* 16 ms */
+#define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX)
+
+static int fast_convergence = 1;
+static const int beta = 717; /* = 717/1024 (BICTCP_BETA_SCALE) */
+static int initial_ssthresh;
+static const int bic_scale = 41;
+static int tcp_friendliness = 1;
+
+static int hystart = 1;
+static int hystart_detect = HYSTART_ACK_TRAIN | HYSTART_DELAY;
+static int hystart_low_window = 16;
+static int hystart_ack_delta_us = 2000;
+
+static const __u32 cube_rtt_scale = (bic_scale * 10); /* 1024*c/rtt */
+static const __u32 beta_scale = 8*(BICTCP_BETA_SCALE+beta) / 3
+ / (BICTCP_BETA_SCALE - beta);
+/* calculate the "K" for (wmax-cwnd) = c/rtt * K^3
+ * so K = cubic_root( (wmax-cwnd)*rtt/c )
+ * the unit of K is bictcp_HZ=2^10, not HZ
+ *
+ * c = bic_scale >> 10
+ * rtt = 100ms
+ *
+ * the following code has been designed and tested for
+ * cwnd < 1 million packets
+ * RTT < 100 seconds
+ * HZ < 1,000,00 (corresponding to 10 nano-second)
+ */
+
+/* 1/c * 2^2*bictcp_HZ * srtt, 2^40 */
+static const __u64 cube_factor = (__u64)(1ull << (10+3*BICTCP_HZ))
+ / (bic_scale * 10);
+
+/* BIC TCP Parameters */
+struct bictcp {
+ __u32 cnt; /* increase cwnd by 1 after ACKs */
+ __u32 last_max_cwnd; /* last maximum snd_cwnd */
+ __u32 last_cwnd; /* the last snd_cwnd */
+ __u32 last_time; /* time when updated last_cwnd */
+ __u32 bic_origin_point;/* origin point of bic function */
+ __u32 bic_K; /* time to origin point
+ from the beginning of the current epoch */
+ __u32 delay_min; /* min delay (usec) */
+ __u32 epoch_start; /* beginning of an epoch */
+ __u32 ack_cnt; /* number of acks */
+ __u32 tcp_cwnd; /* estimated tcp cwnd */
+ __u16 unused;
+ __u8 sample_cnt; /* number of samples to decide curr_rtt */
+ __u8 found; /* the exit point is found? */
+ __u32 round_start; /* beginning of each round */
+ __u32 end_seq; /* end_seq of the round */
+ __u32 last_ack; /* last time when the ACK spacing is close */
+ __u32 curr_rtt; /* the minimum rtt of current round */
+};
+
+static inline void bictcp_reset(struct bictcp *ca)
+{
+ ca->cnt = 0;
+ ca->last_max_cwnd = 0;
+ ca->last_cwnd = 0;
+ ca->last_time = 0;
+ ca->bic_origin_point = 0;
+ ca->bic_K = 0;
+ ca->delay_min = 0;
+ ca->epoch_start = 0;
+ ca->ack_cnt = 0;
+ ca->tcp_cwnd = 0;
+ ca->found = 0;
+}
+
+extern unsigned long CONFIG_HZ __kconfig;
+#define HZ CONFIG_HZ
+#define USEC_PER_MSEC 1000UL
+#define USEC_PER_SEC 1000000UL
+#define USEC_PER_JIFFY (USEC_PER_SEC / HZ)
+
+static __always_inline __u64 div64_u64(__u64 dividend, __u64 divisor)
+{
+ return dividend / divisor;
+}
+
+#define div64_ul div64_u64
+
+#define BITS_PER_U64 (sizeof(__u64) * 8)
+static __always_inline int fls64(__u64 x)
+{
+ int num = BITS_PER_U64 - 1;
+
+ if (x == 0)
+ return 0;
+
+ if (!(x & (~0ull << (BITS_PER_U64-32)))) {
+ num -= 32;
+ x <<= 32;
+ }
+ if (!(x & (~0ull << (BITS_PER_U64-16)))) {
+ num -= 16;
+ x <<= 16;
+ }
+ if (!(x & (~0ull << (BITS_PER_U64-8)))) {
+ num -= 8;
+ x <<= 8;
+ }
+ if (!(x & (~0ull << (BITS_PER_U64-4)))) {
+ num -= 4;
+ x <<= 4;
+ }
+ if (!(x & (~0ull << (BITS_PER_U64-2)))) {
+ num -= 2;
+ x <<= 2;
+ }
+ if (!(x & (~0ull << (BITS_PER_U64-1))))
+ num -= 1;
+
+ return num + 1;
+}
+
+static __always_inline __u32 bictcp_clock_us(const struct sock *sk)
+{
+ return tcp_sk(sk)->tcp_mstamp;
+}
+
+static __always_inline void bictcp_hystart_reset(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bictcp *ca = inet_csk_ca(sk);
+
+ ca->round_start = ca->last_ack = bictcp_clock_us(sk);
+ ca->end_seq = tp->snd_nxt;
+ ca->curr_rtt = ~0U;
+ ca->sample_cnt = 0;
+}
+
+/* "struct_ops/" prefix is not a requirement
+ * It will be recognized as BPF_PROG_TYPE_STRUCT_OPS
+ * as long as it is used in one of the func ptr
+ * under SEC(".struct_ops").
+ */
+SEC("struct_ops/bictcp_init")
+void BPF_PROG(bictcp_init, struct sock *sk)
+{
+ struct bictcp *ca = inet_csk_ca(sk);
+
+ bictcp_reset(ca);
+
+ if (hystart)
+ bictcp_hystart_reset(sk);
+
+ if (!hystart && initial_ssthresh)
+ tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
+}
+
+/* No prefix in SEC will also work.
+ * The remaining tcp-cubic functions have an easier way.
+ */
+SEC("no-sec-prefix-bictcp_cwnd_event")
+void BPF_PROG(bictcp_cwnd_event, struct sock *sk, enum tcp_ca_event event)
+{
+ if (event == CA_EVENT_TX_START) {
+ struct bictcp *ca = inet_csk_ca(sk);
+ __u32 now = tcp_jiffies32;
+ __s32 delta;
+
+ delta = now - tcp_sk(sk)->lsndtime;
+
+ /* We were application limited (idle) for a while.
+ * Shift epoch_start to keep cwnd growth to cubic curve.
+ */
+ if (ca->epoch_start && delta > 0) {
+ ca->epoch_start += delta;
+ if (after(ca->epoch_start, now))
+ ca->epoch_start = now;
+ }
+ return;
+ }
+}
+
+/*
+ * cbrt(x) MSB values for x MSB values in [0..63].
+ * Precomputed then refined by hand - Willy Tarreau
+ *
+ * For x in [0..63],
+ * v = cbrt(x << 18) - 1
+ * cbrt(x) = (v[x] + 10) >> 6
+ */
+static const __u8 v[] = {
+ /* 0x00 */ 0, 54, 54, 54, 118, 118, 118, 118,
+ /* 0x08 */ 123, 129, 134, 138, 143, 147, 151, 156,
+ /* 0x10 */ 157, 161, 164, 168, 170, 173, 176, 179,
+ /* 0x18 */ 181, 185, 187, 190, 192, 194, 197, 199,
+ /* 0x20 */ 200, 202, 204, 206, 209, 211, 213, 215,
+ /* 0x28 */ 217, 219, 221, 222, 224, 225, 227, 229,
+ /* 0x30 */ 231, 232, 234, 236, 237, 239, 240, 242,
+ /* 0x38 */ 244, 245, 246, 248, 250, 251, 252, 254,
+};
+
+/* calculate the cubic root of x using a table lookup followed by one
+ * Newton-Raphson iteration.
+ * Avg err ~= 0.195%
+ */
+static __always_inline __u32 cubic_root(__u64 a)
+{
+ __u32 x, b, shift;
+
+ if (a < 64) {
+ /* a in [0..63] */
+ return ((__u32)v[(__u32)a] + 35) >> 6;
+ }
+
+ b = fls64(a);
+ b = ((b * 84) >> 8) - 1;
+ shift = (a >> (b * 3));
+
+ /* it is needed for verifier's bound check on v */
+ if (shift >= 64)
+ return 0;
+
+ x = ((__u32)(((__u32)v[shift] + 10) << b)) >> 6;
+
+ /*
+ * Newton-Raphson iteration
+ * 2
+ * x = ( 2 * x + a / x ) / 3
+ * k+1 k k
+ */
+ x = (2 * x + (__u32)div64_u64(a, (__u64)x * (__u64)(x - 1)));
+ x = ((x * 341) >> 10);
+ return x;
+}
+
+/*
+ * Compute congestion window to use.
+ */
+static __always_inline void bictcp_update(struct bictcp *ca, __u32 cwnd,
+ __u32 acked)
+{
+ __u32 delta, bic_target, max_cnt;
+ __u64 offs, t;
+
+ ca->ack_cnt += acked; /* count the number of ACKed packets */
+
+ if (ca->last_cwnd == cwnd &&
+ (__s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32)
+ return;
+
+ /* The CUBIC function can update ca->cnt at most once per jiffy.
+ * On all cwnd reduction events, ca->epoch_start is set to 0,
+ * which will force a recalculation of ca->cnt.
+ */
+ if (ca->epoch_start && tcp_jiffies32 == ca->last_time)
+ goto tcp_friendliness;
+
+ ca->last_cwnd = cwnd;
+ ca->last_time = tcp_jiffies32;
+
+ if (ca->epoch_start == 0) {
+ ca->epoch_start = tcp_jiffies32; /* record beginning */
+ ca->ack_cnt = acked; /* start counting */
+ ca->tcp_cwnd = cwnd; /* syn with cubic */
+
+ if (ca->last_max_cwnd <= cwnd) {
+ ca->bic_K = 0;
+ ca->bic_origin_point = cwnd;
+ } else {
+ /* Compute new K based on
+ * (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ)
+ */
+ ca->bic_K = cubic_root(cube_factor
+ * (ca->last_max_cwnd - cwnd));
+ ca->bic_origin_point = ca->last_max_cwnd;
+ }
+ }
+
+ /* cubic function - calc*/
+ /* calculate c * time^3 / rtt,
+ * while considering overflow in calculation of time^3
+ * (so time^3 is done by using 64 bit)
+ * and without the support of division of 64bit numbers
+ * (so all divisions are done by using 32 bit)
+ * also NOTE the unit of those veriables
+ * time = (t - K) / 2^bictcp_HZ
+ * c = bic_scale >> 10
+ * rtt = (srtt >> 3) / HZ
+ * !!! The following code does not have overflow problems,
+ * if the cwnd < 1 million packets !!!
+ */
+
+ t = (__s32)(tcp_jiffies32 - ca->epoch_start) * USEC_PER_JIFFY;
+ t += ca->delay_min;
+ /* change the unit from usec to bictcp_HZ */
+ t <<= BICTCP_HZ;
+ t /= USEC_PER_SEC;
+
+ if (t < ca->bic_K) /* t - K */
+ offs = ca->bic_K - t;
+ else
+ offs = t - ca->bic_K;
+
+ /* c/rtt * (t-K)^3 */
+ delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ);
+ if (t < ca->bic_K) /* below origin*/
+ bic_target = ca->bic_origin_point - delta;
+ else /* above origin*/
+ bic_target = ca->bic_origin_point + delta;
+
+ /* cubic function - calc bictcp_cnt*/
+ if (bic_target > cwnd) {
+ ca->cnt = cwnd / (bic_target - cwnd);
+ } else {
+ ca->cnt = 100 * cwnd; /* very small increment*/
+ }
+
+ /*
+ * The initial growth of cubic function may be too conservative
+ * when the available bandwidth is still unknown.
+ */
+ if (ca->last_max_cwnd == 0 && ca->cnt > 20)
+ ca->cnt = 20; /* increase cwnd 5% per RTT */
+
+tcp_friendliness:
+ /* TCP Friendly */
+ if (tcp_friendliness) {
+ __u32 scale = beta_scale;
+ __u32 n;
+
+ /* update tcp cwnd */
+ delta = (cwnd * scale) >> 3;
+ if (ca->ack_cnt > delta && delta) {
+ n = ca->ack_cnt / delta;
+ ca->ack_cnt -= n * delta;
+ ca->tcp_cwnd += n;
+ }
+
+ if (ca->tcp_cwnd > cwnd) { /* if bic is slower than tcp */
+ delta = ca->tcp_cwnd - cwnd;
+ max_cnt = cwnd / delta;
+ if (ca->cnt > max_cnt)
+ ca->cnt = max_cnt;
+ }
+ }
+
+ /* The maximum rate of cwnd increase CUBIC allows is 1 packet per
+ * 2 packets ACKed, meaning cwnd grows at 1.5x per RTT.
+ */
+ ca->cnt = max(ca->cnt, 2U);
+}
+
+/* Or simply use the BPF_STRUCT_OPS to avoid the SEC boiler plate. */
+void BPF_STRUCT_OPS(bictcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bictcp *ca = inet_csk_ca(sk);
+
+ if (!tcp_is_cwnd_limited(sk))
+ return;
+
+ if (tcp_in_slow_start(tp)) {
+ if (hystart && after(ack, ca->end_seq))
+ bictcp_hystart_reset(sk);
+ acked = tcp_slow_start(tp, acked);
+ if (!acked)
+ return;
+ }
+ bictcp_update(ca, tp->snd_cwnd, acked);
+ tcp_cong_avoid_ai(tp, ca->cnt, acked);
+}
+
+__u32 BPF_STRUCT_OPS(bictcp_recalc_ssthresh, struct sock *sk)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ struct bictcp *ca = inet_csk_ca(sk);
+
+ ca->epoch_start = 0; /* end of epoch */
+
+ /* Wmax and fast convergence */
+ if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
+ ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
+ / (2 * BICTCP_BETA_SCALE);
+ else
+ ca->last_max_cwnd = tp->snd_cwnd;
+
+ return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
+}
+
+void BPF_STRUCT_OPS(bictcp_state, struct sock *sk, __u8 new_state)
+{
+ if (new_state == TCP_CA_Loss) {
+ bictcp_reset(inet_csk_ca(sk));
+ bictcp_hystart_reset(sk);
+ }
+}
+
+#define GSO_MAX_SIZE 65536
+
+/* Account for TSO/GRO delays.
+ * Otherwise short RTT flows could get too small ssthresh, since during
+ * slow start we begin with small TSO packets and ca->delay_min would
+ * not account for long aggregation delay when TSO packets get bigger.
+ * Ideally even with a very small RTT we would like to have at least one
+ * TSO packet being sent and received by GRO, and another one in qdisc layer.
+ * We apply another 100% factor because @rate is doubled at this point.
+ * We cap the cushion to 1ms.
+ */
+static __always_inline __u32 hystart_ack_delay(struct sock *sk)
+{
+ unsigned long rate;
+
+ rate = sk->sk_pacing_rate;
+ if (!rate)
+ return 0;
+ return min((__u64)USEC_PER_MSEC,
+ div64_ul((__u64)GSO_MAX_SIZE * 4 * USEC_PER_SEC, rate));
+}
+
+static __always_inline void hystart_update(struct sock *sk, __u32 delay)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bictcp *ca = inet_csk_ca(sk);
+ __u32 threshold;
+
+ if (hystart_detect & HYSTART_ACK_TRAIN) {
+ __u32 now = bictcp_clock_us(sk);
+
+ /* first detection parameter - ack-train detection */
+ if ((__s32)(now - ca->last_ack) <= hystart_ack_delta_us) {
+ ca->last_ack = now;
+
+ threshold = ca->delay_min + hystart_ack_delay(sk);
+
+ /* Hystart ack train triggers if we get ack past
+ * ca->delay_min/2.
+ * Pacing might have delayed packets up to RTT/2
+ * during slow start.
+ */
+ if (sk->sk_pacing_status == SK_PACING_NONE)
+ threshold >>= 1;
+
+ if ((__s32)(now - ca->round_start) > threshold) {
+ ca->found = 1;
+ tp->snd_ssthresh = tp->snd_cwnd;
+ }
+ }
+ }
+
+ if (hystart_detect & HYSTART_DELAY) {
+ /* obtain the minimum delay of more than sampling packets */
+ if (ca->sample_cnt < HYSTART_MIN_SAMPLES) {
+ if (ca->curr_rtt > delay)
+ ca->curr_rtt = delay;
+
+ ca->sample_cnt++;
+ } else {
+ if (ca->curr_rtt > ca->delay_min +
+ HYSTART_DELAY_THRESH(ca->delay_min >> 3)) {
+ ca->found = 1;
+ tp->snd_ssthresh = tp->snd_cwnd;
+ }
+ }
+ }
+}
+
+void BPF_STRUCT_OPS(bictcp_acked, struct sock *sk,
+ const struct ack_sample *sample)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ struct bictcp *ca = inet_csk_ca(sk);
+ __u32 delay;
+
+ /* Some calls are for duplicates without timetamps */
+ if (sample->rtt_us < 0)
+ return;
+
+ /* Discard delay samples right after fast recovery */
+ if (ca->epoch_start && (__s32)(tcp_jiffies32 - ca->epoch_start) < HZ)
+ return;
+
+ delay = sample->rtt_us;
+ if (delay == 0)
+ delay = 1;
+
+ /* first time call or link delay decreases */
+ if (ca->delay_min == 0 || ca->delay_min > delay)
+ ca->delay_min = delay;
+
+ /* hystart triggers when cwnd is larger than some threshold */
+ if (!ca->found && tcp_in_slow_start(tp) && hystart &&
+ tp->snd_cwnd >= hystart_low_window)
+ hystart_update(sk, delay);
+}
+
+__u32 BPF_STRUCT_OPS(tcp_reno_undo_cwnd, struct sock *sk)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+
+ return max(tp->snd_cwnd, tp->prior_cwnd);
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops cubic = {
+ .init = (void *)bictcp_init,
+ .ssthresh = (void *)bictcp_recalc_ssthresh,
+ .cong_avoid = (void *)bictcp_cong_avoid,
+ .set_state = (void *)bictcp_state,
+ .undo_cwnd = (void *)tcp_reno_undo_cwnd,
+ .cwnd_event = (void *)bictcp_cwnd_event,
+ .pkts_acked = (void *)bictcp_acked,
+ .name = "bpf_cubic",
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
new file mode 100644
index 000000000000..b631fb5032d2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+/* WARNING: This implemenation is not necessarily the same
+ * as the tcp_dctcp.c. The purpose is mainly for testing
+ * the kernel BPF logic.
+ */
+
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_trace_helpers.h"
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define DCTCP_MAX_ALPHA 1024U
+
+struct dctcp {
+ __u32 old_delivered;
+ __u32 old_delivered_ce;
+ __u32 prior_rcv_nxt;
+ __u32 dctcp_alpha;
+ __u32 next_seq;
+ __u32 ce_state;
+ __u32 loss_cwnd;
+};
+
+static unsigned int dctcp_shift_g = 4; /* g = 1/2^4 */
+static unsigned int dctcp_alpha_on_init = DCTCP_MAX_ALPHA;
+
+static __always_inline void dctcp_reset(const struct tcp_sock *tp,
+ struct dctcp *ca)
+{
+ ca->next_seq = tp->snd_nxt;
+
+ ca->old_delivered = tp->delivered;
+ ca->old_delivered_ce = tp->delivered_ce;
+}
+
+SEC("struct_ops/dctcp_init")
+void BPF_PROG(dctcp_init, struct sock *sk)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ struct dctcp *ca = inet_csk_ca(sk);
+
+ ca->prior_rcv_nxt = tp->rcv_nxt;
+ ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
+ ca->loss_cwnd = 0;
+ ca->ce_state = 0;
+
+ dctcp_reset(tp, ca);
+}
+
+SEC("struct_ops/dctcp_ssthresh")
+__u32 BPF_PROG(dctcp_ssthresh, struct sock *sk)
+{
+ struct dctcp *ca = inet_csk_ca(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ ca->loss_cwnd = tp->snd_cwnd;
+ return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
+}
+
+SEC("struct_ops/dctcp_update_alpha")
+void BPF_PROG(dctcp_update_alpha, struct sock *sk, __u32 flags)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ struct dctcp *ca = inet_csk_ca(sk);
+
+ /* Expired RTT */
+ if (!before(tp->snd_una, ca->next_seq)) {
+ __u32 delivered_ce = tp->delivered_ce - ca->old_delivered_ce;
+ __u32 alpha = ca->dctcp_alpha;
+
+ /* alpha = (1 - g) * alpha + g * F */
+
+ alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g);
+ if (delivered_ce) {
+ __u32 delivered = tp->delivered - ca->old_delivered;
+
+ /* If dctcp_shift_g == 1, a 32bit value would overflow
+ * after 8 M packets.
+ */
+ delivered_ce <<= (10 - dctcp_shift_g);
+ delivered_ce /= max(1U, delivered);
+
+ alpha = min(alpha + delivered_ce, DCTCP_MAX_ALPHA);
+ }
+ ca->dctcp_alpha = alpha;
+ dctcp_reset(tp, ca);
+ }
+}
+
+static __always_inline void dctcp_react_to_loss(struct sock *sk)
+{
+ struct dctcp *ca = inet_csk_ca(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ ca->loss_cwnd = tp->snd_cwnd;
+ tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U);
+}
+
+SEC("struct_ops/dctcp_state")
+void BPF_PROG(dctcp_state, struct sock *sk, __u8 new_state)
+{
+ if (new_state == TCP_CA_Recovery &&
+ new_state != BPF_CORE_READ_BITFIELD(inet_csk(sk), icsk_ca_state))
+ dctcp_react_to_loss(sk);
+ /* We handle RTO in dctcp_cwnd_event to ensure that we perform only
+ * one loss-adjustment per RTT.
+ */
+}
+
+static __always_inline void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (ce_state == 1)
+ tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+ else
+ tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
+}
+
+/* Minimal DCTP CE state machine:
+ *
+ * S: 0 <- last pkt was non-CE
+ * 1 <- last pkt was CE
+ */
+static __always_inline
+void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
+ __u32 *prior_rcv_nxt, __u32 *ce_state)
+{
+ __u32 new_ce_state = (evt == CA_EVENT_ECN_IS_CE) ? 1 : 0;
+
+ if (*ce_state != new_ce_state) {
+ /* CE state has changed, force an immediate ACK to
+ * reflect the new CE state. If an ACK was delayed,
+ * send that first to reflect the prior CE state.
+ */
+ if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
+ dctcp_ece_ack_cwr(sk, *ce_state);
+ bpf_tcp_send_ack(sk, *prior_rcv_nxt);
+ }
+ inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
+ }
+ *prior_rcv_nxt = tcp_sk(sk)->rcv_nxt;
+ *ce_state = new_ce_state;
+ dctcp_ece_ack_cwr(sk, new_ce_state);
+}
+
+SEC("struct_ops/dctcp_cwnd_event")
+void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev)
+{
+ struct dctcp *ca = inet_csk_ca(sk);
+
+ switch (ev) {
+ case CA_EVENT_ECN_IS_CE:
+ case CA_EVENT_ECN_NO_CE:
+ dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state);
+ break;
+ case CA_EVENT_LOSS:
+ dctcp_react_to_loss(sk);
+ break;
+ default:
+ /* Don't care for the rest. */
+ break;
+ }
+}
+
+SEC("struct_ops/dctcp_cwnd_undo")
+__u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk)
+{
+ const struct dctcp *ca = inet_csk_ca(sk);
+
+ return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
+}
+
+SEC("struct_ops/tcp_reno_cong_avoid")
+void BPF_PROG(tcp_reno_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (!tcp_is_cwnd_limited(sk))
+ return;
+
+ /* In "safe" area, increase. */
+ if (tcp_in_slow_start(tp)) {
+ acked = tcp_slow_start(tp, acked);
+ if (!acked)
+ return;
+ }
+ /* In dangerous area, increase slowly. */
+ tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops dctcp_nouse = {
+ .init = (void *)dctcp_init,
+ .set_state = (void *)dctcp_state,
+ .flags = TCP_CONG_NEEDS_ECN,
+ .name = "bpf_dctcp_nouse",
+};
+
+SEC(".struct_ops")
+struct tcp_congestion_ops dctcp = {
+ .init = (void *)dctcp_init,
+ .in_ack_event = (void *)dctcp_update_alpha,
+ .cwnd_event = (void *)dctcp_cwnd_event,
+ .ssthresh = (void *)dctcp_ssthresh,
+ .cong_avoid = (void *)tcp_reno_cong_avoid,
+ .undo_cwnd = (void *)dctcp_cwnd_undo,
+ .set_state = (void *)dctcp_state,
+ .flags = TCP_CONG_NEEDS_ECN,
+ .name = "bpf_dctcp",
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index 5ae485a6af3f..9941f0ba471e 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -16,8 +16,8 @@
#include <sys/socket.h>
#include <linux/if_tunnel.h>
#include <linux/mpls.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
int _version SEC("version") = 1;
#define PROG(F) SEC(#F) int bpf_func_##F
@@ -65,8 +65,8 @@ struct {
} jmp_table SEC(".maps");
struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 1);
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1024);
__type(key, __u32);
__type(value, struct bpf_flow_keys);
} last_dissection SEC(".maps");
@@ -74,15 +74,20 @@ struct {
static __always_inline int export_flow_keys(struct bpf_flow_keys *keys,
int ret)
{
- struct bpf_flow_keys *val;
- __u32 key = 0;
+ __u32 key = (__u32)(keys->sport) << 16 | keys->dport;
+ struct bpf_flow_keys val;
- val = bpf_map_lookup_elem(&last_dissection, &key);
- if (val)
- memcpy(val, keys, sizeof(*val));
+ memcpy(&val, keys, sizeof(val));
+ bpf_map_update_elem(&last_dissection, &key, &val, BPF_ANY);
return ret;
}
+#define IPV6_FLOWLABEL_MASK __bpf_constant_htonl(0x000FFFFF)
+static inline __be32 ip6_flowlabel(const struct ipv6hdr *hdr)
+{
+ return *(__be32 *)hdr & IPV6_FLOWLABEL_MASK;
+}
+
static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
__u16 hdr_size,
void *buffer)
@@ -153,7 +158,6 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
struct tcphdr *tcp, _tcp;
struct udphdr *udp, _udp;
- keys->ip_proto = proto;
switch (proto) {
case IPPROTO_ICMP:
icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp);
@@ -162,9 +166,15 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
return export_flow_keys(keys, BPF_OK);
case IPPROTO_IPIP:
keys->is_encap = true;
+ if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
+ return export_flow_keys(keys, BPF_OK);
+
return parse_eth_proto(skb, bpf_htons(ETH_P_IP));
case IPPROTO_IPV6:
keys->is_encap = true;
+ if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
+ return export_flow_keys(keys, BPF_OK);
+
return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6));
case IPPROTO_GRE:
gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
@@ -184,6 +194,8 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
keys->thoff += 4; /* Step over sequence number */
keys->is_encap = true;
+ if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
+ return export_flow_keys(keys, BPF_OK);
if (gre->proto == bpf_htons(ETH_P_TEB)) {
eth = bpf_flow_dissect_get_header(skb, sizeof(*eth),
@@ -231,7 +243,6 @@ static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
{
struct bpf_flow_keys *keys = skb->flow_keys;
- keys->ip_proto = nexthdr;
switch (nexthdr) {
case IPPROTO_HOPOPTS:
case IPPROTO_DSTOPTS:
@@ -266,6 +277,7 @@ PROG(IP)(struct __sk_buff *skb)
keys->addr_proto = ETH_P_IP;
keys->ipv4_src = iph->saddr;
keys->ipv4_dst = iph->daddr;
+ keys->ip_proto = iph->protocol;
keys->thoff += iph->ihl << 2;
if (data + keys->thoff > data_end)
@@ -273,13 +285,20 @@ PROG(IP)(struct __sk_buff *skb)
if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
keys->is_frag = true;
- if (iph->frag_off & bpf_htons(IP_OFFSET))
+ if (iph->frag_off & bpf_htons(IP_OFFSET)) {
/* From second fragment on, packets do not have headers
* we can parse.
*/
done = true;
- else
+ } else {
keys->is_first_frag = true;
+ /* No need to parse fragmented packet unless
+ * explicitly asked for.
+ */
+ if (!(keys->flags &
+ BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
+ done = true;
+ }
}
if (done)
@@ -301,6 +320,11 @@ PROG(IPV6)(struct __sk_buff *skb)
memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));
keys->thoff += sizeof(struct ipv6hdr);
+ keys->ip_proto = ip6h->nexthdr;
+ keys->flow_label = ip6_flowlabel(ip6h);
+
+ if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
+ return export_flow_keys(keys, BPF_OK);
return parse_ipv6_proto(skb, ip6h->nexthdr);
}
@@ -317,7 +341,8 @@ PROG(IPV6OP)(struct __sk_buff *skb)
/* hlen is in 8-octets and does not include the first 8 bytes
* of the header
*/
- skb->flow_keys->thoff += (1 + ip6h->hdrlen) << 3;
+ keys->thoff += (1 + ip6h->hdrlen) << 3;
+ keys->ip_proto = ip6h->nexthdr;
return parse_ipv6_proto(skb, ip6h->nexthdr);
}
@@ -333,9 +358,18 @@ PROG(IPV6FR)(struct __sk_buff *skb)
keys->thoff += sizeof(*fragh);
keys->is_frag = true;
- if (!(fragh->frag_off & bpf_htons(IP6_OFFSET)))
+ keys->ip_proto = fragh->nexthdr;
+
+ if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) {
keys->is_first_frag = true;
+ /* No need to parse fragmented packet unless
+ * explicitly asked for.
+ */
+ if (!(keys->flags & BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
+ return export_flow_keys(keys, BPF_OK);
+ }
+
return parse_ipv6_proto(skb, fragh->nexthdr);
}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays.c
new file mode 100644
index 000000000000..018ed7fbba3a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_dim.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_dim.c
new file mode 100644
index 000000000000..13d662c57014
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_dim.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays___diff_arr_dim x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_val_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_val_sz.c
new file mode 100644
index 000000000000..a351f418c85d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_val_sz.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays___diff_arr_val_sz x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___equiv_zero_sz_arr.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___equiv_zero_sz_arr.c
new file mode 100644
index 000000000000..65eac371b061
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___equiv_zero_sz_arr.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays___equiv_zero_sz_arr x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_zero_sz_arr.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_zero_sz_arr.c
new file mode 100644
index 000000000000..ecda2b545ac2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_zero_sz_arr.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays___err_bad_zero_sz_arr x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_non_array.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_non_array.c
new file mode 100644
index 000000000000..a8735009becc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_non_array.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays___err_non_array x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_shallow.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_shallow.c
new file mode 100644
index 000000000000..2a67c28b1e75
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_shallow.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays___err_too_shallow x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_small.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_small.c
new file mode 100644
index 000000000000..1142c08c925f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_small.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays___err_too_small x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type.c
new file mode 100644
index 000000000000..f5a7c832d0f2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays___err_wrong_val_type x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___fixed_arr.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___fixed_arr.c
new file mode 100644
index 000000000000..fe1d01232c22
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___fixed_arr.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays___fixed_arr x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields.c
new file mode 100644
index 000000000000..cff6f1836cc5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_bitfields x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___bit_sz_change.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___bit_sz_change.c
new file mode 100644
index 000000000000..a1cd157d5451
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___bit_sz_change.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_bitfields___bit_sz_change x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___bitfield_vs_int.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___bitfield_vs_int.c
new file mode 100644
index 000000000000..3f2c7b07c456
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___bitfield_vs_int.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_bitfields___bitfield_vs_int x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___err_too_big_bitfield.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___err_too_big_bitfield.c
new file mode 100644
index 000000000000..f9746d6be399
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___err_too_big_bitfield.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_bitfields___err_too_big_bitfield x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___just_big_enough.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___just_big_enough.c
new file mode 100644
index 000000000000..e7c75a6953dd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___just_big_enough.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_bitfields___just_big_enough x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence.c
new file mode 100644
index 000000000000..0b62315ad46c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_existence x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c
new file mode 100644
index 000000000000..dd0ffa518f36
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_existence___err_wrong_arr_kind x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c
new file mode 100644
index 000000000000..bc83372088ad
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_existence___err_wrong_arr_value_type x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c
new file mode 100644
index 000000000000..917bec41be08
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_existence___err_wrong_int_kind x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c
new file mode 100644
index 000000000000..6ec7e6ec1c91
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_existence___err_wrong_int_sz x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c
new file mode 100644
index 000000000000..7bbcacf2b0d1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_existence___err_wrong_int_type x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c
new file mode 100644
index 000000000000..f384dd38ec70
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_existence___err_wrong_struct_type x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___minimal.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___minimal.c
new file mode 100644
index 000000000000..aec2dec20e90
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___minimal.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_existence___minimal x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_flavors.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_flavors.c
new file mode 100644
index 000000000000..b74455b91227
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_flavors.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_flavors x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_flavors__err_wrong_name.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_flavors__err_wrong_name.c
new file mode 100644
index 000000000000..7b6035f86ee6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_flavors__err_wrong_name.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_flavors__err_wrong_name x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_ints.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints.c
new file mode 100644
index 000000000000..7d0f041042c5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_ints x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___bool.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___bool.c
new file mode 100644
index 000000000000..f9359450186e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___bool.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_ints___bool x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___reverse_sign.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___reverse_sign.c
new file mode 100644
index 000000000000..aafb1c5819d7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___reverse_sign.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_ints___reverse_sign x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_misc.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_misc.c
new file mode 100644
index 000000000000..ed9ad8b5b4f8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_misc.c
@@ -0,0 +1,5 @@
+#include "core_reloc_types.h"
+
+void f1(struct core_reloc_misc___a x) {}
+void f2(struct core_reloc_misc___b x) {}
+void f3(struct core_reloc_misc_extensible x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_mods.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_mods.c
new file mode 100644
index 000000000000..124197a2e813
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_mods.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_mods x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_mods___mod_swap.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_mods___mod_swap.c
new file mode 100644
index 000000000000..f8a6592ca75f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_mods___mod_swap.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_mods___mod_swap x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_mods___typedefs.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_mods___typedefs.c
new file mode 100644
index 000000000000..5c0d73687247
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_mods___typedefs.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_mods___typedefs x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting.c
new file mode 100644
index 000000000000..4480fcc0f183
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_nesting x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___anon_embed.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___anon_embed.c
new file mode 100644
index 000000000000..13e108f76ece
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___anon_embed.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_nesting___anon_embed x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___dup_compat_types.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___dup_compat_types.c
new file mode 100644
index 000000000000..76b54fda5fbb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___dup_compat_types.c
@@ -0,0 +1,5 @@
+#include "core_reloc_types.h"
+
+void f1(struct core_reloc_nesting___dup_compat_types x) {}
+void f2(struct core_reloc_nesting___dup_compat_types__2 x) {}
+void f3(struct core_reloc_nesting___dup_compat_types__3 x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_container.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_container.c
new file mode 100644
index 000000000000..975fb95db810
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_container.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_nesting___err_array_container x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_field.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_field.c
new file mode 100644
index 000000000000..ad66c67e7980
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_field.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_nesting___err_array_field x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_dup_incompat_types.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_dup_incompat_types.c
new file mode 100644
index 000000000000..35c5f8da6812
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_dup_incompat_types.c
@@ -0,0 +1,4 @@
+#include "core_reloc_types.h"
+
+void f1(struct core_reloc_nesting___err_dup_incompat_types__1 x) {}
+void f2(struct core_reloc_nesting___err_dup_incompat_types__2 x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_container.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_container.c
new file mode 100644
index 000000000000..142e332041db
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_container.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_nesting___err_missing_container x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_field.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_field.c
new file mode 100644
index 000000000000..efcae167fab9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_field.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_nesting___err_missing_field x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_nonstruct_container.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_nonstruct_container.c
new file mode 100644
index 000000000000..97aaaedd8ada
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_nonstruct_container.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_nesting___err_nonstruct_container x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_partial_match_dups.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_partial_match_dups.c
new file mode 100644
index 000000000000..ffde35086e90
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_partial_match_dups.c
@@ -0,0 +1,4 @@
+#include "core_reloc_types.h"
+
+void f1(struct core_reloc_nesting___err_partial_match_dups__a x) {}
+void f2(struct core_reloc_nesting___err_partial_match_dups__b x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_too_deep.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_too_deep.c
new file mode 100644
index 000000000000..39a2fadd8e95
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_too_deep.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_nesting___err_too_deep x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___extra_nesting.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___extra_nesting.c
new file mode 100644
index 000000000000..a09d9dfb20df
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___extra_nesting.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_nesting___extra_nesting x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___struct_union_mixup.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___struct_union_mixup.c
new file mode 100644
index 000000000000..3d8a1a74012f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___struct_union_mixup.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_nesting___struct_union_mixup x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives.c
new file mode 100644
index 000000000000..96b90e39242a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_primitives x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_enum_def.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_enum_def.c
new file mode 100644
index 000000000000..6e87233a3ed0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_enum_def.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_primitives___diff_enum_def x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_func_proto.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_func_proto.c
new file mode 100644
index 000000000000..d9f48e80b9d9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_func_proto.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_primitives___diff_func_proto x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_ptr_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_ptr_type.c
new file mode 100644
index 000000000000..c718f75f8f3b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_ptr_type.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_primitives___diff_ptr_type x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_enum.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_enum.c
new file mode 100644
index 000000000000..b8a120830891
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_enum.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_primitives___err_non_enum x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_int.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_int.c
new file mode 100644
index 000000000000..ad8b3c9aa76f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_int.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_primitives___err_non_int x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_ptr.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_ptr.c
new file mode 100644
index 000000000000..e20bc1d42d0a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_ptr.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_primitives___err_non_ptr x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_ptr_as_arr.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_ptr_as_arr.c
new file mode 100644
index 000000000000..8da52432ba17
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_ptr_as_arr.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_ptr_as_arr x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_ptr_as_arr___diff_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_ptr_as_arr___diff_sz.c
new file mode 100644
index 000000000000..003acfc9a3e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_ptr_as_arr___diff_sz.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_ptr_as_arr___diff_sz x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_size.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_size.c
new file mode 100644
index 000000000000..3c80903da5a4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_size.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_size x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_sz.c
new file mode 100644
index 000000000000..6dbd14436b52
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_sz.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_size___diff_sz x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
index 3a62119c7498..35c512818a56 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
@@ -62,6 +62,10 @@ struct padded_a_lot {
* long: 64;
* long: 64;
* int b;
+ * long: 32;
+ * long: 64;
+ * long: 64;
+ * long: 64;
*};
*
*/
@@ -95,7 +99,6 @@ struct zone_padding {
struct zone {
int a;
short b;
- short: 16;
struct zone_padding __pad__;
};
diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index 1fd244d35ba9..75085119c5bb 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -9,8 +9,8 @@
#include <linux/in6.h>
#include <sys/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define SRC_REWRITE_IP4 0x7f000004U
#define DST_REWRITE_IP4 0x7f000001U
diff --git a/tools/testing/selftests/bpf/progs/connect6_prog.c b/tools/testing/selftests/bpf/progs/connect6_prog.c
index 26397ab7b3c7..506d0f81a375 100644
--- a/tools/testing/selftests/bpf/progs/connect6_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect6_prog.c
@@ -9,8 +9,8 @@
#include <linux/in6.h>
#include <sys/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define SRC_REWRITE_IP6_0 0
#define SRC_REWRITE_IP6_1 0
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
new file mode 100644
index 000000000000..6d598cfbdb3e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -0,0 +1,806 @@
+#include <stdint.h>
+#include <stdbool.h>
+/*
+ * KERNEL
+ */
+
+struct core_reloc_kernel_output {
+ int valid[10];
+ char comm[sizeof("test_progs")];
+ int comm_len;
+};
+
+/*
+ * FLAVORS
+ */
+struct core_reloc_flavors {
+ int a;
+ int b;
+ int c;
+};
+
+/* this is not a flavor, as it doesn't have triple underscore */
+struct core_reloc_flavors__err_wrong_name {
+ int a;
+ int b;
+ int c;
+};
+
+/*
+ * NESTING
+ */
+/* original set up, used to record relocations in BPF program */
+struct core_reloc_nesting_substruct {
+ int a;
+};
+
+union core_reloc_nesting_subunion {
+ int b;
+};
+
+struct core_reloc_nesting {
+ union {
+ struct core_reloc_nesting_substruct a;
+ } a;
+ struct {
+ union core_reloc_nesting_subunion b;
+ } b;
+};
+
+/* inlined anonymous struct/union instead of named structs in original */
+struct core_reloc_nesting___anon_embed {
+ int __just_for_padding;
+ union {
+ struct {
+ int a;
+ } a;
+ } a;
+ struct {
+ union {
+ int b;
+ } b;
+ } b;
+};
+
+/* different mix of nested structs/unions than in original */
+struct core_reloc_nesting___struct_union_mixup {
+ int __a;
+ struct {
+ int __a;
+ union {
+ char __a;
+ int a;
+ } a;
+ } a;
+ int __b;
+ union {
+ int __b;
+ union {
+ char __b;
+ int b;
+ } b;
+ } b;
+};
+
+/* extra anon structs/unions, but still valid a.a.a and b.b.b accessors */
+struct core_reloc_nesting___extra_nesting {
+ int __padding;
+ struct {
+ struct {
+ struct {
+ struct {
+ union {
+ int a;
+ } a;
+ };
+ };
+ } a;
+ int __some_more;
+ struct {
+ union {
+ union {
+ union {
+ struct {
+ int b;
+ };
+ } b;
+ };
+ } b;
+ };
+ };
+};
+
+/* three flavors of same struct with different structure but same layout for
+ * a.a.a and b.b.b, thus successfully resolved and relocatable */
+struct core_reloc_nesting___dup_compat_types {
+ char __just_for_padding;
+ /* 3 more bytes of padding */
+ struct {
+ struct {
+ int a; /* offset 4 */
+ } a;
+ } a;
+ long long __more_padding;
+ struct {
+ struct {
+ int b; /* offset 16 */
+ } b;
+ } b;
+};
+
+struct core_reloc_nesting___dup_compat_types__2 {
+ int __aligned_padding;
+ struct {
+ int __trickier_noop[0];
+ struct {
+ char __some_more_noops[0];
+ int a; /* offset 4 */
+ } a;
+ } a;
+ int __more_padding;
+ struct {
+ struct {
+ struct {
+ int __critical_padding;
+ int b; /* offset 16 */
+ } b;
+ int __does_not_matter;
+ };
+ } b;
+ int __more_irrelevant_stuff;
+};
+
+struct core_reloc_nesting___dup_compat_types__3 {
+ char __correct_padding[4];
+ struct {
+ struct {
+ int a; /* offset 4 */
+ } a;
+ } a;
+ /* 8 byte padding due to next struct's alignment */
+ struct {
+ struct {
+ int b;
+ } b;
+ } b __attribute__((aligned(16)));
+};
+
+/* b.b.b field is missing */
+struct core_reloc_nesting___err_missing_field {
+ struct {
+ struct {
+ int a;
+ } a;
+ } a;
+ struct {
+ struct {
+ int x;
+ } b;
+ } b;
+};
+
+/* b.b.b field is an array of integers instead of plain int */
+struct core_reloc_nesting___err_array_field {
+ struct {
+ struct {
+ int a;
+ } a;
+ } a;
+ struct {
+ struct {
+ int b[1];
+ } b;
+ } b;
+};
+
+/* middle b container is missing */
+struct core_reloc_nesting___err_missing_container {
+ struct {
+ struct {
+ int a;
+ } a;
+ } a;
+ struct {
+ int x;
+ } b;
+};
+
+/* middle b container is referenced through pointer instead of being embedded */
+struct core_reloc_nesting___err_nonstruct_container {
+ struct {
+ struct {
+ int a;
+ } a;
+ } a;
+ struct {
+ struct {
+ int b;
+ } *b;
+ } b;
+};
+
+/* middle b container is an array of structs instead of plain struct */
+struct core_reloc_nesting___err_array_container {
+ struct {
+ struct {
+ int a;
+ } a;
+ } a;
+ struct {
+ struct {
+ int b;
+ } b[1];
+ } b;
+};
+
+/* two flavors of same struct with incompatible layout for b.b.b */
+struct core_reloc_nesting___err_dup_incompat_types__1 {
+ struct {
+ struct {
+ int a; /* offset 0 */
+ } a;
+ } a;
+ struct {
+ struct {
+ int b; /* offset 4 */
+ } b;
+ } b;
+};
+
+struct core_reloc_nesting___err_dup_incompat_types__2 {
+ struct {
+ struct {
+ int a; /* offset 0 */
+ } a;
+ } a;
+ int __extra_padding;
+ struct {
+ struct {
+ int b; /* offset 8 (!) */
+ } b;
+ } b;
+};
+
+/* two flavors of same struct having one of a.a.a and b.b.b, but not both */
+struct core_reloc_nesting___err_partial_match_dups__a {
+ struct {
+ struct {
+ int a;
+ } a;
+ } a;
+};
+
+struct core_reloc_nesting___err_partial_match_dups__b {
+ struct {
+ struct {
+ int b;
+ } b;
+ } b;
+};
+
+struct core_reloc_nesting___err_too_deep {
+ struct {
+ struct {
+ int a;
+ } a;
+ } a;
+ /* 65 levels of nestedness for b.b.b */
+ struct {
+ struct {
+ struct { struct { struct { struct { struct {
+ struct { struct { struct { struct { struct {
+ struct { struct { struct { struct { struct {
+ struct { struct { struct { struct { struct {
+ struct { struct { struct { struct { struct {
+ struct { struct { struct { struct { struct {
+ struct { struct { struct { struct { struct {
+ struct { struct { struct { struct { struct {
+ struct { struct { struct { struct { struct {
+ struct { struct { struct { struct { struct {
+ struct { struct { struct { struct { struct {
+ struct { struct { struct { struct { struct {
+ /* this one is one too much */
+ struct {
+ int b;
+ };
+ }; }; }; }; };
+ }; }; }; }; };
+ }; }; }; }; };
+ }; }; }; }; };
+ }; }; }; }; };
+ }; }; }; }; };
+ }; }; }; }; };
+ }; }; }; }; };
+ }; }; }; }; };
+ }; }; }; }; };
+ }; }; }; }; };
+ }; }; }; }; };
+ } b;
+ } b;
+};
+
+/*
+ * ARRAYS
+ */
+struct core_reloc_arrays_output {
+ int a2;
+ char b123;
+ int c1c;
+ int d00d;
+ int f10c;
+};
+
+struct core_reloc_arrays_substruct {
+ int c;
+ int d;
+};
+
+struct core_reloc_arrays {
+ int a[5];
+ char b[2][3][4];
+ struct core_reloc_arrays_substruct c[3];
+ struct core_reloc_arrays_substruct d[1][2];
+ struct core_reloc_arrays_substruct f[][2];
+};
+
+/* bigger array dimensions */
+struct core_reloc_arrays___diff_arr_dim {
+ int a[7];
+ char b[3][4][5];
+ struct core_reloc_arrays_substruct c[4];
+ struct core_reloc_arrays_substruct d[2][3];
+ struct core_reloc_arrays_substruct f[1][3];
+};
+
+/* different size of array's value (struct) */
+struct core_reloc_arrays___diff_arr_val_sz {
+ int a[5];
+ char b[2][3][4];
+ struct {
+ int __padding1;
+ int c;
+ int __padding2;
+ } c[3];
+ struct {
+ int __padding1;
+ int d;
+ int __padding2;
+ } d[1][2];
+ struct {
+ int __padding1;
+ int c;
+ int __padding2;
+ } f[][2];
+};
+
+struct core_reloc_arrays___equiv_zero_sz_arr {
+ int a[5];
+ char b[2][3][4];
+ struct core_reloc_arrays_substruct c[3];
+ struct core_reloc_arrays_substruct d[1][2];
+ /* equivalent to flexible array */
+ struct core_reloc_arrays_substruct f[0][2];
+};
+
+struct core_reloc_arrays___fixed_arr {
+ int a[5];
+ char b[2][3][4];
+ struct core_reloc_arrays_substruct c[3];
+ struct core_reloc_arrays_substruct d[1][2];
+ /* not a flexible array anymore, but within access bounds */
+ struct core_reloc_arrays_substruct f[1][2];
+};
+
+struct core_reloc_arrays___err_too_small {
+ int a[2]; /* this one is too small */
+ char b[2][3][4];
+ struct core_reloc_arrays_substruct c[3];
+ struct core_reloc_arrays_substruct d[1][2];
+ struct core_reloc_arrays_substruct f[][2];
+};
+
+struct core_reloc_arrays___err_too_shallow {
+ int a[5];
+ char b[2][3]; /* this one lacks one dimension */
+ struct core_reloc_arrays_substruct c[3];
+ struct core_reloc_arrays_substruct d[1][2];
+ struct core_reloc_arrays_substruct f[][2];
+};
+
+struct core_reloc_arrays___err_non_array {
+ int a; /* not an array */
+ char b[2][3][4];
+ struct core_reloc_arrays_substruct c[3];
+ struct core_reloc_arrays_substruct d[1][2];
+ struct core_reloc_arrays_substruct f[][2];
+};
+
+struct core_reloc_arrays___err_wrong_val_type {
+ int a[5];
+ char b[2][3][4];
+ int c[3]; /* value is not a struct */
+ struct core_reloc_arrays_substruct d[1][2];
+ struct core_reloc_arrays_substruct f[][2];
+};
+
+struct core_reloc_arrays___err_bad_zero_sz_arr {
+ /* zero-sized array, but not at the end */
+ struct core_reloc_arrays_substruct f[0][2];
+ int a[5];
+ char b[2][3][4];
+ struct core_reloc_arrays_substruct c[3];
+ struct core_reloc_arrays_substruct d[1][2];
+};
+
+/*
+ * PRIMITIVES
+ */
+enum core_reloc_primitives_enum {
+ A = 0,
+ B = 1,
+};
+
+struct core_reloc_primitives {
+ char a;
+ int b;
+ enum core_reloc_primitives_enum c;
+ void *d;
+ int (*f)(const char *);
+};
+
+struct core_reloc_primitives___diff_enum_def {
+ char a;
+ int b;
+ void *d;
+ int (*f)(const char *);
+ enum {
+ X = 100,
+ Y = 200,
+ } c; /* inline enum def with differing set of values */
+};
+
+struct core_reloc_primitives___diff_func_proto {
+ void (*f)(int); /* incompatible function prototype */
+ void *d;
+ enum core_reloc_primitives_enum c;
+ int b;
+ char a;
+};
+
+struct core_reloc_primitives___diff_ptr_type {
+ const char * const d; /* different pointee type + modifiers */
+ char a;
+ int b;
+ enum core_reloc_primitives_enum c;
+ int (*f)(const char *);
+};
+
+struct core_reloc_primitives___err_non_enum {
+ char a[1];
+ int b;
+ int c; /* int instead of enum */
+ void *d;
+ int (*f)(const char *);
+};
+
+struct core_reloc_primitives___err_non_int {
+ char a[1];
+ int *b; /* ptr instead of int */
+ enum core_reloc_primitives_enum c;
+ void *d;
+ int (*f)(const char *);
+};
+
+struct core_reloc_primitives___err_non_ptr {
+ char a[1];
+ int b;
+ enum core_reloc_primitives_enum c;
+ int d; /* int instead of ptr */
+ int (*f)(const char *);
+};
+
+/*
+ * MODS
+ */
+struct core_reloc_mods_output {
+ int a, b, c, d, e, f, g, h;
+};
+
+typedef const int int_t;
+typedef const char *char_ptr_t;
+typedef const int arr_t[7];
+
+struct core_reloc_mods_substruct {
+ int x;
+ int y;
+};
+
+typedef struct {
+ int x;
+ int y;
+} core_reloc_mods_substruct_t;
+
+struct core_reloc_mods {
+ int a;
+ int_t b;
+ char *c;
+ char_ptr_t d;
+ int e[3];
+ arr_t f;
+ struct core_reloc_mods_substruct g;
+ core_reloc_mods_substruct_t h;
+};
+
+/* a/b, c/d, e/f, and g/h pairs are swapped */
+struct core_reloc_mods___mod_swap {
+ int b;
+ int_t a;
+ char *d;
+ char_ptr_t c;
+ int f[3];
+ arr_t e;
+ struct {
+ int y;
+ int x;
+ } h;
+ core_reloc_mods_substruct_t g;
+};
+
+typedef int int1_t;
+typedef int1_t int2_t;
+typedef int2_t int3_t;
+
+typedef int arr1_t[5];
+typedef arr1_t arr2_t;
+typedef arr2_t arr3_t;
+typedef arr3_t arr4_t;
+
+typedef const char * const volatile fancy_char_ptr_t;
+
+typedef core_reloc_mods_substruct_t core_reloc_mods_substruct_tt;
+
+/* we need more typedefs */
+struct core_reloc_mods___typedefs {
+ core_reloc_mods_substruct_tt g;
+ core_reloc_mods_substruct_tt h;
+ arr4_t f;
+ arr4_t e;
+ fancy_char_ptr_t d;
+ fancy_char_ptr_t c;
+ int3_t b;
+ int3_t a;
+};
+
+/*
+ * PTR_AS_ARR
+ */
+struct core_reloc_ptr_as_arr {
+ int a;
+};
+
+struct core_reloc_ptr_as_arr___diff_sz {
+ int :32; /* padding */
+ char __some_more_padding;
+ int a;
+};
+
+/*
+ * INTS
+ */
+struct core_reloc_ints {
+ uint8_t u8_field;
+ int8_t s8_field;
+ uint16_t u16_field;
+ int16_t s16_field;
+ uint32_t u32_field;
+ int32_t s32_field;
+ uint64_t u64_field;
+ int64_t s64_field;
+};
+
+/* signed/unsigned types swap */
+struct core_reloc_ints___reverse_sign {
+ int8_t u8_field;
+ uint8_t s8_field;
+ int16_t u16_field;
+ uint16_t s16_field;
+ int32_t u32_field;
+ uint32_t s32_field;
+ int64_t u64_field;
+ uint64_t s64_field;
+};
+
+struct core_reloc_ints___bool {
+ bool u8_field; /* bool instead of uint8 */
+ int8_t s8_field;
+ uint16_t u16_field;
+ int16_t s16_field;
+ uint32_t u32_field;
+ int32_t s32_field;
+ uint64_t u64_field;
+ int64_t s64_field;
+};
+
+/*
+ * MISC
+ */
+struct core_reloc_misc_output {
+ int a, b, c;
+};
+
+struct core_reloc_misc___a {
+ int a1;
+ int a2;
+};
+
+struct core_reloc_misc___b {
+ int b1;
+ int b2;
+};
+
+/* this one extends core_reloc_misc_extensible struct from BPF prog */
+struct core_reloc_misc_extensible {
+ int a;
+ int b;
+ int c;
+ int d;
+};
+
+/*
+ * EXISTENCE
+ */
+struct core_reloc_existence_output {
+ int a_exists;
+ int a_value;
+ int b_exists;
+ int b_value;
+ int c_exists;
+ int c_value;
+ int arr_exists;
+ int arr_value;
+ int s_exists;
+ int s_value;
+};
+
+struct core_reloc_existence {
+ int a;
+ struct {
+ int b;
+ };
+ int c;
+ int arr[1];
+ struct {
+ int x;
+ } s;
+};
+
+struct core_reloc_existence___minimal {
+ int a;
+};
+
+struct core_reloc_existence___err_wrong_int_sz {
+ short a;
+};
+
+struct core_reloc_existence___err_wrong_int_type {
+ int b[1];
+};
+
+struct core_reloc_existence___err_wrong_int_kind {
+ struct{ int x; } c;
+};
+
+struct core_reloc_existence___err_wrong_arr_kind {
+ int arr;
+};
+
+struct core_reloc_existence___err_wrong_arr_value_type {
+ short arr[1];
+};
+
+struct core_reloc_existence___err_wrong_struct_type {
+ int s;
+};
+
+/*
+ * BITFIELDS
+ */
+/* bitfield read results, all as plain integers */
+struct core_reloc_bitfields_output {
+ int64_t ub1;
+ int64_t ub2;
+ int64_t ub7;
+ int64_t sb4;
+ int64_t sb20;
+ int64_t u32;
+ int64_t s32;
+};
+
+struct core_reloc_bitfields {
+ /* unsigned bitfields */
+ uint8_t ub1: 1;
+ uint8_t ub2: 2;
+ uint32_t ub7: 7;
+ /* signed bitfields */
+ int8_t sb4: 4;
+ int32_t sb20: 20;
+ /* non-bitfields */
+ uint32_t u32;
+ int32_t s32;
+};
+
+/* different bit sizes (both up and down) */
+struct core_reloc_bitfields___bit_sz_change {
+ /* unsigned bitfields */
+ uint16_t ub1: 3; /* 1 -> 3 */
+ uint32_t ub2: 20; /* 2 -> 20 */
+ uint8_t ub7: 1; /* 7 -> 1 */
+ /* signed bitfields */
+ int8_t sb4: 1; /* 4 -> 1 */
+ int32_t sb20: 30; /* 20 -> 30 */
+ /* non-bitfields */
+ uint16_t u32; /* 32 -> 16 */
+ int64_t s32; /* 32 -> 64 */
+};
+
+/* turn bitfield into non-bitfield and vice versa */
+struct core_reloc_bitfields___bitfield_vs_int {
+ uint64_t ub1; /* 3 -> 64 non-bitfield */
+ uint8_t ub2; /* 20 -> 8 non-bitfield */
+ int64_t ub7; /* 7 -> 64 non-bitfield signed */
+ int64_t sb4; /* 4 -> 64 non-bitfield signed */
+ uint64_t sb20; /* 20 -> 16 non-bitfield unsigned */
+ int32_t u32: 20; /* 32 non-bitfield -> 20 bitfield */
+ uint64_t s32: 60; /* 32 non-bitfield -> 60 bitfield */
+};
+
+struct core_reloc_bitfields___just_big_enough {
+ uint64_t ub1: 4;
+ uint64_t ub2: 60; /* packed tightly */
+ uint32_t ub7;
+ uint32_t sb4;
+ uint32_t sb20;
+ uint32_t u32;
+ uint32_t s32;
+} __attribute__((packed)) ;
+
+struct core_reloc_bitfields___err_too_big_bitfield {
+ uint64_t ub1: 4;
+ uint64_t ub2: 61; /* packed tightly */
+ uint32_t ub7;
+ uint32_t sb4;
+ uint32_t sb20;
+ uint32_t u32;
+ uint32_t s32;
+} __attribute__((packed)) ;
+
+/*
+ * SIZE
+ */
+struct core_reloc_size_output {
+ int int_sz;
+ int struct_sz;
+ int union_sz;
+ int arr_sz;
+ int arr_elem_sz;
+ int ptr_sz;
+ int enum_sz;
+};
+
+struct core_reloc_size {
+ int int_field;
+ struct { int x; } struct_field;
+ union { int x; } union_field;
+ int arr_field[4];
+ void *ptr_field;
+ enum { VALUE = 123 } enum_field;
+};
+
+struct core_reloc_size___diff_sz {
+ uint64_t int_field;
+ struct { int x; int y; int z; } struct_field;
+ union { int x; char bla[123]; } union_field;
+ char arr_field[10];
+ void *ptr_field;
+ enum { OTHER_VALUE = 0xFFFFFFFFFFFFFFFF } enum_field;
+};
diff --git a/tools/testing/selftests/bpf/progs/dev_cgroup.c b/tools/testing/selftests/bpf/progs/dev_cgroup.c
index ce41a3475f27..8924e06bdef0 100644
--- a/tools/testing/selftests/bpf/progs/dev_cgroup.c
+++ b/tools/testing/selftests/bpf/progs/dev_cgroup.c
@@ -7,7 +7,7 @@
#include <linux/bpf.h>
#include <linux/version.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
SEC("cgroup/dev")
int bpf_prog1(struct bpf_cgroup_dev_ctx *ctx)
diff --git a/tools/testing/selftests/bpf/progs/fentry_test.c b/tools/testing/selftests/bpf/progs/fentry_test.c
new file mode 100644
index 000000000000..38d3a82144ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fentry_test.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_trace_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+__u64 test1_result = 0;
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+ test1_result = a == 1;
+ return 0;
+}
+
+__u64 test2_result = 0;
+SEC("fentry/bpf_fentry_test2")
+int BPF_PROG(test2, int a, __u64 b)
+{
+ test2_result = a == 2 && b == 3;
+ return 0;
+}
+
+__u64 test3_result = 0;
+SEC("fentry/bpf_fentry_test3")
+int BPF_PROG(test3, char a, int b, __u64 c)
+{
+ test3_result = a == 4 && b == 5 && c == 6;
+ return 0;
+}
+
+__u64 test4_result = 0;
+SEC("fentry/bpf_fentry_test4")
+int BPF_PROG(test4, void *a, char b, int c, __u64 d)
+{
+ test4_result = a == (void *)7 && b == 8 && c == 9 && d == 10;
+ return 0;
+}
+
+__u64 test5_result = 0;
+SEC("fentry/bpf_fentry_test5")
+int BPF_PROG(test5, __u64 a, void *b, short c, int d, __u64 e)
+{
+ test5_result = a == 11 && b == (void *)12 && c == 13 && d == 14 &&
+ e == 15;
+ return 0;
+}
+
+__u64 test6_result = 0;
+SEC("fentry/bpf_fentry_test6")
+int BPF_PROG(test6, __u64 a, void *b, short c, int d, void * e, __u64 f)
+{
+ test6_result = a == 16 && b == (void *)17 && c == 18 && d == 19 &&
+ e == (void *)20 && f == 21;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
new file mode 100644
index 000000000000..c329fccf9842
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+#include <linux/stddef.h>
+#include <linux/ipv6.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_trace_helpers.h"
+
+struct sk_buff {
+ unsigned int len;
+};
+
+__u64 test_result = 0;
+SEC("fexit/test_pkt_access")
+int BPF_PROG(test_main, struct sk_buff *skb, int ret)
+{
+ int len;
+
+ __builtin_preserve_access_index(({
+ len = skb->len;
+ }));
+ if (len != 74 || ret != 0)
+ return 0;
+ test_result = 1;
+ return 0;
+}
+
+__u64 test_result_subprog1 = 0;
+SEC("fexit/test_pkt_access_subprog1")
+int BPF_PROG(test_subprog1, struct sk_buff *skb, int ret)
+{
+ int len;
+
+ __builtin_preserve_access_index(({
+ len = skb->len;
+ }));
+ if (len != 74 || ret != 148)
+ return 0;
+ test_result_subprog1 = 1;
+ return 0;
+}
+
+/* Though test_pkt_access_subprog2() is defined in C as:
+ * static __attribute__ ((noinline))
+ * int test_pkt_access_subprog2(int val, volatile struct __sk_buff *skb)
+ * {
+ * return skb->len * val;
+ * }
+ * llvm optimizations remove 'int val' argument and generate BPF assembly:
+ * r0 = *(u32 *)(r1 + 0)
+ * w0 <<= 1
+ * exit
+ * In such case the verifier falls back to conservative and
+ * tracing program can access arguments and return value as u64
+ * instead of accurate types.
+ */
+struct args_subprog2 {
+ __u64 args[5];
+ __u64 ret;
+};
+__u64 test_result_subprog2 = 0;
+SEC("fexit/test_pkt_access_subprog2")
+int test_subprog2(struct args_subprog2 *ctx)
+{
+ struct sk_buff *skb = (void *)ctx->args[0];
+ __u64 ret;
+ int len;
+
+ bpf_probe_read_kernel(&len, sizeof(len),
+ __builtin_preserve_access_index(&skb->len));
+
+ ret = ctx->ret;
+ /* bpf_prog_load() loads "test_pkt_access.o" with BPF_F_TEST_RND_HI32
+ * which randomizes upper 32 bits after BPF_ALU32 insns.
+ * Hence after 'w0 <<= 1' upper bits of $rax are random.
+ * That is expected and correct. Trim them.
+ */
+ ret = (__u32) ret;
+ if (len != 74 || ret != 148)
+ return 0;
+ test_result_subprog2 = 1;
+ return 0;
+}
+
+__u64 test_result_subprog3 = 0;
+SEC("fexit/test_pkt_access_subprog3")
+int BPF_PROG(test_subprog3, int val, struct sk_buff *skb, int ret)
+{
+ int len;
+
+ __builtin_preserve_access_index(({
+ len = skb->len;
+ }));
+ if (len != 74 || ret != 74 * val || val != 3)
+ return 0;
+ test_result_subprog3 = 1;
+ return 0;
+}
+
+__u64 test_get_skb_len = 0;
+SEC("freplace/get_skb_len")
+int new_get_skb_len(struct __sk_buff *skb)
+{
+ int len = skb->len;
+
+ if (len != 74)
+ return 0;
+ test_get_skb_len = 1;
+ return 74; /* original get_skb_len() returns skb->len */
+}
+
+__u64 test_get_skb_ifindex = 0;
+SEC("freplace/get_skb_ifindex")
+int new_get_skb_ifindex(int val, struct __sk_buff *skb, int var)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+ struct ipv6hdr ip6, *ip6p;
+ int ifindex = skb->ifindex;
+ __u32 eth_proto;
+ __u32 nh_off;
+
+ /* check that BPF extension can read packet via direct packet access */
+ if (data + 14 + sizeof(ip6) > data_end)
+ return 0;
+ ip6p = data + 14;
+
+ if (ip6p->nexthdr != 6 || ip6p->payload_len != __bpf_constant_htons(123))
+ return 0;
+
+ /* check that legacy packet access helper works too */
+ if (bpf_skb_load_bytes(skb, 14, &ip6, sizeof(ip6)) < 0)
+ return 0;
+ ip6p = &ip6;
+ if (ip6p->nexthdr != 6 || ip6p->payload_len != __bpf_constant_htons(123))
+ return 0;
+
+ if (ifindex != 1 || val != 3 || var != 1)
+ return 0;
+ test_get_skb_ifindex = 1;
+ return 3; /* original get_skb_ifindex() returns val * ifindex * var */
+}
+
+volatile __u64 test_get_constant = 0;
+SEC("freplace/get_constant")
+int new_get_constant(long val)
+{
+ if (val != 123)
+ return 0;
+ test_get_constant = 1;
+ return test_get_constant; /* original get_constant() returns val - 122 */
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c
new file mode 100644
index 000000000000..92f3fa47cf40
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_trace_helpers.h"
+
+struct sk_buff {
+ unsigned int len;
+};
+
+__u64 test_result = 0;
+
+SEC("fexit/test_pkt_md_access")
+int BPF_PROG(test_main2, struct sk_buff *skb, int ret)
+{
+ int len;
+
+ __builtin_preserve_access_index(({
+ len = skb->len;
+ }));
+ if (len != 74 || ret != 0)
+ return 0;
+
+ test_result = 1;
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/fexit_test.c b/tools/testing/selftests/bpf/progs/fexit_test.c
new file mode 100644
index 000000000000..348109b9ea07
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fexit_test.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_trace_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+__u64 test1_result = 0;
+SEC("fexit/bpf_fentry_test1")
+int BPF_PROG(test1, int a, int ret)
+{
+ test1_result = a == 1 && ret == 2;
+ return 0;
+}
+
+__u64 test2_result = 0;
+SEC("fexit/bpf_fentry_test2")
+int BPF_PROG(test2, int a, __u64 b, int ret)
+{
+ test2_result = a == 2 && b == 3 && ret == 5;
+ return 0;
+}
+
+__u64 test3_result = 0;
+SEC("fexit/bpf_fentry_test3")
+int BPF_PROG(test3, char a, int b, __u64 c, int ret)
+{
+ test3_result = a == 4 && b == 5 && c == 6 && ret == 15;
+ return 0;
+}
+
+__u64 test4_result = 0;
+SEC("fexit/bpf_fentry_test4")
+int BPF_PROG(test4, void *a, char b, int c, __u64 d, int ret)
+{
+ test4_result = a == (void *)7 && b == 8 && c == 9 && d == 10 &&
+ ret == 34;
+ return 0;
+}
+
+__u64 test5_result = 0;
+SEC("fexit/bpf_fentry_test5")
+int BPF_PROG(test5, __u64 a, void *b, short c, int d, __u64 e, int ret)
+{
+ test5_result = a == 11 && b == (void *)12 && c == 13 && d == 14 &&
+ e == 15 && ret == 65;
+ return 0;
+}
+
+__u64 test6_result = 0;
+SEC("fexit/bpf_fentry_test6")
+int BPF_PROG(test6, __u64 a, void *b, short c, int d, void *e, __u64 f, int ret)
+{
+ test6_result = a == 16 && b == (void *)17 && c == 18 && d == 19 &&
+ e == (void *)20 && f == 21 && ret == 111;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c
index 16c54ade6888..6b42db2fe391 100644
--- a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c
+++ b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c
@@ -2,7 +2,7 @@
// Copyright (c) 2018 Facebook
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c
new file mode 100644
index 000000000000..8f48a909f079
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfree_skb.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/bpf.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_trace_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+} perf_buf_map SEC(".maps");
+
+#define _(P) (__builtin_preserve_access_index(P))
+
+/* define few struct-s that bpf program needs to access */
+struct callback_head {
+ struct callback_head *next;
+ void (*func)(struct callback_head *head);
+};
+struct dev_ifalias {
+ struct callback_head rcuhead;
+};
+
+struct net_device /* same as kernel's struct net_device */ {
+ int ifindex;
+ struct dev_ifalias *ifalias;
+};
+
+typedef struct {
+ int counter;
+} atomic_t;
+typedef struct refcount_struct {
+ atomic_t refs;
+} refcount_t;
+
+struct sk_buff {
+ /* field names and sizes should match to those in the kernel */
+ unsigned int len, data_len;
+ __u16 mac_len, hdr_len, queue_mapping;
+ struct net_device *dev;
+ /* order of the fields doesn't matter */
+ refcount_t users;
+ unsigned char *data;
+ char __pkt_type_offset[0];
+ char cb[48];
+};
+
+struct meta {
+ int ifindex;
+ __u32 cb32_0;
+ __u8 cb8_0;
+};
+
+/* TRACE_EVENT(kfree_skb,
+ * TP_PROTO(struct sk_buff *skb, void *location),
+ */
+SEC("tp_btf/kfree_skb")
+int BPF_PROG(trace_kfree_skb, struct sk_buff *skb, void *location)
+{
+ struct net_device *dev;
+ struct callback_head *ptr;
+ void *func;
+ int users;
+ unsigned char *data;
+ unsigned short pkt_data;
+ struct meta meta = {};
+ char pkt_type;
+ __u32 *cb32;
+ __u8 *cb8;
+
+ __builtin_preserve_access_index(({
+ users = skb->users.refs.counter;
+ data = skb->data;
+ dev = skb->dev;
+ ptr = dev->ifalias->rcuhead.next;
+ func = ptr->func;
+ cb8 = (__u8 *)&skb->cb;
+ cb32 = (__u32 *)&skb->cb;
+ }));
+
+ meta.ifindex = _(dev->ifindex);
+ meta.cb8_0 = cb8[8];
+ meta.cb32_0 = cb32[2];
+
+ bpf_probe_read_kernel(&pkt_type, sizeof(pkt_type), _(&skb->__pkt_type_offset));
+ pkt_type &= 7;
+
+ /* read eth proto */
+ bpf_probe_read_kernel(&pkt_data, sizeof(pkt_data), data + 12);
+
+ bpf_printk("rcuhead.next %llx func %llx\n", ptr, func);
+ bpf_printk("skb->len %d users %d pkt_type %x\n",
+ _(skb->len), users, pkt_type);
+ bpf_printk("skb->queue_mapping %d\n", _(skb->queue_mapping));
+ bpf_printk("dev->ifindex %d data %llx pkt_data %x\n",
+ meta.ifindex, data, pkt_data);
+ bpf_printk("cb8_0:%x cb32_0:%x\n", meta.cb8_0, meta.cb32_0);
+
+ if (users != 1 || pkt_data != bpf_htons(0x86dd) || meta.ifindex != 1)
+ /* raw tp ignores return value */
+ return 0;
+
+ /* send first 72 byte of the packet to user space */
+ bpf_skb_output(skb, &perf_buf_map, (72ull << 32) | BPF_F_CURRENT_CPU,
+ &meta, sizeof(meta));
+ return 0;
+}
+
+static volatile struct {
+ bool fentry_test_ok;
+ bool fexit_test_ok;
+} result;
+
+SEC("fentry/eth_type_trans")
+int BPF_PROG(fentry_eth_type_trans, struct sk_buff *skb, struct net_device *dev,
+ unsigned short protocol)
+{
+ int len, ifindex;
+
+ __builtin_preserve_access_index(({
+ len = skb->len;
+ ifindex = dev->ifindex;
+ }));
+
+ /* fentry sees full packet including L2 header */
+ if (len != 74 || ifindex != 1)
+ return 0;
+ result.fentry_test_ok = true;
+ return 0;
+}
+
+SEC("fexit/eth_type_trans")
+int BPF_PROG(fexit_eth_type_trans, struct sk_buff *skb, struct net_device *dev,
+ unsigned short protocol)
+{
+ int len, ifindex;
+
+ __builtin_preserve_access_index(({
+ len = skb->len;
+ ifindex = dev->ifindex;
+ }));
+
+ /* fexit sees packet without L2 header that eth_type_trans should have
+ * consumed.
+ */
+ if (len != 60 || protocol != bpf_htons(0x86dd) || ifindex != 1)
+ return 0;
+ result.fexit_test_ok = true;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/loop1.c b/tools/testing/selftests/bpf/progs/loop1.c
index 7cdb7f878310..50e66772c046 100644
--- a/tools/testing/selftests/bpf/progs/loop1.c
+++ b/tools/testing/selftests/bpf/progs/loop1.c
@@ -6,7 +6,8 @@
#include <stddef.h>
#include <stdbool.h>
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/loop2.c b/tools/testing/selftests/bpf/progs/loop2.c
index 9b2f808a2863..947bb7e988c2 100644
--- a/tools/testing/selftests/bpf/progs/loop2.c
+++ b/tools/testing/selftests/bpf/progs/loop2.c
@@ -6,7 +6,8 @@
#include <stddef.h>
#include <stdbool.h>
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/loop3.c b/tools/testing/selftests/bpf/progs/loop3.c
index d727657d51e2..76e93b31c14b 100644
--- a/tools/testing/selftests/bpf/progs/loop3.c
+++ b/tools/testing/selftests/bpf/progs/loop3.c
@@ -6,7 +6,8 @@
#include <stddef.h>
#include <stdbool.h>
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/loop4.c b/tools/testing/selftests/bpf/progs/loop4.c
new file mode 100644
index 000000000000..b35337926d66
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/loop4.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("socket")
+int combinations(volatile struct __sk_buff* skb)
+{
+ int ret = 0, i;
+
+#pragma nounroll
+ for (i = 0; i < 20; i++)
+ if (skb->len)
+ ret |= 1 << i;
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/progs/loop5.c b/tools/testing/selftests/bpf/progs/loop5.c
new file mode 100644
index 000000000000..913791923fa3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/loop5.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#define barrier() __asm__ __volatile__("": : :"memory")
+
+char _license[] SEC("license") = "GPL";
+
+SEC("socket")
+int while_true(volatile struct __sk_buff* skb)
+{
+ int i = 0;
+
+ while (1) {
+ if (skb->len)
+ i += 3;
+ else
+ i += 7;
+ if (i == 9)
+ break;
+ barrier();
+ if (i == 10)
+ break;
+ barrier();
+ if (i == 13)
+ break;
+ barrier();
+ if (i == 14)
+ break;
+ }
+ return i;
+}
diff --git a/tools/testing/selftests/bpf/progs/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c
index 38a997852cad..d071adf178bd 100644
--- a/tools/testing/selftests/bpf/progs/netcnt_prog.c
+++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c
@@ -2,7 +2,7 @@
#include <linux/bpf.h>
#include <linux/version.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#include "netcnt_common.h"
#define MAX_BPS (3 * 1024 * 1024)
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
index 003fe106fc70..cc615b82b56e 100644
--- a/tools/testing/selftests/bpf/progs/pyperf.h
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -6,7 +6,7 @@
#include <stddef.h>
#include <stdbool.h>
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#define FUNCTION_NAME_LEN 64
#define FILE_NAME_LEN 128
@@ -72,9 +72,9 @@ static __always_inline void *get_thread_state(void *tls_base, PidData *pidData)
void* thread_state;
int key;
- bpf_probe_read(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
- bpf_probe_read(&thread_state, sizeof(thread_state),
- tls_base + 0x310 + key * 0x10 + 0x08);
+ bpf_probe_read_user(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
+ bpf_probe_read_user(&thread_state, sizeof(thread_state),
+ tls_base + 0x310 + key * 0x10 + 0x08);
return thread_state;
}
@@ -82,31 +82,33 @@ static __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData,
FrameData *frame, Symbol *symbol)
{
// read data from PyFrameObject
- bpf_probe_read(&frame->f_back,
- sizeof(frame->f_back),
- frame_ptr + pidData->offsets.PyFrameObject_back);
- bpf_probe_read(&frame->f_code,
- sizeof(frame->f_code),
- frame_ptr + pidData->offsets.PyFrameObject_code);
+ bpf_probe_read_user(&frame->f_back,
+ sizeof(frame->f_back),
+ frame_ptr + pidData->offsets.PyFrameObject_back);
+ bpf_probe_read_user(&frame->f_code,
+ sizeof(frame->f_code),
+ frame_ptr + pidData->offsets.PyFrameObject_code);
// read data from PyCodeObject
if (!frame->f_code)
return false;
- bpf_probe_read(&frame->co_filename,
- sizeof(frame->co_filename),
- frame->f_code + pidData->offsets.PyCodeObject_filename);
- bpf_probe_read(&frame->co_name,
- sizeof(frame->co_name),
- frame->f_code + pidData->offsets.PyCodeObject_name);
+ bpf_probe_read_user(&frame->co_filename,
+ sizeof(frame->co_filename),
+ frame->f_code + pidData->offsets.PyCodeObject_filename);
+ bpf_probe_read_user(&frame->co_name,
+ sizeof(frame->co_name),
+ frame->f_code + pidData->offsets.PyCodeObject_name);
// read actual names into symbol
if (frame->co_filename)
- bpf_probe_read_str(&symbol->file,
- sizeof(symbol->file),
- frame->co_filename + pidData->offsets.String_data);
+ bpf_probe_read_user_str(&symbol->file,
+ sizeof(symbol->file),
+ frame->co_filename +
+ pidData->offsets.String_data);
if (frame->co_name)
- bpf_probe_read_str(&symbol->name,
- sizeof(symbol->name),
- frame->co_name + pidData->offsets.String_data);
+ bpf_probe_read_user_str(&symbol->name,
+ sizeof(symbol->name),
+ frame->co_name +
+ pidData->offsets.String_data);
return true;
}
@@ -152,7 +154,12 @@ struct {
__uint(value_size, sizeof(long long) * 127);
} stackmap SEC(".maps");
-static __always_inline int __on_event(struct pt_regs *ctx)
+#ifdef GLOBAL_FUNC
+__attribute__((noinline))
+#else
+static __always_inline
+#endif
+int __on_event(struct bpf_raw_tracepoint_args *ctx)
{
uint64_t pid_tgid = bpf_get_current_pid_tgid();
pid_t pid = (pid_t)(pid_tgid >> 32);
@@ -174,9 +181,9 @@ static __always_inline int __on_event(struct pt_regs *ctx)
event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0);
void* thread_state_current = (void*)0;
- bpf_probe_read(&thread_state_current,
- sizeof(thread_state_current),
- (void*)(long)pidData->current_state_addr);
+ bpf_probe_read_user(&thread_state_current,
+ sizeof(thread_state_current),
+ (void*)(long)pidData->current_state_addr);
struct task_struct* task = (struct task_struct*)bpf_get_current_task();
void* tls_base = (void*)task;
@@ -188,11 +195,13 @@ static __always_inline int __on_event(struct pt_regs *ctx)
if (pidData->use_tls) {
uint64_t pthread_created;
uint64_t pthread_self;
- bpf_probe_read(&pthread_self, sizeof(pthread_self), tls_base + 0x10);
+ bpf_probe_read_user(&pthread_self, sizeof(pthread_self),
+ tls_base + 0x10);
- bpf_probe_read(&pthread_created,
- sizeof(pthread_created),
- thread_state + pidData->offsets.PyThreadState_thread);
+ bpf_probe_read_user(&pthread_created,
+ sizeof(pthread_created),
+ thread_state +
+ pidData->offsets.PyThreadState_thread);
event->pthread_match = pthread_created == pthread_self;
} else {
event->pthread_match = 1;
@@ -204,9 +213,10 @@ static __always_inline int __on_event(struct pt_regs *ctx)
Symbol sym = {};
int cur_cpu = bpf_get_smp_processor_id();
- bpf_probe_read(&frame_ptr,
- sizeof(frame_ptr),
- thread_state + pidData->offsets.PyThreadState_frame);
+ bpf_probe_read_user(&frame_ptr,
+ sizeof(frame_ptr),
+ thread_state +
+ pidData->offsets.PyThreadState_frame);
int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
if (symbol_counter == NULL)
@@ -249,7 +259,7 @@ static __always_inline int __on_event(struct pt_regs *ctx)
}
SEC("raw_tracepoint/kfree_skb")
-int on_event(struct pt_regs* ctx)
+int on_event(struct bpf_raw_tracepoint_args* ctx)
{
int i, ret = 0;
ret |= __on_event(ctx);
diff --git a/tools/testing/selftests/bpf/progs/pyperf_global.c b/tools/testing/selftests/bpf/progs/pyperf_global.c
new file mode 100644
index 000000000000..079e78a7562b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf_global.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define STACK_MAX_LEN 50
+#define GLOBAL_FUNC
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/sample_map_ret0.c b/tools/testing/selftests/bpf/progs/sample_map_ret0.c
index 0756303676ac..1612a32007b6 100644
--- a/tools/testing/selftests/bpf/progs/sample_map_ret0.c
+++ b/tools/testing/selftests/bpf/progs/sample_map_ret0.c
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
struct bpf_map_def SEC("maps") htab = {
.type = BPF_MAP_TYPE_HASH,
diff --git a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c
index a91536b1c47e..092d9da536f3 100644
--- a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c
+++ b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c
@@ -5,8 +5,8 @@
#include <linux/bpf.h>
#include <sys/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define SRC1_IP4 0xAC100001U /* 172.16.0.1 */
#define SRC2_IP4 0x00000000U
diff --git a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c
index a68062820410..255a432bc163 100644
--- a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c
+++ b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c
@@ -5,8 +5,8 @@
#include <linux/bpf.h>
#include <sys/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define SRC_REWRITE_IP6_0 0
#define SRC_REWRITE_IP6_1 0
diff --git a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c
index e4440fdd94cb..0cb5656a22b0 100644
--- a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c
+++ b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c
@@ -4,8 +4,8 @@
#include <linux/bpf.h>
#include <sys/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
struct socket_cookie {
__u64 cookie_key;
diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
index 9390e0244259..a5c6d5903b22 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
@@ -1,6 +1,6 @@
#include <linux/bpf.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
index e80484d98a1a..fdb4bf4408fa 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
@@ -1,7 +1,7 @@
#include <linux/bpf.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
index 433e23918a62..4797dc985064 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
@@ -1,6 +1,6 @@
#include <linux/bpf.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/sockopt_inherit.c b/tools/testing/selftests/bpf/progs/sockopt_inherit.c
new file mode 100644
index 000000000000..c6d428a8d785
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sockopt_inherit.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1;
+
+#define SOL_CUSTOM 0xdeadbeef
+#define CUSTOM_INHERIT1 0
+#define CUSTOM_INHERIT2 1
+#define CUSTOM_LISTENER 2
+
+struct sockopt_inherit {
+ __u8 val;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE);
+ __type(key, int);
+ __type(value, struct sockopt_inherit);
+} cloned1_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE);
+ __type(key, int);
+ __type(value, struct sockopt_inherit);
+} cloned2_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct sockopt_inherit);
+} listener_only_map SEC(".maps");
+
+static __inline struct sockopt_inherit *get_storage(struct bpf_sockopt *ctx)
+{
+ if (ctx->optname == CUSTOM_INHERIT1)
+ return bpf_sk_storage_get(&cloned1_map, ctx->sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ else if (ctx->optname == CUSTOM_INHERIT2)
+ return bpf_sk_storage_get(&cloned2_map, ctx->sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ else
+ return bpf_sk_storage_get(&listener_only_map, ctx->sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+}
+
+SEC("cgroup/getsockopt")
+int _getsockopt(struct bpf_sockopt *ctx)
+{
+ __u8 *optval_end = ctx->optval_end;
+ struct sockopt_inherit *storage;
+ __u8 *optval = ctx->optval;
+
+ if (ctx->level != SOL_CUSTOM)
+ return 1; /* only interested in SOL_CUSTOM */
+
+ if (optval + 1 > optval_end)
+ return 0; /* EPERM, bounds check */
+
+ storage = get_storage(ctx);
+ if (!storage)
+ return 0; /* EPERM, couldn't get sk storage */
+
+ ctx->retval = 0; /* Reset system call return value to zero */
+
+ optval[0] = storage->val;
+ ctx->optlen = 1;
+
+ return 1;
+}
+
+SEC("cgroup/setsockopt")
+int _setsockopt(struct bpf_sockopt *ctx)
+{
+ __u8 *optval_end = ctx->optval_end;
+ struct sockopt_inherit *storage;
+ __u8 *optval = ctx->optval;
+
+ if (ctx->level != SOL_CUSTOM)
+ return 1; /* only interested in SOL_CUSTOM */
+
+ if (optval + 1 > optval_end)
+ return 0; /* EPERM, bounds check */
+
+ storage = get_storage(ctx);
+ if (!storage)
+ return 0; /* EPERM, couldn't get sk storage */
+
+ storage->val = optval[0];
+ ctx->optlen = -1;
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/sockopt_multi.c b/tools/testing/selftests/bpf/progs/sockopt_multi.c
index 4afd2595c08e..9d8c212dde9f 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_multi.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_multi.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <netinet/in.h>
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
__u32 _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c
index 076122c898e9..d5a5eeb5fb52 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
#include <netinet/in.h>
+#include <netinet/tcp.h>
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
__u32 _version SEC("version") = 1;
@@ -12,13 +14,12 @@ struct sockopt_sk {
__u8 val;
};
-struct bpf_map_def SEC("maps") socket_storage_map = {
- .type = BPF_MAP_TYPE_SK_STORAGE,
- .key_size = sizeof(int),
- .value_size = sizeof(struct sockopt_sk),
- .map_flags = BPF_F_NO_PREALLOC,
-};
-BPF_ANNOTATE_KV_PAIR(socket_storage_map, int, struct sockopt_sk);
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct sockopt_sk);
+} socket_storage_map SEC(".maps");
SEC("cgroup/getsockopt")
int _getsockopt(struct bpf_sockopt *ctx)
@@ -42,6 +43,14 @@ int _getsockopt(struct bpf_sockopt *ctx)
return 1;
}
+ if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
+ /* Not interested in SOL_TCP:TCP_CONGESTION;
+ * let next BPF program in the cgroup chain or kernel
+ * handle it.
+ */
+ return 1;
+ }
+
if (ctx->level != SOL_CUSTOM)
return 0; /* EPERM, deny everything except custom level */
@@ -91,6 +100,18 @@ int _setsockopt(struct bpf_sockopt *ctx)
return 1;
}
+ if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
+ /* Always use cubic */
+
+ if (optval + 5 > optval_end)
+ return 0; /* EPERM, bounds check */
+
+ memcpy(optval, "cubic", 5);
+ ctx->optlen = 5;
+
+ return 1;
+ }
+
if (ctx->level != SOL_CUSTOM)
return 0; /* EPERM, deny everything except custom level */
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
index 8a399bdfd920..ad61b722a9de 100644
--- a/tools/testing/selftests/bpf/progs/strobemeta.h
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -8,7 +8,7 @@
#include <linux/ptrace.h>
#include <linux/sched.h>
#include <linux/types.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
typedef uint32_t pid_t;
struct task_struct {};
@@ -98,7 +98,7 @@ struct strobe_map_raw {
/*
* having volatile doesn't change anything on BPF side, but clang
* emits warnings for passing `volatile const char *` into
- * bpf_probe_read_str that expects just `const char *`
+ * bpf_probe_read_user_str that expects just `const char *`
*/
const char* tag;
/*
@@ -309,18 +309,18 @@ static __always_inline void *calc_location(struct strobe_value_loc *loc,
dtv_t *dtv;
void *tls_ptr;
- bpf_probe_read(&tls_index, sizeof(struct tls_index),
- (void *)loc->offset);
+ bpf_probe_read_user(&tls_index, sizeof(struct tls_index),
+ (void *)loc->offset);
/* valid module index is always positive */
if (tls_index.module > 0) {
/* dtv = ((struct tcbhead *)tls_base)->dtv[tls_index.module] */
- bpf_probe_read(&dtv, sizeof(dtv),
- &((struct tcbhead *)tls_base)->dtv);
+ bpf_probe_read_user(&dtv, sizeof(dtv),
+ &((struct tcbhead *)tls_base)->dtv);
dtv += tls_index.module;
} else {
dtv = NULL;
}
- bpf_probe_read(&tls_ptr, sizeof(void *), dtv);
+ bpf_probe_read_user(&tls_ptr, sizeof(void *), dtv);
/* if pointer has (void *)-1 value, then TLS wasn't initialized yet */
return tls_ptr && tls_ptr != (void *)-1
? tls_ptr + tls_index.offset
@@ -336,7 +336,7 @@ static __always_inline void read_int_var(struct strobemeta_cfg *cfg,
if (!location)
return;
- bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
+ bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
data->int_vals[idx] = value->val;
if (value->header.len)
data->int_vals_set_mask |= (1 << idx);
@@ -356,13 +356,13 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
if (!location)
return 0;
- bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
- len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, value->ptr);
+ bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
+ len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, value->ptr);
/*
- * if bpf_probe_read_str returns error (<0), due to casting to
+ * if bpf_probe_read_user_str returns error (<0), due to casting to
* unsinged int, it will become big number, so next check is
* sufficient to check for errors AND prove to BPF verifier, that
- * bpf_probe_read_str won't return anything bigger than
+ * bpf_probe_read_user_str won't return anything bigger than
* STROBE_MAX_STR_LEN
*/
if (len > STROBE_MAX_STR_LEN)
@@ -391,8 +391,8 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
if (!location)
return payload;
- bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
- if (bpf_probe_read(&map, sizeof(struct strobe_map_raw), value->ptr))
+ bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
+ if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr))
return payload;
descr->id = map.id;
@@ -402,7 +402,7 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
data->req_meta_valid = 1;
}
- len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, map.tag);
+ len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag);
if (len <= STROBE_MAX_STR_LEN) {
descr->tag_len = len;
payload += len;
@@ -413,17 +413,20 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
#else
#pragma unroll
#endif
- for (int i = 0; i < STROBE_MAX_MAP_ENTRIES && i < map.cnt; ++i) {
+ for (int i = 0; i < STROBE_MAX_MAP_ENTRIES; ++i) {
+ if (i >= map.cnt)
+ break;
+
descr->key_lens[i] = 0;
- len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN,
- map.entries[i].key);
+ len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
+ map.entries[i].key);
if (len <= STROBE_MAX_STR_LEN) {
descr->key_lens[i] = len;
payload += len;
}
descr->val_lens[i] = 0;
- len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN,
- map.entries[i].val);
+ len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
+ map.entries[i].val);
if (len <= STROBE_MAX_STR_LEN) {
descr->val_lens[i] = len;
payload += len;
diff --git a/tools/testing/selftests/bpf/progs/tailcall1.c b/tools/testing/selftests/bpf/progs/tailcall1.c
new file mode 100644
index 000000000000..1f407e65ae52
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall1.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 3);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+#define TAIL_FUNC(x) \
+ SEC("classifier/" #x) \
+ int bpf_func_##x(struct __sk_buff *skb) \
+ { \
+ return x; \
+ }
+TAIL_FUNC(0)
+TAIL_FUNC(1)
+TAIL_FUNC(2)
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+ /* Multiple locations to make sure we patch
+ * all of them.
+ */
+ bpf_tail_call(skb, &jmp_table, 0);
+ bpf_tail_call(skb, &jmp_table, 0);
+ bpf_tail_call(skb, &jmp_table, 0);
+ bpf_tail_call(skb, &jmp_table, 0);
+
+ bpf_tail_call(skb, &jmp_table, 1);
+ bpf_tail_call(skb, &jmp_table, 1);
+ bpf_tail_call(skb, &jmp_table, 1);
+ bpf_tail_call(skb, &jmp_table, 1);
+
+ bpf_tail_call(skb, &jmp_table, 2);
+ bpf_tail_call(skb, &jmp_table, 2);
+ bpf_tail_call(skb, &jmp_table, 2);
+ bpf_tail_call(skb, &jmp_table, 2);
+
+ return 3;
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall2.c b/tools/testing/selftests/bpf/progs/tailcall2.c
new file mode 100644
index 000000000000..a093e739cf0e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall2.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 5);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+SEC("classifier/0")
+int bpf_func_0(struct __sk_buff *skb)
+{
+ bpf_tail_call(skb, &jmp_table, 1);
+ return 0;
+}
+
+SEC("classifier/1")
+int bpf_func_1(struct __sk_buff *skb)
+{
+ bpf_tail_call(skb, &jmp_table, 2);
+ return 1;
+}
+
+SEC("classifier/2")
+int bpf_func_2(struct __sk_buff *skb)
+{
+ return 2;
+}
+
+SEC("classifier/3")
+int bpf_func_3(struct __sk_buff *skb)
+{
+ bpf_tail_call(skb, &jmp_table, 4);
+ return 3;
+}
+
+SEC("classifier/4")
+int bpf_func_4(struct __sk_buff *skb)
+{
+ bpf_tail_call(skb, &jmp_table, 3);
+ return 4;
+}
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+ bpf_tail_call(skb, &jmp_table, 0);
+ /* Check multi-prog update. */
+ bpf_tail_call(skb, &jmp_table, 2);
+ /* Check tail call limit. */
+ bpf_tail_call(skb, &jmp_table, 3);
+ return 3;
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall3.c b/tools/testing/selftests/bpf/progs/tailcall3.c
new file mode 100644
index 000000000000..cabda877cf0a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall3.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+static volatile int count;
+
+SEC("classifier/0")
+int bpf_func_0(struct __sk_buff *skb)
+{
+ count++;
+ bpf_tail_call(skb, &jmp_table, 0);
+ return 1;
+}
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+ bpf_tail_call(skb, &jmp_table, 0);
+ return 0;
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall4.c b/tools/testing/selftests/bpf/progs/tailcall4.c
new file mode 100644
index 000000000000..f82075b47d7d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall4.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 3);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+static volatile int selector;
+
+#define TAIL_FUNC(x) \
+ SEC("classifier/" #x) \
+ int bpf_func_##x(struct __sk_buff *skb) \
+ { \
+ return x; \
+ }
+TAIL_FUNC(0)
+TAIL_FUNC(1)
+TAIL_FUNC(2)
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+ bpf_tail_call(skb, &jmp_table, selector);
+ return 3;
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall5.c b/tools/testing/selftests/bpf/progs/tailcall5.c
new file mode 100644
index 000000000000..ce5450744fd4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall5.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 3);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+static volatile int selector;
+
+#define TAIL_FUNC(x) \
+ SEC("classifier/" #x) \
+ int bpf_func_##x(struct __sk_buff *skb) \
+ { \
+ return x; \
+ }
+TAIL_FUNC(0)
+TAIL_FUNC(1)
+TAIL_FUNC(2)
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+ int idx = 0;
+
+ if (selector == 1234)
+ idx = 1;
+ else if (selector == 5678)
+ idx = 2;
+
+ bpf_tail_call(skb, &jmp_table, idx);
+ return 3;
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tcp_rtt.c b/tools/testing/selftests/bpf/progs/tcp_rtt.c
index 233bdcb1659e..0cb3204ddb18 100644
--- a/tools/testing/selftests/bpf/progs/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/progs/tcp_rtt.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
__u32 _version SEC("version") = 1;
@@ -13,13 +13,12 @@ struct tcp_rtt_storage {
__u32 icsk_retransmits;
};
-struct bpf_map_def SEC("maps") socket_storage_map = {
- .type = BPF_MAP_TYPE_SK_STORAGE,
- .key_size = sizeof(int),
- .value_size = sizeof(struct tcp_rtt_storage),
- .map_flags = BPF_F_NO_PREALLOC,
-};
-BPF_ANNOTATE_KV_PAIR(socket_storage_map, int, struct tcp_rtt_storage);
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct tcp_rtt_storage);
+} socket_storage_map SEC(".maps");
SEC("sockops")
int _sockops(struct bpf_sock_ops *ctx)
diff --git a/tools/testing/selftests/bpf/progs/test_adjust_tail.c b/tools/testing/selftests/bpf/progs/test_adjust_tail.c
index 4cd5e860c903..b7fc85769bdc 100644
--- a/tools/testing/selftests/bpf/progs/test_adjust_tail.c
+++ b/tools/testing/selftests/bpf/progs/test_adjust_tail.c
@@ -7,7 +7,7 @@
*/
#include <linux/bpf.h>
#include <linux/if_ether.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c
index 63a8dfef893b..dd8fae6660ab 100644
--- a/tools/testing/selftests/bpf/progs/test_attach_probe.c
+++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c
@@ -3,50 +3,39 @@
#include <linux/ptrace.h>
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 4);
- __type(key, int);
- __type(value, int);
-} results_map SEC(".maps");
+int kprobe_res = 0;
+int kretprobe_res = 0;
+int uprobe_res = 0;
+int uretprobe_res = 0;
SEC("kprobe/sys_nanosleep")
-int handle_sys_nanosleep_entry(struct pt_regs *ctx)
+int handle_kprobe(struct pt_regs *ctx)
{
- const int key = 0, value = 1;
-
- bpf_map_update_elem(&results_map, &key, &value, 0);
+ kprobe_res = 1;
return 0;
}
SEC("kretprobe/sys_nanosleep")
-int handle_sys_getpid_return(struct pt_regs *ctx)
+int handle_kretprobe(struct pt_regs *ctx)
{
- const int key = 1, value = 2;
-
- bpf_map_update_elem(&results_map, &key, &value, 0);
+ kretprobe_res = 2;
return 0;
}
SEC("uprobe/trigger_func")
-int handle_uprobe_entry(struct pt_regs *ctx)
+int handle_uprobe(struct pt_regs *ctx)
{
- const int key = 2, value = 3;
-
- bpf_map_update_elem(&results_map, &key, &value, 0);
+ uprobe_res = 3;
return 0;
}
SEC("uretprobe/trigger_func")
-int handle_uprobe_return(struct pt_regs *ctx)
+int handle_uretprobe(struct pt_regs *ctx)
{
- const int key = 3, value = 4;
-
- bpf_map_update_elem(&results_map, &key, &value, 0);
+ uretprobe_res = 4;
return 0;
}
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c
index e5c79fe0ffdb..88b0566da13d 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_haskv.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_haskv.c
@@ -1,7 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2018 Facebook */
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
int _version SEC("version") = 1;
@@ -25,7 +26,7 @@ struct dummy_tracepoint_args {
};
__attribute__((noinline))
-static int test_long_fname_2(struct dummy_tracepoint_args *arg)
+int test_long_fname_2(struct dummy_tracepoint_args *arg)
{
struct ipv_counts *counts;
int key = 0;
@@ -43,7 +44,7 @@ static int test_long_fname_2(struct dummy_tracepoint_args *arg)
}
__attribute__((noinline))
-static int test_long_fname_1(struct dummy_tracepoint_args *arg)
+int test_long_fname_1(struct dummy_tracepoint_args *arg)
{
return test_long_fname_2(arg);
}
diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
index 5ee3622ddebb..a924e53c8e9d 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_newkv.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
@@ -1,7 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2018 Facebook */
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
int _version SEC("version") = 1;
@@ -33,7 +34,7 @@ struct dummy_tracepoint_args {
};
__attribute__((noinline))
-static int test_long_fname_2(struct dummy_tracepoint_args *arg)
+int test_long_fname_2(struct dummy_tracepoint_args *arg)
{
struct ipv_counts *counts;
int key = 0;
@@ -56,7 +57,7 @@ static int test_long_fname_2(struct dummy_tracepoint_args *arg)
}
__attribute__((noinline))
-static int test_long_fname_1(struct dummy_tracepoint_args *arg)
+int test_long_fname_1(struct dummy_tracepoint_args *arg)
{
return test_long_fname_2(arg);
}
diff --git a/tools/testing/selftests/bpf/progs/test_btf_nokv.c b/tools/testing/selftests/bpf/progs/test_btf_nokv.c
index 434188c37774..983aedd1c072 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_nokv.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_nokv.c
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2018 Facebook */
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
int _version SEC("version") = 1;
@@ -23,7 +23,7 @@ struct dummy_tracepoint_args {
};
__attribute__((noinline))
-static int test_long_fname_2(struct dummy_tracepoint_args *arg)
+int test_long_fname_2(struct dummy_tracepoint_args *arg)
{
struct ipv_counts *counts;
int key = 0;
@@ -41,7 +41,7 @@ static int test_long_fname_2(struct dummy_tracepoint_args *arg)
}
__attribute__((noinline))
-static int test_long_fname_1(struct dummy_tracepoint_args *arg)
+int test_long_fname_1(struct dummy_tracepoint_args *arg)
{
return test_long_fname_2(arg);
}
diff --git a/tools/testing/selftests/bpf/progs/test_core_extern.c b/tools/testing/selftests/bpf/progs/test_core_extern.c
new file mode 100644
index 000000000000..3ac3603ad53d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_extern.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* non-existing BPF helper, to test dead code elimination */
+static int (*bpf_missing_helper)(const void *arg1, int arg2) = (void *) 999;
+
+extern int LINUX_KERNEL_VERSION __kconfig;
+extern bool CONFIG_BPF_SYSCALL __kconfig; /* strong */
+extern enum libbpf_tristate CONFIG_TRISTATE __kconfig __weak;
+extern bool CONFIG_BOOL __kconfig __weak;
+extern char CONFIG_CHAR __kconfig __weak;
+extern uint16_t CONFIG_USHORT __kconfig __weak;
+extern int CONFIG_INT __kconfig __weak;
+extern uint64_t CONFIG_ULONG __kconfig __weak;
+extern const char CONFIG_STR[8] __kconfig __weak;
+extern uint64_t CONFIG_MISSING __kconfig __weak;
+
+uint64_t kern_ver = -1;
+uint64_t bpf_syscall = -1;
+uint64_t tristate_val = -1;
+uint64_t bool_val = -1;
+uint64_t char_val = -1;
+uint64_t ushort_val = -1;
+uint64_t int_val = -1;
+uint64_t ulong_val = -1;
+char str_val[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
+uint64_t missing_val = -1;
+
+SEC("raw_tp/sys_enter")
+int handle_sys_enter(struct pt_regs *ctx)
+{
+ int i;
+
+ kern_ver = LINUX_KERNEL_VERSION;
+ bpf_syscall = CONFIG_BPF_SYSCALL;
+ tristate_val = CONFIG_TRISTATE;
+ bool_val = CONFIG_BOOL;
+ char_val = CONFIG_CHAR;
+ ushort_val = CONFIG_USHORT;
+ int_val = CONFIG_INT;
+ ulong_val = CONFIG_ULONG;
+
+ for (i = 0; i < sizeof(CONFIG_STR); i++) {
+ str_val[i] = CONFIG_STR[i];
+ }
+
+ if (CONFIG_MISSING)
+ /* invalid, but dead code - never executed */
+ missing_val = bpf_missing_helper(ctx, 123);
+ else
+ missing_val = 0xDEADC0DE;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
new file mode 100644
index 000000000000..51b3f79df523
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+} data = {};
+
+struct core_reloc_arrays_output {
+ int a2;
+ char b123;
+ int c1c;
+ int d00d;
+ int f01c;
+};
+
+struct core_reloc_arrays_substruct {
+ int c;
+ int d;
+};
+
+struct core_reloc_arrays {
+ int a[5];
+ char b[2][3][4];
+ struct core_reloc_arrays_substruct c[3];
+ struct core_reloc_arrays_substruct d[1][2];
+ struct core_reloc_arrays_substruct f[][2];
+};
+
+#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_arrays(void *ctx)
+{
+ struct core_reloc_arrays *in = (void *)&data.in;
+ struct core_reloc_arrays_output *out = (void *)&data.out;
+
+ if (CORE_READ(&out->a2, &in->a[2]))
+ return 1;
+ if (CORE_READ(&out->b123, &in->b[1][2][3]))
+ return 1;
+ if (CORE_READ(&out->c1c, &in->c[1].c))
+ return 1;
+ if (CORE_READ(&out->d00d, &in->d[0][0].d))
+ return 1;
+ if (CORE_READ(&out->f01c, &in->f[0][1].c))
+ return 1;
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_direct.c b/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_direct.c
new file mode 100644
index 000000000000..56aec20212b5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_direct.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+} data = {};
+
+struct core_reloc_bitfields {
+ /* unsigned bitfields */
+ uint8_t ub1: 1;
+ uint8_t ub2: 2;
+ uint32_t ub7: 7;
+ /* signed bitfields */
+ int8_t sb4: 4;
+ int32_t sb20: 20;
+ /* non-bitfields */
+ uint32_t u32;
+ int32_t s32;
+};
+
+/* bitfield read results, all as plain integers */
+struct core_reloc_bitfields_output {
+ int64_t ub1;
+ int64_t ub2;
+ int64_t ub7;
+ int64_t sb4;
+ int64_t sb20;
+ int64_t u32;
+ int64_t s32;
+};
+
+struct pt_regs;
+
+struct trace_sys_enter {
+ struct pt_regs *regs;
+ long id;
+};
+
+SEC("tp_btf/sys_enter")
+int test_core_bitfields_direct(void *ctx)
+{
+ struct core_reloc_bitfields *in = (void *)&data.in;
+ struct core_reloc_bitfields_output *out = (void *)&data.out;
+
+ out->ub1 = BPF_CORE_READ_BITFIELD(in, ub1);
+ out->ub2 = BPF_CORE_READ_BITFIELD(in, ub2);
+ out->ub7 = BPF_CORE_READ_BITFIELD(in, ub7);
+ out->sb4 = BPF_CORE_READ_BITFIELD(in, sb4);
+ out->sb20 = BPF_CORE_READ_BITFIELD(in, sb20);
+ out->u32 = BPF_CORE_READ_BITFIELD(in, u32);
+ out->s32 = BPF_CORE_READ_BITFIELD(in, s32);
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c b/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c
new file mode 100644
index 000000000000..ab1e647aeb31
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+} data = {};
+
+struct core_reloc_bitfields {
+ /* unsigned bitfields */
+ uint8_t ub1: 1;
+ uint8_t ub2: 2;
+ uint32_t ub7: 7;
+ /* signed bitfields */
+ int8_t sb4: 4;
+ int32_t sb20: 20;
+ /* non-bitfields */
+ uint32_t u32;
+ int32_t s32;
+};
+
+/* bitfield read results, all as plain integers */
+struct core_reloc_bitfields_output {
+ int64_t ub1;
+ int64_t ub2;
+ int64_t ub7;
+ int64_t sb4;
+ int64_t sb20;
+ int64_t u32;
+ int64_t s32;
+};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_bitfields(void *ctx)
+{
+ struct core_reloc_bitfields *in = (void *)&data.in;
+ struct core_reloc_bitfields_output *out = (void *)&data.out;
+ uint64_t res;
+
+ out->ub1 = BPF_CORE_READ_BITFIELD_PROBED(in, ub1);
+ out->ub2 = BPF_CORE_READ_BITFIELD_PROBED(in, ub2);
+ out->ub7 = BPF_CORE_READ_BITFIELD_PROBED(in, ub7);
+ out->sb4 = BPF_CORE_READ_BITFIELD_PROBED(in, sb4);
+ out->sb20 = BPF_CORE_READ_BITFIELD_PROBED(in, sb20);
+ out->u32 = BPF_CORE_READ_BITFIELD_PROBED(in, u32);
+ out->s32 = BPF_CORE_READ_BITFIELD_PROBED(in, s32);
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c b/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c
new file mode 100644
index 000000000000..7e45e2bdf6cd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+} data = {};
+
+struct core_reloc_existence_output {
+ int a_exists;
+ int a_value;
+ int b_exists;
+ int b_value;
+ int c_exists;
+ int c_value;
+ int arr_exists;
+ int arr_value;
+ int s_exists;
+ int s_value;
+};
+
+struct core_reloc_existence {
+ struct {
+ int x;
+ } s;
+ int arr[1];
+ int a;
+ struct {
+ int b;
+ };
+ int c;
+};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_existence(void *ctx)
+{
+ struct core_reloc_existence *in = (void *)&data.in;
+ struct core_reloc_existence_output *out = (void *)&data.out;
+
+ out->a_exists = bpf_core_field_exists(in->a);
+ if (bpf_core_field_exists(in->a))
+ out->a_value = BPF_CORE_READ(in, a);
+ else
+ out->a_value = 0xff000001u;
+
+ out->b_exists = bpf_core_field_exists(in->b);
+ if (bpf_core_field_exists(in->b))
+ out->b_value = BPF_CORE_READ(in, b);
+ else
+ out->b_value = 0xff000002u;
+
+ out->c_exists = bpf_core_field_exists(in->c);
+ if (bpf_core_field_exists(in->c))
+ out->c_value = BPF_CORE_READ(in, c);
+ else
+ out->c_value = 0xff000003u;
+
+ out->arr_exists = bpf_core_field_exists(in->arr);
+ if (bpf_core_field_exists(in->arr))
+ out->arr_value = BPF_CORE_READ(in, arr[0]);
+ else
+ out->arr_value = 0xff000004u;
+
+ out->s_exists = bpf_core_field_exists(in->s);
+ if (bpf_core_field_exists(in->s))
+ out->s_value = BPF_CORE_READ(in, s.x);
+ else
+ out->s_value = 0xff000005u;
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c b/tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c
new file mode 100644
index 000000000000..525acc2f841b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+} data = {};
+
+struct core_reloc_flavors {
+ int a;
+ int b;
+ int c;
+};
+
+/* local flavor with reversed layout */
+struct core_reloc_flavors___reversed {
+ int c;
+ int b;
+ int a;
+};
+
+/* local flavor with nested/overlapping layout */
+struct core_reloc_flavors___weird {
+ struct {
+ int b;
+ };
+ /* a and c overlap in local flavor, but this should still work
+ * correctly with target original flavor
+ */
+ union {
+ int a;
+ int c;
+ };
+};
+
+#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_flavors(void *ctx)
+{
+ struct core_reloc_flavors *in_orig = (void *)&data.in;
+ struct core_reloc_flavors___reversed *in_rev = (void *)&data.in;
+ struct core_reloc_flavors___weird *in_weird = (void *)&data.in;
+ struct core_reloc_flavors *out = (void *)&data.out;
+
+ /* read a using weird layout */
+ if (CORE_READ(&out->a, &in_weird->a))
+ return 1;
+ /* read b using reversed layout */
+ if (CORE_READ(&out->b, &in_rev->b))
+ return 1;
+ /* read c using original layout */
+ if (CORE_READ(&out->c, &in_orig->c))
+ return 1;
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_ints.c b/tools/testing/selftests/bpf/progs/test_core_reloc_ints.c
new file mode 100644
index 000000000000..6b5290739806
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_ints.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+} data = {};
+
+struct core_reloc_ints {
+ uint8_t u8_field;
+ int8_t s8_field;
+ uint16_t u16_field;
+ int16_t s16_field;
+ uint32_t u32_field;
+ int32_t s32_field;
+ uint64_t u64_field;
+ int64_t s64_field;
+};
+
+#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_ints(void *ctx)
+{
+ struct core_reloc_ints *in = (void *)&data.in;
+ struct core_reloc_ints *out = (void *)&data.out;
+
+ if (CORE_READ(&out->u8_field, &in->u8_field) ||
+ CORE_READ(&out->s8_field, &in->s8_field) ||
+ CORE_READ(&out->u16_field, &in->u16_field) ||
+ CORE_READ(&out->s16_field, &in->s16_field) ||
+ CORE_READ(&out->u32_field, &in->u32_field) ||
+ CORE_READ(&out->s32_field, &in->s32_field) ||
+ CORE_READ(&out->u64_field, &in->u64_field) ||
+ CORE_READ(&out->s64_field, &in->s64_field))
+ return 1;
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
new file mode 100644
index 000000000000..aba928fd60d3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+ uint64_t my_pid_tgid;
+} data = {};
+
+struct core_reloc_kernel_output {
+ int valid[10];
+ /* we have test_progs[-flavor], so cut flavor part */
+ char comm[sizeof("test_progs")];
+ int comm_len;
+};
+
+struct task_struct {
+ int pid;
+ int tgid;
+ char comm[16];
+ struct task_struct *group_leader;
+};
+
+#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_kernel(void *ctx)
+{
+ struct task_struct *task = (void *)bpf_get_current_task();
+ struct core_reloc_kernel_output *out = (void *)&data.out;
+ uint64_t pid_tgid = bpf_get_current_pid_tgid();
+ uint32_t real_tgid = (uint32_t)pid_tgid;
+ int pid, tgid;
+
+ if (data.my_pid_tgid != pid_tgid)
+ return 0;
+
+ if (CORE_READ(&pid, &task->pid) ||
+ CORE_READ(&tgid, &task->tgid))
+ return 1;
+
+ /* validate pid + tgid matches */
+ out->valid[0] = (((uint64_t)pid << 32) | tgid) == pid_tgid;
+
+ /* test variadic BPF_CORE_READ macros */
+ out->valid[1] = BPF_CORE_READ(task,
+ tgid) == real_tgid;
+ out->valid[2] = BPF_CORE_READ(task,
+ group_leader,
+ tgid) == real_tgid;
+ out->valid[3] = BPF_CORE_READ(task,
+ group_leader, group_leader,
+ tgid) == real_tgid;
+ out->valid[4] = BPF_CORE_READ(task,
+ group_leader, group_leader, group_leader,
+ tgid) == real_tgid;
+ out->valid[5] = BPF_CORE_READ(task,
+ group_leader, group_leader, group_leader,
+ group_leader,
+ tgid) == real_tgid;
+ out->valid[6] = BPF_CORE_READ(task,
+ group_leader, group_leader, group_leader,
+ group_leader, group_leader,
+ tgid) == real_tgid;
+ out->valid[7] = BPF_CORE_READ(task,
+ group_leader, group_leader, group_leader,
+ group_leader, group_leader, group_leader,
+ tgid) == real_tgid;
+ out->valid[8] = BPF_CORE_READ(task,
+ group_leader, group_leader, group_leader,
+ group_leader, group_leader, group_leader,
+ group_leader,
+ tgid) == real_tgid;
+ out->valid[9] = BPF_CORE_READ(task,
+ group_leader, group_leader, group_leader,
+ group_leader, group_leader, group_leader,
+ group_leader, group_leader,
+ tgid) == real_tgid;
+
+ /* test BPF_CORE_READ_STR_INTO() returns correct code and contents */
+ out->comm_len = BPF_CORE_READ_STR_INTO(
+ &out->comm, task,
+ group_leader, group_leader, group_leader, group_leader,
+ group_leader, group_leader, group_leader, group_leader,
+ comm);
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_misc.c b/tools/testing/selftests/bpf/progs/test_core_reloc_misc.c
new file mode 100644
index 000000000000..d5756dbdef82
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_misc.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+} data = {};
+
+struct core_reloc_misc_output {
+ int a, b, c;
+};
+
+struct core_reloc_misc___a {
+ int a1;
+ int a2;
+};
+
+struct core_reloc_misc___b {
+ int b1;
+ int b2;
+};
+
+/* fixed two first members, can be extended with new fields */
+struct core_reloc_misc_extensible {
+ int a;
+ int b;
+};
+
+#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_misc(void *ctx)
+{
+ struct core_reloc_misc___a *in_a = (void *)&data.in;
+ struct core_reloc_misc___b *in_b = (void *)&data.in;
+ struct core_reloc_misc_extensible *in_ext = (void *)&data.in;
+ struct core_reloc_misc_output *out = (void *)&data.out;
+
+ /* record two different relocations with the same accessor string */
+ if (CORE_READ(&out->a, &in_a->a1) || /* accessor: 0:0 */
+ CORE_READ(&out->b, &in_b->b1)) /* accessor: 0:0 */
+ return 1;
+
+ /* Validate relocations capture array-only accesses for structs with
+ * fixed header, but with potentially extendable tail. This will read
+ * first 4 bytes of 2nd element of in_ext array of potentially
+ * variably sized struct core_reloc_misc_extensible. */
+ if (CORE_READ(&out->c, &in_ext[2])) /* accessor: 2 */
+ return 1;
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c
new file mode 100644
index 000000000000..8b533db4a7a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+} data = {};
+
+struct core_reloc_mods_output {
+ int a, b, c, d, e, f, g, h;
+};
+
+typedef const int int_t;
+typedef const char *char_ptr_t;
+typedef const int arr_t[7];
+
+struct core_reloc_mods_substruct {
+ int x;
+ int y;
+};
+
+typedef struct {
+ int x;
+ int y;
+} core_reloc_mods_substruct_t;
+
+struct core_reloc_mods {
+ int a;
+ int_t b;
+ char *c;
+ char_ptr_t d;
+ int e[3];
+ arr_t f;
+ struct core_reloc_mods_substruct g;
+ core_reloc_mods_substruct_t h;
+};
+
+#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_mods(void *ctx)
+{
+ struct core_reloc_mods *in = (void *)&data.in;
+ struct core_reloc_mods_output *out = (void *)&data.out;
+
+ if (CORE_READ(&out->a, &in->a) ||
+ CORE_READ(&out->b, &in->b) ||
+ CORE_READ(&out->c, &in->c) ||
+ CORE_READ(&out->d, &in->d) ||
+ CORE_READ(&out->e, &in->e[2]) ||
+ CORE_READ(&out->f, &in->f[1]) ||
+ CORE_READ(&out->g, &in->g.x) ||
+ CORE_READ(&out->h, &in->h.y))
+ return 1;
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c b/tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c
new file mode 100644
index 000000000000..2b4b6d49c677
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+} data = {};
+
+struct core_reloc_nesting_substruct {
+ int a;
+};
+
+union core_reloc_nesting_subunion {
+ int b;
+};
+
+/* int a.a.a and b.b.b accesses */
+struct core_reloc_nesting {
+ union {
+ struct core_reloc_nesting_substruct a;
+ } a;
+ struct {
+ union core_reloc_nesting_subunion b;
+ } b;
+};
+
+#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_nesting(void *ctx)
+{
+ struct core_reloc_nesting *in = (void *)&data.in;
+ struct core_reloc_nesting *out = (void *)&data.out;
+
+ if (CORE_READ(&out->a.a.a, &in->a.a.a))
+ return 1;
+ if (CORE_READ(&out->b.b.b, &in->b.b.b))
+ return 1;
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_primitives.c b/tools/testing/selftests/bpf/progs/test_core_reloc_primitives.c
new file mode 100644
index 000000000000..2a8975678aa6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_primitives.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+} data = {};
+
+enum core_reloc_primitives_enum {
+ A = 0,
+ B = 1,
+};
+
+struct core_reloc_primitives {
+ char a;
+ int b;
+ enum core_reloc_primitives_enum c;
+ void *d;
+ int (*f)(const char *);
+};
+
+#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_primitives(void *ctx)
+{
+ struct core_reloc_primitives *in = (void *)&data.in;
+ struct core_reloc_primitives *out = (void *)&data.out;
+
+ if (CORE_READ(&out->a, &in->a) ||
+ CORE_READ(&out->b, &in->b) ||
+ CORE_READ(&out->c, &in->c) ||
+ CORE_READ(&out->d, &in->d) ||
+ CORE_READ(&out->f, &in->f))
+ return 1;
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_ptr_as_arr.c b/tools/testing/selftests/bpf/progs/test_core_reloc_ptr_as_arr.c
new file mode 100644
index 000000000000..ca61a5183b88
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_ptr_as_arr.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+} data = {};
+
+struct core_reloc_ptr_as_arr {
+ int a;
+};
+
+#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_ptr_as_arr(void *ctx)
+{
+ struct core_reloc_ptr_as_arr *in = (void *)&data.in;
+ struct core_reloc_ptr_as_arr *out = (void *)&data.out;
+
+ if (CORE_READ(&out->a, &in[2].a))
+ return 1;
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
new file mode 100644
index 000000000000..d7fb6cfc7891
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+} data = {};
+
+struct core_reloc_size_output {
+ int int_sz;
+ int struct_sz;
+ int union_sz;
+ int arr_sz;
+ int arr_elem_sz;
+ int ptr_sz;
+ int enum_sz;
+};
+
+struct core_reloc_size {
+ int int_field;
+ struct { int x; } struct_field;
+ union { int x; } union_field;
+ int arr_field[4];
+ void *ptr_field;
+ enum { VALUE = 123 } enum_field;
+};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_size(void *ctx)
+{
+ struct core_reloc_size *in = (void *)&data.in;
+ struct core_reloc_size_output *out = (void *)&data.out;
+
+ out->int_sz = bpf_core_field_size(in->int_field);
+ out->struct_sz = bpf_core_field_size(in->struct_field);
+ out->union_sz = bpf_core_field_size(in->union_field);
+ out->arr_sz = bpf_core_field_size(in->arr_field);
+ out->arr_elem_sz = bpf_core_field_size(in->arr_field[0]);
+ out->ptr_sz = bpf_core_field_size(in->ptr_field);
+ out->enum_sz = bpf_core_field_size(in->enum_field);
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
index 33254b771384..29817a703984 100644
--- a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
+++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
/* Permit pretty deep stack traces */
#define MAX_STACK_RAWTP 100
@@ -47,15 +47,14 @@ struct {
* issue and avoid complicated C programming massaging.
* This is an acceptable workaround since there is one entry here.
*/
-typedef __u64 raw_stack_trace_t[2 * MAX_STACK_RAWTP];
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(max_entries, 1);
__type(key, __u32);
- __type(value, raw_stack_trace_t);
+ __type(value, __u64[2 * MAX_STACK_RAWTP]);
} rawdata_map SEC(".maps");
-SEC("tracepoint/raw_syscalls/sys_enter")
+SEC("raw_tracepoint/sys_enter")
int bpf_prog1(void *ctx)
{
int max_len, max_buildid_len, usize, ksize, total_size;
@@ -100,4 +99,3 @@ int bpf_prog1(void *ctx)
}
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/progs/test_global_data.c b/tools/testing/selftests/bpf/progs/test_global_data.c
index 32a6073acb99..dd7a4d3dbc0d 100644
--- a/tools/testing/selftests/bpf/progs/test_global_data.c
+++ b/tools/testing/selftests/bpf/progs/test_global_data.c
@@ -5,7 +5,7 @@
#include <linux/pkt_cls.h>
#include <string.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
diff --git a/tools/testing/selftests/bpf/progs/test_global_func1.c b/tools/testing/selftests/bpf/progs/test_global_func1.c
new file mode 100644
index 000000000000..880260f6d536
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func1.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#ifndef MAX_STACK
+#define MAX_STACK (512 - 3 * 32 + 8)
+#endif
+
+static __attribute__ ((noinline))
+int f0(int var, struct __sk_buff *skb)
+{
+ return skb->len;
+}
+
+__attribute__ ((noinline))
+int f1(struct __sk_buff *skb)
+{
+ volatile char buf[MAX_STACK] = {};
+
+ return f0(0, skb) + skb->len;
+}
+
+int f3(int, struct __sk_buff *skb, int);
+
+__attribute__ ((noinline))
+int f2(int val, struct __sk_buff *skb)
+{
+ return f1(skb) + f3(val, skb, 1);
+}
+
+__attribute__ ((noinline))
+int f3(int val, struct __sk_buff *skb, int var)
+{
+ volatile char buf[MAX_STACK] = {};
+
+ return skb->ifindex * val * var;
+}
+
+SEC("classifier/test")
+int test_cls(struct __sk_buff *skb)
+{
+ return f0(1, skb) + f1(skb) + f2(2, skb) + f3(3, skb, 4);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func2.c b/tools/testing/selftests/bpf/progs/test_global_func2.c
new file mode 100644
index 000000000000..2c18d82923a2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func2.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+#define MAX_STACK (512 - 3 * 32)
+#include "test_global_func1.c"
diff --git a/tools/testing/selftests/bpf/progs/test_global_func3.c b/tools/testing/selftests/bpf/progs/test_global_func3.c
new file mode 100644
index 000000000000..86f0ecb304fc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func3.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__attribute__ ((noinline))
+int f1(struct __sk_buff *skb)
+{
+ return skb->len;
+}
+
+__attribute__ ((noinline))
+int f2(int val, struct __sk_buff *skb)
+{
+ return f1(skb) + val;
+}
+
+__attribute__ ((noinline))
+int f3(int val, struct __sk_buff *skb, int var)
+{
+ return f2(var, skb) + val;
+}
+
+__attribute__ ((noinline))
+int f4(struct __sk_buff *skb)
+{
+ return f3(1, skb, 2);
+}
+
+__attribute__ ((noinline))
+int f5(struct __sk_buff *skb)
+{
+ return f4(skb);
+}
+
+__attribute__ ((noinline))
+int f6(struct __sk_buff *skb)
+{
+ return f5(skb);
+}
+
+__attribute__ ((noinline))
+int f7(struct __sk_buff *skb)
+{
+ return f6(skb);
+}
+
+#ifndef NO_FN8
+__attribute__ ((noinline))
+int f8(struct __sk_buff *skb)
+{
+ return f7(skb);
+}
+#endif
+
+SEC("classifier/test")
+int test_cls(struct __sk_buff *skb)
+{
+#ifndef NO_FN8
+ return f8(skb);
+#else
+ return f7(skb);
+#endif
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func4.c b/tools/testing/selftests/bpf/progs/test_global_func4.c
new file mode 100644
index 000000000000..610f75edf276
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func4.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+#define NO_FN8
+#include "test_global_func3.c"
diff --git a/tools/testing/selftests/bpf/progs/test_global_func5.c b/tools/testing/selftests/bpf/progs/test_global_func5.c
new file mode 100644
index 000000000000..260c25b827ef
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func5.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__attribute__ ((noinline))
+int f1(struct __sk_buff *skb)
+{
+ return skb->len;
+}
+
+int f3(int, struct __sk_buff *skb);
+
+__attribute__ ((noinline))
+int f2(int val, struct __sk_buff *skb)
+{
+ return f1(skb) + f3(val, (void *)&val); /* type mismatch */
+}
+
+__attribute__ ((noinline))
+int f3(int val, struct __sk_buff *skb)
+{
+ return skb->ifindex * val;
+}
+
+SEC("classifier/test")
+int test_cls(struct __sk_buff *skb)
+{
+ return f1(skb) + f2(2, skb) + f3(3, skb);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func6.c b/tools/testing/selftests/bpf/progs/test_global_func6.c
new file mode 100644
index 000000000000..69e19c64e10b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func6.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__attribute__ ((noinline))
+int f1(struct __sk_buff *skb)
+{
+ return skb->len;
+}
+
+int f3(int, struct __sk_buff *skb);
+
+__attribute__ ((noinline))
+int f2(int val, struct __sk_buff *skb)
+{
+ return f1(skb) + f3(val, skb + 1); /* type mismatch */
+}
+
+__attribute__ ((noinline))
+int f3(int val, struct __sk_buff *skb)
+{
+ return skb->ifindex * val;
+}
+
+SEC("classifier/test")
+int test_cls(struct __sk_buff *skb)
+{
+ return f1(skb) + f2(2, skb) + f3(3, skb);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func7.c b/tools/testing/selftests/bpf/progs/test_global_func7.c
new file mode 100644
index 000000000000..309b3f6136bd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func7.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__attribute__ ((noinline))
+void foo(struct __sk_buff *skb)
+{
+ skb->tc_index = 0;
+}
+
+SEC("classifier/test")
+int test_cls(struct __sk_buff *skb)
+{
+ foo(skb);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb.c b/tools/testing/selftests/bpf/progs/test_l4lb.c
index 1d652ee8e73d..33493911d87a 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb.c
@@ -17,9 +17,9 @@
#include <linux/icmpv6.h>
#include <linux/tcp.h>
#include <linux/udp.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#include "test_iptunnel_common.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_endian.h>
int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
index 2e4efe70b1e5..28351936a438 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
@@ -13,9 +13,9 @@
#include <linux/icmpv6.h>
#include <linux/tcp.h>
#include <linux/udp.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#include "test_iptunnel_common.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_endian.h>
int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c b/tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c
index 4147130cc3b7..7a6620671a83 100644
--- a/tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c
@@ -5,7 +5,7 @@
#include <linux/bpf.h>
#include <linux/lirc.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
SEC("lirc_mode2")
int bpf_decoder(unsigned int *sample)
diff --git a/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c b/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c
index c957d6dfe6d7..d6cb986e7533 100644
--- a/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c
+++ b/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c
@@ -4,8 +4,8 @@
#include <linux/bpf.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
struct grehdr {
__be16 flags;
diff --git a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
index a334a0e882e4..48ff2b2ad5e7 100644
--- a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
+++ b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
@@ -3,8 +3,8 @@
#include <errno.h>
#include <linux/seg6_local.h>
#include <linux/bpf.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
/* Packet parsing state machine helpers. */
#define cursor_advance(_cursor, _len) \
@@ -12,10 +12,6 @@
#define SR6_FLAG_ALERT (1 << 4)
-#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \
- 0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32))
-#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \
- 0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32))
#define BPF_PACKET_HEADER __attribute__((packed))
struct ip6_t {
@@ -276,8 +272,8 @@ int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh)
return 0;
// check if egress TLV value is correct
- if (ntohll(egr_addr.hi) == 0xfd00000000000000 &&
- ntohll(egr_addr.lo) == 0x4)
+ if (bpf_be64_to_cpu(egr_addr.hi) == 0xfd00000000000000 &&
+ bpf_be64_to_cpu(egr_addr.lo) == 0x4)
return 1;
}
@@ -308,8 +304,8 @@ int __encap_srh(struct __sk_buff *skb)
#pragma clang loop unroll(full)
for (unsigned long long lo = 0; lo < 4; lo++) {
- seg->lo = htonll(4 - lo);
- seg->hi = htonll(hi);
+ seg->lo = bpf_cpu_to_be64(4 - lo);
+ seg->hi = bpf_cpu_to_be64(hi);
seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg));
}
@@ -349,8 +345,8 @@ int __add_egr_x(struct __sk_buff *skb)
if (err)
return BPF_DROP;
- addr.lo = htonll(lo);
- addr.hi = htonll(hi);
+ addr.lo = bpf_cpu_to_be64(lo);
+ addr.hi = bpf_cpu_to_be64(hi);
err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
(void *)&addr, sizeof(addr));
if (err)
diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c
index 113226115365..1cfeb940cf9f 100644
--- a/tools/testing/selftests/bpf/progs/test_map_in_map.c
+++ b/tools/testing/selftests/bpf/progs/test_map_in_map.c
@@ -3,7 +3,7 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <linux/types.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
struct {
__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
diff --git a/tools/testing/selftests/bpf/progs/test_map_lock.c b/tools/testing/selftests/bpf/progs/test_map_lock.c
index bb7ce35f691b..b5c07ae7b68f 100644
--- a/tools/testing/selftests/bpf/progs/test_map_lock.c
+++ b/tools/testing/selftests/bpf/progs/test_map_lock.c
@@ -2,7 +2,7 @@
// Copyright (c) 2019 Facebook
#include <linux/bpf.h>
#include <linux/version.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#define VAR_NUM 16
diff --git a/tools/testing/selftests/bpf/progs/test_mmap.c b/tools/testing/selftests/bpf/progs/test_mmap.c
new file mode 100644
index 000000000000..6239596cd14e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_mmap.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 512 * 4); /* at least 4 pages of data */
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __type(key, __u32);
+ __type(value, __u64);
+} data_map SEC(".maps");
+
+__u64 in_val = 0;
+__u64 out_val = 0;
+
+SEC("raw_tracepoint/sys_enter")
+int test_mmap(void *ctx)
+{
+ int zero = 0, one = 1, two = 2, far = 1500;
+ __u64 val, *p;
+
+ out_val = in_val;
+
+ /* data_map[2] = in_val; */
+ bpf_map_update_elem(&data_map, &two, (const void *)&in_val, 0);
+
+ /* data_map[1] = data_map[0] * 2; */
+ p = bpf_map_lookup_elem(&data_map, &zero);
+ if (p) {
+ val = (*p) * 2;
+ bpf_map_update_elem(&data_map, &one, &val, 0);
+ }
+
+ /* data_map[far] = in_val * 3; */
+ val = in_val * 3;
+ bpf_map_update_elem(&data_map, &far, &val, 0);
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_obj_id.c b/tools/testing/selftests/bpf/progs/test_obj_id.c
index 3d30c02bdae9..98b9de2fafd0 100644
--- a/tools/testing/selftests/bpf/progs/test_obj_id.c
+++ b/tools/testing/selftests/bpf/progs/test_obj_id.c
@@ -4,7 +4,7 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
/* It is a dumb bpf program such that it must have no
* issue to be loaded since testing the verifier is
diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c
new file mode 100644
index 000000000000..bfe9fbcb9684
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_overhead.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+#include <stdbool.h>
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/ptrace.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_trace_helpers.h"
+
+struct task_struct;
+
+SEC("kprobe/__set_task_comm")
+int BPF_KPROBE(prog1, struct task_struct *tsk, const char *buf, bool exec)
+{
+ return !tsk;
+}
+
+SEC("kretprobe/__set_task_comm")
+int BPF_KRETPROBE(prog2,
+ struct task_struct *tsk, const char *buf, bool exec,
+ int ret)
+{
+ return !PT_REGS_PARM1(ctx) && ret;
+}
+
+SEC("raw_tp/task_rename")
+int prog3(struct bpf_raw_tracepoint_args *ctx)
+{
+ return !ctx->args[0];
+}
+
+SEC("fentry/__set_task_comm")
+int BPF_PROG(prog4, struct task_struct *tsk, const char *buf, bool exec)
+{
+ return !tsk;
+}
+
+SEC("fexit/__set_task_comm")
+int BPF_PROG(prog5, struct task_struct *tsk, const char *buf, bool exec)
+{
+ return !tsk;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_perf_buffer.c b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
index 876c27deb65a..ebfcc9f50c35 100644
--- a/tools/testing/selftests/bpf/progs/test_perf_buffer.c
+++ b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
@@ -3,7 +3,8 @@
#include <linux/ptrace.h>
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_trace_helpers.h"
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
@@ -12,7 +13,7 @@ struct {
} perf_buf_map SEC(".maps");
SEC("kprobe/sys_nanosleep")
-int handle_sys_nanosleep_entry(struct pt_regs *ctx)
+int BPF_KPROBE(handle_sys_nanosleep_entry)
{
int cpu = bpf_get_smp_processor_id();
@@ -22,4 +23,3 @@ int handle_sys_nanosleep_entry(struct pt_regs *ctx)
}
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_pinning.c b/tools/testing/selftests/bpf/progs/test_pinning.c
new file mode 100644
index 000000000000..4ef2630292b2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_pinning.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+int _version SEC("version") = 1;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+ __uint(pinning, LIBBPF_PIN_BY_NAME);
+} pinmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} nopinmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+ __uint(pinning, LIBBPF_PIN_NONE);
+} nopinmap2 SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_pinning_invalid.c b/tools/testing/selftests/bpf/progs/test_pinning_invalid.c
new file mode 100644
index 000000000000..5412e0c732c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_pinning_invalid.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+int _version SEC("version") = 1;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+ __uint(pinning, 2); /* invalid */
+} nopinmap3 SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c
index 7cf42d14103f..e72eba4a93d2 100644
--- a/tools/testing/selftests/bpf/progs/test_pkt_access.c
+++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c
@@ -11,14 +11,76 @@
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/pkt_cls.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define barrier() __asm__ __volatile__("": : :"memory")
int _version SEC("version") = 1;
-SEC("test1")
-int process(struct __sk_buff *skb)
+/* llvm will optimize both subprograms into exactly the same BPF assembly
+ *
+ * Disassembly of section .text:
+ *
+ * 0000000000000000 test_pkt_access_subprog1:
+ * ; return skb->len * 2;
+ * 0: 61 10 00 00 00 00 00 00 r0 = *(u32 *)(r1 + 0)
+ * 1: 64 00 00 00 01 00 00 00 w0 <<= 1
+ * 2: 95 00 00 00 00 00 00 00 exit
+ *
+ * 0000000000000018 test_pkt_access_subprog2:
+ * ; return skb->len * val;
+ * 3: 61 10 00 00 00 00 00 00 r0 = *(u32 *)(r1 + 0)
+ * 4: 64 00 00 00 01 00 00 00 w0 <<= 1
+ * 5: 95 00 00 00 00 00 00 00 exit
+ *
+ * Which makes it an interesting test for BTF-enabled verifier.
+ */
+static __attribute__ ((noinline))
+int test_pkt_access_subprog1(volatile struct __sk_buff *skb)
+{
+ return skb->len * 2;
+}
+
+static __attribute__ ((noinline))
+int test_pkt_access_subprog2(int val, volatile struct __sk_buff *skb)
+{
+ return skb->len * val;
+}
+
+#define MAX_STACK (512 - 2 * 32)
+
+__attribute__ ((noinline))
+int get_skb_len(struct __sk_buff *skb)
+{
+ volatile char buf[MAX_STACK] = {};
+
+ return skb->len;
+}
+
+__attribute__ ((noinline))
+int get_constant(long val)
+{
+ return val - 122;
+}
+
+int get_skb_ifindex(int, struct __sk_buff *skb, int);
+
+__attribute__ ((noinline))
+int test_pkt_access_subprog3(int val, struct __sk_buff *skb)
+{
+ return get_skb_len(skb) * get_skb_ifindex(val, skb, get_constant(123));
+}
+
+__attribute__ ((noinline))
+int get_skb_ifindex(int val, struct __sk_buff *skb, int var)
+{
+ volatile char buf[MAX_STACK] = {};
+
+ return skb->ifindex * val * var;
+}
+
+SEC("classifier/test_pkt_access")
+int test_pkt_access(struct __sk_buff *skb)
{
void *data_end = (void *)(long)skb->data_end;
void *data = (void *)(long)skb->data;
@@ -48,6 +110,12 @@ int process(struct __sk_buff *skb)
tcp = (struct tcphdr *)((void *)(ip6h) + ihl_len);
}
+ if (test_pkt_access_subprog1(skb) != skb->len * 2)
+ return TC_ACT_SHOT;
+ if (test_pkt_access_subprog2(2, skb) != skb->len * 2)
+ return TC_ACT_SHOT;
+ if (test_pkt_access_subprog3(3, skb) != skb->len * 3 * skb->ifindex)
+ return TC_ACT_SHOT;
if (tcp) {
if (((void *)(tcp) + 20) > data_end || proto != 6)
return TC_ACT_SHOT;
diff --git a/tools/testing/selftests/bpf/progs/test_pkt_md_access.c b/tools/testing/selftests/bpf/progs/test_pkt_md_access.c
index 3d039e18bf82..610c74ea9f64 100644
--- a/tools/testing/selftests/bpf/progs/test_pkt_md_access.c
+++ b/tools/testing/selftests/bpf/progs/test_pkt_md_access.c
@@ -5,7 +5,7 @@
#include <string.h>
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
int _version SEC("version") = 1;
@@ -27,8 +27,8 @@ int _version SEC("version") = 1;
}
#endif
-SEC("test1")
-int process(struct __sk_buff *skb)
+SEC("classifier/test_pkt_md_access")
+int test_pkt_md_access(struct __sk_buff *skb)
{
TEST_FIELD(__u8, len, 0xFF);
TEST_FIELD(__u16, len, 0xFFFF);
diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c
new file mode 100644
index 000000000000..d556b1572cc6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_probe_user.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+
+#include <netinet/in.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_trace_helpers.h"
+
+static struct sockaddr_in old;
+
+SEC("kprobe/__sys_connect")
+int BPF_KPROBE(handle_sys_connect)
+{
+ void *ptr = (void *)PT_REGS_PARM2(ctx);
+ struct sockaddr_in new;
+
+ bpf_probe_read_user(&old, sizeof(old), ptr);
+ __builtin_memset(&new, 0xab, sizeof(new));
+ bpf_probe_write_user(ptr, &new, sizeof(new));
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_queue_stack_map.h b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h
index 0e014d3b2b36..4dd9806ad73b 100644
--- a/tools/testing/selftests/bpf/test_queue_stack_map.h
+++ b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h
@@ -6,7 +6,7 @@
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/pkt_cls.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_rdonly_maps.c b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c
new file mode 100644
index 000000000000..ecbeea2df259
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+static volatile const struct {
+ unsigned a[4];
+ /*
+ * if the struct's size is multiple of 16, compiler will put it into
+ * .rodata.cst16 section, which is not recognized by libbpf; work
+ * around this by ensuring we don't have 16-aligned struct
+ */
+ char _y;
+} rdonly_values = { .a = {2, 3, 4, 5} };
+
+static volatile struct {
+ unsigned did_run;
+ unsigned iters;
+ unsigned sum;
+} res;
+
+SEC("raw_tracepoint/sys_enter:skip_loop")
+int skip_loop(struct pt_regs *ctx)
+{
+ /* prevent compiler to optimize everything out */
+ unsigned * volatile p = (void *)&rdonly_values.a;
+ unsigned iters = 0, sum = 0;
+
+ /* we should never enter this loop */
+ while (*p & 1) {
+ iters++;
+ sum += *p;
+ p++;
+ }
+ res.did_run = 1;
+ res.iters = iters;
+ res.sum = sum;
+ return 0;
+}
+
+SEC("raw_tracepoint/sys_enter:part_loop")
+int part_loop(struct pt_regs *ctx)
+{
+ /* prevent compiler to optimize everything out */
+ unsigned * volatile p = (void *)&rdonly_values.a;
+ unsigned iters = 0, sum = 0;
+
+ /* validate verifier can derive loop termination */
+ while (*p < 5) {
+ iters++;
+ sum += *p;
+ p++;
+ }
+ res.did_run = 1;
+ res.iters = iters;
+ res.sum = sum;
+ return 0;
+}
+
+SEC("raw_tracepoint/sys_enter:full_loop")
+int full_loop(struct pt_regs *ctx)
+{
+ /* prevent compiler to optimize everything out */
+ unsigned * volatile p = (void *)&rdonly_values.a;
+ int i = sizeof(rdonly_values.a) / sizeof(rdonly_values.a[0]);
+ unsigned iters = 0, sum = 0;
+
+ /* validate verifier can allow full loop as well */
+ while (i > 0 ) {
+ iters++;
+ sum += *p;
+ p++;
+ i--;
+ }
+ res.did_run = 1;
+ res.iters = iters;
+ res.sum = sum;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_seg6_loop.c b/tools/testing/selftests/bpf/progs/test_seg6_loop.c
index 1dbe1d4d467e..a7278f064368 100644
--- a/tools/testing/selftests/bpf/progs/test_seg6_loop.c
+++ b/tools/testing/selftests/bpf/progs/test_seg6_loop.c
@@ -3,8 +3,8 @@
#include <errno.h>
#include <linux/seg6_local.h>
#include <linux/bpf.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
/* Packet parsing state machine helpers. */
#define cursor_advance(_cursor, _len) \
@@ -12,10 +12,6 @@
#define SR6_FLAG_ALERT (1 << 4)
-#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \
- 0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32))
-#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \
- 0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32))
#define BPF_PACKET_HEADER __attribute__((packed))
struct ip6_t {
@@ -136,8 +132,10 @@ static __always_inline int is_valid_tlv_boundary(struct __sk_buff *skb,
*pad_off = 0;
// we can only go as far as ~10 TLVs due to the BPF max stack size
+ // workaround: define induction variable "i" as "long" instead
+ // of "int" to prevent alu32 sub-register spilling.
#pragma clang loop unroll(disable)
- for (int i = 0; i < 100; i++) {
+ for (long i = 0; i < 100; i++) {
struct sr6_tlv_t tlv;
if (cur_off == *tlv_off)
@@ -251,8 +249,8 @@ int __add_egr_x(struct __sk_buff *skb)
if (err)
return BPF_DROP;
- addr.lo = htonll(lo);
- addr.hi = htonll(hi);
+ addr.lo = bpf_cpu_to_be64(lo);
+ addr.hi = bpf_cpu_to_be64(hi);
err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
(void *)&addr, sizeof(addr));
if (err)
diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
index ea7d84f01235..26e77dcc7e91 100644
--- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
@@ -11,8 +11,8 @@
#include <linux/types.h>
#include <linux/if_ether.h>
-#include "bpf_endian.h"
-#include "bpf_helpers.h"
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
#include "test_select_reuseport_common.h"
int _version SEC("version") = 1;
@@ -62,7 +62,7 @@ struct {
goto done; \
})
-SEC("select_by_skb_data")
+SEC("sk_reuseport")
int _select_by_skb_data(struct sk_reuseport_md *reuse_md)
{
__u32 linum, index = 0, flags = 0, index_zero = 0;
@@ -113,6 +113,12 @@ int _select_by_skb_data(struct sk_reuseport_md *reuse_md)
data_check.skb_ports[0] = th->source;
data_check.skb_ports[1] = th->dest;
+ if (th->fin)
+ /* The connection is being torn down at the end of a
+ * test. It can't contain a cmd, so return early.
+ */
+ return SK_PASS;
+
if ((th->doff << 2) + sizeof(*cmd) > data_check.len)
GOTO_DONE(DROP_ERR_SKB_DATA);
if (bpf_skb_load_bytes(reuse_md, th->doff << 2, &cmd_copy,
diff --git a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
index 0e6be01157e6..1acc91e87bfc 100644
--- a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
@@ -2,46 +2,39 @@
// Copyright (c) 2019 Facebook
#include <linux/bpf.h>
#include <linux/version.h>
-#include "bpf_helpers.h"
-
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 1);
- __type(key, __u32);
- __type(value, __u64);
-} info_map SEC(".maps");
-
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 1);
- __type(key, __u32);
- __type(value, __u64);
-} status_map SEC(".maps");
-
-SEC("send_signal_demo")
-int bpf_send_signal_test(void *ctx)
+#include <bpf/bpf_helpers.h>
+
+__u32 sig = 0, pid = 0, status = 0, signal_thread = 0;
+
+static __always_inline int bpf_send_signal_test(void *ctx)
{
- __u64 *info_val, *status_val;
- __u32 key = 0, pid, sig;
int ret;
- status_val = bpf_map_lookup_elem(&status_map, &key);
- if (!status_val || *status_val != 0)
- return 0;
-
- info_val = bpf_map_lookup_elem(&info_map, &key);
- if (!info_val || *info_val == 0)
+ if (status != 0 || sig == 0 || pid == 0)
return 0;
- sig = *info_val >> 32;
- pid = *info_val & 0xffffFFFF;
-
if ((bpf_get_current_pid_tgid() >> 32) == pid) {
- ret = bpf_send_signal(sig);
+ if (signal_thread)
+ ret = bpf_send_signal_thread(sig);
+ else
+ ret = bpf_send_signal(sig);
if (ret == 0)
- *status_val = 1;
+ status = 1;
}
return 0;
}
+
+SEC("tracepoint/syscalls/sys_enter_nanosleep")
+int send_signal_tp(void *ctx)
+{
+ return bpf_send_signal_test(ctx);
+}
+
+SEC("perf_event")
+int send_signal_perf(void *ctx)
+{
+ return bpf_send_signal_test(ctx);
+}
+
char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
index e21cd736c196..d2b38fa6a5b0 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
@@ -12,8 +12,8 @@
#include <linux/pkt_cls.h>
#include <linux/tcp.h>
#include <sys/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
int _version SEC("version") = 1;
char _license[] SEC("license") = "GPL";
@@ -53,7 +53,7 @@ static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off,
return result;
}
-SEC("sk_lookup_success")
+SEC("classifier/sk_lookup_success")
int bpf_sk_lookup_test0(struct __sk_buff *skb)
{
void *data_end = (void *)(long)skb->data_end;
@@ -78,7 +78,7 @@ int bpf_sk_lookup_test0(struct __sk_buff *skb)
return sk ? TC_ACT_OK : TC_ACT_UNSPEC;
}
-SEC("sk_lookup_success_simple")
+SEC("classifier/sk_lookup_success_simple")
int bpf_sk_lookup_test1(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -90,7 +90,7 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb)
return 0;
}
-SEC("fail_use_after_free")
+SEC("classifier/fail_use_after_free")
int bpf_sk_lookup_uaf(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -105,7 +105,7 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb)
return family;
}
-SEC("fail_modify_sk_pointer")
+SEC("classifier/fail_modify_sk_pointer")
int bpf_sk_lookup_modptr(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -120,7 +120,7 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb)
return 0;
}
-SEC("fail_modify_sk_or_null_pointer")
+SEC("classifier/fail_modify_sk_or_null_pointer")
int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -134,7 +134,7 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
return 0;
}
-SEC("fail_no_release")
+SEC("classifier/fail_no_release")
int bpf_sk_lookup_test2(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -143,7 +143,7 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb)
return 0;
}
-SEC("fail_release_twice")
+SEC("classifier/fail_release_twice")
int bpf_sk_lookup_test3(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -155,7 +155,7 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb)
return 0;
}
-SEC("fail_release_unchecked")
+SEC("classifier/fail_release_unchecked")
int bpf_sk_lookup_test4(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -172,7 +172,7 @@ void lookup_no_release(struct __sk_buff *skb)
bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
}
-SEC("fail_no_release_subcall")
+SEC("classifier/fail_no_release_subcall")
int bpf_sk_lookup_test5(struct __sk_buff *skb)
{
lookup_no_release(skb);
diff --git a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
index 68cf9829f5a7..552f2090665c 100644
--- a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
@@ -6,7 +6,7 @@
#include <string.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#define NUM_CGROUP_LEVELS 4
diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
index 7a80960d7df1..202de3938494 100644
--- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c
+++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
int _version SEC("version") = 1;
char _license[] SEC("license") = "GPL";
@@ -16,6 +16,13 @@ int process(struct __sk_buff *skb)
skb->cb[i]++;
}
skb->priority++;
+ skb->tstamp++;
+ skb->mark++;
+
+ if (skb->wire_len != 100)
+ return 1;
+ if (skb->gso_segs != 8)
+ return 1;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c
new file mode 100644
index 000000000000..de03a90f78ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_skeleton.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct s {
+ int a;
+ long long b;
+} __attribute__((packed));
+
+int in1 = 0;
+long long in2 = 0;
+char in3 = '\0';
+long long in4 __attribute__((aligned(64))) = 0;
+struct s in5 = {};
+
+long long out2 = 0;
+char out3 = 0;
+long long out4 = 0;
+int out1 = 0;
+
+extern bool CONFIG_BPF_SYSCALL __kconfig;
+extern int LINUX_KERNEL_VERSION __kconfig;
+bool bpf_syscall = 0;
+int kern_ver = 0;
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+ static volatile struct s out5;
+
+ out1 = in1;
+ out2 = in2;
+ out3 = in3;
+ out4 = in4;
+ out5 = in5;
+
+ bpf_syscall = CONFIG_BPF_SYSCALL;
+ kern_ver = LINUX_KERNEL_VERSION;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
index a47b003623ef..9bcaa37f476a 100644
--- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
@@ -5,8 +5,8 @@
#include <netinet/in.h>
#include <stdbool.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
enum bpf_addr_array_idx {
ADDR_SRV_IDX,
diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c
index a43b999c8da2..0d31a3b3505f 100644
--- a/tools/testing/selftests/bpf/progs/test_spin_lock.c
+++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c
@@ -2,7 +2,7 @@
// Copyright (c) 2019 Facebook
#include <linux/bpf.h>
#include <linux/version.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
struct hmap_elem {
volatile int cnt;
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
index f5638e26865d..0cf0134631b4 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
@@ -2,7 +2,7 @@
// Copyright (c) 2018 Facebook
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#ifndef PERF_MAX_STACK_DEPTH
#define PERF_MAX_STACK_DEPTH 127
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
index fa0be3e10a10..00ed48672620 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
@@ -2,7 +2,7 @@
// Copyright (c) 2018 Facebook
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#ifndef PERF_MAX_STACK_DEPTH
#define PERF_MAX_STACK_DEPTH 127
@@ -74,4 +74,3 @@ int oncpu(struct sched_switch_args *ctx)
}
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
index 608a06871572..458b0d69133e 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
@@ -7,7 +7,7 @@
#include <linux/stddef.h>
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
@@ -44,7 +44,10 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx)
unsigned long tcp_mem[TCP_MEM_LOOPS] = {};
char value[MAX_VALUE_STR_LEN];
unsigned char i, off = 0;
- int ret;
+ /* a workaround to prevent compiler from generating
+ * codes verifier cannot handle yet.
+ */
+ volatile int ret;
if (ctx->write)
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
index cb201cbe11e7..b2e6f9b0894d 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
@@ -7,7 +7,7 @@
#include <linux/stddef.h>
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
index 5cbbff416998..2d0b0b82a78a 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
@@ -7,7 +7,7 @@
#include <linux/stddef.h>
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
/* Max supported length of a string with unsigned long in base 10 (pow2 - 1). */
#define MAX_ULONG_STR_LEN 0xF
diff --git a/tools/testing/selftests/bpf/progs/test_tc_edt.c b/tools/testing/selftests/bpf/progs/test_tc_edt.c
index 3af64c470d64..bf28814bfde5 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_edt.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_edt.c
@@ -2,12 +2,13 @@
#include <stdint.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
+#include <linux/stddef.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/pkt_cls.h>
#include <linux/tcp.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
/* the maximum delay we are willing to add (drop packets beyond that) */
#define TIME_HORIZON_NS (2000 * 1000 * 1000)
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 74370e7e286d..37bce7a7c394 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -17,8 +17,8 @@
#include <linux/pkt_cls.h>
#include <linux/types.h>
-#include "bpf_endian.h"
-#include "bpf_helpers.h"
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
static const int cfg_port = 8000;
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
index 1ab095bcacd8..47cbe2eeae43 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
@@ -13,16 +13,35 @@
#include <sys/socket.h>
#include <linux/tcp.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
struct bpf_map_def SEC("maps") results = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
- .value_size = sizeof(__u64),
- .max_entries = 1,
+ .value_size = sizeof(__u32),
+ .max_entries = 3,
};
+static __always_inline __s64 gen_syncookie(void *data_end, struct bpf_sock *sk,
+ void *iph, __u32 ip_size,
+ struct tcphdr *tcph)
+{
+ __u32 thlen = tcph->doff * 4;
+
+ if (tcph->syn && !tcph->ack) {
+ // packet should only have an MSS option
+ if (thlen != 24)
+ return 0;
+
+ if ((void *)tcph + thlen > data_end)
+ return 0;
+
+ return bpf_tcp_gen_syncookie(sk, iph, ip_size, tcph, thlen);
+ }
+ return 0;
+}
+
static __always_inline void check_syncookie(void *ctx, void *data,
void *data_end)
{
@@ -33,8 +52,10 @@ static __always_inline void check_syncookie(void *ctx, void *data,
struct ipv6hdr *ipv6h;
struct tcphdr *tcph;
int ret;
+ __u32 key_mss = 2;
+ __u32 key_gen = 1;
__u32 key = 0;
- __u64 value = 1;
+ __s64 seq_mss;
ethh = data;
if (ethh + 1 > data_end)
@@ -66,6 +87,9 @@ static __always_inline void check_syncookie(void *ctx, void *data,
if (sk->state != BPF_TCP_LISTEN)
goto release;
+ seq_mss = gen_syncookie(data_end, sk, ipv4h, sizeof(*ipv4h),
+ tcph);
+
ret = bpf_tcp_check_syncookie(sk, ipv4h, sizeof(*ipv4h),
tcph, sizeof(*tcph));
break;
@@ -95,6 +119,9 @@ static __always_inline void check_syncookie(void *ctx, void *data,
if (sk->state != BPF_TCP_LISTEN)
goto release;
+ seq_mss = gen_syncookie(data_end, sk, ipv6h, sizeof(*ipv6h),
+ tcph);
+
ret = bpf_tcp_check_syncookie(sk, ipv6h, sizeof(*ipv6h),
tcph, sizeof(*tcph));
break;
@@ -103,8 +130,19 @@ static __always_inline void check_syncookie(void *ctx, void *data,
return;
}
- if (ret == 0)
- bpf_map_update_elem(&results, &key, &value, 0);
+ if (seq_mss > 0) {
+ __u32 cookie = (__u32)seq_mss;
+ __u32 mss = seq_mss >> 32;
+
+ bpf_map_update_elem(&results, &key_gen, &cookie, 0);
+ bpf_map_update_elem(&results, &key_mss, &mss, 0);
+ }
+
+ if (ret == 0) {
+ __u32 cookie = bpf_ntohl(tcph->ack_seq) - 1;
+
+ bpf_map_update_elem(&results, &key, &cookie, 0);
+ }
release:
bpf_sk_release(sk);
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_estats.c b/tools/testing/selftests/bpf/progs/test_tcp_estats.c
index c8c595da38d4..adc83a54c352 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_estats.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_estats.c
@@ -36,9 +36,9 @@
#include <linux/ipv6.h>
#include <linux/version.h>
#include <sys/socket.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
-#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
+#define _(P) ({typeof(P) val = 0; bpf_probe_read_kernel(&val, sizeof(val), &P); val;})
#define TCP_ESTATS_MAGIC 0xBAADBEEF
/* This test case needs "sock" and "pt_regs" data structure.
diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
index 2e233613d1fc..1f1966e86e9f 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
@@ -10,8 +10,8 @@
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/tcp.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#include "test_tcpbpf.h"
struct {
@@ -131,6 +131,7 @@ int bpf_testcb(struct bpf_sock_ops *skops)
g.bytes_received = skops->bytes_received;
g.bytes_acked = skops->bytes_acked;
}
+ g.num_close_events++;
bpf_map_update_elem(&global_map, &key, &g,
BPF_ANY);
}
diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
index 08346e7765d5..ac63410bb541 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
@@ -10,8 +10,8 @@
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/tcp.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#include "test_tcpnotify.h"
struct {
diff --git a/tools/testing/selftests/bpf/progs/test_tracepoint.c b/tools/testing/selftests/bpf/progs/test_tracepoint.c
index 04bf084517e0..4b825ee122cf 100644
--- a/tools/testing/selftests/bpf/progs/test_tracepoint.c
+++ b/tools/testing/selftests/bpf/progs/test_tracepoint.c
@@ -2,7 +2,7 @@
// Copyright (c) 2017 Facebook
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
struct sched_switch_args {
diff --git a/tools/testing/selftests/bpf/progs/test_trampoline_count.c b/tools/testing/selftests/bpf/progs/test_trampoline_count.c
new file mode 100644
index 000000000000..e51e6e3a81c2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_trampoline_count.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdbool.h>
+#include <stddef.h>
+#include <linux/bpf.h>
+#include "bpf_trace_helpers.h"
+
+struct task_struct;
+
+SEC("fentry/__set_task_comm")
+int BPF_PROG(prog1, struct task_struct *tsk, const char *buf, bool exec)
+{
+ return 0;
+}
+
+SEC("fexit/__set_task_comm")
+int BPF_PROG(prog2, struct task_struct *tsk, const char *buf, bool exec)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index 504df69c83df..f48dbfe24ddc 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -19,8 +19,8 @@
#include <linux/socket.h>
#include <linux/pkt_cls.h>
#include <linux/erspan.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#define ERROR(ret) do {\
char fmt[] = "ERROR line:%d ret:%d\n";\
diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale1.c b/tools/testing/selftests/bpf/progs/test_verif_scale1.c
index f3236ce35f31..d38153dab3dd 100644
--- a/tools/testing/selftests/bpf/progs/test_verif_scale1.c
+++ b/tools/testing/selftests/bpf/progs/test_verif_scale1.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#define ATTR __attribute__((noinline))
#include "test_jhash.h"
diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale2.c b/tools/testing/selftests/bpf/progs/test_verif_scale2.c
index 9897150ed516..f024154c7be7 100644
--- a/tools/testing/selftests/bpf/progs/test_verif_scale2.c
+++ b/tools/testing/selftests/bpf/progs/test_verif_scale2.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#define ATTR __always_inline
#include "test_jhash.h"
diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale3.c b/tools/testing/selftests/bpf/progs/test_verif_scale3.c
index 1848da04ea41..9beb5bf80373 100644
--- a/tools/testing/selftests/bpf/progs/test_verif_scale3.c
+++ b/tools/testing/selftests/bpf/progs/test_verif_scale3.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#define ATTR __attribute__((noinline))
#include "test_jhash.h"
diff --git a/tools/testing/selftests/bpf/progs/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c
index 0941c655b07b..31f9bce37491 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp.c
@@ -16,8 +16,8 @@
#include <linux/tcp.h>
#include <linux/pkt_cls.h>
#include <sys/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#include "test_iptunnel_common.h"
int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
new file mode 100644
index 000000000000..cb8a04ab7a78
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_trace_helpers.h"
+
+struct net_device {
+ /* Structure does not need to contain all entries,
+ * as "preserve_access_index" will use BTF to fix this...
+ */
+ int ifindex;
+} __attribute__((preserve_access_index));
+
+struct xdp_rxq_info {
+ /* Structure does not need to contain all entries,
+ * as "preserve_access_index" will use BTF to fix this...
+ */
+ struct net_device *dev;
+ __u32 queue_index;
+} __attribute__((preserve_access_index));
+
+struct xdp_buff {
+ void *data;
+ void *data_end;
+ void *data_meta;
+ void *data_hard_start;
+ unsigned long handle;
+ struct xdp_rxq_info *rxq;
+} __attribute__((preserve_access_index));
+
+__u64 test_result_fentry = 0;
+SEC("fentry/_xdp_tx_iptunnel")
+int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
+{
+ test_result_fentry = xdp->rxq->dev->ifindex;
+ return 0;
+}
+
+__u64 test_result_fexit = 0;
+SEC("fexit/_xdp_tx_iptunnel")
+int BPF_PROG(trace_on_exit, struct xdp_buff *xdp, int ret)
+{
+ test_result_fexit = ret;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_loop.c b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
index 97175f73c3fe..fcabcda30ba3 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_loop.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
@@ -12,8 +12,8 @@
#include <linux/tcp.h>
#include <linux/pkt_cls.h>
#include <sys/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#include "test_iptunnel_common.h"
int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
index 8d0182650653..a7c4a7d49fe6 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
@@ -2,7 +2,7 @@
#include <linux/if_ether.h>
#include <linux/pkt_cls.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
#define __round_mask(x, y) ((__typeof__(x))((y) - 1))
#define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1)
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index e88d7b9d65ab..8beecec166d9 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -13,8 +13,8 @@
#include <linux/icmpv6.h>
#include <linux/tcp.h>
#include <linux/udp.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
static __u32 rol32(__u32 word, unsigned int shift)
{
@@ -86,7 +86,7 @@ u32 jhash(const void *key, u32 length, u32 initval)
return c;
}
-static __attribute__ ((noinline))
+__attribute__ ((noinline))
u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
{
a += initval;
@@ -96,7 +96,7 @@ u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
return c;
}
-static __attribute__ ((noinline))
+__attribute__ ((noinline))
u32 jhash_2words(u32 a, u32 b, u32 initval)
{
return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c
index ef9e704be140..a5337cd9400b 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c
@@ -10,7 +10,7 @@
* General Public License for more details.
*/
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
index 365a7d2d9f5c..134768f6b788 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
@@ -22,8 +22,8 @@
#include <linux/in.h>
#include <linux/pkt_cls.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
/* linux/if_vlan.h have not exposed this as UAPI, thus mirror some here
*
diff --git a/tools/testing/selftests/bpf/progs/xdp_dummy.c b/tools/testing/selftests/bpf/progs/xdp_dummy.c
index 43b0ef1001ed..ea25e8881992 100644
--- a/tools/testing/selftests/bpf/progs/xdp_dummy.c
+++ b/tools/testing/selftests/bpf/progs/xdp_dummy.c
@@ -2,7 +2,7 @@
#define KBUILD_MODNAME "xdp_dummy"
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
SEC("xdp_dummy")
int xdp_dummy_prog(struct xdp_md *ctx)
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_map.c b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
index 1c5f298d7196..d037262c8937 100644
--- a/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
+++ b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
struct {
__uint(type, BPF_MAP_TYPE_DEVMAP);
diff --git a/tools/testing/selftests/bpf/progs/xdp_tx.c b/tools/testing/selftests/bpf/progs/xdp_tx.c
index 57912e7c94b0..94e6c2b281cb 100644
--- a/tools/testing/selftests/bpf/progs/xdp_tx.c
+++ b/tools/testing/selftests/bpf/progs/xdp_tx.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
-#include "bpf_helpers.h"
+#include <bpf/bpf_helpers.h>
SEC("tx")
int xdp_tx(struct xdp_md *xdp)
diff --git a/tools/testing/selftests/bpf/progs/xdping_kern.c b/tools/testing/selftests/bpf/progs/xdping_kern.c
index 112a2857f4e2..6b9ca40bd1f4 100644
--- a/tools/testing/selftests/bpf/progs/xdping_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdping_kern.c
@@ -12,8 +12,8 @@
#include <linux/if_vlan.h>
#include <linux/ip.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
#include "xdping.h"
diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh
new file mode 100755
index 000000000000..ac349a5cea7e
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpftool_build.sh
@@ -0,0 +1,147 @@
+#!/bin/bash
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+case $1 in
+ -h|--help)
+ echo -e "$0 [-j <n>]"
+ echo -e "\tTest the different ways of building bpftool."
+ echo -e ""
+ echo -e "\tOptions:"
+ echo -e "\t\t-j <n>:\tPass -j flag to 'make'."
+ exit 0
+ ;;
+esac
+
+J=$*
+
+# Assume script is located under tools/testing/selftests/bpf/. We want to start
+# build attempts from the top of kernel repository.
+SCRIPT_REL_PATH=$(realpath --relative-to=$PWD $0)
+SCRIPT_REL_DIR=$(dirname $SCRIPT_REL_PATH)
+KDIR_ROOT_DIR=$(realpath $PWD/$SCRIPT_REL_DIR/../../../../)
+cd $KDIR_ROOT_DIR
+if [ ! -e tools/bpf/bpftool/Makefile ]; then
+ echo -e "skip: bpftool files not found!\n"
+ exit 0
+fi
+
+ERROR=0
+TMPDIR=
+
+# If one build fails, continue but return non-0 on exit.
+return_value() {
+ if [ -d "$TMPDIR" ] ; then
+ rm -rf -- $TMPDIR
+ fi
+ exit $ERROR
+}
+trap return_value EXIT
+
+check() {
+ local dir=$(realpath $1)
+
+ echo -n "binary: "
+ # Returns non-null if file is found (and "false" is run)
+ find $dir -type f -executable -name bpftool -print -exec false {} + && \
+ ERROR=1 && printf "FAILURE: Did not find bpftool\n"
+}
+
+make_and_clean() {
+ echo -e "\$PWD: $PWD"
+ echo -e "command: make -s $* >/dev/null"
+ make $J -s $* >/dev/null
+ if [ $? -ne 0 ] ; then
+ ERROR=1
+ fi
+ if [ $# -ge 1 ] ; then
+ check ${@: -1}
+ else
+ check .
+ fi
+ (
+ if [ $# -ge 1 ] ; then
+ cd ${@: -1}
+ fi
+ make -s clean
+ )
+ echo
+}
+
+make_with_tmpdir() {
+ local ARGS
+
+ TMPDIR=$(mktemp -d)
+ if [ $# -ge 2 ] ; then
+ ARGS=${@:1:(($# - 1))}
+ fi
+ echo -e "\$PWD: $PWD"
+ echo -e "command: make -s $ARGS ${@: -1}=$TMPDIR/ >/dev/null"
+ make $J -s $ARGS ${@: -1}=$TMPDIR/ >/dev/null
+ if [ $? -ne 0 ] ; then
+ ERROR=1
+ fi
+ check $TMPDIR
+ rm -rf -- $TMPDIR
+ echo
+}
+
+echo "Trying to build bpftool"
+echo -e "... through kbuild\n"
+
+if [ -f ".config" ] ; then
+ make_and_clean tools/bpf
+
+ ## $OUTPUT is overwritten in kbuild Makefile, and thus cannot be passed
+ ## down from toplevel Makefile to bpftool's Makefile.
+
+ # make_with_tmpdir tools/bpf OUTPUT
+ echo -e "skip: make tools/bpf OUTPUT=<dir> (not supported)\n"
+
+ make_with_tmpdir tools/bpf O
+else
+ echo -e "skip: make tools/bpf (no .config found)\n"
+ echo -e "skip: make tools/bpf OUTPUT=<dir> (not supported)\n"
+ echo -e "skip: make tools/bpf O=<dir> (no .config found)\n"
+fi
+
+echo -e "... from kernel source tree\n"
+
+make_and_clean -C tools/bpf/bpftool
+
+make_with_tmpdir -C tools/bpf/bpftool OUTPUT
+
+make_with_tmpdir -C tools/bpf/bpftool O
+
+echo -e "... from tools/\n"
+cd tools/
+
+make_and_clean bpf
+
+## In tools/bpf/Makefile, function "descend" is called and passes $(O) and
+## $(OUTPUT). We would like $(OUTPUT) to have "bpf/bpftool/" appended before
+## calling bpftool's Makefile, but this is not the case as the "descend"
+## function focuses on $(O)/$(subdir). However, in the present case, updating
+## $(O) to have $(OUTPUT) recomputed from it in bpftool's Makefile does not
+## work, because $(O) is not defined from command line and $(OUTPUT) is not
+## updated in tools/scripts/Makefile.include.
+##
+## Workarounds would require to a) edit "descend" or use an alternative way to
+## call bpftool's Makefile, b) modify the conditions to update $(OUTPUT) and
+## other variables in tools/scripts/Makefile.include (at the risk of breaking
+## the build of other tools), or c) append manually the "bpf/bpftool" suffix to
+## $(OUTPUT) in bpf's Makefile, which may break if targets for other directories
+## use "descend" in the future.
+
+# make_with_tmpdir bpf OUTPUT
+echo -e "skip: make bpf OUTPUT=<dir> (not supported)\n"
+
+make_with_tmpdir bpf O
+
+echo -e "... from bpftool's dir\n"
+cd bpf/bpftool
+
+make_and_clean
+
+make_with_tmpdir OUTPUT
+
+make_with_tmpdir O
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 3d617e806054..93040ca83e60 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -4148,10 +4148,6 @@ static int do_test_file(unsigned int test_num)
if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj)))
return PTR_ERR(obj);
- err = bpf_object__btf_fd(obj);
- if (CHECK(err == -1, "bpf_object__btf_fd: -1"))
- goto done;
-
prog = bpf_program__next(NULL, obj);
if (CHECK(!prog, "Cannot find bpf_prog")) {
err = -1;
diff --git a/tools/testing/selftests/bpf/test_btf_dump.c b/tools/testing/selftests/bpf/test_btf_dump.c
deleted file mode 100644
index 8f850823d35f..000000000000
--- a/tools/testing/selftests/bpf/test_btf_dump.c
+++ /dev/null
@@ -1,143 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <errno.h>
-#include <linux/err.h>
-#include <btf.h>
-
-#define CHECK(condition, format...) ({ \
- int __ret = !!(condition); \
- if (__ret) { \
- fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__); \
- fprintf(stderr, format); \
- } \
- __ret; \
-})
-
-void btf_dump_printf(void *ctx, const char *fmt, va_list args)
-{
- vfprintf(ctx, fmt, args);
-}
-
-struct btf_dump_test_case {
- const char *name;
- struct btf_dump_opts opts;
-} btf_dump_test_cases[] = {
- {.name = "btf_dump_test_case_syntax", .opts = {}},
- {.name = "btf_dump_test_case_ordering", .opts = {}},
- {.name = "btf_dump_test_case_padding", .opts = {}},
- {.name = "btf_dump_test_case_packing", .opts = {}},
- {.name = "btf_dump_test_case_bitfields", .opts = {}},
- {.name = "btf_dump_test_case_multidim", .opts = {}},
- {.name = "btf_dump_test_case_namespacing", .opts = {}},
-};
-
-static int btf_dump_all_types(const struct btf *btf,
- const struct btf_dump_opts *opts)
-{
- size_t type_cnt = btf__get_nr_types(btf);
- struct btf_dump *d;
- int err = 0, id;
-
- d = btf_dump__new(btf, NULL, opts, btf_dump_printf);
- if (IS_ERR(d))
- return PTR_ERR(d);
-
- for (id = 1; id <= type_cnt; id++) {
- err = btf_dump__dump_type(d, id);
- if (err)
- goto done;
- }
-
-done:
- btf_dump__free(d);
- return err;
-}
-
-int test_btf_dump_case(int n, struct btf_dump_test_case *test_case)
-{
- char test_file[256], out_file[256], diff_cmd[1024];
- struct btf *btf = NULL;
- int err = 0, fd = -1;
- FILE *f = NULL;
-
- fprintf(stderr, "Test case #%d (%s): ", n, test_case->name);
-
- snprintf(test_file, sizeof(test_file), "%s.o", test_case->name);
-
- btf = btf__parse_elf(test_file, NULL);
- if (CHECK(IS_ERR(btf),
- "failed to load test BTF: %ld\n", PTR_ERR(btf))) {
- err = -PTR_ERR(btf);
- btf = NULL;
- goto done;
- }
-
- snprintf(out_file, sizeof(out_file),
- "/tmp/%s.output.XXXXXX", test_case->name);
- fd = mkstemp(out_file);
- if (CHECK(fd < 0, "failed to create temp output file: %d\n", fd)) {
- err = fd;
- goto done;
- }
- f = fdopen(fd, "w");
- if (CHECK(f == NULL, "failed to open temp output file: %s(%d)\n",
- strerror(errno), errno)) {
- close(fd);
- goto done;
- }
-
- test_case->opts.ctx = f;
- err = btf_dump_all_types(btf, &test_case->opts);
- fclose(f);
- close(fd);
- if (CHECK(err, "failure during C dumping: %d\n", err)) {
- goto done;
- }
-
- snprintf(test_file, sizeof(test_file), "progs/%s.c", test_case->name);
- /*
- * Diff test output and expected test output, contained between
- * START-EXPECTED-OUTPUT and END-EXPECTED-OUTPUT lines in test case.
- * For expected output lines, everything before '*' is stripped out.
- * Also lines containing comment start and comment end markers are
- * ignored.
- */
- snprintf(diff_cmd, sizeof(diff_cmd),
- "awk '/START-EXPECTED-OUTPUT/{out=1;next} "
- "/END-EXPECTED-OUTPUT/{out=0} "
- "/\\/\\*|\\*\\//{next} " /* ignore comment start/end lines */
- "out {sub(/^[ \\t]*\\*/, \"\"); print}' '%s' | diff -u - '%s'",
- test_file, out_file);
- err = system(diff_cmd);
- if (CHECK(err,
- "differing test output, output=%s, err=%d, diff cmd:\n%s\n",
- out_file, err, diff_cmd))
- goto done;
-
- remove(out_file);
- fprintf(stderr, "OK\n");
-
-done:
- btf__free(btf);
- return err;
-}
-
-int main() {
- int test_case_cnt, i, err, failed = 0;
-
- test_case_cnt = sizeof(btf_dump_test_cases) /
- sizeof(btf_dump_test_cases[0]);
-
- for (i = 0; i < test_case_cnt; i++) {
- err = test_btf_dump_case(i, &btf_dump_test_cases[i]);
- if (err)
- failed++;
- }
-
- fprintf(stderr, "%d tests succeeded, %d tests failed.\n",
- test_case_cnt - failed, failed);
-
- return failed;
-}
diff --git a/tools/testing/selftests/bpf/test_cgroup_attach.c b/tools/testing/selftests/bpf/test_cgroup_attach.c
deleted file mode 100644
index 7671909ee1cb..000000000000
--- a/tools/testing/selftests/bpf/test_cgroup_attach.c
+++ /dev/null
@@ -1,571 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/* eBPF example program:
- *
- * - Creates arraymap in kernel with 4 bytes keys and 8 byte values
- *
- * - Loads eBPF program
- *
- * The eBPF program accesses the map passed in to store two pieces of
- * information. The number of invocations of the program, which maps
- * to the number of packets received, is stored to key 0. Key 1 is
- * incremented on each iteration by the number of bytes stored in
- * the skb. The program also stores the number of received bytes
- * in the cgroup storage.
- *
- * - Attaches the new program to a cgroup using BPF_PROG_ATTACH
- *
- * - Every second, reads map[0] and map[1] to see how many bytes and
- * packets were seen on any socket of tasks in the given cgroup.
- */
-
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <sys/resource.h>
-#include <sys/time.h>
-#include <unistd.h>
-#include <linux/filter.h>
-
-#include <linux/bpf.h>
-#include <bpf/bpf.h>
-
-#include "bpf_util.h"
-#include "bpf_rlimit.h"
-#include "cgroup_helpers.h"
-
-#define FOO "/foo"
-#define BAR "/foo/bar/"
-#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
-
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
-
-#ifdef DEBUG
-#define debug(args...) printf(args)
-#else
-#define debug(args...)
-#endif
-
-static int prog_load(int verdict)
-{
- int ret;
- struct bpf_insn prog[] = {
- BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
- BPF_EXIT_INSN(),
- };
- size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
-
- ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
- prog, insns_cnt, "GPL", 0,
- bpf_log_buf, BPF_LOG_BUF_SIZE);
-
- if (ret < 0) {
- log_err("Loading program");
- printf("Output from verifier:\n%s\n-------\n", bpf_log_buf);
- return 0;
- }
- return ret;
-}
-
-static int test_foo_bar(void)
-{
- int drop_prog, allow_prog, foo = 0, bar = 0, rc = 0;
-
- allow_prog = prog_load(1);
- if (!allow_prog)
- goto err;
-
- drop_prog = prog_load(0);
- if (!drop_prog)
- goto err;
-
- if (setup_cgroup_environment())
- goto err;
-
- /* Create cgroup /foo, get fd, and join it */
- foo = create_and_get_cgroup(FOO);
- if (foo < 0)
- goto err;
-
- if (join_cgroup(FOO))
- goto err;
-
- if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_OVERRIDE)) {
- log_err("Attaching prog to /foo");
- goto err;
- }
-
- debug("Attached DROP prog. This ping in cgroup /foo should fail...\n");
- assert(system(PING_CMD) != 0);
-
- /* Create cgroup /foo/bar, get fd, and join it */
- bar = create_and_get_cgroup(BAR);
- if (bar < 0)
- goto err;
-
- if (join_cgroup(BAR))
- goto err;
-
- debug("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n");
- assert(system(PING_CMD) != 0);
-
- if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_OVERRIDE)) {
- log_err("Attaching prog to /foo/bar");
- goto err;
- }
-
- debug("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n");
- assert(system(PING_CMD) == 0);
-
- if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
- log_err("Detaching program from /foo/bar");
- goto err;
- }
-
- debug("Detached PASS from /foo/bar while DROP is attached to /foo.\n"
- "This ping in cgroup /foo/bar should fail...\n");
- assert(system(PING_CMD) != 0);
-
- if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_OVERRIDE)) {
- log_err("Attaching prog to /foo/bar");
- goto err;
- }
-
- if (bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) {
- log_err("Detaching program from /foo");
- goto err;
- }
-
- debug("Attached PASS from /foo/bar and detached DROP from /foo.\n"
- "This ping in cgroup /foo/bar should pass...\n");
- assert(system(PING_CMD) == 0);
-
- if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_OVERRIDE)) {
- log_err("Attaching prog to /foo/bar");
- goto err;
- }
-
- if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) {
- errno = 0;
- log_err("Unexpected success attaching prog to /foo/bar");
- goto err;
- }
-
- if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
- log_err("Detaching program from /foo/bar");
- goto err;
- }
-
- if (!bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) {
- errno = 0;
- log_err("Unexpected success in double detach from /foo");
- goto err;
- }
-
- if (bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) {
- log_err("Attaching non-overridable prog to /foo");
- goto err;
- }
-
- if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) {
- errno = 0;
- log_err("Unexpected success attaching non-overridable prog to /foo/bar");
- goto err;
- }
-
- if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_OVERRIDE)) {
- errno = 0;
- log_err("Unexpected success attaching overridable prog to /foo/bar");
- goto err;
- }
-
- if (!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_OVERRIDE)) {
- errno = 0;
- log_err("Unexpected success attaching overridable prog to /foo");
- goto err;
- }
-
- if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) {
- log_err("Attaching different non-overridable prog to /foo");
- goto err;
- }
-
- goto out;
-
-err:
- rc = 1;
-
-out:
- close(foo);
- close(bar);
- cleanup_cgroup_environment();
- if (!rc)
- printf("#override:PASS\n");
- else
- printf("#override:FAIL\n");
- return rc;
-}
-
-static int map_fd = -1;
-
-static int prog_load_cnt(int verdict, int val)
-{
- int cgroup_storage_fd, percpu_cgroup_storage_fd;
-
- if (map_fd < 0)
- map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
- if (map_fd < 0) {
- printf("failed to create map '%s'\n", strerror(errno));
- return -1;
- }
-
- cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
- sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
- if (cgroup_storage_fd < 0) {
- printf("failed to create map '%s'\n", strerror(errno));
- return -1;
- }
-
- percpu_cgroup_storage_fd = bpf_create_map(
- BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
- sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
- if (percpu_cgroup_storage_fd < 0) {
- printf("failed to create map '%s'\n", strerror(errno));
- return -1;
- }
-
- struct bpf_insn prog[] = {
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
- BPF_LD_MAP_FD(BPF_REG_1, map_fd),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */
- BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
-
- BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_MOV64_IMM(BPF_REG_1, val),
- BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0),
-
- BPF_LD_MAP_FD(BPF_REG_1, percpu_cgroup_storage_fd),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1),
- BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0),
-
- BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
- BPF_EXIT_INSN(),
- };
- size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
- int ret;
-
- ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
- prog, insns_cnt, "GPL", 0,
- bpf_log_buf, BPF_LOG_BUF_SIZE);
-
- if (ret < 0) {
- log_err("Loading program");
- printf("Output from verifier:\n%s\n-------\n", bpf_log_buf);
- return 0;
- }
- close(cgroup_storage_fd);
- return ret;
-}
-
-
-static int test_multiprog(void)
-{
- __u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id;
- int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0;
- int drop_prog, allow_prog[6] = {}, rc = 0;
- unsigned long long value;
- int i = 0;
-
- for (i = 0; i < 6; i++) {
- allow_prog[i] = prog_load_cnt(1, 1 << i);
- if (!allow_prog[i])
- goto err;
- }
- drop_prog = prog_load_cnt(0, 1);
- if (!drop_prog)
- goto err;
-
- if (setup_cgroup_environment())
- goto err;
-
- cg1 = create_and_get_cgroup("/cg1");
- if (cg1 < 0)
- goto err;
- cg2 = create_and_get_cgroup("/cg1/cg2");
- if (cg2 < 0)
- goto err;
- cg3 = create_and_get_cgroup("/cg1/cg2/cg3");
- if (cg3 < 0)
- goto err;
- cg4 = create_and_get_cgroup("/cg1/cg2/cg3/cg4");
- if (cg4 < 0)
- goto err;
- cg5 = create_and_get_cgroup("/cg1/cg2/cg3/cg4/cg5");
- if (cg5 < 0)
- goto err;
-
- if (join_cgroup("/cg1/cg2/cg3/cg4/cg5"))
- goto err;
-
- if (bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_MULTI)) {
- log_err("Attaching prog to cg1");
- goto err;
- }
- if (!bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_MULTI)) {
- log_err("Unexpected success attaching the same prog to cg1");
- goto err;
- }
- if (bpf_prog_attach(allow_prog[1], cg1, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_MULTI)) {
- log_err("Attaching prog2 to cg1");
- goto err;
- }
- if (bpf_prog_attach(allow_prog[2], cg2, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_OVERRIDE)) {
- log_err("Attaching prog to cg2");
- goto err;
- }
- if (bpf_prog_attach(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_MULTI)) {
- log_err("Attaching prog to cg3");
- goto err;
- }
- if (bpf_prog_attach(allow_prog[4], cg4, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_OVERRIDE)) {
- log_err("Attaching prog to cg4");
- goto err;
- }
- if (bpf_prog_attach(allow_prog[5], cg5, BPF_CGROUP_INET_EGRESS, 0)) {
- log_err("Attaching prog to cg5");
- goto err;
- }
- assert(system(PING_CMD) == 0);
- assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
- assert(value == 1 + 2 + 8 + 32);
-
- /* query the number of effective progs in cg5 */
- assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
- NULL, NULL, &prog_cnt) == 0);
- assert(prog_cnt == 4);
- /* retrieve prog_ids of effective progs in cg5 */
- assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
- &attach_flags, prog_ids, &prog_cnt) == 0);
- assert(prog_cnt == 4);
- assert(attach_flags == 0);
- saved_prog_id = prog_ids[0];
- /* check enospc handling */
- prog_ids[0] = 0;
- prog_cnt = 2;
- assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
- &attach_flags, prog_ids, &prog_cnt) == -1 &&
- errno == ENOSPC);
- assert(prog_cnt == 4);
- /* check that prog_ids are returned even when buffer is too small */
- assert(prog_ids[0] == saved_prog_id);
- /* retrieve prog_id of single attached prog in cg5 */
- prog_ids[0] = 0;
- assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0,
- NULL, prog_ids, &prog_cnt) == 0);
- assert(prog_cnt == 1);
- assert(prog_ids[0] == saved_prog_id);
-
- /* detach bottom program and ping again */
- if (bpf_prog_detach2(-1, cg5, BPF_CGROUP_INET_EGRESS)) {
- log_err("Detaching prog from cg5");
- goto err;
- }
- value = 0;
- assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
- assert(system(PING_CMD) == 0);
- assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
- assert(value == 1 + 2 + 8 + 16);
-
- /* detach 3rd from bottom program and ping again */
- errno = 0;
- if (!bpf_prog_detach2(0, cg3, BPF_CGROUP_INET_EGRESS)) {
- log_err("Unexpected success on detach from cg3");
- goto err;
- }
- if (bpf_prog_detach2(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS)) {
- log_err("Detaching from cg3");
- goto err;
- }
- value = 0;
- assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
- assert(system(PING_CMD) == 0);
- assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
- assert(value == 1 + 2 + 16);
-
- /* detach 2nd from bottom program and ping again */
- if (bpf_prog_detach2(-1, cg4, BPF_CGROUP_INET_EGRESS)) {
- log_err("Detaching prog from cg4");
- goto err;
- }
- value = 0;
- assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
- assert(system(PING_CMD) == 0);
- assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
- assert(value == 1 + 2 + 4);
-
- prog_cnt = 4;
- assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
- &attach_flags, prog_ids, &prog_cnt) == 0);
- assert(prog_cnt == 3);
- assert(attach_flags == 0);
- assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0,
- NULL, prog_ids, &prog_cnt) == 0);
- assert(prog_cnt == 0);
- goto out;
-err:
- rc = 1;
-
-out:
- for (i = 0; i < 6; i++)
- if (allow_prog[i] > 0)
- close(allow_prog[i]);
- close(cg1);
- close(cg2);
- close(cg3);
- close(cg4);
- close(cg5);
- cleanup_cgroup_environment();
- if (!rc)
- printf("#multi:PASS\n");
- else
- printf("#multi:FAIL\n");
- return rc;
-}
-
-static int test_autodetach(void)
-{
- __u32 prog_cnt = 4, attach_flags;
- int allow_prog[2] = {0};
- __u32 prog_ids[2] = {0};
- int cg = 0, i, rc = -1;
- void *ptr = NULL;
- int attempts;
-
- for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
- allow_prog[i] = prog_load_cnt(1, 1 << i);
- if (!allow_prog[i])
- goto err;
- }
-
- if (setup_cgroup_environment())
- goto err;
-
- /* create a cgroup, attach two programs and remember their ids */
- cg = create_and_get_cgroup("/cg_autodetach");
- if (cg < 0)
- goto err;
-
- if (join_cgroup("/cg_autodetach"))
- goto err;
-
- for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
- if (bpf_prog_attach(allow_prog[i], cg, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_MULTI)) {
- log_err("Attaching prog[%d] to cg:egress", i);
- goto err;
- }
- }
-
- /* make sure that programs are attached and run some traffic */
- assert(bpf_prog_query(cg, BPF_CGROUP_INET_EGRESS, 0, &attach_flags,
- prog_ids, &prog_cnt) == 0);
- assert(system(PING_CMD) == 0);
-
- /* allocate some memory (4Mb) to pin the original cgroup */
- ptr = malloc(4 * (1 << 20));
- if (!ptr)
- goto err;
-
- /* close programs and cgroup fd */
- for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
- close(allow_prog[i]);
- allow_prog[i] = 0;
- }
-
- close(cg);
- cg = 0;
-
- /* leave the cgroup and remove it. don't detach programs */
- cleanup_cgroup_environment();
-
- /* wait for the asynchronous auto-detachment.
- * wait for no more than 5 sec and give up.
- */
- for (i = 0; i < ARRAY_SIZE(prog_ids); i++) {
- for (attempts = 5; attempts >= 0; attempts--) {
- int fd = bpf_prog_get_fd_by_id(prog_ids[i]);
-
- if (fd < 0)
- break;
-
- /* don't leave the fd open */
- close(fd);
-
- if (!attempts)
- goto err;
-
- sleep(1);
- }
- }
-
- rc = 0;
-err:
- for (i = 0; i < ARRAY_SIZE(allow_prog); i++)
- if (allow_prog[i] > 0)
- close(allow_prog[i]);
- if (cg)
- close(cg);
- free(ptr);
- cleanup_cgroup_environment();
- if (!rc)
- printf("#autodetach:PASS\n");
- else
- printf("#autodetach:FAIL\n");
- return rc;
-}
-
-int main(void)
-{
- int (*tests[])(void) = {
- test_foo_bar,
- test_multiprog,
- test_autodetach,
- };
- int errors = 0;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(tests); i++)
- if (tests[i]())
- errors++;
-
- if (errors)
- printf("test_cgroup_attach:FAIL\n");
- else
- printf("test_cgroup_attach:PASS\n");
-
- return errors ? EXIT_FAILURE : EXIT_SUCCESS;
-}
diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c
index 2fc4625c1a15..655729004391 100644
--- a/tools/testing/selftests/bpf/test_cgroup_storage.c
+++ b/tools/testing/selftests/bpf/test_cgroup_storage.c
@@ -20,9 +20,9 @@ int main(int argc, char **argv)
BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1),
- BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
BPF_LD_MAP_FD(BPF_REG_1, 0), /* map fd */
BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */
@@ -30,7 +30,7 @@ int main(int argc, char **argv)
BPF_FUNC_get_local_storage),
BPF_MOV64_IMM(BPF_REG_1, 1),
BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x1),
BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
diff --git a/tools/testing/selftests/bpf/test_cpp.cpp b/tools/testing/selftests/bpf/test_cpp.cpp
new file mode 100644
index 000000000000..a8d2e9a87fbf
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_cpp.cpp
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#include <iostream>
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include "test_core_extern.skel.h"
+
+/* do nothing, just make sure we can link successfully */
+
+int main(int argc, char *argv[])
+{
+ struct test_core_extern *skel;
+
+ /* libbpf.h */
+ libbpf_set_print(NULL);
+
+ /* bpf.h */
+ bpf_prog_get_fd_by_id(0);
+
+ /* btf.h */
+ btf__new(NULL, 0);
+
+ /* BPF skeleton */
+ skel = test_core_extern__open_and_load();
+ test_core_extern__destroy(skel);
+
+ std::cout << "DONE!" << std::endl;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh
index d23d4da66b83..174b72a64a4c 100755
--- a/tools/testing/selftests/bpf/test_flow_dissector.sh
+++ b/tools/testing/selftests/bpf/test_flow_dissector.sh
@@ -18,19 +18,55 @@ fi
# this is the case and run it with in_netns.sh if it is being run in the root
# namespace.
if [[ -z $(ip netns identify $$) ]]; then
+ err=0
+ if bpftool="$(which bpftool)"; then
+ echo "Testing global flow dissector..."
+
+ $bpftool prog loadall ./bpf_flow.o /sys/fs/bpf/flow \
+ type flow_dissector
+
+ if ! unshare --net $bpftool prog attach pinned \
+ /sys/fs/bpf/flow/flow_dissector flow_dissector; then
+ echo "Unexpected unsuccessful attach in namespace" >&2
+ err=1
+ fi
+
+ $bpftool prog attach pinned /sys/fs/bpf/flow/flow_dissector \
+ flow_dissector
+
+ if unshare --net $bpftool prog attach pinned \
+ /sys/fs/bpf/flow/flow_dissector flow_dissector; then
+ echo "Unexpected successful attach in namespace" >&2
+ err=1
+ fi
+
+ if ! $bpftool prog detach pinned \
+ /sys/fs/bpf/flow/flow_dissector flow_dissector; then
+ echo "Failed to detach flow dissector" >&2
+ err=1
+ fi
+
+ rm -rf /sys/fs/bpf/flow
+ else
+ echo "Skipping root flow dissector test, bpftool not found" >&2
+ fi
+
+ # Run the rest of the tests in a net namespace.
../net/in_netns.sh "$0" "$@"
- exit $?
-fi
+ err=$(( $err + $? ))
-# Determine selftest success via shell exit code
-exit_handler()
-{
- if (( $? == 0 )); then
+ if (( $err == 0 )); then
echo "selftests: $TESTNAME [PASS]";
else
echo "selftests: $TESTNAME [FAILED]";
fi
+ exit $err
+fi
+
+# Determine selftest success via shell exit code
+exit_handler()
+{
set +e
# Cleanup
@@ -63,6 +99,9 @@ fi
# Setup
tc qdisc add dev lo ingress
+echo 0 > /proc/sys/net/ipv4/conf/default/rp_filter
+echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter
+echo 0 > /proc/sys/net/ipv4/conf/lo/rp_filter
echo "Testing IPv4..."
# Drops all IP/UDP packets coming from port 9
@@ -100,6 +139,20 @@ echo "Testing IPv4 + GRE..."
tc filter del dev lo ingress pref 1337
+echo "Testing port range..."
+# Drops all IP/UDP packets coming from port 8-10
+tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \
+ udp src_port 8-10 action drop
+
+# Send 10 IPv4/UDP packets from port 7. Filter should not drop any.
+./test_flow_dissector -i 4 -f 7
+# Send 10 IPv4/UDP packets from port 9. Filter should drop all.
+./test_flow_dissector -i 4 -f 9 -F
+# Send 10 IPv4/UDP packets from port 11. Filter should not drop any.
+./test_flow_dissector -i 4 -f 11
+
+tc filter del dev lo ingress pref 1337
+
echo "Testing IPv6..."
# Drops all IPv6/UDP packets coming from port 9
tc filter add dev lo parent ffff: protocol ipv6 pref 1337 flower ip_proto \
diff --git a/tools/testing/selftests/bpf/test_ftrace.sh b/tools/testing/selftests/bpf/test_ftrace.sh
new file mode 100755
index 000000000000..20de7bb873bc
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_ftrace.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+TR=/sys/kernel/debug/tracing/
+clear_trace() { # reset trace output
+ echo > $TR/trace
+}
+
+disable_tracing() { # stop trace recording
+ echo 0 > $TR/tracing_on
+}
+
+enable_tracing() { # start trace recording
+ echo 1 > $TR/tracing_on
+}
+
+reset_tracer() { # reset the current tracer
+ echo nop > $TR/current_tracer
+}
+
+disable_tracing
+clear_trace
+
+echo "" > $TR/set_ftrace_filter
+echo '*printk* *console* *wake* *serial* *lock*' > $TR/set_ftrace_notrace
+
+echo "bpf_prog_test*" > $TR/set_graph_function
+echo "" > $TR/set_graph_notrace
+
+echo function_graph > $TR/current_tracer
+
+enable_tracing
+./test_progs -t fentry
+./test_progs -t fexit
+disable_tracing
+clear_trace
+
+reset_tracer
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_hashmap.c b/tools/testing/selftests/bpf/test_hashmap.c
index b64094c981e3..c490e012c23f 100644
--- a/tools/testing/selftests/bpf/test_hashmap.c
+++ b/tools/testing/selftests/bpf/test_hashmap.c
@@ -8,7 +8,7 @@
#include <stdio.h>
#include <errno.h>
#include <linux/err.h>
-#include "hashmap.h"
+#include "bpf/hashmap.h"
#define CHECK(condition, format...) ({ \
int __ret = !!(condition); \
diff --git a/tools/testing/selftests/bpf/test_libbpf.sh b/tools/testing/selftests/bpf/test_libbpf.sh
deleted file mode 100755
index 2989b2e2d856..000000000000
--- a/tools/testing/selftests/bpf/test_libbpf.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-export TESTNAME=test_libbpf
-
-# Determine selftest success via shell exit code
-exit_handler()
-{
- if [ $? -eq 0 ]; then
- echo "selftests: $TESTNAME [PASS]";
- else
- echo "$TESTNAME: failed at file $LAST_LOADED" 1>&2
- echo "selftests: $TESTNAME [FAILED]";
- fi
-}
-
-libbpf_open_file()
-{
- LAST_LOADED=$1
- if [ -n "$VERBOSE" ]; then
- ./test_libbpf_open $1
- else
- ./test_libbpf_open --quiet $1
- fi
-}
-
-# Exit script immediately (well catched by trap handler) if any
-# program/thing exits with a non-zero status.
-set -e
-
-# (Use 'trap -l' to list meaning of numbers)
-trap exit_handler 0 2 3 6 9
-
-libbpf_open_file test_l4lb.o
-
-# Load a program with BPF-to-BPF calls
-libbpf_open_file test_l4lb_noinline.o
-
-# Load a program compiled without the "-target bpf" flag
-libbpf_open_file test_xdp.o
-
-# Success
-exit 0
diff --git a/tools/testing/selftests/bpf/test_libbpf_open.c b/tools/testing/selftests/bpf/test_libbpf_open.c
deleted file mode 100644
index 9e9db202d218..000000000000
--- a/tools/testing/selftests/bpf/test_libbpf_open.c
+++ /dev/null
@@ -1,144 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc.
- */
-static const char *__doc__ =
- "Libbpf test program for loading BPF ELF object files";
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdarg.h>
-#include <bpf/libbpf.h>
-#include <getopt.h>
-
-#include "bpf_rlimit.h"
-
-static const struct option long_options[] = {
- {"help", no_argument, NULL, 'h' },
- {"debug", no_argument, NULL, 'D' },
- {"quiet", no_argument, NULL, 'q' },
- {0, 0, NULL, 0 }
-};
-
-static void usage(char *argv[])
-{
- int i;
-
- printf("\nDOCUMENTATION:\n%s\n\n", __doc__);
- printf(" Usage: %s (options-see-below) BPF_FILE\n", argv[0]);
- printf(" Listing options:\n");
- for (i = 0; long_options[i].name != 0; i++) {
- printf(" --%-12s", long_options[i].name);
- printf(" short-option: -%c",
- long_options[i].val);
- printf("\n");
- }
- printf("\n");
-}
-
-static bool debug = 0;
-static int libbpf_debug_print(enum libbpf_print_level level,
- const char *fmt, va_list args)
-{
- if (level == LIBBPF_DEBUG && !debug)
- return 0;
-
- fprintf(stderr, "[%d] ", level);
- return vfprintf(stderr, fmt, args);
-}
-
-#define EXIT_FAIL_LIBBPF EXIT_FAILURE
-#define EXIT_FAIL_OPTION 2
-
-int test_walk_progs(struct bpf_object *obj, bool verbose)
-{
- struct bpf_program *prog;
- int cnt = 0;
-
- bpf_object__for_each_program(prog, obj) {
- cnt++;
- if (verbose)
- printf("Prog (count:%d) section_name: %s\n", cnt,
- bpf_program__title(prog, false));
- }
- return 0;
-}
-
-int test_walk_maps(struct bpf_object *obj, bool verbose)
-{
- struct bpf_map *map;
- int cnt = 0;
-
- bpf_object__for_each_map(map, obj) {
- cnt++;
- if (verbose)
- printf("Map (count:%d) name: %s\n", cnt,
- bpf_map__name(map));
- }
- return 0;
-}
-
-int test_open_file(char *filename, bool verbose)
-{
- struct bpf_object *bpfobj = NULL;
- long err;
-
- if (verbose)
- printf("Open BPF ELF-file with libbpf: %s\n", filename);
-
- /* Load BPF ELF object file and check for errors */
- bpfobj = bpf_object__open(filename);
- err = libbpf_get_error(bpfobj);
- if (err) {
- char err_buf[128];
- libbpf_strerror(err, err_buf, sizeof(err_buf));
- if (verbose)
- printf("Unable to load eBPF objects in file '%s': %s\n",
- filename, err_buf);
- return EXIT_FAIL_LIBBPF;
- }
- test_walk_progs(bpfobj, verbose);
- test_walk_maps(bpfobj, verbose);
-
- if (verbose)
- printf("Close BPF ELF-file with libbpf: %s\n",
- bpf_object__name(bpfobj));
- bpf_object__close(bpfobj);
-
- return 0;
-}
-
-int main(int argc, char **argv)
-{
- char filename[1024] = { 0 };
- bool verbose = 1;
- int longindex = 0;
- int opt;
-
- libbpf_set_print(libbpf_debug_print);
-
- /* Parse commands line args */
- while ((opt = getopt_long(argc, argv, "hDq",
- long_options, &longindex)) != -1) {
- switch (opt) {
- case 'D':
- debug = 1;
- break;
- case 'q': /* Use in scripting mode */
- verbose = 0;
- break;
- case 'h':
- default:
- usage(argv);
- return EXIT_FAIL_OPTION;
- }
- }
- if (optind >= argc) {
- usage(argv);
- printf("ERROR: Expected BPF_FILE argument after options\n");
- return EXIT_FAIL_OPTION;
- }
- snprintf(filename, sizeof(filename), "%s", argv[optind]);
-
- return test_open_file(filename, verbose);
-}
diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
index acf7a74f97cd..59ea56945e6c 100755
--- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
+++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
@@ -314,15 +314,15 @@ test_gso()
command -v nc >/dev/null 2>&1 || \
{ echo >&2 "nc is not available: skipping TSO tests"; return; }
- # listen on IPv*_DST, capture TCP into $TMPFILE
+ # listen on port 9000, capture TCP into $TMPFILE
if [ "${PROTO}" == "IPv4" ] ; then
IP_DST=${IPv4_DST}
ip netns exec ${NS3} bash -c \
- "nc -4 -l -s ${IPv4_DST} -p 9000 > ${TMPFILE} &"
+ "nc -4 -l -p 9000 > ${TMPFILE} &"
elif [ "${PROTO}" == "IPv6" ] ; then
IP_DST=${IPv6_DST}
ip netns exec ${NS3} bash -c \
- "nc -6 -l -s ${IPv6_DST} -p 9000 > ${TMPFILE} &"
+ "nc -6 -l -p 9000 > ${TMPFILE} &"
RET=$?
else
echo " test_gso: unknown PROTO: ${PROTO}"
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 5443b9bd75ed..02eae1e864c2 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -508,6 +508,21 @@ static void test_devmap(unsigned int task, void *data)
close(fd);
}
+static void test_devmap_hash(unsigned int task, void *data)
+{
+ int fd;
+ __u32 key, value;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP_HASH, sizeof(key), sizeof(value),
+ 2, 0);
+ if (fd < 0) {
+ printf("Failed to create devmap_hash '%s'!\n", strerror(errno));
+ exit(1);
+ }
+
+ close(fd);
+}
+
static void test_queuemap(unsigned int task, void *data)
{
const int MAP_SIZE = 32;
@@ -1127,7 +1142,6 @@ out_sockmap:
#define MAPINMAP_PROG "./test_map_in_map.o"
static void test_map_in_map(void)
{
- struct bpf_program *prog;
struct bpf_object *obj;
struct bpf_map *map;
int mim_fd, fd, err;
@@ -1164,9 +1178,6 @@ static void test_map_in_map(void)
goto out_map_in_map;
}
- bpf_object__for_each_program(prog, obj) {
- bpf_program__set_xdp(prog);
- }
bpf_object__load(obj);
map = bpf_object__find_map_by_name(obj, "mim_array");
@@ -1684,6 +1695,7 @@ static void run_all_tests(void)
test_arraymap_percpu_many_keys();
test_devmap(0, NULL);
+ test_devmap_hash(0, NULL);
test_sockmap(0, NULL);
test_map_large();
@@ -1701,9 +1713,9 @@ static void run_all_tests(void)
test_map_in_map();
}
-#define DECLARE
+#define DEFINE_TEST(name) extern void test_##name(void);
#include <map_tests/tests.h>
-#undef DECLARE
+#undef DEFINE_TEST
int main(void)
{
@@ -1715,9 +1727,9 @@ int main(void)
map_flags = BPF_F_NO_PREALLOC;
run_all_tests();
-#define CALL
+#define DEFINE_TEST(name) test_##name();
#include <map_tests/tests.h>
-#undef CALL
+#undef DEFINE_TEST
printf("test_maps: OK, %d SKIPPED\n", skips);
return 0;
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 425f9ed27c3b..8294ae3ffb3c 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -22,6 +22,7 @@ import os
import pprint
import random
import re
+import stat
import string
import struct
import subprocess
@@ -311,7 +312,11 @@ class DebugfsDir:
for f in out.split():
if f == "ports":
continue
+
p = os.path.join(path, f)
+ if not os.stat(p).st_mode & stat.S_IRUSR:
+ continue
+
if os.path.isfile(p):
_, out = cmd('cat %s/%s' % (path, f))
dfs[f] = out.strip()
@@ -330,13 +335,22 @@ class NetdevSimDev:
"""
Class for netdevsim bus device and its attributes.
"""
+ @staticmethod
+ def ctrl_write(path, val):
+ fullpath = os.path.join("/sys/bus/netdevsim/", path)
+ try:
+ with open(fullpath, "w") as f:
+ f.write(val)
+ except OSError as e:
+ log("WRITE %s: %r" % (fullpath, val), -e.errno)
+ raise e
+ log("WRITE %s: %r" % (fullpath, val), 0)
def __init__(self, port_count=1):
addr = 0
while True:
try:
- with open("/sys/bus/netdevsim/new_device", "w") as f:
- f.write("%u %u" % (addr, port_count))
+ self.ctrl_write("new_device", "%u %u" % (addr, port_count))
except OSError as e:
if e.errno == errno.ENOSPC:
addr += 1
@@ -398,14 +412,13 @@ class NetdevSimDev:
return progs
def remove(self):
- with open("/sys/bus/netdevsim/del_device", "w") as f:
- f.write("%u" % self.addr)
+ self.ctrl_write("del_device", "%u" % (self.addr, ))
devs.remove(self)
def remove_nsim(self, nsim):
self.nsims.remove(nsim)
- with open("/sys/bus/netdevsim/devices/netdevsim%u/del_port" % self.addr ,"w") as f:
- f.write("%u" % nsim.port_index)
+ self.ctrl_write("devices/netdevsim%u/del_port" % (self.addr, ),
+ "%u" % (nsim.port_index, ))
class NetdevSim:
"""
@@ -1353,7 +1366,7 @@ try:
bpftool_prog_list_wait(expected=1)
ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
- fail(ifnameB != simB1['ifname'], "program not bound to originial device")
+ fail(ifnameB != simB1['ifname'], "program not bound to original device")
simB1.remove()
bpftool_prog_list_wait(expected=1)
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index dae0819b1141..bab1e6f1d8f1 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -2,11 +2,176 @@
/* Copyright (c) 2017 Facebook
*/
#include "test_progs.h"
+#include "cgroup_helpers.h"
#include "bpf_rlimit.h"
+#include <argp.h>
+#include <string.h>
-int error_cnt, pass_cnt;
-bool jit_enabled;
-bool verifier_stats = false;
+/* defined in test_progs.h */
+struct test_env env = {};
+
+struct prog_test_def {
+ const char *test_name;
+ int test_num;
+ void (*run_test)(void);
+ bool force_log;
+ int error_cnt;
+ int skip_cnt;
+ bool tested;
+ bool need_cgroup_cleanup;
+
+ char *subtest_name;
+ int subtest_num;
+
+ /* store counts before subtest started */
+ int old_error_cnt;
+};
+
+static bool should_run(struct test_selector *sel, int num, const char *name)
+{
+ int i;
+
+ for (i = 0; i < sel->blacklist.cnt; i++) {
+ if (strstr(name, sel->blacklist.strs[i]))
+ return false;
+ }
+
+ for (i = 0; i < sel->whitelist.cnt; i++) {
+ if (strstr(name, sel->whitelist.strs[i]))
+ return true;
+ }
+
+ if (!sel->whitelist.cnt && !sel->num_set)
+ return true;
+
+ return num < sel->num_set_len && sel->num_set[num];
+}
+
+static void dump_test_log(const struct prog_test_def *test, bool failed)
+{
+ if (stdout == env.stdout)
+ return;
+
+ fflush(stdout); /* exports env.log_buf & env.log_cnt */
+
+ if (env.verbosity > VERBOSE_NONE || test->force_log || failed) {
+ if (env.log_cnt) {
+ env.log_buf[env.log_cnt] = '\0';
+ fprintf(env.stdout, "%s", env.log_buf);
+ if (env.log_buf[env.log_cnt - 1] != '\n')
+ fprintf(env.stdout, "\n");
+ }
+ }
+
+ fseeko(stdout, 0, SEEK_SET); /* rewind */
+}
+
+static void skip_account(void)
+{
+ if (env.test->skip_cnt) {
+ env.skip_cnt++;
+ env.test->skip_cnt = 0;
+ }
+}
+
+void test__end_subtest()
+{
+ struct prog_test_def *test = env.test;
+ int sub_error_cnt = test->error_cnt - test->old_error_cnt;
+
+ if (sub_error_cnt)
+ env.fail_cnt++;
+ else
+ env.sub_succ_cnt++;
+ skip_account();
+
+ dump_test_log(test, sub_error_cnt);
+
+ fprintf(env.stdout, "#%d/%d %s:%s\n",
+ test->test_num, test->subtest_num,
+ test->subtest_name, sub_error_cnt ? "FAIL" : "OK");
+
+ free(test->subtest_name);
+ test->subtest_name = NULL;
+}
+
+bool test__start_subtest(const char *name)
+{
+ struct prog_test_def *test = env.test;
+
+ if (test->subtest_name)
+ test__end_subtest();
+
+ test->subtest_num++;
+
+ if (!name || !name[0]) {
+ fprintf(env.stderr,
+ "Subtest #%d didn't provide sub-test name!\n",
+ test->subtest_num);
+ return false;
+ }
+
+ if (!should_run(&env.subtest_selector, test->subtest_num, name))
+ return false;
+
+ test->subtest_name = strdup(name);
+ if (!test->subtest_name) {
+ fprintf(env.stderr,
+ "Subtest #%d: failed to copy subtest name!\n",
+ test->subtest_num);
+ return false;
+ }
+ env.test->old_error_cnt = env.test->error_cnt;
+
+ return true;
+}
+
+void test__force_log() {
+ env.test->force_log = true;
+}
+
+void test__skip(void)
+{
+ env.test->skip_cnt++;
+}
+
+void test__fail(void)
+{
+ env.test->error_cnt++;
+}
+
+int test__join_cgroup(const char *path)
+{
+ int fd;
+
+ if (!env.test->need_cgroup_cleanup) {
+ if (setup_cgroup_environment()) {
+ fprintf(stderr,
+ "#%d %s: Failed to setup cgroup environment\n",
+ env.test->test_num, env.test->test_name);
+ return -1;
+ }
+
+ env.test->need_cgroup_cleanup = true;
+ }
+
+ fd = create_and_get_cgroup(path);
+ if (fd < 0) {
+ fprintf(stderr,
+ "#%d %s: Failed to create cgroup '%s' (errno=%d)\n",
+ env.test->test_num, env.test->test_name, path, errno);
+ return fd;
+ }
+
+ if (join_cgroup(path)) {
+ fprintf(stderr,
+ "#%d %s: Failed to join cgroup '%s' (errno=%d)\n",
+ env.test->test_num, env.test->test_name, path, errno);
+ return -1;
+ }
+
+ return fd;
+}
struct ipv4_packet pkt_v4 = {
.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
@@ -32,7 +197,7 @@ int bpf_find_map(const char *test, struct bpf_object *obj, const char *name)
map = bpf_object__find_map_by_name(obj, name);
if (!map) {
printf("%s:FAIL:map '%s' not found\n", test, name);
- error_cnt++;
+ test__fail();
return -1;
}
return bpf_map__fd(map);
@@ -156,23 +321,367 @@ void *spin_lock_thread(void *arg)
pthread_exit(arg);
}
-#define DECLARE
+/* extern declarations for test funcs */
+#define DEFINE_TEST(name) extern void test_##name(void);
#include <prog_tests/tests.h>
-#undef DECLARE
+#undef DEFINE_TEST
+
+static struct prog_test_def prog_test_defs[] = {
+#define DEFINE_TEST(name) { \
+ .test_name = #name, \
+ .run_test = &test_##name, \
+},
+#include <prog_tests/tests.h>
+#undef DEFINE_TEST
+};
+const int prog_test_cnt = ARRAY_SIZE(prog_test_defs);
+
+const char *argp_program_version = "test_progs 0.1";
+const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
+const char argp_program_doc[] = "BPF selftests test runner";
+
+enum ARG_KEYS {
+ ARG_TEST_NUM = 'n',
+ ARG_TEST_NAME = 't',
+ ARG_TEST_NAME_BLACKLIST = 'b',
+ ARG_VERIFIER_STATS = 's',
+ ARG_VERBOSE = 'v',
+};
+
+static const struct argp_option opts[] = {
+ { "num", ARG_TEST_NUM, "NUM", 0,
+ "Run test number NUM only " },
+ { "name", ARG_TEST_NAME, "NAMES", 0,
+ "Run tests with names containing any string from NAMES list" },
+ { "name-blacklist", ARG_TEST_NAME_BLACKLIST, "NAMES", 0,
+ "Don't run tests with names containing any string from NAMES list" },
+ { "verifier-stats", ARG_VERIFIER_STATS, NULL, 0,
+ "Output verifier statistics", },
+ { "verbose", ARG_VERBOSE, "LEVEL", OPTION_ARG_OPTIONAL,
+ "Verbose output (use -vv or -vvv for progressively verbose output)" },
+ {},
+};
-int main(int ac, char **av)
+static int libbpf_print_fn(enum libbpf_print_level level,
+ const char *format, va_list args)
{
+ if (env.verbosity < VERBOSE_VERY && level == LIBBPF_DEBUG)
+ return 0;
+ vprintf(format, args);
+ return 0;
+}
+
+static int parse_str_list(const char *s, struct str_set *set)
+{
+ char *input, *state = NULL, *next, **tmp, **strs = NULL;
+ int cnt = 0;
+
+ input = strdup(s);
+ if (!input)
+ return -ENOMEM;
+
+ set->cnt = 0;
+ set->strs = NULL;
+
+ while ((next = strtok_r(state ? NULL : input, ",", &state))) {
+ tmp = realloc(strs, sizeof(*strs) * (cnt + 1));
+ if (!tmp)
+ goto err;
+ strs = tmp;
+
+ strs[cnt] = strdup(next);
+ if (!strs[cnt])
+ goto err;
+
+ cnt++;
+ }
+
+ set->cnt = cnt;
+ set->strs = (const char **)strs;
+ free(input);
+ return 0;
+err:
+ free(strs);
+ free(input);
+ return -ENOMEM;
+}
+
+int parse_num_list(const char *s, struct test_selector *sel)
+{
+ int i, set_len = 0, num, start = 0, end = -1;
+ bool *set = NULL, *tmp, parsing_end = false;
+ char *next;
+
+ while (s[0]) {
+ errno = 0;
+ num = strtol(s, &next, 10);
+ if (errno)
+ return -errno;
+
+ if (parsing_end)
+ end = num;
+ else
+ start = num;
+
+ if (!parsing_end && *next == '-') {
+ s = next + 1;
+ parsing_end = true;
+ continue;
+ } else if (*next == ',') {
+ parsing_end = false;
+ s = next + 1;
+ end = num;
+ } else if (*next == '\0') {
+ parsing_end = false;
+ s = next;
+ end = num;
+ } else {
+ return -EINVAL;
+ }
+
+ if (start > end)
+ return -EINVAL;
+
+ if (end + 1 > set_len) {
+ set_len = end + 1;
+ tmp = realloc(set, set_len);
+ if (!tmp) {
+ free(set);
+ return -ENOMEM;
+ }
+ set = tmp;
+ }
+ for (i = start; i <= end; i++) {
+ set[i] = true;
+ }
+
+ }
+
+ if (!set)
+ return -EINVAL;
+
+ sel->num_set = set;
+ sel->num_set_len = set_len;
+
+ return 0;
+}
+
+extern int extra_prog_load_log_flags;
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ struct test_env *env = state->input;
+
+ switch (key) {
+ case ARG_TEST_NUM: {
+ char *subtest_str = strchr(arg, '/');
+
+ if (subtest_str) {
+ *subtest_str = '\0';
+ if (parse_num_list(subtest_str + 1,
+ &env->subtest_selector)) {
+ fprintf(stderr,
+ "Failed to parse subtest numbers.\n");
+ return -EINVAL;
+ }
+ }
+ if (parse_num_list(arg, &env->test_selector)) {
+ fprintf(stderr, "Failed to parse test numbers.\n");
+ return -EINVAL;
+ }
+ break;
+ }
+ case ARG_TEST_NAME: {
+ char *subtest_str = strchr(arg, '/');
+
+ if (subtest_str) {
+ *subtest_str = '\0';
+ if (parse_str_list(subtest_str + 1,
+ &env->subtest_selector.whitelist))
+ return -ENOMEM;
+ }
+ if (parse_str_list(arg, &env->test_selector.whitelist))
+ return -ENOMEM;
+ break;
+ }
+ case ARG_TEST_NAME_BLACKLIST: {
+ char *subtest_str = strchr(arg, '/');
+
+ if (subtest_str) {
+ *subtest_str = '\0';
+ if (parse_str_list(subtest_str + 1,
+ &env->subtest_selector.blacklist))
+ return -ENOMEM;
+ }
+ if (parse_str_list(arg, &env->test_selector.blacklist))
+ return -ENOMEM;
+ break;
+ }
+ case ARG_VERIFIER_STATS:
+ env->verifier_stats = true;
+ break;
+ case ARG_VERBOSE:
+ env->verbosity = VERBOSE_NORMAL;
+ if (arg) {
+ if (strcmp(arg, "v") == 0) {
+ env->verbosity = VERBOSE_VERY;
+ extra_prog_load_log_flags = 1;
+ } else if (strcmp(arg, "vv") == 0) {
+ env->verbosity = VERBOSE_SUPER;
+ extra_prog_load_log_flags = 2;
+ } else {
+ fprintf(stderr,
+ "Unrecognized verbosity setting ('%s'), only -v and -vv are supported\n",
+ arg);
+ return -EINVAL;
+ }
+ }
+ break;
+ case ARGP_KEY_ARG:
+ argp_usage(state);
+ break;
+ case ARGP_KEY_END:
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+ return 0;
+}
+
+static void stdio_hijack(void)
+{
+#ifdef __GLIBC__
+ env.stdout = stdout;
+ env.stderr = stderr;
+
+ if (env.verbosity > VERBOSE_NONE) {
+ /* nothing to do, output to stdout by default */
+ return;
+ }
+
+ /* stdout and stderr -> buffer */
+ fflush(stdout);
+
+ stdout = open_memstream(&env.log_buf, &env.log_cnt);
+ if (!stdout) {
+ stdout = env.stdout;
+ perror("open_memstream");
+ return;
+ }
+
+ stderr = stdout;
+#endif
+}
+
+static void stdio_restore(void)
+{
+#ifdef __GLIBC__
+ if (stdout == env.stdout)
+ return;
+
+ fclose(stdout);
+ free(env.log_buf);
+
+ env.log_buf = NULL;
+ env.log_cnt = 0;
+
+ stdout = env.stdout;
+ stderr = env.stderr;
+#endif
+}
+
+/*
+ * Determine if test_progs is running as a "flavored" test runner and switch
+ * into corresponding sub-directory to load correct BPF objects.
+ *
+ * This is done by looking at executable name. If it contains "-flavor"
+ * suffix, then we are running as a flavored test runner.
+ */
+int cd_flavor_subdir(const char *exec_name)
+{
+ /* General form of argv[0] passed here is:
+ * some/path/to/test_progs[-flavor], where -flavor part is optional.
+ * First cut out "test_progs[-flavor]" part, then extract "flavor"
+ * part, if it's there.
+ */
+ const char *flavor = strrchr(exec_name, '/');
+
+ if (!flavor)
+ return 0;
+ flavor++;
+ flavor = strrchr(flavor, '-');
+ if (!flavor)
+ return 0;
+ flavor++;
+ printf("Switching to flavor '%s' subdirectory...\n", flavor);
+ return chdir(flavor);
+}
+
+int main(int argc, char **argv)
+{
+ static const struct argp argp = {
+ .options = opts,
+ .parser = parse_arg,
+ .doc = argp_program_doc,
+ };
+ int err, i;
+
+ err = argp_parse(&argp, argc, argv, 0, NULL, &env);
+ if (err)
+ return err;
+
+ err = cd_flavor_subdir(argv[0]);
+ if (err)
+ return err;
+
+ libbpf_set_print(libbpf_print_fn);
+
srand(time(NULL));
- jit_enabled = is_jit_enabled();
+ env.jit_enabled = is_jit_enabled();
- if (ac == 2 && strcmp(av[1], "-s") == 0)
- verifier_stats = true;
+ stdio_hijack();
+ for (i = 0; i < prog_test_cnt; i++) {
+ struct prog_test_def *test = &prog_test_defs[i];
-#define CALL
-#include <prog_tests/tests.h>
-#undef CALL
+ env.test = test;
+ test->test_num = i + 1;
+
+ if (!should_run(&env.test_selector,
+ test->test_num, test->test_name))
+ continue;
+
+ test->run_test();
+ /* ensure last sub-test is finalized properly */
+ if (test->subtest_name)
+ test__end_subtest();
+
+ test->tested = true;
+ if (test->error_cnt)
+ env.fail_cnt++;
+ else
+ env.succ_cnt++;
+ skip_account();
+
+ dump_test_log(test, test->error_cnt);
+
+ fprintf(env.stdout, "#%d %s:%s\n",
+ test->test_num, test->test_name,
+ test->error_cnt ? "FAIL" : "OK");
+
+ if (test->need_cgroup_cleanup)
+ cleanup_cgroup_environment();
+ }
+ stdio_restore();
+ printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
+ env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
+
+ free(env.test_selector.blacklist.strs);
+ free(env.test_selector.whitelist.strs);
+ free(env.test_selector.num_set);
+ free(env.subtest_selector.blacklist.strs);
+ free(env.subtest_selector.whitelist.strs);
+ free(env.subtest_selector.num_set);
- printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
- return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
+ return env.fail_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
}
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 49e0f7d85643..bcfa9ef23fda 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -16,9 +16,10 @@ typedef __u16 __sum16;
#include <linux/if_packet.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include <linux/tcp.h>
+#include <netinet/tcp.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
+#include <linux/socket.h>
#include <linux/unistd.h>
#include <sys/ioctl.h>
@@ -34,13 +35,56 @@ typedef __u16 __sum16;
#include "test_iptunnel_common.h"
#include "bpf_util.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_endian.h>
#include "trace_helpers.h"
#include "flow_dissector_load.h"
-extern int error_cnt, pass_cnt;
-extern bool jit_enabled;
-extern bool verifier_stats;
+enum verbosity {
+ VERBOSE_NONE,
+ VERBOSE_NORMAL,
+ VERBOSE_VERY,
+ VERBOSE_SUPER,
+};
+
+struct str_set {
+ const char **strs;
+ int cnt;
+};
+
+struct test_selector {
+ struct str_set whitelist;
+ struct str_set blacklist;
+ bool *num_set;
+ int num_set_len;
+};
+
+struct test_env {
+ struct test_selector test_selector;
+ struct test_selector subtest_selector;
+ bool verifier_stats;
+ enum verbosity verbosity;
+
+ bool jit_enabled;
+
+ struct prog_test_def *test;
+ FILE *stdout;
+ FILE *stderr;
+ char *log_buf;
+ size_t log_cnt;
+
+ int succ_cnt; /* successful tests */
+ int sub_succ_cnt; /* successful sub-tests */
+ int fail_cnt; /* total failed tests + sub-tests */
+ int skip_cnt; /* skipped tests */
+};
+
+extern struct test_env env;
+
+extern void test__force_log();
+extern bool test__start_subtest(const char *name);
+extern void test__skip(void);
+extern void test__fail(void);
+extern int test__join_cgroup(const char *path);
#define MAGIC_BYTES 123
@@ -62,14 +106,27 @@ extern struct ipv6_packet pkt_v6;
#define _CHECK(condition, tag, duration, format...) ({ \
int __ret = !!(condition); \
+ int __save_errno = errno; \
if (__ret) { \
- error_cnt++; \
+ test__fail(); \
printf("%s:FAIL:%s ", __func__, tag); \
printf(format); \
} else { \
- pass_cnt++; \
- printf("%s:PASS:%s %d nsec\n", __func__, tag, duration);\
+ printf("%s:PASS:%s %d nsec\n", \
+ __func__, tag, duration); \
+ } \
+ errno = __save_errno; \
+ __ret; \
+})
+
+#define CHECK_FAIL(condition) ({ \
+ int __ret = !!(condition); \
+ int __save_errno = errno; \
+ if (__ret) { \
+ test__fail(); \
+ printf("%s:FAIL:%d\n", __func__, __LINE__); \
} \
+ errno = __save_errno; \
__ret; \
})
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
index 9220747c069d..356351c0ac28 100644
--- a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
@@ -120,7 +120,7 @@ int check_ancestor_cgroup_ids(int prog_id)
int err = 0;
int map_fd;
- expected_ids[0] = 0x100000001; /* root cgroup */
+ expected_ids[0] = get_cgroup_id("/.."); /* root cgroup */
expected_ids[1] = get_cgroup_id("");
expected_ids[2] = get_cgroup_id(CGROUP_PATH);
expected_ids[3] = 0; /* non-existent cgroup */
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
index fb679ac3d4b0..52bf14955797 100644
--- a/tools/testing/selftests/bpf/test_sock.c
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -13,6 +13,7 @@
#include <bpf/bpf.h>
#include "cgroup_helpers.h"
+#include <bpf/bpf_endian.h>
#include "bpf_rlimit.h"
#include "bpf_util.h"
@@ -232,7 +233,8 @@ static struct sock_test tests[] = {
/* if (ip == expected && port == expected) */
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock, src_ip6[3])),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x01000000, 4),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+ __bpf_constant_ntohl(0x00000001), 4),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock, src_port)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x2001, 2),
@@ -261,7 +263,8 @@ static struct sock_test tests[] = {
/* if (ip == expected && port == expected) */
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock, src_ip4)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x0100007F, 4),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+ __bpf_constant_ntohl(0x7F000001), 4),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock, src_port)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2),
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 3845144e2c91..779e11da979c 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -240,14 +240,14 @@ static int sockmap_init_sockets(int verbose)
addr.sin_port = htons(S1_PORT);
err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
if (err < 0) {
- perror("bind s1 failed()\n");
+ perror("bind s1 failed()");
return errno;
}
addr.sin_port = htons(S2_PORT);
err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
if (err < 0) {
- perror("bind s2 failed()\n");
+ perror("bind s2 failed()");
return errno;
}
@@ -255,14 +255,14 @@ static int sockmap_init_sockets(int verbose)
addr.sin_port = htons(S1_PORT);
err = listen(s1, 32);
if (err < 0) {
- perror("listen s1 failed()\n");
+ perror("listen s1 failed()");
return errno;
}
addr.sin_port = htons(S2_PORT);
err = listen(s2, 32);
if (err < 0) {
- perror("listen s1 failed()\n");
+ perror("listen s1 failed()");
return errno;
}
@@ -270,14 +270,14 @@ static int sockmap_init_sockets(int verbose)
addr.sin_port = htons(S1_PORT);
err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
if (err < 0 && errno != EINPROGRESS) {
- perror("connect c1 failed()\n");
+ perror("connect c1 failed()");
return errno;
}
addr.sin_port = htons(S2_PORT);
err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
if (err < 0 && errno != EINPROGRESS) {
- perror("connect c2 failed()\n");
+ perror("connect c2 failed()");
return errno;
} else if (err < 0) {
err = 0;
@@ -286,13 +286,13 @@ static int sockmap_init_sockets(int verbose)
/* Accept Connecrtions */
p1 = accept(s1, NULL, NULL);
if (p1 < 0) {
- perror("accept s1 failed()\n");
+ perror("accept s1 failed()");
return errno;
}
p2 = accept(s2, NULL, NULL);
if (p2 < 0) {
- perror("accept s1 failed()\n");
+ perror("accept s1 failed()");
return errno;
}
@@ -331,25 +331,29 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,
FILE *file;
int i, fp;
- file = fopen(".sendpage_tst.tmp", "w+");
+ file = tmpfile();
+ if (!file) {
+ perror("create file for sendpage");
+ return 1;
+ }
for (i = 0; i < iov_length * cnt; i++, k++)
fwrite(&k, sizeof(char), 1, file);
fflush(file);
fseek(file, 0, SEEK_SET);
- fclose(file);
- fp = open(".sendpage_tst.tmp", O_RDONLY);
+ fp = fileno(file);
+
clock_gettime(CLOCK_MONOTONIC, &s->start);
for (i = 0; i < cnt; i++) {
int sent = sendfile(fd, fp, NULL, iov_length);
if (!drop && sent < 0) {
- perror("send loop error:");
- close(fp);
+ perror("send loop error");
+ fclose(file);
return sent;
} else if (drop && sent >= 0) {
printf("sendpage loop error expected: %i\n", sent);
- close(fp);
+ fclose(file);
return -EIO;
}
@@ -357,7 +361,7 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,
s->bytes_sent += sent;
}
clock_gettime(CLOCK_MONOTONIC, &s->end);
- close(fp);
+ fclose(file);
return 0;
}
@@ -463,7 +467,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
int sent = sendmsg(fd, &msg, flags);
if (!drop && sent < 0) {
- perror("send loop error:");
+ perror("send loop error");
goto out_errno;
} else if (drop && sent >= 0) {
printf("send loop error expected: %i\n", sent);
@@ -499,7 +503,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
total_bytes -= txmsg_pop_total;
err = clock_gettime(CLOCK_MONOTONIC, &s->start);
if (err < 0)
- perror("recv start time: ");
+ perror("recv start time");
while (s->bytes_recvd < total_bytes) {
if (txmsg_cork) {
timeout.tv_sec = 0;
@@ -543,7 +547,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
if (recv < 0) {
if (errno != EWOULDBLOCK) {
clock_gettime(CLOCK_MONOTONIC, &s->end);
- perror("recv failed()\n");
+ perror("recv failed()");
goto out_errno;
}
}
@@ -557,7 +561,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
errno = msg_verify_data(&msg, recv, chunk_sz);
if (errno) {
- perror("data verify msg failed\n");
+ perror("data verify msg failed");
goto out_errno;
}
if (recvp) {
@@ -565,7 +569,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
recvp,
chunk_sz);
if (errno) {
- perror("data verify msg_peek failed\n");
+ perror("data verify msg_peek failed");
goto out_errno;
}
}
@@ -654,7 +658,7 @@ static int sendmsg_test(struct sockmap_options *opt)
err = 0;
exit(err ? 1 : 0);
} else if (rxpid == -1) {
- perror("msg_loop_rx: ");
+ perror("msg_loop_rx");
return errno;
}
@@ -681,7 +685,7 @@ static int sendmsg_test(struct sockmap_options *opt)
s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
exit(err ? 1 : 0);
} else if (txpid == -1) {
- perror("msg_loop_tx: ");
+ perror("msg_loop_tx");
return errno;
}
@@ -715,7 +719,7 @@ static int forever_ping_pong(int rate, struct sockmap_options *opt)
/* Ping/Pong data from client to server */
sc = send(c1, buf, sizeof(buf), 0);
if (sc < 0) {
- perror("send failed()\n");
+ perror("send failed()");
return sc;
}
@@ -748,7 +752,7 @@ static int forever_ping_pong(int rate, struct sockmap_options *opt)
rc = recv(i, buf, sizeof(buf), 0);
if (rc < 0) {
if (errno != EWOULDBLOCK) {
- perror("recv failed()\n");
+ perror("recv failed()");
return rc;
}
}
@@ -760,7 +764,7 @@ static int forever_ping_pong(int rate, struct sockmap_options *opt)
sc = send(i, buf, rc, 0);
if (sc < 0) {
- perror("send failed()\n");
+ perror("send failed()");
return sc;
}
}
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h
index d008b41b7d8d..9b4d3a68a91a 100644
--- a/tools/testing/selftests/bpf/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/test_sockmap_kern.h
@@ -12,8 +12,8 @@
#include <linux/tcp.h>
#include <linux/pkt_cls.h>
#include <sys/socket.h>
-#include "bpf_helpers.h"
-#include "bpf_endian.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
/* Sockmap sample program connects a client and a backend together
* using cgroups.
diff --git a/tools/testing/selftests/bpf/test_stub.c b/tools/testing/selftests/bpf/test_stub.c
index 84e81a89e2f9..47e132726203 100644
--- a/tools/testing/selftests/bpf/test_stub.c
+++ b/tools/testing/selftests/bpf/test_stub.c
@@ -5,6 +5,8 @@
#include <bpf/libbpf.h>
#include <string.h>
+int extra_prog_load_log_flags = 0;
+
int bpf_prog_test_load(const char *file, enum bpf_prog_type type,
struct bpf_object **pobj, int *prog_fd)
{
@@ -15,6 +17,7 @@ int bpf_prog_test_load(const char *file, enum bpf_prog_type type,
attr.prog_type = type;
attr.expected_attach_type = 0;
attr.prog_flags = BPF_F_TEST_RND_HI32;
+ attr.log_level = extra_prog_load_log_flags;
return bpf_prog_load_xattr(&attr, pobj, prog_fd);
}
@@ -35,6 +38,7 @@ int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
load_attr.license = license;
load_attr.kern_version = kern_version;
load_attr.prog_flags = BPF_F_TEST_RND_HI32;
+ load_attr.log_level = extra_prog_load_log_flags;
return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz);
}
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
index a3bebd7c68dd..d196e2a4a6e0 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -13,6 +13,7 @@
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+#include <bpf/bpf_endian.h>
#include "bpf_rlimit.h"
#include "bpf_util.h"
#include "cgroup_helpers.h"
@@ -31,6 +32,7 @@ struct sysctl_test {
enum bpf_attach_type attach_type;
const char *sysctl;
int open_flags;
+ int seek;
const char *newval;
const char *oldval;
enum {
@@ -100,7 +102,7 @@ static struct sysctl_test tests[] = {
.descr = "ctx:write sysctl:write read ok",
.insns = {
/* If (write) */
- BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1,
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
offsetof(struct bpf_sysctl, write)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 1, 2),
@@ -119,6 +121,29 @@ static struct sysctl_test tests[] = {
.result = OP_EPERM,
},
{
+ .descr = "ctx:write sysctl:write read ok narrow",
+ .insns = {
+ /* u64 w = (u16)write & 1; */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_1,
+ offsetof(struct bpf_sysctl, write)),
+#else
+ BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_1,
+ offsetof(struct bpf_sysctl, write) + 2),
+#endif
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_7, 1),
+ /* return 1 - w; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_7),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SYSCTL,
+ .sysctl = "kernel/domainname",
+ .open_flags = O_WRONLY,
+ .newval = "(none)", /* same as default, should fail anyway */
+ .result = OP_EPERM,
+ },
+ {
.descr = "ctx:write sysctl:read write reject",
.insns = {
/* write = X */
@@ -139,7 +164,7 @@ static struct sysctl_test tests[] = {
/* If (file_pos == X) */
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
offsetof(struct bpf_sysctl, file_pos)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 3, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
@@ -152,15 +177,21 @@ static struct sysctl_test tests[] = {
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "kernel/ostype",
.open_flags = O_RDONLY,
+ .seek = 3,
.result = SUCCESS,
},
{
.descr = "ctx:file_pos sysctl:read read ok narrow",
.insns = {
/* If (file_pos == X) */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1,
offsetof(struct bpf_sysctl, file_pos)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2),
+#else
+ BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1,
+ offsetof(struct bpf_sysctl, file_pos) + 3),
+#endif
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 4, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
@@ -173,6 +204,7 @@ static struct sysctl_test tests[] = {
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "kernel/ostype",
.open_flags = O_RDONLY,
+ .seek = 4,
.result = SUCCESS,
},
{
@@ -214,7 +246,8 @@ static struct sysctl_test tests[] = {
/* if (ret == expected && */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, sizeof("tcp_mem") - 1, 6),
/* buf == "tcp_mem\0") */
- BPF_LD_IMM64(BPF_REG_8, 0x006d656d5f706374ULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x7463705f6d656d00ULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
@@ -255,7 +288,8 @@ static struct sysctl_test tests[] = {
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),
/* buf[0:7] == "tcp_me\0") */
- BPF_LD_IMM64(BPF_REG_8, 0x00656d5f706374ULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x7463705f6d650000ULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
@@ -298,12 +332,14 @@ static struct sysctl_test tests[] = {
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 16, 14),
/* buf[0:8] == "net/ipv4" && */
- BPF_LD_IMM64(BPF_REG_8, 0x347670692f74656eULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x6e65742f69707634ULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10),
/* buf[8:16] == "/tcp_mem" && */
- BPF_LD_IMM64(BPF_REG_8, 0x6d656d5f7063742fULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x2f7463705f6d656dULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6),
@@ -350,12 +386,14 @@ static struct sysctl_test tests[] = {
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 10),
/* buf[0:8] == "net/ipv4" && */
- BPF_LD_IMM64(BPF_REG_8, 0x347670692f74656eULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x6e65742f69707634ULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6),
/* buf[8:16] == "/tcp_me\0") */
- BPF_LD_IMM64(BPF_REG_8, 0x00656d5f7063742fULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x2f7463705f6d6500ULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
@@ -396,7 +434,8 @@ static struct sysctl_test tests[] = {
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),
/* buf[0:8] == "net/ip\0") */
- BPF_LD_IMM64(BPF_REG_8, 0x000070692f74656eULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x6e65742f69700000ULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
@@ -431,7 +470,8 @@ static struct sysctl_test tests[] = {
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6),
/* buf[0:6] == "Linux\n\0") */
- BPF_LD_IMM64(BPF_REG_8, 0x000a78756e694cULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x4c696e75780a0000ULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
@@ -469,7 +509,8 @@ static struct sysctl_test tests[] = {
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6),
/* buf[0:6] == "Linux\n\0") */
- BPF_LD_IMM64(BPF_REG_8, 0x000a78756e694cULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x4c696e75780a0000ULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
@@ -507,7 +548,8 @@ static struct sysctl_test tests[] = {
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),
/* buf[0:6] == "Linux\0") */
- BPF_LD_IMM64(BPF_REG_8, 0x000078756e694cULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x4c696e7578000000ULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
@@ -650,7 +692,8 @@ static struct sysctl_test tests[] = {
/* buf[0:4] == "606\0") */
BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0x00363036, 2),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_9,
+ bpf_ntohl(0x36303600), 2),
/* return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
@@ -685,17 +728,20 @@ static struct sysctl_test tests[] = {
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 23, 14),
/* buf[0:8] == "3000000 " && */
- BPF_LD_IMM64(BPF_REG_8, 0x2030303030303033ULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x3330303030303020ULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10),
/* buf[8:16] == "4000000 " && */
- BPF_LD_IMM64(BPF_REG_8, 0x2030303030303034ULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x3430303030303020ULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6),
/* buf[16:24] == "6000000\0") */
- BPF_LD_IMM64(BPF_REG_8, 0x0030303030303036ULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x3630303030303000ULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 16),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
@@ -735,7 +781,8 @@ static struct sysctl_test tests[] = {
/* buf[0:3] == "60\0") */
BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0x003036, 2),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_9,
+ bpf_ntohl(0x36300000), 2),
/* return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
@@ -757,7 +804,8 @@ static struct sysctl_test tests[] = {
/* sysctl_set_new_value arg2 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+ BPF_MOV64_IMM(BPF_REG_0,
+ bpf_ntohl(0x36303000)),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
@@ -791,7 +839,7 @@ static struct sysctl_test tests[] = {
/* sysctl_set_new_value arg2 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, FIXUP_SYSCTL_VALUE),
+ BPF_LD_IMM64(BPF_REG_0, FIXUP_SYSCTL_VALUE),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
@@ -825,8 +873,9 @@ static struct sysctl_test tests[] = {
/* arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0,
+ bpf_ntohl(0x36303000)),
+ BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -869,7 +918,8 @@ static struct sysctl_test tests[] = {
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
/* "600 602\0" */
- BPF_LD_IMM64(BPF_REG_0, 0x0032303620303036ULL),
+ BPF_LD_IMM64(BPF_REG_0,
+ bpf_be64_to_cpu(0x3630302036303200ULL)),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -937,7 +987,8 @@ static struct sysctl_test tests[] = {
/* arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+ BPF_MOV64_IMM(BPF_REG_0,
+ bpf_ntohl(0x36303000)),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -969,8 +1020,9 @@ static struct sysctl_test tests[] = {
/* arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00373730),
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0,
+ bpf_ntohl(0x30373700)),
+ BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -1012,7 +1064,8 @@ static struct sysctl_test tests[] = {
/* arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+ BPF_MOV64_IMM(BPF_REG_0,
+ bpf_ntohl(0x36303000)),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -1052,7 +1105,8 @@ static struct sysctl_test tests[] = {
/* arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x090a0c0d),
+ BPF_MOV64_IMM(BPF_REG_0,
+ bpf_ntohl(0x0d0c0a09)),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -1092,7 +1146,9 @@ static struct sysctl_test tests[] = {
/* arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00362d0a), /* " -6\0" */
+ /* " -6\0" */
+ BPF_MOV64_IMM(BPF_REG_0,
+ bpf_ntohl(0x0a2d3600)),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -1132,8 +1188,10 @@ static struct sysctl_test tests[] = {
/* arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00362d0a), /* " -6\0" */
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ /* " -6\0" */
+ BPF_MOV64_IMM(BPF_REG_0,
+ bpf_ntohl(0x0a2d3600)),
+ BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -1175,8 +1233,10 @@ static struct sysctl_test tests[] = {
/* arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x65667830), /* "0xfe" */
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ /* "0xfe" */
+ BPF_MOV64_IMM(BPF_REG_0,
+ bpf_ntohl(0x30786665)),
+ BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -1218,11 +1278,14 @@ static struct sysctl_test tests[] = {
/* arg1 (buf) 9223372036854775807 */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24),
- BPF_LD_IMM64(BPF_REG_0, 0x3032373333323239ULL),
+ BPF_LD_IMM64(BPF_REG_0,
+ bpf_be64_to_cpu(0x3932323333373230ULL)),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
- BPF_LD_IMM64(BPF_REG_0, 0x3537373435383633ULL),
+ BPF_LD_IMM64(BPF_REG_0,
+ bpf_be64_to_cpu(0x3336383534373735ULL)),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8),
- BPF_LD_IMM64(BPF_REG_0, 0x0000000000373038ULL),
+ BPF_LD_IMM64(BPF_REG_0,
+ bpf_be64_to_cpu(0x3830370000000000ULL)),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -1266,11 +1329,14 @@ static struct sysctl_test tests[] = {
/* arg1 (buf) 9223372036854775808 */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24),
- BPF_LD_IMM64(BPF_REG_0, 0x3032373333323239ULL),
+ BPF_LD_IMM64(BPF_REG_0,
+ bpf_be64_to_cpu(0x3932323333373230ULL)),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
- BPF_LD_IMM64(BPF_REG_0, 0x3537373435383633ULL),
+ BPF_LD_IMM64(BPF_REG_0,
+ bpf_be64_to_cpu(0x3336383534373735ULL)),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8),
- BPF_LD_IMM64(BPF_REG_0, 0x0000000000383038ULL),
+ BPF_LD_IMM64(BPF_REG_0,
+ bpf_be64_to_cpu(0x3830380000000000ULL)),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -1344,20 +1410,23 @@ static size_t probe_prog_length(const struct bpf_insn *fp)
static int fixup_sysctl_value(const char *buf, size_t buf_len,
struct bpf_insn *prog, size_t insn_num)
{
- uint32_t value_num = 0;
- uint8_t c, i;
+ union {
+ uint8_t raw[sizeof(uint64_t)];
+ uint64_t num;
+ } value = {};
- if (buf_len > sizeof(value_num)) {
+ if (buf_len > sizeof(value)) {
log_err("Value is too big (%zd) to use in fixup", buf_len);
return -1;
}
-
- for (i = 0; i < buf_len; ++i) {
- c = buf[i];
- value_num |= (c << i * 8);
+ if (prog[insn_num].code != (BPF_LD | BPF_DW | BPF_IMM)) {
+ log_err("Can fixup only BPF_LD_IMM64 insns");
+ return -1;
}
- prog[insn_num].imm = value_num;
+ memcpy(value.raw, buf, buf_len);
+ prog[insn_num].imm = (uint32_t)value.num;
+ prog[insn_num + 1].imm = (uint32_t)(value.num >> 32);
return 0;
}
@@ -1442,6 +1511,11 @@ static int access_sysctl(const char *sysctl_path,
if (fd < 0)
return fd;
+ if (test->seek && lseek(fd, test->seek, SEEK_SET) == -1) {
+ log_err("lseek(%d) failed", test->seek);
+ goto err;
+ }
+
if (test->open_flags == O_RDONLY) {
char buf[128];
@@ -1499,6 +1573,7 @@ static int run_test_case(int cgfd, struct sysctl_test *test)
goto err;
}
+ errno = 0;
if (access_sysctl(sysctl_path, test) == -1) {
if (test->result == OP_EPERM && errno == EPERM)
goto out;
@@ -1507,7 +1582,7 @@ static int run_test_case(int cgfd, struct sysctl_test *test)
}
if (test->result != SUCCESS) {
- log_err("Unexpected failure");
+ log_err("Unexpected success");
goto err;
}
diff --git a/tools/testing/selftests/bpf/test_tc_edt.sh b/tools/testing/selftests/bpf/test_tc_edt.sh
index f38567ef694b..daa7d1b8d309 100755
--- a/tools/testing/selftests/bpf/test_tc_edt.sh
+++ b/tools/testing/selftests/bpf/test_tc_edt.sh
@@ -59,7 +59,7 @@ ip netns exec ${NS_SRC} tc filter add dev veth_src egress \
# start the listener
ip netns exec ${NS_DST} bash -c \
- "nc -4 -l -s ${IP_DST} -p 9000 >/dev/null &"
+ "nc -4 -l -p 9000 >/dev/null &"
declare -i NC_PID=$!
sleep 1
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index ff0d31d38061..7c76b841b17b 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -62,6 +62,10 @@ cleanup() {
if [[ -f "${infile}" ]]; then
rm "${infile}"
fi
+
+ if [[ -n $server_pid ]]; then
+ kill $server_pid 2> /dev/null
+ fi
}
server_listen() {
@@ -77,6 +81,7 @@ client_connect() {
verify_data() {
wait "${server_pid}"
+ server_pid=
# sha1sum returns two fields [sha1] [filepath]
# convert to bash array and access first elem
insum=($(sha1sum ${infile}))
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
index d48e51716d19..9b3617d770a5 100755
--- a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
+++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
@@ -37,6 +37,9 @@ setup()
ns1_exec ip link set lo up
ns1_exec sysctl -w net.ipv4.tcp_syncookies=2
+ ns1_exec sysctl -w net.ipv4.tcp_window_scaling=0
+ ns1_exec sysctl -w net.ipv4.tcp_timestamps=0
+ ns1_exec sysctl -w net.ipv4.tcp_sack=0
wait_for_ip 127.0.0.1
wait_for_ip ::1
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
index 87829c86c746..b9e991d43155 100644
--- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
+++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
@@ -2,6 +2,7 @@
// Copyright (c) 2018 Facebook
// Copyright (c) 2019 Cloudflare
+#include <limits.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
@@ -77,7 +78,7 @@ out:
return fd;
}
-static int get_map_fd_by_prog_id(int prog_id)
+static int get_map_fd_by_prog_id(int prog_id, bool *xdp)
{
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
@@ -104,6 +105,8 @@ static int get_map_fd_by_prog_id(int prog_id)
goto err;
}
+ *xdp = info.type == BPF_PROG_TYPE_XDP;
+
map_fd = bpf_map_get_fd_by_id(map_ids[0]);
if (map_fd < 0)
log_err("Failed to get fd by map id %d", map_ids[0]);
@@ -113,18 +116,32 @@ err:
return map_fd;
}
-static int run_test(int server_fd, int results_fd)
+static int run_test(int server_fd, int results_fd, bool xdp)
{
int client = -1, srv_client = -1;
int ret = 0;
__u32 key = 0;
- __u64 value = 0;
+ __u32 key_gen = 1;
+ __u32 key_mss = 2;
+ __u32 value = 0;
+ __u32 value_gen = 0;
+ __u32 value_mss = 0;
if (bpf_map_update_elem(results_fd, &key, &value, 0) < 0) {
log_err("Can't clear results");
goto err;
}
+ if (bpf_map_update_elem(results_fd, &key_gen, &value_gen, 0) < 0) {
+ log_err("Can't clear results");
+ goto err;
+ }
+
+ if (bpf_map_update_elem(results_fd, &key_mss, &value_mss, 0) < 0) {
+ log_err("Can't clear results");
+ goto err;
+ }
+
client = connect_to_server(server_fd);
if (client == -1)
goto err;
@@ -140,8 +157,35 @@ static int run_test(int server_fd, int results_fd)
goto err;
}
- if (value != 1) {
- log_err("Didn't match syncookie: %llu", value);
+ if (value == 0) {
+ log_err("Didn't match syncookie: %u", value);
+ goto err;
+ }
+
+ if (bpf_map_lookup_elem(results_fd, &key_gen, &value_gen) < 0) {
+ log_err("Can't lookup result");
+ goto err;
+ }
+
+ if (xdp && value_gen == 0) {
+ // SYN packets do not get passed through generic XDP, skip the
+ // rest of the test.
+ printf("Skipping XDP cookie check\n");
+ goto out;
+ }
+
+ if (bpf_map_lookup_elem(results_fd, &key_mss, &value_mss) < 0) {
+ log_err("Can't lookup result");
+ goto err;
+ }
+
+ if (value != value_gen) {
+ log_err("BPF generated cookie does not match kernel one");
+ goto err;
+ }
+
+ if (value_mss < 536 || value_mss > USHRT_MAX) {
+ log_err("Unexpected MSS retrieved");
goto err;
}
@@ -163,13 +207,14 @@ int main(int argc, char **argv)
int server_v6 = -1;
int results = -1;
int err = 0;
+ bool xdp;
if (argc < 2) {
fprintf(stderr, "Usage: %s prog_id\n", argv[0]);
exit(1);
}
- results = get_map_fd_by_prog_id(atoi(argv[1]));
+ results = get_map_fd_by_prog_id(atoi(argv[1]), &xdp);
if (results < 0) {
log_err("Can't get map");
goto err;
@@ -194,10 +239,10 @@ int main(int argc, char **argv)
if (server_v6 == -1)
goto err;
- if (run_test(server, results))
+ if (run_test(server, results, xdp))
goto err;
- if (run_test(server_v6, results))
+ if (run_test(server_v6, results, xdp))
goto err;
printf("ok\n");
diff --git a/tools/testing/selftests/bpf/test_tcpbpf.h b/tools/testing/selftests/bpf/test_tcpbpf.h
index 7bcfa6207005..6220b95cbd02 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf.h
+++ b/tools/testing/selftests/bpf/test_tcpbpf.h
@@ -13,5 +13,6 @@ struct tcpbpf_globals {
__u64 bytes_received;
__u64 bytes_acked;
__u32 num_listen;
+ __u32 num_close_events;
};
#endif
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c
index 716b4e3be581..3ae127620463 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c
@@ -16,6 +16,9 @@
#include "test_tcpbpf.h"
+/* 3 comes from one listening socket + both ends of the connection */
+#define EXPECTED_CLOSE_EVENTS 3
+
#define EXPECT_EQ(expected, actual, fmt) \
do { \
if ((expected) != (actual)) { \
@@ -23,13 +26,14 @@
" Actual: %" fmt "\n" \
" Expected: %" fmt "\n", \
(actual), (expected)); \
- goto err; \
+ ret--; \
} \
} while (0)
int verify_result(const struct tcpbpf_globals *result)
{
__u32 expected_events;
+ int ret = 0;
expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) |
(1 << BPF_SOCK_OPS_RWND_INIT) |
@@ -48,15 +52,15 @@ int verify_result(const struct tcpbpf_globals *result)
EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32);
EXPECT_EQ(0, result->good_cb_test_rv, PRIu32);
EXPECT_EQ(1, result->num_listen, PRIu32);
+ EXPECT_EQ(EXPECTED_CLOSE_EVENTS, result->num_close_events, PRIu32);
- return 0;
-err:
- return -1;
+ return ret;
}
int verify_sockopt_result(int sock_map_fd)
{
__u32 key = 0;
+ int ret = 0;
int res;
int rv;
@@ -69,9 +73,7 @@ int verify_sockopt_result(int sock_map_fd)
rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
EXPECT_EQ(0, rv, "d");
EXPECT_EQ(1, res, "d");
- return 0;
-err:
- return -1;
+ return ret;
}
static int bpf_find_map(const char *test, struct bpf_object *obj,
@@ -96,6 +98,7 @@ int main(int argc, char **argv)
int error = EXIT_FAILURE;
struct bpf_object *obj;
int cg_fd = -1;
+ int retry = 10;
__u32 key = 0;
int rv;
@@ -134,12 +137,20 @@ int main(int argc, char **argv)
if (sock_map_fd < 0)
goto err;
+retry_lookup:
rv = bpf_map_lookup_elem(map_fd, &key, &g);
if (rv != 0) {
printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
goto err;
}
+ if (g.num_close_events != EXPECTED_CLOSE_EVENTS && retry--) {
+ printf("Unexpected number of close events (%d), retrying!\n",
+ g.num_close_events);
+ usleep(100);
+ goto retry_lookup;
+ }
+
if (verify_result(&g)) {
printf("FAILED: Wrong stats\n");
goto err;
diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c
index 86152d9ae95b..f9765ddf0761 100644
--- a/tools/testing/selftests/bpf/test_tcpnotify_user.c
+++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c
@@ -17,6 +17,7 @@
#include <linux/rtnetlink.h>
#include <signal.h>
#include <linux/perf_event.h>
+#include <linux/err.h>
#include "bpf_rlimit.h"
#include "bpf_util.h"
@@ -30,28 +31,34 @@
pthread_t tid;
int rx_callbacks;
-static int dummyfn(void *data, int size)
+static void dummyfn(void *ctx, int cpu, void *data, __u32 size)
{
struct tcp_notifier *t = data;
if (t->type != 0xde || t->subtype != 0xad ||
t->source != 0xbe || t->hash != 0xef)
- return 1;
+ return;
rx_callbacks++;
- return 0;
}
-void tcp_notifier_poller(int fd)
+void tcp_notifier_poller(struct perf_buffer *pb)
{
- while (1)
- perf_event_poller(fd, dummyfn);
+ int err;
+
+ while (1) {
+ err = perf_buffer__poll(pb, 100);
+ if (err < 0 && err != -EINTR) {
+ printf("failed perf_buffer__poll: %d\n", err);
+ return;
+ }
+ }
}
static void *poller_thread(void *arg)
{
- int fd = *(int *)arg;
+ struct perf_buffer *pb = arg;
- tcp_notifier_poller(fd);
+ tcp_notifier_poller(pb);
return arg;
}
@@ -60,52 +67,20 @@ int verify_result(const struct tcpnotify_globals *result)
return (result->ncalls > 0 && result->ncalls == rx_callbacks ? 0 : 1);
}
-static int bpf_find_map(const char *test, struct bpf_object *obj,
- const char *name)
-{
- struct bpf_map *map;
-
- map = bpf_object__find_map_by_name(obj, name);
- if (!map) {
- printf("%s:FAIL:map '%s' not found\n", test, name);
- return -1;
- }
- return bpf_map__fd(map);
-}
-
-static int setup_bpf_perf_event(int mapfd)
-{
- struct perf_event_attr attr = {
- .sample_type = PERF_SAMPLE_RAW,
- .type = PERF_TYPE_SOFTWARE,
- .config = PERF_COUNT_SW_BPF_OUTPUT,
- };
- int key = 0;
- int pmu_fd;
-
- pmu_fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
- if (pmu_fd < 0)
- return pmu_fd;
- bpf_map_update_elem(mapfd, &key, &pmu_fd, BPF_ANY);
-
- ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
- return pmu_fd;
-}
-
int main(int argc, char **argv)
{
const char *file = "test_tcpnotify_kern.o";
- int prog_fd, map_fd, perf_event_fd;
+ struct bpf_map *perf_map, *global_map;
+ struct perf_buffer_opts pb_opts = {};
struct tcpnotify_globals g = {0};
+ struct perf_buffer *pb = NULL;
const char *cg_path = "/foo";
+ int prog_fd, rv, cg_fd = -1;
int error = EXIT_FAILURE;
struct bpf_object *obj;
- int cg_fd = -1;
- __u32 key = 0;
- int rv;
char test_script[80];
- int pmu_fd;
cpu_set_t cpuset;
+ __u32 key = 0;
CPU_ZERO(&cpuset);
CPU_SET(0, &cpuset);
@@ -133,19 +108,24 @@ int main(int argc, char **argv)
goto err;
}
- perf_event_fd = bpf_find_map(__func__, obj, "perf_event_map");
- if (perf_event_fd < 0)
+ perf_map = bpf_object__find_map_by_name(obj, "perf_event_map");
+ if (!perf_map) {
+ printf("FAIL:map '%s' not found\n", "perf_event_map");
goto err;
+ }
- map_fd = bpf_find_map(__func__, obj, "global_map");
- if (map_fd < 0)
- goto err;
+ global_map = bpf_object__find_map_by_name(obj, "global_map");
+ if (!global_map) {
+ printf("FAIL:map '%s' not found\n", "global_map");
+ return -1;
+ }
- pmu_fd = setup_bpf_perf_event(perf_event_fd);
- if (pmu_fd < 0 || perf_event_mmap(pmu_fd) < 0)
+ pb_opts.sample_cb = dummyfn;
+ pb = perf_buffer__new(bpf_map__fd(perf_map), 8, &pb_opts);
+ if (IS_ERR(pb))
goto err;
- pthread_create(&tid, NULL, poller_thread, (void *)&pmu_fd);
+ pthread_create(&tid, NULL, poller_thread, pb);
sprintf(test_script,
"iptables -A INPUT -p tcp --dport %d -j DROP",
@@ -162,7 +142,7 @@ int main(int argc, char **argv)
TESTPORT);
system(test_script);
- rv = bpf_map_lookup_elem(map_fd, &key, &g);
+ rv = bpf_map_lookup_elem(bpf_map__fd(global_map), &key, &g);
if (rv != 0) {
printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
goto err;
@@ -182,5 +162,7 @@ err:
bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
close(cg_fd);
cleanup_cgroup_environment();
+ if (!IS_ERR_OR_NULL(pb))
+ perf_buffer__free(pb);
return error;
}
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 84135d5f4b35..87eaa49609a0 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -50,7 +50,7 @@
#define MAX_INSNS BPF_MAXINSNS
#define MAX_TEST_INSNS 1000000
#define MAX_FIXUPS 8
-#define MAX_NR_MAPS 18
+#define MAX_NR_MAPS 19
#define MAX_TEST_RUNS 8
#define POINTER_VALUE 0xcafe4all
#define TEST_DATA_LEN 64
@@ -61,6 +61,7 @@
#define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled"
static bool unpriv_disabled = false;
static int skips;
+static bool verbose = false;
struct bpf_test {
const char *descr;
@@ -84,6 +85,7 @@ struct bpf_test {
int fixup_map_array_wo[MAX_FIXUPS];
int fixup_map_array_small[MAX_FIXUPS];
int fixup_sk_storage_map[MAX_FIXUPS];
+ int fixup_map_event_output[MAX_FIXUPS];
const char *errstr;
const char *errstr_unpriv;
uint32_t insn_processed;
@@ -91,7 +93,8 @@ struct bpf_test {
enum {
UNDEF,
ACCEPT,
- REJECT
+ REJECT,
+ VERBOSE_ACCEPT,
} result, result_unpriv;
enum bpf_prog_type prog_type;
uint8_t flags;
@@ -405,10 +408,10 @@ static void update_map(int fd, int index)
assert(!bpf_map_update_elem(fd, &index, &value, 0));
}
-static int create_prog_dummy1(enum bpf_prog_type prog_type)
+static int create_prog_dummy_simple(enum bpf_prog_type prog_type, int ret)
{
struct bpf_insn prog[] = {
- BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_MOV64_IMM(BPF_REG_0, ret),
BPF_EXIT_INSN(),
};
@@ -416,14 +419,15 @@ static int create_prog_dummy1(enum bpf_prog_type prog_type)
ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
}
-static int create_prog_dummy2(enum bpf_prog_type prog_type, int mfd, int idx)
+static int create_prog_dummy_loop(enum bpf_prog_type prog_type, int mfd,
+ int idx, int ret)
{
struct bpf_insn prog[] = {
BPF_MOV64_IMM(BPF_REG_3, idx),
BPF_LD_MAP_FD(BPF_REG_2, mfd),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 41),
+ BPF_MOV64_IMM(BPF_REG_0, ret),
BPF_EXIT_INSN(),
};
@@ -432,10 +436,9 @@ static int create_prog_dummy2(enum bpf_prog_type prog_type, int mfd, int idx)
}
static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem,
- int p1key)
+ int p1key, int p2key, int p3key)
{
- int p2key = 1;
- int mfd, p1fd, p2fd;
+ int mfd, p1fd, p2fd, p3fd;
mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
sizeof(int), max_elem, 0);
@@ -446,23 +449,24 @@ static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem,
return -1;
}
- p1fd = create_prog_dummy1(prog_type);
- p2fd = create_prog_dummy2(prog_type, mfd, p2key);
- if (p1fd < 0 || p2fd < 0)
- goto out;
+ p1fd = create_prog_dummy_simple(prog_type, 42);
+ p2fd = create_prog_dummy_loop(prog_type, mfd, p2key, 41);
+ p3fd = create_prog_dummy_simple(prog_type, 24);
+ if (p1fd < 0 || p2fd < 0 || p3fd < 0)
+ goto err;
if (bpf_map_update_elem(mfd, &p1key, &p1fd, BPF_ANY) < 0)
- goto out;
+ goto err;
if (bpf_map_update_elem(mfd, &p2key, &p2fd, BPF_ANY) < 0)
- goto out;
+ goto err;
+ if (bpf_map_update_elem(mfd, &p3key, &p3fd, BPF_ANY) < 0) {
+err:
+ close(mfd);
+ mfd = -1;
+ }
+ close(p3fd);
close(p2fd);
close(p1fd);
-
return mfd;
-out:
- close(p2fd);
- close(p1fd);
- close(mfd);
- return -1;
}
static int create_map_in_map(void)
@@ -632,6 +636,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
int *fixup_map_array_wo = test->fixup_map_array_wo;
int *fixup_map_array_small = test->fixup_map_array_small;
int *fixup_sk_storage_map = test->fixup_sk_storage_map;
+ int *fixup_map_event_output = test->fixup_map_event_output;
if (test->fill_helper) {
test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
@@ -680,7 +685,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
}
if (*fixup_prog1) {
- map_fds[4] = create_prog_array(prog_type, 4, 0);
+ map_fds[4] = create_prog_array(prog_type, 4, 0, 1, 2);
do {
prog[*fixup_prog1].imm = map_fds[4];
fixup_prog1++;
@@ -688,7 +693,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
}
if (*fixup_prog2) {
- map_fds[5] = create_prog_array(prog_type, 8, 7);
+ map_fds[5] = create_prog_array(prog_type, 8, 7, 1, 2);
do {
prog[*fixup_prog2].imm = map_fds[5];
fixup_prog2++;
@@ -793,6 +798,14 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
fixup_sk_storage_map++;
} while (*fixup_sk_storage_map);
}
+ if (*fixup_map_event_output) {
+ map_fds[18] = __create_map(BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ sizeof(int), sizeof(int), 1, 0);
+ do {
+ prog[*fixup_map_event_output].imm = map_fds[18];
+ fixup_map_event_output++;
+ } while (*fixup_map_event_output);
+ }
}
static int set_admin(bool admin)
@@ -849,6 +862,36 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val,
return 0;
}
+static bool cmp_str_seq(const char *log, const char *exp)
+{
+ char needle[80];
+ const char *p, *q;
+ int len;
+
+ do {
+ p = strchr(exp, '\t');
+ if (!p)
+ p = exp + strlen(exp);
+
+ len = p - exp;
+ if (len >= sizeof(needle) || !len) {
+ printf("FAIL\nTestcase bug\n");
+ return false;
+ }
+ strncpy(needle, exp, len);
+ needle[len] = 0;
+ q = strstr(log, needle);
+ if (!q) {
+ printf("FAIL\nUnexpected verifier log in successful load!\n"
+ "EXP: %s\nRES:\n", needle);
+ return false;
+ }
+ log = q + len;
+ exp = p + 1;
+ } while (*p);
+ return true;
+}
+
static void do_test_single(struct bpf_test *test, bool unpriv,
int *passes, int *errors)
{
@@ -887,14 +930,20 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
pflags |= BPF_F_STRICT_ALIGNMENT;
if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
pflags |= BPF_F_ANY_ALIGNMENT;
+ if (test->flags & ~3)
+ pflags |= test->flags;
+ expected_ret = unpriv && test->result_unpriv != UNDEF ?
+ test->result_unpriv : test->result;
+ expected_err = unpriv && test->errstr_unpriv ?
+ test->errstr_unpriv : test->errstr;
memset(&attr, 0, sizeof(attr));
attr.prog_type = prog_type;
attr.expected_attach_type = test->expected_attach_type;
attr.insns = prog;
attr.insns_cnt = prog_len;
attr.license = "GPL";
- attr.log_level = 4;
+ attr.log_level = verbose || expected_ret == VERBOSE_ACCEPT ? 1 : 4;
attr.prog_flags = pflags;
fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog));
@@ -904,14 +953,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
goto close_fds;
}
- expected_ret = unpriv && test->result_unpriv != UNDEF ?
- test->result_unpriv : test->result;
- expected_err = unpriv && test->errstr_unpriv ?
- test->errstr_unpriv : test->errstr;
-
alignment_prevented_execution = 0;
- if (expected_ret == ACCEPT) {
+ if (expected_ret == ACCEPT || expected_ret == VERBOSE_ACCEPT) {
if (fd_prog < 0) {
printf("FAIL\nFailed to load prog '%s'!\n",
strerror(errno));
@@ -922,6 +966,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
(test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS))
alignment_prevented_execution = 1;
#endif
+ if (expected_ret == VERBOSE_ACCEPT && !cmp_str_seq(bpf_vlog, expected_err)) {
+ goto fail_log;
+ }
} else {
if (fd_prog >= 0) {
printf("FAIL\nUnexpected success to load!\n");
@@ -947,6 +994,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
}
}
+ if (verbose)
+ printf(", verifier log:\n%s", bpf_vlog);
+
run_errs = 0;
run_successes = 0;
if (!alignment_prevented_execution && fd_prog >= 0) {
@@ -1087,17 +1137,24 @@ int main(int argc, char **argv)
{
unsigned int from = 0, to = ARRAY_SIZE(tests);
bool unpriv = !is_admin();
+ int arg = 1;
+
+ if (argc > 1 && strcmp(argv[1], "-v") == 0) {
+ arg++;
+ verbose = true;
+ argc--;
+ }
if (argc == 3) {
- unsigned int l = atoi(argv[argc - 2]);
- unsigned int u = atoi(argv[argc - 1]);
+ unsigned int l = atoi(argv[arg]);
+ unsigned int u = atoi(argv[arg + 1]);
if (l < to && u < to) {
from = l;
to = u + 1;
}
} else if (argc == 2) {
- unsigned int t = atoi(argv[argc - 1]);
+ unsigned int t = atoi(argv[arg]);
if (t < to) {
from = t;
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index b47f205f0310..7f989b3e4e22 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -86,128 +86,3 @@ long ksym_get_addr(const char *name)
return 0;
}
-
-static int page_size;
-static int page_cnt = 8;
-static struct perf_event_mmap_page *header;
-
-int perf_event_mmap_header(int fd, struct perf_event_mmap_page **header)
-{
- void *base;
- int mmap_size;
-
- page_size = getpagesize();
- mmap_size = page_size * (page_cnt + 1);
-
- base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- if (base == MAP_FAILED) {
- printf("mmap err\n");
- return -1;
- }
-
- *header = base;
- return 0;
-}
-
-int perf_event_mmap(int fd)
-{
- return perf_event_mmap_header(fd, &header);
-}
-
-static int perf_event_poll(int fd)
-{
- struct pollfd pfd = { .fd = fd, .events = POLLIN };
-
- return poll(&pfd, 1, 1000);
-}
-
-struct perf_event_sample {
- struct perf_event_header header;
- __u32 size;
- char data[];
-};
-
-static enum bpf_perf_event_ret
-bpf_perf_event_print(struct perf_event_header *hdr, void *private_data)
-{
- struct perf_event_sample *e = (struct perf_event_sample *)hdr;
- perf_event_print_fn fn = private_data;
- int ret;
-
- if (e->header.type == PERF_RECORD_SAMPLE) {
- ret = fn(e->data, e->size);
- if (ret != LIBBPF_PERF_EVENT_CONT)
- return ret;
- } else if (e->header.type == PERF_RECORD_LOST) {
- struct {
- struct perf_event_header header;
- __u64 id;
- __u64 lost;
- } *lost = (void *) e;
- printf("lost %lld events\n", lost->lost);
- } else {
- printf("unknown event type=%d size=%d\n",
- e->header.type, e->header.size);
- }
-
- return LIBBPF_PERF_EVENT_CONT;
-}
-
-int perf_event_poller(int fd, perf_event_print_fn output_fn)
-{
- enum bpf_perf_event_ret ret;
- void *buf = NULL;
- size_t len = 0;
-
- for (;;) {
- perf_event_poll(fd);
- ret = bpf_perf_event_read_simple(header, page_cnt * page_size,
- page_size, &buf, &len,
- bpf_perf_event_print,
- output_fn);
- if (ret != LIBBPF_PERF_EVENT_CONT)
- break;
- }
- free(buf);
-
- return ret;
-}
-
-int perf_event_poller_multi(int *fds, struct perf_event_mmap_page **headers,
- int num_fds, perf_event_print_fn output_fn)
-{
- enum bpf_perf_event_ret ret;
- struct pollfd *pfds;
- void *buf = NULL;
- size_t len = 0;
- int i;
-
- pfds = calloc(num_fds, sizeof(*pfds));
- if (!pfds)
- return LIBBPF_PERF_EVENT_ERROR;
-
- for (i = 0; i < num_fds; i++) {
- pfds[i].fd = fds[i];
- pfds[i].events = POLLIN;
- }
-
- for (;;) {
- poll(pfds, num_fds, 1000);
- for (i = 0; i < num_fds; i++) {
- if (!pfds[i].revents)
- continue;
-
- ret = bpf_perf_event_read_simple(headers[i],
- page_cnt * page_size,
- page_size, &buf, &len,
- bpf_perf_event_print,
- output_fn);
- if (ret != LIBBPF_PERF_EVENT_CONT)
- break;
- }
- }
- free(buf);
- free(pfds);
-
- return ret;
-}
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index 18924f23db1b..0383c9b8adc1 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -2,8 +2,7 @@
#ifndef __TRACE_HELPER_H
#define __TRACE_HELPER_H
-#include <libbpf.h>
-#include <linux/perf_event.h>
+#include <bpf/libbpf.h>
struct ksym {
long addr;
@@ -14,12 +13,4 @@ int load_kallsyms(void);
struct ksym *ksym_search(long key);
long ksym_get_addr(const char *name);
-typedef enum bpf_perf_event_ret (*perf_event_print_fn)(void *data, int size);
-
-int perf_event_mmap(int fd);
-int perf_event_mmap_header(int fd, struct perf_event_mmap_page **header);
-/* return LIBBPF_PERF_EVENT_DONE or LIBBPF_PERF_EVENT_ERROR */
-int perf_event_poller(int fd, perf_event_print_fn output_fn);
-int perf_event_poller_multi(int *fds, struct perf_event_mmap_page **headers,
- int num_fds, perf_event_print_fn output_fn);
#endif
diff --git a/tools/testing/selftests/bpf/verifier/event_output.c b/tools/testing/selftests/bpf/verifier/event_output.c
new file mode 100644
index 000000000000..130553e19eca
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/event_output.c
@@ -0,0 +1,94 @@
+/* instructions used to output a skb based software event, produced
+ * from code snippet:
+ * struct TMP {
+ * uint64_t tmp;
+ * } tt;
+ * tt.tmp = 5;
+ * bpf_perf_event_output(skb, &connection_tracking_event_map, 0,
+ * &tt, sizeof(tt));
+ * return 1;
+ *
+ * the bpf assembly from llvm is:
+ * 0: b7 02 00 00 05 00 00 00 r2 = 5
+ * 1: 7b 2a f8 ff 00 00 00 00 *(u64 *)(r10 - 8) = r2
+ * 2: bf a4 00 00 00 00 00 00 r4 = r10
+ * 3: 07 04 00 00 f8 ff ff ff r4 += -8
+ * 4: 18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0ll
+ * 6: b7 03 00 00 00 00 00 00 r3 = 0
+ * 7: b7 05 00 00 08 00 00 00 r5 = 8
+ * 8: 85 00 00 00 19 00 00 00 call 25
+ * 9: b7 00 00 00 01 00 00 00 r0 = 1
+ * 10: 95 00 00 00 00 00 00 00 exit
+ *
+ * The reason I put the code here instead of fill_helpers is that map fixup
+ * is against the insns, instead of filled prog.
+ */
+
+#define __PERF_EVENT_INSNS__ \
+ BPF_MOV64_IMM(BPF_REG_2, 5), \
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), \
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), \
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), \
+ BPF_LD_MAP_FD(BPF_REG_2, 0), \
+ BPF_MOV64_IMM(BPF_REG_3, 0), \
+ BPF_MOV64_IMM(BPF_REG_5, 8), \
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, \
+ BPF_FUNC_perf_event_output), \
+ BPF_MOV64_IMM(BPF_REG_0, 1), \
+ BPF_EXIT_INSN(),
+{
+ "perfevent for sockops",
+ .insns = { __PERF_EVENT_INSNS__ },
+ .prog_type = BPF_PROG_TYPE_SOCK_OPS,
+ .fixup_map_event_output = { 4 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "perfevent for tc",
+ .insns = { __PERF_EVENT_INSNS__ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_event_output = { 4 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "perfevent for lwt out",
+ .insns = { __PERF_EVENT_INSNS__ },
+ .prog_type = BPF_PROG_TYPE_LWT_OUT,
+ .fixup_map_event_output = { 4 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "perfevent for xdp",
+ .insns = { __PERF_EVENT_INSNS__ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .fixup_map_event_output = { 4 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "perfevent for socket filter",
+ .insns = { __PERF_EVENT_INSNS__ },
+ .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
+ .fixup_map_event_output = { 4 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "perfevent for sk_skb",
+ .insns = { __PERF_EVENT_INSNS__ },
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ .fixup_map_event_output = { 4 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "perfevent for cgroup skb",
+ .insns = { __PERF_EVENT_INSNS__ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ .fixup_map_event_output = { 4 },
+ .result = ACCEPT,
+ .retval = 1,
+},
diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c
index f0961c58581e..bf0322eb5346 100644
--- a/tools/testing/selftests/bpf/verifier/jmp32.c
+++ b/tools/testing/selftests/bpf/verifier/jmp32.c
@@ -744,3 +744,86 @@
.result = ACCEPT,
.retval = 2,
},
+{
+ "jgt32: range bound deduction, reg op imm",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_get_cgroup_classid),
+ BPF_JMP32_IMM(BPF_JGT, BPF_REG_0, 1, 5),
+ BPF_MOV32_REG(BPF_REG_6, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 32),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_6, 32),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_6),
+ BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_hash_48b = { 4 },
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "jgt32: range bound deduction, reg1 op reg2, reg1 unknown",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_get_cgroup_classid),
+ BPF_MOV32_IMM(BPF_REG_2, 1),
+ BPF_JMP32_REG(BPF_JGT, BPF_REG_0, BPF_REG_2, 5),
+ BPF_MOV32_REG(BPF_REG_6, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 32),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_6, 32),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_6),
+ BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_hash_48b = { 4 },
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "jle32: range bound deduction, reg1 op reg2, reg2 unknown",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_get_cgroup_classid),
+ BPF_MOV32_IMM(BPF_REG_2, 1),
+ BPF_JMP32_REG(BPF_JLE, BPF_REG_2, BPF_REG_0, 5),
+ BPF_MOV32_REG(BPF_REG_6, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 32),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_6, 32),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_6),
+ BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_hash_48b = { 4 },
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
diff --git a/tools/testing/selftests/bpf/verifier/loops1.c b/tools/testing/selftests/bpf/verifier/loops1.c
index 1fc4e61e9f9f..1af37187dc12 100644
--- a/tools/testing/selftests/bpf/verifier/loops1.c
+++ b/tools/testing/selftests/bpf/verifier/loops1.c
@@ -187,3 +187,20 @@
.prog_type = BPF_PROG_TYPE_XDP,
.retval = 55,
},
+{
+ "taken loop with back jump to 1st insn, 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 10),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
+ BPF_JMP32_IMM(BPF_JNE, BPF_REG_1, 0, -3),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .retval = 55,
+},
diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c
new file mode 100644
index 000000000000..02151f8c940f
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/precise.c
@@ -0,0 +1,194 @@
+{
+ "precise: test 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_0),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8), /* map_value_ptr -= map_value_ptr */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_9),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 8, 1),
+ BPF_EXIT_INSN(),
+
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), /* R2=inv(umin=1, umax=8) */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .fixup_map_array_48b = { 1 },
+ .result = VERBOSE_ACCEPT,
+ .errstr =
+ "26: (85) call bpf_probe_read#4\
+ last_idx 26 first_idx 20\
+ regs=4 stack=0 before 25\
+ regs=4 stack=0 before 24\
+ regs=4 stack=0 before 23\
+ regs=4 stack=0 before 22\
+ regs=4 stack=0 before 20\
+ parent didn't have regs=4 stack=0 marks\
+ last_idx 19 first_idx 10\
+ regs=4 stack=0 before 19\
+ regs=200 stack=0 before 18\
+ regs=300 stack=0 before 17\
+ regs=201 stack=0 before 15\
+ regs=201 stack=0 before 14\
+ regs=200 stack=0 before 13\
+ regs=200 stack=0 before 12\
+ regs=200 stack=0 before 11\
+ regs=200 stack=0 before 10\
+ parent already had regs=0 stack=0 marks",
+},
+{
+ "precise: test 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_0),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8), /* map_value_ptr -= map_value_ptr */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_9),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 8, 1),
+ BPF_EXIT_INSN(),
+
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), /* R2=inv(umin=1, umax=8) */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .fixup_map_array_48b = { 1 },
+ .result = VERBOSE_ACCEPT,
+ .flags = BPF_F_TEST_STATE_FREQ,
+ .errstr =
+ "26: (85) call bpf_probe_read#4\
+ last_idx 26 first_idx 22\
+ regs=4 stack=0 before 25\
+ regs=4 stack=0 before 24\
+ regs=4 stack=0 before 23\
+ regs=4 stack=0 before 22\
+ parent didn't have regs=4 stack=0 marks\
+ last_idx 20 first_idx 20\
+ regs=4 stack=0 before 20\
+ parent didn't have regs=4 stack=0 marks\
+ last_idx 19 first_idx 17\
+ regs=4 stack=0 before 19\
+ regs=200 stack=0 before 18\
+ regs=300 stack=0 before 17\
+ parent already had regs=0 stack=0 marks",
+},
+{
+ "precise: cross frame pruning",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_IMM(BPF_REG_8, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_8, 1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_IMM(BPF_REG_9, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_9, 1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_8, 1, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = BPF_F_TEST_STATE_FREQ,
+ .errstr = "!read_ok",
+ .result = REJECT,
+},
+{
+ "precise: ST insn causing spi > allocated_stack",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_3, -8, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_MOV64_IMM(BPF_REG_0, -1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = BPF_F_TEST_STATE_FREQ,
+ .errstr = "5: (2d) if r4 > r0 goto pc+0\
+ last_idx 5 first_idx 5\
+ parent didn't have regs=10 stack=0 marks\
+ last_idx 4 first_idx 2\
+ regs=10 stack=0 before 4\
+ regs=10 stack=0 before 3\
+ regs=0 stack=1 before 2\
+ last_idx 5 first_idx 5\
+ parent didn't have regs=1 stack=0 marks",
+ .result = VERBOSE_ACCEPT,
+ .retval = -1,
+},
+{
+ "precise: STX insn causing spi > allocated_stack",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_MOV64_IMM(BPF_REG_0, -1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = BPF_F_TEST_STATE_FREQ,
+ .errstr = "last_idx 6 first_idx 6\
+ parent didn't have regs=10 stack=0 marks\
+ last_idx 5 first_idx 3\
+ regs=10 stack=0 before 5\
+ regs=10 stack=0 before 4\
+ regs=0 stack=1 before 3\
+ last_idx 6 first_idx 6\
+ parent didn't have regs=1 stack=0 marks\
+ last_idx 5 first_idx 3\
+ regs=1 stack=0 before 5",
+ .result = VERBOSE_ACCEPT,
+ .retval = -1,
+},
diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c
index ebcbf154c460..604b46151736 100644
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c
@@ -455,7 +455,7 @@
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 7),
/* bpf_tail_call() */
- BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 3),
BPF_LD_MAP_FD(BPF_REG_2, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
@@ -478,7 +478,7 @@
BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
/* bpf_tail_call() */
- BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 3),
BPF_LD_MAP_FD(BPF_REG_2, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
@@ -497,7 +497,7 @@
BPF_SK_LOOKUP(sk_lookup_tcp),
/* bpf_tail_call() */
BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 3),
BPF_LD_MAP_FD(BPF_REG_2, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
diff --git a/tools/testing/selftests/bpf/verifier/runtime_jit.c b/tools/testing/selftests/bpf/verifier/runtime_jit.c
index a9a8f620e71c..94c399d1faca 100644
--- a/tools/testing/selftests/bpf/verifier/runtime_jit.c
+++ b/tools/testing/selftests/bpf/verifier/runtime_jit.c
@@ -27,6 +27,19 @@
{
"runtime/jit: tail_call within bounds, no prog",
.insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 3),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 1 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "runtime/jit: tail_call within bounds, key 2",
+ .insns = {
BPF_MOV64_IMM(BPF_REG_3, 2),
BPF_LD_MAP_FD(BPF_REG_2, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
@@ -35,9 +48,147 @@
},
.fixup_prog1 = { 1 },
.result = ACCEPT,
+ .retval = 24,
+},
+{
+ "runtime/jit: tail_call within bounds, key 2 / key 2, first branch",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 13),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 5, 9 },
+ .result = ACCEPT,
+ .retval = 24,
+},
+{
+ "runtime/jit: tail_call within bounds, key 2 / key 2, second branch",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 14),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 5, 9 },
+ .result = ACCEPT,
+ .retval = 24,
+},
+{
+ "runtime/jit: tail_call within bounds, key 0 / key 2, first branch",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 13),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 5, 9 },
+ .result = ACCEPT,
+ .retval = 24,
+},
+{
+ "runtime/jit: tail_call within bounds, key 0 / key 2, second branch",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 14),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 5, 9 },
+ .result = ACCEPT,
+ .retval = 42,
+},
+{
+ "runtime/jit: tail_call within bounds, different maps, first branch",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 13),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 5 },
+ .fixup_prog2 = { 9 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "tail_call abusing map_ptr",
+ .result = ACCEPT,
.retval = 1,
},
{
+ "runtime/jit: tail_call within bounds, different maps, second branch",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 14),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 5 },
+ .fixup_prog2 = { 9 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "tail_call abusing map_ptr",
+ .result = ACCEPT,
+ .retval = 42,
+},
+{
"runtime/jit: tail_call out of bounds",
.insns = {
BPF_MOV64_IMM(BPF_REG_3, 256),
diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c
index d60a343b1371..842d9155d36c 100644
--- a/tools/testing/selftests/bpf/xdping.c
+++ b/tools/testing/selftests/bpf/xdping.c
@@ -45,7 +45,7 @@ static int get_stats(int fd, __u16 count, __u32 raddr)
printf("\nXDP RTT data:\n");
if (bpf_map_lookup_elem(fd, &raddr, &pinginfo)) {
- perror("bpf_map_lookup elem: ");
+ perror("bpf_map_lookup elem");
return 1;
}
diff --git a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
index 58ed5eeab709..ad41ea69001b 100644
--- a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
+++ b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
@@ -109,7 +109,7 @@ static bool set_watchpoint(pid_t pid, int size, int wp)
return false;
}
-static bool arun_test(int wr_size, int wp_size, int wr, int wp)
+static bool run_test(int wr_size, int wp_size, int wr, int wp)
{
int status;
siginfo_t siginfo;
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index 8d369b6a2069..66aafe1f5746 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -1,8 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -Wall
+CFLAGS += -Wall -pthread
all:
+TEST_FILES := with_stress.sh
+TEST_PROGS := test_stress.sh
TEST_GEN_PROGS = test_memcontrol
TEST_GEN_PROGS += test_core
TEST_GEN_PROGS += test_freezer
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index bdb69599c4bd..8f7131dcf1ff 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -158,6 +158,22 @@ long cg_read_key_long(const char *cgroup, const char *control, const char *key)
return atol(ptr + strlen(key));
}
+long cg_read_lc(const char *cgroup, const char *control)
+{
+ char buf[PAGE_SIZE];
+ const char delim[] = "\n";
+ char *line;
+ long cnt = 0;
+
+ if (cg_read(cgroup, control, buf, sizeof(buf)))
+ return -1;
+
+ for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
+ cnt++;
+
+ return cnt;
+}
+
int cg_write(const char *cgroup, const char *control, char *buf)
{
char path[PATH_MAX];
@@ -282,10 +298,12 @@ int cg_enter(const char *cgroup, int pid)
int cg_enter_current(const char *cgroup)
{
- char pidbuf[64];
+ return cg_write(cgroup, "cgroup.procs", "0");
+}
- snprintf(pidbuf, sizeof(pidbuf), "%d", getpid());
- return cg_write(cgroup, "cgroup.procs", pidbuf);
+int cg_enter_current_thread(const char *cgroup)
+{
+ return cg_write(cgroup, "cgroup.threads", "0");
}
int cg_run(const char *cgroup,
@@ -410,11 +428,25 @@ int set_oom_adj_score(int pid, int score)
return 0;
}
-char proc_read_text(int pid, const char *item, char *buf, size_t size)
+ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
{
char path[PATH_MAX];
- snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
+ if (!pid)
+ snprintf(path, sizeof(path), "/proc/%s/%s",
+ thread ? "thread-self" : "self", item);
+ else
+ snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
return read_text(path, buf, size);
}
+
+int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
+{
+ char buf[PAGE_SIZE];
+
+ if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0)
+ return -1;
+
+ return strstr(buf, needle) ? 0 : -1;
+}
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
index c72f28046bfa..49c54fbdb229 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -1,4 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#include <stdbool.h>
#include <stdlib.h>
#define PAGE_SIZE 4096
@@ -29,12 +30,14 @@ extern int cg_read_strstr(const char *cgroup, const char *control,
const char *needle);
extern long cg_read_long(const char *cgroup, const char *control);
long cg_read_key_long(const char *cgroup, const char *control, const char *key);
+extern long cg_read_lc(const char *cgroup, const char *control);
extern int cg_write(const char *cgroup, const char *control, char *buf);
extern int cg_run(const char *cgroup,
int (*fn)(const char *cgroup, void *arg),
void *arg);
extern int cg_enter(const char *cgroup, int pid);
extern int cg_enter_current(const char *cgroup);
+extern int cg_enter_current_thread(const char *cgroup);
extern int cg_run_nowait(const char *cgroup,
int (*fn)(const char *cgroup, void *arg),
void *arg);
@@ -45,4 +48,5 @@ extern int is_swap_enabled(void);
extern int set_oom_adj_score(int pid, int score);
extern int cg_wait_for_proc_count(const char *cgroup, int count);
extern int cg_killall(const char *cgroup);
-extern char proc_read_text(int pid, const char *item, char *buf, size_t size);
+extern ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size);
+extern int proc_read_strstr(int pid, bool thread, const char *item, const char *needle);
diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c
index 79053a4f4783..e19ce940cd6a 100644
--- a/tools/testing/selftests/cgroup/test_core.c
+++ b/tools/testing/selftests/cgroup/test_core.c
@@ -5,6 +5,9 @@
#include <unistd.h>
#include <stdio.h>
#include <errno.h>
+#include <signal.h>
+#include <string.h>
+#include <pthread.h>
#include "../kselftest.h"
#include "cgroup_util.h"
@@ -354,6 +357,147 @@ cleanup:
return ret;
}
+static void *dummy_thread_fn(void *arg)
+{
+ return (void *)(size_t)pause();
+}
+
+/*
+ * Test threadgroup migration.
+ * All threads of a process are migrated together.
+ */
+static int test_cgcore_proc_migration(const char *root)
+{
+ int ret = KSFT_FAIL;
+ int t, c_threads = 0, n_threads = 13;
+ char *src = NULL, *dst = NULL;
+ pthread_t threads[n_threads];
+
+ src = cg_name(root, "cg_src");
+ dst = cg_name(root, "cg_dst");
+ if (!src || !dst)
+ goto cleanup;
+
+ if (cg_create(src))
+ goto cleanup;
+ if (cg_create(dst))
+ goto cleanup;
+
+ if (cg_enter_current(src))
+ goto cleanup;
+
+ for (c_threads = 0; c_threads < n_threads; ++c_threads) {
+ if (pthread_create(&threads[c_threads], NULL, dummy_thread_fn, NULL))
+ goto cleanup;
+ }
+
+ cg_enter_current(dst);
+ if (cg_read_lc(dst, "cgroup.threads") != n_threads + 1)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ for (t = 0; t < c_threads; ++t) {
+ pthread_cancel(threads[t]);
+ }
+
+ for (t = 0; t < c_threads; ++t) {
+ pthread_join(threads[t], NULL);
+ }
+
+ cg_enter_current(root);
+
+ if (dst)
+ cg_destroy(dst);
+ if (src)
+ cg_destroy(src);
+ free(dst);
+ free(src);
+ return ret;
+}
+
+static void *migrating_thread_fn(void *arg)
+{
+ int g, i, n_iterations = 1000;
+ char **grps = arg;
+ char lines[3][PATH_MAX];
+
+ for (g = 1; g < 3; ++g)
+ snprintf(lines[g], sizeof(lines[g]), "0::%s", grps[g] + strlen(grps[0]));
+
+ for (i = 0; i < n_iterations; ++i) {
+ cg_enter_current_thread(grps[(i % 2) + 1]);
+
+ if (proc_read_strstr(0, 1, "cgroup", lines[(i % 2) + 1]))
+ return (void *)-1;
+ }
+ return NULL;
+}
+
+/*
+ * Test single thread migration.
+ * Threaded cgroups allow successful migration of a thread.
+ */
+static int test_cgcore_thread_migration(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *dom = NULL;
+ char line[PATH_MAX];
+ char *grps[3] = { (char *)root, NULL, NULL };
+ pthread_t thr;
+ void *retval;
+
+ dom = cg_name(root, "cg_dom");
+ grps[1] = cg_name(root, "cg_dom/cg_src");
+ grps[2] = cg_name(root, "cg_dom/cg_dst");
+ if (!grps[1] || !grps[2] || !dom)
+ goto cleanup;
+
+ if (cg_create(dom))
+ goto cleanup;
+ if (cg_create(grps[1]))
+ goto cleanup;
+ if (cg_create(grps[2]))
+ goto cleanup;
+
+ if (cg_write(grps[1], "cgroup.type", "threaded"))
+ goto cleanup;
+ if (cg_write(grps[2], "cgroup.type", "threaded"))
+ goto cleanup;
+
+ if (cg_enter_current(grps[1]))
+ goto cleanup;
+
+ if (pthread_create(&thr, NULL, migrating_thread_fn, grps))
+ goto cleanup;
+
+ if (pthread_join(thr, &retval))
+ goto cleanup;
+
+ if (retval)
+ goto cleanup;
+
+ snprintf(line, sizeof(line), "0::%s", grps[1] + strlen(grps[0]));
+ if (proc_read_strstr(0, 1, "cgroup", line))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_enter_current(root);
+ if (grps[2])
+ cg_destroy(grps[2]);
+ if (grps[1])
+ cg_destroy(grps[1]);
+ if (dom)
+ cg_destroy(dom);
+ free(grps[2]);
+ free(grps[1]);
+ free(dom);
+ return ret;
+}
+
#define T(x) { x, #x }
struct corecg_test {
int (*fn)(const char *root);
@@ -366,6 +510,8 @@ struct corecg_test {
T(test_cgcore_parent_becomes_threaded),
T(test_cgcore_invalid_domain),
T(test_cgcore_populated),
+ T(test_cgcore_proc_migration),
+ T(test_cgcore_thread_migration),
};
#undef T
diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c
index 8219a30853d2..23d8fa4a3e4e 100644
--- a/tools/testing/selftests/cgroup/test_freezer.c
+++ b/tools/testing/selftests/cgroup/test_freezer.c
@@ -72,6 +72,7 @@ static int cg_prepare_for_wait(const char *cgroup)
if (ret == -1) {
debug("Error: inotify_add_watch() failed\n");
close(fd);
+ fd = -1;
}
return fd;
@@ -448,6 +449,59 @@ cleanup:
}
/*
+ * The test creates a cgroups and freezes it. Then it creates a child cgroup
+ * and populates it with a task. After that it checks that the child cgroup
+ * is frozen and the parent cgroup remains frozen too.
+ */
+static int test_cgfreezer_mkdir(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent, *child = NULL;
+ int pid;
+
+ parent = cg_name(root, "cg_test_mkdir_A");
+ if (!parent)
+ goto cleanup;
+
+ child = cg_name(parent, "cg_test_mkdir_B");
+ if (!child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_freeze_wait(parent, true))
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+
+ pid = cg_run_nowait(child, child_fn, NULL);
+ if (pid < 0)
+ goto cleanup;
+
+ if (cg_wait_for_proc_count(child, 1))
+ goto cleanup;
+
+ if (cg_check_frozen(child, true))
+ goto cleanup;
+
+ if (cg_check_frozen(parent, true))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (child)
+ cg_destroy(child);
+ free(child);
+ if (parent)
+ cg_destroy(parent);
+ free(parent);
+ return ret;
+}
+
+/*
* The test creates two nested cgroups, freezes the parent
* and removes the child. Then it checks that the parent cgroup
* remains frozen and it's possible to create a new child
@@ -648,7 +702,7 @@ static int proc_check_stopped(int pid)
char buf[PAGE_SIZE];
int len;
- len = proc_read_text(pid, "stat", buf, sizeof(buf));
+ len = proc_read_text(pid, 0, "stat", buf, sizeof(buf));
if (len == -1) {
debug("Can't get %d stat\n", pid);
return -1;
@@ -815,6 +869,7 @@ struct cgfreezer_test {
T(test_cgfreezer_simple),
T(test_cgfreezer_tree),
T(test_cgfreezer_forkbomb),
+ T(test_cgfreezer_mkdir),
T(test_cgfreezer_rmdir),
T(test_cgfreezer_migrate),
T(test_cgfreezer_ptrace),
diff --git a/tools/testing/selftests/cgroup/test_stress.sh b/tools/testing/selftests/cgroup/test_stress.sh
new file mode 100755
index 000000000000..15d9d5896394
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_stress.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+./with_stress.sh -s subsys -s fork ./test_core
diff --git a/tools/testing/selftests/cgroup/with_stress.sh b/tools/testing/selftests/cgroup/with_stress.sh
new file mode 100755
index 000000000000..e28c35008f5b
--- /dev/null
+++ b/tools/testing/selftests/cgroup/with_stress.sh
@@ -0,0 +1,101 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+stress_fork()
+{
+ while true ; do
+ /usr/bin/true
+ sleep 0.01
+ done
+}
+
+stress_subsys()
+{
+ local verb=+
+ while true ; do
+ echo $verb$subsys_ctrl >$sysfs/cgroup.subtree_control
+ [ $verb = "+" ] && verb=- || verb=+
+ # incommensurable period with other stresses
+ sleep 0.011
+ done
+}
+
+init_and_check()
+{
+ sysfs=`mount -t cgroup2 | head -1 | awk '{ print $3 }'`
+ if [ ! -d "$sysfs" ]; then
+ echo "Skipping: cgroup2 is not mounted" >&2
+ exit $ksft_skip
+ fi
+
+ if ! echo +$subsys_ctrl >$sysfs/cgroup.subtree_control ; then
+ echo "Skipping: cannot enable $subsys_ctrl in $sysfs" >&2
+ exit $ksft_skip
+ fi
+
+ if ! echo -$subsys_ctrl >$sysfs/cgroup.subtree_control ; then
+ echo "Skipping: cannot disable $subsys_ctrl in $sysfs" >&2
+ exit $ksft_skip
+ fi
+}
+
+declare -a stresses
+declare -a stress_pids
+duration=5
+rc=0
+subsys_ctrl=cpuset
+sysfs=
+
+while getopts c:d:hs: opt; do
+ case $opt in
+ c)
+ subsys_ctrl=$OPTARG
+ ;;
+ d)
+ duration=$OPTARG
+ ;;
+ h)
+ echo "Usage $0 [ -s stress ] ... [ -d duration ] [-c controller] cmd args .."
+ echo -e "\t default duration $duration seconds"
+ echo -e "\t default controller $subsys_ctrl"
+ exit
+ ;;
+ s)
+ func=stress_$OPTARG
+ if [ "x$(type -t $func)" != "xfunction" ] ; then
+ echo "Unknown stress $OPTARG"
+ exit 1
+ fi
+ stresses+=($func)
+ ;;
+ esac
+done
+shift $((OPTIND - 1))
+
+init_and_check
+
+for s in ${stresses[*]} ; do
+ $s &
+ stress_pids+=($!)
+done
+
+
+time=0
+start=$(date +%s)
+
+while [ $time -lt $duration ] ; do
+ $*
+ rc=$?
+ [ $rc -eq 0 ] || break
+ time=$(($(date +%s) - $start))
+done
+
+for pid in ${stress_pids[*]} ; do
+ kill -SIGTERM $pid
+ wait $pid
+done
+
+exit $rc
diff --git a/tools/testing/selftests/clone3/.gitignore b/tools/testing/selftests/clone3/.gitignore
new file mode 100644
index 000000000000..0dc4f32c6cb8
--- /dev/null
+++ b/tools/testing/selftests/clone3/.gitignore
@@ -0,0 +1,3 @@
+clone3
+clone3_clear_sighand
+clone3_set_tid
diff --git a/tools/testing/selftests/clone3/Makefile b/tools/testing/selftests/clone3/Makefile
new file mode 100644
index 000000000000..cf976c732906
--- /dev/null
+++ b/tools/testing/selftests/clone3/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -g -I../../../../usr/include/
+
+TEST_GEN_PROGS := clone3 clone3_clear_sighand clone3_set_tid
+
+include ../lib.mk
diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c
new file mode 100644
index 000000000000..f14c269a5a18
--- /dev/null
+++ b/tools/testing/selftests/clone3/clone3.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Based on Christian Brauner's clone3() example */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <sched.h>
+
+#include "../kselftest.h"
+#include "clone3_selftests.h"
+
+/*
+ * Different sizes of struct clone_args
+ */
+#ifndef CLONE3_ARGS_SIZE_V0
+#define CLONE3_ARGS_SIZE_V0 64
+#endif
+
+enum test_mode {
+ CLONE3_ARGS_NO_TEST,
+ CLONE3_ARGS_ALL_0,
+ CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG,
+ CLONE3_ARGS_INVAL_EXIT_SIGNAL_NEG,
+ CLONE3_ARGS_INVAL_EXIT_SIGNAL_CSIG,
+ CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG,
+};
+
+static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
+{
+ struct clone_args args = {
+ .flags = flags,
+ .exit_signal = SIGCHLD,
+ };
+
+ struct clone_args_extended {
+ struct clone_args args;
+ __aligned_u64 excess_space[2];
+ } args_ext;
+
+ pid_t pid = -1;
+ int status;
+
+ memset(&args_ext, 0, sizeof(args_ext));
+ if (size > sizeof(struct clone_args))
+ args_ext.excess_space[1] = 1;
+
+ if (size == 0)
+ size = sizeof(struct clone_args);
+
+ switch (test_mode) {
+ case CLONE3_ARGS_ALL_0:
+ args.flags = 0;
+ args.exit_signal = 0;
+ break;
+ case CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG:
+ args.exit_signal = 0xbadc0ded00000000ULL;
+ break;
+ case CLONE3_ARGS_INVAL_EXIT_SIGNAL_NEG:
+ args.exit_signal = 0x0000000080000000ULL;
+ break;
+ case CLONE3_ARGS_INVAL_EXIT_SIGNAL_CSIG:
+ args.exit_signal = 0x0000000000000100ULL;
+ break;
+ case CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG:
+ args.exit_signal = 0x00000000000000f0ULL;
+ break;
+ }
+
+ memcpy(&args_ext.args, &args, sizeof(struct clone_args));
+
+ pid = sys_clone3((struct clone_args *)&args_ext, size);
+ if (pid < 0) {
+ ksft_print_msg("%s - Failed to create new process\n",
+ strerror(errno));
+ return -errno;
+ }
+
+ if (pid == 0) {
+ ksft_print_msg("I am the child, my PID is %d\n", getpid());
+ _exit(EXIT_SUCCESS);
+ }
+
+ ksft_print_msg("I am the parent (%d). My child's pid is %d\n",
+ getpid(), pid);
+
+ if (waitpid(-1, &status, __WALL) < 0) {
+ ksft_print_msg("Child returned %s\n", strerror(errno));
+ return -errno;
+ }
+ if (WEXITSTATUS(status))
+ return WEXITSTATUS(status);
+
+ return 0;
+}
+
+static void test_clone3(uint64_t flags, size_t size, int expected,
+ enum test_mode test_mode)
+{
+ int ret;
+
+ ksft_print_msg(
+ "[%d] Trying clone3() with flags %#" PRIx64 " (size %zu)\n",
+ getpid(), flags, size);
+ ret = call_clone3(flags, size, test_mode);
+ ksft_print_msg("[%d] clone3() with flags says: %d expected %d\n",
+ getpid(), ret, expected);
+ if (ret != expected)
+ ksft_test_result_fail(
+ "[%d] Result (%d) is different than expected (%d)\n",
+ getpid(), ret, expected);
+ else
+ ksft_test_result_pass(
+ "[%d] Result (%d) matches expectation (%d)\n",
+ getpid(), ret, expected);
+}
+
+int main(int argc, char *argv[])
+{
+ pid_t pid;
+
+ uid_t uid = getuid();
+
+ test_clone3_supported();
+ ksft_print_header();
+ ksft_set_plan(17);
+
+ /* Just a simple clone3() should return 0.*/
+ test_clone3(0, 0, 0, CLONE3_ARGS_NO_TEST);
+
+ /* Do a clone3() in a new PID NS.*/
+ if (uid == 0)
+ test_clone3(CLONE_NEWPID, 0, 0, CLONE3_ARGS_NO_TEST);
+ else
+ ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
+
+ /* Do a clone3() with CLONE3_ARGS_SIZE_V0. */
+ test_clone3(0, CLONE3_ARGS_SIZE_V0, 0, CLONE3_ARGS_NO_TEST);
+
+ /* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 */
+ test_clone3(0, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST);
+
+ /* Do a clone3() with sizeof(struct clone_args) + 8 */
+ test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_NO_TEST);
+
+ /* Do a clone3() with exit_signal having highest 32 bits non-zero */
+ test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG);
+
+ /* Do a clone3() with negative 32-bit exit_signal */
+ test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NEG);
+
+ /* Do a clone3() with exit_signal not fitting into CSIGNAL mask */
+ test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_CSIG);
+
+ /* Do a clone3() with NSIG < exit_signal < CSIG */
+ test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG);
+
+ test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_ALL_0);
+
+ test_clone3(0, sizeof(struct clone_args) + 16, -E2BIG,
+ CLONE3_ARGS_ALL_0);
+
+ test_clone3(0, sizeof(struct clone_args) * 2, -E2BIG,
+ CLONE3_ARGS_ALL_0);
+
+ /* Do a clone3() with > page size */
+ test_clone3(0, getpagesize() + 8, -E2BIG, CLONE3_ARGS_NO_TEST);
+
+ /* Do a clone3() with CLONE3_ARGS_SIZE_V0 in a new PID NS. */
+ if (uid == 0)
+ test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0, 0,
+ CLONE3_ARGS_NO_TEST);
+ else
+ ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
+
+ /* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 in a new PID NS */
+ test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL,
+ CLONE3_ARGS_NO_TEST);
+
+ /* Do a clone3() with sizeof(struct clone_args) + 8 in a new PID NS */
+ if (uid == 0)
+ test_clone3(CLONE_NEWPID, sizeof(struct clone_args) + 8, 0,
+ CLONE3_ARGS_NO_TEST);
+ else
+ ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
+
+ /* Do a clone3() with > page size in a new PID NS */
+ test_clone3(CLONE_NEWPID, getpagesize() + 8, -E2BIG,
+ CLONE3_ARGS_NO_TEST);
+
+ return !ksft_get_fail_cnt() ? ksft_exit_pass() : ksft_exit_fail();
+}
diff --git a/tools/testing/selftests/clone3/clone3_clear_sighand.c b/tools/testing/selftests/clone3/clone3_clear_sighand.c
new file mode 100644
index 000000000000..9e1af8aa7698
--- /dev/null
+++ b/tools/testing/selftests/clone3/clone3_clear_sighand.c
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+
+#include "../kselftest.h"
+#include "clone3_selftests.h"
+
+#ifndef CLONE_CLEAR_SIGHAND
+#define CLONE_CLEAR_SIGHAND 0x100000000ULL
+#endif
+
+static void nop_handler(int signo)
+{
+}
+
+static int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+static void test_clone3_clear_sighand(void)
+{
+ int ret;
+ pid_t pid;
+ struct clone_args args = {};
+ struct sigaction act;
+
+ /*
+ * Check that CLONE_CLEAR_SIGHAND and CLONE_SIGHAND are mutually
+ * exclusive.
+ */
+ args.flags |= CLONE_CLEAR_SIGHAND | CLONE_SIGHAND;
+ args.exit_signal = SIGCHLD;
+ pid = sys_clone3(&args, sizeof(args));
+ if (pid > 0)
+ ksft_exit_fail_msg(
+ "clone3(CLONE_CLEAR_SIGHAND | CLONE_SIGHAND) succeeded\n");
+
+ act.sa_handler = nop_handler;
+ ret = sigemptyset(&act.sa_mask);
+ if (ret < 0)
+ ksft_exit_fail_msg("%s - sigemptyset() failed\n",
+ strerror(errno));
+
+ act.sa_flags = 0;
+
+ /* Register signal handler for SIGUSR1 */
+ ret = sigaction(SIGUSR1, &act, NULL);
+ if (ret < 0)
+ ksft_exit_fail_msg(
+ "%s - sigaction(SIGUSR1, &act, NULL) failed\n",
+ strerror(errno));
+
+ /* Register signal handler for SIGUSR2 */
+ ret = sigaction(SIGUSR2, &act, NULL);
+ if (ret < 0)
+ ksft_exit_fail_msg(
+ "%s - sigaction(SIGUSR2, &act, NULL) failed\n",
+ strerror(errno));
+
+ /* Check that CLONE_CLEAR_SIGHAND works. */
+ args.flags = CLONE_CLEAR_SIGHAND;
+ pid = sys_clone3(&args, sizeof(args));
+ if (pid < 0)
+ ksft_exit_fail_msg("%s - clone3(CLONE_CLEAR_SIGHAND) failed\n",
+ strerror(errno));
+
+ if (pid == 0) {
+ ret = sigaction(SIGUSR1, NULL, &act);
+ if (ret < 0)
+ exit(EXIT_FAILURE);
+
+ if (act.sa_handler != SIG_DFL)
+ exit(EXIT_FAILURE);
+
+ ret = sigaction(SIGUSR2, NULL, &act);
+ if (ret < 0)
+ exit(EXIT_FAILURE);
+
+ if (act.sa_handler != SIG_DFL)
+ exit(EXIT_FAILURE);
+
+ exit(EXIT_SUCCESS);
+ }
+
+ ret = wait_for_pid(pid);
+ if (ret)
+ ksft_exit_fail_msg(
+ "Failed to clear signal handler for child process\n");
+
+ ksft_test_result_pass("Cleared signal handlers for child process\n");
+}
+
+int main(int argc, char **argv)
+{
+ ksft_print_header();
+ test_clone3_supported();
+
+ ksft_set_plan(1);
+
+ test_clone3_clear_sighand();
+
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h
new file mode 100644
index 000000000000..a3f2c8ad8bcc
--- /dev/null
+++ b/tools/testing/selftests/clone3/clone3_selftests.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _CLONE3_SELFTESTS_H
+#define _CLONE3_SELFTESTS_H
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdint.h>
+#include <syscall.h>
+#include <linux/types.h>
+
+#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr)))
+
+#ifndef __NR_clone3
+#define __NR_clone3 -1
+struct clone_args {
+ __aligned_u64 flags;
+ __aligned_u64 pidfd;
+ __aligned_u64 child_tid;
+ __aligned_u64 parent_tid;
+ __aligned_u64 exit_signal;
+ __aligned_u64 stack;
+ __aligned_u64 stack_size;
+ __aligned_u64 tls;
+ __aligned_u64 set_tid;
+ __aligned_u64 set_tid_size;
+};
+#endif
+
+static pid_t sys_clone3(struct clone_args *args, size_t size)
+{
+ fflush(stdout);
+ fflush(stderr);
+ return syscall(__NR_clone3, args, size);
+}
+
+static inline void test_clone3_supported(void)
+{
+ pid_t pid;
+ struct clone_args args = {};
+
+ if (__NR_clone3 < 0)
+ ksft_exit_skip("clone3() syscall is not supported\n");
+
+ /* Set to something that will always cause EINVAL. */
+ args.exit_signal = -1;
+ pid = sys_clone3(&args, sizeof(args));
+ if (!pid)
+ exit(EXIT_SUCCESS);
+
+ if (pid > 0) {
+ wait(NULL);
+ ksft_exit_fail_msg(
+ "Managed to create child process with invalid exit_signal\n");
+ }
+
+ if (errno == ENOSYS)
+ ksft_exit_skip("clone3() syscall is not supported\n");
+
+ ksft_print_msg("clone3() syscall supported\n");
+}
+
+#endif /* _CLONE3_SELFTESTS_H */
diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c
new file mode 100644
index 000000000000..25beb22f35b5
--- /dev/null
+++ b/tools/testing/selftests/clone3/clone3_set_tid.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Based on Christian Brauner's clone3() example.
+ * These tests are assuming to be running in the host's
+ * PID namespace.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <sched.h>
+
+#include "../kselftest.h"
+#include "clone3_selftests.h"
+
+#ifndef MAX_PID_NS_LEVEL
+#define MAX_PID_NS_LEVEL 32
+#endif
+
+static int pipe_1[2];
+static int pipe_2[2];
+
+static void child_exit(int ret)
+{
+ fflush(stdout);
+ fflush(stderr);
+ _exit(ret);
+}
+
+static int call_clone3_set_tid(pid_t *set_tid,
+ size_t set_tid_size,
+ int flags,
+ int expected_pid,
+ bool wait_for_it)
+{
+ int status;
+ pid_t pid = -1;
+
+ struct clone_args args = {
+ .flags = flags,
+ .exit_signal = SIGCHLD,
+ .set_tid = ptr_to_u64(set_tid),
+ .set_tid_size = set_tid_size,
+ };
+
+ pid = sys_clone3(&args, sizeof(struct clone_args));
+ if (pid < 0) {
+ ksft_print_msg("%s - Failed to create new process\n",
+ strerror(errno));
+ return -errno;
+ }
+
+ if (pid == 0) {
+ int ret;
+ char tmp = 0;
+ int exit_code = EXIT_SUCCESS;
+
+ ksft_print_msg("I am the child, my PID is %d (expected %d)\n",
+ getpid(), set_tid[0]);
+ if (wait_for_it) {
+ ksft_print_msg("[%d] Child is ready and waiting\n",
+ getpid());
+
+ /* Signal the parent that the child is ready */
+ close(pipe_1[0]);
+ ret = write(pipe_1[1], &tmp, 1);
+ if (ret != 1) {
+ ksft_print_msg(
+ "Writing to pipe returned %d", ret);
+ exit_code = EXIT_FAILURE;
+ }
+ close(pipe_1[1]);
+ close(pipe_2[1]);
+ ret = read(pipe_2[0], &tmp, 1);
+ if (ret != 1) {
+ ksft_print_msg(
+ "Reading from pipe returned %d", ret);
+ exit_code = EXIT_FAILURE;
+ }
+ close(pipe_2[0]);
+ }
+
+ if (set_tid[0] != getpid())
+ child_exit(EXIT_FAILURE);
+ child_exit(exit_code);
+ }
+
+ if (expected_pid == 0 || expected_pid == pid) {
+ ksft_print_msg("I am the parent (%d). My child's pid is %d\n",
+ getpid(), pid);
+ } else {
+ ksft_print_msg(
+ "Expected child pid %d does not match actual pid %d\n",
+ expected_pid, pid);
+ return -1;
+ }
+
+ if (waitpid(pid, &status, 0) < 0) {
+ ksft_print_msg("Child returned %s\n", strerror(errno));
+ return -errno;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+static void test_clone3_set_tid(pid_t *set_tid,
+ size_t set_tid_size,
+ int flags,
+ int expected,
+ int expected_pid,
+ bool wait_for_it)
+{
+ int ret;
+
+ ksft_print_msg(
+ "[%d] Trying clone3() with CLONE_SET_TID to %d and 0x%x\n",
+ getpid(), set_tid[0], flags);
+ ret = call_clone3_set_tid(set_tid, set_tid_size, flags, expected_pid,
+ wait_for_it);
+ ksft_print_msg(
+ "[%d] clone3() with CLONE_SET_TID %d says :%d - expected %d\n",
+ getpid(), set_tid[0], ret, expected);
+ if (ret != expected)
+ ksft_test_result_fail(
+ "[%d] Result (%d) is different than expected (%d)\n",
+ getpid(), ret, expected);
+ else
+ ksft_test_result_pass(
+ "[%d] Result (%d) matches expectation (%d)\n",
+ getpid(), ret, expected);
+}
+int main(int argc, char *argv[])
+{
+ FILE *f;
+ char buf;
+ char *line;
+ int status;
+ int ret = -1;
+ size_t len = 0;
+ int pid_max = 0;
+ uid_t uid = getuid();
+ char proc_path[100] = {0};
+ pid_t pid, ns1, ns2, ns3, ns_pid;
+ pid_t set_tid[MAX_PID_NS_LEVEL * 2];
+
+ ksft_print_header();
+ test_clone3_supported();
+ ksft_set_plan(29);
+
+ if (pipe(pipe_1) < 0 || pipe(pipe_2) < 0)
+ ksft_exit_fail_msg("pipe() failed\n");
+
+ f = fopen("/proc/sys/kernel/pid_max", "r");
+ if (f == NULL)
+ ksft_exit_fail_msg(
+ "%s - Could not open /proc/sys/kernel/pid_max\n",
+ strerror(errno));
+ fscanf(f, "%d", &pid_max);
+ fclose(f);
+ ksft_print_msg("/proc/sys/kernel/pid_max %d\n", pid_max);
+
+ /* Try invalid settings */
+ memset(&set_tid, 0, sizeof(set_tid));
+ test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0);
+
+ test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0);
+
+ test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0,
+ -EINVAL, 0, 0);
+
+ test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0);
+
+ /*
+ * This can actually work if this test running in a MAX_PID_NS_LEVEL - 1
+ * nested PID namespace.
+ */
+ test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0);
+
+ memset(&set_tid, 0xff, sizeof(set_tid));
+ test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0);
+
+ test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0);
+
+ test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0,
+ -EINVAL, 0, 0);
+
+ test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0);
+
+ /*
+ * This can actually work if this test running in a MAX_PID_NS_LEVEL - 1
+ * nested PID namespace.
+ */
+ test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0);
+
+ memset(&set_tid, 0, sizeof(set_tid));
+ /* Try with an invalid PID */
+ set_tid[0] = 0;
+ test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
+
+ set_tid[0] = -1;
+ test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
+
+ /* Claim that the set_tid array actually contains 2 elements. */
+ test_clone3_set_tid(set_tid, 2, 0, -EINVAL, 0, 0);
+
+ /* Try it in a new PID namespace */
+ if (uid == 0)
+ test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
+ else
+ ksft_test_result_skip("Clone3() with set_tid requires root\n");
+
+ /* Try with a valid PID (1) this should return -EEXIST. */
+ set_tid[0] = 1;
+ if (uid == 0)
+ test_clone3_set_tid(set_tid, 1, 0, -EEXIST, 0, 0);
+ else
+ ksft_test_result_skip("Clone3() with set_tid requires root\n");
+
+ /* Try it in a new PID namespace */
+ if (uid == 0)
+ test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, 0, 0, 0);
+ else
+ ksft_test_result_skip("Clone3() with set_tid requires root\n");
+
+ /* pid_max should fail everywhere */
+ set_tid[0] = pid_max;
+ test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
+
+ if (uid == 0)
+ test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
+ else
+ ksft_test_result_skip("Clone3() with set_tid requires root\n");
+
+ if (uid != 0) {
+ /*
+ * All remaining tests require root. Tell the framework
+ * that all those tests are skipped as non-root.
+ */
+ ksft_cnt.ksft_xskip += ksft_plan - ksft_test_num();
+ goto out;
+ }
+
+ /* Find the current active PID */
+ pid = fork();
+ if (pid == 0) {
+ ksft_print_msg("Child has PID %d\n", getpid());
+ child_exit(EXIT_SUCCESS);
+ }
+ if (waitpid(pid, &status, 0) < 0)
+ ksft_exit_fail_msg("Waiting for child %d failed", pid);
+
+ /* After the child has finished, its PID should be free. */
+ set_tid[0] = pid;
+ test_clone3_set_tid(set_tid, 1, 0, 0, 0, 0);
+
+ /* This should fail as there is no PID 1 in that namespace */
+ test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
+
+ /*
+ * Creating a process with PID 1 in the newly created most nested
+ * PID namespace and PID 'pid' in the parent PID namespace. This
+ * needs to work.
+ */
+ set_tid[0] = 1;
+ set_tid[1] = pid;
+ test_clone3_set_tid(set_tid, 2, CLONE_NEWPID, 0, pid, 0);
+
+ ksft_print_msg("unshare PID namespace\n");
+ if (unshare(CLONE_NEWPID) == -1)
+ ksft_exit_fail_msg("unshare(CLONE_NEWPID) failed: %s\n",
+ strerror(errno));
+
+ set_tid[0] = pid;
+
+ /* This should fail as there is no PID 1 in that namespace */
+ test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
+
+ /* Let's create a PID 1 */
+ ns_pid = fork();
+ if (ns_pid == 0) {
+ /*
+ * This and the next test cases check that all pid-s are
+ * released on error paths.
+ */
+ set_tid[0] = 43;
+ set_tid[1] = -1;
+ test_clone3_set_tid(set_tid, 2, 0, -EINVAL, 0, 0);
+
+ set_tid[0] = 43;
+ set_tid[1] = pid;
+ test_clone3_set_tid(set_tid, 2, 0, 0, 43, 0);
+
+ ksft_print_msg("Child in PID namespace has PID %d\n", getpid());
+ set_tid[0] = 2;
+ test_clone3_set_tid(set_tid, 1, 0, 0, 2, 0);
+
+ set_tid[0] = 1;
+ set_tid[1] = -1;
+ set_tid[2] = pid;
+ /* This should fail as there is invalid PID at level '1'. */
+ test_clone3_set_tid(set_tid, 3, CLONE_NEWPID, -EINVAL, 0, 0);
+
+ set_tid[0] = 1;
+ set_tid[1] = 42;
+ set_tid[2] = pid;
+ /*
+ * This should fail as there are not enough active PID
+ * namespaces. Again assuming this is running in the host's
+ * PID namespace. Not yet nested.
+ */
+ test_clone3_set_tid(set_tid, 4, CLONE_NEWPID, -EINVAL, 0, 0);
+
+ /*
+ * This should work and from the parent we should see
+ * something like 'NSpid: pid 42 1'.
+ */
+ test_clone3_set_tid(set_tid, 3, CLONE_NEWPID, 0, 42, true);
+
+ child_exit(ksft_cnt.ksft_fail);
+ }
+
+ close(pipe_1[1]);
+ close(pipe_2[0]);
+ while (read(pipe_1[0], &buf, 1) > 0) {
+ ksft_print_msg("[%d] Child is ready and waiting\n", getpid());
+ break;
+ }
+
+ snprintf(proc_path, sizeof(proc_path), "/proc/%d/status", pid);
+ f = fopen(proc_path, "r");
+ if (f == NULL)
+ ksft_exit_fail_msg(
+ "%s - Could not open %s\n",
+ strerror(errno), proc_path);
+
+ while (getline(&line, &len, f) != -1) {
+ if (strstr(line, "NSpid")) {
+ int i;
+
+ /* Verify that all generated PIDs are as expected. */
+ i = sscanf(line, "NSpid:\t%d\t%d\t%d",
+ &ns3, &ns2, &ns1);
+ if (i != 3) {
+ ksft_print_msg(
+ "Unexpected 'NSPid:' entry: %s",
+ line);
+ ns1 = ns2 = ns3 = 0;
+ }
+ break;
+ }
+ }
+ fclose(f);
+ free(line);
+ close(pipe_2[0]);
+
+ /* Tell the clone3()'d child to finish. */
+ write(pipe_2[1], &buf, 1);
+ close(pipe_2[1]);
+
+ if (waitpid(ns_pid, &status, 0) < 0) {
+ ksft_print_msg("Child returned %s\n", strerror(errno));
+ ret = -errno;
+ goto out;
+ }
+
+ if (!WIFEXITED(status))
+ ksft_test_result_fail("Child error\n");
+
+ ksft_cnt.ksft_pass += 6 - (ksft_cnt.ksft_fail - WEXITSTATUS(status));
+ ksft_cnt.ksft_fail = WEXITSTATUS(status);
+
+ if (ns3 == pid && ns2 == 42 && ns1 == 1)
+ ksft_test_result_pass(
+ "PIDs in all namespaces as expected (%d,%d,%d)\n",
+ ns3, ns2, ns1);
+ else
+ ksft_test_result_fail(
+ "PIDs in all namespaces not as expected (%d,%d,%d)\n",
+ ns3, ns2, ns1);
+out:
+ ret = 0;
+
+ return !ret ? ksft_exit_pass() : ksft_exit_fail();
+}
diff --git a/tools/testing/selftests/dmabuf-heaps/Makefile b/tools/testing/selftests/dmabuf-heaps/Makefile
new file mode 100644
index 000000000000..607c2acd2082
--- /dev/null
+++ b/tools/testing/selftests/dmabuf-heaps/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -static -O3 -Wl,-no-as-needed -Wall -I../../../../usr/include
+
+TEST_GEN_PROGS = dmabuf-heap
+
+include ../lib.mk
diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
new file mode 100644
index 000000000000..cd5e1f602ac9
--- /dev/null
+++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
@@ -0,0 +1,396 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+
+#include <linux/dma-buf.h>
+#include <drm/drm.h>
+
+#include "../../../../include/uapi/linux/dma-heap.h"
+
+#define DEVPATH "/dev/dma_heap"
+
+static int check_vgem(int fd)
+{
+ drm_version_t version = { 0 };
+ char name[5];
+ int ret;
+
+ version.name_len = 4;
+ version.name = name;
+
+ ret = ioctl(fd, DRM_IOCTL_VERSION, &version);
+ if (ret)
+ return 0;
+
+ return !strcmp(name, "vgem");
+}
+
+static int open_vgem(void)
+{
+ int i, fd;
+ const char *drmstr = "/dev/dri/card";
+
+ fd = -1;
+ for (i = 0; i < 16; i++) {
+ char name[80];
+
+ snprintf(name, 80, "%s%u", drmstr, i);
+
+ fd = open(name, O_RDWR);
+ if (fd < 0)
+ continue;
+
+ if (!check_vgem(fd)) {
+ close(fd);
+ fd = -1;
+ continue;
+ } else {
+ break;
+ }
+ }
+ return fd;
+}
+
+static int import_vgem_fd(int vgem_fd, int dma_buf_fd, uint32_t *handle)
+{
+ struct drm_prime_handle import_handle = {
+ .fd = dma_buf_fd,
+ .flags = 0,
+ .handle = 0,
+ };
+ int ret;
+
+ ret = ioctl(vgem_fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &import_handle);
+ if (ret == 0)
+ *handle = import_handle.handle;
+ return ret;
+}
+
+static void close_handle(int vgem_fd, uint32_t handle)
+{
+ struct drm_gem_close close = {
+ .handle = handle,
+ };
+
+ ioctl(vgem_fd, DRM_IOCTL_GEM_CLOSE, &close);
+}
+
+static int dmabuf_heap_open(char *name)
+{
+ int ret, fd;
+ char buf[256];
+
+ ret = snprintf(buf, 256, "%s/%s", DEVPATH, name);
+ if (ret < 0) {
+ printf("snprintf failed!\n");
+ return ret;
+ }
+
+ fd = open(buf, O_RDWR);
+ if (fd < 0)
+ printf("open %s failed!\n", buf);
+ return fd;
+}
+
+static int dmabuf_heap_alloc_fdflags(int fd, size_t len, unsigned int fd_flags,
+ unsigned int heap_flags, int *dmabuf_fd)
+{
+ struct dma_heap_allocation_data data = {
+ .len = len,
+ .fd = 0,
+ .fd_flags = fd_flags,
+ .heap_flags = heap_flags,
+ };
+ int ret;
+
+ if (!dmabuf_fd)
+ return -EINVAL;
+
+ ret = ioctl(fd, DMA_HEAP_IOCTL_ALLOC, &data);
+ if (ret < 0)
+ return ret;
+ *dmabuf_fd = (int)data.fd;
+ return ret;
+}
+
+static int dmabuf_heap_alloc(int fd, size_t len, unsigned int flags,
+ int *dmabuf_fd)
+{
+ return dmabuf_heap_alloc_fdflags(fd, len, O_RDWR | O_CLOEXEC, flags,
+ dmabuf_fd);
+}
+
+static void dmabuf_sync(int fd, int start_stop)
+{
+ struct dma_buf_sync sync = {
+ .flags = start_stop | DMA_BUF_SYNC_RW,
+ };
+ int ret;
+
+ ret = ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync);
+ if (ret)
+ printf("sync failed %d\n", errno);
+}
+
+#define ONE_MEG (1024 * 1024)
+
+static int test_alloc_and_import(char *heap_name)
+{
+ int heap_fd = -1, dmabuf_fd = -1, importer_fd = -1;
+ uint32_t handle = 0;
+ void *p = NULL;
+ int ret;
+
+ printf("Testing heap: %s\n", heap_name);
+
+ heap_fd = dmabuf_heap_open(heap_name);
+ if (heap_fd < 0)
+ return -1;
+
+ printf("Allocating 1 MEG\n");
+ ret = dmabuf_heap_alloc(heap_fd, ONE_MEG, 0, &dmabuf_fd);
+ if (ret) {
+ printf("Allocation Failed!\n");
+ ret = -1;
+ goto out;
+ }
+ /* mmap and write a simple pattern */
+ p = mmap(NULL,
+ ONE_MEG,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED,
+ dmabuf_fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ ret = -1;
+ goto out;
+ }
+ printf("mmap passed\n");
+
+ dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START);
+ memset(p, 1, ONE_MEG / 2);
+ memset((char *)p + ONE_MEG / 2, 0, ONE_MEG / 2);
+ dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_END);
+
+ importer_fd = open_vgem();
+ if (importer_fd < 0) {
+ ret = importer_fd;
+ printf("Failed to open vgem\n");
+ goto out;
+ }
+
+ ret = import_vgem_fd(importer_fd, dmabuf_fd, &handle);
+ if (ret < 0) {
+ printf("Failed to import buffer\n");
+ goto out;
+ }
+ printf("import passed\n");
+
+ dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START);
+ memset(p, 0xff, ONE_MEG);
+ dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_END);
+ printf("syncs passed\n");
+
+ close_handle(importer_fd, handle);
+ ret = 0;
+
+out:
+ if (p)
+ munmap(p, ONE_MEG);
+ if (importer_fd >= 0)
+ close(importer_fd);
+ if (dmabuf_fd >= 0)
+ close(dmabuf_fd);
+ if (heap_fd >= 0)
+ close(heap_fd);
+
+ return ret;
+}
+
+/* Test the ioctl version compatibility w/ a smaller structure then expected */
+static int dmabuf_heap_alloc_older(int fd, size_t len, unsigned int flags,
+ int *dmabuf_fd)
+{
+ int ret;
+ unsigned int older_alloc_ioctl;
+ struct dma_heap_allocation_data_smaller {
+ __u64 len;
+ __u32 fd;
+ __u32 fd_flags;
+ } data = {
+ .len = len,
+ .fd = 0,
+ .fd_flags = O_RDWR | O_CLOEXEC,
+ };
+
+ older_alloc_ioctl = _IOWR(DMA_HEAP_IOC_MAGIC, 0x0,
+ struct dma_heap_allocation_data_smaller);
+ if (!dmabuf_fd)
+ return -EINVAL;
+
+ ret = ioctl(fd, older_alloc_ioctl, &data);
+ if (ret < 0)
+ return ret;
+ *dmabuf_fd = (int)data.fd;
+ return ret;
+}
+
+/* Test the ioctl version compatibility w/ a larger structure then expected */
+static int dmabuf_heap_alloc_newer(int fd, size_t len, unsigned int flags,
+ int *dmabuf_fd)
+{
+ int ret;
+ unsigned int newer_alloc_ioctl;
+ struct dma_heap_allocation_data_bigger {
+ __u64 len;
+ __u32 fd;
+ __u32 fd_flags;
+ __u64 heap_flags;
+ __u64 garbage1;
+ __u64 garbage2;
+ __u64 garbage3;
+ } data = {
+ .len = len,
+ .fd = 0,
+ .fd_flags = O_RDWR | O_CLOEXEC,
+ .heap_flags = flags,
+ .garbage1 = 0xffffffff,
+ .garbage2 = 0x88888888,
+ .garbage3 = 0x11111111,
+ };
+
+ newer_alloc_ioctl = _IOWR(DMA_HEAP_IOC_MAGIC, 0x0,
+ struct dma_heap_allocation_data_bigger);
+ if (!dmabuf_fd)
+ return -EINVAL;
+
+ ret = ioctl(fd, newer_alloc_ioctl, &data);
+ if (ret < 0)
+ return ret;
+
+ *dmabuf_fd = (int)data.fd;
+ return ret;
+}
+
+static int test_alloc_compat(char *heap_name)
+{
+ int heap_fd = -1, dmabuf_fd = -1;
+ int ret;
+
+ heap_fd = dmabuf_heap_open(heap_name);
+ if (heap_fd < 0)
+ return -1;
+
+ printf("Testing (theoretical)older alloc compat\n");
+ ret = dmabuf_heap_alloc_older(heap_fd, ONE_MEG, 0, &dmabuf_fd);
+ if (ret) {
+ printf("Older compat allocation failed!\n");
+ ret = -1;
+ goto out;
+ }
+ close(dmabuf_fd);
+
+ printf("Testing (theoretical)newer alloc compat\n");
+ ret = dmabuf_heap_alloc_newer(heap_fd, ONE_MEG, 0, &dmabuf_fd);
+ if (ret) {
+ printf("Newer compat allocation failed!\n");
+ ret = -1;
+ goto out;
+ }
+ printf("Ioctl compatibility tests passed\n");
+out:
+ if (dmabuf_fd >= 0)
+ close(dmabuf_fd);
+ if (heap_fd >= 0)
+ close(heap_fd);
+
+ return ret;
+}
+
+static int test_alloc_errors(char *heap_name)
+{
+ int heap_fd = -1, dmabuf_fd = -1;
+ int ret;
+
+ heap_fd = dmabuf_heap_open(heap_name);
+ if (heap_fd < 0)
+ return -1;
+
+ printf("Testing expected error cases\n");
+ ret = dmabuf_heap_alloc(0, ONE_MEG, 0x111111, &dmabuf_fd);
+ if (!ret) {
+ printf("Did not see expected error (invalid fd)!\n");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dmabuf_heap_alloc(heap_fd, ONE_MEG, 0x111111, &dmabuf_fd);
+ if (!ret) {
+ printf("Did not see expected error (invalid heap flags)!\n");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dmabuf_heap_alloc_fdflags(heap_fd, ONE_MEG,
+ ~(O_RDWR | O_CLOEXEC), 0, &dmabuf_fd);
+ if (!ret) {
+ printf("Did not see expected error (invalid fd flags)!\n");
+ ret = -1;
+ goto out;
+ }
+
+ printf("Expected error checking passed\n");
+out:
+ if (dmabuf_fd >= 0)
+ close(dmabuf_fd);
+ if (heap_fd >= 0)
+ close(heap_fd);
+
+ return ret;
+}
+
+int main(void)
+{
+ DIR *d;
+ struct dirent *dir;
+ int ret = -1;
+
+ d = opendir(DEVPATH);
+ if (!d) {
+ printf("No %s directory?\n", DEVPATH);
+ return -1;
+ }
+
+ while ((dir = readdir(d)) != NULL) {
+ if (!strncmp(dir->d_name, ".", 2))
+ continue;
+ if (!strncmp(dir->d_name, "..", 3))
+ continue;
+
+ ret = test_alloc_and_import(dir->d_name);
+ if (ret)
+ break;
+
+ ret = test_alloc_compat(dir->d_name);
+ if (ret)
+ break;
+
+ ret = test_alloc_errors(dir->d_name);
+ if (ret)
+ break;
+ }
+ closedir(d);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh
new file mode 100755
index 000000000000..89b55e946eed
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh
@@ -0,0 +1,129 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test generic devlink-trap functionality over mlxsw. These tests are not
+# specific to a single trap, but do not check the devlink-trap common
+# infrastructure either.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ dev_del_test
+"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2
+}
+
+switch_create()
+{
+ ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+dev_del_test()
+{
+ local trap_name="source_mac_is_multicast"
+ local smac=01:02:03:04:05:06
+ local num_iter=5
+ local mz_pid
+ local i
+
+ $MZ $h1 -c 0 -p 100 -a $smac -b bcast -t ip -q &
+ mz_pid=$!
+
+ # The purpose of this test is to make sure we correctly dismantle a
+ # port while packets are trapped from it. This is done by reloading the
+ # the driver while the 'ingress_smac_mc_drop' trap is triggered.
+ RET=0
+
+ for i in $(seq 1 $num_iter); do
+ log_info "Iteration $i / $num_iter"
+
+ devlink_trap_action_set $trap_name "trap"
+ sleep 1
+
+ devlink_reload
+ # Allow netdevices to be re-created following the reload
+ sleep 20
+
+ cleanup
+ setup_prepare
+ setup_wait
+ done
+
+ log_test "Device delete"
+
+ kill $mz_pid && wait $mz_pid &> /dev/null
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
new file mode 100755
index 000000000000..58cdbfb608e9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
@@ -0,0 +1,437 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap L2 drops functionality over mlxsw. Each registered L2 drop
+# packet trap is tested to make sure it is triggered under the right
+# conditions.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ source_mac_is_multicast_test
+ vlan_tag_mismatch_test
+ ingress_vlan_filter_test
+ ingress_stp_filter_test
+ port_list_is_empty_test
+ port_loopback_filter_test
+"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2
+}
+
+switch_create()
+{
+ ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 clsact
+
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+source_mac_is_multicast_test()
+{
+ local trap_name="source_mac_is_multicast"
+ local smac=01:02:03:04:05:06
+ local group_name="l2_drops"
+ local mz_pid
+
+ tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+ flower src_mac $smac action drop
+
+ $MZ $h1 -c 0 -p 100 -a $smac -b bcast -t ip -d 1msec -q &
+ mz_pid=$!
+
+ RET=0
+
+ devlink_trap_drop_test $trap_name $group_name $swp2
+
+ log_test "Source MAC is multicast"
+
+ devlink_trap_drop_cleanup $mz_pid $swp2 ip
+}
+
+__vlan_tag_mismatch_test()
+{
+ local trap_name="vlan_tag_mismatch"
+ local dmac=de:ad:be:ef:13:37
+ local group_name="l2_drops"
+ local opt=$1; shift
+ local mz_pid
+
+ # Remove PVID flag. This should prevent untagged and prio-tagged
+ # packets from entering the bridge.
+ bridge vlan add vid 1 dev $swp1 untagged master
+
+ tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+ flower dst_mac $dmac action drop
+
+ $MZ $h1 "$opt" -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
+ mz_pid=$!
+
+ devlink_trap_drop_test $trap_name $group_name $swp2
+
+ # Add PVID and make sure packets are no longer dropped.
+ bridge vlan add vid 1 dev $swp1 pvid untagged master
+ devlink_trap_action_set $trap_name "trap"
+
+ devlink_trap_stats_idle_test $trap_name
+ check_err $? "Trap stats not idle when packets should not be dropped"
+ devlink_trap_group_stats_idle_test $group_name
+ check_err $? "Trap group stats not idle with when packets should not be dropped"
+
+ tc_check_packets "dev $swp2 egress" 101 0
+ check_fail $? "Packets not forwarded when should"
+
+ devlink_trap_action_set $trap_name "drop"
+
+ devlink_trap_drop_cleanup $mz_pid $swp2 ip
+}
+
+vlan_tag_mismatch_untagged_test()
+{
+ RET=0
+
+ __vlan_tag_mismatch_test
+
+ log_test "VLAN tag mismatch - untagged packets"
+}
+
+vlan_tag_mismatch_vid_0_test()
+{
+ RET=0
+
+ __vlan_tag_mismatch_test "-Q 0"
+
+ log_test "VLAN tag mismatch - prio-tagged packets"
+}
+
+vlan_tag_mismatch_test()
+{
+ vlan_tag_mismatch_untagged_test
+ vlan_tag_mismatch_vid_0_test
+}
+
+ingress_vlan_filter_test()
+{
+ local trap_name="ingress_vlan_filter"
+ local dmac=de:ad:be:ef:13:37
+ local group_name="l2_drops"
+ local mz_pid
+ local vid=10
+
+ bridge vlan add vid $vid dev $swp2 master
+
+ RET=0
+
+ tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+ flower dst_mac $dmac action drop
+
+ $MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
+ mz_pid=$!
+
+ devlink_trap_drop_test $trap_name $group_name $swp2
+
+ # Add the VLAN on the bridge port and make sure packets are no longer
+ # dropped.
+ bridge vlan add vid $vid dev $swp1 master
+ devlink_trap_action_set $trap_name "trap"
+
+ devlink_trap_stats_idle_test $trap_name
+ check_err $? "Trap stats not idle when packets should not be dropped"
+ devlink_trap_group_stats_idle_test $group_name
+ check_err $? "Trap group stats not idle with when packets should not be dropped"
+
+ tc_check_packets "dev $swp2 egress" 101 0
+ check_fail $? "Packets not forwarded when should"
+
+ devlink_trap_action_set $trap_name "drop"
+
+ log_test "Ingress VLAN filter"
+
+ devlink_trap_drop_cleanup $mz_pid $swp2 ip
+
+ bridge vlan del vid $vid dev $swp1 master
+ bridge vlan del vid $vid dev $swp2 master
+}
+
+__ingress_stp_filter_test()
+{
+ local trap_name="ingress_spanning_tree_filter"
+ local dmac=de:ad:be:ef:13:37
+ local group_name="l2_drops"
+ local state=$1; shift
+ local mz_pid
+ local vid=20
+
+ bridge vlan add vid $vid dev $swp2 master
+ bridge vlan add vid $vid dev $swp1 master
+ ip link set dev $swp1 type bridge_slave state $state
+
+ tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+ flower dst_mac $dmac action drop
+
+ $MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
+ mz_pid=$!
+
+ devlink_trap_drop_test $trap_name $group_name $swp2
+
+ # Change STP state to forwarding and make sure packets are no longer
+ # dropped.
+ ip link set dev $swp1 type bridge_slave state 3
+ devlink_trap_action_set $trap_name "trap"
+
+ devlink_trap_stats_idle_test $trap_name
+ check_err $? "Trap stats not idle when packets should not be dropped"
+ devlink_trap_group_stats_idle_test $group_name
+ check_err $? "Trap group stats not idle with when packets should not be dropped"
+
+ tc_check_packets "dev $swp2 egress" 101 0
+ check_fail $? "Packets not forwarded when should"
+
+ devlink_trap_action_set $trap_name "drop"
+
+ devlink_trap_drop_cleanup $mz_pid $swp2 ip
+
+ bridge vlan del vid $vid dev $swp1 master
+ bridge vlan del vid $vid dev $swp2 master
+}
+
+ingress_stp_filter_listening_test()
+{
+ local state=$1; shift
+
+ RET=0
+
+ __ingress_stp_filter_test $state
+
+ log_test "Ingress STP filter - listening state"
+}
+
+ingress_stp_filter_learning_test()
+{
+ local state=$1; shift
+
+ RET=0
+
+ __ingress_stp_filter_test $state
+
+ log_test "Ingress STP filter - learning state"
+}
+
+ingress_stp_filter_test()
+{
+ ingress_stp_filter_listening_test 1
+ ingress_stp_filter_learning_test 2
+}
+
+port_list_is_empty_uc_test()
+{
+ local trap_name="port_list_is_empty"
+ local dmac=de:ad:be:ef:13:37
+ local group_name="l2_drops"
+ local mz_pid
+
+ # Disable unicast flooding on both ports, so that packets cannot egress
+ # any port.
+ ip link set dev $swp1 type bridge_slave flood off
+ ip link set dev $swp2 type bridge_slave flood off
+
+ RET=0
+
+ tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+ flower dst_mac $dmac action drop
+
+ $MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
+ mz_pid=$!
+
+ devlink_trap_drop_test $trap_name $group_name $swp2
+
+ # Allow packets to be flooded to one port.
+ ip link set dev $swp2 type bridge_slave flood on
+ devlink_trap_action_set $trap_name "trap"
+
+ devlink_trap_stats_idle_test $trap_name
+ check_err $? "Trap stats not idle when packets should not be dropped"
+ devlink_trap_group_stats_idle_test $group_name
+ check_err $? "Trap group stats not idle with when packets should not be dropped"
+
+ tc_check_packets "dev $swp2 egress" 101 0
+ check_fail $? "Packets not forwarded when should"
+
+ devlink_trap_action_set $trap_name "drop"
+
+ log_test "Port list is empty - unicast"
+
+ devlink_trap_drop_cleanup $mz_pid $swp2 ip
+
+ ip link set dev $swp1 type bridge_slave flood on
+}
+
+port_list_is_empty_mc_test()
+{
+ local trap_name="port_list_is_empty"
+ local dmac=01:00:5e:00:00:01
+ local group_name="l2_drops"
+ local dip=239.0.0.1
+ local mz_pid
+
+ # Disable multicast flooding on both ports, so that packets cannot
+ # egress any port. We also need to flush IP addresses from the bridge
+ # in order to prevent packets from being flooded to the router port.
+ ip link set dev $swp1 type bridge_slave mcast_flood off
+ ip link set dev $swp2 type bridge_slave mcast_flood off
+ ip address flush dev br0
+
+ RET=0
+
+ tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+ flower dst_mac $dmac action drop
+
+ $MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -B $dip -d 1msec -q &
+ mz_pid=$!
+
+ devlink_trap_drop_test $trap_name $group_name $swp2
+
+ # Allow packets to be flooded to one port.
+ ip link set dev $swp2 type bridge_slave mcast_flood on
+ devlink_trap_action_set $trap_name "trap"
+
+ devlink_trap_stats_idle_test $trap_name
+ check_err $? "Trap stats not idle when packets should not be dropped"
+ devlink_trap_group_stats_idle_test $group_name
+ check_err $? "Trap group stats not idle with when packets should not be dropped"
+
+ tc_check_packets "dev $swp2 egress" 101 0
+ check_fail $? "Packets not forwarded when should"
+
+ devlink_trap_action_set $trap_name "drop"
+
+ log_test "Port list is empty - multicast"
+
+ devlink_trap_drop_cleanup $mz_pid $swp2 ip
+
+ ip link set dev $swp1 type bridge_slave mcast_flood on
+}
+
+port_list_is_empty_test()
+{
+ port_list_is_empty_uc_test
+ port_list_is_empty_mc_test
+}
+
+port_loopback_filter_uc_test()
+{
+ local trap_name="port_loopback_filter"
+ local dmac=de:ad:be:ef:13:37
+ local group_name="l2_drops"
+ local mz_pid
+
+ # Make sure packets can only egress the input port.
+ ip link set dev $swp2 type bridge_slave flood off
+
+ RET=0
+
+ tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+ flower dst_mac $dmac action drop
+
+ $MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
+ mz_pid=$!
+
+ devlink_trap_drop_test $trap_name $group_name $swp2
+
+ # Allow packets to be flooded.
+ ip link set dev $swp2 type bridge_slave flood on
+ devlink_trap_action_set $trap_name "trap"
+
+ devlink_trap_stats_idle_test $trap_name
+ check_err $? "Trap stats not idle when packets should not be dropped"
+ devlink_trap_group_stats_idle_test $group_name
+ check_err $? "Trap group stats not idle with when packets should not be dropped"
+
+ tc_check_packets "dev $swp2 egress" 101 0
+ check_fail $? "Packets not forwarded when should"
+
+ devlink_trap_action_set $trap_name "drop"
+
+ log_test "Port loopback filter - unicast"
+
+ devlink_trap_drop_cleanup $mz_pid $swp2 ip
+}
+
+port_loopback_filter_test()
+{
+ port_loopback_filter_uc_test
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
new file mode 100755
index 000000000000..d88d8e47d11b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
@@ -0,0 +1,675 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap L3 drops functionality over mlxsw. Each registered L3 drop
+# packet trap is tested to make sure it is triggered under the right
+# conditions.
+
+# +---------------------------------+
+# | H1 (vrf) |
+# | + $h1 |
+# | | 192.0.2.1/24 |
+# | | 2001:db8:1::1/64 |
+# | | |
+# | | default via 192.0.2.2 |
+# | | default via 2001:db8:1::2 |
+# +----|----------------------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | SW | |
+# | + $rp1 |
+# | 192.0.2.2/24 |
+# | 2001:db8:1::2/64 |
+# | |
+# | 2001:db8:2::2/64 |
+# | 198.51.100.2/24 |
+# | + $rp2 |
+# | | |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|----------------------------+
+# | | default via 198.51.100.2 |
+# | | default via 2001:db8:2::2 |
+# | | |
+# | | 2001:db8:2::1/64 |
+# | | 198.51.100.1/24 |
+# | + $h2 |
+# | H2 (vrf) |
+# +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ non_ip_test
+ uc_dip_over_mc_dmac_test
+ dip_is_loopback_test
+ sip_is_mc_test
+ sip_is_loopback_test
+ ip_header_corrupted_test
+ ipv4_sip_is_limited_bc_test
+ ipv6_mc_dip_reserved_scope_test
+ ipv6_mc_dip_interface_local_scope_test
+ blackhole_route_test
+ irif_disabled_test
+ erif_disabled_test
+"
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+
+ ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+ ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+ ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2
+ ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 $h2_ipv4/24 $h2_ipv6/64
+
+ ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+ ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2
+}
+
+h2_destroy()
+{
+ ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2
+ ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+ simple_if_fini $h2 $h2_ipv4/24 $h2_ipv6/64
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ ip link set dev $rp2 up
+
+ tc qdisc add dev $rp2 clsact
+
+ __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64
+ __addr_add_del $rp2 add 198.51.100.2/24 2001:db8:2::2/64
+}
+
+router_destroy()
+{
+ __addr_add_del $rp2 del 198.51.100.2/24 2001:db8:2::2/64
+ __addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
+
+ tc qdisc del dev $rp2 clsact
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h1mac=$(mac_get $h1)
+ rp1mac=$(mac_get $rp1)
+
+ h1_ipv4=192.0.2.1
+ h2_ipv4=198.51.100.1
+ h1_ipv6=2001:db8:1::1
+ h2_ipv6=2001:db8:2::1
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+
+ router_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+ping_check()
+{
+ trap_name=$1; shift
+
+ devlink_trap_action_set $trap_name "trap"
+ ping_do $h1 $h2_ipv4
+ check_err $? "Packets that should not be trapped were trapped"
+ devlink_trap_action_set $trap_name "drop"
+}
+
+non_ip_test()
+{
+ local trap_name="non_ip"
+ local group_name="l3_drops"
+ local mz_pid
+
+ RET=0
+
+ ping_check $trap_name
+
+ tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \
+ flower dst_ip $h2_ipv4 action drop
+
+ # Generate non-IP packets to the router
+ $MZ $h1 -c 0 -p 100 -d 1msec -B $h2_ipv4 -q "$rp1mac $h1mac \
+ 00:00 de:ad:be:ef" &
+ mz_pid=$!
+
+ devlink_trap_drop_test $trap_name $group_name $rp2
+
+ log_test "Non IP"
+
+ devlink_trap_drop_cleanup $mz_pid $rp2 "ip"
+}
+
+__uc_dip_over_mc_dmac_test()
+{
+ local desc=$1; shift
+ local proto=$1; shift
+ local dip=$1; shift
+ local flags=${1:-""}; shift
+ local trap_name="uc_dip_over_mc_dmac"
+ local group_name="l3_drops"
+ local dmac=01:02:03:04:05:06
+ local mz_pid
+
+ RET=0
+
+ ping_check $trap_name
+
+ tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \
+ flower ip_proto udp src_port 54321 dst_port 12345 action drop
+
+ # Generate IP packets with a unicast IP and a multicast destination MAC
+ $MZ $h1 $flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -b $dmac \
+ -B $dip -d 1msec -q &
+ mz_pid=$!
+
+ devlink_trap_drop_test $trap_name $group_name $rp2
+
+ log_test "Unicast destination IP over multicast destination MAC: $desc"
+
+ devlink_trap_drop_cleanup $mz_pid $rp2 $proto
+}
+
+uc_dip_over_mc_dmac_test()
+{
+ __uc_dip_over_mc_dmac_test "IPv4" "ip" $h2_ipv4
+ __uc_dip_over_mc_dmac_test "IPv6" "ipv6" $h2_ipv6 "-6"
+}
+
+__sip_is_loopback_test()
+{
+ local desc=$1; shift
+ local proto=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local flags=${1:-""}; shift
+ local trap_name="sip_is_loopback_address"
+ local group_name="l3_drops"
+ local mz_pid
+
+ RET=0
+
+ ping_check $trap_name
+
+ tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \
+ flower src_ip $sip action drop
+
+ # Generate packets with loopback source IP
+ $MZ $h1 $flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -A $sip \
+ -b $rp1mac -B $dip -d 1msec -q &
+ mz_pid=$!
+
+ devlink_trap_drop_test $trap_name $group_name $rp2
+
+ log_test "Source IP is loopback address: $desc"
+
+ devlink_trap_drop_cleanup $mz_pid $rp2 $proto
+}
+
+sip_is_loopback_test()
+{
+ __sip_is_loopback_test "IPv4" "ip" "127.0.0.0/8" $h2_ipv4
+ __sip_is_loopback_test "IPv6" "ipv6" "::1" $h2_ipv6 "-6"
+}
+
+__dip_is_loopback_test()
+{
+ local desc=$1; shift
+ local proto=$1; shift
+ local dip=$1; shift
+ local flags=${1:-""}; shift
+ local trap_name="dip_is_loopback_address"
+ local group_name="l3_drops"
+ local mz_pid
+
+ RET=0
+
+ ping_check $trap_name
+
+ tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \
+ flower dst_ip $dip action drop
+
+ # Generate packets with loopback destination IP
+ $MZ $h1 $flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -b $rp1mac \
+ -B $dip -d 1msec -q &
+ mz_pid=$!
+
+ devlink_trap_drop_test $trap_name $group_name $rp2
+
+ log_test "Destination IP is loopback address: $desc"
+
+ devlink_trap_drop_cleanup $mz_pid $rp2 $proto
+}
+
+dip_is_loopback_test()
+{
+ __dip_is_loopback_test "IPv4" "ip" "127.0.0.0/8"
+ __dip_is_loopback_test "IPv6" "ipv6" "::1" "-6"
+}
+
+__sip_is_mc_test()
+{
+ local desc=$1; shift
+ local proto=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local flags=${1:-""}; shift
+ local trap_name="sip_is_mc"
+ local group_name="l3_drops"
+ local mz_pid
+
+ RET=0
+
+ ping_check $trap_name
+
+ tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \
+ flower src_ip $sip action drop
+
+ # Generate packets with multicast source IP
+ $MZ $h1 $flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -A $sip \
+ -b $rp1mac -B $dip -d 1msec -q &
+ mz_pid=$!
+
+ devlink_trap_drop_test $trap_name $group_name $rp2
+
+ log_test "Source IP is multicast: $desc"
+
+ devlink_trap_drop_cleanup $mz_pid $rp2 $proto
+}
+
+sip_is_mc_test()
+{
+ __sip_is_mc_test "IPv4" "ip" "239.1.1.1" $h2_ipv4
+ __sip_is_mc_test "IPv6" "ipv6" "FF02::2" $h2_ipv6 "-6"
+}
+
+ipv4_sip_is_limited_bc_test()
+{
+ local trap_name="ipv4_sip_is_limited_bc"
+ local group_name="l3_drops"
+ local sip=255.255.255.255
+ local mz_pid
+
+ RET=0
+
+ ping_check $trap_name
+
+ tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \
+ flower src_ip $sip action drop
+
+ # Generate packets with limited broadcast source IP
+ $MZ $h1 -t udp "sp=54321,dp=12345" -c 0 -p 100 -A $sip -b $rp1mac \
+ -B $h2_ipv4 -d 1msec -q &
+ mz_pid=$!
+
+ devlink_trap_drop_test $trap_name $group_name $rp2
+
+ log_test "IPv4 source IP is limited broadcast"
+
+ devlink_trap_drop_cleanup $mz_pid $rp2 "ip"
+}
+
+ipv4_payload_get()
+{
+ local ipver=$1; shift
+ local ihl=$1; shift
+ local checksum=$1; shift
+
+ p=$(:
+ )"08:00:"$( : ETH type
+ )"$ipver"$( : IP version
+ )"$ihl:"$( : IHL
+ )"00:"$( : IP TOS
+ )"00:F4:"$( : IP total length
+ )"00:00:"$( : IP identification
+ )"20:00:"$( : IP flags + frag off
+ )"30:"$( : IP TTL
+ )"01:"$( : IP proto
+ )"$checksum:"$( : IP header csum
+ )"$h1_ipv4:"$( : IP saddr
+ )"$h2_ipv4:"$( : IP daddr
+ )
+ echo $p
+}
+
+__ipv4_header_corrupted_test()
+{
+ local desc=$1; shift
+ local ipver=$1; shift
+ local ihl=$1; shift
+ local checksum=$1; shift
+ local trap_name="ip_header_corrupted"
+ local group_name="l3_drops"
+ local payload
+ local mz_pid
+
+ RET=0
+
+ ping_check $trap_name
+
+ tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \
+ flower dst_ip $h2_ipv4 action drop
+
+ payload=$(ipv4_payload_get $ipver $ihl $checksum)
+
+ # Generate packets with corrupted IP header
+ $MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload &
+ mz_pid=$!
+
+ devlink_trap_drop_test $trap_name $group_name $rp2
+
+ log_test "IP header corrupted: $desc: IPv4"
+
+ devlink_trap_drop_cleanup $mz_pid $rp2 "ip"
+}
+
+ipv6_payload_get()
+{
+ local ipver=$1; shift
+
+ p=$(:
+ )"86:DD:"$( : ETH type
+ )"$ipver"$( : IP version
+ )"0:0:"$( : Traffic class
+ )"0:00:00:"$( : Flow label
+ )"00:00:"$( : Payload length
+ )"01:"$( : Next header
+ )"04:"$( : Hop limit
+ )"$h1_ipv6:"$( : IP saddr
+ )"$h2_ipv6:"$( : IP daddr
+ )
+ echo $p
+}
+
+__ipv6_header_corrupted_test()
+{
+ local desc=$1; shift
+ local ipver=$1; shift
+ local trap_name="ip_header_corrupted"
+ local group_name="l3_drops"
+ local payload
+ local mz_pid
+
+ RET=0
+
+ ping_check $trap_name
+
+ tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \
+ flower dst_ip $h2_ipv4 action drop
+
+ payload=$(ipv6_payload_get $ipver)
+
+ # Generate packets with corrupted IP header
+ $MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload &
+ mz_pid=$!
+
+ devlink_trap_drop_test $trap_name $group_name $rp2
+
+ log_test "IP header corrupted: $desc: IPv6"
+
+ devlink_trap_drop_cleanup $mz_pid $rp2 "ip"
+}
+
+ip_header_corrupted_test()
+{
+ # Each test uses one wrong value. The three values below are correct.
+ local ipv="4"
+ local ihl="5"
+ local checksum="00:F4"
+
+ __ipv4_header_corrupted_test "wrong IP version" 5 $ihl $checksum
+ __ipv4_header_corrupted_test "wrong IHL" $ipv 4 $checksum
+ __ipv4_header_corrupted_test "wrong checksum" $ipv $ihl "00:00"
+ __ipv6_header_corrupted_test "wrong IP version" 5
+}
+
+ipv6_mc_dip_reserved_scope_test()
+{
+ local trap_name="ipv6_mc_dip_reserved_scope"
+ local group_name="l3_drops"
+ local dip=FF00::
+ local mz_pid
+
+ RET=0
+
+ ping_check $trap_name
+
+ tc filter add dev $rp2 egress protocol ipv6 pref 1 handle 101 \
+ flower dst_ip $dip action drop
+
+ # Generate packets with reserved scope destination IP
+ $MZ $h1 -6 -t udp "sp=54321,dp=12345" -c 0 -p 100 -b \
+ "33:33:00:00:00:00" -B $dip -d 1msec -q &
+ mz_pid=$!
+
+ devlink_trap_drop_test $trap_name $group_name $rp2
+
+ log_test "IPv6 multicast destination IP reserved scope"
+
+ devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6"
+}
+
+ipv6_mc_dip_interface_local_scope_test()
+{
+ local trap_name="ipv6_mc_dip_interface_local_scope"
+ local group_name="l3_drops"
+ local dip=FF01::
+ local mz_pid
+
+ RET=0
+
+ ping_check $trap_name
+
+ tc filter add dev $rp2 egress protocol ipv6 pref 1 handle 101 \
+ flower dst_ip $dip action drop
+
+ # Generate packets with interface local scope destination IP
+ $MZ $h1 -6 -t udp "sp=54321,dp=12345" -c 0 -p 100 -b \
+ "33:33:00:00:00:00" -B $dip -d 1msec -q &
+ mz_pid=$!
+
+ devlink_trap_drop_test $trap_name $group_name $rp2
+
+ log_test "IPv6 multicast destination IP interface-local scope"
+
+ devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6"
+}
+
+__blackhole_route_test()
+{
+ local flags=$1; shift
+ local subnet=$1; shift
+ local proto=$1; shift
+ local dip=$1; shift
+ local ip_proto=${1:-"icmp"}; shift
+ local trap_name="blackhole_route"
+ local group_name="l3_drops"
+ local mz_pid
+
+ RET=0
+
+ ping_check $trap_name
+
+ ip -$flags route add blackhole $subnet
+ tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \
+ flower skip_hw dst_ip $dip ip_proto $ip_proto action drop
+
+ # Generate packets to the blackhole route
+ $MZ $h1 -$flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -b $rp1mac \
+ -B $dip -d 1msec -q &
+ mz_pid=$!
+
+ devlink_trap_drop_test $trap_name $group_name $rp2
+ log_test "Blackhole route: IPv$flags"
+
+ devlink_trap_drop_cleanup $mz_pid $rp2 $proto
+ ip -$flags route del blackhole $subnet
+}
+
+blackhole_route_test()
+{
+ __blackhole_route_test "4" "198.51.100.0/30" "ip" $h2_ipv4
+ __blackhole_route_test "6" "2001:db8:2::/120" "ipv6" $h2_ipv6 "icmpv6"
+}
+
+irif_disabled_test()
+{
+ local trap_name="irif_disabled"
+ local group_name="l3_drops"
+ local t0_packets t0_bytes
+ local t1_packets t1_bytes
+ local mz_pid
+
+ RET=0
+
+ ping_check $trap_name
+
+ devlink_trap_action_set $trap_name "trap"
+
+ # When RIF of a physical port ("Sub-port RIF") is destroyed, we first
+ # block the STP of the {Port, VLAN} so packets cannot get into the RIF.
+ # Using bridge enables us to see this trap because when bridge is
+ # destroyed, there is a small time window that packets can go into the
+ # RIF, while it is disabled.
+ ip link add dev br0 type bridge
+ ip link set dev $rp1 master br0
+ ip address flush dev $rp1
+ __addr_add_del br0 add 192.0.2.2/24
+ ip li set dev br0 up
+
+ t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+ t0_bytes=$(devlink_trap_rx_bytes_get $trap_name)
+
+ # Generate packets to h2 through br0 RIF that will be removed later
+ $MZ $h1 -t udp "sp=54321,dp=12345" -c 0 -p 100 -a own -b $rp1mac \
+ -B $h2_ipv4 -q &
+ mz_pid=$!
+
+ # Wait before removing br0 RIF to allow packets to go into the bridge.
+ sleep 1
+
+ # Flushing address will dismantle the RIF
+ ip address flush dev br0
+
+ t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+ t1_bytes=$(devlink_trap_rx_bytes_get $trap_name)
+
+ if [[ $t0_packets -eq $t1_packets && $t0_bytes -eq $t1_bytes ]]; then
+ check_err 1 "Trap stats idle when packets should be trapped"
+ fi
+
+ log_test "Ingress RIF disabled"
+
+ kill $mz_pid && wait $mz_pid &> /dev/null
+ ip link set dev $rp1 nomaster
+ __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64
+ ip link del dev br0 type bridge
+ devlink_trap_action_set $trap_name "drop"
+}
+
+erif_disabled_test()
+{
+ local trap_name="erif_disabled"
+ local group_name="l3_drops"
+ local t0_packets t0_bytes
+ local t1_packets t1_bytes
+ local mz_pid
+
+ RET=0
+
+ ping_check $trap_name
+
+ devlink_trap_action_set $trap_name "trap"
+ ip link add dev br0 type bridge
+ ip add flush dev $rp1
+ ip link set dev $rp1 master br0
+ __addr_add_del br0 add 192.0.2.2/24
+ ip link set dev br0 up
+
+ t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+ t0_bytes=$(devlink_trap_rx_bytes_get $trap_name)
+
+ rp2mac=$(mac_get $rp2)
+
+ # Generate packets that should go out through br0 RIF that will be
+ # removed later
+ $MZ $h2 -t udp "sp=54321,dp=12345" -c 0 -p 100 -a own -b $rp2mac \
+ -B 192.0.2.1 -q &
+ mz_pid=$!
+
+ sleep 5
+ # In order to see this trap we need a route that points to disabled RIF.
+ # When ipv6 address is flushed, there is a delay and the routes are
+ # deleted before the RIF and we cannot get state that we have route
+ # to disabled RIF.
+ # Delete IPv6 address first and then check this trap with flushing IPv4.
+ ip -6 add flush dev br0
+ ip -4 add flush dev br0
+
+ t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+ t1_bytes=$(devlink_trap_rx_bytes_get $trap_name)
+
+ if [[ $t0_packets -eq $t1_packets && $t0_bytes -eq $t1_bytes ]]; then
+ check_err 1 "Trap stats idle when packets should be trapped"
+ fi
+
+ log_test "Egress RIF disabled"
+
+ kill $mz_pid && wait $mz_pid &> /dev/null
+ ip link set dev $rp1 nomaster
+ __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64
+ ip link del dev br0 type bridge
+ devlink_trap_action_set $trap_name "drop"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
new file mode 100755
index 000000000000..2bc6df42d597
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
@@ -0,0 +1,557 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap L3 exceptions functionality over mlxsw.
+# Check all exception traps to make sure they are triggered under the right
+# conditions.
+
+# +---------------------------------+
+# | H1 (vrf) |
+# | + $h1 |
+# | | 192.0.2.1/24 |
+# | | 2001:db8:1::1/64 |
+# | | |
+# | | default via 192.0.2.2 |
+# | | default via 2001:db8:1::2 |
+# +----|----------------------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | SW | |
+# | + $rp1 |
+# | 192.0.2.2/24 |
+# | 2001:db8:1::2/64 |
+# | |
+# | 2001:db8:2::2/64 |
+# | 198.51.100.2/24 |
+# | + $rp2 |
+# | | |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|----------------------------+
+# | | default via 198.51.100.2 |
+# | | default via 2001:db8:2::2 |
+# | | |
+# | | 2001:db8:2::1/64 |
+# | | 198.51.100.1/24 |
+# | + $h2 |
+# | H2 (vrf) |
+# +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ mtu_value_is_too_small_test
+ ttl_value_is_too_small_test
+ mc_reverse_path_forwarding_test
+ reject_route_test
+ unresolved_neigh_test
+ ipv4_lpm_miss_test
+ ipv6_lpm_miss_test
+"
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+require_command $MCD
+require_command $MC_CLI
+table_name=selftests
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+
+ ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+ ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2
+
+ tc qdisc add dev $h1 clsact
+}
+
+h1_destroy()
+{
+ tc qdisc del dev $h1 clsact
+
+ ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2
+ ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64
+
+ ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+ ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2
+}
+
+h2_destroy()
+{
+ ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2
+ ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+ simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ ip link set dev $rp2 up
+
+ tc qdisc add dev $rp2 clsact
+
+ __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64
+ __addr_add_del $rp2 add 198.51.100.2/24 2001:db8:2::2/64
+}
+
+router_destroy()
+{
+ __addr_add_del $rp2 del 198.51.100.2/24 2001:db8:2::2/64
+ __addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
+
+ tc qdisc del dev $rp2 clsact
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp1mac=$(mac_get $rp1)
+
+ start_mcd
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+
+ router_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+
+ kill_mcd
+}
+
+ping_check()
+{
+ ping_do $h1 198.51.100.1
+ check_err $? "Packets that should not be trapped were trapped"
+}
+
+trap_action_check()
+{
+ local trap_name=$1; shift
+ local expected_action=$1; shift
+
+ action=$(devlink_trap_action_get $trap_name)
+ if [ "$action" != $expected_action ]; then
+ check_err 1 "Trap $trap_name has wrong action: $action"
+ fi
+}
+
+mtu_value_is_too_small_test()
+{
+ local trap_name="mtu_value_is_too_small"
+ local group_name="l3_drops"
+ local expected_action="trap"
+ local mz_pid
+
+ RET=0
+
+ ping_check $trap_name
+ trap_action_check $trap_name $expected_action
+
+ # type - Destination Unreachable
+ # code - Fragmentation Needed and Don't Fragment was Set
+ tc filter add dev $h1 ingress protocol ip pref 1 handle 101 \
+ flower skip_hw ip_proto icmp type 3 code 4 action pass
+
+ mtu_set $rp2 1300
+
+ # Generate IP packets bigger than router's MTU with don't fragment
+ # flag on.
+ $MZ $h1 -t udp "sp=54321,dp=12345,df" -p 1400 -c 0 -d 1msec -b $rp1mac \
+ -B 198.51.100.1 -q &
+ mz_pid=$!
+
+ devlink_trap_exception_test $trap_name $group_name
+
+ tc_check_packets_hitting "dev $h1 ingress" 101
+ check_err $? "Packets were not received to h1"
+
+ log_test "MTU value is too small"
+
+ mtu_restore $rp2
+
+ kill $mz_pid && wait $mz_pid &> /dev/null
+ tc filter del dev $h1 ingress protocol ip pref 1 handle 101 flower
+}
+
+__ttl_value_is_too_small_test()
+{
+ local ttl_val=$1; shift
+ local trap_name="ttl_value_is_too_small"
+ local group_name="l3_drops"
+ local expected_action="trap"
+ local mz_pid
+
+ RET=0
+
+ ping_check $trap_name
+ trap_action_check $trap_name $expected_action
+
+ # type - Time Exceeded
+ # code - Time to Live exceeded in Transit
+ tc filter add dev $h1 ingress protocol ip pref 1 handle 101 \
+ flower skip_hw ip_proto icmp type 11 code 0 action pass
+
+ # Generate IP packets with small TTL
+ $MZ $h1 -t udp "ttl=$ttl_val,sp=54321,dp=12345" -c 0 -d 1msec \
+ -b $rp1mac -B 198.51.100.1 -q &
+ mz_pid=$!
+
+ devlink_trap_exception_test $trap_name $group_name
+
+ tc_check_packets_hitting "dev $h1 ingress" 101
+ check_err $? "Packets were not received to h1"
+
+ log_test "TTL value is too small: TTL=$ttl_val"
+
+ kill $mz_pid && wait $mz_pid &> /dev/null
+ tc filter del dev $h1 ingress protocol ip pref 1 handle 101 flower
+}
+
+ttl_value_is_too_small_test()
+{
+ __ttl_value_is_too_small_test 0
+ __ttl_value_is_too_small_test 1
+}
+
+start_mcd()
+{
+ SMCROUTEDIR="$(mktemp -d)"
+ for ((i = 1; i <= $NUM_NETIFS; ++i)); do
+ echo "phyint ${NETIFS[p$i]} enable" >> \
+ $SMCROUTEDIR/$table_name.conf
+ done
+
+ $MCD -N -I $table_name -f $SMCROUTEDIR/$table_name.conf \
+ -P $SMCROUTEDIR/$table_name.pid
+}
+
+kill_mcd()
+{
+ pkill $MCD
+ rm -rf $SMCROUTEDIR
+}
+
+__mc_reverse_path_forwarding_test()
+{
+ local desc=$1; shift
+ local src_ip=$1; shift
+ local dst_ip=$1; shift
+ local dst_mac=$1; shift
+ local proto=$1; shift
+ local flags=${1:-""}; shift
+ local trap_name="mc_reverse_path_forwarding"
+ local group_name="l3_drops"
+ local expected_action="trap"
+ local mz_pid
+
+ RET=0
+
+ ping_check $trap_name
+ trap_action_check $trap_name $expected_action
+
+ tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \
+ flower dst_ip $dst_ip ip_proto udp action drop
+
+ $MC_CLI -I $table_name add $rp1 $src_ip $dst_ip $rp2
+
+ # Generate packets to multicast address.
+ $MZ $h2 $flags -t udp "sp=54321,dp=12345" -c 0 -p 128 \
+ -a 00:11:22:33:44:55 -b $dst_mac \
+ -A $src_ip -B $dst_ip -q &
+
+ mz_pid=$!
+
+ devlink_trap_exception_test $trap_name $group_name
+
+ tc_check_packets "dev $rp2 egress" 101 0
+ check_err $? "Packets were not dropped"
+
+ log_test "Multicast reverse path forwarding: $desc"
+
+ kill $mz_pid && wait $mz_pid &> /dev/null
+ tc filter del dev $rp2 egress protocol $proto pref 1 handle 101 flower
+}
+
+mc_reverse_path_forwarding_test()
+{
+ __mc_reverse_path_forwarding_test "IPv4" "192.0.2.1" "225.1.2.3" \
+ "01:00:5e:01:02:03" "ip"
+ __mc_reverse_path_forwarding_test "IPv6" "2001:db8:1::1" "ff0e::3" \
+ "33:33:00:00:00:03" "ipv6" "-6"
+}
+
+__reject_route_test()
+{
+ local desc=$1; shift
+ local dst_ip=$1; shift
+ local proto=$1; shift
+ local ip_proto=$1; shift
+ local type=$1; shift
+ local code=$1; shift
+ local unreachable=$1; shift
+ local flags=${1:-""}; shift
+ local trap_name="reject_route"
+ local group_name="l3_drops"
+ local expected_action="trap"
+ local mz_pid
+
+ RET=0
+
+ ping_check $trap_name
+ trap_action_check $trap_name $expected_action
+
+ tc filter add dev $h1 ingress protocol $proto pref 1 handle 101 flower \
+ skip_hw ip_proto $ip_proto type $type code $code action pass
+
+ ip route add unreachable $unreachable
+
+ # Generate pacekts to h2. The destination IP is unreachable.
+ $MZ $flags $h1 -t udp "sp=54321,dp=12345" -c 0 -d 1msec -b $rp1mac \
+ -B $dst_ip -q &
+ mz_pid=$!
+
+ devlink_trap_exception_test $trap_name $group_name
+
+ tc_check_packets_hitting "dev $h1 ingress" 101
+ check_err $? "ICMP packet was not received to h1"
+
+ log_test "Reject route: $desc"
+
+ kill $mz_pid && wait $mz_pid &> /dev/null
+ ip route del unreachable $unreachable
+ tc filter del dev $h1 ingress protocol $proto pref 1 handle 101 flower
+}
+
+reject_route_test()
+{
+ # type - Destination Unreachable
+ # code - Host Unreachable
+ __reject_route_test "IPv4" 198.51.100.1 "ip" "icmp" 3 1 \
+ "198.51.100.0/26"
+ # type - Destination Unreachable
+ # code - No Route
+ __reject_route_test "IPv6" 2001:db8:2::1 "ipv6" "icmpv6" 1 0 \
+ "2001:db8:2::0/66" "-6"
+}
+
+__host_miss_test()
+{
+ local desc=$1; shift
+ local dip=$1; shift
+ local trap_name="unresolved_neigh"
+ local group_name="l3_drops"
+ local expected_action="trap"
+ local mz_pid
+
+ RET=0
+
+ ping_check $trap_name
+ trap_action_check $trap_name $expected_action
+
+ ip neigh flush dev $rp2
+
+ t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+ # Generate packets to h2 (will incur a unresolved neighbor).
+ # The ping should pass and devlink counters should be increased.
+ ping_do $h1 $dip
+ check_err $? "ping failed: $desc"
+
+ t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+ if [[ $t0_packets -eq $t1_packets ]]; then
+ check_err 1 "Trap counter did not increase"
+ fi
+
+ log_test "Unresolved neigh: host miss: $desc"
+}
+
+__invalid_nexthop_test()
+{
+ local desc=$1; shift
+ local dip=$1; shift
+ local extra_add=$1; shift
+ local subnet=$1; shift
+ local via_add=$1; shift
+ local trap_name="unresolved_neigh"
+ local group_name="l3_drops"
+ local expected_action="trap"
+ local mz_pid
+
+ RET=0
+
+ ping_check $trap_name
+ trap_action_check $trap_name $expected_action
+
+ ip address add $extra_add/$subnet dev $h2
+
+ # Check that correct route does not trigger unresolved_neigh
+ ip $flags route add $dip via $extra_add dev $rp2
+
+ # Generate packets in order to discover all neighbours.
+ # Without it, counters of unresolved_neigh will be increased
+ # during neighbours discovery and the check below will fail
+ # for a wrong reason
+ ping_do $h1 $dip
+
+ t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+ ping_do $h1 $dip
+ t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+ if [[ $t0_packets -ne $t1_packets ]]; then
+ check_err 1 "Trap counter increased when it should not"
+ fi
+
+ ip $flags route del $dip via $extra_add dev $rp2
+
+ # Check that route to nexthop that does not exist trigger
+ # unresolved_neigh
+ ip $flags route add $dip via $via_add dev $h2
+
+ t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+ ping_do $h1 $dip
+ t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+ if [[ $t0_packets -eq $t1_packets ]]; then
+ check_err 1 "Trap counter did not increase"
+ fi
+
+ ip $flags route del $dip via $via_add dev $h2
+ ip address del $extra_add/$subnet dev $h2
+ log_test "Unresolved neigh: nexthop does not exist: $desc"
+}
+
+unresolved_neigh_test()
+{
+ __host_miss_test "IPv4" 198.51.100.1
+ __host_miss_test "IPv6" 2001:db8:2::1
+ __invalid_nexthop_test "IPv4" 198.51.100.1 198.51.100.3 24 198.51.100.4
+ __invalid_nexthop_test "IPv6" 2001:db8:2::1 2001:db8:2::3 64 \
+ 2001:db8:2::4
+}
+
+vrf_without_routes_create()
+{
+ # VRF creating makes the links to be down and then up again.
+ # By default, IPv6 address is not saved after link becomes down.
+ # Save IPv6 address using sysctl configuration.
+ sysctl_set net.ipv6.conf.$rp1.keep_addr_on_down 1
+ sysctl_set net.ipv6.conf.$rp2.keep_addr_on_down 1
+
+ ip link add dev vrf1 type vrf table 101
+ ip link set dev $rp1 master vrf1
+ ip link set dev $rp2 master vrf1
+ ip link set dev vrf1 up
+
+ # Wait for rp1 and rp2 to be up
+ setup_wait
+}
+
+vrf_without_routes_destroy()
+{
+ ip link set dev $rp1 nomaster
+ ip link set dev $rp2 nomaster
+ ip link del dev vrf1
+
+ sysctl_restore net.ipv6.conf.$rp2.keep_addr_on_down
+ sysctl_restore net.ipv6.conf.$rp1.keep_addr_on_down
+
+ # Wait for interfaces to be up
+ setup_wait
+}
+
+ipv4_lpm_miss_test()
+{
+ local trap_name="ipv4_lpm_miss"
+ local group_name="l3_drops"
+ local expected_action="trap"
+ local mz_pid
+
+ RET=0
+
+ ping_check $trap_name
+ trap_action_check $trap_name $expected_action
+
+ # Create a VRF without a default route
+ vrf_without_routes_create
+
+ # Generate packets through a VRF without a matching route.
+ $MZ $h1 -t udp "sp=54321,dp=12345" -c 0 -d 1msec -b $rp1mac \
+ -B 203.0.113.1 -q &
+ mz_pid=$!
+
+ devlink_trap_exception_test $trap_name $group_name
+
+ log_test "LPM miss: IPv4"
+
+ kill $mz_pid && wait $mz_pid &> /dev/null
+ vrf_without_routes_destroy
+}
+
+ipv6_lpm_miss_test()
+{
+ local trap_name="ipv6_lpm_miss"
+ local group_name="l3_drops"
+ local expected_action="trap"
+ local mz_pid
+
+ RET=0
+
+ ping_check $trap_name
+ trap_action_check $trap_name $expected_action
+
+ # Create a VRF without a default route
+ vrf_without_routes_create
+
+ # Generate packets through a VRF without a matching route.
+ $MZ -6 $h1 -t udp "sp=54321,dp=12345" -c 0 -d 1msec -b $rp1mac \
+ -B 2001:db8::1 -q &
+ mz_pid=$!
+
+ devlink_trap_exception_test $trap_name $group_name
+
+ log_test "LPM miss: IPv6"
+
+ kill $mz_pid && wait $mz_pid &> /dev/null
+ vrf_without_routes_destroy
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
new file mode 100755
index 000000000000..039629bb92a3
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
@@ -0,0 +1,265 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap tunnel exceptions functionality over mlxsw.
+# Check all exception traps to make sure they are triggered under the right
+# conditions.
+
+# +-------------------------+
+# | H1 |
+# | $h1 + |
+# | 192.0.2.1/28 | |
+# +-------------------|-----+
+# |
+# +-------------------|-----+
+# | SW1 | |
+# | $swp1 + |
+# | 192.0.2.2/28 |
+# | |
+# | + g1a (gre) |
+# | loc=192.0.2.65 |
+# | rem=192.0.2.66 |
+# | tos=inherit |
+# | |
+# | + $rp1 |
+# | | 198.51.100.1/28 |
+# +--|----------------------+
+# |
+# +--|----------------------+
+# | | VRF2 |
+# | + $rp2 |
+# | 198.51.100.2/28 |
+# +-------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ decap_error_test
+"
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+vrf2_create()
+{
+ simple_if_init $rp2 198.51.100.2/28
+}
+
+vrf2_destroy()
+{
+ simple_if_fini $rp2 198.51.100.2/28
+}
+
+switch_create()
+{
+ __addr_add_del $swp1 add 192.0.2.2/28
+ tc qdisc add dev $swp1 clsact
+ ip link set dev $swp1 up
+
+ tunnel_create g1 gre 192.0.2.65 192.0.2.66 tos inherit
+ __addr_add_del g1 add 192.0.2.65/32
+ ip link set dev g1 up
+
+ __addr_add_del $rp1 add 198.51.100.1/28
+ ip link set dev $rp1 up
+}
+
+switch_destroy()
+{
+ ip link set dev $rp1 down
+ __addr_add_del $rp1 del 198.51.100.1/28
+
+ ip link set dev g1 down
+ __addr_add_del g1 del 192.0.2.65/32
+ tunnel_destroy g1
+
+ ip link set dev $swp1 down
+ tc qdisc del dev $swp1 clsact
+ __addr_add_del $swp1 del 192.0.2.2/28
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ rp1=${NETIFS[p3]}
+ rp2=${NETIFS[p4]}
+
+ forwarding_enable
+ vrf_prepare
+ h1_create
+ switch_create
+ vrf2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ vrf2_destroy
+ switch_destroy
+ h1_destroy
+ vrf_cleanup
+ forwarding_restore
+}
+
+ecn_payload_get()
+{
+ p=$(:
+ )"0"$( : GRE flags
+ )"0:00:"$( : Reserved + version
+ )"08:00:"$( : ETH protocol type
+ )"4"$( : IP version
+ )"5:"$( : IHL
+ )"00:"$( : IP TOS
+ )"00:14:"$( : IP total length
+ )"00:00:"$( : IP identification
+ )"20:00:"$( : IP flags + frag off
+ )"30:"$( : IP TTL
+ )"01:"$( : IP proto
+ )"E7:E6:"$( : IP header csum
+ )"C0:00:01:01:"$( : IP saddr : 192.0.1.1
+ )"C0:00:02:01:"$( : IP daddr : 192.0.2.1
+ )
+ echo $p
+}
+
+ecn_decap_test()
+{
+ local trap_name="decap_error"
+ local group_name="tunnel_drops"
+ local desc=$1; shift
+ local ecn_desc=$1; shift
+ local outer_tos=$1; shift
+ local mz_pid
+
+ RET=0
+
+ tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+ flower src_ip 192.0.1.1 dst_ip 192.0.2.1 action pass
+
+ rp1_mac=$(mac_get $rp1)
+ rp2_mac=$(mac_get $rp2)
+ payload=$(ecn_payload_get)
+
+ ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \
+ -A 192.0.2.66 -B 192.0.2.65 -t ip \
+ len=48,tos=$outer_tos,proto=47,p=$payload -q &
+
+ mz_pid=$!
+
+ devlink_trap_exception_test $trap_name $group_name
+
+ tc_check_packets "dev $swp1 egress" 101 0
+ check_err $? "Packets were not dropped"
+
+ log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc"
+
+ kill $mz_pid && wait $mz_pid &> /dev/null
+ tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower
+}
+
+ipip_payload_get()
+{
+ local flags=$1; shift
+ local key=$1; shift
+
+ p=$(:
+ )"$flags"$( : GRE flags
+ )"0:00:"$( : Reserved + version
+ )"08:00:"$( : ETH protocol type
+ )"$key"$( : Key
+ )"4"$( : IP version
+ )"5:"$( : IHL
+ )"00:"$( : IP TOS
+ )"00:14:"$( : IP total length
+ )"00:00:"$( : IP identification
+ )"20:00:"$( : IP flags + frag off
+ )"30:"$( : IP TTL
+ )"01:"$( : IP proto
+ )"E7:E6:"$( : IP header csum
+ )"C0:00:01:01:"$( : IP saddr : 192.0.1.1
+ )"C0:00:02:01:"$( : IP daddr : 192.0.2.1
+ )
+ echo $p
+}
+
+no_matching_tunnel_test()
+{
+ local trap_name="decap_error"
+ local group_name="tunnel_drops"
+ local desc=$1; shift
+ local sip=$1; shift
+ local mz_pid
+
+ RET=0
+
+ tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+ flower src_ip 192.0.1.1 dst_ip 192.0.2.1 action pass
+
+ rp1_mac=$(mac_get $rp1)
+ rp2_mac=$(mac_get $rp2)
+ payload=$(ipip_payload_get "$@")
+
+ ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \
+ -A $sip -B 192.0.2.65 -t ip len=48,proto=47,p=$payload -q &
+ mz_pid=$!
+
+ devlink_trap_exception_test $trap_name $group_name
+
+ tc_check_packets "dev $swp1 egress" 101 0
+ check_err $? "Packets were not dropped"
+
+ log_test "$desc"
+
+ kill $mz_pid && wait $mz_pid &> /dev/null
+ tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower
+}
+
+decap_error_test()
+{
+ # Correct source IP - the remote address
+ local sip=192.0.2.66
+
+ ecn_decap_test "Decap error" "ECT(1)" 01
+ ecn_decap_test "Decap error" "ECT(0)" 02
+ ecn_decap_test "Decap error" "CE" 03
+
+ no_matching_tunnel_test "Decap error: Source IP check failed" \
+ 192.0.2.68 "0"
+ no_matching_tunnel_test \
+ "Decap error: Key exists but was not expected" $sip "2" ":E9:"
+
+ # Destroy the tunnel and create new one with key
+ __addr_add_del g1 del 192.0.2.65/32
+ tunnel_destroy g1
+
+ tunnel_create g1 gre 192.0.2.65 192.0.2.66 tos inherit key 233
+ __addr_add_del g1 add 192.0.2.65/32
+
+ no_matching_tunnel_test \
+ "Decap error: Key does not exist but was expected" $sip "0"
+ no_matching_tunnel_test \
+ "Decap error: Packet has a wrong key field" $sip "2" "E8:"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
new file mode 100755
index 000000000000..fd19161dd4ec
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
@@ -0,0 +1,330 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap tunnel drops and exceptions functionality over mlxsw.
+# Check all traps to make sure they are triggered under the right
+# conditions.
+
+# +--------------------+
+# | H1 (vrf) |
+# | + $h1 |
+# | | 192.0.2.1/28 |
+# +----|---------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | SW | |
+# | +--|--------------------------------------------------------------------+ |
+# | | + $swp1 BR1 (802.1d) | |
+# | | | |
+# | | + vx1 (vxlan) | |
+# | | local 192.0.2.17 | |
+# | | id 1000 dstport $VXPORT | |
+# | +-----------------------------------------------------------------------+ |
+# | |
+# | + $rp1 |
+# | | 192.0.2.17/28 |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|--------------------------------------------------------+
+# | | VRF2 |
+# | + $rp2 |
+# | 192.0.2.18/28 |
+# | |
+# +-------------------------------------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ decap_error_test
+ overlay_smac_is_mc_test
+"
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+: ${VXPORT:=4789}
+export VXPORT
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+switch_create()
+{
+ ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+ # that of the VxLAN's device.
+ ip link set dev br1 address $(mac_get $swp1)
+ ip link set dev br1 up
+
+ tc qdisc add dev $swp1 clsact
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+
+ ip link add name vx1 type vxlan id 1000 local 192.0.2.17 \
+ dstport "$VXPORT" nolearning noudpcsum tos inherit ttl 100
+ ip link set dev vx1 master br1
+ ip link set dev vx1 up
+
+ ip address add dev $rp1 192.0.2.17/28
+ ip link set dev $rp1 up
+}
+
+switch_destroy()
+{
+ ip link set dev $rp1 down
+ ip address del dev $rp1 192.0.2.17/28
+
+ ip link set dev vx1 down
+ ip link set dev vx1 nomaster
+ ip link del dev vx1
+
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev br1 down
+ ip link del dev br1
+}
+
+vrf2_create()
+{
+ simple_if_init $rp2 192.0.2.18/28
+}
+
+vrf2_destroy()
+{
+ simple_if_fini $rp2 192.0.2.18/28
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ rp1=${NETIFS[p3]}
+ rp2=${NETIFS[p4]}
+
+ vrf_prepare
+ forwarding_enable
+ h1_create
+ switch_create
+ vrf2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ vrf2_destroy
+ switch_destroy
+ h1_destroy
+ forwarding_restore
+ vrf_cleanup
+}
+
+ecn_payload_get()
+{
+ dest_mac=$(mac_get $h1)
+ p=$(:
+ )"08:"$( : VXLAN flags
+ )"00:00:00:"$( : VXLAN reserved
+ )"00:03:e8:"$( : VXLAN VNI : 1000
+ )"00:"$( : VXLAN reserved
+ )"$dest_mac:"$( : ETH daddr
+ )"00:00:00:00:00:00:"$( : ETH saddr
+ )"08:00:"$( : ETH type
+ )"45:"$( : IP version + IHL
+ )"00:"$( : IP TOS
+ )"00:14:"$( : IP total length
+ )"00:00:"$( : IP identification
+ )"20:00:"$( : IP flags + frag off
+ )"40:"$( : IP TTL
+ )"00:"$( : IP proto
+ )"D6:E5:"$( : IP header csum
+ )"c0:00:02:03:"$( : IP saddr: 192.0.2.3
+ )"c0:00:02:01:"$( : IP daddr: 192.0.2.1
+ )
+ echo $p
+}
+
+ecn_decap_test()
+{
+ local trap_name="decap_error"
+ local group_name="tunnel_drops"
+ local desc=$1; shift
+ local ecn_desc=$1; shift
+ local outer_tos=$1; shift
+ local mz_pid
+
+ RET=0
+
+ tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+ flower src_ip 192.0.2.3 dst_ip 192.0.2.1 action pass
+
+ rp1_mac=$(mac_get $rp1)
+ payload=$(ecn_payload_get)
+
+ ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -b $rp1_mac -B 192.0.2.17 \
+ -t udp sp=12345,dp=$VXPORT,tos=$outer_tos,p=$payload -q &
+ mz_pid=$!
+
+ devlink_trap_exception_test $trap_name $group_name
+
+ tc_check_packets "dev $swp1 egress" 101 0
+ check_err $? "Packets were not dropped"
+
+ log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc"
+
+ kill $mz_pid && wait $mz_pid &> /dev/null
+ tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower
+}
+
+reserved_bits_payload_get()
+{
+ dest_mac=$(mac_get $h1)
+ p=$(:
+ )"08:"$( : VXLAN flags
+ )"01:00:00:"$( : VXLAN reserved
+ )"00:03:e8:"$( : VXLAN VNI : 1000
+ )"00:"$( : VXLAN reserved
+ )"$dest_mac:"$( : ETH daddr
+ )"00:00:00:00:00:00:"$( : ETH saddr
+ )"08:00:"$( : ETH type
+ )"45:"$( : IP version + IHL
+ )"00:"$( : IP TOS
+ )"00:14:"$( : IP total length
+ )"00:00:"$( : IP identification
+ )"20:00:"$( : IP flags + frag off
+ )"40:"$( : IP TTL
+ )"00:"$( : IP proto
+ )"00:00:"$( : IP header csum
+ )"c0:00:02:03:"$( : IP saddr: 192.0.2.3
+ )"c0:00:02:01:"$( : IP daddr: 192.0.2.1
+ )
+ echo $p
+}
+
+short_payload_get()
+{
+ dest_mac=$(mac_get $h1)
+ p=$(:
+ )"08:"$( : VXLAN flags
+ )"01:00:00:"$( : VXLAN reserved
+ )"00:03:e8:"$( : VXLAN VNI : 1000
+ )"00:"$( : VXLAN reserved
+ )
+ echo $p
+}
+
+corrupted_packet_test()
+{
+ local trap_name="decap_error"
+ local group_name="tunnel_drops"
+ local desc=$1; shift
+ local payload_get=$1; shift
+ local mz_pid
+
+ RET=0
+
+ # In case of too short packet, there is no any inner packet,
+ # so the matching will always succeed
+ tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+ flower skip_hw src_ip 192.0.2.3 dst_ip 192.0.2.1 action pass
+
+ rp1_mac=$(mac_get $rp1)
+ payload=$($payload_get)
+ ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -b $rp1_mac \
+ -B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q &
+ mz_pid=$!
+
+ devlink_trap_exception_test $trap_name $group_name
+
+ tc_check_packets "dev $swp1 egress" 101 0
+ check_err $? "Packets were not dropped"
+
+ log_test "$desc"
+
+ kill $mz_pid && wait $mz_pid &> /dev/null
+ tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower
+}
+
+decap_error_test()
+{
+ ecn_decap_test "Decap error" "ECT(1)" 01
+ ecn_decap_test "Decap error" "ECT(0)" 02
+ ecn_decap_test "Decap error" "CE" 03
+
+ corrupted_packet_test "Decap error: Reserved bits in use" \
+ "reserved_bits_payload_get"
+ corrupted_packet_test "Decap error: No L2 header" "short_payload_get"
+}
+
+mc_smac_payload_get()
+{
+ dest_mac=$(mac_get $h1)
+ source_mac=01:02:03:04:05:06
+ p=$(:
+ )"08:"$( : VXLAN flags
+ )"00:00:00:"$( : VXLAN reserved
+ )"00:03:e8:"$( : VXLAN VNI : 1000
+ )"00:"$( : VXLAN reserved
+ )"$dest_mac:"$( : ETH daddr
+ )"$source_mac:"$( : ETH saddr
+ )"08:00:"$( : ETH type
+ )"45:"$( : IP version + IHL
+ )"00:"$( : IP TOS
+ )"00:14:"$( : IP total length
+ )"00:00:"$( : IP identification
+ )"20:00:"$( : IP flags + frag off
+ )"40:"$( : IP TTL
+ )"00:"$( : IP proto
+ )"00:00:"$( : IP header csum
+ )"c0:00:02:03:"$( : IP saddr: 192.0.2.3
+ )"c0:00:02:01:"$( : IP daddr: 192.0.2.1
+ )
+ echo $p
+}
+
+overlay_smac_is_mc_test()
+{
+ local trap_name="overlay_smac_is_mc"
+ local group_name="tunnel_drops"
+ local mz_pid
+
+ RET=0
+
+ # The matching will be checked on devlink_trap_drop_test()
+ # and the filter will be removed on devlink_trap_drop_cleanup()
+ tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+ flower src_mac 01:02:03:04:05:06 action pass
+
+ rp1_mac=$(mac_get $rp1)
+ payload=$(mc_smac_payload_get)
+
+ ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -b $rp1_mac \
+ -B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q &
+ mz_pid=$!
+
+ devlink_trap_drop_test $trap_name $group_name $swp1
+
+ log_test "Overlay source MAC is multicast"
+
+ devlink_trap_drop_cleanup $mz_pid $swp1 "ip"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/fib.sh b/tools/testing/selftests/drivers/net/mlxsw/fib.sh
new file mode 100755
index 000000000000..eab79b9e58cd
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/fib.sh
@@ -0,0 +1,256 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking the FIB offload API on top of mlxsw.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ ipv4_identical_routes
+ ipv4_tos
+ ipv4_metric
+ ipv4_replace
+ ipv4_delete
+ ipv4_plen
+ ipv4_replay
+ ipv4_flush
+ ipv4_local_replace
+ ipv6_add
+ ipv6_metric
+ ipv6_append_single
+ ipv6_replace_single
+ ipv6_metric_multipath
+ ipv6_append_multipath
+ ipv6_replace_multipath
+ ipv6_append_multipath_to_single
+ ipv6_delete_single
+ ipv6_delete_multipath
+ ipv6_replay_single
+ ipv6_replay_multipath
+ ipv6_local_replace
+"
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source $lib_dir/fib_offload_lib.sh
+
+ipv4_identical_routes()
+{
+ fib_ipv4_identical_routes_test "testns1"
+}
+
+ipv4_tos()
+{
+ fib_ipv4_tos_test "testns1"
+}
+
+ipv4_metric()
+{
+ fib_ipv4_metric_test "testns1"
+}
+
+ipv4_replace()
+{
+ fib_ipv4_replace_test "testns1"
+}
+
+ipv4_delete()
+{
+ fib_ipv4_delete_test "testns1"
+}
+
+ipv4_plen()
+{
+ fib_ipv4_plen_test "testns1"
+}
+
+ipv4_replay_metric()
+{
+ fib_ipv4_replay_metric_test "testns1" "$DEVLINK_DEV"
+}
+
+ipv4_replay_tos()
+{
+ fib_ipv4_replay_tos_test "testns1" "$DEVLINK_DEV"
+}
+
+ipv4_replay_plen()
+{
+ fib_ipv4_replay_plen_test "testns1" "$DEVLINK_DEV"
+}
+
+ipv4_replay()
+{
+ ipv4_replay_metric
+ ipv4_replay_tos
+ ipv4_replay_plen
+}
+
+ipv4_flush()
+{
+ fib_ipv4_flush_test "testns1"
+}
+
+ipv4_local_replace()
+{
+ local ns="testns1"
+
+ RET=0
+
+ ip -n $ns link add name dummy1 type dummy
+ ip -n $ns link set dev dummy1 up
+
+ ip -n $ns route add table local 192.0.2.1/32 dev dummy1
+ fib4_trap_check $ns "table local 192.0.2.1/32 dev dummy1" false
+ check_err $? "Local table route not in hardware when should"
+
+ ip -n $ns route add table main 192.0.2.1/32 dev dummy1
+ fib4_trap_check $ns "table main 192.0.2.1/32 dev dummy1" true
+ check_err $? "Main table route in hardware when should not"
+
+ fib4_trap_check $ns "table local 192.0.2.1/32 dev dummy1" false
+ check_err $? "Local table route was replaced when should not"
+
+ # Test that local routes can replace routes in main table.
+ ip -n $ns route add table main 192.0.2.2/32 dev dummy1
+ fib4_trap_check $ns "table main 192.0.2.2/32 dev dummy1" false
+ check_err $? "Main table route not in hardware when should"
+
+ ip -n $ns route add table local 192.0.2.2/32 dev dummy1
+ fib4_trap_check $ns "table local 192.0.2.2/32 dev dummy1" false
+ check_err $? "Local table route did not replace route in main table when should"
+
+ fib4_trap_check $ns "table main 192.0.2.2/32 dev dummy1" true
+ check_err $? "Main table route was not replaced when should"
+
+ log_test "IPv4 local table route replacement"
+
+ ip -n $ns link del dev dummy1
+}
+
+ipv6_add()
+{
+ fib_ipv6_add_test "testns1"
+}
+
+ipv6_metric()
+{
+ fib_ipv6_metric_test "testns1"
+}
+
+ipv6_append_single()
+{
+ fib_ipv6_append_single_test "testns1"
+}
+
+ipv6_replace_single()
+{
+ fib_ipv6_replace_single_test "testns1"
+}
+
+ipv6_metric_multipath()
+{
+ fib_ipv6_metric_multipath_test "testns1"
+}
+
+ipv6_append_multipath()
+{
+ fib_ipv6_append_multipath_test "testns1"
+}
+
+ipv6_replace_multipath()
+{
+ fib_ipv6_replace_multipath_test "testns1"
+}
+
+ipv6_append_multipath_to_single()
+{
+ fib_ipv6_append_multipath_to_single_test "testns1"
+}
+
+ipv6_delete_single()
+{
+ fib_ipv6_delete_single_test "testns1"
+}
+
+ipv6_delete_multipath()
+{
+ fib_ipv6_delete_multipath_test "testns1"
+}
+
+ipv6_replay_single()
+{
+ fib_ipv6_replay_single_test "testns1" "$DEVLINK_DEV"
+}
+
+ipv6_replay_multipath()
+{
+ fib_ipv6_replay_multipath_test "testns1" "$DEVLINK_DEV"
+}
+
+ipv6_local_replace()
+{
+ local ns="testns1"
+
+ RET=0
+
+ ip -n $ns link add name dummy1 type dummy
+ ip -n $ns link set dev dummy1 up
+
+ ip -n $ns route add table local 2001:db8:1::1/128 dev dummy1
+ fib6_trap_check $ns "table local 2001:db8:1::1/128 dev dummy1" false
+ check_err $? "Local table route not in hardware when should"
+
+ ip -n $ns route add table main 2001:db8:1::1/128 dev dummy1
+ fib6_trap_check $ns "table main 2001:db8:1::1/128 dev dummy1" true
+ check_err $? "Main table route in hardware when should not"
+
+ fib6_trap_check $ns "table local 2001:db8:1::1/128 dev dummy1" false
+ check_err $? "Local table route was replaced when should not"
+
+ # Test that local routes can replace routes in main table.
+ ip -n $ns route add table main 2001:db8:1::2/128 dev dummy1
+ fib6_trap_check $ns "table main 2001:db8:1::2/128 dev dummy1" false
+ check_err $? "Main table route not in hardware when should"
+
+ ip -n $ns route add table local 2001:db8:1::2/128 dev dummy1
+ fib6_trap_check $ns "table local 2001:db8:1::2/128 dev dummy1" false
+ check_err $? "Local route route did not replace route in main table when should"
+
+ fib6_trap_check $ns "table main 2001:db8:1::2/128 dev dummy1" true
+ check_err $? "Main table route was not replaced when should"
+
+ log_test "IPv6 local table route replacement"
+
+ ip -n $ns link del dev dummy1
+}
+
+setup_prepare()
+{
+ ip netns add testns1
+ if [ $? -ne 0 ]; then
+ echo "Failed to add netns \"testns1\""
+ exit 1
+ fi
+
+ devlink dev reload $DEVLINK_DEV netns testns1
+ if [ $? -ne 0 ]; then
+ echo "Failed to reload into netns \"testns1\""
+ exit 1
+ fi
+}
+
+cleanup()
+{
+ pre_cleanup
+ devlink -N testns1 dev reload $DEVLINK_DEV netns $$
+ ip netns del testns1
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
new file mode 100755
index 000000000000..eff6393ce974
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
@@ -0,0 +1,176 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for port-default priority. Non-IP packets ingress $swp1 and are
+# prioritized according to the default priority specified at the port.
+# rx_octets_prio_* counters are used to verify the prioritization.
+#
+# +-----------------------+
+# | H1 |
+# | + $h1 |
+# | | 192.0.2.1/28 |
+# +----|------------------+
+# |
+# +----|------------------+
+# | SW | |
+# | + $swp1 |
+# | 192.0.2.2/28 |
+# | APP=<prio>,1,0 |
+# +-----------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ test_defprio
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=2
+: ${HIT_TIMEOUT:=1000} # ms
+source $lib_dir/lib.sh
+
+declare -a APP
+
+defprio_install()
+{
+ local dev=$1; shift
+ local prio=$1; shift
+ local app="app=$prio,1,0"
+
+ lldptool -T -i $dev -V APP $app >/dev/null
+ lldpad_app_wait_set $dev
+ APP[$prio]=$app
+}
+
+defprio_uninstall()
+{
+ local dev=$1; shift
+ local prio=$1; shift
+ local app=${APP[$prio]}
+
+ lldptool -T -i $dev -V APP -d $app >/dev/null
+ lldpad_app_wait_del
+ unset APP[$prio]
+}
+
+defprio_flush()
+{
+ local dev=$1; shift
+ local prio
+
+ if ((${#APP[@]})); then
+ lldptool -T -i $dev -V APP -d ${APP[@]} >/dev/null
+ fi
+ lldpad_app_wait_del
+ APP=()
+}
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+switch_create()
+{
+ ip link set dev $swp1 up
+ ip addr add dev $swp1 192.0.2.2/28
+}
+
+switch_destroy()
+{
+ defprio_flush $swp1
+ ip addr del dev $swp1 192.0.2.2/28
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ vrf_prepare
+
+ h1_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.2
+}
+
+wait_for_packets()
+{
+ local t0=$1; shift
+ local prio_observe=$1; shift
+
+ local t1=$(ethtool_stats_get $swp1 rx_frames_prio_$prio_observe)
+ local delta=$((t1 - t0))
+ echo $delta
+ ((delta >= 10))
+}
+
+__test_defprio()
+{
+ local prio_install=$1; shift
+ local prio_observe=$1; shift
+ local delta
+ local key
+ local i
+
+ RET=0
+
+ defprio_install $swp1 $prio_install
+
+ local t0=$(ethtool_stats_get $swp1 rx_frames_prio_$prio_observe)
+ mausezahn -q $h1 -d 100m -c 10 -t arp reply
+ delta=$(busywait "$HIT_TIMEOUT" wait_for_packets $t0 $prio_observe)
+
+ check_err $? "Default priority $prio_install/$prio_observe: Expected to capture 10 packets, got $delta."
+ log_test "Default priority $prio_install/$prio_observe"
+
+ defprio_uninstall $swp1 $prio_install
+}
+
+test_defprio()
+{
+ local prio
+
+ for prio in {0..7}; do
+ __test_defprio $prio $prio
+ done
+
+ defprio_install $swp1 3
+ __test_defprio 0 3
+ __test_defprio 1 3
+ __test_defprio 2 3
+ __test_defprio 4 4
+ __test_defprio 5 5
+ __test_defprio 6 6
+ __test_defprio 7 7
+ defprio_uninstall $swp1 3
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
index 40f16f2a3afd..5cbff8038f84 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
@@ -36,8 +36,6 @@ source $lib_dir/lib.sh
h1_create()
{
- local dscp;
-
simple_if_init $h1 192.0.2.1/28
tc qdisc add dev $h1 clsact
dscp_capture_install $h1 10
@@ -67,6 +65,7 @@ h2_destroy()
dscp_map()
{
local base=$1; shift
+ local prio
for prio in {0..7}; do
echo app=$prio,5,$((base + prio))
@@ -138,6 +137,7 @@ dscp_ping_test()
local prio=$1; shift
local dev_10=$1; shift
local dev_20=$1; shift
+ local key
local dscp_10=$(((prio + 10) << 2))
local dscp_20=$(((prio + 20) << 2))
@@ -175,6 +175,8 @@ dscp_ping_test()
test_dscp()
{
+ local prio
+
for prio in {0..7}; do
dscp_ping_test v$h1 192.0.2.1 192.0.2.2 $prio $h1 $h2
done
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
index 9faf02e32627..c745ce3befee 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
@@ -31,6 +31,7 @@ ALL_TESTS="
ping_ipv4
test_update
test_no_update
+ test_dscp_leftover
"
lib_dir=$(dirname $0)/../../../net/forwarding
@@ -50,10 +51,13 @@ reprioritize()
echo ${reprio[$in]}
}
-h1_create()
+zero()
{
- local dscp;
+ echo 0
+}
+h1_create()
+{
simple_if_init $h1 192.0.2.1/28
tc qdisc add dev $h1 clsact
dscp_capture_install $h1 0
@@ -87,6 +91,7 @@ h2_destroy()
dscp_map()
{
local base=$1; shift
+ local prio
for prio in {0..7}; do
echo app=$prio,5,$((base + prio))
@@ -156,6 +161,7 @@ dscp_ping_test()
local reprio=$1; shift
local dev1=$1; shift
local dev2=$1; shift
+ local i
local prio2=$($reprio $prio) # ICMP Request egress prio
local prio3=$($reprio $prio2) # ICMP Response egress prio
@@ -205,6 +211,7 @@ __test_update()
{
local update=$1; shift
local reprio=$1; shift
+ local prio
sysctl_restore net.ipv4.ip_forward_update_priority
sysctl_set net.ipv4.ip_forward_update_priority $update
@@ -224,6 +231,19 @@ test_no_update()
__test_update 0 echo
}
+# Test that when the last APP rule is removed, the prio->DSCP map is properly
+# set to zeroes, and that the last APP rule does not stay active in the ASIC.
+test_dscp_leftover()
+{
+ lldptool -T -i $swp2 -V APP -d $(dscp_map 0) >/dev/null
+ lldpad_app_wait_del
+
+ __test_update 0 zero
+
+ lldptool -T -i $swp2 -V APP $(dscp_map 0) >/dev/null
+ lldpad_app_wait_set $swp2
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
index e80be65799ad..faa51012cdac 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
@@ -1,47 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-humanize()
-{
- local speed=$1; shift
-
- for unit in bps Kbps Mbps Gbps; do
- if (($(echo "$speed < 1024" | bc))); then
- break
- fi
-
- speed=$(echo "scale=1; $speed / 1024" | bc)
- done
-
- echo "$speed${unit}"
-}
-
-rate()
-{
- local t0=$1; shift
- local t1=$1; shift
- local interval=$1; shift
-
- echo $((8 * (t1 - t0) / interval))
-}
-
-start_traffic()
-{
- local h_in=$1; shift # Where the traffic egresses the host
- local sip=$1; shift
- local dip=$1; shift
- local dmac=$1; shift
-
- $MZ $h_in -p 8000 -A $sip -B $dip -c 0 \
- -a own -b $dmac -t udp -q &
- sleep 1
-}
-
-stop_traffic()
-{
- # Suppress noise from killing mausezahn.
- { kill %% && wait %%; } 2>/dev/null
-}
-
check_rate()
{
local rate=$1; shift
@@ -96,3 +54,31 @@ measure_rate()
echo $ir $er
return $ret
}
+
+bail_on_lldpad()
+{
+ if systemctl is-active --quiet lldpad; then
+
+ cat >/dev/stderr <<-EOF
+ WARNING: lldpad is running
+
+ lldpad will likely configure DCB, and this test will
+ configure Qdiscs. mlxsw does not support both at the
+ same time, one of them is arbitrarily going to overwrite
+ the other. That will cause spurious failures (or,
+ unlikely, passes) of this test.
+ EOF
+
+ if [[ -z $ALLOW_LLDPAD ]]; then
+ cat >/dev/stderr <<-EOF
+
+ If you want to run the test anyway, please set
+ an environment variable ALLOW_LLDPAD to a
+ non-empty string.
+ EOF
+ exit 1
+ else
+ return
+ fi
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
index 47315fe48d5a..24dd8ed48580 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
@@ -232,7 +232,7 @@ test_mc_aware()
stop_traffic
local ucth1=${uc_rate[1]}
- start_traffic $h1 own bc bc
+ start_traffic $h1 192.0.2.65 bc bc
local d0=$(date +%s)
local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
@@ -254,7 +254,11 @@ test_mc_aware()
ret = 100 * ($ucth1 - $ucth2) / $ucth1
if (ret > 0) { ret } else { 0 }
")
- check_err $(bc <<< "$deg > 25")
+
+ # Minimum shaper of 200Mbps on MC TCs should cause about 20% of
+ # degradation on 1Gbps link.
+ check_err $(bc <<< "$deg < 15") "Minimum shaper not in effect"
+ check_err $(bc <<< "$deg > 25") "MC traffic degrades UC performance too much"
local interval=$((d1 - d0))
local mc_ir=$(rate $u0 $u1 $interval)
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
new file mode 100755
index 000000000000..c9fc4d4885c1
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A driver for the ETS selftest that implements testing in offloaded datapath.
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/sch_ets_core.sh
+source $lib_dir/devlink_lib.sh
+source qos_lib.sh
+
+ALL_TESTS="
+ ping_ipv4
+ priomap_mode
+ ets_test_strict
+ ets_test_mixed
+ ets_test_dwrr
+"
+
+switch_create()
+{
+ ets_switch_create
+
+ # Create a bottleneck so that the DWRR process can kick in.
+ ethtool -s $h2 speed 1000 autoneg off
+ ethtool -s $swp2 speed 1000 autoneg off
+
+ # Set the ingress quota high and use the three egress TCs to limit the
+ # amount of traffic that is admitted to the shared buffers. This makes
+ # sure that there is always enough traffic of all types to select from
+ # for the DWRR process.
+ devlink_port_pool_th_set $swp1 0 12
+ devlink_tc_bind_pool_th_set $swp1 0 ingress 0 12
+ devlink_port_pool_th_set $swp2 4 12
+ devlink_tc_bind_pool_th_set $swp2 7 egress 4 5
+ devlink_tc_bind_pool_th_set $swp2 6 egress 4 5
+ devlink_tc_bind_pool_th_set $swp2 5 egress 4 5
+
+ # Note: sch_ets_core.sh uses VLAN ingress-qos-map to assign packet
+ # priorities at $swp1 based on their 802.1p headers. ingress-qos-map is
+ # not offloaded by mlxsw as of this writing, but the mapping used is
+ # 1:1, which is the mapping currently hard-coded by the driver.
+}
+
+switch_destroy()
+{
+ devlink_tc_bind_pool_th_restore $swp2 5 egress
+ devlink_tc_bind_pool_th_restore $swp2 6 egress
+ devlink_tc_bind_pool_th_restore $swp2 7 egress
+ devlink_port_pool_th_restore $swp2 4
+ devlink_tc_bind_pool_th_restore $swp1 0 ingress
+ devlink_port_pool_th_restore $swp1 0
+
+ ethtool -s $swp2 autoneg on
+ ethtool -s $h2 autoneg on
+
+ ets_switch_destroy
+}
+
+# Callback from sch_ets_tests.sh
+get_stats()
+{
+ local band=$1; shift
+
+ ethtool_stats_get "$h2" rx_octets_prio_$band
+}
+
+bail_on_lldpad
+ets_run
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh
new file mode 100755
index 000000000000..c6ce0b448bf3
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source qos_lib.sh
+bail_on_lldpad
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+TCFLAGS=skip_sw
+source $lib_dir/sch_tbf_ets.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh
new file mode 100755
index 000000000000..8d245f331619
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source qos_lib.sh
+bail_on_lldpad
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+TCFLAGS=skip_sw
+source $lib_dir/sch_tbf_prio.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh
new file mode 100755
index 000000000000..013886061f15
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source qos_lib.sh
+bail_on_lldpad
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+TCFLAGS=skip_sw
+source $lib_dir/sch_tbf_root.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/mirror_gre_scale.sh
new file mode 100644
index 000000000000..f7c168decd1e
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/mirror_gre_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../mirror_gre_scale.sh
+
+mirror_gre_get_target()
+{
+ local should_fail=$1; shift
+ local target
+
+ target=$(devlink_resource_size_get span_agents)
+
+ if ((! should_fail)); then
+ echo $target
+ else
+ echo $((target + 1))
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
new file mode 100755
index 000000000000..7b2acba82a49
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+if [ "$DEVLINK_VIDDID" != "15b3:cf6c" ]; then
+ echo "SKIP: test is tailored for Mellanox Spectrum-2"
+ exit 1
+fi
+
+current_test=""
+
+cleanup()
+{
+ pre_cleanup
+ if [ ! -z $current_test ]; then
+ ${current_test}_cleanup
+ fi
+ # Need to reload in order to avoid router abort.
+ devlink_reload
+}
+
+trap cleanup EXIT
+
+ALL_TESTS="router tc_flower mirror_gre"
+for current_test in ${TESTS:-$ALL_TESTS}; do
+ source ${current_test}_scale.sh
+
+ num_netifs_var=${current_test^^}_NUM_NETIFS
+ num_netifs=${!num_netifs_var:-$NUM_NETIFS}
+
+ for should_fail in 0 1; do
+ RET=0
+ target=$(${current_test}_get_target "$should_fail")
+ ${current_test}_setup_prepare
+ setup_wait $num_netifs
+ ${current_test}_test "$target" "$should_fail"
+ ${current_test}_cleanup
+ devlink_reload
+ if [[ "$should_fail" -eq 0 ]]; then
+ log_test "'$current_test' $target"
+ else
+ log_test "'$current_test' overflow $target"
+ fi
+ done
+done
+current_test=""
+
+exit "$RET"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/router_scale.sh
new file mode 100644
index 000000000000..1897e163e3ab
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/router_scale.sh
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../router_scale.sh
+
+router_get_target()
+{
+ local should_fail=$1
+ local target
+
+ target=$(devlink_resource_size_get kvd)
+
+ if [[ $should_fail -eq 0 ]]; then
+ target=$((target * 85 / 100))
+ else
+ target=$((target + 1))
+ fi
+
+ echo $target
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
new file mode 100644
index 000000000000..a0795227216e
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../tc_flower_scale.sh
+
+tc_flower_get_target()
+{
+ local should_fail=$1; shift
+
+ # The driver associates a counter with each tc filter, which means the
+ # number of supported filters is bounded by the number of available
+ # counters.
+ # Currently, the driver supports 12K (12,288) flow counters and six of
+ # these are used for multicast routing.
+ local target=12282
+
+ if ((! should_fail)); then
+ echo $target
+ else
+ echo $((target + 1))
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh
index 8d2186c7c62b..f7c168decd1e 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh
@@ -4,10 +4,13 @@ source ../mirror_gre_scale.sh
mirror_gre_get_target()
{
local should_fail=$1; shift
+ local target
+
+ target=$(devlink_resource_size_get span_agents)
if ((! should_fail)); then
- echo 3
+ echo $target
else
- echo 4
+ echo $((target + 1))
fi
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
index ae6146ec5afd..4632f51af7ab 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
@@ -112,14 +112,16 @@ sanitization_single_dev_mcast_group_test()
RET=0
ip link add dev br0 type bridge mcast_snooping 0
+ ip link add name dummy1 up type dummy
ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
ttl 20 tos inherit local 198.51.100.1 dstport 4789 \
- dev $swp2 group 239.0.0.1
+ dev dummy1 group 239.0.0.1
sanitization_single_dev_test_fail
ip link del dev vxlan0
+ ip link del dev dummy1
ip link del dev br0
log_test "vxlan device with a multicast group"
@@ -181,13 +183,15 @@ sanitization_single_dev_local_interface_test()
RET=0
ip link add dev br0 type bridge mcast_snooping 0
+ ip link add name dummy1 up type dummy
ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789 dev $swp2
+ ttl 20 tos inherit local 198.51.100.1 dstport 4789 dev dummy1
sanitization_single_dev_test_fail
ip link del dev vxlan0
+ ip link del dev dummy1
ip link del dev br0
log_test "vxlan device with local interface"
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index 9d8baf5d14b3..025a84c2ab5a 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -3,7 +3,9 @@
lib_dir=$(dirname $0)/../../../net/forwarding
-ALL_TESTS="fw_flash_test"
+ALL_TESTS="fw_flash_test params_test regions_test reload_test \
+ netns_reload_test resource_test dev_info_test \
+ empty_reporter_test dummy_reporter_test"
NUM_NETIFS=0
source $lib_dir/lib.sh
@@ -30,6 +32,417 @@ fw_flash_test()
log_test "fw flash test"
}
+param_get()
+{
+ local name=$1
+
+ cmd_jq "devlink dev param show $DL_HANDLE name $name -j" \
+ '.[][][].values[] | select(.cmode == "driverinit").value'
+}
+
+param_set()
+{
+ local name=$1
+ local value=$2
+
+ devlink dev param set $DL_HANDLE name $name cmode driverinit value $value
+}
+
+check_value()
+{
+ local name=$1
+ local phase_name=$2
+ local expected_param_value=$3
+ local expected_debugfs_value=$4
+ local value
+
+ value=$(param_get $name)
+ check_err $? "Failed to get $name param value"
+ [ "$value" == "$expected_param_value" ]
+ check_err $? "Unexpected $phase_name $name param value"
+ value=$(<$DEBUGFS_DIR/$name)
+ check_err $? "Failed to get $name debugfs value"
+ [ "$value" == "$expected_debugfs_value" ]
+ check_err $? "Unexpected $phase_name $name debugfs value"
+}
+
+params_test()
+{
+ RET=0
+
+ local max_macs
+ local test1
+
+ check_value max_macs initial 32 32
+ check_value test1 initial true Y
+
+ param_set max_macs 16
+ check_err $? "Failed to set max_macs param value"
+ param_set test1 false
+ check_err $? "Failed to set test1 param value"
+
+ check_value max_macs post-set 16 32
+ check_value test1 post-set false Y
+
+ devlink dev reload $DL_HANDLE
+
+ check_value max_macs post-reload 16 16
+ check_value test1 post-reload false N
+
+ log_test "params test"
+}
+
+check_region_size()
+{
+ local name=$1
+ local size
+
+ size=$(devlink region show $DL_HANDLE/$name -j | jq -e -r '.[][].size')
+ check_err $? "Failed to get $name region size"
+ [ $size -eq 32768 ]
+ check_err $? "Invalid $name region size"
+}
+
+check_region_snapshot_count()
+{
+ local name=$1
+ local phase_name=$2
+ local expected_count=$3
+ local count
+
+ count=$(devlink region show $DL_HANDLE/$name -j | jq -e -r '.[][].snapshot | length')
+ [ $count -eq $expected_count ]
+ check_err $? "Unexpected $phase_name snapshot count"
+}
+
+regions_test()
+{
+ RET=0
+
+ local count
+
+ check_region_size dummy
+ check_region_snapshot_count dummy initial 0
+
+ echo ""> $DEBUGFS_DIR/take_snapshot
+ check_err $? "Failed to take first dummy region snapshot"
+ check_region_snapshot_count dummy post-first-snapshot 1
+
+ echo ""> $DEBUGFS_DIR/take_snapshot
+ check_err $? "Failed to take second dummy region snapshot"
+ check_region_snapshot_count dummy post-second-snapshot 2
+
+ echo ""> $DEBUGFS_DIR/take_snapshot
+ check_err $? "Failed to take third dummy region snapshot"
+ check_region_snapshot_count dummy post-third-snapshot 3
+
+ devlink region del $DL_HANDLE/dummy snapshot 1
+ check_err $? "Failed to delete first dummy region snapshot"
+
+ check_region_snapshot_count dummy post-first-delete 2
+
+ log_test "regions test"
+}
+
+reload_test()
+{
+ RET=0
+
+ devlink dev reload $DL_HANDLE
+ check_err $? "Failed to reload"
+
+ echo "y"> $DEBUGFS_DIR/fail_reload
+ check_err $? "Failed to setup devlink reload to fail"
+
+ devlink dev reload $DL_HANDLE
+ check_fail $? "Unexpected success of devlink reload"
+
+ echo "n"> $DEBUGFS_DIR/fail_reload
+ check_err $? "Failed to setup devlink reload not to fail"
+
+ devlink dev reload $DL_HANDLE
+ check_err $? "Failed to reload after set not to fail"
+
+ echo "y"> $DEBUGFS_DIR/dont_allow_reload
+ check_err $? "Failed to forbid devlink reload"
+
+ devlink dev reload $DL_HANDLE
+ check_fail $? "Unexpected success of devlink reload"
+
+ echo "n"> $DEBUGFS_DIR/dont_allow_reload
+ check_err $? "Failed to re-enable devlink reload"
+
+ devlink dev reload $DL_HANDLE
+ check_err $? "Failed to reload after re-enable"
+
+ log_test "reload test"
+}
+
+netns_reload_test()
+{
+ RET=0
+
+ ip netns add testns1
+ check_err $? "Failed add netns \"testns1\""
+ ip netns add testns2
+ check_err $? "Failed add netns \"testns2\""
+
+ devlink dev reload $DL_HANDLE netns testns1
+ check_err $? "Failed to reload into netns \"testns1\""
+
+ devlink -N testns1 dev reload $DL_HANDLE netns testns2
+ check_err $? "Failed to reload from netns \"testns1\" into netns \"testns2\""
+
+ ip netns del testns2
+ ip netns del testns1
+
+ log_test "netns reload test"
+}
+
+DUMMYDEV="dummytest"
+
+res_val_get()
+{
+ local netns=$1
+ local parentname=$2
+ local name=$3
+ local type=$4
+
+ cmd_jq "devlink -N $netns resource show $DL_HANDLE -j" \
+ ".[][][] | select(.name == \"$parentname\").resources[] \
+ | select(.name == \"$name\").$type"
+}
+
+resource_test()
+{
+ RET=0
+
+ ip netns add testns1
+ check_err $? "Failed add netns \"testns1\""
+ ip netns add testns2
+ check_err $? "Failed add netns \"testns2\""
+
+ devlink dev reload $DL_HANDLE netns testns1
+ check_err $? "Failed to reload into netns \"testns1\""
+
+ # Create dummy dev to add the address and routes on.
+
+ ip -n testns1 link add name $DUMMYDEV type dummy
+ check_err $? "Failed create dummy device"
+ ip -n testns1 link set $DUMMYDEV up
+ check_err $? "Failed bring up dummy device"
+ ip -n testns1 a a 192.0.1.1/24 dev $DUMMYDEV
+ check_err $? "Failed add an IP address to dummy device"
+
+ local occ=$(res_val_get testns1 IPv4 fib occ)
+ local limit=$((occ+1))
+
+ # Set fib size limit to handle one another route only.
+
+ devlink -N testns1 resource set $DL_HANDLE path IPv4/fib size $limit
+ check_err $? "Failed to set IPv4/fib resource size"
+ local size_new=$(res_val_get testns1 IPv4 fib size_new)
+ [ "$size_new" -eq "$limit" ]
+ check_err $? "Unexpected \"size_new\" value (got $size_new, expected $limit)"
+
+ devlink -N testns1 dev reload $DL_HANDLE
+ check_err $? "Failed to reload"
+ local size=$(res_val_get testns1 IPv4 fib size)
+ [ "$size" -eq "$limit" ]
+ check_err $? "Unexpected \"size\" value (got $size, expected $limit)"
+
+ # Insert 2 routes, the first is going to be inserted,
+ # the second is expected to fail to be inserted.
+
+ ip -n testns1 r a 192.0.2.0/24 via 192.0.1.2
+ check_err $? "Failed to add route"
+
+ ip -n testns1 r a 192.0.3.0/24 via 192.0.1.2
+ check_fail $? "Unexpected successful route add over limit"
+
+ # Now create another dummy in second network namespace and
+ # insert two routes. That is over the limit of the netdevsim
+ # instance in the first namespace. Move the netdevsim instance
+ # into the second namespace and expect it to fail.
+
+ ip -n testns2 link add name $DUMMYDEV type dummy
+ check_err $? "Failed create dummy device"
+ ip -n testns2 link set $DUMMYDEV up
+ check_err $? "Failed bring up dummy device"
+ ip -n testns2 a a 192.0.1.1/24 dev $DUMMYDEV
+ check_err $? "Failed add an IP address to dummy device"
+ ip -n testns2 r a 192.0.2.0/24 via 192.0.1.2
+ check_err $? "Failed to add route"
+ ip -n testns2 r a 192.0.3.0/24 via 192.0.1.2
+ check_err $? "Failed to add route"
+
+ devlink -N testns1 dev reload $DL_HANDLE netns testns2
+ check_fail $? "Unexpected successful reload from netns \"testns1\" into netns \"testns2\""
+
+ devlink -N testns2 resource set $DL_HANDLE path IPv4/fib size ' -1'
+ check_err $? "Failed to reset IPv4/fib resource size"
+
+ devlink -N testns2 dev reload $DL_HANDLE netns 1
+ check_err $? "Failed to reload devlink back"
+
+ ip netns del testns2
+ ip netns del testns1
+
+ log_test "resource test"
+}
+
+info_get()
+{
+ local name=$1
+
+ cmd_jq "devlink dev info $DL_HANDLE -j" ".[][][\"$name\"]" "-e"
+}
+
+dev_info_test()
+{
+ RET=0
+
+ driver=$(info_get "driver")
+ check_err $? "Failed to get driver name"
+ [ "$driver" == "netdevsim" ]
+ check_err $? "Unexpected driver name $driver"
+
+ log_test "dev_info test"
+}
+
+empty_reporter_test()
+{
+ RET=0
+
+ devlink health show $DL_HANDLE reporter empty >/dev/null
+ check_err $? "Failed show empty reporter"
+
+ devlink health dump show $DL_HANDLE reporter empty >/dev/null
+ check_err $? "Failed show dump of empty reporter"
+
+ devlink health diagnose $DL_HANDLE reporter empty >/dev/null
+ check_err $? "Failed diagnose empty reporter"
+
+ devlink health recover $DL_HANDLE reporter empty
+ check_err $? "Failed recover empty reporter"
+
+ log_test "empty reporter test"
+}
+
+check_reporter_info()
+{
+ local name=$1
+ local expected_state=$2
+ local expected_error=$3
+ local expected_recover=$4
+ local expected_grace_period=$5
+ local expected_auto_recover=$6
+
+ local show=$(devlink health show $DL_HANDLE reporter $name -j | jq -e -r ".[][][]")
+ check_err $? "Failed show $name reporter"
+
+ local state=$(echo $show | jq -r ".state")
+ [ "$state" == "$expected_state" ]
+ check_err $? "Unexpected \"state\" value (got $state, expected $expected_state)"
+
+ local error=$(echo $show | jq -r ".error")
+ [ "$error" == "$expected_error" ]
+ check_err $? "Unexpected \"error\" value (got $error, expected $expected_error)"
+
+ local recover=`echo $show | jq -r ".recover"`
+ [ "$recover" == "$expected_recover" ]
+ check_err $? "Unexpected \"recover\" value (got $recover, expected $expected_recover)"
+
+ local grace_period=$(echo $show | jq -r ".grace_period")
+ check_err $? "Failed get $name reporter grace_period"
+ [ "$grace_period" == "$expected_grace_period" ]
+ check_err $? "Unexpected \"grace_period\" value (got $grace_period, expected $expected_grace_period)"
+
+ local auto_recover=$(echo $show | jq -r ".auto_recover")
+ [ "$auto_recover" == "$expected_auto_recover" ]
+ check_err $? "Unexpected \"auto_recover\" value (got $auto_recover, expected $expected_auto_recover)"
+}
+
+dummy_reporter_test()
+{
+ RET=0
+
+ check_reporter_info dummy healthy 0 0 0 false
+
+ local BREAK_MSG="foo bar"
+ echo "$BREAK_MSG"> $DEBUGFS_DIR/health/break_health
+ check_err $? "Failed to break dummy reporter"
+
+ check_reporter_info dummy error 1 0 0 false
+
+ local dump=$(devlink health dump show $DL_HANDLE reporter dummy -j)
+ check_err $? "Failed show dump of dummy reporter"
+
+ local dump_break_msg=$(echo $dump | jq -r ".break_message")
+ [ "$dump_break_msg" == "$BREAK_MSG" ]
+ check_err $? "Unexpected dump break message value (got $dump_break_msg, expected $BREAK_MSG)"
+
+ devlink health dump clear $DL_HANDLE reporter dummy
+ check_err $? "Failed clear dump of dummy reporter"
+
+ devlink health recover $DL_HANDLE reporter dummy
+ check_err $? "Failed recover dummy reporter"
+
+ check_reporter_info dummy healthy 1 1 0 false
+
+ devlink health set $DL_HANDLE reporter dummy auto_recover true
+ check_err $? "Failed to dummy reporter auto_recover option"
+
+ check_reporter_info dummy healthy 1 1 0 true
+
+ echo "$BREAK_MSG"> $DEBUGFS_DIR/health/break_health
+ check_err $? "Failed to break dummy reporter"
+
+ check_reporter_info dummy healthy 2 2 0 true
+
+ local diagnose=$(devlink health diagnose $DL_HANDLE reporter dummy -j -p)
+ check_err $? "Failed show diagnose of dummy reporter"
+
+ local rcvrd_break_msg=$(echo $diagnose | jq -r ".recovered_break_message")
+ [ "$rcvrd_break_msg" == "$BREAK_MSG" ]
+ check_err $? "Unexpected recovered break message value (got $rcvrd_break_msg, expected $BREAK_MSG)"
+
+ devlink health set $DL_HANDLE reporter dummy grace_period 10
+ check_err $? "Failed to dummy reporter grace_period option"
+
+ check_reporter_info dummy healthy 2 2 10 true
+
+ echo "Y"> $DEBUGFS_DIR/health/fail_recover
+ check_err $? "Failed set dummy reporter recovery to fail"
+
+ echo "$BREAK_MSG"> $DEBUGFS_DIR/health/break_health
+ check_fail $? "Unexpected success of dummy reporter break"
+
+ check_reporter_info dummy error 3 2 10 true
+
+ devlink health recover $DL_HANDLE reporter dummy
+ check_fail $? "Unexpected success of dummy reporter recover"
+
+ echo "N"> $DEBUGFS_DIR/health/fail_recover
+ check_err $? "Failed set dummy reporter recovery to be successful"
+
+ devlink health recover $DL_HANDLE reporter dummy
+ check_err $? "Failed recover dummy reporter"
+
+ check_reporter_info dummy healthy 3 3 10 true
+
+ echo 8192> $DEBUGFS_DIR/health/binary_len
+ check_fail $? "Failed set dummy reporter binary len to 8192"
+
+ local dump=$(devlink health dump show $DL_HANDLE reporter dummy -j)
+ check_err $? "Failed show dump of dummy reporter"
+
+ devlink health dump clear $DL_HANDLE reporter dummy
+ check_err $? "Failed clear dump of dummy reporter"
+
+ log_test "dummy reporter test"
+}
+
setup_prepare()
{
modprobe netdevsim
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_in_netns.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_in_netns.sh
new file mode 100755
index 000000000000..7effd35369e1
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_in_netns.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="check_devlink_test check_ports_test"
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+
+BUS_ADDR=10
+PORT_COUNT=4
+DEV_NAME=netdevsim$BUS_ADDR
+SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV_NAME/net/
+DL_HANDLE=netdevsim/$DEV_NAME
+NETNS_NAME=testns1
+
+port_netdev_get()
+{
+ local port_index=$1
+
+ cmd_jq "devlink -N $NETNS_NAME port show -j" \
+ ".[][\"$DL_HANDLE/$port_index\"].netdev" "-e"
+}
+
+check_ports_test()
+{
+ RET=0
+
+ for i in $(seq 0 $(expr $PORT_COUNT - 1)); do
+ netdev_name=$(port_netdev_get $i)
+ check_err $? "Failed to get netdev name for port $DL_HANDLE/$i"
+ ip -n $NETNS_NAME link show $netdev_name &> /dev/null
+ check_err $? "Failed to find netdev $netdev_name"
+ done
+
+ log_test "check ports test"
+}
+
+check_devlink_test()
+{
+ RET=0
+
+ devlink -N $NETNS_NAME dev show $DL_HANDLE &> /dev/null
+ check_err $? "Failed to show devlink instance"
+
+ log_test "check devlink test"
+}
+
+setup_prepare()
+{
+ modprobe netdevsim
+ ip netns add $NETNS_NAME
+ ip netns exec $NETNS_NAME \
+ echo "$BUS_ADDR $PORT_COUNT" > /sys/bus/netdevsim/new_device
+ while [ ! -d $SYSFS_NET_DIR ] ; do :; done
+}
+
+cleanup()
+{
+ pre_cleanup
+ echo "$BUS_ADDR" > /sys/bus/netdevsim/del_device
+ ip netns del $NETNS_NAME
+ modprobe -r netdevsim
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
new file mode 100755
index 000000000000..f101ab9441e2
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
@@ -0,0 +1,364 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking devlink-trap functionality. It makes use of
+# netdevsim which implements the required callbacks.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ init_test
+ trap_action_test
+ trap_metadata_test
+ bad_trap_test
+ bad_trap_action_test
+ trap_stats_test
+ trap_group_action_test
+ bad_trap_group_test
+ trap_group_stats_test
+ port_del_test
+ dev_del_test
+"
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+DEVLINK_DEV=netdevsim/${DEV}
+SLEEP_TIME=1
+NETDEV=""
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+require_command udevadm
+
+modprobe netdevsim &> /dev/null
+if [ ! -d "$NETDEVSIM_PATH" ]; then
+ echo "SKIP: No netdevsim support"
+ exit 1
+fi
+
+if [ -d "${NETDEVSIM_PATH}/devices/netdevsim${DEV_ADDR}" ]; then
+ echo "SKIP: Device netdevsim${DEV_ADDR} already exists"
+ exit 1
+fi
+
+init_test()
+{
+ RET=0
+
+ test $(devlink_traps_num_get) -ne 0
+ check_err $? "No traps were registered"
+
+ log_test "Initialization"
+}
+
+trap_action_test()
+{
+ local orig_action
+ local trap_name
+ local action
+
+ RET=0
+
+ for trap_name in $(devlink_traps_get); do
+ # The action of non-drop traps cannot be changed.
+ if [ $(devlink_trap_type_get $trap_name) = "drop" ]; then
+ devlink_trap_action_set $trap_name "trap"
+ action=$(devlink_trap_action_get $trap_name)
+ if [ $action != "trap" ]; then
+ check_err 1 "Trap $trap_name did not change action to trap"
+ fi
+
+ devlink_trap_action_set $trap_name "drop"
+ action=$(devlink_trap_action_get $trap_name)
+ if [ $action != "drop" ]; then
+ check_err 1 "Trap $trap_name did not change action to drop"
+ fi
+ else
+ orig_action=$(devlink_trap_action_get $trap_name)
+
+ devlink_trap_action_set $trap_name "trap"
+ action=$(devlink_trap_action_get $trap_name)
+ if [ $action != $orig_action ]; then
+ check_err 1 "Trap $trap_name changed action when should not"
+ fi
+
+ devlink_trap_action_set $trap_name "drop"
+ action=$(devlink_trap_action_get $trap_name)
+ if [ $action != $orig_action ]; then
+ check_err 1 "Trap $trap_name changed action when should not"
+ fi
+ fi
+ done
+
+ log_test "Trap action"
+}
+
+trap_metadata_test()
+{
+ local trap_name
+
+ RET=0
+
+ for trap_name in $(devlink_traps_get); do
+ devlink_trap_metadata_test $trap_name "input_port"
+ check_err $? "Input port not reported as metadata of trap $trap_name"
+ done
+
+ log_test "Trap metadata"
+}
+
+bad_trap_test()
+{
+ RET=0
+
+ devlink_trap_action_set "made_up_trap" "drop"
+ check_fail $? "Did not get an error for non-existing trap"
+
+ log_test "Non-existing trap"
+}
+
+bad_trap_action_test()
+{
+ local traps_arr
+ local trap_name
+
+ RET=0
+
+ # Pick first trap.
+ traps_arr=($(devlink_traps_get))
+ trap_name=${traps_arr[0]}
+
+ devlink_trap_action_set $trap_name "made_up_action"
+ check_fail $? "Did not get an error for non-existing trap action"
+
+ log_test "Non-existing trap action"
+}
+
+trap_stats_test()
+{
+ local trap_name
+
+ RET=0
+
+ for trap_name in $(devlink_traps_get); do
+ devlink_trap_stats_idle_test $trap_name
+ check_err $? "Stats of trap $trap_name not idle when netdev down"
+
+ ip link set dev $NETDEV up
+
+ if [ $(devlink_trap_type_get $trap_name) = "drop" ]; then
+ devlink_trap_action_set $trap_name "trap"
+ devlink_trap_stats_idle_test $trap_name
+ check_fail $? "Stats of trap $trap_name idle when action is trap"
+
+ devlink_trap_action_set $trap_name "drop"
+ devlink_trap_stats_idle_test $trap_name
+ check_err $? "Stats of trap $trap_name not idle when action is drop"
+ else
+ devlink_trap_stats_idle_test $trap_name
+ check_fail $? "Stats of non-drop trap $trap_name idle when should not"
+ fi
+
+ ip link set dev $NETDEV down
+ done
+
+ log_test "Trap statistics"
+}
+
+trap_group_action_test()
+{
+ local curr_group group_name
+ local trap_name
+ local trap_type
+ local action
+
+ RET=0
+
+ for group_name in $(devlink_trap_groups_get); do
+ devlink_trap_group_action_set $group_name "trap"
+
+ for trap_name in $(devlink_traps_get); do
+ curr_group=$(devlink_trap_group_get $trap_name)
+ if [ $curr_group != $group_name ]; then
+ continue
+ fi
+
+ trap_type=$(devlink_trap_type_get $trap_name)
+ if [ $trap_type != "drop" ]; then
+ continue
+ fi
+
+ action=$(devlink_trap_action_get $trap_name)
+ if [ $action != "trap" ]; then
+ check_err 1 "Trap $trap_name did not change action to trap"
+ fi
+ done
+
+ devlink_trap_group_action_set $group_name "drop"
+
+ for trap_name in $(devlink_traps_get); do
+ curr_group=$(devlink_trap_group_get $trap_name)
+ if [ $curr_group != $group_name ]; then
+ continue
+ fi
+
+ trap_type=$(devlink_trap_type_get $trap_name)
+ if [ $trap_type != "drop" ]; then
+ continue
+ fi
+
+ action=$(devlink_trap_action_get $trap_name)
+ if [ $action != "drop" ]; then
+ check_err 1 "Trap $trap_name did not change action to drop"
+ fi
+ done
+ done
+
+ log_test "Trap group action"
+}
+
+bad_trap_group_test()
+{
+ RET=0
+
+ devlink_trap_group_action_set "made_up_trap_group" "drop"
+ check_fail $? "Did not get an error for non-existing trap group"
+
+ log_test "Non-existing trap group"
+}
+
+trap_group_stats_test()
+{
+ local group_name
+
+ RET=0
+
+ for group_name in $(devlink_trap_groups_get); do
+ devlink_trap_group_stats_idle_test $group_name
+ check_err $? "Stats of trap group $group_name not idle when netdev down"
+
+ ip link set dev $NETDEV up
+
+ devlink_trap_group_action_set $group_name "trap"
+ devlink_trap_group_stats_idle_test $group_name
+ check_fail $? "Stats of trap group $group_name idle when action is trap"
+
+ devlink_trap_group_action_set $group_name "drop"
+ ip link set dev $NETDEV down
+ done
+
+ log_test "Trap group statistics"
+}
+
+port_del_test()
+{
+ local group_name
+ local i
+
+ # The test never fails. It is meant to exercise different code paths
+ # and make sure we properly dismantle a port while packets are
+ # in-flight.
+ RET=0
+
+ devlink_traps_enable_all
+
+ for i in $(seq 1 10); do
+ ip link set dev $NETDEV up
+
+ sleep $SLEEP_TIME
+
+ netdevsim_port_destroy
+ netdevsim_port_create
+ udevadm settle
+ done
+
+ devlink_traps_disable_all
+
+ log_test "Port delete"
+}
+
+dev_del_test()
+{
+ local group_name
+ local i
+
+ # The test never fails. It is meant to exercise different code paths
+ # and make sure we properly unregister traps while packets are
+ # in-flight.
+ RET=0
+
+ devlink_traps_enable_all
+
+ for i in $(seq 1 10); do
+ ip link set dev $NETDEV up
+
+ sleep $SLEEP_TIME
+
+ cleanup
+ setup_prepare
+ done
+
+ devlink_traps_disable_all
+
+ log_test "Device delete"
+}
+
+netdevsim_dev_create()
+{
+ echo "$DEV_ADDR 0" > ${NETDEVSIM_PATH}/new_device
+}
+
+netdevsim_dev_destroy()
+{
+ echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+}
+
+netdevsim_port_create()
+{
+ echo 1 > ${NETDEVSIM_PATH}/devices/${DEV}/new_port
+}
+
+netdevsim_port_destroy()
+{
+ echo 1 > ${NETDEVSIM_PATH}/devices/${DEV}/del_port
+}
+
+setup_prepare()
+{
+ local netdev
+
+ netdevsim_dev_create
+
+ if [ ! -d "${NETDEVSIM_PATH}/devices/${DEV}" ]; then
+ echo "Failed to create netdevsim device"
+ exit 1
+ fi
+
+ netdevsim_port_create
+
+ if [ ! -d "${NETDEVSIM_PATH}/devices/${DEV}/net/" ]; then
+ echo "Failed to create netdevsim port"
+ exit 1
+ fi
+
+ # Wait for udev to rename newly created netdev.
+ udevadm settle
+
+ NETDEV=$(ls ${NETDEVSIM_PATH}/devices/${DEV}/net/)
+}
+
+cleanup()
+{
+ pre_cleanup
+ netdevsim_port_destroy
+ netdevsim_dev_destroy
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib.sh b/tools/testing/selftests/drivers/net/netdevsim/fib.sh
new file mode 100755
index 000000000000..2f87c3be76a9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/fib.sh
@@ -0,0 +1,341 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking the FIB offload API. It makes use of netdevsim
+# which registers a listener to the FIB notification chain.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ ipv4_identical_routes
+ ipv4_tos
+ ipv4_metric
+ ipv4_replace
+ ipv4_delete
+ ipv4_plen
+ ipv4_replay
+ ipv4_flush
+ ipv4_error_path
+ ipv6_add
+ ipv6_metric
+ ipv6_append_single
+ ipv6_replace_single
+ ipv6_metric_multipath
+ ipv6_append_multipath
+ ipv6_replace_multipath
+ ipv6_append_multipath_to_single
+ ipv6_delete_single
+ ipv6_delete_multipath
+ ipv6_replay_single
+ ipv6_replay_multipath
+ ipv6_error_path
+"
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+DEVLINK_DEV=netdevsim/${DEV}
+SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source $lib_dir/fib_offload_lib.sh
+
+ipv4_identical_routes()
+{
+ fib_ipv4_identical_routes_test "testns1"
+}
+
+ipv4_tos()
+{
+ fib_ipv4_tos_test "testns1"
+}
+
+ipv4_metric()
+{
+ fib_ipv4_metric_test "testns1"
+}
+
+ipv4_replace()
+{
+ fib_ipv4_replace_test "testns1"
+}
+
+ipv4_delete()
+{
+ fib_ipv4_delete_test "testns1"
+}
+
+ipv4_plen()
+{
+ fib_ipv4_plen_test "testns1"
+}
+
+ipv4_replay_metric()
+{
+ fib_ipv4_replay_metric_test "testns1" "$DEVLINK_DEV"
+}
+
+ipv4_replay_tos()
+{
+ fib_ipv4_replay_tos_test "testns1" "$DEVLINK_DEV"
+}
+
+ipv4_replay_plen()
+{
+ fib_ipv4_replay_plen_test "testns1" "$DEVLINK_DEV"
+}
+
+ipv4_replay()
+{
+ ipv4_replay_metric
+ ipv4_replay_tos
+ ipv4_replay_plen
+}
+
+ipv4_flush()
+{
+ fib_ipv4_flush_test "testns1"
+}
+
+ipv4_error_path_add()
+{
+ local lsb
+
+ RET=0
+
+ ip -n testns1 link add name dummy1 type dummy
+ ip -n testns1 link set dev dummy1 up
+
+ devlink -N testns1 resource set $DEVLINK_DEV path IPv4/fib size 10
+ devlink -N testns1 dev reload $DEVLINK_DEV
+
+ for lsb in $(seq 1 20); do
+ ip -n testns1 route add 192.0.2.${lsb}/32 dev dummy1 \
+ &> /dev/null
+ done
+
+ log_test "IPv4 error path - add"
+
+ ip -n testns1 link del dev dummy1
+}
+
+ipv4_error_path_replay()
+{
+ local lsb
+
+ RET=0
+
+ ip -n testns1 link add name dummy1 type dummy
+ ip -n testns1 link set dev dummy1 up
+
+ devlink -N testns1 resource set $DEVLINK_DEV path IPv4/fib size 100
+ devlink -N testns1 dev reload $DEVLINK_DEV
+
+ for lsb in $(seq 1 20); do
+ ip -n testns1 route add 192.0.2.${lsb}/32 dev dummy1
+ done
+
+ devlink -N testns1 resource set $DEVLINK_DEV path IPv4/fib size 10
+ devlink -N testns1 dev reload $DEVLINK_DEV &> /dev/null
+
+ log_test "IPv4 error path - replay"
+
+ ip -n testns1 link del dev dummy1
+
+ # Successfully reload after deleting all the routes.
+ devlink -N testns1 resource set $DEVLINK_DEV path IPv4/fib size 100
+ devlink -N testns1 dev reload $DEVLINK_DEV
+}
+
+ipv4_error_path()
+{
+ # Test the different error paths of the notifiers by limiting the size
+ # of the "IPv4/fib" resource.
+ ipv4_error_path_add
+ ipv4_error_path_replay
+}
+
+ipv6_add()
+{
+ fib_ipv6_add_test "testns1"
+}
+
+ipv6_metric()
+{
+ fib_ipv6_metric_test "testns1"
+}
+
+ipv6_append_single()
+{
+ fib_ipv6_append_single_test "testns1"
+}
+
+ipv6_replace_single()
+{
+ fib_ipv6_replace_single_test "testns1"
+}
+
+ipv6_metric_multipath()
+{
+ fib_ipv6_metric_multipath_test "testns1"
+}
+
+ipv6_append_multipath()
+{
+ fib_ipv6_append_multipath_test "testns1"
+}
+
+ipv6_replace_multipath()
+{
+ fib_ipv6_replace_multipath_test "testns1"
+}
+
+ipv6_append_multipath_to_single()
+{
+ fib_ipv6_append_multipath_to_single_test "testns1"
+}
+
+ipv6_delete_single()
+{
+ fib_ipv6_delete_single_test "testns1"
+}
+
+ipv6_delete_multipath()
+{
+ fib_ipv6_delete_multipath_test "testns1"
+}
+
+ipv6_replay_single()
+{
+ fib_ipv6_replay_single_test "testns1" "$DEVLINK_DEV"
+}
+
+ipv6_replay_multipath()
+{
+ fib_ipv6_replay_multipath_test "testns1" "$DEVLINK_DEV"
+}
+
+ipv6_error_path_add_single()
+{
+ local lsb
+
+ RET=0
+
+ ip -n testns1 link add name dummy1 type dummy
+ ip -n testns1 link set dev dummy1 up
+
+ devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 10
+ devlink -N testns1 dev reload $DEVLINK_DEV
+
+ for lsb in $(seq 1 20); do
+ ip -n testns1 route add 2001:db8:1::${lsb}/128 dev dummy1 \
+ &> /dev/null
+ done
+
+ log_test "IPv6 error path - add single"
+
+ ip -n testns1 link del dev dummy1
+}
+
+ipv6_error_path_add_multipath()
+{
+ local lsb
+
+ RET=0
+
+ for i in $(seq 1 2); do
+ ip -n testns1 link add name dummy$i type dummy
+ ip -n testns1 link set dev dummy$i up
+ ip -n testns1 address add 2001:db8:$i::1/64 dev dummy$i
+ done
+
+ devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 10
+ devlink -N testns1 dev reload $DEVLINK_DEV
+
+ for lsb in $(seq 1 20); do
+ ip -n testns1 route add 2001:db8:10::${lsb}/128 \
+ nexthop via 2001:db8:1::2 dev dummy1 \
+ nexthop via 2001:db8:2::2 dev dummy2 &> /dev/null
+ done
+
+ log_test "IPv6 error path - add multipath"
+
+ for i in $(seq 1 2); do
+ ip -n testns1 link del dev dummy$i
+ done
+}
+
+ipv6_error_path_replay()
+{
+ local lsb
+
+ RET=0
+
+ ip -n testns1 link add name dummy1 type dummy
+ ip -n testns1 link set dev dummy1 up
+
+ devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 100
+ devlink -N testns1 dev reload $DEVLINK_DEV
+
+ for lsb in $(seq 1 20); do
+ ip -n testns1 route add 2001:db8:1::${lsb}/128 dev dummy1
+ done
+
+ devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 10
+ devlink -N testns1 dev reload $DEVLINK_DEV &> /dev/null
+
+ log_test "IPv6 error path - replay"
+
+ ip -n testns1 link del dev dummy1
+
+ # Successfully reload after deleting all the routes.
+ devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 100
+ devlink -N testns1 dev reload $DEVLINK_DEV
+}
+
+ipv6_error_path()
+{
+ # Test the different error paths of the notifiers by limiting the size
+ # of the "IPv6/fib" resource.
+ ipv6_error_path_add_single
+ ipv6_error_path_add_multipath
+ ipv6_error_path_replay
+}
+
+setup_prepare()
+{
+ local netdev
+
+ modprobe netdevsim &> /dev/null
+
+ echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device
+ while [ ! -d $SYSFS_NET_DIR ] ; do :; done
+
+ ip netns add testns1
+ if [ $? -ne 0 ]; then
+ echo "Failed to add netns \"testns1\""
+ exit 1
+ fi
+
+ devlink dev reload $DEVLINK_DEV netns testns1
+ if [ $? -ne 0 ]; then
+ echo "Failed to reload into netns \"testns1\""
+ exit 1
+ fi
+}
+
+cleanup()
+{
+ pre_cleanup
+ ip netns del testns1
+ echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+ modprobe -r netdevsim &> /dev/null
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/filesystems/epoll/.gitignore b/tools/testing/selftests/filesystems/epoll/.gitignore
new file mode 100644
index 000000000000..9ae8db44ec14
--- /dev/null
+++ b/tools/testing/selftests/filesystems/epoll/.gitignore
@@ -0,0 +1 @@
+epoll_wakeup_test
diff --git a/tools/testing/selftests/filesystems/epoll/Makefile b/tools/testing/selftests/filesystems/epoll/Makefile
new file mode 100644
index 000000000000..78ae4aaf7141
--- /dev/null
+++ b/tools/testing/selftests/filesystems/epoll/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += -I../../../../../usr/include/
+LDLIBS += -lpthread
+TEST_GEN_PROGS := epoll_wakeup_test
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
new file mode 100644
index 000000000000..37a04dab56f0
--- /dev/null
+++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
@@ -0,0 +1,3074 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <poll.h>
+#include <unistd.h>
+#include <signal.h>
+#include <pthread.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include "../../kselftest_harness.h"
+
+struct epoll_mtcontext
+{
+ int efd[3];
+ int sfd[4];
+ int count;
+
+ pthread_t main;
+ pthread_t waiter;
+};
+
+static void signal_handler(int signum)
+{
+}
+
+static void kill_timeout(struct epoll_mtcontext *ctx)
+{
+ usleep(1000000);
+ pthread_kill(ctx->main, SIGUSR1);
+ pthread_kill(ctx->waiter, SIGUSR1);
+}
+
+static void *waiter_entry1a(void *data)
+{
+ struct epoll_event e;
+ struct epoll_mtcontext *ctx = data;
+
+ if (epoll_wait(ctx->efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx->count, 1);
+
+ return NULL;
+}
+
+static void *waiter_entry1ap(void *data)
+{
+ struct pollfd pfd;
+ struct epoll_event e;
+ struct epoll_mtcontext *ctx = data;
+
+ pfd.fd = ctx->efd[0];
+ pfd.events = POLLIN;
+ if (poll(&pfd, 1, -1) > 0) {
+ if (epoll_wait(ctx->efd[0], &e, 1, 0) > 0)
+ __sync_fetch_and_add(&ctx->count, 1);
+ }
+
+ return NULL;
+}
+
+static void *waiter_entry1o(void *data)
+{
+ struct epoll_event e;
+ struct epoll_mtcontext *ctx = data;
+
+ if (epoll_wait(ctx->efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_or(&ctx->count, 1);
+
+ return NULL;
+}
+
+static void *waiter_entry1op(void *data)
+{
+ struct pollfd pfd;
+ struct epoll_event e;
+ struct epoll_mtcontext *ctx = data;
+
+ pfd.fd = ctx->efd[0];
+ pfd.events = POLLIN;
+ if (poll(&pfd, 1, -1) > 0) {
+ if (epoll_wait(ctx->efd[0], &e, 1, 0) > 0)
+ __sync_fetch_and_or(&ctx->count, 1);
+ }
+
+ return NULL;
+}
+
+static void *waiter_entry2a(void *data)
+{
+ struct epoll_event events[2];
+ struct epoll_mtcontext *ctx = data;
+
+ if (epoll_wait(ctx->efd[0], events, 2, -1) > 0)
+ __sync_fetch_and_add(&ctx->count, 1);
+
+ return NULL;
+}
+
+static void *waiter_entry2ap(void *data)
+{
+ struct pollfd pfd;
+ struct epoll_event events[2];
+ struct epoll_mtcontext *ctx = data;
+
+ pfd.fd = ctx->efd[0];
+ pfd.events = POLLIN;
+ if (poll(&pfd, 1, -1) > 0) {
+ if (epoll_wait(ctx->efd[0], events, 2, 0) > 0)
+ __sync_fetch_and_add(&ctx->count, 1);
+ }
+
+ return NULL;
+}
+
+static void *emitter_entry1(void *data)
+{
+ struct epoll_mtcontext *ctx = data;
+
+ usleep(100000);
+ write(ctx->sfd[1], "w", 1);
+
+ kill_timeout(ctx);
+
+ return NULL;
+}
+
+static void *emitter_entry2(void *data)
+{
+ struct epoll_mtcontext *ctx = data;
+
+ usleep(100000);
+ write(ctx->sfd[1], "w", 1);
+ write(ctx->sfd[3], "w", 1);
+
+ kill_timeout(ctx);
+
+ return NULL;
+}
+
+/*
+ * t0
+ * | (ew)
+ * e0
+ * | (lt)
+ * s0
+ */
+TEST(epoll1)
+{
+ int efd;
+ int sfd[2];
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+ efd = epoll_create(1);
+ ASSERT_GE(efd, 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+ EXPECT_EQ(epoll_wait(efd, &e, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd, &e, 1, 0), 1);
+
+ close(efd);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/*
+ * t0
+ * | (ew)
+ * e0
+ * | (et)
+ * s0
+ */
+TEST(epoll2)
+{
+ int efd;
+ int sfd[2];
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+ efd = epoll_create(1);
+ ASSERT_GE(efd, 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+ EXPECT_EQ(epoll_wait(efd, &e, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd, &e, 1, 0), 0);
+
+ close(efd);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/*
+ * t0
+ * | (ew)
+ * e0
+ * (lt) / \ (lt)
+ * s0 s2
+ */
+TEST(epoll3)
+{
+ int efd;
+ int sfd[4];
+ struct epoll_event events[2];
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0);
+
+ efd = epoll_create(1);
+ ASSERT_GE(efd, 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], events), 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[2], events), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+ ASSERT_EQ(write(sfd[3], "w", 1), 1);
+
+ EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2);
+ EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2);
+
+ close(efd);
+ close(sfd[0]);
+ close(sfd[1]);
+ close(sfd[2]);
+ close(sfd[3]);
+}
+
+/*
+ * t0
+ * | (ew)
+ * e0
+ * (et) / \ (et)
+ * s0 s2
+ */
+TEST(epoll4)
+{
+ int efd;
+ int sfd[4];
+ struct epoll_event events[2];
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0);
+
+ efd = epoll_create(1);
+ ASSERT_GE(efd, 0);
+
+ events[0].events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], events), 0);
+
+ events[0].events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[2], events), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+ ASSERT_EQ(write(sfd[3], "w", 1), 1);
+
+ EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2);
+ EXPECT_EQ(epoll_wait(efd, events, 2, 0), 0);
+
+ close(efd);
+ close(sfd[0]);
+ close(sfd[1]);
+ close(sfd[2]);
+ close(sfd[3]);
+}
+
+/*
+ * t0
+ * | (p)
+ * e0
+ * | (lt)
+ * s0
+ */
+TEST(epoll5)
+{
+ int efd;
+ int sfd[2];
+ struct pollfd pfd;
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+
+ efd = epoll_create(1);
+ ASSERT_GE(efd, 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+ pfd.fd = efd;
+ pfd.events = POLLIN;
+ ASSERT_EQ(poll(&pfd, 1, 0), 1);
+ ASSERT_EQ(epoll_wait(efd, &e, 1, 0), 1);
+
+ pfd.fd = efd;
+ pfd.events = POLLIN;
+ ASSERT_EQ(poll(&pfd, 1, 0), 1);
+ ASSERT_EQ(epoll_wait(efd, &e, 1, 0), 1);
+
+ close(efd);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/*
+ * t0
+ * | (p)
+ * e0
+ * | (et)
+ * s0
+ */
+TEST(epoll6)
+{
+ int efd;
+ int sfd[2];
+ struct pollfd pfd;
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+
+ efd = epoll_create(1);
+ ASSERT_GE(efd, 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+ pfd.fd = efd;
+ pfd.events = POLLIN;
+ ASSERT_EQ(poll(&pfd, 1, 0), 1);
+ ASSERT_EQ(epoll_wait(efd, &e, 1, 0), 1);
+
+ pfd.fd = efd;
+ pfd.events = POLLIN;
+ ASSERT_EQ(poll(&pfd, 1, 0), 0);
+ ASSERT_EQ(epoll_wait(efd, &e, 1, 0), 0);
+
+ close(efd);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/*
+ * t0
+ * | (p)
+ * e0
+ * (lt) / \ (lt)
+ * s0 s2
+ */
+
+TEST(epoll7)
+{
+ int efd;
+ int sfd[4];
+ struct pollfd pfd;
+ struct epoll_event events[2];
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0);
+
+ efd = epoll_create(1);
+ ASSERT_GE(efd, 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], events), 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[2], events), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+ ASSERT_EQ(write(sfd[3], "w", 1), 1);
+
+ pfd.fd = efd;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2);
+
+ pfd.fd = efd;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2);
+
+ close(efd);
+ close(sfd[0]);
+ close(sfd[1]);
+ close(sfd[2]);
+ close(sfd[3]);
+}
+
+/*
+ * t0
+ * | (p)
+ * e0
+ * (et) / \ (et)
+ * s0 s2
+ */
+TEST(epoll8)
+{
+ int efd;
+ int sfd[4];
+ struct pollfd pfd;
+ struct epoll_event events[2];
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0);
+
+ efd = epoll_create(1);
+ ASSERT_GE(efd, 0);
+
+ events[0].events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], events), 0);
+
+ events[0].events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[2], events), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+ ASSERT_EQ(write(sfd[3], "w", 1), 1);
+
+ pfd.fd = efd;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2);
+
+ pfd.fd = efd;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 0);
+ EXPECT_EQ(epoll_wait(efd, events, 2, 0), 0);
+
+ close(efd);
+ close(sfd[0]);
+ close(sfd[1]);
+ close(sfd[2]);
+ close(sfd[3]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (ew)
+ * e0
+ * | (lt)
+ * s0
+ */
+TEST(epoll9)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (ew)
+ * e0
+ * | (et)
+ * s0
+ */
+TEST(epoll10)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 1);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (ew)
+ * e0
+ * (lt) / \ (lt)
+ * s0 s2
+ */
+TEST(epoll11)
+{
+ pthread_t emitter;
+ struct epoll_event events[2];
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], events), 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[2], events), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry2a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], events, 2, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+ close(ctx.sfd[2]);
+ close(ctx.sfd[3]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (ew)
+ * e0
+ * (et) / \ (et)
+ * s0 s2
+ */
+TEST(epoll12)
+{
+ pthread_t emitter;
+ struct epoll_event events[2];
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ events[0].events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], events), 0);
+
+ events[0].events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[2], events), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], events, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+ close(ctx.sfd[2]);
+ close(ctx.sfd[3]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (p)
+ * e0
+ * | (lt)
+ * s0
+ */
+TEST(epoll13)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (p)
+ * e0
+ * | (et)
+ * s0
+ */
+TEST(epoll14)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 1);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (p)
+ * e0
+ * (lt) / \ (lt)
+ * s0 s2
+ */
+TEST(epoll15)
+{
+ pthread_t emitter;
+ struct epoll_event events[2];
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], events), 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[2], events), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry2ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], events, 2, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+ close(ctx.sfd[2]);
+ close(ctx.sfd[3]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (p)
+ * e0
+ * (et) / \ (et)
+ * s0 s2
+ */
+TEST(epoll16)
+{
+ pthread_t emitter;
+ struct epoll_event events[2];
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ events[0].events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], events), 0);
+
+ events[0].events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[2], events), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], events, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+ close(ctx.sfd[2]);
+ close(ctx.sfd[3]);
+}
+
+/*
+ * t0
+ * | (ew)
+ * e0
+ * | (lt)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll17)
+{
+ int efd[2];
+ int sfd[2];
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+ efd[0] = epoll_create(1);
+ ASSERT_GE(efd[0], 0);
+
+ efd[1] = epoll_create(1);
+ ASSERT_GE(efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+
+ close(efd[0]);
+ close(efd[1]);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/*
+ * t0
+ * | (ew)
+ * e0
+ * | (lt)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll18)
+{
+ int efd[2];
+ int sfd[2];
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+ efd[0] = epoll_create(1);
+ ASSERT_GE(efd[0], 0);
+
+ efd[1] = epoll_create(1);
+ ASSERT_GE(efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+
+ close(efd[0]);
+ close(efd[1]);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/*
+ * t0
+ * | (ew)
+ * e0
+ * | (et)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll19)
+{
+ int efd[2];
+ int sfd[2];
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+ efd[0] = epoll_create(1);
+ ASSERT_GE(efd[0], 0);
+
+ efd[1] = epoll_create(1);
+ ASSERT_GE(efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 0);
+
+ close(efd[0]);
+ close(efd[1]);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/*
+ * t0
+ * | (ew)
+ * e0
+ * | (et)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll20)
+{
+ int efd[2];
+ int sfd[2];
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+ efd[0] = epoll_create(1);
+ ASSERT_GE(efd[0], 0);
+
+ efd[1] = epoll_create(1);
+ ASSERT_GE(efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 0);
+
+ close(efd[0]);
+ close(efd[1]);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/*
+ * t0
+ * | (p)
+ * e0
+ * | (lt)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll21)
+{
+ int efd[2];
+ int sfd[2];
+ struct pollfd pfd;
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+ efd[0] = epoll_create(1);
+ ASSERT_GE(efd[0], 0);
+
+ efd[1] = epoll_create(1);
+ ASSERT_GE(efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+ pfd.fd = efd[0];
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+
+ pfd.fd = efd[0];
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+
+ close(efd[0]);
+ close(efd[1]);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/*
+ * t0
+ * | (p)
+ * e0
+ * | (lt)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll22)
+{
+ int efd[2];
+ int sfd[2];
+ struct pollfd pfd;
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+ efd[0] = epoll_create(1);
+ ASSERT_GE(efd[0], 0);
+
+ efd[1] = epoll_create(1);
+ ASSERT_GE(efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+ pfd.fd = efd[0];
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+
+ pfd.fd = efd[0];
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+
+ close(efd[0]);
+ close(efd[1]);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/*
+ * t0
+ * | (p)
+ * e0
+ * | (et)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll23)
+{
+ int efd[2];
+ int sfd[2];
+ struct pollfd pfd;
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+ efd[0] = epoll_create(1);
+ ASSERT_GE(efd[0], 0);
+
+ efd[1] = epoll_create(1);
+ ASSERT_GE(efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+ pfd.fd = efd[0];
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+
+ pfd.fd = efd[0];
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 0);
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 0);
+
+ close(efd[0]);
+ close(efd[1]);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/*
+ * t0
+ * | (p)
+ * e0
+ * | (et)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll24)
+{
+ int efd[2];
+ int sfd[2];
+ struct pollfd pfd;
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+ efd[0] = epoll_create(1);
+ ASSERT_GE(efd[0], 0);
+
+ efd[1] = epoll_create(1);
+ ASSERT_GE(efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+ pfd.fd = efd[0];
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+
+ pfd.fd = efd[0];
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 0);
+ EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 0);
+
+ close(efd[0]);
+ close(efd[1]);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (ew)
+ * e0
+ * | (lt)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll25)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (ew)
+ * e0
+ * | (lt)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll26)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (ew)
+ * e0
+ * | (et)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll27)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 1);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (ew)
+ * e0
+ * | (et)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll28)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 1);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (p)
+ * e0
+ * | (lt)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll29)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (p)
+ * e0
+ * | (lt)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll30)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (p)
+ * e0
+ * | (et)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll31)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 1);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (p)
+ * e0
+ * | (et)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll32)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 1);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) | | (ew)
+ * | e0
+ * \ / (lt)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll33)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) | | (ew)
+ * | e0
+ * \ / (lt)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll34)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1o, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+ __sync_fetch_and_or(&ctx.count, 2);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3));
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) | | (ew)
+ * | e0
+ * \ / (et)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll35)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) | | (ew)
+ * | e0
+ * \ / (et)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll36)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1o, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+ __sync_fetch_and_or(&ctx.count, 2);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3));
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (p) | | (ew)
+ * | e0
+ * \ / (lt)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll37)
+{
+ pthread_t emitter;
+ struct pollfd pfd;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ pfd.fd = ctx.efd[1];
+ pfd.events = POLLIN;
+ if (poll(&pfd, 1, -1) > 0) {
+ if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+ }
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (p) | | (ew)
+ * | e0
+ * \ / (lt)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll38)
+{
+ pthread_t emitter;
+ struct pollfd pfd;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1o, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ pfd.fd = ctx.efd[1];
+ pfd.events = POLLIN;
+ if (poll(&pfd, 1, -1) > 0) {
+ if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0)
+ __sync_fetch_and_or(&ctx.count, 2);
+ }
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3));
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (p) | | (ew)
+ * | e0
+ * \ / (et)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll39)
+{
+ pthread_t emitter;
+ struct pollfd pfd;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ pfd.fd = ctx.efd[1];
+ pfd.events = POLLIN;
+ if (poll(&pfd, 1, -1) > 0) {
+ if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+ }
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (p) | | (ew)
+ * | e0
+ * \ / (et)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll40)
+{
+ pthread_t emitter;
+ struct pollfd pfd;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1o, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ pfd.fd = ctx.efd[1];
+ pfd.events = POLLIN;
+ if (poll(&pfd, 1, -1) > 0) {
+ if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0)
+ __sync_fetch_and_or(&ctx.count, 2);
+ }
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3));
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) | | (p)
+ * | e0
+ * \ / (lt)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll41)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) | | (p)
+ * | e0
+ * \ / (lt)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll42)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1op, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+ __sync_fetch_and_or(&ctx.count, 2);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3));
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) | | (p)
+ * | e0
+ * \ / (et)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll43)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) | | (p)
+ * | e0
+ * \ / (et)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll44)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1op, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+ __sync_fetch_and_or(&ctx.count, 2);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3));
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (p) | | (p)
+ * | e0
+ * \ / (lt)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll45)
+{
+ pthread_t emitter;
+ struct pollfd pfd;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ pfd.fd = ctx.efd[1];
+ pfd.events = POLLIN;
+ if (poll(&pfd, 1, -1) > 0) {
+ if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+ }
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (p) | | (p)
+ * | e0
+ * \ / (lt)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll46)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1op, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+ __sync_fetch_and_or(&ctx.count, 2);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3));
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (p) | | (p)
+ * | e0
+ * \ / (et)
+ * e1
+ * | (lt)
+ * s0
+ */
+TEST(epoll47)
+{
+ pthread_t emitter;
+ struct pollfd pfd;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ pfd.fd = ctx.efd[1];
+ pfd.events = POLLIN;
+ if (poll(&pfd, 1, -1) > 0) {
+ if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+ }
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (p) | | (p)
+ * | e0
+ * \ / (et)
+ * e1
+ * | (et)
+ * s0
+ */
+TEST(epoll48)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1op, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+ __sync_fetch_and_or(&ctx.count, 2);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3));
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
+/*
+ * t0
+ * | (ew)
+ * e0
+ * (lt) / \ (lt)
+ * e1 e2
+ * (lt) | | (lt)
+ * s0 s2
+ */
+TEST(epoll49)
+{
+ int efd[3];
+ int sfd[4];
+ struct epoll_event events[2];
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0);
+
+ efd[0] = epoll_create(1);
+ ASSERT_GE(efd[0], 0);
+
+ efd[1] = epoll_create(1);
+ ASSERT_GE(efd[1], 0);
+
+ efd[2] = epoll_create(1);
+ ASSERT_GE(efd[2], 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], events), 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[2], EPOLL_CTL_ADD, sfd[2], events), 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], events), 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[2], events), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+ ASSERT_EQ(write(sfd[3], "w", 1), 1);
+
+ EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2);
+ EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2);
+
+ close(efd[0]);
+ close(efd[1]);
+ close(efd[2]);
+ close(sfd[0]);
+ close(sfd[1]);
+ close(sfd[2]);
+ close(sfd[3]);
+}
+
+/*
+ * t0
+ * | (ew)
+ * e0
+ * (et) / \ (et)
+ * e1 e2
+ * (lt) | | (lt)
+ * s0 s2
+ */
+TEST(epoll50)
+{
+ int efd[3];
+ int sfd[4];
+ struct epoll_event events[2];
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0);
+
+ efd[0] = epoll_create(1);
+ ASSERT_GE(efd[0], 0);
+
+ efd[1] = epoll_create(1);
+ ASSERT_GE(efd[1], 0);
+
+ efd[2] = epoll_create(1);
+ ASSERT_GE(efd[2], 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], events), 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[2], EPOLL_CTL_ADD, sfd[2], events), 0);
+
+ events[0].events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], events), 0);
+
+ events[0].events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[2], events), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+ ASSERT_EQ(write(sfd[3], "w", 1), 1);
+
+ EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2);
+ EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 0);
+
+ close(efd[0]);
+ close(efd[1]);
+ close(efd[2]);
+ close(sfd[0]);
+ close(sfd[1]);
+ close(sfd[2]);
+ close(sfd[3]);
+}
+
+/*
+ * t0
+ * | (p)
+ * e0
+ * (lt) / \ (lt)
+ * e1 e2
+ * (lt) | | (lt)
+ * s0 s2
+ */
+TEST(epoll51)
+{
+ int efd[3];
+ int sfd[4];
+ struct pollfd pfd;
+ struct epoll_event events[2];
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0);
+
+ efd[0] = epoll_create(1);
+ ASSERT_GE(efd[0], 0);
+
+ efd[1] = epoll_create(1);
+ ASSERT_GE(efd[1], 0);
+
+ efd[2] = epoll_create(1);
+ ASSERT_GE(efd[2], 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], events), 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[2], EPOLL_CTL_ADD, sfd[2], events), 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], events), 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[2], events), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+ ASSERT_EQ(write(sfd[3], "w", 1), 1);
+
+ pfd.fd = efd[0];
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2);
+
+ pfd.fd = efd[0];
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2);
+
+ close(efd[0]);
+ close(efd[1]);
+ close(efd[2]);
+ close(sfd[0]);
+ close(sfd[1]);
+ close(sfd[2]);
+ close(sfd[3]);
+}
+
+/*
+ * t0
+ * | (p)
+ * e0
+ * (et) / \ (et)
+ * e1 e2
+ * (lt) | | (lt)
+ * s0 s2
+ */
+TEST(epoll52)
+{
+ int efd[3];
+ int sfd[4];
+ struct pollfd pfd;
+ struct epoll_event events[2];
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0);
+
+ efd[0] = epoll_create(1);
+ ASSERT_GE(efd[0], 0);
+
+ efd[1] = epoll_create(1);
+ ASSERT_GE(efd[1], 0);
+
+ efd[2] = epoll_create(1);
+ ASSERT_GE(efd[2], 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], events), 0);
+
+ events[0].events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd[2], EPOLL_CTL_ADD, sfd[2], events), 0);
+
+ events[0].events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], events), 0);
+
+ events[0].events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[2], events), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+ ASSERT_EQ(write(sfd[3], "w", 1), 1);
+
+ pfd.fd = efd[0];
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 1);
+ EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2);
+
+ pfd.fd = efd[0];
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 0);
+ EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 0);
+
+ close(efd[0]);
+ close(efd[1]);
+ close(efd[2]);
+ close(sfd[0]);
+ close(sfd[1]);
+ close(sfd[2]);
+ close(sfd[3]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (ew)
+ * e0
+ * (lt) / \ (lt)
+ * e1 e2
+ * (lt) | | (lt)
+ * s0 s2
+ */
+TEST(epoll53)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ ctx.efd[2] = epoll_create(1);
+ ASSERT_GE(ctx.efd[2], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.efd[2]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+ close(ctx.sfd[2]);
+ close(ctx.sfd[3]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (ew)
+ * e0
+ * (et) / \ (et)
+ * e1 e2
+ * (lt) | | (lt)
+ * s0 s2
+ */
+TEST(epoll54)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ ctx.efd[2] = epoll_create(1);
+ ASSERT_GE(ctx.efd[2], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.efd[2]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+ close(ctx.sfd[2]);
+ close(ctx.sfd[3]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (p)
+ * e0
+ * (lt) / \ (lt)
+ * e1 e2
+ * (lt) | | (lt)
+ * s0 s2
+ */
+TEST(epoll55)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ ctx.efd[2] = epoll_create(1);
+ ASSERT_GE(ctx.efd[2], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.efd[2]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+ close(ctx.sfd[2]);
+ close(ctx.sfd[3]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (p)
+ * e0
+ * (et) / \ (et)
+ * e1 e2
+ * (lt) | | (lt)
+ * s0 s2
+ */
+TEST(epoll56)
+{
+ pthread_t emitter;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ ctx.efd[2] = epoll_create(1);
+ ASSERT_GE(ctx.efd[2], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+ if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.efd[2]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+ close(ctx.sfd[2]);
+ close(ctx.sfd[3]);
+}
+
+/*
+ * t0 t1
+ * (p) \ / (p)
+ * e0
+ * (lt) / \ (lt)
+ * e1 e2
+ * (lt) | | (lt)
+ * s0 s2
+ */
+TEST(epoll57)
+{
+ pthread_t emitter;
+ struct pollfd pfd;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ ctx.efd[2] = epoll_create(1);
+ ASSERT_GE(ctx.efd[2], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+ pfd.fd = ctx.efd[0];
+ pfd.events = POLLIN;
+ if (poll(&pfd, 1, -1) > 0) {
+ if (epoll_wait(ctx.efd[0], &e, 1, 0) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+ }
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.efd[2]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+ close(ctx.sfd[2]);
+ close(ctx.sfd[3]);
+}
+
+/*
+ * t0 t1
+ * (p) \ / (p)
+ * e0
+ * (et) / \ (et)
+ * e1 e2
+ * (lt) | | (lt)
+ * s0 s2
+ */
+TEST(epoll58)
+{
+ pthread_t emitter;
+ struct pollfd pfd;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.efd[1] = epoll_create(1);
+ ASSERT_GE(ctx.efd[1], 0);
+
+ ctx.efd[2] = epoll_create(1);
+ ASSERT_GE(ctx.efd[2], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0);
+
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+ pfd.fd = ctx.efd[0];
+ pfd.events = POLLIN;
+ if (poll(&pfd, 1, -1) > 0) {
+ if (epoll_wait(ctx.efd[0], &e, 1, 0) > 0)
+ __sync_fetch_and_add(&ctx.count, 1);
+ }
+
+ ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+ EXPECT_EQ(ctx.count, 2);
+
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+
+ close(ctx.efd[0]);
+ close(ctx.efd[1]);
+ close(ctx.efd[2]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+ close(ctx.sfd[2]);
+ close(ctx.sfd[3]);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
index f901076aa2ea..56894477c8bd 100755
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -116,6 +116,16 @@ config_set_name()
echo -n $1 > $DIR/config_name
}
+config_set_into_buf()
+{
+ echo 1 > $DIR/config_into_buf
+}
+
+config_unset_into_buf()
+{
+ echo 0 > $DIR/config_into_buf
+}
+
config_set_sync_direct()
{
echo 1 > $DIR/config_sync_direct
@@ -153,11 +163,14 @@ config_set_read_fw_idx()
read_firmwares()
{
- if [ "$1" = "xzonly" ]; then
- fwfile="${FW}-orig"
+ if [ "$(cat $DIR/config_into_buf)" == "1" ]; then
+ fwfile="$FW_INTO_BUF"
else
fwfile="$FW"
fi
+ if [ "$1" = "xzonly" ]; then
+ fwfile="${fwfile}-orig"
+ fi
for i in $(seq 0 3); do
config_set_read_fw_idx $i
# Verify the contents are what we expect.
@@ -194,6 +207,18 @@ test_batched_request_firmware_nofile()
echo "OK"
}
+test_batched_request_firmware_into_buf_nofile()
+{
+ echo -n "Batched request_firmware_into_buf() nofile try #$1: "
+ config_reset
+ config_set_name nope-test-firmware.bin
+ config_set_into_buf
+ config_trigger_sync
+ read_firmwares_expect_nofile
+ release_all_firmware
+ echo "OK"
+}
+
test_batched_request_firmware_direct_nofile()
{
echo -n "Batched request_firmware_direct() nofile try #$1: "
@@ -259,6 +284,18 @@ test_batched_request_firmware()
echo "OK"
}
+test_batched_request_firmware_into_buf()
+{
+ echo -n "Batched request_firmware_into_buf() $2 try #$1: "
+ config_reset
+ config_set_name $TEST_FIRMWARE_INTO_BUF_FILENAME
+ config_set_into_buf
+ config_trigger_sync
+ read_firmwares $2
+ release_all_firmware
+ echo "OK"
+}
+
test_batched_request_firmware_direct()
{
echo -n "Batched request_firmware_direct() $2 try #$1: "
@@ -308,6 +345,10 @@ for i in $(seq 1 5); do
done
for i in $(seq 1 5); do
+ test_batched_request_firmware_into_buf $i normal
+done
+
+for i in $(seq 1 5); do
test_batched_request_firmware_direct $i normal
done
@@ -328,6 +369,10 @@ for i in $(seq 1 5); do
done
for i in $(seq 1 5); do
+ test_batched_request_firmware_into_buf_nofile $i
+done
+
+for i in $(seq 1 5); do
test_batched_request_firmware_direct_nofile $i
done
@@ -351,6 +396,10 @@ for i in $(seq 1 5); do
done
for i in $(seq 1 5); do
+ test_batched_request_firmware_into_buf $i both
+done
+
+for i in $(seq 1 5); do
test_batched_request_firmware_direct $i both
done
@@ -371,6 +420,10 @@ for i in $(seq 1 5); do
done
for i in $(seq 1 5); do
+ test_batched_request_firmware_into_buf $i xzonly
+done
+
+for i in $(seq 1 5); do
test_batched_request_firmware_direct $i xzonly
done
diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh
index f236cc295450..5b8c0fedee76 100755
--- a/tools/testing/selftests/firmware/fw_lib.sh
+++ b/tools/testing/selftests/firmware/fw_lib.sh
@@ -9,6 +9,12 @@ DIR=/sys/devices/virtual/misc/test_firmware
PROC_CONFIG="/proc/config.gz"
TEST_DIR=$(dirname $0)
+# We need to load a different file to test request_firmware_into_buf
+# I believe the issue is firmware loaded cached vs. non-cached
+# with same filename is bungled.
+# To reproduce rename this to test-firmware.bin
+TEST_FIRMWARE_INTO_BUF_FILENAME=test-firmware-into-buf.bin
+
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
@@ -28,6 +34,12 @@ test_modprobe()
check_mods()
{
+ local uid=$(id -u)
+ if [ $uid -ne 0 ]; then
+ echo "skip all tests: must be run as root" >&2
+ exit $ksft_skip
+ fi
+
trap "test_modprobe" EXIT
if [ ! -d $DIR ]; then
modprobe test_firmware
@@ -108,6 +120,8 @@ setup_tmp_file()
FWPATH=$(mktemp -d)
FW="$FWPATH/test-firmware.bin"
echo "ABCD0123" >"$FW"
+ FW_INTO_BUF="$FWPATH/$TEST_FIRMWARE_INTO_BUF_FILENAME"
+ echo "EFGH4567" >"$FW_INTO_BUF"
NAME=$(basename "$FW")
if [ "$TEST_REQS_FW_SET_CUSTOM_PATH" = "yes" ]; then
echo -n "$FWPATH" >/sys/module/firmware_class/parameters/path
@@ -175,6 +189,9 @@ test_finish()
if [ -f $FW ]; then
rm -f "$FW"
fi
+ if [ -f $FW_INTO_BUF ]; then
+ rm -f "$FW_INTO_BUF"
+ fi
if [ -d $FWPATH ]; then
rm -rf "$FWPATH"
fi
diff --git a/tools/testing/selftests/ftrace/settings b/tools/testing/selftests/ftrace/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/ftrace/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/ftrace/test.d/direct/ftrace-direct.tc b/tools/testing/selftests/ftrace/test.d/direct/ftrace-direct.tc
new file mode 100644
index 000000000000..d75a8695bc21
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/direct/ftrace-direct.tc
@@ -0,0 +1,69 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Test ftrace direct functions against tracers
+
+rmmod ftrace-direct ||:
+if ! modprobe ftrace-direct ; then
+ echo "No ftrace-direct sample module - please make CONFIG_SAMPLE_FTRACE_DIRECT=m"
+ exit_unresolved;
+fi
+
+echo "Let the module run a little"
+sleep 1
+
+grep -q "my_direct_func: waking up" trace
+
+rmmod ftrace-direct
+
+test_tracer() {
+ tracer=$1
+
+ # tracer -> direct -> no direct > no tracer
+ echo $tracer > current_tracer
+ modprobe ftrace-direct
+ rmmod ftrace-direct
+ echo nop > current_tracer
+
+ # tracer -> direct -> no tracer > no direct
+ echo $tracer > current_tracer
+ modprobe ftrace-direct
+ echo nop > current_tracer
+ rmmod ftrace-direct
+
+ # direct -> tracer -> no tracer > no direct
+ modprobe ftrace-direct
+ echo $tracer > current_tracer
+ echo nop > current_tracer
+ rmmod ftrace-direct
+
+ # direct -> tracer -> no direct > no notracer
+ modprobe ftrace-direct
+ echo $tracer > current_tracer
+ rmmod ftrace-direct
+ echo nop > current_tracer
+}
+
+for t in `cat available_tracers`; do
+ if [ "$t" != "nop" ]; then
+ test_tracer $t
+ fi
+done
+
+echo nop > current_tracer
+rmmod ftrace-direct ||:
+
+# Now do the same thing with another direct function registered
+echo "Running with another ftrace direct function"
+
+rmmod ftrace-direct-too ||:
+modprobe ftrace-direct-too
+
+for t in `cat available_tracers`; do
+ if [ "$t" != "nop" ]; then
+ test_tracer $t
+ fi
+done
+
+echo nop > current_tracer
+rmmod ftrace-direct ||:
+rmmod ftrace-direct-too ||:
diff --git a/tools/testing/selftests/ftrace/test.d/direct/kprobe-direct.tc b/tools/testing/selftests/ftrace/test.d/direct/kprobe-direct.tc
new file mode 100644
index 000000000000..801ecb63e84c
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/direct/kprobe-direct.tc
@@ -0,0 +1,84 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Test ftrace direct functions against kprobes
+
+rmmod ftrace-direct ||:
+if ! modprobe ftrace-direct ; then
+ echo "No ftrace-direct sample module - please build with CONFIG_SAMPLE_FTRACE_DIRECT=m"
+ exit_unresolved;
+fi
+
+if [ ! -f kprobe_events ]; then
+ echo "No kprobe_events file -please build CONFIG_KPROBE_EVENTS"
+ exit_unresolved;
+fi
+
+echo "Let the module run a little"
+sleep 1
+
+grep -q "my_direct_func: waking up" trace
+
+rmmod ftrace-direct
+
+echo 'p:kwake wake_up_process task=$arg1' > kprobe_events
+
+start_direct() {
+ echo > trace
+ modprobe ftrace-direct
+ sleep 1
+ grep -q "my_direct_func: waking up" trace
+}
+
+stop_direct() {
+ rmmod ftrace-direct
+}
+
+enable_probe() {
+ echo > trace
+ echo 1 > events/kprobes/kwake/enable
+ sleep 1
+ grep -q "kwake:" trace
+}
+
+disable_probe() {
+ echo 0 > events/kprobes/kwake/enable
+}
+
+test_kprobes() {
+ # probe -> direct -> no direct > no probe
+ enable_probe
+ start_direct
+ stop_direct
+ disable_probe
+
+ # probe -> direct -> no probe > no direct
+ enable_probe
+ start_direct
+ disable_probe
+ stop_direct
+
+ # direct -> probe -> no probe > no direct
+ start_direct
+ enable_probe
+ disable_probe
+ stop_direct
+
+ # direct -> probe -> no direct > no noprobe
+ start_direct
+ enable_probe
+ stop_direct
+ disable_probe
+}
+
+test_kprobes
+
+# Now do this with a second registered direct function
+echo "Running with another ftrace direct function"
+
+modprobe ftrace-direct-too
+
+test_kprobes
+
+rmmod ftrace-direct-too
+
+echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
index 27a54a17da65..f4e92afab14b 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
@@ -30,7 +30,7 @@ ftrace_filter_check '*schedule*' '^.*schedule.*$'
ftrace_filter_check 'schedule*' '^schedule.*$'
# filter by *mid*end
-ftrace_filter_check '*aw*lock' '.*aw.*lock$'
+ftrace_filter_check '*pin*lock' '.*pin.*lock$'
# filter by start*mid*
ftrace_filter_check 'mutex*try*' '^mutex.*try.*'
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
index 36fb59f886ea..1a52f2883fe0 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
@@ -3,6 +3,8 @@
# description: ftrace - stacktrace filter command
# flags: instance
+[ ! -f set_ftrace_filter ] && exit_unsupported
+
echo _do_fork:stacktrace >> set_ftrace_filter
grep -q "_do_fork:stacktrace:unlimited" set_ftrace_filter
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc
index 86a1f07ef2ca..71fa3f49e35e 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc
@@ -15,6 +15,11 @@ if [ $NP -eq 1 ] ;then
exit_unresolved
fi
+if ! grep -q "function" available_tracers ; then
+ echo "Function trace is not enabled"
+ exit_unsupported
+fi
+
ORIG_CPUMASK=`cat tracing_cpumask`
do_reset() {
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index 1d96c5f7e402..5d4550591ff9 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -46,6 +46,9 @@ reset_events_filter() { # reset all current setting filters
}
reset_ftrace_filter() { # reset all triggers in set_ftrace_filter
+ if [ ! -f set_ftrace_filter ]; then
+ return 0
+ fi
echo > set_ftrace_filter
grep -v '^#' set_ftrace_filter | while read t; do
tr=`echo $t | cut -d: -f2`
@@ -93,7 +96,7 @@ initialize_ftrace() { # Reset ftrace to initial-state
disable_events
[ -f set_event_pid ] && echo > set_event_pid
[ -f set_ftrace_pid ] && echo > set_ftrace_pid
- [ -f set_ftrace_filter ] && echo | tee set_ftrace_*
+ [ -f set_ftrace_notrace ] && echo > set_ftrace_notrace
[ -f set_graph_function ] && echo | tee set_graph_*
[ -f stack_trace_filter ] && echo > stack_trace_filter
[ -f kprobe_events ] && echo > kprobe_events
@@ -115,7 +118,7 @@ ftrace_errlog_check() { # err-prefix command-with-error-pos-by-^ command-file
command=$(echo "$2" | tr -d ^)
echo "Test command: $command"
echo > error_log
- (! echo "$command" > "$3" ) 2> /dev/null
+ (! echo "$command" >> "$3" ) 2> /dev/null
grep "$1: error:" -A 3 error_log
N=$(tail -n 1 error_log | wc -c)
# " Command: " and "^\n" => 13
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
index 3fb70e01b1fe..3ff236719b6e 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
@@ -24,7 +24,21 @@ test -d events/kprobes2/event2 || exit_failure
:;: "Add an event on dot function without name" ;:
-FUNC=`grep -m 10 " [tT] .*\.isra\..*$" /proc/kallsyms | tail -n 1 | cut -f 3 -d " "`
+find_dot_func() {
+ if [ ! -f available_filter_functions ]; then
+ grep -m 10 " [tT] .*\.isra\..*$" /proc/kallsyms | tail -n 1 | cut -f 3 -d " "
+ return;
+ fi
+
+ grep " [tT] .*\.isra\..*" /proc/kallsyms | cut -f 3 -d " " | while read f; do
+ if grep -s $f available_filter_functions; then
+ echo $f
+ break
+ fi
+ done
+}
+
+FUNC=`find_dot_func | tail -n 1`
[ "x" != "x$FUNC" ] || exit_unresolved
echo "p $FUNC" > kprobe_events
EVENT=`grep $FUNC kprobe_events | cut -f 1 -d " " | cut -f 2 -d:`
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
new file mode 100644
index 000000000000..44494bac86d1
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
@@ -0,0 +1,35 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Create/delete multiprobe on kprobe event
+
+[ -f kprobe_events ] || exit_unsupported
+
+grep -q "Create/append/" README || exit_unsupported
+
+# Choose 2 symbols for target
+SYM1=_do_fork
+SYM2=do_exit
+EVENT_NAME=kprobes/testevent
+
+DEF1="p:$EVENT_NAME $SYM1"
+DEF2="p:$EVENT_NAME $SYM2"
+
+:;: "Define an event which has 2 probes" ;:
+echo $DEF1 >> kprobe_events
+echo $DEF2 >> kprobe_events
+cat kprobe_events | grep "$DEF1"
+cat kprobe_events | grep "$DEF2"
+
+:;: "Remove the event by name (should remove both)" ;:
+echo "-:$EVENT_NAME" >> kprobe_events
+test `cat kprobe_events | wc -l` -eq 0
+
+:;: "Remove just 1 event" ;:
+echo $DEF1 >> kprobe_events
+echo $DEF2 >> kprobe_events
+echo "-:$EVENT_NAME $SYM1" >> kprobe_events
+! cat kprobe_events | grep "$DEF1"
+cat kprobe_events | grep "$DEF2"
+
+:;: "Appending different type must fail" ;:
+! echo "$DEF1 \$stack" >> kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
index 29faaec942c6..ef1e9bafb098 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
@@ -41,6 +41,11 @@ check_error 'p vfs_read ^%none_reg' # BAD_REG_NAME
check_error 'p vfs_read ^@12345678abcde' # BAD_MEM_ADDR
check_error 'p vfs_read ^@+10' # FILE_ON_KPROBE
+grep -q "imm-value" README && \
+check_error 'p vfs_read arg1=\^x' # BAD_IMM
+grep -q "imm-string" README && \
+check_error 'p vfs_read arg1=\"abcd^' # IMMSTR_NO_CLOSE
+
check_error 'p vfs_read ^+0@0)' # DEREF_NEED_BRACE
check_error 'p vfs_read ^+0ab1(@0)' # BAD_DEREF_OFFS
check_error 'p vfs_read +0(+0(@0^)' # DEREF_OPEN_BRACE
@@ -82,4 +87,15 @@ case $(uname -m) in
;;
esac
+# multiprobe errors
+if grep -q "Create/append/" README && grep -q "imm-value" README; then
+echo 'p:kprobes/testevent _do_fork' > kprobe_events
+check_error '^r:kprobes/testevent do_exit' # DIFF_PROBE_TYPE
+echo 'p:kprobes/testevent _do_fork abcd=\1' > kprobe_events
+check_error 'p:kprobes/testevent _do_fork ^bcd=\1' # DIFF_ARG_TYPE
+check_error 'p:kprobes/testevent _do_fork ^abcd=\1:u8' # DIFF_ARG_TYPE
+check_error 'p:kprobes/testevent _do_fork ^abcd=\"foo"' # DIFF_ARG_TYPE
+check_error '^p:kprobes/testevent _do_fork abcd=\1' # SAME_PROBE
+fi
+
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
index 5862eee91e1d..6e3dbe5f96b7 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
@@ -20,9 +20,9 @@ while read i; do
test $N -eq 256 && break
done
-L=`wc -l kprobe_events`
-if [ $L -ne $N ]; then
- echo "The number of kprobes events ($L) is not $N"
+L=`cat kprobe_events | wc -l`
+if [ $L -ne 256 ]; then
+ echo "The number of kprobes events ($L) is not 256"
exit_fail
fi
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc
index 1221240f8cf6..3f2aee115f6e 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc
@@ -21,10 +21,10 @@ grep -q "snapshot()" README || exit_unsupported # version issue
echo "Test expected snapshot action failure"
-echo 'hist:keys=comm:onmatch(sched.sched_wakeup).snapshot()' >> /sys/kernel/debug/tracing/events/sched/sched_waking/trigger && exit_fail
+echo 'hist:keys=comm:onmatch(sched.sched_wakeup).snapshot()' >> events/sched/sched_waking/trigger && exit_fail
echo "Test expected save action failure"
-echo 'hist:keys=comm:onmatch(sched.sched_wakeup).save(comm,prio)' >> /sys/kernel/debug/tracing/events/sched/sched_waking/trigger && exit_fail
+echo 'hist:keys=comm:onmatch(sched.sched_wakeup).save(comm,prio)' >> events/sched/sched_waking/trigger && exit_fail
exit_xfail
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc
index 064a284e4e75..c80007aa9f86 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc
@@ -16,7 +16,7 @@ grep -q "onchange(var)" README || exit_unsupported # version issue
echo "Test onchange action"
-echo 'hist:keys=comm:newprio=prio:onchange($newprio).save(comm,prio) if comm=="ping"' >> /sys/kernel/debug/tracing/events/sched/sched_waking/trigger
+echo 'hist:keys=comm:newprio=prio:onchange($newprio).save(comm,prio) if comm=="ping"' >> events/sched/sched_waking/trigger
ping $LOCALHOST -c 3
nice -n 1 ping $LOCALHOST -c 3
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc
index 18fff69fc433..f546c1b66a9b 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc
@@ -23,9 +23,9 @@ grep -q "snapshot()" README || exit_unsupported # version issue
echo "Test snapshot action"
-echo 1 > /sys/kernel/debug/tracing/events/sched/enable
+echo 1 > events/sched/enable
-echo 'hist:keys=comm:newprio=prio:onchange($newprio).save(comm,prio):onchange($newprio).snapshot() if comm=="ping"' >> /sys/kernel/debug/tracing/events/sched/sched_waking/trigger
+echo 'hist:keys=comm:newprio=prio:onchange($newprio).save(comm,prio):onchange($newprio).snapshot() if comm=="ping"' >> events/sched/sched_waking/trigger
ping $LOCALHOST -c 3
nice -n 1 ping $LOCALHOST -c 3
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-syntax-errors.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-syntax-errors.tc
new file mode 100644
index 000000000000..d44087a2f3d1
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-syntax-errors.tc
@@ -0,0 +1,32 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test histogram parser errors
+
+if [ ! -f set_event -o ! -d events/kmem ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/kmem/kmalloc/trigger ]; then
+ echo "event trigger is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/kmem/kmalloc/hist ]; then
+ echo "hist trigger is not supported"
+ exit_unsupported
+fi
+
+[ -f error_log ] || exit_unsupported
+
+check_error() { # command-with-error-pos-by-^
+ ftrace_errlog_check 'hist:kmem:kmalloc' "$1" 'events/kmem/kmalloc/trigger'
+}
+
+check_error 'hist:keys=common_pid:vals=bytes_req:sort=common_pid,^junk' # INVALID_SORT_FIELD
+check_error 'hist:keys=common_pid:vals=bytes_req:^sort=' # EMPTY_ASSIGNMENT
+check_error 'hist:keys=common_pid:vals=bytes_req:^sort=common_pid,' # EMPTY_SORT_FIELD
+check_error 'hist:keys=common_pid:vals=bytes_req:sort=common_pid.^junk' # INVALID_SORT_MODIFIER
+check_error 'hist:keys=common_pid:vals=bytes_req,bytes_alloc:^sort=common_pid,bytes_req,bytes_alloc' # TOO_MANY_SORT_FIELDS
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc
index 7717c0a09686..ac738500d17f 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc
@@ -28,7 +28,7 @@ if [ -z "$FEATURE" ]; then
exit_unsupported
fi
-echo "Test snapshot tigger"
+echo "Test snapshot trigger"
echo 0 > snapshot
echo 1 > events/sched/sched_process_fork/enable
( echo "forked")
diff --git a/tools/testing/selftests/gen_kselftest_tar.sh b/tools/testing/selftests/gen_kselftest_tar.sh
index a27e2eec3586..8b2b6088540d 100755
--- a/tools/testing/selftests/gen_kselftest_tar.sh
+++ b/tools/testing/selftests/gen_kselftest_tar.sh
@@ -38,16 +38,21 @@ main()
esac
fi
- install_dir=./kselftest
+ # Create working directory.
+ dest=`pwd`
+ install_work="$dest"/kselftest_install
+ install_name=kselftest
+ install_dir="$install_work"/"$install_name"
+ mkdir -p "$install_dir"
-# Run install using INSTALL_KSFT_PATH override to generate install
-# directory
-./kselftest_install.sh
-tar $copts kselftest${ext} $install_dir
-echo "Kselftest archive kselftest${ext} created!"
+ # Run install using INSTALL_KSFT_PATH override to generate install
+ # directory
+ ./kselftest_install.sh "$install_dir"
+ (cd "$install_work"; tar $copts "$dest"/kselftest${ext} $install_name)
+ echo "Kselftest archive kselftest${ext} created!"
-# clean up install directory
-rm -rf kselftest
+ # clean up top-level install work directory
+ rm -rf "$install_work"
}
main "$@"
diff --git a/tools/testing/selftests/ir/ir_loopback.c b/tools/testing/selftests/ir/ir_loopback.c
index e700e09e3682..af7f9c7d59bc 100644
--- a/tools/testing/selftests/ir/ir_loopback.c
+++ b/tools/testing/selftests/ir/ir_loopback.c
@@ -54,9 +54,9 @@ static const struct {
{ RC_PROTO_RC6_MCE, "rc-6-mce", 0x00007fff, "rc-6" },
{ RC_PROTO_SHARP, "sharp", 0x1fff, "sharp" },
{ RC_PROTO_IMON, "imon", 0x7fffffff, "imon" },
- { RC_PROTO_RCMM12, "rcmm-12", 0x00000fff, "rcmm" },
- { RC_PROTO_RCMM24, "rcmm-24", 0x00ffffff, "rcmm" },
- { RC_PROTO_RCMM32, "rcmm-32", 0xffffffff, "rcmm" },
+ { RC_PROTO_RCMM12, "rcmm-12", 0x00000fff, "rc-mm" },
+ { RC_PROTO_RCMM24, "rcmm-24", 0x00ffffff, "rc-mm" },
+ { RC_PROTO_RCMM32, "rcmm-32", 0xffffffff, "rc-mm" },
};
int lirc_open(const char *rc)
diff --git a/tools/testing/selftests/kexec/test_kexec_file_load.sh b/tools/testing/selftests/kexec/test_kexec_file_load.sh
index fa7c24e8eefb..2ff600388c30 100755
--- a/tools/testing/selftests/kexec/test_kexec_file_load.sh
+++ b/tools/testing/selftests/kexec/test_kexec_file_load.sh
@@ -37,11 +37,20 @@ is_ima_sig_required()
# sequentially. As a result, a policy rule may be defined, but
# might not necessarily be used. This test assumes if a policy
# rule is specified, that is the intent.
+
+ # First check for appended signature (modsig), then xattr
if [ $ima_read_policy -eq 1 ]; then
check_ima_policy "appraise" "func=KEXEC_KERNEL_CHECK" \
- "appraise_type=imasig"
+ "appraise_type=imasig|modsig"
ret=$?
- [ $ret -eq 1 ] && log_info "IMA signature required";
+ if [ $ret -eq 1 ]; then
+ log_info "IMA or appended(modsig) signature required"
+ else
+ check_ima_policy "appraise" "func=KEXEC_KERNEL_CHECK" \
+ "appraise_type=imasig"
+ ret=$?
+ [ $ret -eq 1 ] && log_info "IMA signature required";
+ fi
fi
return $ret
}
@@ -84,6 +93,22 @@ check_for_imasig()
return $ret
}
+# Return 1 for appended signature (modsig) found and 0 for not found.
+check_for_modsig()
+{
+ local module_sig_string="~Module signature appended~"
+ local sig="$(tail --bytes $((${#module_sig_string} + 1)) $KERNEL_IMAGE)"
+ local ret=0
+
+ if [ "$sig" == "$module_sig_string" ]; then
+ ret=1
+ log_info "kexec kernel image modsig signed"
+ else
+ log_info "kexec kernel image not modsig signed"
+ fi
+ return $ret
+}
+
kexec_file_load_test()
{
local succeed_msg="kexec_file_load succeeded"
@@ -98,7 +123,8 @@ kexec_file_load_test()
# In secureboot mode with an architecture specific
# policy, make sure either an IMA or PE signature exists.
if [ $secureboot -eq 1 ] && [ $arch_policy -eq 1 ] && \
- [ $ima_signed -eq 0 ] && [ $pe_signed -eq 0 ]; then
+ [ $ima_signed -eq 0 ] && [ $pe_signed -eq 0 ] \
+ && [ $ima_modsig -eq 0 ]; then
log_fail "$succeed_msg (missing sig)"
fi
@@ -107,7 +133,8 @@ kexec_file_load_test()
log_fail "$succeed_msg (missing PE sig)"
fi
- if [ $ima_sig_required -eq 1 ] && [ $ima_signed -eq 0 ]; then
+ if [ $ima_sig_required -eq 1 ] && [ $ima_signed -eq 0 ] \
+ && [ $ima_modsig -eq 0 ]; then
log_fail "$succeed_msg (missing IMA sig)"
fi
@@ -204,5 +231,8 @@ pe_signed=$?
check_for_imasig
ima_signed=$?
+check_for_modsig
+ima_modsig=$?
+
# Test loading the kernel image via kexec_file_load syscall
kexec_file_load_test
diff --git a/tools/testing/selftests/kselftest_module.sh b/tools/testing/selftests/kselftest/module.sh
index 18e1c7992d30..fb4733faff12 100755
--- a/tools/testing/selftests/kselftest_module.sh
+++ b/tools/testing/selftests/kselftest/module.sh
@@ -9,7 +9,7 @@
#
# #!/bin/sh
# SPDX-License-Identifier: GPL-2.0+
-# $(dirname $0)/../kselftest_module.sh "description" module_name
+# $(dirname $0)/../kselftest/module.sh "description" module_name
#
# Example: tools/testing/selftests/lib/printf.sh
diff --git a/tools/testing/selftests/kselftest/prefix.pl b/tools/testing/selftests/kselftest/prefix.pl
index ec7e48118183..31f7c2a0a8bd 100755
--- a/tools/testing/selftests/kselftest/prefix.pl
+++ b/tools/testing/selftests/kselftest/prefix.pl
@@ -3,6 +3,7 @@
# Prefix all lines with "# ", unbuffered. Command being piped in may need
# to have unbuffering forced with "stdbuf -i0 -o0 -e0 $cmd".
use strict;
+use IO::Handle;
binmode STDIN;
binmode STDOUT;
diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh
index 00c9020bdda8..e84d901f8567 100644
--- a/tools/testing/selftests/kselftest/runner.sh
+++ b/tools/testing/selftests/kselftest/runner.sh
@@ -3,9 +3,14 @@
#
# Runs a set of tests in a given subdirectory.
export skip_rc=4
+export timeout_rc=124
export logfile=/dev/stdout
export per_test_logging=
+# Defaults for "settings" file fields:
+# "timeout" how many seconds to let each test run before failing.
+export kselftest_default_timeout=45
+
# There isn't a shell-agnostic way to find the path of a sourced file,
# so we must rely on BASE_DIR being set to find other tools.
if [ -z "$BASE_DIR" ]; then
@@ -24,6 +29,16 @@ tap_prefix()
fi
}
+tap_timeout()
+{
+ # Make sure tests will time out if utility is available.
+ if [ -x /usr/bin/timeout ] ; then
+ /usr/bin/timeout "$kselftest_timeout" "$1"
+ else
+ "$1"
+ fi
+}
+
run_one()
{
DIR="$1"
@@ -32,6 +47,18 @@ run_one()
BASENAME_TEST=$(basename $TEST)
+ # Reset any "settings"-file variables.
+ export kselftest_timeout="$kselftest_default_timeout"
+ # Load per-test-directory kselftest "settings" file.
+ settings="$BASE_DIR/$DIR/settings"
+ if [ -r "$settings" ] ; then
+ while read line ; do
+ field=$(echo "$line" | cut -d= -f1)
+ value=$(echo "$line" | cut -d= -f2-)
+ eval "kselftest_$field"="$value"
+ done < "$settings"
+ fi
+
TEST_HDR_MSG="selftests: $DIR: $BASENAME_TEST"
echo "# $TEST_HDR_MSG"
if [ ! -x "$TEST" ]; then
@@ -44,14 +71,18 @@ run_one()
echo "not ok $test_num $TEST_HDR_MSG"
else
cd `dirname $TEST` > /dev/null
- (((((./$BASENAME_TEST 2>&1; echo $? >&3) |
+ ((((( tap_timeout ./$BASENAME_TEST 2>&1; echo $? >&3) |
tap_prefix >&4) 3>&1) |
(read xs; exit $xs)) 4>>"$logfile" &&
echo "ok $test_num $TEST_HDR_MSG") ||
- (if [ $? -eq $skip_rc ]; then \
+ (rc=$?; \
+ if [ $rc -eq $skip_rc ]; then \
echo "not ok $test_num $TEST_HDR_MSG # SKIP"
+ elif [ $rc -eq $timeout_rc ]; then \
+ echo "#"
+ echo "not ok $test_num $TEST_HDR_MSG # TIMEOUT"
else
- echo "not ok $test_num $TEST_HDR_MSG"
+ echo "not ok $test_num $TEST_HDR_MSG # exit=$rc"
fi)
cd - >/dev/null
fi
@@ -60,7 +91,7 @@ run_one()
run_many()
{
echo "TAP version 13"
- DIR=$(basename "$PWD")
+ DIR="${PWD#${BASE_DIR}/}"
test_num=0
total=$(echo "$@" | wc -w)
echo "1..$total"
diff --git a/tools/testing/selftests/kselftest_install.sh b/tools/testing/selftests/kselftest_install.sh
index ec304463883c..407af7da7037 100755
--- a/tools/testing/selftests/kselftest_install.sh
+++ b/tools/testing/selftests/kselftest_install.sh
@@ -6,30 +6,30 @@
# Author: Shuah Khan <shuahkh@osg.samsung.com>
# Copyright (C) 2015 Samsung Electronics Co., Ltd.
-install_loc=`pwd`
-
main()
{
- if [ $(basename $install_loc) != "selftests" ]; then
+ base_dir=`pwd`
+ install_dir="$base_dir"/kselftest_install
+
+ # Make sure we're in the selftests top-level directory.
+ if [ $(basename "$base_dir") != "selftests" ]; then
echo "$0: Please run it in selftests directory ..."
exit 1;
fi
+
+ # Only allow installation into an existing location.
if [ "$#" -eq 0 ]; then
- echo "$0: Installing in default location - $install_loc ..."
+ echo "$0: Installing in default location - $install_dir ..."
elif [ ! -d "$1" ]; then
echo "$0: $1 doesn't exist!!"
exit 1;
else
- install_loc=$1
- echo "$0: Installing in specified location - $install_loc ..."
+ install_dir="$1"
+ echo "$0: Installing in specified location - $install_dir ..."
fi
- install_dir=$install_loc/kselftest
-
-# Create install directory
- mkdir -p $install_dir
-# Build tests
- INSTALL_PATH=$install_dir make install
+ # Build tests
+ KSFT_INSTALL_PATH="$install_dir" make install
}
main "$@"
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index b35da375530a..30072c3f52fb 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -1,4 +1,5 @@
/s390x/sync_regs_test
+/s390x/memop
/x86_64/cr4_cpuid_sync_test
/x86_64/evmcs_test
/x86_64/hyperv_cpuid
@@ -9,8 +10,10 @@
/x86_64/state_test
/x86_64/sync_regs_test
/x86_64/vmx_close_while_nested_test
+/x86_64/vmx_dirty_log_test
/x86_64/vmx_set_nested_state_test
/x86_64/vmx_tsc_adjust_test
+/x86_64/xss_msr_test
/clear_dirty_log_test
/dirty_log_test
/kvm_create_max_vcpus
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index ba7849751989..d91c53b726e6 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -7,10 +7,10 @@ top_srcdir = ../../../..
KSFT_KHDR_INSTALL := 1
UNAME_M := $(shell uname -m)
-LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/ucall.c lib/sparsebit.c
-LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c
-LIBKVM_aarch64 = lib/aarch64/processor.c
-LIBKVM_s390x = lib/s390x/processor.c
+LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c
+LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c
+LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
+LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c
TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
@@ -22,8 +22,11 @@ TEST_GEN_PROGS_x86_64 += x86_64/smm_test
TEST_GEN_PROGS_x86_64 += x86_64/state_test
TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
+TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
+TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
@@ -32,7 +35,10 @@ TEST_GEN_PROGS_aarch64 += clear_dirty_log_test
TEST_GEN_PROGS_aarch64 += dirty_log_test
TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
+TEST_GEN_PROGS_s390x = s390x/memop
TEST_GEN_PROGS_s390x += s390x/sync_regs_test
+TEST_GEN_PROGS_s390x += s390x/resets
+TEST_GEN_PROGS_s390x += dirty_log_test
TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
@@ -41,12 +47,14 @@ LIBKVM += $(LIBKVM_$(UNAME_M))
INSTALL_HDR_PATH = $(top_srcdir)/usr
LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
+LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/x86/include
CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
-fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
- -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(UNAME_M) -I..
+ -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
+ -I$(<D) -Iinclude/$(UNAME_M) -I..
no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
- $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
+ $(CC) -Werror -no-pie -x c - -o "$$TMP", -no-pie)
# On s390, build the testcases KVM-enabled
pgste-option = $(call try-run, echo 'int main() { return 0; }' | \
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index ceb52b952637..5614222a6628 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -19,15 +19,13 @@
#include "kvm_util.h"
#include "processor.h"
-#define DEBUG printf
-
#define VCPU_ID 1
/* The memory slot index to track dirty pages */
#define TEST_MEM_SLOT_INDEX 1
-/* Default guest test memory offset, 1G */
-#define DEFAULT_GUEST_TEST_MEM 0x40000000
+/* Default guest test virtual memory offset */
+#define DEFAULT_GUEST_TEST_MEM 0xc0000000
/* How many pages to dirty for each guest loop */
#define TEST_PAGES_PER_LOOP 1024
@@ -38,6 +36,27 @@
/* Interval for each host loop (ms) */
#define TEST_HOST_LOOP_INTERVAL 10UL
+/* Dirty bitmaps are always little endian, so we need to swap on big endian */
+#if defined(__s390x__)
+# define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7)
+# define test_bit_le(nr, addr) \
+ test_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define set_bit_le(nr, addr) \
+ set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define clear_bit_le(nr, addr) \
+ clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define test_and_set_bit_le(nr, addr) \
+ test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define test_and_clear_bit_le(nr, addr) \
+ test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+#else
+# define test_bit_le test_bit
+# define set_bit_le set_bit
+# define clear_bit_le clear_bit
+# define test_and_set_bit_le test_and_set_bit
+# define test_and_clear_bit_le test_and_clear_bit
+#endif
+
/*
* Guest/Host shared variables. Ensure addr_gva2hva() and/or
* sync_global_to/from_guest() are used when accessing from
@@ -69,11 +88,23 @@ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
*/
static void guest_code(void)
{
+ uint64_t addr;
int i;
+ /*
+ * On s390x, all pages of a 1M segment are initially marked as dirty
+ * when a page of the segment is written to for the very first time.
+ * To compensate this specialty in this test, we need to touch all
+ * pages during the first iteration.
+ */
+ for (i = 0; i < guest_num_pages; i++) {
+ addr = guest_test_virt_mem + i * guest_page_size;
+ *(uint64_t *)addr = READ_ONCE(iteration);
+ }
+
while (true) {
for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
- uint64_t addr = guest_test_virt_mem;
+ addr = guest_test_virt_mem;
addr += (READ_ONCE(random_array[i]) % guest_num_pages)
* guest_page_size;
addr &= ~(host_page_size - 1);
@@ -158,15 +189,15 @@ static void vm_dirty_log_verify(unsigned long *bmap)
value_ptr = host_test_mem + page * host_page_size;
/* If this is a special page that we were tracking... */
- if (test_and_clear_bit(page, host_bmap_track)) {
+ if (test_and_clear_bit_le(page, host_bmap_track)) {
host_track_next_count++;
- TEST_ASSERT(test_bit(page, bmap),
+ TEST_ASSERT(test_bit_le(page, bmap),
"Page %"PRIu64" should have its dirty bit "
"set in this iteration but it is missing",
page);
}
- if (test_bit(page, bmap)) {
+ if (test_bit_le(page, bmap)) {
host_dirty_count++;
/*
* If the bit is set, the value written onto
@@ -209,21 +240,19 @@ static void vm_dirty_log_verify(unsigned long *bmap)
* should report its dirtyness in the
* next run
*/
- set_bit(page, host_bmap_track);
+ set_bit_le(page, host_bmap_track);
}
}
}
}
static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
- uint64_t extra_mem_pages, void *guest_code,
- unsigned long type)
+ uint64_t extra_mem_pages, void *guest_code)
{
struct kvm_vm *vm;
uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
- vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
- O_RDWR, type);
+ vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
#ifdef __x86_64__
vm_create_irqchip(vm);
@@ -232,85 +261,61 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
return vm;
}
+#define DIRTY_MEM_BITS 30 /* 1G */
+#define PAGE_SHIFT_4K 12
+
static void run_test(enum vm_guest_mode mode, unsigned long iterations,
unsigned long interval, uint64_t phys_offset)
{
- unsigned int guest_pa_bits, guest_page_shift;
pthread_t vcpu_thread;
struct kvm_vm *vm;
- uint64_t max_gfn;
unsigned long *bmap;
- unsigned long type = 0;
-
- switch (mode) {
- case VM_MODE_P52V48_4K:
- guest_pa_bits = 52;
- guest_page_shift = 12;
- break;
- case VM_MODE_P52V48_64K:
- guest_pa_bits = 52;
- guest_page_shift = 16;
- break;
- case VM_MODE_P48V48_4K:
- guest_pa_bits = 48;
- guest_page_shift = 12;
- break;
- case VM_MODE_P48V48_64K:
- guest_pa_bits = 48;
- guest_page_shift = 16;
- break;
- case VM_MODE_P40V48_4K:
- guest_pa_bits = 40;
- guest_page_shift = 12;
- break;
- case VM_MODE_P40V48_64K:
- guest_pa_bits = 40;
- guest_page_shift = 16;
- break;
- default:
- TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
- }
- DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode));
-
-#ifdef __x86_64__
/*
- * FIXME
- * The x86_64 kvm selftests framework currently only supports a
- * single PML4 which restricts the number of physical address
- * bits we can change to 39.
+ * We reserve page table for 2 times of extra dirty mem which
+ * will definitely cover the original (1G+) test range. Here
+ * we do the calculation with 4K page size which is the
+ * smallest so the page number will be enough for all archs
+ * (e.g., 64K page size guest will need even less memory for
+ * page tables).
*/
- guest_pa_bits = 39;
-#endif
-#ifdef __aarch64__
- if (guest_pa_bits != 40)
- type = KVM_VM_TYPE_ARM_IPA_SIZE(guest_pa_bits);
-#endif
- max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1;
- guest_page_size = (1ul << guest_page_shift);
+ vm = create_vm(mode, VCPU_ID,
+ 2ul << (DIRTY_MEM_BITS - PAGE_SHIFT_4K),
+ guest_code);
+
+ guest_page_size = vm_get_page_size(vm);
/*
* A little more than 1G of guest page sized pages. Cover the
* case where the size is not aligned to 64 pages.
*/
- guest_num_pages = (1ul << (30 - guest_page_shift)) + 16;
+ guest_num_pages = (1ul << (DIRTY_MEM_BITS -
+ vm_get_page_shift(vm))) + 16;
+#ifdef __s390x__
+ /* Round up to multiple of 1M (segment size) */
+ guest_num_pages = (guest_num_pages + 0xff) & ~0xffUL;
+#endif
host_page_size = getpagesize();
host_num_pages = (guest_num_pages * guest_page_size) / host_page_size +
!!((guest_num_pages * guest_page_size) % host_page_size);
if (!phys_offset) {
- guest_test_phys_mem = (max_gfn - guest_num_pages) * guest_page_size;
+ guest_test_phys_mem = (vm_get_max_gfn(vm) -
+ guest_num_pages) * guest_page_size;
guest_test_phys_mem &= ~(host_page_size - 1);
} else {
guest_test_phys_mem = phys_offset;
}
+#ifdef __s390x__
+ /* Align to 1M (segment size) */
+ guest_test_phys_mem &= ~((1 << 20) - 1);
+#endif
+
DEBUG("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
bmap = bitmap_alloc(host_num_pages);
host_bmap_track = bitmap_alloc(host_num_pages);
- vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code, type);
-
#ifdef USE_CLEAR_DIRTY_LOG
struct kvm_enable_cap cap = {};
@@ -337,7 +342,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
#endif
#ifdef __aarch64__
- ucall_init(vm, UCALL_MMIO, NULL);
+ ucall_init(vm, NULL);
#endif
/* Export the shared variables to the guest */
@@ -440,7 +445,7 @@ int main(int argc, char *argv[])
#endif
#ifdef __x86_64__
- vm_guest_mode_params_init(VM_MODE_P52V48_4K, true, true);
+ vm_guest_mode_params_init(VM_MODE_PXXV48_4K, true, true);
#endif
#ifdef __aarch64__
vm_guest_mode_params_init(VM_MODE_P40V48_4K, true, true);
@@ -454,6 +459,9 @@ int main(int argc, char *argv[])
vm_guest_mode_params_init(VM_MODE_P48V48_64K, true, true);
}
#endif
+#ifdef __s390x__
+ vm_guest_mode_params_init(VM_MODE_P40V48_4K, true, true);
+#endif
while ((opt = getopt(argc, argv, "hi:I:p:m:")) != -1) {
switch (opt) {
diff --git a/tools/testing/selftests/kvm/include/evmcs.h b/tools/testing/selftests/kvm/include/evmcs.h
index 4059014d93ea..4912d23844bc 100644
--- a/tools/testing/selftests/kvm/include/evmcs.h
+++ b/tools/testing/selftests/kvm/include/evmcs.h
@@ -220,6 +220,8 @@ struct hv_enlightened_vmcs {
struct hv_enlightened_vmcs *current_evmcs;
struct hv_vp_assist_page *current_vp_assist;
+int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id);
+
static inline int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist)
{
u64 val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) |
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index e0e66b115ef2..ae0d14c2540a 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -24,6 +24,12 @@ struct kvm_vm;
typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
+#ifndef NDEBUG
+#define DEBUG(...) printf(__VA_ARGS__);
+#else
+#define DEBUG(...)
+#endif
+
/* Minimum allocated guest virtual and physical addresses */
#define KVM_UTIL_MIN_VADDR 0x2000
@@ -38,11 +44,14 @@ enum vm_guest_mode {
VM_MODE_P48V48_64K,
VM_MODE_P40V48_4K,
VM_MODE_P40V48_64K,
+ VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
NUM_VM_MODES,
};
-#ifdef __aarch64__
+#if defined(__aarch64__)
#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
+#elif defined(__x86_64__)
+#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
#else
#define VM_MODE_DEFAULT VM_MODE_P52V48_4K
#endif
@@ -60,8 +69,7 @@ int kvm_check_cap(long cap);
int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
-struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages,
- int perm, unsigned long type);
+struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
void kvm_vm_free(struct kvm_vm *vmp);
void kvm_vm_restart(struct kvm_vm *vmp, int perm);
void kvm_vm_release(struct kvm_vm *vmp);
@@ -117,6 +125,12 @@ void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_sregs *sregs);
int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_sregs *sregs);
+void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_fpu *fpu);
+void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_fpu *fpu);
+void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg);
+void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg);
#ifdef __KVM_HAVE_VCPU_EVENTS
void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_vcpu_events *events);
@@ -146,6 +160,10 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
bool vm_is_unrestricted_guest(struct kvm_vm *vm);
+unsigned int vm_get_page_size(struct kvm_vm *vm);
+unsigned int vm_get_page_shift(struct kvm_vm *vm);
+unsigned int vm_get_max_gfn(struct kvm_vm *vm);
+
struct kvm_userspace_memory_region *
kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
uint64_t end);
@@ -165,12 +183,6 @@ int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
memcpy(&(g), _p, sizeof(g)); \
})
-/* ucall implementation types */
-typedef enum {
- UCALL_PIO,
- UCALL_MMIO,
-} ucall_type_t;
-
/* Common ucalls */
enum {
UCALL_NONE,
@@ -186,7 +198,7 @@ struct ucall {
uint64_t args[UCALL_MAX_ARGS];
};
-void ucall_init(struct kvm_vm *vm, ucall_type_t type, void *arg);
+void ucall_init(struct kvm_vm *vm, void *arg);
void ucall_uninit(struct kvm_vm *vm);
void ucall(uint64_t cmd, int nargs, ...);
uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 80d19740d2dc..7428513a4c68 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -11,6 +11,8 @@
#include <assert.h>
#include <stdint.h>
+#include <asm/msr-index.h>
+
#define X86_EFLAGS_FIXED (1u << 1)
#define X86_CR4_VME (1ul << 0)
@@ -34,24 +36,24 @@
#define X86_CR4_SMAP (1ul << 21)
#define X86_CR4_PKE (1ul << 22)
-/* The enum values match the intruction encoding of each register */
-enum x86_register {
- RAX = 0,
- RCX,
- RDX,
- RBX,
- RSP,
- RBP,
- RSI,
- RDI,
- R8,
- R9,
- R10,
- R11,
- R12,
- R13,
- R14,
- R15,
+/* General Registers in 64-Bit Mode */
+struct gpr64_regs {
+ u64 rax;
+ u64 rcx;
+ u64 rdx;
+ u64 rbx;
+ u64 rsp;
+ u64 rbp;
+ u64 rsi;
+ u64 rdi;
+ u64 r8;
+ u64 r9;
+ u64 r10;
+ u64 r11;
+ u64 r12;
+ u64 r13;
+ u64 r14;
+ u64 r15;
};
struct desc64 {
@@ -218,20 +220,20 @@ static inline void set_cr4(uint64_t val)
__asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
}
-static inline uint64_t get_gdt_base(void)
+static inline struct desc_ptr get_gdt(void)
{
struct desc_ptr gdt;
__asm__ __volatile__("sgdt %[gdt]"
: /* output */ [gdt]"=m"(gdt));
- return gdt.address;
+ return gdt;
}
-static inline uint64_t get_idt_base(void)
+static inline struct desc_ptr get_idt(void)
{
struct desc_ptr idt;
__asm__ __volatile__("sidt %[idt]"
: /* output */ [idt]"=m"(idt));
- return idt.address;
+ return idt;
}
#define SET_XMM(__var, __xmm) \
@@ -308,6 +310,8 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_x86_state *state);
+struct kvm_msr_list *kvm_get_msr_index_list(void);
+
struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_cpuid2 *cpuid);
@@ -322,9 +326,15 @@ kvm_get_supported_cpuid_entry(uint32_t function)
}
uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index);
+int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+ uint64_t msr_value);
void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
uint64_t msr_value);
+uint32_t kvm_get_cpuid_max_basic(void);
+uint32_t kvm_get_cpuid_max_extended(void);
+void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
+
/*
* Basic CPU control in CR0
*/
@@ -340,444 +350,6 @@ void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
#define X86_CR0_CD (1UL<<30) /* Cache Disable */
#define X86_CR0_PG (1UL<<31) /* Paging */
-/*
- * CPU model specific register (MSR) numbers.
- */
-
-/* x86-64 specific MSRs */
-#define MSR_EFER 0xc0000080 /* extended feature register */
-#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */
-#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */
-#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */
-#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */
-#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */
-#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */
-#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */
-#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */
-
-/* EFER bits: */
-#define EFER_SCE (1<<0) /* SYSCALL/SYSRET */
-#define EFER_LME (1<<8) /* Long mode enable */
-#define EFER_LMA (1<<10) /* Long mode active (read-only) */
-#define EFER_NX (1<<11) /* No execute enable */
-#define EFER_SVME (1<<12) /* Enable virtualization */
-#define EFER_LMSLE (1<<13) /* Long Mode Segment Limit Enable */
-#define EFER_FFXSR (1<<14) /* Enable Fast FXSAVE/FXRSTOR */
-
-/* Intel MSRs. Some also available on other CPUs */
-
-#define MSR_PPIN_CTL 0x0000004e
-#define MSR_PPIN 0x0000004f
-
-#define MSR_IA32_PERFCTR0 0x000000c1
-#define MSR_IA32_PERFCTR1 0x000000c2
-#define MSR_FSB_FREQ 0x000000cd
-#define MSR_PLATFORM_INFO 0x000000ce
-#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31
-#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)
-
-#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
-#define NHM_C3_AUTO_DEMOTE (1UL << 25)
-#define NHM_C1_AUTO_DEMOTE (1UL << 26)
-#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25)
-#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
-#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
-
-#define MSR_MTRRcap 0x000000fe
-#define MSR_IA32_BBL_CR_CTL 0x00000119
-#define MSR_IA32_BBL_CR_CTL3 0x0000011e
-
-#define MSR_IA32_SYSENTER_CS 0x00000174
-#define MSR_IA32_SYSENTER_ESP 0x00000175
-#define MSR_IA32_SYSENTER_EIP 0x00000176
-
-#define MSR_IA32_MCG_CAP 0x00000179
-#define MSR_IA32_MCG_STATUS 0x0000017a
-#define MSR_IA32_MCG_CTL 0x0000017b
-#define MSR_IA32_MCG_EXT_CTL 0x000004d0
-
-#define MSR_OFFCORE_RSP_0 0x000001a6
-#define MSR_OFFCORE_RSP_1 0x000001a7
-#define MSR_TURBO_RATIO_LIMIT 0x000001ad
-#define MSR_TURBO_RATIO_LIMIT1 0x000001ae
-#define MSR_TURBO_RATIO_LIMIT2 0x000001af
-
-#define MSR_LBR_SELECT 0x000001c8
-#define MSR_LBR_TOS 0x000001c9
-#define MSR_LBR_NHM_FROM 0x00000680
-#define MSR_LBR_NHM_TO 0x000006c0
-#define MSR_LBR_CORE_FROM 0x00000040
-#define MSR_LBR_CORE_TO 0x00000060
-
-#define MSR_LBR_INFO_0 0x00000dc0 /* ... 0xddf for _31 */
-#define LBR_INFO_MISPRED BIT_ULL(63)
-#define LBR_INFO_IN_TX BIT_ULL(62)
-#define LBR_INFO_ABORT BIT_ULL(61)
-#define LBR_INFO_CYCLES 0xffff
-
-#define MSR_IA32_PEBS_ENABLE 0x000003f1
-#define MSR_IA32_DS_AREA 0x00000600
-#define MSR_IA32_PERF_CAPABILITIES 0x00000345
-#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
-
-#define MSR_IA32_RTIT_CTL 0x00000570
-#define MSR_IA32_RTIT_STATUS 0x00000571
-#define MSR_IA32_RTIT_ADDR0_A 0x00000580
-#define MSR_IA32_RTIT_ADDR0_B 0x00000581
-#define MSR_IA32_RTIT_ADDR1_A 0x00000582
-#define MSR_IA32_RTIT_ADDR1_B 0x00000583
-#define MSR_IA32_RTIT_ADDR2_A 0x00000584
-#define MSR_IA32_RTIT_ADDR2_B 0x00000585
-#define MSR_IA32_RTIT_ADDR3_A 0x00000586
-#define MSR_IA32_RTIT_ADDR3_B 0x00000587
-#define MSR_IA32_RTIT_CR3_MATCH 0x00000572
-#define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560
-#define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561
-
-#define MSR_MTRRfix64K_00000 0x00000250
-#define MSR_MTRRfix16K_80000 0x00000258
-#define MSR_MTRRfix16K_A0000 0x00000259
-#define MSR_MTRRfix4K_C0000 0x00000268
-#define MSR_MTRRfix4K_C8000 0x00000269
-#define MSR_MTRRfix4K_D0000 0x0000026a
-#define MSR_MTRRfix4K_D8000 0x0000026b
-#define MSR_MTRRfix4K_E0000 0x0000026c
-#define MSR_MTRRfix4K_E8000 0x0000026d
-#define MSR_MTRRfix4K_F0000 0x0000026e
-#define MSR_MTRRfix4K_F8000 0x0000026f
-#define MSR_MTRRdefType 0x000002ff
-
-#define MSR_IA32_CR_PAT 0x00000277
-
-#define MSR_IA32_DEBUGCTLMSR 0x000001d9
-#define MSR_IA32_LASTBRANCHFROMIP 0x000001db
-#define MSR_IA32_LASTBRANCHTOIP 0x000001dc
-#define MSR_IA32_LASTINTFROMIP 0x000001dd
-#define MSR_IA32_LASTINTTOIP 0x000001de
-
-/* DEBUGCTLMSR bits (others vary by model): */
-#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
-#define DEBUGCTLMSR_BTF_SHIFT 1
-#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
-#define DEBUGCTLMSR_TR (1UL << 6)
-#define DEBUGCTLMSR_BTS (1UL << 7)
-#define DEBUGCTLMSR_BTINT (1UL << 8)
-#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9)
-#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10)
-#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11)
-#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14
-#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
-
-#define MSR_PEBS_FRONTEND 0x000003f7
-
-#define MSR_IA32_POWER_CTL 0x000001fc
-
-#define MSR_IA32_MC0_CTL 0x00000400
-#define MSR_IA32_MC0_STATUS 0x00000401
-#define MSR_IA32_MC0_ADDR 0x00000402
-#define MSR_IA32_MC0_MISC 0x00000403
-
-/* C-state Residency Counters */
-#define MSR_PKG_C3_RESIDENCY 0x000003f8
-#define MSR_PKG_C6_RESIDENCY 0x000003f9
-#define MSR_ATOM_PKG_C6_RESIDENCY 0x000003fa
-#define MSR_PKG_C7_RESIDENCY 0x000003fa
-#define MSR_CORE_C3_RESIDENCY 0x000003fc
-#define MSR_CORE_C6_RESIDENCY 0x000003fd
-#define MSR_CORE_C7_RESIDENCY 0x000003fe
-#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff
-#define MSR_PKG_C2_RESIDENCY 0x0000060d
-#define MSR_PKG_C8_RESIDENCY 0x00000630
-#define MSR_PKG_C9_RESIDENCY 0x00000631
-#define MSR_PKG_C10_RESIDENCY 0x00000632
-
-/* Interrupt Response Limit */
-#define MSR_PKGC3_IRTL 0x0000060a
-#define MSR_PKGC6_IRTL 0x0000060b
-#define MSR_PKGC7_IRTL 0x0000060c
-#define MSR_PKGC8_IRTL 0x00000633
-#define MSR_PKGC9_IRTL 0x00000634
-#define MSR_PKGC10_IRTL 0x00000635
-
-/* Run Time Average Power Limiting (RAPL) Interface */
-
-#define MSR_RAPL_POWER_UNIT 0x00000606
-
-#define MSR_PKG_POWER_LIMIT 0x00000610
-#define MSR_PKG_ENERGY_STATUS 0x00000611
-#define MSR_PKG_PERF_STATUS 0x00000613
-#define MSR_PKG_POWER_INFO 0x00000614
-
-#define MSR_DRAM_POWER_LIMIT 0x00000618
-#define MSR_DRAM_ENERGY_STATUS 0x00000619
-#define MSR_DRAM_PERF_STATUS 0x0000061b
-#define MSR_DRAM_POWER_INFO 0x0000061c
-
-#define MSR_PP0_POWER_LIMIT 0x00000638
-#define MSR_PP0_ENERGY_STATUS 0x00000639
-#define MSR_PP0_POLICY 0x0000063a
-#define MSR_PP0_PERF_STATUS 0x0000063b
-
-#define MSR_PP1_POWER_LIMIT 0x00000640
-#define MSR_PP1_ENERGY_STATUS 0x00000641
-#define MSR_PP1_POLICY 0x00000642
-
-/* Config TDP MSRs */
-#define MSR_CONFIG_TDP_NOMINAL 0x00000648
-#define MSR_CONFIG_TDP_LEVEL_1 0x00000649
-#define MSR_CONFIG_TDP_LEVEL_2 0x0000064A
-#define MSR_CONFIG_TDP_CONTROL 0x0000064B
-#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C
-
-#define MSR_PLATFORM_ENERGY_STATUS 0x0000064D
-
-#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658
-#define MSR_PKG_ANY_CORE_C0_RES 0x00000659
-#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A
-#define MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B
-
-#define MSR_CORE_C1_RES 0x00000660
-#define MSR_MODULE_C6_RES_MS 0x00000664
-
-#define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668
-#define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669
-
-#define MSR_ATOM_CORE_RATIOS 0x0000066a
-#define MSR_ATOM_CORE_VIDS 0x0000066b
-#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c
-#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d
-
-
-#define MSR_CORE_PERF_LIMIT_REASONS 0x00000690
-#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0
-#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1
-
-/* Hardware P state interface */
-#define MSR_PPERF 0x0000064e
-#define MSR_PERF_LIMIT_REASONS 0x0000064f
-#define MSR_PM_ENABLE 0x00000770
-#define MSR_HWP_CAPABILITIES 0x00000771
-#define MSR_HWP_REQUEST_PKG 0x00000772
-#define MSR_HWP_INTERRUPT 0x00000773
-#define MSR_HWP_REQUEST 0x00000774
-#define MSR_HWP_STATUS 0x00000777
-
-/* CPUID.6.EAX */
-#define HWP_BASE_BIT (1<<7)
-#define HWP_NOTIFICATIONS_BIT (1<<8)
-#define HWP_ACTIVITY_WINDOW_BIT (1<<9)
-#define HWP_ENERGY_PERF_PREFERENCE_BIT (1<<10)
-#define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11)
-
-/* IA32_HWP_CAPABILITIES */
-#define HWP_HIGHEST_PERF(x) (((x) >> 0) & 0xff)
-#define HWP_GUARANTEED_PERF(x) (((x) >> 8) & 0xff)
-#define HWP_MOSTEFFICIENT_PERF(x) (((x) >> 16) & 0xff)
-#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff)
-
-/* IA32_HWP_REQUEST */
-#define HWP_MIN_PERF(x) (x & 0xff)
-#define HWP_MAX_PERF(x) ((x & 0xff) << 8)
-#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16)
-#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24)
-#define HWP_EPP_PERFORMANCE 0x00
-#define HWP_EPP_BALANCE_PERFORMANCE 0x80
-#define HWP_EPP_BALANCE_POWERSAVE 0xC0
-#define HWP_EPP_POWERSAVE 0xFF
-#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32)
-#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42)
-
-/* IA32_HWP_STATUS */
-#define HWP_GUARANTEED_CHANGE(x) (x & 0x1)
-#define HWP_EXCURSION_TO_MINIMUM(x) (x & 0x4)
-
-/* IA32_HWP_INTERRUPT */
-#define HWP_CHANGE_TO_GUARANTEED_INT(x) (x & 0x1)
-#define HWP_EXCURSION_TO_MINIMUM_INT(x) (x & 0x2)
-
-#define MSR_AMD64_MC0_MASK 0xc0010044
-
-#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x))
-#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x))
-#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x))
-#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x))
-
-#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x))
-
-/* These are consecutive and not in the normal 4er MCE bank block */
-#define MSR_IA32_MC0_CTL2 0x00000280
-#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x))
-
-#define MSR_P6_PERFCTR0 0x000000c1
-#define MSR_P6_PERFCTR1 0x000000c2
-#define MSR_P6_EVNTSEL0 0x00000186
-#define MSR_P6_EVNTSEL1 0x00000187
-
-#define MSR_KNC_PERFCTR0 0x00000020
-#define MSR_KNC_PERFCTR1 0x00000021
-#define MSR_KNC_EVNTSEL0 0x00000028
-#define MSR_KNC_EVNTSEL1 0x00000029
-
-/* Alternative perfctr range with full access. */
-#define MSR_IA32_PMC0 0x000004c1
-
-/* AMD64 MSRs. Not complete. See the architecture manual for a more
- complete list. */
-
-#define MSR_AMD64_PATCH_LEVEL 0x0000008b
-#define MSR_AMD64_TSC_RATIO 0xc0000104
-#define MSR_AMD64_NB_CFG 0xc001001f
-#define MSR_AMD64_PATCH_LOADER 0xc0010020
-#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
-#define MSR_AMD64_OSVW_STATUS 0xc0010141
-#define MSR_AMD64_LS_CFG 0xc0011020
-#define MSR_AMD64_DC_CFG 0xc0011022
-#define MSR_AMD64_BU_CFG2 0xc001102a
-#define MSR_AMD64_IBSFETCHCTL 0xc0011030
-#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
-#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
-#define MSR_AMD64_IBSFETCH_REG_COUNT 3
-#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1)
-#define MSR_AMD64_IBSOPCTL 0xc0011033
-#define MSR_AMD64_IBSOPRIP 0xc0011034
-#define MSR_AMD64_IBSOPDATA 0xc0011035
-#define MSR_AMD64_IBSOPDATA2 0xc0011036
-#define MSR_AMD64_IBSOPDATA3 0xc0011037
-#define MSR_AMD64_IBSDCLINAD 0xc0011038
-#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
-#define MSR_AMD64_IBSOP_REG_COUNT 7
-#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
-#define MSR_AMD64_IBSCTL 0xc001103a
-#define MSR_AMD64_IBSBRTARGET 0xc001103b
-#define MSR_AMD64_IBSOPDATA4 0xc001103d
-#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
-#define MSR_AMD64_SEV 0xc0010131
-#define MSR_AMD64_SEV_ENABLED_BIT 0
-#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
-
-/* Fam 17h MSRs */
-#define MSR_F17H_IRPERF 0xc00000e9
-
-/* Fam 16h MSRs */
-#define MSR_F16H_L2I_PERF_CTL 0xc0010230
-#define MSR_F16H_L2I_PERF_CTR 0xc0010231
-#define MSR_F16H_DR1_ADDR_MASK 0xc0011019
-#define MSR_F16H_DR2_ADDR_MASK 0xc001101a
-#define MSR_F16H_DR3_ADDR_MASK 0xc001101b
-#define MSR_F16H_DR0_ADDR_MASK 0xc0011027
-
-/* Fam 15h MSRs */
-#define MSR_F15H_PERF_CTL 0xc0010200
-#define MSR_F15H_PERF_CTR 0xc0010201
-#define MSR_F15H_NB_PERF_CTL 0xc0010240
-#define MSR_F15H_NB_PERF_CTR 0xc0010241
-#define MSR_F15H_PTSC 0xc0010280
-#define MSR_F15H_IC_CFG 0xc0011021
-
-/* Fam 10h MSRs */
-#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
-#define FAM10H_MMIO_CONF_ENABLE (1<<0)
-#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf
-#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
-#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
-#define FAM10H_MMIO_CONF_BASE_SHIFT 20
-#define MSR_FAM10H_NODE_ID 0xc001100c
-#define MSR_F10H_DECFG 0xc0011029
-#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
-#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
-
-/* K8 MSRs */
-#define MSR_K8_TOP_MEM1 0xc001001a
-#define MSR_K8_TOP_MEM2 0xc001001d
-#define MSR_K8_SYSCFG 0xc0010010
-#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23
-#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT)
-#define MSR_K8_INT_PENDING_MSG 0xc0010055
-/* C1E active bits in int pending message */
-#define K8_INTP_C1E_ACTIVE_MASK 0x18000000
-#define MSR_K8_TSEG_ADDR 0xc0010112
-#define MSR_K8_TSEG_MASK 0xc0010113
-#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */
-#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */
-#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */
-
-/* K7 MSRs */
-#define MSR_K7_EVNTSEL0 0xc0010000
-#define MSR_K7_PERFCTR0 0xc0010004
-#define MSR_K7_EVNTSEL1 0xc0010001
-#define MSR_K7_PERFCTR1 0xc0010005
-#define MSR_K7_EVNTSEL2 0xc0010002
-#define MSR_K7_PERFCTR2 0xc0010006
-#define MSR_K7_EVNTSEL3 0xc0010003
-#define MSR_K7_PERFCTR3 0xc0010007
-#define MSR_K7_CLK_CTL 0xc001001b
-#define MSR_K7_HWCR 0xc0010015
-#define MSR_K7_HWCR_SMMLOCK_BIT 0
-#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
-#define MSR_K7_FID_VID_CTL 0xc0010041
-#define MSR_K7_FID_VID_STATUS 0xc0010042
-
-/* K6 MSRs */
-#define MSR_K6_WHCR 0xc0000082
-#define MSR_K6_UWCCR 0xc0000085
-#define MSR_K6_EPMR 0xc0000086
-#define MSR_K6_PSOR 0xc0000087
-#define MSR_K6_PFIR 0xc0000088
-
-/* Centaur-Hauls/IDT defined MSRs. */
-#define MSR_IDT_FCR1 0x00000107
-#define MSR_IDT_FCR2 0x00000108
-#define MSR_IDT_FCR3 0x00000109
-#define MSR_IDT_FCR4 0x0000010a
-
-#define MSR_IDT_MCR0 0x00000110
-#define MSR_IDT_MCR1 0x00000111
-#define MSR_IDT_MCR2 0x00000112
-#define MSR_IDT_MCR3 0x00000113
-#define MSR_IDT_MCR4 0x00000114
-#define MSR_IDT_MCR5 0x00000115
-#define MSR_IDT_MCR6 0x00000116
-#define MSR_IDT_MCR7 0x00000117
-#define MSR_IDT_MCR_CTRL 0x00000120
-
-/* VIA Cyrix defined MSRs*/
-#define MSR_VIA_FCR 0x00001107
-#define MSR_VIA_LONGHAUL 0x0000110a
-#define MSR_VIA_RNG 0x0000110b
-#define MSR_VIA_BCR2 0x00001147
-
-/* Transmeta defined MSRs */
-#define MSR_TMTA_LONGRUN_CTRL 0x80868010
-#define MSR_TMTA_LONGRUN_FLAGS 0x80868011
-#define MSR_TMTA_LRTI_READOUT 0x80868018
-#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a
-
-/* Intel defined MSRs. */
-#define MSR_IA32_P5_MC_ADDR 0x00000000
-#define MSR_IA32_P5_MC_TYPE 0x00000001
-#define MSR_IA32_TSC 0x00000010
-#define MSR_IA32_PLATFORM_ID 0x00000017
-#define MSR_IA32_EBL_CR_POWERON 0x0000002a
-#define MSR_EBC_FREQUENCY_ID 0x0000002c
-#define MSR_SMI_COUNT 0x00000034
-#define MSR_IA32_FEATURE_CONTROL 0x0000003a
-#define MSR_IA32_TSC_ADJUST 0x0000003b
-#define MSR_IA32_BNDCFGS 0x00000d90
-
-#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc
-
-#define MSR_IA32_XSS 0x00000da0
-
-#define FEATURE_CONTROL_LOCKED (1<<0)
-#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
-#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2)
-#define FEATURE_CONTROL_LMCE (1<<20)
-
-#define MSR_IA32_APICBASE 0x0000001b
-#define MSR_IA32_APICBASE_BSP (1<<8)
-#define MSR_IA32_APICBASE_ENABLE (1<<11)
-#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
-
#define APIC_BASE_MSR 0x800
#define X2APIC_ENABLE (1UL << 10)
#define APIC_ICR 0x300
@@ -805,288 +377,7 @@ void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
#define APIC_VECTOR_MASK 0x000FF
#define APIC_ICR2 0x310
-#define MSR_IA32_TSCDEADLINE 0x000006e0
-
-#define MSR_IA32_UCODE_WRITE 0x00000079
-#define MSR_IA32_UCODE_REV 0x0000008b
-
-#define MSR_IA32_SMM_MONITOR_CTL 0x0000009b
-#define MSR_IA32_SMBASE 0x0000009e
-
-#define MSR_IA32_PERF_STATUS 0x00000198
-#define MSR_IA32_PERF_CTL 0x00000199
-#define INTEL_PERF_CTL_MASK 0xffff
-#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
-#define MSR_AMD_PERF_STATUS 0xc0010063
-#define MSR_AMD_PERF_CTL 0xc0010062
-
-#define MSR_IA32_MPERF 0x000000e7
-#define MSR_IA32_APERF 0x000000e8
-
-#define MSR_IA32_THERM_CONTROL 0x0000019a
-#define MSR_IA32_THERM_INTERRUPT 0x0000019b
-
-#define THERM_INT_HIGH_ENABLE (1 << 0)
-#define THERM_INT_LOW_ENABLE (1 << 1)
-#define THERM_INT_PLN_ENABLE (1 << 24)
-
-#define MSR_IA32_THERM_STATUS 0x0000019c
-
-#define THERM_STATUS_PROCHOT (1 << 0)
-#define THERM_STATUS_POWER_LIMIT (1 << 10)
-
-#define MSR_THERM2_CTL 0x0000019d
-
-#define MSR_THERM2_CTL_TM_SELECT (1ULL << 16)
-
-#define MSR_IA32_MISC_ENABLE 0x000001a0
-
-#define MSR_IA32_TEMPERATURE_TARGET 0x000001a2
-
-#define MSR_MISC_FEATURE_CONTROL 0x000001a4
-#define MSR_MISC_PWR_MGMT 0x000001aa
-
-#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
-#define ENERGY_PERF_BIAS_PERFORMANCE 0
-#define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE 4
-#define ENERGY_PERF_BIAS_NORMAL 6
-#define ENERGY_PERF_BIAS_BALANCE_POWERSAVE 8
-#define ENERGY_PERF_BIAS_POWERSAVE 15
-
-#define MSR_IA32_PACKAGE_THERM_STATUS 0x000001b1
-
-#define PACKAGE_THERM_STATUS_PROCHOT (1 << 0)
-#define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10)
-
-#define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2
-
-#define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0)
-#define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1)
-#define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24)
-
-/* Thermal Thresholds Support */
-#define THERM_INT_THRESHOLD0_ENABLE (1 << 15)
-#define THERM_SHIFT_THRESHOLD0 8
-#define THERM_MASK_THRESHOLD0 (0x7f << THERM_SHIFT_THRESHOLD0)
-#define THERM_INT_THRESHOLD1_ENABLE (1 << 23)
-#define THERM_SHIFT_THRESHOLD1 16
-#define THERM_MASK_THRESHOLD1 (0x7f << THERM_SHIFT_THRESHOLD1)
-#define THERM_STATUS_THRESHOLD0 (1 << 6)
-#define THERM_LOG_THRESHOLD0 (1 << 7)
-#define THERM_STATUS_THRESHOLD1 (1 << 8)
-#define THERM_LOG_THRESHOLD1 (1 << 9)
-
-/* MISC_ENABLE bits: architectural */
-#define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT 0
-#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT)
-#define MSR_IA32_MISC_ENABLE_TCC_BIT 1
-#define MSR_IA32_MISC_ENABLE_TCC (1ULL << MSR_IA32_MISC_ENABLE_TCC_BIT)
-#define MSR_IA32_MISC_ENABLE_EMON_BIT 7
-#define MSR_IA32_MISC_ENABLE_EMON (1ULL << MSR_IA32_MISC_ENABLE_EMON_BIT)
-#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT 11
-#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT)
-#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT 12
-#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT)
-#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT 16
-#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT)
-#define MSR_IA32_MISC_ENABLE_MWAIT_BIT 18
-#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT)
-#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT 22
-#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT)
-#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT 23
-#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT)
-#define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT 34
-#define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT)
-
-/* MISC_ENABLE bits: model-specific, meaning may vary from core to core */
-#define MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT 2
-#define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT)
-#define MSR_IA32_MISC_ENABLE_TM1_BIT 3
-#define MSR_IA32_MISC_ENABLE_TM1 (1ULL << MSR_IA32_MISC_ENABLE_TM1_BIT)
-#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT 4
-#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT)
-#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT 6
-#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT)
-#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT 8
-#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT)
-#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT 9
-#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT)
-#define MSR_IA32_MISC_ENABLE_FERR_BIT 10
-#define MSR_IA32_MISC_ENABLE_FERR (1ULL << MSR_IA32_MISC_ENABLE_FERR_BIT)
-#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT 10
-#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT)
-#define MSR_IA32_MISC_ENABLE_TM2_BIT 13
-#define MSR_IA32_MISC_ENABLE_TM2 (1ULL << MSR_IA32_MISC_ENABLE_TM2_BIT)
-#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT 19
-#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT)
-#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT 20
-#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT)
-#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT 24
-#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT)
-#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT 37
-#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT)
-#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT 38
-#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT)
-#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39
-#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT)
-
-/* MISC_FEATURES_ENABLES non-architectural features */
-#define MSR_MISC_FEATURES_ENABLES 0x00000140
-
-#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT 0
-#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT)
-#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT 1
-
-#define MSR_IA32_TSC_DEADLINE 0x000006E0
-
-/* P4/Xeon+ specific */
-#define MSR_IA32_MCG_EAX 0x00000180
-#define MSR_IA32_MCG_EBX 0x00000181
-#define MSR_IA32_MCG_ECX 0x00000182
-#define MSR_IA32_MCG_EDX 0x00000183
-#define MSR_IA32_MCG_ESI 0x00000184
-#define MSR_IA32_MCG_EDI 0x00000185
-#define MSR_IA32_MCG_EBP 0x00000186
-#define MSR_IA32_MCG_ESP 0x00000187
-#define MSR_IA32_MCG_EFLAGS 0x00000188
-#define MSR_IA32_MCG_EIP 0x00000189
-#define MSR_IA32_MCG_RESERVED 0x0000018a
-
-/* Pentium IV performance counter MSRs */
-#define MSR_P4_BPU_PERFCTR0 0x00000300
-#define MSR_P4_BPU_PERFCTR1 0x00000301
-#define MSR_P4_BPU_PERFCTR2 0x00000302
-#define MSR_P4_BPU_PERFCTR3 0x00000303
-#define MSR_P4_MS_PERFCTR0 0x00000304
-#define MSR_P4_MS_PERFCTR1 0x00000305
-#define MSR_P4_MS_PERFCTR2 0x00000306
-#define MSR_P4_MS_PERFCTR3 0x00000307
-#define MSR_P4_FLAME_PERFCTR0 0x00000308
-#define MSR_P4_FLAME_PERFCTR1 0x00000309
-#define MSR_P4_FLAME_PERFCTR2 0x0000030a
-#define MSR_P4_FLAME_PERFCTR3 0x0000030b
-#define MSR_P4_IQ_PERFCTR0 0x0000030c
-#define MSR_P4_IQ_PERFCTR1 0x0000030d
-#define MSR_P4_IQ_PERFCTR2 0x0000030e
-#define MSR_P4_IQ_PERFCTR3 0x0000030f
-#define MSR_P4_IQ_PERFCTR4 0x00000310
-#define MSR_P4_IQ_PERFCTR5 0x00000311
-#define MSR_P4_BPU_CCCR0 0x00000360
-#define MSR_P4_BPU_CCCR1 0x00000361
-#define MSR_P4_BPU_CCCR2 0x00000362
-#define MSR_P4_BPU_CCCR3 0x00000363
-#define MSR_P4_MS_CCCR0 0x00000364
-#define MSR_P4_MS_CCCR1 0x00000365
-#define MSR_P4_MS_CCCR2 0x00000366
-#define MSR_P4_MS_CCCR3 0x00000367
-#define MSR_P4_FLAME_CCCR0 0x00000368
-#define MSR_P4_FLAME_CCCR1 0x00000369
-#define MSR_P4_FLAME_CCCR2 0x0000036a
-#define MSR_P4_FLAME_CCCR3 0x0000036b
-#define MSR_P4_IQ_CCCR0 0x0000036c
-#define MSR_P4_IQ_CCCR1 0x0000036d
-#define MSR_P4_IQ_CCCR2 0x0000036e
-#define MSR_P4_IQ_CCCR3 0x0000036f
-#define MSR_P4_IQ_CCCR4 0x00000370
-#define MSR_P4_IQ_CCCR5 0x00000371
-#define MSR_P4_ALF_ESCR0 0x000003ca
-#define MSR_P4_ALF_ESCR1 0x000003cb
-#define MSR_P4_BPU_ESCR0 0x000003b2
-#define MSR_P4_BPU_ESCR1 0x000003b3
-#define MSR_P4_BSU_ESCR0 0x000003a0
-#define MSR_P4_BSU_ESCR1 0x000003a1
-#define MSR_P4_CRU_ESCR0 0x000003b8
-#define MSR_P4_CRU_ESCR1 0x000003b9
-#define MSR_P4_CRU_ESCR2 0x000003cc
-#define MSR_P4_CRU_ESCR3 0x000003cd
-#define MSR_P4_CRU_ESCR4 0x000003e0
-#define MSR_P4_CRU_ESCR5 0x000003e1
-#define MSR_P4_DAC_ESCR0 0x000003a8
-#define MSR_P4_DAC_ESCR1 0x000003a9
-#define MSR_P4_FIRM_ESCR0 0x000003a4
-#define MSR_P4_FIRM_ESCR1 0x000003a5
-#define MSR_P4_FLAME_ESCR0 0x000003a6
-#define MSR_P4_FLAME_ESCR1 0x000003a7
-#define MSR_P4_FSB_ESCR0 0x000003a2
-#define MSR_P4_FSB_ESCR1 0x000003a3
-#define MSR_P4_IQ_ESCR0 0x000003ba
-#define MSR_P4_IQ_ESCR1 0x000003bb
-#define MSR_P4_IS_ESCR0 0x000003b4
-#define MSR_P4_IS_ESCR1 0x000003b5
-#define MSR_P4_ITLB_ESCR0 0x000003b6
-#define MSR_P4_ITLB_ESCR1 0x000003b7
-#define MSR_P4_IX_ESCR0 0x000003c8
-#define MSR_P4_IX_ESCR1 0x000003c9
-#define MSR_P4_MOB_ESCR0 0x000003aa
-#define MSR_P4_MOB_ESCR1 0x000003ab
-#define MSR_P4_MS_ESCR0 0x000003c0
-#define MSR_P4_MS_ESCR1 0x000003c1
-#define MSR_P4_PMH_ESCR0 0x000003ac
-#define MSR_P4_PMH_ESCR1 0x000003ad
-#define MSR_P4_RAT_ESCR0 0x000003bc
-#define MSR_P4_RAT_ESCR1 0x000003bd
-#define MSR_P4_SAAT_ESCR0 0x000003ae
-#define MSR_P4_SAAT_ESCR1 0x000003af
-#define MSR_P4_SSU_ESCR0 0x000003be
-#define MSR_P4_SSU_ESCR1 0x000003bf /* guess: not in manual */
-
-#define MSR_P4_TBPU_ESCR0 0x000003c2
-#define MSR_P4_TBPU_ESCR1 0x000003c3
-#define MSR_P4_TC_ESCR0 0x000003c4
-#define MSR_P4_TC_ESCR1 0x000003c5
-#define MSR_P4_U2L_ESCR0 0x000003b0
-#define MSR_P4_U2L_ESCR1 0x000003b1
-
-#define MSR_P4_PEBS_MATRIX_VERT 0x000003f2
-
-/* Intel Core-based CPU performance counters */
-#define MSR_CORE_PERF_FIXED_CTR0 0x00000309
-#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a
-#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b
-#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d
-#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e
-#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f
-#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390
-
-/* Geode defined MSRs */
-#define MSR_GEODE_BUSCONT_CONF0 0x00001900
-
-/* Intel VT MSRs */
-#define MSR_IA32_VMX_BASIC 0x00000480
-#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481
-#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482
-#define MSR_IA32_VMX_EXIT_CTLS 0x00000483
-#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484
-#define MSR_IA32_VMX_MISC 0x00000485
-#define MSR_IA32_VMX_CR0_FIXED0 0x00000486
-#define MSR_IA32_VMX_CR0_FIXED1 0x00000487
-#define MSR_IA32_VMX_CR4_FIXED0 0x00000488
-#define MSR_IA32_VMX_CR4_FIXED1 0x00000489
-#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a
-#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b
-#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c
-#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x0000048d
-#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e
-#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f
-#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490
-#define MSR_IA32_VMX_VMFUNC 0x00000491
-
-/* VMX_BASIC bits and bitmasks */
-#define VMX_BASIC_VMCS_SIZE_SHIFT 32
-#define VMX_BASIC_TRUE_CTLS (1ULL << 55)
-#define VMX_BASIC_64 0x0001000000000000LLU
-#define VMX_BASIC_MEM_TYPE_SHIFT 50
-#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU
-#define VMX_BASIC_MEM_TYPE_WB 6LLU
-#define VMX_BASIC_INOUT 0x0040000000000000LLU
-
-/* MSR_IA32_VMX_MISC bits */
-#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
-#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
-/* AMD-V MSRs */
-
-#define MSR_VM_CR 0xc0010114
-#define MSR_VM_IGNNE 0xc0010115
-#define MSR_VM_HSAVE_PA 0xc0010117
+/* VMX_EPT_VPID_CAP bits */
+#define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21)
#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86_64/svm.h
new file mode 100644
index 000000000000..f4ea2355dbc2
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/svm.h
@@ -0,0 +1,297 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * tools/testing/selftests/kvm/include/x86_64/svm.h
+ * This is a copy of arch/x86/include/asm/svm.h
+ *
+ */
+
+#ifndef SELFTEST_KVM_SVM_H
+#define SELFTEST_KVM_SVM_H
+
+enum {
+ INTERCEPT_INTR,
+ INTERCEPT_NMI,
+ INTERCEPT_SMI,
+ INTERCEPT_INIT,
+ INTERCEPT_VINTR,
+ INTERCEPT_SELECTIVE_CR0,
+ INTERCEPT_STORE_IDTR,
+ INTERCEPT_STORE_GDTR,
+ INTERCEPT_STORE_LDTR,
+ INTERCEPT_STORE_TR,
+ INTERCEPT_LOAD_IDTR,
+ INTERCEPT_LOAD_GDTR,
+ INTERCEPT_LOAD_LDTR,
+ INTERCEPT_LOAD_TR,
+ INTERCEPT_RDTSC,
+ INTERCEPT_RDPMC,
+ INTERCEPT_PUSHF,
+ INTERCEPT_POPF,
+ INTERCEPT_CPUID,
+ INTERCEPT_RSM,
+ INTERCEPT_IRET,
+ INTERCEPT_INTn,
+ INTERCEPT_INVD,
+ INTERCEPT_PAUSE,
+ INTERCEPT_HLT,
+ INTERCEPT_INVLPG,
+ INTERCEPT_INVLPGA,
+ INTERCEPT_IOIO_PROT,
+ INTERCEPT_MSR_PROT,
+ INTERCEPT_TASK_SWITCH,
+ INTERCEPT_FERR_FREEZE,
+ INTERCEPT_SHUTDOWN,
+ INTERCEPT_VMRUN,
+ INTERCEPT_VMMCALL,
+ INTERCEPT_VMLOAD,
+ INTERCEPT_VMSAVE,
+ INTERCEPT_STGI,
+ INTERCEPT_CLGI,
+ INTERCEPT_SKINIT,
+ INTERCEPT_RDTSCP,
+ INTERCEPT_ICEBP,
+ INTERCEPT_WBINVD,
+ INTERCEPT_MONITOR,
+ INTERCEPT_MWAIT,
+ INTERCEPT_MWAIT_COND,
+ INTERCEPT_XSETBV,
+ INTERCEPT_RDPRU,
+};
+
+
+struct __attribute__ ((__packed__)) vmcb_control_area {
+ u32 intercept_cr;
+ u32 intercept_dr;
+ u32 intercept_exceptions;
+ u64 intercept;
+ u8 reserved_1[40];
+ u16 pause_filter_thresh;
+ u16 pause_filter_count;
+ u64 iopm_base_pa;
+ u64 msrpm_base_pa;
+ u64 tsc_offset;
+ u32 asid;
+ u8 tlb_ctl;
+ u8 reserved_2[3];
+ u32 int_ctl;
+ u32 int_vector;
+ u32 int_state;
+ u8 reserved_3[4];
+ u32 exit_code;
+ u32 exit_code_hi;
+ u64 exit_info_1;
+ u64 exit_info_2;
+ u32 exit_int_info;
+ u32 exit_int_info_err;
+ u64 nested_ctl;
+ u64 avic_vapic_bar;
+ u8 reserved_4[8];
+ u32 event_inj;
+ u32 event_inj_err;
+ u64 nested_cr3;
+ u64 virt_ext;
+ u32 clean;
+ u32 reserved_5;
+ u64 next_rip;
+ u8 insn_len;
+ u8 insn_bytes[15];
+ u64 avic_backing_page; /* Offset 0xe0 */
+ u8 reserved_6[8]; /* Offset 0xe8 */
+ u64 avic_logical_id; /* Offset 0xf0 */
+ u64 avic_physical_id; /* Offset 0xf8 */
+ u8 reserved_7[768];
+};
+
+
+#define TLB_CONTROL_DO_NOTHING 0
+#define TLB_CONTROL_FLUSH_ALL_ASID 1
+#define TLB_CONTROL_FLUSH_ASID 3
+#define TLB_CONTROL_FLUSH_ASID_LOCAL 7
+
+#define V_TPR_MASK 0x0f
+
+#define V_IRQ_SHIFT 8
+#define V_IRQ_MASK (1 << V_IRQ_SHIFT)
+
+#define V_GIF_SHIFT 9
+#define V_GIF_MASK (1 << V_GIF_SHIFT)
+
+#define V_INTR_PRIO_SHIFT 16
+#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
+
+#define V_IGN_TPR_SHIFT 20
+#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
+
+#define V_INTR_MASKING_SHIFT 24
+#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
+
+#define V_GIF_ENABLE_SHIFT 25
+#define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT)
+
+#define AVIC_ENABLE_SHIFT 31
+#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
+
+#define LBR_CTL_ENABLE_MASK BIT_ULL(0)
+#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
+
+#define SVM_INTERRUPT_SHADOW_MASK 1
+
+#define SVM_IOIO_STR_SHIFT 2
+#define SVM_IOIO_REP_SHIFT 3
+#define SVM_IOIO_SIZE_SHIFT 4
+#define SVM_IOIO_ASIZE_SHIFT 7
+
+#define SVM_IOIO_TYPE_MASK 1
+#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT)
+#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT)
+#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT)
+#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT)
+
+#define SVM_VM_CR_VALID_MASK 0x001fULL
+#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL
+#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL
+
+#define SVM_NESTED_CTL_NP_ENABLE BIT(0)
+#define SVM_NESTED_CTL_SEV_ENABLE BIT(1)
+
+struct __attribute__ ((__packed__)) vmcb_seg {
+ u16 selector;
+ u16 attrib;
+ u32 limit;
+ u64 base;
+};
+
+struct __attribute__ ((__packed__)) vmcb_save_area {
+ struct vmcb_seg es;
+ struct vmcb_seg cs;
+ struct vmcb_seg ss;
+ struct vmcb_seg ds;
+ struct vmcb_seg fs;
+ struct vmcb_seg gs;
+ struct vmcb_seg gdtr;
+ struct vmcb_seg ldtr;
+ struct vmcb_seg idtr;
+ struct vmcb_seg tr;
+ u8 reserved_1[43];
+ u8 cpl;
+ u8 reserved_2[4];
+ u64 efer;
+ u8 reserved_3[112];
+ u64 cr4;
+ u64 cr3;
+ u64 cr0;
+ u64 dr7;
+ u64 dr6;
+ u64 rflags;
+ u64 rip;
+ u8 reserved_4[88];
+ u64 rsp;
+ u8 reserved_5[24];
+ u64 rax;
+ u64 star;
+ u64 lstar;
+ u64 cstar;
+ u64 sfmask;
+ u64 kernel_gs_base;
+ u64 sysenter_cs;
+ u64 sysenter_esp;
+ u64 sysenter_eip;
+ u64 cr2;
+ u8 reserved_6[32];
+ u64 g_pat;
+ u64 dbgctl;
+ u64 br_from;
+ u64 br_to;
+ u64 last_excp_from;
+ u64 last_excp_to;
+};
+
+struct __attribute__ ((__packed__)) vmcb {
+ struct vmcb_control_area control;
+ struct vmcb_save_area save;
+};
+
+#define SVM_CPUID_FUNC 0x8000000a
+
+#define SVM_VM_CR_SVM_DISABLE 4
+
+#define SVM_SELECTOR_S_SHIFT 4
+#define SVM_SELECTOR_DPL_SHIFT 5
+#define SVM_SELECTOR_P_SHIFT 7
+#define SVM_SELECTOR_AVL_SHIFT 8
+#define SVM_SELECTOR_L_SHIFT 9
+#define SVM_SELECTOR_DB_SHIFT 10
+#define SVM_SELECTOR_G_SHIFT 11
+
+#define SVM_SELECTOR_TYPE_MASK (0xf)
+#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT)
+#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT)
+#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT)
+#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT)
+#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT)
+#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT)
+#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT)
+
+#define SVM_SELECTOR_WRITE_MASK (1 << 1)
+#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK
+#define SVM_SELECTOR_CODE_MASK (1 << 3)
+
+#define INTERCEPT_CR0_READ 0
+#define INTERCEPT_CR3_READ 3
+#define INTERCEPT_CR4_READ 4
+#define INTERCEPT_CR8_READ 8
+#define INTERCEPT_CR0_WRITE (16 + 0)
+#define INTERCEPT_CR3_WRITE (16 + 3)
+#define INTERCEPT_CR4_WRITE (16 + 4)
+#define INTERCEPT_CR8_WRITE (16 + 8)
+
+#define INTERCEPT_DR0_READ 0
+#define INTERCEPT_DR1_READ 1
+#define INTERCEPT_DR2_READ 2
+#define INTERCEPT_DR3_READ 3
+#define INTERCEPT_DR4_READ 4
+#define INTERCEPT_DR5_READ 5
+#define INTERCEPT_DR6_READ 6
+#define INTERCEPT_DR7_READ 7
+#define INTERCEPT_DR0_WRITE (16 + 0)
+#define INTERCEPT_DR1_WRITE (16 + 1)
+#define INTERCEPT_DR2_WRITE (16 + 2)
+#define INTERCEPT_DR3_WRITE (16 + 3)
+#define INTERCEPT_DR4_WRITE (16 + 4)
+#define INTERCEPT_DR5_WRITE (16 + 5)
+#define INTERCEPT_DR6_WRITE (16 + 6)
+#define INTERCEPT_DR7_WRITE (16 + 7)
+
+#define SVM_EVTINJ_VEC_MASK 0xff
+
+#define SVM_EVTINJ_TYPE_SHIFT 8
+#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_VALID (1 << 31)
+#define SVM_EVTINJ_VALID_ERR (1 << 11)
+
+#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
+#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK
+
+#define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
+#define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
+#define SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT
+#define SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT
+
+#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
+#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR
+
+#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36
+#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38
+#define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44
+
+#define SVM_EXITINFO_REG_MASK 0x0F
+
+#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
+
+#endif /* SELFTEST_KVM_SVM_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
new file mode 100644
index 000000000000..cd037917fece
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/x86_64/svm_utils.h
+ * Header for nested SVM testing
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+
+#ifndef SELFTEST_KVM_SVM_UTILS_H
+#define SELFTEST_KVM_SVM_UTILS_H
+
+#include <stdint.h>
+#include "svm.h"
+#include "processor.h"
+
+#define CPUID_SVM_BIT 2
+#define CPUID_SVM BIT_ULL(CPUID_SVM_BIT)
+
+#define SVM_EXIT_VMMCALL 0x081
+
+struct svm_test_data {
+ /* VMCB */
+ struct vmcb *vmcb; /* gva */
+ void *vmcb_hva;
+ uint64_t vmcb_gpa;
+
+ /* host state-save area */
+ struct vmcb_save_area *save_area; /* gva */
+ void *save_area_hva;
+ uint64_t save_area_gpa;
+};
+
+struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva);
+void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp);
+void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa);
+void nested_svm_check_supported(void);
+
+#endif /* SELFTEST_KVM_SVM_UTILS_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index 69b17055f63d..3d27069b9ed9 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -18,8 +18,8 @@
/*
* Definitions of Primary Processor-Based VM-Execution Controls.
*/
-#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004
-#define CPU_BASED_USE_TSC_OFFSETING 0x00000008
+#define CPU_BASED_INTR_WINDOW_EXITING 0x00000004
+#define CPU_BASED_USE_TSC_OFFSETTING 0x00000008
#define CPU_BASED_HLT_EXITING 0x00000080
#define CPU_BASED_INVLPG_EXITING 0x00000200
#define CPU_BASED_MWAIT_EXITING 0x00000400
@@ -30,7 +30,7 @@
#define CPU_BASED_CR8_LOAD_EXITING 0x00080000
#define CPU_BASED_CR8_STORE_EXITING 0x00100000
#define CPU_BASED_TPR_SHADOW 0x00200000
-#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000
+#define CPU_BASED_NMI_WINDOW_EXITING 0x00400000
#define CPU_BASED_MOV_DR_EXITING 0x00800000
#define CPU_BASED_UNCOND_IO_EXITING 0x01000000
#define CPU_BASED_USE_IO_BITMAPS 0x02000000
@@ -103,7 +103,7 @@
#define EXIT_REASON_EXCEPTION_NMI 0
#define EXIT_REASON_EXTERNAL_INTERRUPT 1
#define EXIT_REASON_TRIPLE_FAULT 2
-#define EXIT_REASON_PENDING_INTERRUPT 7
+#define EXIT_REASON_INTERRUPT_WINDOW 7
#define EXIT_REASON_NMI_WINDOW 8
#define EXIT_REASON_TASK_SWITCH 9
#define EXIT_REASON_CPUID 10
@@ -569,6 +569,10 @@ struct vmx_pages {
void *enlightened_vmcs_hva;
uint64_t enlightened_vmcs_gpa;
void *enlightened_vmcs;
+
+ void *eptp_hva;
+ uint64_t eptp_gpa;
+ void *eptp;
};
struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
@@ -576,4 +580,16 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx);
void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
bool load_vmcs(struct vmx_pages *vmx);
+void nested_vmx_check_supported(void);
+
+void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot);
+void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, uint64_t size,
+ uint32_t eptp_memslot);
+void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint32_t memslot, uint32_t eptp_memslot);
+void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint32_t eptp_memslot);
+
#endif /* SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
index 231d79e57774..6f38c3dc0d56 100644
--- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
+++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
@@ -29,12 +29,9 @@ void test_vcpu_creation(int first_vcpu_id, int num_vcpus)
vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
- for (i = 0; i < num_vcpus; i++) {
- int vcpu_id = first_vcpu_id + i;
-
+ for (i = first_vcpu_id; i < first_vcpu_id + num_vcpus; i++)
/* This asserts that the vCPU was created. */
- vm_vcpu_add(vm, vcpu_id);
- }
+ vm_vcpu_add(vm, i);
kvm_vm_free(vm);
}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 486400a97374..86036a59a668 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -264,6 +264,9 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini
case VM_MODE_P52V48_4K:
TEST_ASSERT(false, "AArch64 does not support 4K sized pages "
"with 52-bit physical address ranges");
+ case VM_MODE_PXXV48_4K:
+ TEST_ASSERT(false, "AArch64 does not support 4K sized pages "
+ "with ANY-bit physical address ranges");
case VM_MODE_P52V48_64K:
tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
new file mode 100644
index 000000000000..6cd91970fbad
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#include "kvm_util.h"
+#include "../kvm_util_internal.h"
+
+static vm_vaddr_t *ucall_exit_mmio_addr;
+
+static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa)
+{
+ if (kvm_userspace_memory_region_find(vm, gpa, gpa + 1))
+ return false;
+
+ virt_pg_map(vm, gpa, gpa, 0);
+
+ ucall_exit_mmio_addr = (vm_vaddr_t *)gpa;
+ sync_global_to_guest(vm, ucall_exit_mmio_addr);
+
+ return true;
+}
+
+void ucall_init(struct kvm_vm *vm, void *arg)
+{
+ vm_paddr_t gpa, start, end, step, offset;
+ unsigned int bits;
+ bool ret;
+
+ if (arg) {
+ gpa = (vm_paddr_t)arg;
+ ret = ucall_mmio_init(vm, gpa);
+ TEST_ASSERT(ret, "Can't set ucall mmio address to %lx", gpa);
+ return;
+ }
+
+ /*
+ * Find an address within the allowed physical and virtual address
+ * spaces, that does _not_ have a KVM memory region associated with
+ * it. Identity mapping an address like this allows the guest to
+ * access it, but as KVM doesn't know what to do with it, it
+ * will assume it's something userspace handles and exit with
+ * KVM_EXIT_MMIO. Well, at least that's how it works for AArch64.
+ * Here we start with a guess that the addresses around 5/8th
+ * of the allowed space are unmapped and then work both down and
+ * up from there in 1/16th allowed space sized steps.
+ *
+ * Note, we need to use VA-bits - 1 when calculating the allowed
+ * virtual address space for an identity mapping because the upper
+ * half of the virtual address space is the two's complement of the
+ * lower and won't match physical addresses.
+ */
+ bits = vm->va_bits - 1;
+ bits = vm->pa_bits < bits ? vm->pa_bits : bits;
+ end = 1ul << bits;
+ start = end * 5 / 8;
+ step = end / 16;
+ for (offset = 0; offset < end - start; offset += step) {
+ if (ucall_mmio_init(vm, start - offset))
+ return;
+ if (ucall_mmio_init(vm, start + offset))
+ return;
+ }
+ TEST_ASSERT(false, "Can't find a ucall mmio address");
+}
+
+void ucall_uninit(struct kvm_vm *vm)
+{
+ ucall_exit_mmio_addr = 0;
+ sync_global_to_guest(vm, ucall_exit_mmio_addr);
+}
+
+void ucall(uint64_t cmd, int nargs, ...)
+{
+ struct ucall uc = {
+ .cmd = cmd,
+ };
+ va_list va;
+ int i;
+
+ nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
+
+ va_start(va, nargs);
+ for (i = 0; i < nargs; ++i)
+ uc.args[i] = va_arg(va, uint64_t);
+ va_end(va);
+
+ *ucall_exit_mmio_addr = (vm_vaddr_t)&uc;
+}
+
+uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
+{
+ struct kvm_run *run = vcpu_state(vm, vcpu_id);
+ struct ucall ucall = {};
+
+ if (run->exit_reason == KVM_EXIT_MMIO &&
+ run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) {
+ vm_vaddr_t gva;
+
+ TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8,
+ "Unexpected ucall exit mmio address access");
+ memcpy(&gva, run->mmio.data, sizeof(gva));
+ memcpy(&ucall, addr_gva2hva(vm, gva), sizeof(ucall));
+
+ vcpu_run_complete_io(vm, vcpu_id);
+ if (uc)
+ memcpy(uc, &ucall, sizeof(ucall));
+ }
+
+ return ucall.cmd;
+}
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
index 4911fc77d0f6..d1cf9f6e0e6b 100644
--- a/tools/testing/selftests/kvm/lib/assert.c
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -55,7 +55,7 @@ static void test_dump_stack(void)
#pragma GCC diagnostic pop
}
-static pid_t gettid(void)
+static pid_t _gettid(void)
{
return syscall(SYS_gettid);
}
@@ -72,7 +72,7 @@ test_assert(bool exp, const char *exp_str,
fprintf(stderr, "==== Test Assertion Failure ====\n"
" %s:%u: %s\n"
" pid=%d tid=%d - %s\n",
- file, line, exp_str, getpid(), gettid(),
+ file, line, exp_str, getpid(), _gettid(),
strerror(errno));
test_dump_stack();
if (fmt) {
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 6e49bb039376..a6dd0401eb50 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -8,6 +8,7 @@
#include "test_util.h"
#include "kvm_util.h"
#include "kvm_util_internal.h"
+#include "processor.h"
#include <assert.h>
#include <sys/mman.h>
@@ -84,7 +85,7 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
return ret;
}
-static void vm_open(struct kvm_vm *vm, int perm, unsigned long type)
+static void vm_open(struct kvm_vm *vm, int perm)
{
vm->kvm_fd = open(KVM_DEV_PATH, perm);
if (vm->kvm_fd < 0)
@@ -95,18 +96,19 @@ static void vm_open(struct kvm_vm *vm, int perm, unsigned long type)
exit(KSFT_SKIP);
}
- vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, type);
+ vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, vm->type);
TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
"rc: %i errno: %i", vm->fd, errno);
}
const char * const vm_guest_mode_string[] = {
- "PA-bits:52, VA-bits:48, 4K pages",
- "PA-bits:52, VA-bits:48, 64K pages",
- "PA-bits:48, VA-bits:48, 4K pages",
- "PA-bits:48, VA-bits:48, 64K pages",
- "PA-bits:40, VA-bits:48, 4K pages",
- "PA-bits:40, VA-bits:48, 64K pages",
+ "PA-bits:52, VA-bits:48, 4K pages",
+ "PA-bits:52, VA-bits:48, 64K pages",
+ "PA-bits:48, VA-bits:48, 4K pages",
+ "PA-bits:48, VA-bits:48, 64K pages",
+ "PA-bits:40, VA-bits:48, 4K pages",
+ "PA-bits:40, VA-bits:48, 64K pages",
+ "PA-bits:ANY, VA-bits:48, 4K pages",
};
_Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES,
"Missing new mode strings?");
@@ -130,17 +132,17 @@ _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES,
* descriptor to control the created VM is created with the permissions
* given by perm (e.g. O_RDWR).
*/
-struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages,
- int perm, unsigned long type)
+struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
{
struct kvm_vm *vm;
+ DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+
vm = calloc(1, sizeof(*vm));
TEST_ASSERT(vm != NULL, "Insufficient Memory");
vm->mode = mode;
- vm->type = type;
- vm_open(vm, perm, type);
+ vm->type = 0;
/* Setup mode specific traits. */
switch (vm->mode) {
@@ -186,10 +188,32 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages,
vm->page_size = 0x10000;
vm->page_shift = 16;
break;
+ case VM_MODE_PXXV48_4K:
+#ifdef __x86_64__
+ kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
+ TEST_ASSERT(vm->va_bits == 48, "Linear address width "
+ "(%d bits) not supported", vm->va_bits);
+ vm->pgtable_levels = 4;
+ vm->page_size = 0x1000;
+ vm->page_shift = 12;
+ DEBUG("Guest physical address width detected: %d\n",
+ vm->pa_bits);
+#else
+ TEST_ASSERT(false, "VM_MODE_PXXV48_4K not supported on "
+ "non-x86 platforms");
+#endif
+ break;
default:
TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
}
+#ifdef __aarch64__
+ if (vm->pa_bits != 40)
+ vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
+#endif
+
+ vm_open(vm, perm);
+
/* Limit to VA-bit canonical virtual addresses. */
vm->vpages_valid = sparsebit_alloc();
sparsebit_set_num(vm->vpages_valid,
@@ -212,7 +236,7 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages,
struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
{
- return _vm_create(mode, phy_pages, perm, 0);
+ return _vm_create(mode, phy_pages, perm);
}
/*
@@ -232,7 +256,7 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm)
{
struct userspace_mem_region *region;
- vm_open(vmp, perm, vmp->type);
+ vm_open(vmp, perm);
if (vmp->has_irqchip)
vm_create_irqchip(vmp);
@@ -681,7 +705,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
* on error (e.g. currently no memory region using memslot as a KVM
* memory slot ID).
*/
-static struct userspace_mem_region *
+struct userspace_mem_region *
memslot2region(struct kvm_vm *vm, uint32_t memslot)
{
struct userspace_mem_region *region;
@@ -1349,6 +1373,42 @@ int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
}
+void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
+{
+ int ret;
+
+ ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_FPU, fpu);
+ TEST_ASSERT(ret == 0, "KVM_GET_FPU failed, rc: %i errno: %i (%s)",
+ ret, errno, strerror(errno));
+}
+
+void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
+{
+ int ret;
+
+ ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_FPU, fpu);
+ TEST_ASSERT(ret == 0, "KVM_SET_FPU failed, rc: %i errno: %i (%s)",
+ ret, errno, strerror(errno));
+}
+
+void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
+{
+ int ret;
+
+ ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, reg);
+ TEST_ASSERT(ret == 0, "KVM_GET_ONE_REG failed, rc: %i errno: %i (%s)",
+ ret, errno, strerror(errno));
+}
+
+void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
+{
+ int ret;
+
+ ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, reg);
+ TEST_ASSERT(ret == 0, "KVM_SET_ONE_REG failed, rc: %i errno: %i (%s)",
+ ret, errno, strerror(errno));
+}
+
/*
* VCPU Ioctl
*
@@ -1628,3 +1688,18 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm)
return val == 'Y';
}
+
+unsigned int vm_get_page_size(struct kvm_vm *vm)
+{
+ return vm->page_size;
+}
+
+unsigned int vm_get_page_shift(struct kvm_vm *vm)
+{
+ return vm->page_shift;
+}
+
+unsigned int vm_get_max_gfn(struct kvm_vm *vm)
+{
+ return vm->max_gfn;
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
index f36262e0f655..ac50c42750cf 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -68,4 +68,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent);
void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent);
+struct userspace_mem_region *
+memslot2region(struct kvm_vm *vm, uint32_t memslot);
+
#endif /* SELFTEST_KVM_UTIL_INTERNAL_H */
diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c
new file mode 100644
index 000000000000..fd589dc9bfab
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ */
+#include "kvm_util.h"
+
+void ucall_init(struct kvm_vm *vm, void *arg)
+{
+}
+
+void ucall_uninit(struct kvm_vm *vm)
+{
+}
+
+void ucall(uint64_t cmd, int nargs, ...)
+{
+ struct ucall uc = {
+ .cmd = cmd,
+ };
+ va_list va;
+ int i;
+
+ nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
+
+ va_start(va, nargs);
+ for (i = 0; i < nargs; ++i)
+ uc.args[i] = va_arg(va, uint64_t);
+ va_end(va);
+
+ /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
+ asm volatile ("diag 0,%0,0x501" : : "a"(&uc) : "memory");
+}
+
+uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
+{
+ struct kvm_run *run = vcpu_state(vm, vcpu_id);
+ struct ucall ucall = {};
+
+ if (run->exit_reason == KVM_EXIT_S390_SIEIC &&
+ run->s390_sieic.icptcode == 4 &&
+ (run->s390_sieic.ipa >> 8) == 0x83 && /* 0x83 means DIAGNOSE */
+ (run->s390_sieic.ipb >> 16) == 0x501) {
+ int reg = run->s390_sieic.ipa & 0xf;
+
+ memcpy(&ucall, addr_gva2hva(vm, run->s.regs.gprs[reg]),
+ sizeof(ucall));
+
+ vcpu_run_complete_io(vm, vcpu_id);
+ if (uc)
+ memcpy(uc, &ucall, sizeof(ucall));
+ }
+
+ return ucall.cmd;
+}
diff --git a/tools/testing/selftests/kvm/lib/ucall.c b/tools/testing/selftests/kvm/lib/ucall.c
deleted file mode 100644
index dd9a66700f96..000000000000
--- a/tools/testing/selftests/kvm/lib/ucall.c
+++ /dev/null
@@ -1,157 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ucall support. A ucall is a "hypercall to userspace".
- *
- * Copyright (C) 2018, Red Hat, Inc.
- */
-#include "kvm_util.h"
-#include "kvm_util_internal.h"
-
-#define UCALL_PIO_PORT ((uint16_t)0x1000)
-
-static ucall_type_t ucall_type;
-static vm_vaddr_t *ucall_exit_mmio_addr;
-
-static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa)
-{
- if (kvm_userspace_memory_region_find(vm, gpa, gpa + 1))
- return false;
-
- virt_pg_map(vm, gpa, gpa, 0);
-
- ucall_exit_mmio_addr = (vm_vaddr_t *)gpa;
- sync_global_to_guest(vm, ucall_exit_mmio_addr);
-
- return true;
-}
-
-void ucall_init(struct kvm_vm *vm, ucall_type_t type, void *arg)
-{
- ucall_type = type;
- sync_global_to_guest(vm, ucall_type);
-
- if (type == UCALL_PIO)
- return;
-
- if (type == UCALL_MMIO) {
- vm_paddr_t gpa, start, end, step, offset;
- unsigned bits;
- bool ret;
-
- if (arg) {
- gpa = (vm_paddr_t)arg;
- ret = ucall_mmio_init(vm, gpa);
- TEST_ASSERT(ret, "Can't set ucall mmio address to %lx", gpa);
- return;
- }
-
- /*
- * Find an address within the allowed physical and virtual address
- * spaces, that does _not_ have a KVM memory region associated with
- * it. Identity mapping an address like this allows the guest to
- * access it, but as KVM doesn't know what to do with it, it
- * will assume it's something userspace handles and exit with
- * KVM_EXIT_MMIO. Well, at least that's how it works for AArch64.
- * Here we start with a guess that the addresses around 5/8th
- * of the allowed space are unmapped and then work both down and
- * up from there in 1/16th allowed space sized steps.
- *
- * Note, we need to use VA-bits - 1 when calculating the allowed
- * virtual address space for an identity mapping because the upper
- * half of the virtual address space is the two's complement of the
- * lower and won't match physical addresses.
- */
- bits = vm->va_bits - 1;
- bits = vm->pa_bits < bits ? vm->pa_bits : bits;
- end = 1ul << bits;
- start = end * 5 / 8;
- step = end / 16;
- for (offset = 0; offset < end - start; offset += step) {
- if (ucall_mmio_init(vm, start - offset))
- return;
- if (ucall_mmio_init(vm, start + offset))
- return;
- }
- TEST_ASSERT(false, "Can't find a ucall mmio address");
- }
-}
-
-void ucall_uninit(struct kvm_vm *vm)
-{
- ucall_type = 0;
- sync_global_to_guest(vm, ucall_type);
- ucall_exit_mmio_addr = 0;
- sync_global_to_guest(vm, ucall_exit_mmio_addr);
-}
-
-static void ucall_pio_exit(struct ucall *uc)
-{
-#ifdef __x86_64__
- asm volatile("in %[port], %%al"
- : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax");
-#endif
-}
-
-static void ucall_mmio_exit(struct ucall *uc)
-{
- *ucall_exit_mmio_addr = (vm_vaddr_t)uc;
-}
-
-void ucall(uint64_t cmd, int nargs, ...)
-{
- struct ucall uc = {
- .cmd = cmd,
- };
- va_list va;
- int i;
-
- nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
-
- va_start(va, nargs);
- for (i = 0; i < nargs; ++i)
- uc.args[i] = va_arg(va, uint64_t);
- va_end(va);
-
- switch (ucall_type) {
- case UCALL_PIO:
- ucall_pio_exit(&uc);
- break;
- case UCALL_MMIO:
- ucall_mmio_exit(&uc);
- break;
- };
-}
-
-uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
-{
- struct kvm_run *run = vcpu_state(vm, vcpu_id);
- struct ucall ucall = {};
- bool got_ucall = false;
-
-#ifdef __x86_64__
- if (ucall_type == UCALL_PIO && run->exit_reason == KVM_EXIT_IO &&
- run->io.port == UCALL_PIO_PORT) {
- struct kvm_regs regs;
- vcpu_regs_get(vm, vcpu_id, &regs);
- memcpy(&ucall, addr_gva2hva(vm, (vm_vaddr_t)regs.rdi), sizeof(ucall));
- got_ucall = true;
- }
-#endif
- if (ucall_type == UCALL_MMIO && run->exit_reason == KVM_EXIT_MMIO &&
- run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) {
- vm_vaddr_t gva;
- TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8,
- "Unexpected ucall exit mmio address access");
- memcpy(&gva, run->mmio.data, sizeof(gva));
- memcpy(&ucall, addr_gva2hva(vm, gva), sizeof(ucall));
- got_ucall = true;
- }
-
- if (got_ucall) {
- vcpu_run_complete_io(vm, vcpu_id);
- if (uc)
- memcpy(uc, &ucall, sizeof(ucall));
- }
-
- return ucall.cmd;
-}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 6cb34a0fa200..683d3bdb8f6a 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -228,7 +228,7 @@ void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
{
- TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
+ TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
/* If needed, create page map l4 table. */
@@ -261,7 +261,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
uint16_t index[4];
struct pageMapL4Entry *pml4e;
- TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
+ TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
TEST_ASSERT((vaddr % vm->page_size) == 0,
@@ -547,7 +547,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
struct pageDirectoryEntry *pde;
struct pageTableEntry *pte;
- TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
+ TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
index[0] = (gva >> 12) & 0x1ffu;
@@ -621,7 +621,7 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m
kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);
switch (vm->mode) {
- case VM_MODE_P52V48_4K:
+ case VM_MODE_PXXV48_4K:
sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
@@ -869,7 +869,7 @@ uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
return buffer.entry.data;
}
-/* VCPU Set MSR
+/* _VCPU Set MSR
*
* Input Args:
* vm - Virtual Machine
@@ -879,12 +879,12 @@ uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
*
* Output Args: None
*
- * Return: On success, nothing. On failure a TEST_ASSERT is produced.
+ * Return: The result of KVM_SET_MSRS.
*
- * Set value of MSR for VCPU.
+ * Sets the value of an MSR for the given VCPU.
*/
-void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
- uint64_t msr_value)
+int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+ uint64_t msr_value)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
struct {
@@ -899,6 +899,29 @@ void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
buffer.entry.index = msr_index;
buffer.entry.data = msr_value;
r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
+ return r;
+}
+
+/* VCPU Set MSR
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * msr_index - Index of MSR
+ * msr_value - New value of MSR
+ *
+ * Output Args: None
+ *
+ * Return: On success, nothing. On failure a TEST_ASSERT is produced.
+ *
+ * Set value of MSR for VCPU.
+ */
+void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+ uint64_t msr_value)
+{
+ int r;
+
+ r = _vcpu_set_msr(vm, vcpuid, msr_index, msr_value);
TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
" rc: %i errno: %i", r, errno);
}
@@ -1000,19 +1023,45 @@ struct kvm_x86_state {
struct kvm_msrs msrs;
};
-static int kvm_get_num_msrs(struct kvm_vm *vm)
+static int kvm_get_num_msrs_fd(int kvm_fd)
{
struct kvm_msr_list nmsrs;
int r;
nmsrs.nmsrs = 0;
- r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
+ r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
r);
return nmsrs.nmsrs;
}
+static int kvm_get_num_msrs(struct kvm_vm *vm)
+{
+ return kvm_get_num_msrs_fd(vm->kvm_fd);
+}
+
+struct kvm_msr_list *kvm_get_msr_index_list(void)
+{
+ struct kvm_msr_list *list;
+ int nmsrs, r, kvm_fd;
+
+ kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+ if (kvm_fd < 0)
+ exit(KSFT_SKIP);
+
+ nmsrs = kvm_get_num_msrs_fd(kvm_fd);
+ list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
+ list->nmsrs = nmsrs;
+ r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
+ close(kvm_fd);
+
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
+ r);
+
+ return list;
+}
+
struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
@@ -1060,9 +1109,11 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
r);
- r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i",
- r);
+ if (kvm_check_cap(KVM_CAP_XCRS)) {
+ r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i",
+ r);
+ }
r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs);
TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i",
@@ -1083,7 +1134,7 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
for (i = 0; i < nmsrs; i++)
state->msrs.entries[i].index = list->indices[i];
r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs);
- TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed at %x)",
+ TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)",
r, r == nmsrs ? -1 : list->indices[r]);
r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs);
@@ -1103,9 +1154,11 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
r);
- r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i",
- r);
+ if (kvm_check_cap(KVM_CAP_XCRS)) {
+ r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i",
+ r);
+ }
r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
@@ -1153,3 +1206,30 @@ bool is_intel_cpu(void)
chunk = (const uint32_t *)("GenuineIntel");
return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
}
+
+uint32_t kvm_get_cpuid_max_basic(void)
+{
+ return kvm_get_supported_cpuid_entry(0)->eax;
+}
+
+uint32_t kvm_get_cpuid_max_extended(void)
+{
+ return kvm_get_supported_cpuid_entry(0x80000000)->eax;
+}
+
+void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
+{
+ struct kvm_cpuid_entry2 *entry;
+ bool pae;
+
+ /* SDM 4.1.4 */
+ if (kvm_get_cpuid_max_extended() < 0x80000008) {
+ pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6);
+ *pa_bits = pae ? 36 : 32;
+ *va_bits = 32;
+ } else {
+ entry = kvm_get_supported_cpuid_entry(0x80000008);
+ *pa_bits = entry->eax & 0xff;
+ *va_bits = (entry->eax >> 8) & 0xff;
+ }
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c
new file mode 100644
index 000000000000..6e05a8fc3fe0
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * tools/testing/selftests/kvm/lib/x86_64/svm.c
+ * Helpers used for nested SVM testing
+ * Largely inspired from KVM unit test svm.c
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "../kvm_util_internal.h"
+#include "processor.h"
+#include "svm_util.h"
+
+struct gpr64_regs guest_regs;
+u64 rflags;
+
+/* Allocate memory regions for nested SVM tests.
+ *
+ * Input Args:
+ * vm - The VM to allocate guest-virtual addresses in.
+ *
+ * Output Args:
+ * p_svm_gva - The guest virtual address for the struct svm_test_data.
+ *
+ * Return:
+ * Pointer to structure with the addresses of the SVM areas.
+ */
+struct svm_test_data *
+vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva)
+{
+ vm_vaddr_t svm_gva = vm_vaddr_alloc(vm, getpagesize(),
+ 0x10000, 0, 0);
+ struct svm_test_data *svm = addr_gva2hva(vm, svm_gva);
+
+ svm->vmcb = (void *)vm_vaddr_alloc(vm, getpagesize(),
+ 0x10000, 0, 0);
+ svm->vmcb_hva = addr_gva2hva(vm, (uintptr_t)svm->vmcb);
+ svm->vmcb_gpa = addr_gva2gpa(vm, (uintptr_t)svm->vmcb);
+
+ svm->save_area = (void *)vm_vaddr_alloc(vm, getpagesize(),
+ 0x10000, 0, 0);
+ svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area);
+ svm->save_area_gpa = addr_gva2gpa(vm, (uintptr_t)svm->save_area);
+
+ *p_svm_gva = svm_gva;
+ return svm;
+}
+
+static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
+ u64 base, u32 limit, u32 attr)
+{
+ seg->selector = selector;
+ seg->attrib = attr;
+ seg->limit = limit;
+ seg->base = base;
+}
+
+void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp)
+{
+ struct vmcb *vmcb = svm->vmcb;
+ uint64_t vmcb_gpa = svm->vmcb_gpa;
+ struct vmcb_save_area *save = &vmcb->save;
+ struct vmcb_control_area *ctrl = &vmcb->control;
+ u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
+ | SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK;
+ u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
+ | SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK;
+ uint64_t efer;
+
+ efer = rdmsr(MSR_EFER);
+ wrmsr(MSR_EFER, efer | EFER_SVME);
+ wrmsr(MSR_VM_HSAVE_PA, svm->save_area_gpa);
+
+ memset(vmcb, 0, sizeof(*vmcb));
+ asm volatile ("vmsave\n\t" : : "a" (vmcb_gpa) : "memory");
+ vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr);
+ vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr);
+ vmcb_set_seg(&save->ss, get_ss(), 0, -1U, data_seg_attr);
+ vmcb_set_seg(&save->ds, get_ds(), 0, -1U, data_seg_attr);
+ vmcb_set_seg(&save->gdtr, 0, get_gdt().address, get_gdt().size, 0);
+ vmcb_set_seg(&save->idtr, 0, get_idt().address, get_idt().size, 0);
+
+ ctrl->asid = 1;
+ save->cpl = 0;
+ save->efer = rdmsr(MSR_EFER);
+ asm volatile ("mov %%cr4, %0" : "=r"(save->cr4) : : "memory");
+ asm volatile ("mov %%cr3, %0" : "=r"(save->cr3) : : "memory");
+ asm volatile ("mov %%cr0, %0" : "=r"(save->cr0) : : "memory");
+ asm volatile ("mov %%dr7, %0" : "=r"(save->dr7) : : "memory");
+ asm volatile ("mov %%dr6, %0" : "=r"(save->dr6) : : "memory");
+ asm volatile ("mov %%cr2, %0" : "=r"(save->cr2) : : "memory");
+ save->g_pat = rdmsr(MSR_IA32_CR_PAT);
+ save->dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
+ ctrl->intercept = (1ULL << INTERCEPT_VMRUN) |
+ (1ULL << INTERCEPT_VMMCALL);
+
+ vmcb->save.rip = (u64)guest_rip;
+ vmcb->save.rsp = (u64)guest_rsp;
+ guest_regs.rdi = (u64)svm;
+}
+
+/*
+ * save/restore 64-bit general registers except rax, rip, rsp
+ * which are directly handed through the VMCB guest processor state
+ */
+#define SAVE_GPR_C \
+ "xchg %%rbx, guest_regs+0x20\n\t" \
+ "xchg %%rcx, guest_regs+0x10\n\t" \
+ "xchg %%rdx, guest_regs+0x18\n\t" \
+ "xchg %%rbp, guest_regs+0x30\n\t" \
+ "xchg %%rsi, guest_regs+0x38\n\t" \
+ "xchg %%rdi, guest_regs+0x40\n\t" \
+ "xchg %%r8, guest_regs+0x48\n\t" \
+ "xchg %%r9, guest_regs+0x50\n\t" \
+ "xchg %%r10, guest_regs+0x58\n\t" \
+ "xchg %%r11, guest_regs+0x60\n\t" \
+ "xchg %%r12, guest_regs+0x68\n\t" \
+ "xchg %%r13, guest_regs+0x70\n\t" \
+ "xchg %%r14, guest_regs+0x78\n\t" \
+ "xchg %%r15, guest_regs+0x80\n\t"
+
+#define LOAD_GPR_C SAVE_GPR_C
+
+/*
+ * selftests do not use interrupts so we dropped clgi/sti/cli/stgi
+ * for now. registers involved in LOAD/SAVE_GPR_C are eventually
+ * unmodified so they do not need to be in the clobber list.
+ */
+void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa)
+{
+ asm volatile (
+ "vmload\n\t"
+ "mov rflags, %%r15\n\t" // rflags
+ "mov %%r15, 0x170(%[vmcb])\n\t"
+ "mov guest_regs, %%r15\n\t" // rax
+ "mov %%r15, 0x1f8(%[vmcb])\n\t"
+ LOAD_GPR_C
+ "vmrun\n\t"
+ SAVE_GPR_C
+ "mov 0x170(%[vmcb]), %%r15\n\t" // rflags
+ "mov %%r15, rflags\n\t"
+ "mov 0x1f8(%[vmcb]), %%r15\n\t" // rax
+ "mov %%r15, guest_regs\n\t"
+ "vmsave\n\t"
+ : : [vmcb] "r" (vmcb), [vmcb_gpa] "a" (vmcb_gpa)
+ : "r15", "memory");
+}
+
+void nested_svm_check_supported(void)
+{
+ struct kvm_cpuid_entry2 *entry =
+ kvm_get_supported_cpuid_entry(0x80000001);
+
+ if (!(entry->ecx & CPUID_SVM)) {
+ fprintf(stderr, "nested SVM not enabled, skipping test\n");
+ exit(KSFT_SKIP);
+ }
+}
+
diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
new file mode 100644
index 000000000000..da4d89ad5419
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#include "kvm_util.h"
+
+#define UCALL_PIO_PORT ((uint16_t)0x1000)
+
+void ucall_init(struct kvm_vm *vm, void *arg)
+{
+}
+
+void ucall_uninit(struct kvm_vm *vm)
+{
+}
+
+void ucall(uint64_t cmd, int nargs, ...)
+{
+ struct ucall uc = {
+ .cmd = cmd,
+ };
+ va_list va;
+ int i;
+
+ nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
+
+ va_start(va, nargs);
+ for (i = 0; i < nargs; ++i)
+ uc.args[i] = va_arg(va, uint64_t);
+ va_end(va);
+
+ asm volatile("in %[port], %%al"
+ : : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax", "memory");
+}
+
+uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
+{
+ struct kvm_run *run = vcpu_state(vm, vcpu_id);
+ struct ucall ucall = {};
+
+ if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) {
+ struct kvm_regs regs;
+
+ vcpu_regs_get(vm, vcpu_id, &regs);
+ memcpy(&ucall, addr_gva2hva(vm, (vm_vaddr_t)regs.rdi),
+ sizeof(ucall));
+
+ vcpu_run_complete_io(vm, vcpu_id);
+ if (uc)
+ memcpy(uc, &ucall, sizeof(ucall));
+ }
+
+ return ucall.cmd;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index 204f847bd065..7aaa99ca4dbc 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -7,11 +7,59 @@
#include "test_util.h"
#include "kvm_util.h"
+#include "../kvm_util_internal.h"
#include "processor.h"
#include "vmx.h"
+#define PAGE_SHIFT_4K 12
+
+#define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000
+
bool enable_evmcs;
+struct eptPageTableEntry {
+ uint64_t readable:1;
+ uint64_t writable:1;
+ uint64_t executable:1;
+ uint64_t memory_type:3;
+ uint64_t ignore_pat:1;
+ uint64_t page_size:1;
+ uint64_t accessed:1;
+ uint64_t dirty:1;
+ uint64_t ignored_11_10:2;
+ uint64_t address:40;
+ uint64_t ignored_62_52:11;
+ uint64_t suppress_ve:1;
+};
+
+struct eptPageTablePointer {
+ uint64_t memory_type:3;
+ uint64_t page_walk_length:3;
+ uint64_t ad_enabled:1;
+ uint64_t reserved_11_07:5;
+ uint64_t address:40;
+ uint64_t reserved_63_52:12;
+};
+int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id)
+{
+ uint16_t evmcs_ver;
+
+ struct kvm_enable_cap enable_evmcs_cap = {
+ .cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
+ .args[0] = (unsigned long)&evmcs_ver
+ };
+
+ vcpu_ioctl(vm, vcpu_id, KVM_ENABLE_CAP, &enable_evmcs_cap);
+
+ /* KVM should return supported EVMCS version range */
+ TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) &&
+ (evmcs_ver & 0xff) > 0,
+ "Incorrect EVMCS version range: %x:%x\n",
+ evmcs_ver & 0xff, evmcs_ver >> 8);
+
+ return evmcs_ver;
+}
+
/* Allocate memory regions for nested VMX tests.
*
* Input Args:
@@ -109,11 +157,11 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx)
* Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON
* outside of SMX causes a #GP.
*/
- required = FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
- required |= FEATURE_CONTROL_LOCKED;
- feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
+ required = FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
+ required |= FEAT_CTL_LOCKED;
+ feature_control = rdmsr(MSR_IA32_FEAT_CTL);
if ((feature_control & required) != required)
- wrmsr(MSR_IA32_FEATURE_CONTROL, feature_control | required);
+ wrmsr(MSR_IA32_FEAT_CTL, feature_control | required);
/* Enter VMX root operation. */
*(uint32_t *)(vmx->vmxon) = vmcs_revision();
@@ -154,15 +202,35 @@ bool load_vmcs(struct vmx_pages *vmx)
*/
static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
{
+ uint32_t sec_exec_ctl = 0;
+
vmwrite(VIRTUAL_PROCESSOR_ID, 0);
vmwrite(POSTED_INTR_NV, 0);
vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS));
- if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, 0))
+
+ if (vmx->eptp_gpa) {
+ uint64_t ept_paddr;
+ struct eptPageTablePointer eptp = {
+ .memory_type = VMX_BASIC_MEM_TYPE_WB,
+ .page_walk_length = 3, /* + 1 */
+ .ad_enabled = !!(rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & VMX_EPT_VPID_CAP_AD_BITS),
+ .address = vmx->eptp_gpa >> PAGE_SHIFT_4K,
+ };
+
+ memcpy(&ept_paddr, &eptp, sizeof(ept_paddr));
+ vmwrite(EPT_POINTER, ept_paddr);
+ sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT;
+ }
+
+ if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, sec_exec_ctl))
vmwrite(CPU_BASED_VM_EXEC_CONTROL,
rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
- else
+ else {
vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS));
+ GUEST_ASSERT(!sec_exec_ctl);
+ }
+
vmwrite(EXCEPTION_BITMAP, 0);
vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */
@@ -220,9 +288,9 @@ static inline void init_vmcs_host_state(void)
vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE));
vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE));
vmwrite(HOST_TR_BASE,
- get_desc64_base((struct desc64 *)(get_gdt_base() + get_tr())));
- vmwrite(HOST_GDTR_BASE, get_gdt_base());
- vmwrite(HOST_IDTR_BASE, get_idt_base());
+ get_desc64_base((struct desc64 *)(get_gdt().address + get_tr())));
+ vmwrite(HOST_GDTR_BASE, get_gdt().address);
+ vmwrite(HOST_IDTR_BASE, get_idt().address);
vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP));
vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP));
}
@@ -307,3 +375,162 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
init_vmcs_host_state();
init_vmcs_guest_state(guest_rip, guest_rsp);
}
+
+void nested_vmx_check_supported(void)
+{
+ struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
+
+ if (!(entry->ecx & CPUID_VMX)) {
+ fprintf(stderr, "nested VMX not enabled, skipping test\n");
+ exit(KSFT_SKIP);
+ }
+}
+
+void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot)
+{
+ uint16_t index[4];
+ struct eptPageTableEntry *pml4e;
+
+ TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
+ "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+ TEST_ASSERT((nested_paddr % vm->page_size) == 0,
+ "Nested physical address not on page boundary,\n"
+ " nested_paddr: 0x%lx vm->page_size: 0x%x",
+ nested_paddr, vm->page_size);
+ TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond beyond maximum supported,\n"
+ " nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+ TEST_ASSERT((paddr % vm->page_size) == 0,
+ "Physical address not on page boundary,\n"
+ " paddr: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->page_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond beyond maximum supported,\n"
+ " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+
+ index[0] = (nested_paddr >> 12) & 0x1ffu;
+ index[1] = (nested_paddr >> 21) & 0x1ffu;
+ index[2] = (nested_paddr >> 30) & 0x1ffu;
+ index[3] = (nested_paddr >> 39) & 0x1ffu;
+
+ /* Allocate page directory pointer table if not present. */
+ pml4e = vmx->eptp_hva;
+ if (!pml4e[index[3]].readable) {
+ pml4e[index[3]].address = vm_phy_page_alloc(vm,
+ KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
+ >> vm->page_shift;
+ pml4e[index[3]].writable = true;
+ pml4e[index[3]].readable = true;
+ pml4e[index[3]].executable = true;
+ }
+
+ /* Allocate page directory table if not present. */
+ struct eptPageTableEntry *pdpe;
+ pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
+ if (!pdpe[index[2]].readable) {
+ pdpe[index[2]].address = vm_phy_page_alloc(vm,
+ KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
+ >> vm->page_shift;
+ pdpe[index[2]].writable = true;
+ pdpe[index[2]].readable = true;
+ pdpe[index[2]].executable = true;
+ }
+
+ /* Allocate page table if not present. */
+ struct eptPageTableEntry *pde;
+ pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
+ if (!pde[index[1]].readable) {
+ pde[index[1]].address = vm_phy_page_alloc(vm,
+ KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
+ >> vm->page_shift;
+ pde[index[1]].writable = true;
+ pde[index[1]].readable = true;
+ pde[index[1]].executable = true;
+ }
+
+ /* Fill in page table entry. */
+ struct eptPageTableEntry *pte;
+ pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
+ pte[index[0]].address = paddr >> vm->page_shift;
+ pte[index[0]].writable = true;
+ pte[index[0]].readable = true;
+ pte[index[0]].executable = true;
+
+ /*
+ * For now mark these as accessed and dirty because the only
+ * testcase we have needs that. Can be reconsidered later.
+ */
+ pte[index[0]].accessed = true;
+ pte[index[0]].dirty = true;
+}
+
+/*
+ * Map a range of EPT guest physical addresses to the VM's physical address
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * nested_paddr - Nested guest physical address to map
+ * paddr - VM Physical Address
+ * size - The size of the range to map
+ * eptp_memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within the VM given by vm, creates a nested guest translation for the
+ * page range starting at nested_paddr to the page range starting at paddr.
+ */
+void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, uint64_t size,
+ uint32_t eptp_memslot)
+{
+ size_t page_size = vm->page_size;
+ size_t npages = size / page_size;
+
+ TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow");
+ TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
+
+ while (npages--) {
+ nested_pg_map(vmx, vm, nested_paddr, paddr, eptp_memslot);
+ nested_paddr += page_size;
+ paddr += page_size;
+ }
+}
+
+/* Prepare an identity extended page table that maps all the
+ * physical pages in VM.
+ */
+void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint32_t memslot, uint32_t eptp_memslot)
+{
+ sparsebit_idx_t i, last;
+ struct userspace_mem_region *region =
+ memslot2region(vm, memslot);
+
+ i = (region->region.guest_phys_addr >> vm->page_shift) - 1;
+ last = i + (region->region.memory_size >> vm->page_shift);
+ for (;;) {
+ i = sparsebit_next_clear(region->unused_phy_pages, i);
+ if (i > last)
+ break;
+
+ nested_map(vmx, vm,
+ (uint64_t)i << vm->page_shift,
+ (uint64_t)i << vm->page_shift,
+ 1 << vm->page_shift,
+ eptp_memslot);
+ }
+}
+
+void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint32_t eptp_memslot)
+{
+ vmx->eptp = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);
+ vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp);
+}
diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c
new file mode 100644
index 000000000000..9edaa9a134ce
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390x/memop.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test for s390x KVM_S390_MEM_OP
+ *
+ * Copyright (C) 2019, Red Hat, Inc.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+
+#define VCPU_ID 1
+
+static uint8_t mem1[65536];
+static uint8_t mem2[65536];
+
+static void guest_code(void)
+{
+ int i;
+
+ for (;;) {
+ for (i = 0; i < sizeof(mem2); i++)
+ mem2[i] = mem1[i];
+ GUEST_SYNC(0);
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ struct kvm_s390_mem_op ksmo;
+ int rv, i, maxsize;
+
+ setbuf(stdout, NULL); /* Tell stdout not to buffer its content */
+
+ maxsize = kvm_check_cap(KVM_CAP_S390_MEM_OP);
+ if (!maxsize) {
+ fprintf(stderr, "CAP_S390_MEM_OP not supported -> skip test\n");
+ exit(KSFT_SKIP);
+ }
+ if (maxsize > sizeof(mem1))
+ maxsize = sizeof(mem1);
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ run = vcpu_state(vm, VCPU_ID);
+
+ for (i = 0; i < sizeof(mem1); i++)
+ mem1[i] = i * i + i;
+
+ /* Set the first array */
+ ksmo.gaddr = addr_gva2gpa(vm, (uintptr_t)mem1);
+ ksmo.flags = 0;
+ ksmo.size = maxsize;
+ ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
+ ksmo.buf = (uintptr_t)mem1;
+ ksmo.ar = 0;
+ vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+
+ /* Let the guest code copy the first array to the second */
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
+ "Unexpected exit reason: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ memset(mem2, 0xaa, sizeof(mem2));
+
+ /* Get the second array */
+ ksmo.gaddr = (uintptr_t)mem2;
+ ksmo.flags = 0;
+ ksmo.size = maxsize;
+ ksmo.op = KVM_S390_MEMOP_LOGICAL_READ;
+ ksmo.buf = (uintptr_t)mem2;
+ ksmo.ar = 0;
+ vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+
+ TEST_ASSERT(!memcmp(mem1, mem2, maxsize),
+ "Memory contents do not match!");
+
+ /* Check error conditions - first bad size: */
+ ksmo.gaddr = (uintptr_t)mem1;
+ ksmo.flags = 0;
+ ksmo.size = -1;
+ ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
+ ksmo.buf = (uintptr_t)mem1;
+ ksmo.ar = 0;
+ rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+ TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes");
+
+ /* Zero size: */
+ ksmo.gaddr = (uintptr_t)mem1;
+ ksmo.flags = 0;
+ ksmo.size = 0;
+ ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
+ ksmo.buf = (uintptr_t)mem1;
+ ksmo.ar = 0;
+ rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+ TEST_ASSERT(rv == -1 && (errno == EINVAL || errno == ENOMEM),
+ "ioctl allows 0 as size");
+
+ /* Bad flags: */
+ ksmo.gaddr = (uintptr_t)mem1;
+ ksmo.flags = -1;
+ ksmo.size = maxsize;
+ ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
+ ksmo.buf = (uintptr_t)mem1;
+ ksmo.ar = 0;
+ rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+ TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows all flags");
+
+ /* Bad operation: */
+ ksmo.gaddr = (uintptr_t)mem1;
+ ksmo.flags = 0;
+ ksmo.size = maxsize;
+ ksmo.op = -1;
+ ksmo.buf = (uintptr_t)mem1;
+ ksmo.ar = 0;
+ rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+ TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
+
+ /* Bad guest address: */
+ ksmo.gaddr = ~0xfffUL;
+ ksmo.flags = KVM_S390_MEMOP_F_CHECK_ONLY;
+ ksmo.size = maxsize;
+ ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
+ ksmo.buf = (uintptr_t)mem1;
+ ksmo.ar = 0;
+ rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+ TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory access");
+
+ /* Bad host address: */
+ ksmo.gaddr = (uintptr_t)mem1;
+ ksmo.flags = 0;
+ ksmo.size = maxsize;
+ ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
+ ksmo.buf = 0;
+ ksmo.ar = 0;
+ rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+ TEST_ASSERT(rv == -1 && errno == EFAULT,
+ "ioctl does not report bad host memory address");
+
+ /* Bad access register: */
+ run->psw_mask &= ~(3UL << (63 - 17));
+ run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */
+ vcpu_run(vm, VCPU_ID); /* To sync new state to SIE block */
+ ksmo.gaddr = (uintptr_t)mem1;
+ ksmo.flags = 0;
+ ksmo.size = maxsize;
+ ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
+ ksmo.buf = (uintptr_t)mem1;
+ ksmo.ar = 17;
+ rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+ TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows ARs > 15");
+ run->psw_mask &= ~(3UL << (63 - 17)); /* Disable AR mode */
+ vcpu_run(vm, VCPU_ID); /* Run to sync new state */
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c
new file mode 100644
index 000000000000..1485bc6c8999
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390x/resets.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test for s390x CPU resets
+ *
+ * Copyright (C) 2020, IBM
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+
+#define VCPU_ID 3
+#define LOCAL_IRQS 32
+
+struct kvm_s390_irq buf[VCPU_ID + LOCAL_IRQS];
+
+struct kvm_vm *vm;
+struct kvm_run *run;
+struct kvm_sync_regs *regs;
+static uint64_t regs_null[16];
+
+static uint64_t crs[16] = { 0x40000ULL,
+ 0x42000ULL,
+ 0, 0, 0, 0, 0,
+ 0x43000ULL,
+ 0, 0, 0, 0, 0,
+ 0x44000ULL,
+ 0, 0
+};
+
+static void guest_code_initial(void)
+{
+ /* Round toward 0 */
+ uint32_t fpc = 0x11;
+
+ /* Dirty registers */
+ asm volatile (
+ " lctlg 0,15,%0\n"
+ " sfpc %1\n"
+ : : "Q" (crs), "d" (fpc));
+ GUEST_SYNC(0);
+}
+
+static void test_one_reg(uint64_t id, uint64_t value)
+{
+ struct kvm_one_reg reg;
+ uint64_t eval_reg;
+
+ reg.addr = (uintptr_t)&eval_reg;
+ reg.id = id;
+ vcpu_get_reg(vm, VCPU_ID, &reg);
+ TEST_ASSERT(eval_reg == value, "value == %s", value);
+}
+
+static void assert_noirq(void)
+{
+ struct kvm_s390_irq_state irq_state;
+ int irqs;
+
+ irq_state.len = sizeof(buf);
+ irq_state.buf = (unsigned long)buf;
+ irqs = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_GET_IRQ_STATE, &irq_state);
+ /*
+ * irqs contains the number of retrieved interrupts. Any interrupt
+ * (notably, the emergency call interrupt we have injected) should
+ * be cleared by the resets, so this should be 0.
+ */
+ TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d\n", errno);
+ TEST_ASSERT(!irqs, "IRQ pending");
+}
+
+static void assert_clear(void)
+{
+ struct kvm_sregs sregs;
+ struct kvm_regs regs;
+ struct kvm_fpu fpu;
+
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+ TEST_ASSERT(!memcmp(&regs.gprs, regs_null, sizeof(regs.gprs)), "grs == 0");
+
+ vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ TEST_ASSERT(!memcmp(&sregs.acrs, regs_null, sizeof(sregs.acrs)), "acrs == 0");
+
+ vcpu_fpu_get(vm, VCPU_ID, &fpu);
+ TEST_ASSERT(!memcmp(&fpu.fprs, regs_null, sizeof(fpu.fprs)), "fprs == 0");
+}
+
+static void assert_initial(void)
+{
+ struct kvm_sregs sregs;
+ struct kvm_fpu fpu;
+
+ vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ TEST_ASSERT(sregs.crs[0] == 0xE0UL, "cr0 == 0xE0");
+ TEST_ASSERT(sregs.crs[14] == 0xC2000000UL, "cr14 == 0xC2000000");
+ TEST_ASSERT(!memcmp(&sregs.crs[1], regs_null, sizeof(sregs.crs[1]) * 12),
+ "cr1-13 == 0");
+ TEST_ASSERT(sregs.crs[15] == 0, "cr15 == 0");
+
+ vcpu_fpu_get(vm, VCPU_ID, &fpu);
+ TEST_ASSERT(!fpu.fpc, "fpc == 0");
+
+ test_one_reg(KVM_REG_S390_GBEA, 1);
+ test_one_reg(KVM_REG_S390_PP, 0);
+ test_one_reg(KVM_REG_S390_TODPR, 0);
+ test_one_reg(KVM_REG_S390_CPU_TIMER, 0);
+ test_one_reg(KVM_REG_S390_CLOCK_COMP, 0);
+}
+
+static void assert_normal(void)
+{
+ test_one_reg(KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID);
+ assert_noirq();
+}
+
+static void inject_irq(int cpu_id)
+{
+ struct kvm_s390_irq_state irq_state;
+ struct kvm_s390_irq *irq = &buf[0];
+ int irqs;
+
+ /* Inject IRQ */
+ irq_state.len = sizeof(struct kvm_s390_irq);
+ irq_state.buf = (unsigned long)buf;
+ irq->type = KVM_S390_INT_EMERGENCY;
+ irq->u.emerg.code = cpu_id;
+ irqs = _vcpu_ioctl(vm, cpu_id, KVM_S390_SET_IRQ_STATE, &irq_state);
+ TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d\n", errno);
+}
+
+static void test_normal(void)
+{
+ printf("Testing normal reset\n");
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
+ run = vcpu_state(vm, VCPU_ID);
+ regs = &run->s.regs;
+
+ vcpu_run(vm, VCPU_ID);
+
+ inject_irq(VCPU_ID);
+
+ vcpu_ioctl(vm, VCPU_ID, KVM_S390_NORMAL_RESET, 0);
+ assert_normal();
+ kvm_vm_free(vm);
+}
+
+static void test_initial(void)
+{
+ printf("Testing initial reset\n");
+ vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
+ run = vcpu_state(vm, VCPU_ID);
+ regs = &run->s.regs;
+
+ vcpu_run(vm, VCPU_ID);
+
+ inject_irq(VCPU_ID);
+
+ vcpu_ioctl(vm, VCPU_ID, KVM_S390_INITIAL_RESET, 0);
+ assert_normal();
+ assert_initial();
+ kvm_vm_free(vm);
+}
+
+static void test_clear(void)
+{
+ printf("Testing clear reset\n");
+ vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
+ run = vcpu_state(vm, VCPU_ID);
+ regs = &run->s.regs;
+
+ vcpu_run(vm, VCPU_ID);
+
+ inject_irq(VCPU_ID);
+
+ vcpu_ioctl(vm, VCPU_ID, KVM_S390_CLEAR_RESET, 0);
+ assert_normal();
+ assert_initial();
+ assert_clear();
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ setbuf(stdout, NULL); /* Tell stdout not to buffer its content */
+
+ test_initial();
+ if (kvm_check_cap(KVM_CAP_S390_VCPU_RESETS)) {
+ test_normal();
+ test_clear();
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
index e85ff0d69548..b705637ca14b 100644
--- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
@@ -25,10 +25,15 @@
static void guest_code(void)
{
- for (;;) {
- asm volatile ("diag 0,0,0x501");
- asm volatile ("ahi 11,1");
- }
+ /*
+ * We embed diag 501 here instead of doing a ucall to avoid that
+ * the compiler has messed with r11 at the time of the ucall.
+ */
+ asm volatile (
+ "0: diag 0,0,0x501\n"
+ " ahi 11,1\n"
+ " j 0b\n"
+ );
}
#define REG_COMPARE(reg) \
@@ -83,6 +88,36 @@ int main(int argc, char *argv[])
run = vcpu_state(vm, VCPU_ID);
+ /* Request reading invalid register set from VCPU. */
+ run->kvm_valid_regs = INVALID_SYNC_FIELD;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+ rv);
+ vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+
+ run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+ rv);
+ vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+
+ /* Request setting invalid register set into VCPU. */
+ run->kvm_dirty_regs = INVALID_SYNC_FIELD;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+ rv);
+ vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+
+ run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+ rv);
+ vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+
/* Request and verify all valid register sets. */
run->kvm_valid_regs = TEST_SYNC_FIELDS;
rv = _vcpu_run(vm, VCPU_ID);
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index f95c08343b48..92915e6408e7 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -79,11 +79,6 @@ int main(int argc, char *argv[])
struct kvm_x86_state *state;
struct ucall uc;
int stage;
- uint16_t evmcs_ver;
- struct kvm_enable_cap enable_evmcs_cap = {
- .cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
- .args[0] = (unsigned long)&evmcs_ver
- };
/* Create VM */
vm = vm_create_default(VCPU_ID, 0, guest_code);
@@ -96,13 +91,7 @@ int main(int argc, char *argv[])
exit(KSFT_SKIP);
}
- vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap);
-
- /* KVM should return supported EVMCS version range */
- TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) &&
- (evmcs_ver & 0xff) > 0,
- "Incorrect EVMCS version range: %x:%x\n",
- evmcs_ver & 0xff, evmcs_ver >> 8);
+ vcpu_enable_evmcs(vm, VCPU_ID);
run = vcpu_state(vm, VCPU_ID);
@@ -146,7 +135,7 @@ int main(int argc, char *argv[])
kvm_vm_restart(vm, O_RDWR);
vm_vcpu_add(vm, VCPU_ID);
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap);
+ vcpu_enable_evmcs(vm, VCPU_ID);
vcpu_load_state(vm, VCPU_ID, state);
run = vcpu_state(vm, VCPU_ID);
free(state);
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
index f72b3043db0e..443a2b54645b 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
@@ -18,6 +18,7 @@
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
+#include "vmx.h"
#define VCPU_ID 0
@@ -25,6 +26,25 @@ static void guest_code(void)
{
}
+static int smt_possible(void)
+{
+ char buf[16];
+ FILE *f;
+ bool res = 1;
+
+ f = fopen("/sys/devices/system/cpu/smt/control", "r");
+ if (f) {
+ if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) {
+ if (!strncmp(buf, "forceoff", 8) ||
+ !strncmp(buf, "notsupported", 12))
+ res = 0;
+ }
+ fclose(f);
+ }
+
+ return res;
+}
+
static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
int evmcs_enabled)
{
@@ -58,6 +78,14 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
TEST_ASSERT(!entry->padding[0] && !entry->padding[1] &&
!entry->padding[2], "padding should be zero");
+ if (entry->function == 0x40000004) {
+ int nononarchcs = !!(entry->eax & (1UL << 18));
+
+ TEST_ASSERT(nononarchcs == !smt_possible(),
+ "NoNonArchitecturalCoreSharing bit"
+ " doesn't reflect SMT setting");
+ }
+
/*
* If needed for debug:
* fprintf(stdout,
@@ -106,12 +134,7 @@ int main(int argc, char *argv[])
{
struct kvm_vm *vm;
int rv;
- uint16_t evmcs_ver;
struct kvm_cpuid2 *hv_cpuid_entries;
- struct kvm_enable_cap enable_evmcs_cap = {
- .cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
- .args[0] = (unsigned long)&evmcs_ver
- };
/* Tell stdout not to buffer its content */
setbuf(stdout, NULL);
@@ -136,14 +159,14 @@ int main(int argc, char *argv[])
free(hv_cpuid_entries);
- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap);
-
- if (rv) {
+ if (!kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
fprintf(stderr,
"Enlightened VMCS is unsupported, skip related test\n");
goto vm_free;
}
+ vcpu_enable_evmcs(vm, VCPU_ID);
+
hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm);
if (!hv_cpuid_entries)
return 1;
diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
index 40050e44ec0a..f9334bd3cce9 100644
--- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c
+++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
@@ -99,8 +99,8 @@ int main(int argc, char *argv[])
msr_platform_info = vcpu_get_msr(vm, VCPU_ID, MSR_PLATFORM_INFO);
vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO,
msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
- test_msr_platform_info_disabled(vm);
test_msr_platform_info_enabled(vm);
+ test_msr_platform_info_disabled(vm);
vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO, msr_platform_info);
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
new file mode 100644
index 000000000000..e280f68f6365
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_vmcall_test
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ *
+ * Nested SVM testing: VMCALL
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+
+#define VCPU_ID 5
+
+static struct kvm_vm *vm;
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+ __asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ /* Prepare for L2 execution. */
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ run_guest(vmcb, svm->vmcb_gpa);
+
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t svm_gva;
+
+ nested_svm_check_supported();
+
+ vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+ vcpu_alloc_svm(vm, &svm_gva);
+ vcpu_args_set(vm, VCPU_ID, 1, svm_gva);
+
+ for (;;) {
+ volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct ucall uc;
+
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_ABORT:
+ TEST_ASSERT(false, "%s",
+ (const char *)uc.args[0]);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_ASSERT(false,
+ "Unknown ucall 0x%x.", uc.cmd);
+ }
+ }
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
index 11c2a70a7b87..5c8224256294 100644
--- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
@@ -22,18 +22,19 @@
#define VCPU_ID 5
+#define UCALL_PIO_PORT ((uint16_t)0x1000)
+
+/*
+ * ucall is embedded here to protect against compiler reshuffling registers
+ * before calling a function. In this test we only need to get KVM_EXIT_IO
+ * vmexit and preserve RBX, no additional information is needed.
+ */
void guest_code(void)
{
- /*
- * use a callee-save register, otherwise the compiler
- * saves it around the call to GUEST_SYNC.
- */
- register u32 stage asm("rbx");
- for (;;) {
- GUEST_SYNC(0);
- stage++;
- asm volatile ("" : : "r" (stage));
- }
+ asm volatile("1: in %[port], %%al\n"
+ "add $0x1, %%rbx\n"
+ "jmp 1b"
+ : : [port] "d" (UCALL_PIO_PORT) : "rax", "rbx");
}
static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
index 3b0ffe01dacd..5dfb53546a26 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
@@ -53,12 +53,8 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
int main(int argc, char *argv[])
{
vm_vaddr_t vmx_pages_gva;
- struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
- if (!(entry->ecx & CPUID_VMX)) {
- fprintf(stderr, "nested VMX not enabled, skipping test\n");
- exit(KSFT_SKIP);
- }
+ nested_vmx_check_supported();
vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
new file mode 100644
index 000000000000..a223a6401258
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty page logging test
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define VCPU_ID 1
+
+/* The memory slot index to track dirty pages */
+#define TEST_MEM_SLOT_INDEX 1
+#define TEST_MEM_SIZE 3
+
+/* L1 guest test virtual memory offset */
+#define GUEST_TEST_MEM 0xc0000000
+
+/* L2 guest test virtual memory offset */
+#define NESTED_TEST_MEM1 0xc0001000
+#define NESTED_TEST_MEM2 0xc0002000
+
+static void l2_guest_code(void)
+{
+ *(volatile uint64_t *)NESTED_TEST_MEM1;
+ *(volatile uint64_t *)NESTED_TEST_MEM1 = 1;
+ GUEST_SYNC(true);
+ GUEST_SYNC(false);
+
+ *(volatile uint64_t *)NESTED_TEST_MEM2 = 1;
+ GUEST_SYNC(true);
+ *(volatile uint64_t *)NESTED_TEST_MEM2 = 1;
+ GUEST_SYNC(true);
+ GUEST_SYNC(false);
+
+ /* Exit to L1 and never come back. */
+ vmcall();
+}
+
+void l1_guest_code(struct vmx_pages *vmx)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT(vmx->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+ GUEST_ASSERT(load_vmcs(vmx));
+
+ prepare_vmcs(vmx, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_SYNC(false);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_SYNC(false);
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva = 0;
+ struct vmx_pages *vmx;
+ unsigned long *bmap;
+ uint64_t *host_test_mem;
+
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ struct ucall uc;
+ bool done = false;
+
+ nested_vmx_check_supported();
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, l1_guest_code);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+ run = vcpu_state(vm, VCPU_ID);
+
+ /* Add an extra memory slot for testing dirty logging */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ GUEST_TEST_MEM,
+ TEST_MEM_SLOT_INDEX,
+ TEST_MEM_SIZE,
+ KVM_MEM_LOG_DIRTY_PAGES);
+
+ /*
+ * Add an identity map for GVA range [0xc0000000, 0xc0002000). This
+ * affects both L1 and L2. However...
+ */
+ virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM,
+ TEST_MEM_SIZE * 4096, 0);
+
+ /*
+ * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
+ * 0xc0000000.
+ *
+ * Note that prepare_eptp should be called only L1's GPA map is done,
+ * meaning after the last call to virt_map.
+ */
+ prepare_eptp(vmx, vm, 0);
+ nested_map_memslot(vmx, vm, 0, 0);
+ nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096, 0);
+ nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096, 0);
+
+ bmap = bitmap_alloc(TEST_MEM_SIZE);
+ host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
+
+ while (!done) {
+ memset(host_test_mem, 0xaa, TEST_MEM_SIZE * 4096);
+ _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_ABORT:
+ TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0],
+ __FILE__, uc.args[1]);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ /*
+ * The nested guest wrote at offset 0x1000 in the memslot, but the
+ * dirty bitmap must be filled in according to L1 GPA, not L2.
+ */
+ kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
+ if (uc.args[1]) {
+ TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean\n");
+ TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest\n");
+ } else {
+ TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty\n");
+ TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest\n");
+ }
+
+ TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty\n");
+ TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest\n");
+ TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty\n");
+ TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest\n");
+ break;
+ case UCALL_DONE:
+ done = true;
+ break;
+ default:
+ TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+ }
+ }
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
index ed7218d166da..9ef7fab39d48 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
@@ -25,24 +25,17 @@
#define VMCS12_REVISION 0x11e57ed0
#define VCPU_ID 5
+bool have_evmcs;
+
void test_nested_state(struct kvm_vm *vm, struct kvm_nested_state *state)
{
- volatile struct kvm_run *run;
-
vcpu_nested_state_set(vm, VCPU_ID, state, false);
- run = vcpu_state(vm, VCPU_ID);
- vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
- "Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
}
void test_nested_state_expect_errno(struct kvm_vm *vm,
struct kvm_nested_state *state,
int expected_errno)
{
- volatile struct kvm_run *run;
int rv;
rv = vcpu_nested_state_set(vm, VCPU_ID, state, true);
@@ -50,12 +43,6 @@ void test_nested_state_expect_errno(struct kvm_vm *vm,
"Expected %s (%d) from vcpu_nested_state_set but got rv: %i errno: %s (%d)",
strerror(expected_errno), expected_errno, rv, strerror(errno),
errno);
- run = vcpu_state(vm, VCPU_ID);
- vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
- "Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
}
void test_nested_state_expect_einval(struct kvm_vm *vm,
@@ -90,8 +77,9 @@ void set_default_vmx_state(struct kvm_nested_state *state, int size)
{
memset(state, 0, size);
state->flags = KVM_STATE_NESTED_GUEST_MODE |
- KVM_STATE_NESTED_RUN_PENDING |
- KVM_STATE_NESTED_EVMCS;
+ KVM_STATE_NESTED_RUN_PENDING;
+ if (have_evmcs)
+ state->flags |= KVM_STATE_NESTED_EVMCS;
state->format = 0;
state->size = size;
state->hdr.vmx.vmxon_pa = 0x1000;
@@ -141,13 +129,19 @@ void test_vmx_nested_state(struct kvm_vm *vm)
/*
* Setting vmxon_pa == -1ull and vmcs_pa == -1ull exits early without
* setting the nested state but flags other than eVMCS must be clear.
+ * The eVMCS flag can be set if the enlightened VMCS capability has
+ * been enabled.
*/
set_default_vmx_state(state, state_sz);
state->hdr.vmx.vmxon_pa = -1ull;
state->hdr.vmx.vmcs12_pa = -1ull;
test_nested_state_expect_einval(vm, state);
- state->flags = KVM_STATE_NESTED_EVMCS;
+ state->flags &= KVM_STATE_NESTED_EVMCS;
+ if (have_evmcs) {
+ test_nested_state_expect_einval(vm, state);
+ vcpu_enable_evmcs(vm, VCPU_ID);
+ }
test_nested_state(vm, state);
/* It is invalid to have vmxon_pa == -1ull and SMM flags non-zero. */
@@ -230,7 +224,8 @@ int main(int argc, char *argv[])
{
struct kvm_vm *vm;
struct kvm_nested_state state;
- struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
+
+ have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS);
if (!kvm_check_cap(KVM_CAP_NESTED_STATE)) {
printf("KVM_CAP_NESTED_STATE not available, skipping test\n");
@@ -241,10 +236,7 @@ int main(int argc, char *argv[])
* AMD currently does not implement set_nested_state, so for now we
* just early out.
*/
- if (!(entry->ecx & CPUID_VMX)) {
- fprintf(stderr, "nested VMX not enabled, skipping test\n");
- exit(KSFT_SKIP);
- }
+ nested_vmx_check_supported();
vm = vm_create_default(VCPU_ID, 0, 0);
@@ -275,12 +267,7 @@ int main(int argc, char *argv[])
state.flags = KVM_STATE_NESTED_RUN_PENDING;
test_nested_state_expect_einval(vm, &state);
- /*
- * TODO: When SVM support is added for KVM_SET_NESTED_STATE
- * add tests here to support it like VMX.
- */
- if (entry->ecx & CPUID_VMX)
- test_vmx_nested_state(vm);
+ test_vmx_nested_state(vm);
kvm_vm_free(vm);
return 0;
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
index f36c10eba71e..69e482a95c47 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
@@ -98,7 +98,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
prepare_vmcs(vmx_pages, l2_guest_code,
&l2_guest_stack[L2_GUEST_STACK_SIZE]);
control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
- control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETING;
+ control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
@@ -128,12 +128,8 @@ static void report(int64_t val)
int main(int argc, char *argv[])
{
vm_vaddr_t vmx_pages_gva;
- struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
- if (!(entry->ecx & CPUID_VMX)) {
- fprintf(stderr, "nested VMX not enabled, skipping test\n");
- exit(KSFT_SKIP);
- }
+ nested_vmx_check_supported();
vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
new file mode 100644
index 000000000000..851ea81b9d9f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019, Google LLC.
+ *
+ * Tests for the IA32_XSS MSR.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+#define VCPU_ID 1
+#define MSR_BITS 64
+
+#define X86_FEATURE_XSAVES (1<<3)
+
+bool is_supported_msr(u32 msr_index)
+{
+ struct kvm_msr_list *list;
+ bool found = false;
+ int i;
+
+ list = kvm_get_msr_index_list();
+ for (i = 0; i < list->nmsrs; ++i) {
+ if (list->indices[i] == msr_index) {
+ found = true;
+ break;
+ }
+ }
+
+ free(list);
+ return found;
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_cpuid_entry2 *entry;
+ bool xss_supported = false;
+ struct kvm_vm *vm;
+ uint64_t xss_val;
+ int i, r;
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, 0);
+
+ if (kvm_get_cpuid_max_basic() >= 0xd) {
+ entry = kvm_get_supported_cpuid_index(0xd, 1);
+ xss_supported = entry && !!(entry->eax & X86_FEATURE_XSAVES);
+ }
+ if (!xss_supported) {
+ printf("IA32_XSS is not supported by the vCPU.\n");
+ exit(KSFT_SKIP);
+ }
+
+ xss_val = vcpu_get_msr(vm, VCPU_ID, MSR_IA32_XSS);
+ TEST_ASSERT(xss_val == 0,
+ "MSR_IA32_XSS should be initialized to zero\n");
+
+ vcpu_set_msr(vm, VCPU_ID, MSR_IA32_XSS, xss_val);
+ /*
+ * At present, KVM only supports a guest IA32_XSS value of 0. Verify
+ * that trying to set the guest IA32_XSS to an unsupported value fails.
+ * Also, in the future when a non-zero value succeeds check that
+ * IA32_XSS is in the KVM_GET_MSR_INDEX_LIST.
+ */
+ for (i = 0; i < MSR_BITS; ++i) {
+ r = _vcpu_set_msr(vm, VCPU_ID, MSR_IA32_XSS, 1ull << i);
+ TEST_ASSERT(r == 0 || is_supported_msr(MSR_IA32_XSS),
+ "IA32_XSS was able to be set, but was not found in KVM_GET_MSR_INDEX_LIST.\n");
+ }
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/lib/bitmap.sh b/tools/testing/selftests/lib/bitmap.sh
index 5511dddc5c2d..00a416fbc0ef 100755
--- a/tools/testing/selftests/lib/bitmap.sh
+++ b/tools/testing/selftests/lib/bitmap.sh
@@ -1,3 +1,3 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
-$(dirname $0)/../kselftest_module.sh "bitmap" test_bitmap
+$(dirname $0)/../kselftest/module.sh "bitmap" test_bitmap
diff --git a/tools/testing/selftests/lib/prime_numbers.sh b/tools/testing/selftests/lib/prime_numbers.sh
index 43b28f24e453..370b79a9cb2e 100755
--- a/tools/testing/selftests/lib/prime_numbers.sh
+++ b/tools/testing/selftests/lib/prime_numbers.sh
@@ -1,4 +1,4 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# Checks fast/slow prime_number generation for inconsistencies
-$(dirname $0)/../kselftest_module.sh "prime numbers" prime_numbers selftest=65536
+$(dirname $0)/../kselftest/module.sh "prime numbers" prime_numbers selftest=65536
diff --git a/tools/testing/selftests/lib/printf.sh b/tools/testing/selftests/lib/printf.sh
index 2ffa61da0296..05f4544e87f9 100755
--- a/tools/testing/selftests/lib/printf.sh
+++ b/tools/testing/selftests/lib/printf.sh
@@ -1,4 +1,4 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# Tests the printf infrastructure using test_printf kernel module.
-$(dirname $0)/../kselftest_module.sh "printf" test_printf
+$(dirname $0)/../kselftest/module.sh "printf" test_printf
diff --git a/tools/testing/selftests/lib/strscpy.sh b/tools/testing/selftests/lib/strscpy.sh
index 71f2be6afba6..be60ef6e1a7f 100755
--- a/tools/testing/selftests/lib/strscpy.sh
+++ b/tools/testing/selftests/lib/strscpy.sh
@@ -1,3 +1,3 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0+
-$(dirname $0)/../kselftest_module.sh "strscpy*" test_strscpy
+$(dirname $0)/../kselftest/module.sh "strscpy*" test_strscpy
diff --git a/tools/testing/selftests/livepatch/Makefile b/tools/testing/selftests/livepatch/Makefile
index fd405402c3ff..3876d8d62494 100644
--- a/tools/testing/selftests/livepatch/Makefile
+++ b/tools/testing/selftests/livepatch/Makefile
@@ -4,6 +4,8 @@ TEST_PROGS_EXTENDED := functions.sh
TEST_PROGS := \
test-livepatch.sh \
test-callbacks.sh \
- test-shadow-vars.sh
+ test-shadow-vars.sh \
+ test-state.sh \
+ test-ftrace.sh
include ../lib.mk
diff --git a/tools/testing/selftests/livepatch/README b/tools/testing/selftests/livepatch/README
index b73cd0e2dd51..621d325425c2 100644
--- a/tools/testing/selftests/livepatch/README
+++ b/tools/testing/selftests/livepatch/README
@@ -35,7 +35,7 @@ Adding tests
------------
See the common functions.sh file for the existing collection of utility
-functions, most importantly set_dynamic_debug() and check_result(). The
+functions, most importantly setup_config() and check_result(). The
latter function greps the kernel's ring buffer for "livepatch:" and
"test_klp" strings, so tests be sure to include one of those strings for
result comparison. Other utility functions include general module
diff --git a/tools/testing/selftests/livepatch/config b/tools/testing/selftests/livepatch/config
index 0dd7700464a8..ad23100cb27c 100644
--- a/tools/testing/selftests/livepatch/config
+++ b/tools/testing/selftests/livepatch/config
@@ -1 +1,3 @@
+CONFIG_LIVEPATCH=y
+CONFIG_DYNAMIC_DEBUG=y
CONFIG_TEST_LIVEPATCH=m
diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh
index 79b0affd21fb..2aab9791791d 100644
--- a/tools/testing/selftests/livepatch/functions.sh
+++ b/tools/testing/selftests/livepatch/functions.sh
@@ -7,6 +7,9 @@
MAX_RETRIES=600
RETRY_INTERVAL=".1" # seconds
+# Kselftest framework requirement - SKIP code is 4
+ksft_skip=4
+
# log(msg) - write message to kernel log
# msg - insightful words
function log() {
@@ -18,7 +21,16 @@ function log() {
function skip() {
log "SKIP: $1"
echo "SKIP: $1" >&2
- exit 4
+ exit $ksft_skip
+}
+
+# root test
+function is_root() {
+ uid=$(id -u)
+ if [ $uid -ne 0 ]; then
+ echo "skip all tests: must be run as root" >&2
+ exit $ksft_skip
+ fi
}
# die(msg) - game over, man
@@ -29,29 +41,45 @@ function die() {
exit 1
}
-function push_dynamic_debug() {
- DYNAMIC_DEBUG=$(grep '^kernel/livepatch' /sys/kernel/debug/dynamic_debug/control | \
- awk -F'[: ]' '{print "file " $1 " line " $2 " " $4}')
+function push_config() {
+ DYNAMIC_DEBUG=$(grep '^kernel/livepatch' /sys/kernel/debug/dynamic_debug/control | \
+ awk -F'[: ]' '{print "file " $1 " line " $2 " " $4}')
+ FTRACE_ENABLED=$(sysctl --values kernel.ftrace_enabled)
}
-function pop_dynamic_debug() {
+function pop_config() {
if [[ -n "$DYNAMIC_DEBUG" ]]; then
echo -n "$DYNAMIC_DEBUG" > /sys/kernel/debug/dynamic_debug/control
fi
+ if [[ -n "$FTRACE_ENABLED" ]]; then
+ sysctl kernel.ftrace_enabled="$FTRACE_ENABLED" &> /dev/null
+ fi
}
-# set_dynamic_debug() - save the current dynamic debug config and tweak
-# it for the self-tests. Set a script exit trap
-# that restores the original config.
function set_dynamic_debug() {
- push_dynamic_debug
- trap pop_dynamic_debug EXIT INT TERM HUP
cat <<-EOF > /sys/kernel/debug/dynamic_debug/control
file kernel/livepatch/* +p
func klp_try_switch_task -p
EOF
}
+function set_ftrace_enabled() {
+ result=$(sysctl kernel.ftrace_enabled="$1" 2>&1 | paste --serial --delimiters=' ')
+ echo "livepatch: $result" > /dev/kmsg
+}
+
+# setup_config - save the current config and set a script exit trap that
+# restores the original config. Setup the dynamic debug
+# for verbose livepatching output and turn on
+# the ftrace_enabled sysctl.
+function setup_config() {
+ is_root
+ push_config
+ set_dynamic_debug
+ set_ftrace_enabled 1
+ trap pop_config EXIT INT TERM HUP
+}
+
# loop_until(cmd) - loop a command until it is successful or $MAX_RETRIES,
# sleep $RETRY_INTERVAL between attempts
# cmd - command and its arguments to run
diff --git a/tools/testing/selftests/livepatch/settings b/tools/testing/selftests/livepatch/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/livepatch/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/livepatch/test-callbacks.sh b/tools/testing/selftests/livepatch/test-callbacks.sh
index e97a9dcb73c7..a35289b13c9c 100755
--- a/tools/testing/selftests/livepatch/test-callbacks.sh
+++ b/tools/testing/selftests/livepatch/test-callbacks.sh
@@ -9,7 +9,7 @@ MOD_LIVEPATCH2=test_klp_callbacks_demo2
MOD_TARGET=test_klp_callbacks_mod
MOD_TARGET_BUSY=test_klp_callbacks_busy
-set_dynamic_debug
+setup_config
# TEST: target module before livepatch
diff --git a/tools/testing/selftests/livepatch/test-ftrace.sh b/tools/testing/selftests/livepatch/test-ftrace.sh
new file mode 100755
index 000000000000..e2a76887f40a
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test-ftrace.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2019 Joe Lawrence <joe.lawrence@redhat.com>
+
+. $(dirname $0)/functions.sh
+
+MOD_LIVEPATCH=test_klp_livepatch
+
+setup_config
+
+
+# TEST: livepatch interaction with ftrace_enabled sysctl
+# - turn ftrace_enabled OFF and verify livepatches can't load
+# - turn ftrace_enabled ON and verify livepatch can load
+# - verify that ftrace_enabled can't be turned OFF while a livepatch is loaded
+
+echo -n "TEST: livepatch interaction with ftrace_enabled sysctl ... "
+dmesg -C
+
+set_ftrace_enabled 0
+load_failing_mod $MOD_LIVEPATCH
+
+set_ftrace_enabled 1
+load_lp $MOD_LIVEPATCH
+if [[ "$(cat /proc/cmdline)" != "$MOD_LIVEPATCH: this has been live patched" ]] ; then
+ echo -e "FAIL\n\n"
+ die "livepatch kselftest(s) failed"
+fi
+
+set_ftrace_enabled 0
+if [[ "$(cat /proc/cmdline)" != "$MOD_LIVEPATCH: this has been live patched" ]] ; then
+ echo -e "FAIL\n\n"
+ die "livepatch kselftest(s) failed"
+fi
+disable_lp $MOD_LIVEPATCH
+unload_lp $MOD_LIVEPATCH
+
+check_result "livepatch: kernel.ftrace_enabled = 0
+% modprobe $MOD_LIVEPATCH
+livepatch: enabling patch '$MOD_LIVEPATCH'
+livepatch: '$MOD_LIVEPATCH': initializing patching transition
+livepatch: failed to register ftrace handler for function 'cmdline_proc_show' (-16)
+livepatch: failed to patch object 'vmlinux'
+livepatch: failed to enable patch '$MOD_LIVEPATCH'
+livepatch: '$MOD_LIVEPATCH': canceling patching transition, going to unpatch
+livepatch: '$MOD_LIVEPATCH': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH': unpatching complete
+modprobe: ERROR: could not insert '$MOD_LIVEPATCH': Device or resource busy
+livepatch: kernel.ftrace_enabled = 1
+% modprobe $MOD_LIVEPATCH
+livepatch: enabling patch '$MOD_LIVEPATCH'
+livepatch: '$MOD_LIVEPATCH': initializing patching transition
+livepatch: '$MOD_LIVEPATCH': starting patching transition
+livepatch: '$MOD_LIVEPATCH': completing patching transition
+livepatch: '$MOD_LIVEPATCH': patching complete
+livepatch: sysctl: setting key \"kernel.ftrace_enabled\": Device or resource busy kernel.ftrace_enabled = 0
+% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
+livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
+livepatch: '$MOD_LIVEPATCH': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH': unpatching complete
+% rmmod $MOD_LIVEPATCH"
+
+
+exit 0
diff --git a/tools/testing/selftests/livepatch/test-livepatch.sh b/tools/testing/selftests/livepatch/test-livepatch.sh
index f05268aea859..493e3df415a1 100755
--- a/tools/testing/selftests/livepatch/test-livepatch.sh
+++ b/tools/testing/selftests/livepatch/test-livepatch.sh
@@ -7,7 +7,7 @@
MOD_LIVEPATCH=test_klp_livepatch
MOD_REPLACE=test_klp_atomic_replace
-set_dynamic_debug
+setup_config
# TEST: basic function patching
diff --git a/tools/testing/selftests/livepatch/test-shadow-vars.sh b/tools/testing/selftests/livepatch/test-shadow-vars.sh
index 04a37831e204..1aae73299114 100755
--- a/tools/testing/selftests/livepatch/test-shadow-vars.sh
+++ b/tools/testing/selftests/livepatch/test-shadow-vars.sh
@@ -6,7 +6,7 @@
MOD_TEST=test_klp_shadow_vars
-set_dynamic_debug
+setup_config
# TEST: basic shadow variable API
diff --git a/tools/testing/selftests/livepatch/test-state.sh b/tools/testing/selftests/livepatch/test-state.sh
new file mode 100755
index 000000000000..a08212708115
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test-state.sh
@@ -0,0 +1,179 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2019 SUSE
+
+. $(dirname $0)/functions.sh
+
+MOD_LIVEPATCH=test_klp_state
+MOD_LIVEPATCH2=test_klp_state2
+MOD_LIVEPATCH3=test_klp_state3
+
+setup_config
+
+# TEST: Loading and removing a module that modifies the system state
+
+echo -n "TEST: system state modification ... "
+dmesg -C
+
+load_lp $MOD_LIVEPATCH
+disable_lp $MOD_LIVEPATCH
+unload_lp $MOD_LIVEPATCH
+
+check_result "% modprobe $MOD_LIVEPATCH
+livepatch: enabling patch '$MOD_LIVEPATCH'
+livepatch: '$MOD_LIVEPATCH': initializing patching transition
+$MOD_LIVEPATCH: pre_patch_callback: vmlinux
+$MOD_LIVEPATCH: allocate_loglevel_state: allocating space to store console_loglevel
+livepatch: '$MOD_LIVEPATCH': starting patching transition
+livepatch: '$MOD_LIVEPATCH': completing patching transition
+$MOD_LIVEPATCH: post_patch_callback: vmlinux
+$MOD_LIVEPATCH: fix_console_loglevel: fixing console_loglevel
+livepatch: '$MOD_LIVEPATCH': patching complete
+% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
+livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
+$MOD_LIVEPATCH: pre_unpatch_callback: vmlinux
+$MOD_LIVEPATCH: restore_console_loglevel: restoring console_loglevel
+livepatch: '$MOD_LIVEPATCH': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH': completing unpatching transition
+$MOD_LIVEPATCH: post_unpatch_callback: vmlinux
+$MOD_LIVEPATCH: free_loglevel_state: freeing space for the stored console_loglevel
+livepatch: '$MOD_LIVEPATCH': unpatching complete
+% rmmod $MOD_LIVEPATCH"
+
+
+# TEST: Take over system state change by a cumulative patch
+
+echo -n "TEST: taking over system state modification ... "
+dmesg -C
+
+load_lp $MOD_LIVEPATCH
+load_lp $MOD_LIVEPATCH2
+unload_lp $MOD_LIVEPATCH
+disable_lp $MOD_LIVEPATCH2
+unload_lp $MOD_LIVEPATCH2
+
+check_result "% modprobe $MOD_LIVEPATCH
+livepatch: enabling patch '$MOD_LIVEPATCH'
+livepatch: '$MOD_LIVEPATCH': initializing patching transition
+$MOD_LIVEPATCH: pre_patch_callback: vmlinux
+$MOD_LIVEPATCH: allocate_loglevel_state: allocating space to store console_loglevel
+livepatch: '$MOD_LIVEPATCH': starting patching transition
+livepatch: '$MOD_LIVEPATCH': completing patching transition
+$MOD_LIVEPATCH: post_patch_callback: vmlinux
+$MOD_LIVEPATCH: fix_console_loglevel: fixing console_loglevel
+livepatch: '$MOD_LIVEPATCH': patching complete
+% modprobe $MOD_LIVEPATCH2
+livepatch: enabling patch '$MOD_LIVEPATCH2'
+livepatch: '$MOD_LIVEPATCH2': initializing patching transition
+$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
+$MOD_LIVEPATCH2: allocate_loglevel_state: space to store console_loglevel already allocated
+livepatch: '$MOD_LIVEPATCH2': starting patching transition
+livepatch: '$MOD_LIVEPATCH2': completing patching transition
+$MOD_LIVEPATCH2: post_patch_callback: vmlinux
+$MOD_LIVEPATCH2: fix_console_loglevel: taking over the console_loglevel change
+livepatch: '$MOD_LIVEPATCH2': patching complete
+% rmmod $MOD_LIVEPATCH
+% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH2/enabled
+livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition
+$MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux
+$MOD_LIVEPATCH2: restore_console_loglevel: restoring console_loglevel
+livepatch: '$MOD_LIVEPATCH2': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH2': completing unpatching transition
+$MOD_LIVEPATCH2: post_unpatch_callback: vmlinux
+$MOD_LIVEPATCH2: free_loglevel_state: freeing space for the stored console_loglevel
+livepatch: '$MOD_LIVEPATCH2': unpatching complete
+% rmmod $MOD_LIVEPATCH2"
+
+
+# TEST: Take over system state change by a cumulative patch
+
+echo -n "TEST: compatible cumulative livepatches ... "
+dmesg -C
+
+load_lp $MOD_LIVEPATCH2
+load_lp $MOD_LIVEPATCH3
+unload_lp $MOD_LIVEPATCH2
+load_lp $MOD_LIVEPATCH2
+disable_lp $MOD_LIVEPATCH2
+unload_lp $MOD_LIVEPATCH2
+unload_lp $MOD_LIVEPATCH3
+
+check_result "% modprobe $MOD_LIVEPATCH2
+livepatch: enabling patch '$MOD_LIVEPATCH2'
+livepatch: '$MOD_LIVEPATCH2': initializing patching transition
+$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
+$MOD_LIVEPATCH2: allocate_loglevel_state: allocating space to store console_loglevel
+livepatch: '$MOD_LIVEPATCH2': starting patching transition
+livepatch: '$MOD_LIVEPATCH2': completing patching transition
+$MOD_LIVEPATCH2: post_patch_callback: vmlinux
+$MOD_LIVEPATCH2: fix_console_loglevel: fixing console_loglevel
+livepatch: '$MOD_LIVEPATCH2': patching complete
+% modprobe $MOD_LIVEPATCH3
+livepatch: enabling patch '$MOD_LIVEPATCH3'
+livepatch: '$MOD_LIVEPATCH3': initializing patching transition
+$MOD_LIVEPATCH3: pre_patch_callback: vmlinux
+$MOD_LIVEPATCH3: allocate_loglevel_state: space to store console_loglevel already allocated
+livepatch: '$MOD_LIVEPATCH3': starting patching transition
+livepatch: '$MOD_LIVEPATCH3': completing patching transition
+$MOD_LIVEPATCH3: post_patch_callback: vmlinux
+$MOD_LIVEPATCH3: fix_console_loglevel: taking over the console_loglevel change
+livepatch: '$MOD_LIVEPATCH3': patching complete
+% rmmod $MOD_LIVEPATCH2
+% modprobe $MOD_LIVEPATCH2
+livepatch: enabling patch '$MOD_LIVEPATCH2'
+livepatch: '$MOD_LIVEPATCH2': initializing patching transition
+$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
+$MOD_LIVEPATCH2: allocate_loglevel_state: space to store console_loglevel already allocated
+livepatch: '$MOD_LIVEPATCH2': starting patching transition
+livepatch: '$MOD_LIVEPATCH2': completing patching transition
+$MOD_LIVEPATCH2: post_patch_callback: vmlinux
+$MOD_LIVEPATCH2: fix_console_loglevel: taking over the console_loglevel change
+livepatch: '$MOD_LIVEPATCH2': patching complete
+% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH2/enabled
+livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition
+$MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux
+$MOD_LIVEPATCH2: restore_console_loglevel: restoring console_loglevel
+livepatch: '$MOD_LIVEPATCH2': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH2': completing unpatching transition
+$MOD_LIVEPATCH2: post_unpatch_callback: vmlinux
+$MOD_LIVEPATCH2: free_loglevel_state: freeing space for the stored console_loglevel
+livepatch: '$MOD_LIVEPATCH2': unpatching complete
+% rmmod $MOD_LIVEPATCH2
+% rmmod $MOD_LIVEPATCH3"
+
+
+# TEST: Failure caused by incompatible cumulative livepatches
+
+echo -n "TEST: incompatible cumulative livepatches ... "
+dmesg -C
+
+load_lp $MOD_LIVEPATCH2
+load_failing_mod $MOD_LIVEPATCH
+disable_lp $MOD_LIVEPATCH2
+unload_lp $MOD_LIVEPATCH2
+
+check_result "% modprobe $MOD_LIVEPATCH2
+livepatch: enabling patch '$MOD_LIVEPATCH2'
+livepatch: '$MOD_LIVEPATCH2': initializing patching transition
+$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
+$MOD_LIVEPATCH2: allocate_loglevel_state: allocating space to store console_loglevel
+livepatch: '$MOD_LIVEPATCH2': starting patching transition
+livepatch: '$MOD_LIVEPATCH2': completing patching transition
+$MOD_LIVEPATCH2: post_patch_callback: vmlinux
+$MOD_LIVEPATCH2: fix_console_loglevel: fixing console_loglevel
+livepatch: '$MOD_LIVEPATCH2': patching complete
+% modprobe $MOD_LIVEPATCH
+livepatch: Livepatch patch ($MOD_LIVEPATCH) is not compatible with the already installed livepatches.
+modprobe: ERROR: could not insert '$MOD_LIVEPATCH': Invalid argument
+% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH2/enabled
+livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition
+$MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux
+$MOD_LIVEPATCH2: restore_console_loglevel: restoring console_loglevel
+livepatch: '$MOD_LIVEPATCH2': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH2': completing unpatching transition
+$MOD_LIVEPATCH2: post_unpatch_callback: vmlinux
+$MOD_LIVEPATCH2: free_loglevel_state: freeing space for the stored console_loglevel
+livepatch: '$MOD_LIVEPATCH2': unpatching complete
+% rmmod $MOD_LIVEPATCH2"
+
+exit 0
diff --git a/tools/testing/selftests/lkdtm/Makefile b/tools/testing/selftests/lkdtm/Makefile
new file mode 100644
index 000000000000..1bcc9ee990eb
--- /dev/null
+++ b/tools/testing/selftests/lkdtm/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for LKDTM regression tests
+
+include ../lib.mk
+
+# NOTE: $(OUTPUT) won't get default value if used before lib.mk
+TEST_FILES := tests.txt
+TEST_GEN_PROGS = $(patsubst %,$(OUTPUT)/%.sh,$(shell awk '{print $$1}' tests.txt | sed -e 's/\#//'))
+all: $(TEST_GEN_PROGS)
+
+$(OUTPUT)/%: run.sh tests.txt
+ install -m 0744 run.sh $@
diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config
new file mode 100644
index 000000000000..d874990e442b
--- /dev/null
+++ b/tools/testing/selftests/lkdtm/config
@@ -0,0 +1 @@
+CONFIG_LKDTM=y
diff --git a/tools/testing/selftests/lkdtm/run.sh b/tools/testing/selftests/lkdtm/run.sh
new file mode 100755
index 000000000000..dadf819148a4
--- /dev/null
+++ b/tools/testing/selftests/lkdtm/run.sh
@@ -0,0 +1,92 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# This reads tests.txt for the list of LKDTM tests to invoke. Any marked
+# with a leading "#" are skipped. The rest of the line after the
+# test name is either the text to look for in dmesg for a "success",
+# or the rationale for why a test is marked to be skipped.
+#
+set -e
+TRIGGER=/sys/kernel/debug/provoke-crash/DIRECT
+KSELFTEST_SKIP_TEST=4
+
+# Verify we have LKDTM available in the kernel.
+if [ ! -r $TRIGGER ] ; then
+ /sbin/modprobe -q lkdtm || true
+ if [ ! -r $TRIGGER ] ; then
+ echo "Cannot find $TRIGGER (missing CONFIG_LKDTM?)"
+ else
+ echo "Cannot write $TRIGGER (need to run as root?)"
+ fi
+ # Skip this test
+ exit $KSELFTEST_SKIP_TEST
+fi
+
+# Figure out which test to run from our script name.
+test=$(basename $0 .sh)
+# Look up details about the test from master list of LKDTM tests.
+line=$(egrep '^#?'"$test"'\b' tests.txt)
+if [ -z "$line" ]; then
+ echo "Skipped: missing test '$test' in tests.txt"
+ exit $KSELFTEST_SKIP_TEST
+fi
+# Check that the test is known to LKDTM.
+if ! egrep -q '^'"$test"'$' "$TRIGGER" ; then
+ echo "Skipped: test '$test' missing in $TRIGGER!"
+ exit $KSELFTEST_SKIP_TEST
+fi
+
+# Extract notes/expected output from test list.
+test=$(echo "$line" | cut -d" " -f1)
+if echo "$line" | grep -q ' ' ; then
+ expect=$(echo "$line" | cut -d" " -f2-)
+else
+ expect=""
+fi
+
+# If the test is commented out, report a skip
+if echo "$test" | grep -q '^#' ; then
+ test=$(echo "$test" | cut -c2-)
+ if [ -z "$expect" ]; then
+ expect="crashes entire system"
+ fi
+ echo "Skipping $test: $expect"
+ exit $KSELFTEST_SKIP_TEST
+fi
+
+# If no expected output given, assume an Oops with back trace is success.
+if [ -z "$expect" ]; then
+ expect="call trace:"
+fi
+
+# Clear out dmesg for output reporting
+dmesg -c >/dev/null
+
+# Prepare log for report checking
+LOG=$(mktemp --tmpdir -t lkdtm-XXXXXX)
+cleanup() {
+ rm -f "$LOG"
+}
+trap cleanup EXIT
+
+# Most shells yell about signals and we're expecting the "cat" process
+# to usually be killed by the kernel. So we have to run it in a sub-shell
+# and silence errors.
+($SHELL -c 'cat <(echo '"$test"') >'"$TRIGGER" 2>/dev/null) || true
+
+# Record and dump the results
+dmesg -c >"$LOG"
+cat "$LOG"
+# Check for expected output
+if egrep -qi "$expect" "$LOG" ; then
+ echo "$test: saw '$expect': ok"
+ exit 0
+else
+ if egrep -qi XFAIL: "$LOG" ; then
+ echo "$test: saw 'XFAIL': [SKIP]"
+ exit $KSELFTEST_SKIP_TEST
+ else
+ echo "$test: missing '$expect': [FAIL]"
+ exit 1
+ fi
+fi
diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
new file mode 100644
index 000000000000..92ca32143ae5
--- /dev/null
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -0,0 +1,71 @@
+#PANIC
+BUG kernel BUG at
+WARNING WARNING:
+WARNING_MESSAGE message trigger
+EXCEPTION
+#LOOP Hangs the system
+#EXHAUST_STACK Corrupts memory on failure
+#CORRUPT_STACK Crashes entire system on success
+#CORRUPT_STACK_STRONG Crashes entire system on success
+CORRUPT_LIST_ADD list_add corruption
+CORRUPT_LIST_DEL list_del corruption
+CORRUPT_USER_DS Invalid address limit on user-mode return
+STACK_GUARD_PAGE_LEADING
+STACK_GUARD_PAGE_TRAILING
+UNSET_SMEP CR4 bits went missing
+DOUBLE_FAULT
+UNALIGNED_LOAD_STORE_WRITE
+#OVERWRITE_ALLOCATION Corrupts memory on failure
+#WRITE_AFTER_FREE Corrupts memory on failure
+READ_AFTER_FREE
+#WRITE_BUDDY_AFTER_FREE Corrupts memory on failure
+READ_BUDDY_AFTER_FREE
+SLAB_FREE_DOUBLE
+SLAB_FREE_CROSS
+SLAB_FREE_PAGE
+#SOFTLOCKUP Hangs the system
+#HARDLOCKUP Hangs the system
+#SPINLOCKUP Hangs the system
+#HUNG_TASK Hangs the system
+EXEC_DATA
+EXEC_STACK
+EXEC_KMALLOC
+EXEC_VMALLOC
+EXEC_RODATA
+EXEC_USERSPACE
+EXEC_NULL
+ACCESS_USERSPACE
+ACCESS_NULL
+WRITE_RO
+WRITE_RO_AFTER_INIT
+WRITE_KERN
+REFCOUNT_INC_OVERFLOW
+REFCOUNT_ADD_OVERFLOW
+REFCOUNT_INC_NOT_ZERO_OVERFLOW
+REFCOUNT_ADD_NOT_ZERO_OVERFLOW
+REFCOUNT_DEC_ZERO
+REFCOUNT_DEC_NEGATIVE Negative detected: saturated
+REFCOUNT_DEC_AND_TEST_NEGATIVE Negative detected: saturated
+REFCOUNT_SUB_AND_TEST_NEGATIVE Negative detected: saturated
+REFCOUNT_INC_ZERO
+REFCOUNT_ADD_ZERO
+REFCOUNT_INC_SATURATED Saturation detected: still saturated
+REFCOUNT_DEC_SATURATED Saturation detected: still saturated
+REFCOUNT_ADD_SATURATED Saturation detected: still saturated
+REFCOUNT_INC_NOT_ZERO_SATURATED
+REFCOUNT_ADD_NOT_ZERO_SATURATED
+REFCOUNT_DEC_AND_TEST_SATURATED Saturation detected: still saturated
+REFCOUNT_SUB_AND_TEST_SATURATED Saturation detected: still saturated
+#REFCOUNT_TIMING timing only
+#ATOMIC_TIMING timing only
+USERCOPY_HEAP_SIZE_TO
+USERCOPY_HEAP_SIZE_FROM
+USERCOPY_HEAP_WHITELIST_TO
+USERCOPY_HEAP_WHITELIST_FROM
+USERCOPY_STACK_FRAME_TO
+USERCOPY_STACK_FRAME_FROM
+USERCOPY_STACK_BEYOND
+USERCOPY_KERNEL
+USERCOPY_KERNEL_DS
+STACKLEAK_ERASING OK: the rest of the thread stack is properly erased
+CFI_FORWARD_PROTO
diff --git a/tools/testing/selftests/membarrier/.gitignore b/tools/testing/selftests/membarrier/.gitignore
index 020c44f49a9e..f2f7ec0a99b4 100644
--- a/tools/testing/selftests/membarrier/.gitignore
+++ b/tools/testing/selftests/membarrier/.gitignore
@@ -1 +1,2 @@
-membarrier_test
+membarrier_test_multi_thread
+membarrier_test_single_thread
diff --git a/tools/testing/selftests/membarrier/Makefile b/tools/testing/selftests/membarrier/Makefile
index 97e3bdf3d1e9..34d1c81a2324 100644
--- a/tools/testing/selftests/membarrier/Makefile
+++ b/tools/testing/selftests/membarrier/Makefile
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-only
CFLAGS += -g -I../../../../usr/include/
+LDLIBS += -lpthread
-TEST_GEN_PROGS := membarrier_test
+TEST_GEN_PROGS := membarrier_test_single_thread \
+ membarrier_test_multi_thread
include ../lib.mk
-
diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test_impl.h
index 70b4ddbf126b..186be69f0a59 100644
--- a/tools/testing/selftests/membarrier/membarrier_test.c
+++ b/tools/testing/selftests/membarrier/membarrier_test_impl.h
@@ -1,10 +1,11 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
#define _GNU_SOURCE
#include <linux/membarrier.h>
#include <syscall.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
+#include <pthread.h>
#include "../kselftest.h"
@@ -223,7 +224,7 @@ static int test_membarrier_global_expedited_success(void)
return 0;
}
-static int test_membarrier(void)
+static int test_membarrier_fail(void)
{
int status;
@@ -233,10 +234,27 @@ static int test_membarrier(void)
status = test_membarrier_flags_fail();
if (status)
return status;
- status = test_membarrier_global_success();
+ status = test_membarrier_private_expedited_fail();
if (status)
return status;
- status = test_membarrier_private_expedited_fail();
+ status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0);
+ if (status < 0) {
+ ksft_test_result_fail("sys_membarrier() failed\n");
+ return status;
+ }
+ if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) {
+ status = test_membarrier_private_expedited_sync_core_fail();
+ if (status)
+ return status;
+ }
+ return 0;
+}
+
+static int test_membarrier_success(void)
+{
+ int status;
+
+ status = test_membarrier_global_success();
if (status)
return status;
status = test_membarrier_register_private_expedited_success();
@@ -251,9 +269,6 @@ static int test_membarrier(void)
return status;
}
if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) {
- status = test_membarrier_private_expedited_sync_core_fail();
- if (status)
- return status;
status = test_membarrier_register_private_expedited_sync_core_success();
if (status)
return status;
@@ -300,14 +315,3 @@ static int test_membarrier_query(void)
ksft_test_result_pass("sys_membarrier available\n");
return 0;
}
-
-int main(int argc, char **argv)
-{
- ksft_print_header();
- ksft_set_plan(13);
-
- test_membarrier_query();
- test_membarrier();
-
- return ksft_exit_pass();
-}
diff --git a/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c b/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c
new file mode 100644
index 000000000000..ac5613e5b0eb
--- /dev/null
+++ b/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <linux/membarrier.h>
+#include <syscall.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <pthread.h>
+
+#include "membarrier_test_impl.h"
+
+static int thread_ready, thread_quit;
+static pthread_mutex_t test_membarrier_thread_mutex =
+ PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t test_membarrier_thread_cond =
+ PTHREAD_COND_INITIALIZER;
+
+void *test_membarrier_thread(void *arg)
+{
+ pthread_mutex_lock(&test_membarrier_thread_mutex);
+ thread_ready = 1;
+ pthread_cond_broadcast(&test_membarrier_thread_cond);
+ pthread_mutex_unlock(&test_membarrier_thread_mutex);
+
+ pthread_mutex_lock(&test_membarrier_thread_mutex);
+ while (!thread_quit)
+ pthread_cond_wait(&test_membarrier_thread_cond,
+ &test_membarrier_thread_mutex);
+ pthread_mutex_unlock(&test_membarrier_thread_mutex);
+
+ return NULL;
+}
+
+static int test_mt_membarrier(void)
+{
+ int i;
+ pthread_t test_thread;
+
+ pthread_create(&test_thread, NULL,
+ test_membarrier_thread, NULL);
+
+ pthread_mutex_lock(&test_membarrier_thread_mutex);
+ while (!thread_ready)
+ pthread_cond_wait(&test_membarrier_thread_cond,
+ &test_membarrier_thread_mutex);
+ pthread_mutex_unlock(&test_membarrier_thread_mutex);
+
+ test_membarrier_fail();
+
+ test_membarrier_success();
+
+ pthread_mutex_lock(&test_membarrier_thread_mutex);
+ thread_quit = 1;
+ pthread_cond_broadcast(&test_membarrier_thread_cond);
+ pthread_mutex_unlock(&test_membarrier_thread_mutex);
+
+ pthread_join(test_thread, NULL);
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ ksft_print_header();
+ ksft_set_plan(13);
+
+ test_membarrier_query();
+
+ /* Multi-threaded */
+ test_mt_membarrier();
+
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/membarrier/membarrier_test_single_thread.c b/tools/testing/selftests/membarrier/membarrier_test_single_thread.c
new file mode 100644
index 000000000000..c1c963902854
--- /dev/null
+++ b/tools/testing/selftests/membarrier/membarrier_test_single_thread.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <linux/membarrier.h>
+#include <syscall.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <pthread.h>
+
+#include "membarrier_test_impl.h"
+
+int main(int argc, char **argv)
+{
+ ksft_print_header();
+ ksft_set_plan(13);
+
+ test_membarrier_query();
+
+ test_membarrier_fail();
+
+ test_membarrier_success();
+
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index c67d32eeb668..334a7eea2004 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -290,6 +290,40 @@ static void mfd_assert_read_shared(int fd)
munmap(p, mfd_def_size);
}
+static void mfd_assert_fork_private_write(int fd)
+{
+ int *p;
+ pid_t pid;
+
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+
+ p[0] = 22;
+
+ pid = fork();
+ if (pid == 0) {
+ p[0] = 33;
+ exit(0);
+ } else {
+ waitpid(pid, NULL, 0);
+
+ if (p[0] != 22) {
+ printf("MAP_PRIVATE copy-on-write failed: %m\n");
+ abort();
+ }
+ }
+
+ munmap(p, mfd_def_size);
+}
+
static void mfd_assert_write(int fd)
{
ssize_t l;
@@ -760,6 +794,8 @@ static void test_seal_future_write(void)
mfd_assert_read_shared(fd2);
mfd_fail_write(fd2);
+ mfd_assert_fork_private_write(fd);
+
munmap(p, mfd_def_size);
close(fd2);
close(fd);
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index c7cced739c34..ecc52d4c034d 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -21,3 +21,5 @@ ipv6_flowlabel
ipv6_flowlabel_mgr
so_txtime
tcp_fastopen_backup_key
+nettest
+fin_ack_lat
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 1b24e36b4047..b5694196430a 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -10,14 +10,16 @@ TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh
-TEST_PROGS += tcp_fastopen_backup_key.sh
+TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh
+TEST_PROGS += fin_ack_lat.sh
TEST_PROGS_EXTENDED := in_netns.sh
-TEST_GEN_FILES = socket
+TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr
TEST_GEN_FILES += tcp_fastopen_backup_key
+TEST_GEN_FILES += fin_ack_lat
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
diff --git a/tools/testing/selftests/net/altnames.sh b/tools/testing/selftests/net/altnames.sh
new file mode 100755
index 000000000000..4254ddc3f70b
--- /dev/null
+++ b/tools/testing/selftests/net/altnames.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/forwarding
+
+ALL_TESTS="altnames_test"
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+
+DUMMY_DEV=dummytest
+SHORT_NAME=shortname
+LONG_NAME=someveryveryveryveryveryverylongname
+
+altnames_test()
+{
+ RET=0
+ local output
+ local name
+
+ ip link property add $DUMMY_DEV altname $SHORT_NAME
+ check_err $? "Failed to add short alternative name"
+
+ output=$(ip -j -p link show $SHORT_NAME)
+ check_err $? "Failed to do link show with short alternative name"
+
+ name=$(echo $output | jq -e -r ".[0].altnames[0]")
+ check_err $? "Failed to get short alternative name from link show JSON"
+
+ [ "$name" == "$SHORT_NAME" ]
+ check_err $? "Got unexpected short alternative name from link show JSON"
+
+ ip -j -p link show $DUMMY_DEV &>/dev/null
+ check_err $? "Failed to do link show with original name"
+
+ ip link property add $DUMMY_DEV altname $LONG_NAME
+ check_err $? "Failed to add long alternative name"
+
+ output=$(ip -j -p link show $LONG_NAME)
+ check_err $? "Failed to do link show with long alternative name"
+
+ name=$(echo $output | jq -e -r ".[0].altnames[1]")
+ check_err $? "Failed to get long alternative name from link show JSON"
+
+ [ "$name" == "$LONG_NAME" ]
+ check_err $? "Got unexpected long alternative name from link show JSON"
+
+ ip link property del $DUMMY_DEV altname $SHORT_NAME
+ check_err $? "Failed to add short alternative name"
+
+ ip -j -p link show $SHORT_NAME &>/dev/null
+ check_fail $? "Unexpected success while trying to do link show with deleted short alternative name"
+
+ # long name is left there on purpose to be removed alongside the device
+
+ log_test "altnames test"
+}
+
+setup_prepare()
+{
+ ip link add name $DUMMY_DEV type dummy
+}
+
+cleanup()
+{
+ pre_cleanup
+ ip link del name $DUMMY_DEV
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
new file mode 100755
index 000000000000..fb5c55dd6df8
--- /dev/null
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -0,0 +1,3890 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2019 David Ahern <dsahern@gmail.com>. All rights reserved.
+#
+# IPv4 and IPv6 functional tests focusing on VRF and routing lookups
+# for various permutations:
+# 1. icmp, tcp, udp and netfilter
+# 2. client, server, no-server
+# 3. global address on interface
+# 4. global address on 'lo'
+# 5. remote and local traffic
+# 6. VRF and non-VRF permutations
+#
+# Setup:
+# ns-A | ns-B
+# No VRF case:
+# [ lo ] [ eth1 ]---|---[ eth1 ] [ lo ]
+# remote address
+# VRF case:
+# [ red ]---[ eth1 ]---|---[ eth1 ] [ lo ]
+#
+# ns-A:
+# eth1: 172.16.1.1/24, 2001:db8:1::1/64
+# lo: 127.0.0.1/8, ::1/128
+# 172.16.2.1/32, 2001:db8:2::1/128
+# red: 127.0.0.1/8, ::1/128
+# 172.16.3.1/32, 2001:db8:3::1/128
+#
+# ns-B:
+# eth1: 172.16.1.2/24, 2001:db8:1::2/64
+# lo2: 127.0.0.1/8, ::1/128
+# 172.16.2.2/32, 2001:db8:2::2/128
+#
+# ns-A to ns-C connection - only for VRF and same config
+# as ns-A to ns-B
+#
+# server / client nomenclature relative to ns-A
+
+VERBOSE=0
+
+NSA_DEV=eth1
+NSA_DEV2=eth2
+NSB_DEV=eth1
+NSC_DEV=eth2
+VRF=red
+VRF_TABLE=1101
+
+# IPv4 config
+NSA_IP=172.16.1.1
+NSB_IP=172.16.1.2
+VRF_IP=172.16.3.1
+NS_NET=172.16.1.0/24
+
+# IPv6 config
+NSA_IP6=2001:db8:1::1
+NSB_IP6=2001:db8:1::2
+VRF_IP6=2001:db8:3::1
+NS_NET6=2001:db8:1::/120
+
+NSA_LO_IP=172.16.2.1
+NSB_LO_IP=172.16.2.2
+NSA_LO_IP6=2001:db8:2::1
+NSB_LO_IP6=2001:db8:2::2
+
+MD5_PW=abc123
+MD5_WRONG_PW=abc1234
+
+MCAST=ff02::1
+# set after namespace create
+NSA_LINKIP6=
+NSB_LINKIP6=
+
+NSA=ns-A
+NSB=ns-B
+NSC=ns-C
+
+NSA_CMD="ip netns exec ${NSA}"
+NSB_CMD="ip netns exec ${NSB}"
+NSC_CMD="ip netns exec ${NSC}"
+
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
+################################################################################
+# utilities
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ [ "${VERBOSE}" = "1" ] && echo
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "TEST: %-70s [ OK ]\n" "${msg}"
+ else
+ nfail=$((nfail+1))
+ printf "TEST: %-70s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+
+ if [ "${PAUSE}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+
+ kill_procs
+}
+
+log_test_addr()
+{
+ local addr=$1
+ local rc=$2
+ local expected=$3
+ local msg="$4"
+ local astr
+
+ astr=$(addr2str ${addr})
+ log_test $rc $expected "$msg - ${astr}"
+}
+
+log_section()
+{
+ echo
+ echo "###########################################################################"
+ echo "$*"
+ echo "###########################################################################"
+ echo
+}
+
+log_subsection()
+{
+ echo
+ echo "#################################################################"
+ echo "$*"
+ echo
+}
+
+log_start()
+{
+ # make sure we have no test instances running
+ kill_procs
+
+ if [ "${VERBOSE}" = "1" ]; then
+ echo
+ echo "#######################################################"
+ fi
+}
+
+log_debug()
+{
+ if [ "${VERBOSE}" = "1" ]; then
+ echo
+ echo "$*"
+ echo
+ fi
+}
+
+show_hint()
+{
+ if [ "${VERBOSE}" = "1" ]; then
+ echo "HINT: $*"
+ echo
+ fi
+}
+
+kill_procs()
+{
+ killall nettest ping ping6 >/dev/null 2>&1
+ sleep 1
+}
+
+do_run_cmd()
+{
+ local cmd="$*"
+ local out
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: ${cmd}"
+ fi
+
+ out=$($cmd 2>&1)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo "$out"
+ fi
+
+ return $rc
+}
+
+run_cmd()
+{
+ do_run_cmd ${NSA_CMD} $*
+}
+
+run_cmd_nsb()
+{
+ do_run_cmd ${NSB_CMD} $*
+}
+
+run_cmd_nsc()
+{
+ do_run_cmd ${NSC_CMD} $*
+}
+
+setup_cmd()
+{
+ local cmd="$*"
+ local rc
+
+ run_cmd ${cmd}
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ # show user the command if not done so already
+ if [ "$VERBOSE" = "0" ]; then
+ echo "setup command: $cmd"
+ fi
+ echo "failed. stopping tests"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue"
+ read a
+ fi
+ exit $rc
+ fi
+}
+
+setup_cmd_nsb()
+{
+ local cmd="$*"
+ local rc
+
+ run_cmd_nsb ${cmd}
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ # show user the command if not done so already
+ if [ "$VERBOSE" = "0" ]; then
+ echo "setup command: $cmd"
+ fi
+ echo "failed. stopping tests"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue"
+ read a
+ fi
+ exit $rc
+ fi
+}
+
+# set sysctl values in NS-A
+set_sysctl()
+{
+ echo "SYSCTL: $*"
+ echo
+ run_cmd sysctl -q -w $*
+}
+
+################################################################################
+# Setup for tests
+
+addr2str()
+{
+ case "$1" in
+ 127.0.0.1) echo "loopback";;
+ ::1) echo "IPv6 loopback";;
+
+ ${NSA_IP}) echo "ns-A IP";;
+ ${NSA_IP6}) echo "ns-A IPv6";;
+ ${NSA_LO_IP}) echo "ns-A loopback IP";;
+ ${NSA_LO_IP6}) echo "ns-A loopback IPv6";;
+ ${NSA_LINKIP6}|${NSA_LINKIP6}%*) echo "ns-A IPv6 LLA";;
+
+ ${NSB_IP}) echo "ns-B IP";;
+ ${NSB_IP6}) echo "ns-B IPv6";;
+ ${NSB_LO_IP}) echo "ns-B loopback IP";;
+ ${NSB_LO_IP6}) echo "ns-B loopback IPv6";;
+ ${NSB_LINKIP6}|${NSB_LINKIP6}%*) echo "ns-B IPv6 LLA";;
+
+ ${VRF_IP}) echo "VRF IP";;
+ ${VRF_IP6}) echo "VRF IPv6";;
+
+ ${MCAST}%*) echo "multicast IP";;
+
+ *) echo "unknown";;
+ esac
+}
+
+get_linklocal()
+{
+ local ns=$1
+ local dev=$2
+ local addr
+
+ addr=$(ip -netns ${ns} -6 -br addr show dev ${dev} | \
+ awk '{
+ for (i = 3; i <= NF; ++i) {
+ if ($i ~ /^fe80/)
+ print $i
+ }
+ }'
+ )
+ addr=${addr/\/*}
+
+ [ -z "$addr" ] && return 1
+
+ echo $addr
+
+ return 0
+}
+
+################################################################################
+# create namespaces and vrf
+
+create_vrf()
+{
+ local ns=$1
+ local vrf=$2
+ local table=$3
+ local addr=$4
+ local addr6=$5
+
+ ip -netns ${ns} link add ${vrf} type vrf table ${table}
+ ip -netns ${ns} link set ${vrf} up
+ ip -netns ${ns} route add vrf ${vrf} unreachable default metric 8192
+ ip -netns ${ns} -6 route add vrf ${vrf} unreachable default metric 8192
+
+ ip -netns ${ns} addr add 127.0.0.1/8 dev ${vrf}
+ ip -netns ${ns} -6 addr add ::1 dev ${vrf} nodad
+ if [ "${addr}" != "-" ]; then
+ ip -netns ${ns} addr add dev ${vrf} ${addr}
+ fi
+ if [ "${addr6}" != "-" ]; then
+ ip -netns ${ns} -6 addr add dev ${vrf} ${addr6}
+ fi
+
+ ip -netns ${ns} ru del pref 0
+ ip -netns ${ns} ru add pref 32765 from all lookup local
+ ip -netns ${ns} -6 ru del pref 0
+ ip -netns ${ns} -6 ru add pref 32765 from all lookup local
+}
+
+create_ns()
+{
+ local ns=$1
+ local addr=$2
+ local addr6=$3
+
+ ip netns add ${ns}
+
+ ip -netns ${ns} link set lo up
+ if [ "${addr}" != "-" ]; then
+ ip -netns ${ns} addr add dev lo ${addr}
+ fi
+ if [ "${addr6}" != "-" ]; then
+ ip -netns ${ns} -6 addr add dev lo ${addr6}
+ fi
+
+ ip -netns ${ns} ro add unreachable default metric 8192
+ ip -netns ${ns} -6 ro add unreachable default metric 8192
+
+ ip netns exec ${ns} sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1
+}
+
+# create veth pair to connect namespaces and apply addresses.
+connect_ns()
+{
+ local ns1=$1
+ local ns1_dev=$2
+ local ns1_addr=$3
+ local ns1_addr6=$4
+ local ns2=$5
+ local ns2_dev=$6
+ local ns2_addr=$7
+ local ns2_addr6=$8
+
+ ip -netns ${ns1} li add ${ns1_dev} type veth peer name tmp
+ ip -netns ${ns1} li set ${ns1_dev} up
+ ip -netns ${ns1} li set tmp netns ${ns2} name ${ns2_dev}
+ ip -netns ${ns2} li set ${ns2_dev} up
+
+ if [ "${ns1_addr}" != "-" ]; then
+ ip -netns ${ns1} addr add dev ${ns1_dev} ${ns1_addr}
+ ip -netns ${ns2} addr add dev ${ns2_dev} ${ns2_addr}
+ fi
+
+ if [ "${ns1_addr6}" != "-" ]; then
+ ip -netns ${ns1} addr add dev ${ns1_dev} ${ns1_addr6}
+ ip -netns ${ns2} addr add dev ${ns2_dev} ${ns2_addr6}
+ fi
+}
+
+cleanup()
+{
+ # explicit cleanups to check those code paths
+ ip netns | grep -q ${NSA}
+ if [ $? -eq 0 ]; then
+ ip -netns ${NSA} link delete ${VRF}
+ ip -netns ${NSA} ro flush table ${VRF_TABLE}
+
+ ip -netns ${NSA} addr flush dev ${NSA_DEV}
+ ip -netns ${NSA} -6 addr flush dev ${NSA_DEV}
+ ip -netns ${NSA} link set dev ${NSA_DEV} down
+ ip -netns ${NSA} link del dev ${NSA_DEV}
+
+ ip netns del ${NSA}
+ fi
+
+ ip netns del ${NSB}
+ ip netns del ${NSC} >/dev/null 2>&1
+}
+
+setup()
+{
+ local with_vrf=${1}
+
+ # make sure we are starting with a clean slate
+ kill_procs
+ cleanup 2>/dev/null
+
+ log_debug "Configuring network namespaces"
+ set -e
+
+ create_ns ${NSA} ${NSA_LO_IP}/32 ${NSA_LO_IP6}/128
+ create_ns ${NSB} ${NSB_LO_IP}/32 ${NSB_LO_IP6}/128
+ connect_ns ${NSA} ${NSA_DEV} ${NSA_IP}/24 ${NSA_IP6}/64 \
+ ${NSB} ${NSB_DEV} ${NSB_IP}/24 ${NSB_IP6}/64
+
+ NSA_LINKIP6=$(get_linklocal ${NSA} ${NSA_DEV})
+ NSB_LINKIP6=$(get_linklocal ${NSB} ${NSB_DEV})
+
+ # tell ns-A how to get to remote addresses of ns-B
+ if [ "${with_vrf}" = "yes" ]; then
+ create_vrf ${NSA} ${VRF} ${VRF_TABLE} ${VRF_IP} ${VRF_IP6}
+
+ ip -netns ${NSA} link set dev ${NSA_DEV} vrf ${VRF}
+ ip -netns ${NSA} ro add vrf ${VRF} ${NSB_LO_IP}/32 via ${NSB_IP} dev ${NSA_DEV}
+ ip -netns ${NSA} -6 ro add vrf ${VRF} ${NSB_LO_IP6}/128 via ${NSB_IP6} dev ${NSA_DEV}
+
+ ip -netns ${NSB} ro add ${VRF_IP}/32 via ${NSA_IP} dev ${NSB_DEV}
+ ip -netns ${NSB} -6 ro add ${VRF_IP6}/128 via ${NSA_IP6} dev ${NSB_DEV}
+
+ # some VRF tests use ns-C which has the same config as
+ # ns-B but for a device NOT in the VRF
+ create_ns ${NSC} "-" "-"
+ connect_ns ${NSA} ${NSA_DEV2} ${NSA_IP}/24 ${NSA_IP6}/64 \
+ ${NSC} ${NSC_DEV} ${NSB_IP}/24 ${NSB_IP6}/64
+ else
+ ip -netns ${NSA} ro add ${NSB_LO_IP}/32 via ${NSB_IP} dev ${NSA_DEV}
+ ip -netns ${NSA} ro add ${NSB_LO_IP6}/128 via ${NSB_IP6} dev ${NSA_DEV}
+ fi
+
+
+ # tell ns-B how to get to remote addresses of ns-A
+ ip -netns ${NSB} ro add ${NSA_LO_IP}/32 via ${NSA_IP} dev ${NSB_DEV}
+ ip -netns ${NSB} ro add ${NSA_LO_IP6}/128 via ${NSA_IP6} dev ${NSB_DEV}
+
+ set +e
+
+ sleep 1
+}
+
+################################################################################
+# IPv4
+
+ipv4_ping_novrf()
+{
+ local a
+
+ #
+ # out
+ #
+ for a in ${NSB_IP} ${NSB_LO_IP}
+ do
+ log_start
+ run_cmd ping -c1 -w1 ${a}
+ log_test_addr ${a} $? 0 "ping out"
+
+ log_start
+ run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a}
+ log_test_addr ${a} $? 0 "ping out, device bind"
+
+ log_start
+ run_cmd ping -c1 -w1 -I ${NSA_LO_IP} ${a}
+ log_test_addr ${a} $? 0 "ping out, address bind"
+ done
+
+ #
+ # in
+ #
+ for a in ${NSA_IP} ${NSA_LO_IP}
+ do
+ log_start
+ run_cmd_nsb ping -c1 -w1 ${a}
+ log_test_addr ${a} $? 0 "ping in"
+ done
+
+ #
+ # local traffic
+ #
+ for a in ${NSA_IP} ${NSA_LO_IP} 127.0.0.1
+ do
+ log_start
+ run_cmd ping -c1 -w1 ${a}
+ log_test_addr ${a} $? 0 "ping local"
+ done
+
+ #
+ # local traffic, socket bound to device
+ #
+ # address on device
+ a=${NSA_IP}
+ log_start
+ run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a}
+ log_test_addr ${a} $? 0 "ping local, device bind"
+
+ # loopback addresses not reachable from device bind
+ # fails in a really weird way though because ipv4 special cases
+ # route lookups with oif set.
+ for a in ${NSA_LO_IP} 127.0.0.1
+ do
+ log_start
+ show_hint "Fails since address on loopback device is out of device scope"
+ run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a}
+ log_test_addr ${a} $? 1 "ping local, device bind"
+ done
+
+ #
+ # ip rule blocks reachability to remote address
+ #
+ log_start
+ setup_cmd ip rule add pref 32765 from all lookup local
+ setup_cmd ip rule del pref 0 from all lookup local
+ setup_cmd ip rule add pref 50 to ${NSB_LO_IP} prohibit
+ setup_cmd ip rule add pref 51 from ${NSB_IP} prohibit
+
+ a=${NSB_LO_IP}
+ run_cmd ping -c1 -w1 ${a}
+ log_test_addr ${a} $? 2 "ping out, blocked by rule"
+
+ # NOTE: ipv4 actually allows the lookup to fail and yet still create
+ # a viable rtable if the oif (e.g., bind to device) is set, so this
+ # case succeeds despite the rule
+ # run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a}
+
+ a=${NSA_LO_IP}
+ log_start
+ show_hint "Response generates ICMP (or arp request is ignored) due to ip rule"
+ run_cmd_nsb ping -c1 -w1 ${a}
+ log_test_addr ${a} $? 1 "ping in, blocked by rule"
+
+ [ "$VERBOSE" = "1" ] && echo
+ setup_cmd ip rule del pref 32765 from all lookup local
+ setup_cmd ip rule add pref 0 from all lookup local
+ setup_cmd ip rule del pref 50 to ${NSB_LO_IP} prohibit
+ setup_cmd ip rule del pref 51 from ${NSB_IP} prohibit
+
+ #
+ # route blocks reachability to remote address
+ #
+ log_start
+ setup_cmd ip route replace unreachable ${NSB_LO_IP}
+ setup_cmd ip route replace unreachable ${NSB_IP}
+
+ a=${NSB_LO_IP}
+ run_cmd ping -c1 -w1 ${a}
+ log_test_addr ${a} $? 2 "ping out, blocked by route"
+
+ # NOTE: ipv4 actually allows the lookup to fail and yet still create
+ # a viable rtable if the oif (e.g., bind to device) is set, so this
+ # case succeeds despite not having a route for the address
+ # run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a}
+
+ a=${NSA_LO_IP}
+ log_start
+ show_hint "Response is dropped (or arp request is ignored) due to ip route"
+ run_cmd_nsb ping -c1 -w1 ${a}
+ log_test_addr ${a} $? 1 "ping in, blocked by route"
+
+ #
+ # remove 'remote' routes; fallback to default
+ #
+ log_start
+ setup_cmd ip ro del ${NSB_LO_IP}
+
+ a=${NSB_LO_IP}
+ run_cmd ping -c1 -w1 ${a}
+ log_test_addr ${a} $? 2 "ping out, unreachable default route"
+
+ # NOTE: ipv4 actually allows the lookup to fail and yet still create
+ # a viable rtable if the oif (e.g., bind to device) is set, so this
+ # case succeeds despite not having a route for the address
+ # run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a}
+}
+
+ipv4_ping_vrf()
+{
+ local a
+
+ # should default on; does not exist on older kernels
+ set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null
+
+ #
+ # out
+ #
+ for a in ${NSB_IP} ${NSB_LO_IP}
+ do
+ log_start
+ run_cmd ping -c1 -w1 -I ${VRF} ${a}
+ log_test_addr ${a} $? 0 "ping out, VRF bind"
+
+ log_start
+ run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a}
+ log_test_addr ${a} $? 0 "ping out, device bind"
+
+ log_start
+ run_cmd ip vrf exec ${VRF} ping -c1 -w1 -I ${NSA_IP} ${a}
+ log_test_addr ${a} $? 0 "ping out, vrf device + dev address bind"
+
+ log_start
+ run_cmd ip vrf exec ${VRF} ping -c1 -w1 -I ${VRF_IP} ${a}
+ log_test_addr ${a} $? 0 "ping out, vrf device + vrf address bind"
+ done
+
+ #
+ # in
+ #
+ for a in ${NSA_IP} ${VRF_IP}
+ do
+ log_start
+ run_cmd_nsb ping -c1 -w1 ${a}
+ log_test_addr ${a} $? 0 "ping in"
+ done
+
+ #
+ # local traffic, local address
+ #
+ for a in ${NSA_IP} ${VRF_IP} 127.0.0.1
+ do
+ log_start
+ show_hint "Source address should be ${a}"
+ run_cmd ping -c1 -w1 -I ${VRF} ${a}
+ log_test_addr ${a} $? 0 "ping local, VRF bind"
+ done
+
+ #
+ # local traffic, socket bound to device
+ #
+ # address on device
+ a=${NSA_IP}
+ log_start
+ run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a}
+ log_test_addr ${a} $? 0 "ping local, device bind"
+
+ # vrf device is out of scope
+ for a in ${VRF_IP} 127.0.0.1
+ do
+ log_start
+ show_hint "Fails since address on vrf device is out of device scope"
+ run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a}
+ log_test_addr ${a} $? 1 "ping local, device bind"
+ done
+
+ #
+ # ip rule blocks address
+ #
+ log_start
+ setup_cmd ip rule add pref 50 to ${NSB_LO_IP} prohibit
+ setup_cmd ip rule add pref 51 from ${NSB_IP} prohibit
+
+ a=${NSB_LO_IP}
+ run_cmd ping -c1 -w1 -I ${VRF} ${a}
+ log_test_addr ${a} $? 2 "ping out, vrf bind, blocked by rule"
+
+ log_start
+ run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a}
+ log_test_addr ${a} $? 2 "ping out, device bind, blocked by rule"
+
+ a=${NSA_LO_IP}
+ log_start
+ show_hint "Response lost due to ip rule"
+ run_cmd_nsb ping -c1 -w1 ${a}
+ log_test_addr ${a} $? 1 "ping in, blocked by rule"
+
+ [ "$VERBOSE" = "1" ] && echo
+ setup_cmd ip rule del pref 50 to ${NSB_LO_IP} prohibit
+ setup_cmd ip rule del pref 51 from ${NSB_IP} prohibit
+
+ #
+ # remove 'remote' routes; fallback to default
+ #
+ log_start
+ setup_cmd ip ro del vrf ${VRF} ${NSB_LO_IP}
+
+ a=${NSB_LO_IP}
+ run_cmd ping -c1 -w1 -I ${VRF} ${a}
+ log_test_addr ${a} $? 2 "ping out, vrf bind, unreachable route"
+
+ log_start
+ run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a}
+ log_test_addr ${a} $? 2 "ping out, device bind, unreachable route"
+
+ a=${NSA_LO_IP}
+ log_start
+ show_hint "Response lost by unreachable route"
+ run_cmd_nsb ping -c1 -w1 ${a}
+ log_test_addr ${a} $? 1 "ping in, unreachable route"
+}
+
+ipv4_ping()
+{
+ log_section "IPv4 ping"
+
+ log_subsection "No VRF"
+ setup
+ set_sysctl net.ipv4.raw_l3mdev_accept=0 2>/dev/null
+ ipv4_ping_novrf
+ setup
+ set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null
+ ipv4_ping_novrf
+
+ log_subsection "With VRF"
+ setup "yes"
+ ipv4_ping_vrf
+}
+
+################################################################################
+# IPv4 TCP
+
+#
+# MD5 tests without VRF
+#
+ipv4_tcp_md5_novrf()
+{
+ #
+ # single address
+ #
+
+ # basic use case
+ log_start
+ run_cmd nettest -s -M ${MD5_PW} -r ${NSB_IP} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 0 "MD5: Single address config"
+
+ # client sends MD5, server not configured
+ log_start
+ show_hint "Should timeout due to MD5 mismatch"
+ run_cmd nettest -s &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 2 "MD5: Server no config, client uses password"
+
+ # wrong password
+ log_start
+ show_hint "Should timeout since client uses wrong password"
+ run_cmd nettest -s -M ${MD5_PW} -r ${NSB_IP} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+ log_test $? 2 "MD5: Client uses wrong password"
+
+ # client from different address
+ log_start
+ show_hint "Should timeout due to MD5 mismatch"
+ run_cmd nettest -s -M ${MD5_PW} -r ${NSB_LO_IP} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 2 "MD5: Client address does not match address configured with password"
+
+ #
+ # MD5 extension - prefix length
+ #
+
+ # client in prefix
+ log_start
+ run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 0 "MD5: Prefix config"
+
+ # client in prefix, wrong password
+ log_start
+ show_hint "Should timeout since client uses wrong password"
+ run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+ log_test $? 2 "MD5: Prefix config, client uses wrong password"
+
+ # client outside of prefix
+ log_start
+ show_hint "Should timeout due to MD5 mismatch"
+ run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} &
+ sleep 1
+ run_cmd_nsb nettest -l ${NSB_LO_IP} -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 2 "MD5: Prefix config, client address not in configured prefix"
+}
+
+#
+# MD5 tests with VRF
+#
+ipv4_tcp_md5()
+{
+ #
+ # single address
+ #
+
+ # basic use case
+ log_start
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 0 "MD5: VRF: Single address config"
+
+ # client sends MD5, server not configured
+ log_start
+ show_hint "Should timeout since server does not have MD5 auth"
+ run_cmd nettest -s -d ${VRF} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 2 "MD5: VRF: Server no config, client uses password"
+
+ # wrong password
+ log_start
+ show_hint "Should timeout since client uses wrong password"
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+ log_test $? 2 "MD5: VRF: Client uses wrong password"
+
+ # client from different address
+ log_start
+ show_hint "Should timeout since server config differs from client"
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_LO_IP} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 2 "MD5: VRF: Client address does not match address configured with password"
+
+ #
+ # MD5 extension - prefix length
+ #
+
+ # client in prefix
+ log_start
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 0 "MD5: VRF: Prefix config"
+
+ # client in prefix, wrong password
+ log_start
+ show_hint "Should timeout since client uses wrong password"
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+ log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password"
+
+ # client outside of prefix
+ log_start
+ show_hint "Should timeout since client address is outside of prefix"
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+ sleep 1
+ run_cmd_nsb nettest -l ${NSB_LO_IP} -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix"
+
+ #
+ # duplicate config between default VRF and a VRF
+ #
+
+ log_start
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} &
+ run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF"
+
+ log_start
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} &
+ run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} &
+ sleep 1
+ run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+ log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF"
+
+ log_start
+ show_hint "Should timeout since client in default VRF uses VRF password"
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} &
+ run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} &
+ sleep 1
+ run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw"
+
+ log_start
+ show_hint "Should timeout since client in VRF uses default VRF password"
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} &
+ run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+ log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw"
+
+ log_start
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+ run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF"
+
+ log_start
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+ run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
+ sleep 1
+ run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+ log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF"
+
+ log_start
+ show_hint "Should timeout since client in default VRF uses VRF password"
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+ run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
+ sleep 1
+ run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw"
+
+ log_start
+ show_hint "Should timeout since client in VRF uses default VRF password"
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+ run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+ log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw"
+
+ #
+ # negative tests
+ #
+ log_start
+ run_cmd nettest -s -d ${NSA_DEV} -M ${MD5_PW} -r ${NSB_IP}
+ log_test $? 1 "MD5: VRF: Device must be a VRF - single address"
+
+ log_start
+ run_cmd nettest -s -d ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET}
+ log_test $? 1 "MD5: VRF: Device must be a VRF - prefix"
+
+}
+
+ipv4_tcp_novrf()
+{
+ local a
+
+ #
+ # server tests
+ #
+ for a in ${NSA_IP} ${NSA_LO_IP}
+ do
+ log_start
+ run_cmd nettest -s &
+ sleep 1
+ run_cmd_nsb nettest -r ${a}
+ log_test_addr ${a} $? 0 "Global server"
+ done
+
+ a=${NSA_IP}
+ log_start
+ run_cmd nettest -s -d ${NSA_DEV} &
+ sleep 1
+ run_cmd_nsb nettest -r ${a}
+ log_test_addr ${a} $? 0 "Device server"
+
+ # verify TCP reset sent and received
+ for a in ${NSA_IP} ${NSA_LO_IP}
+ do
+ log_start
+ show_hint "Should fail 'Connection refused' since there is no server"
+ run_cmd_nsb nettest -r ${a}
+ log_test_addr ${a} $? 1 "No server"
+ done
+
+ #
+ # client
+ #
+ for a in ${NSB_IP} ${NSB_LO_IP}
+ do
+ log_start
+ run_cmd_nsb nettest -s &
+ sleep 1
+ run_cmd nettest -r ${a} -0 ${NSA_IP}
+ log_test_addr ${a} $? 0 "Client"
+
+ log_start
+ run_cmd_nsb nettest -s &
+ sleep 1
+ run_cmd nettest -r ${a} -d ${NSA_DEV}
+ log_test_addr ${a} $? 0 "Client, device bind"
+
+ log_start
+ show_hint "Should fail 'Connection refused'"
+ run_cmd nettest -r ${a}
+ log_test_addr ${a} $? 1 "No server, unbound client"
+
+ log_start
+ show_hint "Should fail 'Connection refused'"
+ run_cmd nettest -r ${a} -d ${NSA_DEV}
+ log_test_addr ${a} $? 1 "No server, device client"
+ done
+
+ #
+ # local address tests
+ #
+ for a in ${NSA_IP} ${NSA_LO_IP} 127.0.0.1
+ do
+ log_start
+ run_cmd nettest -s &
+ sleep 1
+ run_cmd nettest -r ${a} -0 ${a} -1 ${a}
+ log_test_addr ${a} $? 0 "Global server, local connection"
+ done
+
+ a=${NSA_IP}
+ log_start
+ run_cmd nettest -s -d ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -r ${a} -0 ${a}
+ log_test_addr ${a} $? 0 "Device server, unbound client, local connection"
+
+ for a in ${NSA_LO_IP} 127.0.0.1
+ do
+ log_start
+ show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope"
+ run_cmd nettest -s -d ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -r ${a}
+ log_test_addr ${a} $? 1 "Device server, unbound client, local connection"
+ done
+
+ a=${NSA_IP}
+ log_start
+ run_cmd nettest -s &
+ sleep 1
+ run_cmd nettest -r ${a} -0 ${a} -d ${NSA_DEV}
+ log_test_addr ${a} $? 0 "Global server, device client, local connection"
+
+ for a in ${NSA_LO_IP} 127.0.0.1
+ do
+ log_start
+ show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope"
+ run_cmd nettest -s &
+ sleep 1
+ run_cmd nettest -r ${a} -d ${NSA_DEV}
+ log_test_addr ${a} $? 1 "Global server, device client, local connection"
+ done
+
+ a=${NSA_IP}
+ log_start
+ run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -d ${NSA_DEV} -r ${a} -0 ${a}
+ log_test_addr ${a} $? 0 "Device server, device client, local connection"
+
+ log_start
+ show_hint "Should fail 'Connection refused'"
+ run_cmd nettest -d ${NSA_DEV} -r ${a}
+ log_test_addr ${a} $? 1 "No server, device client, local conn"
+
+ ipv4_tcp_md5_novrf
+}
+
+ipv4_tcp_vrf()
+{
+ local a
+
+ # disable global server
+ log_subsection "Global server disabled"
+
+ set_sysctl net.ipv4.tcp_l3mdev_accept=0
+
+ #
+ # server tests
+ #
+ for a in ${NSA_IP} ${VRF_IP}
+ do
+ log_start
+ show_hint "Should fail 'Connection refused' since global server with VRF is disabled"
+ run_cmd nettest -s &
+ sleep 1
+ run_cmd_nsb nettest -r ${a}
+ log_test_addr ${a} $? 1 "Global server"
+
+ log_start
+ run_cmd nettest -s -d ${VRF} -2 ${VRF} &
+ sleep 1
+ run_cmd_nsb nettest -r ${a}
+ log_test_addr ${a} $? 0 "VRF server"
+
+ log_start
+ run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd_nsb nettest -r ${a}
+ log_test_addr ${a} $? 0 "Device server"
+
+ # verify TCP reset received
+ log_start
+ show_hint "Should fail 'Connection refused' since there is no server"
+ run_cmd_nsb nettest -r ${a}
+ log_test_addr ${a} $? 1 "No server"
+ done
+
+ # local address tests
+ # (${VRF_IP} and 127.0.0.1 both timeout)
+ a=${NSA_IP}
+ log_start
+ show_hint "Should fail 'Connection refused' since global server with VRF is disabled"
+ run_cmd nettest -s &
+ sleep 1
+ run_cmd nettest -r ${a} -d ${NSA_DEV}
+ log_test_addr ${a} $? 1 "Global server, local connection"
+
+ # run MD5 tests
+ ipv4_tcp_md5
+
+ #
+ # enable VRF global server
+ #
+ log_subsection "VRF Global server enabled"
+ set_sysctl net.ipv4.tcp_l3mdev_accept=1
+
+ for a in ${NSA_IP} ${VRF_IP}
+ do
+ log_start
+ show_hint "client socket should be bound to VRF"
+ run_cmd nettest -s -2 ${VRF} &
+ sleep 1
+ run_cmd_nsb nettest -r ${a}
+ log_test_addr ${a} $? 0 "Global server"
+
+ log_start
+ show_hint "client socket should be bound to VRF"
+ run_cmd nettest -s -d ${VRF} -2 ${VRF} &
+ sleep 1
+ run_cmd_nsb nettest -r ${a}
+ log_test_addr ${a} $? 0 "VRF server"
+
+ # verify TCP reset received
+ log_start
+ show_hint "Should fail 'Connection refused'"
+ run_cmd_nsb nettest -r ${a}
+ log_test_addr ${a} $? 1 "No server"
+ done
+
+ a=${NSA_IP}
+ log_start
+ show_hint "client socket should be bound to device"
+ run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd_nsb nettest -r ${a}
+ log_test_addr ${a} $? 0 "Device server"
+
+ # local address tests
+ for a in ${NSA_IP} ${VRF_IP}
+ do
+ log_start
+ show_hint "Should fail 'Connection refused' since client is not bound to VRF"
+ run_cmd nettest -s -d ${VRF} &
+ sleep 1
+ run_cmd nettest -r ${a}
+ log_test_addr ${a} $? 1 "Global server, local connection"
+ done
+
+ #
+ # client
+ #
+ for a in ${NSB_IP} ${NSB_LO_IP}
+ do
+ log_start
+ run_cmd_nsb nettest -s &
+ sleep 1
+ run_cmd nettest -r ${a} -d ${VRF}
+ log_test_addr ${a} $? 0 "Client, VRF bind"
+
+ log_start
+ run_cmd_nsb nettest -s &
+ sleep 1
+ run_cmd nettest -r ${a} -d ${NSA_DEV}
+ log_test_addr ${a} $? 0 "Client, device bind"
+
+ log_start
+ show_hint "Should fail 'Connection refused'"
+ run_cmd nettest -r ${a} -d ${VRF}
+ log_test_addr ${a} $? 1 "No server, VRF client"
+
+ log_start
+ show_hint "Should fail 'Connection refused'"
+ run_cmd nettest -r ${a} -d ${NSA_DEV}
+ log_test_addr ${a} $? 1 "No server, device client"
+ done
+
+ for a in ${NSA_IP} ${VRF_IP} 127.0.0.1
+ do
+ log_start
+ run_cmd nettest -s -d ${VRF} -2 ${VRF} &
+ sleep 1
+ run_cmd nettest -r ${a} -d ${VRF} -0 ${a}
+ log_test_addr ${a} $? 0 "VRF server, VRF client, local connection"
+ done
+
+ a=${NSA_IP}
+ log_start
+ run_cmd nettest -s -d ${VRF} -2 ${VRF} &
+ sleep 1
+ run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a}
+ log_test_addr ${a} $? 0 "VRF server, device client, local connection"
+
+ log_start
+ show_hint "Should fail 'No route to host' since client is out of VRF scope"
+ run_cmd nettest -s -d ${VRF} &
+ sleep 1
+ run_cmd nettest -r ${a}
+ log_test_addr ${a} $? 1 "VRF server, unbound client, local connection"
+
+ log_start
+ run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -r ${a} -d ${VRF} -0 ${a}
+ log_test_addr ${a} $? 0 "Device server, VRF client, local connection"
+
+ log_start
+ run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a}
+ log_test_addr ${a} $? 0 "Device server, device client, local connection"
+}
+
+ipv4_tcp()
+{
+ log_section "IPv4/TCP"
+ log_subsection "No VRF"
+ setup
+
+ # tcp_l3mdev_accept should have no affect without VRF;
+ # run tests with it enabled and disabled to verify
+ log_subsection "tcp_l3mdev_accept disabled"
+ set_sysctl net.ipv4.tcp_l3mdev_accept=0
+ ipv4_tcp_novrf
+ log_subsection "tcp_l3mdev_accept enabled"
+ set_sysctl net.ipv4.tcp_l3mdev_accept=1
+ ipv4_tcp_novrf
+
+ log_subsection "With VRF"
+ setup "yes"
+ ipv4_tcp_vrf
+}
+
+################################################################################
+# IPv4 UDP
+
+ipv4_udp_novrf()
+{
+ local a
+
+ #
+ # server tests
+ #
+ for a in ${NSA_IP} ${NSA_LO_IP}
+ do
+ log_start
+ run_cmd nettest -D -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd_nsb nettest -D -r ${a}
+ log_test_addr ${a} $? 0 "Global server"
+
+ log_start
+ show_hint "Should fail 'Connection refused' since there is no server"
+ run_cmd_nsb nettest -D -r ${a}
+ log_test_addr ${a} $? 1 "No server"
+ done
+
+ a=${NSA_IP}
+ log_start
+ run_cmd nettest -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd_nsb nettest -D -r ${a}
+ log_test_addr ${a} $? 0 "Device server"
+
+ #
+ # client
+ #
+ for a in ${NSB_IP} ${NSB_LO_IP}
+ do
+ log_start
+ run_cmd_nsb nettest -D -s &
+ sleep 1
+ run_cmd nettest -D -r ${a} -0 ${NSA_IP}
+ log_test_addr ${a} $? 0 "Client"
+
+ log_start
+ run_cmd_nsb nettest -D -s &
+ sleep 1
+ run_cmd nettest -D -r ${a} -d ${NSA_DEV} -0 ${NSA_IP}
+ log_test_addr ${a} $? 0 "Client, device bind"
+
+ log_start
+ run_cmd_nsb nettest -D -s &
+ sleep 1
+ run_cmd nettest -D -r ${a} -d ${NSA_DEV} -C -0 ${NSA_IP}
+ log_test_addr ${a} $? 0 "Client, device send via cmsg"
+
+ log_start
+ run_cmd_nsb nettest -D -s &
+ sleep 1
+ run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP}
+ log_test_addr ${a} $? 0 "Client, device bind via IP_UNICAST_IF"
+
+ log_start
+ show_hint "Should fail 'Connection refused'"
+ run_cmd nettest -D -r ${a}
+ log_test_addr ${a} $? 1 "No server, unbound client"
+
+ log_start
+ show_hint "Should fail 'Connection refused'"
+ run_cmd nettest -D -r ${a} -d ${NSA_DEV}
+ log_test_addr ${a} $? 1 "No server, device client"
+ done
+
+ #
+ # local address tests
+ #
+ for a in ${NSA_IP} ${NSA_LO_IP} 127.0.0.1
+ do
+ log_start
+ run_cmd nettest -D -s &
+ sleep 1
+ run_cmd nettest -D -r ${a} -0 ${a} -1 ${a}
+ log_test_addr ${a} $? 0 "Global server, local connection"
+ done
+
+ a=${NSA_IP}
+ log_start
+ run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -D -r ${a}
+ log_test_addr ${a} $? 0 "Device server, unbound client, local connection"
+
+ for a in ${NSA_LO_IP} 127.0.0.1
+ do
+ log_start
+ show_hint "Should fail 'Connection refused' since address is out of device scope"
+ run_cmd nettest -s -D -d ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -D -r ${a}
+ log_test_addr ${a} $? 1 "Device server, unbound client, local connection"
+ done
+
+ a=${NSA_IP}
+ log_start
+ run_cmd nettest -s -D &
+ sleep 1
+ run_cmd nettest -D -d ${NSA_DEV} -r ${a}
+ log_test_addr ${a} $? 0 "Global server, device client, local connection"
+
+ log_start
+ run_cmd nettest -s -D &
+ sleep 1
+ run_cmd nettest -D -d ${NSA_DEV} -C -r ${a}
+ log_test_addr ${a} $? 0 "Global server, device send via cmsg, local connection"
+
+ log_start
+ run_cmd nettest -s -D &
+ sleep 1
+ run_cmd nettest -D -d ${NSA_DEV} -S -r ${a}
+ log_test_addr ${a} $? 0 "Global server, device client via IP_UNICAST_IF, local connection"
+
+ # IPv4 with device bind has really weird behavior - it overrides the
+ # fib lookup, generates an rtable and tries to send the packet. This
+ # causes failures for local traffic at different places
+ for a in ${NSA_LO_IP} 127.0.0.1
+ do
+ log_start
+ show_hint "Should fail since addresses on loopback are out of device scope"
+ run_cmd nettest -D -s &
+ sleep 1
+ run_cmd nettest -D -r ${a} -d ${NSA_DEV}
+ log_test_addr ${a} $? 2 "Global server, device client, local connection"
+
+ log_start
+ show_hint "Should fail since addresses on loopback are out of device scope"
+ run_cmd nettest -D -s &
+ sleep 1
+ run_cmd nettest -D -r ${a} -d ${NSA_DEV} -C
+ log_test_addr ${a} $? 1 "Global server, device send via cmsg, local connection"
+
+ log_start
+ show_hint "Should fail since addresses on loopback are out of device scope"
+ run_cmd nettest -D -s &
+ sleep 1
+ run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S
+ log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection"
+ done
+
+ a=${NSA_IP}
+ log_start
+ run_cmd nettest -D -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -D -d ${NSA_DEV} -r ${a} -0 ${a}
+ log_test_addr ${a} $? 0 "Device server, device client, local conn"
+
+ log_start
+ run_cmd nettest -D -d ${NSA_DEV} -r ${a}
+ log_test_addr ${a} $? 2 "No server, device client, local conn"
+}
+
+ipv4_udp_vrf()
+{
+ local a
+
+ # disable global server
+ log_subsection "Global server disabled"
+ set_sysctl net.ipv4.udp_l3mdev_accept=0
+
+ #
+ # server tests
+ #
+ for a in ${NSA_IP} ${VRF_IP}
+ do
+ log_start
+ show_hint "Fails because ingress is in a VRF and global server is disabled"
+ run_cmd nettest -D -s &
+ sleep 1
+ run_cmd_nsb nettest -D -r ${a}
+ log_test_addr ${a} $? 1 "Global server"
+
+ log_start
+ run_cmd nettest -D -d ${VRF} -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd_nsb nettest -D -r ${a}
+ log_test_addr ${a} $? 0 "VRF server"
+
+ log_start
+ run_cmd nettest -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd_nsb nettest -D -r ${a}
+ log_test_addr ${a} $? 0 "Enslaved device server"
+
+ log_start
+ show_hint "Should fail 'Connection refused' since there is no server"
+ run_cmd_nsb nettest -D -r ${a}
+ log_test_addr ${a} $? 1 "No server"
+
+ log_start
+ show_hint "Should fail 'Connection refused' since global server is out of scope"
+ run_cmd nettest -D -s &
+ sleep 1
+ run_cmd nettest -D -d ${VRF} -r ${a}
+ log_test_addr ${a} $? 1 "Global server, VRF client, local connection"
+ done
+
+ a=${NSA_IP}
+ log_start
+ run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -D -d ${VRF} -r ${a}
+ log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
+
+ log_start
+ run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -D -d ${NSA_DEV} -r ${a}
+ log_test_addr ${a} $? 0 "VRF server, enslaved device client, local connection"
+
+ a=${NSA_IP}
+ log_start
+ run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -D -d ${VRF} -r ${a}
+ log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn"
+
+ log_start
+ run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -D -d ${NSA_DEV} -r ${a}
+ log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn"
+
+ # enable global server
+ log_subsection "Global server enabled"
+ set_sysctl net.ipv4.udp_l3mdev_accept=1
+
+ #
+ # server tests
+ #
+ for a in ${NSA_IP} ${VRF_IP}
+ do
+ log_start
+ run_cmd nettest -D -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd_nsb nettest -D -r ${a}
+ log_test_addr ${a} $? 0 "Global server"
+
+ log_start
+ run_cmd nettest -D -d ${VRF} -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd_nsb nettest -D -r ${a}
+ log_test_addr ${a} $? 0 "VRF server"
+
+ log_start
+ run_cmd nettest -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd_nsb nettest -D -r ${a}
+ log_test_addr ${a} $? 0 "Enslaved device server"
+
+ log_start
+ show_hint "Should fail 'Connection refused'"
+ run_cmd_nsb nettest -D -r ${a}
+ log_test_addr ${a} $? 1 "No server"
+ done
+
+ #
+ # client tests
+ #
+ log_start
+ run_cmd_nsb nettest -D -s &
+ sleep 1
+ run_cmd nettest -d ${VRF} -D -r ${NSB_IP} -1 ${NSA_IP}
+ log_test $? 0 "VRF client"
+
+ log_start
+ run_cmd_nsb nettest -D -s &
+ sleep 1
+ run_cmd nettest -d ${NSA_DEV} -D -r ${NSB_IP} -1 ${NSA_IP}
+ log_test $? 0 "Enslaved device client"
+
+ # negative test - should fail
+ log_start
+ show_hint "Should fail 'Connection refused'"
+ run_cmd nettest -D -d ${VRF} -r ${NSB_IP}
+ log_test $? 1 "No server, VRF client"
+
+ log_start
+ show_hint "Should fail 'Connection refused'"
+ run_cmd nettest -D -d ${NSA_DEV} -r ${NSB_IP}
+ log_test $? 1 "No server, enslaved device client"
+
+ #
+ # local address tests
+ #
+ a=${NSA_IP}
+ log_start
+ run_cmd nettest -D -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -D -d ${VRF} -r ${a}
+ log_test_addr ${a} $? 0 "Global server, VRF client, local conn"
+
+ log_start
+ run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -D -d ${VRF} -r ${a}
+ log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
+
+ log_start
+ run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -D -d ${NSA_DEV} -r ${a}
+ log_test_addr ${a} $? 0 "VRF server, device client, local conn"
+
+ log_start
+ run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -D -d ${VRF} -r ${a}
+ log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn"
+
+ log_start
+ run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -D -d ${NSA_DEV} -r ${a}
+ log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn"
+
+ for a in ${VRF_IP} 127.0.0.1
+ do
+ log_start
+ run_cmd nettest -D -s -2 ${VRF} &
+ sleep 1
+ run_cmd nettest -D -d ${VRF} -r ${a}
+ log_test_addr ${a} $? 0 "Global server, VRF client, local conn"
+ done
+
+ for a in ${VRF_IP} 127.0.0.1
+ do
+ log_start
+ run_cmd nettest -s -D -d ${VRF} -2 ${VRF} &
+ sleep 1
+ run_cmd nettest -D -d ${VRF} -r ${a}
+ log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
+ done
+
+ # negative test - should fail
+ # verifies ECONNREFUSED
+ for a in ${NSA_IP} ${VRF_IP} 127.0.0.1
+ do
+ log_start
+ show_hint "Should fail 'Connection refused'"
+ run_cmd nettest -D -d ${VRF} -r ${a}
+ log_test_addr ${a} $? 1 "No server, VRF client, local conn"
+ done
+}
+
+ipv4_udp()
+{
+ log_section "IPv4/UDP"
+ log_subsection "No VRF"
+
+ setup
+
+ # udp_l3mdev_accept should have no affect without VRF;
+ # run tests with it enabled and disabled to verify
+ log_subsection "udp_l3mdev_accept disabled"
+ set_sysctl net.ipv4.udp_l3mdev_accept=0
+ ipv4_udp_novrf
+ log_subsection "udp_l3mdev_accept enabled"
+ set_sysctl net.ipv4.udp_l3mdev_accept=1
+ ipv4_udp_novrf
+
+ log_subsection "With VRF"
+ setup "yes"
+ ipv4_udp_vrf
+}
+
+################################################################################
+# IPv4 address bind
+#
+# verifies ability or inability to bind to an address / device
+
+ipv4_addr_bind_novrf()
+{
+ #
+ # raw socket
+ #
+ for a in ${NSA_IP} ${NSA_LO_IP}
+ do
+ log_start
+ run_cmd nettest -s -R -P icmp -l ${a} -b
+ log_test_addr ${a} $? 0 "Raw socket bind to local address"
+
+ log_start
+ run_cmd nettest -s -R -P icmp -l ${a} -d ${NSA_DEV} -b
+ log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind"
+ done
+
+ #
+ # tcp sockets
+ #
+ a=${NSA_IP}
+ log_start
+ run_cmd nettest -l ${a} -r ${NSB_IP} -t1 -b
+ log_test_addr ${a} $? 0 "TCP socket bind to local address"
+
+ log_start
+ run_cmd nettest -l ${a} -r ${NSB_IP} -d ${NSA_DEV} -t1 -b
+ log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind"
+
+ # Sadly, the kernel allows binding a socket to a device and then
+ # binding to an address not on the device. The only restriction
+ # is that the address is valid in the L3 domain. So this test
+ # passes when it really should not
+ #a=${NSA_LO_IP}
+ #log_start
+ #show_hint "Should fail with 'Cannot assign requested address'"
+ #run_cmd nettest -s -l ${a} -d ${NSA_DEV} -t1 -b
+ #log_test_addr ${a} $? 1 "TCP socket bind to out of scope local address"
+}
+
+ipv4_addr_bind_vrf()
+{
+ #
+ # raw socket
+ #
+ for a in ${NSA_IP} ${VRF_IP}
+ do
+ log_start
+ run_cmd nettest -s -R -P icmp -l ${a} -b
+ log_test_addr ${a} $? 0 "Raw socket bind to local address"
+
+ log_start
+ run_cmd nettest -s -R -P icmp -l ${a} -d ${NSA_DEV} -b
+ log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind"
+ log_start
+ run_cmd nettest -s -R -P icmp -l ${a} -d ${VRF} -b
+ log_test_addr ${a} $? 0 "Raw socket bind to local address after VRF bind"
+ done
+
+ a=${NSA_LO_IP}
+ log_start
+ show_hint "Address on loopback is out of VRF scope"
+ run_cmd nettest -s -R -P icmp -l ${a} -d ${VRF} -b
+ log_test_addr ${a} $? 1 "Raw socket bind to out of scope address after VRF bind"
+
+ #
+ # tcp sockets
+ #
+ for a in ${NSA_IP} ${VRF_IP}
+ do
+ log_start
+ run_cmd nettest -s -l ${a} -d ${VRF} -t1 -b
+ log_test_addr ${a} $? 0 "TCP socket bind to local address"
+
+ log_start
+ run_cmd nettest -s -l ${a} -d ${NSA_DEV} -t1 -b
+ log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind"
+ done
+
+ a=${NSA_LO_IP}
+ log_start
+ show_hint "Address on loopback out of scope for VRF"
+ run_cmd nettest -s -l ${a} -d ${VRF} -t1 -b
+ log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for VRF"
+
+ log_start
+ show_hint "Address on loopback out of scope for device in VRF"
+ run_cmd nettest -s -l ${a} -d ${NSA_DEV} -t1 -b
+ log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for device bind"
+}
+
+ipv4_addr_bind()
+{
+ log_section "IPv4 address binds"
+
+ log_subsection "No VRF"
+ setup
+ ipv4_addr_bind_novrf
+
+ log_subsection "With VRF"
+ setup "yes"
+ ipv4_addr_bind_vrf
+}
+
+################################################################################
+# IPv4 runtime tests
+
+ipv4_rt()
+{
+ local desc="$1"
+ local varg="$2"
+ local with_vrf="yes"
+ local a
+
+ #
+ # server tests
+ #
+ for a in ${NSA_IP} ${VRF_IP}
+ do
+ log_start
+ run_cmd nettest ${varg} -s &
+ sleep 1
+ run_cmd_nsb nettest ${varg} -r ${a} &
+ sleep 3
+ run_cmd ip link del ${VRF}
+ sleep 1
+ log_test_addr ${a} 0 0 "${desc}, global server"
+
+ setup ${with_vrf}
+ done
+
+ for a in ${NSA_IP} ${VRF_IP}
+ do
+ log_start
+ run_cmd nettest ${varg} -s -d ${VRF} &
+ sleep 1
+ run_cmd_nsb nettest ${varg} -r ${a} &
+ sleep 3
+ run_cmd ip link del ${VRF}
+ sleep 1
+ log_test_addr ${a} 0 0 "${desc}, VRF server"
+
+ setup ${with_vrf}
+ done
+
+ a=${NSA_IP}
+ log_start
+ run_cmd nettest ${varg} -s -d ${NSA_DEV} &
+ sleep 1
+ run_cmd_nsb nettest ${varg} -r ${a} &
+ sleep 3
+ run_cmd ip link del ${VRF}
+ sleep 1
+ log_test_addr ${a} 0 0 "${desc}, enslaved device server"
+
+ setup ${with_vrf}
+
+ #
+ # client test
+ #
+ log_start
+ run_cmd_nsb nettest ${varg} -s &
+ sleep 1
+ run_cmd nettest ${varg} -d ${VRF} -r ${NSB_IP} &
+ sleep 3
+ run_cmd ip link del ${VRF}
+ sleep 1
+ log_test_addr ${a} 0 0 "${desc}, VRF client"
+
+ setup ${with_vrf}
+
+ log_start
+ run_cmd_nsb nettest ${varg} -s &
+ sleep 1
+ run_cmd nettest ${varg} -d ${NSA_DEV} -r ${NSB_IP} &
+ sleep 3
+ run_cmd ip link del ${VRF}
+ sleep 1
+ log_test_addr ${a} 0 0 "${desc}, enslaved device client"
+
+ setup ${with_vrf}
+
+ #
+ # local address tests
+ #
+ for a in ${NSA_IP} ${VRF_IP}
+ do
+ log_start
+ run_cmd nettest ${varg} -s &
+ sleep 1
+ run_cmd nettest ${varg} -d ${VRF} -r ${a} &
+ sleep 3
+ run_cmd ip link del ${VRF}
+ sleep 1
+ log_test_addr ${a} 0 0 "${desc}, global server, VRF client, local"
+
+ setup ${with_vrf}
+ done
+
+ for a in ${NSA_IP} ${VRF_IP}
+ do
+ log_start
+ run_cmd nettest ${varg} -d ${VRF} -s &
+ sleep 1
+ run_cmd nettest ${varg} -d ${VRF} -r ${a} &
+ sleep 3
+ run_cmd ip link del ${VRF}
+ sleep 1
+ log_test_addr ${a} 0 0 "${desc}, VRF server and client, local"
+
+ setup ${with_vrf}
+ done
+
+ a=${NSA_IP}
+ log_start
+ run_cmd nettest ${varg} -s &
+ sleep 1
+ run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
+ sleep 3
+ run_cmd ip link del ${VRF}
+ sleep 1
+ log_test_addr ${a} 0 0 "${desc}, global server, enslaved device client, local"
+
+ setup ${with_vrf}
+
+ log_start
+ run_cmd nettest ${varg} -d ${VRF} -s &
+ sleep 1
+ run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
+ sleep 3
+ run_cmd ip link del ${VRF}
+ sleep 1
+ log_test_addr ${a} 0 0 "${desc}, VRF server, enslaved device client, local"
+
+ setup ${with_vrf}
+
+ log_start
+ run_cmd nettest ${varg} -d ${NSA_DEV} -s &
+ sleep 1
+ run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
+ sleep 3
+ run_cmd ip link del ${VRF}
+ sleep 1
+ log_test_addr ${a} 0 0 "${desc}, enslaved device server and client, local"
+}
+
+ipv4_ping_rt()
+{
+ local with_vrf="yes"
+ local a
+
+ for a in ${NSA_IP} ${VRF_IP}
+ do
+ log_start
+ run_cmd_nsb ping -f ${a} &
+ sleep 3
+ run_cmd ip link del ${VRF}
+ sleep 1
+ log_test_addr ${a} 0 0 "Device delete with active traffic - ping in"
+
+ setup ${with_vrf}
+ done
+
+ a=${NSB_IP}
+ log_start
+ run_cmd ping -f -I ${VRF} ${a} &
+ sleep 3
+ run_cmd ip link del ${VRF}
+ sleep 1
+ log_test_addr ${a} 0 0 "Device delete with active traffic - ping out"
+}
+
+ipv4_runtime()
+{
+ log_section "Run time tests - ipv4"
+
+ setup "yes"
+ ipv4_ping_rt
+
+ setup "yes"
+ ipv4_rt "TCP active socket" "-n -1"
+
+ setup "yes"
+ ipv4_rt "TCP passive socket" "-i"
+}
+
+################################################################################
+# IPv6
+
+ipv6_ping_novrf()
+{
+ local a
+
+ # should not have an impact, but make a known state
+ set_sysctl net.ipv4.raw_l3mdev_accept=0 2>/dev/null
+
+ #
+ # out
+ #
+ for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}%${NSA_DEV} ${MCAST}%${NSA_DEV}
+ do
+ log_start
+ run_cmd ${ping6} -c1 -w1 ${a}
+ log_test_addr ${a} $? 0 "ping out"
+ done
+
+ for a in ${NSB_IP6} ${NSB_LO_IP6}
+ do
+ log_start
+ run_cmd ${ping6} -c1 -w1 -I ${NSA_DEV} ${a}
+ log_test_addr ${a} $? 0 "ping out, device bind"
+
+ log_start
+ run_cmd ${ping6} -c1 -w1 -I ${NSA_LO_IP6} ${a}
+ log_test_addr ${a} $? 0 "ping out, loopback address bind"
+ done
+
+ #
+ # in
+ #
+ for a in ${NSA_IP6} ${NSA_LO_IP6} ${NSA_LINKIP6}%${NSB_DEV} ${MCAST}%${NSB_DEV}
+ do
+ log_start
+ run_cmd_nsb ${ping6} -c1 -w1 ${a}
+ log_test_addr ${a} $? 0 "ping in"
+ done
+
+ #
+ # local traffic, local address
+ #
+ for a in ${NSA_IP6} ${NSA_LO_IP6} ::1 ${NSA_LINKIP6}%${NSA_DEV} ${MCAST}%${NSA_DEV}
+ do
+ log_start
+ run_cmd ${ping6} -c1 -w1 ${a}
+ log_test_addr ${a} $? 0 "ping local, no bind"
+ done
+
+ for a in ${NSA_IP6} ${NSA_LINKIP6}%${NSA_DEV} ${MCAST}%${NSA_DEV}
+ do
+ log_start
+ run_cmd ${ping6} -c1 -w1 -I ${NSA_DEV} ${a}
+ log_test_addr ${a} $? 0 "ping local, device bind"
+ done
+
+ for a in ${NSA_LO_IP6} ::1
+ do
+ log_start
+ show_hint "Fails since address on loopback is out of device scope"
+ run_cmd ${ping6} -c1 -w1 -I ${NSA_DEV} ${a}
+ log_test_addr ${a} $? 2 "ping local, device bind"
+ done
+
+ #
+ # ip rule blocks address
+ #
+ log_start
+ setup_cmd ip -6 rule add pref 32765 from all lookup local
+ setup_cmd ip -6 rule del pref 0 from all lookup local
+ setup_cmd ip -6 rule add pref 50 to ${NSB_LO_IP6} prohibit
+ setup_cmd ip -6 rule add pref 51 from ${NSB_IP6} prohibit
+
+ a=${NSB_LO_IP6}
+ run_cmd ${ping6} -c1 -w1 ${a}
+ log_test_addr ${a} $? 2 "ping out, blocked by rule"
+
+ log_start
+ run_cmd ${ping6} -c1 -w1 -I ${NSA_DEV} ${a}
+ log_test_addr ${a} $? 2 "ping out, device bind, blocked by rule"
+
+ a=${NSA_LO_IP6}
+ log_start
+ show_hint "Response lost due to ip rule"
+ run_cmd_nsb ${ping6} -c1 -w1 ${a}
+ log_test_addr ${a} $? 1 "ping in, blocked by rule"
+
+ setup_cmd ip -6 rule add pref 0 from all lookup local
+ setup_cmd ip -6 rule del pref 32765 from all lookup local
+ setup_cmd ip -6 rule del pref 50 to ${NSB_LO_IP6} prohibit
+ setup_cmd ip -6 rule del pref 51 from ${NSB_IP6} prohibit
+
+ #
+ # route blocks reachability to remote address
+ #
+ log_start
+ setup_cmd ip -6 route del ${NSB_LO_IP6}
+ setup_cmd ip -6 route add unreachable ${NSB_LO_IP6} metric 10
+ setup_cmd ip -6 route add unreachable ${NSB_IP6} metric 10
+
+ a=${NSB_LO_IP6}
+ run_cmd ${ping6} -c1 -w1 ${a}
+ log_test_addr ${a} $? 2 "ping out, blocked by route"
+
+ log_start
+ run_cmd ${ping6} -c1 -w1 -I ${NSA_DEV} ${a}
+ log_test_addr ${a} $? 2 "ping out, device bind, blocked by route"
+
+ a=${NSA_LO_IP6}
+ log_start
+ show_hint "Response lost due to ip route"
+ run_cmd_nsb ${ping6} -c1 -w1 ${a}
+ log_test_addr ${a} $? 1 "ping in, blocked by route"
+
+
+ #
+ # remove 'remote' routes; fallback to default
+ #
+ log_start
+ setup_cmd ip -6 ro del unreachable ${NSB_LO_IP6}
+ setup_cmd ip -6 ro del unreachable ${NSB_IP6}
+
+ a=${NSB_LO_IP6}
+ run_cmd ${ping6} -c1 -w1 ${a}
+ log_test_addr ${a} $? 2 "ping out, unreachable route"
+
+ log_start
+ run_cmd ${ping6} -c1 -w1 -I ${NSA_DEV} ${a}
+ log_test_addr ${a} $? 2 "ping out, device bind, unreachable route"
+}
+
+ipv6_ping_vrf()
+{
+ local a
+
+ # should default on; does not exist on older kernels
+ set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null
+
+ #
+ # out
+ #
+ for a in ${NSB_IP6} ${NSB_LO_IP6}
+ do
+ log_start
+ run_cmd ${ping6} -c1 -w1 -I ${VRF} ${a}
+ log_test_addr ${a} $? 0 "ping out, VRF bind"
+ done
+
+ for a in ${NSB_LINKIP6}%${VRF} ${MCAST}%${VRF}
+ do
+ log_start
+ show_hint "Fails since VRF device does not support linklocal or multicast"
+ run_cmd ${ping6} -c1 -w1 ${a}
+ log_test_addr ${a} $? 2 "ping out, VRF bind"
+ done
+
+ for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}%${NSA_DEV} ${MCAST}%${NSA_DEV}
+ do
+ log_start
+ run_cmd ${ping6} -c1 -w1 -I ${NSA_DEV} ${a}
+ log_test_addr ${a} $? 0 "ping out, device bind"
+ done
+
+ for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}%${NSA_DEV}
+ do
+ log_start
+ run_cmd ip vrf exec ${VRF} ${ping6} -c1 -w1 -I ${VRF_IP6} ${a}
+ log_test_addr ${a} $? 0 "ping out, vrf device+address bind"
+ done
+
+ #
+ # in
+ #
+ for a in ${NSA_IP6} ${VRF_IP6} ${NSA_LINKIP6}%${NSB_DEV} ${MCAST}%${NSB_DEV}
+ do
+ log_start
+ run_cmd_nsb ${ping6} -c1 -w1 ${a}
+ log_test_addr ${a} $? 0 "ping in"
+ done
+
+ a=${NSA_LO_IP6}
+ log_start
+ show_hint "Fails since loopback address is out of VRF scope"
+ run_cmd_nsb ${ping6} -c1 -w1 ${a}
+ log_test_addr ${a} $? 1 "ping in"
+
+ #
+ # local traffic, local address
+ #
+ for a in ${NSA_IP6} ${VRF_IP6} ::1
+ do
+ log_start
+ show_hint "Source address should be ${a}"
+ run_cmd ${ping6} -c1 -w1 -I ${VRF} ${a}
+ log_test_addr ${a} $? 0 "ping local, VRF bind"
+ done
+
+ for a in ${NSA_IP6} ${NSA_LINKIP6}%${NSA_DEV} ${MCAST}%${NSA_DEV}
+ do
+ log_start
+ run_cmd ${ping6} -c1 -w1 -I ${NSA_DEV} ${a}
+ log_test_addr ${a} $? 0 "ping local, device bind"
+ done
+
+ # LLA to GUA - remove ipv6 global addresses from ns-B
+ setup_cmd_nsb ip -6 addr del ${NSB_IP6}/64 dev ${NSB_DEV}
+ setup_cmd_nsb ip -6 addr del ${NSB_LO_IP6}/128 dev lo
+ setup_cmd_nsb ip -6 ro add ${NSA_IP6}/128 via ${NSA_LINKIP6} dev ${NSB_DEV}
+
+ for a in ${NSA_IP6} ${VRF_IP6}
+ do
+ log_start
+ run_cmd_nsb ${ping6} -c1 -w1 ${NSA_IP6}
+ log_test_addr ${a} $? 0 "ping in, LLA to GUA"
+ done
+
+ setup_cmd_nsb ip -6 ro del ${NSA_IP6}/128 via ${NSA_LINKIP6} dev ${NSB_DEV}
+ setup_cmd_nsb ip -6 addr add ${NSB_IP6}/64 dev ${NSB_DEV}
+ setup_cmd_nsb ip -6 addr add ${NSB_LO_IP6}/128 dev lo
+
+ #
+ # ip rule blocks address
+ #
+ log_start
+ setup_cmd ip -6 rule add pref 50 to ${NSB_LO_IP6} prohibit
+ setup_cmd ip -6 rule add pref 51 from ${NSB_IP6} prohibit
+
+ a=${NSB_LO_IP6}
+ run_cmd ${ping6} -c1 -w1 ${a}
+ log_test_addr ${a} $? 2 "ping out, blocked by rule"
+
+ log_start
+ run_cmd ${ping6} -c1 -w1 -I ${NSA_DEV} ${a}
+ log_test_addr ${a} $? 2 "ping out, device bind, blocked by rule"
+
+ a=${NSA_LO_IP6}
+ log_start
+ show_hint "Response lost due to ip rule"
+ run_cmd_nsb ${ping6} -c1 -w1 ${a}
+ log_test_addr ${a} $? 1 "ping in, blocked by rule"
+
+ log_start
+ setup_cmd ip -6 rule del pref 50 to ${NSB_LO_IP6} prohibit
+ setup_cmd ip -6 rule del pref 51 from ${NSB_IP6} prohibit
+
+ #
+ # remove 'remote' routes; fallback to default
+ #
+ log_start
+ setup_cmd ip -6 ro del ${NSB_LO_IP6} vrf ${VRF}
+
+ a=${NSB_LO_IP6}
+ run_cmd ${ping6} -c1 -w1 ${a}
+ log_test_addr ${a} $? 2 "ping out, unreachable route"
+
+ log_start
+ run_cmd ${ping6} -c1 -w1 -I ${NSA_DEV} ${a}
+ log_test_addr ${a} $? 2 "ping out, device bind, unreachable route"
+
+ ip -netns ${NSB} -6 ro del ${NSA_LO_IP6}
+ a=${NSA_LO_IP6}
+ log_start
+ run_cmd_nsb ${ping6} -c1 -w1 ${a}
+ log_test_addr ${a} $? 2 "ping in, unreachable route"
+}
+
+ipv6_ping()
+{
+ log_section "IPv6 ping"
+
+ log_subsection "No VRF"
+ setup
+ ipv6_ping_novrf
+
+ log_subsection "With VRF"
+ setup "yes"
+ ipv6_ping_vrf
+}
+
+################################################################################
+# IPv6 TCP
+
+#
+# MD5 tests without VRF
+#
+ipv6_tcp_md5_novrf()
+{
+ #
+ # single address
+ #
+
+ # basic use case
+ log_start
+ run_cmd nettest -6 -s -M ${MD5_PW} -r ${NSB_IP6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 0 "MD5: Single address config"
+
+ # client sends MD5, server not configured
+ log_start
+ show_hint "Should timeout due to MD5 mismatch"
+ run_cmd nettest -6 -s &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 2 "MD5: Server no config, client uses password"
+
+ # wrong password
+ log_start
+ show_hint "Should timeout since client uses wrong password"
+ run_cmd nettest -6 -s -M ${MD5_PW} -r ${NSB_IP6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+ log_test $? 2 "MD5: Client uses wrong password"
+
+ # client from different address
+ log_start
+ show_hint "Should timeout due to MD5 mismatch"
+ run_cmd nettest -6 -s -M ${MD5_PW} -r ${NSB_LO_IP6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 2 "MD5: Client address does not match address configured with password"
+
+ #
+ # MD5 extension - prefix length
+ #
+
+ # client in prefix
+ log_start
+ run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 0 "MD5: Prefix config"
+
+ # client in prefix, wrong password
+ log_start
+ show_hint "Should timeout since client uses wrong password"
+ run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+ log_test $? 2 "MD5: Prefix config, client uses wrong password"
+
+ # client outside of prefix
+ log_start
+ show_hint "Should timeout due to MD5 mismatch"
+ run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -l ${NSB_LO_IP6} -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 2 "MD5: Prefix config, client address not in configured prefix"
+}
+
+#
+# MD5 tests with VRF
+#
+ipv6_tcp_md5()
+{
+ #
+ # single address
+ #
+
+ # basic use case
+ log_start
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 0 "MD5: VRF: Single address config"
+
+ # client sends MD5, server not configured
+ log_start
+ show_hint "Should timeout since server does not have MD5 auth"
+ run_cmd nettest -6 -s -d ${VRF} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 2 "MD5: VRF: Server no config, client uses password"
+
+ # wrong password
+ log_start
+ show_hint "Should timeout since client uses wrong password"
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+ log_test $? 2 "MD5: VRF: Client uses wrong password"
+
+ # client from different address
+ log_start
+ show_hint "Should timeout since server config differs from client"
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_LO_IP6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 2 "MD5: VRF: Client address does not match address configured with password"
+
+ #
+ # MD5 extension - prefix length
+ #
+
+ # client in prefix
+ log_start
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 0 "MD5: VRF: Prefix config"
+
+ # client in prefix, wrong password
+ log_start
+ show_hint "Should timeout since client uses wrong password"
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+ log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password"
+
+ # client outside of prefix
+ log_start
+ show_hint "Should timeout since client address is outside of prefix"
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -l ${NSB_LO_IP6} -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix"
+
+ #
+ # duplicate config between default VRF and a VRF
+ #
+
+ log_start
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} &
+ run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF"
+
+ log_start
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} &
+ run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} &
+ sleep 1
+ run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+ log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF"
+
+ log_start
+ show_hint "Should timeout since client in default VRF uses VRF password"
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} &
+ run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} &
+ sleep 1
+ run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw"
+
+ log_start
+ show_hint "Should timeout since client in VRF uses default VRF password"
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} &
+ run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+ log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw"
+
+ log_start
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+ run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF"
+
+ log_start
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+ run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
+ sleep 1
+ run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+ log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF"
+
+ log_start
+ show_hint "Should timeout since client in default VRF uses VRF password"
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+ run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
+ sleep 1
+ run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw"
+
+ log_start
+ show_hint "Should timeout since client in VRF uses default VRF password"
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+ run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+ log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw"
+
+ #
+ # negative tests
+ #
+ log_start
+ run_cmd nettest -6 -s -d ${NSA_DEV} -M ${MD5_PW} -r ${NSB_IP6}
+ log_test $? 1 "MD5: VRF: Device must be a VRF - single address"
+
+ log_start
+ run_cmd nettest -6 -s -d ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET6}
+ log_test $? 1 "MD5: VRF: Device must be a VRF - prefix"
+
+}
+
+ipv6_tcp_novrf()
+{
+ local a
+
+ #
+ # server tests
+ #
+ for a in ${NSA_IP6} ${NSA_LO_IP6} ${NSA_LINKIP6}%${NSB_DEV}
+ do
+ log_start
+ run_cmd nettest -6 -s &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${a}
+ log_test_addr ${a} $? 0 "Global server"
+ done
+
+ # verify TCP reset received
+ for a in ${NSA_IP6} ${NSA_LO_IP6} ${NSA_LINKIP6}%${NSB_DEV}
+ do
+ log_start
+ show_hint "Should fail 'Connection refused'"
+ run_cmd_nsb nettest -6 -r ${a}
+ log_test_addr ${a} $? 1 "No server"
+ done
+
+ #
+ # client
+ #
+ for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}%${NSA_DEV}
+ do
+ log_start
+ run_cmd_nsb nettest -6 -s &
+ sleep 1
+ run_cmd nettest -6 -r ${a}
+ log_test_addr ${a} $? 0 "Client"
+ done
+
+ for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}%${NSA_DEV}
+ do
+ log_start
+ run_cmd_nsb nettest -6 -s &
+ sleep 1
+ run_cmd nettest -6 -r ${a} -d ${NSA_DEV}
+ log_test_addr ${a} $? 0 "Client, device bind"
+ done
+
+ for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}%${NSA_DEV}
+ do
+ log_start
+ show_hint "Should fail 'Connection refused'"
+ run_cmd nettest -6 -r ${a} -d ${NSA_DEV}
+ log_test_addr ${a} $? 1 "No server, device client"
+ done
+
+ #
+ # local address tests
+ #
+ for a in ${NSA_IP6} ${NSA_LO_IP6} ::1
+ do
+ log_start
+ run_cmd nettest -6 -s &
+ sleep 1
+ run_cmd nettest -6 -r ${a}
+ log_test_addr ${a} $? 0 "Global server, local connection"
+ done
+
+ a=${NSA_IP6}
+ log_start
+ run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -6 -r ${a} -0 ${a}
+ log_test_addr ${a} $? 0 "Device server, unbound client, local connection"
+
+ for a in ${NSA_LO_IP6} ::1
+ do
+ log_start
+ show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope"
+ run_cmd nettest -6 -s -d ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -6 -r ${a}
+ log_test_addr ${a} $? 1 "Device server, unbound client, local connection"
+ done
+
+ a=${NSA_IP6}
+ log_start
+ run_cmd nettest -6 -s &
+ sleep 1
+ run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a}
+ log_test_addr ${a} $? 0 "Global server, device client, local connection"
+
+ for a in ${NSA_LO_IP6} ::1
+ do
+ log_start
+ show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope"
+ run_cmd nettest -6 -s &
+ sleep 1
+ run_cmd nettest -6 -r ${a} -d ${NSA_DEV}
+ log_test_addr ${a} $? 1 "Global server, device client, local connection"
+ done
+
+ for a in ${NSA_IP6} ${NSA_LINKIP6}
+ do
+ log_start
+ run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -6 -d ${NSA_DEV} -r ${a}
+ log_test_addr ${a} $? 0 "Device server, device client, local conn"
+ done
+
+ for a in ${NSA_IP6} ${NSA_LINKIP6}
+ do
+ log_start
+ show_hint "Should fail 'Connection refused'"
+ run_cmd nettest -6 -d ${NSA_DEV} -r ${a}
+ log_test_addr ${a} $? 1 "No server, device client, local conn"
+ done
+
+ ipv6_tcp_md5_novrf
+}
+
+ipv6_tcp_vrf()
+{
+ local a
+
+ # disable global server
+ log_subsection "Global server disabled"
+
+ set_sysctl net.ipv4.tcp_l3mdev_accept=0
+
+ #
+ # server tests
+ #
+ for a in ${NSA_IP6} ${VRF_IP6} ${NSA_LINKIP6}%${NSB_DEV}
+ do
+ log_start
+ show_hint "Should fail 'Connection refused' since global server with VRF is disabled"
+ run_cmd nettest -6 -s &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${a}
+ log_test_addr ${a} $? 1 "Global server"
+ done
+
+ for a in ${NSA_IP6} ${VRF_IP6}
+ do
+ log_start
+ run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${a}
+ log_test_addr ${a} $? 0 "VRF server"
+ done
+
+ # link local is always bound to ingress device
+ a=${NSA_LINKIP6}%${NSB_DEV}
+ log_start
+ run_cmd nettest -6 -s -d ${VRF} -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${a}
+ log_test_addr ${a} $? 0 "VRF server"
+
+ for a in ${NSA_IP6} ${VRF_IP6} ${NSA_LINKIP6}%${NSB_DEV}
+ do
+ log_start
+ run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${a}
+ log_test_addr ${a} $? 0 "Device server"
+ done
+
+ # verify TCP reset received
+ for a in ${NSA_IP6} ${VRF_IP6} ${NSA_LINKIP6}%${NSB_DEV}
+ do
+ log_start
+ show_hint "Should fail 'Connection refused'"
+ run_cmd_nsb nettest -6 -r ${a}
+ log_test_addr ${a} $? 1 "No server"
+ done
+
+ # local address tests
+ a=${NSA_IP6}
+ log_start
+ show_hint "Should fail 'Connection refused' since global server with VRF is disabled"
+ run_cmd nettest -6 -s &
+ sleep 1
+ run_cmd nettest -6 -r ${a} -d ${NSA_DEV}
+ log_test_addr ${a} $? 1 "Global server, local connection"
+
+ # run MD5 tests
+ ipv6_tcp_md5
+
+ #
+ # enable VRF global server
+ #
+ log_subsection "VRF Global server enabled"
+ set_sysctl net.ipv4.tcp_l3mdev_accept=1
+
+ for a in ${NSA_IP6} ${VRF_IP6}
+ do
+ log_start
+ run_cmd nettest -6 -s -2 ${VRF} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${a}
+ log_test_addr ${a} $? 0 "Global server"
+ done
+
+ for a in ${NSA_IP6} ${VRF_IP6}
+ do
+ log_start
+ run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${a}
+ log_test_addr ${a} $? 0 "VRF server"
+ done
+
+ # For LLA, child socket is bound to device
+ a=${NSA_LINKIP6}%${NSB_DEV}
+ log_start
+ run_cmd nettest -6 -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${a}
+ log_test_addr ${a} $? 0 "Global server"
+
+ log_start
+ run_cmd nettest -6 -s -d ${VRF} -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${a}
+ log_test_addr ${a} $? 0 "VRF server"
+
+ for a in ${NSA_IP6} ${NSA_LINKIP6}%${NSB_DEV}
+ do
+ log_start
+ run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${a}
+ log_test_addr ${a} $? 0 "Device server"
+ done
+
+ # verify TCP reset received
+ for a in ${NSA_IP6} ${VRF_IP6} ${NSA_LINKIP6}%${NSB_DEV}
+ do
+ log_start
+ show_hint "Should fail 'Connection refused'"
+ run_cmd_nsb nettest -6 -r ${a}
+ log_test_addr ${a} $? 1 "No server"
+ done
+
+ # local address tests
+ for a in ${NSA_IP6} ${VRF_IP6}
+ do
+ log_start
+ show_hint "Fails 'Connection refused' since client is not in VRF"
+ run_cmd nettest -6 -s -d ${VRF} &
+ sleep 1
+ run_cmd nettest -6 -r ${a}
+ log_test_addr ${a} $? 1 "Global server, local connection"
+ done
+
+
+ #
+ # client
+ #
+ for a in ${NSB_IP6} ${NSB_LO_IP6}
+ do
+ log_start
+ run_cmd_nsb nettest -6 -s &
+ sleep 1
+ run_cmd nettest -6 -r ${a} -d ${VRF}
+ log_test_addr ${a} $? 0 "Client, VRF bind"
+ done
+
+ a=${NSB_LINKIP6}
+ log_start
+ show_hint "Fails since VRF device does not allow linklocal addresses"
+ run_cmd_nsb nettest -6 -s &
+ sleep 1
+ run_cmd nettest -6 -r ${a} -d ${VRF}
+ log_test_addr ${a} $? 1 "Client, VRF bind"
+
+ for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}
+ do
+ log_start
+ run_cmd_nsb nettest -6 -s &
+ sleep 1
+ run_cmd nettest -6 -r ${a} -d ${NSA_DEV}
+ log_test_addr ${a} $? 0 "Client, device bind"
+ done
+
+ for a in ${NSB_IP6} ${NSB_LO_IP6}
+ do
+ log_start
+ show_hint "Should fail 'Connection refused'"
+ run_cmd nettest -6 -r ${a} -d ${VRF}
+ log_test_addr ${a} $? 1 "No server, VRF client"
+ done
+
+ for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}
+ do
+ log_start
+ show_hint "Should fail 'Connection refused'"
+ run_cmd nettest -6 -r ${a} -d ${NSA_DEV}
+ log_test_addr ${a} $? 1 "No server, device client"
+ done
+
+ for a in ${NSA_IP6} ${VRF_IP6} ::1
+ do
+ log_start
+ run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} &
+ sleep 1
+ run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a}
+ log_test_addr ${a} $? 0 "VRF server, VRF client, local connection"
+ done
+
+ a=${NSA_IP6}
+ log_start
+ run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} &
+ sleep 1
+ run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a}
+ log_test_addr ${a} $? 0 "VRF server, device client, local connection"
+
+ a=${NSA_IP6}
+ log_start
+ show_hint "Should fail since unbound client is out of VRF scope"
+ run_cmd nettest -6 -s -d ${VRF} &
+ sleep 1
+ run_cmd nettest -6 -r ${a}
+ log_test_addr ${a} $? 1 "VRF server, unbound client, local connection"
+
+ log_start
+ run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a}
+ log_test_addr ${a} $? 0 "Device server, VRF client, local connection"
+
+ for a in ${NSA_IP6} ${NSA_LINKIP6}
+ do
+ log_start
+ run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a}
+ log_test_addr ${a} $? 0 "Device server, device client, local connection"
+ done
+}
+
+ipv6_tcp()
+{
+ log_section "IPv6/TCP"
+ log_subsection "No VRF"
+ setup
+
+ # tcp_l3mdev_accept should have no affect without VRF;
+ # run tests with it enabled and disabled to verify
+ log_subsection "tcp_l3mdev_accept disabled"
+ set_sysctl net.ipv4.tcp_l3mdev_accept=0
+ ipv6_tcp_novrf
+ log_subsection "tcp_l3mdev_accept enabled"
+ set_sysctl net.ipv4.tcp_l3mdev_accept=1
+ ipv6_tcp_novrf
+
+ log_subsection "With VRF"
+ setup "yes"
+ ipv6_tcp_vrf
+}
+
+################################################################################
+# IPv6 UDP
+
+ipv6_udp_novrf()
+{
+ local a
+
+ #
+ # server tests
+ #
+ for a in ${NSA_IP6} ${NSA_LINKIP6}%${NSB_DEV}
+ do
+ log_start
+ run_cmd nettest -6 -D -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd_nsb nettest -6 -D -r ${a}
+ log_test_addr ${a} $? 0 "Global server"
+
+ log_start
+ run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd_nsb nettest -6 -D -r ${a}
+ log_test_addr ${a} $? 0 "Device server"
+ done
+
+ a=${NSA_LO_IP6}
+ log_start
+ run_cmd nettest -6 -D -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd_nsb nettest -6 -D -r ${a}
+ log_test_addr ${a} $? 0 "Global server"
+
+ # should fail since loopback address is out of scope for a device
+ # bound server, but it does not - hence this is more documenting
+ # behavior.
+ #log_start
+ #show_hint "Should fail since loopback address is out of scope"
+ #run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+ #sleep 1
+ #run_cmd_nsb nettest -6 -D -r ${a}
+ #log_test_addr ${a} $? 1 "Device server"
+
+ # negative test - should fail
+ for a in ${NSA_IP6} ${NSA_LO_IP6} ${NSA_LINKIP6}%${NSB_DEV}
+ do
+ log_start
+ show_hint "Should fail 'Connection refused' since there is no server"
+ run_cmd_nsb nettest -6 -D -r ${a}
+ log_test_addr ${a} $? 1 "No server"
+ done
+
+ #
+ # client
+ #
+ for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}%${NSA_DEV}
+ do
+ log_start
+ run_cmd_nsb nettest -6 -D -s &
+ sleep 1
+ run_cmd nettest -6 -D -r ${a} -0 ${NSA_IP6}
+ log_test_addr ${a} $? 0 "Client"
+
+ log_start
+ run_cmd_nsb nettest -6 -D -s &
+ sleep 1
+ run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -0 ${NSA_IP6}
+ log_test_addr ${a} $? 0 "Client, device bind"
+
+ log_start
+ run_cmd_nsb nettest -6 -D -s &
+ sleep 1
+ run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -C -0 ${NSA_IP6}
+ log_test_addr ${a} $? 0 "Client, device send via cmsg"
+
+ log_start
+ run_cmd_nsb nettest -6 -D -s &
+ sleep 1
+ run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP6}
+ log_test_addr ${a} $? 0 "Client, device bind via IPV6_UNICAST_IF"
+
+ log_start
+ show_hint "Should fail 'Connection refused'"
+ run_cmd nettest -6 -D -r ${a}
+ log_test_addr ${a} $? 1 "No server, unbound client"
+
+ log_start
+ show_hint "Should fail 'Connection refused'"
+ run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV}
+ log_test_addr ${a} $? 1 "No server, device client"
+ done
+
+ #
+ # local address tests
+ #
+ for a in ${NSA_IP6} ${NSA_LO_IP6} ::1
+ do
+ log_start
+ run_cmd nettest -6 -D -s &
+ sleep 1
+ run_cmd nettest -6 -D -r ${a} -0 ${a} -1 ${a}
+ log_test_addr ${a} $? 0 "Global server, local connection"
+ done
+
+ a=${NSA_IP6}
+ log_start
+ run_cmd nettest -6 -s -D -d ${NSA_DEV} -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -6 -D -r ${a}
+ log_test_addr ${a} $? 0 "Device server, unbound client, local connection"
+
+ for a in ${NSA_LO_IP6} ::1
+ do
+ log_start
+ show_hint "Should fail 'Connection refused' since address is out of device scope"
+ run_cmd nettest -6 -s -D -d ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -6 -D -r ${a}
+ log_test_addr ${a} $? 1 "Device server, local connection"
+ done
+
+ a=${NSA_IP6}
+ log_start
+ run_cmd nettest -6 -s -D &
+ sleep 1
+ run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
+ log_test_addr ${a} $? 0 "Global server, device client, local connection"
+
+ log_start
+ run_cmd nettest -6 -s -D &
+ sleep 1
+ run_cmd nettest -6 -D -d ${NSA_DEV} -C -r ${a}
+ log_test_addr ${a} $? 0 "Global server, device send via cmsg, local connection"
+
+ log_start
+ run_cmd nettest -6 -s -D &
+ sleep 1
+ run_cmd nettest -6 -D -d ${NSA_DEV} -S -r ${a}
+ log_test_addr ${a} $? 0 "Global server, device client via IPV6_UNICAST_IF, local connection"
+
+ for a in ${NSA_LO_IP6} ::1
+ do
+ log_start
+ show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope"
+ run_cmd nettest -6 -D -s &
+ sleep 1
+ run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV}
+ log_test_addr ${a} $? 1 "Global server, device client, local connection"
+
+ log_start
+ show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope"
+ run_cmd nettest -6 -D -s &
+ sleep 1
+ run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -C
+ log_test_addr ${a} $? 1 "Global server, device send via cmsg, local connection"
+
+ log_start
+ show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope"
+ run_cmd nettest -6 -D -s &
+ sleep 1
+ run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S
+ log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection"
+ done
+
+ a=${NSA_IP6}
+ log_start
+ run_cmd nettest -6 -D -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} -0 ${a}
+ log_test_addr ${a} $? 0 "Device server, device client, local conn"
+
+ log_start
+ show_hint "Should fail 'Connection refused'"
+ run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
+ log_test_addr ${a} $? 1 "No server, device client, local conn"
+
+ # LLA to GUA
+ run_cmd_nsb ip -6 addr del ${NSB_IP6}/64 dev ${NSB_DEV}
+ run_cmd_nsb ip -6 ro add ${NSA_IP6}/128 dev ${NSB_DEV}
+ log_start
+ run_cmd nettest -6 -s -D &
+ sleep 1
+ run_cmd_nsb nettest -6 -D -r ${NSA_IP6}
+ log_test $? 0 "UDP in - LLA to GUA"
+
+ run_cmd_nsb ip -6 ro del ${NSA_IP6}/128 dev ${NSB_DEV}
+ run_cmd_nsb ip -6 addr add ${NSB_IP6}/64 dev ${NSB_DEV} nodad
+}
+
+ipv6_udp_vrf()
+{
+ local a
+
+ # disable global server
+ log_subsection "Global server disabled"
+ set_sysctl net.ipv4.udp_l3mdev_accept=0
+
+ #
+ # server tests
+ #
+ for a in ${NSA_IP6} ${VRF_IP6}
+ do
+ log_start
+ show_hint "Should fail 'Connection refused' since global server is disabled"
+ run_cmd nettest -6 -D -s &
+ sleep 1
+ run_cmd_nsb nettest -6 -D -r ${a}
+ log_test_addr ${a} $? 1 "Global server"
+ done
+
+ for a in ${NSA_IP6} ${VRF_IP6}
+ do
+ log_start
+ run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd_nsb nettest -6 -D -r ${a}
+ log_test_addr ${a} $? 0 "VRF server"
+ done
+
+ for a in ${NSA_IP6} ${VRF_IP6}
+ do
+ log_start
+ run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd_nsb nettest -6 -D -r ${a}
+ log_test_addr ${a} $? 0 "Enslaved device server"
+ done
+
+ # negative test - should fail
+ for a in ${NSA_IP6} ${VRF_IP6}
+ do
+ log_start
+ show_hint "Should fail 'Connection refused' since there is no server"
+ run_cmd_nsb nettest -6 -D -r ${a}
+ log_test_addr ${a} $? 1 "No server"
+ done
+
+ #
+ # local address tests
+ #
+ for a in ${NSA_IP6} ${VRF_IP6}
+ do
+ log_start
+ show_hint "Should fail 'Connection refused' since global server is disabled"
+ run_cmd nettest -6 -D -s &
+ sleep 1
+ run_cmd nettest -6 -D -d ${VRF} -r ${a}
+ log_test_addr ${a} $? 1 "Global server, VRF client, local conn"
+ done
+
+ for a in ${NSA_IP6} ${VRF_IP6}
+ do
+ log_start
+ run_cmd nettest -6 -D -d ${VRF} -s &
+ sleep 1
+ run_cmd nettest -6 -D -d ${VRF} -r ${a}
+ log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
+ done
+
+ a=${NSA_IP6}
+ log_start
+ show_hint "Should fail 'Connection refused' since global server is disabled"
+ run_cmd nettest -6 -D -s &
+ sleep 1
+ run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
+ log_test_addr ${a} $? 1 "Global server, device client, local conn"
+
+ log_start
+ run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
+ log_test_addr ${a} $? 0 "VRF server, device client, local conn"
+
+ log_start
+ run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -6 -D -d ${VRF} -r ${a}
+ log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn"
+
+ log_start
+ run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
+ log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn"
+
+ # disable global server
+ log_subsection "Global server enabled"
+ set_sysctl net.ipv4.udp_l3mdev_accept=1
+
+ #
+ # server tests
+ #
+ for a in ${NSA_IP6} ${VRF_IP6}
+ do
+ log_start
+ run_cmd nettest -6 -D -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd_nsb nettest -6 -D -r ${a}
+ log_test_addr ${a} $? 0 "Global server"
+ done
+
+ for a in ${NSA_IP6} ${VRF_IP6}
+ do
+ log_start
+ run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd_nsb nettest -6 -D -r ${a}
+ log_test_addr ${a} $? 0 "VRF server"
+ done
+
+ for a in ${NSA_IP6} ${VRF_IP6}
+ do
+ log_start
+ run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd_nsb nettest -6 -D -r ${a}
+ log_test_addr ${a} $? 0 "Enslaved device server"
+ done
+
+ # negative test - should fail
+ for a in ${NSA_IP6} ${VRF_IP6}
+ do
+ log_start
+ run_cmd_nsb nettest -6 -D -r ${a}
+ log_test_addr ${a} $? 1 "No server"
+ done
+
+ #
+ # client tests
+ #
+ log_start
+ run_cmd_nsb nettest -6 -D -s &
+ sleep 1
+ run_cmd nettest -6 -D -d ${VRF} -r ${NSB_IP6}
+ log_test $? 0 "VRF client"
+
+ # negative test - should fail
+ log_start
+ run_cmd nettest -6 -D -d ${VRF} -r ${NSB_IP6}
+ log_test $? 1 "No server, VRF client"
+
+ log_start
+ run_cmd_nsb nettest -6 -D -s &
+ sleep 1
+ run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSB_IP6}
+ log_test $? 0 "Enslaved device client"
+
+ # negative test - should fail
+ log_start
+ run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSB_IP6}
+ log_test $? 1 "No server, enslaved device client"
+
+ #
+ # local address tests
+ #
+ a=${NSA_IP6}
+ log_start
+ run_cmd nettest -6 -D -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -6 -D -d ${VRF} -r ${a}
+ log_test_addr ${a} $? 0 "Global server, VRF client, local conn"
+
+ #log_start
+ run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -6 -D -d ${VRF} -r ${a}
+ log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
+
+
+ a=${VRF_IP6}
+ log_start
+ run_cmd nettest -6 -D -s -2 ${VRF} &
+ sleep 1
+ run_cmd nettest -6 -D -d ${VRF} -r ${a}
+ log_test_addr ${a} $? 0 "Global server, VRF client, local conn"
+
+ log_start
+ run_cmd nettest -6 -D -d ${VRF} -s -2 ${VRF} &
+ sleep 1
+ run_cmd nettest -6 -D -d ${VRF} -r ${a}
+ log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
+
+ # negative test - should fail
+ for a in ${NSA_IP6} ${VRF_IP6}
+ do
+ log_start
+ run_cmd nettest -6 -D -d ${VRF} -r ${a}
+ log_test_addr ${a} $? 1 "No server, VRF client, local conn"
+ done
+
+ # device to global IP
+ a=${NSA_IP6}
+ log_start
+ run_cmd nettest -6 -D -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
+ log_test_addr ${a} $? 0 "Global server, device client, local conn"
+
+ log_start
+ run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
+ log_test_addr ${a} $? 0 "VRF server, device client, local conn"
+
+ log_start
+ run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -6 -D -d ${VRF} -r ${a}
+ log_test_addr ${a} $? 0 "Device server, VRF client, local conn"
+
+ log_start
+ run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+ sleep 1
+ run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
+ log_test_addr ${a} $? 0 "Device server, device client, local conn"
+
+ log_start
+ run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
+ log_test_addr ${a} $? 1 "No server, device client, local conn"
+
+
+ # link local addresses
+ log_start
+ run_cmd nettest -6 -D -s &
+ sleep 1
+ run_cmd_nsb nettest -6 -D -d ${NSB_DEV} -r ${NSA_LINKIP6}
+ log_test $? 0 "Global server, linklocal IP"
+
+ log_start
+ run_cmd_nsb nettest -6 -D -d ${NSB_DEV} -r ${NSA_LINKIP6}
+ log_test $? 1 "No server, linklocal IP"
+
+
+ log_start
+ run_cmd_nsb nettest -6 -D -s &
+ sleep 1
+ run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSB_LINKIP6}
+ log_test $? 0 "Enslaved device client, linklocal IP"
+
+ log_start
+ run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSB_LINKIP6}
+ log_test $? 1 "No server, device client, peer linklocal IP"
+
+
+ log_start
+ run_cmd nettest -6 -D -s &
+ sleep 1
+ run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSA_LINKIP6}
+ log_test $? 0 "Enslaved device client, local conn - linklocal IP"
+
+ log_start
+ run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSA_LINKIP6}
+ log_test $? 1 "No server, device client, local conn - linklocal IP"
+
+ # LLA to GUA
+ run_cmd_nsb ip -6 addr del ${NSB_IP6}/64 dev ${NSB_DEV}
+ run_cmd_nsb ip -6 ro add ${NSA_IP6}/128 dev ${NSB_DEV}
+ log_start
+ run_cmd nettest -6 -s -D &
+ sleep 1
+ run_cmd_nsb nettest -6 -D -r ${NSA_IP6}
+ log_test $? 0 "UDP in - LLA to GUA"
+
+ run_cmd_nsb ip -6 ro del ${NSA_IP6}/128 dev ${NSB_DEV}
+ run_cmd_nsb ip -6 addr add ${NSB_IP6}/64 dev ${NSB_DEV} nodad
+}
+
+ipv6_udp()
+{
+ # should not matter, but set to known state
+ set_sysctl net.ipv4.udp_early_demux=1
+
+ log_section "IPv6/UDP"
+ log_subsection "No VRF"
+ setup
+
+ # udp_l3mdev_accept should have no affect without VRF;
+ # run tests with it enabled and disabled to verify
+ log_subsection "udp_l3mdev_accept disabled"
+ set_sysctl net.ipv4.udp_l3mdev_accept=0
+ ipv6_udp_novrf
+ log_subsection "udp_l3mdev_accept enabled"
+ set_sysctl net.ipv4.udp_l3mdev_accept=1
+ ipv6_udp_novrf
+
+ log_subsection "With VRF"
+ setup "yes"
+ ipv6_udp_vrf
+}
+
+################################################################################
+# IPv6 address bind
+
+ipv6_addr_bind_novrf()
+{
+ #
+ # raw socket
+ #
+ for a in ${NSA_IP6} ${NSA_LO_IP6}
+ do
+ log_start
+ run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -b
+ log_test_addr ${a} $? 0 "Raw socket bind to local address"
+
+ log_start
+ run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${NSA_DEV} -b
+ log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind"
+ done
+
+ #
+ # tcp sockets
+ #
+ a=${NSA_IP6}
+ log_start
+ run_cmd nettest -6 -s -l ${a} -t1 -b
+ log_test_addr ${a} $? 0 "TCP socket bind to local address"
+
+ log_start
+ run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b
+ log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind"
+
+ a=${NSA_LO_IP6}
+ log_start
+ show_hint "Should fail with 'Cannot assign requested address'"
+ run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b
+ log_test_addr ${a} $? 1 "TCP socket bind to out of scope local address"
+}
+
+ipv6_addr_bind_vrf()
+{
+ #
+ # raw socket
+ #
+ for a in ${NSA_IP6} ${VRF_IP6}
+ do
+ log_start
+ run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${VRF} -b
+ log_test_addr ${a} $? 0 "Raw socket bind to local address after vrf bind"
+
+ log_start
+ run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${NSA_DEV} -b
+ log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind"
+ done
+
+ a=${NSA_LO_IP6}
+ log_start
+ show_hint "Address on loopback is out of VRF scope"
+ run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${VRF} -b
+ log_test_addr ${a} $? 1 "Raw socket bind to invalid local address after vrf bind"
+
+ #
+ # tcp sockets
+ #
+ # address on enslaved device is valid for the VRF or device in a VRF
+ for a in ${NSA_IP6} ${VRF_IP6}
+ do
+ log_start
+ run_cmd nettest -6 -s -l ${a} -d ${VRF} -t1 -b
+ log_test_addr ${a} $? 0 "TCP socket bind to local address with VRF bind"
+ done
+
+ a=${NSA_IP6}
+ log_start
+ run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b
+ log_test_addr ${a} $? 0 "TCP socket bind to local address with device bind"
+
+ a=${VRF_IP6}
+ log_start
+ run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b
+ log_test_addr ${a} $? 1 "TCP socket bind to VRF address with device bind"
+
+ a=${NSA_LO_IP6}
+ log_start
+ show_hint "Address on loopback out of scope for VRF"
+ run_cmd nettest -6 -s -l ${a} -d ${VRF} -t1 -b
+ log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for VRF"
+
+ log_start
+ show_hint "Address on loopback out of scope for device in VRF"
+ run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b
+ log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for device bind"
+
+}
+
+ipv6_addr_bind()
+{
+ log_section "IPv6 address binds"
+
+ log_subsection "No VRF"
+ setup
+ ipv6_addr_bind_novrf
+
+ log_subsection "With VRF"
+ setup "yes"
+ ipv6_addr_bind_vrf
+}
+
+################################################################################
+# IPv6 runtime tests
+
+ipv6_rt()
+{
+ local desc="$1"
+ local varg="-6 $2"
+ local with_vrf="yes"
+ local a
+
+ #
+ # server tests
+ #
+ for a in ${NSA_IP6} ${VRF_IP6}
+ do
+ log_start
+ run_cmd nettest ${varg} -s &
+ sleep 1
+ run_cmd_nsb nettest ${varg} -r ${a} &
+ sleep 3
+ run_cmd ip link del ${VRF}
+ sleep 1
+ log_test_addr ${a} 0 0 "${desc}, global server"
+
+ setup ${with_vrf}
+ done
+
+ for a in ${NSA_IP6} ${VRF_IP6}
+ do
+ log_start
+ run_cmd nettest ${varg} -d ${VRF} -s &
+ sleep 1
+ run_cmd_nsb nettest ${varg} -r ${a} &
+ sleep 3
+ run_cmd ip link del ${VRF}
+ sleep 1
+ log_test_addr ${a} 0 0 "${desc}, VRF server"
+
+ setup ${with_vrf}
+ done
+
+ for a in ${NSA_IP6} ${VRF_IP6}
+ do
+ log_start
+ run_cmd nettest ${varg} -d ${NSA_DEV} -s &
+ sleep 1
+ run_cmd_nsb nettest ${varg} -r ${a} &
+ sleep 3
+ run_cmd ip link del ${VRF}
+ sleep 1
+ log_test_addr ${a} 0 0 "${desc}, enslaved device server"
+
+ setup ${with_vrf}
+ done
+
+ #
+ # client test
+ #
+ log_start
+ run_cmd_nsb nettest ${varg} -s &
+ sleep 1
+ run_cmd nettest ${varg} -d ${VRF} -r ${NSB_IP6} &
+ sleep 3
+ run_cmd ip link del ${VRF}
+ sleep 1
+ log_test 0 0 "${desc}, VRF client"
+
+ setup ${with_vrf}
+
+ log_start
+ run_cmd_nsb nettest ${varg} -s &
+ sleep 1
+ run_cmd nettest ${varg} -d ${NSA_DEV} -r ${NSB_IP6} &
+ sleep 3
+ run_cmd ip link del ${VRF}
+ sleep 1
+ log_test 0 0 "${desc}, enslaved device client"
+
+ setup ${with_vrf}
+
+
+ #
+ # local address tests
+ #
+ for a in ${NSA_IP6} ${VRF_IP6}
+ do
+ log_start
+ run_cmd nettest ${varg} -s &
+ sleep 1
+ run_cmd nettest ${varg} -d ${VRF} -r ${a} &
+ sleep 3
+ run_cmd ip link del ${VRF}
+ sleep 1
+ log_test_addr ${a} 0 0 "${desc}, global server, VRF client"
+
+ setup ${with_vrf}
+ done
+
+ for a in ${NSA_IP6} ${VRF_IP6}
+ do
+ log_start
+ run_cmd nettest ${varg} -d ${VRF} -s &
+ sleep 1
+ run_cmd nettest ${varg} -d ${VRF} -r ${a} &
+ sleep 3
+ run_cmd ip link del ${VRF}
+ sleep 1
+ log_test_addr ${a} 0 0 "${desc}, VRF server and client"
+
+ setup ${with_vrf}
+ done
+
+ a=${NSA_IP6}
+ log_start
+ run_cmd nettest ${varg} -s &
+ sleep 1
+ run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
+ sleep 3
+ run_cmd ip link del ${VRF}
+ sleep 1
+ log_test_addr ${a} 0 0 "${desc}, global server, device client"
+
+ setup ${with_vrf}
+
+ log_start
+ run_cmd nettest ${varg} -d ${VRF} -s &
+ sleep 1
+ run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
+ sleep 3
+ run_cmd ip link del ${VRF}
+ sleep 1
+ log_test_addr ${a} 0 0 "${desc}, VRF server, device client"
+
+ setup ${with_vrf}
+
+ log_start
+ run_cmd nettest ${varg} -d ${NSA_DEV} -s &
+ sleep 1
+ run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
+ sleep 3
+ run_cmd ip link del ${VRF}
+ sleep 1
+ log_test_addr ${a} 0 0 "${desc}, device server, device client"
+}
+
+ipv6_ping_rt()
+{
+ local with_vrf="yes"
+ local a
+
+ a=${NSA_IP6}
+ log_start
+ run_cmd_nsb ${ping6} -f ${a} &
+ sleep 3
+ run_cmd ip link del ${VRF}
+ sleep 1
+ log_test_addr ${a} 0 0 "Device delete with active traffic - ping in"
+
+ setup ${with_vrf}
+
+ log_start
+ run_cmd ${ping6} -f ${NSB_IP6} -I ${VRF} &
+ sleep 1
+ run_cmd ip link del ${VRF}
+ sleep 1
+ log_test_addr ${a} 0 0 "Device delete with active traffic - ping out"
+}
+
+ipv6_runtime()
+{
+ log_section "Run time tests - ipv6"
+
+ setup "yes"
+ ipv6_ping_rt
+
+ setup "yes"
+ ipv6_rt "TCP active socket" "-n -1"
+
+ setup "yes"
+ ipv6_rt "TCP passive socket" "-i"
+
+ setup "yes"
+ ipv6_rt "UDP active socket" "-D -n -1"
+}
+
+################################################################################
+# netfilter blocking connections
+
+netfilter_tcp_reset()
+{
+ local a
+
+ for a in ${NSA_IP} ${VRF_IP}
+ do
+ log_start
+ run_cmd nettest -s &
+ sleep 1
+ run_cmd_nsb nettest -r ${a}
+ log_test_addr ${a} $? 1 "Global server, reject with TCP-reset on Rx"
+ done
+}
+
+netfilter_icmp()
+{
+ local stype="$1"
+ local arg
+ local a
+
+ [ "${stype}" = "UDP" ] && arg="-D"
+
+ for a in ${NSA_IP} ${VRF_IP}
+ do
+ log_start
+ run_cmd nettest ${arg} -s &
+ sleep 1
+ run_cmd_nsb nettest ${arg} -r ${a}
+ log_test_addr ${a} $? 1 "Global ${stype} server, Rx reject icmp-port-unreach"
+ done
+}
+
+ipv4_netfilter()
+{
+ log_section "IPv4 Netfilter"
+ log_subsection "TCP reset"
+
+ setup "yes"
+ run_cmd iptables -A INPUT -p tcp --dport 12345 -j REJECT --reject-with tcp-reset
+
+ netfilter_tcp_reset
+
+ log_start
+ log_subsection "ICMP unreachable"
+
+ log_start
+ run_cmd iptables -F
+ run_cmd iptables -A INPUT -p tcp --dport 12345 -j REJECT --reject-with icmp-port-unreachable
+ run_cmd iptables -A INPUT -p udp --dport 12345 -j REJECT --reject-with icmp-port-unreachable
+
+ netfilter_icmp "TCP"
+ netfilter_icmp "UDP"
+
+ log_start
+ iptables -F
+}
+
+netfilter_tcp6_reset()
+{
+ local a
+
+ for a in ${NSA_IP6} ${VRF_IP6}
+ do
+ log_start
+ run_cmd nettest -6 -s &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${a}
+ log_test_addr ${a} $? 1 "Global server, reject with TCP-reset on Rx"
+ done
+}
+
+netfilter_icmp6()
+{
+ local stype="$1"
+ local arg
+ local a
+
+ [ "${stype}" = "UDP" ] && arg="$arg -D"
+
+ for a in ${NSA_IP6} ${VRF_IP6}
+ do
+ log_start
+ run_cmd nettest -6 -s ${arg} &
+ sleep 1
+ run_cmd_nsb nettest -6 ${arg} -r ${a}
+ log_test_addr ${a} $? 1 "Global ${stype} server, Rx reject icmp-port-unreach"
+ done
+}
+
+ipv6_netfilter()
+{
+ log_section "IPv6 Netfilter"
+ log_subsection "TCP reset"
+
+ setup "yes"
+ run_cmd ip6tables -A INPUT -p tcp --dport 12345 -j REJECT --reject-with tcp-reset
+
+ netfilter_tcp6_reset
+
+ log_subsection "ICMP unreachable"
+
+ log_start
+ run_cmd ip6tables -F
+ run_cmd ip6tables -A INPUT -p tcp --dport 12345 -j REJECT --reject-with icmp6-port-unreachable
+ run_cmd ip6tables -A INPUT -p udp --dport 12345 -j REJECT --reject-with icmp6-port-unreachable
+
+ netfilter_icmp6 "TCP"
+ netfilter_icmp6 "UDP"
+
+ log_start
+ ip6tables -F
+}
+
+################################################################################
+# specific use cases
+
+# VRF only.
+# ns-A device enslaved to bridge. Verify traffic with and without
+# br_netfilter module loaded. Repeat with SVI on bridge.
+use_case_br()
+{
+ setup "yes"
+
+ setup_cmd ip link set ${NSA_DEV} down
+ setup_cmd ip addr del dev ${NSA_DEV} ${NSA_IP}/24
+ setup_cmd ip -6 addr del dev ${NSA_DEV} ${NSA_IP6}/64
+
+ setup_cmd ip link add br0 type bridge
+ setup_cmd ip addr add dev br0 ${NSA_IP}/24
+ setup_cmd ip -6 addr add dev br0 ${NSA_IP6}/64 nodad
+
+ setup_cmd ip li set ${NSA_DEV} master br0
+ setup_cmd ip li set ${NSA_DEV} up
+ setup_cmd ip li set br0 up
+ setup_cmd ip li set br0 vrf ${VRF}
+
+ rmmod br_netfilter 2>/dev/null
+ sleep 5 # DAD
+
+ run_cmd ip neigh flush all
+ run_cmd ping -c1 -w1 -I br0 ${NSB_IP}
+ log_test $? 0 "Bridge into VRF - IPv4 ping out"
+
+ run_cmd ip neigh flush all
+ run_cmd ${ping6} -c1 -w1 -I br0 ${NSB_IP6}
+ log_test $? 0 "Bridge into VRF - IPv6 ping out"
+
+ run_cmd ip neigh flush all
+ run_cmd_nsb ping -c1 -w1 ${NSA_IP}
+ log_test $? 0 "Bridge into VRF - IPv4 ping in"
+
+ run_cmd ip neigh flush all
+ run_cmd_nsb ${ping6} -c1 -w1 ${NSA_IP6}
+ log_test $? 0 "Bridge into VRF - IPv6 ping in"
+
+ modprobe br_netfilter
+ if [ $? -eq 0 ]; then
+ run_cmd ip neigh flush all
+ run_cmd ping -c1 -w1 -I br0 ${NSB_IP}
+ log_test $? 0 "Bridge into VRF with br_netfilter - IPv4 ping out"
+
+ run_cmd ip neigh flush all
+ run_cmd ${ping6} -c1 -w1 -I br0 ${NSB_IP6}
+ log_test $? 0 "Bridge into VRF with br_netfilter - IPv6 ping out"
+
+ run_cmd ip neigh flush all
+ run_cmd_nsb ping -c1 -w1 ${NSA_IP}
+ log_test $? 0 "Bridge into VRF with br_netfilter - IPv4 ping in"
+
+ run_cmd ip neigh flush all
+ run_cmd_nsb ${ping6} -c1 -w1 ${NSA_IP6}
+ log_test $? 0 "Bridge into VRF with br_netfilter - IPv6 ping in"
+ fi
+
+ setup_cmd ip li set br0 nomaster
+ setup_cmd ip li add br0.100 link br0 type vlan id 100
+ setup_cmd ip li set br0.100 vrf ${VRF} up
+ setup_cmd ip addr add dev br0.100 172.16.101.1/24
+ setup_cmd ip -6 addr add dev br0.100 2001:db8:101::1/64 nodad
+
+ setup_cmd_nsb ip li add vlan100 link ${NSB_DEV} type vlan id 100
+ setup_cmd_nsb ip addr add dev vlan100 172.16.101.2/24
+ setup_cmd_nsb ip -6 addr add dev vlan100 2001:db8:101::2/64 nodad
+ setup_cmd_nsb ip li set vlan100 up
+ sleep 1
+
+ rmmod br_netfilter 2>/dev/null
+
+ run_cmd ip neigh flush all
+ run_cmd ping -c1 -w1 -I br0.100 172.16.101.2
+ log_test $? 0 "Bridge vlan into VRF - IPv4 ping out"
+
+ run_cmd ip neigh flush all
+ run_cmd ${ping6} -c1 -w1 -I br0.100 2001:db8:101::2
+ log_test $? 0 "Bridge vlan into VRF - IPv6 ping out"
+
+ run_cmd ip neigh flush all
+ run_cmd_nsb ping -c1 -w1 172.16.101.1
+ log_test $? 0 "Bridge vlan into VRF - IPv4 ping in"
+
+ run_cmd ip neigh flush all
+ run_cmd_nsb ${ping6} -c1 -w1 2001:db8:101::1
+ log_test $? 0 "Bridge vlan into VRF - IPv6 ping in"
+
+ modprobe br_netfilter
+ if [ $? -eq 0 ]; then
+ run_cmd ip neigh flush all
+ run_cmd ping -c1 -w1 -I br0.100 172.16.101.2
+ log_test $? 0 "Bridge vlan into VRF with br_netfilter - IPv4 ping out"
+
+ run_cmd ip neigh flush all
+ run_cmd ${ping6} -c1 -w1 -I br0.100 2001:db8:101::2
+ log_test $? 0 "Bridge vlan into VRF with br_netfilter - IPv6 ping out"
+
+ run_cmd ip neigh flush all
+ run_cmd_nsb ping -c1 -w1 172.16.101.1
+ log_test $? 0 "Bridge vlan into VRF - IPv4 ping in"
+
+ run_cmd ip neigh flush all
+ run_cmd_nsb ${ping6} -c1 -w1 2001:db8:101::1
+ log_test $? 0 "Bridge vlan into VRF - IPv6 ping in"
+ fi
+
+ setup_cmd ip li del br0 2>/dev/null
+ setup_cmd_nsb ip li del vlan100 2>/dev/null
+}
+
+use_cases()
+{
+ log_section "Use cases"
+ use_case_br
+}
+
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -4 IPv4 tests only
+ -6 IPv6 tests only
+ -t <test> Test name/set to run
+ -p Pause on fail
+ -P Pause after each test
+ -v Be verbose
+EOF
+}
+
+################################################################################
+# main
+
+TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_addr_bind ipv4_runtime ipv4_netfilter"
+TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_addr_bind ipv6_runtime ipv6_netfilter"
+TESTS_OTHER="use_cases"
+
+PAUSE_ON_FAIL=no
+PAUSE=no
+
+while getopts :46t:pPvh o
+do
+ case $o in
+ 4) TESTS=ipv4;;
+ 6) TESTS=ipv6;;
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ P) PAUSE=yes;;
+ v) VERBOSE=1;;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+#
+# show user test config
+#
+if [ -z "$TESTS" ]; then
+ TESTS="$TESTS_IPV4 $TESTS_IPV6 $TESTS_OTHER"
+elif [ "$TESTS" = "ipv4" ]; then
+ TESTS="$TESTS_IPV4"
+elif [ "$TESTS" = "ipv6" ]; then
+ TESTS="$TESTS_IPV6"
+fi
+
+which nettest >/dev/null
+if [ $? -ne 0 ]; then
+ echo "'nettest' command not found; skipping tests"
+ exit 0
+fi
+
+declare -i nfail=0
+declare -i nsuccess=0
+
+for t in $TESTS
+do
+ case $t in
+ ipv4_ping|ping) ipv4_ping;;
+ ipv4_tcp|tcp) ipv4_tcp;;
+ ipv4_udp|udp) ipv4_udp;;
+ ipv4_bind|bind) ipv4_addr_bind;;
+ ipv4_runtime) ipv4_runtime;;
+ ipv4_netfilter) ipv4_netfilter;;
+
+ ipv6_ping|ping6) ipv6_ping;;
+ ipv6_tcp|tcp6) ipv6_tcp;;
+ ipv6_udp|udp6) ipv6_udp;;
+ ipv6_bind|bind6) ipv6_addr_bind;;
+ ipv6_runtime) ipv6_runtime;;
+ ipv6_netfilter) ipv6_netfilter;;
+
+ use_cases) use_cases;;
+
+ # setup namespaces and config, but do not run any tests
+ setup) setup; exit 0;;
+ vrf_setup) setup "yes"; exit 0;;
+
+ help) echo "Test names: $TESTS"; exit 0;;
+ esac
+done
+
+cleanup 2>/dev/null
+
+printf "\nTests passed: %3d\n" ${nsuccess}
+printf "Tests failed: %3d\n" ${nfail}
diff --git a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh
index e6828732843e..9dc35a16e415 100755
--- a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh
+++ b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh
@@ -15,6 +15,8 @@
PAUSE_ON_FAIL=no
VERBOSE=0
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
################################################################################
# helpers
@@ -200,7 +202,7 @@ validate_v6_exception()
local rc
if [ ${ping_sz} != "0" ]; then
- run_cmd ip netns exec h0 ping6 -s ${ping_sz} -c5 -w5 ${dst}
+ run_cmd ip netns exec h0 ${ping6} -s ${ping_sz} -c5 -w5 ${dst}
fi
if [ "$VERBOSE" = "1" ]; then
@@ -243,7 +245,7 @@ do
run_cmd taskset -c ${c} ip netns exec h0 ping -c1 -w1 172.16.10${i}.1
[ $? -ne 0 ] && printf "\nERROR: ping to h${i} failed\n" && ret=1
- run_cmd taskset -c ${c} ip netns exec h0 ping6 -c1 -w1 2001:db8:10${i}::1
+ run_cmd taskset -c ${c} ip netns exec h0 ${ping6} -c1 -w1 2001:db8:10${i}::1
[ $? -ne 0 ] && printf "\nERROR: ping6 to h${i} failed\n" && ret=1
[ $ret -ne 0 ] && break
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index c5c93d5fb3ad..796670ebc65b 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -212,6 +212,8 @@ check_output()
printf " ${out}\n"
printf " Expected:\n"
printf " ${expected}\n\n"
+ else
+ echo " WARNING: Unexpected route entry"
fi
fi
@@ -274,7 +276,7 @@ ipv6_fcnal()
run_cmd "$IP nexthop get id 52"
log_test $? 0 "Get nexthop by id"
- check_nexthop "id 52" "id 52 via 2001:db8:91::2 dev veth1"
+ check_nexthop "id 52" "id 52 via 2001:db8:91::2 dev veth1 scope link"
run_cmd "$IP nexthop del id 52"
log_test $? 0 "Delete nexthop by id"
@@ -479,12 +481,12 @@ ipv6_fcnal_runtime()
run_cmd "$IP -6 nexthop add id 85 dev veth1"
run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 85"
log_test $? 0 "IPv6 route with device only nexthop"
- check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 dev veth1"
+ check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 dev veth1 metric 1024 pref medium"
run_cmd "$IP nexthop add id 123 group 81/85"
run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 123"
log_test $? 0 "IPv6 multipath route with nexthop mix - dev only + gw"
- check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 nexthop via 2001:db8:91::2 dev veth1 nexthop dev veth1"
+ check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 123 metric 1024 nexthop via 2001:db8:91::2 dev veth1 weight 1 nexthop dev veth1 weight 1 pref medium"
#
# IPv6 route with v4 nexthop - not allowed
@@ -538,7 +540,7 @@ ipv4_fcnal()
run_cmd "$IP nexthop get id 12"
log_test $? 0 "Get nexthop by id"
- check_nexthop "id 12" "id 12 via 172.16.1.2 src 172.16.1.1 dev veth1 scope link"
+ check_nexthop "id 12" "id 12 via 172.16.1.2 dev veth1 scope link"
run_cmd "$IP nexthop del id 12"
log_test $? 0 "Delete nexthop by id"
@@ -685,7 +687,7 @@ ipv4_withv6_fcnal()
set +e
run_cmd "$IP ro add 172.16.101.1/32 nhid 11"
log_test $? 0 "IPv6 nexthop with IPv4 route"
- check_route "172.16.101.1" "172.16.101.1 nhid 11 via ${lladdr} dev veth1"
+ check_route "172.16.101.1" "172.16.101.1 nhid 11 via inet6 ${lladdr} dev veth1"
set -e
run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
@@ -694,11 +696,11 @@ ipv4_withv6_fcnal()
run_cmd "$IP ro replace 172.16.101.1/32 nhid 101"
log_test $? 0 "IPv6 nexthop with IPv4 route"
- check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1"
+ check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via inet6 ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1"
run_cmd "$IP ro replace 172.16.101.1/32 via inet6 ${lladdr} dev veth1"
log_test $? 0 "IPv4 route with IPv6 gateway"
- check_route "172.16.101.1" "172.16.101.1 via ${lladdr} dev veth1"
+ check_route "172.16.101.1" "172.16.101.1 via inet6 ${lladdr} dev veth1"
run_cmd "$IP ro replace 172.16.101.1/32 via inet6 2001:db8:50::1 dev veth1"
log_test $? 2 "IPv4 route with invalid IPv6 gateway"
@@ -785,10 +787,10 @@ ipv4_fcnal_runtime()
log_test $? 0 "IPv4 route with device only nexthop"
check_route "172.16.101.1" "172.16.101.1 nhid 85 dev veth1"
- run_cmd "$IP nexthop add id 122 group 21/85"
- run_cmd "$IP ro replace 172.16.101.1/32 nhid 122"
+ run_cmd "$IP nexthop add id 123 group 21/85"
+ run_cmd "$IP ro replace 172.16.101.1/32 nhid 123"
log_test $? 0 "IPv4 multipath route with nexthop mix - dev only + gw"
- check_route "172.16.101.1" "172.16.101.1 nhid 85 nexthop via 172.16.1.2 dev veth1 nexthop dev veth1"
+ check_route "172.16.101.1" "172.16.101.1 nhid 123 nexthop via 172.16.1.2 dev veth1 weight 1 nexthop dev veth1 weight 1"
#
# IPv4 with IPv6
@@ -820,7 +822,7 @@ ipv4_fcnal_runtime()
run_cmd "$IP ro replace 172.16.101.1/32 nhid 101"
log_test $? 0 "IPv4 route with mixed v4-v6 multipath route"
- check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1"
+ check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via inet6 ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1"
run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
log_test $? 0 "IPv6 nexthop with IPv4 route"
@@ -938,6 +940,20 @@ basic()
run_cmd "$IP nexthop add id 104 group 1 dev veth1"
log_test $? 2 "Nexthop group and device"
+ # Tests to ensure that flushing works as expected.
+ run_cmd "$IP nexthop add id 105 blackhole proto 99"
+ run_cmd "$IP nexthop add id 106 blackhole proto 100"
+ run_cmd "$IP nexthop add id 107 blackhole proto 99"
+ run_cmd "$IP nexthop flush proto 99"
+ check_nexthop "id 105" ""
+ check_nexthop "id 106" "id 106 blackhole proto 100"
+ check_nexthop "id 107" ""
+ run_cmd "$IP nexthop flush proto 100"
+ check_nexthop "id 106" ""
+
+ run_cmd "$IP nexthop flush proto 100"
+ log_test $? 0 "Test proto flush"
+
run_cmd "$IP nexthop add id 104 group 1 blackhole"
log_test $? 2 "Nexthop group and blackhole"
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 4465fc2dae14..6dd403103800 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -9,7 +9,7 @@ ret=0
ksft_skip=4
# all tests in this script. Can be overridden with -t option
-TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter"
+TESTS="unregister down carrier nexthop suppress ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr"
VERBOSE=0
PAUSE_ON_FAIL=no
@@ -17,6 +17,8 @@ PAUSE=no
IP="ip -netns ns1"
NS_EXEC="ip netns exec ns1"
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
log_test()
{
local rc=$1
@@ -614,6 +616,20 @@ fib_nexthop_test()
cleanup
}
+fib_suppress_test()
+{
+ $IP link add dummy1 type dummy
+ $IP link set dummy1 up
+ $IP -6 route add default dev dummy1
+ $IP -6 rule add table main suppress_prefixlength 0
+ ping -f -c 1000 -W 1 1234::1 || true
+ $IP -6 rule del table main suppress_prefixlength 0
+ $IP link del dummy1
+
+ # If we got here without crashing, we're good.
+ return 0
+}
+
################################################################################
# Tests on route add and replace
@@ -1086,7 +1102,7 @@ ipv6_route_metrics_test()
log_test $rc 0 "Multipath route with mtu metric"
$IP -6 ro add 2001:db8:104::/64 via 2001:db8:101::2 mtu 1300
- run_cmd "ip netns exec ns1 ping6 -w1 -c1 -s 1500 2001:db8:104::1"
+ run_cmd "ip netns exec ns1 ${ping6} -w1 -c1 -s 1500 2001:db8:104::1"
log_test $? 0 "Using route with mtu metric"
run_cmd "$IP -6 ro add 2001:db8:114::/64 via 2001:db8:101::2 congctl lock foo"
@@ -1422,6 +1438,27 @@ ipv4_addr_metric_test()
fi
log_test $rc 0 "Prefix route with metric on link up"
+ # explicitly check for metric changes on edge scenarios
+ run_cmd "$IP addr flush dev dummy2"
+ run_cmd "$IP addr add dev dummy2 172.16.104.0/24 metric 259"
+ run_cmd "$IP addr change dev dummy2 172.16.104.0/24 metric 260"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.0 metric 260"
+ rc=$?
+ fi
+ log_test $rc 0 "Modify metric of .0/24 address"
+
+ run_cmd "$IP addr flush dev dummy2"
+ run_cmd "$IP addr add dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 260"
+ run_cmd "$IP addr change dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 261"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.2 dev dummy2 proto kernel scope link src 172.16.104.1 metric 261"
+ rc=$?
+ fi
+ log_test $rc 0 "Modify metric of address with peer route"
+
$IP li del dummy1
$IP li del dummy2
cleanup
@@ -1463,6 +1500,55 @@ ipv4_route_metrics_test()
route_cleanup
}
+ipv4_del_addr_test()
+{
+ echo
+ echo "IPv4 delete address route tests"
+
+ setup
+
+ set -e
+ $IP li add dummy1 type dummy
+ $IP li set dummy1 up
+ $IP li add dummy2 type dummy
+ $IP li set dummy2 up
+ $IP li add red type vrf table 1111
+ $IP li set red up
+ $IP ro add vrf red unreachable default
+ $IP li set dummy2 vrf red
+
+ $IP addr add dev dummy1 172.16.104.1/24
+ $IP addr add dev dummy1 172.16.104.11/24
+ $IP addr add dev dummy2 172.16.104.1/24
+ $IP addr add dev dummy2 172.16.104.11/24
+ $IP route add 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11
+ $IP route add vrf red 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11
+ set +e
+
+ # removing address from device in vrf should only remove route from vrf table
+ $IP addr del dev dummy2 172.16.104.11/24
+ $IP ro ls vrf red | grep -q 172.16.105.0/24
+ log_test $? 1 "Route removed from VRF when source address deleted"
+
+ $IP ro ls | grep -q 172.16.105.0/24
+ log_test $? 0 "Route in default VRF not removed"
+
+ $IP addr add dev dummy2 172.16.104.11/24
+ $IP route add vrf red 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11
+
+ $IP addr del dev dummy1 172.16.104.11/24
+ $IP ro ls | grep -q 172.16.105.0/24
+ log_test $? 1 "Route removed in default VRF when source address deleted"
+
+ $IP ro ls vrf red | grep -q 172.16.105.0/24
+ log_test $? 0 "Route in VRF is not removed by address delete"
+
+ $IP li del dummy1
+ $IP li del dummy2
+ cleanup
+}
+
+
ipv4_route_v6_gw_test()
{
local rc
@@ -1591,10 +1677,12 @@ do
fib_carrier_test|carrier) fib_carrier_test;;
fib_rp_filter_test|rp_filter) fib_rp_filter_test;;
fib_nexthop_test|nexthop) fib_nexthop_test;;
+ fib_suppress_test|suppress) fib_suppress_test;;
ipv6_route_test|ipv6_rt) ipv6_route_test;;
ipv4_route_test|ipv4_rt) ipv4_route_test;;
ipv6_addr_metric) ipv6_addr_metric_test;;
ipv4_addr_metric) ipv4_addr_metric_test;;
+ ipv4_del_addr) ipv4_del_addr_test;;
ipv6_route_metrics) ipv6_route_metrics_test;;
ipv4_route_metrics) ipv4_route_metrics_test;;
ipv4_route_v6_gw) ipv4_route_v6_gw_test;;
diff --git a/tools/testing/selftests/net/fin_ack_lat.c b/tools/testing/selftests/net/fin_ack_lat.c
new file mode 100644
index 000000000000..70187494b57a
--- /dev/null
+++ b/tools/testing/selftests/net/fin_ack_lat.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+static int child_pid;
+
+static unsigned long timediff(struct timeval s, struct timeval e)
+{
+ unsigned long s_us, e_us;
+
+ s_us = s.tv_sec * 1000000 + s.tv_usec;
+ e_us = e.tv_sec * 1000000 + e.tv_usec;
+ if (s_us > e_us)
+ return 0;
+ return e_us - s_us;
+}
+
+static void client(int port)
+{
+ int sock = 0;
+ struct sockaddr_in addr, laddr;
+ socklen_t len = sizeof(laddr);
+ struct linger sl;
+ int flag = 1;
+ int buffer;
+ struct timeval start, end;
+ unsigned long lat, sum_lat = 0, nr_lat = 0;
+
+ while (1) {
+ gettimeofday(&start, NULL);
+
+ sock = socket(AF_INET, SOCK_STREAM, 0);
+ if (sock < 0)
+ error(-1, errno, "socket creation");
+
+ sl.l_onoff = 1;
+ sl.l_linger = 0;
+ if (setsockopt(sock, SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)))
+ error(-1, errno, "setsockopt(linger)");
+
+ if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY,
+ &flag, sizeof(flag)))
+ error(-1, errno, "setsockopt(nodelay)");
+
+ addr.sin_family = AF_INET;
+ addr.sin_port = htons(port);
+
+ if (inet_pton(AF_INET, "127.0.0.1", &addr.sin_addr) <= 0)
+ error(-1, errno, "inet_pton");
+
+ if (connect(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+ error(-1, errno, "connect");
+
+ send(sock, &buffer, sizeof(buffer), 0);
+ if (read(sock, &buffer, sizeof(buffer)) == -1)
+ error(-1, errno, "waiting read");
+
+ gettimeofday(&end, NULL);
+ lat = timediff(start, end);
+ sum_lat += lat;
+ nr_lat++;
+ if (lat < 100000)
+ goto close;
+
+ if (getsockname(sock, (struct sockaddr *)&laddr, &len) == -1)
+ error(-1, errno, "getsockname");
+ printf("port: %d, lat: %lu, avg: %lu, nr: %lu\n",
+ ntohs(laddr.sin_port), lat,
+ sum_lat / nr_lat, nr_lat);
+close:
+ fflush(stdout);
+ close(sock);
+ }
+}
+
+static void server(int sock, struct sockaddr_in address)
+{
+ int accepted;
+ int addrlen = sizeof(address);
+ int buffer;
+
+ while (1) {
+ accepted = accept(sock, (struct sockaddr *)&address,
+ (socklen_t *)&addrlen);
+ if (accepted < 0)
+ error(-1, errno, "accept");
+
+ if (read(accepted, &buffer, sizeof(buffer)) == -1)
+ error(-1, errno, "read");
+ close(accepted);
+ }
+}
+
+static void sig_handler(int signum)
+{
+ kill(SIGTERM, child_pid);
+ exit(0);
+}
+
+int main(int argc, char const *argv[])
+{
+ int sock;
+ int opt = 1;
+ struct sockaddr_in address;
+ struct sockaddr_in laddr;
+ socklen_t len = sizeof(laddr);
+
+ if (signal(SIGTERM, sig_handler) == SIG_ERR)
+ error(-1, errno, "signal");
+
+ sock = socket(AF_INET, SOCK_STREAM, 0);
+ if (sock < 0)
+ error(-1, errno, "socket");
+
+ if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR | SO_REUSEPORT,
+ &opt, sizeof(opt)) == -1)
+ error(-1, errno, "setsockopt");
+
+ address.sin_family = AF_INET;
+ address.sin_addr.s_addr = INADDR_ANY;
+ /* dynamically allocate unused port */
+ address.sin_port = 0;
+
+ if (bind(sock, (struct sockaddr *)&address, sizeof(address)) < 0)
+ error(-1, errno, "bind");
+
+ if (listen(sock, 3) < 0)
+ error(-1, errno, "listen");
+
+ if (getsockname(sock, (struct sockaddr *)&laddr, &len) == -1)
+ error(-1, errno, "getsockname");
+
+ fprintf(stderr, "server port: %d\n", ntohs(laddr.sin_port));
+ child_pid = fork();
+ if (!child_pid)
+ client(ntohs(laddr.sin_port));
+ else
+ server(sock, laddr);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/net/fin_ack_lat.sh b/tools/testing/selftests/net/fin_ack_lat.sh
new file mode 100755
index 000000000000..a3ff6e0b2c7a
--- /dev/null
+++ b/tools/testing/selftests/net/fin_ack_lat.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test latency spikes caused by FIN/ACK handling race.
+
+set +x
+set -e
+
+tmpfile=$(mktemp /tmp/fin_ack_latency.XXXX.log)
+
+cleanup() {
+ kill $(pidof fin_ack_lat)
+ rm -f $tmpfile
+}
+
+trap cleanup EXIT
+
+do_test() {
+ RUNTIME=$1
+
+ ./fin_ack_lat | tee $tmpfile &
+ PID=$!
+
+ sleep $RUNTIME
+ NR_SPIKES=$(wc -l $tmpfile | awk '{print $1}')
+ if [ $NR_SPIKES -gt 0 ]
+ then
+ echo "FAIL: $NR_SPIKES spikes detected"
+ return 1
+ fi
+ return 0
+}
+
+do_test "30"
+echo "test done"
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 8553a67a2322..40b076983239 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -4,19 +4,21 @@
##############################################################################
# Defines
-DEVLINK_DEV=$(devlink port show "${NETIFS[p1]}" -j \
- | jq -r '.port | keys[]' | cut -d/ -f-2)
-if [ -z "$DEVLINK_DEV" ]; then
- echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
- exit 1
-fi
-if [[ "$(echo $DEVLINK_DEV | grep -c pci)" -eq 0 ]]; then
- echo "SKIP: devlink device's bus is not PCI"
- exit 1
-fi
+if [[ ! -v DEVLINK_DEV ]]; then
+ DEVLINK_DEV=$(devlink port show "${NETIFS[p1]}" -j \
+ | jq -r '.port | keys[]' | cut -d/ -f-2)
+ if [ -z "$DEVLINK_DEV" ]; then
+ echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
+ exit 1
+ fi
+ if [[ "$(echo $DEVLINK_DEV | grep -c pci)" -eq 0 ]]; then
+ echo "SKIP: devlink device's bus is not PCI"
+ exit 1
+ fi
-DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \
- -n | cut -d" " -f3)
+ DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \
+ -n | cut -d" " -f3)
+fi
##############################################################################
# Sanity checks
@@ -27,6 +29,12 @@ if [ $? -ne 0 ]; then
exit 1
fi
+devlink help 2>&1 | grep trap &> /dev/null
+if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 too old, missing devlink trap support"
+ exit 1
+fi
+
##############################################################################
# Devlink helpers
@@ -190,3 +198,215 @@ devlink_tc_bind_pool_th_restore()
devlink sb tc bind set $port tc $tc type $dir \
pool ${orig[0]} th ${orig[1]}
}
+
+devlink_traps_num_get()
+{
+ devlink -j trap | jq '.[]["'$DEVLINK_DEV'"] | length'
+}
+
+devlink_traps_get()
+{
+ devlink -j trap | jq -r '.[]["'$DEVLINK_DEV'"][].name'
+}
+
+devlink_trap_type_get()
+{
+ local trap_name=$1; shift
+
+ devlink -j trap show $DEVLINK_DEV trap $trap_name \
+ | jq -r '.[][][].type'
+}
+
+devlink_trap_action_set()
+{
+ local trap_name=$1; shift
+ local action=$1; shift
+
+ # Pipe output to /dev/null to avoid expected warnings.
+ devlink trap set $DEVLINK_DEV trap $trap_name \
+ action $action &> /dev/null
+}
+
+devlink_trap_action_get()
+{
+ local trap_name=$1; shift
+
+ devlink -j trap show $DEVLINK_DEV trap $trap_name \
+ | jq -r '.[][][].action'
+}
+
+devlink_trap_group_get()
+{
+ devlink -j trap show $DEVLINK_DEV trap $trap_name \
+ | jq -r '.[][][].group'
+}
+
+devlink_trap_metadata_test()
+{
+ local trap_name=$1; shift
+ local metadata=$1; shift
+
+ devlink -jv trap show $DEVLINK_DEV trap $trap_name \
+ | jq -e '.[][][].metadata | contains(["'$metadata'"])' \
+ &> /dev/null
+}
+
+devlink_trap_rx_packets_get()
+{
+ local trap_name=$1; shift
+
+ devlink -js trap show $DEVLINK_DEV trap $trap_name \
+ | jq '.[][][]["stats"]["rx"]["packets"]'
+}
+
+devlink_trap_rx_bytes_get()
+{
+ local trap_name=$1; shift
+
+ devlink -js trap show $DEVLINK_DEV trap $trap_name \
+ | jq '.[][][]["stats"]["rx"]["bytes"]'
+}
+
+devlink_trap_stats_idle_test()
+{
+ local trap_name=$1; shift
+ local t0_packets t0_bytes
+ local t1_packets t1_bytes
+
+ t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+ t0_bytes=$(devlink_trap_rx_bytes_get $trap_name)
+
+ sleep 1
+
+ t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+ t1_bytes=$(devlink_trap_rx_bytes_get $trap_name)
+
+ if [[ $t0_packets -eq $t1_packets && $t0_bytes -eq $t1_bytes ]]; then
+ return 0
+ else
+ return 1
+ fi
+}
+
+devlink_traps_enable_all()
+{
+ local trap_name
+
+ for trap_name in $(devlink_traps_get); do
+ devlink_trap_action_set $trap_name "trap"
+ done
+}
+
+devlink_traps_disable_all()
+{
+ for trap_name in $(devlink_traps_get); do
+ devlink_trap_action_set $trap_name "drop"
+ done
+}
+
+devlink_trap_groups_get()
+{
+ devlink -j trap group | jq -r '.[]["'$DEVLINK_DEV'"][].name'
+}
+
+devlink_trap_group_action_set()
+{
+ local group_name=$1; shift
+ local action=$1; shift
+
+ # Pipe output to /dev/null to avoid expected warnings.
+ devlink trap group set $DEVLINK_DEV group $group_name action $action \
+ &> /dev/null
+}
+
+devlink_trap_group_rx_packets_get()
+{
+ local group_name=$1; shift
+
+ devlink -js trap group show $DEVLINK_DEV group $group_name \
+ | jq '.[][][]["stats"]["rx"]["packets"]'
+}
+
+devlink_trap_group_rx_bytes_get()
+{
+ local group_name=$1; shift
+
+ devlink -js trap group show $DEVLINK_DEV group $group_name \
+ | jq '.[][][]["stats"]["rx"]["bytes"]'
+}
+
+devlink_trap_group_stats_idle_test()
+{
+ local group_name=$1; shift
+ local t0_packets t0_bytes
+ local t1_packets t1_bytes
+
+ t0_packets=$(devlink_trap_group_rx_packets_get $group_name)
+ t0_bytes=$(devlink_trap_group_rx_bytes_get $group_name)
+
+ sleep 1
+
+ t1_packets=$(devlink_trap_group_rx_packets_get $group_name)
+ t1_bytes=$(devlink_trap_group_rx_bytes_get $group_name)
+
+ if [[ $t0_packets -eq $t1_packets && $t0_bytes -eq $t1_bytes ]]; then
+ return 0
+ else
+ return 1
+ fi
+}
+
+devlink_trap_exception_test()
+{
+ local trap_name=$1; shift
+ local group_name=$1; shift
+
+ devlink_trap_stats_idle_test $trap_name
+ check_fail $? "Trap stats idle when packets should have been trapped"
+
+ devlink_trap_group_stats_idle_test $group_name
+ check_fail $? "Trap group idle when packets should have been trapped"
+}
+
+devlink_trap_drop_test()
+{
+ local trap_name=$1; shift
+ local group_name=$1; shift
+ local dev=$1; shift
+
+ # This is the common part of all the tests. It checks that stats are
+ # initially idle, then non-idle after changing the trap action and
+ # finally idle again. It also makes sure the packets are dropped and
+ # never forwarded.
+ devlink_trap_stats_idle_test $trap_name
+ check_err $? "Trap stats not idle with initial drop action"
+ devlink_trap_group_stats_idle_test $group_name
+ check_err $? "Trap group stats not idle with initial drop action"
+
+
+ devlink_trap_action_set $trap_name "trap"
+ devlink_trap_stats_idle_test $trap_name
+ check_fail $? "Trap stats idle after setting action to trap"
+ devlink_trap_group_stats_idle_test $group_name
+ check_fail $? "Trap group stats idle after setting action to trap"
+
+ devlink_trap_action_set $trap_name "drop"
+
+ devlink_trap_stats_idle_test $trap_name
+ check_err $? "Trap stats not idle after setting action to drop"
+ devlink_trap_group_stats_idle_test $group_name
+ check_err $? "Trap group stats not idle after setting action to drop"
+
+ tc_check_packets "dev $dev egress" 101 0
+ check_err $? "Packets were not dropped"
+}
+
+devlink_trap_drop_cleanup()
+{
+ local mz_pid=$1; shift
+ local dev=$1; shift
+ local proto=$1; shift
+
+ kill $mz_pid && wait $mz_pid &> /dev/null
+ tc filter del dev $dev egress protocol $proto pref 1 handle 101 flower
+}
diff --git a/tools/testing/selftests/net/forwarding/ethtool.sh b/tools/testing/selftests/net/forwarding/ethtool.sh
new file mode 100755
index 000000000000..eb8e2a23bbb4
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ethtool.sh
@@ -0,0 +1,318 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ same_speeds_autoneg_off
+ different_speeds_autoneg_off
+ combination_of_neg_on_and_off
+ advertise_subset_of_speeds
+ check_highest_speed_is_chosen
+ different_speeds_autoneg_on
+"
+NUM_NETIFS=2
+source lib.sh
+source ethtool_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/24
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+}
+
+different_speeds_get()
+{
+ local dev1=$1; shift
+ local dev2=$1; shift
+ local with_mode=$1; shift
+ local adver=$1; shift
+
+ local -a speeds_arr
+
+ speeds_arr=($(common_speeds_get $dev1 $dev2 $with_mode $adver))
+ if [[ ${#speeds_arr[@]} < 2 ]]; then
+ check_err 1 "cannot check different speeds. There are not enough speeds"
+ fi
+
+ echo ${speeds_arr[0]} ${speeds_arr[1]}
+}
+
+same_speeds_autoneg_off()
+{
+ # Check that when each of the reported speeds is forced, the links come
+ # up and are operational.
+ local -a speeds_arr=($(common_speeds_get $h1 $h2 0 0))
+
+ for speed in "${speeds_arr[@]}"; do
+ RET=0
+ ethtool_set $h1 speed $speed autoneg off
+ ethtool_set $h2 speed $speed autoneg off
+
+ setup_wait_dev_with_timeout $h1
+ setup_wait_dev_with_timeout $h2
+ ping_do $h1 192.0.2.2
+ check_err $? "speed $speed autoneg off"
+ log_test "force of same speed autoneg off"
+ log_info "speed = $speed"
+ done
+
+ ethtool -s $h2 autoneg on
+ ethtool -s $h1 autoneg on
+}
+
+different_speeds_autoneg_off()
+{
+ # Test that when we force different speeds, links are not up and ping
+ # fails.
+ RET=0
+
+ local -a speeds_arr=($(different_speeds_get $h1 $h2 0 0))
+ local speed1=${speeds_arr[0]}
+ local speed2=${speeds_arr[1]}
+
+ ethtool_set $h1 speed $speed1 autoneg off
+ ethtool_set $h2 speed $speed2 autoneg off
+
+ setup_wait_dev_with_timeout $h1
+ setup_wait_dev_with_timeout $h2
+ ping_do $h1 192.0.2.2
+ check_fail $? "ping with different speeds"
+
+ log_test "force of different speeds autoneg off"
+
+ ethtool -s $h2 autoneg on
+ ethtool -s $h1 autoneg on
+}
+
+combination_of_neg_on_and_off()
+{
+ # Test that when one device is forced to a speed supported by both
+ # endpoints and the other device is configured to autoneg on, the links
+ # are up and ping passes.
+ local -a speeds_arr=($(common_speeds_get $h1 $h2 0 1))
+
+ for speed in "${speeds_arr[@]}"; do
+ RET=0
+ ethtool_set $h1 speed $speed autoneg off
+
+ setup_wait_dev_with_timeout $h1
+ setup_wait_dev_with_timeout $h2
+ ping_do $h1 192.0.2.2
+ check_err $? "h1-speed=$speed autoneg off, h2 autoneg on"
+ log_test "one side with autoneg off and another with autoneg on"
+ log_info "force speed = $speed"
+ done
+
+ ethtool -s $h1 autoneg on
+}
+
+hex_speed_value_get()
+{
+ local speed=$1; shift
+
+ local shift_size=${speed_values[$speed]}
+ speed=$((0x1 << $"shift_size"))
+ printf "%#x" "$speed"
+}
+
+subset_of_common_speeds_get()
+{
+ local dev1=$1; shift
+ local dev2=$1; shift
+ local adver=$1; shift
+
+ local -a speeds_arr=($(common_speeds_get $dev1 $dev2 0 $adver))
+ local speed_to_advertise=0
+ local speed_to_remove=${speeds_arr[0]}
+ speed_to_remove+='base'
+
+ local -a speeds_mode_arr=($(common_speeds_get $dev1 $dev2 1 $adver))
+
+ for speed in ${speeds_mode_arr[@]}; do
+ if [[ $speed != $speed_to_remove* ]]; then
+ speed=$(hex_speed_value_get $speed)
+ speed_to_advertise=$(($speed_to_advertise | \
+ $speed))
+ fi
+
+ done
+
+ # Convert to hex.
+ printf "%#x" "$speed_to_advertise"
+}
+
+speed_to_advertise_get()
+{
+ # The function returns the hex number that is composed by OR-ing all
+ # the modes corresponding to the provided speed.
+ local speed_without_mode=$1; shift
+ local supported_speeds=("$@"); shift
+ local speed_to_advertise=0
+
+ speed_without_mode+='base'
+
+ for speed in ${supported_speeds[@]}; do
+ if [[ $speed == $speed_without_mode* ]]; then
+ speed=$(hex_speed_value_get $speed)
+ speed_to_advertise=$(($speed_to_advertise | \
+ $speed))
+ fi
+
+ done
+
+ # Convert to hex.
+ printf "%#x" "$speed_to_advertise"
+}
+
+advertise_subset_of_speeds()
+{
+ # Test that when one device advertises a subset of speeds and another
+ # advertises a specific speed (but all modes of this speed), the links
+ # are up and ping passes.
+ RET=0
+
+ local speed_1_to_advertise=$(subset_of_common_speeds_get $h1 $h2 1)
+ ethtool_set $h1 advertise $speed_1_to_advertise
+
+ if [ $RET != 0 ]; then
+ log_test "advertise subset of speeds"
+ return
+ fi
+
+ local -a speeds_arr_without_mode=($(common_speeds_get $h1 $h2 0 1))
+ # Check only speeds that h1 advertised. Remove the first speed.
+ unset speeds_arr_without_mode[0]
+ local -a speeds_arr_with_mode=($(common_speeds_get $h1 $h2 1 1))
+
+ for speed_value in ${speeds_arr_without_mode[@]}; do
+ RET=0
+ local speed_2_to_advertise=$(speed_to_advertise_get $speed_value \
+ "${speeds_arr_with_mode[@]}")
+ ethtool_set $h2 advertise $speed_2_to_advertise
+
+ setup_wait_dev_with_timeout $h1
+ setup_wait_dev_with_timeout $h2
+ ping_do $h1 192.0.2.2
+ check_err $? "h1=$speed_1_to_advertise, h2=$speed_2_to_advertise ($speed_value)"
+
+ log_test "advertise subset of speeds"
+ log_info "h1=$speed_1_to_advertise, h2=$speed_2_to_advertise"
+ done
+
+ ethtool -s $h2 autoneg on
+ ethtool -s $h1 autoneg on
+}
+
+check_highest_speed_is_chosen()
+{
+ # Test that when one device advertises a subset of speeds, the other
+ # chooses the highest speed. This test checks configuration without
+ # traffic.
+ RET=0
+
+ local max_speed
+ local chosen_speed
+ local speed_to_advertise=$(subset_of_common_speeds_get $h1 $h2 1)
+
+ ethtool_set $h1 advertise $speed_to_advertise
+
+ if [ $RET != 0 ]; then
+ log_test "check highest speed"
+ return
+ fi
+
+ local -a speeds_arr=($(common_speeds_get $h1 $h2 0 1))
+ # Remove the first speed, h1 does not advertise this speed.
+ unset speeds_arr[0]
+
+ max_speed=${speeds_arr[0]}
+ for current in ${speeds_arr[@]}; do
+ if [[ $current -gt $max_speed ]]; then
+ max_speed=$current
+ fi
+ done
+
+ setup_wait_dev_with_timeout $h1
+ setup_wait_dev_with_timeout $h2
+ chosen_speed=$(ethtool $h1 | grep 'Speed:')
+ chosen_speed=${chosen_speed%"Mb/s"*}
+ chosen_speed=${chosen_speed#*"Speed: "}
+ ((chosen_speed == max_speed))
+ check_err $? "h1 advertise $speed_to_advertise, h2 sync to speed $chosen_speed"
+
+ log_test "check highest speed"
+
+ ethtool -s $h2 autoneg on
+ ethtool -s $h1 autoneg on
+}
+
+different_speeds_autoneg_on()
+{
+ # Test that when we configure links to advertise different speeds,
+ # links are not up and ping fails.
+ RET=0
+
+ local -a speeds=($(different_speeds_get $h1 $h2 1 1))
+ local speed1=${speeds[0]}
+ local speed2=${speeds[1]}
+
+ speed1=$(hex_speed_value_get $speed1)
+ speed2=$(hex_speed_value_get $speed2)
+
+ ethtool_set $h1 advertise $speed1
+ ethtool_set $h2 advertise $speed2
+
+ if (($RET)); then
+ setup_wait_dev_with_timeout $h1
+ setup_wait_dev_with_timeout $h2
+ ping_do $h1 192.0.2.2
+ check_fail $? "ping with different speeds autoneg on"
+ fi
+
+ log_test "advertise different speeds autoneg on"
+
+ ethtool -s $h2 autoneg on
+ ethtool -s $h1 autoneg on
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+declare -gA speed_values
+eval "speed_values=($(speeds_arr_get))"
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ethtool_lib.sh b/tools/testing/selftests/net/forwarding/ethtool_lib.sh
new file mode 100755
index 000000000000..925d229a59d8
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ethtool_lib.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+speeds_arr_get()
+{
+ cmd='/ETHTOOL_LINK_MODE_[^[:space:]]*_BIT[[:space:]]+=[[:space:]]+/ \
+ {sub(/,$/, "") \
+ sub(/ETHTOOL_LINK_MODE_/,"") \
+ sub(/_BIT/,"") \
+ sub(/_Full/,"/Full") \
+ sub(/_Half/,"/Half");\
+ print "["$1"]="$3}'
+
+ awk "${cmd}" /usr/include/linux/ethtool.h
+}
+
+ethtool_set()
+{
+ local cmd="$@"
+ local out=$(ethtool -s $cmd 2>&1 | wc -l)
+
+ check_err $out "error in configuration. $cmd"
+}
+
+dev_speeds_get()
+{
+ local dev=$1; shift
+ local with_mode=$1; shift
+ local adver=$1; shift
+ local speeds_str
+
+ if (($adver)); then
+ mode="Advertised link modes"
+ else
+ mode="Supported link modes"
+ fi
+
+ speeds_str=$(ethtool "$dev" | \
+ # Snip everything before the link modes section.
+ sed -n '/'"$mode"':/,$p' | \
+ # Quit processing the rest at the start of the next section.
+ # When checking, skip the header of this section (hence the 2,).
+ sed -n '2,${/^[\t][^ \t]/q};p' | \
+ # Drop the section header of the current section.
+ cut -d':' -f2)
+
+ local -a speeds_arr=($speeds_str)
+ if [[ $with_mode -eq 0 ]]; then
+ for ((i=0; i<${#speeds_arr[@]}; i++)); do
+ speeds_arr[$i]=${speeds_arr[$i]%base*}
+ done
+ fi
+ echo ${speeds_arr[@]}
+}
+
+common_speeds_get()
+{
+ dev1=$1; shift
+ dev2=$1; shift
+ with_mode=$1; shift
+ adver=$1; shift
+
+ local -a dev1_speeds=($(dev_speeds_get $dev1 $with_mode $adver))
+ local -a dev2_speeds=($(dev_speeds_get $dev2 $with_mode $adver))
+
+ comm -12 \
+ <(printf '%s\n' "${dev1_speeds[@]}" | sort -u) \
+ <(printf '%s\n' "${dev2_speeds[@]}" | sort -u)
+}
diff --git a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
new file mode 100644
index 000000000000..66496659bea7
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
@@ -0,0 +1,873 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Various helpers and tests to verify FIB offload.
+
+__fib_trap_check()
+{
+ local ns=$1; shift
+ local family=$1; shift
+ local route=$1; shift
+ local should_fail=$1; shift
+ local ret
+
+ ip -n $ns -j -p -$family route show $route \
+ | jq -e '.[]["flags"] | contains(["trap"])' &> /dev/null
+ ret=$?
+ if [[ $should_fail == "true" ]]; then
+ if [[ $ret -ne 0 ]]; then
+ return 0
+ else
+ return 1
+ fi
+ fi
+
+ return $ret
+}
+
+fib_trap_check()
+{
+ local ns=$1; shift
+ local family=$1; shift
+ local route=$1; shift
+ local should_fail=$1; shift
+
+ busywait 5000 __fib_trap_check $ns $family "$route" $should_fail
+}
+
+fib4_trap_check()
+{
+ local ns=$1; shift
+ local route=$1; shift
+ local should_fail=$1; shift
+
+ fib_trap_check $ns 4 "$route" $should_fail
+}
+
+fib6_trap_check()
+{
+ local ns=$1; shift
+ local route=$1; shift
+ local should_fail=$1; shift
+
+ fib_trap_check $ns 6 "$route" $should_fail
+}
+
+fib_ipv4_identical_routes_test()
+{
+ local ns=$1; shift
+ local i
+
+ RET=0
+
+ for i in $(seq 1 3); do
+ ip -n $ns link add name dummy$i type dummy
+ ip -n $ns link set dev dummy$i up
+ done
+
+ ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 0 metric 1024
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0 metric 1024" false
+ check_err $? "Route not in hardware when should"
+
+ ip -n $ns route append 192.0.2.0/24 dev dummy2 tos 0 metric 1024
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy2 tos 0 metric 1024" true
+ check_err $? "Appended route in hardware when should not"
+
+ ip -n $ns route prepend 192.0.2.0/24 dev dummy3 tos 0 metric 1024
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy3 tos 0 metric 1024" false
+ check_err $? "Prepended route not in hardware when should"
+
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0 metric 1024" true
+ check_err $? "Route was not replaced in hardware by prepended one"
+
+ log_test "IPv4 identical routes"
+
+ for i in $(seq 1 3); do
+ ip -n $ns link del dev dummy$i
+ done
+}
+
+fib_ipv4_tos_test()
+{
+ local ns=$1; shift
+
+ RET=0
+
+ ip -n $ns link add name dummy1 type dummy
+ ip -n $ns link set dev dummy1 up
+
+ ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 0 metric 1024
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0 metric 1024" false
+ check_err $? "Route not in hardware when should"
+
+ ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 2 metric 1024
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 2 metric 1024" false
+ check_err $? "Highest TOS route not in hardware when should"
+
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0 metric 1024" true
+ check_err $? "Lowest TOS route still in hardware when should not"
+
+ ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 1 metric 1024
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 1 metric 1024" true
+ check_err $? "Middle TOS route in hardware when should not"
+
+ log_test "IPv4 routes with TOS"
+
+ ip -n $ns link del dev dummy1
+}
+
+fib_ipv4_metric_test()
+{
+ local ns=$1; shift
+
+ RET=0
+
+ ip -n $ns link add name dummy1 type dummy
+ ip -n $ns link set dev dummy1 up
+
+ ip -n $ns route add 192.0.2.0/24 dev dummy1 metric 1024
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1024" false
+ check_err $? "Route not in hardware when should"
+
+ ip -n $ns route add 192.0.2.0/24 dev dummy1 metric 1022
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1022" false
+ check_err $? "Lowest metric route not in hardware when should"
+
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1024" true
+ check_err $? "Highest metric route still in hardware when should not"
+
+ ip -n $ns route add 192.0.2.0/24 dev dummy1 metric 1023
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1023" true
+ check_err $? "Middle metric route in hardware when should not"
+
+ log_test "IPv4 routes with metric"
+
+ ip -n $ns link del dev dummy1
+}
+
+fib_ipv4_replace_test()
+{
+ local ns=$1; shift
+ local i
+
+ RET=0
+
+ for i in $(seq 1 2); do
+ ip -n $ns link add name dummy$i type dummy
+ ip -n $ns link set dev dummy$i up
+ done
+
+ ip -n $ns route add 192.0.2.0/24 dev dummy1 metric 1024
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1024" false
+ check_err $? "Route not in hardware when should"
+
+ ip -n $ns route replace 192.0.2.0/24 dev dummy2 metric 1024
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy2 metric 1024" false
+ check_err $? "Replacement route not in hardware when should"
+
+ # Add a route with an higher metric and make sure that replacing it
+ # does not affect the lower metric one.
+ ip -n $ns route add 192.0.2.0/24 dev dummy1 metric 1025
+ ip -n $ns route replace 192.0.2.0/24 dev dummy2 metric 1025
+
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy2 metric 1024" false
+ check_err $? "Lowest metric route not in hardware when should"
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy2 metric 1025" true
+ check_err $? "Highest metric route in hardware when should not"
+
+ log_test "IPv4 route replace"
+
+ for i in $(seq 1 2); do
+ ip -n $ns link del dev dummy$i
+ done
+}
+
+fib_ipv4_delete_test()
+{
+ local ns=$1; shift
+ local metric
+
+ RET=0
+
+ ip -n $ns link add name dummy1 type dummy
+ ip -n $ns link set dev dummy1 up
+
+ # Insert multiple routes with the same prefix and length and varying
+ # metrics. Make sure that throughout delete operations the lowest
+ # metric route is the one in hardware.
+ for metric in $(seq 1024 1026); do
+ ip -n $ns route add 192.0.2.0/24 dev dummy1 metric $metric
+ done
+
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1024" false
+ check_err $? "Route not in hardware when should"
+
+ ip -n $ns route del 192.0.2.0/24 dev dummy1 metric 1024
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1025" false
+ check_err $? "Lowest metric route not in hardware when should"
+
+ ip -n $ns route del 192.0.2.0/24 dev dummy1 metric 1026
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1025" false
+ check_err $? "Sole route not in hardware when should"
+
+ log_test "IPv4 route delete"
+
+ ip -n $ns link del dev dummy1
+}
+
+fib_ipv4_plen_test()
+{
+ local ns=$1; shift
+
+ RET=0
+
+ ip -n $ns link add name dummy1 type dummy
+ ip -n $ns link set dev dummy1 up
+
+ # Add two routes with the same key and different prefix length and
+ # make sure both are in hardware. It can be verfied that both are
+ # sharing the same leaf by checking the /proc/net/fib_trie
+ ip -n $ns route add 192.0.2.0/24 dev dummy1
+ ip -n $ns route add 192.0.2.0/25 dev dummy1
+
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1" false
+ check_err $? "/24 not in hardware when should"
+
+ fib4_trap_check $ns "192.0.2.0/25 dev dummy1" false
+ check_err $? "/25 not in hardware when should"
+
+ log_test "IPv4 routes with different prefix length"
+
+ ip -n $ns link del dev dummy1
+}
+
+fib_ipv4_replay_metric_test()
+{
+ local ns=$1; shift
+ local devlink_dev=$1; shift
+
+ RET=0
+
+ ip -n $ns link add name dummy1 type dummy
+ ip -n $ns link set dev dummy1 up
+
+ ip -n $ns route add 192.0.2.0/24 dev dummy1 metric 1024
+ ip -n $ns route add 192.0.2.0/24 dev dummy1 metric 1025
+
+ devlink -N $ns dev reload $devlink_dev
+
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1024" false
+ check_err $? "Lowest metric route not in hardware when should"
+
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1025" true
+ check_err $? "Highest metric route in hardware when should not"
+
+ log_test "IPv4 routes replay - metric"
+
+ ip -n $ns link del dev dummy1
+}
+
+fib_ipv4_replay_tos_test()
+{
+ local ns=$1; shift
+ local devlink_dev=$1; shift
+
+ RET=0
+
+ ip -n $ns link add name dummy1 type dummy
+ ip -n $ns link set dev dummy1 up
+
+ ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 0
+ ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 1
+
+ devlink -N $ns dev reload $devlink_dev
+
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 1" false
+ check_err $? "Highest TOS route not in hardware when should"
+
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0" true
+ check_err $? "Lowest TOS route in hardware when should not"
+
+ log_test "IPv4 routes replay - TOS"
+
+ ip -n $ns link del dev dummy1
+}
+
+fib_ipv4_replay_plen_test()
+{
+ local ns=$1; shift
+ local devlink_dev=$1; shift
+
+ RET=0
+
+ ip -n $ns link add name dummy1 type dummy
+ ip -n $ns link set dev dummy1 up
+
+ ip -n $ns route add 192.0.2.0/24 dev dummy1
+ ip -n $ns route add 192.0.2.0/25 dev dummy1
+
+ devlink -N $ns dev reload $devlink_dev
+
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1" false
+ check_err $? "/24 not in hardware when should"
+
+ fib4_trap_check $ns "192.0.2.0/25 dev dummy1" false
+ check_err $? "/25 not in hardware when should"
+
+ log_test "IPv4 routes replay - prefix length"
+
+ ip -n $ns link del dev dummy1
+}
+
+fib_ipv4_flush_test()
+{
+ local ns=$1; shift
+ local metric
+
+ RET=0
+
+ ip -n $ns link add name dummy1 type dummy
+ ip -n $ns link set dev dummy1 up
+
+ # Exercise the routes flushing code paths by inserting various
+ # prefix routes on a netdev and then deleting it.
+ for metric in $(seq 1 20); do
+ ip -n $ns route add 192.0.2.0/24 dev dummy1 metric $metric
+ done
+
+ ip -n $ns link del dev dummy1
+
+ log_test "IPv4 routes flushing"
+}
+
+fib_ipv6_add_test()
+{
+ local ns=$1; shift
+
+ RET=0
+
+ for i in $(seq 1 2); do
+ ip -n $ns link add name dummy$i type dummy
+ ip -n $ns link set dev dummy$i up
+ done
+
+ ip -n $ns route add 2001:db8:1::/64 dev dummy1 metric 1024
+ fib6_trap_check $ns "2001:db8:1::/64 dev dummy1 metric 1024" false
+ check_err $? "Route not in hardware when should"
+
+ ip -n $ns route append 2001:db8:1::/64 dev dummy2 metric 1024
+ fib6_trap_check $ns "2001:db8:1::/64 dev dummy2 metric 1024" true
+ check_err $? "Route in hardware when should not"
+
+ fib6_trap_check $ns "2001:db8:1::/64 dev dummy1 metric 1024" false
+ check_err $? "Route not in hardware after appending route"
+
+ log_test "IPv6 single route add"
+
+ for i in $(seq 1 2); do
+ ip -n $ns link del dev dummy$i
+ done
+}
+
+fib_ipv6_metric_test()
+{
+ local ns=$1; shift
+
+ RET=0
+
+ ip -n $ns link add name dummy1 type dummy
+ ip -n $ns link set dev dummy1 up
+
+ ip -n $ns route add 2001:db8:1::/64 dev dummy1 metric 1024
+ fib6_trap_check $ns "2001:db8:1::/64 dev dummy1 metric 1024" false
+ check_err $? "Route not in hardware when should"
+
+ ip -n $ns route add 2001:db8:1::/64 dev dummy1 metric 1022
+ fib6_trap_check $ns "2001:db8:1::/64 dev dummy1 metric 1022" false
+ check_err $? "Lowest metric route not in hardware when should"
+
+ fib6_trap_check $ns "2001:db8:1::/64 dev dummy1 metric 1024" true
+ check_err $? "Highest metric route still in hardware when should not"
+
+ ip -n $ns route add 2001:db8:1::/64 dev dummy1 metric 1023
+ fib6_trap_check $ns "2001:db8:1::/64 dev dummy1 metric 1023" true
+ check_err $? "Middle metric route in hardware when should not"
+
+ log_test "IPv6 routes with metric"
+
+ ip -n $ns link del dev dummy1
+}
+
+fib_ipv6_append_single_test()
+{
+ local ns=$1; shift
+
+ # When an IPv6 multipath route is added without the 'nexthop' keyword,
+ # different code paths are taken compared to when the keyword is used.
+ # This test tries to verify the former.
+ RET=0
+
+ for i in $(seq 1 2); do
+ ip -n $ns link add name dummy$i type dummy
+ ip -n $ns link set dev dummy$i up
+ ip -n $ns address add 2001:db8:$i::1/64 dev dummy$i
+ done
+
+ ip -n $ns route add 2001:db8:10::/64 via 2001:db8:1::2 metric 1024
+ fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false
+ check_err $? "Route not in hardware when should"
+
+ ip -n $ns route append 2001:db8:10::/64 via 2001:db8:2::2 metric 1024
+ fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false
+ check_err $? "Route not in hardware after appending"
+
+ ip -n $ns route add 2001:db8:10::/64 via 2001:db8:1::2 metric 1025
+ fib6_trap_check $ns "2001:db8:10::/64 metric 1025" true
+ check_err $? "Route in hardware when should not"
+
+ ip -n $ns route append 2001:db8:10::/64 via 2001:db8:2::2 metric 1025
+ fib6_trap_check $ns "2001:db8:10::/64 metric 1025" true
+ check_err $? "Route in hardware when should not after appending"
+
+ fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false
+ check_err $? "Lowest metric route not in hardware when should"
+
+ log_test "IPv6 append single route without 'nexthop' keyword"
+
+ for i in $(seq 1 2); do
+ ip -n $ns link del dev dummy$i
+ done
+}
+
+fib_ipv6_replace_single_test()
+{
+ local ns=$1; shift
+ local i
+
+ RET=0
+
+ for i in $(seq 1 2); do
+ ip -n $ns link add name dummy$i type dummy
+ ip -n $ns link set dev dummy$i up
+ done
+
+ ip -n $ns route add 2001:db8:1::/64 dev dummy1 metric 1024
+ fib6_trap_check $ns "2001:db8:1::/64 dev dummy1 metric 1024" false
+ check_err $? "Route not in hardware when should"
+
+ ip -n $ns route replace 2001:db8:1::/64 dev dummy2 metric 1024
+ fib6_trap_check $ns "2001:db8:1::/64 dev dummy2 metric 1024" false
+ check_err $? "Replacement route not in hardware when should"
+
+ # Add a route with an higher metric and make sure that replacing it
+ # does not affect the lower metric one.
+ ip -n $ns route add 2001:db8:1::/64 dev dummy1 metric 1025
+ ip -n $ns route replace 2001:db8:1::/64 dev dummy2 metric 1025
+
+ fib6_trap_check $ns "2001:db8:1::/64 dev dummy2 metric 1024" false
+ check_err $? "Lowest metric route not in hardware when should"
+ fib6_trap_check $ns "2001:db8:1::/64 dev dummy2 metric 1025" true
+ check_err $? "Highest metric route in hardware when should not"
+
+ log_test "IPv6 single route replace"
+
+ for i in $(seq 1 2); do
+ ip -n $ns link del dev dummy$i
+ done
+}
+
+fib_ipv6_metric_multipath_test()
+{
+ local ns=$1; shift
+
+ RET=0
+
+ for i in $(seq 1 2); do
+ ip -n $ns link add name dummy$i type dummy
+ ip -n $ns link set dev dummy$i up
+ ip -n $ns address add 2001:db8:$i::1/64 dev dummy$i
+ done
+
+ ip -n $ns route add 2001:db8:10::/64 metric 1024 \
+ nexthop via 2001:db8:1::2 dev dummy1 \
+ nexthop via 2001:db8:2::2 dev dummy2
+ fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false
+ check_err $? "Route not in hardware when should"
+
+ ip -n $ns route add 2001:db8:10::/64 metric 1022 \
+ nexthop via 2001:db8:1::2 dev dummy1 \
+ nexthop via 2001:db8:2::2 dev dummy2
+ fib6_trap_check $ns "2001:db8:10::/64 metric 1022" false
+ check_err $? "Lowest metric route not in hardware when should"
+
+ ip -n $ns route add 2001:db8:10::/64 metric 1023 \
+ nexthop via 2001:db8:1::2 dev dummy1 \
+ nexthop via 2001:db8:2::2 dev dummy2
+ fib6_trap_check $ns "2001:db8:10::/64 metric 1024" true
+ check_err $? "Highest metric route still in hardware when should not"
+
+ fib6_trap_check $ns "2001:db8:10::/64 metric 1023" true
+ check_err $? "Middle metric route in hardware when should not"
+
+ log_test "IPv6 multipath routes with metric"
+
+ for i in $(seq 1 2); do
+ ip -n $ns link del dev dummy$i
+ done
+}
+
+fib_ipv6_append_multipath_test()
+{
+ local ns=$1; shift
+
+ RET=0
+
+ for i in $(seq 1 3); do
+ ip -n $ns link add name dummy$i type dummy
+ ip -n $ns link set dev dummy$i up
+ ip -n $ns address add 2001:db8:$i::1/64 dev dummy$i
+ done
+
+ ip -n $ns route add 2001:db8:10::/64 metric 1024 \
+ nexthop via 2001:db8:1::2 dev dummy1
+ fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false
+ check_err $? "Route not in hardware when should"
+
+ ip -n $ns route append 2001:db8:10::/64 metric 1024 \
+ nexthop via 2001:db8:2::2 dev dummy2 \
+ nexthop via 2001:db8:3::2 dev dummy3
+ fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false
+ check_err $? "Route not in hardware after appending"
+
+ ip -n $ns route add 2001:db8:10::/64 metric 1025 \
+ nexthop via 2001:db8:1::2 dev dummy1
+ fib6_trap_check $ns "2001:db8:10::/64 metric 1025" true
+ check_err $? "Route in hardware when should not"
+
+ ip -n $ns route append 2001:db8:10::/64 metric 1025 \
+ nexthop via 2001:db8:2::2 dev dummy2 \
+ nexthop via 2001:db8:3::2 dev dummy3
+ fib6_trap_check $ns "2001:db8:10::/64 metric 1025" true
+ check_err $? "Route in hardware when should not after appending"
+
+ fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false
+ check_err $? "Lowest metric route not in hardware when should"
+
+ log_test "IPv6 append multipath route with 'nexthop' keyword"
+
+ for i in $(seq 1 3); do
+ ip -n $ns link del dev dummy$i
+ done
+}
+
+fib_ipv6_replace_multipath_test()
+{
+ local ns=$1; shift
+ local i
+
+ RET=0
+
+ for i in $(seq 1 3); do
+ ip -n $ns link add name dummy$i type dummy
+ ip -n $ns link set dev dummy$i up
+ ip -n $ns address add 2001:db8:$i::1/64 dev dummy$i
+ done
+
+ ip -n $ns route add 2001:db8:10::/64 metric 1024 \
+ nexthop via 2001:db8:1::2 dev dummy1 \
+ nexthop via 2001:db8:2::2 dev dummy2
+ fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false
+ check_err $? "Route not in hardware when should"
+
+ ip -n $ns route replace 2001:db8:10::/64 metric 1024 \
+ nexthop via 2001:db8:1::2 dev dummy1 \
+ nexthop via 2001:db8:3::2 dev dummy3
+ fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false
+ check_err $? "Replacement route not in hardware when should"
+
+ # Add a route with an higher metric and make sure that replacing it
+ # does not affect the lower metric one.
+ ip -n $ns route add 2001:db8:10::/64 metric 1025 \
+ nexthop via 2001:db8:1::2 dev dummy1 \
+ nexthop via 2001:db8:2::2 dev dummy2
+ ip -n $ns route replace 2001:db8:10::/64 metric 1025 \
+ nexthop via 2001:db8:1::2 dev dummy1 \
+ nexthop via 2001:db8:3::2 dev dummy3
+
+ fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false
+ check_err $? "Lowest metric route not in hardware when should"
+ fib6_trap_check $ns "2001:db8:10::/64 metric 1025" true
+ check_err $? "Highest metric route in hardware when should not"
+
+ log_test "IPv6 multipath route replace"
+
+ for i in $(seq 1 3); do
+ ip -n $ns link del dev dummy$i
+ done
+}
+
+fib_ipv6_append_multipath_to_single_test()
+{
+ local ns=$1; shift
+
+ # Test that when the first route in the leaf is not a multipath route
+ # and we try to append a multipath route with the same metric to it, it
+ # is not notified.
+ RET=0
+
+ for i in $(seq 1 2); do
+ ip -n $ns link add name dummy$i type dummy
+ ip -n $ns link set dev dummy$i up
+ ip -n $ns address add 2001:db8:$i::1/64 dev dummy$i
+ done
+
+ ip -n $ns route add 2001:db8:10::/64 dev dummy1 metric 1024
+ fib6_trap_check $ns "2001:db8:10::/64 dev dummy1 metric 1024" false
+ check_err $? "Route not in hardware when should"
+
+ ip -n $ns route append 2001:db8:10::/64 metric 1024 \
+ nexthop via 2001:db8:2::2 dev dummy2
+ fib6_trap_check $ns "2001:db8:10::/64 dev dummy2 metric 1024" true
+ check_err $? "Route in hardware when should not"
+
+ fib6_trap_check $ns "2001:db8:10::/64 dev dummy1 metric 1024" false
+ check_err $? "Route not in hardware after append"
+
+ log_test "IPv6 append multipath route to non-multipath route"
+
+ for i in $(seq 1 2); do
+ ip -n $ns link del dev dummy$i
+ done
+}
+
+fib_ipv6_delete_single_test()
+{
+ local ns=$1; shift
+
+ # Test various deletion scenarios, where only a single route is
+ # deleted from the FIB node.
+ for i in $(seq 1 2); do
+ ip -n $ns link add name dummy$i type dummy
+ ip -n $ns link set dev dummy$i up
+ ip -n $ns address add 2001:db8:$i::1/64 dev dummy$i
+ done
+
+ # Test deletion of a single route when it is the only route in the FIB
+ # node.
+ RET=0
+
+ ip -n $ns route add 2001:db8:10::/64 dev dummy1 metric 1024
+ ip -n $ns route del 2001:db8:10::/64 dev dummy1 metric 1024
+
+ log_test "IPv6 delete sole single route"
+
+ # Test that deletion of last route does not affect the first one.
+ RET=0
+
+ ip -n $ns route add 2001:db8:10::/64 dev dummy1 metric 1024
+ ip -n $ns route add 2001:db8:10::/64 dev dummy1 metric 1025
+ ip -n $ns route del 2001:db8:10::/64 dev dummy1 metric 1025
+
+ fib6_trap_check $ns "2001:db8:10::/64 dev dummy1 metric 1024" false
+ check_err $? "Route not in hardware after deleting higher metric route"
+
+ log_test "IPv6 delete single route not in hardware"
+
+ ip -n $ns route del 2001:db8:10::/64 dev dummy1 metric 1024
+
+ # Test that first route is replaced by next single route in the FIB
+ # node.
+ RET=0
+
+ ip -n $ns route add 2001:db8:10::/64 dev dummy1 metric 1024
+ ip -n $ns route add 2001:db8:10::/64 dev dummy1 metric 1025
+ ip -n $ns route del 2001:db8:10::/64 dev dummy1 metric 1024
+
+ fib6_trap_check $ns "2001:db8:10::/64 dev dummy1 metric 1025" false
+ check_err $? "Route not in hardware after deleting lowest metric route"
+
+ log_test "IPv6 delete single route - replaced by single"
+
+ ip -n $ns route del 2001:db8:10::/64 dev dummy1 metric 1025
+
+ # Test that first route is replaced by next multipath route in the FIB
+ # node.
+ RET=0
+
+ ip -n $ns route add 2001:db8:10::/64 dev dummy1 metric 1024
+ ip -n $ns route add 2001:db8:10::/64 metric 1025 \
+ nexthop via 2001:db8:1::2 dev dummy1 \
+ nexthop via 2001:db8:2::2 dev dummy2
+ ip -n $ns route del 2001:db8:10::/64 dev dummy1 metric 1024
+
+ fib6_trap_check $ns "2001:db8:10::/64 metric 1025" false
+ check_err $? "Route not in hardware after deleting lowest metric route"
+
+ log_test "IPv6 delete single route - replaced by multipath"
+
+ ip -n $ns route del 2001:db8:10::/64 metric 1025
+
+ # Test deletion of a single nexthop from a multipath route.
+ ip -n $ns route add 2001:db8:10::/64 metric 1024 \
+ nexthop via 2001:db8:1::2 dev dummy1 \
+ nexthop via 2001:db8:2::2 dev dummy2
+ ip -n $ns route del 2001:db8:10::/64 metric 1024 \
+ nexthop via 2001:db8:1::2 dev dummy1
+
+ fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false
+ check_err $? "Route not in hardware after deleting a single nexthop"
+
+ log_test "IPv6 delete single nexthop"
+
+ ip -n $ns route del 2001:db8:10::/64 metric 1024
+
+ for i in $(seq 1 2); do
+ ip -n $ns link del dev dummy$i
+ done
+}
+
+fib_ipv6_delete_multipath_test()
+{
+ local ns=$1; shift
+
+ # Test various deletion scenarios, where an entire multipath route is
+ # deleted from the FIB node.
+ for i in $(seq 1 2); do
+ ip -n $ns link add name dummy$i type dummy
+ ip -n $ns link set dev dummy$i up
+ ip -n $ns address add 2001:db8:$i::1/64 dev dummy$i
+ done
+
+ # Test deletion of a multipath route when it is the only route in the
+ # FIB node.
+ RET=0
+
+ ip -n $ns route add 2001:db8:10::/64 metric 1024 \
+ nexthop via 2001:db8:1::2 dev dummy1 \
+ nexthop via 2001:db8:2::2 dev dummy2
+ ip -n $ns route del 2001:db8:10::/64 metric 1024
+
+ log_test "IPv6 delete sole multipath route"
+
+ # Test that deletion of last route does not affect the first one.
+ RET=0
+
+ ip -n $ns route add 2001:db8:10::/64 metric 1024 \
+ nexthop via 2001:db8:1::2 dev dummy1 \
+ nexthop via 2001:db8:2::2 dev dummy2
+ ip -n $ns route add 2001:db8:10::/64 metric 1025 \
+ nexthop via 2001:db8:1::2 dev dummy1 \
+ nexthop via 2001:db8:2::2 dev dummy2
+ ip -n $ns route del 2001:db8:10::/64 metric 1025
+
+ fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false
+ check_err $? "Route not in hardware after deleting higher metric route"
+
+ log_test "IPv6 delete multipath route not in hardware"
+
+ ip -n $ns route del 2001:db8:10::/64 metric 1024
+
+ # Test that first route is replaced by next single route in the FIB
+ # node.
+ RET=0
+
+ ip -n $ns route add 2001:db8:10::/64 metric 1024 \
+ nexthop via 2001:db8:1::2 dev dummy1 \
+ nexthop via 2001:db8:2::2 dev dummy2
+ ip -n $ns route add 2001:db8:10::/64 dev dummy1 metric 1025
+ ip -n $ns route del 2001:db8:10::/64 metric 1024
+
+ fib6_trap_check $ns "2001:db8:10::/64 dev dummy1 metric 1025" false
+ check_err $? "Route not in hardware after deleting lowest metric route"
+
+ log_test "IPv6 delete multipath route - replaced by single"
+
+ ip -n $ns route del 2001:db8:10::/64 dev dummy1 metric 1025
+
+ # Test that first route is replaced by next multipath route in the FIB
+ # node.
+ RET=0
+
+ ip -n $ns route add 2001:db8:10::/64 metric 1024 \
+ nexthop via 2001:db8:1::2 dev dummy1 \
+ nexthop via 2001:db8:2::2 dev dummy2
+ ip -n $ns route add 2001:db8:10::/64 metric 1025 \
+ nexthop via 2001:db8:1::2 dev dummy1 \
+ nexthop via 2001:db8:2::2 dev dummy2
+ ip -n $ns route del 2001:db8:10::/64 metric 1024
+
+ fib6_trap_check $ns "2001:db8:10::/64 metric 1025" false
+ check_err $? "Route not in hardware after deleting lowest metric route"
+
+ log_test "IPv6 delete multipath route - replaced by multipath"
+
+ ip -n $ns route del 2001:db8:10::/64 metric 1025
+
+ for i in $(seq 1 2); do
+ ip -n $ns link del dev dummy$i
+ done
+}
+
+fib_ipv6_replay_single_test()
+{
+ local ns=$1; shift
+ local devlink_dev=$1; shift
+
+ RET=0
+
+ for i in $(seq 1 2); do
+ ip -n $ns link add name dummy$i type dummy
+ ip -n $ns link set dev dummy$i up
+ done
+
+ ip -n $ns route add 2001:db8:1::/64 dev dummy1
+ ip -n $ns route append 2001:db8:1::/64 dev dummy2
+
+ devlink -N $ns dev reload $devlink_dev
+
+ fib6_trap_check $ns "2001:db8:1::/64 dev dummy1" false
+ check_err $? "First route not in hardware when should"
+
+ fib6_trap_check $ns "2001:db8:1::/64 dev dummy2" true
+ check_err $? "Second route in hardware when should not"
+
+ log_test "IPv6 routes replay - single route"
+
+ for i in $(seq 1 2); do
+ ip -n $ns link del dev dummy$i
+ done
+}
+
+fib_ipv6_replay_multipath_test()
+{
+ local ns=$1; shift
+ local devlink_dev=$1; shift
+
+ RET=0
+
+ for i in $(seq 1 2); do
+ ip -n $ns link add name dummy$i type dummy
+ ip -n $ns link set dev dummy$i up
+ ip -n $ns address add 2001:db8:$i::1/64 dev dummy$i
+ done
+
+ ip -n $ns route add 2001:db8:10::/64 metric 1024 \
+ nexthop via 2001:db8:1::2 dev dummy1 \
+ nexthop via 2001:db8:2::2 dev dummy2
+ ip -n $ns route add 2001:db8:10::/64 metric 1025 \
+ nexthop via 2001:db8:1::2 dev dummy1 \
+ nexthop via 2001:db8:2::2 dev dummy2
+
+ devlink -N $ns dev reload $devlink_dev
+
+ fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false
+ check_err $? "First route not in hardware when should"
+
+ fib6_trap_check $ns "2001:db8:10::/64 metric 1025" true
+ check_err $? "Second route in hardware when should not"
+
+ log_test "IPv6 routes replay - multipath route"
+
+ for i in $(seq 1 2); do
+ ip -n $ns link del dev dummy$i
+ done
+}
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 9385dc971269..2f5da414aaa7 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -18,6 +18,8 @@ NETIF_CREATE=${NETIF_CREATE:=yes}
MCD=${MCD:=smcrouted}
MC_CLI=${MC_CLI:=smcroutectl}
PING_TIMEOUT=${PING_TIMEOUT:=5}
+WAIT_TIMEOUT=${WAIT_TIMEOUT:=20}
+INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600}
relative_path="${BASH_SOURCE%/*}"
if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
@@ -223,33 +225,119 @@ log_info()
echo "INFO: $msg"
}
+busywait()
+{
+ local timeout=$1; shift
+
+ local start_time="$(date -u +%s%3N)"
+ while true
+ do
+ local out
+ out=$("$@")
+ local ret=$?
+ if ((!ret)); then
+ echo -n "$out"
+ return 0
+ fi
+
+ local current_time="$(date -u +%s%3N)"
+ if ((current_time - start_time > timeout)); then
+ echo -n "$out"
+ return 1
+ fi
+ done
+}
+
+until_counter_is()
+{
+ local value=$1; shift
+ local current=$("$@")
+
+ echo $((current))
+ ((current >= value))
+}
+
+busywait_for_counter()
+{
+ local timeout=$1; shift
+ local delta=$1; shift
+
+ local base=$("$@")
+ busywait "$timeout" until_counter_is $((base + delta)) "$@"
+}
+
setup_wait_dev()
{
local dev=$1; shift
+ local wait_time=${1:-$WAIT_TIME}; shift
+
+ setup_wait_dev_with_timeout "$dev" $INTERFACE_TIMEOUT $wait_time
+
+ if (($?)); then
+ check_err 1
+ log_test setup_wait_dev ": Interface $dev does not come up."
+ exit 1
+ fi
+}
+
+setup_wait_dev_with_timeout()
+{
+ local dev=$1; shift
+ local max_iterations=${1:-$WAIT_TIMEOUT}; shift
+ local wait_time=${1:-$WAIT_TIME}; shift
+ local i
- while true; do
+ for ((i = 1; i <= $max_iterations; ++i)); do
ip link show dev $dev up \
| grep 'state UP' &> /dev/null
if [[ $? -ne 0 ]]; then
sleep 1
else
- break
+ sleep $wait_time
+ return 0
fi
done
+
+ return 1
}
setup_wait()
{
local num_netifs=${1:-$NUM_NETIFS}
+ local i
for ((i = 1; i <= num_netifs; ++i)); do
- setup_wait_dev ${NETIFS[p$i]}
+ setup_wait_dev ${NETIFS[p$i]} 0
done
# Make sure links are ready.
sleep $WAIT_TIME
}
+cmd_jq()
+{
+ local cmd=$1
+ local jq_exp=$2
+ local jq_opts=$3
+ local ret
+ local output
+
+ output="$($cmd)"
+ # it the command fails, return error right away
+ ret=$?
+ if [[ $ret -ne 0 ]]; then
+ return $ret
+ fi
+ output=$(echo $output | jq -r $jq_opts "$jq_exp")
+ ret=$?
+ if [[ $ret -ne 0 ]]; then
+ return $ret
+ fi
+ echo $output
+ # return success only in case of non-empty output
+ [ ! -z "$output" ]
+}
+
lldpad_app_wait_set()
{
local dev=$1; shift
@@ -505,9 +593,10 @@ tc_rule_stats_get()
local dev=$1; shift
local pref=$1; shift
local dir=$1; shift
+ local selector=${1:-.packets}; shift
tc -j -s filter show dev $dev ${dir:-ingress} pref $pref \
- | jq '.[1].options.actions[].stats.packets'
+ | jq ".[1].options.actions[].stats$selector"
}
ethtool_stats_get()
@@ -518,6 +607,30 @@ ethtool_stats_get()
ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2
}
+humanize()
+{
+ local speed=$1; shift
+
+ for unit in bps Kbps Mbps Gbps; do
+ if (($(echo "$speed < 1024" | bc))); then
+ break
+ fi
+
+ speed=$(echo "scale=1; $speed / 1024" | bc)
+ done
+
+ echo "$speed${unit}"
+}
+
+rate()
+{
+ local t0=$1; shift
+ local t1=$1; shift
+ local interval=$1; shift
+
+ echo $((8 * (t1 - t0) / interval))
+}
+
mac_get()
{
local if_name=$1
@@ -1018,3 +1131,21 @@ flood_test()
flood_unicast_test $br_port $host1_if $host2_if
flood_multicast_test $br_port $host1_if $host2_if
}
+
+start_traffic()
+{
+ local h_in=$1; shift # Where the traffic egresses the host
+ local sip=$1; shift
+ local dip=$1; shift
+ local dmac=$1; shift
+
+ $MZ $h_in -p 8000 -A $sip -B $dip -c 0 \
+ -a own -b $dmac -t udp -q &
+ sleep 1
+}
+
+stop_traffic()
+{
+ # Suppress noise from killing mausezahn.
+ { kill %% && wait %%; } 2>/dev/null
+}
diff --git a/tools/testing/selftests/net/forwarding/loopback.sh b/tools/testing/selftests/net/forwarding/loopback.sh
index 6e4626ae71b0..8f4057310b5b 100755
--- a/tools/testing/selftests/net/forwarding/loopback.sh
+++ b/tools/testing/selftests/net/forwarding/loopback.sh
@@ -1,6 +1,9 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
ALL_TESTS="loopback_test"
NUM_NETIFS=2
source tc_common.sh
@@ -72,6 +75,11 @@ setup_prepare()
h1_create
h2_create
+
+ if ethtool -k $h1 | grep loopback | grep -q fixed; then
+ log_test "SKIP: dev $h1 does not support loopback feature"
+ exit $ksft_skip
+ fi
}
cleanup()
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
index a75cb51cc5bd..057f91b05098 100755
--- a/tools/testing/selftests/net/forwarding/router.sh
+++ b/tools/testing/selftests/net/forwarding/router.sh
@@ -1,9 +1,23 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="ping_ipv4 ping_ipv6"
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ sip_in_class_e
+ mc_mac_mismatch
+ ipv4_sip_equal_dip
+ ipv6_sip_equal_dip
+ ipv4_dip_link_local
+"
+
NUM_NETIFS=4
source lib.sh
+source tc_common.sh
+
+require_command $MCD
+require_command $MC_CLI
+table_name=selftests
h1_create()
{
@@ -64,6 +78,8 @@ router_create()
ip link set dev $rp1 up
ip link set dev $rp2 up
+ tc qdisc add dev $rp2 clsact
+
ip address add 192.0.2.1/24 dev $rp1
ip address add 2001:db8:1::1/64 dev $rp1
@@ -79,10 +95,31 @@ router_destroy()
ip address del 2001:db8:1::1/64 dev $rp1
ip address del 192.0.2.1/24 dev $rp1
+ tc qdisc del dev $rp2 clsact
+
ip link set dev $rp2 down
ip link set dev $rp1 down
}
+start_mcd()
+{
+ SMCROUTEDIR="$(mktemp -d)"
+
+ for ((i = 1; i <= $NUM_NETIFS; ++i)); do
+ echo "phyint ${NETIFS[p$i]} enable" >> \
+ $SMCROUTEDIR/$table_name.conf
+ done
+
+ $MCD -N -I $table_name -f $SMCROUTEDIR/$table_name.conf \
+ -P $SMCROUTEDIR/$table_name.pid
+}
+
+kill_mcd()
+{
+ pkill $MCD
+ rm -rf $SMCROUTEDIR
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
@@ -91,6 +128,10 @@ setup_prepare()
rp2=${NETIFS[p3]}
h2=${NETIFS[p4]}
+ rp1mac=$(mac_get $rp1)
+
+ start_mcd
+
vrf_prepare
h1_create
@@ -113,6 +154,8 @@ cleanup()
h1_destroy
vrf_cleanup
+
+ kill_mcd
}
ping_ipv4()
@@ -125,6 +168,150 @@ ping_ipv6()
ping6_test $h1 2001:db8:2::2
}
+sip_in_class_e()
+{
+ RET=0
+
+ # Disable rpfilter to prevent packets to be dropped because of it.
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.$rp1.rp_filter 0
+
+ tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \
+ flower src_ip 240.0.0.1 ip_proto udp action pass
+
+ $MZ $h1 -t udp "sp=54321,dp=12345" -c 5 -d 1msec \
+ -A 240.0.0.1 -b $rp1mac -B 198.51.100.2 -q
+
+ tc_check_packets "dev $rp2 egress" 101 5
+ check_err $? "Packets were dropped"
+
+ log_test "Source IP in class E"
+
+ tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
+ sysctl_restore net.ipv4.conf.$rp1.rp_filter
+ sysctl_restore net.ipv4.conf.all.rp_filter
+}
+
+create_mcast_sg()
+{
+ local if_name=$1; shift
+ local s_addr=$1; shift
+ local mcast=$1; shift
+ local dest_ifs=${@}
+
+ $MC_CLI -I $table_name add $if_name $s_addr $mcast $dest_ifs
+}
+
+delete_mcast_sg()
+{
+ local if_name=$1; shift
+ local s_addr=$1; shift
+ local mcast=$1; shift
+ local dest_ifs=${@}
+
+ $MC_CLI -I $table_name remove $if_name $s_addr $mcast $dest_ifs
+}
+
+__mc_mac_mismatch()
+{
+ local desc=$1; shift
+ local proto=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local flags=${1:-""}; shift
+ local dmac=01:02:03:04:05:06
+
+ RET=0
+
+ tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \
+ flower dst_ip $dip action pass
+
+ create_mcast_sg $rp1 $sip $dip $rp2
+
+ $MZ $flags $h1 -t udp "sp=54321,dp=12345" -c 5 -d 1msec -b $dmac \
+ -B $dip -q
+
+ tc_check_packets "dev $rp2 egress" 101 5
+ check_err $? "Packets were dropped"
+
+ log_test "Multicast MAC mismatch: $desc"
+
+ delete_mcast_sg $rp1 $sip $dip $rp2
+ tc filter del dev $rp2 egress protocol $proto pref 1 handle 101 flower
+}
+
+mc_mac_mismatch()
+{
+ __mc_mac_mismatch "IPv4" "ip" 192.0.2.2 225.1.2.3
+ __mc_mac_mismatch "IPv6" "ipv6" 2001:db8:1::2 ff0e::3 "-6"
+}
+
+ipv4_sip_equal_dip()
+{
+ RET=0
+
+ # Disable rpfilter to prevent packets to be dropped because of it.
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.$rp1.rp_filter 0
+
+ tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \
+ flower src_ip 198.51.100.2 action pass
+
+ $MZ $h1 -t udp "sp=54321,dp=12345" -c 5 -d 1msec \
+ -A 198.51.100.2 -b $rp1mac -B 198.51.100.2 -q
+
+ tc_check_packets "dev $rp2 egress" 101 5
+ check_err $? "Packets were dropped"
+
+ log_test "Source IP is equal to destination IP: IPv4"
+
+ tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
+ sysctl_restore net.ipv4.conf.$rp1.rp_filter
+ sysctl_restore net.ipv4.conf.all.rp_filter
+}
+
+ipv6_sip_equal_dip()
+{
+ RET=0
+
+ tc filter add dev $rp2 egress protocol ipv6 pref 1 handle 101 \
+ flower src_ip 2001:db8:2::2 action pass
+
+ $MZ -6 $h1 -t udp "sp=54321,dp=12345" -c 5 -d 1msec \
+ -A 2001:db8:2::2 -b $rp1mac -B 2001:db8:2::2 -q
+
+ tc_check_packets "dev $rp2 egress" 101 5
+ check_err $? "Packets were dropped"
+
+ log_test "Source IP is equal to destination IP: IPv6"
+
+ tc filter del dev $rp2 egress protocol ipv6 pref 1 handle 101 flower
+}
+
+ipv4_dip_link_local()
+{
+ local dip=169.254.1.1
+
+ RET=0
+
+ tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \
+ flower dst_ip $dip action pass
+
+ ip neigh add 169.254.1.1 lladdr 00:11:22:33:44:55 dev $rp2
+ ip route add 169.254.1.0/24 dev $rp2
+
+ $MZ $h1 -t udp "sp=54321,dp=12345" -c 5 -d 1msec -b $rp1mac -B $dip -q
+
+ tc_check_packets "dev $rp2 egress" 101 5
+ check_err $? "Packets were dropped"
+
+ log_test "IPv4 destination IP is link-local"
+
+ ip route del 169.254.1.0/24 dev $rp2
+ ip neigh del 169.254.1.1 lladdr 00:11:22:33:44:55 dev $rp2
+ tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
index fef88eb4b873..fa6a88c50750 100755
--- a/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
+++ b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
@@ -36,7 +36,7 @@ h2_destroy()
{
ip -6 route del 2001:db8:1::/64 vrf v$h2
ip -4 route del 192.0.2.0/28 vrf v$h2
- simple_if_fini $h2 192.0.2.130/28
+ simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64
}
router_create()
diff --git a/tools/testing/selftests/net/forwarding/sch_ets.sh b/tools/testing/selftests/net/forwarding/sch_ets.sh
new file mode 100755
index 000000000000..40e0ad1bc4f2
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/sch_ets.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A driver for the ETS selftest that implements testing in slowpath.
+lib_dir=.
+source sch_ets_core.sh
+
+ALL_TESTS="
+ ping_ipv4
+ priomap_mode
+ ets_test_strict
+ ets_test_mixed
+ ets_test_dwrr
+ classifier_mode
+ ets_test_strict
+ ets_test_mixed
+ ets_test_dwrr
+"
+
+switch_create()
+{
+ ets_switch_create
+
+ # Create a bottleneck so that the DWRR process can kick in.
+ tc qdisc add dev $swp2 root handle 1: tbf \
+ rate 1Gbit burst 1Mbit latency 100ms
+ PARENT="parent 1:"
+}
+
+switch_destroy()
+{
+ ets_switch_destroy
+ tc qdisc del dev $swp2 root
+}
+
+# Callback from sch_ets_tests.sh
+get_stats()
+{
+ local stream=$1; shift
+
+ link_stats_get $h2.1$stream rx bytes
+}
+
+ets_run
diff --git a/tools/testing/selftests/net/forwarding/sch_ets_core.sh b/tools/testing/selftests/net/forwarding/sch_ets_core.sh
new file mode 100644
index 000000000000..f906fcc66572
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/sch_ets_core.sh
@@ -0,0 +1,300 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This is a template for ETS Qdisc test.
+#
+# This test sends from H1 several traffic streams with 802.1p-tagged packets.
+# The tags are used at $swp1 to prioritize the traffic. Each stream is then
+# queued at a different ETS band according to the assigned priority. After
+# runnig for a while, counters at H2 are consulted to determine whether the
+# traffic scheduling was according to the ETS configuration.
+#
+# This template is supposed to be embedded by a test driver, which implements
+# statistics collection, any HW-specific stuff, and prominently configures the
+# system to assure that there is overcommitment at $swp2. That is necessary so
+# that the ETS traffic selection algorithm kicks in and has to schedule some
+# traffic at the expense of other.
+#
+# A driver for veth-based testing is in sch_ets.sh, an example of a driver for
+# an offloaded data path is in selftests/drivers/net/mlxsw/sch_ets.sh.
+#
+# +---------------------------------------------------------------------+
+# | H1 |
+# | + $h1.10 + $h1.11 + $h1.12 |
+# | | 192.0.2.1/28 | 192.0.2.17/28 | 192.0.2.33/28 |
+# | | egress-qos-map | egress-qos-map | egress-qos-map |
+# | | 0:0 | 0:1 | 0:2 |
+# | \____________________ | ____________________/ |
+# | \|/ |
+# | + $h1 |
+# +---------------------------|-----------------------------------------+
+# |
+# +---------------------------|-----------------------------------------+
+# | SW + $swp1 |
+# | | >1Gbps |
+# | ____________________/|\____________________ |
+# | / | \ |
+# | +--|----------------+ +--|----------------+ +--|----------------+ |
+# | | + $swp1.10 | | + $swp1.11 | | + $swp1.12 | |
+# | | ingress-qos-map| | ingress-qos-map| | ingress-qos-map| |
+# | | 0:0 1:1 2:2 | | 0:0 1:1 2:2 | | 0:0 1:1 2:2 | |
+# | | | | | | | |
+# | | BR10 | | BR11 | | BR12 | |
+# | | | | | | | |
+# | | + $swp2.10 | | + $swp2.11 | | + $swp2.12 | |
+# | +--|----------------+ +--|----------------+ +--|----------------+ |
+# | \____________________ | ____________________/ |
+# | \|/ |
+# | + $swp2 |
+# | | 1Gbps (ethtool or HTB qdisc) |
+# | | qdisc ets quanta $W0 $W1 $W2 |
+# | | priomap 0 1 2 |
+# +---------------------------|-----------------------------------------+
+# |
+# +---------------------------|-----------------------------------------+
+# | H2 + $h2 |
+# | ____________________/|\____________________ |
+# | / | \ |
+# | + $h2.10 + $h2.11 + $h2.12 |
+# | 192.0.2.2/28 192.0.2.18/28 192.0.2.34/28 |
+# +---------------------------------------------------------------------+
+
+NUM_NETIFS=4
+CHECK_TC=yes
+source $lib_dir/lib.sh
+source $lib_dir/sch_ets_tests.sh
+
+PARENT=root
+QDISC_DEV=
+
+sip()
+{
+ echo 192.0.2.$((16 * $1 + 1))
+}
+
+dip()
+{
+ echo 192.0.2.$((16 * $1 + 2))
+}
+
+# Callback from sch_ets_tests.sh
+ets_start_traffic()
+{
+ local dst_mac=$(mac_get $h2)
+ local i=$1; shift
+
+ start_traffic $h1.1$i $(sip $i) $(dip $i) $dst_mac
+}
+
+ETS_CHANGE_QDISC=
+
+priomap_mode()
+{
+ echo "Running in priomap mode"
+ ets_delete_qdisc
+ ETS_CHANGE_QDISC=ets_change_qdisc_priomap
+}
+
+classifier_mode()
+{
+ echo "Running in classifier mode"
+ ets_delete_qdisc
+ ETS_CHANGE_QDISC=ets_change_qdisc_classifier
+}
+
+ets_change_qdisc_priomap()
+{
+ local dev=$1; shift
+ local nstrict=$1; shift
+ local priomap=$1; shift
+ local quanta=("${@}")
+
+ local op=$(if [[ -n $QDISC_DEV ]]; then echo change; else echo add; fi)
+
+ tc qdisc $op dev $dev $PARENT handle 10: ets \
+ $(if ((nstrict)); then echo strict $nstrict; fi) \
+ $(if ((${#quanta[@]})); then echo quanta ${quanta[@]}; fi) \
+ priomap $priomap
+ QDISC_DEV=$dev
+}
+
+ets_change_qdisc_classifier()
+{
+ local dev=$1; shift
+ local nstrict=$1; shift
+ local priomap=$1; shift
+ local quanta=("${@}")
+
+ local op=$(if [[ -n $QDISC_DEV ]]; then echo change; else echo add; fi)
+
+ tc qdisc $op dev $dev $PARENT handle 10: ets \
+ $(if ((nstrict)); then echo strict $nstrict; fi) \
+ $(if ((${#quanta[@]})); then echo quanta ${quanta[@]}; fi)
+
+ if [[ $op == add ]]; then
+ local prio=0
+ local band
+
+ for band in $priomap; do
+ tc filter add dev $dev parent 10: basic \
+ match "meta(priority eq $prio)" \
+ flowid 10:$((band + 1))
+ ((prio++))
+ done
+ fi
+ QDISC_DEV=$dev
+}
+
+# Callback from sch_ets_tests.sh
+ets_change_qdisc()
+{
+ if [[ -z "$ETS_CHANGE_QDISC" ]]; then
+ exit 1
+ fi
+ $ETS_CHANGE_QDISC "$@"
+}
+
+ets_delete_qdisc()
+{
+ if [[ -n $QDISC_DEV ]]; then
+ tc qdisc del dev $QDISC_DEV $PARENT
+ QDISC_DEV=
+ fi
+}
+
+h1_create()
+{
+ local i;
+
+ simple_if_init $h1
+ mtu_set $h1 9900
+ for i in {0..2}; do
+ vlan_create $h1 1$i v$h1 $(sip $i)/28
+ ip link set dev $h1.1$i type vlan egress 0:$i
+ done
+}
+
+h1_destroy()
+{
+ local i
+
+ for i in {0..2}; do
+ vlan_destroy $h1 1$i
+ done
+ mtu_restore $h1
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ local i
+
+ simple_if_init $h2
+ mtu_set $h2 9900
+ for i in {0..2}; do
+ vlan_create $h2 1$i v$h2 $(dip $i)/28
+ done
+}
+
+h2_destroy()
+{
+ local i
+
+ for i in {0..2}; do
+ vlan_destroy $h2 1$i
+ done
+ mtu_restore $h2
+ simple_if_fini $h2
+}
+
+ets_switch_create()
+{
+ local i
+
+ ip link set dev $swp1 up
+ mtu_set $swp1 9900
+
+ ip link set dev $swp2 up
+ mtu_set $swp2 9900
+
+ for i in {0..2}; do
+ vlan_create $swp1 1$i
+ ip link set dev $swp1.1$i type vlan ingress 0:0 1:1 2:2
+
+ vlan_create $swp2 1$i
+
+ ip link add dev br1$i type bridge
+ ip link set dev $swp1.1$i master br1$i
+ ip link set dev $swp2.1$i master br1$i
+
+ ip link set dev br1$i up
+ ip link set dev $swp1.1$i up
+ ip link set dev $swp2.1$i up
+ done
+}
+
+ets_switch_destroy()
+{
+ local i
+
+ ets_delete_qdisc
+
+ for i in {0..2}; do
+ ip link del dev br1$i
+ vlan_destroy $swp2 1$i
+ vlan_destroy $swp1 1$i
+ done
+
+ mtu_restore $swp2
+ ip link set dev $swp2 down
+
+ mtu_restore $swp1
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ put=$swp2
+ hut=$h2
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1.10 $(dip 0) " vlan 10"
+ ping_test $h1.11 $(dip 1) " vlan 11"
+ ping_test $h1.12 $(dip 2) " vlan 12"
+}
+
+ets_run()
+{
+ trap cleanup EXIT
+
+ setup_prepare
+ setup_wait
+
+ tests_run
+
+ exit $EXIT_STATUS
+}
diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
new file mode 100644
index 000000000000..3c3b204d47e8
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
@@ -0,0 +1,227 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# Global interface:
+# $put -- port under test (e.g. $swp2)
+# get_stats($band) -- A function to collect stats for band
+# ets_start_traffic($band) -- Start traffic for this band
+# ets_change_qdisc($op, $dev, $nstrict, $quanta...) -- Add or change qdisc
+
+# WS describes the Qdisc configuration. It has one value per band (so the
+# number of array elements indicates the number of bands). If the value is
+# 0, it is a strict band, otherwise the it's a DRR band and the value is
+# that band's quantum.
+declare -a WS
+
+qdisc_describe()
+{
+ local nbands=${#WS[@]}
+ local nstrict=0
+ local i
+
+ for ((i = 0; i < nbands; i++)); do
+ if ((!${WS[$i]})); then
+ : $((nstrict++))
+ fi
+ done
+
+ echo -n "ets bands $nbands"
+ if ((nstrict)); then
+ echo -n " strict $nstrict"
+ fi
+ if ((nstrict < nbands)); then
+ echo -n " quanta"
+ for ((i = nstrict; i < nbands; i++)); do
+ echo -n " ${WS[$i]}"
+ done
+ fi
+}
+
+__strict_eval()
+{
+ local desc=$1; shift
+ local d=$1; shift
+ local total=$1; shift
+ local above=$1; shift
+
+ RET=0
+
+ if ((! total)); then
+ check_err 1 "No traffic observed"
+ log_test "$desc"
+ return
+ fi
+
+ local ratio=$(echo "scale=2; 100 * $d / $total" | bc -l)
+ if ((above)); then
+ test $(echo "$ratio > 95.0" | bc -l) -eq 1
+ check_err $? "Not enough traffic"
+ log_test "$desc"
+ log_info "Expected ratio >95% Measured ratio $ratio"
+ else
+ test $(echo "$ratio < 5" | bc -l) -eq 1
+ check_err $? "Too much traffic"
+ log_test "$desc"
+ log_info "Expected ratio <5% Measured ratio $ratio"
+ fi
+}
+
+strict_eval()
+{
+ __strict_eval "$@" 1
+}
+
+notraf_eval()
+{
+ __strict_eval "$@" 0
+}
+
+__ets_dwrr_test()
+{
+ local -a streams=("$@")
+
+ local low_stream=${streams[0]}
+ local seen_strict=0
+ local -a t0 t1 d
+ local stream
+ local total
+ local i
+
+ echo "Testing $(qdisc_describe), streams ${streams[@]}"
+
+ for stream in ${streams[@]}; do
+ ets_start_traffic $stream
+ done
+
+ sleep 10
+
+ t0=($(for stream in ${streams[@]}; do
+ get_stats $stream
+ done))
+
+ sleep 10
+
+ t1=($(for stream in ${streams[@]}; do
+ get_stats $stream
+ done))
+ d=($(for ((i = 0; i < ${#streams[@]}; i++)); do
+ echo $((${t1[$i]} - ${t0[$i]}))
+ done))
+ total=$(echo ${d[@]} | sed 's/ /+/g' | bc)
+
+ for ((i = 0; i < ${#streams[@]}; i++)); do
+ local stream=${streams[$i]}
+ if ((seen_strict)); then
+ notraf_eval "band $stream" ${d[$i]} $total
+ elif ((${WS[$stream]} == 0)); then
+ strict_eval "band $stream" ${d[$i]} $total
+ seen_strict=1
+ elif ((stream == low_stream)); then
+ # Low stream is used as DWRR evaluation reference.
+ continue
+ else
+ multipath_eval "bands $low_stream:$stream" \
+ ${WS[$low_stream]} ${WS[$stream]} \
+ ${d[0]} ${d[$i]}
+ fi
+ done
+
+ for stream in ${streams[@]}; do
+ stop_traffic
+ done
+}
+
+ets_dwrr_test_012()
+{
+ __ets_dwrr_test 0 1 2
+}
+
+ets_dwrr_test_01()
+{
+ __ets_dwrr_test 0 1
+}
+
+ets_dwrr_test_12()
+{
+ __ets_dwrr_test 1 2
+}
+
+ets_qdisc_setup()
+{
+ local dev=$1; shift
+ local nstrict=$1; shift
+ local -a quanta=("$@")
+
+ local ndwrr=${#quanta[@]}
+ local nbands=$((nstrict + ndwrr))
+ local nstreams=$(if ((nbands > 3)); then echo 3; else echo $nbands; fi)
+ local priomap=$(seq 0 $((nstreams - 1)))
+ local i
+
+ WS=($(
+ for ((i = 0; i < nstrict; i++)); do
+ echo 0
+ done
+ for ((i = 0; i < ndwrr; i++)); do
+ echo ${quanta[$i]}
+ done
+ ))
+
+ ets_change_qdisc $dev $nstrict "$priomap" ${quanta[@]}
+}
+
+ets_set_dwrr_uniform()
+{
+ ets_qdisc_setup $put 0 3300 3300 3300
+}
+
+ets_set_dwrr_varying()
+{
+ ets_qdisc_setup $put 0 5000 3500 1500
+}
+
+ets_set_strict()
+{
+ ets_qdisc_setup $put 3
+}
+
+ets_set_mixed()
+{
+ ets_qdisc_setup $put 1 5000 2500 1500
+}
+
+ets_change_quantum()
+{
+ tc class change dev $put classid 10:2 ets quantum 8000
+ WS[1]=8000
+}
+
+ets_set_dwrr_two_bands()
+{
+ ets_qdisc_setup $put 0 5000 2500
+}
+
+ets_test_strict()
+{
+ ets_set_strict
+ ets_dwrr_test_01
+ ets_dwrr_test_12
+}
+
+ets_test_mixed()
+{
+ ets_set_mixed
+ ets_dwrr_test_01
+ ets_dwrr_test_12
+}
+
+ets_test_dwrr()
+{
+ ets_set_dwrr_uniform
+ ets_dwrr_test_012
+ ets_set_dwrr_varying
+ ets_dwrr_test_012
+ ets_change_quantum
+ ets_dwrr_test_012
+ ets_set_dwrr_two_bands
+ ets_dwrr_test_01
+}
diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh
new file mode 100644
index 000000000000..d1f26cb7cd73
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh
@@ -0,0 +1,233 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends a stream of traffic from H1 through a switch, to H2. On the
+# egress port from the switch ($swp2), a shaper is installed. The test verifies
+# that the rates on the port match the configured shaper.
+#
+# In order to test per-class shaping, $swp2 actually contains TBF under PRIO or
+# ETS, with two different configurations. Traffic is prioritized using 802.1p.
+#
+# +-------------------------------------------+
+# | H1 |
+# | + $h1.10 $h1.11 + |
+# | | 192.0.2.1/28 192.0.2.17/28 | |
+# | | | |
+# | \______________ _____________/ |
+# | \ / |
+# | + $h1 |
+# +---------------------|---------------------+
+# |
+# +---------------------|---------------------+
+# | SW + $swp1 |
+# | _______________/ \_______________ |
+# | / \ |
+# | +-|--------------+ +--------------|-+ |
+# | | + $swp1.10 | | $swp1.11 + | |
+# | | | | | |
+# | | BR10 | | BR11 | |
+# | | | | | |
+# | | + $swp2.10 | | $swp2.11 + | |
+# | +-|--------------+ +--------------|-+ |
+# | \_______________ ______________/ |
+# | \ / |
+# | + $swp2 |
+# +---------------------|---------------------+
+# |
+# +---------------------|---------------------+
+# | H2 + $h2 |
+# | ______________/ \______________ |
+# | / \ |
+# | | | |
+# | + $h2.10 $h2.11 + |
+# | 192.0.2.2/28 192.0.2.18/28 |
+# +-------------------------------------------+
+
+NUM_NETIFS=4
+CHECK_TC="yes"
+source $lib_dir/lib.sh
+
+ipaddr()
+{
+ local host=$1; shift
+ local vlan=$1; shift
+
+ echo 192.0.2.$((16 * (vlan - 10) + host))
+}
+
+host_create()
+{
+ local dev=$1; shift
+ local host=$1; shift
+
+ simple_if_init $dev
+ mtu_set $dev 10000
+
+ vlan_create $dev 10 v$dev $(ipaddr $host 10)/28
+ ip link set dev $dev.10 type vlan egress 0:0
+
+ vlan_create $dev 11 v$dev $(ipaddr $host 11)/28
+ ip link set dev $dev.11 type vlan egress 0:1
+}
+
+host_destroy()
+{
+ local dev=$1; shift
+
+ vlan_destroy $dev 11
+ vlan_destroy $dev 10
+ mtu_restore $dev
+ simple_if_fini $dev
+}
+
+h1_create()
+{
+ host_create $h1 1
+}
+
+h1_destroy()
+{
+ host_destroy $h1
+}
+
+h2_create()
+{
+ host_create $h2 2
+
+ tc qdisc add dev $h2 clsact
+ tc filter add dev $h2 ingress pref 1010 prot 802.1q \
+ flower $TCFLAGS vlan_id 10 action pass
+ tc filter add dev $h2 ingress pref 1011 prot 802.1q \
+ flower $TCFLAGS vlan_id 11 action pass
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ host_destroy $h2
+}
+
+switch_create()
+{
+ local intf
+ local vlan
+
+ ip link add dev br10 type bridge
+ ip link add dev br11 type bridge
+
+ for intf in $swp1 $swp2; do
+ ip link set dev $intf up
+ mtu_set $intf 10000
+
+ for vlan in 10 11; do
+ vlan_create $intf $vlan
+ ip link set dev $intf.$vlan master br$vlan
+ ip link set dev $intf.$vlan up
+ done
+ done
+
+ for vlan in 10 11; do
+ ip link set dev $swp1.$vlan type vlan ingress 0:0 1:1
+ done
+
+ ip link set dev br10 up
+ ip link set dev br11 up
+}
+
+switch_destroy()
+{
+ local intf
+ local vlan
+
+ # A test may have been interrupted mid-run, with Qdisc installed. Delete
+ # it here.
+ tc qdisc del dev $swp2 root 2>/dev/null
+
+ ip link set dev br11 down
+ ip link set dev br10 down
+
+ for intf in $swp2 $swp1; do
+ for vlan in 11 10; do
+ ip link set dev $intf.$vlan down
+ ip link set dev $intf.$vlan nomaster
+ vlan_destroy $intf $vlan
+ done
+
+ mtu_restore $intf
+ ip link set dev $intf down
+ done
+
+ ip link del dev br11
+ ip link del dev br10
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ swp4=${NETIFS[p7]}
+ swp5=${NETIFS[p8]}
+
+ h2_mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1.10 $(ipaddr 2 10) " vlan 10"
+ ping_test $h1.11 $(ipaddr 2 11) " vlan 11"
+}
+
+tbf_get_counter()
+{
+ local vlan=$1; shift
+
+ tc_rule_stats_get $h2 10$vlan ingress .bytes
+}
+
+do_tbf_test()
+{
+ local vlan=$1; shift
+ local mbit=$1; shift
+
+ start_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 2 $vlan) $h2_mac
+ sleep 5 # Wait for the burst to dwindle
+
+ local t2=$(busywait_for_counter 1000 +1 tbf_get_counter $vlan)
+ sleep 10
+ local t3=$(tbf_get_counter $vlan)
+ stop_traffic
+
+ RET=0
+
+ # Note: TBF uses 10^6 Mbits, not 2^20 ones.
+ local er=$((mbit * 1000 * 1000))
+ local nr=$(rate $t2 $t3 10)
+ local nr_pct=$((100 * (nr - er) / er))
+ ((-5 <= nr_pct && nr_pct <= 5))
+ check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-5%."
+
+ log_test "TC $((vlan - 10)): TBF rate ${mbit}Mbit"
+}
diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_ets.sh b/tools/testing/selftests/net/forwarding/sch_tbf_ets.sh
new file mode 100755
index 000000000000..84fb6cab88e4
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/sch_tbf_ets.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+QDISC="ets strict"
+: ${lib_dir:=.}
+source $lib_dir/sch_tbf_etsprio.sh
diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh
new file mode 100644
index 000000000000..8bd85da1905a
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ ping_ipv4
+ tbf_test
+"
+source $lib_dir/sch_tbf_core.sh
+
+tbf_test_one()
+{
+ local bs=$1; shift
+
+ tc qdisc replace dev $swp2 parent 10:3 handle 103: tbf \
+ rate 400Mbit burst $bs limit 1M
+ tc qdisc replace dev $swp2 parent 10:2 handle 102: tbf \
+ rate 800Mbit burst $bs limit 1M
+
+ do_tbf_test 10 400 $bs
+ do_tbf_test 11 800 $bs
+}
+
+tbf_test()
+{
+ # This test is used for both ETS and PRIO. Even though we only need two
+ # bands, PRIO demands a minimum of three.
+ tc qdisc add dev $swp2 root handle 10: $QDISC 3 priomap 2 1 0
+ tbf_test_one 128K
+ tc qdisc del dev $swp2 root
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_prio.sh b/tools/testing/selftests/net/forwarding/sch_tbf_prio.sh
new file mode 100755
index 000000000000..9c8cb1cb9ba4
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/sch_tbf_prio.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+QDISC="prio bands"
+: ${lib_dir:=.}
+source $lib_dir/sch_tbf_etsprio.sh
diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_root.sh b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh
new file mode 100755
index 000000000000..72aa21ba88c7
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ ping_ipv4
+ tbf_test
+"
+: ${lib_dir:=.}
+source $lib_dir/sch_tbf_core.sh
+
+tbf_test_one()
+{
+ local bs=$1; shift
+
+ tc qdisc replace dev $swp2 root handle 108: tbf \
+ rate 400Mbit burst $bs limit 1M
+ do_tbf_test 10 400 $bs
+}
+
+tbf_test()
+{
+ tbf_test_one 128K
+ tc qdisc del dev $swp2 root
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh
index 9d3b64a2a264..64f652633585 100644
--- a/tools/testing/selftests/net/forwarding/tc_common.sh
+++ b/tools/testing/selftests/net/forwarding/tc_common.sh
@@ -3,23 +3,48 @@
CHECK_TC="yes"
+# Can be overridden by the configuration file. See lib.sh
+TC_HIT_TIMEOUT=${TC_HIT_TIMEOUT:=1000} # ms
+
+__tc_check_packets()
+{
+ local id=$1
+ local handle=$2
+ local count=$3
+ local operator=$4
+
+ start_time="$(date -u +%s%3N)"
+ while true
+ do
+ cmd_jq "tc -j -s filter show $id" \
+ ".[] | select(.options.handle == $handle) | \
+ select(.options.actions[0].stats.packets $operator $count)" \
+ &> /dev/null
+ ret=$?
+ if [[ $ret -eq 0 ]]; then
+ return $ret
+ fi
+ current_time="$(date -u +%s%3N)"
+ diff=$(expr $current_time - $start_time)
+ if [ "$diff" -gt "$TC_HIT_TIMEOUT" ]; then
+ return 1
+ fi
+ done
+}
+
tc_check_packets()
{
local id=$1
local handle=$2
local count=$3
- local ret
-
- output="$(tc -j -s filter show $id)"
- # workaround the jq bug which causes jq to return 0 in case input is ""
- ret=$?
- if [[ $ret -ne 0 ]]; then
- return $ret
- fi
- echo $output | \
- jq -e ".[] \
- | select(.options.handle == $handle) \
- | select(.options.actions[0].stats.packets == $count)" \
- &> /dev/null
- return $?
+
+ __tc_check_packets "$id" "$handle" "$count" "=="
+}
+
+tc_check_packets_hitting()
+{
+ local id=$1
+ local handle=$2
+
+ __tc_check_packets "$id" "$handle" 0 ">"
}
diff --git a/tools/testing/selftests/net/l2tp.sh b/tools/testing/selftests/net/l2tp.sh
new file mode 100755
index 000000000000..5782433886fc
--- /dev/null
+++ b/tools/testing/selftests/net/l2tp.sh
@@ -0,0 +1,382 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# L2TPv3 tunnel between 2 hosts
+#
+# host-1 | router | host-2
+# | |
+# lo l2tp | | l2tp lo
+# 172.16.101.1 172.16.1.1 | | 172.16.1.2 172.16.101.2
+# fc00:101::1 fc00:1::1 | | fc00:1::2 fc00:101::2
+# | |
+# eth0 | | eth0
+# 10.1.1.1 | | 10.1.2.1
+# 2001:db8:1::1 | | 2001:db8:2::1
+
+VERBOSE=0
+PAUSE_ON_FAIL=no
+
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
+################################################################################
+#
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+run_cmd()
+{
+ local ns
+ local cmd
+ local out
+ local rc
+
+ ns="$1"
+ shift
+ cmd="$*"
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf " COMMAND: $cmd\n"
+ fi
+
+ out=$(eval ip netns exec ${ns} ${cmd} 2>&1)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+
+ return $rc
+}
+
+################################################################################
+# create namespaces and interconnects
+
+create_ns()
+{
+ local ns=$1
+ local addr=$2
+ local addr6=$3
+
+ [ -z "${addr}" ] && addr="-"
+ [ -z "${addr6}" ] && addr6="-"
+
+ ip netns add ${ns}
+
+ ip -netns ${ns} link set lo up
+ if [ "${addr}" != "-" ]; then
+ ip -netns ${ns} addr add dev lo ${addr}
+ fi
+ if [ "${addr6}" != "-" ]; then
+ ip -netns ${ns} -6 addr add dev lo ${addr6}
+ fi
+
+ ip -netns ${ns} ro add unreachable default metric 8192
+ ip -netns ${ns} -6 ro add unreachable default metric 8192
+
+ ip netns exec ${ns} sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.accept_dad=0
+}
+
+# create veth pair to connect namespaces and apply addresses.
+connect_ns()
+{
+ local ns1=$1
+ local ns1_dev=$2
+ local ns1_addr=$3
+ local ns1_addr6=$4
+ local ns2=$5
+ local ns2_dev=$6
+ local ns2_addr=$7
+ local ns2_addr6=$8
+
+ ip -netns ${ns1} li add ${ns1_dev} type veth peer name tmp
+ ip -netns ${ns1} li set ${ns1_dev} up
+ ip -netns ${ns1} li set tmp netns ${ns2} name ${ns2_dev}
+ ip -netns ${ns2} li set ${ns2_dev} up
+
+ if [ "${ns1_addr}" != "-" ]; then
+ ip -netns ${ns1} addr add dev ${ns1_dev} ${ns1_addr}
+ ip -netns ${ns2} addr add dev ${ns2_dev} ${ns2_addr}
+ fi
+
+ if [ "${ns1_addr6}" != "-" ]; then
+ ip -netns ${ns1} addr add dev ${ns1_dev} ${ns1_addr6}
+ ip -netns ${ns2} addr add dev ${ns2_dev} ${ns2_addr6}
+ fi
+}
+
+################################################################################
+# test setup
+
+cleanup()
+{
+ local ns
+
+ for ns in host-1 host-2 router
+ do
+ ip netns del ${ns} 2>/dev/null
+ done
+}
+
+setup_l2tp_ipv4()
+{
+ #
+ # configure l2tpv3 tunnel on host-1
+ #
+ ip -netns host-1 l2tp add tunnel tunnel_id 1041 peer_tunnel_id 1042 \
+ encap ip local 10.1.1.1 remote 10.1.2.1
+ ip -netns host-1 l2tp add session name l2tp4 tunnel_id 1041 \
+ session_id 1041 peer_session_id 1042
+ ip -netns host-1 link set dev l2tp4 up
+ ip -netns host-1 addr add dev l2tp4 172.16.1.1 peer 172.16.1.2
+
+ #
+ # configure l2tpv3 tunnel on host-2
+ #
+ ip -netns host-2 l2tp add tunnel tunnel_id 1042 peer_tunnel_id 1041 \
+ encap ip local 10.1.2.1 remote 10.1.1.1
+ ip -netns host-2 l2tp add session name l2tp4 tunnel_id 1042 \
+ session_id 1042 peer_session_id 1041
+ ip -netns host-2 link set dev l2tp4 up
+ ip -netns host-2 addr add dev l2tp4 172.16.1.2 peer 172.16.1.1
+
+ #
+ # add routes to loopback addresses
+ #
+ ip -netns host-1 ro add 172.16.101.2/32 via 172.16.1.2
+ ip -netns host-2 ro add 172.16.101.1/32 via 172.16.1.1
+}
+
+setup_l2tp_ipv6()
+{
+ #
+ # configure l2tpv3 tunnel on host-1
+ #
+ ip -netns host-1 l2tp add tunnel tunnel_id 1061 peer_tunnel_id 1062 \
+ encap ip local 2001:db8:1::1 remote 2001:db8:2::1
+ ip -netns host-1 l2tp add session name l2tp6 tunnel_id 1061 \
+ session_id 1061 peer_session_id 1062
+ ip -netns host-1 link set dev l2tp6 up
+ ip -netns host-1 addr add dev l2tp6 fc00:1::1 peer fc00:1::2
+
+ #
+ # configure l2tpv3 tunnel on host-2
+ #
+ ip -netns host-2 l2tp add tunnel tunnel_id 1062 peer_tunnel_id 1061 \
+ encap ip local 2001:db8:2::1 remote 2001:db8:1::1
+ ip -netns host-2 l2tp add session name l2tp6 tunnel_id 1062 \
+ session_id 1062 peer_session_id 1061
+ ip -netns host-2 link set dev l2tp6 up
+ ip -netns host-2 addr add dev l2tp6 fc00:1::2 peer fc00:1::1
+
+ #
+ # add routes to loopback addresses
+ #
+ ip -netns host-1 -6 ro add fc00:101::2/128 via fc00:1::2
+ ip -netns host-2 -6 ro add fc00:101::1/128 via fc00:1::1
+}
+
+setup()
+{
+ # start clean
+ cleanup
+
+ set -e
+ create_ns host-1 172.16.101.1/32 fc00:101::1/128
+ create_ns host-2 172.16.101.2/32 fc00:101::2/128
+ create_ns router
+
+ connect_ns host-1 eth0 10.1.1.1/24 2001:db8:1::1/64 \
+ router eth1 10.1.1.2/24 2001:db8:1::2/64
+
+ connect_ns host-2 eth0 10.1.2.1/24 2001:db8:2::1/64 \
+ router eth2 10.1.2.2/24 2001:db8:2::2/64
+
+ ip -netns host-1 ro add 10.1.2.0/24 via 10.1.1.2
+ ip -netns host-1 -6 ro add 2001:db8:2::/64 via 2001:db8:1::2
+
+ ip -netns host-2 ro add 10.1.1.0/24 via 10.1.2.2
+ ip -netns host-2 -6 ro add 2001:db8:1::/64 via 2001:db8:2::2
+
+ setup_l2tp_ipv4
+ setup_l2tp_ipv6
+ set +e
+}
+
+setup_ipsec()
+{
+ #
+ # IPv4
+ #
+ run_cmd host-1 ip xfrm policy add \
+ src 10.1.1.1 dst 10.1.2.1 dir out \
+ tmpl proto esp mode transport
+
+ run_cmd host-1 ip xfrm policy add \
+ src 10.1.2.1 dst 10.1.1.1 dir in \
+ tmpl proto esp mode transport
+
+ run_cmd host-2 ip xfrm policy add \
+ src 10.1.1.1 dst 10.1.2.1 dir in \
+ tmpl proto esp mode transport
+
+ run_cmd host-2 ip xfrm policy add \
+ src 10.1.2.1 dst 10.1.1.1 dir out \
+ tmpl proto esp mode transport
+
+ ip -netns host-1 xfrm state add \
+ src 10.1.1.1 dst 10.1.2.1 \
+ spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \
+ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
+
+ ip -netns host-1 xfrm state add \
+ src 10.1.2.1 dst 10.1.1.1 \
+ spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \
+ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
+
+ ip -netns host-2 xfrm state add \
+ src 10.1.1.1 dst 10.1.2.1 \
+ spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \
+ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
+
+ ip -netns host-2 xfrm state add \
+ src 10.1.2.1 dst 10.1.1.1 \
+ spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \
+ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
+
+ #
+ # IPV6
+ #
+ run_cmd host-1 ip -6 xfrm policy add \
+ src 2001:db8:1::1 dst 2001:db8:2::1 dir out \
+ tmpl proto esp mode transport
+
+ run_cmd host-1 ip -6 xfrm policy add \
+ src 2001:db8:2::1 dst 2001:db8:1::1 dir in \
+ tmpl proto esp mode transport
+
+ run_cmd host-2 ip -6 xfrm policy add \
+ src 2001:db8:1::1 dst 2001:db8:2::1 dir in \
+ tmpl proto esp mode transport
+
+ run_cmd host-2 ip -6 xfrm policy add \
+ src 2001:db8:2::1 dst 2001:db8:1::1 dir out \
+ tmpl proto esp mode transport
+
+ ip -netns host-1 -6 xfrm state add \
+ src 2001:db8:1::1 dst 2001:db8:2::1 \
+ spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \
+ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
+
+ ip -netns host-1 -6 xfrm state add \
+ src 2001:db8:2::1 dst 2001:db8:1::1 \
+ spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \
+ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
+
+ ip -netns host-2 -6 xfrm state add \
+ src 2001:db8:1::1 dst 2001:db8:2::1 \
+ spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \
+ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
+
+ ip -netns host-2 -6 xfrm state add \
+ src 2001:db8:2::1 dst 2001:db8:1::1 \
+ spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \
+ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
+}
+
+teardown_ipsec()
+{
+ run_cmd host-1 ip xfrm state flush
+ run_cmd host-1 ip xfrm policy flush
+ run_cmd host-2 ip xfrm state flush
+ run_cmd host-2 ip xfrm policy flush
+}
+
+################################################################################
+# generate traffic through tunnel for various cases
+
+run_ping()
+{
+ local desc="$1"
+
+ run_cmd host-1 ping -c1 -w1 172.16.1.2
+ log_test $? 0 "IPv4 basic L2TP tunnel ${desc}"
+
+ run_cmd host-1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2
+ log_test $? 0 "IPv4 route through L2TP tunnel ${desc}"
+
+ run_cmd host-1 ${ping6} -c1 -w1 fc00:1::2
+ log_test $? 0 "IPv6 basic L2TP tunnel ${desc}"
+
+ run_cmd host-1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2
+ log_test $? 0 "IPv6 route through L2TP tunnel ${desc}"
+}
+
+run_tests()
+{
+ local desc
+
+ setup
+ run_ping
+
+ setup_ipsec
+ run_ping "- with IPsec"
+ run_cmd host-1 ping -c1 -w1 172.16.1.2
+ log_test $? 0 "IPv4 basic L2TP tunnel ${desc}"
+
+ run_cmd host-1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2
+ log_test $? 0 "IPv4 route through L2TP tunnel ${desc}"
+
+ run_cmd host-1 ${ping6} -c1 -w1 fc00:1::2
+ log_test $? 0 "IPv6 basic L2TP tunnel - with IPsec"
+
+ run_cmd host-1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2
+ log_test $? 0 "IPv6 route through L2TP tunnel - with IPsec"
+
+ teardown_ipsec
+ run_ping "- after IPsec teardown"
+}
+
+################################################################################
+# main
+
+declare -i nfail=0
+declare -i nsuccess=0
+
+while getopts :pv o
+do
+ case $o in
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=$(($VERBOSE + 1));;
+ *) exit 1;;
+ esac
+done
+
+run_tests
+cleanup
+
+printf "\nTests passed: %3d\n" ${nsuccess}
+printf "Tests failed: %3d\n" ${nfail}
diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore
new file mode 100644
index 000000000000..d72f07642738
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/.gitignore
@@ -0,0 +1,2 @@
+mptcp_connect
+*.pcap
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
new file mode 100644
index 000000000000..93de52016dde
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+
+top_srcdir = ../../../../..
+
+CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
+
+TEST_PROGS := mptcp_connect.sh
+
+TEST_GEN_FILES = mptcp_connect
+
+EXTRA_CLEAN := *.pcap
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
new file mode 100644
index 000000000000..2499824d9e1c
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/config
@@ -0,0 +1,4 @@
+CONFIG_MPTCP=y
+CONFIG_MPTCP_IPV6=y
+CONFIG_VETH=y
+CONFIG_NET_SCH_NETEM=m
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
new file mode 100644
index 000000000000..99579c0223c1
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -0,0 +1,841 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include <sys/poll.h>
+#include <sys/sendfile.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+
+#include <netdb.h>
+#include <netinet/in.h>
+
+#include <linux/tcp.h>
+
+extern int optind;
+
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
+#ifndef TCP_ULP
+#define TCP_ULP 31
+#endif
+
+static bool listen_mode;
+static int poll_timeout;
+
+enum cfg_mode {
+ CFG_MODE_POLL,
+ CFG_MODE_MMAP,
+ CFG_MODE_SENDFILE,
+};
+
+static enum cfg_mode cfg_mode = CFG_MODE_POLL;
+static const char *cfg_host;
+static const char *cfg_port = "12000";
+static int cfg_sock_proto = IPPROTO_MPTCP;
+static bool tcpulp_audit;
+static int pf = AF_INET;
+static int cfg_sndbuf;
+
+static void die_usage(void)
+{
+ fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] -m mode]"
+ "[ -l ] [ -t timeout ] connect_address\n");
+ exit(1);
+}
+
+static const char *getxinfo_strerr(int err)
+{
+ if (err == EAI_SYSTEM)
+ return strerror(errno);
+
+ return gai_strerror(err);
+}
+
+static void xgetnameinfo(const struct sockaddr *addr, socklen_t addrlen,
+ char *host, socklen_t hostlen,
+ char *serv, socklen_t servlen)
+{
+ int flags = NI_NUMERICHOST | NI_NUMERICSERV;
+ int err = getnameinfo(addr, addrlen, host, hostlen, serv, servlen,
+ flags);
+
+ if (err) {
+ const char *errstr = getxinfo_strerr(err);
+
+ fprintf(stderr, "Fatal: getnameinfo: %s\n", errstr);
+ exit(1);
+ }
+}
+
+static void xgetaddrinfo(const char *node, const char *service,
+ const struct addrinfo *hints,
+ struct addrinfo **res)
+{
+ int err = getaddrinfo(node, service, hints, res);
+
+ if (err) {
+ const char *errstr = getxinfo_strerr(err);
+
+ fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
+ node ? node : "", service ? service : "", errstr);
+ exit(1);
+ }
+}
+
+static void set_sndbuf(int fd, unsigned int size)
+{
+ int err;
+
+ err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &size, sizeof(size));
+ if (err) {
+ perror("set SO_SNDBUF");
+ exit(1);
+ }
+}
+
+static int sock_listen_mptcp(const char * const listenaddr,
+ const char * const port)
+{
+ int sock;
+ struct addrinfo hints = {
+ .ai_protocol = IPPROTO_TCP,
+ .ai_socktype = SOCK_STREAM,
+ .ai_flags = AI_PASSIVE | AI_NUMERICHOST
+ };
+
+ hints.ai_family = pf;
+
+ struct addrinfo *a, *addr;
+ int one = 1;
+
+ xgetaddrinfo(listenaddr, port, &hints, &addr);
+ hints.ai_family = pf;
+
+ for (a = addr; a; a = a->ai_next) {
+ sock = socket(a->ai_family, a->ai_socktype, cfg_sock_proto);
+ if (sock < 0)
+ continue;
+
+ if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one,
+ sizeof(one)))
+ perror("setsockopt");
+
+ if (bind(sock, a->ai_addr, a->ai_addrlen) == 0)
+ break; /* success */
+
+ perror("bind");
+ close(sock);
+ sock = -1;
+ }
+
+ freeaddrinfo(addr);
+
+ if (sock < 0) {
+ fprintf(stderr, "Could not create listen socket\n");
+ return sock;
+ }
+
+ if (listen(sock, 20)) {
+ perror("listen");
+ close(sock);
+ return -1;
+ }
+
+ return sock;
+}
+
+static bool sock_test_tcpulp(const char * const remoteaddr,
+ const char * const port)
+{
+ struct addrinfo hints = {
+ .ai_protocol = IPPROTO_TCP,
+ .ai_socktype = SOCK_STREAM,
+ };
+ struct addrinfo *a, *addr;
+ int sock = -1, ret = 0;
+ bool test_pass = false;
+
+ hints.ai_family = AF_INET;
+
+ xgetaddrinfo(remoteaddr, port, &hints, &addr);
+ for (a = addr; a; a = a->ai_next) {
+ sock = socket(a->ai_family, a->ai_socktype, IPPROTO_TCP);
+ if (sock < 0) {
+ perror("socket");
+ continue;
+ }
+ ret = setsockopt(sock, IPPROTO_TCP, TCP_ULP, "mptcp",
+ sizeof("mptcp"));
+ if (ret == -1 && errno == EOPNOTSUPP)
+ test_pass = true;
+ close(sock);
+
+ if (test_pass)
+ break;
+ if (!ret)
+ fprintf(stderr,
+ "setsockopt(TCP_ULP) returned 0\n");
+ else
+ perror("setsockopt(TCP_ULP)");
+ }
+ return test_pass;
+}
+
+static int sock_connect_mptcp(const char * const remoteaddr,
+ const char * const port, int proto)
+{
+ struct addrinfo hints = {
+ .ai_protocol = IPPROTO_TCP,
+ .ai_socktype = SOCK_STREAM,
+ };
+ struct addrinfo *a, *addr;
+ int sock = -1;
+
+ hints.ai_family = pf;
+
+ xgetaddrinfo(remoteaddr, port, &hints, &addr);
+ for (a = addr; a; a = a->ai_next) {
+ sock = socket(a->ai_family, a->ai_socktype, proto);
+ if (sock < 0) {
+ perror("socket");
+ continue;
+ }
+
+ if (connect(sock, a->ai_addr, a->ai_addrlen) == 0)
+ break; /* success */
+
+ perror("connect()");
+ close(sock);
+ sock = -1;
+ }
+
+ freeaddrinfo(addr);
+ return sock;
+}
+
+static size_t do_rnd_write(const int fd, char *buf, const size_t len)
+{
+ unsigned int do_w;
+ ssize_t bw;
+
+ do_w = rand() & 0xffff;
+ if (do_w == 0 || do_w > len)
+ do_w = len;
+
+ bw = write(fd, buf, do_w);
+ if (bw < 0)
+ perror("write");
+
+ return bw;
+}
+
+static size_t do_write(const int fd, char *buf, const size_t len)
+{
+ size_t offset = 0;
+
+ while (offset < len) {
+ size_t written;
+ ssize_t bw;
+
+ bw = write(fd, buf + offset, len - offset);
+ if (bw < 0) {
+ perror("write");
+ return 0;
+ }
+
+ written = (size_t)bw;
+ offset += written;
+ }
+
+ return offset;
+}
+
+static ssize_t do_rnd_read(const int fd, char *buf, const size_t len)
+{
+ size_t cap = rand();
+
+ cap &= 0xffff;
+
+ if (cap == 0)
+ cap = 1;
+ else if (cap > len)
+ cap = len;
+
+ return read(fd, buf, cap);
+}
+
+static void set_nonblock(int fd)
+{
+ int flags = fcntl(fd, F_GETFL);
+
+ if (flags == -1)
+ return;
+
+ fcntl(fd, F_SETFL, flags | O_NONBLOCK);
+}
+
+static int copyfd_io_poll(int infd, int peerfd, int outfd)
+{
+ struct pollfd fds = {
+ .fd = peerfd,
+ .events = POLLIN | POLLOUT,
+ };
+ unsigned int woff = 0, wlen = 0;
+ char wbuf[8192];
+
+ set_nonblock(peerfd);
+
+ for (;;) {
+ char rbuf[8192];
+ ssize_t len;
+
+ if (fds.events == 0)
+ break;
+
+ switch (poll(&fds, 1, poll_timeout)) {
+ case -1:
+ if (errno == EINTR)
+ continue;
+ perror("poll");
+ return 1;
+ case 0:
+ fprintf(stderr, "%s: poll timed out (events: "
+ "POLLIN %u, POLLOUT %u)\n", __func__,
+ fds.events & POLLIN, fds.events & POLLOUT);
+ return 2;
+ }
+
+ if (fds.revents & POLLIN) {
+ len = do_rnd_read(peerfd, rbuf, sizeof(rbuf));
+ if (len == 0) {
+ /* no more data to receive:
+ * peer has closed its write side
+ */
+ fds.events &= ~POLLIN;
+
+ if ((fds.events & POLLOUT) == 0)
+ /* and nothing more to send */
+ break;
+
+ /* Else, still have data to transmit */
+ } else if (len < 0) {
+ perror("read");
+ return 3;
+ }
+
+ do_write(outfd, rbuf, len);
+ }
+
+ if (fds.revents & POLLOUT) {
+ if (wlen == 0) {
+ woff = 0;
+ wlen = read(infd, wbuf, sizeof(wbuf));
+ }
+
+ if (wlen > 0) {
+ ssize_t bw;
+
+ bw = do_rnd_write(peerfd, wbuf + woff, wlen);
+ if (bw < 0)
+ return 111;
+
+ woff += bw;
+ wlen -= bw;
+ } else if (wlen == 0) {
+ /* We have no more data to send. */
+ fds.events &= ~POLLOUT;
+
+ if ((fds.events & POLLIN) == 0)
+ /* ... and peer also closed already */
+ break;
+
+ /* ... but we still receive.
+ * Close our write side.
+ */
+ shutdown(peerfd, SHUT_WR);
+ } else {
+ if (errno == EINTR)
+ continue;
+ perror("read");
+ return 4;
+ }
+ }
+
+ if (fds.revents & (POLLERR | POLLNVAL)) {
+ fprintf(stderr, "Unexpected revents: "
+ "POLLERR/POLLNVAL(%x)\n", fds.revents);
+ return 5;
+ }
+ }
+
+ close(peerfd);
+ return 0;
+}
+
+static int do_recvfile(int infd, int outfd)
+{
+ ssize_t r;
+
+ do {
+ char buf[16384];
+
+ r = do_rnd_read(infd, buf, sizeof(buf));
+ if (r > 0) {
+ if (write(outfd, buf, r) != r)
+ break;
+ } else if (r < 0) {
+ perror("read");
+ }
+ } while (r > 0);
+
+ return (int)r;
+}
+
+static int do_mmap(int infd, int outfd, unsigned int size)
+{
+ char *inbuf = mmap(NULL, size, PROT_READ, MAP_SHARED, infd, 0);
+ ssize_t ret = 0, off = 0;
+ size_t rem;
+
+ if (inbuf == MAP_FAILED) {
+ perror("mmap");
+ return 1;
+ }
+
+ rem = size;
+
+ while (rem > 0) {
+ ret = write(outfd, inbuf + off, rem);
+
+ if (ret < 0) {
+ perror("write");
+ break;
+ }
+
+ off += ret;
+ rem -= ret;
+ }
+
+ munmap(inbuf, size);
+ return rem;
+}
+
+static int get_infd_size(int fd)
+{
+ struct stat sb;
+ ssize_t count;
+ int err;
+
+ err = fstat(fd, &sb);
+ if (err < 0) {
+ perror("fstat");
+ return -1;
+ }
+
+ if ((sb.st_mode & S_IFMT) != S_IFREG) {
+ fprintf(stderr, "%s: stdin is not a regular file\n", __func__);
+ return -2;
+ }
+
+ count = sb.st_size;
+ if (count > INT_MAX) {
+ fprintf(stderr, "File too large: %zu\n", count);
+ return -3;
+ }
+
+ return (int)count;
+}
+
+static int do_sendfile(int infd, int outfd, unsigned int count)
+{
+ while (count > 0) {
+ ssize_t r;
+
+ r = sendfile(outfd, infd, NULL, count);
+ if (r < 0) {
+ perror("sendfile");
+ return 3;
+ }
+
+ count -= r;
+ }
+
+ return 0;
+}
+
+static int copyfd_io_mmap(int infd, int peerfd, int outfd,
+ unsigned int size)
+{
+ int err;
+
+ if (listen_mode) {
+ err = do_recvfile(peerfd, outfd);
+ if (err)
+ return err;
+
+ err = do_mmap(infd, peerfd, size);
+ } else {
+ err = do_mmap(infd, peerfd, size);
+ if (err)
+ return err;
+
+ shutdown(peerfd, SHUT_WR);
+
+ err = do_recvfile(peerfd, outfd);
+ }
+
+ return err;
+}
+
+static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
+ unsigned int size)
+{
+ int err;
+
+ if (listen_mode) {
+ err = do_recvfile(peerfd, outfd);
+ if (err)
+ return err;
+
+ err = do_sendfile(infd, peerfd, size);
+ } else {
+ err = do_sendfile(infd, peerfd, size);
+ if (err)
+ return err;
+ err = do_recvfile(peerfd, outfd);
+ }
+
+ return err;
+}
+
+static int copyfd_io(int infd, int peerfd, int outfd)
+{
+ int file_size;
+
+ switch (cfg_mode) {
+ case CFG_MODE_POLL:
+ return copyfd_io_poll(infd, peerfd, outfd);
+ case CFG_MODE_MMAP:
+ file_size = get_infd_size(infd);
+ if (file_size < 0)
+ return file_size;
+ return copyfd_io_mmap(infd, peerfd, outfd, file_size);
+ case CFG_MODE_SENDFILE:
+ file_size = get_infd_size(infd);
+ if (file_size < 0)
+ return file_size;
+ return copyfd_io_sendfile(infd, peerfd, outfd, file_size);
+ }
+
+ fprintf(stderr, "Invalid mode %d\n", cfg_mode);
+
+ die_usage();
+ return 1;
+}
+
+static void check_sockaddr(int pf, struct sockaddr_storage *ss,
+ socklen_t salen)
+{
+ struct sockaddr_in6 *sin6;
+ struct sockaddr_in *sin;
+ socklen_t wanted_size = 0;
+
+ switch (pf) {
+ case AF_INET:
+ wanted_size = sizeof(*sin);
+ sin = (void *)ss;
+ if (!sin->sin_port)
+ fprintf(stderr, "accept: something wrong: ip connection from port 0");
+ break;
+ case AF_INET6:
+ wanted_size = sizeof(*sin6);
+ sin6 = (void *)ss;
+ if (!sin6->sin6_port)
+ fprintf(stderr, "accept: something wrong: ipv6 connection from port 0");
+ break;
+ default:
+ fprintf(stderr, "accept: Unknown pf %d, salen %u\n", pf, salen);
+ return;
+ }
+
+ if (salen != wanted_size)
+ fprintf(stderr, "accept: size mismatch, got %d expected %d\n",
+ (int)salen, wanted_size);
+
+ if (ss->ss_family != pf)
+ fprintf(stderr, "accept: pf mismatch, expect %d, ss_family is %d\n",
+ (int)ss->ss_family, pf);
+}
+
+static void check_getpeername(int fd, struct sockaddr_storage *ss, socklen_t salen)
+{
+ struct sockaddr_storage peerss;
+ socklen_t peersalen = sizeof(peerss);
+
+ if (getpeername(fd, (struct sockaddr *)&peerss, &peersalen) < 0) {
+ perror("getpeername");
+ return;
+ }
+
+ if (peersalen != salen) {
+ fprintf(stderr, "%s: %d vs %d\n", __func__, peersalen, salen);
+ return;
+ }
+
+ if (memcmp(ss, &peerss, peersalen)) {
+ char a[INET6_ADDRSTRLEN];
+ char b[INET6_ADDRSTRLEN];
+ char c[INET6_ADDRSTRLEN];
+ char d[INET6_ADDRSTRLEN];
+
+ xgetnameinfo((struct sockaddr *)ss, salen,
+ a, sizeof(a), b, sizeof(b));
+
+ xgetnameinfo((struct sockaddr *)&peerss, peersalen,
+ c, sizeof(c), d, sizeof(d));
+
+ fprintf(stderr, "%s: memcmp failure: accept %s vs peername %s, %s vs %s salen %d vs %d\n",
+ __func__, a, c, b, d, peersalen, salen);
+ }
+}
+
+static void check_getpeername_connect(int fd)
+{
+ struct sockaddr_storage ss;
+ socklen_t salen = sizeof(ss);
+ char a[INET6_ADDRSTRLEN];
+ char b[INET6_ADDRSTRLEN];
+
+ if (getpeername(fd, (struct sockaddr *)&ss, &salen) < 0) {
+ perror("getpeername");
+ return;
+ }
+
+ xgetnameinfo((struct sockaddr *)&ss, salen,
+ a, sizeof(a), b, sizeof(b));
+
+ if (strcmp(cfg_host, a) || strcmp(cfg_port, b))
+ fprintf(stderr, "%s: %s vs %s, %s vs %s\n", __func__,
+ cfg_host, a, cfg_port, b);
+}
+
+static void maybe_close(int fd)
+{
+ unsigned int r = rand();
+
+ if (r & 1)
+ close(fd);
+}
+
+int main_loop_s(int listensock)
+{
+ struct sockaddr_storage ss;
+ struct pollfd polls;
+ socklen_t salen;
+ int remotesock;
+
+ polls.fd = listensock;
+ polls.events = POLLIN;
+
+ switch (poll(&polls, 1, poll_timeout)) {
+ case -1:
+ perror("poll");
+ return 1;
+ case 0:
+ fprintf(stderr, "%s: timed out\n", __func__);
+ close(listensock);
+ return 2;
+ }
+
+ salen = sizeof(ss);
+ remotesock = accept(listensock, (struct sockaddr *)&ss, &salen);
+ if (remotesock >= 0) {
+ maybe_close(listensock);
+ check_sockaddr(pf, &ss, salen);
+ check_getpeername(remotesock, &ss, salen);
+
+ return copyfd_io(0, remotesock, 1);
+ }
+
+ perror("accept");
+
+ return 1;
+}
+
+static void init_rng(void)
+{
+ int fd = open("/dev/urandom", O_RDONLY);
+ unsigned int foo;
+
+ if (fd > 0) {
+ int ret = read(fd, &foo, sizeof(foo));
+
+ if (ret < 0)
+ srand(fd + foo);
+ close(fd);
+ }
+
+ srand(foo);
+}
+
+int main_loop(void)
+{
+ int fd;
+
+ /* listener is ready. */
+ fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto);
+ if (fd < 0)
+ return 2;
+
+ check_getpeername_connect(fd);
+
+ if (cfg_sndbuf)
+ set_sndbuf(fd, cfg_sndbuf);
+
+ return copyfd_io(0, fd, 1);
+}
+
+int parse_proto(const char *proto)
+{
+ if (!strcasecmp(proto, "MPTCP"))
+ return IPPROTO_MPTCP;
+ if (!strcasecmp(proto, "TCP"))
+ return IPPROTO_TCP;
+
+ fprintf(stderr, "Unknown protocol: %s\n.", proto);
+ die_usage();
+
+ /* silence compiler warning */
+ return 0;
+}
+
+int parse_mode(const char *mode)
+{
+ if (!strcasecmp(mode, "poll"))
+ return CFG_MODE_POLL;
+ if (!strcasecmp(mode, "mmap"))
+ return CFG_MODE_MMAP;
+ if (!strcasecmp(mode, "sendfile"))
+ return CFG_MODE_SENDFILE;
+
+ fprintf(stderr, "Unknown test mode: %s\n", mode);
+ fprintf(stderr, "Supported modes are:\n");
+ fprintf(stderr, "\t\t\"poll\" - interleaved read/write using poll()\n");
+ fprintf(stderr, "\t\t\"mmap\" - send entire input file (mmap+write), then read response (-l will read input first)\n");
+ fprintf(stderr, "\t\t\"sendfile\" - send entire input file (sendfile), then read response (-l will read input first)\n");
+
+ die_usage();
+
+ /* silence compiler warning */
+ return 0;
+}
+
+int parse_sndbuf(const char *size)
+{
+ unsigned long s;
+
+ errno = 0;
+
+ s = strtoul(size, NULL, 0);
+
+ if (errno) {
+ fprintf(stderr, "Invalid sndbuf size %s (%s)\n",
+ size, strerror(errno));
+ die_usage();
+ }
+
+ if (s > INT_MAX) {
+ fprintf(stderr, "Invalid sndbuf size %s (%s)\n",
+ size, strerror(ERANGE));
+ die_usage();
+ }
+
+ cfg_sndbuf = s;
+
+ return 0;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "6lp:s:hut:m:b:")) != -1) {
+ switch (c) {
+ case 'l':
+ listen_mode = true;
+ break;
+ case 'p':
+ cfg_port = optarg;
+ break;
+ case 's':
+ cfg_sock_proto = parse_proto(optarg);
+ break;
+ case 'h':
+ die_usage();
+ break;
+ case 'u':
+ tcpulp_audit = true;
+ break;
+ case '6':
+ pf = AF_INET6;
+ break;
+ case 't':
+ poll_timeout = atoi(optarg) * 1000;
+ if (poll_timeout <= 0)
+ poll_timeout = -1;
+ break;
+ case 'm':
+ cfg_mode = parse_mode(optarg);
+ break;
+ case 'b':
+ cfg_sndbuf = parse_sndbuf(optarg);
+ break;
+ }
+ }
+
+ if (optind + 1 != argc)
+ die_usage();
+ cfg_host = argv[optind];
+
+ if (strchr(cfg_host, ':'))
+ pf = AF_INET6;
+}
+
+int main(int argc, char *argv[])
+{
+ init_rng();
+
+ parse_opts(argc, argv);
+
+ if (tcpulp_audit)
+ return sock_test_tcpulp(cfg_host, cfg_port) ? 0 : 1;
+
+ if (listen_mode) {
+ int fd = sock_listen_mptcp(cfg_host, cfg_port);
+
+ if (fd < 0)
+ return 1;
+
+ if (cfg_sndbuf)
+ set_sndbuf(fd, cfg_sndbuf);
+
+ return main_loop_s(fd);
+ }
+
+ return main_loop();
+}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
new file mode 100755
index 000000000000..d573a0feb98d
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -0,0 +1,595 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+time_start=$(date +%s)
+
+optstring="b:d:e:l:r:h4cm:"
+ret=0
+sin=""
+sout=""
+cin=""
+cout=""
+ksft_skip=4
+capture=false
+timeout=30
+ipv6=true
+ethtool_random_on=true
+tc_delay="$((RANDOM%400))"
+tc_loss=$((RANDOM%101))
+tc_reorder=""
+testmode=""
+sndbuf=0
+options_log=true
+
+if [ $tc_loss -eq 100 ];then
+ tc_loss=1%
+elif [ $tc_loss -ge 10 ]; then
+ tc_loss=0.$tc_loss%
+elif [ $tc_loss -ge 1 ]; then
+ tc_loss=0.0$tc_loss%
+else
+ tc_loss=""
+fi
+
+usage() {
+ echo "Usage: $0 [ -a ]"
+ echo -e "\t-d: tc/netem delay in milliseconds, e.g. \"-d 10\" (default random)"
+ echo -e "\t-l: tc/netem loss percentage, e.g. \"-l 0.02\" (default random)"
+ echo -e "\t-r: tc/netem reorder mode, e.g. \"-r 25% 50% gap 5\", use "-r 0" to disable reordering (default random)"
+ echo -e "\t-e: ethtool features to disable, e.g.: \"-e tso -e gso\" (default: randomly disable any of tso/gso/gro)"
+ echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)"
+ echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
+ echo -e "\t-b: set sndbuf value (default: use kernel default)"
+ echo -e "\t-m: test mode (poll, sendfile; default: poll)"
+}
+
+while getopts "$optstring" option;do
+ case "$option" in
+ "h")
+ usage $0
+ exit 0
+ ;;
+ "d")
+ if [ $OPTARG -ge 0 ];then
+ tc_delay="$OPTARG"
+ else
+ echo "-d requires numeric argument, got \"$OPTARG\"" 1>&2
+ exit 1
+ fi
+ ;;
+ "e")
+ ethtool_args="$ethtool_args $OPTARG off"
+ ethtool_random_on=false
+ ;;
+ "l")
+ tc_loss="$OPTARG"
+ ;;
+ "r")
+ tc_reorder="$OPTARG"
+ ;;
+ "4")
+ ipv6=false
+ ;;
+ "c")
+ capture=true
+ ;;
+ "b")
+ if [ $OPTARG -ge 0 ];then
+ sndbuf="$OPTARG"
+ else
+ echo "-s requires numeric argument, got \"$OPTARG\"" 1>&2
+ exit 1
+ fi
+ ;;
+ "m")
+ testmode="$OPTARG"
+ ;;
+ "?")
+ usage $0
+ exit 1
+ ;;
+ esac
+done
+
+sec=$(date +%s)
+rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
+ns1="ns1-$rndh"
+ns2="ns2-$rndh"
+ns3="ns3-$rndh"
+ns4="ns4-$rndh"
+
+TEST_COUNT=0
+
+cleanup()
+{
+ rm -f "$cin" "$cout"
+ rm -f "$sin" "$sout"
+ rm -f "$capout"
+
+ local netns
+ for netns in "$ns1" "$ns2" "$ns3" "$ns4";do
+ ip netns del $netns
+ done
+}
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+sin=$(mktemp)
+sout=$(mktemp)
+cin=$(mktemp)
+cout=$(mktemp)
+capout=$(mktemp)
+trap cleanup EXIT
+
+for i in "$ns1" "$ns2" "$ns3" "$ns4";do
+ ip netns add $i || exit $ksft_skip
+ ip -net $i link set lo up
+done
+
+# "$ns1" ns2 ns3 ns4
+# ns1eth2 ns2eth1 ns2eth3 ns3eth2 ns3eth4 ns4eth3
+# - drop 1% -> reorder 25%
+# <- TSO off -
+
+ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
+ip link add ns2eth3 netns "$ns2" type veth peer name ns3eth2 netns "$ns3"
+ip link add ns3eth4 netns "$ns3" type veth peer name ns4eth3 netns "$ns4"
+
+ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth2
+ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth2 nodad
+
+ip -net "$ns1" link set ns1eth2 up
+ip -net "$ns1" route add default via 10.0.1.2
+ip -net "$ns1" route add default via dead:beef:1::2
+
+ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
+ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
+ip -net "$ns2" link set ns2eth1 up
+
+ip -net "$ns2" addr add 10.0.2.1/24 dev ns2eth3
+ip -net "$ns2" addr add dead:beef:2::1/64 dev ns2eth3 nodad
+ip -net "$ns2" link set ns2eth3 up
+ip -net "$ns2" route add default via 10.0.2.2
+ip -net "$ns2" route add default via dead:beef:2::2
+ip netns exec "$ns2" sysctl -q net.ipv4.ip_forward=1
+ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.forwarding=1
+
+ip -net "$ns3" addr add 10.0.2.2/24 dev ns3eth2
+ip -net "$ns3" addr add dead:beef:2::2/64 dev ns3eth2 nodad
+ip -net "$ns3" link set ns3eth2 up
+
+ip -net "$ns3" addr add 10.0.3.2/24 dev ns3eth4
+ip -net "$ns3" addr add dead:beef:3::2/64 dev ns3eth4 nodad
+ip -net "$ns3" link set ns3eth4 up
+ip -net "$ns3" route add default via 10.0.2.1
+ip -net "$ns3" route add default via dead:beef:2::1
+ip netns exec "$ns3" sysctl -q net.ipv4.ip_forward=1
+ip netns exec "$ns3" sysctl -q net.ipv6.conf.all.forwarding=1
+
+ip -net "$ns4" addr add 10.0.3.1/24 dev ns4eth3
+ip -net "$ns4" addr add dead:beef:3::1/64 dev ns4eth3 nodad
+ip -net "$ns4" link set ns4eth3 up
+ip -net "$ns4" route add default via 10.0.3.2
+ip -net "$ns4" route add default via dead:beef:3::2
+
+set_ethtool_flags() {
+ local ns="$1"
+ local dev="$2"
+ local flags="$3"
+
+ ip netns exec $ns ethtool -K $dev $flags 2>/dev/null
+ [ $? -eq 0 ] && echo "INFO: set $ns dev $dev: ethtool -K $flags"
+}
+
+set_random_ethtool_flags() {
+ local flags=""
+ local r=$RANDOM
+
+ local pick1=$((r & 1))
+ local pick2=$((r & 2))
+ local pick3=$((r & 4))
+
+ [ $pick1 -ne 0 ] && flags="tso off"
+ [ $pick2 -ne 0 ] && flags="$flags gso off"
+ [ $pick3 -ne 0 ] && flags="$flags gro off"
+
+ [ -z "$flags" ] && return
+
+ set_ethtool_flags "$1" "$2" "$flags"
+}
+
+if $ethtool_random_on;then
+ set_random_ethtool_flags "$ns3" ns3eth2
+ set_random_ethtool_flags "$ns4" ns4eth3
+else
+ set_ethtool_flags "$ns3" ns3eth2 "$ethtool_args"
+ set_ethtool_flags "$ns4" ns4eth3 "$ethtool_args"
+fi
+
+print_file_err()
+{
+ ls -l "$1" 1>&2
+ echo "Trailing bytes are: "
+ tail -c 27 "$1"
+}
+
+check_transfer()
+{
+ local in=$1
+ local out=$2
+ local what=$3
+
+ cmp "$in" "$out" > /dev/null 2>&1
+ if [ $? -ne 0 ] ;then
+ echo "[ FAIL ] $what does not match (in, out):"
+ print_file_err "$in"
+ print_file_err "$out"
+
+ return 1
+ fi
+
+ return 0
+}
+
+check_mptcp_disabled()
+{
+ local disabled_ns
+ disabled_ns="ns_disabled-$sech-$(mktemp -u XXXXXX)"
+ ip netns add ${disabled_ns} || exit $ksft_skip
+
+ # net.mptcp.enabled should be enabled by default
+ if [ "$(ip netns exec ${disabled_ns} sysctl net.mptcp.enabled | awk '{ print $3 }')" -ne 1 ]; then
+ echo -e "net.mptcp.enabled sysctl is not 1 by default\t\t[ FAIL ]"
+ ret=1
+ return 1
+ fi
+ ip netns exec ${disabled_ns} sysctl -q net.mptcp.enabled=0
+
+ local err=0
+ LANG=C ip netns exec ${disabled_ns} ./mptcp_connect -t $timeout -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \
+ grep -q "^socket: Protocol not available$" && err=1
+ ip netns delete ${disabled_ns}
+
+ if [ ${err} -eq 0 ]; then
+ echo -e "New MPTCP socket cannot be blocked via sysctl\t\t[ FAIL ]"
+ ret=1
+ return 1
+ fi
+
+ echo -e "New MPTCP socket can be blocked via sysctl\t\t[ OK ]"
+ return 0
+}
+
+check_mptcp_ulp_setsockopt()
+{
+ local t retval
+ t="ns_ulp-$sech-$(mktemp -u XXXXXX)"
+
+ ip netns add ${t} || exit $ksft_skip
+ if ! ip netns exec ${t} ./mptcp_connect -u -p 10000 -s TCP 127.0.0.1 2>&1; then
+ printf "setsockopt(..., TCP_ULP, \"mptcp\", ...) allowed\t[ FAIL ]\n"
+ retval=1
+ ret=$retval
+ else
+ printf "setsockopt(..., TCP_ULP, \"mptcp\", ...) blocked\t[ OK ]\n"
+ retval=0
+ fi
+ ip netns del ${t}
+ return $retval
+}
+
+# $1: IP address
+is_v6()
+{
+ [ -z "${1##*:*}" ]
+}
+
+do_ping()
+{
+ local listener_ns="$1"
+ local connector_ns="$2"
+ local connect_addr="$3"
+ local ping_args="-q -c 1"
+
+ if is_v6 "${connect_addr}"; then
+ $ipv6 || return 0
+ ping_args="${ping_args} -6"
+ fi
+
+ ip netns exec ${connector_ns} ping ${ping_args} $connect_addr >/dev/null
+ if [ $? -ne 0 ] ; then
+ echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2
+ ret=1
+
+ return 1
+ fi
+
+ return 0
+}
+
+# $1: ns, $2: port
+wait_local_port_listen()
+{
+ local listener_ns="${1}"
+ local port="${2}"
+
+ local port_hex i
+
+ port_hex="$(printf "%04X" "${port}")"
+ for i in $(seq 10); do
+ ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
+ awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
+ break
+ sleep 0.1
+ done
+}
+
+do_transfer()
+{
+ local listener_ns="$1"
+ local connector_ns="$2"
+ local cl_proto="$3"
+ local srv_proto="$4"
+ local connect_addr="$5"
+ local local_addr="$6"
+ local extra_args=""
+
+ local port
+ port=$((10000+$TEST_COUNT))
+ TEST_COUNT=$((TEST_COUNT+1))
+
+ if [ "$sndbuf" -gt 0 ]; then
+ extra_args="$extra_args -b $sndbuf"
+ fi
+
+ if [ -n "$testmode" ]; then
+ extra_args="$extra_args -m $testmode"
+ fi
+
+ if [ -n "$extra_args" ] && $options_log; then
+ options_log=false
+ echo "INFO: extra options: $extra_args"
+ fi
+
+ :> "$cout"
+ :> "$sout"
+ :> "$capout"
+
+ local addr_port
+ addr_port=$(printf "%s:%d" ${connect_addr} ${port})
+ printf "%.3s %-5s -> %.3s (%-20s) %-5s\t" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto}
+
+ if $capture; then
+ local capuser
+ if [ -z $SUDO_USER ] ; then
+ capuser=""
+ else
+ capuser="-Z $SUDO_USER"
+ fi
+
+ local capfile="${listener_ns}-${connector_ns}-${cl_proto}-${srv_proto}-${connect_addr}.pcap"
+
+ ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
+ local cappid=$!
+
+ sleep 1
+ fi
+
+ ip netns exec ${listener_ns} ./mptcp_connect -t $timeout -l -p $port -s ${srv_proto} $extra_args $local_addr < "$sin" > "$sout" &
+ local spid=$!
+
+ wait_local_port_listen "${listener_ns}" "${port}"
+
+ local start
+ start=$(date +%s%3N)
+ ip netns exec ${connector_ns} ./mptcp_connect -t $timeout -p $port -s ${cl_proto} $extra_args $connect_addr < "$cin" > "$cout" &
+ local cpid=$!
+
+ wait $cpid
+ local retc=$?
+ wait $spid
+ local rets=$?
+
+ local stop
+ stop=$(date +%s%3N)
+
+ if $capture; then
+ sleep 1
+ kill $cappid
+ fi
+
+ local duration
+ duration=$((stop-start))
+ duration=$(printf "(duration %05sms)" $duration)
+ if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+ echo "$duration [ FAIL ] client exit code $retc, server $rets" 1>&2
+ echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2
+ ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port"
+ echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2
+ ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port"
+
+ cat "$capout"
+ return 1
+ fi
+
+ check_transfer $sin $cout "file received by client"
+ retc=$?
+ check_transfer $cin $sout "file received by server"
+ rets=$?
+
+ if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
+ echo "$duration [ OK ]"
+ cat "$capout"
+ return 0
+ fi
+
+ cat "$capout"
+ return 1
+}
+
+make_file()
+{
+ local name=$1
+ local who=$2
+
+ local SIZE TSIZE
+ SIZE=$((RANDOM % (1024 * 8)))
+ TSIZE=$((SIZE * 1024))
+
+ dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
+
+ SIZE=$((RANDOM % 1024))
+ SIZE=$((SIZE + 128))
+ TSIZE=$((TSIZE + SIZE))
+ dd if=/dev/urandom conv=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null
+ echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
+
+ echo "Created $name (size $TSIZE) containing data sent by $who"
+}
+
+run_tests_lo()
+{
+ local listener_ns="$1"
+ local connector_ns="$2"
+ local connect_addr="$3"
+ local loopback="$4"
+ local lret=0
+
+ # skip if test programs are running inside same netns for subsequent runs.
+ if [ $loopback -eq 0 ] && [ ${listener_ns} = ${connector_ns} ]; then
+ return 0
+ fi
+
+ # skip if we don't want v6
+ if ! $ipv6 && is_v6 "${connect_addr}"; then
+ return 0
+ fi
+
+ local local_addr
+ if is_v6 "${connect_addr}"; then
+ local_addr="::"
+ else
+ local_addr="0.0.0.0"
+ fi
+
+ do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${local_addr}
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ return 1
+ fi
+
+ # don't bother testing fallback tcp except for loopback case.
+ if [ ${listener_ns} != ${connector_ns} ]; then
+ return 0
+ fi
+
+ do_transfer ${listener_ns} ${connector_ns} MPTCP TCP ${connect_addr} ${local_addr}
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ return 1
+ fi
+
+ do_transfer ${listener_ns} ${connector_ns} TCP MPTCP ${connect_addr} ${local_addr}
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ return 1
+ fi
+
+ return 0
+}
+
+run_tests()
+{
+ run_tests_lo $1 $2 $3 0
+}
+
+make_file "$cin" "client"
+make_file "$sin" "server"
+
+check_mptcp_disabled
+
+check_mptcp_ulp_setsockopt
+
+echo "INFO: validating network environment with pings"
+for sender in "$ns1" "$ns2" "$ns3" "$ns4";do
+ do_ping "$ns1" $sender 10.0.1.1
+ do_ping "$ns1" $sender dead:beef:1::1
+
+ do_ping "$ns2" $sender 10.0.1.2
+ do_ping "$ns2" $sender dead:beef:1::2
+ do_ping "$ns2" $sender 10.0.2.1
+ do_ping "$ns2" $sender dead:beef:2::1
+
+ do_ping "$ns3" $sender 10.0.2.2
+ do_ping "$ns3" $sender dead:beef:2::2
+ do_ping "$ns3" $sender 10.0.3.2
+ do_ping "$ns3" $sender dead:beef:3::2
+
+ do_ping "$ns4" $sender 10.0.3.1
+ do_ping "$ns4" $sender dead:beef:3::1
+done
+
+[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss
+echo -n "INFO: Using loss of $tc_loss "
+test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms "
+
+if [ -z "${tc_reorder}" ]; then
+ reorder1=$((RANDOM%10))
+ reorder1=$((100 - reorder1))
+ reorder2=$((RANDOM%100))
+
+ if [ $tc_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then
+ tc_reorder="reorder ${reorder1}% ${reorder2}%"
+ echo -n "$tc_reorder "
+ fi
+elif [ "$tc_reorder" = "0" ];then
+ tc_reorder=""
+elif [ "$tc_delay" -gt 0 ];then
+ # reordering requires some delay
+ tc_reorder="reorder $tc_reorder"
+ echo -n "$tc_reorder "
+fi
+
+echo "on ns3eth4"
+
+tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${tc_delay}ms $tc_reorder
+
+for sender in $ns1 $ns2 $ns3 $ns4;do
+ run_tests_lo "$ns1" "$sender" 10.0.1.1 1
+ if [ $ret -ne 0 ] ;then
+ echo "FAIL: Could not even run loopback test" 1>&2
+ exit $ret
+ fi
+ run_tests_lo "$ns1" $sender dead:beef:1::1 1
+ if [ $ret -ne 0 ] ;then
+ echo "FAIL: Could not even run loopback v6 test" 2>&1
+ exit $ret
+ fi
+
+ run_tests "$ns2" $sender 10.0.1.2
+ run_tests "$ns2" $sender dead:beef:1::2
+ run_tests "$ns2" $sender 10.0.2.1
+ run_tests "$ns2" $sender dead:beef:2::1
+
+ run_tests "$ns3" $sender 10.0.2.2
+ run_tests "$ns3" $sender dead:beef:2::2
+ run_tests "$ns3" $sender 10.0.3.2
+ run_tests "$ns3" $sender dead:beef:3::2
+
+ run_tests "$ns4" $sender 10.0.3.1
+ run_tests "$ns4" $sender dead:beef:3::1
+done
+
+time_end=$(date +%s)
+time_run=$((time_end-time_start))
+
+echo "Time: ${time_run} seconds"
+
+exit $ret
diff --git a/tools/testing/selftests/net/mptcp/settings b/tools/testing/selftests/net/mptcp/settings
new file mode 100644
index 000000000000..026384c189c9
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/settings
@@ -0,0 +1 @@
+timeout=450
diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c
new file mode 100644
index 000000000000..93208caacbe6
--- /dev/null
+++ b/tools/testing/selftests/net/nettest.c
@@ -0,0 +1,1813 @@
+// SPDX-License-Identifier: GPL-2.0
+/* nettest - used for functional tests of networking APIs
+ *
+ * Copyright (c) 2013-2019 David Ahern <dsahern@gmail.com>. All rights reserved.
+ */
+
+#define _GNU_SOURCE
+#include <features.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <linux/tcp.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <time.h>
+#include <errno.h>
+
+#ifndef IPV6_UNICAST_IF
+#define IPV6_UNICAST_IF 76
+#endif
+#ifndef IPV6_MULTICAST_IF
+#define IPV6_MULTICAST_IF 17
+#endif
+
+#define DEFAULT_PORT 12345
+
+#ifndef MAX
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#endif
+#ifndef MIN
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
+struct sock_args {
+ /* local address */
+ union {
+ struct in_addr in;
+ struct in6_addr in6;
+ } local_addr;
+
+ /* remote address */
+ union {
+ struct in_addr in;
+ struct in6_addr in6;
+ } remote_addr;
+ int scope_id; /* remote scope; v6 send only */
+
+ struct in_addr grp; /* multicast group */
+
+ unsigned int has_local_ip:1,
+ has_remote_ip:1,
+ has_grp:1,
+ has_expected_laddr:1,
+ has_expected_raddr:1,
+ bind_test_only:1;
+
+ unsigned short port;
+
+ int type; /* DGRAM, STREAM, RAW */
+ int protocol;
+ int version; /* AF_INET/AF_INET6 */
+
+ int use_setsockopt;
+ int use_cmsg;
+ const char *dev;
+ int ifindex;
+
+ const char *password;
+ /* prefix for MD5 password */
+ union {
+ struct sockaddr_in v4;
+ struct sockaddr_in6 v6;
+ } md5_prefix;
+ unsigned int prefix_len;
+
+ /* expected addresses and device index for connection */
+ int expected_ifindex;
+
+ /* local address */
+ union {
+ struct in_addr in;
+ struct in6_addr in6;
+ } expected_laddr;
+
+ /* remote address */
+ union {
+ struct in_addr in;
+ struct in6_addr in6;
+ } expected_raddr;
+};
+
+static int server_mode;
+static unsigned int prog_timeout = 5;
+static unsigned int interactive;
+static int iter = 1;
+static char *msg = "Hello world!";
+static int msglen;
+static int quiet;
+static int try_broadcast = 1;
+
+static char *timestamp(char *timebuf, int buflen)
+{
+ time_t now;
+
+ now = time(NULL);
+ if (strftime(timebuf, buflen, "%T", localtime(&now)) == 0) {
+ memset(timebuf, 0, buflen);
+ strncpy(timebuf, "00:00:00", buflen-1);
+ }
+
+ return timebuf;
+}
+
+static void log_msg(const char *format, ...)
+{
+ char timebuf[64];
+ va_list args;
+
+ if (quiet)
+ return;
+
+ fprintf(stdout, "%s %s:",
+ timestamp(timebuf, sizeof(timebuf)),
+ server_mode ? "server" : "client");
+ va_start(args, format);
+ vfprintf(stdout, format, args);
+ va_end(args);
+
+ fflush(stdout);
+}
+
+static void log_error(const char *format, ...)
+{
+ char timebuf[64];
+ va_list args;
+
+ if (quiet)
+ return;
+
+ fprintf(stderr, "%s %s:",
+ timestamp(timebuf, sizeof(timebuf)),
+ server_mode ? "server" : "client");
+ va_start(args, format);
+ vfprintf(stderr, format, args);
+ va_end(args);
+
+ fflush(stderr);
+}
+
+static void log_err_errno(const char *fmt, ...)
+{
+ char timebuf[64];
+ va_list args;
+
+ if (quiet)
+ return;
+
+ fprintf(stderr, "%s %s: ",
+ timestamp(timebuf, sizeof(timebuf)),
+ server_mode ? "server" : "client");
+ va_start(args, fmt);
+ vfprintf(stderr, fmt, args);
+ va_end(args);
+
+ fprintf(stderr, ": %d: %s\n", errno, strerror(errno));
+ fflush(stderr);
+}
+
+static void log_address(const char *desc, struct sockaddr *sa)
+{
+ char addrstr[64];
+
+ if (quiet)
+ return;
+
+ if (sa->sa_family == AF_INET) {
+ struct sockaddr_in *s = (struct sockaddr_in *) sa;
+
+ log_msg("%s %s:%d",
+ desc,
+ inet_ntop(AF_INET, &s->sin_addr, addrstr,
+ sizeof(addrstr)),
+ ntohs(s->sin_port));
+
+ } else if (sa->sa_family == AF_INET6) {
+ struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) sa;
+
+ log_msg("%s [%s]:%d",
+ desc,
+ inet_ntop(AF_INET6, &s6->sin6_addr, addrstr,
+ sizeof(addrstr)),
+ ntohs(s6->sin6_port));
+ }
+
+ printf("\n");
+
+ fflush(stdout);
+}
+
+static int tcp_md5sig(int sd, void *addr, socklen_t alen, struct sock_args *args)
+{
+ int keylen = strlen(args->password);
+ struct tcp_md5sig md5sig = {};
+ int opt = TCP_MD5SIG;
+ int rc;
+
+ md5sig.tcpm_keylen = keylen;
+ memcpy(md5sig.tcpm_key, args->password, keylen);
+
+ if (args->prefix_len) {
+ opt = TCP_MD5SIG_EXT;
+ md5sig.tcpm_flags |= TCP_MD5SIG_FLAG_PREFIX;
+
+ md5sig.tcpm_prefixlen = args->prefix_len;
+ addr = &args->md5_prefix;
+ }
+ memcpy(&md5sig.tcpm_addr, addr, alen);
+
+ if (args->ifindex) {
+ opt = TCP_MD5SIG_EXT;
+ md5sig.tcpm_flags |= TCP_MD5SIG_FLAG_IFINDEX;
+
+ md5sig.tcpm_ifindex = args->ifindex;
+ }
+
+ rc = setsockopt(sd, IPPROTO_TCP, opt, &md5sig, sizeof(md5sig));
+ if (rc < 0) {
+ /* ENOENT is harmless. Returned when a password is cleared */
+ if (errno == ENOENT)
+ rc = 0;
+ else
+ log_err_errno("setsockopt(TCP_MD5SIG)");
+ }
+
+ return rc;
+}
+
+static int tcp_md5_remote(int sd, struct sock_args *args)
+{
+ struct sockaddr_in sin = {
+ .sin_family = AF_INET,
+ };
+ struct sockaddr_in6 sin6 = {
+ .sin6_family = AF_INET6,
+ };
+ void *addr;
+ int alen;
+
+ switch (args->version) {
+ case AF_INET:
+ sin.sin_port = htons(args->port);
+ sin.sin_addr = args->remote_addr.in;
+ addr = &sin;
+ alen = sizeof(sin);
+ break;
+ case AF_INET6:
+ sin6.sin6_port = htons(args->port);
+ sin6.sin6_addr = args->remote_addr.in6;
+ addr = &sin6;
+ alen = sizeof(sin6);
+ break;
+ default:
+ log_error("unknown address family\n");
+ exit(1);
+ }
+
+ if (tcp_md5sig(sd, addr, alen, args))
+ return -1;
+
+ return 0;
+}
+
+static int get_ifidx(const char *ifname)
+{
+ struct ifreq ifdata;
+ int sd, rc;
+
+ if (!ifname || *ifname == '\0')
+ return -1;
+
+ memset(&ifdata, 0, sizeof(ifdata));
+
+ strcpy(ifdata.ifr_name, ifname);
+
+ sd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
+ if (sd < 0) {
+ log_err_errno("socket failed");
+ return -1;
+ }
+
+ rc = ioctl(sd, SIOCGIFINDEX, (char *)&ifdata);
+ close(sd);
+ if (rc != 0) {
+ log_err_errno("ioctl(SIOCGIFINDEX) failed");
+ return -1;
+ }
+
+ return ifdata.ifr_ifindex;
+}
+
+static int bind_to_device(int sd, const char *name)
+{
+ int rc;
+
+ rc = setsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, strlen(name)+1);
+ if (rc < 0)
+ log_err_errno("setsockopt(SO_BINDTODEVICE)");
+
+ return rc;
+}
+
+static int get_bind_to_device(int sd, char *name, size_t len)
+{
+ int rc;
+ socklen_t optlen = len;
+
+ name[0] = '\0';
+ rc = getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen);
+ if (rc < 0)
+ log_err_errno("setsockopt(SO_BINDTODEVICE)");
+
+ return rc;
+}
+
+static int check_device(int sd, struct sock_args *args)
+{
+ int ifindex = 0;
+ char name[32];
+
+ if (get_bind_to_device(sd, name, sizeof(name)))
+ *name = '\0';
+ else
+ ifindex = get_ifidx(name);
+
+ log_msg(" bound to device %s/%d\n",
+ *name ? name : "<none>", ifindex);
+
+ if (!args->expected_ifindex)
+ return 0;
+
+ if (args->expected_ifindex != ifindex) {
+ log_error("Device index mismatch: expected %d have %d\n",
+ args->expected_ifindex, ifindex);
+ return 1;
+ }
+
+ log_msg("Device index matches: expected %d have %d\n",
+ args->expected_ifindex, ifindex);
+
+ return 0;
+}
+
+static int set_pktinfo_v4(int sd)
+{
+ int one = 1;
+ int rc;
+
+ rc = setsockopt(sd, SOL_IP, IP_PKTINFO, &one, sizeof(one));
+ if (rc < 0 && rc != -ENOTSUP)
+ log_err_errno("setsockopt(IP_PKTINFO)");
+
+ return rc;
+}
+
+static int set_recvpktinfo_v6(int sd)
+{
+ int one = 1;
+ int rc;
+
+ rc = setsockopt(sd, SOL_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one));
+ if (rc < 0 && rc != -ENOTSUP)
+ log_err_errno("setsockopt(IPV6_RECVPKTINFO)");
+
+ return rc;
+}
+
+static int set_recverr_v4(int sd)
+{
+ int one = 1;
+ int rc;
+
+ rc = setsockopt(sd, SOL_IP, IP_RECVERR, &one, sizeof(one));
+ if (rc < 0 && rc != -ENOTSUP)
+ log_err_errno("setsockopt(IP_RECVERR)");
+
+ return rc;
+}
+
+static int set_recverr_v6(int sd)
+{
+ int one = 1;
+ int rc;
+
+ rc = setsockopt(sd, SOL_IPV6, IPV6_RECVERR, &one, sizeof(one));
+ if (rc < 0 && rc != -ENOTSUP)
+ log_err_errno("setsockopt(IPV6_RECVERR)");
+
+ return rc;
+}
+
+static int set_unicast_if(int sd, int ifindex, int version)
+{
+ int opt = IP_UNICAST_IF;
+ int level = SOL_IP;
+ int rc;
+
+ ifindex = htonl(ifindex);
+
+ if (version == AF_INET6) {
+ opt = IPV6_UNICAST_IF;
+ level = SOL_IPV6;
+ }
+ rc = setsockopt(sd, level, opt, &ifindex, sizeof(ifindex));
+ if (rc < 0)
+ log_err_errno("setsockopt(IP_UNICAST_IF)");
+
+ return rc;
+}
+
+static int set_multicast_if(int sd, int ifindex)
+{
+ struct ip_mreqn mreq = { .imr_ifindex = ifindex };
+ int rc;
+
+ rc = setsockopt(sd, SOL_IP, IP_MULTICAST_IF, &mreq, sizeof(mreq));
+ if (rc < 0)
+ log_err_errno("setsockopt(IP_MULTICAST_IF)");
+
+ return rc;
+}
+
+static int set_membership(int sd, uint32_t grp, uint32_t addr, int ifindex)
+{
+ uint32_t if_addr = addr;
+ struct ip_mreqn mreq;
+ int rc;
+
+ if (addr == htonl(INADDR_ANY) && !ifindex) {
+ log_error("Either local address or device needs to be given for multicast membership\n");
+ return -1;
+ }
+
+ mreq.imr_multiaddr.s_addr = grp;
+ mreq.imr_address.s_addr = if_addr;
+ mreq.imr_ifindex = ifindex;
+
+ rc = setsockopt(sd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq));
+ if (rc < 0) {
+ log_err_errno("setsockopt(IP_ADD_MEMBERSHIP)");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int set_broadcast(int sd)
+{
+ unsigned int one = 1;
+ int rc = 0;
+
+ if (setsockopt(sd, SOL_SOCKET, SO_BROADCAST, &one, sizeof(one)) != 0) {
+ log_err_errno("setsockopt(SO_BROADCAST)");
+ rc = -1;
+ }
+
+ return rc;
+}
+
+static int set_reuseport(int sd)
+{
+ unsigned int one = 1;
+ int rc = 0;
+
+ if (setsockopt(sd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) != 0) {
+ log_err_errno("setsockopt(SO_REUSEPORT)");
+ rc = -1;
+ }
+
+ return rc;
+}
+
+static int set_reuseaddr(int sd)
+{
+ unsigned int one = 1;
+ int rc = 0;
+
+ if (setsockopt(sd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) != 0) {
+ log_err_errno("setsockopt(SO_REUSEADDR)");
+ rc = -1;
+ }
+
+ return rc;
+}
+
+static int str_to_uint(const char *str, int min, int max, unsigned int *value)
+{
+ int number;
+ char *end;
+
+ errno = 0;
+ number = (unsigned int) strtoul(str, &end, 0);
+
+ /* entire string should be consumed by conversion
+ * and value should be between min and max
+ */
+ if (((*end == '\0') || (*end == '\n')) && (end != str) &&
+ (errno != ERANGE) && (min <= number) && (number <= max)) {
+ *value = number;
+ return 0;
+ }
+
+ return -1;
+}
+
+static int expected_addr_match(struct sockaddr *sa, void *expected,
+ const char *desc)
+{
+ char addrstr[64];
+ int rc = 0;
+
+ if (sa->sa_family == AF_INET) {
+ struct sockaddr_in *s = (struct sockaddr_in *) sa;
+ struct in_addr *exp_in = (struct in_addr *) expected;
+
+ if (s->sin_addr.s_addr != exp_in->s_addr) {
+ log_error("%s address does not match expected %s",
+ desc,
+ inet_ntop(AF_INET, exp_in,
+ addrstr, sizeof(addrstr)));
+ rc = 1;
+ }
+ } else if (sa->sa_family == AF_INET6) {
+ struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) sa;
+ struct in6_addr *exp_in = (struct in6_addr *) expected;
+
+ if (memcmp(&s6->sin6_addr, exp_in, sizeof(*exp_in))) {
+ log_error("%s address does not match expected %s",
+ desc,
+ inet_ntop(AF_INET6, exp_in,
+ addrstr, sizeof(addrstr)));
+ rc = 1;
+ }
+ } else {
+ log_error("%s address does not match expected - unknown family",
+ desc);
+ rc = 1;
+ }
+
+ if (!rc)
+ log_msg("%s address matches expected\n", desc);
+
+ return rc;
+}
+
+static int show_sockstat(int sd, struct sock_args *args)
+{
+ struct sockaddr_in6 local_addr, remote_addr;
+ socklen_t alen = sizeof(local_addr);
+ struct sockaddr *sa;
+ const char *desc;
+ int rc = 0;
+
+ desc = server_mode ? "server local:" : "client local:";
+ sa = (struct sockaddr *) &local_addr;
+ if (getsockname(sd, sa, &alen) == 0) {
+ log_address(desc, sa);
+
+ if (args->has_expected_laddr) {
+ rc = expected_addr_match(sa, &args->expected_laddr,
+ "local");
+ }
+ } else {
+ log_err_errno("getsockname failed");
+ }
+
+ sa = (struct sockaddr *) &remote_addr;
+ desc = server_mode ? "server peer:" : "client peer:";
+ if (getpeername(sd, sa, &alen) == 0) {
+ log_address(desc, sa);
+
+ if (args->has_expected_raddr) {
+ rc |= expected_addr_match(sa, &args->expected_raddr,
+ "remote");
+ }
+ } else {
+ log_err_errno("getpeername failed");
+ }
+
+ return rc;
+}
+
+static int get_index_from_cmsg(struct msghdr *m)
+{
+ struct cmsghdr *cm;
+ int ifindex = 0;
+ char buf[64];
+
+ for (cm = (struct cmsghdr *)CMSG_FIRSTHDR(m);
+ m->msg_controllen != 0 && cm;
+ cm = (struct cmsghdr *)CMSG_NXTHDR(m, cm)) {
+
+ if (cm->cmsg_level == SOL_IP &&
+ cm->cmsg_type == IP_PKTINFO) {
+ struct in_pktinfo *pi;
+
+ pi = (struct in_pktinfo *)(CMSG_DATA(cm));
+ inet_ntop(AF_INET, &pi->ipi_addr, buf, sizeof(buf));
+ ifindex = pi->ipi_ifindex;
+ } else if (cm->cmsg_level == SOL_IPV6 &&
+ cm->cmsg_type == IPV6_PKTINFO) {
+ struct in6_pktinfo *pi6;
+
+ pi6 = (struct in6_pktinfo *)(CMSG_DATA(cm));
+ inet_ntop(AF_INET6, &pi6->ipi6_addr, buf, sizeof(buf));
+ ifindex = pi6->ipi6_ifindex;
+ }
+ }
+
+ if (ifindex) {
+ log_msg(" pktinfo: ifindex %d dest addr %s\n",
+ ifindex, buf);
+ }
+ return ifindex;
+}
+
+static int send_msg_no_cmsg(int sd, void *addr, socklen_t alen)
+{
+ int err;
+
+again:
+ err = sendto(sd, msg, msglen, 0, addr, alen);
+ if (err < 0) {
+ if (errno == EACCES && try_broadcast) {
+ try_broadcast = 0;
+ if (!set_broadcast(sd))
+ goto again;
+ errno = EACCES;
+ }
+
+ log_err_errno("sendto failed");
+ return 1;
+ }
+
+ return 0;
+}
+
+static int send_msg_cmsg(int sd, void *addr, socklen_t alen,
+ int ifindex, int version)
+{
+ unsigned char cmsgbuf[64];
+ struct iovec iov[2];
+ struct cmsghdr *cm;
+ struct msghdr m;
+ int err;
+
+ iov[0].iov_base = msg;
+ iov[0].iov_len = msglen;
+ m.msg_iov = iov;
+ m.msg_iovlen = 1;
+ m.msg_name = (caddr_t)addr;
+ m.msg_namelen = alen;
+
+ memset(cmsgbuf, 0, sizeof(cmsgbuf));
+ cm = (struct cmsghdr *)cmsgbuf;
+ m.msg_control = (caddr_t)cm;
+
+ if (version == AF_INET) {
+ struct in_pktinfo *pi;
+
+ cm->cmsg_level = SOL_IP;
+ cm->cmsg_type = IP_PKTINFO;
+ cm->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
+ pi = (struct in_pktinfo *)(CMSG_DATA(cm));
+ pi->ipi_ifindex = ifindex;
+
+ m.msg_controllen = cm->cmsg_len;
+
+ } else if (version == AF_INET6) {
+ struct in6_pktinfo *pi6;
+
+ cm->cmsg_level = SOL_IPV6;
+ cm->cmsg_type = IPV6_PKTINFO;
+ cm->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
+
+ pi6 = (struct in6_pktinfo *)(CMSG_DATA(cm));
+ pi6->ipi6_ifindex = ifindex;
+
+ m.msg_controllen = cm->cmsg_len;
+ }
+
+again:
+ err = sendmsg(sd, &m, 0);
+ if (err < 0) {
+ if (errno == EACCES && try_broadcast) {
+ try_broadcast = 0;
+ if (!set_broadcast(sd))
+ goto again;
+ errno = EACCES;
+ }
+
+ log_err_errno("sendmsg failed");
+ return 1;
+ }
+
+ return 0;
+}
+
+
+static int send_msg(int sd, void *addr, socklen_t alen, struct sock_args *args)
+{
+ if (args->type == SOCK_STREAM) {
+ if (write(sd, msg, msglen) < 0) {
+ log_err_errno("write failed sending msg to peer");
+ return 1;
+ }
+ } else if (args->ifindex && args->use_cmsg) {
+ if (send_msg_cmsg(sd, addr, alen, args->ifindex, args->version))
+ return 1;
+ } else {
+ if (send_msg_no_cmsg(sd, addr, alen))
+ return 1;
+ }
+
+ log_msg("Sent message:\n");
+ log_msg(" %.24s%s\n", msg, msglen > 24 ? " ..." : "");
+
+ return 0;
+}
+
+static int socket_read_dgram(int sd, struct sock_args *args)
+{
+ unsigned char addr[sizeof(struct sockaddr_in6)];
+ struct sockaddr *sa = (struct sockaddr *) addr;
+ socklen_t alen = sizeof(addr);
+ struct iovec iov[2];
+ struct msghdr m = {
+ .msg_name = (caddr_t)addr,
+ .msg_namelen = alen,
+ .msg_iov = iov,
+ .msg_iovlen = 1,
+ };
+ unsigned char cmsgbuf[256];
+ struct cmsghdr *cm = (struct cmsghdr *)cmsgbuf;
+ char buf[16*1024];
+ int ifindex;
+ int len;
+
+ iov[0].iov_base = (caddr_t)buf;
+ iov[0].iov_len = sizeof(buf);
+
+ memset(cmsgbuf, 0, sizeof(cmsgbuf));
+ m.msg_control = (caddr_t)cm;
+ m.msg_controllen = sizeof(cmsgbuf);
+
+ len = recvmsg(sd, &m, 0);
+ if (len == 0) {
+ log_msg("peer closed connection.\n");
+ return 0;
+ } else if (len < 0) {
+ log_msg("failed to read message: %d: %s\n",
+ errno, strerror(errno));
+ return -1;
+ }
+
+ buf[len] = '\0';
+
+ log_address("Message from:", sa);
+ log_msg(" %.24s%s\n", buf, len > 24 ? " ..." : "");
+
+ ifindex = get_index_from_cmsg(&m);
+ if (args->expected_ifindex) {
+ if (args->expected_ifindex != ifindex) {
+ log_error("Device index mismatch: expected %d have %d\n",
+ args->expected_ifindex, ifindex);
+ return -1;
+ }
+ log_msg("Device index matches: expected %d have %d\n",
+ args->expected_ifindex, ifindex);
+ }
+
+ if (!interactive && server_mode) {
+ if (sa->sa_family == AF_INET6) {
+ struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) sa;
+ struct in6_addr *in6 = &s6->sin6_addr;
+
+ if (IN6_IS_ADDR_V4MAPPED(in6)) {
+ const uint32_t *pa = (uint32_t *) &in6->s6_addr;
+ struct in_addr in4;
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *) addr;
+ pa += 3;
+ in4.s_addr = *pa;
+ sin->sin_addr = in4;
+ sin->sin_family = AF_INET;
+ if (send_msg_cmsg(sd, addr, alen,
+ ifindex, AF_INET) < 0)
+ goto out_err;
+ }
+ }
+again:
+ iov[0].iov_len = len;
+
+ if (args->version == AF_INET6) {
+ struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) sa;
+
+ if (args->dev) {
+ /* avoid PKTINFO conflicts with bindtodev */
+ if (sendto(sd, buf, len, 0,
+ (void *) addr, alen) < 0)
+ goto out_err;
+ } else {
+ /* kernel is allowing scope_id to be set to VRF
+ * index for LLA. for sends to global address
+ * reset scope id
+ */
+ s6->sin6_scope_id = ifindex;
+ if (sendmsg(sd, &m, 0) < 0)
+ goto out_err;
+ }
+ } else {
+ int err;
+
+ err = sendmsg(sd, &m, 0);
+ if (err < 0) {
+ if (errno == EACCES && try_broadcast) {
+ try_broadcast = 0;
+ if (!set_broadcast(sd))
+ goto again;
+ errno = EACCES;
+ }
+ goto out_err;
+ }
+ }
+ log_msg("Sent message:\n");
+ log_msg(" %.24s%s\n", buf, len > 24 ? " ..." : "");
+ }
+
+ return 1;
+out_err:
+ log_err_errno("failed to send msg to peer");
+ return -1;
+}
+
+static int socket_read_stream(int sd)
+{
+ char buf[1024];
+ int len;
+
+ len = read(sd, buf, sizeof(buf)-1);
+ if (len == 0) {
+ log_msg("client closed connection.\n");
+ return 0;
+ } else if (len < 0) {
+ log_msg("failed to read message\n");
+ return -1;
+ }
+
+ buf[len] = '\0';
+ log_msg("Incoming message:\n");
+ log_msg(" %.24s%s\n", buf, len > 24 ? " ..." : "");
+
+ if (!interactive && server_mode) {
+ if (write(sd, buf, len) < 0) {
+ log_err_errno("failed to send buf");
+ return -1;
+ }
+ log_msg("Sent message:\n");
+ log_msg(" %.24s%s\n", buf, len > 24 ? " ..." : "");
+ }
+
+ return 1;
+}
+
+static int socket_read(int sd, struct sock_args *args)
+{
+ if (args->type == SOCK_STREAM)
+ return socket_read_stream(sd);
+
+ return socket_read_dgram(sd, args);
+}
+
+static int stdin_to_socket(int sd, int type, void *addr, socklen_t alen)
+{
+ char buf[1024];
+ int len;
+
+ if (fgets(buf, sizeof(buf), stdin) == NULL)
+ return 0;
+
+ len = strlen(buf);
+ if (type == SOCK_STREAM) {
+ if (write(sd, buf, len) < 0) {
+ log_err_errno("failed to send buf");
+ return -1;
+ }
+ } else {
+ int err;
+
+again:
+ err = sendto(sd, buf, len, 0, addr, alen);
+ if (err < 0) {
+ if (errno == EACCES && try_broadcast) {
+ try_broadcast = 0;
+ if (!set_broadcast(sd))
+ goto again;
+ errno = EACCES;
+ }
+ log_err_errno("failed to send msg to peer");
+ return -1;
+ }
+ }
+ log_msg("Sent message:\n");
+ log_msg(" %.24s%s\n", buf, len > 24 ? " ..." : "");
+
+ return 1;
+}
+
+static void set_recv_attr(int sd, int version)
+{
+ if (version == AF_INET6) {
+ set_recvpktinfo_v6(sd);
+ set_recverr_v6(sd);
+ } else {
+ set_pktinfo_v4(sd);
+ set_recverr_v4(sd);
+ }
+}
+
+static int msg_loop(int client, int sd, void *addr, socklen_t alen,
+ struct sock_args *args)
+{
+ struct timeval timeout = { .tv_sec = prog_timeout }, *ptval = NULL;
+ fd_set rfds;
+ int nfds;
+ int rc;
+
+ if (args->type != SOCK_STREAM)
+ set_recv_attr(sd, args->version);
+
+ if (msg) {
+ msglen = strlen(msg);
+
+ /* client sends first message */
+ if (client) {
+ if (send_msg(sd, addr, alen, args))
+ return 1;
+ }
+ if (!interactive) {
+ ptval = &timeout;
+ if (!prog_timeout)
+ timeout.tv_sec = 5;
+ }
+ }
+
+ nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1;
+ while (1) {
+ FD_ZERO(&rfds);
+ FD_SET(sd, &rfds);
+ if (interactive)
+ FD_SET(fileno(stdin), &rfds);
+
+ rc = select(nfds, &rfds, NULL, NULL, ptval);
+ if (rc < 0) {
+ if (errno == EINTR)
+ continue;
+
+ rc = 1;
+ log_err_errno("select failed");
+ break;
+ } else if (rc == 0) {
+ log_error("Timed out waiting for response\n");
+ rc = 2;
+ break;
+ }
+
+ if (FD_ISSET(sd, &rfds)) {
+ rc = socket_read(sd, args);
+ if (rc < 0) {
+ rc = 1;
+ break;
+ }
+ if (rc == 0)
+ break;
+ }
+
+ rc = 0;
+
+ if (FD_ISSET(fileno(stdin), &rfds)) {
+ if (stdin_to_socket(sd, args->type, addr, alen) <= 0)
+ break;
+ }
+
+ if (interactive)
+ continue;
+
+ if (iter != -1) {
+ --iter;
+ if (iter == 0)
+ break;
+ }
+
+ log_msg("Going into quiet mode\n");
+ quiet = 1;
+
+ if (client) {
+ if (send_msg(sd, addr, alen, args)) {
+ rc = 1;
+ break;
+ }
+ }
+ }
+
+ return rc;
+}
+
+static int msock_init(struct sock_args *args, int server)
+{
+ uint32_t if_addr = htonl(INADDR_ANY);
+ struct sockaddr_in laddr = {
+ .sin_family = AF_INET,
+ .sin_port = htons(args->port),
+ };
+ int one = 1;
+ int sd;
+
+ if (!server && args->has_local_ip)
+ if_addr = args->local_addr.in.s_addr;
+
+ sd = socket(PF_INET, SOCK_DGRAM, 0);
+ if (sd < 0) {
+ log_err_errno("socket");
+ return -1;
+ }
+
+ if (setsockopt(sd, SOL_SOCKET, SO_REUSEADDR,
+ (char *)&one, sizeof(one)) < 0) {
+ log_err_errno("Setting SO_REUSEADDR error");
+ goto out_err;
+ }
+
+ if (setsockopt(sd, SOL_SOCKET, SO_BROADCAST,
+ (char *)&one, sizeof(one)) < 0)
+ log_err_errno("Setting SO_BROADCAST error");
+
+ if (args->dev && bind_to_device(sd, args->dev) != 0)
+ goto out_err;
+ else if (args->use_setsockopt &&
+ set_multicast_if(sd, args->ifindex))
+ goto out_err;
+
+ laddr.sin_addr.s_addr = if_addr;
+
+ if (bind(sd, (struct sockaddr *) &laddr, sizeof(laddr)) < 0) {
+ log_err_errno("bind failed");
+ goto out_err;
+ }
+
+ if (server &&
+ set_membership(sd, args->grp.s_addr,
+ args->local_addr.in.s_addr, args->ifindex))
+ goto out_err;
+
+ return sd;
+out_err:
+ close(sd);
+ return -1;
+}
+
+static int msock_server(struct sock_args *args)
+{
+ return msock_init(args, 1);
+}
+
+static int msock_client(struct sock_args *args)
+{
+ return msock_init(args, 0);
+}
+
+static int bind_socket(int sd, struct sock_args *args)
+{
+ struct sockaddr_in serv_addr = {
+ .sin_family = AF_INET,
+ };
+ struct sockaddr_in6 serv6_addr = {
+ .sin6_family = AF_INET6,
+ };
+ void *addr;
+ socklen_t alen;
+
+ if (!args->has_local_ip && args->type == SOCK_RAW)
+ return 0;
+
+ switch (args->version) {
+ case AF_INET:
+ serv_addr.sin_port = htons(args->port);
+ serv_addr.sin_addr = args->local_addr.in;
+ addr = &serv_addr;
+ alen = sizeof(serv_addr);
+ break;
+
+ case AF_INET6:
+ serv6_addr.sin6_port = htons(args->port);
+ serv6_addr.sin6_addr = args->local_addr.in6;
+ addr = &serv6_addr;
+ alen = sizeof(serv6_addr);
+ break;
+
+ default:
+ log_error("Invalid address family\n");
+ return -1;
+ }
+
+ if (bind(sd, addr, alen) < 0) {
+ log_err_errno("error binding socket");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int lsock_init(struct sock_args *args)
+{
+ long flags;
+ int sd;
+
+ sd = socket(args->version, args->type, args->protocol);
+ if (sd < 0) {
+ log_err_errno("Error opening socket");
+ return -1;
+ }
+
+ if (set_reuseaddr(sd) != 0)
+ goto err;
+
+ if (set_reuseport(sd) != 0)
+ goto err;
+
+ if (args->dev && bind_to_device(sd, args->dev) != 0)
+ goto err;
+ else if (args->use_setsockopt &&
+ set_unicast_if(sd, args->ifindex, args->version))
+ goto err;
+
+ if (bind_socket(sd, args))
+ goto err;
+
+ if (args->bind_test_only)
+ goto out;
+
+ if (args->type == SOCK_STREAM && listen(sd, 1) < 0) {
+ log_err_errno("listen failed");
+ goto err;
+ }
+
+ flags = fcntl(sd, F_GETFL);
+ if ((flags < 0) || (fcntl(sd, F_SETFL, flags|O_NONBLOCK) < 0)) {
+ log_err_errno("Failed to set non-blocking option");
+ goto err;
+ }
+
+ if (fcntl(sd, F_SETFD, FD_CLOEXEC) < 0)
+ log_err_errno("Failed to set close-on-exec flag");
+
+out:
+ return sd;
+
+err:
+ close(sd);
+ return -1;
+}
+
+static int do_server(struct sock_args *args)
+{
+ struct timeval timeout = { .tv_sec = prog_timeout }, *ptval = NULL;
+ unsigned char addr[sizeof(struct sockaddr_in6)] = {};
+ socklen_t alen = sizeof(addr);
+ int lsd, csd = -1;
+
+ fd_set rfds;
+ int rc;
+
+ if (prog_timeout)
+ ptval = &timeout;
+
+ if (args->has_grp)
+ lsd = msock_server(args);
+ else
+ lsd = lsock_init(args);
+
+ if (lsd < 0)
+ return 1;
+
+ if (args->bind_test_only) {
+ close(lsd);
+ return 0;
+ }
+
+ if (args->type != SOCK_STREAM) {
+ rc = msg_loop(0, lsd, (void *) addr, alen, args);
+ close(lsd);
+ return rc;
+ }
+
+ if (args->password && tcp_md5_remote(lsd, args)) {
+ close(lsd);
+ return 1;
+ }
+
+ while (1) {
+ log_msg("\n");
+ log_msg("waiting for client connection.\n");
+ FD_ZERO(&rfds);
+ FD_SET(lsd, &rfds);
+
+ rc = select(lsd+1, &rfds, NULL, NULL, ptval);
+ if (rc == 0) {
+ rc = 2;
+ break;
+ }
+
+ if (rc < 0) {
+ if (errno == EINTR)
+ continue;
+
+ log_err_errno("select failed");
+ break;
+ }
+
+ if (FD_ISSET(lsd, &rfds)) {
+
+ csd = accept(lsd, (void *) addr, &alen);
+ if (csd < 0) {
+ log_err_errno("accept failed");
+ break;
+ }
+
+ rc = show_sockstat(csd, args);
+ if (rc)
+ break;
+
+ rc = check_device(csd, args);
+ if (rc)
+ break;
+ }
+
+ rc = msg_loop(0, csd, (void *) addr, alen, args);
+ close(csd);
+
+ if (!interactive)
+ break;
+ }
+
+ close(lsd);
+
+ return rc;
+}
+
+static int wait_for_connect(int sd)
+{
+ struct timeval _tv = { .tv_sec = prog_timeout }, *tv = NULL;
+ fd_set wfd;
+ int val = 0, sz = sizeof(val);
+ int rc;
+
+ FD_ZERO(&wfd);
+ FD_SET(sd, &wfd);
+
+ if (prog_timeout)
+ tv = &_tv;
+
+ rc = select(FD_SETSIZE, NULL, &wfd, NULL, tv);
+ if (rc == 0) {
+ log_error("connect timed out\n");
+ return -2;
+ } else if (rc < 0) {
+ log_err_errno("select failed");
+ return -3;
+ }
+
+ if (getsockopt(sd, SOL_SOCKET, SO_ERROR, &val, (socklen_t *)&sz) < 0) {
+ log_err_errno("getsockopt(SO_ERROR) failed");
+ return -4;
+ }
+
+ if (val != 0) {
+ log_error("connect failed: %d: %s\n", val, strerror(val));
+ return -1;
+ }
+
+ return 0;
+}
+
+static int connectsock(void *addr, socklen_t alen, struct sock_args *args)
+{
+ int sd, rc = -1;
+ long flags;
+
+ sd = socket(args->version, args->type, args->protocol);
+ if (sd < 0) {
+ log_err_errno("Failed to create socket");
+ return -1;
+ }
+
+ flags = fcntl(sd, F_GETFL);
+ if ((flags < 0) || (fcntl(sd, F_SETFL, flags|O_NONBLOCK) < 0)) {
+ log_err_errno("Failed to set non-blocking option");
+ goto err;
+ }
+
+ if (set_reuseport(sd) != 0)
+ goto err;
+
+ if (args->dev && bind_to_device(sd, args->dev) != 0)
+ goto err;
+ else if (args->use_setsockopt &&
+ set_unicast_if(sd, args->ifindex, args->version))
+ goto err;
+
+ if (args->has_local_ip && bind_socket(sd, args))
+ goto err;
+
+ if (args->type != SOCK_STREAM)
+ goto out;
+
+ if (args->password && tcp_md5sig(sd, addr, alen, args))
+ goto err;
+
+ if (args->bind_test_only)
+ goto out;
+
+ if (connect(sd, addr, alen) < 0) {
+ if (errno != EINPROGRESS) {
+ log_err_errno("Failed to connect to remote host");
+ rc = -1;
+ goto err;
+ }
+ rc = wait_for_connect(sd);
+ if (rc < 0)
+ goto err;
+ }
+out:
+ return sd;
+
+err:
+ close(sd);
+ return rc;
+}
+
+static int do_client(struct sock_args *args)
+{
+ struct sockaddr_in sin = {
+ .sin_family = AF_INET,
+ };
+ struct sockaddr_in6 sin6 = {
+ .sin6_family = AF_INET6,
+ };
+ void *addr;
+ int alen;
+ int rc = 0;
+ int sd;
+
+ if (!args->has_remote_ip && !args->has_grp) {
+ fprintf(stderr, "remote IP or multicast group not given\n");
+ return 1;
+ }
+
+ switch (args->version) {
+ case AF_INET:
+ sin.sin_port = htons(args->port);
+ if (args->has_grp)
+ sin.sin_addr = args->grp;
+ else
+ sin.sin_addr = args->remote_addr.in;
+ addr = &sin;
+ alen = sizeof(sin);
+ break;
+ case AF_INET6:
+ sin6.sin6_port = htons(args->port);
+ sin6.sin6_addr = args->remote_addr.in6;
+ sin6.sin6_scope_id = args->scope_id;
+ addr = &sin6;
+ alen = sizeof(sin6);
+ break;
+ }
+
+ if (args->has_grp)
+ sd = msock_client(args);
+ else
+ sd = connectsock(addr, alen, args);
+
+ if (sd < 0)
+ return -sd;
+
+ if (args->bind_test_only)
+ goto out;
+
+ if (args->type == SOCK_STREAM) {
+ rc = show_sockstat(sd, args);
+ if (rc != 0)
+ goto out;
+ }
+
+ rc = msg_loop(1, sd, addr, alen, args);
+
+out:
+ close(sd);
+
+ return rc;
+}
+
+enum addr_type {
+ ADDR_TYPE_LOCAL,
+ ADDR_TYPE_REMOTE,
+ ADDR_TYPE_MCAST,
+ ADDR_TYPE_EXPECTED_LOCAL,
+ ADDR_TYPE_EXPECTED_REMOTE,
+ ADDR_TYPE_MD5_PREFIX,
+};
+
+static int convert_addr(struct sock_args *args, const char *_str,
+ enum addr_type atype)
+{
+ int pfx_len_max = args->version == AF_INET6 ? 128 : 32;
+ int family = args->version;
+ char *str, *dev, *sep;
+ struct in6_addr *in6;
+ struct in_addr *in;
+ const char *desc;
+ void *addr;
+ int rc = 0;
+
+ str = strdup(_str);
+ if (!str)
+ return -ENOMEM;
+
+ switch (atype) {
+ case ADDR_TYPE_LOCAL:
+ desc = "local";
+ addr = &args->local_addr;
+ break;
+ case ADDR_TYPE_REMOTE:
+ desc = "remote";
+ addr = &args->remote_addr;
+ break;
+ case ADDR_TYPE_MCAST:
+ desc = "mcast grp";
+ addr = &args->grp;
+ break;
+ case ADDR_TYPE_EXPECTED_LOCAL:
+ desc = "expected local";
+ addr = &args->expected_laddr;
+ break;
+ case ADDR_TYPE_EXPECTED_REMOTE:
+ desc = "expected remote";
+ addr = &args->expected_raddr;
+ break;
+ case ADDR_TYPE_MD5_PREFIX:
+ desc = "md5 prefix";
+ if (family == AF_INET) {
+ args->md5_prefix.v4.sin_family = AF_INET;
+ addr = &args->md5_prefix.v4.sin_addr;
+ } else if (family == AF_INET6) {
+ args->md5_prefix.v6.sin6_family = AF_INET6;
+ addr = &args->md5_prefix.v6.sin6_addr;
+ } else
+ return 1;
+
+ sep = strchr(str, '/');
+ if (sep) {
+ *sep = '\0';
+ sep++;
+ if (str_to_uint(sep, 1, pfx_len_max,
+ &args->prefix_len) != 0) {
+ fprintf(stderr, "Invalid port\n");
+ return 1;
+ }
+ } else {
+ args->prefix_len = pfx_len_max;
+ }
+ break;
+ default:
+ log_error("unknown address type");
+ exit(1);
+ }
+
+ switch (family) {
+ case AF_INET:
+ in = (struct in_addr *) addr;
+ if (str) {
+ if (inet_pton(AF_INET, str, in) == 0) {
+ log_error("Invalid %s IP address\n", desc);
+ rc = -1;
+ goto out;
+ }
+ } else {
+ in->s_addr = htonl(INADDR_ANY);
+ }
+ break;
+
+ case AF_INET6:
+ dev = strchr(str, '%');
+ if (dev) {
+ *dev = '\0';
+ dev++;
+ }
+
+ in6 = (struct in6_addr *) addr;
+ if (str) {
+ if (inet_pton(AF_INET6, str, in6) == 0) {
+ log_error("Invalid %s IPv6 address\n", desc);
+ rc = -1;
+ goto out;
+ }
+ } else {
+ *in6 = in6addr_any;
+ }
+ if (dev) {
+ args->scope_id = get_ifidx(dev);
+ if (args->scope_id < 0) {
+ log_error("Invalid scope on %s IPv6 address\n",
+ desc);
+ rc = -1;
+ goto out;
+ }
+ }
+ break;
+
+ default:
+ log_error("Invalid address family\n");
+ }
+
+out:
+ free(str);
+ return rc;
+}
+
+static char *random_msg(int len)
+{
+ int i, n = 0, olen = len + 1;
+ char *m;
+
+ if (len <= 0)
+ return NULL;
+
+ m = malloc(olen);
+ if (!m)
+ return NULL;
+
+ while (len > 26) {
+ i = snprintf(m + n, olen - n, "%.26s",
+ "abcdefghijklmnopqrstuvwxyz");
+ n += i;
+ len -= i;
+ }
+ i = snprintf(m + n, olen - n, "%.*s", len,
+ "abcdefghijklmnopqrstuvwxyz");
+ return m;
+}
+
+#define GETOPT_STR "sr:l:p:t:g:P:DRn:M:m:d:SCi6L:0:1:2:Fbq"
+
+static void print_usage(char *prog)
+{
+ printf(
+ "usage: %s OPTS\n"
+ "Required:\n"
+ " -r addr remote address to connect to (client mode only)\n"
+ " -p port port to connect to (client mode)/listen on (server mode)\n"
+ " (default: %d)\n"
+ " -s server mode (default: client mode)\n"
+ " -t timeout seconds (default: none)\n"
+ "\n"
+ "Optional:\n"
+ " -F Restart server loop\n"
+ " -6 IPv6 (default is IPv4)\n"
+ " -P proto protocol for socket: icmp, ospf (default: none)\n"
+ " -D|R datagram (D) / raw (R) socket (default stream)\n"
+ " -l addr local address to bind to\n"
+ "\n"
+ " -d dev bind socket to given device name\n"
+ " -S use setsockopt (IP_UNICAST_IF or IP_MULTICAST_IF)\n"
+ " to set device binding\n"
+ " -C use cmsg and IP_PKTINFO to specify device binding\n"
+ "\n"
+ " -L len send random message of given length\n"
+ " -n num number of times to send message\n"
+ "\n"
+ " -M password use MD5 sum protection\n"
+ " -m prefix/len prefix and length to use for MD5 key\n"
+ " -g grp multicast group (e.g., 239.1.1.1)\n"
+ " -i interactive mode (default is echo and terminate)\n"
+ "\n"
+ " -0 addr Expected local address\n"
+ " -1 addr Expected remote address\n"
+ " -2 dev Expected device name (or index) to receive packet\n"
+ "\n"
+ " -b Bind test only.\n"
+ " -q Be quiet. Run test without printing anything.\n"
+ , prog, DEFAULT_PORT);
+}
+
+int main(int argc, char *argv[])
+{
+ struct sock_args args = {
+ .version = AF_INET,
+ .type = SOCK_STREAM,
+ .port = DEFAULT_PORT,
+ };
+ struct protoent *pe;
+ unsigned int tmp;
+ int forever = 0;
+
+ /* process inputs */
+ extern char *optarg;
+ int rc = 0;
+
+ /*
+ * process input args
+ */
+
+ while ((rc = getopt(argc, argv, GETOPT_STR)) != -1) {
+ switch (rc) {
+ case 's':
+ server_mode = 1;
+ break;
+ case 'F':
+ forever = 1;
+ break;
+ case 'l':
+ args.has_local_ip = 1;
+ if (convert_addr(&args, optarg, ADDR_TYPE_LOCAL) < 0)
+ return 1;
+ break;
+ case 'r':
+ args.has_remote_ip = 1;
+ if (convert_addr(&args, optarg, ADDR_TYPE_REMOTE) < 0)
+ return 1;
+ break;
+ case 'p':
+ if (str_to_uint(optarg, 1, 65535, &tmp) != 0) {
+ fprintf(stderr, "Invalid port\n");
+ return 1;
+ }
+ args.port = (unsigned short) tmp;
+ break;
+ case 't':
+ if (str_to_uint(optarg, 0, INT_MAX,
+ &prog_timeout) != 0) {
+ fprintf(stderr, "Invalid timeout\n");
+ return 1;
+ }
+ break;
+ case 'D':
+ args.type = SOCK_DGRAM;
+ break;
+ case 'R':
+ args.type = SOCK_RAW;
+ args.port = 0;
+ break;
+ case 'P':
+ pe = getprotobyname(optarg);
+ if (pe) {
+ args.protocol = pe->p_proto;
+ } else {
+ if (str_to_uint(optarg, 0, 0xffff, &tmp) != 0) {
+ fprintf(stderr, "Invalid protocol\n");
+ return 1;
+ }
+ args.protocol = tmp;
+ }
+ break;
+ case 'n':
+ iter = atoi(optarg);
+ break;
+ case 'L':
+ msg = random_msg(atoi(optarg));
+ break;
+ case 'M':
+ args.password = optarg;
+ break;
+ case 'm':
+ if (convert_addr(&args, optarg, ADDR_TYPE_MD5_PREFIX) < 0)
+ return 1;
+ break;
+ case 'S':
+ args.use_setsockopt = 1;
+ break;
+ case 'C':
+ args.use_cmsg = 1;
+ break;
+ case 'd':
+ args.dev = optarg;
+ args.ifindex = get_ifidx(optarg);
+ if (args.ifindex < 0) {
+ fprintf(stderr, "Invalid device name\n");
+ return 1;
+ }
+ break;
+ case 'i':
+ interactive = 1;
+ break;
+ case 'g':
+ args.has_grp = 1;
+ if (convert_addr(&args, optarg, ADDR_TYPE_MCAST) < 0)
+ return 1;
+ args.type = SOCK_DGRAM;
+ break;
+ case '6':
+ args.version = AF_INET6;
+ break;
+ case 'b':
+ args.bind_test_only = 1;
+ break;
+ case '0':
+ args.has_expected_laddr = 1;
+ if (convert_addr(&args, optarg,
+ ADDR_TYPE_EXPECTED_LOCAL))
+ return 1;
+ break;
+ case '1':
+ args.has_expected_raddr = 1;
+ if (convert_addr(&args, optarg,
+ ADDR_TYPE_EXPECTED_REMOTE))
+ return 1;
+
+ break;
+ case '2':
+ if (str_to_uint(optarg, 0, INT_MAX, &tmp) == 0) {
+ args.expected_ifindex = (int)tmp;
+ } else {
+ args.expected_ifindex = get_ifidx(optarg);
+ if (args.expected_ifindex < 0) {
+ fprintf(stderr,
+ "Invalid expected device\n");
+ return 1;
+ }
+ }
+ break;
+ case 'q':
+ quiet = 1;
+ break;
+ default:
+ print_usage(argv[0]);
+ return 1;
+ }
+ }
+
+ if (args.password &&
+ ((!args.has_remote_ip && !args.prefix_len) || args.type != SOCK_STREAM)) {
+ log_error("MD5 passwords apply to TCP only and require a remote ip for the password\n");
+ return 1;
+ }
+
+ if (args.prefix_len && !args.password) {
+ log_error("Prefix range for MD5 protection specified without a password\n");
+ return 1;
+ }
+
+ if ((args.use_setsockopt || args.use_cmsg) && !args.ifindex) {
+ fprintf(stderr, "Device binding not specified\n");
+ return 1;
+ }
+ if (args.use_setsockopt || args.use_cmsg)
+ args.dev = NULL;
+
+ if (iter == 0) {
+ fprintf(stderr, "Invalid number of messages to send\n");
+ return 1;
+ }
+
+ if (args.type == SOCK_STREAM && !args.protocol)
+ args.protocol = IPPROTO_TCP;
+ if (args.type == SOCK_DGRAM && !args.protocol)
+ args.protocol = IPPROTO_UDP;
+
+ if ((args.type == SOCK_STREAM || args.type == SOCK_DGRAM) &&
+ args.port == 0) {
+ fprintf(stderr, "Invalid port number\n");
+ return 1;
+ }
+
+ if (!server_mode && !args.has_grp &&
+ !args.has_remote_ip && !args.has_local_ip) {
+ fprintf(stderr,
+ "Local (server mode) or remote IP (client IP) required\n");
+ return 1;
+ }
+
+ if (interactive) {
+ prog_timeout = 0;
+ msg = NULL;
+ }
+
+ if (server_mode) {
+ do {
+ rc = do_server(&args);
+ } while (forever);
+
+ return rc;
+ }
+ return do_client(&args);
+}
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index ab367e75f095..71a62e7e35b1 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -11,9 +11,9 @@
# R1 and R2 (also implemented with namespaces), with different MTUs:
#
# segment a_r1 segment b_r1 a_r1: 2000
-# .--------------R1--------------. a_r2: 1500
-# A B a_r3: 2000
-# '--------------R2--------------' a_r4: 1400
+# .--------------R1--------------. b_r1: 1400
+# A B a_r2: 2000
+# '--------------R2--------------' b_r2: 1500
# segment a_r2 segment b_r2
#
# Check that PMTU exceptions with the correct PMTU are created. Then
@@ -1249,8 +1249,7 @@ test_list_flush_ipv4_exception() {
done
run_cmd ${ns_a} ping -q -M want -i 0.1 -c 2 -s 1800 "${dst2}"
- # Each exception is printed as two lines
- if [ "$(${ns_a} ip route list cache | wc -l)" -ne 202 ]; then
+ if [ "$(${ns_a} ip -oneline route list cache | wc -l)" -ne 101 ]; then
err " can't list cached exceptions"
fail=1
fi
@@ -1300,7 +1299,7 @@ test_list_flush_ipv6_exception() {
run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s 1800 "${dst_prefix1}${i}"
done
run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s 1800 "${dst2}"
- if [ "$(${ns_a} ip -6 route list cache | wc -l)" -ne 101 ]; then
+ if [ "$(${ns_a} ip -oneline -6 route list cache | wc -l)" -ne 101 ]; then
err " can't list cached exceptions"
fail=1
fi
diff --git a/tools/testing/selftests/net/reuseport_dualstack.c b/tools/testing/selftests/net/reuseport_dualstack.c
index fe3230c55986..fb7a59ed759e 100644
--- a/tools/testing/selftests/net/reuseport_dualstack.c
+++ b/tools/testing/selftests/net/reuseport_dualstack.c
@@ -129,7 +129,7 @@ static void test(int *rcv_fds, int count, int proto)
{
struct epoll_event ev;
int epfd, i, test_fd;
- uint16_t test_family;
+ int test_family;
socklen_t len;
epfd = epoll_create(1);
@@ -146,6 +146,7 @@ static void test(int *rcv_fds, int count, int proto)
send_from_v4(proto);
test_fd = receive_once(epfd, proto);
+ len = sizeof(test_family);
if (getsockopt(test_fd, SOL_SOCKET, SO_DOMAIN, &test_family, &len))
error(1, errno, "failed to read socket domain");
if (test_family != AF_INET)
diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
index 53f598f06647..383bac05ac32 100644
--- a/tools/testing/selftests/net/so_txtime.c
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -12,7 +12,11 @@
#include <arpa/inet.h>
#include <error.h>
#include <errno.h>
+#include <inttypes.h>
#include <linux/net_tstamp.h>
+#include <linux/errqueue.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
@@ -28,7 +32,7 @@ static int cfg_clockid = CLOCK_TAI;
static bool cfg_do_ipv4;
static bool cfg_do_ipv6;
static uint16_t cfg_port = 8000;
-static int cfg_variance_us = 2000;
+static int cfg_variance_us = 4000;
static uint64_t glob_tstart;
@@ -43,6 +47,9 @@ static struct timed_send cfg_in[MAX_NUM_PKT];
static struct timed_send cfg_out[MAX_NUM_PKT];
static int cfg_num_pkt;
+static int cfg_errq_level;
+static int cfg_errq_type;
+
static uint64_t gettime_ns(void)
{
struct timespec ts;
@@ -90,13 +97,15 @@ static void do_send_one(int fdt, struct timed_send *ts)
}
-static void do_recv_one(int fdr, struct timed_send *ts)
+static bool do_recv_one(int fdr, struct timed_send *ts)
{
int64_t tstop, texpect;
char rbuf[2];
int ret;
ret = recv(fdr, rbuf, sizeof(rbuf), 0);
+ if (ret == -1 && errno == EAGAIN)
+ return true;
if (ret == -1)
error(1, errno, "read");
if (ret != 1)
@@ -105,14 +114,16 @@ static void do_recv_one(int fdr, struct timed_send *ts)
tstop = (gettime_ns() - glob_tstart) / 1000;
texpect = ts->delay_us >= 0 ? ts->delay_us : 0;
- fprintf(stderr, "payload:%c delay:%ld expected:%ld (us)\n",
- rbuf[0], tstop, texpect);
+ fprintf(stderr, "payload:%c delay:%lld expected:%lld (us)\n",
+ rbuf[0], (long long)tstop, (long long)texpect);
if (rbuf[0] != ts->data)
error(1, 0, "payload mismatch. expected %c", ts->data);
if (labs(tstop - texpect) > cfg_variance_us)
error(1, 0, "exceeds variance (%d us)", cfg_variance_us);
+
+ return false;
}
static void do_recv_verify_empty(int fdr)
@@ -125,12 +136,70 @@ static void do_recv_verify_empty(int fdr)
error(1, 0, "recv: not empty as expected (%d, %d)", ret, errno);
}
+static void do_recv_errqueue_timeout(int fdt)
+{
+ char control[CMSG_SPACE(sizeof(struct sock_extended_err)) +
+ CMSG_SPACE(sizeof(struct sockaddr_in6))] = {0};
+ char data[sizeof(struct ipv6hdr) +
+ sizeof(struct tcphdr) + 1];
+ struct sock_extended_err *err;
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ struct cmsghdr *cm;
+ int64_t tstamp = 0;
+ int ret;
+
+ iov.iov_base = data;
+ iov.iov_len = sizeof(data);
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ while (1) {
+ ret = recvmsg(fdt, &msg, MSG_ERRQUEUE);
+ if (ret == -1 && errno == EAGAIN)
+ break;
+ if (ret == -1)
+ error(1, errno, "errqueue");
+ if (msg.msg_flags != MSG_ERRQUEUE)
+ error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags);
+
+ cm = CMSG_FIRSTHDR(&msg);
+ if (cm->cmsg_level != cfg_errq_level ||
+ cm->cmsg_type != cfg_errq_type)
+ error(1, 0, "errqueue: type 0x%x.0x%x\n",
+ cm->cmsg_level, cm->cmsg_type);
+
+ err = (struct sock_extended_err *)CMSG_DATA(cm);
+ if (err->ee_origin != SO_EE_ORIGIN_TXTIME)
+ error(1, 0, "errqueue: origin 0x%x\n", err->ee_origin);
+ if (err->ee_code != ECANCELED)
+ error(1, 0, "errqueue: code 0x%x\n", err->ee_code);
+
+ tstamp = ((int64_t) err->ee_data) << 32 | err->ee_info;
+ tstamp -= (int64_t) glob_tstart;
+ tstamp /= 1000 * 1000;
+ fprintf(stderr, "send: pkt %c at %" PRId64 "ms dropped\n",
+ data[ret - 1], tstamp);
+
+ msg.msg_flags = 0;
+ msg.msg_controllen = sizeof(control);
+ }
+
+ error(1, 0, "recv: timeout");
+}
+
static void setsockopt_txtime(int fd)
{
struct sock_txtime so_txtime_val = { .clockid = cfg_clockid };
struct sock_txtime so_txtime_val_read = { 0 };
socklen_t vallen = sizeof(so_txtime_val);
+ so_txtime_val.flags = SOF_TXTIME_REPORT_ERRORS;
+
if (setsockopt(fd, SOL_SOCKET, SO_TXTIME,
&so_txtime_val, sizeof(so_txtime_val)))
error(1, errno, "setsockopt txtime");
@@ -194,7 +263,8 @@ static void do_test(struct sockaddr *addr, socklen_t alen)
for (i = 0; i < cfg_num_pkt; i++)
do_send_one(fdt, &cfg_in[i]);
for (i = 0; i < cfg_num_pkt; i++)
- do_recv_one(fdr, &cfg_out[i]);
+ if (do_recv_one(fdr, &cfg_out[i]))
+ do_recv_errqueue_timeout(fdt);
do_recv_verify_empty(fdr);
@@ -280,6 +350,10 @@ int main(int argc, char **argv)
addr6.sin6_family = AF_INET6;
addr6.sin6_port = htons(cfg_port);
addr6.sin6_addr = in6addr_loopback;
+
+ cfg_errq_level = SOL_IPV6;
+ cfg_errq_type = IPV6_RECVERR;
+
do_test((void *)&addr6, sizeof(addr6));
}
@@ -289,6 +363,10 @@ int main(int argc, char **argv)
addr4.sin_family = AF_INET;
addr4.sin_port = htons(cfg_port);
addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+
+ cfg_errq_level = SOL_IP;
+ cfg_errq_type = IP_RECVERR;
+
do_test((void *)&addr4, sizeof(addr4));
}
diff --git a/tools/testing/selftests/net/so_txtime.sh b/tools/testing/selftests/net/so_txtime.sh
index 5aa519328a5b..3f7800eaecb1 100755
--- a/tools/testing/selftests/net/so_txtime.sh
+++ b/tools/testing/selftests/net/so_txtime.sh
@@ -5,7 +5,12 @@
# Run in network namespace
if [[ $# -eq 0 ]]; then
- ./in_netns.sh $0 __subprocess
+ if ! ./in_netns.sh $0 __subprocess; then
+ # test is time sensitive, can be flaky
+ echo "test failed: retry once"
+ ./in_netns.sh $0 __subprocess
+ fi
+
exit $?
fi
@@ -18,7 +23,7 @@ tc qdisc add dev lo root fq
./so_txtime -4 -6 -c mono a,10,b,20 a,10,b,20
./so_txtime -4 -6 -c mono a,20,b,10 b,20,a,20
-if tc qdisc replace dev lo root etf clockid CLOCK_TAI delta 200000; then
+if tc qdisc replace dev lo root etf clockid CLOCK_TAI delta 400000; then
! ./so_txtime -4 -6 -c tai a,-1 a,-1
! ./so_txtime -4 -6 -c tai a,0 a,0
./so_txtime -4 -6 -c tai a,10 a,10
diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c
index 31ced79f4f25..35505b31e5cc 100644
--- a/tools/testing/selftests/net/tcp_mmap.c
+++ b/tools/testing/selftests/net/tcp_mmap.c
@@ -71,7 +71,7 @@
#define MSG_ZEROCOPY 0x4000000
#endif
-#define FILE_SZ (1UL << 35)
+#define FILE_SZ (1ULL << 35)
static int cfg_family = AF_INET6;
static socklen_t cfg_alen = sizeof(struct sockaddr_in6);
static int cfg_port = 8787;
@@ -82,7 +82,9 @@ static int zflg; /* zero copy option. (MSG_ZEROCOPY for sender, mmap() for recei
static int xflg; /* hash received data (simple xor) (-h option) */
static int keepflag; /* -k option: receiver shall keep all received file in memory (no munmap() calls) */
-static int chunk_size = 512*1024;
+static size_t chunk_size = 512*1024;
+
+static size_t map_align;
unsigned long htotal;
@@ -118,6 +120,9 @@ void hash_zone(void *zone, unsigned int length)
htotal = temp;
}
+#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
+#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
+
void *child_thread(void *arg)
{
unsigned long total_mmap = 0, total = 0;
@@ -126,6 +131,7 @@ void *child_thread(void *arg)
int flags = MAP_SHARED;
struct timeval t0, t1;
char *buffer = NULL;
+ void *raddr = NULL;
void *addr = NULL;
double throughput;
struct rusage ru;
@@ -142,9 +148,13 @@ void *child_thread(void *arg)
goto error;
}
if (zflg) {
- addr = mmap(NULL, chunk_size, PROT_READ, flags, fd, 0);
- if (addr == (void *)-1)
+ raddr = mmap(NULL, chunk_size + map_align, PROT_READ, flags, fd, 0);
+ if (raddr == (void *)-1) {
+ perror("mmap");
zflg = 0;
+ } else {
+ addr = ALIGN_PTR_UP(raddr, map_align);
+ }
}
while (1) {
struct pollfd pfd = { .fd = fd, .events = POLLIN, };
@@ -155,7 +165,7 @@ void *child_thread(void *arg)
socklen_t zc_len = sizeof(zc);
int res;
- zc.address = (__u64)addr;
+ zc.address = (__u64)((unsigned long)addr);
zc.length = chunk_size;
zc.recv_skip_hint = 0;
res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
@@ -222,7 +232,7 @@ error:
free(buffer);
close(fd);
if (zflg)
- munmap(addr, chunk_size);
+ munmap(raddr, chunk_size + map_align);
pthread_exit(0);
}
@@ -270,6 +280,11 @@ static void setup_sockaddr(int domain, const char *str_addr,
static void do_accept(int fdlisten)
{
+ pthread_attr_t attr;
+
+ pthread_attr_init(&attr);
+ pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+
if (setsockopt(fdlisten, SOL_SOCKET, SO_RCVLOWAT,
&chunk_size, sizeof(chunk_size)) == -1) {
perror("setsockopt SO_RCVLOWAT");
@@ -288,7 +303,7 @@ static void do_accept(int fdlisten)
perror("accept");
continue;
}
- res = pthread_create(&th, NULL, child_thread,
+ res = pthread_create(&th, &attr, child_thread,
(void *)(unsigned long)fd);
if (res) {
errno = res;
@@ -298,18 +313,42 @@ static void do_accept(int fdlisten)
}
}
+/* Each thread should reserve a big enough vma to avoid
+ * spinlock collisions in ptl locks.
+ * This size is 2MB on x86_64, and is exported in /proc/meminfo.
+ */
+static unsigned long default_huge_page_size(void)
+{
+ FILE *f = fopen("/proc/meminfo", "r");
+ unsigned long hps = 0;
+ size_t linelen = 0;
+ char *line = NULL;
+
+ if (!f)
+ return 0;
+ while (getline(&line, &linelen, f) > 0) {
+ if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) {
+ hps <<= 10;
+ break;
+ }
+ }
+ free(line);
+ fclose(f);
+ return hps;
+}
+
int main(int argc, char *argv[])
{
struct sockaddr_storage listenaddr, addr;
unsigned int max_pacing_rate = 0;
- unsigned long total = 0;
+ size_t total = 0;
char *host = NULL;
int fd, c, on = 1;
char *buffer;
int sflg = 0;
int mss = 0;
- while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:")) != -1) {
+ while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:C:a:")) != -1) {
switch (c) {
case '4':
cfg_family = PF_INET;
@@ -349,10 +388,24 @@ int main(int argc, char *argv[])
case 'P':
max_pacing_rate = atoi(optarg) ;
break;
+ case 'C':
+ chunk_size = atol(optarg);
+ break;
+ case 'a':
+ map_align = atol(optarg);
+ break;
default:
exit(1);
}
}
+ if (!map_align) {
+ map_align = default_huge_page_size();
+ /* if really /proc/meminfo is not helping,
+ * we use the default x86_64 hugepagesize.
+ */
+ if (!map_align)
+ map_align = 2*1024*1024;
+ }
if (sflg) {
int fdlisten = socket(cfg_family, SOCK_STREAM, 0);
@@ -417,7 +470,7 @@ int main(int argc, char *argv[])
zflg = 0;
}
while (total < FILE_SZ) {
- long wr = FILE_SZ - total;
+ ssize_t wr = FILE_SZ - total;
if (wr > chunk_size)
wr = chunk_size;
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 4c285b6e1db8..0ea44d975b6c 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -25,10 +25,6 @@
#define TLS_PAYLOAD_MAX_LEN 16384
#define SOL_TLS 282
-#ifndef ENOTSUPP
-#define ENOTSUPP 524
-#endif
-
FIXTURE(tls_basic)
{
int fd, cfd;
@@ -268,6 +264,38 @@ TEST_F(tls, sendmsg_single)
EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
}
+#define MAX_FRAGS 64
+#define SEND_LEN 13
+TEST_F(tls, sendmsg_fragmented)
+{
+ char const *test_str = "test_sendmsg";
+ char buf[SEND_LEN * MAX_FRAGS];
+ struct iovec vec[MAX_FRAGS];
+ struct msghdr msg;
+ int i, frags;
+
+ for (frags = 1; frags <= MAX_FRAGS; frags++) {
+ for (i = 0; i < frags; i++) {
+ vec[i].iov_base = (char *)test_str;
+ vec[i].iov_len = SEND_LEN;
+ }
+
+ memset(&msg, 0, sizeof(struct msghdr));
+ msg.msg_iov = vec;
+ msg.msg_iovlen = frags;
+
+ EXPECT_EQ(sendmsg(self->fd, &msg, 0), SEND_LEN * frags);
+ EXPECT_EQ(recv(self->cfd, buf, SEND_LEN * frags, MSG_WAITALL),
+ SEND_LEN * frags);
+
+ for (i = 0; i < frags; i++)
+ EXPECT_EQ(memcmp(buf + SEND_LEN * i,
+ test_str, SEND_LEN), 0);
+ }
+}
+#undef MAX_FRAGS
+#undef SEND_LEN
+
TEST_F(tls, sendmsg_large)
{
void *mem = malloc(16384);
@@ -898,6 +926,114 @@ TEST_F(tls, nonblocking)
}
}
+static void
+test_mutliproc(struct __test_metadata *_metadata, struct _test_data_tls *self,
+ bool sendpg, unsigned int n_readers, unsigned int n_writers)
+{
+ const unsigned int n_children = n_readers + n_writers;
+ const size_t data = 6 * 1000 * 1000;
+ const size_t file_sz = data / 100;
+ size_t read_bias, write_bias;
+ int i, fd, child_id;
+ char buf[file_sz];
+ pid_t pid;
+
+ /* Only allow multiples for simplicity */
+ ASSERT_EQ(!(n_readers % n_writers) || !(n_writers % n_readers), true);
+ read_bias = n_writers / n_readers ?: 1;
+ write_bias = n_readers / n_writers ?: 1;
+
+ /* prep a file to send */
+ fd = open("/tmp/", O_TMPFILE | O_RDWR, 0600);
+ ASSERT_GE(fd, 0);
+
+ memset(buf, 0xac, file_sz);
+ ASSERT_EQ(write(fd, buf, file_sz), file_sz);
+
+ /* spawn children */
+ for (child_id = 0; child_id < n_children; child_id++) {
+ pid = fork();
+ ASSERT_NE(pid, -1);
+ if (!pid)
+ break;
+ }
+
+ /* parent waits for all children */
+ if (pid) {
+ for (i = 0; i < n_children; i++) {
+ int status;
+
+ wait(&status);
+ EXPECT_EQ(status, 0);
+ }
+
+ return;
+ }
+
+ /* Split threads for reading and writing */
+ if (child_id < n_readers) {
+ size_t left = data * read_bias;
+ char rb[8001];
+
+ while (left) {
+ int res;
+
+ res = recv(self->cfd, rb,
+ left > sizeof(rb) ? sizeof(rb) : left, 0);
+
+ EXPECT_GE(res, 0);
+ left -= res;
+ }
+ } else {
+ size_t left = data * write_bias;
+
+ while (left) {
+ int res;
+
+ ASSERT_EQ(lseek(fd, 0, SEEK_SET), 0);
+ if (sendpg)
+ res = sendfile(self->fd, fd, NULL,
+ left > file_sz ? file_sz : left);
+ else
+ res = send(self->fd, buf,
+ left > file_sz ? file_sz : left, 0);
+
+ EXPECT_GE(res, 0);
+ left -= res;
+ }
+ }
+}
+
+TEST_F(tls, mutliproc_even)
+{
+ test_mutliproc(_metadata, self, false, 6, 6);
+}
+
+TEST_F(tls, mutliproc_readers)
+{
+ test_mutliproc(_metadata, self, false, 4, 12);
+}
+
+TEST_F(tls, mutliproc_writers)
+{
+ test_mutliproc(_metadata, self, false, 10, 2);
+}
+
+TEST_F(tls, mutliproc_sendpage_even)
+{
+ test_mutliproc(_metadata, self, true, 6, 6);
+}
+
+TEST_F(tls, mutliproc_sendpage_readers)
+{
+ test_mutliproc(_metadata, self, true, 4, 12);
+}
+
+TEST_F(tls, mutliproc_sendpage_writers)
+{
+ test_mutliproc(_metadata, self, true, 10, 2);
+}
+
TEST_F(tls, control_msg)
{
if (self->notls)
@@ -1037,11 +1173,11 @@ TEST(non_established) {
/* TLS ULP not supported */
if (errno == ENOENT)
return;
- EXPECT_EQ(errno, ENOTSUPP);
+ EXPECT_EQ(errno, ENOTCONN);
ret = setsockopt(sfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
EXPECT_EQ(ret, -1);
- EXPECT_EQ(errno, ENOTSUPP);
+ EXPECT_EQ(errno, ENOTCONN);
ret = getsockname(sfd, &addr, &len);
ASSERT_EQ(ret, 0);
diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh
new file mode 100755
index 000000000000..de9ca97abc30
--- /dev/null
+++ b/tools/testing/selftests/net/traceroute.sh
@@ -0,0 +1,322 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run traceroute/traceroute6 tests
+#
+
+VERBOSE=0
+PAUSE_ON_FAIL=no
+
+################################################################################
+#
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+run_cmd()
+{
+ local ns
+ local cmd
+ local out
+ local rc
+
+ ns="$1"
+ shift
+ cmd="$*"
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf " COMMAND: $cmd\n"
+ fi
+
+ out=$(eval ip netns exec ${ns} ${cmd} 2>&1)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+
+ return $rc
+}
+
+################################################################################
+# create namespaces and interconnects
+
+create_ns()
+{
+ local ns=$1
+ local addr=$2
+ local addr6=$3
+
+ [ -z "${addr}" ] && addr="-"
+ [ -z "${addr6}" ] && addr6="-"
+
+ ip netns add ${ns}
+
+ ip netns exec ${ns} ip link set lo up
+ if [ "${addr}" != "-" ]; then
+ ip netns exec ${ns} ip addr add dev lo ${addr}
+ fi
+ if [ "${addr6}" != "-" ]; then
+ ip netns exec ${ns} ip -6 addr add dev lo ${addr6}
+ fi
+
+ ip netns exec ${ns} ip ro add unreachable default metric 8192
+ ip netns exec ${ns} ip -6 ro add unreachable default metric 8192
+
+ ip netns exec ${ns} sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.accept_dad=0
+}
+
+# create veth pair to connect namespaces and apply addresses.
+connect_ns()
+{
+ local ns1=$1
+ local ns1_dev=$2
+ local ns1_addr=$3
+ local ns1_addr6=$4
+ local ns2=$5
+ local ns2_dev=$6
+ local ns2_addr=$7
+ local ns2_addr6=$8
+
+ ip netns exec ${ns1} ip li add ${ns1_dev} type veth peer name tmp
+ ip netns exec ${ns1} ip li set ${ns1_dev} up
+ ip netns exec ${ns1} ip li set tmp netns ${ns2} name ${ns2_dev}
+ ip netns exec ${ns2} ip li set ${ns2_dev} up
+
+ if [ "${ns1_addr}" != "-" ]; then
+ ip netns exec ${ns1} ip addr add dev ${ns1_dev} ${ns1_addr}
+ fi
+
+ if [ "${ns2_addr}" != "-" ]; then
+ ip netns exec ${ns2} ip addr add dev ${ns2_dev} ${ns2_addr}
+ fi
+
+ if [ "${ns1_addr6}" != "-" ]; then
+ ip netns exec ${ns1} ip addr add dev ${ns1_dev} ${ns1_addr6}
+ fi
+
+ if [ "${ns2_addr6}" != "-" ]; then
+ ip netns exec ${ns2} ip addr add dev ${ns2_dev} ${ns2_addr6}
+ fi
+}
+
+################################################################################
+# traceroute6 test
+#
+# Verify that in this scenario
+#
+# ------------------------ N2
+# | |
+# ------ ------ N3 ----
+# | R1 | | R2 |------|H2|
+# ------ ------ ----
+# | |
+# ------------------------ N1
+# |
+# ----
+# |H1|
+# ----
+#
+# where H1's default route goes through R1 and R1's default route goes
+# through R2 over N2, traceroute6 from H1 to H2 reports R2's address
+# on N2 and not N1.
+#
+# Addresses are assigned as follows:
+#
+# N1: 2000:101::/64
+# N2: 2000:102::/64
+# N3: 2000:103::/64
+#
+# R1's host part of address: 1
+# R2's host part of address: 2
+# H1's host part of address: 3
+# H2's host part of address: 4
+#
+# For example:
+# the IPv6 address of R1's interface on N2 is 2000:102::1/64
+
+cleanup_traceroute6()
+{
+ local ns
+
+ for ns in host-1 host-2 router-1 router-2
+ do
+ ip netns del ${ns} 2>/dev/null
+ done
+}
+
+setup_traceroute6()
+{
+ brdev=br0
+
+ # start clean
+ cleanup_traceroute6
+
+ set -e
+ create_ns host-1
+ create_ns host-2
+ create_ns router-1
+ create_ns router-2
+
+ # Setup N3
+ connect_ns router-2 eth3 - 2000:103::2/64 host-2 eth3 - 2000:103::4/64
+ ip netns exec host-2 ip route add default via 2000:103::2
+
+ # Setup N2
+ connect_ns router-1 eth2 - 2000:102::1/64 router-2 eth2 - 2000:102::2/64
+ ip netns exec router-1 ip route add default via 2000:102::2
+
+ # Setup N1. host-1 and router-2 connect to a bridge in router-1.
+ ip netns exec router-1 ip link add name ${brdev} type bridge
+ ip netns exec router-1 ip link set ${brdev} up
+ ip netns exec router-1 ip addr add 2000:101::1/64 dev ${brdev}
+
+ connect_ns host-1 eth0 - 2000:101::3/64 router-1 eth0 - -
+ ip netns exec router-1 ip link set dev eth0 master ${brdev}
+ ip netns exec host-1 ip route add default via 2000:101::1
+
+ connect_ns router-2 eth1 - 2000:101::2/64 router-1 eth1 - -
+ ip netns exec router-1 ip link set dev eth1 master ${brdev}
+
+ # Prime the network
+ ip netns exec host-1 ping6 -c5 2000:103::4 >/dev/null 2>&1
+
+ set +e
+}
+
+run_traceroute6()
+{
+ if [ ! -x "$(command -v traceroute6)" ]; then
+ echo "SKIP: Could not run IPV6 test without traceroute6"
+ return
+ fi
+
+ setup_traceroute6
+
+ # traceroute6 host-2 from host-1 (expects 2000:102::2)
+ run_cmd host-1 "traceroute6 2000:103::4 | grep -q 2000:102::2"
+ log_test $? 0 "IPV6 traceroute"
+
+ cleanup_traceroute6
+}
+
+################################################################################
+# traceroute test
+#
+# Verify that traceroute from H1 to H2 shows 1.0.1.1 in this scenario
+#
+# 1.0.3.1/24
+# ---- 1.0.1.3/24 1.0.1.1/24 ---- 1.0.2.1/24 1.0.2.4/24 ----
+# |H1|--------------------------|R1|--------------------------|H2|
+# ---- N1 ---- N2 ----
+#
+# where net.ipv4.icmp_errors_use_inbound_ifaddr is set on R1 and
+# 1.0.3.1/24 and 1.0.1.1/24 are respectively R1's primary and secondary
+# address on N1.
+#
+
+cleanup_traceroute()
+{
+ local ns
+
+ for ns in host-1 host-2 router
+ do
+ ip netns del ${ns} 2>/dev/null
+ done
+}
+
+setup_traceroute()
+{
+ # start clean
+ cleanup_traceroute
+
+ set -e
+ create_ns host-1
+ create_ns host-2
+ create_ns router
+
+ connect_ns host-1 eth0 1.0.1.3/24 - \
+ router eth1 1.0.3.1/24 -
+ ip netns exec host-1 ip route add default via 1.0.1.1
+
+ ip netns exec router ip addr add 1.0.1.1/24 dev eth1
+ ip netns exec router sysctl -qw \
+ net.ipv4.icmp_errors_use_inbound_ifaddr=1
+
+ connect_ns host-2 eth0 1.0.2.4/24 - \
+ router eth2 1.0.2.1/24 -
+ ip netns exec host-2 ip route add default via 1.0.2.1
+
+ # Prime the network
+ ip netns exec host-1 ping -c5 1.0.2.4 >/dev/null 2>&1
+
+ set +e
+}
+
+run_traceroute()
+{
+ if [ ! -x "$(command -v traceroute)" ]; then
+ echo "SKIP: Could not run IPV4 test without traceroute"
+ return
+ fi
+
+ setup_traceroute
+
+ # traceroute host-2 from host-1 (expects 1.0.1.1). Takes a while.
+ run_cmd host-1 "traceroute 1.0.2.4 | grep -q 1.0.1.1"
+ log_test $? 0 "IPV4 traceroute"
+
+ cleanup_traceroute
+}
+
+################################################################################
+# Run tests
+
+run_tests()
+{
+ run_traceroute6
+ run_traceroute
+}
+
+################################################################################
+# main
+
+declare -i nfail=0
+declare -i nsuccess=0
+
+while getopts :pv o
+do
+ case $o in
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=$(($VERBOSE + 1));;
+ *) exit 1;;
+ esac
+done
+
+run_tests
+
+printf "\nTests passed: %3d\n" ${nsuccess}
+printf "Tests failed: %3d\n" ${nfail}
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
index b8265ee9923f..c66da6ffd6d8 100644
--- a/tools/testing/selftests/net/udpgso.c
+++ b/tools/testing/selftests/net/udpgso.c
@@ -89,12 +89,9 @@ struct testcase testcases_v4[] = {
.tfail = true,
},
{
- /* send a single MSS: will fail with GSO, because the segment
- * logic in udp4_ufo_fragment demands a gso skb to be > MTU
- */
+ /* send a single MSS: will fall back to no GSO */
.tlen = CONST_MSS_V4,
.gso_len = CONST_MSS_V4,
- .tfail = true,
.r_num_mss = 1,
},
{
@@ -139,10 +136,9 @@ struct testcase testcases_v4[] = {
.tfail = true,
},
{
- /* send a single 1B MSS: will fail, see single MSS above */
+ /* send a single 1B MSS: will fall back to no GSO */
.tlen = 1,
.gso_len = 1,
- .tfail = true,
.r_num_mss = 1,
},
{
@@ -196,12 +192,9 @@ struct testcase testcases_v6[] = {
.tfail = true,
},
{
- /* send a single MSS: will fail with GSO, because the segment
- * logic in udp4_ufo_fragment demands a gso skb to be > MTU
- */
+ /* send a single MSS: will fall back to no GSO */
.tlen = CONST_MSS_V6,
.gso_len = CONST_MSS_V6,
- .tfail = true,
.r_num_mss = 1,
},
{
@@ -246,10 +239,9 @@ struct testcase testcases_v6[] = {
.tfail = true,
},
{
- /* send a single 1B MSS: will fail, see single MSS above */
+ /* send a single 1B MSS: will fall back to no GSO */
.tlen = 1,
.gso_len = 1,
- .tfail = true,
.r_num_mss = 1,
},
{
@@ -448,7 +440,8 @@ static bool __send_one(int fd, struct msghdr *msg, int flags)
if (ret == -1)
error(1, errno, "sendmsg");
if (ret != msg->msg_iov->iov_len)
- error(1, 0, "sendto: %d != %lu", ret, msg->msg_iov->iov_len);
+ error(1, 0, "sendto: %d != %llu", ret,
+ (unsigned long long)msg->msg_iov->iov_len);
if (msg->msg_flags)
error(1, 0, "sendmsg: return flags 0x%x\n", msg->msg_flags);
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
index ada99496634a..17512a43885e 100644
--- a/tools/testing/selftests/net/udpgso_bench_tx.c
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -405,7 +405,8 @@ static int send_udp_segment(int fd, char *data)
if (ret == -1)
error(1, errno, "sendmsg");
if (ret != iov.iov_len)
- error(1, 0, "sendmsg: %u != %lu\n", ret, iov.iov_len);
+ error(1, 0, "sendmsg: %u != %llu\n", ret,
+ (unsigned long long)iov.iov_len);
return 1;
}
diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh
index 5445943bf07f..7a1bf94c5bd3 100755
--- a/tools/testing/selftests/net/xfrm_policy.sh
+++ b/tools/testing/selftests/net/xfrm_policy.sh
@@ -106,6 +106,13 @@ do_overlap()
#
# 10.0.0.0/24 and 10.0.1.0/24 nodes have been merged as 10.0.0.0/23.
ip -net $ns xfrm policy add src 10.1.0.0/24 dst 10.0.0.0/23 dir fwd priority 200 action block
+
+ # similar to above: add policies (with partially random address), with shrinking prefixes.
+ for p in 29 28 27;do
+ for k in $(seq 1 32); do
+ ip -net $ns xfrm policy add src 10.253.1.$((RANDOM%255))/$p dst 10.254.1.$((RANDOM%255))/$p dir fwd priority $((200+k)) action block 2>/dev/null
+ done
+ done
}
do_esp_policy_get_check() {
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index 4144984ebee5..08194aa44006 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -2,6 +2,7 @@
# Makefile for netfilter selftests
TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
- conntrack_icmp_related.sh nft_flowtable.sh
+ conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
+ nft_concat_range.sh
include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/ipvs.sh b/tools/testing/selftests/netfilter/ipvs.sh
new file mode 100755
index 000000000000..c3b8f90c497e
--- /dev/null
+++ b/tools/testing/selftests/netfilter/ipvs.sh
@@ -0,0 +1,228 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# End-to-end ipvs test suite
+# Topology:
+#--------------------------------------------------------------+
+# | |
+# ns0 | ns1 |
+# ----------- | ----------- ----------- |
+# | veth01 | --------- | veth10 | | veth12 | |
+# ----------- peer ----------- ----------- |
+# | | | |
+# ----------- | | |
+# | br0 | |----------------- peer |--------------|
+# ----------- | | |
+# | | | |
+# ---------- peer ---------- ----------- |
+# | veth02 | --------- | veth20 | | veth21 | |
+# ---------- | ---------- ----------- |
+# | ns2 |
+# | |
+#--------------------------------------------------------------+
+#
+# We assume that all network driver are loaded
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+readonly port=8080
+
+readonly vip_v4=207.175.44.110
+readonly cip_v4=10.0.0.2
+readonly gip_v4=10.0.0.1
+readonly dip_v4=172.16.0.1
+readonly rip_v4=172.16.0.2
+readonly sip_v4=10.0.0.3
+
+readonly infile="$(mktemp)"
+readonly outfile="$(mktemp)"
+readonly datalen=32
+
+sysipvsnet="/proc/sys/net/ipv4/vs/"
+if [ ! -d $sysipvsnet ]; then
+ modprobe -q ip_vs
+ if [ $? -ne 0 ]; then
+ echo "skip: could not run test without ipvs module"
+ exit $ksft_skip
+ fi
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+ipvsadm -v > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+ echo "SKIP: Could not run test without ipvsadm"
+ exit $ksft_skip
+fi
+
+setup() {
+ ip netns add ns0
+ ip netns add ns1
+ ip netns add ns2
+
+ ip link add veth01 netns ns0 type veth peer name veth10 netns ns1
+ ip link add veth02 netns ns0 type veth peer name veth20 netns ns2
+ ip link add veth12 netns ns1 type veth peer name veth21 netns ns2
+
+ ip netns exec ns0 ip link set veth01 up
+ ip netns exec ns0 ip link set veth02 up
+ ip netns exec ns0 ip link add br0 type bridge
+ ip netns exec ns0 ip link set veth01 master br0
+ ip netns exec ns0 ip link set veth02 master br0
+ ip netns exec ns0 ip link set br0 up
+ ip netns exec ns0 ip addr add ${cip_v4}/24 dev br0
+
+ ip netns exec ns1 ip link set lo up
+ ip netns exec ns1 ip link set veth10 up
+ ip netns exec ns1 ip addr add ${gip_v4}/24 dev veth10
+ ip netns exec ns1 ip link set veth12 up
+ ip netns exec ns1 ip addr add ${dip_v4}/24 dev veth12
+
+ ip netns exec ns2 ip link set lo up
+ ip netns exec ns2 ip link set veth21 up
+ ip netns exec ns2 ip addr add ${rip_v4}/24 dev veth21
+ ip netns exec ns2 ip link set veth20 up
+ ip netns exec ns2 ip addr add ${sip_v4}/24 dev veth20
+
+ sleep 1
+
+ dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none
+}
+
+cleanup() {
+ for i in 0 1 2
+ do
+ ip netns del ns$i > /dev/null 2>&1
+ done
+
+ if [ -f "${outfile}" ]; then
+ rm "${outfile}"
+ fi
+ if [ -f "${infile}" ]; then
+ rm "${infile}"
+ fi
+}
+
+server_listen() {
+ ip netns exec ns2 nc -l -p 8080 > "${outfile}" &
+ server_pid=$!
+ sleep 0.2
+}
+
+client_connect() {
+ ip netns exec ns0 timeout 2 nc -w 1 ${vip_v4} ${port} < "${infile}"
+}
+
+verify_data() {
+ wait "${server_pid}"
+ cmp "$infile" "$outfile" 2>/dev/null
+}
+
+test_service() {
+ server_listen
+ client_connect
+ verify_data
+}
+
+
+test_dr() {
+ ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
+
+ ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
+ ip netns exec ns1 ipvsadm -a -t ${vip_v4}:${port} -r ${rip_v4}:${port}
+ ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
+
+ # avoid incorrect arp response
+ ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1
+ ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2
+ # avoid reverse route lookup
+ ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0
+ ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0
+ ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1
+
+ test_service
+}
+
+test_nat() {
+ ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
+
+ ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
+ ip netns exec ns1 ipvsadm -a -m -t ${vip_v4}:${port} -r ${rip_v4}:${port}
+ ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
+
+ ip netns exec ns2 ip link del veth20
+ ip netns exec ns2 ip route add default via ${dip_v4} dev veth21
+
+ test_service
+}
+
+test_tun() {
+ ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
+
+ ip netns exec ns1 modprobe ipip
+ ip netns exec ns1 ip link set tunl0 up
+ ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=0
+ ip netns exec ns1 sysctl -qw net.ipv4.conf.all.send_redirects=0
+ ip netns exec ns1 sysctl -qw net.ipv4.conf.default.send_redirects=0
+ ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
+ ip netns exec ns1 ipvsadm -a -i -t ${vip_v4}:${port} -r ${rip_v4}:${port}
+ ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
+
+ ip netns exec ns2 modprobe ipip
+ ip netns exec ns2 ip link set tunl0 up
+ ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1
+ ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2
+ ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0
+ ip netns exec ns2 sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
+ ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0
+ ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1
+
+ test_service
+}
+
+run_tests() {
+ local errors=
+
+ echo "Testing DR mode..."
+ cleanup
+ setup
+ test_dr
+ errors=$(( $errors + $? ))
+
+ echo "Testing NAT mode..."
+ cleanup
+ setup
+ test_nat
+ errors=$(( $errors + $? ))
+
+ echo "Testing Tunnel mode..."
+ cleanup
+ setup
+ test_tun
+ errors=$(( $errors + $? ))
+
+ return $errors
+}
+
+trap cleanup EXIT
+
+run_tests
+
+if [ $? -ne 0 ]; then
+ echo -e "$(basename $0): ${RED}FAIL${NC}"
+ exit 1
+fi
+echo -e "$(basename $0): ${GREEN}PASS${NC}"
+exit 0
diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh
new file mode 100755
index 000000000000..aca21dde102a
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_concat_range.sh
@@ -0,0 +1,1481 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# nft_concat_range.sh - Tests for sets with concatenation of ranged fields
+#
+# Copyright (c) 2019 Red Hat GmbH
+#
+# Author: Stefano Brivio <sbrivio@redhat.com>
+#
+# shellcheck disable=SC2154,SC2034,SC2016,SC2030,SC2031
+# ^ Configuration and templates sourced with eval, counters reused in subshells
+
+KSELFTEST_SKIP=4
+
+# Available test groups:
+# - correctness: check that packets match given entries, and only those
+# - concurrency: attempt races between insertion, deletion and lookup
+# - timeout: check that packets match entries until they expire
+# - performance: estimate matching rate, compare with rbtree and hash baselines
+TESTS="correctness concurrency timeout"
+[ "${quicktest}" != "1" ] && TESTS="${TESTS} performance"
+
+# Set types, defined by TYPE_ variables below
+TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
+ net_port_net net_mac net_mac_icmp net6_mac_icmp net6_port_net6_port
+ net_port_mac_proto_net"
+
+# List of possible paths to pktgen script from kernel tree for performance tests
+PKTGEN_SCRIPT_PATHS="
+ ../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
+ pktgen/pktgen_bench_xmit_mode_netif_receive.sh"
+
+# Definition of set types:
+# display display text for test report
+# type_spec nftables set type specifier
+# chain_spec nftables type specifier for rules mapping to set
+# dst call sequence of format_*() functions for destination fields
+# src call sequence of format_*() functions for source fields
+# start initial integer used to generate addresses and ports
+# count count of entries to generate and match
+# src_delta number summed to destination generator for source fields
+# tools list of tools for correctness and timeout tests, any can be used
+# proto L4 protocol of test packets
+#
+# race_repeat race attempts per thread, 0 disables concurrency test for type
+# flood_tools list of tools for concurrency tests, any can be used
+# flood_proto L4 protocol of test packets for concurrency tests
+# flood_spec nftables type specifier for concurrency tests
+#
+# perf_duration duration of single pktgen injection test
+# perf_spec nftables type specifier for performance tests
+# perf_dst format_*() functions for destination fields in performance test
+# perf_src format_*() functions for source fields in performance test
+# perf_entries number of set entries for performance test
+# perf_proto L3 protocol of test packets
+TYPE_net_port="
+display net,port
+type_spec ipv4_addr . inet_service
+chain_spec ip daddr . udp dport
+dst addr4 port
+src
+start 1
+count 5
+src_delta 2000
+tools sendip nc bash
+proto udp
+
+race_repeat 3
+flood_tools iperf3 iperf netperf
+flood_proto udp
+flood_spec ip daddr . udp dport
+
+perf_duration 5
+perf_spec ip daddr . udp dport
+perf_dst addr4 port
+perf_src
+perf_entries 1000
+perf_proto ipv4
+"
+
+TYPE_port_net="
+display port,net
+type_spec inet_service . ipv4_addr
+chain_spec udp dport . ip daddr
+dst port addr4
+src
+start 1
+count 5
+src_delta 2000
+tools sendip nc bash
+proto udp
+
+race_repeat 3
+flood_tools iperf3 iperf netperf
+flood_proto udp
+flood_spec udp dport . ip daddr
+
+perf_duration 5
+perf_spec udp dport . ip daddr
+perf_dst port addr4
+perf_src
+perf_entries 100
+perf_proto ipv4
+"
+
+TYPE_net6_port="
+display net6,port
+type_spec ipv6_addr . inet_service
+chain_spec ip6 daddr . udp dport
+dst addr6 port
+src
+start 10
+count 5
+src_delta 2000
+tools sendip nc bash
+proto udp6
+
+race_repeat 3
+flood_tools iperf3 iperf netperf
+flood_proto tcp6
+flood_spec ip6 daddr . udp dport
+
+perf_duration 5
+perf_spec ip6 daddr . udp dport
+perf_dst addr6 port
+perf_src
+perf_entries 1000
+perf_proto ipv6
+"
+
+TYPE_port_proto="
+display port,proto
+type_spec inet_service . inet_proto
+chain_spec udp dport . meta l4proto
+dst port proto
+src
+start 1
+count 5
+src_delta 2000
+tools sendip nc bash
+proto udp
+
+race_repeat 0
+
+perf_duration 5
+perf_spec udp dport . meta l4proto
+perf_dst port proto
+perf_src
+perf_entries 30000
+perf_proto ipv4
+"
+
+TYPE_net6_port_mac="
+display net6,port,mac
+type_spec ipv6_addr . inet_service . ether_addr
+chain_spec ip6 daddr . udp dport . ether saddr
+dst addr6 port
+src mac
+start 10
+count 5
+src_delta 2000
+tools sendip nc bash
+proto udp6
+
+race_repeat 0
+
+perf_duration 5
+perf_spec ip6 daddr . udp dport . ether daddr
+perf_dst addr6 port mac
+perf_src
+perf_entries 10
+perf_proto ipv6
+"
+
+TYPE_net6_port_mac_proto="
+display net6,port,mac,proto
+type_spec ipv6_addr . inet_service . ether_addr . inet_proto
+chain_spec ip6 daddr . udp dport . ether saddr . meta l4proto
+dst addr6 port
+src mac proto
+start 10
+count 5
+src_delta 2000
+tools sendip nc bash
+proto udp6
+
+race_repeat 0
+
+perf_duration 5
+perf_spec ip6 daddr . udp dport . ether daddr . meta l4proto
+perf_dst addr6 port mac proto
+perf_src
+perf_entries 1000
+perf_proto ipv6
+"
+
+TYPE_net_port_net="
+display net,port,net
+type_spec ipv4_addr . inet_service . ipv4_addr
+chain_spec ip daddr . udp dport . ip saddr
+dst addr4 port
+src addr4
+start 1
+count 5
+src_delta 2000
+tools sendip nc bash
+proto udp
+
+race_repeat 3
+flood_tools iperf3 iperf netperf
+flood_proto tcp
+flood_spec ip daddr . udp dport . ip saddr
+
+perf_duration 0
+"
+
+TYPE_net6_port_net6_port="
+display net6,port,net6,port
+type_spec ipv6_addr . inet_service . ipv6_addr . inet_service
+chain_spec ip6 daddr . udp dport . ip6 saddr . udp sport
+dst addr6 port
+src addr6 port
+start 10
+count 5
+src_delta 2000
+tools sendip nc
+proto udp6
+
+race_repeat 3
+flood_tools iperf3 iperf netperf
+flood_proto tcp6
+flood_spec ip6 daddr . tcp dport . ip6 saddr . tcp sport
+
+perf_duration 0
+"
+
+TYPE_net_port_mac_proto_net="
+display net,port,mac,proto,net
+type_spec ipv4_addr . inet_service . ether_addr . inet_proto . ipv4_addr
+chain_spec ip daddr . udp dport . ether saddr . meta l4proto . ip saddr
+dst addr4 port
+src mac proto addr4
+start 1
+count 5
+src_delta 2000
+tools sendip nc bash
+proto udp
+
+race_repeat 0
+
+perf_duration 0
+"
+
+TYPE_net_mac="
+display net,mac
+type_spec ipv4_addr . ether_addr
+chain_spec ip daddr . ether saddr
+dst addr4
+src mac
+start 1
+count 5
+src_delta 2000
+tools sendip nc bash
+proto udp
+
+race_repeat 0
+
+perf_duration 5
+perf_spec ip daddr . ether daddr
+perf_dst addr4 mac
+perf_src
+perf_entries 1000
+perf_proto ipv4
+"
+
+TYPE_net_mac_icmp="
+display net,mac - ICMP
+type_spec ipv4_addr . ether_addr
+chain_spec ip daddr . ether saddr
+dst addr4
+src mac
+start 1
+count 5
+src_delta 2000
+tools ping
+proto icmp
+
+race_repeat 0
+
+perf_duration 0
+"
+
+TYPE_net6_mac_icmp="
+display net6,mac - ICMPv6
+type_spec ipv6_addr . ether_addr
+chain_spec ip6 daddr . ether saddr
+dst addr6
+src mac
+start 10
+count 50
+src_delta 2000
+tools ping
+proto icmp6
+
+race_repeat 0
+
+perf_duration 0
+"
+
+TYPE_net_port_proto_net="
+display net,port,proto,net
+type_spec ipv4_addr . inet_service . inet_proto . ipv4_addr
+chain_spec ip daddr . udp dport . meta l4proto . ip saddr
+dst addr4 port proto
+src addr4
+start 1
+count 5
+src_delta 2000
+tools sendip nc
+proto udp
+
+race_repeat 3
+flood_tools iperf3 iperf netperf
+flood_proto tcp
+flood_spec ip daddr . tcp dport . meta l4proto . ip saddr
+
+perf_duration 0
+"
+
+# Set template for all tests, types and rules are filled in depending on test
+set_template='
+flush ruleset
+
+table inet filter {
+ counter test {
+ packets 0 bytes 0
+ }
+
+ set test {
+ type ${type_spec}
+ flags interval,timeout
+ }
+
+ chain input {
+ type filter hook prerouting priority 0; policy accept;
+ ${chain_spec} @test counter name \"test\"
+ }
+}
+
+table netdev perf {
+ counter test {
+ packets 0 bytes 0
+ }
+
+ counter match {
+ packets 0 bytes 0
+ }
+
+ set test {
+ type ${type_spec}
+ flags interval
+ }
+
+ set norange {
+ type ${type_spec}
+ }
+
+ set noconcat {
+ type ${type_spec%% *}
+ flags interval
+ }
+
+ chain test {
+ type filter hook ingress device veth_a priority 0;
+ }
+}
+'
+
+err_buf=
+info_buf=
+
+# Append string to error buffer
+err() {
+ err_buf="${err_buf}${1}
+"
+}
+
+# Append string to information buffer
+info() {
+ info_buf="${info_buf}${1}
+"
+}
+
+# Flush error buffer to stdout
+err_flush() {
+ printf "%s" "${err_buf}"
+ err_buf=
+}
+
+# Flush information buffer to stdout
+info_flush() {
+ printf "%s" "${info_buf}"
+ info_buf=
+}
+
+# Setup veth pair: this namespace receives traffic, B generates it
+setup_veth() {
+ ip netns add B
+ ip link add veth_a type veth peer name veth_b || return 1
+
+ ip link set veth_a up
+ ip link set veth_b netns B
+
+ ip -n B link set veth_b up
+
+ ip addr add dev veth_a 10.0.0.1
+ ip route add default dev veth_a
+
+ ip -6 addr add fe80::1/64 dev veth_a nodad
+ ip -6 addr add 2001:db8::1/64 dev veth_a nodad
+ ip -6 route add default dev veth_a
+
+ ip -n B route add default dev veth_b
+
+ ip -6 -n B addr add fe80::2/64 dev veth_b nodad
+ ip -6 -n B addr add 2001:db8::2/64 dev veth_b nodad
+ ip -6 -n B route add default dev veth_b
+
+ B() {
+ ip netns exec B "$@" >/dev/null 2>&1
+ }
+
+ sleep 2
+}
+
+# Fill in set template and initialise set
+setup_set() {
+ eval "echo \"${set_template}\"" | nft -f -
+}
+
+# Check that at least one of the needed tools is available
+check_tools() {
+ __tools=
+ for tool in ${tools}; do
+ if [ "${tool}" = "nc" ] && [ "${proto}" = "udp6" ] && \
+ ! nc -u -w0 1.1.1.1 1 2>/dev/null; then
+ # Some GNU netcat builds might not support IPv6
+ __tools="${__tools} netcat-openbsd"
+ continue
+ fi
+ __tools="${__tools} ${tool}"
+
+ command -v "${tool}" >/dev/null && return 0
+ done
+ err "need one of:${__tools}, skipping" && return 1
+}
+
+# Set up function to send ICMP packets
+setup_send_icmp() {
+ send_icmp() {
+ B ping -c1 -W1 "${dst_addr4}" >/dev/null 2>&1
+ }
+}
+
+# Set up function to send ICMPv6 packets
+setup_send_icmp6() {
+ if command -v ping6 >/dev/null; then
+ send_icmp6() {
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+ B ping6 -q -c1 -W1 "${dst_addr6}"
+ }
+ else
+ send_icmp6() {
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+ B ping -q -6 -c1 -W1 "${dst_addr6}"
+ }
+ fi
+}
+
+# Set up function to send single UDP packets on IPv4
+setup_send_udp() {
+ if command -v sendip >/dev/null; then
+ send_udp() {
+ [ -n "${src_port}" ] && src_port="-us ${src_port}"
+ [ -n "${dst_port}" ] && dst_port="-ud ${dst_port}"
+ [ -n "${src_addr4}" ] && src_addr4="-is ${src_addr4}"
+
+ # shellcheck disable=SC2086 # sendip needs split options
+ B sendip -p ipv4 -p udp ${src_addr4} ${src_port} \
+ ${dst_port} "${dst_addr4}"
+
+ src_port=
+ dst_port=
+ src_addr4=
+ }
+ elif command -v nc >/dev/null; then
+ if nc -u -w0 1.1.1.1 1 2>/dev/null; then
+ # OpenBSD netcat
+ nc_opt="-w0"
+ else
+ # GNU netcat
+ nc_opt="-q0"
+ fi
+
+ send_udp() {
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}" dev veth_b
+ __src_addr4="-s ${src_addr4}"
+ fi
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+ [ -n "${src_port}" ] && src_port="-p ${src_port}"
+
+ echo "" | B nc -u "${nc_opt}" "${__src_addr4}" \
+ "${src_port}" "${dst_addr4}" "${dst_port}"
+
+ src_addr4=
+ src_port=
+ }
+ elif [ -z "$(bash -c 'type -p')" ]; then
+ send_udp() {
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}/16" dev veth_b
+ B ip route add default dev veth_b
+ fi
+
+ B bash -c "echo > /dev/udp/${dst_addr4}/${dst_port}"
+
+ if [ -n "${src_addr4}" ]; then
+ B ip addr del "${src_addr4}/16" dev veth_b
+ fi
+ src_addr4=
+ }
+ else
+ return 1
+ fi
+}
+
+# Set up function to send single UDP packets on IPv6
+setup_send_udp6() {
+ if command -v sendip >/dev/null; then
+ send_udp6() {
+ [ -n "${src_port}" ] && src_port="-us ${src_port}"
+ [ -n "${dst_port}" ] && dst_port="-ud ${dst_port}"
+ if [ -n "${src_addr6}" ]; then
+ src_addr6="-6s ${src_addr6}"
+ else
+ src_addr6="-6s 2001:db8::2"
+ fi
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ B sendip -p ipv6 -p udp ${src_addr6} ${src_port} \
+ ${dst_port} "${dst_addr6}"
+
+ src_port=
+ dst_port=
+ src_addr6=
+ }
+ elif command -v nc >/dev/null && nc -u -w0 1.1.1.1 1 2>/dev/null; then
+ # GNU netcat might not work with IPv6, try next tool
+ send_udp6() {
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+ if [ -n "${src_addr6}" ]; then
+ B ip addr add "${src_addr6}" dev veth_b nodad
+ else
+ src_addr6="2001:db8::2"
+ fi
+ [ -n "${src_port}" ] && src_port="-p ${src_port}"
+
+ # shellcheck disable=SC2086 # this needs split options
+ echo "" | B nc -u w0 "-s${src_addr6}" ${src_port} \
+ ${dst_addr6} ${dst_port}
+
+ src_addr6=
+ src_port=
+ }
+ elif [ -z "$(bash -c 'type -p')" ]; then
+ send_udp6() {
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+ B ip addr add "${src_addr6}" dev veth_b nodad
+ B bash -c "echo > /dev/udp/${dst_addr6}/${dst_port}"
+ ip -6 addr del "${dst_addr6}" dev veth_a 2>/dev/null
+ }
+ else
+ return 1
+ fi
+}
+
+# Set up function to send TCP traffic on IPv4
+setup_flood_tcp() {
+ if command -v iperf3 >/dev/null; then
+ flood_tcp() {
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}/16" dev veth_b
+ src_addr4="-B ${src_addr4}"
+ else
+ B ip addr add dev veth_b 10.0.0.2
+ src_addr4="-B 10.0.0.2"
+ fi
+ if [ -n "${src_port}" ]; then
+ src_port="--cport ${src_port}"
+ fi
+ B ip route add default dev veth_b 2>/dev/null
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ iperf3 -s -DB "${dst_addr4}" ${dst_port} >/dev/null 2>&1
+ sleep 2
+
+ # shellcheck disable=SC2086 # this needs split options
+ B iperf3 -c "${dst_addr4}" ${dst_port} ${src_port} \
+ ${src_addr4} -l16 -t 1000
+
+ src_addr4=
+ src_port=
+ dst_port=
+ }
+ elif command -v iperf >/dev/null; then
+ flood_tcp() {
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}/16" dev veth_b
+ src_addr4="-B ${src_addr4}"
+ else
+ B ip addr add dev veth_b 10.0.0.2 2>/dev/null
+ src_addr4="-B 10.0.0.2"
+ fi
+ if [ -n "${src_port}" ]; then
+ src_addr4="${src_addr4}:${src_port}"
+ fi
+ B ip route add default dev veth_b
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ iperf -s -DB "${dst_addr4}" ${dst_port} >/dev/null 2>&1
+ sleep 2
+
+ # shellcheck disable=SC2086 # this needs split options
+ B iperf -c "${dst_addr4}" ${dst_port} ${src_addr4} \
+ -l20 -t 1000
+
+ src_addr4=
+ src_port=
+ dst_port=
+ }
+ elif command -v netperf >/dev/null; then
+ flood_tcp() {
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}/16" dev veth_b
+ else
+ B ip addr add dev veth_b 10.0.0.2
+ src_addr4="10.0.0.2"
+ fi
+ if [ -n "${src_port}" ]; then
+ dst_port="${dst_port},${src_port}"
+ fi
+ B ip route add default dev veth_b
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ netserver -4 ${dst_port} -L "${dst_addr4}" \
+ >/dev/null 2>&1
+ sleep 2
+
+ # shellcheck disable=SC2086 # this needs split options
+ B netperf -4 -H "${dst_addr4}" ${dst_port} \
+ -L "${src_addr4}" -l 1000 -t TCP_STREAM
+
+ src_addr4=
+ src_port=
+ dst_port=
+ }
+ else
+ return 1
+ fi
+}
+
+# Set up function to send TCP traffic on IPv6
+setup_flood_tcp6() {
+ if command -v iperf3 >/dev/null; then
+ flood_tcp6() {
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr6}" ]; then
+ B ip addr add "${src_addr6}" dev veth_b nodad
+ src_addr6="-B ${src_addr6}"
+ else
+ src_addr6="-B 2001:db8::2"
+ fi
+ if [ -n "${src_port}" ]; then
+ src_port="--cport ${src_port}"
+ fi
+ B ip route add default dev veth_b
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ iperf3 -s -DB "${dst_addr6}" ${dst_port} >/dev/null 2>&1
+ sleep 2
+
+ # shellcheck disable=SC2086 # this needs split options
+ B iperf3 -c "${dst_addr6}" ${dst_port} \
+ ${src_port} ${src_addr6} -l16 -t 1000
+
+ src_addr6=
+ src_port=
+ dst_port=
+ }
+ elif command -v iperf >/dev/null; then
+ flood_tcp6() {
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr6}" ]; then
+ B ip addr add "${src_addr6}" dev veth_b nodad
+ src_addr6="-B ${src_addr6}"
+ else
+ src_addr6="-B 2001:db8::2"
+ fi
+ if [ -n "${src_port}" ]; then
+ src_addr6="${src_addr6}:${src_port}"
+ fi
+ B ip route add default dev veth_b
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ iperf -s -VDB "${dst_addr6}" ${dst_port} >/dev/null 2>&1
+ sleep 2
+
+ # shellcheck disable=SC2086 # this needs split options
+ B iperf -c "${dst_addr6}" -V ${dst_port} \
+ ${src_addr6} -l1 -t 1000
+
+ src_addr6=
+ src_port=
+ dst_port=
+ }
+ elif command -v netperf >/dev/null; then
+ flood_tcp6() {
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr6}" ]; then
+ B ip addr add "${src_addr6}" dev veth_b nodad
+ else
+ src_addr6="2001:db8::2"
+ fi
+ if [ -n "${src_port}" ]; then
+ dst_port="${dst_port},${src_port}"
+ fi
+ B ip route add default dev veth_b
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ netserver -6 ${dst_port} -L "${dst_addr6}" \
+ >/dev/null 2>&1
+ sleep 2
+
+ # shellcheck disable=SC2086 # this needs split options
+ B netperf -6 -H "${dst_addr6}" ${dst_port} \
+ -L "${src_addr6}" -l 1000 -t TCP_STREAM
+
+ src_addr6=
+ src_port=
+ dst_port=
+ }
+ else
+ return 1
+ fi
+}
+
+# Set up function to send UDP traffic on IPv4
+setup_flood_udp() {
+ if command -v iperf3 >/dev/null; then
+ flood_udp() {
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}/16" dev veth_b
+ src_addr4="-B ${src_addr4}"
+ else
+ B ip addr add dev veth_b 10.0.0.2 2>/dev/null
+ src_addr4="-B 10.0.0.2"
+ fi
+ if [ -n "${src_port}" ]; then
+ src_port="--cport ${src_port}"
+ fi
+ B ip route add default dev veth_b
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ iperf3 -s -DB "${dst_addr4}" ${dst_port}
+ sleep 2
+
+ # shellcheck disable=SC2086 # this needs split options
+ B iperf3 -u -c "${dst_addr4}" -Z -b 100M -l16 -t1000 \
+ ${dst_port} ${src_port} ${src_addr4}
+
+ src_addr4=
+ src_port=
+ dst_port=
+ }
+ elif command -v iperf >/dev/null; then
+ flood_udp() {
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}/16" dev veth_b
+ src_addr4="-B ${src_addr4}"
+ else
+ B ip addr add dev veth_b 10.0.0.2
+ src_addr4="-B 10.0.0.2"
+ fi
+ if [ -n "${src_port}" ]; then
+ src_addr4="${src_addr4}:${src_port}"
+ fi
+ B ip route add default dev veth_b
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ iperf -u -sDB "${dst_addr4}" ${dst_port} >/dev/null 2>&1
+ sleep 2
+
+ # shellcheck disable=SC2086 # this needs split options
+ B iperf -u -c "${dst_addr4}" -b 100M -l1 -t1000 \
+ ${dst_port} ${src_addr4}
+
+ src_addr4=
+ src_port=
+ dst_port=
+ }
+ elif command -v netperf >/dev/null; then
+ flood_udp() {
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}/16" dev veth_b
+ else
+ B ip addr add dev veth_b 10.0.0.2
+ src_addr4="10.0.0.2"
+ fi
+ if [ -n "${src_port}" ]; then
+ dst_port="${dst_port},${src_port}"
+ fi
+ B ip route add default dev veth_b
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ netserver -4 ${dst_port} -L "${dst_addr4}" \
+ >/dev/null 2>&1
+ sleep 2
+
+ # shellcheck disable=SC2086 # this needs split options
+ B netperf -4 -H "${dst_addr4}" ${dst_port} \
+ -L "${src_addr4}" -l 1000 -t UDP_STREAM
+
+ src_addr4=
+ src_port=
+ dst_port=
+ }
+ else
+ return 1
+ fi
+}
+
+# Find pktgen script and set up function to start pktgen injection
+setup_perf() {
+ for pktgen_script_path in ${PKTGEN_SCRIPT_PATHS} __notfound; do
+ command -v "${pktgen_script_path}" >/dev/null && break
+ done
+ [ "${pktgen_script_path}" = "__notfound" ] && return 1
+
+ perf_ipv4() {
+ ${pktgen_script_path} -s80 \
+ -i veth_a -d "${dst_addr4}" -p "${dst_port}" \
+ -m "${dst_mac}" \
+ -t $(($(nproc) / 5 + 1)) -b10000 -n0 2>/dev/null &
+ perf_pid=$!
+ }
+ perf_ipv6() {
+ IP6=6 ${pktgen_script_path} -s100 \
+ -i veth_a -d "${dst_addr6}" -p "${dst_port}" \
+ -m "${dst_mac}" \
+ -t $(($(nproc) / 5 + 1)) -b10000 -n0 2>/dev/null &
+ perf_pid=$!
+ }
+}
+
+# Clean up before each test
+cleanup() {
+ nft reset counter inet filter test >/dev/null 2>&1
+ nft flush ruleset >/dev/null 2>&1
+ ip link del dummy0 2>/dev/null
+ ip route del default 2>/dev/null
+ ip -6 route del default 2>/dev/null
+ ip netns del B 2>/dev/null
+ ip link del veth_a 2>/dev/null
+ timeout=
+ killall iperf3 2>/dev/null
+ killall iperf 2>/dev/null
+ killall netperf 2>/dev/null
+ killall netserver 2>/dev/null
+ rm -f ${tmp}
+ sleep 2
+}
+
+# Entry point for setup functions
+setup() {
+ if [ "$(id -u)" -ne 0 ]; then
+ echo " need to run as root"
+ exit ${KSELFTEST_SKIP}
+ fi
+
+ cleanup
+ check_tools || return 1
+ for arg do
+ if ! eval setup_"${arg}"; then
+ err " ${arg} not supported"
+ return 1
+ fi
+ done
+}
+
+# Format integer into IPv4 address, summing 10.0.0.5 (arbitrary) to it
+format_addr4() {
+ a=$((${1} + 16777216 * 10 + 5))
+ printf "%i.%i.%i.%i" \
+ "$((a / 16777216))" "$((a % 16777216 / 65536))" \
+ "$((a % 65536 / 256))" "$((a % 256))"
+}
+
+# Format integer into IPv6 address, summing 2001:db8:: to it
+format_addr6() {
+ printf "2001:db8::%04x:%04x" "$((${1} / 65536))" "$((${1} % 65536))"
+}
+
+# Format integer into EUI-48 address, summing 00:01:00:00:00:00 to it
+format_mac() {
+ printf "00:01:%02x:%02x:%02x:%02x" \
+ "$((${1} / 16777216))" "$((${1} % 16777216 / 65536))" \
+ "$((${1} % 65536 / 256))" "$((${1} % 256))"
+}
+
+# Format integer into port, avoid 0 port
+format_port() {
+ printf "%i" "$((${1} % 65534 + 1))"
+}
+
+# Drop suffixed '6' from L4 protocol, if any
+format_proto() {
+ printf "%s" "${proto}" | tr -d 6
+}
+
+# Format destination and source fields into nft concatenated type
+format() {
+ __start=
+ __end=
+ __expr="{ "
+
+ for f in ${dst}; do
+ [ "${__expr}" != "{ " ] && __expr="${__expr} . "
+
+ __start="$(eval format_"${f}" "${start}")"
+ __end="$(eval format_"${f}" "${end}")"
+
+ if [ "${f}" = "proto" ]; then
+ __expr="${__expr}${__start}"
+ else
+ __expr="${__expr}${__start}-${__end}"
+ fi
+ done
+ for f in ${src}; do
+ __expr="${__expr} . "
+ __start="$(eval format_"${f}" "${srcstart}")"
+ __end="$(eval format_"${f}" "${srcend}")"
+
+ if [ "${f}" = "proto" ]; then
+ __expr="${__expr}${__start}"
+ else
+ __expr="${__expr}${__start}-${__end}"
+ fi
+ done
+
+ if [ -n "${timeout}" ]; then
+ echo "${__expr} timeout ${timeout}s }"
+ else
+ echo "${__expr} }"
+ fi
+}
+
+# Format destination and source fields into nft type, start element only
+format_norange() {
+ __expr="{ "
+
+ for f in ${dst}; do
+ [ "${__expr}" != "{ " ] && __expr="${__expr} . "
+
+ __expr="${__expr}$(eval format_"${f}" "${start}")"
+ done
+ for f in ${src}; do
+ __expr="${__expr} . $(eval format_"${f}" "${start}")"
+ done
+
+ echo "${__expr} }"
+}
+
+# Format first destination field into nft type
+format_noconcat() {
+ for f in ${dst}; do
+ __start="$(eval format_"${f}" "${start}")"
+ __end="$(eval format_"${f}" "${end}")"
+
+ if [ "${f}" = "proto" ]; then
+ echo "{ ${__start} }"
+ else
+ echo "{ ${__start}-${__end} }"
+ fi
+ return
+ done
+}
+
+# Add single entry to 'test' set in 'inet filter' table
+add() {
+ if ! nft add element inet filter test "${1}"; then
+ err "Failed to add ${1} given ruleset:"
+ err "$(nft list ruleset -a)"
+ return 1
+ fi
+}
+
+# Format and output entries for sets in 'netdev perf' table
+add_perf() {
+ if [ "${1}" = "test" ]; then
+ echo "add element netdev perf test $(format)"
+ elif [ "${1}" = "norange" ]; then
+ echo "add element netdev perf norange $(format_norange)"
+ elif [ "${1}" = "noconcat" ]; then
+ echo "add element netdev perf noconcat $(format_noconcat)"
+ fi
+}
+
+# Add single entry to 'norange' set in 'netdev perf' table
+add_perf_norange() {
+ if ! nft add element netdev perf norange "${1}"; then
+ err "Failed to add ${1} given ruleset:"
+ err "$(nft list ruleset -a)"
+ return 1
+ fi
+}
+
+# Add single entry to 'noconcat' set in 'netdev perf' table
+add_perf_noconcat() {
+ if ! nft add element netdev perf noconcat "${1}"; then
+ err "Failed to add ${1} given ruleset:"
+ err "$(nft list ruleset -a)"
+ return 1
+ fi
+}
+
+# Delete single entry from set
+del() {
+ if ! nft delete element inet filter test "${1}"; then
+ err "Failed to delete ${1} given ruleset:"
+ err "$(nft list ruleset -a)"
+ return 1
+ fi
+}
+
+# Return packet count from 'test' counter in 'inet filter' table
+count_packets() {
+ found=0
+ for token in $(nft list counter inet filter test); do
+ [ ${found} -eq 1 ] && echo "${token}" && return
+ [ "${token}" = "packets" ] && found=1
+ done
+}
+
+# Return packet count from 'test' counter in 'netdev perf' table
+count_perf_packets() {
+ found=0
+ for token in $(nft list counter netdev perf test); do
+ [ ${found} -eq 1 ] && echo "${token}" && return
+ [ "${token}" = "packets" ] && found=1
+ done
+}
+
+# Set MAC addresses, send traffic according to specifier
+flood() {
+ ip link set veth_a address "$(format_mac "${1}")"
+ ip -n B link set veth_b address "$(format_mac "${2}")"
+
+ for f in ${dst}; do
+ eval dst_"$f"=\$\(format_\$f "${1}"\)
+ done
+ for f in ${src}; do
+ eval src_"$f"=\$\(format_\$f "${2}"\)
+ done
+ eval flood_\$proto
+}
+
+# Set MAC addresses, start pktgen injection
+perf() {
+ dst_mac="$(format_mac "${1}")"
+ ip link set veth_a address "${dst_mac}"
+
+ for f in ${dst}; do
+ eval dst_"$f"=\$\(format_\$f "${1}"\)
+ done
+ for f in ${src}; do
+ eval src_"$f"=\$\(format_\$f "${2}"\)
+ done
+ eval perf_\$perf_proto
+}
+
+# Set MAC addresses, send single packet, check that it matches, reset counter
+send_match() {
+ ip link set veth_a address "$(format_mac "${1}")"
+ ip -n B link set veth_b address "$(format_mac "${2}")"
+
+ for f in ${dst}; do
+ eval dst_"$f"=\$\(format_\$f "${1}"\)
+ done
+ for f in ${src}; do
+ eval src_"$f"=\$\(format_\$f "${2}"\)
+ done
+ eval send_\$proto
+ if [ "$(count_packets)" != "1" ]; then
+ err "${proto} packet to:"
+ err " $(for f in ${dst}; do
+ eval format_\$f "${1}"; printf ' '; done)"
+ err "from:"
+ err " $(for f in ${src}; do
+ eval format_\$f "${2}"; printf ' '; done)"
+ err "should have matched ruleset:"
+ err "$(nft list ruleset -a)"
+ return 1
+ fi
+ nft reset counter inet filter test >/dev/null
+}
+
+# Set MAC addresses, send single packet, check that it doesn't match
+send_nomatch() {
+ ip link set veth_a address "$(format_mac "${1}")"
+ ip -n B link set veth_b address "$(format_mac "${2}")"
+
+ for f in ${dst}; do
+ eval dst_"$f"=\$\(format_\$f "${1}"\)
+ done
+ for f in ${src}; do
+ eval src_"$f"=\$\(format_\$f "${2}"\)
+ done
+ eval send_\$proto
+ if [ "$(count_packets)" != "0" ]; then
+ err "${proto} packet to:"
+ err " $(for f in ${dst}; do
+ eval format_\$f "${1}"; printf ' '; done)"
+ err "from:"
+ err " $(for f in ${src}; do
+ eval format_\$f "${2}"; printf ' '; done)"
+ err "should not have matched ruleset:"
+ err "$(nft list ruleset -a)"
+ return 1
+ fi
+}
+
+# Correctness test template:
+# - add ranged element, check that packets match it
+# - check that packets outside range don't match it
+# - remove some elements, check that packets don't match anymore
+test_correctness() {
+ setup veth send_"${proto}" set || return ${KSELFTEST_SKIP}
+
+ range_size=1
+ for i in $(seq "${start}" $((start + count))); do
+ end=$((start + range_size))
+
+ # Avoid negative or zero-sized port ranges
+ if [ $((end / 65534)) -gt $((start / 65534)) ]; then
+ start=${end}
+ end=$((end + 1))
+ fi
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ add "$(format)" || return 1
+ for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do
+ send_match "${j}" $((j + src_delta)) || return 1
+ done
+ send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1
+
+ # Delete elements now and then
+ if [ $((i % 3)) -eq 0 ]; then
+ del "$(format)" || return 1
+ for j in $(seq ${start} \
+ $((range_size / 2 + 1)) ${end}); do
+ send_nomatch "${j}" $((j + src_delta)) \
+ || return 1
+ done
+ fi
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+}
+
+# Concurrency test template:
+# - add all the elements
+# - start a thread for each physical thread that:
+# - adds all the elements
+# - flushes the set
+# - adds all the elements
+# - flushes the entire ruleset
+# - adds the set back
+# - adds all the elements
+# - delete all the elements
+test_concurrency() {
+ proto=${flood_proto}
+ tools=${flood_tools}
+ chain_spec=${flood_spec}
+ setup veth flood_"${proto}" set || return ${KSELFTEST_SKIP}
+
+ range_size=1
+ cstart=${start}
+ flood_pids=
+ for i in $(seq ${start} $((start + count))); do
+ end=$((start + range_size))
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ add "$(format)" || return 1
+
+ flood "${i}" $((i + src_delta)) & flood_pids="${flood_pids} $!"
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+
+ sleep 10
+
+ pids=
+ for c in $(seq 1 "$(nproc)"); do (
+ for r in $(seq 1 "${race_repeat}"); do
+ range_size=1
+
+ # $start needs to be local to this subshell
+ # shellcheck disable=SC2030
+ start=${cstart}
+ for i in $(seq ${start} $((start + count))); do
+ end=$((start + range_size))
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ add "$(format)" 2>/dev/null
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+
+ nft flush inet filter test 2>/dev/null
+
+ range_size=1
+ start=${cstart}
+ for i in $(seq ${start} $((start + count))); do
+ end=$((start + range_size))
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ add "$(format)" 2>/dev/null
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+
+ nft flush ruleset
+ setup set 2>/dev/null
+
+ range_size=1
+ start=${cstart}
+ for i in $(seq ${start} $((start + count))); do
+ end=$((start + range_size))
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ add "$(format)" 2>/dev/null
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+
+ range_size=1
+ start=${cstart}
+ for i in $(seq ${start} $((start + count))); do
+ end=$((start + range_size))
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ del "$(format)" 2>/dev/null
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+ done
+ ) & pids="${pids} $!"
+ done
+
+ # shellcheck disable=SC2046,SC2086 # word splitting wanted here
+ wait $(for pid in ${pids}; do echo ${pid}; done)
+ # shellcheck disable=SC2046,SC2086
+ kill $(for pid in ${flood_pids}; do echo ${pid}; done) 2>/dev/null
+ # shellcheck disable=SC2046,SC2086
+ wait $(for pid in ${flood_pids}; do echo ${pid}; done) 2>/dev/null
+
+ return 0
+}
+
+# Timeout test template:
+# - add all the elements with 3s timeout while checking that packets match
+# - wait 3s after the last insertion, check that packets don't match any entry
+test_timeout() {
+ setup veth send_"${proto}" set || return ${KSELFTEST_SKIP}
+
+ timeout=3
+ range_size=1
+ for i in $(seq "${start}" $((start + count))); do
+ end=$((start + range_size))
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ add "$(format)" || return 1
+
+ for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do
+ send_match "${j}" $((j + src_delta)) || return 1
+ done
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+ sleep 3
+ for i in $(seq ${start} $((start + count))); do
+ end=$((start + range_size))
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do
+ send_nomatch "${j}" $((j + src_delta)) || return 1
+ done
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+}
+
+# Performance test template:
+# - add concatenated ranged entries
+# - add non-ranged concatenated entries (for hash set matching rate baseline)
+# - add ranged entries with first field only (for rbhash baseline)
+# - start pktgen injection directly on device rx path of this namespace
+# - measure drop only rate, hash and rbtree baselines, then matching rate
+test_performance() {
+ chain_spec=${perf_spec}
+ dst="${perf_dst}"
+ src="${perf_src}"
+ setup veth perf set || return ${KSELFTEST_SKIP}
+
+ first=${start}
+ range_size=1
+ for set in test norange noconcat; do
+ start=${first}
+ for i in $(seq ${start} $((start + perf_entries))); do
+ end=$((start + range_size))
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ if [ $((end / 65534)) -gt $((start / 65534)) ]; then
+ start=${end}
+ end=$((end + 1))
+ elif [ ${start} -eq ${end} ]; then
+ end=$((start + 1))
+ fi
+
+ add_perf ${set}
+
+ start=$((end + range_size))
+ done > "${tmp}"
+ nft -f "${tmp}"
+ done
+
+ perf $((end - 1)) ${srcstart}
+
+ sleep 2
+
+ nft add rule netdev perf test counter name \"test\" drop
+ nft reset counter netdev perf test >/dev/null 2>&1
+ sleep "${perf_duration}"
+ pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))"
+ info " baseline (drop from netdev hook): ${pps}pps"
+ handle="$(nft -a list chain netdev perf test | grep counter)"
+ handle="${handle##* }"
+ nft delete rule netdev perf test handle "${handle}"
+
+ nft add rule "netdev perf test ${chain_spec} @norange \
+ counter name \"test\" drop"
+ nft reset counter netdev perf test >/dev/null 2>&1
+ sleep "${perf_duration}"
+ pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))"
+ info " baseline hash (non-ranged entries): ${pps}pps"
+ handle="$(nft -a list chain netdev perf test | grep counter)"
+ handle="${handle##* }"
+ nft delete rule netdev perf test handle "${handle}"
+
+ nft add rule "netdev perf test ${chain_spec%%. *} @noconcat \
+ counter name \"test\" drop"
+ nft reset counter netdev perf test >/dev/null 2>&1
+ sleep "${perf_duration}"
+ pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))"
+ info " baseline rbtree (match on first field only): ${pps}pps"
+ handle="$(nft -a list chain netdev perf test | grep counter)"
+ handle="${handle##* }"
+ nft delete rule netdev perf test handle "${handle}"
+
+ nft add rule "netdev perf test ${chain_spec} @test \
+ counter name \"test\" drop"
+ nft reset counter netdev perf test >/dev/null 2>&1
+ sleep "${perf_duration}"
+ pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))"
+ p5="$(printf %5s "${perf_entries}")"
+ info " set with ${p5} full, ranged entries: ${pps}pps"
+ kill "${perf_pid}"
+}
+
+# Run everything in a separate network namespace
+[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }
+tmp="$(mktemp)"
+trap cleanup EXIT
+
+# Entry point for test runs
+passed=0
+for name in ${TESTS}; do
+ printf "TEST: %s\n" "${name}"
+ for type in ${TYPES}; do
+ eval desc=\$TYPE_"${type}"
+ IFS='
+'
+ for __line in ${desc}; do
+ # shellcheck disable=SC2086
+ eval ${__line%% *}=\"${__line##* }\";
+ done
+ IFS='
+'
+
+ if [ "${name}" = "concurrency" ] && \
+ [ "${race_repeat}" = "0" ]; then
+ continue
+ fi
+ if [ "${name}" = "performance" ] && \
+ [ "${perf_duration}" = "0" ]; then
+ continue
+ fi
+
+ printf " %-60s " "${display}"
+ eval test_"${name}"
+ ret=$?
+
+ if [ $ret -eq 0 ]; then
+ printf "[ OK ]\n"
+ info_flush
+ passed=$((passed + 1))
+ elif [ $ret -eq 1 ]; then
+ printf "[FAIL]\n"
+ err_flush
+ exit 1
+ elif [ $ret -eq ${KSELFTEST_SKIP} ]; then
+ printf "[SKIP]\n"
+ err_flush
+ fi
+ done
+done
+
+[ ${passed} -eq 0 ] && exit ${KSELFTEST_SKIP}
diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh
index 16571ac1dab4..d3e0809ab368 100755
--- a/tools/testing/selftests/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/netfilter/nft_flowtable.sh
@@ -226,17 +226,19 @@ check_transfer()
return 0
}
-test_tcp_forwarding()
+test_tcp_forwarding_ip()
{
local nsa=$1
local nsb=$2
+ local dstip=$3
+ local dstport=$4
local lret=0
ip netns exec $nsb nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" &
lpid=$!
sleep 1
- ip netns exec $nsa nc -w 4 10.0.2.99 12345 < "$ns1in" > "$ns1out" &
+ ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$ns1in" > "$ns1out" &
cpid=$!
sleep 3
@@ -258,6 +260,28 @@ test_tcp_forwarding()
return $lret
}
+test_tcp_forwarding()
+{
+ test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+
+ return $?
+}
+
+test_tcp_forwarding_nat()
+{
+ local lret
+
+ test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+ lret=$?
+
+ if [ $lret -eq 0 ] ; then
+ test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
+ lret=$?
+ fi
+
+ return $lret
+}
+
make_file "$ns1in" "ns1"
make_file "$ns2in" "ns2"
@@ -283,14 +307,19 @@ ip -net ns2 route add 192.168.10.1 via 10.0.2.1
# Same, but with NAT enabled.
ip netns exec nsr1 nft -f - <<EOF
table ip nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+ meta iif "veth0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
+ }
+
chain postrouting {
type nat hook postrouting priority 0; policy accept;
- meta oifname "veth1" masquerade
+ meta oifname "veth1" counter masquerade
}
}
EOF
-test_tcp_forwarding ns1 ns2
+test_tcp_forwarding_nat ns1 ns2
if [ $? -eq 0 ] ;then
echo "PASS: flow offloaded for ns1/ns2 with NAT"
@@ -313,7 +342,7 @@ fi
ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
-test_tcp_forwarding ns1 ns2
+test_tcp_forwarding_nat ns1 ns2
if [ $? -eq 0 ] ;then
echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
else
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
index 1be55e705780..d7e07f4c3d7f 100755
--- a/tools/testing/selftests/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -8,9 +8,14 @@ ksft_skip=4
ret=0
test_inet_nat=true
+sfx=$(mktemp -u "XXXXXXXX")
+ns0="ns0-$sfx"
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+
cleanup()
{
- for i in 0 1 2; do ip netns del ns$i;done
+ for i in 0 1 2; do ip netns del ns$i-"$sfx";done
}
nft --version > /dev/null 2>&1
@@ -25,40 +30,49 @@ if [ $? -ne 0 ];then
exit $ksft_skip
fi
-ip netns add ns0
+ip netns add "$ns0"
if [ $? -ne 0 ];then
- echo "SKIP: Could not create net namespace"
+ echo "SKIP: Could not create net namespace $ns0"
exit $ksft_skip
fi
trap cleanup EXIT
-ip netns add ns1
-ip netns add ns2
+ip netns add "$ns1"
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace $ns1"
+ exit $ksft_skip
+fi
+
+ip netns add "$ns2"
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace $ns2"
+ exit $ksft_skip
+fi
-ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 > /dev/null 2>&1
+ip link add veth0 netns "$ns0" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: No virtual ethernet pair device support in kernel"
exit $ksft_skip
fi
-ip link add veth1 netns ns0 type veth peer name eth0 netns ns2
+ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns2"
-ip -net ns0 link set lo up
-ip -net ns0 link set veth0 up
-ip -net ns0 addr add 10.0.1.1/24 dev veth0
-ip -net ns0 addr add dead:1::1/64 dev veth0
+ip -net "$ns0" link set lo up
+ip -net "$ns0" link set veth0 up
+ip -net "$ns0" addr add 10.0.1.1/24 dev veth0
+ip -net "$ns0" addr add dead:1::1/64 dev veth0
-ip -net ns0 link set veth1 up
-ip -net ns0 addr add 10.0.2.1/24 dev veth1
-ip -net ns0 addr add dead:2::1/64 dev veth1
+ip -net "$ns0" link set veth1 up
+ip -net "$ns0" addr add 10.0.2.1/24 dev veth1
+ip -net "$ns0" addr add dead:2::1/64 dev veth1
for i in 1 2; do
- ip -net ns$i link set lo up
- ip -net ns$i link set eth0 up
- ip -net ns$i addr add 10.0.$i.99/24 dev eth0
- ip -net ns$i route add default via 10.0.$i.1
- ip -net ns$i addr add dead:$i::99/64 dev eth0
- ip -net ns$i route add default via dead:$i::1
+ ip -net ns$i-$sfx link set lo up
+ ip -net ns$i-$sfx link set eth0 up
+ ip -net ns$i-$sfx addr add 10.0.$i.99/24 dev eth0
+ ip -net ns$i-$sfx route add default via 10.0.$i.1
+ ip -net ns$i-$sfx addr add dead:$i::99/64 dev eth0
+ ip -net ns$i-$sfx route add default via dead:$i::1
done
bad_counter()
@@ -66,8 +80,9 @@ bad_counter()
local ns=$1
local counter=$2
local expect=$3
+ local tag=$4
- echo "ERROR: $counter counter in $ns has unexpected value (expected $expect)" 1>&2
+ echo "ERROR: $counter counter in $ns has unexpected value (expected $expect) at $tag" 1>&2
ip netns exec $ns nft list counter inet filter $counter 1>&2
}
@@ -78,24 +93,24 @@ check_counters()
cnt=$(ip netns exec $ns nft list counter inet filter ns0in | grep -q "packets 1 bytes 84")
if [ $? -ne 0 ]; then
- bad_counter $ns ns0in "packets 1 bytes 84"
+ bad_counter $ns ns0in "packets 1 bytes 84" "check_counters 1"
lret=1
fi
cnt=$(ip netns exec $ns nft list counter inet filter ns0out | grep -q "packets 1 bytes 84")
if [ $? -ne 0 ]; then
- bad_counter $ns ns0out "packets 1 bytes 84"
+ bad_counter $ns ns0out "packets 1 bytes 84" "check_counters 2"
lret=1
fi
expect="packets 1 bytes 104"
cnt=$(ip netns exec $ns nft list counter inet filter ns0in6 | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter $ns ns0in6 "$expect"
+ bad_counter $ns ns0in6 "$expect" "check_counters 3"
lret=1
fi
cnt=$(ip netns exec $ns nft list counter inet filter ns0out6 | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter $ns ns0out6 "$expect"
+ bad_counter $ns ns0out6 "$expect" "check_counters 4"
lret=1
fi
@@ -107,41 +122,41 @@ check_ns0_counters()
local ns=$1
local lret=0
- cnt=$(ip netns exec ns0 nft list counter inet filter ns0in | grep -q "packets 0 bytes 0")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns0in | grep -q "packets 0 bytes 0")
if [ $? -ne 0 ]; then
- bad_counter ns0 ns0in "packets 0 bytes 0"
+ bad_counter "$ns0" ns0in "packets 0 bytes 0" "check_ns0_counters 1"
lret=1
fi
- cnt=$(ip netns exec ns0 nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0")
if [ $? -ne 0 ]; then
- bad_counter ns0 ns0in6 "packets 0 bytes 0"
+ bad_counter "$ns0" ns0in6 "packets 0 bytes 0"
lret=1
fi
- cnt=$(ip netns exec ns0 nft list counter inet filter ns0out | grep -q "packets 0 bytes 0")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns0out | grep -q "packets 0 bytes 0")
if [ $? -ne 0 ]; then
- bad_counter ns0 ns0out "packets 0 bytes 0"
+ bad_counter "$ns0" ns0out "packets 0 bytes 0" "check_ns0_counters 2"
lret=1
fi
- cnt=$(ip netns exec ns0 nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0")
if [ $? -ne 0 ]; then
- bad_counter ns0 ns0out6 "packets 0 bytes 0"
+ bad_counter "$ns0" ns0out6 "packets 0 bytes 0" "check_ns0_counters3 "
lret=1
fi
for dir in "in" "out" ; do
expect="packets 1 bytes 84"
- cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ${ns}${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns0 $ns$dir "$expect"
+ bad_counter "$ns0" $ns$dir "$expect" "check_ns0_counters 4"
lret=1
fi
expect="packets 1 bytes 104"
- cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir}6 | grep -q "$expect")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ${ns}${dir}6 | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns0 $ns$dir6 "$expect"
+ bad_counter "$ns0" $ns$dir6 "$expect" "check_ns0_counters 5"
lret=1
fi
done
@@ -152,7 +167,7 @@ check_ns0_counters()
reset_counters()
{
for i in 0 1 2;do
- ip netns exec ns$i nft reset counters inet > /dev/null
+ ip netns exec ns$i-$sfx nft reset counters inet > /dev/null
done
}
@@ -166,7 +181,7 @@ test_local_dnat6()
IPF="ip6"
fi
-ip netns exec ns0 nft -f - <<EOF
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
table $family nat {
chain output {
type nat hook output priority 0; policy accept;
@@ -180,7 +195,7 @@ EOF
fi
# ping netns1, expect rewrite to netns2
- ip netns exec ns0 ping -q -c 1 dead:1::99 > /dev/null
+ ip netns exec "$ns0" ping -q -c 1 dead:1::99 > /dev/null
if [ $? -ne 0 ]; then
lret=1
echo "ERROR: ping6 failed"
@@ -189,18 +204,18 @@ EOF
expect="packets 0 bytes 0"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns0 ns1$dir "$expect"
+ bad_counter "$ns0" ns1$dir "$expect" "test_local_dnat6 1"
lret=1
fi
done
expect="packets 1 bytes 104"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns0 ns2$dir "$expect"
+ bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat6 2"
lret=1
fi
done
@@ -208,9 +223,9 @@ EOF
# expect 0 count in ns1
expect="packets 0 bytes 0"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns0$dir "$expect"
+ bad_counter "$ns1" ns0$dir "$expect" "test_local_dnat6 3"
lret=1
fi
done
@@ -218,15 +233,15 @@ EOF
# expect 1 packet in ns2
expect="packets 1 bytes 104"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns2" nft list counter inet filter ns0${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns2 ns0$dir "$expect"
+ bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat6 4"
lret=1
fi
done
- test $lret -eq 0 && echo "PASS: ipv6 ping to ns1 was $family NATted to ns2"
- ip netns exec ns0 nft flush chain ip6 nat output
+ test $lret -eq 0 && echo "PASS: ipv6 ping to $ns1 was $family NATted to $ns2"
+ ip netns exec "$ns0" nft flush chain ip6 nat output
return $lret
}
@@ -241,7 +256,7 @@ test_local_dnat()
IPF="ip"
fi
-ip netns exec ns0 nft -f - <<EOF 2>/dev/null
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF 2>/dev/null
table $family nat {
chain output {
type nat hook output priority 0; policy accept;
@@ -260,7 +275,7 @@ EOF
fi
# ping netns1, expect rewrite to netns2
- ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null
+ ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null
if [ $? -ne 0 ]; then
lret=1
echo "ERROR: ping failed"
@@ -269,18 +284,18 @@ EOF
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns0 ns1$dir "$expect"
+ bad_counter "$ns0" ns1$dir "$expect" "test_local_dnat 1"
lret=1
fi
done
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns0 ns2$dir "$expect"
+ bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat 2"
lret=1
fi
done
@@ -288,9 +303,9 @@ EOF
# expect 0 count in ns1
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns0$dir "$expect"
+ bad_counter "$ns1" ns0$dir "$expect" "test_local_dnat 3"
lret=1
fi
done
@@ -298,19 +313,19 @@ EOF
# expect 1 packet in ns2
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns2" nft list counter inet filter ns0${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns2 ns0$dir "$expect"
+ bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat 4"
lret=1
fi
done
- test $lret -eq 0 && echo "PASS: ping to ns1 was $family NATted to ns2"
+ test $lret -eq 0 && echo "PASS: ping to $ns1 was $family NATted to $ns2"
- ip netns exec ns0 nft flush chain $family nat output
+ ip netns exec "$ns0" nft flush chain $family nat output
reset_counters
- ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null
+ ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null
if [ $? -ne 0 ]; then
lret=1
echo "ERROR: ping failed"
@@ -319,17 +334,17 @@ EOF
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns1$dir "$expect"
+ bad_counter "$ns1" ns1$dir "$expect" "test_local_dnat 5"
lret=1
fi
done
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns0 ns2$dir "$expect"
+ bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat 6"
lret=1
fi
done
@@ -337,9 +352,9 @@ EOF
# expect 1 count in ns1
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns0 ns0$dir "$expect"
+ bad_counter "$ns0" ns0$dir "$expect" "test_local_dnat 7"
lret=1
fi
done
@@ -347,14 +362,14 @@ EOF
# expect 0 packet in ns2
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns2" nft list counter inet filter ns0${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns2 ns2$dir "$expect"
+ bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat 8"
lret=1
fi
done
- test $lret -eq 0 && echo "PASS: ping to ns1 OK after $family nat output chain flush"
+ test $lret -eq 0 && echo "PASS: ping to $ns1 OK after $family nat output chain flush"
return $lret
}
@@ -366,26 +381,26 @@ test_masquerade6()
local natflags=$2
local lret=0
- ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
- ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+ ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping ns1 from ns2 via ipv6"
+ echo "ERROR: cannot ping $ns1 from $ns2 via ipv6"
return 1
lret=1
fi
expect="packets 1 bytes 104"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns2$dir "$expect"
+ bad_counter "$ns1" ns2$dir "$expect" "test_masquerade6 1"
lret=1
fi
- cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns2 ns1$dir "$expect"
+ bad_counter "$ns2" ns1$dir "$expect" "test_masquerade6 2"
lret=1
fi
done
@@ -393,7 +408,7 @@ test_masquerade6()
reset_counters
# add masquerading rule
-ip netns exec ns0 nft -f - <<EOF
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
table $family nat {
chain postrouting {
type nat hook postrouting priority 0; policy accept;
@@ -406,24 +421,24 @@ EOF
return $ksft_skip
fi
- ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+ ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping ns1 from ns2 with active $family masquerade $natflags"
+ echo "ERROR: cannot ping $ns1 from $ns2 with active $family masquerade $natflags"
lret=1
fi
# ns1 should have seen packets from ns0, due to masquerade
expect="packets 1 bytes 104"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns0$dir "$expect"
+ bad_counter "$ns1" ns0$dir "$expect" "test_masquerade6 3"
lret=1
fi
- cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns2 ns1$dir "$expect"
+ bad_counter "$ns2" ns1$dir "$expect" "test_masquerade6 4"
lret=1
fi
done
@@ -431,32 +446,32 @@ EOF
# ns1 should not have seen packets from ns2, due to masquerade
expect="packets 0 bytes 0"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns0$dir "$expect"
+ bad_counter "$ns1" ns0$dir "$expect" "test_masquerade6 5"
lret=1
fi
- cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns2 ns1$dir "$expect"
+ bad_counter "$ns0" ns1$dir "$expect" "test_masquerade6 6"
lret=1
fi
done
- ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+ ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerade $natflags (attempt 2)"
+ echo "ERROR: cannot ping $ns1 from $ns2 with active ipv6 masquerade $natflags (attempt 2)"
lret=1
fi
- ip netns exec ns0 nft flush chain $family nat postrouting
+ ip netns exec "$ns0" nft flush chain $family nat postrouting
if [ $? -ne 0 ]; then
echo "ERROR: Could not flush $family nat postrouting" 1>&2
lret=1
fi
- test $lret -eq 0 && echo "PASS: $family IPv6 masquerade $natflags for ns2"
+ test $lret -eq 0 && echo "PASS: $family IPv6 masquerade $natflags for $ns2"
return $lret
}
@@ -467,26 +482,26 @@ test_masquerade()
local natflags=$2
local lret=0
- ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
- ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
- ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping ns1 from ns2 $natflags"
+ echo "ERROR: cannot ping $ns1 from "$ns2" $natflags"
lret=1
fi
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns2$dir "$expect"
+ bad_counter "$ns1" ns2$dir "$expect" "test_masquerade 1"
lret=1
fi
- cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns2 ns1$dir "$expect"
+ bad_counter "$ns2" ns1$dir "$expect" "test_masquerade 2"
lret=1
fi
done
@@ -494,7 +509,7 @@ test_masquerade()
reset_counters
# add masquerading rule
-ip netns exec ns0 nft -f - <<EOF
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
table $family nat {
chain postrouting {
type nat hook postrouting priority 0; policy accept;
@@ -507,24 +522,24 @@ EOF
return $ksft_skip
fi
- ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping ns1 from ns2 with active $family masquerade $natflags"
+ echo "ERROR: cannot ping $ns1 from $ns2 with active $family masquerade $natflags"
lret=1
fi
# ns1 should have seen packets from ns0, due to masquerade
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns0$dir "$expect"
+ bad_counter "$ns1" ns0$dir "$expect" "test_masquerade 3"
lret=1
fi
- cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns2 ns1$dir "$expect"
+ bad_counter "$ns2" ns1$dir "$expect" "test_masquerade 4"
lret=1
fi
done
@@ -532,32 +547,32 @@ EOF
# ns1 should not have seen packets from ns2, due to masquerade
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns0$dir "$expect"
+ bad_counter "$ns1" ns0$dir "$expect" "test_masquerade 5"
lret=1
fi
- cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns2 ns1$dir "$expect"
+ bad_counter "$ns0" ns1$dir "$expect" "test_masquerade 6"
lret=1
fi
done
- ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping ns1 from ns2 with active ip masquerade $natflags (attempt 2)"
+ echo "ERROR: cannot ping $ns1 from $ns2 with active ip masquerade $natflags (attempt 2)"
lret=1
fi
- ip netns exec ns0 nft flush chain $family nat postrouting
+ ip netns exec "$ns0" nft flush chain $family nat postrouting
if [ $? -ne 0 ]; then
echo "ERROR: Could not flush $family nat postrouting" 1>&2
lret=1
fi
- test $lret -eq 0 && echo "PASS: $family IP masquerade $natflags for ns2"
+ test $lret -eq 0 && echo "PASS: $family IP masquerade $natflags for $ns2"
return $lret
}
@@ -567,25 +582,25 @@ test_redirect6()
local family=$1
local lret=0
- ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
- ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+ ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannnot ping ns1 from ns2 via ipv6"
+ echo "ERROR: cannnot ping $ns1 from $ns2 via ipv6"
lret=1
fi
expect="packets 1 bytes 104"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns2$dir "$expect"
+ bad_counter "$ns1" ns2$dir "$expect" "test_redirect6 1"
lret=1
fi
- cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns2 ns1$dir "$expect"
+ bad_counter "$ns2" ns1$dir "$expect" "test_redirect6 2"
lret=1
fi
done
@@ -593,7 +608,7 @@ test_redirect6()
reset_counters
# add redirect rule
-ip netns exec ns0 nft -f - <<EOF
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
table $family nat {
chain prerouting {
type nat hook prerouting priority 0; policy accept;
@@ -606,18 +621,18 @@ EOF
return $ksft_skip
fi
- ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+ ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping ns1 from ns2 via ipv6 with active $family redirect"
+ echo "ERROR: cannot ping $ns1 from $ns2 via ipv6 with active $family redirect"
lret=1
fi
# ns1 should have seen no packets from ns2, due to redirection
expect="packets 0 bytes 0"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns0$dir "$expect"
+ bad_counter "$ns1" ns0$dir "$expect" "test_redirect6 3"
lret=1
fi
done
@@ -625,20 +640,20 @@ EOF
# ns0 should have seen packets from ns2, due to masquerade
expect="packets 1 bytes 104"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns0$dir "$expect"
+ bad_counter "$ns1" ns0$dir "$expect" "test_redirect6 4"
lret=1
fi
done
- ip netns exec ns0 nft delete table $family nat
+ ip netns exec "$ns0" nft delete table $family nat
if [ $? -ne 0 ]; then
echo "ERROR: Could not delete $family nat table" 1>&2
lret=1
fi
- test $lret -eq 0 && echo "PASS: $family IPv6 redirection for ns2"
+ test $lret -eq 0 && echo "PASS: $family IPv6 redirection for $ns2"
return $lret
}
@@ -648,26 +663,26 @@ test_redirect()
local family=$1
local lret=0
- ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
- ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
- ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping ns1 from ns2"
+ echo "ERROR: cannot ping $ns1 from $ns2"
lret=1
fi
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns2$dir "$expect"
+ bad_counter "$ns1" $ns2$dir "$expect" "test_redirect 1"
lret=1
fi
- cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns2 ns1$dir "$expect"
+ bad_counter "$ns2" ns1$dir "$expect" "test_redirect 2"
lret=1
fi
done
@@ -675,7 +690,7 @@ test_redirect()
reset_counters
# add redirect rule
-ip netns exec ns0 nft -f - <<EOF
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
table $family nat {
chain prerouting {
type nat hook prerouting priority 0; policy accept;
@@ -688,9 +703,9 @@ EOF
return $ksft_skip
fi
- ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping ns1 from ns2 with active $family ip redirect"
+ echo "ERROR: cannot ping $ns1 from $ns2 with active $family ip redirect"
lret=1
fi
@@ -698,9 +713,9 @@ EOF
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns0$dir "$expect"
+ bad_counter "$ns1" ns0$dir "$expect" "test_redirect 3"
lret=1
fi
done
@@ -708,28 +723,28 @@ EOF
# ns0 should have seen packets from ns2, due to masquerade
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns0$dir "$expect"
+ bad_counter "$ns0" ns0$dir "$expect" "test_redirect 4"
lret=1
fi
done
- ip netns exec ns0 nft delete table $family nat
+ ip netns exec "$ns0" nft delete table $family nat
if [ $? -ne 0 ]; then
echo "ERROR: Could not delete $family nat table" 1>&2
lret=1
fi
- test $lret -eq 0 && echo "PASS: $family IP redirection for ns2"
+ test $lret -eq 0 && echo "PASS: $family IP redirection for $ns2"
return $lret
}
-# ip netns exec ns0 ping -c 1 -q 10.0.$i.99
+# ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99
for i in 0 1 2; do
-ip netns exec ns$i nft -f - <<EOF
+ip netns exec ns$i-$sfx nft -f /dev/stdin <<EOF
table inet filter {
counter ns0in {}
counter ns1in {}
@@ -796,18 +811,18 @@ done
sleep 3
# test basic connectivity
for i in 1 2; do
- ip netns exec ns0 ping -c 1 -q 10.0.$i.99 > /dev/null
+ ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99 > /dev/null
if [ $? -ne 0 ];then
echo "ERROR: Could not reach other namespace(s)" 1>&2
ret=1
fi
- ip netns exec ns0 ping -c 1 -q dead:$i::99 > /dev/null
+ ip netns exec "$ns0" ping -c 1 -q dead:$i::99 > /dev/null
if [ $? -ne 0 ];then
echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2
ret=1
fi
- check_counters ns$i
+ check_counters ns$i-$sfx
if [ $? -ne 0 ]; then
ret=1
fi
@@ -820,7 +835,7 @@ for i in 1 2; do
done
if [ $ret -eq 0 ];then
- echo "PASS: netns routing/connectivity: ns0 can reach ns1 and ns2"
+ echo "PASS: netns routing/connectivity: $ns0 can reach $ns1 and $ns2"
fi
reset_counters
@@ -846,4 +861,9 @@ reset_counters
$test_inet_nat && test_redirect inet
$test_inet_nat && test_redirect6 inet
+if [ $ret -ne 0 ];then
+ echo -n "FAIL: "
+ nft --version
+fi
+
exit $ret
diff --git a/tools/testing/selftests/openat2/.gitignore b/tools/testing/selftests/openat2/.gitignore
new file mode 100644
index 000000000000..bd68f6c3fd07
--- /dev/null
+++ b/tools/testing/selftests/openat2/.gitignore
@@ -0,0 +1 @@
+/*_test
diff --git a/tools/testing/selftests/openat2/Makefile b/tools/testing/selftests/openat2/Makefile
new file mode 100644
index 000000000000..4b93b1417b86
--- /dev/null
+++ b/tools/testing/selftests/openat2/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined
+TEST_GEN_PROGS := openat2_test resolve_test rename_attack_test
+
+include ../lib.mk
+
+$(TEST_GEN_PROGS): helpers.c
diff --git a/tools/testing/selftests/openat2/helpers.c b/tools/testing/selftests/openat2/helpers.c
new file mode 100644
index 000000000000..e9a6557ab16f
--- /dev/null
+++ b/tools/testing/selftests/openat2/helpers.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Author: Aleksa Sarai <cyphar@cyphar.com>
+ * Copyright (C) 2018-2019 SUSE LLC.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <string.h>
+#include <syscall.h>
+#include <limits.h>
+
+#include "helpers.h"
+
+bool needs_openat2(const struct open_how *how)
+{
+ return how->resolve != 0;
+}
+
+int raw_openat2(int dfd, const char *path, void *how, size_t size)
+{
+ int ret = syscall(__NR_openat2, dfd, path, how, size);
+ return ret >= 0 ? ret : -errno;
+}
+
+int sys_openat2(int dfd, const char *path, struct open_how *how)
+{
+ return raw_openat2(dfd, path, how, sizeof(*how));
+}
+
+int sys_openat(int dfd, const char *path, struct open_how *how)
+{
+ int ret = openat(dfd, path, how->flags, how->mode);
+ return ret >= 0 ? ret : -errno;
+}
+
+int sys_renameat2(int olddirfd, const char *oldpath,
+ int newdirfd, const char *newpath, unsigned int flags)
+{
+ int ret = syscall(__NR_renameat2, olddirfd, oldpath,
+ newdirfd, newpath, flags);
+ return ret >= 0 ? ret : -errno;
+}
+
+int touchat(int dfd, const char *path)
+{
+ int fd = openat(dfd, path, O_CREAT);
+ if (fd >= 0)
+ close(fd);
+ return fd;
+}
+
+char *fdreadlink(int fd)
+{
+ char *target, *tmp;
+
+ E_asprintf(&tmp, "/proc/self/fd/%d", fd);
+
+ target = malloc(PATH_MAX);
+ if (!target)
+ ksft_exit_fail_msg("fdreadlink: malloc failed\n");
+ memset(target, 0, PATH_MAX);
+
+ E_readlink(tmp, target, PATH_MAX);
+ free(tmp);
+ return target;
+}
+
+bool fdequal(int fd, int dfd, const char *path)
+{
+ char *fdpath, *dfdpath, *other;
+ bool cmp;
+
+ fdpath = fdreadlink(fd);
+ dfdpath = fdreadlink(dfd);
+
+ if (!path)
+ E_asprintf(&other, "%s", dfdpath);
+ else if (*path == '/')
+ E_asprintf(&other, "%s", path);
+ else
+ E_asprintf(&other, "%s/%s", dfdpath, path);
+
+ cmp = !strcmp(fdpath, other);
+
+ free(fdpath);
+ free(dfdpath);
+ free(other);
+ return cmp;
+}
+
+bool openat2_supported = false;
+
+void __attribute__((constructor)) init(void)
+{
+ struct open_how how = {};
+ int fd;
+
+ BUILD_BUG_ON(sizeof(struct open_how) != OPEN_HOW_SIZE_VER0);
+
+ /* Check openat2(2) support. */
+ fd = sys_openat2(AT_FDCWD, ".", &how);
+ openat2_supported = (fd >= 0);
+
+ if (fd >= 0)
+ close(fd);
+}
diff --git a/tools/testing/selftests/openat2/helpers.h b/tools/testing/selftests/openat2/helpers.h
new file mode 100644
index 000000000000..a6ea27344db2
--- /dev/null
+++ b/tools/testing/selftests/openat2/helpers.h
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Author: Aleksa Sarai <cyphar@cyphar.com>
+ * Copyright (C) 2018-2019 SUSE LLC.
+ */
+
+#ifndef __RESOLVEAT_H__
+#define __RESOLVEAT_H__
+
+#define _GNU_SOURCE
+#include <stdint.h>
+#include <errno.h>
+#include <linux/types.h>
+#include "../kselftest.h"
+
+#define ARRAY_LEN(X) (sizeof (X) / sizeof (*(X)))
+#define BUILD_BUG_ON(e) ((void)(sizeof(struct { int:(-!!(e)); })))
+
+#ifndef SYS_openat2
+#ifndef __NR_openat2
+#define __NR_openat2 437
+#endif /* __NR_openat2 */
+#define SYS_openat2 __NR_openat2
+#endif /* SYS_openat2 */
+
+/*
+ * Arguments for how openat2(2) should open the target path. If @resolve is
+ * zero, then openat2(2) operates very similarly to openat(2).
+ *
+ * However, unlike openat(2), unknown bits in @flags result in -EINVAL rather
+ * than being silently ignored. @mode must be zero unless one of {O_CREAT,
+ * O_TMPFILE} are set.
+ *
+ * @flags: O_* flags.
+ * @mode: O_CREAT/O_TMPFILE file mode.
+ * @resolve: RESOLVE_* flags.
+ */
+struct open_how {
+ __u64 flags;
+ __u64 mode;
+ __u64 resolve;
+};
+
+#define OPEN_HOW_SIZE_VER0 24 /* sizeof first published struct */
+#define OPEN_HOW_SIZE_LATEST OPEN_HOW_SIZE_VER0
+
+bool needs_openat2(const struct open_how *how);
+
+#ifndef RESOLVE_IN_ROOT
+/* how->resolve flags for openat2(2). */
+#define RESOLVE_NO_XDEV 0x01 /* Block mount-point crossings
+ (includes bind-mounts). */
+#define RESOLVE_NO_MAGICLINKS 0x02 /* Block traversal through procfs-style
+ "magic-links". */
+#define RESOLVE_NO_SYMLINKS 0x04 /* Block traversal through all symlinks
+ (implies OEXT_NO_MAGICLINKS) */
+#define RESOLVE_BENEATH 0x08 /* Block "lexical" trickery like
+ "..", symlinks, and absolute
+ paths which escape the dirfd. */
+#define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".."
+ be scoped inside the dirfd
+ (similar to chroot(2)). */
+#endif /* RESOLVE_IN_ROOT */
+
+#define E_func(func, ...) \
+ do { \
+ if (func(__VA_ARGS__) < 0) \
+ ksft_exit_fail_msg("%s:%d %s failed\n", \
+ __FILE__, __LINE__, #func);\
+ } while (0)
+
+#define E_asprintf(...) E_func(asprintf, __VA_ARGS__)
+#define E_chmod(...) E_func(chmod, __VA_ARGS__)
+#define E_dup2(...) E_func(dup2, __VA_ARGS__)
+#define E_fchdir(...) E_func(fchdir, __VA_ARGS__)
+#define E_fstatat(...) E_func(fstatat, __VA_ARGS__)
+#define E_kill(...) E_func(kill, __VA_ARGS__)
+#define E_mkdirat(...) E_func(mkdirat, __VA_ARGS__)
+#define E_mount(...) E_func(mount, __VA_ARGS__)
+#define E_prctl(...) E_func(prctl, __VA_ARGS__)
+#define E_readlink(...) E_func(readlink, __VA_ARGS__)
+#define E_setresuid(...) E_func(setresuid, __VA_ARGS__)
+#define E_symlinkat(...) E_func(symlinkat, __VA_ARGS__)
+#define E_touchat(...) E_func(touchat, __VA_ARGS__)
+#define E_unshare(...) E_func(unshare, __VA_ARGS__)
+
+#define E_assert(expr, msg, ...) \
+ do { \
+ if (!(expr)) \
+ ksft_exit_fail_msg("ASSERT(%s:%d) failed (%s): " msg "\n", \
+ __FILE__, __LINE__, #expr, ##__VA_ARGS__); \
+ } while (0)
+
+int raw_openat2(int dfd, const char *path, void *how, size_t size);
+int sys_openat2(int dfd, const char *path, struct open_how *how);
+int sys_openat(int dfd, const char *path, struct open_how *how);
+int sys_renameat2(int olddirfd, const char *oldpath,
+ int newdirfd, const char *newpath, unsigned int flags);
+
+int touchat(int dfd, const char *path);
+char *fdreadlink(int fd);
+bool fdequal(int fd, int dfd, const char *path);
+
+extern bool openat2_supported;
+
+#endif /* __RESOLVEAT_H__ */
diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c
new file mode 100644
index 000000000000..b386367c606b
--- /dev/null
+++ b/tools/testing/selftests/openat2/openat2_test.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Author: Aleksa Sarai <cyphar@cyphar.com>
+ * Copyright (C) 2018-2019 SUSE LLC.
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <sched.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include "../kselftest.h"
+#include "helpers.h"
+
+/*
+ * O_LARGEFILE is set to 0 by glibc.
+ * XXX: This is wrong on {mips, parisc, powerpc, sparc}.
+ */
+#undef O_LARGEFILE
+#define O_LARGEFILE 0x8000
+
+struct open_how_ext {
+ struct open_how inner;
+ uint32_t extra1;
+ char pad1[128];
+ uint32_t extra2;
+ char pad2[128];
+ uint32_t extra3;
+};
+
+struct struct_test {
+ const char *name;
+ struct open_how_ext arg;
+ size_t size;
+ int err;
+};
+
+#define NUM_OPENAT2_STRUCT_TESTS 7
+#define NUM_OPENAT2_STRUCT_VARIATIONS 13
+
+void test_openat2_struct(void)
+{
+ int misalignments[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 17, 87 };
+
+ struct struct_test tests[] = {
+ /* Normal struct. */
+ { .name = "normal struct",
+ .arg.inner.flags = O_RDONLY,
+ .size = sizeof(struct open_how) },
+ /* Bigger struct, with zeroed out end. */
+ { .name = "bigger struct (zeroed out)",
+ .arg.inner.flags = O_RDONLY,
+ .size = sizeof(struct open_how_ext) },
+
+ /* TODO: Once expanded, check zero-padding. */
+
+ /* Smaller than version-0 struct. */
+ { .name = "zero-sized 'struct'",
+ .arg.inner.flags = O_RDONLY, .size = 0, .err = -EINVAL },
+ { .name = "smaller-than-v0 struct",
+ .arg.inner.flags = O_RDONLY,
+ .size = OPEN_HOW_SIZE_VER0 - 1, .err = -EINVAL },
+
+ /* Bigger struct, with non-zero trailing bytes. */
+ { .name = "bigger struct (non-zero data in first 'future field')",
+ .arg.inner.flags = O_RDONLY, .arg.extra1 = 0xdeadbeef,
+ .size = sizeof(struct open_how_ext), .err = -E2BIG },
+ { .name = "bigger struct (non-zero data in middle of 'future fields')",
+ .arg.inner.flags = O_RDONLY, .arg.extra2 = 0xfeedcafe,
+ .size = sizeof(struct open_how_ext), .err = -E2BIG },
+ { .name = "bigger struct (non-zero data at end of 'future fields')",
+ .arg.inner.flags = O_RDONLY, .arg.extra3 = 0xabad1dea,
+ .size = sizeof(struct open_how_ext), .err = -E2BIG },
+ };
+
+ BUILD_BUG_ON(ARRAY_LEN(misalignments) != NUM_OPENAT2_STRUCT_VARIATIONS);
+ BUILD_BUG_ON(ARRAY_LEN(tests) != NUM_OPENAT2_STRUCT_TESTS);
+
+ for (int i = 0; i < ARRAY_LEN(tests); i++) {
+ struct struct_test *test = &tests[i];
+ struct open_how_ext how_ext = test->arg;
+
+ for (int j = 0; j < ARRAY_LEN(misalignments); j++) {
+ int fd, misalign = misalignments[j];
+ char *fdpath = NULL;
+ bool failed;
+ void (*resultfn)(const char *msg, ...) = ksft_test_result_pass;
+
+ void *copy = NULL, *how_copy = &how_ext;
+
+ if (!openat2_supported) {
+ ksft_print_msg("openat2(2) unsupported\n");
+ resultfn = ksft_test_result_skip;
+ goto skip;
+ }
+
+ if (misalign) {
+ /*
+ * Explicitly misalign the structure copying it with the given
+ * (mis)alignment offset. The other data is set to be non-zero to
+ * make sure that non-zero bytes outside the struct aren't checked
+ *
+ * This is effectively to check that is_zeroed_user() works.
+ */
+ copy = malloc(misalign + sizeof(how_ext));
+ how_copy = copy + misalign;
+ memset(copy, 0xff, misalign);
+ memcpy(how_copy, &how_ext, sizeof(how_ext));
+ }
+
+ fd = raw_openat2(AT_FDCWD, ".", how_copy, test->size);
+ if (test->err >= 0)
+ failed = (fd < 0);
+ else
+ failed = (fd != test->err);
+ if (fd >= 0) {
+ fdpath = fdreadlink(fd);
+ close(fd);
+ }
+
+ if (failed) {
+ resultfn = ksft_test_result_fail;
+
+ ksft_print_msg("openat2 unexpectedly returned ");
+ if (fdpath)
+ ksft_print_msg("%d['%s']\n", fd, fdpath);
+ else
+ ksft_print_msg("%d (%s)\n", fd, strerror(-fd));
+ }
+
+skip:
+ if (test->err >= 0)
+ resultfn("openat2 with %s argument [misalign=%d] succeeds\n",
+ test->name, misalign);
+ else
+ resultfn("openat2 with %s argument [misalign=%d] fails with %d (%s)\n",
+ test->name, misalign, test->err,
+ strerror(-test->err));
+
+ free(copy);
+ free(fdpath);
+ fflush(stdout);
+ }
+ }
+}
+
+struct flag_test {
+ const char *name;
+ struct open_how how;
+ int err;
+};
+
+#define NUM_OPENAT2_FLAG_TESTS 23
+
+void test_openat2_flags(void)
+{
+ struct flag_test tests[] = {
+ /* O_TMPFILE is incompatible with O_PATH and O_CREAT. */
+ { .name = "incompatible flags (O_TMPFILE | O_PATH)",
+ .how.flags = O_TMPFILE | O_PATH | O_RDWR, .err = -EINVAL },
+ { .name = "incompatible flags (O_TMPFILE | O_CREAT)",
+ .how.flags = O_TMPFILE | O_CREAT | O_RDWR, .err = -EINVAL },
+
+ /* O_PATH only permits certain other flags to be set ... */
+ { .name = "compatible flags (O_PATH | O_CLOEXEC)",
+ .how.flags = O_PATH | O_CLOEXEC },
+ { .name = "compatible flags (O_PATH | O_DIRECTORY)",
+ .how.flags = O_PATH | O_DIRECTORY },
+ { .name = "compatible flags (O_PATH | O_NOFOLLOW)",
+ .how.flags = O_PATH | O_NOFOLLOW },
+ /* ... and others are absolutely not permitted. */
+ { .name = "incompatible flags (O_PATH | O_RDWR)",
+ .how.flags = O_PATH | O_RDWR, .err = -EINVAL },
+ { .name = "incompatible flags (O_PATH | O_CREAT)",
+ .how.flags = O_PATH | O_CREAT, .err = -EINVAL },
+ { .name = "incompatible flags (O_PATH | O_EXCL)",
+ .how.flags = O_PATH | O_EXCL, .err = -EINVAL },
+ { .name = "incompatible flags (O_PATH | O_NOCTTY)",
+ .how.flags = O_PATH | O_NOCTTY, .err = -EINVAL },
+ { .name = "incompatible flags (O_PATH | O_DIRECT)",
+ .how.flags = O_PATH | O_DIRECT, .err = -EINVAL },
+ { .name = "incompatible flags (O_PATH | O_LARGEFILE)",
+ .how.flags = O_PATH | O_LARGEFILE, .err = -EINVAL },
+
+ /* ->mode must only be set with O_{CREAT,TMPFILE}. */
+ { .name = "non-zero how.mode and O_RDONLY",
+ .how.flags = O_RDONLY, .how.mode = 0600, .err = -EINVAL },
+ { .name = "non-zero how.mode and O_PATH",
+ .how.flags = O_PATH, .how.mode = 0600, .err = -EINVAL },
+ { .name = "valid how.mode and O_CREAT",
+ .how.flags = O_CREAT, .how.mode = 0600 },
+ { .name = "valid how.mode and O_TMPFILE",
+ .how.flags = O_TMPFILE | O_RDWR, .how.mode = 0600 },
+ /* ->mode must only contain 0777 bits. */
+ { .name = "invalid how.mode and O_CREAT",
+ .how.flags = O_CREAT,
+ .how.mode = 0xFFFF, .err = -EINVAL },
+ { .name = "invalid (very large) how.mode and O_CREAT",
+ .how.flags = O_CREAT,
+ .how.mode = 0xC000000000000000ULL, .err = -EINVAL },
+ { .name = "invalid how.mode and O_TMPFILE",
+ .how.flags = O_TMPFILE | O_RDWR,
+ .how.mode = 0x1337, .err = -EINVAL },
+ { .name = "invalid (very large) how.mode and O_TMPFILE",
+ .how.flags = O_TMPFILE | O_RDWR,
+ .how.mode = 0x0000A00000000000ULL, .err = -EINVAL },
+
+ /* ->resolve must only contain RESOLVE_* flags. */
+ { .name = "invalid how.resolve and O_RDONLY",
+ .how.flags = O_RDONLY,
+ .how.resolve = 0x1337, .err = -EINVAL },
+ { .name = "invalid how.resolve and O_CREAT",
+ .how.flags = O_CREAT,
+ .how.resolve = 0x1337, .err = -EINVAL },
+ { .name = "invalid how.resolve and O_TMPFILE",
+ .how.flags = O_TMPFILE | O_RDWR,
+ .how.resolve = 0x1337, .err = -EINVAL },
+ { .name = "invalid how.resolve and O_PATH",
+ .how.flags = O_PATH,
+ .how.resolve = 0x1337, .err = -EINVAL },
+ };
+
+ BUILD_BUG_ON(ARRAY_LEN(tests) != NUM_OPENAT2_FLAG_TESTS);
+
+ for (int i = 0; i < ARRAY_LEN(tests); i++) {
+ int fd, fdflags = -1;
+ char *path, *fdpath = NULL;
+ bool failed = false;
+ struct flag_test *test = &tests[i];
+ void (*resultfn)(const char *msg, ...) = ksft_test_result_pass;
+
+ if (!openat2_supported) {
+ ksft_print_msg("openat2(2) unsupported\n");
+ resultfn = ksft_test_result_skip;
+ goto skip;
+ }
+
+ path = (test->how.flags & O_CREAT) ? "/tmp/ksft.openat2_tmpfile" : ".";
+ unlink(path);
+
+ fd = sys_openat2(AT_FDCWD, path, &test->how);
+ if (test->err >= 0)
+ failed = (fd < 0);
+ else
+ failed = (fd != test->err);
+ if (fd >= 0) {
+ int otherflags;
+
+ fdpath = fdreadlink(fd);
+ fdflags = fcntl(fd, F_GETFL);
+ otherflags = fcntl(fd, F_GETFD);
+ close(fd);
+
+ E_assert(fdflags >= 0, "fcntl F_GETFL of new fd");
+ E_assert(otherflags >= 0, "fcntl F_GETFD of new fd");
+
+ /* O_CLOEXEC isn't shown in F_GETFL. */
+ if (otherflags & FD_CLOEXEC)
+ fdflags |= O_CLOEXEC;
+ /* O_CREAT is hidden from F_GETFL. */
+ if (test->how.flags & O_CREAT)
+ fdflags |= O_CREAT;
+ if (!(test->how.flags & O_LARGEFILE))
+ fdflags &= ~O_LARGEFILE;
+ failed |= (fdflags != test->how.flags);
+ }
+
+ if (failed) {
+ resultfn = ksft_test_result_fail;
+
+ ksft_print_msg("openat2 unexpectedly returned ");
+ if (fdpath)
+ ksft_print_msg("%d['%s'] with %X (!= %X)\n",
+ fd, fdpath, fdflags,
+ test->how.flags);
+ else
+ ksft_print_msg("%d (%s)\n", fd, strerror(-fd));
+ }
+
+skip:
+ if (test->err >= 0)
+ resultfn("openat2 with %s succeeds\n", test->name);
+ else
+ resultfn("openat2 with %s fails with %d (%s)\n",
+ test->name, test->err, strerror(-test->err));
+
+ free(fdpath);
+ fflush(stdout);
+ }
+}
+
+#define NUM_TESTS (NUM_OPENAT2_STRUCT_VARIATIONS * NUM_OPENAT2_STRUCT_TESTS + \
+ NUM_OPENAT2_FLAG_TESTS)
+
+int main(int argc, char **argv)
+{
+ ksft_print_header();
+ ksft_set_plan(NUM_TESTS);
+
+ test_openat2_struct();
+ test_openat2_flags();
+
+ if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0)
+ ksft_exit_fail();
+ else
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/openat2/rename_attack_test.c b/tools/testing/selftests/openat2/rename_attack_test.c
new file mode 100644
index 000000000000..0a770728b436
--- /dev/null
+++ b/tools/testing/selftests/openat2/rename_attack_test.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Author: Aleksa Sarai <cyphar@cyphar.com>
+ * Copyright (C) 2018-2019 SUSE LLC.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <syscall.h>
+#include <limits.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+#include "helpers.h"
+
+/* Construct a test directory with the following structure:
+ *
+ * root/
+ * |-- a/
+ * | `-- c/
+ * `-- b/
+ */
+int setup_testdir(void)
+{
+ int dfd;
+ char dirname[] = "/tmp/ksft-openat2-rename-attack.XXXXXX";
+
+ /* Make the top-level directory. */
+ if (!mkdtemp(dirname))
+ ksft_exit_fail_msg("setup_testdir: failed to create tmpdir\n");
+ dfd = open(dirname, O_PATH | O_DIRECTORY);
+ if (dfd < 0)
+ ksft_exit_fail_msg("setup_testdir: failed to open tmpdir\n");
+
+ E_mkdirat(dfd, "a", 0755);
+ E_mkdirat(dfd, "b", 0755);
+ E_mkdirat(dfd, "a/c", 0755);
+
+ return dfd;
+}
+
+/* Swap @dirfd/@a and @dirfd/@b constantly. Parent must kill this process. */
+pid_t spawn_attack(int dirfd, char *a, char *b)
+{
+ pid_t child = fork();
+ if (child != 0)
+ return child;
+
+ /* If the parent (the test process) dies, kill ourselves too. */
+ E_prctl(PR_SET_PDEATHSIG, SIGKILL);
+
+ /* Swap @a and @b. */
+ for (;;)
+ renameat2(dirfd, a, dirfd, b, RENAME_EXCHANGE);
+ exit(1);
+}
+
+#define NUM_RENAME_TESTS 2
+#define ROUNDS 400000
+
+const char *flagname(int resolve)
+{
+ switch (resolve) {
+ case RESOLVE_IN_ROOT:
+ return "RESOLVE_IN_ROOT";
+ case RESOLVE_BENEATH:
+ return "RESOLVE_BENEATH";
+ }
+ return "(unknown)";
+}
+
+void test_rename_attack(int resolve)
+{
+ int dfd, afd;
+ pid_t child;
+ void (*resultfn)(const char *msg, ...) = ksft_test_result_pass;
+ int escapes = 0, other_errs = 0, exdevs = 0, eagains = 0, successes = 0;
+
+ struct open_how how = {
+ .flags = O_PATH,
+ .resolve = resolve,
+ };
+
+ if (!openat2_supported) {
+ how.resolve = 0;
+ ksft_print_msg("openat2(2) unsupported -- using openat(2) instead\n");
+ }
+
+ dfd = setup_testdir();
+ afd = openat(dfd, "a", O_PATH);
+ if (afd < 0)
+ ksft_exit_fail_msg("test_rename_attack: failed to open 'a'\n");
+
+ child = spawn_attack(dfd, "a/c", "b");
+
+ for (int i = 0; i < ROUNDS; i++) {
+ int fd;
+ char *victim_path = "c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../..";
+
+ if (openat2_supported)
+ fd = sys_openat2(afd, victim_path, &how);
+ else
+ fd = sys_openat(afd, victim_path, &how);
+
+ if (fd < 0) {
+ if (fd == -EAGAIN)
+ eagains++;
+ else if (fd == -EXDEV)
+ exdevs++;
+ else if (fd == -ENOENT)
+ escapes++; /* escaped outside and got ENOENT... */
+ else
+ other_errs++; /* unexpected error */
+ } else {
+ if (fdequal(fd, afd, NULL))
+ successes++;
+ else
+ escapes++; /* we got an unexpected fd */
+ }
+ close(fd);
+ }
+
+ if (escapes > 0)
+ resultfn = ksft_test_result_fail;
+ ksft_print_msg("non-escapes: EAGAIN=%d EXDEV=%d E<other>=%d success=%d\n",
+ eagains, exdevs, other_errs, successes);
+ resultfn("rename attack with %s (%d runs, got %d escapes)\n",
+ flagname(resolve), ROUNDS, escapes);
+
+ /* Should be killed anyway, but might as well make sure. */
+ E_kill(child, SIGKILL);
+}
+
+#define NUM_TESTS NUM_RENAME_TESTS
+
+int main(int argc, char **argv)
+{
+ ksft_print_header();
+ ksft_set_plan(NUM_TESTS);
+
+ test_rename_attack(RESOLVE_BENEATH);
+ test_rename_attack(RESOLVE_IN_ROOT);
+
+ if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0)
+ ksft_exit_fail();
+ else
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/openat2/resolve_test.c b/tools/testing/selftests/openat2/resolve_test.c
new file mode 100644
index 000000000000..7a94b1da8e7b
--- /dev/null
+++ b/tools/testing/selftests/openat2/resolve_test.c
@@ -0,0 +1,523 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Author: Aleksa Sarai <cyphar@cyphar.com>
+ * Copyright (C) 2018-2019 SUSE LLC.
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <sched.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include "../kselftest.h"
+#include "helpers.h"
+
+/*
+ * Construct a test directory with the following structure:
+ *
+ * root/
+ * |-- procexe -> /proc/self/exe
+ * |-- procroot -> /proc/self/root
+ * |-- root/
+ * |-- mnt/ [mountpoint]
+ * | |-- self -> ../mnt/
+ * | `-- absself -> /mnt/
+ * |-- etc/
+ * | `-- passwd
+ * |-- creatlink -> /newfile3
+ * |-- reletc -> etc/
+ * |-- relsym -> etc/passwd
+ * |-- absetc -> /etc/
+ * |-- abssym -> /etc/passwd
+ * |-- abscheeky -> /cheeky
+ * `-- cheeky/
+ * |-- absself -> /
+ * |-- self -> ../../root/
+ * |-- garbageself -> /../../root/
+ * |-- passwd -> ../cheeky/../cheeky/../etc/../etc/passwd
+ * |-- abspasswd -> /../cheeky/../cheeky/../etc/../etc/passwd
+ * |-- dotdotlink -> ../../../../../../../../../../../../../../etc/passwd
+ * `-- garbagelink -> /../../../../../../../../../../../../../../etc/passwd
+ */
+int setup_testdir(void)
+{
+ int dfd, tmpfd;
+ char dirname[] = "/tmp/ksft-openat2-testdir.XXXXXX";
+
+ /* Unshare and make /tmp a new directory. */
+ E_unshare(CLONE_NEWNS);
+ E_mount("", "/tmp", "", MS_PRIVATE, "");
+
+ /* Make the top-level directory. */
+ if (!mkdtemp(dirname))
+ ksft_exit_fail_msg("setup_testdir: failed to create tmpdir\n");
+ dfd = open(dirname, O_PATH | O_DIRECTORY);
+ if (dfd < 0)
+ ksft_exit_fail_msg("setup_testdir: failed to open tmpdir\n");
+
+ /* A sub-directory which is actually used for tests. */
+ E_mkdirat(dfd, "root", 0755);
+ tmpfd = openat(dfd, "root", O_PATH | O_DIRECTORY);
+ if (tmpfd < 0)
+ ksft_exit_fail_msg("setup_testdir: failed to open tmpdir\n");
+ close(dfd);
+ dfd = tmpfd;
+
+ E_symlinkat("/proc/self/exe", dfd, "procexe");
+ E_symlinkat("/proc/self/root", dfd, "procroot");
+ E_mkdirat(dfd, "root", 0755);
+
+ /* There is no mountat(2), so use chdir. */
+ E_mkdirat(dfd, "mnt", 0755);
+ E_fchdir(dfd);
+ E_mount("tmpfs", "./mnt", "tmpfs", MS_NOSUID | MS_NODEV, "");
+ E_symlinkat("../mnt/", dfd, "mnt/self");
+ E_symlinkat("/mnt/", dfd, "mnt/absself");
+
+ E_mkdirat(dfd, "etc", 0755);
+ E_touchat(dfd, "etc/passwd");
+
+ E_symlinkat("/newfile3", dfd, "creatlink");
+ E_symlinkat("etc/", dfd, "reletc");
+ E_symlinkat("etc/passwd", dfd, "relsym");
+ E_symlinkat("/etc/", dfd, "absetc");
+ E_symlinkat("/etc/passwd", dfd, "abssym");
+ E_symlinkat("/cheeky", dfd, "abscheeky");
+
+ E_mkdirat(dfd, "cheeky", 0755);
+
+ E_symlinkat("/", dfd, "cheeky/absself");
+ E_symlinkat("../../root/", dfd, "cheeky/self");
+ E_symlinkat("/../../root/", dfd, "cheeky/garbageself");
+
+ E_symlinkat("../cheeky/../etc/../etc/passwd", dfd, "cheeky/passwd");
+ E_symlinkat("/../cheeky/../etc/../etc/passwd", dfd, "cheeky/abspasswd");
+
+ E_symlinkat("../../../../../../../../../../../../../../etc/passwd",
+ dfd, "cheeky/dotdotlink");
+ E_symlinkat("/../../../../../../../../../../../../../../etc/passwd",
+ dfd, "cheeky/garbagelink");
+
+ return dfd;
+}
+
+struct basic_test {
+ const char *name;
+ const char *dir;
+ const char *path;
+ struct open_how how;
+ bool pass;
+ union {
+ int err;
+ const char *path;
+ } out;
+};
+
+#define NUM_OPENAT2_OPATH_TESTS 88
+
+void test_openat2_opath_tests(void)
+{
+ int rootfd, hardcoded_fd;
+ char *procselfexe, *hardcoded_fdpath;
+
+ E_asprintf(&procselfexe, "/proc/%d/exe", getpid());
+ rootfd = setup_testdir();
+
+ hardcoded_fd = open("/dev/null", O_RDONLY);
+ E_assert(hardcoded_fd >= 0, "open fd to hardcode");
+ E_asprintf(&hardcoded_fdpath, "self/fd/%d", hardcoded_fd);
+
+ struct basic_test tests[] = {
+ /** RESOLVE_BENEATH **/
+ /* Attempts to cross dirfd should be blocked. */
+ { .name = "[beneath] jump to /",
+ .path = "/", .how.resolve = RESOLVE_BENEATH,
+ .out.err = -EXDEV, .pass = false },
+ { .name = "[beneath] absolute link to $root",
+ .path = "cheeky/absself", .how.resolve = RESOLVE_BENEATH,
+ .out.err = -EXDEV, .pass = false },
+ { .name = "[beneath] chained absolute links to $root",
+ .path = "abscheeky/absself", .how.resolve = RESOLVE_BENEATH,
+ .out.err = -EXDEV, .pass = false },
+ { .name = "[beneath] jump outside $root",
+ .path = "..", .how.resolve = RESOLVE_BENEATH,
+ .out.err = -EXDEV, .pass = false },
+ { .name = "[beneath] temporary jump outside $root",
+ .path = "../root/", .how.resolve = RESOLVE_BENEATH,
+ .out.err = -EXDEV, .pass = false },
+ { .name = "[beneath] symlink temporary jump outside $root",
+ .path = "cheeky/self", .how.resolve = RESOLVE_BENEATH,
+ .out.err = -EXDEV, .pass = false },
+ { .name = "[beneath] chained symlink temporary jump outside $root",
+ .path = "abscheeky/self", .how.resolve = RESOLVE_BENEATH,
+ .out.err = -EXDEV, .pass = false },
+ { .name = "[beneath] garbage links to $root",
+ .path = "cheeky/garbageself", .how.resolve = RESOLVE_BENEATH,
+ .out.err = -EXDEV, .pass = false },
+ { .name = "[beneath] chained garbage links to $root",
+ .path = "abscheeky/garbageself", .how.resolve = RESOLVE_BENEATH,
+ .out.err = -EXDEV, .pass = false },
+ /* Only relative paths that stay inside dirfd should work. */
+ { .name = "[beneath] ordinary path to 'root'",
+ .path = "root", .how.resolve = RESOLVE_BENEATH,
+ .out.path = "root", .pass = true },
+ { .name = "[beneath] ordinary path to 'etc'",
+ .path = "etc", .how.resolve = RESOLVE_BENEATH,
+ .out.path = "etc", .pass = true },
+ { .name = "[beneath] ordinary path to 'etc/passwd'",
+ .path = "etc/passwd", .how.resolve = RESOLVE_BENEATH,
+ .out.path = "etc/passwd", .pass = true },
+ { .name = "[beneath] relative symlink inside $root",
+ .path = "relsym", .how.resolve = RESOLVE_BENEATH,
+ .out.path = "etc/passwd", .pass = true },
+ { .name = "[beneath] chained-'..' relative symlink inside $root",
+ .path = "cheeky/passwd", .how.resolve = RESOLVE_BENEATH,
+ .out.path = "etc/passwd", .pass = true },
+ { .name = "[beneath] absolute symlink component outside $root",
+ .path = "abscheeky/passwd", .how.resolve = RESOLVE_BENEATH,
+ .out.err = -EXDEV, .pass = false },
+ { .name = "[beneath] absolute symlink target outside $root",
+ .path = "abssym", .how.resolve = RESOLVE_BENEATH,
+ .out.err = -EXDEV, .pass = false },
+ { .name = "[beneath] absolute path outside $root",
+ .path = "/etc/passwd", .how.resolve = RESOLVE_BENEATH,
+ .out.err = -EXDEV, .pass = false },
+ { .name = "[beneath] cheeky absolute path outside $root",
+ .path = "cheeky/abspasswd", .how.resolve = RESOLVE_BENEATH,
+ .out.err = -EXDEV, .pass = false },
+ { .name = "[beneath] chained cheeky absolute path outside $root",
+ .path = "abscheeky/abspasswd", .how.resolve = RESOLVE_BENEATH,
+ .out.err = -EXDEV, .pass = false },
+ /* Tricky paths should fail. */
+ { .name = "[beneath] tricky '..'-chained symlink outside $root",
+ .path = "cheeky/dotdotlink", .how.resolve = RESOLVE_BENEATH,
+ .out.err = -EXDEV, .pass = false },
+ { .name = "[beneath] tricky absolute + '..'-chained symlink outside $root",
+ .path = "abscheeky/dotdotlink", .how.resolve = RESOLVE_BENEATH,
+ .out.err = -EXDEV, .pass = false },
+ { .name = "[beneath] tricky garbage link outside $root",
+ .path = "cheeky/garbagelink", .how.resolve = RESOLVE_BENEATH,
+ .out.err = -EXDEV, .pass = false },
+ { .name = "[beneath] tricky absolute + garbage link outside $root",
+ .path = "abscheeky/garbagelink", .how.resolve = RESOLVE_BENEATH,
+ .out.err = -EXDEV, .pass = false },
+
+ /** RESOLVE_IN_ROOT **/
+ /* All attempts to cross the dirfd will be scoped-to-root. */
+ { .name = "[in_root] jump to /",
+ .path = "/", .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = NULL, .pass = true },
+ { .name = "[in_root] absolute symlink to /root",
+ .path = "cheeky/absself", .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = NULL, .pass = true },
+ { .name = "[in_root] chained absolute symlinks to /root",
+ .path = "abscheeky/absself", .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = NULL, .pass = true },
+ { .name = "[in_root] '..' at root",
+ .path = "..", .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = NULL, .pass = true },
+ { .name = "[in_root] '../root' at root",
+ .path = "../root/", .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = "root", .pass = true },
+ { .name = "[in_root] relative symlink containing '..' above root",
+ .path = "cheeky/self", .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = "root", .pass = true },
+ { .name = "[in_root] garbage link to /root",
+ .path = "cheeky/garbageself", .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = "root", .pass = true },
+ { .name = "[in_root] chainged garbage links to /root",
+ .path = "abscheeky/garbageself", .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = "root", .pass = true },
+ { .name = "[in_root] relative path to 'root'",
+ .path = "root", .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = "root", .pass = true },
+ { .name = "[in_root] relative path to 'etc'",
+ .path = "etc", .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = "etc", .pass = true },
+ { .name = "[in_root] relative path to 'etc/passwd'",
+ .path = "etc/passwd", .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = "etc/passwd", .pass = true },
+ { .name = "[in_root] relative symlink to 'etc/passwd'",
+ .path = "relsym", .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = "etc/passwd", .pass = true },
+ { .name = "[in_root] chained-'..' relative symlink to 'etc/passwd'",
+ .path = "cheeky/passwd", .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = "etc/passwd", .pass = true },
+ { .name = "[in_root] chained-'..' absolute + relative symlink to 'etc/passwd'",
+ .path = "abscheeky/passwd", .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = "etc/passwd", .pass = true },
+ { .name = "[in_root] absolute symlink to 'etc/passwd'",
+ .path = "abssym", .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = "etc/passwd", .pass = true },
+ { .name = "[in_root] absolute path 'etc/passwd'",
+ .path = "/etc/passwd", .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = "etc/passwd", .pass = true },
+ { .name = "[in_root] cheeky absolute path 'etc/passwd'",
+ .path = "cheeky/abspasswd", .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = "etc/passwd", .pass = true },
+ { .name = "[in_root] chained cheeky absolute path 'etc/passwd'",
+ .path = "abscheeky/abspasswd", .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = "etc/passwd", .pass = true },
+ { .name = "[in_root] tricky '..'-chained symlink outside $root",
+ .path = "cheeky/dotdotlink", .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = "etc/passwd", .pass = true },
+ { .name = "[in_root] tricky absolute + '..'-chained symlink outside $root",
+ .path = "abscheeky/dotdotlink", .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = "etc/passwd", .pass = true },
+ { .name = "[in_root] tricky absolute path + absolute + '..'-chained symlink outside $root",
+ .path = "/../../../../abscheeky/dotdotlink", .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = "etc/passwd", .pass = true },
+ { .name = "[in_root] tricky garbage link outside $root",
+ .path = "cheeky/garbagelink", .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = "etc/passwd", .pass = true },
+ { .name = "[in_root] tricky absolute + garbage link outside $root",
+ .path = "abscheeky/garbagelink", .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = "etc/passwd", .pass = true },
+ { .name = "[in_root] tricky absolute path + absolute + garbage link outside $root",
+ .path = "/../../../../abscheeky/garbagelink", .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = "etc/passwd", .pass = true },
+ /* O_CREAT should handle trailing symlinks correctly. */
+ { .name = "[in_root] O_CREAT of relative path inside $root",
+ .path = "newfile1", .how.flags = O_CREAT,
+ .how.mode = 0700,
+ .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = "newfile1", .pass = true },
+ { .name = "[in_root] O_CREAT of absolute path",
+ .path = "/newfile2", .how.flags = O_CREAT,
+ .how.mode = 0700,
+ .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = "newfile2", .pass = true },
+ { .name = "[in_root] O_CREAT of tricky symlink outside root",
+ .path = "/creatlink", .how.flags = O_CREAT,
+ .how.mode = 0700,
+ .how.resolve = RESOLVE_IN_ROOT,
+ .out.path = "newfile3", .pass = true },
+
+ /** RESOLVE_NO_XDEV **/
+ /* Crossing *down* into a mountpoint is disallowed. */
+ { .name = "[no_xdev] cross into $mnt",
+ .path = "mnt", .how.resolve = RESOLVE_NO_XDEV,
+ .out.err = -EXDEV, .pass = false },
+ { .name = "[no_xdev] cross into $mnt/",
+ .path = "mnt/", .how.resolve = RESOLVE_NO_XDEV,
+ .out.err = -EXDEV, .pass = false },
+ { .name = "[no_xdev] cross into $mnt/.",
+ .path = "mnt/.", .how.resolve = RESOLVE_NO_XDEV,
+ .out.err = -EXDEV, .pass = false },
+ /* Crossing *up* out of a mountpoint is disallowed. */
+ { .name = "[no_xdev] goto mountpoint root",
+ .dir = "mnt", .path = ".", .how.resolve = RESOLVE_NO_XDEV,
+ .out.path = "mnt", .pass = true },
+ { .name = "[no_xdev] cross up through '..'",
+ .dir = "mnt", .path = "..", .how.resolve = RESOLVE_NO_XDEV,
+ .out.err = -EXDEV, .pass = false },
+ { .name = "[no_xdev] temporary cross up through '..'",
+ .dir = "mnt", .path = "../mnt", .how.resolve = RESOLVE_NO_XDEV,
+ .out.err = -EXDEV, .pass = false },
+ { .name = "[no_xdev] temporary relative symlink cross up",
+ .dir = "mnt", .path = "self", .how.resolve = RESOLVE_NO_XDEV,
+ .out.err = -EXDEV, .pass = false },
+ { .name = "[no_xdev] temporary absolute symlink cross up",
+ .dir = "mnt", .path = "absself", .how.resolve = RESOLVE_NO_XDEV,
+ .out.err = -EXDEV, .pass = false },
+ /* Jumping to "/" is ok, but later components cannot cross. */
+ { .name = "[no_xdev] jump to / directly",
+ .dir = "mnt", .path = "/", .how.resolve = RESOLVE_NO_XDEV,
+ .out.path = "/", .pass = true },
+ { .name = "[no_xdev] jump to / (from /) directly",
+ .dir = "/", .path = "/", .how.resolve = RESOLVE_NO_XDEV,
+ .out.path = "/", .pass = true },
+ { .name = "[no_xdev] jump to / then proc",
+ .path = "/proc/1", .how.resolve = RESOLVE_NO_XDEV,
+ .out.err = -EXDEV, .pass = false },
+ { .name = "[no_xdev] jump to / then tmp",
+ .path = "/tmp", .how.resolve = RESOLVE_NO_XDEV,
+ .out.err = -EXDEV, .pass = false },
+ /* Magic-links are blocked since they can switch vfsmounts. */
+ { .name = "[no_xdev] cross through magic-link to self/root",
+ .dir = "/proc", .path = "self/root", .how.resolve = RESOLVE_NO_XDEV,
+ .out.err = -EXDEV, .pass = false },
+ { .name = "[no_xdev] cross through magic-link to self/cwd",
+ .dir = "/proc", .path = "self/cwd", .how.resolve = RESOLVE_NO_XDEV,
+ .out.err = -EXDEV, .pass = false },
+ /* Except magic-link jumps inside the same vfsmount. */
+ { .name = "[no_xdev] jump through magic-link to same procfs",
+ .dir = "/proc", .path = hardcoded_fdpath, .how.resolve = RESOLVE_NO_XDEV,
+ .out.path = "/proc", .pass = true, },
+
+ /** RESOLVE_NO_MAGICLINKS **/
+ /* Regular symlinks should work. */
+ { .name = "[no_magiclinks] ordinary relative symlink",
+ .path = "relsym", .how.resolve = RESOLVE_NO_MAGICLINKS,
+ .out.path = "etc/passwd", .pass = true },
+ /* Magic-links should not work. */
+ { .name = "[no_magiclinks] symlink to magic-link",
+ .path = "procexe", .how.resolve = RESOLVE_NO_MAGICLINKS,
+ .out.err = -ELOOP, .pass = false },
+ { .name = "[no_magiclinks] normal path to magic-link",
+ .path = "/proc/self/exe", .how.resolve = RESOLVE_NO_MAGICLINKS,
+ .out.err = -ELOOP, .pass = false },
+ { .name = "[no_magiclinks] normal path to magic-link with O_NOFOLLOW",
+ .path = "/proc/self/exe", .how.flags = O_NOFOLLOW,
+ .how.resolve = RESOLVE_NO_MAGICLINKS,
+ .out.path = procselfexe, .pass = true },
+ { .name = "[no_magiclinks] symlink to magic-link path component",
+ .path = "procroot/etc", .how.resolve = RESOLVE_NO_MAGICLINKS,
+ .out.err = -ELOOP, .pass = false },
+ { .name = "[no_magiclinks] magic-link path component",
+ .path = "/proc/self/root/etc", .how.resolve = RESOLVE_NO_MAGICLINKS,
+ .out.err = -ELOOP, .pass = false },
+ { .name = "[no_magiclinks] magic-link path component with O_NOFOLLOW",
+ .path = "/proc/self/root/etc", .how.flags = O_NOFOLLOW,
+ .how.resolve = RESOLVE_NO_MAGICLINKS,
+ .out.err = -ELOOP, .pass = false },
+
+ /** RESOLVE_NO_SYMLINKS **/
+ /* Normal paths should work. */
+ { .name = "[no_symlinks] ordinary path to '.'",
+ .path = ".", .how.resolve = RESOLVE_NO_SYMLINKS,
+ .out.path = NULL, .pass = true },
+ { .name = "[no_symlinks] ordinary path to 'root'",
+ .path = "root", .how.resolve = RESOLVE_NO_SYMLINKS,
+ .out.path = "root", .pass = true },
+ { .name = "[no_symlinks] ordinary path to 'etc'",
+ .path = "etc", .how.resolve = RESOLVE_NO_SYMLINKS,
+ .out.path = "etc", .pass = true },
+ { .name = "[no_symlinks] ordinary path to 'etc/passwd'",
+ .path = "etc/passwd", .how.resolve = RESOLVE_NO_SYMLINKS,
+ .out.path = "etc/passwd", .pass = true },
+ /* Regular symlinks are blocked. */
+ { .name = "[no_symlinks] relative symlink target",
+ .path = "relsym", .how.resolve = RESOLVE_NO_SYMLINKS,
+ .out.err = -ELOOP, .pass = false },
+ { .name = "[no_symlinks] relative symlink component",
+ .path = "reletc/passwd", .how.resolve = RESOLVE_NO_SYMLINKS,
+ .out.err = -ELOOP, .pass = false },
+ { .name = "[no_symlinks] absolute symlink target",
+ .path = "abssym", .how.resolve = RESOLVE_NO_SYMLINKS,
+ .out.err = -ELOOP, .pass = false },
+ { .name = "[no_symlinks] absolute symlink component",
+ .path = "absetc/passwd", .how.resolve = RESOLVE_NO_SYMLINKS,
+ .out.err = -ELOOP, .pass = false },
+ { .name = "[no_symlinks] cheeky garbage link",
+ .path = "cheeky/garbagelink", .how.resolve = RESOLVE_NO_SYMLINKS,
+ .out.err = -ELOOP, .pass = false },
+ { .name = "[no_symlinks] cheeky absolute + garbage link",
+ .path = "abscheeky/garbagelink", .how.resolve = RESOLVE_NO_SYMLINKS,
+ .out.err = -ELOOP, .pass = false },
+ { .name = "[no_symlinks] cheeky absolute + absolute symlink",
+ .path = "abscheeky/absself", .how.resolve = RESOLVE_NO_SYMLINKS,
+ .out.err = -ELOOP, .pass = false },
+ /* Trailing symlinks with NO_FOLLOW. */
+ { .name = "[no_symlinks] relative symlink with O_NOFOLLOW",
+ .path = "relsym", .how.flags = O_NOFOLLOW,
+ .how.resolve = RESOLVE_NO_SYMLINKS,
+ .out.path = "relsym", .pass = true },
+ { .name = "[no_symlinks] absolute symlink with O_NOFOLLOW",
+ .path = "abssym", .how.flags = O_NOFOLLOW,
+ .how.resolve = RESOLVE_NO_SYMLINKS,
+ .out.path = "abssym", .pass = true },
+ { .name = "[no_symlinks] trailing symlink with O_NOFOLLOW",
+ .path = "cheeky/garbagelink", .how.flags = O_NOFOLLOW,
+ .how.resolve = RESOLVE_NO_SYMLINKS,
+ .out.path = "cheeky/garbagelink", .pass = true },
+ { .name = "[no_symlinks] multiple symlink components with O_NOFOLLOW",
+ .path = "abscheeky/absself", .how.flags = O_NOFOLLOW,
+ .how.resolve = RESOLVE_NO_SYMLINKS,
+ .out.err = -ELOOP, .pass = false },
+ { .name = "[no_symlinks] multiple symlink (and garbage link) components with O_NOFOLLOW",
+ .path = "abscheeky/garbagelink", .how.flags = O_NOFOLLOW,
+ .how.resolve = RESOLVE_NO_SYMLINKS,
+ .out.err = -ELOOP, .pass = false },
+ };
+
+ BUILD_BUG_ON(ARRAY_LEN(tests) != NUM_OPENAT2_OPATH_TESTS);
+
+ for (int i = 0; i < ARRAY_LEN(tests); i++) {
+ int dfd, fd;
+ char *fdpath = NULL;
+ bool failed;
+ void (*resultfn)(const char *msg, ...) = ksft_test_result_pass;
+ struct basic_test *test = &tests[i];
+
+ if (!openat2_supported) {
+ ksft_print_msg("openat2(2) unsupported\n");
+ resultfn = ksft_test_result_skip;
+ goto skip;
+ }
+
+ /* Auto-set O_PATH. */
+ if (!(test->how.flags & O_CREAT))
+ test->how.flags |= O_PATH;
+
+ if (test->dir)
+ dfd = openat(rootfd, test->dir, O_PATH | O_DIRECTORY);
+ else
+ dfd = dup(rootfd);
+ E_assert(dfd, "failed to openat root '%s': %m", test->dir);
+
+ E_dup2(dfd, hardcoded_fd);
+
+ fd = sys_openat2(dfd, test->path, &test->how);
+ if (test->pass)
+ failed = (fd < 0 || !fdequal(fd, rootfd, test->out.path));
+ else
+ failed = (fd != test->out.err);
+ if (fd >= 0) {
+ fdpath = fdreadlink(fd);
+ close(fd);
+ }
+ close(dfd);
+
+ if (failed) {
+ resultfn = ksft_test_result_fail;
+
+ ksft_print_msg("openat2 unexpectedly returned ");
+ if (fdpath)
+ ksft_print_msg("%d['%s']\n", fd, fdpath);
+ else
+ ksft_print_msg("%d (%s)\n", fd, strerror(-fd));
+ }
+
+skip:
+ if (test->pass)
+ resultfn("%s gives path '%s'\n", test->name,
+ test->out.path ?: ".");
+ else
+ resultfn("%s fails with %d (%s)\n", test->name,
+ test->out.err, strerror(-test->out.err));
+
+ fflush(stdout);
+ free(fdpath);
+ }
+
+ free(procselfexe);
+ close(rootfd);
+
+ free(hardcoded_fdpath);
+ close(hardcoded_fd);
+}
+
+#define NUM_TESTS NUM_OPENAT2_OPATH_TESTS
+
+int main(int argc, char **argv)
+{
+ ksft_print_header();
+ ksft_set_plan(NUM_TESTS);
+
+ /* NOTE: We should be checking for CAP_SYS_ADMIN here... */
+ if (geteuid() != 0)
+ ksft_exit_skip("all tests require euid == 0\n");
+
+ test_openat2_opath_tests();
+
+ if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0)
+ ksft_exit_fail();
+ else
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore
index 16d84d117bc0..3a779c084d96 100644
--- a/tools/testing/selftests/pidfd/.gitignore
+++ b/tools/testing/selftests/pidfd/.gitignore
@@ -1,2 +1,5 @@
pidfd_open_test
+pidfd_poll_test
pidfd_test
+pidfd_wait
+pidfd_getfd_test
diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile
index 720b2d884b3c..75a545861375 100644
--- a/tools/testing/selftests/pidfd/Makefile
+++ b/tools/testing/selftests/pidfd/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
-CFLAGS += -g -I../../../../usr/include/ -lpthread
+CFLAGS += -g -I../../../../usr/include/ -pthread
-TEST_GEN_PROGS := pidfd_test pidfd_open_test
+TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test pidfd_poll_test pidfd_wait pidfd_getfd_test
include ../lib.mk
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
index 8452e910463f..d482515604db 100644
--- a/tools/testing/selftests/pidfd/pidfd.h
+++ b/tools/testing/selftests/pidfd/pidfd.h
@@ -16,6 +16,30 @@
#include "../kselftest.h"
+#ifndef P_PIDFD
+#define P_PIDFD 3
+#endif
+
+#ifndef CLONE_PIDFD
+#define CLONE_PIDFD 0x00001000
+#endif
+
+#ifndef __NR_pidfd_open
+#define __NR_pidfd_open -1
+#endif
+
+#ifndef __NR_pidfd_send_signal
+#define __NR_pidfd_send_signal -1
+#endif
+
+#ifndef __NR_clone3
+#define __NR_clone3 -1
+#endif
+
+#ifndef __NR_pidfd_getfd
+#define __NR_pidfd_getfd -1
+#endif
+
/*
* The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c
* That means, when it wraps around any pid < 300 will be skipped.
@@ -53,5 +77,20 @@ again:
return WEXITSTATUS(status);
}
+static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
+{
+ return syscall(__NR_pidfd_open, pid, flags);
+}
+
+static inline int sys_pidfd_send_signal(int pidfd, int sig, siginfo_t *info,
+ unsigned int flags)
+{
+ return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags);
+}
+
+static inline int sys_pidfd_getfd(int pidfd, int fd, int flags)
+{
+ return syscall(__NR_pidfd_getfd, pidfd, fd, flags);
+}
#endif /* __PIDFD_H */
diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
new file mode 100644
index 000000000000..22558524f71c
--- /dev/null
+++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
@@ -0,0 +1,296 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/types.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/wait.h>
+
+#include "pidfd.h"
+#include "../kselftest.h"
+
+struct error {
+ int code;
+ char msg[512];
+};
+
+static int error_set(struct error *err, int code, const char *fmt, ...)
+{
+ va_list args;
+ int r;
+
+ if (code == PIDFD_PASS || !err || err->code != PIDFD_PASS)
+ return code;
+
+ err->code = code;
+ va_start(args, fmt);
+ r = vsnprintf(err->msg, sizeof(err->msg), fmt, args);
+ assert((size_t)r < sizeof(err->msg));
+ va_end(args);
+
+ return code;
+}
+
+static void error_report(struct error *err, const char *test_name)
+{
+ switch (err->code) {
+ case PIDFD_ERROR:
+ ksft_exit_fail_msg("%s test: Fatal: %s\n", test_name, err->msg);
+ break;
+
+ case PIDFD_FAIL:
+ /* will be: not ok %d # error %s test: %s */
+ ksft_test_result_error("%s test: %s\n", test_name, err->msg);
+ break;
+
+ case PIDFD_SKIP:
+ /* will be: not ok %d # SKIP %s test: %s */
+ ksft_test_result_skip("%s test: %s\n", test_name, err->msg);
+ break;
+
+ case PIDFD_XFAIL:
+ ksft_test_result_pass("%s test: Expected failure: %s\n",
+ test_name, err->msg);
+ break;
+
+ case PIDFD_PASS:
+ ksft_test_result_pass("%s test: Passed\n");
+ break;
+
+ default:
+ ksft_exit_fail_msg("%s test: Unknown code: %d %s\n",
+ test_name, err->code, err->msg);
+ break;
+ }
+}
+
+static inline int error_check(struct error *err, const char *test_name)
+{
+ /* In case of error we bail out and terminate the test program */
+ if (err->code == PIDFD_ERROR)
+ error_report(err, test_name);
+
+ return err->code;
+}
+
+struct child {
+ pid_t pid;
+ int fd;
+};
+
+static struct child clone_newns(int (*fn)(void *), void *args,
+ struct error *err)
+{
+ static int flags = CLONE_PIDFD | CLONE_NEWPID | CLONE_NEWNS | SIGCHLD;
+ size_t stack_size = 1024;
+ char *stack[1024] = { 0 };
+ struct child ret;
+
+ if (!(flags & CLONE_NEWUSER) && geteuid() != 0)
+ flags |= CLONE_NEWUSER;
+
+#ifdef __ia64__
+ ret.pid = __clone2(fn, stack, stack_size, flags, args, &ret.fd);
+#else
+ ret.pid = clone(fn, stack + stack_size, flags, args, &ret.fd);
+#endif
+
+ if (ret.pid < 0) {
+ error_set(err, PIDFD_ERROR, "clone failed (ret %d, errno %d)",
+ ret.fd, errno);
+ return ret;
+ }
+
+ ksft_print_msg("New child: %d, fd: %d\n", ret.pid, ret.fd);
+
+ return ret;
+}
+
+static inline void child_close(struct child *child)
+{
+ close(child->fd);
+}
+
+static inline int child_join(struct child *child, struct error *err)
+{
+ int r;
+
+ r = wait_for_pid(child->pid);
+ if (r < 0)
+ error_set(err, PIDFD_ERROR, "waitpid failed (ret %d, errno %d)",
+ r, errno);
+ else if (r > 0)
+ error_set(err, r, "child %d reported: %d", child->pid, r);
+
+ return r;
+}
+
+static inline int child_join_close(struct child *child, struct error *err)
+{
+ child_close(child);
+ return child_join(child, err);
+}
+
+static inline void trim_newline(char *str)
+{
+ char *pos = strrchr(str, '\n');
+
+ if (pos)
+ *pos = '\0';
+}
+
+static int verify_fdinfo(int pidfd, struct error *err, const char *prefix,
+ size_t prefix_len, const char *expect, ...)
+{
+ char buffer[512] = {0, };
+ char path[512] = {0, };
+ va_list args;
+ FILE *f;
+ char *line = NULL;
+ size_t n = 0;
+ int found = 0;
+ int r;
+
+ va_start(args, expect);
+ r = vsnprintf(buffer, sizeof(buffer), expect, args);
+ assert((size_t)r < sizeof(buffer));
+ va_end(args);
+
+ snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", pidfd);
+ f = fopen(path, "re");
+ if (!f)
+ return error_set(err, PIDFD_ERROR, "fdinfo open failed for %d",
+ pidfd);
+
+ while (getline(&line, &n, f) != -1) {
+ char *val;
+
+ if (strncmp(line, prefix, prefix_len))
+ continue;
+
+ found = 1;
+
+ val = line + prefix_len;
+ r = strcmp(val, buffer);
+ if (r != 0) {
+ trim_newline(line);
+ trim_newline(buffer);
+ error_set(err, PIDFD_FAIL, "%s '%s' != '%s'",
+ prefix, val, buffer);
+ }
+ break;
+ }
+
+ free(line);
+ fclose(f);
+
+ if (found == 0)
+ return error_set(err, PIDFD_FAIL, "%s not found for fd %d",
+ prefix, pidfd);
+
+ return PIDFD_PASS;
+}
+
+static int child_fdinfo_nspid_test(void *args)
+{
+ struct error err;
+ int pidfd;
+ int r;
+
+ /* if we got no fd for the sibling, we are done */
+ if (!args)
+ return PIDFD_PASS;
+
+ /* verify that we can not resolve the pidfd for a process
+ * in a sibling pid namespace, i.e. a pid namespace it is
+ * not in our or a descended namespace
+ */
+ r = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0);
+ if (r < 0) {
+ ksft_print_msg("Failed to remount / private\n");
+ return PIDFD_ERROR;
+ }
+
+ (void)umount2("/proc", MNT_DETACH);
+ r = mount("proc", "/proc", "proc", 0, NULL);
+ if (r < 0) {
+ ksft_print_msg("Failed to remount /proc\n");
+ return PIDFD_ERROR;
+ }
+
+ pidfd = *(int *)args;
+ r = verify_fdinfo(pidfd, &err, "NSpid:", 6, "\t0\n");
+
+ if (r != PIDFD_PASS)
+ ksft_print_msg("NSpid fdinfo check failed: %s\n", err.msg);
+
+ return r;
+}
+
+static void test_pidfd_fdinfo_nspid(void)
+{
+ struct child a, b;
+ struct error err = {0, };
+ const char *test_name = "pidfd check for NSpid in fdinfo";
+
+ /* Create a new child in a new pid and mount namespace */
+ a = clone_newns(child_fdinfo_nspid_test, NULL, &err);
+ error_check(&err, test_name);
+
+ /* Pass the pidfd representing the first child to the
+ * second child, which will be in a sibling pid namespace,
+ * which means that the fdinfo NSpid entry for the pidfd
+ * should only contain '0'.
+ */
+ b = clone_newns(child_fdinfo_nspid_test, &a.fd, &err);
+ error_check(&err, test_name);
+
+ /* The children will have pid 1 in the new pid namespace,
+ * so the line must be 'NSPid:\t<pid>\t1'.
+ */
+ verify_fdinfo(a.fd, &err, "NSpid:", 6, "\t%d\t%d\n", a.pid, 1);
+ verify_fdinfo(b.fd, &err, "NSpid:", 6, "\t%d\t%d\n", b.pid, 1);
+
+ /* wait for the process, check the exit status and set
+ * 'err' accordingly, if it is not already set.
+ */
+ child_join_close(&a, &err);
+ child_join_close(&b, &err);
+
+ error_report(&err, test_name);
+}
+
+static void test_pidfd_dead_fdinfo(void)
+{
+ struct child a;
+ struct error err = {0, };
+ const char *test_name = "pidfd check fdinfo for dead process";
+
+ /* Create a new child in a new pid and mount namespace */
+ a = clone_newns(child_fdinfo_nspid_test, NULL, &err);
+ error_check(&err, test_name);
+ child_join(&a, &err);
+
+ verify_fdinfo(a.fd, &err, "Pid:", 4, "\t-1\n");
+ verify_fdinfo(a.fd, &err, "NSpid:", 6, "\t-1\n");
+ child_close(&a);
+ error_report(&err, test_name);
+}
+
+int main(int argc, char **argv)
+{
+ ksft_print_header();
+ ksft_set_plan(2);
+
+ test_pidfd_fdinfo_nspid();
+ test_pidfd_dead_fdinfo();
+
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/pidfd/pidfd_getfd_test.c b/tools/testing/selftests/pidfd/pidfd_getfd_test.c
new file mode 100644
index 000000000000..401a7c1d0312
--- /dev/null
+++ b/tools/testing/selftests/pidfd/pidfd_getfd_test.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/types.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <linux/kcmp.h>
+
+#include "pidfd.h"
+#include "../kselftest.h"
+#include "../kselftest_harness.h"
+
+/*
+ * UNKNOWN_FD is an fd number that should never exist in the child, as it is
+ * used to check the negative case.
+ */
+#define UNKNOWN_FD 111
+#define UID_NOBODY 65535
+
+static int sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1,
+ unsigned long idx2)
+{
+ return syscall(__NR_kcmp, pid1, pid2, type, idx1, idx2);
+}
+
+static int sys_memfd_create(const char *name, unsigned int flags)
+{
+ return syscall(__NR_memfd_create, name, flags);
+}
+
+static int __child(int sk, int memfd)
+{
+ int ret;
+ char buf;
+
+ /*
+ * Ensure we don't leave around a bunch of orphaned children if our
+ * tests fail.
+ */
+ ret = prctl(PR_SET_PDEATHSIG, SIGKILL);
+ if (ret) {
+ fprintf(stderr, "%s: Child could not set DEATHSIG\n",
+ strerror(errno));
+ return -1;
+ }
+
+ ret = send(sk, &memfd, sizeof(memfd), 0);
+ if (ret != sizeof(memfd)) {
+ fprintf(stderr, "%s: Child failed to send fd number\n",
+ strerror(errno));
+ return -1;
+ }
+
+ /*
+ * The fixture setup is completed at this point. The tests will run.
+ *
+ * This blocking recv enables the parent to message the child.
+ * Either we will read 'P' off of the sk, indicating that we need
+ * to disable ptrace, or we will read a 0, indicating that the other
+ * side has closed the sk. This occurs during fixture teardown time,
+ * indicating that the child should exit.
+ */
+ while ((ret = recv(sk, &buf, sizeof(buf), 0)) > 0) {
+ if (buf == 'P') {
+ ret = prctl(PR_SET_DUMPABLE, 0);
+ if (ret < 0) {
+ fprintf(stderr,
+ "%s: Child failed to disable ptrace\n",
+ strerror(errno));
+ return -1;
+ }
+ } else {
+ fprintf(stderr, "Child received unknown command %c\n",
+ buf);
+ return -1;
+ }
+ ret = send(sk, &buf, sizeof(buf), 0);
+ if (ret != 1) {
+ fprintf(stderr, "%s: Child failed to ack\n",
+ strerror(errno));
+ return -1;
+ }
+ }
+ if (ret < 0) {
+ fprintf(stderr, "%s: Child failed to read from socket\n",
+ strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
+
+static int child(int sk)
+{
+ int memfd, ret;
+
+ memfd = sys_memfd_create("test", 0);
+ if (memfd < 0) {
+ fprintf(stderr, "%s: Child could not create memfd\n",
+ strerror(errno));
+ ret = -1;
+ } else {
+ ret = __child(sk, memfd);
+ close(memfd);
+ }
+
+ close(sk);
+ return ret;
+}
+
+FIXTURE(child)
+{
+ /*
+ * remote_fd is the number of the FD which we are trying to retrieve
+ * from the child.
+ */
+ int remote_fd;
+ /* pid points to the child which we are fetching FDs from */
+ pid_t pid;
+ /* pidfd is the pidfd of the child */
+ int pidfd;
+ /*
+ * sk is our side of the socketpair used to communicate with the child.
+ * When it is closed, the child will exit.
+ */
+ int sk;
+};
+
+FIXTURE_SETUP(child)
+{
+ int ret, sk_pair[2];
+
+ ASSERT_EQ(0, socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair)) {
+ TH_LOG("%s: failed to create socketpair", strerror(errno));
+ }
+ self->sk = sk_pair[0];
+
+ self->pid = fork();
+ ASSERT_GE(self->pid, 0);
+
+ if (self->pid == 0) {
+ close(sk_pair[0]);
+ if (child(sk_pair[1]))
+ _exit(EXIT_FAILURE);
+ _exit(EXIT_SUCCESS);
+ }
+
+ close(sk_pair[1]);
+
+ self->pidfd = sys_pidfd_open(self->pid, 0);
+ ASSERT_GE(self->pidfd, 0);
+
+ /*
+ * Wait for the child to complete setup. It'll send the remote memfd's
+ * number when ready.
+ */
+ ret = recv(sk_pair[0], &self->remote_fd, sizeof(self->remote_fd), 0);
+ ASSERT_EQ(sizeof(self->remote_fd), ret);
+}
+
+FIXTURE_TEARDOWN(child)
+{
+ EXPECT_EQ(0, close(self->pidfd));
+ EXPECT_EQ(0, close(self->sk));
+
+ EXPECT_EQ(0, wait_for_pid(self->pid));
+}
+
+TEST_F(child, disable_ptrace)
+{
+ int uid, fd;
+ char c;
+
+ /*
+ * Turn into nobody if we're root, to avoid CAP_SYS_PTRACE
+ *
+ * The tests should run in their own process, so even this test fails,
+ * it shouldn't result in subsequent tests failing.
+ */
+ uid = getuid();
+ if (uid == 0)
+ ASSERT_EQ(0, seteuid(UID_NOBODY));
+
+ ASSERT_EQ(1, send(self->sk, "P", 1, 0));
+ ASSERT_EQ(1, recv(self->sk, &c, 1, 0));
+
+ fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0);
+ EXPECT_EQ(-1, fd);
+ EXPECT_EQ(EPERM, errno);
+
+ if (uid == 0)
+ ASSERT_EQ(0, seteuid(0));
+}
+
+TEST_F(child, fetch_fd)
+{
+ int fd, ret;
+
+ fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0);
+ ASSERT_GE(fd, 0);
+
+ EXPECT_EQ(0, sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd));
+
+ ret = fcntl(fd, F_GETFD);
+ ASSERT_GE(ret, 0);
+ EXPECT_GE(ret & FD_CLOEXEC, 0);
+
+ close(fd);
+}
+
+TEST_F(child, test_unknown_fd)
+{
+ int fd;
+
+ fd = sys_pidfd_getfd(self->pidfd, UNKNOWN_FD, 0);
+ EXPECT_EQ(-1, fd) {
+ TH_LOG("getfd succeeded while fetching unknown fd");
+ };
+ EXPECT_EQ(EBADF, errno) {
+ TH_LOG("%s: getfd did not get EBADF", strerror(errno));
+ }
+}
+
+TEST(flags_set)
+{
+ ASSERT_EQ(-1, sys_pidfd_getfd(0, 0, 1));
+ EXPECT_EQ(errno, EINVAL);
+}
+
+#if __NR_pidfd_getfd == -1
+int main(void)
+{
+ fprintf(stderr, "__NR_pidfd_getfd undefined. The pidfd_getfd syscall is unavailable. Test aborting\n");
+ return KSFT_SKIP;
+}
+#else
+TEST_HARNESS_MAIN
+#endif
diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c
index 0377133dd6dc..b9fe75fc3e51 100644
--- a/tools/testing/selftests/pidfd/pidfd_open_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_open_test.c
@@ -22,11 +22,6 @@
#include "pidfd.h"
#include "../kselftest.h"
-static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
-{
- return syscall(__NR_pidfd_open, pid, flags);
-}
-
static int safe_int(const char *numstr, int *converted)
{
char *err = NULL;
diff --git a/tools/testing/selftests/pidfd/pidfd_poll_test.c b/tools/testing/selftests/pidfd/pidfd_poll_test.c
new file mode 100644
index 000000000000..4b115444dfe9
--- /dev/null
+++ b/tools/testing/selftests/pidfd/pidfd_poll_test.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "pidfd.h"
+#include "../kselftest.h"
+
+static bool timeout;
+
+static void handle_alarm(int sig)
+{
+ timeout = true;
+}
+
+int main(int argc, char **argv)
+{
+ struct pollfd fds;
+ int iter, nevents;
+ int nr_iterations = 10000;
+
+ fds.events = POLLIN;
+
+ if (argc > 2)
+ ksft_exit_fail_msg("Unexpected command line argument\n");
+
+ if (argc == 2) {
+ nr_iterations = atoi(argv[1]);
+ if (nr_iterations <= 0)
+ ksft_exit_fail_msg("invalid input parameter %s\n",
+ argv[1]);
+ }
+
+ ksft_print_msg("running pidfd poll test for %d iterations\n",
+ nr_iterations);
+
+ for (iter = 0; iter < nr_iterations; iter++) {
+ int pidfd;
+ int child_pid = fork();
+
+ if (child_pid < 0) {
+ if (errno == EAGAIN) {
+ iter--;
+ continue;
+ }
+ ksft_exit_fail_msg(
+ "%s - failed to fork a child process\n",
+ strerror(errno));
+ }
+
+ if (child_pid == 0) {
+ /* Child process just sleeps for a min and exits */
+ sleep(60);
+ exit(EXIT_SUCCESS);
+ }
+
+ /* Parent kills the child and waits for its death */
+ pidfd = sys_pidfd_open(child_pid, 0);
+ if (pidfd < 0)
+ ksft_exit_fail_msg("%s - pidfd_open failed\n",
+ strerror(errno));
+
+ /* Setup 3 sec alarm - plenty of time */
+ if (signal(SIGALRM, handle_alarm) == SIG_ERR)
+ ksft_exit_fail_msg("%s - signal failed\n",
+ strerror(errno));
+ alarm(3);
+
+ /* Send SIGKILL to the child */
+ if (sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0))
+ ksft_exit_fail_msg("%s - pidfd_send_signal failed\n",
+ strerror(errno));
+
+ /* Wait for the death notification */
+ fds.fd = pidfd;
+ nevents = poll(&fds, 1, -1);
+
+ /* Check for error conditions */
+ if (nevents < 0)
+ ksft_exit_fail_msg("%s - poll failed\n",
+ strerror(errno));
+
+ if (nevents != 1)
+ ksft_exit_fail_msg("unexpected poll result: %d\n",
+ nevents);
+
+ if (!(fds.revents & POLLIN))
+ ksft_exit_fail_msg(
+ "unexpected event type received: 0x%x\n",
+ fds.revents);
+
+ if (timeout)
+ ksft_exit_fail_msg(
+ "death notification wait timeout\n");
+
+ close(pidfd);
+ /* Wait for child to prevent zombies */
+ if (waitpid(child_pid, NULL, 0) < 0)
+ ksft_exit_fail_msg("%s - waitpid failed\n",
+ strerror(errno));
+
+ }
+
+ ksft_test_result_pass("pidfd poll test: pass\n");
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c
index b632965e60eb..7aff2d3b42c0 100644
--- a/tools/testing/selftests/pidfd/pidfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_test.c
@@ -21,20 +21,12 @@
#include "pidfd.h"
#include "../kselftest.h"
-#ifndef __NR_pidfd_send_signal
-#define __NR_pidfd_send_signal -1
-#endif
-
#define str(s) _str(s)
#define _str(s) #s
#define CHILD_THREAD_MIN_WAIT 3 /* seconds */
#define MAX_EVENTS 5
-#ifndef CLONE_PIDFD
-#define CLONE_PIDFD 0x00001000
-#endif
-
static pid_t pidfd_clone(int flags, int *pidfd, int (*fn)(void *))
{
size_t stack_size = 1024;
@@ -47,12 +39,6 @@ static pid_t pidfd_clone(int flags, int *pidfd, int (*fn)(void *))
#endif
}
-static inline int sys_pidfd_send_signal(int pidfd, int sig, siginfo_t *info,
- unsigned int flags)
-{
- return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags);
-}
-
static int signal_received;
static void set_signal_received_on_sigusr1(int sig)
diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c
new file mode 100644
index 000000000000..7079f8eef792
--- /dev/null
+++ b/tools/testing/selftests/pidfd/pidfd_wait.c
@@ -0,0 +1,271 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sched.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "pidfd.h"
+#include "../kselftest.h"
+
+#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr)))
+
+static pid_t sys_clone3(struct clone_args *args)
+{
+ return syscall(__NR_clone3, args, sizeof(struct clone_args));
+}
+
+static int sys_waitid(int which, pid_t pid, siginfo_t *info, int options,
+ struct rusage *ru)
+{
+ return syscall(__NR_waitid, which, pid, info, options, ru);
+}
+
+static int test_pidfd_wait_simple(void)
+{
+ const char *test_name = "pidfd wait simple";
+ int pidfd = -1, status = 0;
+ pid_t parent_tid = -1;
+ struct clone_args args = {
+ .parent_tid = ptr_to_u64(&parent_tid),
+ .pidfd = ptr_to_u64(&pidfd),
+ .flags = CLONE_PIDFD | CLONE_PARENT_SETTID,
+ .exit_signal = SIGCHLD,
+ };
+ int ret;
+ pid_t pid;
+ siginfo_t info = {
+ .si_signo = 0,
+ };
+
+ pidfd = open("/proc/self", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
+ if (pidfd < 0)
+ ksft_exit_fail_msg("%s test: failed to open /proc/self %s\n",
+ test_name, strerror(errno));
+
+ pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
+ if (pid == 0)
+ ksft_exit_fail_msg(
+ "%s test: succeeded to wait on invalid pidfd %s\n",
+ test_name, strerror(errno));
+ close(pidfd);
+ pidfd = -1;
+
+ pidfd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+ if (pidfd == 0)
+ ksft_exit_fail_msg("%s test: failed to open /dev/null %s\n",
+ test_name, strerror(errno));
+
+ pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
+ if (pid == 0)
+ ksft_exit_fail_msg(
+ "%s test: succeeded to wait on invalid pidfd %s\n",
+ test_name, strerror(errno));
+ close(pidfd);
+ pidfd = -1;
+
+ pid = sys_clone3(&args);
+ if (pid < 0)
+ ksft_exit_fail_msg("%s test: failed to create new process %s\n",
+ test_name, strerror(errno));
+
+ if (pid == 0)
+ exit(EXIT_SUCCESS);
+
+ pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
+ if (pid < 0)
+ ksft_exit_fail_msg(
+ "%s test: failed to wait on process with pid %d and pidfd %d: %s\n",
+ test_name, parent_tid, pidfd, strerror(errno));
+
+ if (!WIFEXITED(info.si_status) || WEXITSTATUS(info.si_status))
+ ksft_exit_fail_msg(
+ "%s test: unexpected status received after waiting on process with pid %d and pidfd %d: %s\n",
+ test_name, parent_tid, pidfd, strerror(errno));
+ close(pidfd);
+
+ if (info.si_signo != SIGCHLD)
+ ksft_exit_fail_msg(
+ "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
+ test_name, info.si_signo, parent_tid, pidfd,
+ strerror(errno));
+
+ if (info.si_code != CLD_EXITED)
+ ksft_exit_fail_msg(
+ "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
+ test_name, info.si_code, parent_tid, pidfd,
+ strerror(errno));
+
+ if (info.si_pid != parent_tid)
+ ksft_exit_fail_msg(
+ "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
+ test_name, info.si_pid, parent_tid, pidfd,
+ strerror(errno));
+
+ ksft_test_result_pass("%s test: Passed\n", test_name);
+ return 0;
+}
+
+static int test_pidfd_wait_states(void)
+{
+ const char *test_name = "pidfd wait states";
+ int pidfd = -1, status = 0;
+ pid_t parent_tid = -1;
+ struct clone_args args = {
+ .parent_tid = ptr_to_u64(&parent_tid),
+ .pidfd = ptr_to_u64(&pidfd),
+ .flags = CLONE_PIDFD | CLONE_PARENT_SETTID,
+ .exit_signal = SIGCHLD,
+ };
+ int ret;
+ pid_t pid;
+ siginfo_t info = {
+ .si_signo = 0,
+ };
+
+ pid = sys_clone3(&args);
+ if (pid < 0)
+ ksft_exit_fail_msg("%s test: failed to create new process %s\n",
+ test_name, strerror(errno));
+
+ if (pid == 0) {
+ kill(getpid(), SIGSTOP);
+ kill(getpid(), SIGSTOP);
+ exit(EXIT_SUCCESS);
+ }
+
+ ret = sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL);
+ if (ret < 0)
+ ksft_exit_fail_msg(
+ "%s test: failed to wait on WSTOPPED process with pid %d and pidfd %d: %s\n",
+ test_name, parent_tid, pidfd, strerror(errno));
+
+ if (info.si_signo != SIGCHLD)
+ ksft_exit_fail_msg(
+ "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
+ test_name, info.si_signo, parent_tid, pidfd,
+ strerror(errno));
+
+ if (info.si_code != CLD_STOPPED)
+ ksft_exit_fail_msg(
+ "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
+ test_name, info.si_code, parent_tid, pidfd,
+ strerror(errno));
+
+ if (info.si_pid != parent_tid)
+ ksft_exit_fail_msg(
+ "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
+ test_name, info.si_pid, parent_tid, pidfd,
+ strerror(errno));
+
+ ret = sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0);
+ if (ret < 0)
+ ksft_exit_fail_msg(
+ "%s test: failed to send signal to process with pid %d and pidfd %d: %s\n",
+ test_name, parent_tid, pidfd, strerror(errno));
+
+ ret = sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL);
+ if (ret < 0)
+ ksft_exit_fail_msg(
+ "%s test: failed to wait WCONTINUED on process with pid %d and pidfd %d: %s\n",
+ test_name, parent_tid, pidfd, strerror(errno));
+
+ if (info.si_signo != SIGCHLD)
+ ksft_exit_fail_msg(
+ "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
+ test_name, info.si_signo, parent_tid, pidfd,
+ strerror(errno));
+
+ if (info.si_code != CLD_CONTINUED)
+ ksft_exit_fail_msg(
+ "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
+ test_name, info.si_code, parent_tid, pidfd,
+ strerror(errno));
+
+ if (info.si_pid != parent_tid)
+ ksft_exit_fail_msg(
+ "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
+ test_name, info.si_pid, parent_tid, pidfd,
+ strerror(errno));
+
+ ret = sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL);
+ if (ret < 0)
+ ksft_exit_fail_msg(
+ "%s test: failed to wait on WUNTRACED process with pid %d and pidfd %d: %s\n",
+ test_name, parent_tid, pidfd, strerror(errno));
+
+ if (info.si_signo != SIGCHLD)
+ ksft_exit_fail_msg(
+ "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
+ test_name, info.si_signo, parent_tid, pidfd,
+ strerror(errno));
+
+ if (info.si_code != CLD_STOPPED)
+ ksft_exit_fail_msg(
+ "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
+ test_name, info.si_code, parent_tid, pidfd,
+ strerror(errno));
+
+ if (info.si_pid != parent_tid)
+ ksft_exit_fail_msg(
+ "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
+ test_name, info.si_pid, parent_tid, pidfd,
+ strerror(errno));
+
+ ret = sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
+ if (ret < 0)
+ ksft_exit_fail_msg(
+ "%s test: failed to send SIGKILL to process with pid %d and pidfd %d: %s\n",
+ test_name, parent_tid, pidfd, strerror(errno));
+
+ ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
+ if (ret < 0)
+ ksft_exit_fail_msg(
+ "%s test: failed to wait on WEXITED process with pid %d and pidfd %d: %s\n",
+ test_name, parent_tid, pidfd, strerror(errno));
+
+ if (info.si_signo != SIGCHLD)
+ ksft_exit_fail_msg(
+ "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
+ test_name, info.si_signo, parent_tid, pidfd,
+ strerror(errno));
+
+ if (info.si_code != CLD_KILLED)
+ ksft_exit_fail_msg(
+ "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
+ test_name, info.si_code, parent_tid, pidfd,
+ strerror(errno));
+
+ if (info.si_pid != parent_tid)
+ ksft_exit_fail_msg(
+ "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
+ test_name, info.si_pid, parent_tid, pidfd,
+ strerror(errno));
+
+ close(pidfd);
+
+ ksft_test_result_pass("%s test: Passed\n", test_name);
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ ksft_print_header();
+ ksft_set_plan(2);
+
+ test_pidfd_wait_simple();
+ test_pidfd_wait_states();
+
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index b3ad909aefbc..644770c3b754 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -26,6 +26,7 @@ SUB_DIRS = alignment \
switch_endian \
syscalls \
tm \
+ eeh \
vphn \
math \
ptrace \
diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore
index ce12cd0e2967..12ef5b031974 100644
--- a/tools/testing/selftests/powerpc/copyloops/.gitignore
+++ b/tools/testing/selftests/powerpc/copyloops/.gitignore
@@ -1,13 +1,14 @@
copyuser_64_t0
copyuser_64_t1
copyuser_64_t2
-copyuser_power7_t0
-copyuser_power7_t1
+copyuser_p7_t0
+copyuser_p7_t1
memcpy_64_t0
memcpy_64_t1
memcpy_64_t2
-memcpy_power7_t0
-memcpy_power7_t1
+memcpy_p7_t0
+memcpy_p7_t1
copyuser_64_exc_t0
copyuser_64_exc_t1
copyuser_64_exc_t2
+memcpy_mcsafe_64
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
index 44574f3818b3..0917983a1c78 100644
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -12,7 +12,7 @@ ASFLAGS = $(CFLAGS) -Wa,-mpower4
TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
copyuser_p7_t0 copyuser_p7_t1 \
memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
- memcpy_p7_t0 memcpy_p7_t1 \
+ memcpy_p7_t0 memcpy_p7_t1 memcpy_mcsafe_64 \
copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2
EXTRA_SOURCES := validate.c ../harness.c stubs.S
@@ -45,6 +45,11 @@ $(OUTPUT)/memcpy_p7_t%: memcpy_power7.S $(EXTRA_SOURCES)
-D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \
-o $@ $^
+$(OUTPUT)/memcpy_mcsafe_64: memcpy_mcsafe_64.S $(EXTRA_SOURCES)
+ $(CC) $(CPPFLAGS) $(CFLAGS) \
+ -D COPY_LOOP=test_memcpy_mcsafe \
+ -o $@ $^
+
$(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \
copy_tofrom_user_reference.S stubs.S
$(CC) $(CPPFLAGS) $(CFLAGS) \
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/export.h b/tools/testing/selftests/powerpc/copyloops/asm/export.h
index 05c1663c89b0..e6b80d5fbd14 100644
--- a/tools/testing/selftests/powerpc/copyloops/asm/export.h
+++ b/tools/testing/selftests/powerpc/copyloops/asm/export.h
@@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#define EXPORT_SYMBOL(x)
+#define EXPORT_SYMBOL_GPL(x)
#define EXPORT_SYMBOL_KASAN(x)
diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S b/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S
new file mode 120000
index 000000000000..f0feef3062f6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/memcpy_mcsafe_64.S \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/eeh/Makefile b/tools/testing/selftests/powerpc/eeh/Makefile
new file mode 100644
index 000000000000..b397babd569b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/eeh/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+noarg:
+ $(MAKE) -C ../
+
+TEST_PROGS := eeh-basic.sh
+TEST_FILES := eeh-functions.sh
+
+top_srcdir = ../../../../..
+include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
new file mode 100755
index 000000000000..f988d2f42e8f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
@@ -0,0 +1,82 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+
+. ./eeh-functions.sh
+
+if ! eeh_supported ; then
+ echo "EEH not supported on this system, skipping"
+ exit 0;
+fi
+
+if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] && \
+ [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then
+ echo "debugfs EEH testing files are missing. Is debugfs mounted?"
+ exit 1;
+fi
+
+pre_lspci=`mktemp`
+lspci > $pre_lspci
+
+# Bump the max freeze count to something absurd so we don't
+# trip over it while breaking things.
+echo 5000 > /sys/kernel/debug/powerpc/eeh_max_freezes
+
+# record the devices that we break in here. Assuming everything
+# goes to plan we should get them back once the recover process
+# is finished.
+devices=""
+
+# Build up a list of candidate devices.
+for dev in `ls -1 /sys/bus/pci/devices/ | grep '\.0$'` ; do
+ # skip bridges since we can't recover them (yet...)
+ if [ -e "/sys/bus/pci/devices/$dev/pci_bus" ] ; then
+ echo "$dev, Skipped: bridge"
+ continue;
+ fi
+
+ # Skip VFs for now since we don't have a reliable way
+ # to break them.
+ if [ -e "/sys/bus/pci/devices/$dev/physfn" ] ; then
+ echo "$dev, Skipped: virtfn"
+ continue;
+ fi
+
+ # Don't inject errosr into an already-frozen PE. This happens with
+ # PEs that contain multiple PCI devices (e.g. multi-function cards)
+ # and injecting new errors during the recovery process will probably
+ # result in the recovery failing and the device being marked as
+ # failed.
+ if ! pe_ok $dev ; then
+ echo "$dev, Skipped: Bad initial PE state"
+ continue;
+ fi
+
+ echo "$dev, Added"
+
+ # Add to this list of device to check
+ devices="$devices $dev"
+done
+
+dev_count="$(echo $devices | wc -w)"
+echo "Found ${dev_count} breakable devices..."
+
+failed=0
+for dev in $devices ; do
+ echo "Breaking $dev..."
+
+ if ! pe_ok $dev ; then
+ echo "Skipping $dev, Initial PE state is not ok"
+ failed="$((failed + 1))"
+ continue;
+ fi
+
+ if ! eeh_one_dev $dev ; then
+ failed="$((failed + 1))"
+ fi
+done
+
+echo "$failed devices failed to recover ($dev_count tested)"
+lspci | diff -u $pre_lspci -
+rm -f $pre_lspci
+
+exit $failed
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
new file mode 100755
index 000000000000..f52ed92b53e7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
@@ -0,0 +1,80 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+
+pe_ok() {
+ local dev="$1"
+ local path="/sys/bus/pci/devices/$dev/eeh_pe_state"
+
+ if ! [ -e "$path" ] ; then
+ return 1;
+ fi
+
+ local fw_state="$(cut -d' ' -f1 < $path)"
+ local sw_state="$(cut -d' ' -f2 < $path)"
+
+ # If EEH_PE_ISOLATED or EEH_PE_RECOVERING are set then the PE is in an
+ # error state or being recovered. Either way, not ok.
+ if [ "$((sw_state & 0x3))" -ne 0 ] ; then
+ return 1
+ fi
+
+ # A functioning PE should have the EEH_STATE_MMIO_ACTIVE and
+ # EEH_STATE_DMA_ACTIVE flags set. For some goddamn stupid reason
+ # the platform backends set these when the PE is in reset. The
+ # RECOVERING check above should stop any false positives though.
+ if [ "$((fw_state & 0x18))" -ne "$((0x18))" ] ; then
+ return 1
+ fi
+
+ return 0;
+}
+
+eeh_supported() {
+ test -e /proc/powerpc/eeh && \
+ grep -q 'EEH Subsystem is enabled' /proc/powerpc/eeh
+}
+
+eeh_one_dev() {
+ local dev="$1"
+
+ # Using this function from the command line is sometimes useful for
+ # testing so check that the argument is a well-formed sysfs device
+ # name.
+ if ! test -e /sys/bus/pci/devices/$dev/ ; then
+ echo "Error: '$dev' must be a sysfs device name (DDDD:BB:DD.F)"
+ return 1;
+ fi
+
+ # Break it
+ echo $dev >/sys/kernel/debug/powerpc/eeh_dev_break
+
+ # Force an EEH device check. If the kernel has already
+ # noticed the EEH (due to a driver poll or whatever), this
+ # is a no-op.
+ echo $dev >/sys/kernel/debug/powerpc/eeh_dev_check
+
+ # Default to a 60s timeout when waiting for a device to recover. This
+ # is an arbitrary default which can be overridden by setting the
+ # EEH_MAX_WAIT environmental variable when required.
+
+ # The current record holder for longest recovery time is:
+ # "Adaptec Series 8 12G SAS/PCIe 3" at 39 seconds
+ max_wait=${EEH_MAX_WAIT:=60}
+
+ for i in `seq 0 ${max_wait}` ; do
+ if pe_ok $dev ; then
+ break;
+ fi
+ echo "$dev, waited $i/${max_wait}"
+ sleep 1
+ done
+
+ if ! pe_ok $dev ; then
+ echo "$dev, Failed to recover!"
+ return 1;
+ fi
+
+ echo "$dev, Recovered after $i seconds"
+ return 0;
+}
+
diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h
index 0e2b2e6284ac..e089a0c30d9a 100644
--- a/tools/testing/selftests/powerpc/include/utils.h
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -34,6 +34,7 @@ int pick_online_cpu(void);
int read_debugfs_file(char *debugfs_file, int *result);
int write_debugfs_file(char *debugfs_file, int result);
+int read_sysfs_file(char *debugfs_file, char *result, size_t result_size);
void set_dscr(unsigned long val);
int perf_event_open_counter(unsigned int type,
unsigned long config, int group_fd);
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index 7101ffd08d66..0ebeaea22641 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -5,3 +5,4 @@ prot_sao
segv_errors
wild_bctr
large_vm_fork_separation
+bad_accesses
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index f1fbc15800c4..b9103c4bb414 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -3,7 +3,8 @@ noarg:
$(MAKE) -C ../
TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
- large_vm_fork_separation
+ large_vm_fork_separation bad_accesses
+TEST_GEN_PROGS_EXTENDED := tlbie_test
TEST_GEN_FILES := tempfile
top_srcdir = ../../../../..
@@ -15,7 +16,9 @@ $(OUTPUT)/prot_sao: ../utils.c
$(OUTPUT)/wild_bctr: CFLAGS += -m64
$(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64
+$(OUTPUT)/bad_accesses: CFLAGS += -m64
$(OUTPUT)/tempfile:
dd if=/dev/zero of=$@ bs=64k count=1
+$(OUTPUT)/tlbie_test: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/powerpc/mm/bad_accesses.c b/tools/testing/selftests/powerpc/mm/bad_accesses.c
new file mode 100644
index 000000000000..adc465f499ef
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/bad_accesses.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright 2019, Michael Ellerman, IBM Corp.
+//
+// Test that out-of-bounds reads/writes behave as expected.
+
+#include <setjmp.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+// Old distros (Ubuntu 16.04 at least) don't define this
+#ifndef SEGV_BNDERR
+#define SEGV_BNDERR 3
+#endif
+
+// 64-bit kernel is always here
+#define PAGE_OFFSET (0xcul << 60)
+
+static unsigned long kernel_virt_end;
+
+static volatile int fault_code;
+static volatile unsigned long fault_addr;
+static jmp_buf setjmp_env;
+
+static void segv_handler(int n, siginfo_t *info, void *ctxt_v)
+{
+ fault_code = info->si_code;
+ fault_addr = (unsigned long)info->si_addr;
+ siglongjmp(setjmp_env, 1);
+}
+
+int bad_access(char *p, bool write)
+{
+ char x;
+
+ fault_code = 0;
+ fault_addr = 0;
+
+ if (sigsetjmp(setjmp_env, 1) == 0) {
+ if (write)
+ *p = 1;
+ else
+ x = *p;
+
+ printf("Bad - no SEGV! (%c)\n", x);
+ return 1;
+ }
+
+ // If we see MAPERR that means we took a page fault rather than an SLB
+ // miss. We only expect to take page faults for addresses within the
+ // valid kernel range.
+ FAIL_IF(fault_code == SEGV_MAPERR && \
+ (fault_addr < PAGE_OFFSET || fault_addr >= kernel_virt_end));
+
+ FAIL_IF(fault_code != SEGV_MAPERR && fault_code != SEGV_BNDERR);
+
+ return 0;
+}
+
+static int using_hash_mmu(bool *using_hash)
+{
+ char line[128];
+ FILE *f;
+ int rc;
+
+ f = fopen("/proc/cpuinfo", "r");
+ FAIL_IF(!f);
+
+ rc = 0;
+ while (fgets(line, sizeof(line), f) != NULL) {
+ if (strcmp(line, "MMU : Hash\n") == 0) {
+ *using_hash = true;
+ goto out;
+ }
+
+ if (strcmp(line, "MMU : Radix\n") == 0) {
+ *using_hash = false;
+ goto out;
+ }
+ }
+
+ rc = -1;
+out:
+ fclose(f);
+ return rc;
+}
+
+static int test(void)
+{
+ unsigned long i, j, addr, region_shift, page_shift, page_size;
+ struct sigaction sig;
+ bool hash_mmu;
+
+ sig = (struct sigaction) {
+ .sa_sigaction = segv_handler,
+ .sa_flags = SA_SIGINFO,
+ };
+
+ FAIL_IF(sigaction(SIGSEGV, &sig, NULL) != 0);
+
+ FAIL_IF(using_hash_mmu(&hash_mmu));
+
+ page_size = sysconf(_SC_PAGESIZE);
+ if (page_size == (64 * 1024))
+ page_shift = 16;
+ else
+ page_shift = 12;
+
+ if (page_size == (64 * 1024) || !hash_mmu) {
+ region_shift = 52;
+
+ // We have 7 512T regions (4 kernel linear, vmalloc, io, vmemmap)
+ kernel_virt_end = PAGE_OFFSET + (7 * (512ul << 40));
+ } else if (page_size == (4 * 1024) && hash_mmu) {
+ region_shift = 46;
+
+ // We have 7 64T regions (4 kernel linear, vmalloc, io, vmemmap)
+ kernel_virt_end = PAGE_OFFSET + (7 * (64ul << 40));
+ } else
+ FAIL_IF(true);
+
+ printf("Using %s MMU, PAGE_SIZE = %dKB start address 0x%016lx\n",
+ hash_mmu ? "hash" : "radix",
+ (1 << page_shift) >> 10,
+ 1ul << region_shift);
+
+ // This generates access patterns like:
+ // 0x0010000000000000
+ // 0x0010000000010000
+ // 0x0010000000020000
+ // ...
+ // 0x0014000000000000
+ // 0x0018000000000000
+ // 0x0020000000000000
+ // 0x0020000000010000
+ // 0x0020000000020000
+ // ...
+ // 0xf400000000000000
+ // 0xf800000000000000
+
+ for (i = 1; i <= ((0xful << 60) >> region_shift); i++) {
+ for (j = page_shift - 1; j < 60; j++) {
+ unsigned long base, delta;
+
+ base = i << region_shift;
+ delta = 1ul << j;
+
+ if (delta >= base)
+ break;
+
+ addr = (base | delta) & ~((1 << page_shift) - 1);
+
+ FAIL_IF(bad_access((char *)addr, false));
+ FAIL_IF(bad_access((char *)addr, true));
+ }
+ }
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test, "bad_accesses");
+}
diff --git a/tools/testing/selftests/powerpc/mm/tlbie_test.c b/tools/testing/selftests/powerpc/mm/tlbie_test.c
new file mode 100644
index 000000000000..f85a0938ab25
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/tlbie_test.c
@@ -0,0 +1,734 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2019, Nick Piggin, Gautham R. Shenoy, Aneesh Kumar K.V, IBM Corp.
+ */
+
+/*
+ *
+ * Test tlbie/mtpidr race. We have 4 threads doing flush/load/compare/store
+ * sequence in a loop. The same threads also rung a context switch task
+ * that does sched_yield() in loop.
+ *
+ * The snapshot thread mark the mmap area PROT_READ in between, make a copy
+ * and copy it back to the original area. This helps us to detect if any
+ * store continued to happen after we marked the memory PROT_READ.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <linux/futex.h>
+#include <unistd.h>
+#include <asm/unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <time.h>
+#include <stdarg.h>
+#include <sched.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sys/prctl.h>
+
+static inline void dcbf(volatile unsigned int *addr)
+{
+ __asm__ __volatile__ ("dcbf %y0; sync" : : "Z"(*(unsigned char *)addr) : "memory");
+}
+
+static void err_msg(char *msg)
+{
+
+ time_t now;
+ time(&now);
+ printf("=================================\n");
+ printf(" Error: %s\n", msg);
+ printf(" %s", ctime(&now));
+ printf("=================================\n");
+ exit(1);
+}
+
+static char *map1;
+static char *map2;
+static pid_t rim_process_pid;
+
+/*
+ * A "rim-sequence" is defined to be the sequence of the following
+ * operations performed on a memory word:
+ * 1) FLUSH the contents of that word.
+ * 2) LOAD the contents of that word.
+ * 3) COMPARE the contents of that word with the content that was
+ * previously stored at that word
+ * 4) STORE new content into that word.
+ *
+ * The threads in this test that perform the rim-sequence are termed
+ * as rim_threads.
+ */
+
+/*
+ * A "corruption" is defined to be the failed COMPARE operation in a
+ * rim-sequence.
+ *
+ * A rim_thread that detects a corruption informs about it to all the
+ * other rim_threads, and the mem_snapshot thread.
+ */
+static volatile unsigned int corruption_found;
+
+/*
+ * This defines the maximum number of rim_threads in this test.
+ *
+ * The THREAD_ID_BITS denote the number of bits required
+ * to represent the thread_ids [0..MAX_THREADS - 1].
+ * We are being a bit paranoid here and set it to 8 bits,
+ * though 6 bits suffice.
+ *
+ */
+#define MAX_THREADS 64
+#define THREAD_ID_BITS 8
+#define THREAD_ID_MASK ((1 << THREAD_ID_BITS) - 1)
+static unsigned int rim_thread_ids[MAX_THREADS];
+static pthread_t rim_threads[MAX_THREADS];
+
+
+/*
+ * Each rim_thread works on an exclusive "chunk" of size
+ * RIM_CHUNK_SIZE.
+ *
+ * The ith rim_thread works on the ith chunk.
+ *
+ * The ith chunk begins at
+ * map1 + (i * RIM_CHUNK_SIZE)
+ */
+#define RIM_CHUNK_SIZE 1024
+#define BITS_PER_BYTE 8
+#define WORD_SIZE (sizeof(unsigned int))
+#define WORD_BITS (WORD_SIZE * BITS_PER_BYTE)
+#define WORDS_PER_CHUNK (RIM_CHUNK_SIZE/WORD_SIZE)
+
+static inline char *compute_chunk_start_addr(unsigned int thread_id)
+{
+ char *chunk_start;
+
+ chunk_start = (char *)((unsigned long)map1 +
+ (thread_id * RIM_CHUNK_SIZE));
+
+ return chunk_start;
+}
+
+/*
+ * The "word-offset" of a word-aligned address inside a chunk, is
+ * defined to be the number of words that precede the address in that
+ * chunk.
+ *
+ * WORD_OFFSET_BITS denote the number of bits required to represent
+ * the word-offsets of all the word-aligned addresses of a chunk.
+ */
+#define WORD_OFFSET_BITS (__builtin_ctz(WORDS_PER_CHUNK))
+#define WORD_OFFSET_MASK ((1 << WORD_OFFSET_BITS) - 1)
+
+static inline unsigned int compute_word_offset(char *start, unsigned int *addr)
+{
+ unsigned int delta_bytes, ret;
+ delta_bytes = (unsigned long)addr - (unsigned long)start;
+
+ ret = delta_bytes/WORD_SIZE;
+
+ return ret;
+}
+
+/*
+ * A "sweep" is defined to be the sequential execution of the
+ * rim-sequence by a rim_thread on its chunk one word at a time,
+ * starting from the first word of its chunk and ending with the last
+ * word of its chunk.
+ *
+ * Each sweep of a rim_thread is uniquely identified by a sweep_id.
+ * SWEEP_ID_BITS denote the number of bits required to represent
+ * the sweep_ids of rim_threads.
+ *
+ * As to why SWEEP_ID_BITS are computed as a function of THREAD_ID_BITS,
+ * WORD_OFFSET_BITS, and WORD_BITS, see the "store-pattern" below.
+ */
+#define SWEEP_ID_BITS (WORD_BITS - (THREAD_ID_BITS + WORD_OFFSET_BITS))
+#define SWEEP_ID_MASK ((1 << SWEEP_ID_BITS) - 1)
+
+/*
+ * A "store-pattern" is the word-pattern that is stored into a word
+ * location in the 4)STORE step of the rim-sequence.
+ *
+ * In the store-pattern, we shall encode:
+ *
+ * - The thread-id of the rim_thread performing the store
+ * (The most significant THREAD_ID_BITS)
+ *
+ * - The word-offset of the address into which the store is being
+ * performed (The next WORD_OFFSET_BITS)
+ *
+ * - The sweep_id of the current sweep in which the store is
+ * being performed. (The lower SWEEP_ID_BITS)
+ *
+ * Store Pattern: 32 bits
+ * |------------------|--------------------|---------------------------------|
+ * | Thread id | Word offset | sweep_id |
+ * |------------------|--------------------|---------------------------------|
+ * THREAD_ID_BITS WORD_OFFSET_BITS SWEEP_ID_BITS
+ *
+ * In the store pattern, the (Thread-id + Word-offset) uniquely identify the
+ * address to which the store is being performed i.e,
+ * address == map1 +
+ * (Thread-id * RIM_CHUNK_SIZE) + (Word-offset * WORD_SIZE)
+ *
+ * And the sweep_id in the store pattern identifies the time when the
+ * store was performed by the rim_thread.
+ *
+ * We shall use this property in the 3)COMPARE step of the
+ * rim-sequence.
+ */
+#define SWEEP_ID_SHIFT 0
+#define WORD_OFFSET_SHIFT (SWEEP_ID_BITS)
+#define THREAD_ID_SHIFT (WORD_OFFSET_BITS + SWEEP_ID_BITS)
+
+/*
+ * Compute the store pattern for a given thread with id @tid, at
+ * location @addr in the sweep identified by @sweep_id
+ */
+static inline unsigned int compute_store_pattern(unsigned int tid,
+ unsigned int *addr,
+ unsigned int sweep_id)
+{
+ unsigned int ret = 0;
+ char *start = compute_chunk_start_addr(tid);
+ unsigned int word_offset = compute_word_offset(start, addr);
+
+ ret += (tid & THREAD_ID_MASK) << THREAD_ID_SHIFT;
+ ret += (word_offset & WORD_OFFSET_MASK) << WORD_OFFSET_SHIFT;
+ ret += (sweep_id & SWEEP_ID_MASK) << SWEEP_ID_SHIFT;
+ return ret;
+}
+
+/* Extract the thread-id from the given store-pattern */
+static inline unsigned int extract_tid(unsigned int pattern)
+{
+ unsigned int ret;
+
+ ret = (pattern >> THREAD_ID_SHIFT) & THREAD_ID_MASK;
+ return ret;
+}
+
+/* Extract the word-offset from the given store-pattern */
+static inline unsigned int extract_word_offset(unsigned int pattern)
+{
+ unsigned int ret;
+
+ ret = (pattern >> WORD_OFFSET_SHIFT) & WORD_OFFSET_MASK;
+
+ return ret;
+}
+
+/* Extract the sweep-id from the given store-pattern */
+static inline unsigned int extract_sweep_id(unsigned int pattern)
+
+{
+ unsigned int ret;
+
+ ret = (pattern >> SWEEP_ID_SHIFT) & SWEEP_ID_MASK;
+
+ return ret;
+}
+
+/************************************************************
+ * *
+ * Logging the output of the verification *
+ * *
+ ************************************************************/
+#define LOGDIR_NAME_SIZE 100
+static char logdir[LOGDIR_NAME_SIZE];
+
+static FILE *fp[MAX_THREADS];
+static const char logfilename[] ="Thread-%02d-Chunk";
+
+static inline void start_verification_log(unsigned int tid,
+ unsigned int *addr,
+ unsigned int cur_sweep_id,
+ unsigned int prev_sweep_id)
+{
+ FILE *f;
+ char logfile[30];
+ char path[LOGDIR_NAME_SIZE + 30];
+ char separator[2] = "/";
+ char *chunk_start = compute_chunk_start_addr(tid);
+ unsigned int size = RIM_CHUNK_SIZE;
+
+ sprintf(logfile, logfilename, tid);
+ strcpy(path, logdir);
+ strcat(path, separator);
+ strcat(path, logfile);
+ f = fopen(path, "w");
+
+ if (!f) {
+ err_msg("Unable to create logfile\n");
+ }
+
+ fp[tid] = f;
+
+ fprintf(f, "----------------------------------------------------------\n");
+ fprintf(f, "PID = %d\n", rim_process_pid);
+ fprintf(f, "Thread id = %02d\n", tid);
+ fprintf(f, "Chunk Start Addr = 0x%016lx\n", (unsigned long)chunk_start);
+ fprintf(f, "Chunk Size = %d\n", size);
+ fprintf(f, "Next Store Addr = 0x%016lx\n", (unsigned long)addr);
+ fprintf(f, "Current sweep-id = 0x%08x\n", cur_sweep_id);
+ fprintf(f, "Previous sweep-id = 0x%08x\n", prev_sweep_id);
+ fprintf(f, "----------------------------------------------------------\n");
+}
+
+static inline void log_anamoly(unsigned int tid, unsigned int *addr,
+ unsigned int expected, unsigned int observed)
+{
+ FILE *f = fp[tid];
+
+ fprintf(f, "Thread %02d: Addr 0x%lx: Expected 0x%x, Observed 0x%x\n",
+ tid, (unsigned long)addr, expected, observed);
+ fprintf(f, "Thread %02d: Expected Thread id = %02d\n", tid, extract_tid(expected));
+ fprintf(f, "Thread %02d: Observed Thread id = %02d\n", tid, extract_tid(observed));
+ fprintf(f, "Thread %02d: Expected Word offset = %03d\n", tid, extract_word_offset(expected));
+ fprintf(f, "Thread %02d: Observed Word offset = %03d\n", tid, extract_word_offset(observed));
+ fprintf(f, "Thread %02d: Expected sweep-id = 0x%x\n", tid, extract_sweep_id(expected));
+ fprintf(f, "Thread %02d: Observed sweep-id = 0x%x\n", tid, extract_sweep_id(observed));
+ fprintf(f, "----------------------------------------------------------\n");
+}
+
+static inline void end_verification_log(unsigned int tid, unsigned nr_anamolies)
+{
+ FILE *f = fp[tid];
+ char logfile[30];
+ char path[LOGDIR_NAME_SIZE + 30];
+ char separator[] = "/";
+
+ fclose(f);
+
+ if (nr_anamolies == 0) {
+ remove(path);
+ return;
+ }
+
+ sprintf(logfile, logfilename, tid);
+ strcpy(path, logdir);
+ strcat(path, separator);
+ strcat(path, logfile);
+
+ printf("Thread %02d chunk has %d corrupted words. For details check %s\n",
+ tid, nr_anamolies, path);
+}
+
+/*
+ * When a COMPARE step of a rim-sequence fails, the rim_thread informs
+ * everyone else via the shared_memory pointed to by
+ * corruption_found variable. On seeing this, every thread verifies the
+ * content of its chunk as follows.
+ *
+ * Suppose a thread identified with @tid was about to store (but not
+ * yet stored) to @next_store_addr in its current sweep identified
+ * @cur_sweep_id. Let @prev_sweep_id indicate the previous sweep_id.
+ *
+ * This implies that for all the addresses @addr < @next_store_addr,
+ * Thread @tid has already performed a store as part of its current
+ * sweep. Hence we expect the content of such @addr to be:
+ * |-------------------------------------------------|
+ * | tid | word_offset(addr) | cur_sweep_id |
+ * |-------------------------------------------------|
+ *
+ * Since Thread @tid is yet to perform stores on address
+ * @next_store_addr and above, we expect the content of such an
+ * address @addr to be:
+ * |-------------------------------------------------|
+ * | tid | word_offset(addr) | prev_sweep_id |
+ * |-------------------------------------------------|
+ *
+ * The verifier function @verify_chunk does this verification and logs
+ * any anamolies that it finds.
+ */
+static void verify_chunk(unsigned int tid, unsigned int *next_store_addr,
+ unsigned int cur_sweep_id,
+ unsigned int prev_sweep_id)
+{
+ unsigned int *iter_ptr;
+ unsigned int size = RIM_CHUNK_SIZE;
+ unsigned int expected;
+ unsigned int observed;
+ char *chunk_start = compute_chunk_start_addr(tid);
+
+ int nr_anamolies = 0;
+
+ start_verification_log(tid, next_store_addr,
+ cur_sweep_id, prev_sweep_id);
+
+ for (iter_ptr = (unsigned int *)chunk_start;
+ (unsigned long)iter_ptr < (unsigned long)chunk_start + size;
+ iter_ptr++) {
+ unsigned int expected_sweep_id;
+
+ if (iter_ptr < next_store_addr) {
+ expected_sweep_id = cur_sweep_id;
+ } else {
+ expected_sweep_id = prev_sweep_id;
+ }
+
+ expected = compute_store_pattern(tid, iter_ptr, expected_sweep_id);
+
+ dcbf((volatile unsigned int*)iter_ptr); //Flush before reading
+ observed = *iter_ptr;
+
+ if (observed != expected) {
+ nr_anamolies++;
+ log_anamoly(tid, iter_ptr, expected, observed);
+ }
+ }
+
+ end_verification_log(tid, nr_anamolies);
+}
+
+static void set_pthread_cpu(pthread_t th, int cpu)
+{
+ cpu_set_t run_cpu_mask;
+ struct sched_param param;
+
+ CPU_ZERO(&run_cpu_mask);
+ CPU_SET(cpu, &run_cpu_mask);
+ pthread_setaffinity_np(th, sizeof(cpu_set_t), &run_cpu_mask);
+
+ param.sched_priority = 1;
+ if (0 && sched_setscheduler(0, SCHED_FIFO, &param) == -1) {
+ /* haven't reproduced with this setting, it kills random preemption which may be a factor */
+ fprintf(stderr, "could not set SCHED_FIFO, run as root?\n");
+ }
+}
+
+static void set_mycpu(int cpu)
+{
+ cpu_set_t run_cpu_mask;
+ struct sched_param param;
+
+ CPU_ZERO(&run_cpu_mask);
+ CPU_SET(cpu, &run_cpu_mask);
+ sched_setaffinity(0, sizeof(cpu_set_t), &run_cpu_mask);
+
+ param.sched_priority = 1;
+ if (0 && sched_setscheduler(0, SCHED_FIFO, &param) == -1) {
+ fprintf(stderr, "could not set SCHED_FIFO, run as root?\n");
+ }
+}
+
+static volatile int segv_wait;
+
+static void segv_handler(int signo, siginfo_t *info, void *extra)
+{
+ while (segv_wait) {
+ sched_yield();
+ }
+
+}
+
+static void set_segv_handler(void)
+{
+ struct sigaction sa;
+
+ sa.sa_flags = SA_SIGINFO;
+ sa.sa_sigaction = segv_handler;
+
+ if (sigaction(SIGSEGV, &sa, NULL) == -1) {
+ perror("sigaction");
+ exit(EXIT_FAILURE);
+ }
+}
+
+int timeout = 0;
+/*
+ * This function is executed by every rim_thread.
+ *
+ * This function performs sweeps over the exclusive chunks of the
+ * rim_threads executing the rim-sequence one word at a time.
+ */
+static void *rim_fn(void *arg)
+{
+ unsigned int tid = *((unsigned int *)arg);
+
+ int size = RIM_CHUNK_SIZE;
+ char *chunk_start = compute_chunk_start_addr(tid);
+
+ unsigned int prev_sweep_id;
+ unsigned int cur_sweep_id = 0;
+
+ /* word access */
+ unsigned int pattern = cur_sweep_id;
+ unsigned int *pattern_ptr = &pattern;
+ unsigned int *w_ptr, read_data;
+
+ set_segv_handler();
+
+ /*
+ * Let us initialize the chunk:
+ *
+ * Each word-aligned address addr in the chunk,
+ * is initialized to :
+ * |-------------------------------------------------|
+ * | tid | word_offset(addr) | 0 |
+ * |-------------------------------------------------|
+ */
+ for (w_ptr = (unsigned int *)chunk_start;
+ (unsigned long)w_ptr < (unsigned long)(chunk_start) + size;
+ w_ptr++) {
+
+ *pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id);
+ *w_ptr = *pattern_ptr;
+ }
+
+ while (!corruption_found && !timeout) {
+ prev_sweep_id = cur_sweep_id;
+ cur_sweep_id = cur_sweep_id + 1;
+
+ for (w_ptr = (unsigned int *)chunk_start;
+ (unsigned long)w_ptr < (unsigned long)(chunk_start) + size;
+ w_ptr++) {
+ unsigned int old_pattern;
+
+ /*
+ * Compute the pattern that we would have
+ * stored at this location in the previous
+ * sweep.
+ */
+ old_pattern = compute_store_pattern(tid, w_ptr, prev_sweep_id);
+
+ /*
+ * FLUSH:Ensure that we flush the contents of
+ * the cache before loading
+ */
+ dcbf((volatile unsigned int*)w_ptr); //Flush
+
+ /* LOAD: Read the value */
+ read_data = *w_ptr; //Load
+
+ /*
+ * COMPARE: Is it the same as what we had stored
+ * in the previous sweep ? It better be!
+ */
+ if (read_data != old_pattern) {
+ /* No it isn't! Tell everyone */
+ corruption_found = 1;
+ }
+
+ /*
+ * Before performing a store, let us check if
+ * any rim_thread has found a corruption.
+ */
+ if (corruption_found || timeout) {
+ /*
+ * Yes. Someone (including us!) has found
+ * a corruption :(
+ *
+ * Let us verify that our chunk is
+ * correct.
+ */
+ /* But first, let us allow the dust to settle down! */
+ verify_chunk(tid, w_ptr, cur_sweep_id, prev_sweep_id);
+
+ return 0;
+ }
+
+ /*
+ * Compute the new pattern that we are going
+ * to write to this location
+ */
+ *pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id);
+
+ /*
+ * STORE: Now let us write this pattern into
+ * the location
+ */
+ *w_ptr = *pattern_ptr;
+ }
+ }
+
+ return NULL;
+}
+
+
+static unsigned long start_cpu = 0;
+static unsigned long nrthreads = 4;
+
+static pthread_t mem_snapshot_thread;
+
+static void *mem_snapshot_fn(void *arg)
+{
+ int page_size = getpagesize();
+ size_t size = page_size;
+ void *tmp = malloc(size);
+
+ while (!corruption_found && !timeout) {
+ /* Stop memory migration once corruption is found */
+ segv_wait = 1;
+
+ mprotect(map1, size, PROT_READ);
+
+ /*
+ * Load from the working alias (map1). Loading from map2
+ * also fails.
+ */
+ memcpy(tmp, map1, size);
+
+ /*
+ * Stores must go via map2 which has write permissions, but
+ * the corrupted data tends to be seen in the snapshot buffer,
+ * so corruption does not appear to be introduced at the
+ * copy-back via map2 alias here.
+ */
+ memcpy(map2, tmp, size);
+ /*
+ * Before releasing other threads, must ensure the copy
+ * back to
+ */
+ asm volatile("sync" ::: "memory");
+ mprotect(map1, size, PROT_READ|PROT_WRITE);
+ asm volatile("sync" ::: "memory");
+ segv_wait = 0;
+
+ usleep(1); /* This value makes a big difference */
+ }
+
+ return 0;
+}
+
+void alrm_sighandler(int sig)
+{
+ timeout = 1;
+}
+
+int main(int argc, char *argv[])
+{
+ int c;
+ int page_size = getpagesize();
+ time_t now;
+ int i, dir_error;
+ pthread_attr_t attr;
+ key_t shm_key = (key_t) getpid();
+ int shmid, run_time = 20 * 60;
+ struct sigaction sa_alrm;
+
+ snprintf(logdir, LOGDIR_NAME_SIZE,
+ "/tmp/logdir-%u", (unsigned int)getpid());
+ while ((c = getopt(argc, argv, "r:hn:l:t:")) != -1) {
+ switch(c) {
+ case 'r':
+ start_cpu = strtoul(optarg, NULL, 10);
+ break;
+ case 'h':
+ printf("%s [-r <start_cpu>] [-n <nrthreads>] [-l <logdir>] [-t <timeout>]\n", argv[0]);
+ exit(0);
+ break;
+ case 'n':
+ nrthreads = strtoul(optarg, NULL, 10);
+ break;
+ case 'l':
+ strncpy(logdir, optarg, LOGDIR_NAME_SIZE - 1);
+ break;
+ case 't':
+ run_time = strtoul(optarg, NULL, 10);
+ break;
+ default:
+ printf("invalid option\n");
+ exit(0);
+ break;
+ }
+ }
+
+ if (nrthreads > MAX_THREADS)
+ nrthreads = MAX_THREADS;
+
+ shmid = shmget(shm_key, page_size, IPC_CREAT|0666);
+ if (shmid < 0) {
+ err_msg("Failed shmget\n");
+ }
+
+ map1 = shmat(shmid, NULL, 0);
+ if (map1 == (void *) -1) {
+ err_msg("Failed shmat");
+ }
+
+ map2 = shmat(shmid, NULL, 0);
+ if (map2 == (void *) -1) {
+ err_msg("Failed shmat");
+ }
+
+ dir_error = mkdir(logdir, 0755);
+
+ if (dir_error) {
+ err_msg("Failed mkdir");
+ }
+
+ printf("start_cpu list:%lu\n", start_cpu);
+ printf("number of worker threads:%lu + 1 snapshot thread\n", nrthreads);
+ printf("Allocated address:0x%016lx + secondary map:0x%016lx\n", (unsigned long)map1, (unsigned long)map2);
+ printf("logdir at : %s\n", logdir);
+ printf("Timeout: %d seconds\n", run_time);
+
+ time(&now);
+ printf("=================================\n");
+ printf(" Starting Test\n");
+ printf(" %s", ctime(&now));
+ printf("=================================\n");
+
+ for (i = 0; i < nrthreads; i++) {
+ if (1 && !fork()) {
+ prctl(PR_SET_PDEATHSIG, SIGKILL);
+ set_mycpu(start_cpu + i);
+ for (;;)
+ sched_yield();
+ exit(0);
+ }
+ }
+
+
+ sa_alrm.sa_handler = &alrm_sighandler;
+ sigemptyset(&sa_alrm.sa_mask);
+ sa_alrm.sa_flags = 0;
+
+ if (sigaction(SIGALRM, &sa_alrm, 0) == -1) {
+ err_msg("Failed signal handler registration\n");
+ }
+
+ alarm(run_time);
+
+ pthread_attr_init(&attr);
+ for (i = 0; i < nrthreads; i++) {
+ rim_thread_ids[i] = i;
+ pthread_create(&rim_threads[i], &attr, rim_fn, &rim_thread_ids[i]);
+ set_pthread_cpu(rim_threads[i], start_cpu + i);
+ }
+
+ pthread_create(&mem_snapshot_thread, &attr, mem_snapshot_fn, map1);
+ set_pthread_cpu(mem_snapshot_thread, start_cpu + i);
+
+
+ pthread_join(mem_snapshot_thread, NULL);
+ for (i = 0; i < nrthreads; i++) {
+ pthread_join(rim_threads[i], NULL);
+ }
+
+ if (!timeout) {
+ time(&now);
+ printf("=================================\n");
+ printf(" Data Corruption Detected\n");
+ printf(" %s", ctime(&now));
+ printf(" See logfiles in %s\n", logdir);
+ printf("=================================\n");
+ return 1;
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
index 23f4caf48ffc..417306353e07 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -1,4 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
+include ../../../../../../scripts/Kbuild.include
+
noarg:
$(MAKE) -C ../../
@@ -6,7 +8,10 @@ noarg:
CFLAGS += -m64
# Toolchains may build PIE by default which breaks the assembly
-LDFLAGS += -no-pie
+no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
+ $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
+
+LDFLAGS += $(no-pie-option)
TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \
cycles_with_freeze_test pmc56_overflow_test \
diff --git a/tools/testing/selftests/powerpc/ptrace/.gitignore b/tools/testing/selftests/powerpc/ptrace/.gitignore
index 07ec449a2767..dce19f221c46 100644
--- a/tools/testing/selftests/powerpc/ptrace/.gitignore
+++ b/tools/testing/selftests/powerpc/ptrace/.gitignore
@@ -10,3 +10,6 @@ ptrace-tm-spd-vsx
ptrace-tm-spr
ptrace-hwbreak
perf-hwbreak
+core-pkey
+ptrace-pkey
+ptrace-syscall
diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
index 200337daec42..c1f324afdbf3 100644
--- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
+++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
@@ -148,6 +148,121 @@ static int runtestsingle(int readwriteflag, int exclude_user, int arraytest)
return 0;
}
+static int runtest_dar_outside(void)
+{
+ void *target;
+ volatile __u16 temp16;
+ volatile __u64 temp64;
+ struct perf_event_attr attr;
+ int break_fd;
+ unsigned long long breaks;
+ int fail = 0;
+ size_t res;
+
+ target = malloc(8);
+ if (!target) {
+ perror("malloc failed");
+ exit(EXIT_FAILURE);
+ }
+
+ /* setup counters */
+ memset(&attr, 0, sizeof(attr));
+ attr.disabled = 1;
+ attr.type = PERF_TYPE_BREAKPOINT;
+ attr.exclude_kernel = 1;
+ attr.exclude_hv = 1;
+ attr.exclude_guest = 1;
+ attr.bp_type = HW_BREAKPOINT_RW;
+ /* watch middle half of target array */
+ attr.bp_addr = (__u64)(target + 2);
+ attr.bp_len = 4;
+ break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+ if (break_fd < 0) {
+ free(target);
+ perror("sys_perf_event_open");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Shouldn't hit. */
+ ioctl(break_fd, PERF_EVENT_IOC_RESET);
+ ioctl(break_fd, PERF_EVENT_IOC_ENABLE);
+ temp16 = *((__u16 *)target);
+ *((__u16 *)target) = temp16;
+ ioctl(break_fd, PERF_EVENT_IOC_DISABLE);
+ res = read(break_fd, &breaks, sizeof(unsigned long long));
+ assert(res == sizeof(unsigned long long));
+ if (breaks == 0) {
+ printf("TESTED: No overlap\n");
+ } else {
+ printf("FAILED: No overlap: %lld != 0\n", breaks);
+ fail = 1;
+ }
+
+ /* Hit */
+ ioctl(break_fd, PERF_EVENT_IOC_RESET);
+ ioctl(break_fd, PERF_EVENT_IOC_ENABLE);
+ temp16 = *((__u16 *)(target + 1));
+ *((__u16 *)(target + 1)) = temp16;
+ ioctl(break_fd, PERF_EVENT_IOC_DISABLE);
+ res = read(break_fd, &breaks, sizeof(unsigned long long));
+ assert(res == sizeof(unsigned long long));
+ if (breaks == 2) {
+ printf("TESTED: Partial overlap\n");
+ } else {
+ printf("FAILED: Partial overlap: %lld != 2\n", breaks);
+ fail = 1;
+ }
+
+ /* Hit */
+ ioctl(break_fd, PERF_EVENT_IOC_RESET);
+ ioctl(break_fd, PERF_EVENT_IOC_ENABLE);
+ temp16 = *((__u16 *)(target + 5));
+ *((__u16 *)(target + 5)) = temp16;
+ ioctl(break_fd, PERF_EVENT_IOC_DISABLE);
+ res = read(break_fd, &breaks, sizeof(unsigned long long));
+ assert(res == sizeof(unsigned long long));
+ if (breaks == 2) {
+ printf("TESTED: Partial overlap\n");
+ } else {
+ printf("FAILED: Partial overlap: %lld != 2\n", breaks);
+ fail = 1;
+ }
+
+ /* Shouldn't Hit */
+ ioctl(break_fd, PERF_EVENT_IOC_RESET);
+ ioctl(break_fd, PERF_EVENT_IOC_ENABLE);
+ temp16 = *((__u16 *)(target + 6));
+ *((__u16 *)(target + 6)) = temp16;
+ ioctl(break_fd, PERF_EVENT_IOC_DISABLE);
+ res = read(break_fd, &breaks, sizeof(unsigned long long));
+ assert(res == sizeof(unsigned long long));
+ if (breaks == 0) {
+ printf("TESTED: No overlap\n");
+ } else {
+ printf("FAILED: No overlap: %lld != 0\n", breaks);
+ fail = 1;
+ }
+
+ /* Hit */
+ ioctl(break_fd, PERF_EVENT_IOC_RESET);
+ ioctl(break_fd, PERF_EVENT_IOC_ENABLE);
+ temp64 = *((__u64 *)target);
+ *((__u64 *)target) = temp64;
+ ioctl(break_fd, PERF_EVENT_IOC_DISABLE);
+ res = read(break_fd, &breaks, sizeof(unsigned long long));
+ assert(res == sizeof(unsigned long long));
+ if (breaks == 2) {
+ printf("TESTED: Full overlap\n");
+ } else {
+ printf("FAILED: Full overlap: %lld != 2\n", breaks);
+ fail = 1;
+ }
+
+ free(target);
+ close(break_fd);
+ return fail;
+}
+
static int runtest(void)
{
int rwflag;
@@ -172,7 +287,9 @@ static int runtest(void)
return ret;
}
}
- return 0;
+
+ ret = runtest_dar_outside();
+ return ret;
}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
index 3066d310f32b..fc477dfe86a2 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
@@ -22,321 +22,485 @@
#include <sys/wait.h>
#include "ptrace.h"
-/* Breakpoint access modes */
-enum {
- BP_X = 1,
- BP_RW = 2,
- BP_W = 4,
-};
-
-static pid_t child_pid;
-static struct ppc_debug_info dbginfo;
-
-static void get_dbginfo(void)
-{
- int ret;
+#define SPRN_PVR 0x11F
+#define PVR_8xx 0x00500000
- ret = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo);
- if (ret) {
- perror("Can't get breakpoint info\n");
- exit(-1);
- }
-}
+bool is_8xx;
-static bool hwbreak_present(void)
-{
- return (dbginfo.num_data_bps != 0);
-}
+/*
+ * Use volatile on all global var so that compiler doesn't
+ * optimise their load/stores. Otherwise selftest can fail.
+ */
+static volatile __u64 glvar;
-static bool dawr_present(void)
-{
- return !!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_DAWR);
-}
+#define DAWR_MAX_LEN 512
+static volatile __u8 big_var[DAWR_MAX_LEN] __attribute__((aligned(512)));
-static void set_breakpoint_addr(void *addr)
-{
- int ret;
+#define A_LEN 6
+#define B_LEN 6
+struct gstruct {
+ __u8 a[A_LEN]; /* double word aligned */
+ __u8 b[B_LEN]; /* double word unaligned */
+};
+static volatile struct gstruct gstruct __attribute__((aligned(512)));
- ret = ptrace(PTRACE_SET_DEBUGREG, child_pid, 0, addr);
- if (ret) {
- perror("Can't set breakpoint addr\n");
- exit(-1);
- }
-}
-static int set_hwbreakpoint_addr(void *addr, int range)
+static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo)
{
- int ret;
-
- struct ppc_hw_breakpoint info;
-
- info.version = 1;
- info.trigger_type = PPC_BREAKPOINT_TRIGGER_RW;
- info.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
- if (range > 0)
- info.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
- info.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
- info.addr = (__u64)addr;
- info.addr2 = (__u64)addr + range;
- info.condition_value = 0;
-
- ret = ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info);
- if (ret < 0) {
- perror("Can't set breakpoint\n");
+ if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, dbginfo)) {
+ perror("Can't get breakpoint info");
exit(-1);
}
- return ret;
}
-static int del_hwbreakpoint_addr(int watchpoint_handle)
+static bool dawr_present(struct ppc_debug_info *dbginfo)
{
- int ret;
-
- ret = ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, watchpoint_handle);
- if (ret < 0) {
- perror("Can't delete hw breakpoint\n");
- exit(-1);
- }
- return ret;
+ return !!(dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_DAWR);
}
-#define DAWR_LENGTH_MAX 512
-
-/* Dummy variables to test read/write accesses */
-static unsigned long long
- dummy_array[DAWR_LENGTH_MAX / sizeof(unsigned long long)]
- __attribute__((aligned(512)));
-static unsigned long long *dummy_var = dummy_array;
-
static void write_var(int len)
{
- long long *plval;
- char *pcval;
- short *psval;
- int *pival;
+ __u8 *pcvar;
+ __u16 *psvar;
+ __u32 *pivar;
+ __u64 *plvar;
switch (len) {
case 1:
- pcval = (char *)dummy_var;
- *pcval = 0xff;
+ pcvar = (__u8 *)&glvar;
+ *pcvar = 0xff;
break;
case 2:
- psval = (short *)dummy_var;
- *psval = 0xffff;
+ psvar = (__u16 *)&glvar;
+ *psvar = 0xffff;
break;
case 4:
- pival = (int *)dummy_var;
- *pival = 0xffffffff;
+ pivar = (__u32 *)&glvar;
+ *pivar = 0xffffffff;
break;
case 8:
- plval = (long long *)dummy_var;
- *plval = 0xffffffffffffffffLL;
+ plvar = (__u64 *)&glvar;
+ *plvar = 0xffffffffffffffffLL;
break;
}
}
static void read_var(int len)
{
- char cval __attribute__((unused));
- short sval __attribute__((unused));
- int ival __attribute__((unused));
- long long lval __attribute__((unused));
+ __u8 cvar __attribute__((unused));
+ __u16 svar __attribute__((unused));
+ __u32 ivar __attribute__((unused));
+ __u64 lvar __attribute__((unused));
switch (len) {
case 1:
- cval = *(char *)dummy_var;
+ cvar = (__u8)glvar;
break;
case 2:
- sval = *(short *)dummy_var;
+ svar = (__u16)glvar;
break;
case 4:
- ival = *(int *)dummy_var;
+ ivar = (__u32)glvar;
break;
case 8:
- lval = *(long long *)dummy_var;
+ lvar = (__u64)glvar;
break;
}
}
-/*
- * Do the r/w accesses to trigger the breakpoints. And run
- * the usual traps.
- */
-static void trigger_tests(void)
+static void test_workload(void)
{
- int len, ret;
+ __u8 cvar __attribute__((unused));
+ __u32 ivar __attribute__((unused));
+ int len = 0;
- ret = ptrace(PTRACE_TRACEME, 0, NULL, 0);
- if (ret) {
- perror("Can't be traced?\n");
- return;
+ if (ptrace(PTRACE_TRACEME, 0, NULL, 0)) {
+ perror("Child can't be traced?");
+ exit(-1);
}
/* Wake up father so that it sets up the first test */
kill(getpid(), SIGUSR1);
- /* Test write watchpoints */
- for (len = 1; len <= sizeof(long); len <<= 1)
+ /* PTRACE_SET_DEBUGREG, WO test */
+ for (len = 1; len <= sizeof(glvar); len <<= 1)
write_var(len);
- /* Test read/write watchpoints (on read accesses) */
- for (len = 1; len <= sizeof(long); len <<= 1)
+ /* PTRACE_SET_DEBUGREG, RO test */
+ for (len = 1; len <= sizeof(glvar); len <<= 1)
read_var(len);
- /* Test when breakpoint is unset */
-
- /* Test write watchpoints */
- for (len = 1; len <= sizeof(long); len <<= 1)
- write_var(len);
+ /* PTRACE_SET_DEBUGREG, RW test */
+ for (len = 1; len <= sizeof(glvar); len <<= 1) {
+ if (rand() % 2)
+ read_var(len);
+ else
+ write_var(len);
+ }
- /* Test read/write watchpoints (on read accesses) */
- for (len = 1; len <= sizeof(long); len <<= 1)
- read_var(len);
+ /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, WO test */
+ write_var(1);
+
+ /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RO test */
+ read_var(1);
+
+ /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RW test */
+ if (rand() % 2)
+ write_var(1);
+ else
+ read_var(1);
+
+ /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, WO test */
+ gstruct.a[rand() % A_LEN] = 'a';
+
+ /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RO test */
+ cvar = gstruct.a[rand() % A_LEN];
+
+ /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RW test */
+ if (rand() % 2)
+ gstruct.a[rand() % A_LEN] = 'a';
+ else
+ cvar = gstruct.a[rand() % A_LEN];
+
+ /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, WO test */
+ gstruct.b[rand() % B_LEN] = 'b';
+
+ /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, RO test */
+ cvar = gstruct.b[rand() % B_LEN];
+
+ /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, RW test */
+ if (rand() % 2)
+ gstruct.b[rand() % B_LEN] = 'b';
+ else
+ cvar = gstruct.b[rand() % B_LEN];
+
+ /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, DAR OUTSIDE, RW test */
+ if (rand() % 2)
+ *((int *)(gstruct.a + 4)) = 10;
+ else
+ ivar = *((int *)(gstruct.a + 4));
+
+ /* PPC_PTRACE_SETHWDEBUG. DAWR_MAX_LEN. RW test */
+ if (rand() % 2)
+ big_var[rand() % DAWR_MAX_LEN] = 'a';
+ else
+ cvar = big_var[rand() % DAWR_MAX_LEN];
}
-static void check_success(const char *msg)
+static void check_success(pid_t child_pid, const char *name, const char *type,
+ unsigned long saddr, int len)
{
- const char *msg2;
int status;
+ siginfo_t siginfo;
+ unsigned long eaddr = (saddr + len - 1) | 0x7;
+
+ saddr &= ~0x7;
/* Wait for the child to SIGTRAP */
wait(&status);
- msg2 = "Failed";
+ ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &siginfo);
- if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) {
- msg2 = "Child process hit the breakpoint";
+ if (!WIFSTOPPED(status) || WSTOPSIG(status) != SIGTRAP ||
+ (unsigned long)siginfo.si_addr < saddr ||
+ (unsigned long)siginfo.si_addr > eaddr) {
+ printf("%s, %s, len: %d: Fail\n", name, type, len);
+ exit(-1);
}
- printf("%s Result: [%s]\n", msg, msg2);
+ printf("%s, %s, len: %d: Ok\n", name, type, len);
+
+ if (!is_8xx) {
+ /*
+ * For ptrace registered watchpoint, signal is generated
+ * before executing load/store. Singlestep the instruction
+ * and then continue the test.
+ */
+ ptrace(PTRACE_SINGLESTEP, child_pid, NULL, 0);
+ wait(NULL);
+ }
}
-static void launch_watchpoints(char *buf, int mode, int len,
- struct ppc_debug_info *dbginfo, bool dawr)
+static void ptrace_set_debugreg(pid_t child_pid, unsigned long wp_addr)
{
- const char *mode_str;
- unsigned long data = (unsigned long)(dummy_var);
- int wh, range;
-
- data &= ~0x7UL;
-
- if (mode == BP_W) {
- data |= (1UL << 1);
- mode_str = "write";
- } else {
- data |= (1UL << 0);
- data |= (1UL << 1);
- mode_str = "read";
+ if (ptrace(PTRACE_SET_DEBUGREG, child_pid, 0, wp_addr)) {
+ perror("PTRACE_SET_DEBUGREG failed");
+ exit(-1);
}
+}
- /* Set DABR_TRANSLATION bit */
- data |= (1UL << 2);
-
- /* use PTRACE_SET_DEBUGREG breakpoints */
- set_breakpoint_addr((void *)data);
- ptrace(PTRACE_CONT, child_pid, NULL, 0);
- sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
- check_success(buf);
- /* Unregister hw brkpoint */
- set_breakpoint_addr(NULL);
+static int ptrace_sethwdebug(pid_t child_pid, struct ppc_hw_breakpoint *info)
+{
+ int wh = ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, info);
- data = (data & ~7); /* remove dabr control bits */
+ if (wh <= 0) {
+ perror("PPC_PTRACE_SETHWDEBUG failed");
+ exit(-1);
+ }
+ return wh;
+}
- /* use PPC_PTRACE_SETHWDEBUG breakpoint */
- if (!(dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE))
- return; /* not supported */
- wh = set_hwbreakpoint_addr((void *)data, 0);
- ptrace(PTRACE_CONT, child_pid, NULL, 0);
- sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
- check_success(buf);
- /* Unregister hw brkpoint */
- del_hwbreakpoint_addr(wh);
-
- /* try a wider range */
- range = 8;
- if (dawr)
- range = 512 - ((int)data & (DAWR_LENGTH_MAX - 1));
- wh = set_hwbreakpoint_addr((void *)data, range);
- ptrace(PTRACE_CONT, child_pid, NULL, 0);
- sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
- check_success(buf);
- /* Unregister hw brkpoint */
- del_hwbreakpoint_addr(wh);
+static void ptrace_delhwdebug(pid_t child_pid, int wh)
+{
+ if (ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, wh) < 0) {
+ perror("PPC_PTRACE_DELHWDEBUG failed");
+ exit(-1);
+ }
}
-/* Set the breakpoints and check the child successfully trigger them */
-static int launch_tests(bool dawr)
+#define DABR_READ_SHIFT 0
+#define DABR_WRITE_SHIFT 1
+#define DABR_TRANSLATION_SHIFT 2
+
+static int test_set_debugreg(pid_t child_pid)
{
- char buf[1024];
- int len, i, status;
+ unsigned long wp_addr = (unsigned long)&glvar;
+ char *name = "PTRACE_SET_DEBUGREG";
+ int len;
+
+ /* PTRACE_SET_DEBUGREG, WO test*/
+ wp_addr &= ~0x7UL;
+ wp_addr |= (1UL << DABR_WRITE_SHIFT);
+ wp_addr |= (1UL << DABR_TRANSLATION_SHIFT);
+ for (len = 1; len <= sizeof(glvar); len <<= 1) {
+ ptrace_set_debugreg(child_pid, wp_addr);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name, "WO", wp_addr, len);
+ }
- struct ppc_debug_info dbginfo;
+ /* PTRACE_SET_DEBUGREG, RO test */
+ wp_addr &= ~0x7UL;
+ wp_addr |= (1UL << DABR_READ_SHIFT);
+ wp_addr |= (1UL << DABR_TRANSLATION_SHIFT);
+ for (len = 1; len <= sizeof(glvar); len <<= 1) {
+ ptrace_set_debugreg(child_pid, wp_addr);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name, "RO", wp_addr, len);
+ }
- i = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo);
- if (i) {
- perror("Can't set breakpoint info\n");
- exit(-1);
+ /* PTRACE_SET_DEBUGREG, RW test */
+ wp_addr &= ~0x7UL;
+ wp_addr |= (1Ul << DABR_READ_SHIFT);
+ wp_addr |= (1UL << DABR_WRITE_SHIFT);
+ wp_addr |= (1UL << DABR_TRANSLATION_SHIFT);
+ for (len = 1; len <= sizeof(glvar); len <<= 1) {
+ ptrace_set_debugreg(child_pid, wp_addr);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name, "RW", wp_addr, len);
}
- if (!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_RANGE))
- printf("WARNING: Kernel doesn't support PPC_PTRACE_SETHWDEBUG\n");
- /* Write watchpoint */
- for (len = 1; len <= sizeof(long); len <<= 1)
- launch_watchpoints(buf, BP_W, len, &dbginfo, dawr);
+ ptrace_set_debugreg(child_pid, 0);
+ return 0;
+}
- /* Read-Write watchpoint */
- for (len = 1; len <= sizeof(long); len <<= 1)
- launch_watchpoints(buf, BP_RW, len, &dbginfo, dawr);
+static void get_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type,
+ unsigned long addr, int len)
+{
+ info->version = 1;
+ info->trigger_type = type;
+ info->condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
+ info->addr = (__u64)addr;
+ info->addr2 = (__u64)addr + len;
+ info->condition_value = 0;
+ if (!len)
+ info->addr_mode = PPC_BREAKPOINT_MODE_EXACT;
+ else
+ info->addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
+}
+static void test_sethwdebug_exact(pid_t child_pid)
+{
+ struct ppc_hw_breakpoint info;
+ unsigned long wp_addr = (unsigned long)&glvar;
+ char *name = "PPC_PTRACE_SETHWDEBUG, MODE_EXACT";
+ int len = 1; /* hardcoded in kernel */
+ int wh;
+
+ /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, WO test */
+ get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, 0);
+ wh = ptrace_sethwdebug(child_pid, &info);
ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name, "WO", wp_addr, len);
+ ptrace_delhwdebug(child_pid, wh);
- /*
- * Now we have unregistered the breakpoint, access by child
- * should not cause SIGTRAP.
- */
+ /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RO test */
+ get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_READ, wp_addr, 0);
+ wh = ptrace_sethwdebug(child_pid, &info);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name, "RO", wp_addr, len);
+ ptrace_delhwdebug(child_pid, wh);
- wait(&status);
+ /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RW test */
+ get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, 0);
+ wh = ptrace_sethwdebug(child_pid, &info);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name, "RW", wp_addr, len);
+ ptrace_delhwdebug(child_pid, wh);
+}
- if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) {
- printf("FAIL: Child process hit the breakpoint, which is not expected\n");
- ptrace(PTRACE_CONT, child_pid, NULL, 0);
- return TEST_FAIL;
- }
+static void test_sethwdebug_range_aligned(pid_t child_pid)
+{
+ struct ppc_hw_breakpoint info;
+ unsigned long wp_addr;
+ char *name = "PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED";
+ int len;
+ int wh;
+
+ /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, WO test */
+ wp_addr = (unsigned long)&gstruct.a;
+ len = A_LEN;
+ get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, len);
+ wh = ptrace_sethwdebug(child_pid, &info);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name, "WO", wp_addr, len);
+ ptrace_delhwdebug(child_pid, wh);
+
+ /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RO test */
+ wp_addr = (unsigned long)&gstruct.a;
+ len = A_LEN;
+ get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_READ, wp_addr, len);
+ wh = ptrace_sethwdebug(child_pid, &info);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name, "RO", wp_addr, len);
+ ptrace_delhwdebug(child_pid, wh);
+
+ /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RW test */
+ wp_addr = (unsigned long)&gstruct.a;
+ len = A_LEN;
+ get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len);
+ wh = ptrace_sethwdebug(child_pid, &info);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name, "RW", wp_addr, len);
+ ptrace_delhwdebug(child_pid, wh);
+}
- if (WIFEXITED(status))
- printf("Child exited normally\n");
+static void test_sethwdebug_range_unaligned(pid_t child_pid)
+{
+ struct ppc_hw_breakpoint info;
+ unsigned long wp_addr;
+ char *name = "PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED";
+ int len;
+ int wh;
+
+ /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, WO test */
+ wp_addr = (unsigned long)&gstruct.b;
+ len = B_LEN;
+ get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, len);
+ wh = ptrace_sethwdebug(child_pid, &info);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name, "WO", wp_addr, len);
+ ptrace_delhwdebug(child_pid, wh);
+
+ /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, RO test */
+ wp_addr = (unsigned long)&gstruct.b;
+ len = B_LEN;
+ get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_READ, wp_addr, len);
+ wh = ptrace_sethwdebug(child_pid, &info);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name, "RO", wp_addr, len);
+ ptrace_delhwdebug(child_pid, wh);
+
+ /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, RW test */
+ wp_addr = (unsigned long)&gstruct.b;
+ len = B_LEN;
+ get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len);
+ wh = ptrace_sethwdebug(child_pid, &info);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name, "RW", wp_addr, len);
+ ptrace_delhwdebug(child_pid, wh);
- return TEST_PASS;
+}
+
+static void test_sethwdebug_range_unaligned_dar(pid_t child_pid)
+{
+ struct ppc_hw_breakpoint info;
+ unsigned long wp_addr;
+ char *name = "PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, DAR OUTSIDE";
+ int len;
+ int wh;
+
+ /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, DAR OUTSIDE, RW test */
+ wp_addr = (unsigned long)&gstruct.b;
+ len = B_LEN;
+ get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, len);
+ wh = ptrace_sethwdebug(child_pid, &info);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name, "RW", wp_addr, len);
+ ptrace_delhwdebug(child_pid, wh);
+}
+
+static void test_sethwdebug_dawr_max_range(pid_t child_pid)
+{
+ struct ppc_hw_breakpoint info;
+ unsigned long wp_addr;
+ char *name = "PPC_PTRACE_SETHWDEBUG, DAWR_MAX_LEN";
+ int len;
+ int wh;
+
+ /* PPC_PTRACE_SETHWDEBUG, DAWR_MAX_LEN, RW test */
+ wp_addr = (unsigned long)big_var;
+ len = DAWR_MAX_LEN;
+ get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len);
+ wh = ptrace_sethwdebug(child_pid, &info);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name, "RW", wp_addr, len);
+ ptrace_delhwdebug(child_pid, wh);
+}
+
+/* Set the breakpoints and check the child successfully trigger them */
+static void
+run_tests(pid_t child_pid, struct ppc_debug_info *dbginfo, bool dawr)
+{
+ test_set_debugreg(child_pid);
+ if (dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE) {
+ test_sethwdebug_exact(child_pid);
+
+ test_sethwdebug_range_aligned(child_pid);
+ if (dawr || is_8xx) {
+ test_sethwdebug_range_unaligned(child_pid);
+ test_sethwdebug_range_unaligned_dar(child_pid);
+ test_sethwdebug_dawr_max_range(child_pid);
+ }
+ }
}
static int ptrace_hwbreak(void)
{
- pid_t pid;
- int ret;
+ pid_t child_pid;
+ struct ppc_debug_info dbginfo;
bool dawr;
- pid = fork();
- if (!pid) {
- trigger_tests();
+ child_pid = fork();
+ if (!child_pid) {
+ test_workload();
return 0;
}
wait(NULL);
- child_pid = pid;
+ get_dbginfo(child_pid, &dbginfo);
+ SKIP_IF(dbginfo.num_data_bps == 0);
- get_dbginfo();
- SKIP_IF(!hwbreak_present());
- dawr = dawr_present();
-
- ret = launch_tests(dawr);
+ dawr = dawr_present(&dbginfo);
+ run_tests(child_pid, &dbginfo, dawr);
+ /* Let the child exit first. */
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
wait(NULL);
- return ret;
+ /*
+ * Testcases exits immediately with -1 on any failure. If
+ * it has reached here, it means all tests were successful.
+ */
+ return TEST_PASS;
}
int main(int argc, char **argv, char **envp)
{
+ int pvr = 0;
+ asm __volatile__ ("mfspr %0,%1" : "=r"(pvr) : "i"(SPRN_PVR));
+ if (pvr == PVR_8xx)
+ is_8xx = true;
+
return test_harness(ptrace_hwbreak, "ptrace-hwbreak");
}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
index 25e23e73c72e..2ecfa1158e2b 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
@@ -73,7 +73,7 @@ trans:
[sprn_texasr]"i"(SPRN_TEXASR), [tar_1]"i"(TAR_1),
[dscr_1]"i"(DSCR_1), [tar_2]"i"(TAR_2), [dscr_2]"i"(DSCR_2),
[tar_3]"i"(TAR_3), [dscr_3]"i"(DSCR_3)
- : "memory", "r0", "r1", "r3", "r4", "r5", "r6"
+ : "memory", "r0", "r3", "r4", "r5", "r6", "lr"
);
/* TM failed, analyse */
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
index f603fe5a445b..6f7fb51f0809 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
@@ -74,8 +74,8 @@ trans:
"3: ;"
: [res] "=r" (result), [texasr] "=r" (texasr)
: [sprn_texasr] "i" (SPRN_TEXASR)
- : "memory", "r0", "r1", "r3", "r4",
- "r7", "r8", "r9", "r10", "r11"
+ : "memory", "r0", "r3", "r4",
+ "r7", "r8", "r9", "r10", "r11", "lr"
);
if (result) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
index e0d37f07bdeb..46ef378a15ec 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
@@ -62,7 +62,7 @@ trans:
[sprn_ppr]"i"(SPRN_PPR), [sprn_texasr]"i"(SPRN_TEXASR),
[tar_1]"i"(TAR_1), [dscr_1]"i"(DSCR_1), [tar_2]"i"(TAR_2),
[dscr_2]"i"(DSCR_2), [cptr1] "b" (&cptr[1])
- : "memory", "r0", "r1", "r3", "r4", "r5", "r6"
+ : "memory", "r0", "r3", "r4", "r5", "r6"
);
/* TM failed, analyse */
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
index 8027457b97b7..70ca01234f79 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
@@ -62,8 +62,8 @@ trans:
"3: ;"
: [res] "=r" (result), [texasr] "=r" (texasr)
: [sprn_texasr] "i" (SPRN_TEXASR), [cptr1] "b" (&cptr[1])
- : "memory", "r0", "r1", "r3", "r4",
- "r7", "r8", "r9", "r10", "r11"
+ : "memory", "r0", "r3", "r4",
+ "r7", "r8", "r9", "r10", "r11", "lr"
);
if (result) {
diff --git a/tools/testing/selftests/powerpc/security/.gitignore b/tools/testing/selftests/powerpc/security/.gitignore
new file mode 100644
index 000000000000..0b969fba3beb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/.gitignore
@@ -0,0 +1 @@
+rfi_flush
diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile
index 85861c46b445..eadbbff50be6 100644
--- a/tools/testing/selftests/powerpc/security/Makefile
+++ b/tools/testing/selftests/powerpc/security/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0+
-TEST_GEN_PROGS := rfi_flush
+TEST_GEN_PROGS := rfi_flush spectre_v2
top_srcdir = ../../../../..
CFLAGS += -I../../../../../usr/include
@@ -8,3 +8,6 @@ CFLAGS += -I../../../../../usr/include
include ../../lib.mk
$(TEST_GEN_PROGS): ../harness.c ../utils.c
+
+$(OUTPUT)/spectre_v2: CFLAGS += -m64
+$(OUTPUT)/spectre_v2: ../pmu/event.c branch_loops.S
diff --git a/tools/testing/selftests/powerpc/security/branch_loops.S b/tools/testing/selftests/powerpc/security/branch_loops.S
new file mode 100644
index 000000000000..22e9204e3421
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/branch_loops.S
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2019, Michael Ellerman, IBM Corp.
+ */
+
+#include <ppc-asm.h>
+
+ .data
+
+jump_table:
+ .long 0x0
+ .long (.Lstate_1 - .Lstate_0)
+ .long (.Lstate_2 - .Lstate_0)
+ .long (.Lstate_3 - .Lstate_0)
+ .long (.Lstate_4 - .Lstate_0)
+ .long (.Lstate_5 - .Lstate_0)
+ .long (.Lstate_6 - .Lstate_0)
+ .long (.Lstate_7 - .Lstate_0)
+
+ .text
+
+#define ITER_SHIFT 31
+
+.macro state number
+ .balign 32
+.Lstate_\number:
+ .if \number==7
+ li r3, 0
+ .else
+ li r3, \number+1
+ .endif
+ b .Lloop
+.endm
+
+FUNC_START(pattern_cache_loop)
+ li r3, 0
+ li r4, 1
+ sldi r4, r4, ITER_SHIFT
+
+.Lloop: cmpdi r4, 0
+ beqlr
+
+ addi r4, r4, -1
+
+ ld r6, jump_table@got(%r2)
+ sldi r5, r3, 2
+ lwax r6, r5, r6
+ ld r7, .Lstate_0@got(%r2)
+ add r6, r6, r7
+ mtctr r6
+ bctr
+
+ state 0
+ state 1
+ state 2
+ state 3
+ state 4
+ state 5
+ state 6
+ state 7
+
+FUNC_END(pattern_cache_loop)
+
+
+FUNC_START(indirect_branch_loop)
+ li r3, 1
+ sldi r3, r3, ITER_SHIFT
+
+1: cmpdi r3, 0
+ beqlr
+
+ addi r3, r3, -1
+
+ ld r4, 2f@got(%r2)
+ mtctr r4
+ bctr
+
+ .balign 32
+2: b 1b
+
+FUNC_END(indirect_branch_loop)
diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c
new file mode 100644
index 000000000000..8c6b982af2a8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/spectre_v2.c
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2018-2019 IBM Corporation.
+ */
+
+#define __SANE_USERSPACE_TYPES__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <malloc.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/prctl.h>
+#include "utils.h"
+
+#include "../pmu/event.h"
+
+
+extern void pattern_cache_loop(void);
+extern void indirect_branch_loop(void);
+
+static int do_count_loop(struct event *events, bool is_p9, s64 *miss_percent)
+{
+ u64 pred, mpred;
+
+ prctl(PR_TASK_PERF_EVENTS_ENABLE);
+
+ if (is_p9)
+ pattern_cache_loop();
+ else
+ indirect_branch_loop();
+
+ prctl(PR_TASK_PERF_EVENTS_DISABLE);
+
+ event_read(&events[0]);
+ event_read(&events[1]);
+
+ // We could scale all the events by running/enabled but we're lazy
+ // As long as the PMU is uncontended they should all run
+ FAIL_IF(events[0].result.running != events[0].result.enabled);
+ FAIL_IF(events[1].result.running != events[1].result.enabled);
+
+ pred = events[0].result.value;
+ mpred = events[1].result.value;
+
+ if (is_p9) {
+ event_read(&events[2]);
+ event_read(&events[3]);
+ FAIL_IF(events[2].result.running != events[2].result.enabled);
+ FAIL_IF(events[3].result.running != events[3].result.enabled);
+
+ pred += events[2].result.value;
+ mpred += events[3].result.value;
+ }
+
+ *miss_percent = 100 * mpred / pred;
+
+ return 0;
+}
+
+static void setup_event(struct event *e, u64 config, char *name)
+{
+ event_init_named(e, config, name);
+
+ e->attr.disabled = 1;
+ e->attr.exclude_kernel = 1;
+ e->attr.exclude_hv = 1;
+ e->attr.exclude_idle = 1;
+}
+
+enum spectre_v2_state {
+ VULNERABLE = 0,
+ UNKNOWN = 1, // Works with FAIL_IF()
+ NOT_AFFECTED,
+ BRANCH_SERIALISATION,
+ COUNT_CACHE_DISABLED,
+ COUNT_CACHE_FLUSH_SW,
+ COUNT_CACHE_FLUSH_HW,
+ BTB_FLUSH,
+};
+
+static enum spectre_v2_state get_sysfs_state(void)
+{
+ enum spectre_v2_state state = UNKNOWN;
+ char buf[256];
+ int len;
+
+ memset(buf, 0, sizeof(buf));
+ FAIL_IF(read_sysfs_file("devices/system/cpu/vulnerabilities/spectre_v2", buf, sizeof(buf)));
+
+ // Make sure it's NULL terminated
+ buf[sizeof(buf) - 1] = '\0';
+
+ // Trim the trailing newline
+ len = strlen(buf);
+ FAIL_IF(len < 1);
+ buf[len - 1] = '\0';
+
+ printf("sysfs reports: '%s'\n", buf);
+
+ // Order matters
+ if (strstr(buf, "Vulnerable"))
+ state = VULNERABLE;
+ else if (strstr(buf, "Not affected"))
+ state = NOT_AFFECTED;
+ else if (strstr(buf, "Indirect branch serialisation (kernel only)"))
+ state = BRANCH_SERIALISATION;
+ else if (strstr(buf, "Indirect branch cache disabled"))
+ state = COUNT_CACHE_DISABLED;
+ else if (strstr(buf, "Software count cache flush (hardware accelerated)"))
+ state = COUNT_CACHE_FLUSH_HW;
+ else if (strstr(buf, "Software count cache flush"))
+ state = COUNT_CACHE_FLUSH_SW;
+ else if (strstr(buf, "Branch predictor state flush"))
+ state = BTB_FLUSH;
+
+ return state;
+}
+
+#define PM_BR_PRED_CCACHE 0x040a4 // P8 + P9
+#define PM_BR_MPRED_CCACHE 0x040ac // P8 + P9
+#define PM_BR_PRED_PCACHE 0x048a0 // P9 only
+#define PM_BR_MPRED_PCACHE 0x048b0 // P9 only
+
+#define SPRN_PVR 287
+
+int spectre_v2_test(void)
+{
+ enum spectre_v2_state state;
+ struct event events[4];
+ s64 miss_percent;
+ bool is_p9;
+
+ state = get_sysfs_state();
+ if (state == UNKNOWN) {
+ printf("Error: couldn't determine spectre_v2 mitigation state?\n");
+ return -1;
+ }
+
+ memset(events, 0, sizeof(events));
+
+ setup_event(&events[0], PM_BR_PRED_CCACHE, "PM_BR_PRED_CCACHE");
+ setup_event(&events[1], PM_BR_MPRED_CCACHE, "PM_BR_MPRED_CCACHE");
+ FAIL_IF(event_open(&events[0]));
+ FAIL_IF(event_open_with_group(&events[1], events[0].fd) == -1);
+
+ is_p9 = ((mfspr(SPRN_PVR) >> 16) & 0xFFFF) == 0x4e;
+
+ if (is_p9) {
+ // Count pattern cache too
+ setup_event(&events[2], PM_BR_PRED_PCACHE, "PM_BR_PRED_PCACHE");
+ setup_event(&events[3], PM_BR_MPRED_PCACHE, "PM_BR_MPRED_PCACHE");
+
+ FAIL_IF(event_open_with_group(&events[2], events[0].fd) == -1);
+ FAIL_IF(event_open_with_group(&events[3], events[0].fd) == -1);
+ }
+
+ FAIL_IF(do_count_loop(events, is_p9, &miss_percent));
+
+ event_report_justified(&events[0], 18, 10);
+ event_report_justified(&events[1], 18, 10);
+ event_close(&events[0]);
+ event_close(&events[1]);
+
+ if (is_p9) {
+ event_report_justified(&events[2], 18, 10);
+ event_report_justified(&events[3], 18, 10);
+ event_close(&events[2]);
+ event_close(&events[3]);
+ }
+
+ printf("Miss percent %lld %%\n", miss_percent);
+
+ switch (state) {
+ case VULNERABLE:
+ case NOT_AFFECTED:
+ case COUNT_CACHE_FLUSH_SW:
+ case COUNT_CACHE_FLUSH_HW:
+ // These should all not affect userspace branch prediction
+ if (miss_percent > 15) {
+ printf("Branch misses > 15%% unexpected in this configuration!\n");
+ printf("Possible mis-match between reported & actual mitigation\n");
+ return 1;
+ }
+ break;
+ case BRANCH_SERIALISATION:
+ // This seems to affect userspace branch prediction a bit?
+ if (miss_percent > 25) {
+ printf("Branch misses > 25%% unexpected in this configuration!\n");
+ printf("Possible mis-match between reported & actual mitigation\n");
+ return 1;
+ }
+ break;
+ case COUNT_CACHE_DISABLED:
+ if (miss_percent < 95) {
+ printf("Branch misses < 20%% unexpected in this configuration!\n");
+ printf("Possible mis-match between reported & actual mitigation\n");
+ return 1;
+ }
+ break;
+ case UNKNOWN:
+ case BTB_FLUSH:
+ printf("Not sure!\n");
+ return 1;
+ }
+
+ printf("OK - Measured branch prediction rates match reported spectre v2 mitigation.\n");
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(spectre_v2_test, "spectre_v2");
+}
diff --git a/tools/testing/selftests/powerpc/signal/sigfuz.c b/tools/testing/selftests/powerpc/signal/sigfuz.c
index dade00c698c2..08f9afe3b95c 100644
--- a/tools/testing/selftests/powerpc/signal/sigfuz.c
+++ b/tools/testing/selftests/powerpc/signal/sigfuz.c
@@ -42,7 +42,7 @@
#include "utils.h"
/* Selftest defaults */
-#define COUNT_MAX 4000 /* Number of interactions */
+#define COUNT_MAX 600 /* Number of interactions */
#define THREADS 16 /* Number of threads */
/* Arguments options */
diff --git a/tools/testing/selftests/powerpc/stringloops/.gitignore b/tools/testing/selftests/powerpc/stringloops/.gitignore
index 0b43da74ee46..31a17e0ba884 100644
--- a/tools/testing/selftests/powerpc/stringloops/.gitignore
+++ b/tools/testing/selftests/powerpc/stringloops/.gitignore
@@ -1 +1,4 @@
-memcmp
+memcmp_64
+memcmp_32
+strlen
+strlen_32
diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore
index 951fe855f7cd..98f2708d86cc 100644
--- a/tools/testing/selftests/powerpc/tm/.gitignore
+++ b/tools/testing/selftests/powerpc/tm/.gitignore
@@ -17,3 +17,4 @@ tm-vmx-unavail
tm-unavailable
tm-trap
tm-sigreturn
+tm-poison
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index c0734ed0ef56..b15a1a325bd0 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -5,7 +5,7 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu
TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \
$(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn tm-signal-sigreturn-nt \
- tm-signal-context-force-tm
+ tm-signal-context-force-tm tm-poison
top_srcdir = ../../../../..
include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/tm/tm-poison.c b/tools/testing/selftests/powerpc/tm/tm-poison.c
new file mode 100644
index 000000000000..977558497c16
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-poison.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2019, Gustavo Romero, Michael Neuling, IBM Corp.
+ *
+ * This test will spawn two processes. Both will be attached to the same
+ * CPU (CPU 0). The child will be in a loop writing to FP register f31 and
+ * VMX/VEC/Altivec register vr31 a known value, called poison, calling
+ * sched_yield syscall after to allow the parent to switch on the CPU.
+ * Parent will set f31 and vr31 to 1 and in a loop will check if f31 and
+ * vr31 remain 1 as expected until a given timeout (2m). If the issue is
+ * present child's poison will leak into parent's f31 or vr31 registers,
+ * otherwise, poison will never leak into parent's f31 and vr31 registers.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sched.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <inttypes.h>
+
+#include "tm.h"
+
+int tm_poison_test(void)
+{
+ int pid;
+ cpu_set_t cpuset;
+ uint64_t poison = 0xdeadbeefc0dec0fe;
+ uint64_t unknown = 0;
+ bool fail_fp = false;
+ bool fail_vr = false;
+
+ SKIP_IF(!have_htm());
+
+ /* Attach both Child and Parent to CPU 0 */
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ sched_setaffinity(0, sizeof(cpuset), &cpuset);
+
+ pid = fork();
+ if (!pid) {
+ /**
+ * child
+ */
+ while (1) {
+ sched_yield();
+ asm (
+ "mtvsrd 31, %[poison];" // f31 = poison
+ "mtvsrd 63, %[poison];" // vr31 = poison
+
+ : : [poison] "r" (poison) : );
+ }
+ }
+
+ /**
+ * parent
+ */
+ asm (
+ /*
+ * Set r3, r4, and f31 to known value 1 before entering
+ * in transaction. They won't be written after that.
+ */
+ " li 3, 0x1 ;"
+ " li 4, 0x1 ;"
+ " mtvsrd 31, 4 ;"
+
+ /*
+ * The Time Base (TB) is a 64-bit counter register that is
+ * independent of the CPU clock and which is incremented
+ * at a frequency of 512000000 Hz, so every 1.953125ns.
+ * So it's necessary 120s/0.000000001953125s = 61440000000
+ * increments to get a 2 minutes timeout. Below we set that
+ * value in r5 and then use r6 to track initial TB value,
+ * updating TB values in r7 at every iteration and comparing it
+ * to r6. When r7 (current) - r6 (initial) > 61440000000 we bail
+ * out since for sure we spent already 2 minutes in the loop.
+ * SPR 268 is the TB register.
+ */
+ " lis 5, 14 ;"
+ " ori 5, 5, 19996 ;"
+ " sldi 5, 5, 16 ;" // r5 = 61440000000
+
+ " mfspr 6, 268 ;" // r6 (TB initial)
+ "1: mfspr 7, 268 ;" // r7 (TB current)
+ " subf 7, 6, 7 ;" // r7 - r6 > 61440000000 ?
+ " cmpd 7, 5 ;"
+ " bgt 3f ;" // yes, exit
+
+ /*
+ * Main loop to check f31
+ */
+ " tbegin. ;" // no, try again
+ " beq 1b ;" // restart if no timeout
+ " mfvsrd 3, 31 ;" // read f31
+ " cmpd 3, 4 ;" // f31 == 1 ?
+ " bne 2f ;" // broken :-(
+ " tabort. 3 ;" // try another transaction
+ "2: tend. ;" // commit transaction
+ "3: mr %[unknown], 3 ;" // record r3
+
+ : [unknown] "=r" (unknown)
+ :
+ : "cr0", "r3", "r4", "r5", "r6", "r7", "vs31"
+
+ );
+
+ /*
+ * On leak 'unknown' will contain 'poison' value from child,
+ * otherwise (no leak) 'unknown' will contain the same value
+ * as r3 before entering in transactional mode, i.e. 0x1.
+ */
+ fail_fp = unknown != 0x1;
+ if (fail_fp)
+ printf("Unknown value %#"PRIx64" leaked into f31!\n", unknown);
+ else
+ printf("Good, no poison or leaked value into FP registers\n");
+
+ asm (
+ /*
+ * Set r3, r4, and vr31 to known value 1 before entering
+ * in transaction. They won't be written after that.
+ */
+ " li 3, 0x1 ;"
+ " li 4, 0x1 ;"
+ " mtvsrd 63, 4 ;"
+
+ " lis 5, 14 ;"
+ " ori 5, 5, 19996 ;"
+ " sldi 5, 5, 16 ;" // r5 = 61440000000
+
+ " mfspr 6, 268 ;" // r6 (TB initial)
+ "1: mfspr 7, 268 ;" // r7 (TB current)
+ " subf 7, 6, 7 ;" // r7 - r6 > 61440000000 ?
+ " cmpd 7, 5 ;"
+ " bgt 3f ;" // yes, exit
+
+ /*
+ * Main loop to check vr31
+ */
+ " tbegin. ;" // no, try again
+ " beq 1b ;" // restart if no timeout
+ " mfvsrd 3, 63 ;" // read vr31
+ " cmpd 3, 4 ;" // vr31 == 1 ?
+ " bne 2f ;" // broken :-(
+ " tabort. 3 ;" // try another transaction
+ "2: tend. ;" // commit transaction
+ "3: mr %[unknown], 3 ;" // record r3
+
+ : [unknown] "=r" (unknown)
+ :
+ : "cr0", "r3", "r4", "r5", "r6", "r7", "vs63"
+
+ );
+
+ /*
+ * On leak 'unknown' will contain 'poison' value from child,
+ * otherwise (no leak) 'unknown' will contain the same value
+ * as r3 before entering in transactional mode, i.e. 0x1.
+ */
+ fail_vr = unknown != 0x1;
+ if (fail_vr)
+ printf("Unknown value %#"PRIx64" leaked into vr31!\n", unknown);
+ else
+ printf("Good, no poison or leaked value into VEC registers\n");
+
+ kill(pid, SIGKILL);
+
+ return (fail_fp | fail_vr);
+}
+
+int main(int argc, char *argv[])
+{
+ /* Test completes in about 4m */
+ test_harness_set_timeout(250);
+ return test_harness(tm_poison_test, "tm_poison_test");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c
index d57c2d2ab6ec..254f912ad611 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c
@@ -5,10 +5,11 @@
* Test the kernel's signal frame code.
*
* The kernel sets up two sets of ucontexts if the signal was to be
- * delivered while the thread was in a transaction.
+ * delivered while the thread was in a transaction (referred too as
+ * first and second contexts).
* Expected behaviour is that the checkpointed state is in the user
- * context passed to the signal handler. The speculated state can be
- * accessed with the uc_link pointer.
+ * context passed to the signal handler (first context). The speculated
+ * state can be accessed with the uc_link pointer (second context).
*
* The rationale for this is that if TM unaware code (which linked
* against TM libs) installs a signal handler it will not know of the
@@ -28,17 +29,20 @@
#define MAX_ATTEMPT 500000
-#define NV_FPU_REGS 18
+#define NV_FPU_REGS 18 /* Number of non-volatile FP registers */
+#define FPR14 14 /* First non-volatile FP register to check in f14-31 subset */
long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector int *vms, vector int *vss);
-/* Be sure there are 2x as many as there are NV FPU regs (2x18) */
+/* Test only non-volatile registers, i.e. 18 fpr registers from f14 to f31 */
static double fps[] = {
+ /* First context will be set with these values, i.e. non-speculative */
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ /* Second context will be set with these values, i.e. speculative */
-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18
};
-static sig_atomic_t fail;
+static sig_atomic_t fail, broken;
static void signal_usr1(int signum, siginfo_t *info, void *uc)
{
@@ -46,11 +50,24 @@ static void signal_usr1(int signum, siginfo_t *info, void *uc)
ucontext_t *ucp = uc;
ucontext_t *tm_ucp = ucp->uc_link;
- for (i = 0; i < NV_FPU_REGS && !fail; i++) {
- fail = (ucp->uc_mcontext.fp_regs[i + 14] != fps[i]);
- fail |= (tm_ucp->uc_mcontext.fp_regs[i + 14] != fps[i + NV_FPU_REGS]);
- if (fail)
- printf("Failed on %d FP %g or %g\n", i, ucp->uc_mcontext.fp_regs[i + 14], tm_ucp->uc_mcontext.fp_regs[i + 14]);
+ for (i = 0; i < NV_FPU_REGS; i++) {
+ /* Check first context. Print all mismatches. */
+ fail = (ucp->uc_mcontext.fp_regs[FPR14 + i] != fps[i]);
+ if (fail) {
+ broken = 1;
+ printf("FPR%d (1st context) == %g instead of %g (expected)\n",
+ FPR14 + i, ucp->uc_mcontext.fp_regs[FPR14 + i], fps[i]);
+ }
+ }
+
+ for (i = 0; i < NV_FPU_REGS; i++) {
+ /* Check second context. Print all mismatches. */
+ fail = (tm_ucp->uc_mcontext.fp_regs[FPR14 + i] != fps[NV_FPU_REGS + i]);
+ if (fail) {
+ broken = 1;
+ printf("FPR%d (2nd context) == %g instead of %g (expected)\n",
+ FPR14 + i, tm_ucp->uc_mcontext.fp_regs[FPR14 + i], fps[NV_FPU_REGS + i]);
+ }
}
}
@@ -72,13 +89,19 @@ static int tm_signal_context_chk_fpu()
}
i = 0;
- while (i < MAX_ATTEMPT && !fail) {
+ while (i < MAX_ATTEMPT && !broken) {
+ /*
+ * tm_signal_self_context_load will set both first and second
+ * contexts accordingly to the values passed through non-NULL
+ * array pointers to it, in that case 'fps', and invoke the
+ * signal handler installed for SIGUSR1.
+ */
rc = tm_signal_self_context_load(pid, NULL, fps, NULL, NULL);
FAIL_IF(rc != pid);
i++;
}
- return fail;
+ return (broken);
}
int main(void)
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c
index 4d05f8b0254c..0cc680f61828 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c
@@ -5,10 +5,11 @@
* Test the kernel's signal frame code.
*
* The kernel sets up two sets of ucontexts if the signal was to be
- * delivered while the thread was in a transaction.
+ * delivered while the thread was in a transaction (referred too as
+ * first and second contexts).
* Expected behaviour is that the checkpointed state is in the user
- * context passed to the signal handler. The speculated state can be
- * accessed with the uc_link pointer.
+ * context passed to the signal handler (first context). The speculated
+ * state can be accessed with the uc_link pointer (second context).
*
* The rationale for this is that if TM unaware code (which linked
* against TM libs) installs a signal handler it will not know of the
@@ -28,14 +29,22 @@
#define MAX_ATTEMPT 500000
-#define NV_GPR_REGS 18
+#define NV_GPR_REGS 18 /* Number of non-volatile GPR registers */
+#define R14 14 /* First non-volatile register to check in r14-r31 subset */
long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector int *vms, vector int *vss);
-static sig_atomic_t fail;
+static sig_atomic_t fail, broken;
-static long gps[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
- -1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18};
+/* Test only non-volatile general purpose registers, i.e. r14-r31 */
+static long gprs[] = {
+ /* First context will be set with these values, i.e. non-speculative */
+ /* R14, R15, ... */
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ /* Second context will be set with these values, i.e. speculative */
+ /* R14, R15, ... */
+ -1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18
+};
static void signal_usr1(int signum, siginfo_t *info, void *uc)
{
@@ -43,12 +52,24 @@ static void signal_usr1(int signum, siginfo_t *info, void *uc)
ucontext_t *ucp = uc;
ucontext_t *tm_ucp = ucp->uc_link;
- for (i = 0; i < NV_GPR_REGS && !fail; i++) {
- fail = (ucp->uc_mcontext.gp_regs[i + 14] != gps[i]);
- fail |= (tm_ucp->uc_mcontext.gp_regs[i + 14] != gps[i + NV_GPR_REGS]);
- if (fail)
- printf("Failed on %d GPR %lu or %lu\n", i,
- ucp->uc_mcontext.gp_regs[i + 14], tm_ucp->uc_mcontext.gp_regs[i + 14]);
+ /* Check first context. Print all mismatches. */
+ for (i = 0; i < NV_GPR_REGS; i++) {
+ fail = (ucp->uc_mcontext.gp_regs[R14 + i] != gprs[i]);
+ if (fail) {
+ broken = 1;
+ printf("GPR%d (1st context) == %lu instead of %lu (expected)\n",
+ R14 + i, ucp->uc_mcontext.gp_regs[R14 + i], gprs[i]);
+ }
+ }
+
+ /* Check second context. Print all mismatches. */
+ for (i = 0; i < NV_GPR_REGS; i++) {
+ fail = (tm_ucp->uc_mcontext.gp_regs[R14 + i] != gprs[NV_GPR_REGS + i]);
+ if (fail) {
+ broken = 1;
+ printf("GPR%d (2nd context) == %lu instead of %lu (expected)\n",
+ R14 + i, tm_ucp->uc_mcontext.gp_regs[R14 + i], gprs[NV_GPR_REGS + i]);
+ }
}
}
@@ -70,13 +91,19 @@ static int tm_signal_context_chk_gpr()
}
i = 0;
- while (i < MAX_ATTEMPT && !fail) {
- rc = tm_signal_self_context_load(pid, gps, NULL, NULL, NULL);
+ while (i < MAX_ATTEMPT && !broken) {
+ /*
+ * tm_signal_self_context_load will set both first and second
+ * contexts accordingly to the values passed through non-NULL
+ * array pointers to it, in that case 'gprs', and invoke the
+ * signal handler installed for SIGUSR1.
+ */
+ rc = tm_signal_self_context_load(pid, gprs, NULL, NULL, NULL);
FAIL_IF(rc != pid);
i++;
}
- return fail;
+ return broken;
}
int main(void)
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c
index 48ad01499b1a..b6d52730a0d8 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c
@@ -5,10 +5,11 @@
* Test the kernel's signal frame code.
*
* The kernel sets up two sets of ucontexts if the signal was to be
- * delivered while the thread was in a transaction.
+ * delivered while the thread was in a transaction (referred too as
+ * first and second contexts).
* Expected behaviour is that the checkpointed state is in the user
- * context passed to the signal handler. The speculated state can be
- * accessed with the uc_link pointer.
+ * context passed to the signal handler (first context). The speculated
+ * state can be accessed with the uc_link pointer (second context).
*
* The rationale for this is that if TM unaware code (which linked
* against TM libs) installs a signal handler it will not know of the
@@ -29,18 +30,24 @@
#define MAX_ATTEMPT 500000
-#define NV_VMX_REGS 12
+#define NV_VMX_REGS 12 /* Number of non-volatile VMX registers */
+#define VMX20 20 /* First non-volatile register to check in vr20-31 subset */
long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector int *vms, vector int *vss);
-static sig_atomic_t fail;
+static sig_atomic_t fail, broken;
+/* Test only non-volatile registers, i.e. 12 vmx registers from vr20 to vr31 */
vector int vms[] = {
- {1, 2, 3, 4 },{5, 6, 7, 8 },{9, 10,11,12},
+ /* First context will be set with these values, i.e. non-speculative */
+ /* VMX20 , VMX21 , ... */
+ { 1, 2, 3, 4},{ 5, 6, 7, 8},{ 9,10,11,12},
{13,14,15,16},{17,18,19,20},{21,22,23,24},
{25,26,27,28},{29,30,31,32},{33,34,35,36},
{37,38,39,40},{41,42,43,44},{45,46,47,48},
- {-1, -2, -3, -4}, {-5, -6, -7, -8}, {-9, -10,-11,-12},
+ /* Second context will be set with these values, i.e. speculative */
+ /* VMX20 , VMX21 , ... */
+ { -1, -2, -3, -4},{ -5, -6, -7, -8},{ -9,-10,-11,-12},
{-13,-14,-15,-16},{-17,-18,-19,-20},{-21,-22,-23,-24},
{-25,-26,-27,-28},{-29,-30,-31,-32},{-33,-34,-35,-36},
{-37,-38,-39,-40},{-41,-42,-43,-44},{-45,-46,-47,-48}
@@ -48,26 +55,43 @@ vector int vms[] = {
static void signal_usr1(int signum, siginfo_t *info, void *uc)
{
- int i;
+ int i, j;
ucontext_t *ucp = uc;
ucontext_t *tm_ucp = ucp->uc_link;
- for (i = 0; i < NV_VMX_REGS && !fail; i++) {
- fail = memcmp(ucp->uc_mcontext.v_regs->vrregs[i + 20],
+ for (i = 0; i < NV_VMX_REGS; i++) {
+ /* Check first context. Print all mismatches. */
+ fail = memcmp(ucp->uc_mcontext.v_regs->vrregs[VMX20 + i],
&vms[i], sizeof(vector int));
- fail |= memcmp(tm_ucp->uc_mcontext.v_regs->vrregs[i + 20],
- &vms[i + NV_VMX_REGS], sizeof (vector int));
-
if (fail) {
- int j;
+ broken = 1;
+ printf("VMX%d (1st context) == 0x", VMX20 + i);
+ /* Print actual value in first context. */
+ for (j = 0; j < 4; j++)
+ printf("%08x", ucp->uc_mcontext.v_regs->vrregs[VMX20 + i][j]);
+ printf(" instead of 0x");
+ /* Print expected value. */
+ for (j = 0; j < 4; j++)
+ printf("%08x", vms[i][j]);
+ printf(" (expected)\n");
+ }
+ }
- fprintf(stderr, "Failed on %d vmx 0x", i);
+ for (i = 0; i < NV_VMX_REGS; i++) {
+ /* Check second context. Print all mismatches. */
+ fail = memcmp(tm_ucp->uc_mcontext.v_regs->vrregs[VMX20 + i],
+ &vms[NV_VMX_REGS + i], sizeof (vector int));
+ if (fail) {
+ broken = 1;
+ printf("VMX%d (2nd context) == 0x", NV_VMX_REGS + i);
+ /* Print actual value in second context. */
+ for (j = 0; j < 4; j++)
+ printf("%08x", tm_ucp->uc_mcontext.v_regs->vrregs[VMX20 + i][j]);
+ printf(" instead of 0x");
+ /* Print expected value. */
for (j = 0; j < 4; j++)
- fprintf(stderr, "%04x", ucp->uc_mcontext.v_regs->vrregs[i + 20][j]);
- fprintf(stderr, " vs 0x");
- for (j = 0 ; j < 4; j++)
- fprintf(stderr, "%04x", tm_ucp->uc_mcontext.v_regs->vrregs[i + 20][j]);
- fprintf(stderr, "\n");
+ printf("%08x", vms[NV_VMX_REGS + i][j]);
+ printf(" (expected)\n");
}
}
}
@@ -90,13 +114,19 @@ static int tm_signal_context_chk()
}
i = 0;
- while (i < MAX_ATTEMPT && !fail) {
+ while (i < MAX_ATTEMPT && !broken) {
+ /*
+ * tm_signal_self_context_load will set both first and second
+ * contexts accordingly to the values passed through non-NULL
+ * array pointers to it, in that case 'vms', and invoke the
+ * signal handler installed for SIGUSR1.
+ */
rc = tm_signal_self_context_load(pid, NULL, NULL, vms, NULL);
FAIL_IF(rc != pid);
i++;
}
- return fail;
+ return (broken);
}
int main(void)
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c
index 8c8677a408bb..8e25e2072ecd 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c
@@ -5,10 +5,11 @@
* Test the kernel's signal frame code.
*
* The kernel sets up two sets of ucontexts if the signal was to be
- * delivered while the thread was in a transaction.
+ * delivered while the thread was in a transaction (referred too as
+ * first and second contexts).
* Expected behaviour is that the checkpointed state is in the user
- * context passed to the signal handler. The speculated state can be
- * accessed with the uc_link pointer.
+ * context passed to the signal handler (first context). The speculated
+ * state can be accessed with the uc_link pointer (second context).
*
* The rationale for this is that if TM unaware code (which linked
* against TM libs) installs a signal handler it will not know of the
@@ -29,17 +30,24 @@
#define MAX_ATTEMPT 500000
-#define NV_VSX_REGS 12
+#define NV_VSX_REGS 12 /* Number of VSX registers to check. */
+#define VSX20 20 /* First VSX register to check in vsr20-vsr31 subset */
+#define FPR20 20 /* FPR20 overlaps VSX20 most significant doubleword */
long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector int *vms, vector int *vss);
-static sig_atomic_t fail;
+static sig_atomic_t fail, broken;
-vector int vss[] = {
- {1, 2, 3, 4 },{5, 6, 7, 8 },{9, 10,11,12},
+/* Test only 12 vsx registers from vsr20 to vsr31 */
+vector int vsxs[] = {
+ /* First context will be set with these values, i.e. non-speculative */
+ /* VSX20 , VSX21 , ... */
+ { 1, 2, 3, 4},{ 5, 6, 7, 8},{ 9,10,11,12},
{13,14,15,16},{17,18,19,20},{21,22,23,24},
{25,26,27,28},{29,30,31,32},{33,34,35,36},
{37,38,39,40},{41,42,43,44},{45,46,47,48},
+ /* Second context will be set with these values, i.e. speculative */
+ /* VSX20 , VSX21 , ... */
{-1, -2, -3, -4 },{-5, -6, -7, -8 },{-9, -10,-11,-12},
{-13,-14,-15,-16},{-17,-18,-19,-20},{-21,-22,-23,-24},
{-25,-26,-27,-28},{-29,-30,-31,-32},{-33,-34,-35,-36},
@@ -48,41 +56,91 @@ vector int vss[] = {
static void signal_usr1(int signum, siginfo_t *info, void *uc)
{
- int i;
- uint8_t vsc[sizeof(vector int)];
- uint8_t vst[sizeof(vector int)];
+ int i, j;
+ uint8_t vsx[sizeof(vector int)];
+ uint8_t vsx_tm[sizeof(vector int)];
ucontext_t *ucp = uc;
ucontext_t *tm_ucp = ucp->uc_link;
/*
- * The other half of the VSX regs will be after v_regs.
+ * FP registers and VMX registers overlap the VSX registers.
+ *
+ * FP registers (f0-31) overlap the most significant 64 bits of VSX
+ * registers vsr0-31, whilst VMX registers vr0-31, being 128-bit like
+ * the VSX registers, overlap fully the other half of VSX registers,
+ * i.e. vr0-31 overlaps fully vsr32-63.
+ *
+ * Due to compatibility and historical reasons (VMX/Altivec support
+ * appeared first on the architecture), VMX registers vr0-31 (so VSX
+ * half vsr32-63 too) are stored right after the v_regs pointer, in an
+ * area allocated for 'vmx_reverse' array (please see
+ * arch/powerpc/include/uapi/asm/sigcontext.h for details about the
+ * mcontext_t structure on Power).
+ *
+ * The other VSX half (vsr0-31) is hence stored below vr0-31/vsr32-63
+ * registers, but only the least significant 64 bits of vsr0-31. The
+ * most significant 64 bits of vsr0-31 (f0-31), as it overlaps the FP
+ * registers, is kept in fp_regs.
+ *
+ * v_regs is a 16 byte aligned pointer at the start of vmx_reserve
+ * (vmx_reserve may or may not be 16 aligned) where the v_regs structure
+ * exists, so v_regs points to where vr0-31 / vsr32-63 registers are
+ * fully stored. Since v_regs type is elf_vrregset_t, v_regs + 1
+ * skips all the slots used to store vr0-31 / vsr32-64 and points to
+ * part of one VSX half, i.e. v_regs + 1 points to the least significant
+ * 64 bits of vsr0-31. The other part of this half (the most significant
+ * part of vsr0-31) is stored in fp_regs.
*
- * In short, vmx_reserve array holds everything. v_regs is a 16
- * byte aligned pointer at the start of vmx_reserve (vmx_reserve
- * may or may not be 16 aligned) where the v_regs structure exists.
- * (half of) The VSX regsters are directly after v_regs so the
- * easiest way to find them below.
*/
+ /* Get pointer to least significant doubleword of vsr0-31 */
long *vsx_ptr = (long *)(ucp->uc_mcontext.v_regs + 1);
long *tm_vsx_ptr = (long *)(tm_ucp->uc_mcontext.v_regs + 1);
- for (i = 0; i < NV_VSX_REGS && !fail; i++) {
- memcpy(vsc, &ucp->uc_mcontext.fp_regs[i + 20], 8);
- memcpy(vsc + 8, &vsx_ptr[20 + i], 8);
- fail = memcmp(vsc, &vss[i], sizeof(vector int));
- memcpy(vst, &tm_ucp->uc_mcontext.fp_regs[i + 20], 8);
- memcpy(vst + 8, &tm_vsx_ptr[20 + i], 8);
- fail |= memcmp(vst, &vss[i + NV_VSX_REGS], sizeof(vector int));
- if (fail) {
- int j;
+ /* Check first context. Print all mismatches. */
+ for (i = 0; i < NV_VSX_REGS; i++) {
+ /*
+ * Copy VSX most significant doubleword from fp_regs and
+ * copy VSX least significant one from 64-bit slots below
+ * saved VMX registers.
+ */
+ memcpy(vsx, &ucp->uc_mcontext.fp_regs[FPR20 + i], 8);
+ memcpy(vsx + 8, &vsx_ptr[VSX20 + i], 8);
+
+ fail = memcmp(vsx, &vsxs[i], sizeof(vector int));
- fprintf(stderr, "Failed on %d vsx 0x", i);
+ if (fail) {
+ broken = 1;
+ printf("VSX%d (1st context) == 0x", VSX20 + i);
for (j = 0; j < 16; j++)
- fprintf(stderr, "%02x", vsc[j]);
- fprintf(stderr, " vs 0x");
+ printf("%02x", vsx[j]);
+ printf(" instead of 0x");
+ for (j = 0; j < 4; j++)
+ printf("%08x", vsxs[i][j]);
+ printf(" (expected)\n");
+ }
+ }
+
+ /* Check second context. Print all mismatches. */
+ for (i = 0; i < NV_VSX_REGS; i++) {
+ /*
+ * Copy VSX most significant doubleword from fp_regs and
+ * copy VSX least significant one from 64-bit slots below
+ * saved VMX registers.
+ */
+ memcpy(vsx_tm, &tm_ucp->uc_mcontext.fp_regs[FPR20 + i], 8);
+ memcpy(vsx_tm + 8, &tm_vsx_ptr[VSX20 + i], 8);
+
+ fail = memcmp(vsx_tm, &vsxs[NV_VSX_REGS + i], sizeof(vector int));
+
+ if (fail) {
+ broken = 1;
+ printf("VSX%d (2nd context) == 0x", VSX20 + i);
for (j = 0; j < 16; j++)
- fprintf(stderr, "%02x", vst[j]);
- fprintf(stderr, "\n");
+ printf("%02x", vsx_tm[j]);
+ printf(" instead of 0x");
+ for (j = 0; j < 4; j++)
+ printf("%08x", vsxs[NV_VSX_REGS + i][j]);
+ printf("(expected)\n");
}
}
}
@@ -105,13 +163,19 @@ static int tm_signal_context_chk()
}
i = 0;
- while (i < MAX_ATTEMPT && !fail) {
- rc = tm_signal_self_context_load(pid, NULL, NULL, NULL, vss);
+ while (i < MAX_ATTEMPT && !broken) {
+ /*
+ * tm_signal_self_context_load will set both first and second
+ * contexts accordingly to the values passed through non-NULL
+ * array pointers to it, in that case 'vsxs', and invoke the
+ * signal handler installed for SIGUSR1.
+ */
+ rc = tm_signal_self_context_load(pid, NULL, NULL, NULL, vsxs);
FAIL_IF(rc != pid);
i++;
}
- return fail;
+ return (broken);
}
int main(void)
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c
index 56fbf9f6bbf3..07c388147b75 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c
@@ -10,10 +10,12 @@
*/
#define _GNU_SOURCE
+#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include "utils.h"
+#include "tm.h"
void trap_signal_handler(int signo, siginfo_t *si, void *uc)
{
@@ -29,6 +31,8 @@ int tm_signal_sigreturn_nt(void)
{
struct sigaction trap_sa;
+ SKIP_IF(!have_htm());
+
trap_sa.sa_flags = SA_SIGINFO;
trap_sa.sa_sigaction = trap_signal_handler;
diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h
index 97f9f491c541..c402464b038f 100644
--- a/tools/testing/selftests/powerpc/tm/tm.h
+++ b/tools/testing/selftests/powerpc/tm/tm.h
@@ -55,7 +55,8 @@ static inline bool failure_is_unavailable(void)
static inline bool failure_is_reschedule(void)
{
if ((failure_code() & TM_CAUSE_RESCHED) == TM_CAUSE_RESCHED ||
- (failure_code() & TM_CAUSE_KVM_RESCHED) == TM_CAUSE_KVM_RESCHED)
+ (failure_code() & TM_CAUSE_KVM_RESCHED) == TM_CAUSE_KVM_RESCHED ||
+ (failure_code() & TM_CAUSE_KVM_FAC_UNAV) == TM_CAUSE_KVM_FAC_UNAV)
return true;
return false;
diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c
index c02d24835db4..5ee0e98c4896 100644
--- a/tools/testing/selftests/powerpc/utils.c
+++ b/tools/testing/selftests/powerpc/utils.c
@@ -127,6 +127,26 @@ bool is_ppc64le(void)
return strcmp(uts.machine, "ppc64le") == 0;
}
+int read_sysfs_file(char *fpath, char *result, size_t result_size)
+{
+ char path[PATH_MAX] = "/sys/";
+ int rc = -1, fd;
+
+ strncat(path, fpath, PATH_MAX - strlen(path) - 1);
+
+ if ((fd = open(path, O_RDONLY)) < 0)
+ return rc;
+
+ rc = read(fd, result, result_size);
+
+ close(fd);
+
+ if (rc < 0)
+ return rc;
+
+ return 0;
+}
+
int read_debugfs_file(char *debugfs_file, int *result)
{
int rc = -1, fd;
diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c b/tools/testing/selftests/proc/proc-self-map-files-002.c
index 47b7473dedef..e6aa00a183bc 100644
--- a/tools/testing/selftests/proc/proc-self-map-files-002.c
+++ b/tools/testing/selftests/proc/proc-self-map-files-002.c
@@ -47,7 +47,11 @@ static void fail(const char *fmt, unsigned long a, unsigned long b)
int main(void)
{
const int PAGE_SIZE = sysconf(_SC_PAGESIZE);
- const unsigned long va_max = 1UL << 32;
+ /*
+ * va_max must be enough bigger than vm.mmap_min_addr, which is
+ * 64KB/32KB by default. (depends on CONFIG_LSM_MMAP_MIN_ADDR)
+ */
+ const unsigned long va_max = 1UL << 20;
unsigned long va;
void *p;
int fd;
diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
index bd4a7247b44f..c0dd10257df5 100644
--- a/tools/testing/selftests/ptp/testptp.c
+++ b/tools/testing/selftests/ptp/testptp.c
@@ -44,6 +44,46 @@ static int clock_adjtime(clockid_t id, struct timex *tx)
}
#endif
+static void show_flag_test(int rq_index, unsigned int flags, int err)
+{
+ printf("PTP_EXTTS_REQUEST%c flags 0x%08x : (%d) %s\n",
+ rq_index ? '1' + rq_index : ' ',
+ flags, err, strerror(errno));
+ /* sigh, uClibc ... */
+ errno = 0;
+}
+
+static void do_flag_test(int fd, unsigned int index)
+{
+ struct ptp_extts_request extts_request;
+ unsigned long request[2] = {
+ PTP_EXTTS_REQUEST,
+ PTP_EXTTS_REQUEST2,
+ };
+ unsigned int enable_flags[5] = {
+ PTP_ENABLE_FEATURE,
+ PTP_ENABLE_FEATURE | PTP_RISING_EDGE,
+ PTP_ENABLE_FEATURE | PTP_FALLING_EDGE,
+ PTP_ENABLE_FEATURE | PTP_RISING_EDGE | PTP_FALLING_EDGE,
+ PTP_ENABLE_FEATURE | (PTP_EXTTS_VALID_FLAGS + 1),
+ };
+ int err, i, j;
+
+ memset(&extts_request, 0, sizeof(extts_request));
+ extts_request.index = index;
+
+ for (i = 0; i < 2; i++) {
+ for (j = 0; j < 5; j++) {
+ extts_request.flags = enable_flags[j];
+ err = ioctl(fd, request[i], &extts_request);
+ show_flag_test(i, extts_request.flags, err);
+
+ extts_request.flags = 0;
+ err = ioctl(fd, request[i], &extts_request);
+ }
+ }
+}
+
static clockid_t get_clockid(int fd)
{
#define CLOCKFD 3
@@ -96,7 +136,8 @@ static void usage(char *progname)
" -s set the ptp clock time from the system time\n"
" -S set the system time from the ptp clock time\n"
" -t val shift the ptp clock time by 'val' seconds\n"
- " -T val set the ptp clock time to 'val' seconds\n",
+ " -T val set the ptp clock time to 'val' seconds\n"
+ " -z test combinations of rising/falling external time stamp flags\n",
progname);
}
@@ -122,6 +163,7 @@ int main(int argc, char *argv[])
int adjtime = 0;
int capabilities = 0;
int extts = 0;
+ int flagtest = 0;
int gettime = 0;
int index = 0;
int list_pins = 0;
@@ -138,7 +180,7 @@ int main(int argc, char *argv[])
progname = strrchr(argv[0], '/');
progname = progname ? 1+progname : argv[0];
- while (EOF != (c = getopt(argc, argv, "cd:e:f:ghi:k:lL:p:P:sSt:T:v"))) {
+ while (EOF != (c = getopt(argc, argv, "cd:e:f:ghi:k:lL:p:P:sSt:T:z"))) {
switch (c) {
case 'c':
capabilities = 1;
@@ -191,6 +233,9 @@ int main(int argc, char *argv[])
settime = 3;
seconds = atoi(optarg);
break;
+ case 'z':
+ flagtest = 1;
+ break;
case 'h':
usage(progname);
return 0;
@@ -322,6 +367,10 @@ int main(int argc, char *argv[])
}
}
+ if (flagtest) {
+ do_flag_test(fd, index);
+ }
+
if (list_pins) {
int n_pins = 0;
if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) {
diff --git a/tools/testing/selftests/rcutorture/bin/cpus2use.sh b/tools/testing/selftests/rcutorture/bin/cpus2use.sh
index 4e9485590c10..1dbfb62567d2 100755
--- a/tools/testing/selftests/rcutorture/bin/cpus2use.sh
+++ b/tools/testing/selftests/rcutorture/bin/cpus2use.sh
@@ -15,8 +15,15 @@ then
exit 0
fi
ncpus=`grep '^processor' /proc/cpuinfo | wc -l`
-idlecpus=`mpstat | tail -1 | \
- awk -v ncpus=$ncpus '{ print ncpus * ($7 + $NF) / 100 }'`
+if mpstat -V > /dev/null 2>&1
+then
+ idlecpus=`mpstat | tail -1 | \
+ awk -v ncpus=$ncpus '{ print ncpus * ($7 + $NF) / 100 }'`
+else
+ # No mpstat command, so use all available CPUs.
+ echo The mpstat command is not available, so greedily using all CPUs.
+ idlecpus=$ncpus
+fi
awk -v ncpus=$ncpus -v idlecpus=$idlecpus < /dev/null '
BEGIN {
cpus2use = idlecpus;
diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh
index dc49a3ba6111..30cb5b27d32e 100755
--- a/tools/testing/selftests/rcutorture/bin/jitter.sh
+++ b/tools/testing/selftests/rcutorture/bin/jitter.sh
@@ -23,25 +23,39 @@ spinmax=${4-1000}
n=1
-starttime=`awk 'BEGIN { print systime(); }' < /dev/null`
+starttime=`gawk 'BEGIN { print systime(); }' < /dev/null`
+
+nohotplugcpus=
+for i in /sys/devices/system/cpu/cpu[0-9]*
+do
+ if test -f $i/online
+ then
+ :
+ else
+ curcpu=`echo $i | sed -e 's/^[^0-9]*//'`
+ nohotplugcpus="$nohotplugcpus $curcpu"
+ fi
+done
while :
do
# Check for done.
- t=`awk -v s=$starttime 'BEGIN { print systime() - s; }' < /dev/null`
+ t=`gawk -v s=$starttime 'BEGIN { print systime() - s; }' < /dev/null`
if test "$t" -gt "$duration"
then
exit 0;
fi
# Set affinity to randomly selected online CPU
- cpus=`grep 1 /sys/devices/system/cpu/*/online |
- sed -e 's,/[^/]*$,,' -e 's/^[^0-9]*//'`
-
- # Do not leave out poor old cpu0 which may not be hot-pluggable
- if [ ! -f "/sys/devices/system/cpu/cpu0/online" ]; then
- cpus="0 $cpus"
+ if cpus=`grep 1 /sys/devices/system/cpu/*/online 2>&1 |
+ sed -e 's,/[^/]*$,,' -e 's/^[^0-9]*//'`
+ then
+ :
+ else
+ cpus=
fi
+ # Do not leave out non-hot-pluggable CPUs
+ cpus="$cpus $nohotplugcpus"
cpumask=`awk -v cpus="$cpus" -v me=$me -v n=$n 'BEGIN {
srand(n + me + systime());
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
index 2a7f3f4756a7..9d9a41625dd9 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
@@ -25,6 +25,7 @@ stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null |
tail -1 | sed -e 's/^\[[ 0-9.]*] //' |
awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' |
tr -d '\012\015'`"
+fwdprog="`grep 'rcu_torture_fwd_prog_cr Duration' $i/console.log 2> /dev/null | sed -e 's/^\[[^]]*] //' | sort -k15nr | head -1 | awk '{ print $14 " " $15 }'`"
if test -z "$ngps"
then
echo "$configfile ------- " $stopstate
@@ -39,7 +40,7 @@ else
BEGIN { print ngps / dur }' < /dev/null`
title="$title ($ngpsps/s)"
fi
- echo $title $stopstate
+ echo $title $stopstate $fwdprog
nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
if test -z "$nclosecalls"
then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 27b7b5693ede..e0352304b98b 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -123,7 +123,7 @@ qemu_args=$5
boot_args=$6
cd $KVM
-kstarttime=`awk 'BEGIN { print systime() }' < /dev/null`
+kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null`
if test -z "$TORTURE_BUILDONLY"
then
echo ' ---' `date`: Starting kernel
@@ -133,11 +133,10 @@ fi
qemu_args="-enable-kvm -nographic $qemu_args"
cpu_count=`configNR_CPUS.sh $resdir/ConfigFragment`
cpu_count=`configfrag_boot_cpus "$boot_args" "$config_template" "$cpu_count"`
-vcpus=`identify_qemu_vcpus`
-if test $cpu_count -gt $vcpus
+if test "$cpu_count" -gt "$TORTURE_ALLOTED_CPUS"
then
- echo CPU count limited from $cpu_count to $vcpus | tee -a $resdir/Warnings
- cpu_count=$vcpus
+ echo CPU count limited from $cpu_count to $TORTURE_ALLOTED_CPUS | tee -a $resdir/Warnings
+ cpu_count=$TORTURE_ALLOTED_CPUS
fi
qemu_args="`specify_qemu_cpus "$QEMU" "$qemu_args" "$cpu_count"`"
@@ -177,7 +176,7 @@ do
then
qemu_pid=`cat "$resdir/qemu_pid"`
fi
- kruntime=`awk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
+ kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1
then
if test $kruntime -ge $seconds
@@ -213,7 +212,7 @@ then
oldline="`tail $resdir/console.log`"
while :
do
- kruntime=`awk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
+ kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
if kill -0 $qemu_pid > /dev/null 2>&1
then
:
@@ -227,7 +226,7 @@ then
must_continue=yes
fi
last_ts="`tail $resdir/console.log | grep '^\[ *[0-9]\+\.[0-9]\+]' | tail -1 | sed -e 's/^\[ *//' -e 's/\..*$//'`"
- if test -z "last_ts"
+ if test -z "$last_ts"
then
last_ts=0
fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 72518580df23..78d18ab8e954 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -24,7 +24,9 @@ dur=$((30*60))
dryrun=""
KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
PATH=${KVM}/bin:$PATH; export PATH
-TORTURE_ALLOTED_CPUS=""
+. functions.sh
+
+TORTURE_ALLOTED_CPUS="`identify_qemu_vcpus`"
TORTURE_DEFCONFIG=defconfig
TORTURE_BOOT_IMAGE=""
TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
@@ -40,8 +42,6 @@ cpus=0
ds=`date +%Y.%m.%d-%H:%M:%S`
jitter="-1"
-. functions.sh
-
usage () {
echo "Usage: $scriptname optional arguments:"
echo " --bootargs kernel-boot-arguments"
@@ -93,6 +93,11 @@ do
checkarg --cpus "(number)" "$#" "$2" '^[0-9]*$' '^--'
cpus=$2
TORTURE_ALLOTED_CPUS="$2"
+ max_cpus="`identify_qemu_vcpus`"
+ if test "$TORTURE_ALLOTED_CPUS" -gt "$max_cpus"
+ then
+ TORTURE_ALLOTED_CPUS=$max_cpus
+ fi
shift
;;
--datestamp)
@@ -198,9 +203,10 @@ fi
CONFIGFRAG=${KVM}/configs/${TORTURE_SUITE}; export CONFIGFRAG
+defaultconfigs="`tr '\012' ' ' < $CONFIGFRAG/CFLIST`"
if test -z "$configs"
then
- configs="`cat $CONFIGFRAG/CFLIST`"
+ configs=$defaultconfigs
fi
if test -z "$resdir"
@@ -209,7 +215,7 @@ then
fi
# Create a file of test-name/#cpus pairs, sorted by decreasing #cpus.
-touch $T/cfgcpu
+configs_derep=
for CF in $configs
do
case $CF in
@@ -222,15 +228,21 @@ do
CF1=$CF
;;
esac
+ for ((cur_rep=0;cur_rep<$config_reps;cur_rep++))
+ do
+ configs_derep="$configs_derep $CF1"
+ done
+done
+touch $T/cfgcpu
+configs_derep="`echo $configs_derep | sed -e "s/\<CFLIST\>/$defaultconfigs/g"`"
+for CF1 in $configs_derep
+do
if test -f "$CONFIGFRAG/$CF1"
then
cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF1`
cpu_count=`configfrag_boot_cpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"`
cpu_count=`configfrag_boot_maxcpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"`
- for ((cur_rep=0;cur_rep<$config_reps;cur_rep++))
- do
- echo $CF1 $cpu_count >> $T/cfgcpu
- done
+ echo $CF1 $cpu_count >> $T/cfgcpu
else
echo "The --configs file $CF1 does not exist, terminating."
exit 1
diff --git a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh
index 6fa9bd1ddc09..38e424d2392c 100755
--- a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh
+++ b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh
@@ -20,58 +20,9 @@ if [ -s "$D/initrd/init" ]; then
exit 0
fi
-T=${TMPDIR-/tmp}/mkinitrd.sh.$$
-trap 'rm -rf $T' 0 2
-mkdir $T
-
-cat > $T/init << '__EOF___'
-#!/bin/sh
-# Run in userspace a few milliseconds every second. This helps to
-# exercise the NO_HZ_FULL portions of RCU. The 192 instances of "a" was
-# empirically shown to give a nice multi-millisecond burst of user-mode
-# execution on a 2GHz CPU, as desired. Modern CPUs will vary from a
-# couple of milliseconds up to perhaps 100 milliseconds, which is an
-# acceptable range.
-#
-# Why not calibrate an exact delay? Because within this initrd, we
-# are restricted to Bourne-shell builtins, which as far as I know do not
-# provide any means of obtaining a fine-grained timestamp.
-
-a4="a a a a"
-a16="$a4 $a4 $a4 $a4"
-a64="$a16 $a16 $a16 $a16"
-a192="$a64 $a64 $a64"
-while :
-do
- q=
- for i in $a192
- do
- q="$q $i"
- done
- sleep 1
-done
-__EOF___
-
-# Try using dracut to create initrd
-if command -v dracut >/dev/null 2>&1
-then
- echo Creating $D/initrd using dracut.
- # Filesystem creation
- dracut --force --no-hostonly --no-hostonly-cmdline --module "base" $T/initramfs.img
- cd $D
- mkdir -p initrd
- cd initrd
- zcat $T/initramfs.img | cpio -id
- cp $T/init init
- chmod +x init
- echo Done creating $D/initrd using dracut
- exit 0
-fi
-
-# No dracut, so create a C-language initrd/init program and statically
-# link it. This results in a very small initrd, but might be a bit less
-# future-proof than dracut.
-echo "Could not find dracut, attempting C initrd"
+# Create a C-language initrd/init infinite-loop program and statically
+# link it. This results in a very small initrd.
+echo "Creating a statically linked C-language initrd"
cd $D
mkdir -p initrd
cd initrd
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
index 28568b72a31b..ea4399020c6c 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
@@ -1,8 +1,5 @@
CONFIG_SMP=y
CONFIG_NR_CPUS=2
-CONFIG_HOTPLUG_CPU=n
-CONFIG_SUSPEND=n
-CONFIG_HIBERNATION=n
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
index 35e639e39366..65daee4fbf5a 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
@@ -9,9 +9,6 @@ CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
CONFIG_RCU_FAST_NO_HZ=n
CONFIG_RCU_TRACE=n
-CONFIG_HOTPLUG_CPU=n
-CONFIG_SUSPEND=n
-CONFIG_HIBERNATION=n
CONFIG_RCU_FANOUT=3
CONFIG_RCU_FANOUT_LEAF=3
CONFIG_RCU_NOCB_CPU=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
index 5c3213cc3ad7..1c218944b1e9 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
@@ -3,3 +3,4 @@ rcutree.gp_preinit_delay=12
rcutree.gp_init_delay=3
rcutree.gp_cleanup_delay=3
rcutree.kthread_prio=2
+threadirqs
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
index 24c9f6012e35..f6d6a40c0576 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
@@ -9,9 +9,6 @@ CONFIG_NO_HZ_IDLE=n
CONFIG_NO_HZ_FULL=y
CONFIG_RCU_FAST_NO_HZ=y
CONFIG_RCU_TRACE=y
-CONFIG_HOTPLUG_CPU=n
-CONFIG_SUSPEND=n
-CONFIG_HIBERNATION=n
CONFIG_RCU_FANOUT=4
CONFIG_RCU_FANOUT_LEAF=3
CONFIG_DEBUG_LOCK_ALLOC=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
index 05a4eec3f27b..bf4980d606b5 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
@@ -9,9 +9,6 @@ CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
CONFIG_RCU_FAST_NO_HZ=n
CONFIG_RCU_TRACE=n
-CONFIG_HOTPLUG_CPU=n
-CONFIG_SUSPEND=n
-CONFIG_HIBERNATION=n
CONFIG_RCU_FANOUT=6
CONFIG_RCU_FANOUT_LEAF=6
CONFIG_RCU_NOCB_CPU=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
index fb1c763c10c5..c810c5276a89 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
@@ -9,9 +9,6 @@ CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
CONFIG_RCU_FAST_NO_HZ=n
CONFIG_RCU_TRACE=n
-CONFIG_HOTPLUG_CPU=n
-CONFIG_SUSPEND=n
-CONFIG_HIBERNATION=n
CONFIG_RCU_FANOUT=3
CONFIG_RCU_FANOUT_LEAF=2
CONFIG_RCU_NOCB_CPU=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE09 b/tools/testing/selftests/rcutorture/configs/rcu/TREE09
index 6710e749d9de..8523a7515cbf 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE09
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE09
@@ -8,9 +8,6 @@ CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
CONFIG_RCU_TRACE=n
-CONFIG_HOTPLUG_CPU=n
-CONFIG_SUSPEND=n
-CONFIG_HIBERNATION=n
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_RCU_BOOST=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL
index 4d8eb5bfb6f6..5d546efa68e8 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL
@@ -6,9 +6,6 @@ CONFIG_PREEMPT=n
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
-CONFIG_HOTPLUG_CPU=n
-CONFIG_SUSPEND=n
-CONFIG_HIBERNATION=n
CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
index af6fca03602f..1b96d68473b8 100644
--- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
+++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
@@ -6,7 +6,6 @@ Kconfig Parameters:
CONFIG_DEBUG_LOCK_ALLOC -- Do three, covering CONFIG_PROVE_LOCKING & not.
CONFIG_DEBUG_OBJECTS_RCU_HEAD -- Do one.
-CONFIG_HOTPLUG_CPU -- Do half. (Every second.)
CONFIG_HZ_PERIODIC -- Do one.
CONFIG_NO_HZ_IDLE -- Do those not otherwise specified. (Groups of two.)
CONFIG_NO_HZ_FULL -- Do two, one with partial CPU enablement.
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
index eec2663261f2..e8a657a5f48a 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -15,7 +15,7 @@
#include <errno.h>
#include <stddef.h>
-static inline pid_t gettid(void)
+static inline pid_t rseq_gettid(void)
{
return syscall(__NR_gettid);
}
@@ -373,11 +373,12 @@ void *test_percpu_spinlock_thread(void *arg)
rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
if (i != 0 && !(i % (reps / 10)))
- printf_verbose("tid %d: count %lld\n", (int) gettid(), i);
+ printf_verbose("tid %d: count %lld\n",
+ (int) rseq_gettid(), i);
#endif
}
printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
- (int) gettid(), nr_abort, signals_delivered);
+ (int) rseq_gettid(), nr_abort, signals_delivered);
if (!opt_disable_rseq && thread_data->reg &&
rseq_unregister_current_thread())
abort();
@@ -454,11 +455,12 @@ void *test_percpu_inc_thread(void *arg)
} while (rseq_unlikely(ret));
#ifndef BENCHMARK
if (i != 0 && !(i % (reps / 10)))
- printf_verbose("tid %d: count %lld\n", (int) gettid(), i);
+ printf_verbose("tid %d: count %lld\n",
+ (int) rseq_gettid(), i);
#endif
}
printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
- (int) gettid(), nr_abort, signals_delivered);
+ (int) rseq_gettid(), nr_abort, signals_delivered);
if (!opt_disable_rseq && thread_data->reg &&
rseq_unregister_current_thread())
abort();
@@ -605,7 +607,7 @@ void *test_percpu_list_thread(void *arg)
}
printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
- (int) gettid(), nr_abort, signals_delivered);
+ (int) rseq_gettid(), nr_abort, signals_delivered);
if (!opt_disable_rseq && rseq_unregister_current_thread())
abort();
@@ -796,7 +798,7 @@ void *test_percpu_buffer_thread(void *arg)
}
printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
- (int) gettid(), nr_abort, signals_delivered);
+ (int) rseq_gettid(), nr_abort, signals_delivered);
if (!opt_disable_rseq && rseq_unregister_current_thread())
abort();
@@ -1011,7 +1013,7 @@ void *test_percpu_memcpy_buffer_thread(void *arg)
}
printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
- (int) gettid(), nr_abort, signals_delivered);
+ (int) rseq_gettid(), nr_abort, signals_delivered);
if (!opt_disable_rseq && rseq_unregister_current_thread())
abort();
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
index d40d60e7499e..3f63eb362b92 100644
--- a/tools/testing/selftests/rseq/rseq.h
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -149,11 +149,13 @@ static inline void rseq_clear_rseq_cs(void)
/*
* rseq_prepare_unload() should be invoked by each thread executing a rseq
* critical section at least once between their last critical section and
- * library unload of the library defining the rseq critical section
- * (struct rseq_cs). This also applies to use of rseq in code generated by
- * JIT: rseq_prepare_unload() should be invoked at least once by each
- * thread executing a rseq critical section before reclaim of the memory
- * holding the struct rseq_cs.
+ * library unload of the library defining the rseq critical section (struct
+ * rseq_cs) or the code referred to by the struct rseq_cs start_ip and
+ * post_commit_offset fields. This also applies to use of rseq in code
+ * generated by JIT: rseq_prepare_unload() should be invoked at least once by
+ * each thread executing a rseq critical section before reclaim of the memory
+ * holding the struct rseq_cs or reclaim of the code pointed to by struct
+ * rseq_cs start_ip and post_commit_offset fields.
*/
static inline void rseq_prepare_unload(void)
{
diff --git a/tools/testing/selftests/rseq/settings b/tools/testing/selftests/rseq/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/rseq/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/rtc/settings b/tools/testing/selftests/rtc/settings
new file mode 100644
index 000000000000..ba4d85f74cd6
--- /dev/null
+++ b/tools/testing/selftests/rtc/settings
@@ -0,0 +1 @@
+timeout=90
diff --git a/tools/testing/selftests/safesetid/Makefile b/tools/testing/selftests/safesetid/Makefile
index 98da7a504737..fa02c4d5ec13 100644
--- a/tools/testing/selftests/safesetid/Makefile
+++ b/tools/testing/selftests/safesetid/Makefile
@@ -1,8 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for mount selftests.
-CFLAGS = -Wall -lcap -O2
+CFLAGS = -Wall -O2
+LDLIBS = -lcap
-TEST_PROGS := run_tests.sh
+TEST_PROGS := safesetid-test.sh
TEST_GEN_FILES := safesetid-test
include ../lib.mk
diff --git a/tools/testing/selftests/safesetid/safesetid-test.c b/tools/testing/selftests/safesetid/safesetid-test.c
index 8f40c6ecdad1..0c4d50644c13 100644
--- a/tools/testing/selftests/safesetid/safesetid-test.c
+++ b/tools/testing/selftests/safesetid/safesetid-test.c
@@ -213,7 +213,8 @@ static void test_setuid(uid_t child_uid, bool expect_success)
}
if (cpid == 0) { /* Code executed by child */
- setuid(child_uid);
+ if (setuid(child_uid) < 0)
+ exit(EXIT_FAILURE);
if (getuid() == child_uid)
exit(EXIT_SUCCESS);
else
@@ -291,8 +292,10 @@ int main(int argc, char **argv)
// First test to make sure we can write userns mappings from a user
// that doesn't have any restrictions (as long as it has CAP_SETUID);
- setuid(NO_POLICY_USER);
- setgid(NO_POLICY_USER);
+ if (setuid(NO_POLICY_USER) < 0)
+ die("Error with set uid(%d)\n", NO_POLICY_USER);
+ if (setgid(NO_POLICY_USER) < 0)
+ die("Error with set gid(%d)\n", NO_POLICY_USER);
// Take away all but setid caps
drop_caps(true);
@@ -306,8 +309,10 @@ int main(int argc, char **argv)
die("test_userns failed when it should work\n");
}
- setuid(RESTRICTED_PARENT);
- setgid(RESTRICTED_PARENT);
+ if (setuid(RESTRICTED_PARENT) < 0)
+ die("Error with set uid(%d)\n", RESTRICTED_PARENT);
+ if (setgid(RESTRICTED_PARENT) < 0)
+ die("Error with set gid(%d)\n", RESTRICTED_PARENT);
test_setuid(ROOT_USER, false);
test_setuid(ALLOWED_CHILD1, true);
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 6ef7f16c4cf5..ee1b727ede04 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -35,6 +35,7 @@
#include <stdbool.h>
#include <string.h>
#include <time.h>
+#include <limits.h>
#include <linux/elf.h>
#include <sys/uio.h>
#include <sys/utsname.h>
@@ -43,6 +44,7 @@
#include <sys/times.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
+#include <linux/kcmp.h>
#include <unistd.h>
#include <sys/syscall.h>
@@ -112,6 +114,8 @@ struct seccomp_data {
# define __NR_seccomp 383
# elif defined(__aarch64__)
# define __NR_seccomp 277
+# elif defined(__riscv)
+# define __NR_seccomp 277
# elif defined(__hppa__)
# define __NR_seccomp 338
# elif defined(__powerpc__)
@@ -199,6 +203,15 @@ struct seccomp_notif_sizes {
};
#endif
+#ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
+#define PTRACE_EVENTMSG_SYSCALL_ENTRY 1
+#define PTRACE_EVENTMSG_SYSCALL_EXIT 2
+#endif
+
+#ifndef SECCOMP_USER_NOTIF_FLAG_CONTINUE
+#define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001
+#endif
+
#ifndef seccomp
int seccomp(unsigned int op, unsigned int flags, void *args)
{
@@ -1582,6 +1595,10 @@ TEST_F(TRACE_poke, getpid_runs_normally)
# define ARCH_REGS struct user_pt_regs
# define SYSCALL_NUM regs[8]
# define SYSCALL_RET regs[0]
+#elif defined(__riscv) && __riscv_xlen == 64
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM a7
+# define SYSCALL_RET a0
#elif defined(__hppa__)
# define ARCH_REGS struct user_regs_struct
# define SYSCALL_NUM gr[20]
@@ -1671,7 +1688,7 @@ void change_syscall(struct __test_metadata *_metadata,
EXPECT_EQ(0, ret) {}
#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
- defined(__s390__) || defined(__hppa__)
+ defined(__s390__) || defined(__hppa__) || defined(__riscv)
{
regs.SYSCALL_NUM = syscall;
}
@@ -3072,7 +3089,7 @@ static int user_trap_syscall(int nr, unsigned int flags)
return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
}
-#define USER_NOTIF_MAGIC 116983961184613L
+#define USER_NOTIF_MAGIC INT_MAX
TEST(user_notification_basic)
{
pid_t pid;
@@ -3141,7 +3158,18 @@ TEST(user_notification_basic)
EXPECT_GT(poll(&pollfd, 1, -1), 0);
EXPECT_EQ(pollfd.revents, POLLIN);
- EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+ /* Test that we can't pass garbage to the kernel. */
+ memset(&req, 0, sizeof(req));
+ req.pid = -1;
+ errno = 0;
+ ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno);
+
+ if (ret) {
+ req.pid = 0;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+ }
pollfd.fd = listener;
pollfd.events = POLLIN | POLLOUT;
@@ -3261,6 +3289,7 @@ TEST(user_notification_signal)
close(sk_pair[1]);
+ memset(&req, 0, sizeof(req));
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
EXPECT_EQ(kill(pid, SIGUSR1), 0);
@@ -3279,6 +3308,7 @@ TEST(user_notification_signal)
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
EXPECT_EQ(errno, ENOENT);
+ memset(&req, 0, sizeof(req));
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
resp.id = req.id;
@@ -3480,6 +3510,108 @@ TEST(seccomp_get_notif_sizes)
EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp));
}
+static int filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2)
+{
+#ifdef __NR_kcmp
+ return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2);
+#else
+ errno = ENOSYS;
+ return -1;
+#endif
+}
+
+TEST(user_notification_continue)
+{
+ pid_t pid;
+ long ret;
+ int status, listener;
+ struct seccomp_notif req = {};
+ struct seccomp_notif_resp resp = {};
+ struct pollfd pollfd;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ listener = user_trap_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ ASSERT_GE(listener, 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ int dup_fd, pipe_fds[2];
+ pid_t self;
+
+ ret = pipe(pipe_fds);
+ if (ret < 0)
+ exit(1);
+
+ dup_fd = dup(pipe_fds[0]);
+ if (dup_fd < 0)
+ exit(1);
+
+ self = getpid();
+
+ ret = filecmp(self, self, pipe_fds[0], dup_fd);
+ if (ret)
+ exit(2);
+
+ exit(0);
+ }
+
+ pollfd.fd = listener;
+ pollfd.events = POLLIN | POLLOUT;
+
+ EXPECT_GT(poll(&pollfd, 1, -1), 0);
+ EXPECT_EQ(pollfd.revents, POLLIN);
+
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+
+ pollfd.fd = listener;
+ pollfd.events = POLLIN | POLLOUT;
+
+ EXPECT_GT(poll(&pollfd, 1, -1), 0);
+ EXPECT_EQ(pollfd.revents, POLLOUT);
+
+ EXPECT_EQ(req.data.nr, __NR_dup);
+
+ resp.id = req.id;
+ resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;
+
+ /*
+ * Verify that setting SECCOMP_USER_NOTIF_FLAG_CONTINUE enforces other
+ * args be set to 0.
+ */
+ resp.error = 0;
+ resp.val = USER_NOTIF_MAGIC;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ resp.error = USER_NOTIF_MAGIC;
+ resp.val = 0;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ resp.error = 0;
+ resp.val = 0;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) {
+ if (errno == EINVAL)
+ XFAIL(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE");
+ }
+
+skip:
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status)) {
+ if (WEXITSTATUS(status) == 2) {
+ XFAIL(return, "Kernel does not support kcmp() syscall");
+ return;
+ }
+ }
+}
+
/*
* TODO:
* - add microbenchmarks
diff --git a/tools/testing/selftests/size/get_size.c b/tools/testing/selftests/size/get_size.c
index 2ad45b944355..2980b1a63366 100644
--- a/tools/testing/selftests/size/get_size.c
+++ b/tools/testing/selftests/size/get_size.c
@@ -11,23 +11,35 @@
* own execution. It also attempts to have as few dependencies
* on kernel features as possible.
*
- * It should be statically linked, with startup libs avoided.
- * It uses no library calls, and only the following 3 syscalls:
+ * It should be statically linked, with startup libs avoided. It uses
+ * no library calls except the syscall() function for the following 3
+ * syscalls:
* sysinfo(), write(), and _exit()
*
* For output, it avoids printf (which in some C libraries
* has large external dependencies) by implementing it's own
* number output and print routines, and using __builtin_strlen()
+ *
+ * The test may crash if any of the above syscalls fails because in some
+ * libc implementations (e.g. the GNU C Library) errno is saved in
+ * thread-local storage, which does not get initialized due to avoiding
+ * startup libs.
*/
#include <sys/sysinfo.h>
#include <unistd.h>
+#include <sys/syscall.h>
#define STDOUT_FILENO 1
static int print(const char *s)
{
- return write(STDOUT_FILENO, s, __builtin_strlen(s));
+ size_t len = 0;
+
+ while (s[len] != '\0')
+ len++;
+
+ return syscall(SYS_write, STDOUT_FILENO, s, len);
}
static inline char *num_to_str(unsigned long num, char *buf, int len)
@@ -79,12 +91,12 @@ void _start(void)
print("TAP version 13\n");
print("# Testing system size.\n");
- ccode = sysinfo(&info);
+ ccode = syscall(SYS_sysinfo, &info);
if (ccode < 0) {
print("not ok 1");
print(test_name);
print(" ---\n reason: \"could not get sysinfo\"\n ...\n");
- _exit(ccode);
+ syscall(SYS_exit, ccode);
}
print("ok 1");
print(test_name);
@@ -100,5 +112,5 @@ void _start(void)
print(" ...\n");
print("1..1\n");
- _exit(0);
+ syscall(SYS_exit, 0);
}
diff --git a/tools/testing/selftests/sync/sync.c b/tools/testing/selftests/sync/sync.c
index f3d599f249b9..7741c0518d18 100644
--- a/tools/testing/selftests/sync/sync.c
+++ b/tools/testing/selftests/sync/sync.c
@@ -109,7 +109,7 @@ static struct sync_file_info *sync_file_info(int fd)
return NULL;
}
- info->sync_fence_info = (uint64_t)fence_info;
+ info->sync_fence_info = (uint64_t)(unsigned long)fence_info;
err = ioctl(fd, SYNC_IOC_FILE_INFO, info);
if (err < 0) {
@@ -124,7 +124,7 @@ static struct sync_file_info *sync_file_info(int fd)
static void sync_file_info_free(struct sync_file_info *info)
{
- free((void *)info->sync_fence_info);
+ free((void *)(unsigned long)info->sync_fence_info);
free(info);
}
@@ -152,7 +152,7 @@ int sync_fence_count_with_status(int fd, int status)
if (!info)
return -1;
- fence_info = (struct sync_fence_info *)info->sync_fence_info;
+ fence_info = (struct sync_fence_info *)(unsigned long)info->sync_fence_info;
for (i = 0 ; i < info->num_fences ; i++) {
if (fence_info[i].status == status)
count++;
diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README
index 22e5da9008fd..b0954c873e2f 100644
--- a/tools/testing/selftests/tc-testing/README
+++ b/tools/testing/selftests/tc-testing/README
@@ -128,7 +128,9 @@ optional arguments:
-v, --verbose Show the commands that are being run
-N, --notap Suppress tap results for command under test
-d DEVICE, --device DEVICE
- Execute the test case in flower category
+ Execute test cases that use a physical device, where
+ DEVICE is its name. (If not defined, tests that require
+ a physical device will be skipped)
-P, --pause Pause execution just before post-suite stage
selection:
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index 7c551968d184..477bc61b374a 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -1,3 +1,12 @@
+#
+# Core Netfilter Configuration
+#
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_MARK=y
+CONFIG_NF_CONNTRACK_ZONES=y
+CONFIG_NF_CONNTRACK_LABELS=y
+CONFIG_NF_NAT=m
+
CONFIG_NET_SCHED=y
#
@@ -42,6 +51,7 @@ CONFIG_NET_ACT_CTINFO=m
CONFIG_NET_ACT_SKBMOD=m
CONFIG_NET_ACT_IFE=m
CONFIG_NET_ACT_TUNNEL_KEY=m
+CONFIG_NET_ACT_CT=m
CONFIG_NET_ACT_MPLS=m
CONFIG_NET_IFE_SKBMARK=m
CONFIG_NET_IFE_SKBPRIO=m
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
index e98c36750fae..d34fe06268d2 100644
--- a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
+++ b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
@@ -54,7 +54,7 @@ class SubPlugin(TdcPlugin):
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
- env=ENVIR)
+ env=os.environ.copy())
(rawout, serr) = proc.communicate()
if proc.returncode != 0 and len(serr) > 0:
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
index affa7f2d9670..9539cffa9e5e 100644
--- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
+++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
@@ -64,7 +64,7 @@ class SubPlugin(TdcPlugin):
cmdlist.insert(0, self.args.NAMES['NS'])
cmdlist.insert(0, 'exec')
cmdlist.insert(0, 'netns')
- cmdlist.insert(0, 'ip')
+ cmdlist.insert(0, self.args.NAMES['IP'])
else:
pass
@@ -78,16 +78,16 @@ class SubPlugin(TdcPlugin):
return command
def _ports_create(self):
- cmd = 'ip link add $DEV0 type veth peer name $DEV1'
+ cmd = '$IP link add $DEV0 type veth peer name $DEV1'
self._exec_cmd('pre', cmd)
- cmd = 'ip link set $DEV0 up'
+ cmd = '$IP link set $DEV0 up'
self._exec_cmd('pre', cmd)
if not self.args.namespace:
- cmd = 'ip link set $DEV1 up'
+ cmd = '$IP link set $DEV1 up'
self._exec_cmd('pre', cmd)
def _ports_destroy(self):
- cmd = 'ip link del $DEV0'
+ cmd = '$IP link del $DEV0'
self._exec_cmd('post', cmd)
def _ns_create(self):
@@ -97,16 +97,16 @@ class SubPlugin(TdcPlugin):
'''
self._ports_create()
if self.args.namespace:
- cmd = 'ip netns add {}'.format(self.args.NAMES['NS'])
+ cmd = '$IP netns add {}'.format(self.args.NAMES['NS'])
self._exec_cmd('pre', cmd)
- cmd = 'ip link set $DEV1 netns {}'.format(self.args.NAMES['NS'])
+ cmd = '$IP link set $DEV1 netns {}'.format(self.args.NAMES['NS'])
self._exec_cmd('pre', cmd)
- cmd = 'ip -n {} link set $DEV1 up'.format(self.args.NAMES['NS'])
+ cmd = '$IP -n {} link set $DEV1 up'.format(self.args.NAMES['NS'])
self._exec_cmd('pre', cmd)
if self.args.device:
- cmd = 'ip link set $DEV2 netns {}'.format(self.args.NAMES['NS'])
+ cmd = '$IP link set $DEV2 netns {}'.format(self.args.NAMES['NS'])
self._exec_cmd('pre', cmd)
- cmd = 'ip -n {} link set $DEV2 up'.format(self.args.NAMES['NS'])
+ cmd = '$IP -n {} link set $DEV2 up'.format(self.args.NAMES['NS'])
self._exec_cmd('pre', cmd)
def _ns_destroy(self):
@@ -115,7 +115,7 @@ class SubPlugin(TdcPlugin):
devices as well)
'''
if self.args.namespace:
- cmd = 'ip netns delete {}'.format(self.args.NAMES['NS'])
+ cmd = '$IP netns delete {}'.format(self.args.NAMES['NS'])
self._exec_cmd('post', cmd)
def _exec_cmd(self, stage, command):
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
index ddabb2fbb7c7..88ec134872e4 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
@@ -525,5 +525,29 @@
"teardown": [
"$TC actions flush action csum"
]
+ },
+ {
+ "id": "eaf0",
+ "name": "Add csum iph action with no_percpu flag",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum iph no_percpu",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action csum",
+ "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*no_percpu",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
index 62b82fe10c89..4202e95e27b9 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
@@ -24,6 +24,30 @@
]
},
{
+ "id": "e38c",
+ "name": "Add simple ct action with cookie",
+ "category": [
+ "actions",
+ "ct"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ct",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ct index 42 cookie deadbeef",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action ct",
+ "matchPattern": "action order [0-9]*: ct zone 0 pipe.*index 42 ref.*cookie deadbeef",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ct"
+ ]
+ },
+ {
"id": "9f20",
"name": "Add ct clear action",
"category": [
@@ -48,6 +72,30 @@
]
},
{
+ "id": "0bc1",
+ "name": "Add ct clear action with cookie of max length",
+ "category": [
+ "actions",
+ "ct"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ct",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ct clear index 42 cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action ct",
+ "matchPattern": "action order [0-9]*: ct clear pipe.*index 42 ref.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ct"
+ ]
+ },
+ {
"id": "5bea",
"name": "Try ct with zone",
"category": [
@@ -310,5 +358,53 @@
"teardown": [
"$TC actions flush action ct"
]
+ },
+ {
+ "id": "2faa",
+ "name": "Try ct with mark + mask and cookie",
+ "category": [
+ "actions",
+ "ct"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ct",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ct mark 0x42/0xf0 index 42 cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action ct",
+ "matchPattern": "action order [0-9]*: ct mark 66/0xf0 zone 0 pipe.*index 42 ref.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ct"
+ ]
+ },
+ {
+ "id": "3991",
+ "name": "Add simple ct action with no_percpu flag",
+ "category": [
+ "actions",
+ "ct"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ct",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ct no_percpu",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action ct",
+ "matchPattern": "action order [0-9]*: ct zone 0 pipe.*no_percpu",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ct"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
index 814b7a8a478b..b24494c6f546 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
@@ -585,5 +585,29 @@
"teardown": [
"$TC actions flush action gact"
]
+ },
+ {
+ "id": "95ad",
+ "name": "Add gact pass action with no_percpu flag",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pass no_percpu",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action pass.*no_percpu",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
index 2232b21e2510..12a2fe0e1472 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
@@ -553,5 +553,29 @@
"matchPattern": "^[ \t]+index [0-9]+ ref",
"matchCount": "0",
"teardown": []
+ },
+ {
+ "id": "31e3",
+ "name": "Add mirred mirror to egress action with no_percpu flag",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred egress mirror dev lo no_percpu",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(Egress Mirror to device lo\\).*no_percpu",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mpls.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mpls.json
index e31a080edc49..866f0efd0859 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/mpls.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mpls.json
@@ -168,6 +168,54 @@
]
},
{
+ "id": "09d2",
+ "name": "Add mpls dec_ttl action with opcode and cookie",
+ "category": [
+ "actions",
+ "mpls"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mpls",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mpls dec_ttl pipe index 8 cookie aabbccddeeff",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action mpls",
+ "matchPattern": "action order [0-9]+: mpls.*dec_ttl pipe.*index 8 ref.*cookie aabbccddeeff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mpls"
+ ]
+ },
+ {
+ "id": "c170",
+ "name": "Add mpls dec_ttl action with opcode and cookie of max length",
+ "category": [
+ "actions",
+ "mpls"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mpls",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mpls dec_ttl continue index 8 cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action mpls",
+ "matchPattern": "action order [0-9]+: mpls.*dec_ttl continue.*index 8 ref.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mpls"
+ ]
+ },
+ {
"id": "9118",
"name": "Add mpls dec_ttl action with invalid opcode",
"category": [
@@ -302,6 +350,30 @@
]
},
{
+ "id": "91fb",
+ "name": "Add mpls pop action with ip proto and cookie",
+ "category": [
+ "actions",
+ "mpls"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mpls",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mpls pop protocol ipv4 cookie 12345678",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action mpls",
+ "matchPattern": "action order [0-9]+: mpls.*pop.*protocol.*ip.*pipe.*ref 1.*cookie 12345678",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mpls"
+ ]
+ },
+ {
"id": "92fe",
"name": "Add mpls pop action with mpls proto",
"category": [
@@ -508,6 +580,30 @@
]
},
{
+ "id": "7c34",
+ "name": "Add mpls push action with label, tc ttl and cookie of max length",
+ "category": [
+ "actions",
+ "mpls"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mpls",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mpls push label 20 tc 3 ttl 128 cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action mpls",
+ "matchPattern": "action order [0-9]+: mpls.*push.*protocol.*mpls_uc.*label.*20.*tc.*3.*ttl.*128.*pipe.*ref 1.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mpls"
+ ]
+ },
+ {
"id": "16eb",
"name": "Add mpls push action with label and bos",
"category": [
@@ -828,6 +924,30 @@
]
},
{
+ "id": "77c1",
+ "name": "Add mpls mod action with mpls ttl and cookie",
+ "category": [
+ "actions",
+ "mpls"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mpls",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mpls mod ttl 128 cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action mpls",
+ "matchPattern": "action order [0-9]+: mpls.*modify.*ttl.*128.*pipe.*ref 1.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mpls"
+ ]
+ },
+ {
"id": "b80f",
"name": "Add mpls mod action with mpls max ttl",
"category": [
@@ -1037,6 +1157,31 @@
]
},
{
+ "id": "95a9",
+ "name": "Replace existing mpls push action with new label, tc, ttl and cookie",
+ "category": [
+ "actions",
+ "mpls"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mpls",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action mpls push label 20 tc 3 ttl 128 index 1 cookie aa11bb22cc33dd44ee55ff66aa11b1b2"
+ ],
+ "cmdUnderTest": "$TC actions replace action mpls push label 30 tc 2 ttl 125 pipe index 1 cookie aa11bb22cc33",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action mpls index 1",
+ "matchPattern": "action order [0-9]+: mpls.*push.*protocol.*mpls_uc.*label.*30 tc 2 ttl 125 pipe.*index 1.*cookie aa11bb22cc33",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mpls"
+ ]
+ },
+ {
"id": "6cce",
"name": "Delete mpls pop action",
"category": [
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json
index 0d319f1d01db..f8ea6f5fa8e9 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json
@@ -349,6 +349,281 @@
]
},
{
+ "id": "1762",
+ "name": "Add pedit action with RAW_OP offset u8 clear value",
+ "category": [
+ "actions",
+ "pedit",
+ "raw_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit munge offset 0 u8 clear",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+:.*pedit.*keys 1.*key #0.*at 0: val 00000000 mask 00ffffff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
+ "id": "bcee",
+ "name": "Add pedit action with RAW_OP offset u8 retain value",
+ "category": [
+ "actions",
+ "pedit",
+ "raw_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit munge offset 0 u8 set 0x11 retain 0x0f",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+:.*pedit.*keys 1.*key #0.*at 0: val 01000000 mask f0ffffff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
+ "id": "e89f",
+ "name": "Add pedit action with RAW_OP offset u16 retain value",
+ "category": [
+ "actions",
+ "pedit",
+ "raw_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit munge offset 0 u16 set 0x1122 retain 0xff00",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+:.*pedit.*keys 1.*key #0.*at 0: val 11000000 mask 00ffffff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
+ "id": "c282",
+ "name": "Add pedit action with RAW_OP offset u32 clear value",
+ "category": [
+ "actions",
+ "pedit",
+ "raw_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit munge offset 0 u32 clear",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+:.*pedit.*keys 1.*key #0.*at 0: val 00000000 mask 00000000",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
+ "id": "c422",
+ "name": "Add pedit action with RAW_OP offset u16 invert value",
+ "category": [
+ "actions",
+ "pedit",
+ "raw_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit munge offset 12 u16 invert",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+:.*pedit.*keys 1.*key #0.*at 12: val ffff0000 mask ffffffff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
+ "id": "d3d3",
+ "name": "Add pedit action with RAW_OP offset u32 invert value",
+ "category": [
+ "actions",
+ "pedit",
+ "raw_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit munge offset 12 u32 invert",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+:.*pedit.*keys 1.*key #0.*at 12: val ffffffff mask ffffffff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
+ "id": "57e5",
+ "name": "Add pedit action with RAW_OP offset u8 preserve value",
+ "category": [
+ "actions",
+ "pedit",
+ "raw_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit munge offset 0 u8 preserve",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+:.*pedit.*keys 1.*key #0.*at 0: val 00000000 mask ffffffff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
+ "id": "99e0",
+ "name": "Add pedit action with RAW_OP offset u16 preserve value",
+ "category": [
+ "actions",
+ "pedit",
+ "raw_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit munge offset 0 u16 preserve",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+:.*pedit.*keys 1.*key #0.*at 0: val 00000000 mask ffffffff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
+ "id": "1892",
+ "name": "Add pedit action with RAW_OP offset u32 preserve value",
+ "category": [
+ "actions",
+ "pedit",
+ "raw_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit munge offset 0 u32 preserve",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+:.*pedit.*keys 1.*key #0.*at 0: val 00000000 mask ffffffff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
+ "id": "4b60",
+ "name": "Add pedit action with RAW_OP negative offset u16/u32 set value",
+ "category": [
+ "actions",
+ "pedit",
+ "raw_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit munge offset -14 u16 set 0x0000 munge offset -12 u32 set 0x00000100 munge offset -8 u32 set 0x0aaf0100 munge offset -4 u32 set 0x0008eb06 pipe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+:.*pedit.*keys 4.*key #0.*at -16: val 00000000 mask ffff0000.*key #1.*at -12: val 00000100 mask 00000000.*key #2.*at -8: val 0aaf0100 mask 00000000.*key #3.*at -4: val 0008eb06 mask 00000000",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
+ "id": "a5a7",
+ "name": "Add pedit action with LAYERED_OP eth set src",
+ "category": [
+ "actions",
+ "pedit",
+ "layered_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit ex munge eth src set 11:22:33:44:55:66",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+: pedit action pass keys 2.*key #0 at eth\\+4: val 00001122 mask ffff0000.*key #1 at eth\\+8: val 33445566 mask 00000000",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
"id": "86d4",
"name": "Add pedit action with LAYERED_OP eth set src & dst",
"category": [
@@ -374,6 +649,31 @@
]
},
{
+ "id": "f8a9",
+ "name": "Add pedit action with LAYERED_OP eth set dst",
+ "category": [
+ "actions",
+ "pedit",
+ "layered_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit ex munge eth dst set 11:22:33:44:55:66",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+: pedit action pass keys 2.*key #0 at eth\\+0: val 11223344 mask 00000000.*key #1 at eth\\+4: val 55660000 mask 0000ffff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
"id": "c715",
"name": "Add pedit action with LAYERED_OP eth set src (INVALID)",
"category": [
@@ -399,6 +699,31 @@
]
},
{
+ "id": "8131",
+ "name": "Add pedit action with LAYERED_OP eth set dst (INVALID)",
+ "category": [
+ "actions",
+ "pedit",
+ "layered_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit ex munge eth dst set %e:11:m2:33:x4:-5",
+ "expExitCode": "255",
+ "verifyCmd": "/bin/true",
+ "matchPattern": " ",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
"id": "ba22",
"name": "Add pedit action with LAYERED_OP eth type set/clear sequence",
"category": [
@@ -424,6 +749,179 @@
]
},
{
+ "id": "dec4",
+ "name": "Add pedit action with LAYERED_OP eth set type (INVALID)",
+ "category": [
+ "actions",
+ "pedit",
+ "layered_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit ex munge eth type set 0xabcdef",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+: pedit action pass keys 1.*key #0 at eth+12: val ",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "ab06",
+ "name": "Add pedit action with LAYERED_OP eth add type",
+ "category": [
+ "actions",
+ "pedit",
+ "layered_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit ex munge eth type add 0x1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+: pedit action pass keys 1.*key #0 at eth\\+12: add 00010000 mask 0000ffff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
+ "id": "918d",
+ "name": "Add pedit action with LAYERED_OP eth invert src",
+ "category": [
+ "actions",
+ "pedit",
+ "layered_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit ex munge eth src invert",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+: pedit action pass keys 2.*key #0 at eth\\+4: val 0000ff00 mask ffff0000.*key #1 at eth\\+8: val 00000000 mask 00000000",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
+ "id": "a8d4",
+ "name": "Add pedit action with LAYERED_OP eth invert dst",
+ "category": [
+ "actions",
+ "pedit",
+ "layered_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit ex munge eth dst invert",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+: pedit action pass keys 2.*key #0 at eth\\+0: val ff000000 mask 00000000.*key #1 at eth\\+4: val 00000000 mask 0000ffff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
+ "id": "ee13",
+ "name": "Add pedit action with LAYERED_OP eth invert type",
+ "category": [
+ "actions",
+ "pedit",
+ "layered_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit ex munge eth type invert",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+: pedit action pass keys 1.*key #0 at eth\\+12: val ffff0000 mask ffffffff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
+ "id": "7588",
+ "name": "Add pedit action with LAYERED_OP ip set src",
+ "category": [
+ "actions",
+ "pedit",
+ "layered_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit munge ip src set 1.1.1.1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+: pedit action pass keys 1.*key #0 at 12: val 01010101 mask 00000000",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
+ "id": "0fa7",
+ "name": "Add pedit action with LAYERED_OP ip set dst",
+ "category": [
+ "actions",
+ "pedit",
+ "layered_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit munge ip dst set 2.2.2.2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+: pedit action pass keys 1.*key #0 at 16: val 02020202 mask 00000000",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
"id": "5810",
"name": "Add pedit action with LAYERED_OP ip set src & dst",
"category": [
@@ -599,6 +1097,206 @@
]
},
{
+ "id": "cc8a",
+ "name": "Add pedit action with LAYERED_OP ip set tos",
+ "category": [
+ "actions",
+ "pedit",
+ "layered_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit munge ip tos set 0x4 continue",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+: pedit action continue keys 1.*key #0 at 0: val 00040000 mask ff00ffff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
+ "id": "7a17",
+ "name": "Add pedit action with LAYERED_OP ip set precedence",
+ "category": [
+ "actions",
+ "pedit",
+ "layered_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit munge ip precedence set 3 jump 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+: pedit action jump 2 keys 1.*key #0 at 0: val 00030000 mask ff00ffff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
+ "id": "c3b6",
+ "name": "Add pedit action with LAYERED_OP ip add tos",
+ "category": [
+ "actions",
+ "pedit",
+ "layered_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit ex munge ip tos add 0x1 pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+: pedit action pass keys 1.*key #0 at ipv4\\+0: add 00010000 mask ff00ffff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
+ "id": "43d3",
+ "name": "Add pedit action with LAYERED_OP ip add precedence",
+ "category": [
+ "actions",
+ "pedit",
+ "layered_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit ex munge ip precedence add 0x1 pipe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+: pedit action pipe keys 1.*key #0 at ipv4\\+0: add 00010000 mask ff00ffff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
+ "id": "438e",
+ "name": "Add pedit action with LAYERED_OP ip clear tos",
+ "category": [
+ "actions",
+ "pedit",
+ "layered_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit munge ip tos clear continue",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+: pedit action continue keys 1.*key #0 at 0: val 00000000 mask ff00ffff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
+ "id": "6b1b",
+ "name": "Add pedit action with LAYERED_OP ip clear precedence",
+ "category": [
+ "actions",
+ "pedit",
+ "layered_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit munge ip precedence clear jump 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+: pedit action jump 2 keys 1.*key #0 at 0: val 00000000 mask ff00ffff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
+ "id": "824a",
+ "name": "Add pedit action with LAYERED_OP ip invert tos",
+ "category": [
+ "actions",
+ "pedit",
+ "layered_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit munge ip tos invert pipe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+: pedit action pipe keys 1.*key #0 at 0: val 00ff0000 mask ffffffff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
+ "id": "106f",
+ "name": "Add pedit action with LAYERED_OP ip invert precedence",
+ "category": [
+ "actions",
+ "pedit",
+ "layered_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit munge ip precedence invert reclassify",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+: pedit action reclassify keys 1.*key #0 at 0: val 00ff0000 mask ffffffff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
"id": "6829",
"name": "Add pedit action with LAYERED_OP beyond ip set dport & sport",
"category": [
@@ -674,6 +1372,56 @@
]
},
{
+ "id": "815c",
+ "name": "Add pedit action with LAYERED_OP ip6 set src",
+ "category": [
+ "actions",
+ "pedit",
+ "layered_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit ex munge ip6 src set 2001:0db8:0:f101::1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+: pedit action pass keys 4.*key #0 at ipv6\\+8: val 20010db8 mask 00000000.*key #1 at ipv6\\+12: val 0000f101 mask 00000000.*key #2 at ipv6\\+16: val 00000000 mask 00000000.*key #3 at ipv6\\+20: val 00000001 mask 00000000",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
+ "id": "4dae",
+ "name": "Add pedit action with LAYERED_OP ip6 set dst",
+ "category": [
+ "actions",
+ "pedit",
+ "layered_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit ex munge ip6 dst set 2001:0db8:0:f101::1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "action order [0-9]+: pedit action pass keys 4.*key #0 at ipv6\\+24: val 20010db8 mask 00000000.*key #1 at ipv6\\+28: val 0000f101 mask 00000000.*key #2 at ipv6\\+32: val 00000000 mask 00000000.*key #3 at ipv6\\+36: val 00000001 mask 00000000",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
"id": "fc1f",
"name": "Add pedit action with LAYERED_OP ip6 set src & dst",
"category": [
@@ -950,5 +1698,4 @@
"$TC actions flush action pedit"
]
}
-
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
index 28453a445fdb..fbeb9197697d 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
@@ -909,5 +909,29 @@
"teardown": [
"$TC actions flush action tunnel_key"
]
+ },
+ {
+ "id": "0cd2",
+ "name": "Add tunnel_key set action with no_percpu flag",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1 no_percpu",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action tunnel_key",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1.*no_percpu",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
index 6503b1ce091f..41d783254b08 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
@@ -807,5 +807,29 @@
"matchPattern": "^[ \t]+index [0-9]+ ref",
"matchCount": "0",
"teardown": []
+ },
+ {
+ "id": "1a3d",
+ "name": "Add vlan pop action with no_percpu flag",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop no_percpu",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*pop.*no_percpu",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json
new file mode 100644
index 000000000000..98a20faf3198
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json
@@ -0,0 +1,376 @@
+[
+ {
+ "id": "7a92",
+ "name": "Add basic filter with cmp ematch u8/link layer and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u8 at 0 layer link mask 0xff gt 10)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*cmp\\(u8 at 0 layer 0 mask 0xff gt 10\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "2e8a",
+ "name": "Add basic filter with cmp ematch u8/link layer with trans flag and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u8 at 0 layer link mask 0xff trans gt 10)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*cmp\\(u8 at 0 layer 0 mask 0xff trans gt 10\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4d9f",
+ "name": "Add basic filter with cmp ematch u16/link layer and a single action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u16 at 0 layer 0 mask 0xff00 lt 3)' action pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1.*cmp\\(u16 at 0 layer 0 mask 0xff00 lt 3\\).*action.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4943",
+ "name": "Add basic filter with cmp ematch u32/link layer and miltiple actions",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u32 at 4 layer link mask 0xff00ff00 eq 3)' action skbedit mark 7 pipe action gact drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1.*cmp\\(u32 at 4 layer 0 mask 0xff00ff00 eq 3\\).*action.*skbedit.*mark 7 pipe.*action.*gact action drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "7559",
+ "name": "Add basic filter with cmp ematch u8/network layer and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0xab protocol ip prio 11 basic match 'cmp(u8 at 0 layer 1 mask 0xff gt 10)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0xab prio 11 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 11 basic.*handle 0xab flowid 1:1.*cmp\\(u8 at 0 layer 1 mask 0xff gt 10\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "aff4",
+ "name": "Add basic filter with cmp ematch u8/network layer with trans flag and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0xab protocol ip prio 11 basic match 'cmp(u8 at 0 layer 1 mask 0xff trans gt 10)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0xab prio 11 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 11 basic.*handle 0xab flowid 1:1.*cmp\\(u8 at 0 layer 1 mask 0xff trans gt 10\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "c732",
+ "name": "Add basic filter with cmp ematch u16/network layer and a single action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0x100 protocol ip prio 100 basic match 'cmp(u16 at 0 layer network mask 0xff00 lt 3)' action pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0x100 prio 100 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 100 basic.*handle 0x100.*cmp\\(u16 at 0 layer 1 mask 0xff00 lt 3\\).*action.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "32d8",
+ "name": "Add basic filter with cmp ematch u32/network layer and miltiple actions",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0x112233 protocol ip prio 7 basic match 'cmp(u32 at 4 layer network mask 0xff00ff00 eq 3)' action skbedit mark 7 pipe action gact drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0x112233 prio 7 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 7 basic.*handle 0x112233.*cmp\\(u32 at 4 layer 1 mask 0xff00ff00 eq 3\\).*action.*skbedit.*mark 7 pipe.*action.*gact action drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "b99c",
+ "name": "Add basic filter with cmp ematch u8/transport layer and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u8 at 0 layer transport mask 0xff gt 10)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*cmp\\(u8 at 0 layer 2 mask 0xff gt 10\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "0752",
+ "name": "Add basic filter with cmp ematch u8/transport layer with trans flag and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u8 at 0 layer transport mask 0xff trans gt 10)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*cmp\\(u8 at 0 layer 2 mask 0xff trans gt 10\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "7e07",
+ "name": "Add basic filter with cmp ematch u16/transport layer and a single action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u16 at 0 layer 2 mask 0xff00 lt 3)' action pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1.*cmp\\(u16 at 0 layer 2 mask 0xff00 lt 3\\).*action.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "62d7",
+ "name": "Add basic filter with cmp ematch u32/transport layer and miltiple actions",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u32 at 4 layer transport mask 0xff00ff00 eq 3)' action skbedit mark 7 pipe action gact drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1.*cmp\\(u32 at 4 layer 2 mask 0xff00ff00 eq 3\\).*action.*skbedit.*mark 7 pipe.*action.*gact action drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "304b",
+ "name": "Add basic filter with NOT cmp ematch rule and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'not cmp(u8 at 0 layer link mask 0xff eq 3)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*NOT cmp\\(u8 at 0 layer 0 mask 0xff eq 3\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "8ecb",
+ "name": "Add basic filter with two ANDed cmp ematch rules and single action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u8 at 0 layer link mask 0xff eq 3) and cmp(u16 at 8 layer link mask 0x00ff gt 7)' action gact drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1.*cmp\\(u8 at 0 layer 0 mask 0xff eq 3\\).*AND cmp\\(u16 at 8 layer 0 mask 0xff gt 7\\).*action.*gact action drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "b1ad",
+ "name": "Add basic filter with two ORed cmp ematch rules and single action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u8 at 0 layer link mask 0xff eq 3) or cmp(u16 at 8 layer link mask 0x00ff gt 7)' action gact drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1.*cmp\\(u8 at 0 layer 0 mask 0xff eq 3\\).*OR cmp\\(u16 at 8 layer 0 mask 0xff gt 7\\).*action.*gact action drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4600",
+ "name": "Add basic filter with two ANDed cmp ematch rules and one ORed ematch rule and single action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u8 at 0 layer link mask 0xff eq 3) and cmp(u16 at 8 layer link mask 0x00ff gt 7) or cmp(u32 at 4 layer network mask 0xa0a0 lt 3)' action gact drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1.*cmp\\(u8 at 0 layer 0 mask 0xff eq 3\\).*AND cmp\\(u16 at 8 layer 0 mask 0xff gt 7\\).*OR cmp\\(u32 at 4 layer 1 mask 0xa0a0 lt 3\\).*action.*gact action drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "bc59",
+ "name": "Add basic filter with two ANDed cmp ematch rules and one NOT ORed ematch rule and single action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u8 at 0 layer link mask 0xff eq 3) and cmp(u16 at 8 layer link mask 0x00ff gt 7) or not cmp(u32 at 4 layer network mask 0xa0a0 lt 3)' action gact drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1.*cmp\\(u8 at 0 layer 0 mask 0xff eq 3\\).*AND cmp\\(u16 at 8 layer 0 mask 0xff gt 7\\).*OR NOT cmp\\(u32 at 4 layer 1 mask 0xa0a0 lt 3\\).*action.*gact action drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/concurrency.json b/tools/testing/selftests/tc-testing/tc-tests/filters/concurrency.json
index 9002714b1851..c2a433a4737e 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/concurrency.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/concurrency.json
@@ -12,7 +12,7 @@
"$TC qdisc add dev $DEV2 ingress",
"./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 10 add"
],
- "cmdUnderTest": "find $BATCH_DIR/add* -print | xargs -n 1 -P 10 $TC -b",
+ "cmdUnderTest": "bash -c \"find $BATCH_DIR/add* -print | xargs -n 1 -P 10 $TC -b\"",
"expExitCode": "0",
"verifyCmd": "$TC -s filter show dev $DEV2 ingress",
"matchPattern": "filter protocol ip pref 1 flower chain 0 handle",
@@ -37,7 +37,7 @@
"$TC -b $BATCH_DIR/add_0",
"./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 10 del"
],
- "cmdUnderTest": "find $BATCH_DIR/del* -print | xargs -n 1 -P 10 $TC -b",
+ "cmdUnderTest": "bash -c \"find $BATCH_DIR/del* -print | xargs -n 1 -P 10 $TC -b\"",
"expExitCode": "0",
"verifyCmd": "$TC -s filter show dev $DEV2 ingress",
"matchPattern": "filter protocol ip pref 1 flower chain 0 handle",
@@ -62,7 +62,7 @@
"$TC -b $BATCH_DIR/add_0",
"./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 10 replace"
],
- "cmdUnderTest": "find $BATCH_DIR/replace* -print | xargs -n 1 -P 10 $TC -b",
+ "cmdUnderTest": "bash -c \"find $BATCH_DIR/replace* -print | xargs -n 1 -P 10 $TC -b\"",
"expExitCode": "0",
"verifyCmd": "$TC -s filter show dev $DEV2 ingress",
"matchPattern": "filter protocol ip pref 1 flower chain 0 handle",
@@ -87,7 +87,7 @@
"$TC -b $BATCH_DIR/add_0",
"./tdc_multibatch.py -d $DEV2 $BATCH_DIR 100000 10 replace"
],
- "cmdUnderTest": "find $BATCH_DIR/replace* -print | xargs -n 1 -P 10 $TC -b",
+ "cmdUnderTest": "bash -c \"find $BATCH_DIR/replace* -print | xargs -n 1 -P 10 $TC -b\"",
"expExitCode": "0",
"verifyCmd": "$TC -s filter show dev $DEV2 ingress",
"matchPattern": "filter protocol ip pref 1 flower chain 0 handle",
@@ -112,7 +112,7 @@
"$TC -b $BATCH_DIR/add_0",
"./tdc_multibatch.py -d $DEV2 $BATCH_DIR 100000 10 del"
],
- "cmdUnderTest": "find $BATCH_DIR/del* -print | xargs -n 1 -P 10 $TC -f -b",
+ "cmdUnderTest": "bash -c \"find $BATCH_DIR/del* -print | xargs -n 1 -P 10 $TC -f -b\"",
"expExitCode": "123",
"verifyCmd": "$TC -s filter show dev $DEV2 ingress",
"matchPattern": "filter protocol ip pref 1 flower chain 0 handle",
@@ -134,11 +134,11 @@
"/bin/mkdir $BATCH_DIR",
"$TC qdisc add dev $DEV2 ingress",
"./tdc_multibatch.py -x init_ $DEV2 $BATCH_DIR 100000 5 add",
- "find $BATCH_DIR/init_* -print | xargs -n 1 -P 5 $TC -b",
+ "bash -c \"find $BATCH_DIR/init_* -print | xargs -n 1 -P 5 $TC -b\"",
"./tdc_multibatch.py -x par_ -a 500001 -m 5 $DEV2 $BATCH_DIR 100000 5 add",
"./tdc_multibatch.py -x par_ $DEV2 $BATCH_DIR 100000 5 del"
],
- "cmdUnderTest": "find $BATCH_DIR/par_* -print | xargs -n 1 -P 10 $TC -b",
+ "cmdUnderTest": "bash -c \"find $BATCH_DIR/par_* -print | xargs -n 1 -P 10 $TC -b\"",
"expExitCode": "0",
"verifyCmd": "$TC -s filter show dev $DEV2 ingress",
"matchPattern": "filter protocol ip pref 1 flower chain 0 handle",
@@ -160,11 +160,11 @@
"/bin/mkdir $BATCH_DIR",
"$TC qdisc add dev $DEV2 ingress",
"./tdc_multibatch.py -x init_ $DEV2 $BATCH_DIR 100000 10 add",
- "find $BATCH_DIR/init_* -print | xargs -n 1 -P 5 $TC -b",
+ "bash -c \"find $BATCH_DIR/init_* -print | xargs -n 1 -P 5 $TC -b\"",
"./tdc_multibatch.py -x par_ -a 500001 -m 5 $DEV2 $BATCH_DIR 100000 5 replace",
"./tdc_multibatch.py -x par_ $DEV2 $BATCH_DIR 100000 5 del"
],
- "cmdUnderTest": "find $BATCH_DIR/par_* -print | xargs -n 1 -P 10 $TC -b",
+ "cmdUnderTest": "bash -c \"find $BATCH_DIR/par_* -print | xargs -n 1 -P 10 $TC -b\"",
"expExitCode": "0",
"verifyCmd": "$TC -s filter show dev $DEV2 ingress",
"matchPattern": "filter protocol ip pref 1 flower chain 0 handle",
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json b/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json
new file mode 100644
index 000000000000..51799874a972
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json
@@ -0,0 +1,391 @@
+[
+ {
+ "id": "f62b",
+ "name": "Add ingress matchall filter for protocol ipv4 and action PASS",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*gact action pass.*ref 1 bind 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "7f09",
+ "name": "Add egress matchall filter for protocol ipv4 and action PASS",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY root handle 1: prio"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent 1: handle 0x1 prio 1 protocol ip matchall action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent 1: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent 1: protocol ip pref 1 matchall.*handle 0x1.*gact action pass.*ref 1 bind 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root handle 1: prio",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "0596",
+ "name": "Add ingress matchall filter for protocol ipv6 and action DROP",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv6 matchall action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ipv6 matchall",
+ "matchPattern": "^filter parent ffff: protocol ipv6 pref 1 matchall.*handle 0x1.*gact action drop.*ref 1 bind 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "41df",
+ "name": "Add egress matchall filter for protocol ipv6 and action DROP",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY root handle 1: prio"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent 1: handle 0x1 prio 1 protocol ipv6 matchall action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent 1: handle 1 prio 1 protocol ipv6 matchall",
+ "matchPattern": "^filter parent 1: protocol ipv6 pref 1 matchall.*handle 0x1.*gact action drop.*ref 1 bind 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root handle 1: prio",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "e1da",
+ "name": "Add ingress matchall filter for protocol ipv4 and action PASS with priority at 16-bit maximum",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 65535 protocol ipv4 matchall action pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 65535 protocol ipv4 matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 65535 matchall.*handle 0x1.*gact action pass.*ref 1 bind 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "3de5",
+ "name": "Add egress matchall filter for protocol ipv4 and action PASS with priority at 16-bit maximum",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY root handle 1: prio"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent 1: handle 0x1 prio 65535 protocol ipv4 matchall action pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent 1: handle 1 prio 65535 protocol ipv4 matchall",
+ "matchPattern": "^filter parent 1: protocol ip pref 65535 matchall.*handle 0x1.*gact action pass.*ref 1 bind 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root handle 1: prio",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "72d7",
+ "name": "Add ingress matchall filter for protocol ipv4 and action PASS with priority exceeding 16-bit maximum",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 655355 protocol ipv4 matchall action pass",
+ "expExitCode": "255",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 655355 protocol ipv4 matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 655355 matchall.*handle 0x1.*gact action pass.*ref 1 bind 1",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "41d3",
+ "name": "Add egress matchall filter for protocol ipv4 and action PASS with priority exceeding 16-bit maximum",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY root handle 1: prio"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent 1: handle 0x1 prio 655355 protocol ipv4 matchall action pass",
+ "expExitCode": "255",
+ "verifyCmd": "$TC filter get dev $DUMMY parent 1: handle 1 prio 655355 protocol ipv4 matchall",
+ "matchPattern": "^filter parent 1: protocol ip pref 655355 matchall.*handle 0x1.*gact action pass.*ref 1 bind 1",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root handle 1: prio",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "f755",
+ "name": "Add ingress matchall filter for all protocols and action CONTINUE with handle at 32-bit maximum",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0xffffffff prio 1 protocol all matchall action continue",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 0xffffffff prio 1 protocol all matchall",
+ "matchPattern": "^filter parent ffff: protocol all pref 1 matchall.*handle 0xffffffff.*gact action continue.*ref 1 bind 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "2c33",
+ "name": "Add egress matchall filter for all protocols and action CONTINUE with handle at 32-bit maximum",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY root handle 1: prio"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent 1: handle 0xffffffff prio 1 protocol all matchall action continue",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent 1: handle 0xffffffff prio 1 protocol all matchall",
+ "matchPattern": "^filter parent 1: protocol all pref 1 matchall.*handle 0xffffffff.*gact action continue.*ref 1 bind 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root handle 1: prio",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "0e4a",
+ "name": "Add ingress matchall filter for all protocols and action RECLASSIFY with skip_hw flag",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol all matchall skip_hw action reclassify",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 0x1 prio 1 protocol all matchall",
+ "matchPattern": "^filter parent ffff: protocol all pref 1 matchall.*handle 0x1.*skip_hw.*not_in_hw.*gact action reclassify.*ref 1 bind 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "7f60",
+ "name": "Add egress matchall filter for all protocols and action RECLASSIFY with skip_hw flag",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY root handle 1: prio"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent 1: handle 0x1 prio 1 protocol all matchall skip_hw action reclassify",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent 1: handle 0x1 prio 1 protocol all matchall",
+ "matchPattern": "^filter parent 1: protocol all pref 1 matchall.*handle 0x1.*skip_hw.*not_in_hw.*gact action reclassify.*ref 1 bind 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root handle 1: prio",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "8bd2",
+ "name": "Add ingress matchall filter for protocol ipv6 and action PASS with classid",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv6 matchall classid 1:1 action pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv6 matchall",
+ "matchPattern": "^filter parent ffff: protocol ipv6 pref 1 matchall.*handle 0x1.*flowid 1:1.*gact action pass.*ref 1 bind 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "2a4a",
+ "name": "Add ingress matchall filter for protocol ipv6 and action PASS with invalid classid",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv6 matchall classid 6789defg action pass",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv6 matchall",
+ "matchPattern": "^filter protocol ipv6 pref 1 matchall.*handle 0x1.*flowid 6789defg.*gact action pass.*ref 1 bind 1",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "eaf8",
+ "name": "Delete single ingress matchall filter",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv6 matchall classid 1:2 action pass"
+ ],
+ "cmdUnderTest": "$TC filter del dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv6 matchall",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv6 matchall",
+ "matchPattern": "^filter protocol ipv6 pref 1 matchall.*handle 0x1.*flowid 1:2.*gact action pass.*ref 1 bind 1",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "76ad",
+ "name": "Delete all ingress matchall filters",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol all matchall classid 1:2 action pass",
+ "$TC filter add dev $DUMMY parent ffff: handle 0x2 prio 2 protocol all matchall classid 1:3 action pass",
+ "$TC filter add dev $DUMMY parent ffff: handle 0x3 prio 3 protocol all matchall classid 1:4 action pass",
+ "$TC filter add dev $DUMMY parent ffff: handle 0x4 prio 4 protocol all matchall classid 1:5 action pass"
+ ],
+ "cmdUnderTest": "$TC filter del dev $DUMMY parent ffff:",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DUMMY parent ffff:",
+ "matchPattern": "^filter protocol all pref.*matchall.*handle.*flowid.*gact action pass",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "1eb9",
+ "name": "Delete single ingress matchall filter out of multiple",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol all matchall classid 1:2 action pass",
+ "$TC filter add dev $DUMMY parent ffff: handle 0x2 prio 2 protocol all matchall classid 1:3 action pass",
+ "$TC filter add dev $DUMMY parent ffff: handle 0x3 prio 3 protocol all matchall classid 1:4 action pass",
+ "$TC filter add dev $DUMMY parent ffff: handle 0x4 prio 4 protocol all matchall classid 1:5 action pass"
+ ],
+ "cmdUnderTest": "$TC filter del dev $DUMMY parent ffff: protocol all handle 0x2 prio 2 matchall",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DUMMY parent ffff:",
+ "matchPattern": "^filter protocol all pref 2 matchall.*handle 0x2 flowid 1:2.*gact action pass",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "6d63",
+ "name": "Delete ingress matchall filter by chain ID",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol all chain 1 matchall classid 1:1 action pass",
+ "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv4 chain 2 matchall classid 1:3 action continue"
+ ],
+ "cmdUnderTest": "$TC filter del dev $DUMMY parent ffff: chain 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DUMMY parent ffff:",
+ "matchPattern": "^filter protocol all pref 1 matchall chain 1 handle 0x1 flowid 1:1.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
index 0f89cd50a94b..8877f7b2b809 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
@@ -1,27 +1,5 @@
[
{
- "id": "e9a3",
- "name": "Add u32 with source match",
- "category": [
- "filter",
- "u32"
- ],
- "plugins": {
- "requires": "nsPlugin"
- },
- "setup": [
- "$TC qdisc add dev $DEV1 ingress"
- ],
- "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 u32 match ip src 127.0.0.1/32 flowid 1:1 action ok",
- "expExitCode": "0",
- "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
- "matchPattern": "match 7f000001/ffffffff at 12",
- "matchCount": "1",
- "teardown": [
- "$TC qdisc del dev $DEV1 ingress"
- ]
- },
- {
"id": "2638",
"name": "Add matchall and try to get it",
"category": [
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
new file mode 100644
index 000000000000..e09d3c0e307f
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
@@ -0,0 +1,205 @@
+[
+ {
+ "id": "afa9",
+ "name": "Add u32 with source match",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 ingress protocol ip prio 1 u32 match ip src 127.0.0.1/32 flowid 1:1 action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 ingress",
+ "matchPattern": "filter protocol ip pref 1 u32 chain (0[ ]+$|0 fh 800: ht divisor 1|0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1.*match 7f000001/ffffffff at 12)",
+ "matchCount": "3",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "6aa7",
+ "name": "Add/Replace u32 with source match and invalid indev",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter replace dev $DEV1 ingress protocol ip prio 1 u32 match ip src 127.0.0.1/32 indev notexist20 flowid 1:1 action ok",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter show dev $DEV1 ingress",
+ "matchPattern": "filter protocol ip pref 1 u32 chain 0",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "bc4d",
+ "name": "Replace valid u32 with source match and invalid indev",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 ingress protocol ip prio 1 u32 match ip src 127.0.0.3/32 flowid 1:3 action ok"
+ ],
+ "cmdUnderTest": "$TC filter replace dev $DEV1 ingress protocol ip prio 1 u32 match ip src 127.0.0.2/32 indev notexist20 flowid 1:2 action ok",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter show dev $DEV1 ingress",
+ "matchPattern": "filter protocol ip pref 1 u32 chain (0[ ]+$|0 fh 800: ht divisor 1|0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:3.*match 7f000003/ffffffff at 12)",
+ "matchCount": "3",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "648b",
+ "name": "Add u32 with custom hash table",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 ingress prio 99 handle 42: u32 divisor 256",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 ingress",
+ "matchPattern": "pref 99 u32 chain (0[ ]+$|0 fh 42: ht divisor 256|0 fh 800: ht divisor 1)",
+ "matchCount": "3",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "6658",
+ "name": "Add/Replace u32 with custom hash table and invalid handle",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter replace dev $DEV1 ingress prio 99 handle 42:42 u32 divisor 256",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter show dev $DEV1 ingress",
+ "matchPattern": "pref 99 u32 chain 0",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "9d0a",
+ "name": "Replace valid u32 with custom hash table and invalid handle",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 ingress prio 99 handle 42: u32 divisor 256"
+ ],
+ "cmdUnderTest": "$TC filter replace dev $DEV1 ingress prio 99 handle 42:42 u32 divisor 128",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter show dev $DEV1 ingress",
+ "matchPattern": "pref 99 u32 chain (0[ ]+$|0 fh 42: ht divisor 256|0 fh 800: ht divisor 1)",
+ "matchCount": "3",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "1644",
+ "name": "Add u32 filter that links to a custom hash table",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 ingress prio 99 handle 43: u32 divisor 256"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 ingress protocol ip prio 98 u32 link 43: hashkey mask 0x0000ff00 at 12 match ip src 192.168.0.0/16",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 ingress",
+ "matchPattern": "filter protocol ip pref 98 u32 chain (0[ ]+$|0 fh 801: ht divisor 1|0 fh 801::800 order 2048 key ht 801 bkt 0 link 43:.*match c0a80000/ffff0000 at 12.*hash mask 0000ff00 at 12)",
+ "matchCount": "3",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "74c2",
+ "name": "Add/Replace u32 filter with invalid hash table id",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter replace dev $DEV1 ingress protocol ip prio 20 u32 ht 47:47 action drop",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter show dev $DEV1 ingress",
+ "matchPattern": "filter protocol ip pref 20 u32 chain 0",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "1fe6",
+ "name": "Replace valid u32 filter with invalid hash table id",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 ingress protocol ip prio 99 handle 43: u32 divisor 1",
+ "$TC filter add dev $DEV1 ingress protocol ip prio 98 u32 ht 43: match tcp src 22 FFFF classid 1:3"
+ ],
+ "cmdUnderTest": "$TC filter replace dev $DEV1 ingress protocol ip prio 98 u32 ht 43:1 match tcp src 23 FFFF classid 1:4",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter show dev $DEV1 ingress",
+ "matchPattern": "filter protocol ip pref 99 u32 chain (0[ ]+$|0 fh (43|800): ht divisor 1|0 fh 43::800 order 2048 key ht 43 bkt 0 flowid 1:3.*match 00160000/ffff0000 at nexthdr\\+0)",
+ "matchCount": "4",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ets.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ets.json
new file mode 100644
index 000000000000..180593010675
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ets.json
@@ -0,0 +1,940 @@
+[
+ {
+ "id": "e90e",
+ "name": "Add ETS qdisc using bands",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .* bands 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "b059",
+ "name": "Add ETS qdisc using quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 900 800 700",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*bands 4 quanta 1000 900 800 700",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "e8e7",
+ "name": "Add ETS qdisc using strict",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 3",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*bands 3 strict 3",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "233c",
+ "name": "Add ETS qdisc using bands + quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4 quanta 1000 900 800 700",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*bands 4 quanta 1000 900 800 700 priomap",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "3d35",
+ "name": "Add ETS qdisc using bands + strict",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 3 strict 3",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*bands 3 strict 3 priomap",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "7f3b",
+ "name": "Add ETS qdisc using strict + quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 3 quanta 1500 750",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*bands 5 strict 3 quanta 1500 750 priomap",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "4593",
+ "name": "Add ETS qdisc using strict 0 + quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 0 quanta 1500 750",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*bands 2 quanta 1500 750 priomap",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "8938",
+ "name": "Add ETS qdisc using bands + strict + quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 5 strict 3 quanta 1500 750",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*bands 5 .*strict 3 quanta 1500 750 priomap",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "0782",
+ "name": "Add ETS qdisc with more bands than quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 quanta 1000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*bands 2 .*quanta 1000 [1-9][0-9]* priomap",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "501b",
+ "name": "Add ETS qdisc with more bands than strict",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 3 strict 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*bands 3 strict 1 quanta ([1-9][0-9]* ){2}priomap",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "671a",
+ "name": "Add ETS qdisc with more bands than strict + quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 3 strict 1 quanta 1000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*bands 3 strict 1 quanta 1000 [1-9][0-9]* priomap",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "2a23",
+ "name": "Add ETS qdisc with 16 bands",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 16",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .* bands 16",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "8daf",
+ "name": "Add ETS qdisc with 17 bands",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 17",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "7f95",
+ "name": "Add ETS qdisc with 17 strict",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 17",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "837a",
+ "name": "Add ETS qdisc with 16 quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .* bands 16",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "65b6",
+ "name": "Add ETS qdisc with 17 quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "b9e9",
+ "name": "Add ETS qdisc with 16 strict + quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 8 quanta 1 2 3 4 5 6 7 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .* bands 16",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "9877",
+ "name": "Add ETS qdisc with 17 strict + quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 9 quanta 1 2 3 4 5 6 7 8",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "c696",
+ "name": "Add ETS qdisc with priomap",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 5 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "30c4",
+ "name": "Add ETS qdisc with quanta + priomap",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 2000 3000 4000 5000 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*quanta 1000 2000 3000 4000 5000 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "e8ac",
+ "name": "Add ETS qdisc with strict + priomap",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 5 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*bands 5 strict 5 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "5a7e",
+ "name": "Add ETS qdisc with quanta + strict + priomap",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 2 quanta 1000 2000 3000 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*strict 2 quanta 1000 2000 3000 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "cb8b",
+ "name": "Show ETS class :1",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 4000 3000 2000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY classid 1:1",
+ "matchPattern": "class ets 1:1 root quantum 4000",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "1b4e",
+ "name": "Show ETS class :2",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 4000 3000 2000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY classid 1:2",
+ "matchPattern": "class ets 1:2 root quantum 3000",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "f642",
+ "name": "Show ETS class :3",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 4000 3000 2000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY classid 1:3",
+ "matchPattern": "class ets 1:3 root quantum 2000",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "0a5f",
+ "name": "Show ETS strict class",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 3",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY classid 1:1",
+ "matchPattern": "class ets 1:1 root $",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "f7c8",
+ "name": "Add ETS qdisc with too many quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 quanta 1000 2000 3000",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "2389",
+ "name": "Add ETS qdisc with too many strict",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 strict 3",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "fe3c",
+ "name": "Add ETS qdisc with too many strict + quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4 strict 2 quanta 1000 2000 3000",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "cb04",
+ "name": "Add ETS qdisc with excess priomap elements",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 5 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0 1 2",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "c32e",
+ "name": "Add ETS qdisc with priomap above bands",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 priomap 0 1 2",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "744c",
+ "name": "Add ETS qdisc with priomap above quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 500 priomap 0 1 2",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "7b33",
+ "name": "Add ETS qdisc with priomap above strict",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 2 priomap 0 1 2",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "dbe6",
+ "name": "Add ETS qdisc with priomap above strict + quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 1 quanta 1000 500 priomap 0 1 2 3",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "bdb2",
+ "name": "Add ETS qdisc with priomap within bands with strict + quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4 strict 1 quanta 1000 500 priomap 0 1 2 3",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "1",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "39a3",
+ "name": "Add ETS qdisc with priomap above bands with strict + quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4 strict 1 quanta 1000 500 priomap 0 1 2 3 4",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "557c",
+ "name": "Unset priorities default to the last band",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4 priomap 0 0 0 0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets .*priomap 0 0 0 0 3 3 3 3 3 3 3 3 3 3 3 3",
+ "matchCount": "1",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "a347",
+ "name": "Unset priorities default to the last band -- no priomap",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets .*priomap 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3",
+ "matchCount": "1",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "39c4",
+ "name": "Add ETS qdisc with too few bands",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 0",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "930b",
+ "name": "Add ETS qdisc with too many bands",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 17",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "406a",
+ "name": "Add ETS qdisc without parameters",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "e51a",
+ "name": "Zero element in quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 0 800 700",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "e7f2",
+ "name": "Sole zero element in quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 0",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "d6e6",
+ "name": "No values after the quanta keyword",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta",
+ "expExitCode": "255",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "28c6",
+ "name": "Change ETS band quantum",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 2000 3000"
+ ],
+ "cmdUnderTest": "$TC class change dev $DUMMY classid 1:1 ets quantum 1500",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*quanta 1500 2000 3000 priomap ",
+ "matchCount": "1",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "4714",
+ "name": "Change ETS band without quantum",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 2000 3000"
+ ],
+ "cmdUnderTest": "$TC class change dev $DUMMY classid 1:1 ets",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*quanta 1000 2000 3000 priomap ",
+ "matchCount": "1",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "6979",
+ "name": "Change quantum of a strict ETS band",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY handle 1: root ets strict 5"
+ ],
+ "cmdUnderTest": "$TC class change dev $DUMMY classid 1:2 ets quantum 1500",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets .*bands 5 .*strict 5",
+ "matchCount": "1",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "9a7d",
+ "name": "Change ETS strict band without quantum",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY handle 1: root ets strict 5"
+ ],
+ "cmdUnderTest": "$TC class change dev $DUMMY classid 1:2 ets",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets .*bands 5 .*strict 5",
+ "matchCount": "1",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json
new file mode 100644
index 000000000000..5ecd93b4c473
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json
@@ -0,0 +1,304 @@
+[
+ {
+ "id": "a519",
+ "name": "Add bfifo qdisc with system default parameters on egress",
+ "__comment": "When omitted, queue size in bfifo is calculated as: txqueuelen * (MTU + LinkLayerHdrSize), where LinkLayerHdrSize=14 for Ethernet",
+ "category": [
+ "qdisc",
+ "fifo"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root bfifo",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc bfifo 1: root.*limit [0-9]+b",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root bfifo",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "585c",
+ "name": "Add pfifo qdisc with system default parameters on egress",
+ "__comment": "When omitted, queue size in pfifo is defaulted to the interface's txqueuelen value.",
+ "category": [
+ "qdisc",
+ "fifo"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root pfifo",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc pfifo 1: root.*limit [0-9]+p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root pfifo",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "a86e",
+ "name": "Add bfifo qdisc with system default parameters on egress with handle of maximum value",
+ "category": [
+ "qdisc",
+ "fifo"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY root handle ffff: bfifo",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc bfifo ffff: root.*limit [0-9]+b",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle ffff: root bfifo",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "9ac8",
+ "name": "Add bfifo qdisc on egress with queue size of 3000 bytes",
+ "category": [
+ "qdisc",
+ "fifo"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root bfifo limit 3000b",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc bfifo 1: root.*limit 3000b",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root bfifo",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "f4e6",
+ "name": "Add pfifo qdisc on egress with queue size of 3000 packets",
+ "category": [
+ "qdisc",
+ "fifo"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY txqueuelen 3000 type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root pfifo limit 3000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc pfifo 1: root.*limit 3000p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root pfifo",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "b1b1",
+ "name": "Add bfifo qdisc with system default parameters on egress with invalid handle exceeding maximum value",
+ "category": [
+ "qdisc",
+ "fifo"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY root handle 10000: bfifo",
+ "expExitCode": "255",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc bfifo 10000: root.*limit [0-9]+b",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "8d5e",
+ "name": "Add bfifo qdisc on egress with unsupported argument",
+ "category": [
+ "qdisc",
+ "fifo"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root bfifo foorbar",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc bfifo 1: root",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "7787",
+ "name": "Add pfifo qdisc on egress with unsupported argument",
+ "category": [
+ "qdisc",
+ "fifo"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root pfifo foorbar",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc pfifo 1: root",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "c4b6",
+ "name": "Replace bfifo qdisc on egress with new queue size",
+ "category": [
+ "qdisc",
+ "fifo"
+ ],
+ "setup": [
+ "$IP link del dev $DUMMY type dummy || /bin/true",
+ "$IP link add dev $DUMMY txqueuelen 1000 type dummy",
+ "$TC qdisc add dev $DUMMY handle 1: root bfifo"
+ ],
+ "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root bfifo limit 3000b",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc bfifo 1: root.*limit 3000b",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root bfifo",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "3df6",
+ "name": "Replace pfifo qdisc on egress with new queue size",
+ "category": [
+ "qdisc",
+ "fifo"
+ ],
+ "setup": [
+ "$IP link del dev $DUMMY type dummy || /bin/true",
+ "$IP link add dev $DUMMY txqueuelen 1000 type dummy",
+ "$TC qdisc add dev $DUMMY handle 1: root pfifo"
+ ],
+ "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root pfifo limit 30",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc pfifo 1: root.*limit 30p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root pfifo",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "7a67",
+ "name": "Add bfifo qdisc on egress with queue size in invalid format",
+ "category": [
+ "qdisc",
+ "fifo"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root bfifo limit foo-bar",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc bfifo 1: root.*limit foo-bar",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "1298",
+ "name": "Add duplicate bfifo qdisc on egress",
+ "category": [
+ "qdisc",
+ "fifo"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY handle 1: root bfifo"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root bfifo",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc bfifo 1: root",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root bfifo",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "45a0",
+ "name": "Delete nonexistent bfifo qdisc",
+ "category": [
+ "qdisc",
+ "fifo"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY root handle 1: bfifo",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc bfifo 1: root",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "972b",
+ "name": "Add prio qdisc on egress with invalid format for handles",
+ "category": [
+ "qdisc",
+ "fifo"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY root handle 123^ bfifo limit 100b",
+ "expExitCode": "255",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc bfifo 123 root",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "4d39",
+ "name": "Delete bfifo qdisc twice",
+ "category": [
+ "qdisc",
+ "fifo"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY root handle 1: bfifo",
+ "$TC qdisc del dev $DUMMY root handle 1: bfifo"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root bfifo",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc bfifo 1: root",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json
index f518c55f468b..d99dba6e2b1a 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json
@@ -7,16 +7,16 @@
"ingress"
],
"setup": [
- "$IP link add dev $DEV1 type dummy || /bin/true"
+ "$IP link add dev $DUMMY type dummy || /bin/true"
],
- "cmdUnderTest": "$TC qdisc add dev $DEV1 ingress",
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY ingress",
"expExitCode": "0",
- "verifyCmd": "$TC qdisc show dev $DEV1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
"matchPattern": "qdisc ingress ffff:",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DEV1 ingress",
- "$IP link del dev $DEV1 type dummy"
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -27,15 +27,15 @@
"ingress"
],
"setup": [
- "$IP link add dev $DEV1 type dummy || /bin/true"
+ "$IP link add dev $DUMMY type dummy || /bin/true"
],
- "cmdUnderTest": "$TC qdisc add dev $DEV1 ingress foorbar",
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY ingress foorbar",
"expExitCode": "1",
- "verifyCmd": "$TC qdisc show dev $DEV1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
"matchPattern": "qdisc ingress ffff:",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DEV1 type dummy"
+ "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -46,17 +46,17 @@
"ingress"
],
"setup": [
- "$IP link add dev $DEV1 type dummy || /bin/true",
- "$TC qdisc add dev $DEV1 ingress"
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress"
],
- "cmdUnderTest": "$TC qdisc add dev $DEV1 ingress",
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY ingress",
"expExitCode": "2",
- "verifyCmd": "$TC qdisc show dev $DEV1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
"matchPattern": "qdisc ingress ffff:",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DEV1 ingress",
- "$IP link del dev $DEV1 type dummy"
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -67,15 +67,15 @@
"ingress"
],
"setup": [
- "$IP link add dev $DEV1 type dummy || /bin/true"
+ "$IP link add dev $DUMMY type dummy || /bin/true"
],
- "cmdUnderTest": "$TC qdisc del dev $DEV1 ingress",
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY ingress",
"expExitCode": "2",
- "verifyCmd": "$TC qdisc show dev $DEV1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
"matchPattern": "qdisc ingress ffff:",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DEV1 type dummy"
+ "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -86,17 +86,17 @@
"ingress"
],
"setup": [
- "$IP link add dev $DEV1 type dummy || /bin/true",
- "$TC qdisc add dev $DEV1 ingress",
- "$TC qdisc del dev $DEV1 ingress"
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC qdisc del dev $DUMMY ingress"
],
- "cmdUnderTest": "$TC qdisc del dev $DEV1 ingress",
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY ingress",
"expExitCode": "2",
- "verifyCmd": "$TC qdisc show dev $DEV1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
"matchPattern": "qdisc ingress ffff:",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DEV1 type dummy"
+ "$IP link del dev $DUMMY type dummy"
]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json
index 9c792fa8ca23..3076c02d08d6 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json
@@ -7,16 +7,16 @@
"prio"
],
"setup": [
- "$IP link add dev $DEV1 type dummy || /bin/true"
+ "$IP link add dev $DUMMY type dummy || /bin/true"
],
- "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio",
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio",
"expExitCode": "0",
- "verifyCmd": "$TC qdisc show dev $DEV1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
"matchPattern": "qdisc prio 1: root",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DEV1 handle 1: root prio",
- "$IP link del dev $DEV1 type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root prio",
+ "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -27,15 +27,15 @@
"prio"
],
"setup": [
- "$IP link add dev $DEV1 type dummy || /bin/true"
+ "$IP link add dev $DUMMY type dummy || /bin/true"
],
- "cmdUnderTest": "$TC qdisc add dev $DEV1 root handle ffff: prio",
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY root handle ffff: prio",
"expExitCode": "0",
- "verifyCmd": "$TC qdisc show dev $DEV1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
"matchPattern": "qdisc prio ffff: root",
"matchCount": "1",
"teardown": [
- "$IP link del dev $DEV1 type dummy"
+ "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -46,15 +46,15 @@
"prio"
],
"setup": [
- "$IP link add dev $DEV1 type dummy || /bin/true"
+ "$IP link add dev $DUMMY type dummy || /bin/true"
],
- "cmdUnderTest": "$TC qdisc add dev $DEV1 root handle 10000: prio",
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY root handle 10000: prio",
"expExitCode": "255",
- "verifyCmd": "$TC qdisc show dev $DEV1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
"matchPattern": "qdisc prio 10000: root",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DEV1 type dummy"
+ "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -65,15 +65,15 @@
"prio"
],
"setup": [
- "$IP link add dev $DEV1 type dummy || /bin/true"
+ "$IP link add dev $DUMMY type dummy || /bin/true"
],
- "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio foorbar",
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio foorbar",
"expExitCode": "1",
- "verifyCmd": "$TC qdisc show dev $DEV1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
"matchPattern": "qdisc prio 1: root",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DEV1 type dummy"
+ "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -84,16 +84,16 @@
"prio"
],
"setup": [
- "$IP link add dev $DEV1 type dummy || /bin/true"
+ "$IP link add dev $DUMMY type dummy || /bin/true"
],
- "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio bands 4 priomap 1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0",
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio bands 4 priomap 1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0",
"expExitCode": "0",
- "verifyCmd": "$TC qdisc show dev $DEV1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
"matchPattern": "qdisc prio 1: root.*bands 4 priomap.*1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DEV1 handle 1: root prio",
- "$IP link del dev $DEV1 type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root prio",
+ "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -104,15 +104,15 @@
"prio"
],
"setup": [
- "$IP link add dev $DEV1 type dummy || /bin/true"
+ "$IP link add dev $DUMMY type dummy || /bin/true"
],
- "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio bands 4 priomap 1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0 1 1",
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio bands 4 priomap 1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0 1 1",
"expExitCode": "1",
- "verifyCmd": "$TC qdisc show dev $DEV1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
"matchPattern": "qdisc prio 1: root.*bands 4 priomap.*1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0 1 1",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DEV1 type dummy"
+ "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -123,15 +123,15 @@
"prio"
],
"setup": [
- "$IP link add dev $DEV1 type dummy || /bin/true"
+ "$IP link add dev $DUMMY type dummy || /bin/true"
],
- "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio bands 4 priomap 1 1 2 2 7 5 0 0 1 2 3 0 0 0 0 0",
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio bands 4 priomap 1 1 2 2 7 5 0 0 1 2 3 0 0 0 0 0",
"expExitCode": "1",
- "verifyCmd": "$TC qdisc show dev $DEV1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
"matchPattern": "qdisc prio 1: root.*bands 4 priomap.*1 1 2 2 7 5 0 0 1 2 3 0 0 0 0 0",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DEV1 type dummy"
+ "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -142,15 +142,15 @@
"prio"
],
"setup": [
- "$IP link add dev $DEV1 type dummy || /bin/true"
+ "$IP link add dev $DUMMY type dummy || /bin/true"
],
- "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio bands 1 priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio bands 1 priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
"expExitCode": "2",
- "verifyCmd": "$TC qdisc show dev $DEV1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
"matchPattern": "qdisc prio 1: root.*bands 1 priomap.*0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DEV1 type dummy"
+ "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -161,15 +161,15 @@
"prio"
],
"setup": [
- "$IP link add dev $DEV1 type dummy || /bin/true"
+ "$IP link add dev $DUMMY type dummy || /bin/true"
],
- "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio bands 1024 priomap 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16",
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio bands 1024 priomap 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16",
"expExitCode": "2",
- "verifyCmd": "$TC qdisc show dev $DEV1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
"matchPattern": "qdisc prio 1: root.*bands 1024 priomap.*1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DEV1 type dummy"
+ "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -180,17 +180,17 @@
"prio"
],
"setup": [
- "$IP link add dev $DEV1 type dummy || /bin/true",
- "$TC qdisc add dev $DEV1 handle 1: root prio"
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY handle 1: root prio"
],
- "cmdUnderTest": "$TC qdisc replace dev $DEV1 handle 1: root prio bands 8 priomap 1 1 2 2 3 3 4 4 5 5 6 6 7 7 0 0",
+ "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root prio bands 8 priomap 1 1 2 2 3 3 4 4 5 5 6 6 7 7 0 0",
"expExitCode": "0",
- "verifyCmd": "$TC qdisc show dev $DEV1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
"matchPattern": "qdisc prio 1: root.*bands 8 priomap.*1 1 2 2 3 3 4 4 5 5 6 6 7 7 0 0",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DEV1 handle 1: root prio",
- "$IP link del dev $DEV1 type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root prio",
+ "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -201,17 +201,17 @@
"prio"
],
"setup": [
- "$IP link add dev $DEV1 type dummy || /bin/true",
- "$TC qdisc add dev $DEV1 handle 1: root prio"
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY handle 1: root prio"
],
- "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio",
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio",
"expExitCode": "2",
- "verifyCmd": "$TC qdisc show dev $DEV1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
"matchPattern": "qdisc prio 1: root",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DEV1 handle 1: root prio",
- "$IP link del dev $DEV1 type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root prio",
+ "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -222,15 +222,15 @@
"prio"
],
"setup": [
- "$IP link add dev $DEV1 type dummy || /bin/true"
+ "$IP link add dev $DUMMY type dummy || /bin/true"
],
- "cmdUnderTest": "$TC qdisc del dev $DEV1 root handle 1: prio",
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY root handle 1: prio",
"expExitCode": "2",
- "verifyCmd": "$TC qdisc show dev $DEV1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
"matchPattern": "qdisc prio 1: root",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DEV1 type dummy"
+ "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -241,15 +241,15 @@
"prio"
],
"setup": [
- "$IP link add dev $DEV1 type dummy || /bin/true"
+ "$IP link add dev $DUMMY type dummy || /bin/true"
],
- "cmdUnderTest": "$TC qdisc add dev $DEV1 root handle 123^ prio",
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY root handle 123^ prio",
"expExitCode": "255",
- "verifyCmd": "$TC qdisc show dev $DEV1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
"matchPattern": "qdisc prio 123 root",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DEV1 type dummy"
+ "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -260,17 +260,17 @@
"prio"
],
"setup": [
- "$IP link add dev $DEV1 type dummy || /bin/true",
- "$TC qdisc add dev $DEV1 root handle 1: prio",
- "$TC qdisc del dev $DEV1 root handle 1: prio"
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY root handle 1: prio",
+ "$TC qdisc del dev $DUMMY root handle 1: prio"
],
- "cmdUnderTest": "$TC qdisc del dev $DEV1 handle 1: root prio",
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root prio",
"expExitCode": "2",
- "verifyCmd": "$TC qdisc show dev $DEV1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
"matchPattern": "qdisc ingress ffff:",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DEV1 type dummy"
+ "$IP link del dev $DUMMY type dummy"
]
}
]
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index f04321ace9fb..e566c70e64a1 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -356,12 +356,14 @@ def test_runner(pm, args, filtered_tests):
time.sleep(2)
for tidx in testlist:
if "flower" in tidx["category"] and args.device == None:
+ errmsg = "Tests using the DEV2 variable must define the name of a "
+ errmsg += "physical NIC with the -d option when running tdc.\n"
+ errmsg += "Test has been skipped."
if args.verbose > 1:
- print('Not executing test {} {} because DEV2 not defined'.
- format(tidx['id'], tidx['name']))
+ print(errmsg)
res = TestResult(tidx['id'], tidx['name'])
res.set_result(ResultState.skip)
- res.set_errormsg('Not executed because DEV2 is not defined')
+ res.set_errormsg(errmsg)
tsr.add_resultdata(res)
continue
try:
@@ -499,7 +501,9 @@ def set_args(parser):
choices=['none', 'xunit', 'tap'],
help='Specify the format for test results. (Default: TAP)')
parser.add_argument('-d', '--device',
- help='Execute the test case in flower category')
+ help='Execute test cases that use a physical device, ' +
+ 'where DEVICE is its name. (If not defined, tests ' +
+ 'that require a physical device will be skipped)')
parser.add_argument(
'-P', '--pause', action='store_true',
help='Pause execution just before post-suite stage')
diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py
index b771d4c89621..080709cc4297 100644
--- a/tools/testing/selftests/tc-testing/tdc_config.py
+++ b/tools/testing/selftests/tc-testing/tdc_config.py
@@ -16,6 +16,7 @@ NAMES = {
'DEV0': 'v0p0',
'DEV1': 'v0p1',
'DEV2': '',
+ 'DUMMY': 'dummy1',
'BATCH_FILE': './batch.txt',
'BATCH_DIR': 'tmp',
# Length of time in seconds to wait before terminating a command
diff --git a/tools/testing/selftests/timens/.gitignore b/tools/testing/selftests/timens/.gitignore
new file mode 100644
index 000000000000..789f21e81028
--- /dev/null
+++ b/tools/testing/selftests/timens/.gitignore
@@ -0,0 +1,8 @@
+clock_nanosleep
+exec
+gettime_perf
+gettime_perf_cold
+procfs
+timens
+timer
+timerfd
diff --git a/tools/testing/selftests/timens/Makefile b/tools/testing/selftests/timens/Makefile
new file mode 100644
index 000000000000..e9fb30bd8aeb
--- /dev/null
+++ b/tools/testing/selftests/timens/Makefile
@@ -0,0 +1,7 @@
+TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec
+TEST_GEN_PROGS_EXTENDED := gettime_perf
+
+CFLAGS := -Wall -Werror -pthread
+LDFLAGS := -lrt -ldl
+
+include ../lib.mk
diff --git a/tools/testing/selftests/timens/clock_nanosleep.c b/tools/testing/selftests/timens/clock_nanosleep.c
new file mode 100644
index 000000000000..8e7b7c72ef65
--- /dev/null
+++ b/tools/testing/selftests/timens/clock_nanosleep.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <sys/timerfd.h>
+#include <sys/syscall.h>
+#include <time.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <pthread.h>
+#include <signal.h>
+#include <string.h>
+
+#include "log.h"
+#include "timens.h"
+
+void test_sig(int sig)
+{
+ if (sig == SIGUSR2)
+ pthread_exit(NULL);
+}
+
+struct thread_args {
+ struct timespec *now, *rem;
+ pthread_mutex_t *lock;
+ int clockid;
+ int abs;
+};
+
+void *call_nanosleep(void *_args)
+{
+ struct thread_args *args = _args;
+
+ clock_nanosleep(args->clockid, args->abs ? TIMER_ABSTIME : 0, args->now, args->rem);
+ pthread_mutex_unlock(args->lock);
+ return NULL;
+}
+
+int run_test(int clockid, int abs)
+{
+ struct timespec now = {}, rem;
+ struct thread_args args = { .now = &now, .rem = &rem, .clockid = clockid};
+ struct timespec start;
+ pthread_mutex_t lock;
+ pthread_t thread;
+ int j, ok, ret;
+
+ signal(SIGUSR1, test_sig);
+ signal(SIGUSR2, test_sig);
+
+ pthread_mutex_init(&lock, NULL);
+ pthread_mutex_lock(&lock);
+
+ if (clock_gettime(clockid, &start) == -1) {
+ if (errno == EINVAL && check_skip(clockid))
+ return 0;
+ return pr_perror("clock_gettime");
+ }
+
+
+ if (abs) {
+ now.tv_sec = start.tv_sec;
+ now.tv_nsec = start.tv_nsec;
+ }
+
+ now.tv_sec += 3600;
+ args.abs = abs;
+ args.lock = &lock;
+ ret = pthread_create(&thread, NULL, call_nanosleep, &args);
+ if (ret != 0) {
+ pr_err("Unable to create a thread: %s", strerror(ret));
+ return 1;
+ }
+
+ /* Wait when the thread will call clock_nanosleep(). */
+ ok = 0;
+ for (j = 0; j < 8; j++) {
+ /* The maximum timeout is about 5 seconds. */
+ usleep(10000 << j);
+
+ /* Try to interrupt clock_nanosleep(). */
+ pthread_kill(thread, SIGUSR1);
+
+ usleep(10000 << j);
+ /* Check whether clock_nanosleep() has been interrupted or not. */
+ if (pthread_mutex_trylock(&lock) == 0) {
+ /**/
+ ok = 1;
+ break;
+ }
+ }
+ if (!ok)
+ pthread_kill(thread, SIGUSR2);
+ pthread_join(thread, NULL);
+ pthread_mutex_destroy(&lock);
+
+ if (!ok) {
+ ksft_test_result_pass("clockid: %d abs:%d timeout\n", clockid, abs);
+ return 1;
+ }
+
+ if (rem.tv_sec < 3300 || rem.tv_sec > 3900) {
+ pr_fail("clockid: %d abs: %d remain: %ld\n",
+ clockid, abs, rem.tv_sec);
+ return 1;
+ }
+ ksft_test_result_pass("clockid: %d abs:%d\n", clockid, abs);
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int ret, nsfd;
+
+ nscheck();
+
+ ksft_set_plan(4);
+
+ check_config_posix_timers();
+
+ if (unshare_timens())
+ return 1;
+
+ if (_settime(CLOCK_MONOTONIC, 7 * 24 * 3600))
+ return 1;
+ if (_settime(CLOCK_BOOTTIME, 9 * 24 * 3600))
+ return 1;
+
+ nsfd = open("/proc/self/ns/time_for_children", O_RDONLY);
+ if (nsfd < 0)
+ return pr_perror("Unable to open timens_for_children");
+
+ if (setns(nsfd, CLONE_NEWTIME))
+ return pr_perror("Unable to set timens");
+
+ ret = 0;
+ ret |= run_test(CLOCK_MONOTONIC, 0);
+ ret |= run_test(CLOCK_MONOTONIC, 1);
+ ret |= run_test(CLOCK_BOOTTIME_ALARM, 0);
+ ret |= run_test(CLOCK_BOOTTIME_ALARM, 1);
+
+ if (ret)
+ ksft_exit_fail();
+ ksft_exit_pass();
+ return ret;
+}
diff --git a/tools/testing/selftests/timens/config b/tools/testing/selftests/timens/config
new file mode 100644
index 000000000000..4480620f6f49
--- /dev/null
+++ b/tools/testing/selftests/timens/config
@@ -0,0 +1 @@
+CONFIG_TIME_NS=y
diff --git a/tools/testing/selftests/timens/exec.c b/tools/testing/selftests/timens/exec.c
new file mode 100644
index 000000000000..87b47b557a7a
--- /dev/null
+++ b/tools/testing/selftests/timens/exec.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+#include <time.h>
+#include <string.h>
+
+#include "log.h"
+#include "timens.h"
+
+#define OFFSET (36000)
+
+int main(int argc, char *argv[])
+{
+ struct timespec now, tst;
+ int status, i;
+ pid_t pid;
+
+ if (argc > 1) {
+ if (sscanf(argv[1], "%ld", &now.tv_sec) != 1)
+ return pr_perror("sscanf");
+
+ for (i = 0; i < 2; i++) {
+ _gettime(CLOCK_MONOTONIC, &tst, i);
+ if (abs(tst.tv_sec - now.tv_sec) > 5)
+ return pr_fail("%ld %ld\n", now.tv_sec, tst.tv_sec);
+ }
+ return 0;
+ }
+
+ nscheck();
+
+ ksft_set_plan(1);
+
+ clock_gettime(CLOCK_MONOTONIC, &now);
+
+ if (unshare_timens())
+ return 1;
+
+ if (_settime(CLOCK_MONOTONIC, OFFSET))
+ return 1;
+
+ for (i = 0; i < 2; i++) {
+ _gettime(CLOCK_MONOTONIC, &tst, i);
+ if (abs(tst.tv_sec - now.tv_sec) > 5)
+ return pr_fail("%ld %ld\n",
+ now.tv_sec, tst.tv_sec);
+ }
+
+ if (argc > 1)
+ return 0;
+
+ pid = fork();
+ if (pid < 0)
+ return pr_perror("fork");
+
+ if (pid == 0) {
+ char now_str[64];
+ char *cargv[] = {"exec", now_str, NULL};
+ char *cenv[] = {NULL};
+
+ /* Check that a child process is in the new timens. */
+ for (i = 0; i < 2; i++) {
+ _gettime(CLOCK_MONOTONIC, &tst, i);
+ if (abs(tst.tv_sec - now.tv_sec - OFFSET) > 5)
+ return pr_fail("%ld %ld\n",
+ now.tv_sec + OFFSET, tst.tv_sec);
+ }
+
+ /* Check for proper vvar offsets after execve. */
+ snprintf(now_str, sizeof(now_str), "%ld", now.tv_sec + OFFSET);
+ execve("/proc/self/exe", cargv, cenv);
+ return pr_perror("execve");
+ }
+
+ if (waitpid(pid, &status, 0) != pid)
+ return pr_perror("waitpid");
+
+ if (status)
+ ksft_exit_fail();
+
+ ksft_test_result_pass("exec\n");
+ ksft_exit_pass();
+ return 0;
+}
diff --git a/tools/testing/selftests/timens/gettime_perf.c b/tools/testing/selftests/timens/gettime_perf.c
new file mode 100644
index 000000000000..7bf841a3967b
--- /dev/null
+++ b/tools/testing/selftests/timens/gettime_perf.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <time.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <dlfcn.h>
+
+#include "log.h"
+#include "timens.h"
+
+typedef int (*vgettime_t)(clockid_t, struct timespec *);
+
+vgettime_t vdso_clock_gettime;
+
+static void fill_function_pointers(void)
+{
+ void *vdso = dlopen("linux-vdso.so.1",
+ RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso)
+ vdso = dlopen("linux-gate.so.1",
+ RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso) {
+ pr_err("[WARN]\tfailed to find vDSO\n");
+ return;
+ }
+
+ vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+ if (!vdso_clock_gettime)
+ pr_err("Warning: failed to find clock_gettime in vDSO\n");
+
+}
+
+static void test(clock_t clockid, char *clockstr, bool in_ns)
+{
+ struct timespec tp, start;
+ long i = 0;
+ const int timeout = 3;
+
+ vdso_clock_gettime(clockid, &start);
+ tp = start;
+ for (tp = start; start.tv_sec + timeout > tp.tv_sec ||
+ (start.tv_sec + timeout == tp.tv_sec &&
+ start.tv_nsec > tp.tv_nsec); i++) {
+ vdso_clock_gettime(clockid, &tp);
+ }
+
+ ksft_test_result_pass("%s:\tclock: %10s\tcycles:\t%10ld\n",
+ in_ns ? "ns" : "host", clockstr, i);
+}
+
+int main(int argc, char *argv[])
+{
+ time_t offset = 10;
+ int nsfd;
+
+ ksft_set_plan(8);
+
+ fill_function_pointers();
+
+ test(CLOCK_MONOTONIC, "monotonic", false);
+ test(CLOCK_MONOTONIC_COARSE, "monotonic-coarse", false);
+ test(CLOCK_MONOTONIC_RAW, "monotonic-raw", false);
+ test(CLOCK_BOOTTIME, "boottime", false);
+
+ nscheck();
+
+ if (unshare_timens())
+ return 1;
+
+ nsfd = open("/proc/self/ns/time_for_children", O_RDONLY);
+ if (nsfd < 0)
+ return pr_perror("Can't open a time namespace");
+
+ if (_settime(CLOCK_MONOTONIC, offset))
+ return 1;
+ if (_settime(CLOCK_BOOTTIME, offset))
+ return 1;
+
+ if (setns(nsfd, CLONE_NEWTIME))
+ return pr_perror("setns");
+
+ test(CLOCK_MONOTONIC, "monotonic", true);
+ test(CLOCK_MONOTONIC_COARSE, "monotonic-coarse", true);
+ test(CLOCK_MONOTONIC_RAW, "monotonic-raw", true);
+ test(CLOCK_BOOTTIME, "boottime", true);
+
+ ksft_exit_pass();
+ return 0;
+}
diff --git a/tools/testing/selftests/timens/log.h b/tools/testing/selftests/timens/log.h
new file mode 100644
index 000000000000..db64df2a8483
--- /dev/null
+++ b/tools/testing/selftests/timens/log.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __SELFTEST_TIMENS_LOG_H__
+#define __SELFTEST_TIMENS_LOG_H__
+
+#define pr_msg(fmt, lvl, ...) \
+ ksft_print_msg("[%s] (%s:%d)\t" fmt "\n", \
+ lvl, __FILE__, __LINE__, ##__VA_ARGS__)
+
+#define pr_p(func, fmt, ...) func(fmt ": %m", ##__VA_ARGS__)
+
+#define pr_err(fmt, ...) \
+ ({ \
+ ksft_test_result_error(fmt "\n", ##__VA_ARGS__); \
+ -1; \
+ })
+
+#define pr_fail(fmt, ...) \
+ ({ \
+ ksft_test_result_fail(fmt, ##__VA_ARGS__); \
+ -1; \
+ })
+
+#define pr_perror(fmt, ...) pr_p(pr_err, fmt, ##__VA_ARGS__)
+
+#endif
diff --git a/tools/testing/selftests/timens/procfs.c b/tools/testing/selftests/timens/procfs.c
new file mode 100644
index 000000000000..43d93f4006b9
--- /dev/null
+++ b/tools/testing/selftests/timens/procfs.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <math.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+#include <time.h>
+
+#include "log.h"
+#include "timens.h"
+
+/*
+ * Test shouldn't be run for a day, so add 10 days to child
+ * time and check parent's time to be in the same day.
+ */
+#define MAX_TEST_TIME_SEC (60*5)
+#define DAY_IN_SEC (60*60*24)
+#define TEN_DAYS_IN_SEC (10*DAY_IN_SEC)
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+static int child_ns, parent_ns;
+
+static int switch_ns(int fd)
+{
+ if (setns(fd, CLONE_NEWTIME))
+ return pr_perror("setns()");
+
+ return 0;
+}
+
+static int init_namespaces(void)
+{
+ char path[] = "/proc/self/ns/time_for_children";
+ struct stat st1, st2;
+
+ parent_ns = open(path, O_RDONLY);
+ if (parent_ns <= 0)
+ return pr_perror("Unable to open %s", path);
+
+ if (fstat(parent_ns, &st1))
+ return pr_perror("Unable to stat the parent timens");
+
+ if (unshare_timens())
+ return -1;
+
+ child_ns = open(path, O_RDONLY);
+ if (child_ns <= 0)
+ return pr_perror("Unable to open %s", path);
+
+ if (fstat(child_ns, &st2))
+ return pr_perror("Unable to stat the timens");
+
+ if (st1.st_ino == st2.st_ino)
+ return pr_err("The same child_ns after CLONE_NEWTIME");
+
+ if (_settime(CLOCK_BOOTTIME, TEN_DAYS_IN_SEC))
+ return -1;
+
+ return 0;
+}
+
+static int read_proc_uptime(struct timespec *uptime)
+{
+ unsigned long up_sec, up_nsec;
+ FILE *proc;
+
+ proc = fopen("/proc/uptime", "r");
+ if (proc == NULL) {
+ pr_perror("Unable to open /proc/uptime");
+ return -1;
+ }
+
+ if (fscanf(proc, "%lu.%02lu", &up_sec, &up_nsec) != 2) {
+ if (errno) {
+ pr_perror("fscanf");
+ return -errno;
+ }
+ pr_err("failed to parse /proc/uptime");
+ return -1;
+ }
+ fclose(proc);
+
+ uptime->tv_sec = up_sec;
+ uptime->tv_nsec = up_nsec;
+ return 0;
+}
+
+static int check_uptime(void)
+{
+ struct timespec uptime_new, uptime_old;
+ time_t uptime_expected;
+ double prec = MAX_TEST_TIME_SEC;
+
+ if (switch_ns(parent_ns))
+ return pr_err("switch_ns(%d)", parent_ns);
+
+ if (read_proc_uptime(&uptime_old))
+ return 1;
+
+ if (switch_ns(child_ns))
+ return pr_err("switch_ns(%d)", child_ns);
+
+ if (read_proc_uptime(&uptime_new))
+ return 1;
+
+ uptime_expected = uptime_old.tv_sec + TEN_DAYS_IN_SEC;
+ if (fabs(difftime(uptime_new.tv_sec, uptime_expected)) > prec) {
+ pr_fail("uptime in /proc/uptime: old %ld, new %ld [%ld]",
+ uptime_old.tv_sec, uptime_new.tv_sec,
+ uptime_old.tv_sec + TEN_DAYS_IN_SEC);
+ return 1;
+ }
+
+ ksft_test_result_pass("Passed for /proc/uptime\n");
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int ret = 0;
+
+ nscheck();
+
+ ksft_set_plan(1);
+
+ if (init_namespaces())
+ return 1;
+
+ ret |= check_uptime();
+
+ if (ret)
+ ksft_exit_fail();
+ ksft_exit_pass();
+ return ret;
+}
diff --git a/tools/testing/selftests/timens/timens.c b/tools/testing/selftests/timens/timens.c
new file mode 100644
index 000000000000..559d26e21ba0
--- /dev/null
+++ b/tools/testing/selftests/timens/timens.c
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+#include <time.h>
+#include <string.h>
+
+#include "log.h"
+#include "timens.h"
+
+/*
+ * Test shouldn't be run for a day, so add 10 days to child
+ * time and check parent's time to be in the same day.
+ */
+#define DAY_IN_SEC (60*60*24)
+#define TEN_DAYS_IN_SEC (10*DAY_IN_SEC)
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+struct test_clock {
+ clockid_t id;
+ char *name;
+ /*
+ * off_id is -1 if a clock has own offset, or it contains an index
+ * which contains a right offset of this clock.
+ */
+ int off_id;
+ time_t offset;
+};
+
+#define ct(clock, off_id) { clock, #clock, off_id }
+static struct test_clock clocks[] = {
+ ct(CLOCK_BOOTTIME, -1),
+ ct(CLOCK_BOOTTIME_ALARM, 1),
+ ct(CLOCK_MONOTONIC, -1),
+ ct(CLOCK_MONOTONIC_COARSE, 1),
+ ct(CLOCK_MONOTONIC_RAW, 1),
+};
+#undef ct
+
+static int child_ns, parent_ns = -1;
+
+static int switch_ns(int fd)
+{
+ if (setns(fd, CLONE_NEWTIME)) {
+ pr_perror("setns()");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int init_namespaces(void)
+{
+ char path[] = "/proc/self/ns/time_for_children";
+ struct stat st1, st2;
+
+ if (parent_ns == -1) {
+ parent_ns = open(path, O_RDONLY);
+ if (parent_ns <= 0)
+ return pr_perror("Unable to open %s", path);
+ }
+
+ if (fstat(parent_ns, &st1))
+ return pr_perror("Unable to stat the parent timens");
+
+ if (unshare_timens())
+ return -1;
+
+ child_ns = open(path, O_RDONLY);
+ if (child_ns <= 0)
+ return pr_perror("Unable to open %s", path);
+
+ if (fstat(child_ns, &st2))
+ return pr_perror("Unable to stat the timens");
+
+ if (st1.st_ino == st2.st_ino)
+ return pr_perror("The same child_ns after CLONE_NEWTIME");
+
+ return 0;
+}
+
+static int test_gettime(clockid_t clock_index, bool raw_syscall, time_t offset)
+{
+ struct timespec child_ts_new, parent_ts_old, cur_ts;
+ char *entry = raw_syscall ? "syscall" : "vdso";
+ double precision = 0.0;
+
+ if (check_skip(clocks[clock_index].id))
+ return 0;
+
+ switch (clocks[clock_index].id) {
+ case CLOCK_MONOTONIC_COARSE:
+ case CLOCK_MONOTONIC_RAW:
+ precision = -2.0;
+ break;
+ }
+
+ if (switch_ns(parent_ns))
+ return pr_err("switch_ns(%d)", child_ns);
+
+ if (_gettime(clocks[clock_index].id, &parent_ts_old, raw_syscall))
+ return -1;
+
+ child_ts_new.tv_nsec = parent_ts_old.tv_nsec;
+ child_ts_new.tv_sec = parent_ts_old.tv_sec + offset;
+
+ if (switch_ns(child_ns))
+ return pr_err("switch_ns(%d)", child_ns);
+
+ if (_gettime(clocks[clock_index].id, &cur_ts, raw_syscall))
+ return -1;
+
+ if (difftime(cur_ts.tv_sec, child_ts_new.tv_sec) < precision) {
+ ksft_test_result_fail(
+ "Child's %s (%s) time has not changed: %lu -> %lu [%lu]\n",
+ clocks[clock_index].name, entry, parent_ts_old.tv_sec,
+ child_ts_new.tv_sec, cur_ts.tv_sec);
+ return -1;
+ }
+
+ if (switch_ns(parent_ns))
+ return pr_err("switch_ns(%d)", parent_ns);
+
+ if (_gettime(clocks[clock_index].id, &cur_ts, raw_syscall))
+ return -1;
+
+ if (difftime(cur_ts.tv_sec, parent_ts_old.tv_sec) > DAY_IN_SEC) {
+ ksft_test_result_fail(
+ "Parent's %s (%s) time has changed: %lu -> %lu [%lu]\n",
+ clocks[clock_index].name, entry, parent_ts_old.tv_sec,
+ child_ts_new.tv_sec, cur_ts.tv_sec);
+ /* Let's play nice and put it closer to original */
+ clock_settime(clocks[clock_index].id, &cur_ts);
+ return -1;
+ }
+
+ ksft_test_result_pass("Passed for %s (%s)\n",
+ clocks[clock_index].name, entry);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ unsigned int i;
+ time_t offset;
+ int ret = 0;
+
+ nscheck();
+
+ check_config_posix_timers();
+
+ ksft_set_plan(ARRAY_SIZE(clocks) * 2);
+
+ if (init_namespaces())
+ return 1;
+
+ /* Offsets have to be set before tasks enter the namespace. */
+ for (i = 0; i < ARRAY_SIZE(clocks); i++) {
+ if (clocks[i].off_id != -1)
+ continue;
+ offset = TEN_DAYS_IN_SEC + i * 1000;
+ clocks[i].offset = offset;
+ if (_settime(clocks[i].id, offset))
+ return 1;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(clocks); i++) {
+ if (clocks[i].off_id != -1)
+ offset = clocks[clocks[i].off_id].offset;
+ else
+ offset = clocks[i].offset;
+ ret |= test_gettime(i, true, offset);
+ ret |= test_gettime(i, false, offset);
+ }
+
+ if (ret)
+ ksft_exit_fail();
+
+ ksft_exit_pass();
+ return !!ret;
+}
diff --git a/tools/testing/selftests/timens/timens.h b/tools/testing/selftests/timens/timens.h
new file mode 100644
index 000000000000..e09e7e39bc52
--- /dev/null
+++ b/tools/testing/selftests/timens/timens.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TIMENS_H__
+#define __TIMENS_H__
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "../kselftest.h"
+
+#ifndef CLONE_NEWTIME
+# define CLONE_NEWTIME 0x00000080
+#endif
+
+static int config_posix_timers = true;
+
+static inline void check_config_posix_timers(void)
+{
+ if (timer_create(-1, 0, 0) == -1 && errno == ENOSYS)
+ config_posix_timers = false;
+}
+
+static inline bool check_skip(int clockid)
+{
+ if (config_posix_timers)
+ return false;
+
+ switch (clockid) {
+ /* Only these clocks are supported without CONFIG_POSIX_TIMERS. */
+ case CLOCK_BOOTTIME:
+ case CLOCK_MONOTONIC:
+ case CLOCK_REALTIME:
+ return false;
+ default:
+ ksft_test_result_skip("Posix Clocks & timers are not supported\n");
+ return true;
+ }
+
+ return false;
+}
+
+static inline int unshare_timens(void)
+{
+ if (unshare(CLONE_NEWTIME)) {
+ if (errno == EPERM)
+ ksft_exit_skip("need to run as root\n");
+ return pr_perror("Can't unshare() timens");
+ }
+ return 0;
+}
+
+static inline int _settime(clockid_t clk_id, time_t offset)
+{
+ int fd, len;
+ char buf[4096];
+
+ if (clk_id == CLOCK_MONOTONIC_COARSE || clk_id == CLOCK_MONOTONIC_RAW)
+ clk_id = CLOCK_MONOTONIC;
+
+ len = snprintf(buf, sizeof(buf), "%d %ld 0", clk_id, offset);
+
+ fd = open("/proc/self/timens_offsets", O_WRONLY);
+ if (fd < 0)
+ return pr_perror("/proc/self/timens_offsets");
+
+ if (write(fd, buf, len) != len)
+ return pr_perror("/proc/self/timens_offsets");
+
+ close(fd);
+
+ return 0;
+}
+
+static inline int _gettime(clockid_t clk_id, struct timespec *res, bool raw_syscall)
+{
+ int err;
+
+ if (!raw_syscall) {
+ if (clock_gettime(clk_id, res)) {
+ pr_perror("clock_gettime(%d)", (int)clk_id);
+ return -1;
+ }
+ return 0;
+ }
+
+ err = syscall(SYS_clock_gettime, clk_id, res);
+ if (err)
+ pr_perror("syscall(SYS_clock_gettime(%d))", (int)clk_id);
+
+ return err;
+}
+
+static inline void nscheck(void)
+{
+ if (access("/proc/self/ns/time", F_OK) < 0)
+ ksft_exit_skip("Time namespaces are not supported\n");
+}
+
+#endif
diff --git a/tools/testing/selftests/timens/timer.c b/tools/testing/selftests/timens/timer.c
new file mode 100644
index 000000000000..0cca7aafc4bd
--- /dev/null
+++ b/tools/testing/selftests/timens/timer.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <signal.h>
+#include <time.h>
+
+#include "log.h"
+#include "timens.h"
+
+int run_test(int clockid, struct timespec now)
+{
+ struct itimerspec new_value;
+ long long elapsed;
+ timer_t fd;
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ struct sigevent sevp = {.sigev_notify = SIGEV_NONE};
+ int flags = 0;
+
+ new_value.it_value.tv_sec = 3600;
+ new_value.it_value.tv_nsec = 0;
+ new_value.it_interval.tv_sec = 1;
+ new_value.it_interval.tv_nsec = 0;
+
+ if (i == 1) {
+ new_value.it_value.tv_sec += now.tv_sec;
+ new_value.it_value.tv_nsec += now.tv_nsec;
+ }
+
+ if (timer_create(clockid, &sevp, &fd) == -1) {
+ if (errno == ENOSYS) {
+ ksft_test_result_skip("Posix Clocks & timers are supported\n");
+ return 0;
+ }
+ return pr_perror("timerfd_create");
+ }
+
+ if (i == 1)
+ flags |= TIMER_ABSTIME;
+ if (timer_settime(fd, flags, &new_value, NULL) == -1)
+ return pr_perror("timerfd_settime");
+
+ if (timer_gettime(fd, &new_value) == -1)
+ return pr_perror("timerfd_gettime");
+
+ elapsed = new_value.it_value.tv_sec;
+ if (abs(elapsed - 3600) > 60) {
+ ksft_test_result_fail("clockid: %d elapsed: %lld\n",
+ clockid, elapsed);
+ return 1;
+ }
+ }
+
+ ksft_test_result_pass("clockid=%d\n", clockid);
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int ret, status, len, fd;
+ char buf[4096];
+ pid_t pid;
+ struct timespec btime_now, mtime_now;
+
+ nscheck();
+
+ ksft_set_plan(3);
+
+ clock_gettime(CLOCK_MONOTONIC, &mtime_now);
+ clock_gettime(CLOCK_BOOTTIME, &btime_now);
+
+ if (unshare_timens())
+ return 1;
+
+ len = snprintf(buf, sizeof(buf), "%d %d 0\n%d %d 0",
+ CLOCK_MONOTONIC, 70 * 24 * 3600,
+ CLOCK_BOOTTIME, 9 * 24 * 3600);
+ fd = open("/proc/self/timens_offsets", O_WRONLY);
+ if (fd < 0)
+ return pr_perror("/proc/self/timens_offsets");
+
+ if (write(fd, buf, len) != len)
+ return pr_perror("/proc/self/timens_offsets");
+
+ close(fd);
+ mtime_now.tv_sec += 70 * 24 * 3600;
+ btime_now.tv_sec += 9 * 24 * 3600;
+
+ pid = fork();
+ if (pid < 0)
+ return pr_perror("Unable to fork");
+ if (pid == 0) {
+ ret = 0;
+ ret |= run_test(CLOCK_BOOTTIME, btime_now);
+ ret |= run_test(CLOCK_MONOTONIC, mtime_now);
+ ret |= run_test(CLOCK_BOOTTIME_ALARM, btime_now);
+
+ if (ret)
+ ksft_exit_fail();
+ ksft_exit_pass();
+ return ret;
+ }
+
+ if (waitpid(pid, &status, 0) != pid)
+ return pr_perror("Unable to wait the child process");
+
+ if (WIFEXITED(status))
+ return WEXITSTATUS(status);
+
+ return 1;
+}
diff --git a/tools/testing/selftests/timens/timerfd.c b/tools/testing/selftests/timens/timerfd.c
new file mode 100644
index 000000000000..eff1ec5ff215
--- /dev/null
+++ b/tools/testing/selftests/timens/timerfd.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <sys/timerfd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+
+#include "log.h"
+#include "timens.h"
+
+static int tclock_gettime(clock_t clockid, struct timespec *now)
+{
+ if (clockid == CLOCK_BOOTTIME_ALARM)
+ clockid = CLOCK_BOOTTIME;
+ return clock_gettime(clockid, now);
+}
+
+int run_test(int clockid, struct timespec now)
+{
+ struct itimerspec new_value;
+ long long elapsed;
+ int fd, i;
+
+ if (tclock_gettime(clockid, &now))
+ return pr_perror("clock_gettime(%d)", clockid);
+
+ for (i = 0; i < 2; i++) {
+ int flags = 0;
+
+ new_value.it_value.tv_sec = 3600;
+ new_value.it_value.tv_nsec = 0;
+ new_value.it_interval.tv_sec = 1;
+ new_value.it_interval.tv_nsec = 0;
+
+ if (i == 1) {
+ new_value.it_value.tv_sec += now.tv_sec;
+ new_value.it_value.tv_nsec += now.tv_nsec;
+ }
+
+ fd = timerfd_create(clockid, 0);
+ if (fd == -1)
+ return pr_perror("timerfd_create(%d)", clockid);
+
+ if (i == 1)
+ flags |= TFD_TIMER_ABSTIME;
+
+ if (timerfd_settime(fd, flags, &new_value, NULL))
+ return pr_perror("timerfd_settime(%d)", clockid);
+
+ if (timerfd_gettime(fd, &new_value))
+ return pr_perror("timerfd_gettime(%d)", clockid);
+
+ elapsed = new_value.it_value.tv_sec;
+ if (abs(elapsed - 3600) > 60) {
+ ksft_test_result_fail("clockid: %d elapsed: %lld\n",
+ clockid, elapsed);
+ return 1;
+ }
+
+ close(fd);
+ }
+
+ ksft_test_result_pass("clockid=%d\n", clockid);
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int ret, status, len, fd;
+ char buf[4096];
+ pid_t pid;
+ struct timespec btime_now, mtime_now;
+
+ nscheck();
+
+ ksft_set_plan(3);
+
+ clock_gettime(CLOCK_MONOTONIC, &mtime_now);
+ clock_gettime(CLOCK_BOOTTIME, &btime_now);
+
+ if (unshare_timens())
+ return 1;
+
+ len = snprintf(buf, sizeof(buf), "%d %d 0\n%d %d 0",
+ CLOCK_MONOTONIC, 70 * 24 * 3600,
+ CLOCK_BOOTTIME, 9 * 24 * 3600);
+ fd = open("/proc/self/timens_offsets", O_WRONLY);
+ if (fd < 0)
+ return pr_perror("/proc/self/timens_offsets");
+
+ if (write(fd, buf, len) != len)
+ return pr_perror("/proc/self/timens_offsets");
+
+ close(fd);
+ mtime_now.tv_sec += 70 * 24 * 3600;
+ btime_now.tv_sec += 9 * 24 * 3600;
+
+ pid = fork();
+ if (pid < 0)
+ return pr_perror("Unable to fork");
+ if (pid == 0) {
+ ret = 0;
+ ret |= run_test(CLOCK_BOOTTIME, btime_now);
+ ret |= run_test(CLOCK_MONOTONIC, mtime_now);
+ ret |= run_test(CLOCK_BOOTTIME_ALARM, btime_now);
+
+ if (ret)
+ ksft_exit_fail();
+ ksft_exit_pass();
+ return ret;
+ }
+
+ if (waitpid(pid, &status, 0) != pid)
+ return pr_perror("Unable to wait the child process");
+
+ if (WIFEXITED(status))
+ return WEXITSTATUS(status);
+
+ return 1;
+}
diff --git a/tools/testing/selftests/tpm2/Makefile b/tools/testing/selftests/tpm2/Makefile
index 9dd848427a7b..1a5db1eb8ed5 100644
--- a/tools/testing/selftests/tpm2/Makefile
+++ b/tools/testing/selftests/tpm2/Makefile
@@ -2,3 +2,4 @@
include ../lib.mk
TEST_PROGS := test_smoke.sh test_space.sh
+TEST_PROGS_EXTENDED := tpm2.py tpm2_tests.py
diff --git a/tools/testing/selftests/tpm2/test_smoke.sh b/tools/testing/selftests/tpm2/test_smoke.sh
index 80521d46220c..8155c2ea7ccb 100755
--- a/tools/testing/selftests/tpm2/test_smoke.sh
+++ b/tools/testing/selftests/tpm2/test_smoke.sh
@@ -2,3 +2,9 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
python -m unittest -v tpm2_tests.SmokeTest
+python -m unittest -v tpm2_tests.AsyncTest
+
+CLEAR_CMD=$(which tpm2_clear)
+if [ -n $CLEAR_CMD ]; then
+ tpm2_clear -T device
+fi
diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py
index 828c18584624..d0fcb66a88a6 100644
--- a/tools/testing/selftests/tpm2/tpm2.py
+++ b/tools/testing/selftests/tpm2/tpm2.py
@@ -6,8 +6,8 @@ import socket
import struct
import sys
import unittest
-from fcntl import ioctl
-
+import fcntl
+import select
TPM2_ST_NO_SESSIONS = 0x8001
TPM2_ST_SESSIONS = 0x8002
@@ -352,6 +352,7 @@ def hex_dump(d):
class Client:
FLAG_DEBUG = 0x01
FLAG_SPACE = 0x02
+ FLAG_NONBLOCK = 0x04
TPM_IOC_NEW_SPACE = 0xa200
def __init__(self, flags = 0):
@@ -362,13 +363,27 @@ class Client:
else:
self.tpm = open('/dev/tpmrm0', 'r+b', buffering=0)
+ if (self.flags & Client.FLAG_NONBLOCK):
+ flags = fcntl.fcntl(self.tpm, fcntl.F_GETFL)
+ flags |= os.O_NONBLOCK
+ fcntl.fcntl(self.tpm, fcntl.F_SETFL, flags)
+ self.tpm_poll = select.poll()
+
def close(self):
self.tpm.close()
def send_cmd(self, cmd):
self.tpm.write(cmd)
+
+ if (self.flags & Client.FLAG_NONBLOCK):
+ self.tpm_poll.register(self.tpm, select.POLLIN)
+ self.tpm_poll.poll(10000)
+
rsp = self.tpm.read()
+ if (self.flags & Client.FLAG_NONBLOCK):
+ self.tpm_poll.unregister(self.tpm)
+
if (self.flags & Client.FLAG_DEBUG) != 0:
sys.stderr.write('cmd' + os.linesep)
sys.stderr.write(hex_dump(cmd) + os.linesep)
diff --git a/tools/testing/selftests/tpm2/tpm2_tests.py b/tools/testing/selftests/tpm2/tpm2_tests.py
index d4973be53493..728be7c69b76 100644
--- a/tools/testing/selftests/tpm2/tpm2_tests.py
+++ b/tools/testing/selftests/tpm2/tpm2_tests.py
@@ -288,3 +288,16 @@ class SpaceTest(unittest.TestCase):
self.assertEqual(rc, tpm2.TPM2_RC_COMMAND_CODE |
tpm2.TSS2_RESMGR_TPM_RC_LAYER)
+
+class AsyncTest(unittest.TestCase):
+ def setUp(self):
+ logging.basicConfig(filename='AsyncTest.log', level=logging.DEBUG)
+
+ def test_async(self):
+ log = logging.getLogger(__name__)
+ log.debug(sys._getframe().f_code.co_name)
+
+ async_client = tpm2.Client(tpm2.Client.FLAG_NONBLOCK)
+ log.debug("Calling get_cap in a NON_BLOCKING mode")
+ async_client.get_cap(tpm2.TPM2_CAP_HANDLES, tpm2.HR_LOADED_SESSION)
+ async_client.close()
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 9534dc2bc929..7f9a8a8c31da 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -1,5 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for vm selftests
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/')
CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
LDLIBS = -lrt
@@ -16,8 +18,11 @@ TEST_GEN_FILES += on-fault-limit
TEST_GEN_FILES += thuge-gen
TEST_GEN_FILES += transhuge-stress
TEST_GEN_FILES += userfaultfd
+
+ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sh64 sparc64 x86_64))
TEST_GEN_FILES += va_128TBswitch
TEST_GEN_FILES += virtual_address_range
+endif
TEST_PROGS := run_vmtests
diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config
index 1c0d76cb5adf..93b90a9b1eeb 100644
--- a/tools/testing/selftests/vm/config
+++ b/tools/testing/selftests/vm/config
@@ -1,2 +1,3 @@
CONFIG_SYSVIPC=y
CONFIG_USERFAULTFD=y
+CONFIG_TEST_VMALLOC=m
diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c
index c0534e298b51..389327e9b30a 100644
--- a/tools/testing/selftests/vm/gup_benchmark.c
+++ b/tools/testing/selftests/vm/gup_benchmark.c
@@ -18,6 +18,9 @@
#define GUP_LONGTERM_BENCHMARK _IOWR('g', 2, struct gup_benchmark)
#define GUP_BENCHMARK _IOWR('g', 3, struct gup_benchmark)
+/* Just the flags we need, copied from mm.h: */
+#define FOLL_WRITE 0x01 /* check pte is writable */
+
struct gup_benchmark {
__u64 get_delta_usec;
__u64 put_delta_usec;
@@ -37,7 +40,7 @@ int main(int argc, char **argv)
char *file = "/dev/zero";
char *p;
- while ((opt = getopt(argc, argv, "m:r:n:f:tTLUSH")) != -1) {
+ while ((opt = getopt(argc, argv, "m:r:n:f:tTLUwSH")) != -1) {
switch (opt) {
case 'm':
size = atoi(optarg) * MB;
@@ -71,7 +74,7 @@ int main(int argc, char **argv)
flags |= MAP_SHARED;
break;
case 'H':
- flags |= MAP_HUGETLB;
+ flags |= (MAP_HUGETLB | MAP_ANONYMOUS);
break;
default:
return -1;
@@ -85,7 +88,8 @@ int main(int argc, char **argv)
}
gup.nr_pages_per_call = nr_pages;
- gup.flags = write;
+ if (write)
+ gup.flags |= FOLL_WRITE;
fd = open("/sys/kernel/debug/gup_benchmark", O_RDWR);
if (fd == -1)
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index 951c507a27f7..a692ea828317 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -58,6 +58,14 @@ else
exit 1
fi
+#filter 64bit architectures
+ARCH64STR="arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sh64 sparc64 x86_64"
+if [ -z $ARCH ]; then
+ ARCH=`uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/'`
+fi
+VADDR64=0
+echo "$ARCH64STR" | grep $ARCH && VADDR64=1
+
mkdir $mnt
mount -t hugetlbfs none $mnt
@@ -189,6 +197,7 @@ else
echo "[PASS]"
fi
+if [ $VADDR64 -ne 0 ]; then
echo "-----------------------------"
echo "running virtual_address_range"
echo "-----------------------------"
@@ -210,6 +219,7 @@ if [ $? -ne 0 ]; then
else
echo "[PASS]"
fi
+fi # VADDR64
echo "------------------------------------"
echo "running vmalloc stability smoke test"
diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c
index c2333c78cf04..f45e510500c0 100644
--- a/tools/testing/selftests/watchdog/watchdog-test.c
+++ b/tools/testing/selftests/watchdog/watchdog-test.c
@@ -19,7 +19,7 @@
int fd;
const char v = 'V';
-static const char sopts[] = "bdehp:t:Tn:NL";
+static const char sopts[] = "bdehp:t:Tn:NLf:i";
static const struct option lopts[] = {
{"bootstatus", no_argument, NULL, 'b'},
{"disable", no_argument, NULL, 'd'},
@@ -31,6 +31,8 @@ static const struct option lopts[] = {
{"pretimeout", required_argument, NULL, 'n'},
{"getpretimeout", no_argument, NULL, 'N'},
{"gettimeleft", no_argument, NULL, 'L'},
+ {"file", required_argument, NULL, 'f'},
+ {"info", no_argument, NULL, 'i'},
{NULL, no_argument, NULL, 0x0}
};
@@ -69,16 +71,20 @@ static void term(int sig)
static void usage(char *progname)
{
printf("Usage: %s [options]\n", progname);
- printf(" -b, --bootstatus Get last boot status (Watchdog/POR)\n");
- printf(" -d, --disable Turn off the watchdog timer\n");
- printf(" -e, --enable Turn on the watchdog timer\n");
- printf(" -h, --help Print the help message\n");
- printf(" -p, --pingrate=P Set ping rate to P seconds (default %d)\n", DEFAULT_PING_RATE);
- printf(" -t, --timeout=T Set timeout to T seconds\n");
- printf(" -T, --gettimeout Get the timeout\n");
- printf(" -n, --pretimeout=T Set the pretimeout to T seconds\n");
- printf(" -N, --getpretimeout Get the pretimeout\n");
- printf(" -L, --gettimeleft Get the time left until timer expires\n");
+ printf(" -f, --file\t\tOpen watchdog device file\n");
+ printf("\t\t\tDefault is /dev/watchdog\n");
+ printf(" -i, --info\t\tShow watchdog_info\n");
+ printf(" -b, --bootstatus\tGet last boot status (Watchdog/POR)\n");
+ printf(" -d, --disable\t\tTurn off the watchdog timer\n");
+ printf(" -e, --enable\t\tTurn on the watchdog timer\n");
+ printf(" -h, --help\t\tPrint the help message\n");
+ printf(" -p, --pingrate=P\tSet ping rate to P seconds (default %d)\n",
+ DEFAULT_PING_RATE);
+ printf(" -t, --timeout=T\tSet timeout to T seconds\n");
+ printf(" -T, --gettimeout\tGet the timeout\n");
+ printf(" -n, --pretimeout=T\tSet the pretimeout to T seconds\n");
+ printf(" -N, --getpretimeout\tGet the pretimeout\n");
+ printf(" -L, --gettimeleft\tGet the time left until timer expires\n");
printf("\n");
printf("Parameters are parsed left-to-right in real-time.\n");
printf("Example: %s -d -t 10 -p 5 -e\n", progname);
@@ -92,14 +98,21 @@ int main(int argc, char *argv[])
int ret;
int c;
int oneshot = 0;
+ char *file = "/dev/watchdog";
+ struct watchdog_info info;
setbuf(stdout, NULL);
- fd = open("/dev/watchdog", O_WRONLY);
+ while ((c = getopt_long(argc, argv, sopts, lopts, NULL)) != -1) {
+ if (c == 'f')
+ file = optarg;
+ }
+
+ fd = open(file, O_WRONLY);
if (fd == -1) {
if (errno == ENOENT)
- printf("Watchdog device not enabled.\n");
+ printf("Watchdog device (%s) not found.\n", file);
else if (errno == EACCES)
printf("Run watchdog as root.\n");
else
@@ -108,6 +121,18 @@ int main(int argc, char *argv[])
exit(-1);
}
+ /*
+ * Validate that `file` is a watchdog device
+ */
+ ret = ioctl(fd, WDIOC_GETSUPPORT, &info);
+ if (ret) {
+ printf("WDIOC_GETSUPPORT error '%s'\n", strerror(errno));
+ close(fd);
+ exit(ret);
+ }
+
+ optind = 0;
+
while ((c = getopt_long(argc, argv, sopts, lopts, NULL)) != -1) {
switch (c) {
case 'b':
@@ -190,6 +215,21 @@ int main(int argc, char *argv[])
else
printf("WDIOC_GETTIMELEFT error '%s'\n", strerror(errno));
break;
+ case 'f':
+ /* Handled above */
+ break;
+ case 'i':
+ /*
+ * watchdog_info was obtained as part of file open
+ * validation. So we just show it here.
+ */
+ oneshot = 1;
+ printf("watchdog_info:\n");
+ printf(" identity:\t\t%s\n", info.identity);
+ printf(" firmware_version:\t%u\n",
+ info.firmware_version);
+ printf(" options:\t\t%08x\n", info.options);
+ break;
default:
usage(argv[0]);
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
new file mode 100755
index 000000000000..138d46b3f330
--- /dev/null
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -0,0 +1,550 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+#
+# This script tests the below topology:
+#
+# ┌─────────────────────┐ ┌──────────────────────────────────┐ ┌─────────────────────┐
+# │ $ns1 namespace │ │ $ns0 namespace │ │ $ns2 namespace │
+# │ │ │ │ │ │
+# │┌────────┐ │ │ ┌────────┐ │ │ ┌────────┐│
+# ││ wg0 │───────────┼───┼────────────│ lo │────────────┼───┼───────────│ wg0 ││
+# │├────────┴──────────┐│ │ ┌───────┴────────┴────────┐ │ │┌──────────┴────────┤│
+# ││192.168.241.1/24 ││ │ │(ns1) (ns2) │ │ ││192.168.241.2/24 ││
+# ││fd00::1/24 ││ │ │127.0.0.1:1 127.0.0.1:2│ │ ││fd00::2/24 ││
+# │└───────────────────┘│ │ │[::]:1 [::]:2 │ │ │└───────────────────┘│
+# └─────────────────────┘ │ └─────────────────────────┘ │ └─────────────────────┘
+# └──────────────────────────────────┘
+#
+# After the topology is prepared we run a series of TCP/UDP iperf3 tests between the
+# wireguard peers in $ns1 and $ns2. Note that $ns0 is the endpoint for the wg0
+# interfaces in $ns1 and $ns2. See https://www.wireguard.com/netns/ for further
+# details on how this is accomplished.
+set -e
+
+exec 3>&1
+export LANG=C
+export WG_HIDE_KEYS=never
+netns0="wg-test-$$-0"
+netns1="wg-test-$$-1"
+netns2="wg-test-$$-2"
+pretty() { echo -e "\x1b[32m\x1b[1m[+] ${1:+NS$1: }${2}\x1b[0m" >&3; }
+pp() { pretty "" "$*"; "$@"; }
+maybe_exec() { if [[ $BASHPID -eq $$ ]]; then "$@"; else exec "$@"; fi; }
+n0() { pretty 0 "$*"; maybe_exec ip netns exec $netns0 "$@"; }
+n1() { pretty 1 "$*"; maybe_exec ip netns exec $netns1 "$@"; }
+n2() { pretty 2 "$*"; maybe_exec ip netns exec $netns2 "$@"; }
+ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; }
+ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; }
+ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; }
+sleep() { read -t "$1" -N 1 || true; }
+waitiperf() { pretty "${1//*-}" "wait for iperf:5201 pid $2"; while [[ $(ss -N "$1" -tlpH 'sport = 5201') != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; }
+waitncatudp() { pretty "${1//*-}" "wait for udp:1111 pid $2"; while [[ $(ss -N "$1" -ulpH 'sport = 1111') != *\"ncat\",pid=$2,fd=* ]]; do sleep 0.1; done; }
+waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; }
+
+cleanup() {
+ set +e
+ exec 2>/dev/null
+ printf "$orig_message_cost" > /proc/sys/net/core/message_cost
+ ip0 link del dev wg0
+ ip1 link del dev wg0
+ ip2 link del dev wg0
+ local to_kill="$(ip netns pids $netns0) $(ip netns pids $netns1) $(ip netns pids $netns2)"
+ [[ -n $to_kill ]] && kill $to_kill
+ pp ip netns del $netns1
+ pp ip netns del $netns2
+ pp ip netns del $netns0
+ exit
+}
+
+orig_message_cost="$(< /proc/sys/net/core/message_cost)"
+trap cleanup EXIT
+printf 0 > /proc/sys/net/core/message_cost
+
+ip netns del $netns0 2>/dev/null || true
+ip netns del $netns1 2>/dev/null || true
+ip netns del $netns2 2>/dev/null || true
+pp ip netns add $netns0
+pp ip netns add $netns1
+pp ip netns add $netns2
+ip0 link set up dev lo
+
+ip0 link add dev wg0 type wireguard
+ip0 link set wg0 netns $netns1
+ip0 link add dev wg0 type wireguard
+ip0 link set wg0 netns $netns2
+key1="$(pp wg genkey)"
+key2="$(pp wg genkey)"
+key3="$(pp wg genkey)"
+pub1="$(pp wg pubkey <<<"$key1")"
+pub2="$(pp wg pubkey <<<"$key2")"
+pub3="$(pp wg pubkey <<<"$key3")"
+psk="$(pp wg genpsk)"
+[[ -n $key1 && -n $key2 && -n $psk ]]
+
+configure_peers() {
+ ip1 addr add 192.168.241.1/24 dev wg0
+ ip1 addr add fd00::1/24 dev wg0
+
+ ip2 addr add 192.168.241.2/24 dev wg0
+ ip2 addr add fd00::2/24 dev wg0
+
+ n1 wg set wg0 \
+ private-key <(echo "$key1") \
+ listen-port 1 \
+ peer "$pub2" \
+ preshared-key <(echo "$psk") \
+ allowed-ips 192.168.241.2/32,fd00::2/128
+ n2 wg set wg0 \
+ private-key <(echo "$key2") \
+ listen-port 2 \
+ peer "$pub1" \
+ preshared-key <(echo "$psk") \
+ allowed-ips 192.168.241.1/32,fd00::1/128
+
+ ip1 link set up dev wg0
+ ip2 link set up dev wg0
+}
+configure_peers
+
+tests() {
+ # Ping over IPv4
+ n2 ping -c 10 -f -W 1 192.168.241.1
+ n1 ping -c 10 -f -W 1 192.168.241.2
+
+ # Ping over IPv6
+ n2 ping6 -c 10 -f -W 1 fd00::1
+ n1 ping6 -c 10 -f -W 1 fd00::2
+
+ # TCP over IPv4
+ n2 iperf3 -s -1 -B 192.168.241.2 &
+ waitiperf $netns2 $!
+ n1 iperf3 -Z -t 3 -c 192.168.241.2
+
+ # TCP over IPv6
+ n1 iperf3 -s -1 -B fd00::1 &
+ waitiperf $netns1 $!
+ n2 iperf3 -Z -t 3 -c fd00::1
+
+ # UDP over IPv4
+ n1 iperf3 -s -1 -B 192.168.241.1 &
+ waitiperf $netns1 $!
+ n2 iperf3 -Z -t 3 -b 0 -u -c 192.168.241.1
+
+ # UDP over IPv6
+ n2 iperf3 -s -1 -B fd00::2 &
+ waitiperf $netns2 $!
+ n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2
+}
+
+[[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}"
+big_mtu=$(( 34816 - 1500 + $orig_mtu ))
+
+# Test using IPv4 as outer transport
+n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2
+n2 wg set wg0 peer "$pub1" endpoint 127.0.0.1:1
+# Before calling tests, we first make sure that the stats counters and timestamper are working
+n2 ping -c 10 -f -W 1 192.168.241.1
+{ read _; read _; read _; read rx_bytes _; read _; read tx_bytes _; } < <(ip2 -stats link show dev wg0)
+(( rx_bytes == 1372 && (tx_bytes == 1428 || tx_bytes == 1460) ))
+{ read _; read _; read _; read rx_bytes _; read _; read tx_bytes _; } < <(ip1 -stats link show dev wg0)
+(( tx_bytes == 1372 && (rx_bytes == 1428 || rx_bytes == 1460) ))
+read _ rx_bytes tx_bytes < <(n2 wg show wg0 transfer)
+(( rx_bytes == 1372 && (tx_bytes == 1428 || tx_bytes == 1460) ))
+read _ rx_bytes tx_bytes < <(n1 wg show wg0 transfer)
+(( tx_bytes == 1372 && (rx_bytes == 1428 || rx_bytes == 1460) ))
+read _ timestamp < <(n1 wg show wg0 latest-handshakes)
+(( timestamp != 0 ))
+
+tests
+ip1 link set wg0 mtu $big_mtu
+ip2 link set wg0 mtu $big_mtu
+tests
+
+ip1 link set wg0 mtu $orig_mtu
+ip2 link set wg0 mtu $orig_mtu
+
+# Test using IPv6 as outer transport
+n1 wg set wg0 peer "$pub2" endpoint [::1]:2
+n2 wg set wg0 peer "$pub1" endpoint [::1]:1
+tests
+ip1 link set wg0 mtu $big_mtu
+ip2 link set wg0 mtu $big_mtu
+tests
+
+# Test that route MTUs work with the padding
+ip1 link set wg0 mtu 1300
+ip2 link set wg0 mtu 1300
+n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2
+n2 wg set wg0 peer "$pub1" endpoint 127.0.0.1:1
+n0 iptables -A INPUT -m length --length 1360 -j DROP
+n1 ip route add 192.168.241.2/32 dev wg0 mtu 1299
+n2 ip route add 192.168.241.1/32 dev wg0 mtu 1299
+n2 ping -c 1 -W 1 -s 1269 192.168.241.1
+n2 ip route delete 192.168.241.1/32 dev wg0 mtu 1299
+n1 ip route delete 192.168.241.2/32 dev wg0 mtu 1299
+n0 iptables -F INPUT
+
+ip1 link set wg0 mtu $orig_mtu
+ip2 link set wg0 mtu $orig_mtu
+
+# Test using IPv4 that roaming works
+ip0 -4 addr del 127.0.0.1/8 dev lo
+ip0 -4 addr add 127.212.121.99/8 dev lo
+n1 wg set wg0 listen-port 9999
+n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2
+n1 ping6 -W 1 -c 1 fd00::2
+[[ $(n2 wg show wg0 endpoints) == "$pub1 127.212.121.99:9999" ]]
+
+# Test using IPv6 that roaming works
+n1 wg set wg0 listen-port 9998
+n1 wg set wg0 peer "$pub2" endpoint [::1]:2
+n1 ping -W 1 -c 1 192.168.241.2
+[[ $(n2 wg show wg0 endpoints) == "$pub1 [::1]:9998" ]]
+
+# Test that crypto-RP filter works
+n1 wg set wg0 peer "$pub2" allowed-ips 192.168.241.0/24
+exec 4< <(n1 ncat -l -u -p 1111)
+ncat_pid=$!
+waitncatudp $netns1 $ncat_pid
+n2 ncat -u 192.168.241.1 1111 <<<"X"
+read -r -N 1 -t 1 out <&4 && [[ $out == "X" ]]
+kill $ncat_pid
+more_specific_key="$(pp wg genkey | pp wg pubkey)"
+n1 wg set wg0 peer "$more_specific_key" allowed-ips 192.168.241.2/32
+n2 wg set wg0 listen-port 9997
+exec 4< <(n1 ncat -l -u -p 1111)
+ncat_pid=$!
+waitncatudp $netns1 $ncat_pid
+n2 ncat -u 192.168.241.1 1111 <<<"X"
+! read -r -N 1 -t 1 out <&4 || false
+kill $ncat_pid
+n1 wg set wg0 peer "$more_specific_key" remove
+[[ $(n1 wg show wg0 endpoints) == "$pub2 [::1]:9997" ]]
+
+# Test that we can change private keys keys and immediately handshake
+n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") allowed-ips 192.168.241.2/32 endpoint 127.0.0.1:2
+n2 wg set wg0 private-key <(echo "$key2") listen-port 2 peer "$pub1" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32
+n1 ping -W 1 -c 1 192.168.241.2
+n1 wg set wg0 private-key <(echo "$key3")
+n2 wg set wg0 peer "$pub3" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 peer "$pub1" remove
+n1 ping -W 1 -c 1 192.168.241.2
+
+ip1 link del wg0
+ip2 link del wg0
+
+# Test using NAT. We now change the topology to this:
+# ┌────────────────────────────────────────┐ ┌────────────────────────────────────────────────┐ ┌────────────────────────────────────────┐
+# │ $ns1 namespace │ │ $ns0 namespace │ │ $ns2 namespace │
+# │ │ │ │ │ │
+# │ ┌─────┐ ┌─────┐ │ │ ┌──────┐ ┌──────┐ │ │ ┌─────┐ ┌─────┐ │
+# │ │ wg0 │─────────────│vethc│───────────┼────┼────│vethrc│ │vethrs│──────────────┼─────┼──│veths│────────────│ wg0 │ │
+# │ ├─────┴──────────┐ ├─────┴──────────┐│ │ ├──────┴─────────┐ ├──────┴────────────┐ │ │ ├─────┴──────────┐ ├─────┴──────────┐ │
+# │ │192.168.241.1/24│ │192.168.1.100/24││ │ │192.168.1.1/24 │ │10.0.0.1/24 │ │ │ │10.0.0.100/24 │ │192.168.241.2/24│ │
+# │ │fd00::1/24 │ │ ││ │ │ │ │SNAT:192.168.1.0/24│ │ │ │ │ │fd00::2/24 │ │
+# │ └────────────────┘ └────────────────┘│ │ └────────────────┘ └───────────────────┘ │ │ └────────────────┘ └────────────────┘ │
+# └────────────────────────────────────────┘ └────────────────────────────────────────────────┘ └────────────────────────────────────────┘
+
+ip1 link add dev wg0 type wireguard
+ip2 link add dev wg0 type wireguard
+configure_peers
+
+ip0 link add vethrc type veth peer name vethc
+ip0 link add vethrs type veth peer name veths
+ip0 link set vethc netns $netns1
+ip0 link set veths netns $netns2
+ip0 link set vethrc up
+ip0 link set vethrs up
+ip0 addr add 192.168.1.1/24 dev vethrc
+ip0 addr add 10.0.0.1/24 dev vethrs
+ip1 addr add 192.168.1.100/24 dev vethc
+ip1 link set vethc up
+ip1 route add default via 192.168.1.1
+ip2 addr add 10.0.0.100/24 dev veths
+ip2 link set veths up
+waitiface $netns0 vethrc
+waitiface $netns0 vethrs
+waitiface $netns1 vethc
+waitiface $netns2 veths
+
+n0 bash -c 'printf 1 > /proc/sys/net/ipv4/ip_forward'
+n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout'
+n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout_stream'
+n0 iptables -t nat -A POSTROUTING -s 192.168.1.0/24 -d 10.0.0.0/24 -j SNAT --to 10.0.0.1
+
+n1 wg set wg0 peer "$pub2" endpoint 10.0.0.100:2 persistent-keepalive 1
+n1 ping -W 1 -c 1 192.168.241.2
+n2 ping -W 1 -c 1 192.168.241.1
+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]]
+# Demonstrate n2 can still send packets to n1, since persistent-keepalive will prevent connection tracking entry from expiring (to see entries: `n0 conntrack -L`).
+pp sleep 3
+n2 ping -W 1 -c 1 192.168.241.1
+n1 wg set wg0 peer "$pub2" persistent-keepalive 0
+
+# Do a wg-quick(8)-style policy routing for the default route, making sure vethc has a v6 address to tease out bugs.
+ip1 -6 addr add fc00::9/96 dev vethc
+ip1 -6 route add default via fc00::1
+ip2 -4 addr add 192.168.99.7/32 dev wg0
+ip2 -6 addr add abab::1111/128 dev wg0
+n1 wg set wg0 fwmark 51820 peer "$pub2" allowed-ips 192.168.99.7,abab::1111
+ip1 -6 route add default dev wg0 table 51820
+ip1 -6 rule add not fwmark 51820 table 51820
+ip1 -6 rule add table main suppress_prefixlength 0
+ip1 -4 route add default dev wg0 table 51820
+ip1 -4 rule add not fwmark 51820 table 51820
+ip1 -4 rule add table main suppress_prefixlength 0
+# Flood the pings instead of sending just one, to trigger routing table reference counting bugs.
+n1 ping -W 1 -c 100 -f 192.168.99.7
+n1 ping -W 1 -c 100 -f abab::1111
+
+# Have ns2 NAT into wg0 packets from ns0, but return an icmp error along the right route.
+n2 iptables -t nat -A POSTROUTING -s 10.0.0.0/24 -d 192.168.241.0/24 -j SNAT --to 192.168.241.2
+n0 iptables -t filter -A INPUT \! -s 10.0.0.0/24 -i vethrs -j DROP # Manual rpfilter just to be explicit.
+n2 bash -c 'printf 1 > /proc/sys/net/ipv4/ip_forward'
+ip0 -4 route add 192.168.241.1 via 10.0.0.100
+n2 wg set wg0 peer "$pub1" remove
+[[ $(! n0 ping -W 1 -c 1 192.168.241.1 || false) == *"From 10.0.0.100 icmp_seq=1 Destination Host Unreachable"* ]]
+
+n0 iptables -t nat -F
+n0 iptables -t filter -F
+n2 iptables -t nat -F
+ip0 link del vethrc
+ip0 link del vethrs
+ip1 link del wg0
+ip2 link del wg0
+
+# Test that saddr routing is sticky but not too sticky, changing to this topology:
+# ┌────────────────────────────────────────┐ ┌────────────────────────────────────────┐
+# │ $ns1 namespace │ │ $ns2 namespace │
+# │ │ │ │
+# │ ┌─────┐ ┌─────┐ │ │ ┌─────┐ ┌─────┐ │
+# │ │ wg0 │─────────────│veth1│───────────┼────┼──│veth2│────────────│ wg0 │ │
+# │ ├─────┴──────────┐ ├─────┴──────────┐│ │ ├─────┴──────────┐ ├─────┴──────────┐ │
+# │ │192.168.241.1/24│ │10.0.0.1/24 ││ │ │10.0.0.2/24 │ │192.168.241.2/24│ │
+# │ │fd00::1/24 │ │fd00:aa::1/96 ││ │ │fd00:aa::2/96 │ │fd00::2/24 │ │
+# │ └────────────────┘ └────────────────┘│ │ └────────────────┘ └────────────────┘ │
+# └────────────────────────────────────────┘ └────────────────────────────────────────┘
+
+ip1 link add dev wg0 type wireguard
+ip2 link add dev wg0 type wireguard
+configure_peers
+ip1 link add veth1 type veth peer name veth2
+ip1 link set veth2 netns $netns2
+n1 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/all/accept_dad'
+n2 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/all/accept_dad'
+n1 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/veth1/accept_dad'
+n2 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/veth2/accept_dad'
+n1 bash -c 'printf 1 > /proc/sys/net/ipv4/conf/veth1/promote_secondaries'
+
+# First we check that we aren't overly sticky and can fall over to new IPs when old ones are removed
+ip1 addr add 10.0.0.1/24 dev veth1
+ip1 addr add fd00:aa::1/96 dev veth1
+ip2 addr add 10.0.0.2/24 dev veth2
+ip2 addr add fd00:aa::2/96 dev veth2
+ip1 link set veth1 up
+ip2 link set veth2 up
+waitiface $netns1 veth1
+waitiface $netns2 veth2
+n1 wg set wg0 peer "$pub2" endpoint 10.0.0.2:2
+n1 ping -W 1 -c 1 192.168.241.2
+ip1 addr add 10.0.0.10/24 dev veth1
+ip1 addr del 10.0.0.1/24 dev veth1
+n1 ping -W 1 -c 1 192.168.241.2
+n1 wg set wg0 peer "$pub2" endpoint [fd00:aa::2]:2
+n1 ping -W 1 -c 1 192.168.241.2
+ip1 addr add fd00:aa::10/96 dev veth1
+ip1 addr del fd00:aa::1/96 dev veth1
+n1 ping -W 1 -c 1 192.168.241.2
+
+# Now we show that we can successfully do reply to sender routing
+ip1 link set veth1 down
+ip2 link set veth2 down
+ip1 addr flush dev veth1
+ip2 addr flush dev veth2
+ip1 addr add 10.0.0.1/24 dev veth1
+ip1 addr add 10.0.0.2/24 dev veth1
+ip1 addr add fd00:aa::1/96 dev veth1
+ip1 addr add fd00:aa::2/96 dev veth1
+ip2 addr add 10.0.0.3/24 dev veth2
+ip2 addr add fd00:aa::3/96 dev veth2
+ip1 link set veth1 up
+ip2 link set veth2 up
+waitiface $netns1 veth1
+waitiface $netns2 veth2
+n2 wg set wg0 peer "$pub1" endpoint 10.0.0.1:1
+n2 ping -W 1 -c 1 192.168.241.1
+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]]
+n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::1]:1
+n2 ping -W 1 -c 1 192.168.241.1
+[[ $(n2 wg show wg0 endpoints) == "$pub1 [fd00:aa::1]:1" ]]
+n2 wg set wg0 peer "$pub1" endpoint 10.0.0.2:1
+n2 ping -W 1 -c 1 192.168.241.1
+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.2:1" ]]
+n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::2]:1
+n2 ping -W 1 -c 1 192.168.241.1
+[[ $(n2 wg show wg0 endpoints) == "$pub1 [fd00:aa::2]:1" ]]
+
+# What happens if the inbound destination address belongs to a different interface as the default route?
+ip1 link add dummy0 type dummy
+ip1 addr add 10.50.0.1/24 dev dummy0
+ip1 link set dummy0 up
+ip2 route add 10.50.0.0/24 dev veth2
+n2 wg set wg0 peer "$pub1" endpoint 10.50.0.1:1
+n2 ping -W 1 -c 1 192.168.241.1
+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.50.0.1:1" ]]
+
+ip1 link del dummy0
+ip1 addr flush dev veth1
+ip2 addr flush dev veth2
+ip1 route flush dev veth1
+ip2 route flush dev veth2
+
+# Now we see what happens if another interface route takes precedence over an ongoing one
+ip1 link add veth3 type veth peer name veth4
+ip1 link set veth4 netns $netns2
+ip1 addr add 10.0.0.1/24 dev veth1
+ip2 addr add 10.0.0.2/24 dev veth2
+ip1 addr add 10.0.0.3/24 dev veth3
+ip1 link set veth1 up
+ip2 link set veth2 up
+ip1 link set veth3 up
+ip2 link set veth4 up
+waitiface $netns1 veth1
+waitiface $netns2 veth2
+waitiface $netns1 veth3
+waitiface $netns2 veth4
+ip1 route flush dev veth1
+ip1 route flush dev veth3
+ip1 route add 10.0.0.0/24 dev veth1 src 10.0.0.1 metric 2
+n1 wg set wg0 peer "$pub2" endpoint 10.0.0.2:2
+n1 ping -W 1 -c 1 192.168.241.2
+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]]
+ip1 route add 10.0.0.0/24 dev veth3 src 10.0.0.3 metric 1
+n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/veth1/rp_filter'
+n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/veth4/rp_filter'
+n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter'
+n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter'
+n1 ping -W 1 -c 1 192.168.241.2
+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.3:1" ]]
+
+ip1 link del veth1
+ip1 link del veth3
+ip1 link del wg0
+ip2 link del wg0
+
+# We test that Netlink/IPC is working properly by doing things that usually cause split responses
+ip0 link add dev wg0 type wireguard
+config=( "[Interface]" "PrivateKey=$(wg genkey)" "[Peer]" "PublicKey=$(wg genkey)" )
+for a in {1..255}; do
+ for b in {0..255}; do
+ config+=( "AllowedIPs=$a.$b.0.0/16,$a::$b/128" )
+ done
+done
+n0 wg setconf wg0 <(printf '%s\n' "${config[@]}")
+i=0
+for ip in $(n0 wg show wg0 allowed-ips); do
+ ((++i))
+done
+((i == 255*256*2+1))
+ip0 link del wg0
+ip0 link add dev wg0 type wireguard
+config=( "[Interface]" "PrivateKey=$(wg genkey)" )
+for a in {1..40}; do
+ config+=( "[Peer]" "PublicKey=$(wg genkey)" )
+ for b in {1..52}; do
+ config+=( "AllowedIPs=$a.$b.0.0/16" )
+ done
+done
+n0 wg setconf wg0 <(printf '%s\n' "${config[@]}")
+i=0
+while read -r line; do
+ j=0
+ for ip in $line; do
+ ((++j))
+ done
+ ((j == 53))
+ ((++i))
+done < <(n0 wg show wg0 allowed-ips)
+((i == 40))
+ip0 link del wg0
+ip0 link add wg0 type wireguard
+config=( )
+for i in {1..29}; do
+ config+=( "[Peer]" "PublicKey=$(wg genkey)" )
+done
+config+=( "[Peer]" "PublicKey=$(wg genkey)" "AllowedIPs=255.2.3.4/32,abcd::255/128" )
+n0 wg setconf wg0 <(printf '%s\n' "${config[@]}")
+n0 wg showconf wg0 > /dev/null
+ip0 link del wg0
+
+allowedips=( )
+for i in {1..197}; do
+ allowedips+=( abcd::$i )
+done
+saved_ifs="$IFS"
+IFS=,
+allowedips="${allowedips[*]}"
+IFS="$saved_ifs"
+ip0 link add wg0 type wireguard
+n0 wg set wg0 peer "$pub1"
+n0 wg set wg0 peer "$pub2" allowed-ips "$allowedips"
+{
+ read -r pub allowedips
+ [[ $pub == "$pub1" && $allowedips == "(none)" ]]
+ read -r pub allowedips
+ [[ $pub == "$pub2" ]]
+ i=0
+ for _ in $allowedips; do
+ ((++i))
+ done
+ ((i == 197))
+} < <(n0 wg show wg0 allowed-ips)
+ip0 link del wg0
+
+! n0 wg show doesnotexist || false
+
+ip0 link add wg0 type wireguard
+n0 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk")
+[[ $(n0 wg show wg0 private-key) == "$key1" ]]
+[[ $(n0 wg show wg0 preshared-keys) == "$pub2 $psk" ]]
+n0 wg set wg0 private-key /dev/null peer "$pub2" preshared-key /dev/null
+[[ $(n0 wg show wg0 private-key) == "(none)" ]]
+[[ $(n0 wg show wg0 preshared-keys) == "$pub2 (none)" ]]
+n0 wg set wg0 peer "$pub2"
+n0 wg set wg0 private-key <(echo "$key2")
+[[ $(n0 wg show wg0 public-key) == "$pub2" ]]
+[[ -z $(n0 wg show wg0 peers) ]]
+n0 wg set wg0 peer "$pub2"
+[[ -z $(n0 wg show wg0 peers) ]]
+n0 wg set wg0 private-key <(echo "$key1")
+n0 wg set wg0 peer "$pub2"
+[[ $(n0 wg show wg0 peers) == "$pub2" ]]
+n0 wg set wg0 private-key <(echo "/${key1:1}")
+[[ $(n0 wg show wg0 private-key) == "+${key1:1}" ]]
+n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0,10.0.0.0/8,100.0.0.0/10,172.16.0.0/12,192.168.0.0/16
+n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0
+n0 wg set wg0 peer "$pub2" allowed-ips ::/0,1700::/111,5000::/4,e000::/37,9000::/75
+n0 wg set wg0 peer "$pub2" allowed-ips ::/0
+n0 wg set wg0 peer "$pub2" remove
+low_order_points=( AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38= )
+n0 wg set wg0 private-key /dev/null ${low_order_points[@]/#/peer }
+[[ -z $(n0 wg show wg0 peers) ]]
+n0 wg set wg0 private-key <(echo "$key1") ${low_order_points[@]/#/peer }
+[[ -z $(n0 wg show wg0 peers) ]]
+ip0 link del wg0
+
+declare -A objects
+while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do
+ [[ $line =~ .*(wg[0-9]+:\ [A-Z][a-z]+\ [0-9]+)\ .*(created|destroyed).* ]] || continue
+ objects["${BASH_REMATCH[1]}"]+="${BASH_REMATCH[2]}"
+done < /dev/kmsg
+alldeleted=1
+for object in "${!objects[@]}"; do
+ if [[ ${objects["$object"]} != *createddestroyed ]]; then
+ echo "Error: $object: merely ${objects["$object"]}" >&3
+ alldeleted=0
+ fi
+done
+[[ $alldeleted -eq 1 ]]
+pretty "" "Objects that were created were also destroyed."
diff --git a/tools/testing/selftests/wireguard/qemu/.gitignore b/tools/testing/selftests/wireguard/qemu/.gitignore
new file mode 100644
index 000000000000..415b542a9d59
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/.gitignore
@@ -0,0 +1,2 @@
+build/
+distfiles/
diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
new file mode 100644
index 000000000000..f10aa3590adc
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -0,0 +1,387 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+
+PWD := $(shell pwd)
+
+CHOST := $(shell gcc -dumpmachine)
+HOST_ARCH := $(firstword $(subst -, ,$(CHOST)))
+ifneq (,$(ARCH))
+CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(wildcard $(foreach bindir,$(subst :, ,$(PATH)),$(bindir)/$(ARCH)-*-gcc))))))
+ifeq (,$(CBUILD))
+$(error The toolchain for $(ARCH) is not installed)
+endif
+else
+CBUILD := $(CHOST)
+ARCH := $(firstword $(subst -, ,$(CBUILD)))
+endif
+
+# Set these from the environment to override
+KERNEL_PATH ?= $(PWD)/../../../../..
+BUILD_PATH ?= $(PWD)/build/$(ARCH)
+DISTFILES_PATH ?= $(PWD)/distfiles
+NR_CPUS ?= 4
+
+MIRROR := https://download.wireguard.com/qemu-test/distfiles/
+
+default: qemu
+
+# variable name, tarball project name, version, tarball extension, default URI base
+define tar_download =
+$(1)_VERSION := $(3)
+$(1)_NAME := $(2)-$$($(1)_VERSION)
+$(1)_TAR := $(DISTFILES_PATH)/$$($(1)_NAME)$(4)
+$(1)_PATH := $(BUILD_PATH)/$$($(1)_NAME)
+$(call file_download,$$($(1)_NAME)$(4),$(5),$(6))
+endef
+
+define file_download =
+$(DISTFILES_PATH)/$(1):
+ mkdir -p $(DISTFILES_PATH)
+ flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp'
+ if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi
+endef
+
+$(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3))
+$(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/,171f89699f286a5854b72b91d06e8f8e3683064c5901fb09d954a9ab6f551f81))
+$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c))
+$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d))
+$(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae))
+$(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c))
+$(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa))
+$(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a))
+$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20191226,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,aa8af0fdc9872d369d8c890a84dbc2a2466b55795dccd5b47721b2d97644b04f))
+
+KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
+rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
+WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*)
+
+export CFLAGS ?= -O3 -pipe
+export LDFLAGS ?=
+export CPPFLAGS := -I$(BUILD_PATH)/include
+
+ifeq ($(HOST_ARCH),$(ARCH))
+CROSS_COMPILE_FLAG := --host=$(CHOST)
+CFLAGS += -march=native
+STRIP := strip
+else
+$(info Cross compilation: building for $(CBUILD) using $(CHOST))
+CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST)
+export CROSS_COMPILE=$(CBUILD)-
+STRIP := $(CBUILD)-strip
+endif
+ifeq ($(ARCH),aarch64)
+QEMU_ARCH := aarch64
+KERNEL_ARCH := arm64
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
+else
+QEMU_MACHINE := -cpu cortex-a53 -machine virt
+CFLAGS += -march=armv8-a -mtune=cortex-a53
+endif
+else ifeq ($(ARCH),aarch64_be)
+QEMU_ARCH := aarch64
+KERNEL_ARCH := arm64
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
+else
+QEMU_MACHINE := -cpu cortex-a53 -machine virt
+CFLAGS += -march=armv8-a -mtune=cortex-a53
+endif
+else ifeq ($(ARCH),arm)
+QEMU_ARCH := arm
+KERNEL_ARCH := arm
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
+else
+QEMU_MACHINE := -cpu cortex-a15 -machine virt
+CFLAGS += -march=armv7-a -mtune=cortex-a15 -mabi=aapcs-linux
+endif
+else ifeq ($(ARCH),armeb)
+QEMU_ARCH := arm
+KERNEL_ARCH := arm
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
+else
+QEMU_MACHINE := -cpu cortex-a15 -machine virt
+CFLAGS += -march=armv7-a -mabi=aapcs-linux # We don't pass -mtune=cortex-a15 due to a compiler bug on big endian.
+LDFLAGS += -Wl,--be8
+endif
+else ifeq ($(ARCH),x86_64)
+QEMU_ARCH := x86_64
+KERNEL_ARCH := x86_64
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host -machine q35,accel=kvm
+else
+QEMU_MACHINE := -cpu Skylake-Server -machine q35
+CFLAGS += -march=skylake-avx512
+endif
+else ifeq ($(ARCH),i686)
+QEMU_ARCH := i386
+KERNEL_ARCH := x86
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
+ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH))
+QEMU_MACHINE := -cpu host -machine q35,accel=kvm
+else
+QEMU_MACHINE := -cpu coreduo -machine q35
+CFLAGS += -march=prescott
+endif
+else ifeq ($(ARCH),mips64)
+QEMU_ARCH := mips64
+KERNEL_ARCH := mips
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host -machine malta,accel=kvm
+CFLAGS += -EB
+else
+QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1
+CFLAGS += -march=mips64r2 -EB
+endif
+else ifeq ($(ARCH),mips64el)
+QEMU_ARCH := mips64el
+KERNEL_ARCH := mips
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host -machine malta,accel=kvm
+CFLAGS += -EL
+else
+QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1
+CFLAGS += -march=mips64r2 -EL
+endif
+else ifeq ($(ARCH),mips)
+QEMU_ARCH := mips
+KERNEL_ARCH := mips
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host -machine malta,accel=kvm
+CFLAGS += -EB
+else
+QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1
+CFLAGS += -march=mips32r2 -EB
+endif
+else ifeq ($(ARCH),mipsel)
+QEMU_ARCH := mipsel
+KERNEL_ARCH := mips
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host -machine malta,accel=kvm
+CFLAGS += -EL
+else
+QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1
+CFLAGS += -march=mips32r2 -EL
+endif
+else ifeq ($(ARCH),powerpc64le)
+QEMU_ARCH := ppc64
+KERNEL_ARCH := powerpc
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine pseries
+else
+QEMU_MACHINE := -machine pseries
+endif
+CFLAGS += -mcpu=powerpc64le -mlong-double-64
+else ifeq ($(ARCH),powerpc)
+QEMU_ARCH := ppc
+KERNEL_ARCH := powerpc
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/powerpc/boot/uImage
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine ppce500
+else
+QEMU_MACHINE := -machine ppce500
+endif
+CFLAGS += -mcpu=powerpc -mlong-double-64 -msecure-plt
+else ifeq ($(ARCH),m68k)
+QEMU_ARCH := m68k
+KERNEL_ARCH := m68k
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/m68k.config)
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -smp 1 -append $(KERNEL_CMDLINE)
+else
+QEMU_MACHINE := -machine q800 -smp 1 -append $(KERNEL_CMDLINE)
+endif
+else
+$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64le, powerpc, m68k)
+endif
+
+REAL_CC := $(CBUILD)-gcc
+MUSL_CC := $(BUILD_PATH)/musl-gcc
+export CC := $(MUSL_CC)
+USERSPACE_DEPS := $(MUSL_CC) $(BUILD_PATH)/include/.installed $(BUILD_PATH)/include/linux/.installed
+
+build: $(KERNEL_BZIMAGE)
+qemu: $(KERNEL_BZIMAGE)
+ rm -f $(BUILD_PATH)/result
+ timeout --foreground 20m qemu-system-$(QEMU_ARCH) \
+ -nodefaults \
+ -nographic \
+ -smp $(NR_CPUS) \
+ $(QEMU_MACHINE) \
+ -m $$(grep -q CONFIG_DEBUG_KMEMLEAK=y $(KERNEL_BUILD_PATH)/.config && echo 1G || echo 256M) \
+ -serial stdio \
+ -serial file:$(BUILD_PATH)/result \
+ -no-reboot \
+ -monitor none \
+ -kernel $<
+ grep -Fq success $(BUILD_PATH)/result
+
+$(BUILD_PATH)/init-cpio-spec.txt:
+ mkdir -p $(BUILD_PATH)
+ echo "file /init $(BUILD_PATH)/init 755 0 0" > $@
+ echo "file /init.sh $(PWD)/../netns.sh 755 0 0" >> $@
+ echo "dir /dev 755 0 0" >> $@
+ echo "nod /dev/console 644 0 0 c 5 1" >> $@
+ echo "dir /bin 755 0 0" >> $@
+ echo "file /bin/iperf3 $(IPERF_PATH)/src/iperf3 755 0 0" >> $@
+ echo "file /bin/wg $(WIREGUARD_TOOLS_PATH)/src/wg 755 0 0" >> $@
+ echo "file /bin/bash $(BASH_PATH)/bash 755 0 0" >> $@
+ echo "file /bin/ip $(IPROUTE2_PATH)/ip/ip 755 0 0" >> $@
+ echo "file /bin/ss $(IPROUTE2_PATH)/misc/ss 755 0 0" >> $@
+ echo "file /bin/ping $(IPUTILS_PATH)/ping 755 0 0" >> $@
+ echo "file /bin/ncat $(NMAP_PATH)/ncat/ncat 755 0 0" >> $@
+ echo "file /bin/xtables-legacy-multi $(IPTABLES_PATH)/iptables/xtables-legacy-multi 755 0 0" >> $@
+ echo "slink /bin/iptables xtables-legacy-multi 777 0 0" >> $@
+ echo "slink /bin/ping6 ping 777 0 0" >> $@
+ echo "dir /lib 755 0 0" >> $@
+ echo "file /lib/libc.so $(MUSL_PATH)/lib/libc.so 755 0 0" >> $@
+ echo "slink /lib/ld-linux.so.1 libc.so 777 0 0" >> $@
+
+$(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config
+ mkdir -p $(KERNEL_BUILD_PATH)
+ cp kernel.config $(KERNEL_BUILD_PATH)/minimal.config
+ printf 'CONFIG_NR_CPUS=$(NR_CPUS)\nCONFIG_INITRAMFS_SOURCE="$(BUILD_PATH)/init-cpio-spec.txt"\n' >> $(KERNEL_BUILD_PATH)/minimal.config
+ cat arch/$(ARCH).config >> $(KERNEL_BUILD_PATH)/minimal.config
+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) allnoconfig
+ cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config
+ $(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config debug.config,)
+
+$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES)
+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE)
+
+$(BUILD_PATH)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config
+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install
+ touch $@
+
+$(MUSL_PATH)/lib/libc.so: $(MUSL_TAR)
+ mkdir -p $(BUILD_PATH)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ cd $(MUSL_PATH) && CC=$(REAL_CC) ./configure --prefix=/ --disable-static --build=$(CBUILD)
+ $(MAKE) -C $(MUSL_PATH)
+ $(STRIP) -s $@
+
+$(BUILD_PATH)/include/.installed: $(MUSL_PATH)/lib/libc.so
+ $(MAKE) -C $(MUSL_PATH) DESTDIR=$(BUILD_PATH) install-headers
+ touch $@
+
+$(MUSL_CC): $(MUSL_PATH)/lib/libc.so
+ sh $(MUSL_PATH)/tools/musl-gcc.specs.sh $(BUILD_PATH)/include $(MUSL_PATH)/lib /lib/ld-linux.so.1 > $(BUILD_PATH)/musl-gcc.specs
+ printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" "$$@"\n' > $(BUILD_PATH)/musl-gcc
+ chmod +x $(BUILD_PATH)/musl-gcc
+
+$(IPERF_PATH)/.installed: $(IPERF_TAR)
+ mkdir -p $(BUILD_PATH)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ sed -i '1s/^/#include <stdint.h>/' $(IPERF_PATH)/src/cjson.h $(IPERF_PATH)/src/timer.h
+ sed -i -r 's/-p?g//g' $(IPERF_PATH)/src/Makefile*
+ touch $@
+
+$(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS)
+ cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --with-openssl=no
+ $(MAKE) -C $(IPERF_PATH)
+ $(STRIP) -s $@
+
+$(LIBMNL_PATH)/.installed: $(LIBMNL_TAR)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ touch $@
+
+$(LIBMNL_PATH)/src/.libs/libmnl.a: | $(LIBMNL_PATH)/.installed $(USERSPACE_DEPS)
+ cd $(LIBMNL_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared
+ $(MAKE) -C $(LIBMNL_PATH)
+ sed -i 's:prefix=.*:prefix=$(LIBMNL_PATH):' $(LIBMNL_PATH)/libmnl.pc
+
+$(WIREGUARD_TOOLS_PATH)/.installed: $(WIREGUARD_TOOLS_TAR)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ touch $@
+
+$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
+ LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg
+ $(STRIP) -s $@
+
+$(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS)
+ mkdir -p $(BUILD_PATH)
+ $(MUSL_CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $<
+ $(STRIP) -s $@
+
+$(IPUTILS_PATH)/.installed: $(IPUTILS_TAR)
+ mkdir -p $(BUILD_PATH)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ touch $@
+
+$(IPUTILS_PATH)/ping: | $(IPUTILS_PATH)/.installed $(USERSPACE_DEPS)
+ sed -i /atexit/d $(IPUTILS_PATH)/ping.c
+ cd $(IPUTILS_PATH) && $(CC) $(CFLAGS) -std=c99 -o $@ ping.c ping_common.c ping6_common.c iputils_common.c -D_GNU_SOURCE -D'IPUTILS_VERSION(f)=f' -lresolv $(LDFLAGS)
+ $(STRIP) -s $@
+
+$(BASH_PATH)/.installed: $(BASH_TAR)
+ mkdir -p $(BUILD_PATH)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ touch $@
+
+$(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS)
+ cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-multibyte --disable-progcomp --disable-readline --disable-mem-scramble
+ $(MAKE) -C $(BASH_PATH)
+ $(STRIP) -s $@
+
+$(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR)
+ mkdir -p $(BUILD_PATH)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=y\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_LIBMNL -I$(LIBMNL_PATH)/include\nLDLIBS+=-lmnl' > $(IPROUTE2_PATH)/config.mk
+ printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile
+ touch $@
+
+$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
+ LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip
+ $(STRIP) -s $(IPROUTE2_PATH)/ip/ip
+
+$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
+ LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss
+ $(STRIP) -s $(IPROUTE2_PATH)/misc/ss
+
+$(IPTABLES_PATH)/.installed: $(IPTABLES_TAR)
+ mkdir -p $(BUILD_PATH)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure
+ touch $@
+
+$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
+ cd $(IPTABLES_PATH) && PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --with-kernel=$(BUILD_PATH)/include
+ $(MAKE) -C $(IPTABLES_PATH)
+ $(STRIP) -s $@
+
+$(NMAP_PATH)/.installed: $(NMAP_TAR)
+ mkdir -p $(BUILD_PATH)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ touch $@
+
+$(NMAP_PATH)/ncat/ncat: | $(NMAP_PATH)/.installed $(USERSPACE_DEPS)
+ cd $(NMAP_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --without-ndiff --without-zenmap --without-nping --with-libpcap=included --with-libpcre=included --with-libdnet=included --without-liblua --with-liblinear=included --without-nmap-update --without-openssl --with-pcap=linux --without-libssh
+ $(MAKE) -C $(NMAP_PATH)/libpcap
+ $(MAKE) -C $(NMAP_PATH)/ncat
+ $(STRIP) -s $@
+
+clean:
+ rm -rf $(BUILD_PATH)
+
+distclean: clean
+ rm -rf $(DISTFILES_PATH)
+
+menuconfig: $(KERNEL_BUILD_PATH)/.config
+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) menuconfig
+
+.PHONY: qemu build clean distclean menuconfig
+.DELETE_ON_ERROR:
diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config
new file mode 100644
index 000000000000..3d063bb247bb
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config
@@ -0,0 +1,5 @@
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
new file mode 100644
index 000000000000..dbdc7e406a7b
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
@@ -0,0 +1,6 @@
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/arm.config b/tools/testing/selftests/wireguard/qemu/arch/arm.config
new file mode 100644
index 000000000000..148f49905418
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/arm.config
@@ -0,0 +1,9 @@
+CONFIG_MMU=y
+CONFIG_ARCH_MULTI_V7=y
+CONFIG_ARCH_VIRT=y
+CONFIG_THUMB2_KERNEL=n
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/armeb.config b/tools/testing/selftests/wireguard/qemu/arch/armeb.config
new file mode 100644
index 000000000000..bd76b07d00a2
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/armeb.config
@@ -0,0 +1,10 @@
+CONFIG_MMU=y
+CONFIG_ARCH_MULTI_V7=y
+CONFIG_ARCH_VIRT=y
+CONFIG_THUMB2_KERNEL=n
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/i686.config b/tools/testing/selftests/wireguard/qemu/arch/i686.config
new file mode 100644
index 000000000000..a85025d7206e
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/i686.config
@@ -0,0 +1,5 @@
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/m68k.config b/tools/testing/selftests/wireguard/qemu/arch/m68k.config
new file mode 100644
index 000000000000..62a15bdb877e
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/m68k.config
@@ -0,0 +1,9 @@
+CONFIG_MMU=y
+CONFIG_M68KCLASSIC=y
+CONFIG_M68040=y
+CONFIG_MAC=y
+CONFIG_SERIAL_PMACZILOG=y
+CONFIG_SERIAL_PMACZILOG_TTYS=y
+CONFIG_SERIAL_PMACZILOG_CONSOLE=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips.config b/tools/testing/selftests/wireguard/qemu/arch/mips.config
new file mode 100644
index 000000000000..df71d6b95546
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/mips.config
@@ -0,0 +1,11 @@
+CONFIG_CPU_MIPS32_R2=y
+CONFIG_MIPS_MALTA=y
+CONFIG_MIPS_CPS=y
+CONFIG_MIPS_FP_SUPPORT=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64.config b/tools/testing/selftests/wireguard/qemu/arch/mips64.config
new file mode 100644
index 000000000000..90c783f725c4
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/mips64.config
@@ -0,0 +1,14 @@
+CONFIG_64BIT=y
+CONFIG_CPU_MIPS64_R2=y
+CONFIG_MIPS32_N32=y
+CONFIG_CPU_HAS_MSA=y
+CONFIG_MIPS_MALTA=y
+CONFIG_MIPS_CPS=y
+CONFIG_MIPS_FP_SUPPORT=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64el.config b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config
new file mode 100644
index 000000000000..435b0b43e00c
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config
@@ -0,0 +1,15 @@
+CONFIG_64BIT=y
+CONFIG_CPU_MIPS64_R2=y
+CONFIG_MIPS32_N32=y
+CONFIG_CPU_HAS_MSA=y
+CONFIG_MIPS_MALTA=y
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_MIPS_CPS=y
+CONFIG_MIPS_FP_SUPPORT=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config
new file mode 100644
index 000000000000..62bb50c4a85f
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config
@@ -0,0 +1,12 @@
+CONFIG_CPU_MIPS32_R2=y
+CONFIG_MIPS_MALTA=y
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_MIPS_CPS=y
+CONFIG_MIPS_FP_SUPPORT=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config
new file mode 100644
index 000000000000..57957093b71b
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config
@@ -0,0 +1,10 @@
+CONFIG_PPC_QEMU_E500=y
+CONFIG_FSL_SOC_BOOKE=y
+CONFIG_PPC_85xx=y
+CONFIG_PHYS_64BIT=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_MATH_EMULATION=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
new file mode 100644
index 000000000000..990c510a9cfa
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
@@ -0,0 +1,12 @@
+CONFIG_PPC64=y
+CONFIG_PPC_PSERIES=y
+CONFIG_ALTIVEC=y
+CONFIG_VSX=y
+CONFIG_PPC_OF_BOOT_TRAMPOLINE=y
+CONFIG_PPC_RADIX_MMU=y
+CONFIG_HVC_CONSOLE=y
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=hvc0 wg.success=hvc1"
+CONFIG_SECTION_MISMATCH_WARN_ONLY=y
+CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
new file mode 100644
index 000000000000..00a1ef4869d5
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
@@ -0,0 +1,5 @@
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config
new file mode 100644
index 000000000000..5909e7ef2a5c
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/debug.config
@@ -0,0 +1,66 @@
+CONFIG_LOCALVERSION="-debug"
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_POINTER=y
+CONFIG_STACK_VALIDATION=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_PAGE_EXTENSION=y
+CONFIG_PAGE_POISONING=y
+CONFIG_DEBUG_OBJECTS=y
+CONFIG_DEBUG_OBJECTS_FREE=y
+CONFIG_DEBUG_OBJECTS_TIMERS=y
+CONFIG_DEBUG_OBJECTS_WORK=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
+CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y
+CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1
+CONFIG_SLUB_DEBUG_ON=y
+CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_HAVE_DEBUG_STACKOVERFLOW=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_HAVE_ARCH_KMEMCHECK=y
+CONFIG_HAVE_ARCH_KASAN=y
+CONFIG_KASAN=y
+CONFIG_KASAN_INLINE=y
+CONFIG_UBSAN=y
+CONFIG_UBSAN_SANITIZE_ALL=y
+CONFIG_UBSAN_NO_ALIGNMENT=y
+CONFIG_UBSAN_NULL=y
+CONFIG_DEBUG_KMEMLEAK=y
+CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=8192
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_SHIRQ=y
+CONFIG_WQ_WATCHDOG=y
+CONFIG_SCHED_DEBUG=y
+CONFIG_SCHED_INFO=y
+CONFIG_SCHEDSTATS=y
+CONFIG_SCHED_STACK_END_CHECK=y
+CONFIG_DEBUG_TIMEKEEPING=y
+CONFIG_TIMER_STATS=y
+CONFIG_DEBUG_PREEMPT=y
+CONFIG_DEBUG_RT_MUTEXES=y
+CONFIG_DEBUG_SPINLOCK=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_LOCKDEP=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_TRACE_IRQFLAGS=y
+CONFIG_DEBUG_BUGVERBOSE=y
+CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_PI_LIST=y
+CONFIG_PROVE_RCU=y
+CONFIG_SPARSE_RCU_POINTER=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=21
+CONFIG_RCU_TRACE=y
+CONFIG_RCU_EQS_DEBUG=y
+CONFIG_USER_STACKTRACE_SUPPORT=y
+CONFIG_DEBUG_SG=y
+CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_DOUBLEFAULT=y
+CONFIG_X86_DEBUG_FPU=y
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT=y
+CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y
diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c
new file mode 100644
index 000000000000..90bc9813cadc
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/init.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+#include <sys/mount.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/io.h>
+#include <sys/ioctl.h>
+#include <sys/reboot.h>
+#include <sys/utsname.h>
+#include <sys/sendfile.h>
+#include <sys/sysmacros.h>
+#include <linux/random.h>
+#include <linux/version.h>
+
+__attribute__((noreturn)) static void poweroff(void)
+{
+ fflush(stdout);
+ fflush(stderr);
+ reboot(RB_AUTOBOOT);
+ sleep(30);
+ fprintf(stderr, "\x1b[37m\x1b[41m\x1b[1mFailed to power off!!!\x1b[0m\n");
+ exit(1);
+}
+
+static void panic(const char *what)
+{
+ fprintf(stderr, "\n\n\x1b[37m\x1b[41m\x1b[1mSOMETHING WENT HORRIBLY WRONG\x1b[0m\n\n \x1b[31m\x1b[1m%s: %s\x1b[0m\n\n\x1b[37m\x1b[44m\x1b[1mPower off...\x1b[0m\n\n", what, strerror(errno));
+ poweroff();
+}
+
+#define pretty_message(msg) puts("\x1b[32m\x1b[1m" msg "\x1b[0m")
+
+static void print_banner(void)
+{
+ struct utsname utsname;
+ int len;
+
+ if (uname(&utsname) < 0)
+ panic("uname");
+
+ len = strlen(" WireGuard Test Suite on ") + strlen(utsname.sysname) + strlen(utsname.release) + strlen(utsname.machine);
+ printf("\x1b[45m\x1b[33m\x1b[1m%*.s\x1b[0m\n\x1b[45m\x1b[33m\x1b[1m WireGuard Test Suite on %s %s %s \x1b[0m\n\x1b[45m\x1b[33m\x1b[1m%*.s\x1b[0m\n\n", len, "", utsname.sysname, utsname.release, utsname.machine, len, "");
+}
+
+static void seed_rng(void)
+{
+ int fd;
+ struct {
+ int entropy_count;
+ int buffer_size;
+ unsigned char buffer[256];
+ } entropy = {
+ .entropy_count = sizeof(entropy.buffer) * 8,
+ .buffer_size = sizeof(entropy.buffer),
+ .buffer = "Adding real entropy is not actually important for these tests. Don't try this at home, kids!"
+ };
+
+ if (mknod("/dev/urandom", S_IFCHR | 0644, makedev(1, 9)))
+ panic("mknod(/dev/urandom)");
+ fd = open("/dev/urandom", O_WRONLY);
+ if (fd < 0)
+ panic("open(urandom)");
+ for (int i = 0; i < 256; ++i) {
+ if (ioctl(fd, RNDADDENTROPY, &entropy) < 0)
+ panic("ioctl(urandom)");
+ }
+ close(fd);
+}
+
+static void mount_filesystems(void)
+{
+ pretty_message("[+] Mounting filesystems...");
+ mkdir("/dev", 0755);
+ mkdir("/proc", 0755);
+ mkdir("/sys", 0755);
+ mkdir("/tmp", 0755);
+ mkdir("/run", 0755);
+ mkdir("/var", 0755);
+ if (mount("none", "/dev", "devtmpfs", 0, NULL))
+ panic("devtmpfs mount");
+ if (mount("none", "/proc", "proc", 0, NULL))
+ panic("procfs mount");
+ if (mount("none", "/sys", "sysfs", 0, NULL))
+ panic("sysfs mount");
+ if (mount("none", "/tmp", "tmpfs", 0, NULL))
+ panic("tmpfs mount");
+ if (mount("none", "/run", "tmpfs", 0, NULL))
+ panic("tmpfs mount");
+ if (mount("none", "/sys/kernel/debug", "debugfs", 0, NULL))
+ ; /* Not a problem if it fails.*/
+ if (symlink("/run", "/var/run"))
+ panic("run symlink");
+ if (symlink("/proc/self/fd", "/dev/fd"))
+ panic("fd symlink");
+}
+
+static void enable_logging(void)
+{
+ int fd;
+ pretty_message("[+] Enabling logging...");
+ fd = open("/proc/sys/kernel/printk", O_WRONLY);
+ if (fd >= 0) {
+ if (write(fd, "9\n", 2) != 2)
+ panic("write(printk)");
+ close(fd);
+ }
+ fd = open("/proc/sys/debug/exception-trace", O_WRONLY);
+ if (fd >= 0) {
+ if (write(fd, "1\n", 2) != 2)
+ panic("write(exception-trace)");
+ close(fd);
+ }
+ fd = open("/proc/sys/kernel/panic_on_warn", O_WRONLY);
+ if (fd >= 0) {
+ if (write(fd, "1\n", 2) != 2)
+ panic("write(panic_on_warn)");
+ close(fd);
+ }
+}
+
+static void kmod_selftests(void)
+{
+ FILE *file;
+ char line[2048], *start, *pass;
+ bool success = true;
+ pretty_message("[+] Module self-tests:");
+ file = fopen("/proc/kmsg", "r");
+ if (!file)
+ panic("fopen(kmsg)");
+ if (fcntl(fileno(file), F_SETFL, O_NONBLOCK) < 0)
+ panic("fcntl(kmsg, nonblock)");
+ while (fgets(line, sizeof(line), file)) {
+ start = strstr(line, "wireguard: ");
+ if (!start)
+ continue;
+ start += 11;
+ *strchrnul(start, '\n') = '\0';
+ if (strstr(start, "www.wireguard.com"))
+ break;
+ pass = strstr(start, ": pass");
+ if (!pass || pass[6] != '\0') {
+ success = false;
+ printf(" \x1b[31m* %s\x1b[0m\n", start);
+ } else
+ printf(" \x1b[32m* %s\x1b[0m\n", start);
+ }
+ fclose(file);
+ if (!success) {
+ puts("\x1b[31m\x1b[1m[-] Tests failed! \u2639\x1b[0m");
+ poweroff();
+ }
+}
+
+static void launch_tests(void)
+{
+ char cmdline[4096], *success_dev;
+ int status, fd;
+ pid_t pid;
+
+ pretty_message("[+] Launching tests...");
+ pid = fork();
+ if (pid == -1)
+ panic("fork");
+ else if (pid == 0) {
+ execl("/init.sh", "init", NULL);
+ panic("exec");
+ }
+ if (waitpid(pid, &status, 0) < 0)
+ panic("waitpid");
+ if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
+ pretty_message("[+] Tests successful! :-)");
+ fd = open("/proc/cmdline", O_RDONLY);
+ if (fd < 0)
+ panic("open(/proc/cmdline)");
+ if (read(fd, cmdline, sizeof(cmdline) - 1) <= 0)
+ panic("read(/proc/cmdline)");
+ cmdline[sizeof(cmdline) - 1] = '\0';
+ for (success_dev = strtok(cmdline, " \n"); success_dev; success_dev = strtok(NULL, " \n")) {
+ if (strncmp(success_dev, "wg.success=", 11))
+ continue;
+ memcpy(success_dev + 11 - 5, "/dev/", 5);
+ success_dev += 11 - 5;
+ break;
+ }
+ if (!success_dev || !strlen(success_dev))
+ panic("Unable to find success device");
+
+ fd = open(success_dev, O_WRONLY);
+ if (fd < 0)
+ panic("open(success_dev)");
+ if (write(fd, "success\n", 8) != 8)
+ panic("write(success_dev)");
+ close(fd);
+ } else {
+ const char *why = "unknown cause";
+ int what = -1;
+
+ if (WIFEXITED(status)) {
+ why = "exit code";
+ what = WEXITSTATUS(status);
+ } else if (WIFSIGNALED(status)) {
+ why = "signal";
+ what = WTERMSIG(status);
+ }
+ printf("\x1b[31m\x1b[1m[-] Tests failed with %s %d! \u2639\x1b[0m\n", why, what);
+ }
+}
+
+static void ensure_console(void)
+{
+ for (unsigned int i = 0; i < 1000; ++i) {
+ int fd = open("/dev/console", O_RDWR);
+ if (fd < 0) {
+ usleep(50000);
+ continue;
+ }
+ dup2(fd, 0);
+ dup2(fd, 1);
+ dup2(fd, 2);
+ close(fd);
+ if (write(1, "\0\0\0\0\n", 5) == 5)
+ return;
+ }
+ panic("Unable to open console device");
+}
+
+static void clear_leaks(void)
+{
+ int fd;
+
+ fd = open("/sys/kernel/debug/kmemleak", O_WRONLY);
+ if (fd < 0)
+ return;
+ pretty_message("[+] Starting memory leak detection...");
+ write(fd, "clear\n", 5);
+ close(fd);
+}
+
+static void check_leaks(void)
+{
+ int fd;
+
+ fd = open("/sys/kernel/debug/kmemleak", O_WRONLY);
+ if (fd < 0)
+ return;
+ pretty_message("[+] Scanning for memory leaks...");
+ sleep(2); /* Wait for any grace periods. */
+ write(fd, "scan\n", 5);
+ close(fd);
+
+ fd = open("/sys/kernel/debug/kmemleak", O_RDONLY);
+ if (fd < 0)
+ return;
+ if (sendfile(1, fd, NULL, 0x7ffff000) > 0)
+ panic("Memory leaks encountered");
+ close(fd);
+}
+
+int main(int argc, char *argv[])
+{
+ seed_rng();
+ ensure_console();
+ print_banner();
+ mount_filesystems();
+ kmod_selftests();
+ enable_logging();
+ clear_leaks();
+ launch_tests();
+ check_leaks();
+ poweroff();
+ return 1;
+}
diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
new file mode 100644
index 000000000000..af9323a0b6e0
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
@@ -0,0 +1,88 @@
+CONFIG_LOCALVERSION=""
+CONFIG_NET=y
+CONFIG_NETDEVICES=y
+CONFIG_NET_CORE=y
+CONFIG_NET_IPIP=y
+CONFIG_DUMMY=y
+CONFIG_VETH=y
+CONFIG_MULTIUSER=y
+CONFIG_NAMESPACES=y
+CONFIG_NET_NS=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IPV6=y
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_NAT=y
+CONFIG_NETFILTER_XTABLES=y
+CONFIG_NETFILTER_XT_NAT=y
+CONFIG_NETFILTER_XT_MATCH_LENGTH=y
+CONFIG_NF_CONNTRACK_IPV4=y
+CONFIG_NF_NAT_IPV4=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_NAT=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_TTY=y
+CONFIG_BINFMT_ELF=y
+CONFIG_BINFMT_SCRIPT=y
+CONFIG_VDSO=y
+CONFIG_VIRTUALIZATION=y
+CONFIG_HYPERVISOR_GUEST=y
+CONFIG_PARAVIRT=y
+CONFIG_KVM_GUEST=y
+CONFIG_PARAVIRT_SPINLOCKS=y
+CONFIG_PRINTK=y
+CONFIG_KALLSYMS=y
+CONFIG_BUG=y
+CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y
+CONFIG_JUMP_LABEL=y
+CONFIG_EMBEDDED=n
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_SHMEM=y
+CONFIG_SLUB=y
+CONFIG_SPARSEMEM_VMEMMAP=y
+CONFIG_SMP=y
+CONFIG_SCHED_SMT=y
+CONFIG_SCHED_MC=y
+CONFIG_NUMA=y
+CONFIG_PREEMPT=y
+CONFIG_NO_HZ=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_HZ_PERIODIC=n
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_COMPAT_32BIT_TIME=y
+CONFIG_ARCH_RANDOM=y
+CONFIG_FILE_LOCKING=y
+CONFIG_POSIX_TIMERS=y
+CONFIG_DEVTMPFS=y
+CONFIG_PROC_FS=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15
+CONFIG_PRINTK_TIME=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_LEGACY_VSYSCALL_NONE=y
+CONFIG_KERNEL_GZIP=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_BUG_ON_DATA_CORRUPTION=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
+CONFIG_WQ_WATCHDOG=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y
+CONFIG_PANIC_TIMEOUT=-1
+CONFIG_STACKTRACE=y
+CONFIG_EARLY_PRINTK=y
+CONFIG_GDB_SCRIPTS=y
+CONFIG_WIREGUARD=y
+CONFIG_WIREGUARD_DEBUG=y
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index fa07d526fe39..5d49bfec1e9a 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -11,13 +11,13 @@ CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
- check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
+ check_initial_reg_state sigreturn iopl ioperm \
protection_keys test_vdso test_vsyscall mov_ss_trap \
syscall_arg_fault
TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
-TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
+TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering
# Some selftests require 32bit support enabled also on 64bit systems
TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
diff --git a/tools/testing/selftests/x86/ioperm.c b/tools/testing/selftests/x86/ioperm.c
index 01de41c1b725..57ec5e99edb9 100644
--- a/tools/testing/selftests/x86/ioperm.c
+++ b/tools/testing/selftests/x86/ioperm.c
@@ -131,6 +131,17 @@ int main(void)
printf("[RUN]\tchild: check that we inherited permissions\n");
expect_ok(0x80);
expect_gp(0xed);
+ printf("[RUN]\tchild: Extend permissions to 0x81\n");
+ if (ioperm(0x81, 1, 1) != 0) {
+ printf("[FAIL]\tioperm(0x81, 1, 1) failed (%d)", errno);
+ return 1;
+ }
+ printf("[RUN]\tchild: Drop permissions to 0x80\n");
+ if (ioperm(0x80, 1, 0) != 0) {
+ printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno);
+ return 1;
+ }
+ expect_gp(0x80);
return 0;
} else {
int status;
@@ -146,8 +157,11 @@ int main(void)
}
}
- /* Test the capability checks. */
+ /* Verify that the child dropping 0x80 did not affect the parent */
+ printf("\tVerify that unsharing the bitmap worked\n");
+ expect_ok(0x80);
+ /* Test the capability checks. */
printf("\tDrop privileges\n");
if (setresuid(1, 1, 1) != 0) {
printf("[WARN]\tDropping privileges failed\n");
diff --git a/tools/testing/selftests/x86/iopl.c b/tools/testing/selftests/x86/iopl.c
index 6aa27f34644c..bab2f6e06b63 100644
--- a/tools/testing/selftests/x86/iopl.c
+++ b/tools/testing/selftests/x86/iopl.c
@@ -35,6 +35,16 @@ static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
}
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
static jmp_buf jmpbuf;
static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
@@ -42,25 +52,128 @@ static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
siglongjmp(jmpbuf, 1);
}
+static bool try_outb(unsigned short port)
+{
+ sethandler(SIGSEGV, sigsegv, SA_RESETHAND);
+ if (sigsetjmp(jmpbuf, 1) != 0) {
+ return false;
+ } else {
+ asm volatile ("outb %%al, %w[port]"
+ : : [port] "Nd" (port), "a" (0));
+ return true;
+ }
+ clearhandler(SIGSEGV);
+}
+
+static void expect_ok_outb(unsigned short port)
+{
+ if (!try_outb(port)) {
+ printf("[FAIL]\toutb to 0x%02hx failed\n", port);
+ exit(1);
+ }
+
+ printf("[OK]\toutb to 0x%02hx worked\n", port);
+}
+
+static void expect_gp_outb(unsigned short port)
+{
+ if (try_outb(port)) {
+ printf("[FAIL]\toutb to 0x%02hx worked\n", port);
+ nerrs++;
+ }
+
+ printf("[OK]\toutb to 0x%02hx failed\n", port);
+}
+
+static bool try_cli(void)
+{
+ sethandler(SIGSEGV, sigsegv, SA_RESETHAND);
+ if (sigsetjmp(jmpbuf, 1) != 0) {
+ return false;
+ } else {
+ asm volatile ("cli");
+ return true;
+ }
+ clearhandler(SIGSEGV);
+}
+
+static bool try_sti(void)
+{
+ sethandler(SIGSEGV, sigsegv, SA_RESETHAND);
+ if (sigsetjmp(jmpbuf, 1) != 0) {
+ return false;
+ } else {
+ asm volatile ("sti");
+ return true;
+ }
+ clearhandler(SIGSEGV);
+}
+
+static void expect_gp_sti(void)
+{
+ if (try_sti()) {
+ printf("[FAIL]\tSTI worked\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tSTI faulted\n");
+ }
+}
+
+static void expect_gp_cli(void)
+{
+ if (try_cli()) {
+ printf("[FAIL]\tCLI worked\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tCLI faulted\n");
+ }
+}
+
int main(void)
{
cpu_set_t cpuset;
+
CPU_ZERO(&cpuset);
CPU_SET(0, &cpuset);
if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
err(1, "sched_setaffinity to CPU 0");
/* Probe for iopl support. Note that iopl(0) works even as nonroot. */
- if (iopl(3) != 0) {
+ switch(iopl(3)) {
+ case 0:
+ break;
+ case -ENOSYS:
+ printf("[OK]\tiopl() nor supported\n");
+ return 0;
+ default:
printf("[OK]\tiopl(3) failed (%d) -- try running as root\n",
errno);
return 0;
}
- /* Restore our original state prior to starting the test. */
+ /* Make sure that CLI/STI are blocked even with IOPL level 3 */
+ expect_gp_cli();
+ expect_gp_sti();
+ expect_ok_outb(0x80);
+
+ /* Establish an I/O bitmap to test the restore */
+ if (ioperm(0x80, 1, 1) != 0)
+ err(1, "ioperm(0x80, 1, 1) failed\n");
+
+ /* Restore our original state prior to starting the fork test. */
if (iopl(0) != 0)
err(1, "iopl(0)");
+ /*
+ * Verify that IOPL emulation is disabled and the I/O bitmap still
+ * works.
+ */
+ expect_ok_outb(0x80);
+ expect_gp_outb(0xed);
+ /* Drop the I/O bitmap */
+ if (ioperm(0x80, 1, 0) != 0)
+ err(1, "ioperm(0x80, 1, 0) failed\n");
+
pid_t child = fork();
if (child == -1)
err(1, "fork");
@@ -90,14 +203,9 @@ int main(void)
printf("[RUN]\tparent: write to 0x80 (should fail)\n");
- sethandler(SIGSEGV, sigsegv, 0);
- if (sigsetjmp(jmpbuf, 1) != 0) {
- printf("[OK]\twrite was denied\n");
- } else {
- asm volatile ("outb %%al, $0x80" : : "a" (0));
- printf("[FAIL]\twrite was allowed\n");
- nerrs++;
- }
+ expect_gp_outb(0x80);
+ expect_gp_cli();
+ expect_gp_sti();
/* Test the capability checks. */
printf("\tiopl(3)\n");
@@ -133,4 +241,3 @@ int main(void)
done:
return nerrs ? 1 : 0;
}
-
diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c
index 3c3a022654f3..6da0ac3f0135 100644
--- a/tools/testing/selftests/x86/mov_ss_trap.c
+++ b/tools/testing/selftests/x86/mov_ss_trap.c
@@ -257,7 +257,8 @@ int main()
err(1, "sigaltstack");
sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK);
nr = SYS_getpid;
- asm volatile ("mov %[ss], %%ss; SYSENTER" : "+a" (nr)
+ /* Clear EBP first to make sure we segfault cleanly. */
+ asm volatile ("xorl %%ebp, %%ebp; mov %[ss], %%ss; SYSENTER" : "+a" (nr)
: [ss] "m" (ss) : "flags", "rcx"
#ifdef __x86_64__
, "r11"
diff --git a/tools/testing/selftests/x86/mpx-debug.h b/tools/testing/selftests/x86/mpx-debug.h
deleted file mode 100644
index 7546eba7f17a..000000000000
--- a/tools/testing/selftests/x86/mpx-debug.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _MPX_DEBUG_H
-#define _MPX_DEBUG_H
-
-#ifndef DEBUG_LEVEL
-#define DEBUG_LEVEL 0
-#endif
-#define dprintf_level(level, args...) do { if(level <= DEBUG_LEVEL) printf(args); } while(0)
-#define dprintf1(args...) dprintf_level(1, args)
-#define dprintf2(args...) dprintf_level(2, args)
-#define dprintf3(args...) dprintf_level(3, args)
-#define dprintf4(args...) dprintf_level(4, args)
-#define dprintf5(args...) dprintf_level(5, args)
-
-#endif /* _MPX_DEBUG_H */
diff --git a/tools/testing/selftests/x86/mpx-dig.c b/tools/testing/selftests/x86/mpx-dig.c
deleted file mode 100644
index 880fbf676968..000000000000
--- a/tools/testing/selftests/x86/mpx-dig.c
+++ /dev/null
@@ -1,497 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Written by Dave Hansen <dave.hansen@intel.com>
- */
-
-#include <stdlib.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <errno.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <string.h>
-#include <fcntl.h>
-#include "mpx-debug.h"
-#include "mpx-mm.h"
-#include "mpx-hw.h"
-
-unsigned long bounds_dir_global;
-
-#define mpx_dig_abort() __mpx_dig_abort(__FILE__, __func__, __LINE__)
-static void inline __mpx_dig_abort(const char *file, const char *func, int line)
-{
- fprintf(stderr, "MPX dig abort @ %s::%d in %s()\n", file, line, func);
- printf("MPX dig abort @ %s::%d in %s()\n", file, line, func);
- abort();
-}
-
-/*
- * run like this (BDIR finds the probably bounds directory):
- *
- * BDIR="$(cat /proc/$pid/smaps | grep -B1 2097152 \
- * | head -1 | awk -F- '{print $1}')";
- * ./mpx-dig $pid 0x$BDIR
- *
- * NOTE:
- * assumes that the only 2097152-kb VMA is the bounds dir
- */
-
-long nr_incore(void *ptr, unsigned long size_bytes)
-{
- int i;
- long ret = 0;
- long vec_len = size_bytes / PAGE_SIZE;
- unsigned char *vec = malloc(vec_len);
- int incore_ret;
-
- if (!vec)
- mpx_dig_abort();
-
- incore_ret = mincore(ptr, size_bytes, vec);
- if (incore_ret) {
- printf("mincore ret: %d\n", incore_ret);
- perror("mincore");
- mpx_dig_abort();
- }
- for (i = 0; i < vec_len; i++)
- ret += vec[i];
- free(vec);
- return ret;
-}
-
-int open_proc(int pid, char *file)
-{
- static char buf[100];
- int fd;
-
- snprintf(&buf[0], sizeof(buf), "/proc/%d/%s", pid, file);
- fd = open(&buf[0], O_RDONLY);
- if (fd < 0)
- perror(buf);
-
- return fd;
-}
-
-struct vaddr_range {
- unsigned long start;
- unsigned long end;
-};
-struct vaddr_range *ranges;
-int nr_ranges_allocated;
-int nr_ranges_populated;
-int last_range = -1;
-
-int __pid_load_vaddrs(int pid)
-{
- int ret = 0;
- int proc_maps_fd = open_proc(pid, "maps");
- char linebuf[10000];
- unsigned long start;
- unsigned long end;
- char rest[1000];
- FILE *f = fdopen(proc_maps_fd, "r");
-
- if (!f)
- mpx_dig_abort();
- nr_ranges_populated = 0;
- while (!feof(f)) {
- char *readret = fgets(linebuf, sizeof(linebuf), f);
- int parsed;
-
- if (readret == NULL) {
- if (feof(f))
- break;
- mpx_dig_abort();
- }
-
- parsed = sscanf(linebuf, "%lx-%lx%s", &start, &end, rest);
- if (parsed != 3)
- mpx_dig_abort();
-
- dprintf4("result[%d]: %lx-%lx<->%s\n", parsed, start, end, rest);
- if (nr_ranges_populated >= nr_ranges_allocated) {
- ret = -E2BIG;
- break;
- }
- ranges[nr_ranges_populated].start = start;
- ranges[nr_ranges_populated].end = end;
- nr_ranges_populated++;
- }
- last_range = -1;
- fclose(f);
- close(proc_maps_fd);
- return ret;
-}
-
-int pid_load_vaddrs(int pid)
-{
- int ret;
-
- dprintf2("%s(%d)\n", __func__, pid);
- if (!ranges) {
- nr_ranges_allocated = 4;
- ranges = malloc(nr_ranges_allocated * sizeof(ranges[0]));
- dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__, pid,
- nr_ranges_allocated, ranges);
- assert(ranges != NULL);
- }
- do {
- ret = __pid_load_vaddrs(pid);
- if (!ret)
- break;
- if (ret == -E2BIG) {
- dprintf2("%s(%d) need to realloc\n", __func__, pid);
- nr_ranges_allocated *= 2;
- ranges = realloc(ranges,
- nr_ranges_allocated * sizeof(ranges[0]));
- dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__,
- pid, nr_ranges_allocated, ranges);
- assert(ranges != NULL);
- dprintf1("reallocating to hold %d ranges\n", nr_ranges_allocated);
- }
- } while (1);
-
- dprintf2("%s(%d) done\n", __func__, pid);
-
- return ret;
-}
-
-static inline int vaddr_in_range(unsigned long vaddr, struct vaddr_range *r)
-{
- if (vaddr < r->start)
- return 0;
- if (vaddr >= r->end)
- return 0;
- return 1;
-}
-
-static inline int vaddr_mapped_by_range(unsigned long vaddr)
-{
- int i;
-
- if (last_range > 0 && vaddr_in_range(vaddr, &ranges[last_range]))
- return 1;
-
- for (i = 0; i < nr_ranges_populated; i++) {
- struct vaddr_range *r = &ranges[i];
-
- if (vaddr_in_range(vaddr, r))
- continue;
- last_range = i;
- return 1;
- }
- return 0;
-}
-
-const int bt_entry_size_bytes = sizeof(unsigned long) * 4;
-
-void *read_bounds_table_into_buf(unsigned long table_vaddr)
-{
-#ifdef MPX_DIG_STANDALONE
- static char bt_buf[MPX_BOUNDS_TABLE_SIZE_BYTES];
- off_t seek_ret = lseek(fd, table_vaddr, SEEK_SET);
- if (seek_ret != table_vaddr)
- mpx_dig_abort();
-
- int read_ret = read(fd, &bt_buf, sizeof(bt_buf));
- if (read_ret != sizeof(bt_buf))
- mpx_dig_abort();
- return &bt_buf;
-#else
- return (void *)table_vaddr;
-#endif
-}
-
-int dump_table(unsigned long table_vaddr, unsigned long base_controlled_vaddr,
- unsigned long bde_vaddr)
-{
- unsigned long offset_inside_bt;
- int nr_entries = 0;
- int do_abort = 0;
- char *bt_buf;
-
- dprintf3("%s() base_controlled_vaddr: 0x%012lx bde_vaddr: 0x%012lx\n",
- __func__, base_controlled_vaddr, bde_vaddr);
-
- bt_buf = read_bounds_table_into_buf(table_vaddr);
-
- dprintf4("%s() read done\n", __func__);
-
- for (offset_inside_bt = 0;
- offset_inside_bt < MPX_BOUNDS_TABLE_SIZE_BYTES;
- offset_inside_bt += bt_entry_size_bytes) {
- unsigned long bt_entry_index;
- unsigned long bt_entry_controls;
- unsigned long this_bt_entry_for_vaddr;
- unsigned long *bt_entry_buf;
- int i;
-
- dprintf4("%s() offset_inside_bt: 0x%lx of 0x%llx\n", __func__,
- offset_inside_bt, MPX_BOUNDS_TABLE_SIZE_BYTES);
- bt_entry_buf = (void *)&bt_buf[offset_inside_bt];
- if (!bt_buf) {
- printf("null bt_buf\n");
- mpx_dig_abort();
- }
- if (!bt_entry_buf) {
- printf("null bt_entry_buf\n");
- mpx_dig_abort();
- }
- dprintf4("%s() reading *bt_entry_buf @ %p\n", __func__,
- bt_entry_buf);
- if (!bt_entry_buf[0] &&
- !bt_entry_buf[1] &&
- !bt_entry_buf[2] &&
- !bt_entry_buf[3])
- continue;
-
- nr_entries++;
-
- bt_entry_index = offset_inside_bt/bt_entry_size_bytes;
- bt_entry_controls = sizeof(void *);
- this_bt_entry_for_vaddr =
- base_controlled_vaddr + bt_entry_index*bt_entry_controls;
- /*
- * We sign extend vaddr bits 48->63 which effectively
- * creates a hole in the virtual address space.
- * This calculation corrects for the hole.
- */
- if (this_bt_entry_for_vaddr > 0x00007fffffffffffUL)
- this_bt_entry_for_vaddr |= 0xffff800000000000;
-
- if (!vaddr_mapped_by_range(this_bt_entry_for_vaddr)) {
- printf("bt_entry_buf: %p\n", bt_entry_buf);
- printf("there is a bte for %lx but no mapping\n",
- this_bt_entry_for_vaddr);
- printf(" bde vaddr: %016lx\n", bde_vaddr);
- printf("base_controlled_vaddr: %016lx\n", base_controlled_vaddr);
- printf(" table_vaddr: %016lx\n", table_vaddr);
- printf(" entry vaddr: %016lx @ offset %lx\n",
- table_vaddr + offset_inside_bt, offset_inside_bt);
- do_abort = 1;
- mpx_dig_abort();
- }
- if (DEBUG_LEVEL < 4)
- continue;
-
- printf("table entry[%lx]: ", offset_inside_bt);
- for (i = 0; i < bt_entry_size_bytes; i += sizeof(unsigned long))
- printf("0x%016lx ", bt_entry_buf[i]);
- printf("\n");
- }
- if (do_abort)
- mpx_dig_abort();
- dprintf4("%s() done\n", __func__);
- return nr_entries;
-}
-
-int search_bd_buf(char *buf, int len_bytes, unsigned long bd_offset_bytes,
- int *nr_populated_bdes)
-{
- unsigned long i;
- int total_entries = 0;
-
- dprintf3("%s(%p, %x, %lx, ...) buf end: %p\n", __func__, buf,
- len_bytes, bd_offset_bytes, buf + len_bytes);
-
- for (i = 0; i < len_bytes; i += sizeof(unsigned long)) {
- unsigned long bd_index = (bd_offset_bytes + i) / sizeof(unsigned long);
- unsigned long *bounds_dir_entry_ptr = (unsigned long *)&buf[i];
- unsigned long bounds_dir_entry;
- unsigned long bd_for_vaddr;
- unsigned long bt_start;
- unsigned long bt_tail;
- int nr_entries;
-
- dprintf4("%s() loop i: %ld bounds_dir_entry_ptr: %p\n", __func__, i,
- bounds_dir_entry_ptr);
-
- bounds_dir_entry = *bounds_dir_entry_ptr;
- if (!bounds_dir_entry) {
- dprintf4("no bounds dir at index 0x%lx / 0x%lx "
- "start at offset:%lx %lx\n", bd_index, bd_index,
- bd_offset_bytes, i);
- continue;
- }
- dprintf3("found bounds_dir_entry: 0x%lx @ "
- "index 0x%lx buf ptr: %p\n", bounds_dir_entry, i,
- &buf[i]);
- /* mask off the enable bit: */
- bounds_dir_entry &= ~0x1;
- (*nr_populated_bdes)++;
- dprintf4("nr_populated_bdes: %p\n", nr_populated_bdes);
- dprintf4("*nr_populated_bdes: %d\n", *nr_populated_bdes);
-
- bt_start = bounds_dir_entry;
- bt_tail = bounds_dir_entry + MPX_BOUNDS_TABLE_SIZE_BYTES - 1;
- if (!vaddr_mapped_by_range(bt_start)) {
- printf("bounds directory 0x%lx points to nowhere\n",
- bounds_dir_entry);
- mpx_dig_abort();
- }
- if (!vaddr_mapped_by_range(bt_tail)) {
- printf("bounds directory end 0x%lx points to nowhere\n",
- bt_tail);
- mpx_dig_abort();
- }
- /*
- * Each bounds directory entry controls 1MB of virtual address
- * space. This variable is the virtual address in the process
- * of the beginning of the area controlled by this bounds_dir.
- */
- bd_for_vaddr = bd_index * (1UL<<20);
-
- nr_entries = dump_table(bounds_dir_entry, bd_for_vaddr,
- bounds_dir_global+bd_offset_bytes+i);
- total_entries += nr_entries;
- dprintf5("dir entry[%4ld @ %p]: 0x%lx %6d entries "
- "total this buf: %7d bd_for_vaddrs: 0x%lx -> 0x%lx\n",
- bd_index, buf+i,
- bounds_dir_entry, nr_entries, total_entries,
- bd_for_vaddr, bd_for_vaddr + (1UL<<20));
- }
- dprintf3("%s(%p, %x, %lx, ...) done\n", __func__, buf, len_bytes,
- bd_offset_bytes);
- return total_entries;
-}
-
-int proc_pid_mem_fd = -1;
-
-void *fill_bounds_dir_buf_other(long byte_offset_inside_bounds_dir,
- long buffer_size_bytes, void *buffer)
-{
- unsigned long seekto = bounds_dir_global + byte_offset_inside_bounds_dir;
- int read_ret;
- off_t seek_ret = lseek(proc_pid_mem_fd, seekto, SEEK_SET);
-
- if (seek_ret != seekto)
- mpx_dig_abort();
-
- read_ret = read(proc_pid_mem_fd, buffer, buffer_size_bytes);
- /* there shouldn't practically be short reads of /proc/$pid/mem */
- if (read_ret != buffer_size_bytes)
- mpx_dig_abort();
-
- return buffer;
-}
-void *fill_bounds_dir_buf_self(long byte_offset_inside_bounds_dir,
- long buffer_size_bytes, void *buffer)
-
-{
- unsigned char vec[buffer_size_bytes / PAGE_SIZE];
- char *dig_bounds_dir_ptr =
- (void *)(bounds_dir_global + byte_offset_inside_bounds_dir);
- /*
- * use mincore() to quickly find the areas of the bounds directory
- * that have memory and thus will be worth scanning.
- */
- int incore_ret;
-
- int incore = 0;
- int i;
-
- dprintf4("%s() dig_bounds_dir_ptr: %p\n", __func__, dig_bounds_dir_ptr);
-
- incore_ret = mincore(dig_bounds_dir_ptr, buffer_size_bytes, &vec[0]);
- if (incore_ret) {
- printf("mincore ret: %d\n", incore_ret);
- perror("mincore");
- mpx_dig_abort();
- }
- for (i = 0; i < sizeof(vec); i++)
- incore += vec[i];
- dprintf4("%s() total incore: %d\n", __func__, incore);
- if (!incore)
- return NULL;
- dprintf3("%s() total incore: %d\n", __func__, incore);
- return dig_bounds_dir_ptr;
-}
-
-int inspect_pid(int pid)
-{
- static int dig_nr;
- long offset_inside_bounds_dir;
- char bounds_dir_buf[sizeof(unsigned long) * (1UL << 15)];
- char *dig_bounds_dir_ptr;
- int total_entries = 0;
- int nr_populated_bdes = 0;
- int inspect_self;
-
- if (getpid() == pid) {
- dprintf4("inspecting self\n");
- inspect_self = 1;
- } else {
- dprintf4("inspecting pid %d\n", pid);
- mpx_dig_abort();
- }
-
- for (offset_inside_bounds_dir = 0;
- offset_inside_bounds_dir < MPX_BOUNDS_TABLE_SIZE_BYTES;
- offset_inside_bounds_dir += sizeof(bounds_dir_buf)) {
- static int bufs_skipped;
- int this_entries;
-
- if (inspect_self) {
- dig_bounds_dir_ptr =
- fill_bounds_dir_buf_self(offset_inside_bounds_dir,
- sizeof(bounds_dir_buf),
- &bounds_dir_buf[0]);
- } else {
- dig_bounds_dir_ptr =
- fill_bounds_dir_buf_other(offset_inside_bounds_dir,
- sizeof(bounds_dir_buf),
- &bounds_dir_buf[0]);
- }
- if (!dig_bounds_dir_ptr) {
- bufs_skipped++;
- continue;
- }
- this_entries = search_bd_buf(dig_bounds_dir_ptr,
- sizeof(bounds_dir_buf),
- offset_inside_bounds_dir,
- &nr_populated_bdes);
- total_entries += this_entries;
- }
- printf("mpx dig (%3d) complete, SUCCESS (%8d / %4d)\n", ++dig_nr,
- total_entries, nr_populated_bdes);
- return total_entries + nr_populated_bdes;
-}
-
-#ifdef MPX_DIG_REMOTE
-int main(int argc, char **argv)
-{
- int err;
- char *c;
- unsigned long bounds_dir_entry;
- int pid;
-
- printf("mpx-dig starting...\n");
- err = sscanf(argv[1], "%d", &pid);
- printf("parsing: '%s', err: %d\n", argv[1], err);
- if (err != 1)
- mpx_dig_abort();
-
- err = sscanf(argv[2], "%lx", &bounds_dir_global);
- printf("parsing: '%s': %d\n", argv[2], err);
- if (err != 1)
- mpx_dig_abort();
-
- proc_pid_mem_fd = open_proc(pid, "mem");
- if (proc_pid_mem_fd < 0)
- mpx_dig_abort();
-
- inspect_pid(pid);
- return 0;
-}
-#endif
-
-long inspect_me(struct mpx_bounds_dir *bounds_dir)
-{
- int pid = getpid();
-
- pid_load_vaddrs(pid);
- bounds_dir_global = (unsigned long)bounds_dir;
- dprintf4("enter %s() bounds dir: %p\n", __func__, bounds_dir);
- return inspect_pid(pid);
-}
diff --git a/tools/testing/selftests/x86/mpx-hw.h b/tools/testing/selftests/x86/mpx-hw.h
deleted file mode 100644
index d1b61ab870f8..000000000000
--- a/tools/testing/selftests/x86/mpx-hw.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _MPX_HW_H
-#define _MPX_HW_H
-
-#include <assert.h>
-
-/* Describe the MPX Hardware Layout in here */
-
-#define NR_MPX_BOUNDS_REGISTERS 4
-
-#ifdef __i386__
-
-#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES 16 /* 4 * 32-bits */
-#define MPX_BOUNDS_TABLE_SIZE_BYTES (1ULL << 14) /* 16k */
-#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES 4
-#define MPX_BOUNDS_DIR_SIZE_BYTES (1ULL << 22) /* 4MB */
-
-#define MPX_BOUNDS_TABLE_BOTTOM_BIT 2
-#define MPX_BOUNDS_TABLE_TOP_BIT 11
-#define MPX_BOUNDS_DIR_BOTTOM_BIT 12
-#define MPX_BOUNDS_DIR_TOP_BIT 31
-
-#else
-
-/*
- * Linear Address of "pointer" (LAp)
- * 0 -> 2: ignored
- * 3 -> 19: index in to bounds table
- * 20 -> 47: index in to bounds directory
- * 48 -> 63: ignored
- */
-
-#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES 32
-#define MPX_BOUNDS_TABLE_SIZE_BYTES (1ULL << 22) /* 4MB */
-#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES 8
-#define MPX_BOUNDS_DIR_SIZE_BYTES (1ULL << 31) /* 2GB */
-
-#define MPX_BOUNDS_TABLE_BOTTOM_BIT 3
-#define MPX_BOUNDS_TABLE_TOP_BIT 19
-#define MPX_BOUNDS_DIR_BOTTOM_BIT 20
-#define MPX_BOUNDS_DIR_TOP_BIT 47
-
-#endif
-
-#define MPX_BOUNDS_DIR_NR_ENTRIES \
- (MPX_BOUNDS_DIR_SIZE_BYTES/MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES)
-#define MPX_BOUNDS_TABLE_NR_ENTRIES \
- (MPX_BOUNDS_TABLE_SIZE_BYTES/MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES)
-
-#define MPX_BOUNDS_TABLE_ENTRY_VALID_BIT 0x1
-
-struct mpx_bd_entry {
- union {
- char x[MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES];
- void *contents[0];
- };
-} __attribute__((packed));
-
-struct mpx_bt_entry {
- union {
- char x[MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES];
- unsigned long contents[0];
- };
-} __attribute__((packed));
-
-struct mpx_bounds_dir {
- struct mpx_bd_entry entries[MPX_BOUNDS_DIR_NR_ENTRIES];
-} __attribute__((packed));
-
-struct mpx_bounds_table {
- struct mpx_bt_entry entries[MPX_BOUNDS_TABLE_NR_ENTRIES];
-} __attribute__((packed));
-
-static inline unsigned long GET_BITS(unsigned long val, int bottombit, int topbit)
-{
- int total_nr_bits = topbit - bottombit;
- unsigned long mask = (1UL << total_nr_bits)-1;
- return (val >> bottombit) & mask;
-}
-
-static inline unsigned long __vaddr_bounds_table_index(void *vaddr)
-{
- return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_TABLE_BOTTOM_BIT,
- MPX_BOUNDS_TABLE_TOP_BIT);
-}
-
-static inline unsigned long __vaddr_bounds_directory_index(void *vaddr)
-{
- return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_DIR_BOTTOM_BIT,
- MPX_BOUNDS_DIR_TOP_BIT);
-}
-
-static inline struct mpx_bd_entry *mpx_vaddr_to_bd_entry(void *vaddr,
- struct mpx_bounds_dir *bounds_dir)
-{
- unsigned long index = __vaddr_bounds_directory_index(vaddr);
- return &bounds_dir->entries[index];
-}
-
-static inline int bd_entry_valid(struct mpx_bd_entry *bounds_dir_entry)
-{
- unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents;
- return (__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT);
-}
-
-static inline struct mpx_bounds_table *
-__bd_entry_to_bounds_table(struct mpx_bd_entry *bounds_dir_entry)
-{
- unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents;
- assert(__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT);
- __bd_entry &= ~MPX_BOUNDS_TABLE_ENTRY_VALID_BIT;
- return (struct mpx_bounds_table *)__bd_entry;
-}
-
-static inline struct mpx_bt_entry *
-mpx_vaddr_to_bt_entry(void *vaddr, struct mpx_bounds_dir *bounds_dir)
-{
- struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(vaddr, bounds_dir);
- struct mpx_bounds_table *bt = __bd_entry_to_bounds_table(bde);
- unsigned long index = __vaddr_bounds_table_index(vaddr);
- return &bt->entries[index];
-}
-
-#endif /* _MPX_HW_H */
diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c
deleted file mode 100644
index 23ddd453f362..000000000000
--- a/tools/testing/selftests/x86/mpx-mini-test.c
+++ /dev/null
@@ -1,1613 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * mpx-mini-test.c: routines to test Intel MPX (Memory Protection eXtentions)
- *
- * Written by:
- * "Ren, Qiaowei" <qiaowei.ren@intel.com>
- * "Wei, Gang" <gang.wei@intel.com>
- * "Hansen, Dave" <dave.hansen@intel.com>
- */
-
-/*
- * 2014-12-05: Dave Hansen: fixed all of the compiler warnings, and made sure
- * it works on 32-bit.
- */
-
-int inspect_every_this_many_mallocs = 100;
-int zap_all_every_this_many_mallocs = 1000;
-
-#define _GNU_SOURCE
-#define _LARGEFILE64_SOURCE
-
-#include <string.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <signal.h>
-#include <assert.h>
-#include <stdlib.h>
-#include <ucontext.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-
-#include "mpx-hw.h"
-#include "mpx-debug.h"
-#include "mpx-mm.h"
-
-#ifndef __always_inline
-#define __always_inline inline __attribute__((always_inline)
-#endif
-
-#ifndef TEST_DURATION_SECS
-#define TEST_DURATION_SECS 3
-#endif
-
-void write_int_to(char *prefix, char *file, int int_to_write)
-{
- char buf[100];
- int fd = open(file, O_RDWR);
- int len;
- int ret;
-
- assert(fd >= 0);
- len = snprintf(buf, sizeof(buf), "%s%d", prefix, int_to_write);
- assert(len >= 0);
- assert(len < sizeof(buf));
- ret = write(fd, buf, len);
- assert(ret == len);
- ret = close(fd);
- assert(!ret);
-}
-
-void write_pid_to(char *prefix, char *file)
-{
- write_int_to(prefix, file, getpid());
-}
-
-void trace_me(void)
-{
-/* tracing events dir */
-#define TED "/sys/kernel/debug/tracing/events/"
-/*
- write_pid_to("common_pid=", TED "signal/filter");
- write_pid_to("common_pid=", TED "exceptions/filter");
- write_int_to("", TED "signal/enable", 1);
- write_int_to("", TED "exceptions/enable", 1);
-*/
- write_pid_to("", "/sys/kernel/debug/tracing/set_ftrace_pid");
- write_int_to("", "/sys/kernel/debug/tracing/trace", 0);
-}
-
-#define test_failed() __test_failed(__FILE__, __LINE__)
-static void __test_failed(char *f, int l)
-{
- fprintf(stderr, "abort @ %s::%d\n", f, l);
- abort();
-}
-
-/* Error Printf */
-#define eprintf(args...) fprintf(stderr, args)
-
-#ifdef __i386__
-
-/* i386 directory size is 4MB */
-#define REG_IP_IDX REG_EIP
-#define REX_PREFIX
-
-#define XSAVE_OFFSET_IN_FPMEM sizeof(struct _libc_fpstate)
-
-/*
- * __cpuid() is from the Linux Kernel:
- */
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- /* ecx is often an input as well as an output. */
- asm volatile(
- "push %%ebx;"
- "cpuid;"
- "mov %%ebx, %1;"
- "pop %%ebx"
- : "=a" (*eax),
- "=g" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
-}
-
-#else /* __i386__ */
-
-#define REG_IP_IDX REG_RIP
-#define REX_PREFIX "0x48, "
-
-#define XSAVE_OFFSET_IN_FPMEM 0
-
-/*
- * __cpuid() is from the Linux Kernel:
- */
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- /* ecx is often an input as well as an output. */
- asm volatile(
- "cpuid;"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
-}
-
-#endif /* !__i386__ */
-
-struct xsave_hdr_struct {
- uint64_t xstate_bv;
- uint64_t reserved1[2];
- uint64_t reserved2[5];
-} __attribute__((packed));
-
-struct bndregs_struct {
- uint64_t bndregs[8];
-} __attribute__((packed));
-
-struct bndcsr_struct {
- uint64_t cfg_reg_u;
- uint64_t status_reg;
-} __attribute__((packed));
-
-struct xsave_struct {
- uint8_t fpu_sse[512];
- struct xsave_hdr_struct xsave_hdr;
- uint8_t ymm[256];
- uint8_t lwp[128];
- struct bndregs_struct bndregs;
- struct bndcsr_struct bndcsr;
-} __attribute__((packed));
-
-uint8_t __attribute__((__aligned__(64))) buffer[4096];
-struct xsave_struct *xsave_buf = (struct xsave_struct *)buffer;
-
-uint8_t __attribute__((__aligned__(64))) test_buffer[4096];
-struct xsave_struct *xsave_test_buf = (struct xsave_struct *)test_buffer;
-
-uint64_t num_bnd_chk;
-
-static __always_inline void xrstor_state(struct xsave_struct *fx, uint64_t mask)
-{
- uint32_t lmask = mask;
- uint32_t hmask = mask >> 32;
-
- asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
- : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
-}
-
-static __always_inline void xsave_state_1(void *_fx, uint64_t mask)
-{
- uint32_t lmask = mask;
- uint32_t hmask = mask >> 32;
- unsigned char *fx = _fx;
-
- asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
- : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
-}
-
-static inline uint64_t xgetbv(uint32_t index)
-{
- uint32_t eax, edx;
-
- asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
- : "=a" (eax), "=d" (edx)
- : "c" (index));
- return eax + ((uint64_t)edx << 32);
-}
-
-static uint64_t read_mpx_status_sig(ucontext_t *uctxt)
-{
- memset(buffer, 0, sizeof(buffer));
- memcpy(buffer,
- (uint8_t *)uctxt->uc_mcontext.fpregs + XSAVE_OFFSET_IN_FPMEM,
- sizeof(struct xsave_struct));
-
- return xsave_buf->bndcsr.status_reg;
-}
-
-#include <pthread.h>
-
-static uint8_t *get_next_inst_ip(uint8_t *addr)
-{
- uint8_t *ip = addr;
- uint8_t sib;
- uint8_t rm;
- uint8_t mod;
- uint8_t base;
- uint8_t modrm;
-
- /* determine the prefix. */
- switch(*ip) {
- case 0xf2:
- case 0xf3:
- case 0x66:
- ip++;
- break;
- }
-
- /* look for rex prefix */
- if ((*ip & 0x40) == 0x40)
- ip++;
-
- /* Make sure we have a MPX instruction. */
- if (*ip++ != 0x0f)
- return addr;
-
- /* Skip the op code byte. */
- ip++;
-
- /* Get the modrm byte. */
- modrm = *ip++;
-
- /* Break it down into parts. */
- rm = modrm & 7;
- mod = (modrm >> 6);
-
- /* Init the parts of the address mode. */
- base = 8;
-
- /* Is it a mem mode? */
- if (mod != 3) {
- /* look for scaled indexed addressing */
- if (rm == 4) {
- /* SIB addressing */
- sib = *ip++;
- base = sib & 7;
- switch (mod) {
- case 0:
- if (base == 5)
- ip += 4;
- break;
-
- case 1:
- ip++;
- break;
-
- case 2:
- ip += 4;
- break;
- }
-
- } else {
- /* MODRM addressing */
- switch (mod) {
- case 0:
- /* DISP32 addressing, no base */
- if (rm == 5)
- ip += 4;
- break;
-
- case 1:
- ip++;
- break;
-
- case 2:
- ip += 4;
- break;
- }
- }
- }
- return ip;
-}
-
-#ifdef si_lower
-static inline void *__si_bounds_lower(siginfo_t *si)
-{
- return si->si_lower;
-}
-
-static inline void *__si_bounds_upper(siginfo_t *si)
-{
- return si->si_upper;
-}
-#else
-
-/*
- * This deals with old version of _sigfault in some distros:
- *
-
-old _sigfault:
- struct {
- void *si_addr;
- } _sigfault;
-
-new _sigfault:
- struct {
- void __user *_addr;
- int _trapno;
- short _addr_lsb;
- union {
- struct {
- void __user *_lower;
- void __user *_upper;
- } _addr_bnd;
- __u32 _pkey;
- };
- } _sigfault;
- *
- */
-
-static inline void **__si_bounds_hack(siginfo_t *si)
-{
- void *sigfault = &si->_sifields._sigfault;
- void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault);
- int *trapno = (int*)end_sigfault;
- /* skip _trapno and _addr_lsb */
- void **__si_lower = (void**)(trapno + 2);
-
- return __si_lower;
-}
-
-static inline void *__si_bounds_lower(siginfo_t *si)
-{
- return *__si_bounds_hack(si);
-}
-
-static inline void *__si_bounds_upper(siginfo_t *si)
-{
- return *(__si_bounds_hack(si) + 1);
-}
-#endif
-
-static int br_count;
-static int expected_bnd_index = -1;
-uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */
-unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS];
-
-/* Failed address bound checks: */
-#ifndef SEGV_BNDERR
-# define SEGV_BNDERR 3
-#endif
-
-/*
- * The kernel is supposed to provide some information about the bounds
- * exception in the siginfo. It should match what we have in the bounds
- * registers that we are checking against. Just check against the shadow copy
- * since it is easily available, and we also check that *it* matches the real
- * registers.
- */
-void check_siginfo_vs_shadow(siginfo_t* si)
-{
- int siginfo_ok = 1;
- void *shadow_lower = (void *)(unsigned long)shadow_plb[expected_bnd_index][0];
- void *shadow_upper = (void *)(unsigned long)shadow_plb[expected_bnd_index][1];
-
- if ((expected_bnd_index < 0) ||
- (expected_bnd_index >= NR_MPX_BOUNDS_REGISTERS)) {
- fprintf(stderr, "ERROR: invalid expected_bnd_index: %d\n",
- expected_bnd_index);
- exit(6);
- }
- if (__si_bounds_lower(si) != shadow_lower)
- siginfo_ok = 0;
- if (__si_bounds_upper(si) != shadow_upper)
- siginfo_ok = 0;
-
- if (!siginfo_ok) {
- fprintf(stderr, "ERROR: siginfo bounds do not match "
- "shadow bounds for register %d\n", expected_bnd_index);
- exit(7);
- }
-}
-
-void handler(int signum, siginfo_t *si, void *vucontext)
-{
- int i;
- ucontext_t *uctxt = vucontext;
- int trapno;
- unsigned long ip;
-
- dprintf1("entered signal handler\n");
-
- trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
- ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
-
- if (trapno == 5) {
- typeof(si->si_addr) *si_addr_ptr = &si->si_addr;
- uint64_t status = read_mpx_status_sig(uctxt);
- uint64_t br_reason = status & 0x3;
-
- br_count++;
- dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count);
-
- dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n",
- status, ip, br_reason);
- dprintf2("si_signo: %d\n", si->si_signo);
- dprintf2(" signum: %d\n", signum);
- dprintf2("info->si_code == SEGV_BNDERR: %d\n",
- (si->si_code == SEGV_BNDERR));
- dprintf2("info->si_code: %d\n", si->si_code);
- dprintf2("info->si_lower: %p\n", __si_bounds_lower(si));
- dprintf2("info->si_upper: %p\n", __si_bounds_upper(si));
-
- for (i = 0; i < 8; i++)
- dprintf3("[%d]: %p\n", i, si_addr_ptr[i]);
- switch (br_reason) {
- case 0: /* traditional BR */
- fprintf(stderr,
- "Undefined status with bound exception:%jx\n",
- status);
- exit(5);
- case 1: /* #BR MPX bounds exception */
- /* these are normal and we expect to see them */
-
- check_siginfo_vs_shadow(si);
-
- dprintf1("bounds exception (normal): status 0x%jx at %p si_addr: %p\n",
- status, (void *)ip, si->si_addr);
- num_bnd_chk++;
- uctxt->uc_mcontext.gregs[REG_IP_IDX] =
- (greg_t)get_next_inst_ip((uint8_t *)ip);
- break;
- case 2:
- fprintf(stderr, "#BR status == 2, missing bounds table,"
- "kernel should have handled!!\n");
- exit(4);
- break;
- default:
- fprintf(stderr, "bound check error: status 0x%jx at %p\n",
- status, (void *)ip);
- num_bnd_chk++;
- uctxt->uc_mcontext.gregs[REG_IP_IDX] =
- (greg_t)get_next_inst_ip((uint8_t *)ip);
- fprintf(stderr, "bound check error: si_addr %p\n", si->si_addr);
- exit(3);
- }
- } else if (trapno == 14) {
- eprintf("ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n",
- trapno, ip);
- eprintf("si_addr %p\n", si->si_addr);
- eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
- test_failed();
- } else {
- eprintf("unexpected trap %d! at 0x%lx\n", trapno, ip);
- eprintf("si_addr %p\n", si->si_addr);
- eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
- test_failed();
- }
-}
-
-static inline void cpuid_count(unsigned int op, int count,
- unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- *eax = op;
- *ecx = count;
- __cpuid(eax, ebx, ecx, edx);
-}
-
-#define XSTATE_CPUID 0x0000000d
-
-/*
- * List of XSAVE features Linux knows about:
- */
-enum xfeature_bit {
- XSTATE_BIT_FP,
- XSTATE_BIT_SSE,
- XSTATE_BIT_YMM,
- XSTATE_BIT_BNDREGS,
- XSTATE_BIT_BNDCSR,
- XSTATE_BIT_OPMASK,
- XSTATE_BIT_ZMM_Hi256,
- XSTATE_BIT_Hi16_ZMM,
-
- XFEATURES_NR_MAX,
-};
-
-#define XSTATE_FP (1 << XSTATE_BIT_FP)
-#define XSTATE_SSE (1 << XSTATE_BIT_SSE)
-#define XSTATE_YMM (1 << XSTATE_BIT_YMM)
-#define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS)
-#define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR)
-#define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK)
-#define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256)
-#define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM)
-
-#define MPX_XSTATES (XSTATE_BNDREGS | XSTATE_BNDCSR) /* 0x18 */
-
-bool one_bit(unsigned int x, int bit)
-{
- return !!(x & (1<<bit));
-}
-
-void print_state_component(int state_bit_nr, char *name)
-{
- unsigned int eax, ebx, ecx, edx;
- unsigned int state_component_size;
- unsigned int state_component_supervisor;
- unsigned int state_component_user;
- unsigned int state_component_aligned;
-
- /* See SDM Section 13.2 */
- cpuid_count(XSTATE_CPUID, state_bit_nr, &eax, &ebx, &ecx, &edx);
- assert(eax || ebx || ecx);
- state_component_size = eax;
- state_component_supervisor = ((!ebx) && one_bit(ecx, 0));
- state_component_user = !one_bit(ecx, 0);
- state_component_aligned = one_bit(ecx, 1);
- printf("%8s: size: %d user: %d supervisor: %d aligned: %d\n",
- name,
- state_component_size, state_component_user,
- state_component_supervisor, state_component_aligned);
-
-}
-
-/* Intel-defined CPU features, CPUID level 0x00000001 (ecx) */
-#define XSAVE_FEATURE_BIT (26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
-#define OSXSAVE_FEATURE_BIT (27) /* XSAVE enabled in the OS */
-
-bool check_mpx_support(void)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid_count(1, 0, &eax, &ebx, &ecx, &edx);
-
- /* We can't do much without XSAVE, so just make these assert()'s */
- if (!one_bit(ecx, XSAVE_FEATURE_BIT)) {
- fprintf(stderr, "processor lacks XSAVE, can not run MPX tests\n");
- exit(0);
- }
-
- if (!one_bit(ecx, OSXSAVE_FEATURE_BIT)) {
- fprintf(stderr, "processor lacks OSXSAVE, can not run MPX tests\n");
- exit(0);
- }
-
- /* CPUs not supporting the XSTATE CPUID leaf do not support MPX */
- /* Is this redundant with the feature bit checks? */
- cpuid_count(0, 0, &eax, &ebx, &ecx, &edx);
- if (eax < XSTATE_CPUID) {
- fprintf(stderr, "processor lacks XSTATE CPUID leaf,"
- " can not run MPX tests\n");
- exit(0);
- }
-
- printf("XSAVE is supported by HW & OS\n");
-
- cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
-
- printf("XSAVE processor supported state mask: 0x%x\n", eax);
- printf("XSAVE OS supported state mask: 0x%jx\n", xgetbv(0));
-
- /* Make sure that the MPX states are enabled in in XCR0 */
- if ((eax & MPX_XSTATES) != MPX_XSTATES) {
- fprintf(stderr, "processor lacks MPX XSTATE(s), can not run MPX tests\n");
- exit(0);
- }
-
- /* Make sure the MPX states are supported by XSAVE* */
- if ((xgetbv(0) & MPX_XSTATES) != MPX_XSTATES) {
- fprintf(stderr, "MPX XSTATE(s) no enabled in XCR0, "
- "can not run MPX tests\n");
- exit(0);
- }
-
- print_state_component(XSTATE_BIT_BNDREGS, "BNDREGS");
- print_state_component(XSTATE_BIT_BNDCSR, "BNDCSR");
-
- return true;
-}
-
-void enable_mpx(void *l1base)
-{
- /* enable point lookup */
- memset(buffer, 0, sizeof(buffer));
- xrstor_state(xsave_buf, 0x18);
-
- xsave_buf->xsave_hdr.xstate_bv = 0x10;
- xsave_buf->bndcsr.cfg_reg_u = (unsigned long)l1base | 1;
- xsave_buf->bndcsr.status_reg = 0;
-
- dprintf2("bf xrstor\n");
- dprintf2("xsave cndcsr: status %jx, configu %jx\n",
- xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u);
- xrstor_state(xsave_buf, 0x18);
- dprintf2("after xrstor\n");
-
- xsave_state_1(xsave_buf, 0x18);
-
- dprintf1("xsave bndcsr: status %jx, configu %jx\n",
- xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u);
-}
-
-#include <sys/prctl.h>
-
-struct mpx_bounds_dir *bounds_dir_ptr;
-
-unsigned long __bd_incore(const char *func, int line)
-{
- unsigned long ret = nr_incore(bounds_dir_ptr, MPX_BOUNDS_DIR_SIZE_BYTES);
- return ret;
-}
-#define bd_incore() __bd_incore(__func__, __LINE__)
-
-void check_clear(void *ptr, unsigned long sz)
-{
- unsigned long *i;
-
- for (i = ptr; (void *)i < ptr + sz; i++) {
- if (*i) {
- dprintf1("%p is NOT clear at %p\n", ptr, i);
- assert(0);
- }
- }
- dprintf1("%p is clear for %lx\n", ptr, sz);
-}
-
-void check_clear_bd(void)
-{
- check_clear(bounds_dir_ptr, 2UL << 30);
-}
-
-#define USE_MALLOC_FOR_BOUNDS_DIR 1
-bool process_specific_init(void)
-{
- unsigned long size;
- unsigned long *dir;
- /* Guarantee we have the space to align it, add padding: */
- unsigned long pad = getpagesize();
-
- size = 2UL << 30; /* 2GB */
- if (sizeof(unsigned long) == 4)
- size = 4UL << 20; /* 4MB */
- dprintf1("trying to allocate %ld MB bounds directory\n", (size >> 20));
-
- if (USE_MALLOC_FOR_BOUNDS_DIR) {
- unsigned long _dir;
-
- dir = malloc(size + pad);
- assert(dir);
- _dir = (unsigned long)dir;
- _dir += 0xfffUL;
- _dir &= ~0xfffUL;
- dir = (void *)_dir;
- } else {
- /*
- * This makes debugging easier because the address
- * calculations are simpler:
- */
- dir = mmap((void *)0x200000000000, size + pad,
- PROT_READ|PROT_WRITE,
- MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
- if (dir == (void *)-1) {
- perror("unable to allocate bounds directory");
- abort();
- }
- check_clear(dir, size);
- }
- bounds_dir_ptr = (void *)dir;
- madvise(bounds_dir_ptr, size, MADV_NOHUGEPAGE);
- bd_incore();
- dprintf1("bounds directory: 0x%p -> 0x%p\n", bounds_dir_ptr,
- (char *)bounds_dir_ptr + size);
- check_clear(dir, size);
- enable_mpx(dir);
- check_clear(dir, size);
- if (prctl(43, 0, 0, 0, 0)) {
- printf("no MPX support\n");
- abort();
- return false;
- }
- return true;
-}
-
-bool process_specific_finish(void)
-{
- if (prctl(44)) {
- printf("no MPX support\n");
- return false;
- }
- return true;
-}
-
-void setup_handler()
-{
- int r, rs;
- struct sigaction newact;
- struct sigaction oldact;
-
- /* #BR is mapped to sigsegv */
- int signum = SIGSEGV;
-
- newact.sa_handler = 0; /* void(*)(int)*/
- newact.sa_sigaction = handler; /* void (*)(int, siginfo_t*, void *) */
-
- /*sigset_t - signals to block while in the handler */
- /* get the old signal mask. */
- rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask);
- assert(rs == 0);
-
- /* call sa_sigaction, not sa_handler*/
- newact.sa_flags = SA_SIGINFO;
-
- newact.sa_restorer = 0; /* void(*)(), obsolete */
- r = sigaction(signum, &newact, &oldact);
- assert(r == 0);
-}
-
-void mpx_prepare(void)
-{
- dprintf2("%s()\n", __func__);
- setup_handler();
- process_specific_init();
-}
-
-void mpx_cleanup(void)
-{
- printf("%s(): %jd BRs. bye...\n", __func__, num_bnd_chk);
- process_specific_finish();
-}
-
-/*-------------- the following is test case ---------------*/
-#include <stdint.h>
-#include <stdbool.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <time.h>
-
-uint64_t num_lower_brs;
-uint64_t num_upper_brs;
-
-#define MPX_CONFIG_OFFSET 1024
-#define MPX_BOUNDS_OFFSET 960
-#define MPX_HEADER_OFFSET 512
-#define MAX_ADDR_TESTED (1<<28)
-#define TEST_ROUNDS 100
-
-/*
- 0F 1A /r BNDLDX-Load
- 0F 1B /r BNDSTX-Store Extended Bounds Using Address Translation
- 66 0F 1A /r BNDMOV bnd1, bnd2/m128
- 66 0F 1B /r BNDMOV bnd1/m128, bnd2
- F2 0F 1A /r BNDCU bnd, r/m64
- F2 0F 1B /r BNDCN bnd, r/m64
- F3 0F 1A /r BNDCL bnd, r/m64
- F3 0F 1B /r BNDMK bnd, m64
-*/
-
-static __always_inline void xsave_state(void *_fx, uint64_t mask)
-{
- uint32_t lmask = mask;
- uint32_t hmask = mask >> 32;
- unsigned char *fx = _fx;
-
- asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
- : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
-}
-
-static __always_inline void mpx_clear_bnd0(void)
-{
- long size = 0;
- void *ptr = NULL;
- /* F3 0F 1B /r BNDMK bnd, m64 */
- /* f3 0f 1b 04 11 bndmk (%rcx,%rdx,1),%bnd0 */
- asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t"
- : : "c" (ptr), "d" (size-1)
- : "memory");
-}
-
-static __always_inline void mpx_make_bound_helper(unsigned long ptr,
- unsigned long size)
-{
- /* F3 0F 1B /r BNDMK bnd, m64 */
- /* f3 0f 1b 04 11 bndmk (%rcx,%rdx,1),%bnd0 */
- asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t"
- : : "c" (ptr), "d" (size-1)
- : "memory");
-}
-
-static __always_inline void mpx_check_lowerbound_helper(unsigned long ptr)
-{
- /* F3 0F 1A /r NDCL bnd, r/m64 */
- /* f3 0f 1a 01 bndcl (%rcx),%bnd0 */
- asm volatile(".byte 0xf3,0x0f,0x1a,0x01\n\t"
- : : "c" (ptr)
- : "memory");
-}
-
-static __always_inline void mpx_check_upperbound_helper(unsigned long ptr)
-{
- /* F2 0F 1A /r BNDCU bnd, r/m64 */
- /* f2 0f 1a 01 bndcu (%rcx),%bnd0 */
- asm volatile(".byte 0xf2,0x0f,0x1a,0x01\n\t"
- : : "c" (ptr)
- : "memory");
-}
-
-static __always_inline void mpx_movbndreg_helper()
-{
- /* 66 0F 1B /r BNDMOV bnd1/m128, bnd2 */
- /* 66 0f 1b c2 bndmov %bnd0,%bnd2 */
-
- asm volatile(".byte 0x66,0x0f,0x1b,0xc2\n\t");
-}
-
-static __always_inline void mpx_movbnd2mem_helper(uint8_t *mem)
-{
- /* 66 0F 1B /r BNDMOV bnd1/m128, bnd2 */
- /* 66 0f 1b 01 bndmov %bnd0,(%rcx) */
- asm volatile(".byte 0x66,0x0f,0x1b,0x01\n\t"
- : : "c" (mem)
- : "memory");
-}
-
-static __always_inline void mpx_movbnd_from_mem_helper(uint8_t *mem)
-{
- /* 66 0F 1A /r BNDMOV bnd1, bnd2/m128 */
- /* 66 0f 1a 01 bndmov (%rcx),%bnd0 */
- asm volatile(".byte 0x66,0x0f,0x1a,0x01\n\t"
- : : "c" (mem)
- : "memory");
-}
-
-static __always_inline void mpx_store_dsc_helper(unsigned long ptr_addr,
- unsigned long ptr_val)
-{
- /* 0F 1B /r BNDSTX-Store Extended Bounds Using Address Translation */
- /* 0f 1b 04 11 bndstx %bnd0,(%rcx,%rdx,1) */
- asm volatile(".byte 0x0f,0x1b,0x04,0x11\n\t"
- : : "c" (ptr_addr), "d" (ptr_val)
- : "memory");
-}
-
-static __always_inline void mpx_load_dsc_helper(unsigned long ptr_addr,
- unsigned long ptr_val)
-{
- /* 0F 1A /r BNDLDX-Load */
- /*/ 0f 1a 04 11 bndldx (%rcx,%rdx,1),%bnd0 */
- asm volatile(".byte 0x0f,0x1a,0x04,0x11\n\t"
- : : "c" (ptr_addr), "d" (ptr_val)
- : "memory");
-}
-
-void __print_context(void *__print_xsave_buffer, int line)
-{
- uint64_t *bounds = (uint64_t *)(__print_xsave_buffer + MPX_BOUNDS_OFFSET);
- uint64_t *cfg = (uint64_t *)(__print_xsave_buffer + MPX_CONFIG_OFFSET);
-
- int i;
- eprintf("%s()::%d\n", "print_context", line);
- for (i = 0; i < 4; i++) {
- eprintf("bound[%d]: 0x%016lx 0x%016lx(0x%016lx)\n", i,
- (unsigned long)bounds[i*2],
- ~(unsigned long)bounds[i*2+1],
- (unsigned long)bounds[i*2+1]);
- }
-
- eprintf("cpcfg: %jx cpstatus: %jx\n", cfg[0], cfg[1]);
-}
-#define print_context(x) __print_context(x, __LINE__)
-#ifdef DEBUG
-#define dprint_context(x) print_context(x)
-#else
-#define dprint_context(x) do{}while(0)
-#endif
-
-void init()
-{
- int i;
-
- srand((unsigned int)time(NULL));
-
- for (i = 0; i < 4; i++) {
- shadow_plb[i][0] = 0;
- shadow_plb[i][1] = ~(unsigned long)0;
- }
-}
-
-long int __mpx_random(int line)
-{
-#ifdef NOT_SO_RANDOM
- static long fake = 722122311;
- fake += 563792075;
- return fakse;
-#else
- return random();
-#endif
-}
-#define mpx_random() __mpx_random(__LINE__)
-
-uint8_t *get_random_addr()
-{
- uint8_t*addr = (uint8_t *)(unsigned long)(rand() % MAX_ADDR_TESTED);
- return (addr - (unsigned long)addr % sizeof(uint8_t *));
-}
-
-static inline bool compare_context(void *__xsave_buffer)
-{
- uint64_t *bounds = (uint64_t *)(__xsave_buffer + MPX_BOUNDS_OFFSET);
-
- int i;
- for (i = 0; i < 4; i++) {
- dprintf3("shadow[%d]{%016lx/%016lx}\nbounds[%d]{%016lx/%016lx}\n",
- i, (unsigned long)shadow_plb[i][0], (unsigned long)shadow_plb[i][1],
- i, (unsigned long)bounds[i*2], ~(unsigned long)bounds[i*2+1]);
- if ((shadow_plb[i][0] != bounds[i*2]) ||
- (shadow_plb[i][1] != ~(unsigned long)bounds[i*2+1])) {
- eprintf("ERROR comparing shadow to real bound register %d\n", i);
- eprintf("shadow{0x%016lx/0x%016lx}\nbounds{0x%016lx/0x%016lx}\n",
- (unsigned long)shadow_plb[i][0], (unsigned long)shadow_plb[i][1],
- (unsigned long)bounds[i*2], (unsigned long)bounds[i*2+1]);
- return false;
- }
- }
-
- return true;
-}
-
-void mkbnd_shadow(uint8_t *ptr, int index, long offset)
-{
- uint64_t *lower = (uint64_t *)&(shadow_plb[index][0]);
- uint64_t *upper = (uint64_t *)&(shadow_plb[index][1]);
- *lower = (unsigned long)ptr;
- *upper = (unsigned long)ptr + offset - 1;
-}
-
-void check_lowerbound_shadow(uint8_t *ptr, int index)
-{
- uint64_t *lower = (uint64_t *)&(shadow_plb[index][0]);
- if (*lower > (uint64_t)(unsigned long)ptr)
- num_lower_brs++;
- else
- dprintf1("LowerBoundChk passed:%p\n", ptr);
-}
-
-void check_upperbound_shadow(uint8_t *ptr, int index)
-{
- uint64_t upper = *(uint64_t *)&(shadow_plb[index][1]);
- if (upper < (uint64_t)(unsigned long)ptr)
- num_upper_brs++;
- else
- dprintf1("UpperBoundChk passed:%p\n", ptr);
-}
-
-__always_inline void movbndreg_shadow(int src, int dest)
-{
- shadow_plb[dest][0] = shadow_plb[src][0];
- shadow_plb[dest][1] = shadow_plb[src][1];
-}
-
-__always_inline void movbnd2mem_shadow(int src, unsigned long *dest)
-{
- unsigned long *lower = (unsigned long *)&(shadow_plb[src][0]);
- unsigned long *upper = (unsigned long *)&(shadow_plb[src][1]);
- *dest = *lower;
- *(dest+1) = *upper;
-}
-
-__always_inline void movbnd_from_mem_shadow(unsigned long *src, int dest)
-{
- unsigned long *lower = (unsigned long *)&(shadow_plb[dest][0]);
- unsigned long *upper = (unsigned long *)&(shadow_plb[dest][1]);
- *lower = *src;
- *upper = *(src+1);
-}
-
-__always_inline void stdsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val)
-{
- shadow_map[0] = (unsigned long)shadow_plb[index][0];
- shadow_map[1] = (unsigned long)shadow_plb[index][1];
- shadow_map[2] = (unsigned long)ptr_val;
- dprintf3("%s(%d, %p, %p) set shadow map[2]: %p\n", __func__,
- index, ptr, ptr_val, ptr_val);
- /*ptr ignored */
-}
-
-void lddsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val)
-{
- uint64_t lower = shadow_map[0];
- uint64_t upper = shadow_map[1];
- uint8_t *value = (uint8_t *)shadow_map[2];
-
- if (value != ptr_val) {
- dprintf2("%s(%d, %p, %p) init shadow bounds[%d] "
- "because %p != %p\n", __func__, index, ptr,
- ptr_val, index, value, ptr_val);
- shadow_plb[index][0] = 0;
- shadow_plb[index][1] = ~(unsigned long)0;
- } else {
- shadow_plb[index][0] = lower;
- shadow_plb[index][1] = upper;
- }
- /* ptr ignored */
-}
-
-static __always_inline void mpx_test_helper0(uint8_t *buf, uint8_t *ptr)
-{
- mpx_make_bound_helper((unsigned long)ptr, 0x1800);
-}
-
-static __always_inline void mpx_test_helper0_shadow(uint8_t *buf, uint8_t *ptr)
-{
- mkbnd_shadow(ptr, 0, 0x1800);
-}
-
-static __always_inline void mpx_test_helper1(uint8_t *buf, uint8_t *ptr)
-{
- /* these are hard-coded to check bnd0 */
- expected_bnd_index = 0;
- mpx_check_lowerbound_helper((unsigned long)(ptr-1));
- mpx_check_upperbound_helper((unsigned long)(ptr+0x1800));
- /* reset this since we do not expect any more bounds exceptions */
- expected_bnd_index = -1;
-}
-
-static __always_inline void mpx_test_helper1_shadow(uint8_t *buf, uint8_t *ptr)
-{
- check_lowerbound_shadow(ptr-1, 0);
- check_upperbound_shadow(ptr+0x1800, 0);
-}
-
-static __always_inline void mpx_test_helper2(uint8_t *buf, uint8_t *ptr)
-{
- mpx_make_bound_helper((unsigned long)ptr, 0x1800);
- mpx_movbndreg_helper();
- mpx_movbnd2mem_helper(buf);
- mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800);
-}
-
-static __always_inline void mpx_test_helper2_shadow(uint8_t *buf, uint8_t *ptr)
-{
- mkbnd_shadow(ptr, 0, 0x1800);
- movbndreg_shadow(0, 2);
- movbnd2mem_shadow(0, (unsigned long *)buf);
- mkbnd_shadow(ptr+0x12, 0, 0x1800);
-}
-
-static __always_inline void mpx_test_helper3(uint8_t *buf, uint8_t *ptr)
-{
- mpx_movbnd_from_mem_helper(buf);
-}
-
-static __always_inline void mpx_test_helper3_shadow(uint8_t *buf, uint8_t *ptr)
-{
- movbnd_from_mem_shadow((unsigned long *)buf, 0);
-}
-
-static __always_inline void mpx_test_helper4(uint8_t *buf, uint8_t *ptr)
-{
- mpx_store_dsc_helper((unsigned long)buf, (unsigned long)ptr);
- mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800);
-}
-
-static __always_inline void mpx_test_helper4_shadow(uint8_t *buf, uint8_t *ptr)
-{
- stdsc_shadow(0, buf, ptr);
- mkbnd_shadow(ptr+0x12, 0, 0x1800);
-}
-
-static __always_inline void mpx_test_helper5(uint8_t *buf, uint8_t *ptr)
-{
- mpx_load_dsc_helper((unsigned long)buf, (unsigned long)ptr);
-}
-
-static __always_inline void mpx_test_helper5_shadow(uint8_t *buf, uint8_t *ptr)
-{
- lddsc_shadow(0, buf, ptr);
-}
-
-#define NR_MPX_TEST_FUNCTIONS 6
-
-/*
- * For compatibility reasons, MPX will clear the bounds registers
- * when you make function calls (among other things). We have to
- * preserve the registers in between calls to the "helpers" since
- * they build on each other.
- *
- * Be very careful not to make any function calls inside the
- * helpers, or anywhere else beween the xrstor and xsave.
- */
-#define run_helper(helper_nr, buf, buf_shadow, ptr) do { \
- xrstor_state(xsave_test_buf, flags); \
- mpx_test_helper##helper_nr(buf, ptr); \
- xsave_state(xsave_test_buf, flags); \
- mpx_test_helper##helper_nr##_shadow(buf_shadow, ptr); \
-} while (0)
-
-static void run_helpers(int nr, uint8_t *buf, uint8_t *buf_shadow, uint8_t *ptr)
-{
- uint64_t flags = 0x18;
-
- dprint_context(xsave_test_buf);
- switch (nr) {
- case 0:
- run_helper(0, buf, buf_shadow, ptr);
- break;
- case 1:
- run_helper(1, buf, buf_shadow, ptr);
- break;
- case 2:
- run_helper(2, buf, buf_shadow, ptr);
- break;
- case 3:
- run_helper(3, buf, buf_shadow, ptr);
- break;
- case 4:
- run_helper(4, buf, buf_shadow, ptr);
- break;
- case 5:
- run_helper(5, buf, buf_shadow, ptr);
- break;
- default:
- test_failed();
- break;
- }
- dprint_context(xsave_test_buf);
-}
-
-unsigned long buf_shadow[1024]; /* used to check load / store descriptors */
-extern long inspect_me(struct mpx_bounds_dir *bounds_dir);
-
-long cover_buf_with_bt_entries(void *buf, long buf_len)
-{
- int i;
- long nr_to_fill;
- int ratio = 1000;
- unsigned long buf_len_in_ptrs;
-
- /* Fill about 1/100 of the space with bt entries */
- nr_to_fill = buf_len / (sizeof(unsigned long) * ratio);
-
- if (!nr_to_fill)
- dprintf3("%s() nr_to_fill: %ld\n", __func__, nr_to_fill);
-
- /* Align the buffer to pointer size */
- while (((unsigned long)buf) % sizeof(void *)) {
- buf++;
- buf_len--;
- }
- /* We are storing pointers, so make */
- buf_len_in_ptrs = buf_len / sizeof(void *);
-
- for (i = 0; i < nr_to_fill; i++) {
- long index = (mpx_random() % buf_len_in_ptrs);
- void *ptr = buf + index * sizeof(unsigned long);
- unsigned long ptr_addr = (unsigned long)ptr;
-
- /* ptr and size can be anything */
- mpx_make_bound_helper((unsigned long)ptr, 8);
-
- /*
- * take bnd0 and put it in to bounds tables "buf + index" is an
- * address inside the buffer where we are pretending that we
- * are going to put a pointer We do not, though because we will
- * never load entries from the table, so it doesn't matter.
- */
- mpx_store_dsc_helper(ptr_addr, (unsigned long)ptr);
- dprintf4("storing bound table entry for %lx (buf start @ %p)\n",
- ptr_addr, buf);
- }
- return nr_to_fill;
-}
-
-unsigned long align_down(unsigned long alignme, unsigned long align_to)
-{
- return alignme & ~(align_to-1);
-}
-
-unsigned long align_up(unsigned long alignme, unsigned long align_to)
-{
- return (alignme + align_to - 1) & ~(align_to-1);
-}
-
-/*
- * Using 1MB alignment guarantees that each no allocation
- * will overlap with another's bounds tables.
- *
- * We have to cook our own allocator here. malloc() can
- * mix other allocation with ours which means that even
- * if we free all of our allocations, there might still
- * be bounds tables for the *areas* since there is other
- * valid memory there.
- *
- * We also can't use malloc() because a free() of an area
- * might not free it back to the kernel. We want it
- * completely unmapped an malloc() does not guarantee
- * that.
- */
-#ifdef __i386__
-long alignment = 4096;
-long sz_alignment = 4096;
-#else
-long alignment = 1 * MB;
-long sz_alignment = 1 * MB;
-#endif
-void *mpx_mini_alloc(unsigned long sz)
-{
- unsigned long long tries = 0;
- static void *last;
- void *ptr;
- void *try_at;
-
- sz = align_up(sz, sz_alignment);
-
- try_at = last + alignment;
- while (1) {
- ptr = mmap(try_at, sz, PROT_READ|PROT_WRITE,
- MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
- if (ptr == (void *)-1)
- return NULL;
- if (ptr == try_at)
- break;
-
- munmap(ptr, sz);
- try_at += alignment;
-#ifdef __i386__
- /*
- * This isn't quite correct for 32-bit binaries
- * on 64-bit kernels since they can use the
- * entire 32-bit address space, but it's close
- * enough.
- */
- if (try_at > (void *)0xC0000000)
-#else
- if (try_at > (void *)0x0000800000000000)
-#endif
- try_at = (void *)0x0;
- if (!(++tries % 10000))
- dprintf1("stuck in %s(), tries: %lld\n", __func__, tries);
- continue;
- }
- last = ptr;
- dprintf3("mpx_mini_alloc(0x%lx) returning: %p\n", sz, ptr);
- return ptr;
-}
-void mpx_mini_free(void *ptr, long sz)
-{
- dprintf2("%s() ptr: %p\n", __func__, ptr);
- if ((unsigned long)ptr > 0x100000000000) {
- dprintf1("uh oh !!!!!!!!!!!!!!! pointer too high: %p\n", ptr);
- test_failed();
- }
- sz = align_up(sz, sz_alignment);
- dprintf3("%s() ptr: %p before munmap\n", __func__, ptr);
- munmap(ptr, sz);
- dprintf3("%s() ptr: %p DONE\n", __func__, ptr);
-}
-
-#define NR_MALLOCS 100
-struct one_malloc {
- char *ptr;
- int nr_filled_btes;
- unsigned long size;
-};
-struct one_malloc mallocs[NR_MALLOCS];
-
-void free_one_malloc(int index)
-{
- unsigned long free_ptr;
- unsigned long mask;
-
- if (!mallocs[index].ptr)
- return;
-
- mpx_mini_free(mallocs[index].ptr, mallocs[index].size);
- dprintf4("freed[%d]: %p\n", index, mallocs[index].ptr);
-
- free_ptr = (unsigned long)mallocs[index].ptr;
- mask = alignment-1;
- dprintf4("lowerbits: %lx / %lx mask: %lx\n", free_ptr,
- (free_ptr & mask), mask);
- assert((free_ptr & mask) == 0);
-
- mallocs[index].ptr = NULL;
-}
-
-#ifdef __i386__
-#define MPX_BOUNDS_TABLE_COVERS 4096
-#else
-#define MPX_BOUNDS_TABLE_COVERS (1 * MB)
-#endif
-void zap_everything(void)
-{
- long after_zap;
- long before_zap;
- int i;
-
- before_zap = inspect_me(bounds_dir_ptr);
- dprintf1("zapping everything start: %ld\n", before_zap);
- for (i = 0; i < NR_MALLOCS; i++)
- free_one_malloc(i);
-
- after_zap = inspect_me(bounds_dir_ptr);
- dprintf1("zapping everything done: %ld\n", after_zap);
- /*
- * We only guarantee to empty the thing out if our allocations are
- * exactly aligned on the boundaries of a boudns table.
- */
- if ((alignment >= MPX_BOUNDS_TABLE_COVERS) &&
- (sz_alignment >= MPX_BOUNDS_TABLE_COVERS)) {
- if (after_zap != 0)
- test_failed();
-
- assert(after_zap == 0);
- }
-}
-
-void do_one_malloc(void)
-{
- static int malloc_counter;
- long sz;
- int rand_index = (mpx_random() % NR_MALLOCS);
- void *ptr = mallocs[rand_index].ptr;
-
- dprintf3("%s() enter\n", __func__);
-
- if (ptr) {
- dprintf3("freeing one malloc at index: %d\n", rand_index);
- free_one_malloc(rand_index);
- if (mpx_random() % (NR_MALLOCS*3) == 3) {
- int i;
- dprintf3("zapping some more\n");
- for (i = rand_index; i < NR_MALLOCS; i++)
- free_one_malloc(i);
- }
- if ((mpx_random() % zap_all_every_this_many_mallocs) == 4)
- zap_everything();
- }
-
- /* 1->~1M */
- sz = (1 + mpx_random() % 1000) * 1000;
- ptr = mpx_mini_alloc(sz);
- if (!ptr) {
- /*
- * If we are failing allocations, just assume we
- * are out of memory and zap everything.
- */
- dprintf3("zapping everything because out of memory\n");
- zap_everything();
- goto out;
- }
-
- dprintf3("malloc: %p size: 0x%lx\n", ptr, sz);
- mallocs[rand_index].nr_filled_btes = cover_buf_with_bt_entries(ptr, sz);
- mallocs[rand_index].ptr = ptr;
- mallocs[rand_index].size = sz;
-out:
- if ((++malloc_counter) % inspect_every_this_many_mallocs == 0)
- inspect_me(bounds_dir_ptr);
-}
-
-void run_timed_test(void (*test_func)(void))
-{
- int done = 0;
- long iteration = 0;
- static time_t last_print;
- time_t now;
- time_t start;
-
- time(&start);
- while (!done) {
- time(&now);
- if ((now - start) > TEST_DURATION_SECS)
- done = 1;
-
- test_func();
- iteration++;
-
- if ((now - last_print > 1) || done) {
- printf("iteration %ld complete, OK so far\n", iteration);
- last_print = now;
- }
- }
-}
-
-void check_bounds_table_frees(void)
-{
- printf("executing unmaptest\n");
- inspect_me(bounds_dir_ptr);
- run_timed_test(&do_one_malloc);
- printf("done with malloc() fun\n");
-}
-
-void insn_test_failed(int test_nr, int test_round, void *buf,
- void *buf_shadow, void *ptr)
-{
- print_context(xsave_test_buf);
- eprintf("ERROR: test %d round %d failed\n", test_nr, test_round);
- while (test_nr == 5) {
- struct mpx_bt_entry *bte;
- struct mpx_bounds_dir *bd = (void *)bounds_dir_ptr;
- struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(buf, bd);
-
- printf(" bd: %p\n", bd);
- printf("&bde: %p\n", bde);
- printf("*bde: %lx\n", *(unsigned long *)bde);
- if (!bd_entry_valid(bde))
- break;
-
- bte = mpx_vaddr_to_bt_entry(buf, bd);
- printf(" te: %p\n", bte);
- printf("bte[0]: %lx\n", bte->contents[0]);
- printf("bte[1]: %lx\n", bte->contents[1]);
- printf("bte[2]: %lx\n", bte->contents[2]);
- printf("bte[3]: %lx\n", bte->contents[3]);
- break;
- }
- test_failed();
-}
-
-void check_mpx_insns_and_tables(void)
-{
- int successes = 0;
- int failures = 0;
- int buf_size = (1024*1024);
- unsigned long *buf = malloc(buf_size);
- const int total_nr_tests = NR_MPX_TEST_FUNCTIONS * TEST_ROUNDS;
- int i, j;
-
- memset(buf, 0, buf_size);
- memset(buf_shadow, 0, sizeof(buf_shadow));
-
- for (i = 0; i < TEST_ROUNDS; i++) {
- uint8_t *ptr = get_random_addr() + 8;
-
- for (j = 0; j < NR_MPX_TEST_FUNCTIONS; j++) {
- if (0 && j != 5) {
- successes++;
- continue;
- }
- dprintf2("starting test %d round %d\n", j, i);
- dprint_context(xsave_test_buf);
- /*
- * test5 loads an address from the bounds tables.
- * The load will only complete if 'ptr' matches
- * the load and the store, so with random addrs,
- * the odds of this are very small. Make it
- * higher by only moving 'ptr' 1/10 times.
- */
- if (random() % 10 <= 0)
- ptr = get_random_addr() + 8;
- dprintf3("random ptr{%p}\n", ptr);
- dprint_context(xsave_test_buf);
- run_helpers(j, (void *)buf, (void *)buf_shadow, ptr);
- dprint_context(xsave_test_buf);
- if (!compare_context(xsave_test_buf)) {
- insn_test_failed(j, i, buf, buf_shadow, ptr);
- failures++;
- goto exit;
- }
- successes++;
- dprint_context(xsave_test_buf);
- dprintf2("finished test %d round %d\n", j, i);
- dprintf3("\n");
- dprint_context(xsave_test_buf);
- }
- }
-
-exit:
- dprintf2("\nabout to free:\n");
- free(buf);
- dprintf1("successes: %d\n", successes);
- dprintf1(" failures: %d\n", failures);
- dprintf1(" tests: %d\n", total_nr_tests);
- dprintf1(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs);
- dprintf1(" saw: %d #BRs\n", br_count);
- if (failures) {
- eprintf("ERROR: non-zero number of failures\n");
- exit(20);
- }
- if (successes != total_nr_tests) {
- eprintf("ERROR: succeeded fewer than number of tries (%d != %d)\n",
- successes, total_nr_tests);
- exit(21);
- }
- if (num_upper_brs + num_lower_brs != br_count) {
- eprintf("ERROR: unexpected number of #BRs: %jd %jd %d\n",
- num_upper_brs, num_lower_brs, br_count);
- eprintf("successes: %d\n", successes);
- eprintf(" failures: %d\n", failures);
- eprintf(" tests: %d\n", total_nr_tests);
- eprintf(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs);
- eprintf(" saw: %d #BRs\n", br_count);
- exit(22);
- }
-}
-
-/*
- * This is supposed to SIGSEGV nicely once the kernel
- * can no longer allocate vaddr space.
- */
-void exhaust_vaddr_space(void)
-{
- unsigned long ptr;
- /* Try to make sure there is no room for a bounds table anywhere */
- unsigned long skip = MPX_BOUNDS_TABLE_SIZE_BYTES - PAGE_SIZE;
-#ifdef __i386__
- unsigned long max_vaddr = 0xf7788000UL;
-#else
- unsigned long max_vaddr = 0x800000000000UL;
-#endif
-
- dprintf1("%s() start\n", __func__);
- /* do not start at 0, we aren't allowed to map there */
- for (ptr = PAGE_SIZE; ptr < max_vaddr; ptr += skip) {
- void *ptr_ret;
- int ret = madvise((void *)ptr, PAGE_SIZE, MADV_NORMAL);
-
- if (!ret) {
- dprintf1("madvise() %lx ret: %d\n", ptr, ret);
- continue;
- }
- ptr_ret = mmap((void *)ptr, PAGE_SIZE, PROT_READ|PROT_WRITE,
- MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
- if (ptr_ret != (void *)ptr) {
- perror("mmap");
- dprintf1("mmap(%lx) ret: %p\n", ptr, ptr_ret);
- break;
- }
- if (!(ptr & 0xffffff))
- dprintf1("mmap(%lx) ret: %p\n", ptr, ptr_ret);
- }
- for (ptr = PAGE_SIZE; ptr < max_vaddr; ptr += skip) {
- dprintf2("covering 0x%lx with bounds table entries\n", ptr);
- cover_buf_with_bt_entries((void *)ptr, PAGE_SIZE);
- }
- dprintf1("%s() end\n", __func__);
- printf("done with vaddr space fun\n");
-}
-
-void mpx_table_test(void)
-{
- printf("starting mpx bounds table test\n");
- run_timed_test(check_mpx_insns_and_tables);
- printf("done with mpx bounds table test\n");
-}
-
-int main(int argc, char **argv)
-{
- int unmaptest = 0;
- int vaddrexhaust = 0;
- int tabletest = 0;
- int i;
-
- check_mpx_support();
- mpx_prepare();
- srandom(11179);
-
- bd_incore();
- init();
- bd_incore();
-
- trace_me();
-
- xsave_state((void *)xsave_test_buf, 0x1f);
- if (!compare_context(xsave_test_buf))
- printf("Init failed\n");
-
- for (i = 1; i < argc; i++) {
- if (!strcmp(argv[i], "unmaptest"))
- unmaptest = 1;
- if (!strcmp(argv[i], "vaddrexhaust"))
- vaddrexhaust = 1;
- if (!strcmp(argv[i], "tabletest"))
- tabletest = 1;
- }
- if (!(unmaptest || vaddrexhaust || tabletest)) {
- unmaptest = 1;
- /* vaddrexhaust = 1; */
- tabletest = 1;
- }
- if (unmaptest)
- check_bounds_table_frees();
- if (tabletest)
- mpx_table_test();
- if (vaddrexhaust)
- exhaust_vaddr_space();
- printf("%s completed successfully\n", argv[0]);
- exit(0);
-}
-
-#include "mpx-dig.c"
diff --git a/tools/testing/selftests/x86/mpx-mm.h b/tools/testing/selftests/x86/mpx-mm.h
deleted file mode 100644
index 6dbdd66b8242..000000000000
--- a/tools/testing/selftests/x86/mpx-mm.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _MPX_MM_H
-#define _MPX_MM_H
-
-#define PAGE_SIZE 4096
-#define MB (1UL<<20)
-
-extern long nr_incore(void *ptr, unsigned long size_bytes);
-
-#endif /* _MPX_MM_H */
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
index 3e49a7873f3e..57c4f67f16ef 100644
--- a/tools/testing/selftests/x86/sigreturn.c
+++ b/tools/testing/selftests/x86/sigreturn.c
@@ -451,6 +451,19 @@ static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
ctx->uc_mcontext.gregs[REG_CX] = 0;
+#ifdef __i386__
+ /*
+ * Make sure the kernel doesn't inadvertently use DS or ES-relative
+ * accesses in a region where user DS or ES is loaded.
+ *
+ * Skip this for 64-bit builds because long mode doesn't care about
+ * DS and ES and skipping it increases test coverage a little bit,
+ * since 64-bit kernels can still run the 32-bit build.
+ */
+ ctx->uc_mcontext.gregs[REG_DS] = 0;
+ ctx->uc_mcontext.gregs[REG_ES] = 0;
+#endif
+
memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
requested_regs[REG_CX] = *ssptr(ctx); /* The asm code does this. */
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
index 50ce6c3dd904..1063328e275c 100644
--- a/tools/testing/selftests/x86/single_step_syscall.c
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -43,7 +43,19 @@ static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
err(1, "sigaction");
}
-static volatile sig_atomic_t sig_traps;
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static volatile sig_atomic_t sig_traps, sig_eflags;
+sigjmp_buf jmpbuf;
+static unsigned char altstack_data[SIGSTKSZ];
#ifdef __x86_64__
# define REG_IP REG_RIP
@@ -90,6 +102,25 @@ static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
}
}
+static char const * const signames[] = {
+ [SIGSEGV] = "SIGSEGV",
+ [SIGBUS] = "SIBGUS",
+ [SIGTRAP] = "SIGTRAP",
+ [SIGILL] = "SIGILL",
+};
+
+static void print_and_longjmp(int sig, siginfo_t *si, void *ctx_void)
+{
+ ucontext_t *ctx = ctx_void;
+
+ printf("\tGot %s with RIP=%lx, TF=%ld\n", signames[sig],
+ (unsigned long)ctx->uc_mcontext.gregs[REG_IP],
+ (unsigned long)ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_TF);
+
+ sig_eflags = (unsigned long)ctx->uc_mcontext.gregs[REG_EFL];
+ siglongjmp(jmpbuf, 1);
+}
+
static void check_result(void)
{
unsigned long new_eflags = get_eflags();
@@ -109,6 +140,22 @@ static void check_result(void)
sig_traps = 0;
}
+static void fast_syscall_no_tf(void)
+{
+ sig_traps = 0;
+ printf("[RUN]\tFast syscall with TF cleared\n");
+ fflush(stdout); /* Force a syscall */
+ if (get_eflags() & X86_EFLAGS_TF) {
+ printf("[FAIL]\tTF is now set\n");
+ exit(1);
+ }
+ if (sig_traps) {
+ printf("[FAIL]\tGot SIGTRAP\n");
+ exit(1);
+ }
+ printf("[OK]\tNothing unexpected happened\n");
+}
+
int main()
{
#ifdef CAN_BUILD_32
@@ -163,17 +210,46 @@ int main()
check_result();
/* Now make sure that another fast syscall doesn't set TF again. */
- printf("[RUN]\tFast syscall with TF cleared\n");
- fflush(stdout); /* Force a syscall */
- if (get_eflags() & X86_EFLAGS_TF) {
- printf("[FAIL]\tTF is now set\n");
- exit(1);
+ fast_syscall_no_tf();
+
+ /*
+ * And do a forced SYSENTER to make sure that this works even if
+ * fast syscalls don't use SYSENTER.
+ *
+ * Invoking SYSENTER directly breaks all the rules. Just handle
+ * the SIGSEGV.
+ */
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ unsigned long nr = SYS_getpid;
+ printf("[RUN]\tSet TF and check SYSENTER\n");
+ stack_t stack = {
+ .ss_sp = altstack_data,
+ .ss_size = SIGSTKSZ,
+ };
+ if (sigaltstack(&stack, NULL) != 0)
+ err(1, "sigaltstack");
+ sethandler(SIGSEGV, print_and_longjmp,
+ SA_RESETHAND | SA_ONSTACK);
+ sethandler(SIGILL, print_and_longjmp, SA_RESETHAND);
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ /* Clear EBP first to make sure we segfault cleanly. */
+ asm volatile ("xorl %%ebp, %%ebp; SYSENTER" : "+a" (nr) :: "flags", "rcx"
+#ifdef __x86_64__
+ , "r11"
+#endif
+ );
+
+ /* We're unreachable here. SYSENTER forgets RIP. */
}
- if (sig_traps) {
- printf("[FAIL]\tGot SIGTRAP\n");
+ clearhandler(SIGSEGV);
+ clearhandler(SIGILL);
+ if (!(sig_eflags & X86_EFLAGS_TF)) {
+ printf("[FAIL]\tTF was cleared\n");
exit(1);
}
- printf("[OK]\tNothing unexpected happened\n");
+
+ /* Now make sure that another fast syscall doesn't set TF again. */
+ fast_syscall_no_tf();
return 0;
}
diff --git a/tools/testing/selftests/x86/syscall_numbering.c b/tools/testing/selftests/x86/syscall_numbering.c
new file mode 100644
index 000000000000..d6b09cb1aa2c
--- /dev/null
+++ b/tools/testing/selftests/x86/syscall_numbering.c
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * syscall_arg_fault.c - tests faults 32-bit fast syscall stack args
+ * Copyright (c) 2018 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <unistd.h>
+#include <syscall.h>
+
+static int nerrs;
+
+#define X32_BIT 0x40000000UL
+
+static void check_enosys(unsigned long nr, bool *ok)
+{
+ /* If this fails, a segfault is reasonably likely. */
+ fflush(stdout);
+
+ long ret = syscall(nr, 0, 0, 0, 0, 0, 0);
+ if (ret == 0) {
+ printf("[FAIL]\tsyscall %lu succeeded, but it should have failed\n", nr);
+ *ok = false;
+ } else if (errno != ENOSYS) {
+ printf("[FAIL]\tsyscall %lu had error code %d, but it should have reported ENOSYS\n", nr, errno);
+ *ok = false;
+ }
+}
+
+static void test_x32_without_x32_bit(void)
+{
+ bool ok = true;
+
+ /*
+ * Syscalls 512-547 are "x32" syscalls. They are intended to be
+ * called with the x32 (0x40000000) bit set. Calling them without
+ * the x32 bit set is nonsense and should not work.
+ */
+ printf("[RUN]\tChecking syscalls 512-547\n");
+ for (int i = 512; i <= 547; i++)
+ check_enosys(i, &ok);
+
+ /*
+ * Check that a handful of 64-bit-only syscalls are rejected if the x32
+ * bit is set.
+ */
+ printf("[RUN]\tChecking some 64-bit syscalls in x32 range\n");
+ check_enosys(16 | X32_BIT, &ok); /* ioctl */
+ check_enosys(19 | X32_BIT, &ok); /* readv */
+ check_enosys(20 | X32_BIT, &ok); /* writev */
+
+ /*
+ * Check some syscalls with high bits set.
+ */
+ printf("[RUN]\tChecking numbers above 2^32-1\n");
+ check_enosys((1UL << 32), &ok);
+ check_enosys(X32_BIT | (1UL << 32), &ok);
+
+ if (!ok)
+ nerrs++;
+ else
+ printf("[OK]\tThey all returned -ENOSYS\n");
+}
+
+int main()
+{
+ /*
+ * Anyone diagnosing a failure will want to know whether the kernel
+ * supports x32. Tell them.
+ */
+ printf("\tChecking for x32...");
+ fflush(stdout);
+ if (syscall(39 | X32_BIT, 0, 0, 0, 0, 0, 0) >= 0) {
+ printf(" supported\n");
+ } else if (errno == ENOSYS) {
+ printf(" not supported\n");
+ } else {
+ printf(" confused\n");
+ }
+
+ test_x32_without_x32_bit();
+
+ return nerrs ? 1 : 0;
+}
OpenPOWER on IntegriCloud