From 9116e5e2b1fff71dce501d971e86a3695acc3dba Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Wed, 14 Aug 2019 09:27:16 +0200 Subject: xsk: replace ndo_xsk_async_xmit with ndo_xsk_wakeup This commit replaces ndo_xsk_async_xmit with ndo_xsk_wakeup. This new ndo provides the same functionality as before but with the addition of a new flags field that is used to specifiy if Rx, Tx or both should be woken up. The previous ndo only woke up Tx, as implied by the name. The i40e and ixgbe drivers (which are all the supported ones) are updated with this new interface. This new ndo will be used by the new need_wakeup functionality of XDP sockets that need to be able to wake up both Rx and Tx driver processing. Signed-off-by: Magnus Karlsson Acked-by: Jonathan Lemon Signed-off-by: Daniel Borkmann --- include/linux/netdevice.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 55ac223553f8..0ef0570386a2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -901,6 +901,10 @@ struct netdev_bpf { }; }; +/* Flags for ndo_xsk_wakeup. */ +#define XDP_WAKEUP_RX (1 << 0) +#define XDP_WAKEUP_TX (1 << 1) + #ifdef CONFIG_XFRM_OFFLOAD struct xfrmdev_ops { int (*xdo_dev_state_add) (struct xfrm_state *x); @@ -1227,6 +1231,12 @@ struct tlsdev_ops; * that got dropped are freed/returned via xdp_return_frame(). * Returns negative number, means general error invoking ndo, meaning * no frames were xmit'ed and core-caller will free all frames. + * int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags); + * This function is used to wake up the softirq, ksoftirqd or kthread + * responsible for sending and/or receiving packets on a specific + * queue id bound to an AF_XDP socket. The flags field specifies if + * only RX, only Tx, or both should be woken up using the flags + * XDP_WAKEUP_RX and XDP_WAKEUP_TX. * struct devlink_port *(*ndo_get_devlink_port)(struct net_device *dev); * Get devlink port instance associated with a given netdev. * Called with a reference on the netdevice and devlink locks only, @@ -1426,8 +1436,8 @@ struct net_device_ops { int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp, u32 flags); - int (*ndo_xsk_async_xmit)(struct net_device *dev, - u32 queue_id); + int (*ndo_xsk_wakeup)(struct net_device *dev, + u32 queue_id, u32 flags); struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev); }; -- cgit v1.2.3 From b0e4701ce15d0381cdea0643c7f0a35dc529cec2 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 14 Aug 2019 10:37:48 -0700 Subject: bpf: export bpf_map_inc_not_zero Rename existing bpf_map_inc_not_zero to __bpf_map_inc_not_zero to indicate that it's caller's responsibility to do proper locking. Create and export bpf_map_inc_not_zero wrapper that properly locks map_idr_lock. Will be used in the next commit to hold a map while cloning a socket. Cc: Martin KaFai Lau Cc: Yonghong Song Acked-by: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 2 ++ kernel/bpf/syscall.c | 16 +++++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f9a506147c8a..15ae49862b82 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -647,6 +647,8 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock); struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref); +struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map, + bool uref); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); int bpf_map_charge_memlock(struct bpf_map *map, u32 pages); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 5d141f16f6fa..cf8052b016e7 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -683,8 +683,8 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd) } /* map_idr_lock should have been held */ -static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map, - bool uref) +static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, + bool uref) { int refold; @@ -704,6 +704,16 @@ static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map, return map; } +struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map, bool uref) +{ + spin_lock_bh(&map_idr_lock); + map = __bpf_map_inc_not_zero(map, uref); + spin_unlock_bh(&map_idr_lock); + + return map; +} +EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero); + int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) { return -ENOTSUPP; @@ -2177,7 +2187,7 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr) spin_lock_bh(&map_idr_lock); map = idr_find(&map_idr, id); if (map) - map = bpf_map_inc_not_zero(map, true); + map = __bpf_map_inc_not_zero(map, true); else map = ERR_PTR(-ENOENT); spin_unlock_bh(&map_idr_lock); -- cgit v1.2.3 From 69ecfdaa534943efd95ee12b53fbb0f344e42e7e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 20 Aug 2019 01:10:35 +0900 Subject: bpf: add include guard to tnum.h Add a header include guard just in case. Signed-off-by: Masahiro Yamada Signed-off-by: Daniel Borkmann --- include/linux/tnum.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tnum.h b/include/linux/tnum.h index c7dc2b5902c0..c17af77f3fae 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h @@ -5,6 +5,10 @@ * propagate the unknown bits such that the tnum result represents all the * possible results for possible values of the operands. */ + +#ifndef _LINUX_TNUM_H +#define _LINUX_TNUM_H + #include struct tnum { @@ -81,3 +85,5 @@ bool tnum_in(struct tnum a, struct tnum b); int tnum_strn(char *str, size_t size, struct tnum a); /* Format a tnum as tristate binary expansion */ int tnum_sbin(char *str, size_t size, struct tnum a); + +#endif /* _LINUX_TNUM_H */ -- cgit v1.2.3 From 1b9ed84ecf268904d89edf2908426a8eb3b5a4ba Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 20 Aug 2019 10:31:50 +0100 Subject: bpf: add new BPF_BTF_GET_NEXT_ID syscall command Add a new command for the bpf() system call: BPF_BTF_GET_NEXT_ID is used to cycle through all BTF objects loaded on the system. The motivation is to be able to inspect (list) all BTF objects presents on the system. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 +++ include/uapi/linux/bpf.h | 1 + kernel/bpf/btf.c | 4 ++-- kernel/bpf/syscall.c | 4 ++++ 4 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 15ae49862b82..5b9d22338606 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -24,6 +24,9 @@ struct seq_file; struct btf; struct btf_type; +extern struct idr btf_idr; +extern spinlock_t btf_idr_lock; + /* map is generic key/value storage optionally accesible by eBPF programs */ struct bpf_map_ops { /* funcs callable from userspace (via syscall) */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0ef594ac3899..8aa6126f0b6e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -106,6 +106,7 @@ enum bpf_cmd { BPF_TASK_FD_QUERY, BPF_MAP_LOOKUP_AND_DELETE_ELEM, BPF_MAP_FREEZE, + BPF_BTF_GET_NEXT_ID, }; enum bpf_map_type { diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 6b403dc18486..adb3adcebe3c 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -195,8 +195,8 @@ i < btf_type_vlen(struct_type); \ i++, member++) -static DEFINE_IDR(btf_idr); -static DEFINE_SPINLOCK(btf_idr_lock); +DEFINE_IDR(btf_idr); +DEFINE_SPINLOCK(btf_idr_lock); struct btf { void *data; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index cf8052b016e7..c0f62fd67c6b 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2884,6 +2884,10 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz err = bpf_obj_get_next_id(&attr, uattr, &map_idr, &map_idr_lock); break; + case BPF_BTF_GET_NEXT_ID: + err = bpf_obj_get_next_id(&attr, uattr, + &btf_idr, &btf_idr_lock); + break; case BPF_PROG_GET_FD_BY_ID: err = bpf_prog_get_fd_by_id(&attr); break; -- cgit v1.2.3 From 10d274e880eb208ec6a76261a9f8f8155020f771 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 22 Aug 2019 22:52:12 -0700 Subject: bpf: introduce verifier internal test flag Introduce BPF_F_TEST_STATE_FREQ flag to stress test parentage chain and state pruning. Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 1 + include/uapi/linux/bpf.h | 3 +++ kernel/bpf/syscall.c | 1 + kernel/bpf/verifier.c | 5 ++++- 4 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 5fe99f322b1c..26a6d58ca78c 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -355,6 +355,7 @@ struct bpf_verifier_env { struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ int stack_size; /* number of states to be processed */ bool strict_alignment; /* perform strict pointer alignment checks */ + bool test_state_freq; /* test verifier with different pruning frequency */ struct bpf_verifier_state *cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ struct bpf_verifier_state_list *free_list; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b5889257cc33..5d2fb183ee2d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -285,6 +285,9 @@ enum bpf_attach_type { */ #define BPF_F_TEST_RND_HI32 (1U << 2) +/* The verifier internal test flag. Behavior is undefined */ +#define BPF_F_TEST_STATE_FREQ (1U << 3) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * two extensions: * diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index c0f62fd67c6b..ca60eafa6922 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1629,6 +1629,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | BPF_F_ANY_ALIGNMENT | + BPF_F_TEST_STATE_FREQ | BPF_F_TEST_RND_HI32)) return -EINVAL; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 10c0ff93f52b..f340cfe53c6e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7222,7 +7222,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) struct bpf_verifier_state_list *sl, **pprev; struct bpf_verifier_state *cur = env->cur_state, *new; int i, j, err, states_cnt = 0; - bool add_new_state = false; + bool add_new_state = env->test_state_freq ? true : false; cur->last_insn_idx = env->prev_insn_idx; if (!env->insn_aux_data[insn_idx].prune_point) @@ -9262,6 +9262,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, env->allow_ptr_leaks = is_priv; + if (is_priv) + env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ; + ret = replace_map_fd_with_map_ptr(env); if (ret < 0) goto skip_full_check; -- cgit v1.2.3