Diffstat (limited to 'include/net')
131 files changed, 3244 insertions, 800 deletions
diff --git a/include/net/act_api.h b/include/net/act_api.h index c61a1bf4e3de..71347a90a9d1 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -15,6 +15,7 @@ struct tcf_idrinfo { struct mutex lock; struct idr action_idr; + struct net *net; }; struct tc_action_ops; @@ -22,7 +23,6 @@ struct tc_action_ops; struct tc_action { const struct tc_action_ops *ops; __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ - __u32 order; struct tcf_idrinfo *idrinfo; u32 tcfa_index; @@ -40,6 +40,7 @@ struct tc_action { struct gnet_stats_queue __percpu *cpu_qstats; struct tc_cookie __rcu *act_cookie; struct tcf_chain __rcu *goto_chain; + u32 tcfa_flags; }; #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt @@ -77,6 +78,8 @@ static inline void tcf_tm_dump(struct tcf_t *dtm, const struct tcf_t *stm) #define ACT_P_CREATED 1 #define ACT_P_DELETED 1 +typedef void (*tc_action_priv_destructor)(void *priv); + struct tc_action_ops { struct list_head head; char kind[IFNAMSIZ]; @@ -91,15 +94,18 @@ struct tc_action_ops { int (*init)(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, - struct netlink_ext_ack *extack); + u32 flags, struct netlink_ext_ack *extack); int (*walk)(struct net *, struct sk_buff *, struct netlink_callback *, int, const struct tc_action_ops *, struct netlink_ext_ack *); void (*stats_update)(struct tc_action *, u64, u32, u64, bool); size_t (*get_fill_size)(const struct tc_action *act); - struct net_device *(*get_dev)(const struct tc_action *a); - void (*put_dev)(struct net_device *dev); + struct net_device *(*get_dev)(const struct tc_action *a, + tc_action_priv_destructor *destructor); + struct psample_group * + (*get_psample_group)(const struct tc_action *a, + tc_action_priv_destructor *destructor); }; struct tc_action_net { @@ -108,7 +114,7 @@ struct tc_action_net { }; static inline -int tc_action_net_init(struct tc_action_net *tn, +int tc_action_net_init(struct net *net, struct tc_action_net *tn, const struct tc_action_ops *ops) { int err = 0; @@ -117,6 +123,7 @@ int tc_action_net_init(struct tc_action_net *tn, if (!tn->idrinfo) return -ENOMEM; tn->ops = ops; + tn->idrinfo->net = net; mutex_init(&tn->idrinfo->lock); idr_init(&tn->idrinfo->action_idr); return err; @@ -147,7 +154,11 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index); int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, - int bind, bool cpustats); + int bind, bool cpustats, u32 flags); +int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index, + struct nlattr *est, struct tc_action **a, + const struct tc_action_ops *ops, int bind, + u32 flags); void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a); void tcf_idr_cleanup(struct tc_action_net *tn, u32 index); @@ -179,6 +190,43 @@ int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind, int ref); int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int); int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int); + +static inline void tcf_action_update_bstats(struct tc_action *a, + struct sk_buff *skb) +{ + if (likely(a->cpu_bstats)) { + bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb); + return; + } + spin_lock(&a->tcfa_lock); + bstats_update(&a->tcfa_bstats, skb); + spin_unlock(&a->tcfa_lock); +} + +static 
inline void tcf_action_inc_drop_qstats(struct tc_action *a) +{ + if (likely(a->cpu_qstats)) { + qstats_drop_inc(this_cpu_ptr(a->cpu_qstats)); + return; + } + spin_lock(&a->tcfa_lock); + qstats_drop_inc(&a->tcfa_qstats); + spin_unlock(&a->tcfa_lock); +} + +static inline void tcf_action_inc_overlimit_qstats(struct tc_action *a) +{ + if (likely(a->cpu_qstats)) { + qstats_overlimit_inc(this_cpu_ptr(a->cpu_qstats)); + return; + } + spin_lock(&a->tcfa_lock); + qstats_overlimit_inc(&a->tcfa_qstats); + spin_unlock(&a->tcfa_lock); +} + +void tcf_action_update_stats(struct tc_action *a, u64 bytes, u32 packets, + bool drop, bool hw); int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int); int tcf_action_check_ctrlact(int action, struct tcf_proto *tp, diff --git a/include/net/addrconf.h b/include/net/addrconf.h index becdad576859..a088349dd94f 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -202,11 +202,11 @@ u32 ipv6_addr_label(struct net *net, const struct in6_addr *addr, /* * multicast prototypes (mcast.c) */ -static inline int ipv6_mc_may_pull(struct sk_buff *skb, - unsigned int len) +static inline bool ipv6_mc_may_pull(struct sk_buff *skb, + unsigned int len) { if (skb_transport_offset(skb) + ipv6_transport_len(skb) < len) - return -EINVAL; + return false; return pskb_may_pull(skb, len); } @@ -437,7 +437,7 @@ static inline void addrconf_addr_solict_mult(const struct in6_addr *addr, static inline bool ipv6_addr_is_ll_all_nodes(const struct in6_addr *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 - __be64 *p = (__be64 *)addr; + __be64 *p = (__force __be64 *)addr; return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | (p[1] ^ cpu_to_be64(1))) == 0UL; #else return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | @@ -449,7 +449,7 @@ static inline bool ipv6_addr_is_ll_all_nodes(const struct in6_addr *addr) static inline bool ipv6_addr_is_ll_all_routers(const struct in6_addr *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 - __be64 *p = (__be64 *)addr; + __be64 *p = (__force __be64 *)addr; return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | (p[1] ^ cpu_to_be64(2))) == 0UL; #else return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | @@ -466,7 +466,7 @@ static inline bool ipv6_addr_is_isatap(const struct in6_addr *addr) static inline bool ipv6_addr_is_solict_mult(const struct in6_addr *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 - __be64 *p = (__be64 *)addr; + __be64 *p = (__force __be64 *)addr; return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | ((p[1] ^ cpu_to_be64(0x00000001ff000000UL)) & cpu_to_be64(0xffffffffff000000UL))) == 0UL; @@ -481,7 +481,7 @@ static inline bool ipv6_addr_is_solict_mult(const struct in6_addr *addr) static inline bool ipv6_addr_is_all_snoopers(const struct in6_addr *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 - __be64 *p = (__be64 *)addr; + __be64 *p = (__force __be64 *)addr; return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | (p[1] ^ cpu_to_be64(0x6a))) == 0UL; diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 3426d6dacc45..17e10fba2152 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -41,6 +41,10 @@ struct unix_skb_parms { u32 consumed; } __randomize_layout; +struct scm_stat { + u32 nr_fds; +}; + #define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) #define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) @@ -65,6 +69,7 @@ struct unix_sock { #define 
UNIX_GC_MAYBE_CYCLE 1 struct socket_wq peer_wq; wait_queue_entry_t peer_wake; + struct scm_stat scm_stat; }; static inline struct unix_sock *unix_sk(const struct sock *sk) diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 80ea0f93d3f7..b1c717286993 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -10,7 +10,7 @@ #include <linux/kernel.h> #include <linux/workqueue.h> -#include <linux/vm_sockets.h> +#include <uapi/linux/vm_sockets.h> #include "vsock_addr.h" @@ -27,6 +27,7 @@ extern spinlock_t vsock_table_lock; struct vsock_sock { /* sk must be the first member. */ struct sock sk; + const struct vsock_transport *transport; struct sockaddr_vm local_addr; struct sockaddr_vm remote_addr; /* Links for the global tables of bound and connected sockets. */ @@ -64,16 +65,18 @@ struct vsock_sock { bool sent_request; bool ignore_connecting_rst; + /* Protected by lock_sock(sk) */ + u64 buffer_size; + u64 buffer_min_size; + u64 buffer_max_size; + /* Private to transport. */ void *trans; }; s64 vsock_stream_has_data(struct vsock_sock *vsk); s64 vsock_stream_has_space(struct vsock_sock *vsk); -struct sock *__vsock_create(struct net *net, - struct socket *sock, - struct sock *parent, - gfp_t priority, unsigned short type, int kern); +struct sock *vsock_create_connected(struct sock *parent); /**** TRANSPORT ****/ @@ -88,7 +91,19 @@ struct vsock_transport_send_notify_data { u64 data2; /* Transport-defined. */ }; +/* Transport features flags */ +/* Transport provides host->guest communication */ +#define VSOCK_TRANSPORT_F_H2G 0x00000001 +/* Transport provides guest->host communication */ +#define VSOCK_TRANSPORT_F_G2H 0x00000002 +/* Transport provides DGRAM communication */ +#define VSOCK_TRANSPORT_F_DGRAM 0x00000004 +/* Transport provides local (loopback) communication */ +#define VSOCK_TRANSPORT_F_LOCAL 0x00000008 + struct vsock_transport { + struct module *module; + /* Initialize/tear-down socket. */ int (*init)(struct vsock_sock *, struct vsock_sock *); void (*destruct)(struct vsock_sock *); @@ -139,33 +154,23 @@ struct vsock_transport { struct vsock_transport_send_notify_data *); int (*notify_send_post_enqueue)(struct vsock_sock *, ssize_t, struct vsock_transport_send_notify_data *); + /* sk_lock held by the caller */ + void (*notify_buffer_size)(struct vsock_sock *, u64 *); /* Shutdown. */ int (*shutdown)(struct vsock_sock *, int); - /* Buffer sizes. */ - void (*set_buffer_size)(struct vsock_sock *, u64); - void (*set_min_buffer_size)(struct vsock_sock *, u64); - void (*set_max_buffer_size)(struct vsock_sock *, u64); - u64 (*get_buffer_size)(struct vsock_sock *); - u64 (*get_min_buffer_size)(struct vsock_sock *); - u64 (*get_max_buffer_size)(struct vsock_sock *); - /* Addressing. 
*/ u32 (*get_local_cid)(void); }; /**** CORE ****/ -int __vsock_core_init(const struct vsock_transport *t, struct module *owner); -static inline int vsock_core_init(const struct vsock_transport *t) -{ - return __vsock_core_init(t, THIS_MODULE); -} -void vsock_core_exit(void); +int vsock_core_register(const struct vsock_transport *t, int features); +void vsock_core_unregister(const struct vsock_transport *t); /* The transport may downcast this to access transport-specific functions */ -const struct vsock_transport *vsock_core_get_transport(void); +const struct vsock_transport *vsock_core_get_transport(struct vsock_sock *vsk); /**** UTILS ****/ @@ -193,6 +198,8 @@ struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst); void vsock_remove_sock(struct vsock_sock *vsk); void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); +int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk); +bool vsock_find_cid(unsigned int cid); /**** TAP ****/ diff --git a/include/net/arp.h b/include/net/arp.h index c8f580a0e6b1..4950191f6b2b 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -57,8 +57,8 @@ static inline void __ipv4_confirm_neigh(struct net_device *dev, u32 key) unsigned long now = jiffies; /* avoid dirtying neighbour */ - if (n->confirmed != now) - n->confirmed = now; + if (READ_ONCE(n->confirmed) != now) + WRITE_ONCE(n->confirmed, now); } rcu_read_unlock_bh(); } diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index fabee6db0abb..e42bb8e03c09 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -129,6 +129,8 @@ void bt_warn(const char *fmt, ...); __printf(1, 2) void bt_err(const char *fmt, ...); __printf(1, 2) +void bt_warn_ratelimited(const char *fmt, ...); +__printf(1, 2) void bt_err_ratelimited(const char *fmt, ...); #define BT_INFO(fmt, ...) bt_info(fmt "\n", ##__VA_ARGS__) @@ -136,8 +138,6 @@ void bt_err_ratelimited(const char *fmt, ...); #define BT_ERR(fmt, ...) bt_err(fmt "\n", ##__VA_ARGS__) #define BT_DBG(fmt, ...) pr_debug(fmt "\n", ##__VA_ARGS__) -#define BT_ERR_RATELIMITED(fmt, ...) bt_err_ratelimited(fmt "\n", ##__VA_ARGS__) - #define bt_dev_info(hdev, fmt, ...) \ BT_INFO("%s: " fmt, (hdev)->name, ##__VA_ARGS__) #define bt_dev_warn(hdev, fmt, ...) \ @@ -147,8 +147,10 @@ void bt_err_ratelimited(const char *fmt, ...); #define bt_dev_dbg(hdev, fmt, ...) \ BT_DBG("%s: " fmt, (hdev)->name, ##__VA_ARGS__) +#define bt_dev_warn_ratelimited(hdev, fmt, ...) \ + bt_warn_ratelimited("%s: " fmt, (hdev)->name, ##__VA_ARGS__) #define bt_dev_err_ratelimited(hdev, fmt, ...) 
\ - BT_ERR_RATELIMITED("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + bt_err_ratelimited("%s: " fmt, (hdev)->name, ##__VA_ARGS__) /* Connection and socket states */ enum { diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 5bc1e30dedde..6293bdd7d862 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -27,6 +27,7 @@ #define HCI_MAX_ACL_SIZE 1024 #define HCI_MAX_SCO_SIZE 255 +#define HCI_MAX_ISO_SIZE 251 #define HCI_MAX_EVENT_SIZE 260 #define HCI_MAX_FRAME_SIZE (HCI_MAX_ACL_SIZE + 4) @@ -303,6 +304,7 @@ enum { #define HCI_ACLDATA_PKT 0x02 #define HCI_SCODATA_PKT 0x03 #define HCI_EVENT_PKT 0x04 +#define HCI_ISODATA_PKT 0x05 #define HCI_DIAG_PKT 0xf0 #define HCI_VENDOR_PKT 0xff @@ -352,6 +354,15 @@ enum { #define ACL_ACTIVE_BCAST 0x04 #define ACL_PICO_BCAST 0x08 +/* ISO PB flags */ +#define ISO_START 0x00 +#define ISO_CONT 0x01 +#define ISO_SINGLE 0x02 +#define ISO_END 0x03 + +/* ISO TS flags */ +#define ISO_TS 0x01 + /* Baseband links */ #define SCO_LINK 0x00 #define ACL_LINK 0x01 @@ -359,6 +370,7 @@ enum { /* Low Energy links do not have defined link type. Use invented one */ #define LE_LINK 0x80 #define AMP_LINK 0x81 +#define ISO_LINK 0x82 #define INVALID_LINK 0xff /* LMP features */ @@ -440,6 +452,8 @@ enum { #define HCI_LE_PHY_2M 0x01 #define HCI_LE_PHY_CODED 0x08 #define HCI_LE_CHAN_SEL_ALG2 0x40 +#define HCI_LE_CIS_MASTER 0x10 +#define HCI_LE_CIS_SLAVE 0x20 /* Connection modes */ #define HCI_CM_ACTIVE 0x0000 @@ -1718,6 +1732,86 @@ struct hci_cp_le_set_adv_set_rand_addr { bdaddr_t bdaddr; } __packed; +#define HCI_OP_LE_READ_BUFFER_SIZE_V2 0x2060 +struct hci_rp_le_read_buffer_size_v2 { + __u8 status; + __le16 acl_mtu; + __u8 acl_max_pkt; + __le16 iso_mtu; + __u8 iso_max_pkt; +} __packed; + +#define HCI_OP_LE_READ_ISO_TX_SYNC 0x2061 +struct hci_cp_le_read_iso_tx_sync { + __le16 handle; +} __packed; + +struct hci_rp_le_read_iso_tx_sync { + __u8 status; + __le16 handle; + __le16 seq; + __le32 imestamp; + __u8 offset[3]; +} __packed; + +#define HCI_OP_LE_SET_CIG_PARAMS 0x2062 +struct hci_cis_params { + __u8 cis_id; + __le16 m_sdu; + __le16 s_sdu; + __u8 m_phy; + __u8 s_phy; + __u8 m_rtn; + __u8 s_rtn; +} __packed; + +struct hci_cp_le_set_cig_params { + __u8 cig_id; + __u8 m_interval[3]; + __u8 s_interval[3]; + __u8 sca; + __u8 packing; + __u8 framing; + __le16 m_latency; + __le16 s_latency; + __u8 num_cis; + struct hci_cis_params cis[0]; +} __packed; + +struct hci_rp_le_set_cig_params { + __u8 status; + __u8 cig_id; + __u8 num_handles; + __le16 handle[0]; +} __packed; + +#define HCI_OP_LE_CREATE_CIS 0x2064 +struct hci_cis { + __le16 cis_handle; + __le16 acl_handle; +} __packed; + +struct hci_cp_le_create_cis { + __u8 num_cis; + struct hci_cis cis[0]; +} __packed; + +#define HCI_OP_LE_REMOVE_CIG 0x2065 +struct hci_cp_le_remove_cig { + __u8 cig_id; +} __packed; + +#define HCI_OP_LE_ACCEPT_CIS 0x2066 +struct hci_cp_le_accept_cis { + __le16 handle; +} __packed; + +#define HCI_OP_LE_REJECT_CIS 0x2067 +struct hci_cp_le_reject_cis { + __le16 handle; + __u8 reason; +} __packed; + /* ---- HCI Events ---- */ #define HCI_EV_INQUIRY_COMPLETE 0x01 @@ -2186,6 +2280,14 @@ struct hci_ev_le_direct_adv_info { __s8 rssi; } __packed; +#define HCI_EV_LE_PHY_UPDATE_COMPLETE 0x0c +struct hci_ev_le_phy_update_complete { + __u8 status; + __le16 handle; + __u8 tx_phy; + __u8 rx_phy; +} __packed; + #define HCI_EV_LE_EXT_ADV_REPORT 0x0d struct hci_ev_le_ext_adv_report { __le16 evt_type; @@ -2226,6 +2328,34 @@ struct hci_evt_le_ext_adv_set_term { __u8 num_evts; } __packed; 
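Several of the new CIG/CIS opcodes above carry variable-length payloads (note the cis[0] flexible array members). As a rough illustration only — build_create_cis() below is a hypothetical helper, not part of this patch — a caller would size and fill the HCI_OP_LE_CREATE_CIS payload roughly like this:

	#include <linux/slab.h>
	#include <net/bluetooth/hci.h>

	/* Hypothetical helper (not in this patch): build the variable-length
	 * LE Create CIS command defined above. The payload is the one-byte
	 * num_cis count followed by num_cis {cis_handle, acl_handle} pairs,
	 * with handles already in little-endian order as the structs require.
	 */
	static struct hci_cp_le_create_cis *
	build_create_cis(const __le16 *cis_handles, const __le16 *acl_handles,
			 u8 num_cis, size_t *plen)
	{
		struct hci_cp_le_create_cis *cp;
		int i;

		*plen = sizeof(*cp) + num_cis * sizeof(struct hci_cis);
		cp = kzalloc(*plen, GFP_KERNEL);
		if (!cp)
			return NULL;

		cp->num_cis = num_cis;
		for (i = 0; i < num_cis; i++) {
			cp->cis[i].cis_handle = cis_handles[i];
			cp->cis[i].acl_handle = acl_handles[i];
		}

		/* The result would typically be submitted with
		 * hci_send_cmd(hdev, HCI_OP_LE_CREATE_CIS, *plen, cp)
		 * and freed by the caller afterwards.
		 */
		return cp;
	}
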
+#define HCI_EVT_LE_CIS_ESTABLISHED 0x19 +struct hci_evt_le_cis_established { + __u8 status; + __le16 handle; + __u8 cig_sync_delay[3]; + __u8 cis_sync_delay[3]; + __u8 m_latency[3]; + __u8 s_latency[3]; + __u8 m_phy; + __u8 s_phy; + __u8 nse; + __u8 m_bn; + __u8 s_bn; + __u8 m_ft; + __u8 s_ft; + __le16 m_mtu; + __le16 s_mtu; + __le16 interval; +} __packed; + +#define HCI_EVT_LE_CIS_REQ 0x1a +struct hci_evt_le_cis_req { + __le16 acl_handle; + __le16 cis_handle; + __u8 cig_id; + __u8 cis_id; +} __packed; + #define HCI_EV_VENDOR 0xff /* Internal events generated by Bluetooth stack */ @@ -2254,6 +2384,7 @@ struct hci_ev_si_security { #define HCI_EVENT_HDR_SIZE 2 #define HCI_ACL_HDR_SIZE 4 #define HCI_SCO_HDR_SIZE 3 +#define HCI_ISO_HDR_SIZE 4 struct hci_command_hdr { __le16 opcode; /* OCF & OGF */ @@ -2275,6 +2406,30 @@ struct hci_sco_hdr { __u8 dlen; } __packed; +struct hci_iso_hdr { + __le16 handle; + __le16 dlen; + __u8 data[0]; +} __packed; + +/* ISO data packet status flags */ +#define HCI_ISO_STATUS_VALID 0x00 +#define HCI_ISO_STATUS_INVALID 0x01 +#define HCI_ISO_STATUS_NOP 0x02 + +#define HCI_ISO_DATA_HDR_SIZE 4 +struct hci_iso_data_hdr { + __le16 sn; + __le16 slen; +}; + +#define HCI_ISO_TS_DATA_HDR_SIZE 8 +struct hci_iso_ts_data_hdr { + __le32 ts; + __le16 sn; + __le16 slen; +}; + static inline struct hci_event_hdr *hci_event_hdr(const struct sk_buff *skb) { return (struct hci_event_hdr *) skb->data; @@ -2300,4 +2455,14 @@ static inline struct hci_sco_hdr *hci_sco_hdr(const struct sk_buff *skb) #define hci_handle(h) (h & 0x0fff) #define hci_flags(h) (h >> 12) +/* ISO handle and flags pack/unpack */ +#define hci_iso_flags_pb(f) (f & 0x0003) +#define hci_iso_flags_ts(f) ((f >> 2) & 0x0001) +#define hci_iso_flags_pack(pb, ts) ((pb & 0x03) | ((ts & 0x01) << 2)) + +/* ISO data length and flags pack/unpack */ +#define hci_iso_data_len_pack(h, f) ((__u16) ((h) | ((f) << 14))) +#define hci_iso_data_len(h) ((h) & 0x3fff) +#define hci_iso_data_flags(h) ((h) >> 14) + #endif /* __HCI_H */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index ffc95b382eb5..89ecf0a80aa1 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -118,6 +118,13 @@ struct bt_uuid { u8 svc_hint; }; +struct blocked_key { + struct list_head list; + struct rcu_head rcu; + u8 type; + u8 val[16]; +}; + struct smp_csrk { bdaddr_t bdaddr; u8 bdaddr_type; @@ -397,6 +404,7 @@ struct hci_dev { struct list_head le_conn_params; struct list_head pend_le_conns; struct list_head pend_le_reports; + struct list_head blocked_keys; struct hci_dev_stats stat; @@ -493,6 +501,8 @@ struct hci_conn { __u16 le_supv_timeout; __u8 le_adv_data[HCI_MAX_AD_LENGTH]; __u8 le_adv_data_len; + __u8 le_tx_phy; + __u8 le_rx_phy; __s8 rssi; __s8 tx_power; __s8 max_tx_power; @@ -1121,6 +1131,8 @@ struct smp_irk *hci_find_irk_by_addr(struct hci_dev *hdev, bdaddr_t *bdaddr, struct smp_irk *hci_add_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type, u8 val[16], bdaddr_t *rpa); void hci_remove_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type); +bool hci_is_blocked_key(struct hci_dev *hdev, u8 type, u8 val[16]); +void hci_blocked_keys_clear(struct hci_dev *hdev); void hci_smp_irks_clear(struct hci_dev *hdev); bool hci_bdaddr_is_paired(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type); @@ -1517,6 +1529,8 @@ void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c); #define DISCOV_INTERLEAVED_INQUIRY_LEN 0x04 #define DISCOV_BREDR_INQUIRY_LEN 0x08 #define DISCOV_LE_RESTART_DELAY 
msecs_to_jiffies(200) /* msec */ +#define DISCOV_LE_FAST_ADV_INT_MIN 100 /* msec */ +#define DISCOV_LE_FAST_ADV_INT_MAX 150 /* msec */ void mgmt_fill_version_info(void *ver); int mgmt_new_settings(struct hci_dev *hdev); diff --git a/include/net/bluetooth/hci_mon.h b/include/net/bluetooth/hci_mon.h index 240786b04a46..2d5fcda1bcd0 100644 --- a/include/net/bluetooth/hci_mon.h +++ b/include/net/bluetooth/hci_mon.h @@ -49,6 +49,8 @@ struct hci_mon_hdr { #define HCI_MON_CTRL_CLOSE 15 #define HCI_MON_CTRL_COMMAND 16 #define HCI_MON_CTRL_EVENT 17 +#define HCI_MON_ISO_TX_PKT 18 +#define HCI_MON_ISO_RX_PKT 19 struct hci_mon_new_index { __u8 type; diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 9cee7ddc6741..a90666af05bd 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -654,6 +654,23 @@ struct mgmt_cp_set_phy_confguration { } __packed; #define MGMT_SET_PHY_CONFIGURATION_SIZE 4 +#define MGMT_OP_SET_BLOCKED_KEYS 0x0046 + +#define HCI_BLOCKED_KEY_TYPE_LINKKEY 0x00 +#define HCI_BLOCKED_KEY_TYPE_LTK 0x01 +#define HCI_BLOCKED_KEY_TYPE_IRK 0x02 + +struct mgmt_blocked_key_info { + __u8 type; + __u8 val[16]; +} __packed; + +struct mgmt_cp_set_blocked_keys { + __le16 key_count; + struct mgmt_blocked_key_info keys[0]; +} __packed; +#define MGMT_OP_SET_BLOCKED_KEYS_SIZE 2 + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; diff --git a/include/net/bonding.h b/include/net/bonding.h index f7fe45689142..3d56b026bb9e 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -159,7 +159,6 @@ struct slave { unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS]; s8 link; /* one of BOND_LINK_XXXX */ s8 link_new_state; /* one of BOND_LINK_XXXX */ - s8 new_link; u8 backup:1, /* indicates backup slave. 
Value corresponds with BOND_STATE_ACTIVE and BOND_STATE_BACKUP */ inactive:1, /* indicates inactive slave */ @@ -203,7 +202,6 @@ struct bonding { struct slave __rcu *primary_slave; struct bond_up_slave __rcu *slave_arr; /* Array of usable slaves */ bool force_primary; - u32 nest_level; s32 slave_cnt; /* never change this value outside the attach/detach wrappers */ int (*recv_probe)(const struct sk_buff *, struct bonding *, struct slave *); @@ -239,6 +237,7 @@ struct bonding { struct dentry *debug_dir; #endif /* CONFIG_DEBUG_FS */ struct rtnl_link_stats64 bond_stats; + struct lock_class_key stats_lock_key; }; #define bond_slave_get_rcu(dev) \ @@ -549,7 +548,7 @@ static inline void bond_propose_link_state(struct slave *slave, int state) static inline void bond_commit_link_state(struct slave *slave, bool notify) { - if (slave->link == slave->link_new_state) + if (slave->link_new_state == BOND_LINK_NOCHANGE) return; slave->link = slave->link_new_state; diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h index b9dcb02e756b..8e4f831d2e52 100644 --- a/include/net/bpf_sk_storage.h +++ b/include/net/bpf_sk_storage.h @@ -10,4 +10,14 @@ void bpf_sk_storage_free(struct sock *sk); extern const struct bpf_func_proto bpf_sk_storage_get_proto; extern const struct bpf_func_proto bpf_sk_storage_delete_proto; +#ifdef CONFIG_BPF_SYSCALL +int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk); +#else +static inline int bpf_sk_storage_clone(const struct sock *sk, + struct sock *newsk) +{ + return 0; +} +#endif + #endif /* _BPF_SK_STORAGE_H */ diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 127a5c4e3699..86e028388bad 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -122,7 +122,7 @@ static inline void skb_mark_napi_id(struct sk_buff *skb, static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_NET_RX_BUSY_POLL - sk->sk_napi_id = skb->napi_id; + WRITE_ONCE(sk->sk_napi_id, skb->napi_id); #endif sk_rx_queue_set(sk, skb); } @@ -132,8 +132,8 @@ static inline void sk_mark_napi_id_once(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_NET_RX_BUSY_POLL - if (!sk->sk_napi_id) - sk->sk_napi_id = skb->napi_id; + if (!READ_ONCE(sk->sk_napi_id)) + WRITE_ONCE(sk->sk_napi_id, skb->napi_id); #endif } diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 26e2ad2c7027..f22bd6c838a3 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -247,6 +247,19 @@ struct ieee80211_rate { }; /** + * struct ieee80211_he_obss_pd - AP settings for spatial reuse + * + * @enable: is the feature enabled. 
+ * @min_offset: minimal tx power offset an associated station shall use + * @max_offset: maximum tx power offset an associated station shall use + */ +struct ieee80211_he_obss_pd { + bool enable; + u8 min_offset; + u8 max_offset; +}; + +/** * struct ieee80211_sta_ht_cap - STA's HT capabilities * * This structure describes most essential parameters needed @@ -318,6 +331,60 @@ struct ieee80211_sband_iftype_data { }; /** + * enum ieee80211_edmg_bw_config - allowed channel bandwidth configurations + * + * @IEEE80211_EDMG_BW_CONFIG_4: 2.16GHz + * @IEEE80211_EDMG_BW_CONFIG_5: 2.16GHz and 4.32GHz + * @IEEE80211_EDMG_BW_CONFIG_6: 2.16GHz, 4.32GHz and 6.48GHz + * @IEEE80211_EDMG_BW_CONFIG_7: 2.16GHz, 4.32GHz, 6.48GHz and 8.64GHz + * @IEEE80211_EDMG_BW_CONFIG_8: 2.16GHz and 2.16GHz + 2.16GHz + * @IEEE80211_EDMG_BW_CONFIG_9: 2.16GHz, 4.32GHz and 2.16GHz + 2.16GHz + * @IEEE80211_EDMG_BW_CONFIG_10: 2.16GHz, 4.32GHz, 6.48GHz and 2.16GHz+2.16GHz + * @IEEE80211_EDMG_BW_CONFIG_11: 2.16GHz, 4.32GHz, 6.48GHz, 8.64GHz and + * 2.16GHz+2.16GHz + * @IEEE80211_EDMG_BW_CONFIG_12: 2.16GHz, 2.16GHz + 2.16GHz and + * 4.32GHz + 4.32GHz + * @IEEE80211_EDMG_BW_CONFIG_13: 2.16GHz, 4.32GHz, 2.16GHz + 2.16GHz and + * 4.32GHz + 4.32GHz + * @IEEE80211_EDMG_BW_CONFIG_14: 2.16GHz, 4.32GHz, 6.48GHz, 2.16GHz + 2.16GHz + * and 4.32GHz + 4.32GHz + * @IEEE80211_EDMG_BW_CONFIG_15: 2.16GHz, 4.32GHz, 6.48GHz, 8.64GHz, + * 2.16GHz + 2.16GHz and 4.32GHz + 4.32GHz + */ +enum ieee80211_edmg_bw_config { + IEEE80211_EDMG_BW_CONFIG_4 = 4, + IEEE80211_EDMG_BW_CONFIG_5 = 5, + IEEE80211_EDMG_BW_CONFIG_6 = 6, + IEEE80211_EDMG_BW_CONFIG_7 = 7, + IEEE80211_EDMG_BW_CONFIG_8 = 8, + IEEE80211_EDMG_BW_CONFIG_9 = 9, + IEEE80211_EDMG_BW_CONFIG_10 = 10, + IEEE80211_EDMG_BW_CONFIG_11 = 11, + IEEE80211_EDMG_BW_CONFIG_12 = 12, + IEEE80211_EDMG_BW_CONFIG_13 = 13, + IEEE80211_EDMG_BW_CONFIG_14 = 14, + IEEE80211_EDMG_BW_CONFIG_15 = 15, +}; + +/** + * struct ieee80211_edmg - EDMG configuration + * + * This structure describes most essential parameters needed + * to describe 802.11ay EDMG configuration + * + * @channels: bitmap that indicates the 2.16 GHz channel(s) + * that are allowed to be used for transmissions. + * Bit 0 indicates channel 1, bit 1 indicates channel 2, etc. + * Set to 0 indicate EDMG not supported. + * @bw_config: Channel BW Configuration subfield encodes + * the allowed channel bandwidth configurations + */ +struct ieee80211_edmg { + u8 channels; + enum ieee80211_edmg_bw_config bw_config; +}; + +/** * struct ieee80211_supported_band - frequency band definition * * This structure describes a frequency band a wiphy @@ -333,6 +400,7 @@ struct ieee80211_sband_iftype_data { * @n_bitrates: Number of bitrates in @bitrates * @ht_cap: HT capabilities in this band * @vht_cap: VHT capabilities in this band + * @edmg_cap: EDMG capabilities in this band * @n_iftype_data: number of iftype data entries * @iftype_data: interface type data entries. Note that the bits in * @types_mask inside this structure cannot overlap (i.e. only @@ -347,6 +415,7 @@ struct ieee80211_supported_band { int n_bitrates; struct ieee80211_sta_ht_cap ht_cap; struct ieee80211_sta_vht_cap vht_cap; + struct ieee80211_edmg edmg_cap; u16 n_iftype_data; const struct ieee80211_sband_iftype_data *iftype_data; }; @@ -496,6 +565,7 @@ struct vif_params { * with the get_key() callback, must be in little endian, * length given by @seq_len. * @seq_len: length of @seq. 
+ * @vlan_id: vlan_id for VLAN group key (if nonzero) * @mode: key install mode (RX_TX, NO_TX or SET_TX) */ struct key_params { @@ -503,6 +573,7 @@ struct key_params { const u8 *seq; int key_len; int seq_len; + u16 vlan_id; u32 cipher; enum nl80211_key_mode mode; }; @@ -514,12 +585,17 @@ struct key_params { * @center_freq1: center frequency of first segment * @center_freq2: center frequency of second segment * (only with 80+80 MHz) + * @edmg: define the EDMG channels configuration. + * If edmg is requested (i.e. the .channels member is non-zero), + * chan will define the primary channel and all other + * parameters are ignored. */ struct cfg80211_chan_def { struct ieee80211_channel *chan; enum nl80211_chan_width width; u32 center_freq1; u32 center_freq2; + struct ieee80211_edmg edmg; }; /** @@ -578,6 +654,19 @@ cfg80211_chandef_identical(const struct cfg80211_chan_def *chandef1, } /** + * cfg80211_chandef_is_edmg - check if chandef represents an EDMG channel + * + * @chandef: the channel definition + * + * Return: %true if EDMG defined, %false otherwise. + */ +static inline bool +cfg80211_chandef_is_edmg(const struct cfg80211_chan_def *chandef) +{ + return chandef->edmg.channels || chandef->edmg.bw_config; +} + +/** * cfg80211_chandef_compatible - check if two channel definitions are compatible * @chandef1: first channel definition * @chandef2: second channel definition @@ -681,6 +770,7 @@ ieee80211_chandef_max_power(struct cfg80211_chan_def *chandef) * @SURVEY_INFO_TIME_RX: receive time was filled in * @SURVEY_INFO_TIME_TX: transmit time was filled in * @SURVEY_INFO_TIME_SCAN: scan time was filled in + * @SURVEY_INFO_TIME_BSS_RX: local BSS receive time was filled in * * Used by the driver to indicate which info in &struct survey_info * it has filled in during the get_survey(). @@ -694,6 +784,7 @@ enum survey_info_flags { SURVEY_INFO_TIME_RX = BIT(5), SURVEY_INFO_TIME_TX = BIT(6), SURVEY_INFO_TIME_SCAN = BIT(7), + SURVEY_INFO_TIME_BSS_RX = BIT(8), }; /** @@ -710,6 +801,7 @@ enum survey_info_flags { * @time_rx: amount of time the radio spent receiving data * @time_tx: amount of time the radio spent transmitting data * @time_scan: amount of time the radio spent for scanning + * @time_bss_rx: amount of time the radio spent receiving data on a local BSS * * Used by dump_survey() to report back per-channel survey information. 
* @@ -724,6 +816,7 @@ struct survey_info { u64 time_rx; u64 time_tx; u64 time_scan; + u64 time_bss_rx; u32 filled; s8 noise; }; @@ -896,6 +989,7 @@ enum cfg80211_ap_settings_flags { * @vht_required: stations must support VHT * @twt_responder: Enable Target Wait Time * @flags: flags, as defined in enum cfg80211_ap_settings_flags + * @he_obss_pd: OBSS Packet Detection settings */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -923,6 +1017,7 @@ struct cfg80211_ap_settings { bool ht_required, vht_required; bool twt_responder; u32 flags; + struct ieee80211_he_obss_pd he_obss_pd; }; /** @@ -1031,6 +1126,7 @@ struct sta_txpwr { * (bitmask of BIT(%NL80211_STA_FLAG_...)) * @listen_interval: listen interval or -1 for no change * @aid: AID or zero for no change + * @vlan_id: VLAN ID for station (if nonzero) * @peer_aid: mesh peer AID or zero for no change * @plink_action: plink action to take * @plink_state: set the peer link state for a station @@ -1066,6 +1162,7 @@ struct station_parameters { u32 sta_modify_mask; int listen_interval; u16 aid; + u16 vlan_id; u16 peer_aid; u8 supported_rates_len; u8 plink_action; @@ -1162,15 +1259,17 @@ int cfg80211_check_station_change(struct wiphy *wiphy, * @RATE_INFO_FLAGS_MCS: mcs field filled with HT MCS * @RATE_INFO_FLAGS_VHT_MCS: mcs field filled with VHT MCS * @RATE_INFO_FLAGS_SHORT_GI: 400ns guard interval - * @RATE_INFO_FLAGS_60G: 60GHz MCS + * @RATE_INFO_FLAGS_DMG: 60GHz MCS * @RATE_INFO_FLAGS_HE_MCS: HE MCS information + * @RATE_INFO_FLAGS_EDMG: 60GHz MCS in EDMG mode */ enum rate_info_flags { RATE_INFO_FLAGS_MCS = BIT(0), RATE_INFO_FLAGS_VHT_MCS = BIT(1), RATE_INFO_FLAGS_SHORT_GI = BIT(2), - RATE_INFO_FLAGS_60G = BIT(3), + RATE_INFO_FLAGS_DMG = BIT(3), RATE_INFO_FLAGS_HE_MCS = BIT(4), + RATE_INFO_FLAGS_EDMG = BIT(5), }; /** @@ -1210,6 +1309,7 @@ enum rate_info_bw { * @he_dcm: HE DCM value * @he_ru_alloc: HE RU allocation (from &enum nl80211_he_ru_alloc, * only valid if bw is %RATE_INFO_BW_HE_RU) + * @n_bonded_ch: In case of EDMG the number of bonded channels (1-4) */ struct rate_info { u8 flags; @@ -1220,6 +1320,7 @@ struct rate_info { u8 he_gi; u8 he_dcm; u8 he_ru_alloc; + u8 n_bonded_ch; }; /** @@ -1315,6 +1416,7 @@ struct cfg80211_tid_stats { * indicate the relevant values in this struct for them * @connected_time: time(in secs) since a station is last connected * @inactive_time: time since last station activity (tx/rx) in milliseconds + * @assoc_at: bootime (ns) of the last association * @rx_bytes: bytes (size of MPDUs) received from this station * @tx_bytes: bytes (size of MPDUs) transmitted to this station * @llid: mesh local link id @@ -1375,6 +1477,7 @@ struct station_info { u64 filled; u32 connected_time; u32 inactive_time; + u64 assoc_at; u64 rx_bytes; u64 tx_bytes; u16 llid; @@ -2421,6 +2524,9 @@ struct cfg80211_bss_selection { * @fils_erp_rrk_len: Length of @fils_erp_rrk in octets. * @want_1x: indicates user-space supports and wants to use 802.1X driver * offload of 4-way handshake. + * @edmg: define the EDMG channels. + * This may specify multiple channels and bonding options for the driver + * to choose from, based on BSS configuration. 
*/ struct cfg80211_connect_params { struct ieee80211_channel *channel; @@ -2454,6 +2560,7 @@ struct cfg80211_connect_params { const u8 *fils_erp_rrk; size_t fils_erp_rrk_len; bool want_1x; + struct ieee80211_edmg edmg; }; /** @@ -2499,6 +2606,13 @@ enum wiphy_params_flags { #define IEEE80211_DEFAULT_AIRTIME_WEIGHT 256 +/* The per TXQ device queue limit in airtime */ +#define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L 5000 +#define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H 12000 + +/* The per interface airtime threshold to switch to lower queue limit */ +#define IEEE80211_AQL_THRESHOLD 24000 + /** * struct cfg80211_pmksa - PMK Security Association * @@ -3434,6 +3548,9 @@ struct cfg80211_update_owe_info { * * @start_radar_detection: Start radar detection in the driver. * + * @end_cac: End running CAC, probably because a related CAC + * was finished on another phy. + * * @update_ft_ies: Provide updated Fast BSS Transition information to the * driver. If the SME is in the driver/firmware, this information can be * used in building Authentication and Reassociation Request frames. @@ -3760,6 +3877,8 @@ struct cfg80211_ops { struct net_device *dev, struct cfg80211_chan_def *chandef, u32 cac_time_ms); + void (*end_cac)(struct wiphy *wiphy, + struct net_device *dev); int (*update_ft_ies)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_update_ft_ies_params *ftie); int (*crit_proto_start)(struct wiphy *wiphy, @@ -5447,6 +5566,14 @@ const struct ieee80211_reg_rule *freq_reg_info(struct wiphy *wiphy, const char *reg_initiator_name(enum nl80211_reg_initiator initiator); /** + * regulatory_pre_cac_allowed - check if pre-CAC allowed in the current regdom + * @wiphy: wiphy for which pre-CAC capability is checked. + * + * Pre-CAC is allowed only in some regdomains (notable ETSI). + */ +bool regulatory_pre_cac_allowed(struct wiphy *wiphy); + +/** * DOC: Internal regulatory db functions * */ @@ -6482,7 +6609,7 @@ struct cfg80211_roam_info { * time it is accessed in __cfg80211_roamed() due to delay in scheduling * rdev->event_work. In case of any failures, the reference is released * either in cfg80211_roamed() or in __cfg80211_romed(), Otherwise, it will be - * released while diconneting from the current bss. + * released while disconnecting from the current bss. */ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, gfp_t gfp); diff --git a/include/net/devlink.h b/include/net/devlink.h index bc36f942a7d5..ce5cea428fdc 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -14,6 +14,7 @@ #include <linux/netdevice.h> #include <linux/spinlock.h> #include <linux/workqueue.h> +#include <linux/refcount.h> #include <net/net_namespace.h> #include <uapi/linux/devlink.h> @@ -31,10 +32,15 @@ struct devlink { struct list_head reporter_list; struct mutex reporters_lock; /* protects reporter_list */ struct devlink_dpipe_headers *dpipe_headers; + struct list_head trap_list; + struct list_head trap_group_list; const struct devlink_ops *ops; struct device *dev; possible_net_t _net; struct mutex lock; + u8 reload_failed:1, + reload_enabled:1, + registered:1; char priv[0] __aligned(NETDEV_ALIGN); }; @@ -72,7 +78,7 @@ struct devlink_port { struct list_head list; struct list_head param_list; struct devlink *devlink; - unsigned index; + unsigned int index; bool registered; spinlock_t type_lock; /* Protects type and type_dev * pointer consistency. 
@@ -395,6 +401,8 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX, DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN, DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY, + DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -425,6 +433,13 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME "fw_load_policy" #define DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE DEVLINK_PARAM_TYPE_U8 +#define DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_NAME \ + "reset_dev_on_drv_probe" +#define DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_TYPE DEVLINK_PARAM_TYPE_U8 + +#define DEVLINK_PARAM_GENERIC_ENABLE_ROCE_NAME "enable_roce" +#define DEVLINK_PARAM_GENERIC_ENABLE_ROCE_TYPE DEVLINK_PARAM_TYPE_BOOL + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ @@ -455,6 +470,13 @@ enum devlink_param_generic_id { /* Maker of the board */ #define DEVLINK_INFO_VERSION_GENERIC_BOARD_MANUFACTURE "board.manufacture" +/* Part number, identifier of asic design */ +#define DEVLINK_INFO_VERSION_GENERIC_ASIC_ID "asic.id" +/* Revision of asic design */ +#define DEVLINK_INFO_VERSION_GENERIC_ASIC_REV "asic.rev" + +/* Overall FW version */ +#define DEVLINK_INFO_VERSION_GENERIC_FW "fw" /* Control processor FW version */ #define DEVLINK_INFO_VERSION_GENERIC_FW_MGMT "fw.mgmt" /* Data path microcode controlling high-speed packet processing */ @@ -463,6 +485,10 @@ enum devlink_param_generic_id { #define DEVLINK_INFO_VERSION_GENERIC_FW_UNDI "fw.undi" /* NCSI support/handler version */ #define DEVLINK_INFO_VERSION_GENERIC_FW_NCSI "fw.ncsi" +/* FW parameter set id */ +#define DEVLINK_INFO_VERSION_GENERIC_FW_PSID "fw.psid" +/* RoCE FW version */ +#define DEVLINK_INFO_VERSION_GENERIC_FW_ROCE "fw.roce" struct devlink_region; struct devlink_info_req; @@ -490,15 +516,206 @@ enum devlink_health_reporter_state { struct devlink_health_reporter_ops { char *name; int (*recover)(struct devlink_health_reporter *reporter, - void *priv_ctx); + void *priv_ctx, struct netlink_ext_ack *extack); int (*dump)(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg, void *priv_ctx); + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack); int (*diagnose)(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg); + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack); +}; + +/** + * struct devlink_trap_group - Immutable packet trap group attributes. + * @name: Trap group name. + * @id: Trap group identifier. + * @generic: Whether the trap group is generic or not. + * + * Describes immutable attributes of packet trap groups that drivers register + * with devlink. + */ +struct devlink_trap_group { + const char *name; + u16 id; + bool generic; +}; + +#define DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT BIT(0) + +/** + * struct devlink_trap - Immutable packet trap attributes. + * @type: Trap type. + * @init_action: Initial trap action. + * @generic: Whether the trap is generic or not. + * @id: Trap identifier. + * @name: Trap name. + * @group: Immutable packet trap group attributes. + * @metadata_cap: Metadata types that can be provided by the trap. + * + * Describes immutable attributes of packet traps that drivers register with + * devlink. 
+ */ +struct devlink_trap { + enum devlink_trap_type type; + enum devlink_trap_action init_action; + bool generic; + u16 id; + const char *name; + struct devlink_trap_group group; + u32 metadata_cap; }; +/* All traps must be documented in + * Documentation/networking/devlink/devlink-trap.rst + */ +enum devlink_trap_generic_id { + DEVLINK_TRAP_GENERIC_ID_SMAC_MC, + DEVLINK_TRAP_GENERIC_ID_VLAN_TAG_MISMATCH, + DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER, + DEVLINK_TRAP_GENERIC_ID_INGRESS_STP_FILTER, + DEVLINK_TRAP_GENERIC_ID_EMPTY_TX_LIST, + DEVLINK_TRAP_GENERIC_ID_PORT_LOOPBACK_FILTER, + DEVLINK_TRAP_GENERIC_ID_BLACKHOLE_ROUTE, + DEVLINK_TRAP_GENERIC_ID_TTL_ERROR, + DEVLINK_TRAP_GENERIC_ID_TAIL_DROP, + DEVLINK_TRAP_GENERIC_ID_NON_IP_PACKET, + DEVLINK_TRAP_GENERIC_ID_UC_DIP_MC_DMAC, + DEVLINK_TRAP_GENERIC_ID_DIP_LB, + DEVLINK_TRAP_GENERIC_ID_SIP_MC, + DEVLINK_TRAP_GENERIC_ID_SIP_LB, + DEVLINK_TRAP_GENERIC_ID_CORRUPTED_IP_HDR, + DEVLINK_TRAP_GENERIC_ID_IPV4_SIP_BC, + DEVLINK_TRAP_GENERIC_ID_IPV6_MC_DIP_RESERVED_SCOPE, + DEVLINK_TRAP_GENERIC_ID_IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE, + DEVLINK_TRAP_GENERIC_ID_MTU_ERROR, + DEVLINK_TRAP_GENERIC_ID_UNRESOLVED_NEIGH, + DEVLINK_TRAP_GENERIC_ID_RPF, + DEVLINK_TRAP_GENERIC_ID_REJECT_ROUTE, + DEVLINK_TRAP_GENERIC_ID_IPV4_LPM_UNICAST_MISS, + DEVLINK_TRAP_GENERIC_ID_IPV6_LPM_UNICAST_MISS, + DEVLINK_TRAP_GENERIC_ID_NON_ROUTABLE, + DEVLINK_TRAP_GENERIC_ID_DECAP_ERROR, + DEVLINK_TRAP_GENERIC_ID_OVERLAY_SMAC_MC, + + /* Add new generic trap IDs above */ + __DEVLINK_TRAP_GENERIC_ID_MAX, + DEVLINK_TRAP_GENERIC_ID_MAX = __DEVLINK_TRAP_GENERIC_ID_MAX - 1, +}; + +/* All trap groups must be documented in + * Documentation/networking/devlink/devlink-trap.rst + */ +enum devlink_trap_group_generic_id { + DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS, + DEVLINK_TRAP_GROUP_GENERIC_ID_L3_DROPS, + DEVLINK_TRAP_GROUP_GENERIC_ID_BUFFER_DROPS, + DEVLINK_TRAP_GROUP_GENERIC_ID_TUNNEL_DROPS, + + /* Add new generic trap group IDs above */ + __DEVLINK_TRAP_GROUP_GENERIC_ID_MAX, + DEVLINK_TRAP_GROUP_GENERIC_ID_MAX = + __DEVLINK_TRAP_GROUP_GENERIC_ID_MAX - 1, +}; + +#define DEVLINK_TRAP_GENERIC_NAME_SMAC_MC \ + "source_mac_is_multicast" +#define DEVLINK_TRAP_GENERIC_NAME_VLAN_TAG_MISMATCH \ + "vlan_tag_mismatch" +#define DEVLINK_TRAP_GENERIC_NAME_INGRESS_VLAN_FILTER \ + "ingress_vlan_filter" +#define DEVLINK_TRAP_GENERIC_NAME_INGRESS_STP_FILTER \ + "ingress_spanning_tree_filter" +#define DEVLINK_TRAP_GENERIC_NAME_EMPTY_TX_LIST \ + "port_list_is_empty" +#define DEVLINK_TRAP_GENERIC_NAME_PORT_LOOPBACK_FILTER \ + "port_loopback_filter" +#define DEVLINK_TRAP_GENERIC_NAME_BLACKHOLE_ROUTE \ + "blackhole_route" +#define DEVLINK_TRAP_GENERIC_NAME_TTL_ERROR \ + "ttl_value_is_too_small" +#define DEVLINK_TRAP_GENERIC_NAME_TAIL_DROP \ + "tail_drop" +#define DEVLINK_TRAP_GENERIC_NAME_NON_IP_PACKET \ + "non_ip" +#define DEVLINK_TRAP_GENERIC_NAME_UC_DIP_MC_DMAC \ + "uc_dip_over_mc_dmac" +#define DEVLINK_TRAP_GENERIC_NAME_DIP_LB \ + "dip_is_loopback_address" +#define DEVLINK_TRAP_GENERIC_NAME_SIP_MC \ + "sip_is_mc" +#define DEVLINK_TRAP_GENERIC_NAME_SIP_LB \ + "sip_is_loopback_address" +#define DEVLINK_TRAP_GENERIC_NAME_CORRUPTED_IP_HDR \ + "ip_header_corrupted" +#define DEVLINK_TRAP_GENERIC_NAME_IPV4_SIP_BC \ + "ipv4_sip_is_limited_bc" +#define DEVLINK_TRAP_GENERIC_NAME_IPV6_MC_DIP_RESERVED_SCOPE \ + "ipv6_mc_dip_reserved_scope" +#define DEVLINK_TRAP_GENERIC_NAME_IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE \ + "ipv6_mc_dip_interface_local_scope" +#define DEVLINK_TRAP_GENERIC_NAME_MTU_ERROR \ + 
"mtu_value_is_too_small" +#define DEVLINK_TRAP_GENERIC_NAME_UNRESOLVED_NEIGH \ + "unresolved_neigh" +#define DEVLINK_TRAP_GENERIC_NAME_RPF \ + "mc_reverse_path_forwarding" +#define DEVLINK_TRAP_GENERIC_NAME_REJECT_ROUTE \ + "reject_route" +#define DEVLINK_TRAP_GENERIC_NAME_IPV4_LPM_UNICAST_MISS \ + "ipv4_lpm_miss" +#define DEVLINK_TRAP_GENERIC_NAME_IPV6_LPM_UNICAST_MISS \ + "ipv6_lpm_miss" +#define DEVLINK_TRAP_GENERIC_NAME_NON_ROUTABLE \ + "non_routable_packet" +#define DEVLINK_TRAP_GENERIC_NAME_DECAP_ERROR \ + "decap_error" +#define DEVLINK_TRAP_GENERIC_NAME_OVERLAY_SMAC_MC \ + "overlay_smac_is_mc" + +#define DEVLINK_TRAP_GROUP_GENERIC_NAME_L2_DROPS \ + "l2_drops" +#define DEVLINK_TRAP_GROUP_GENERIC_NAME_L3_DROPS \ + "l3_drops" +#define DEVLINK_TRAP_GROUP_GENERIC_NAME_BUFFER_DROPS \ + "buffer_drops" +#define DEVLINK_TRAP_GROUP_GENERIC_NAME_TUNNEL_DROPS \ + "tunnel_drops" + +#define DEVLINK_TRAP_GENERIC(_type, _init_action, _id, _group, _metadata_cap) \ + { \ + .type = DEVLINK_TRAP_TYPE_##_type, \ + .init_action = DEVLINK_TRAP_ACTION_##_init_action, \ + .generic = true, \ + .id = DEVLINK_TRAP_GENERIC_ID_##_id, \ + .name = DEVLINK_TRAP_GENERIC_NAME_##_id, \ + .group = _group, \ + .metadata_cap = _metadata_cap, \ + } + +#define DEVLINK_TRAP_DRIVER(_type, _init_action, _id, _name, _group, \ + _metadata_cap) \ + { \ + .type = DEVLINK_TRAP_TYPE_##_type, \ + .init_action = DEVLINK_TRAP_ACTION_##_init_action, \ + .generic = false, \ + .id = _id, \ + .name = _name, \ + .group = _group, \ + .metadata_cap = _metadata_cap, \ + } + +#define DEVLINK_TRAP_GROUP_GENERIC(_id) \ + { \ + .name = DEVLINK_TRAP_GROUP_GENERIC_NAME_##_id, \ + .id = DEVLINK_TRAP_GROUP_GENERIC_ID_##_id, \ + .generic = true, \ + } + struct devlink_ops { - int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack); + int (*reload_down)(struct devlink *devlink, bool netns_change, + struct netlink_ext_ack *extack); + int (*reload_up)(struct devlink *devlink, + struct netlink_ext_ack *extack); int (*port_type_set)(struct devlink_port *devlink_port, enum devlink_port_type port_type); int (*port_split)(struct devlink *devlink, unsigned int port_index, @@ -558,6 +775,38 @@ struct devlink_ops { int (*flash_update)(struct devlink *devlink, const char *file_name, const char *component, struct netlink_ext_ack *extack); + /** + * @trap_init: Trap initialization function. + * + * Should be used by device drivers to initialize the trap in the + * underlying device. Drivers should also store the provided trap + * context, so that they could efficiently pass it to + * devlink_trap_report() when the trap is triggered. + */ + int (*trap_init)(struct devlink *devlink, + const struct devlink_trap *trap, void *trap_ctx); + /** + * @trap_fini: Trap de-initialization function. + * + * Should be used by device drivers to de-initialize the trap in the + * underlying device. + */ + void (*trap_fini)(struct devlink *devlink, + const struct devlink_trap *trap, void *trap_ctx); + /** + * @trap_action_set: Trap action set function. + */ + int (*trap_action_set)(struct devlink *devlink, + const struct devlink_trap *trap, + enum devlink_trap_action action); + /** + * @trap_group_init: Trap group initialization function. + * + * Should be used by device drivers to initialize the trap group in the + * underlying device. 
+ */ + int (*trap_group_init)(struct devlink *devlink, + const struct devlink_trap_group *group); }; static inline void *devlink_priv(struct devlink *devlink) @@ -591,9 +840,13 @@ static inline struct devlink *netdev_to_devlink(struct net_device *dev) struct ib_device; +struct net *devlink_net(const struct devlink *devlink); +void devlink_net_set(struct devlink *devlink, struct net *net); struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size); int devlink_register(struct devlink *devlink, struct device *dev); void devlink_unregister(struct devlink *devlink); +void devlink_reload_enable(struct devlink *devlink); +void devlink_reload_disable(struct devlink *devlink); void devlink_free(struct devlink *devlink); int devlink_port_register(struct devlink *devlink, struct devlink_port *devlink_port, @@ -701,8 +954,8 @@ struct devlink_region *devlink_region_create(struct devlink *devlink, u32 region_max_snapshots, u64 region_size); void devlink_region_destroy(struct devlink_region *region); -u32 devlink_region_shapshot_id_get(struct devlink *devlink); -int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len, +u32 devlink_region_snapshot_id_get(struct devlink *devlink); +int devlink_region_snapshot_create(struct devlink_region *region, u8 *data, u32 snapshot_id, devlink_snapshot_data_dest_t *data_destructor); int devlink_info_serial_number_put(struct devlink_info_req *req, @@ -734,8 +987,6 @@ int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value); int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value); int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value); int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value); -int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, - u16 value_len); int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name, bool value); @@ -748,7 +999,7 @@ int devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name, int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name, const char *value); int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, - const void *value, u16 value_len); + const void *value, u32 value_len); struct devlink_health_reporter * devlink_health_reporter_create(struct devlink *devlink, @@ -765,6 +1016,10 @@ int devlink_health_report(struct devlink_health_reporter *reporter, void devlink_health_reporter_state_update(struct devlink_health_reporter *reporter, enum devlink_health_reporter_state state); +void +devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter); + +bool devlink_is_reload_failed(const struct devlink *devlink); void devlink_flash_update_begin_notify(struct devlink *devlink); void devlink_flash_update_end_notify(struct devlink *devlink); @@ -774,6 +1029,17 @@ void devlink_flash_update_status_notify(struct devlink *devlink, unsigned long done, unsigned long total); +int devlink_traps_register(struct devlink *devlink, + const struct devlink_trap *traps, + size_t traps_count, void *priv); +void devlink_traps_unregister(struct devlink *devlink, + const struct devlink_trap *traps, + size_t traps_count); +void devlink_trap_report(struct devlink *devlink, + struct sk_buff *skb, void *trap_ctx, + struct devlink_port *in_devlink_port); +void *devlink_trap_ctx_priv(void *trap_ctx); + #if IS_ENABLED(CONFIG_NET_DEVLINK) void devlink_compat_running_version(struct net_device *dev, diff --git a/include/net/drop_monitor.h b/include/net/drop_monitor.h new file 
mode 100644 index 000000000000..2ab668461463 --- /dev/null +++ b/include/net/drop_monitor.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _NET_DROP_MONITOR_H_ +#define _NET_DROP_MONITOR_H_ + +#include <linux/ktime.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> + +/** + * struct net_dm_hw_metadata - Hardware-supplied packet metadata. + * @trap_group_name: Hardware trap group name. + * @trap_name: Hardware trap name. + * @input_dev: Input netdevice. + */ +struct net_dm_hw_metadata { + const char *trap_group_name; + const char *trap_name; + struct net_device *input_dev; +}; + +#if IS_ENABLED(CONFIG_NET_DROP_MONITOR) +void net_dm_hw_report(struct sk_buff *skb, + const struct net_dm_hw_metadata *hw_metadata); +#else +static inline void +net_dm_hw_report(struct sk_buff *skb, + const struct net_dm_hw_metadata *hw_metadata) +{ +} +#endif + +#endif /* _NET_DROP_MONITOR_H_ */ diff --git a/include/net/dsa.h b/include/net/dsa.h index 1e8650fa8acc..63495e3443ac 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -41,6 +41,9 @@ struct phylink_link_state; #define DSA_TAG_PROTO_TRAILER_VALUE 11 #define DSA_TAG_PROTO_8021Q_VALUE 12 #define DSA_TAG_PROTO_SJA1105_VALUE 13 +#define DSA_TAG_PROTO_KSZ8795_VALUE 14 +#define DSA_TAG_PROTO_OCELOT_VALUE 15 +#define DSA_TAG_PROTO_AR9331_VALUE 16 enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, @@ -57,6 +60,9 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_TRAILER = DSA_TAG_PROTO_TRAILER_VALUE, DSA_TAG_PROTO_8021Q = DSA_TAG_PROTO_8021Q_VALUE, DSA_TAG_PROTO_SJA1105 = DSA_TAG_PROTO_SJA1105_VALUE, + DSA_TAG_PROTO_KSZ8795 = DSA_TAG_PROTO_KSZ8795_VALUE, + DSA_TAG_PROTO_OCELOT = DSA_TAG_PROTO_OCELOT_VALUE, + DSA_TAG_PROTO_AR9331 = DSA_TAG_PROTO_AR9331_VALUE, }; struct packet_type; @@ -84,7 +90,6 @@ struct dsa_device_ops { struct dsa_skb_cb { struct sk_buff *clone; - bool deferred_xmit; }; struct __dsa_skb_cb { @@ -92,8 +97,6 @@ struct __dsa_skb_cb { u8 priv[48 - sizeof(struct dsa_skb_cb)]; }; -#define __DSA_SKB_CB(skb) ((struct __dsa_skb_cb *)((skb)->cb)) - #define DSA_SKB_CB(skb) ((struct dsa_skb_cb *)((skb)->cb)) #define DSA_SKB_CB_PRIV(skb) \ @@ -120,15 +123,11 @@ struct dsa_switch_tree { */ struct dsa_platform_data *pd; - /* - * The switch port to which the CPU is attached. - */ - struct dsa_port *cpu_dp; + /* List of switch ports */ + struct list_head ports; - /* - * Data for the individual switch chips. - */ - struct dsa_switch *ds[DSA_MAX_SWITCHES]; + /* List of DSA links composing the routing table */ + struct list_head rtable; }; /* TC matchall action types, only mirroring for now */ @@ -192,8 +191,7 @@ struct dsa_port { struct phylink *pl; struct phylink_config pl_config; - struct work_struct xmit_work; - struct sk_buff_head xmit_queue; + struct list_head list; /* * Give the switch driver somewhere to hang its per-port private data @@ -210,9 +208,24 @@ struct dsa_port { * Original copy of the master netdev net_device_ops */ const struct net_device_ops *orig_ndo_ops; + + bool setup; +}; + +/* TODO: ideally DSA ports would have a single dp->link_dp member, + * and no dst->rtable nor this struct dsa_link would be needed, + * but this would require some more complex tree walking, + * so keep it stupid at the moment and list them all. 
+ */ +struct dsa_link { + struct dsa_port *dp; + struct dsa_port *link_dp; + struct list_head list; }; struct dsa_switch { + bool setup; + struct device *dev; /* @@ -241,13 +254,6 @@ struct dsa_switch { const struct dsa_switch_ops *ops; /* - * An array of which element [a] indicates which port on this - * switch should be used to send packets to that are destined - * for switch a. Can be NULL if there is only one switch chip. - */ - s8 rtable[DSA_MAX_SWITCHES]; - - /* * Slave mii_bus and devices for the individual ports. */ u32 phys_mii_mask; @@ -273,17 +279,24 @@ struct dsa_switch { */ bool vlan_filtering; - unsigned long *bitmap; - unsigned long _bitmap; + /* MAC PCS does not provide link state change interrupt, and requires + * polling. Flag passed on to PHYLINK. + */ + bool pcs_poll; - /* Dynamically allocated ports, keep last */ size_t num_ports; - struct dsa_port ports[]; }; -static inline const struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p) +static inline struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p) { - return &ds->ports[p]; + struct dsa_switch_tree *dst = ds->dst; + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dp->ds == ds && dp->index == p) + return dp; + + return NULL; } static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p) @@ -318,6 +331,19 @@ static inline u32 dsa_user_ports(struct dsa_switch *ds) return mask; } +/* Return the local port used to reach an arbitrary switch device */ +static inline unsigned int dsa_routing_port(struct dsa_switch *ds, int device) +{ + struct dsa_switch_tree *dst = ds->dst; + struct dsa_link *dl; + + list_for_each_entry(dl, &dst->rtable, list) + if (dl->dp->ds == ds && dl->link_dp->ds->index == device) + return dl->dp->index; + + return ds->num_ports; +} + /* Return the local port used to reach an arbitrary switch port */ static inline unsigned int dsa_towards_port(struct dsa_switch *ds, int device, int port) @@ -325,7 +351,7 @@ static inline unsigned int dsa_towards_port(struct dsa_switch *ds, int device, if (device == ds->index) return port; else - return ds->rtable[device]; + return dsa_routing_port(ds, device); } /* Return the local port used to reach the dedicated CPU port */ @@ -354,7 +380,8 @@ typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid, bool is_static, void *data); struct dsa_switch_ops { enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds, - int port); + int port, + enum dsa_tag_protocol mprot); int (*setup)(struct dsa_switch *ds); void (*teardown)(struct dsa_switch *ds); @@ -516,6 +543,8 @@ struct dsa_switch_ops { bool ingress); void (*port_mirror_del)(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror); + int (*port_setup_tc)(struct dsa_switch *ds, int port, + enum tc_setup_type type, void *type_data); /* * Cross-chip operations @@ -537,11 +566,45 @@ struct dsa_switch_ops { bool (*port_rxtstamp)(struct dsa_switch *ds, int port, struct sk_buff *skb, unsigned int type); - /* - * Deferred frame Tx - */ - netdev_tx_t (*port_deferred_xmit)(struct dsa_switch *ds, int port, - struct sk_buff *skb); + /* Devlink parameters */ + int (*devlink_param_get)(struct dsa_switch *ds, u32 id, + struct devlink_param_gset_ctx *ctx); + int (*devlink_param_set)(struct dsa_switch *ds, u32 id, + struct devlink_param_gset_ctx *ctx); +}; + +#define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes) \ + DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes, \ + dsa_devlink_param_get, dsa_devlink_param_set, NULL) + +int dsa_devlink_param_get(struct 
devlink *dl, u32 id, + struct devlink_param_gset_ctx *ctx); +int dsa_devlink_param_set(struct devlink *dl, u32 id, + struct devlink_param_gset_ctx *ctx); +int dsa_devlink_params_register(struct dsa_switch *ds, + const struct devlink_param *params, + size_t params_count); +void dsa_devlink_params_unregister(struct dsa_switch *ds, + const struct devlink_param *params, + size_t params_count); +int dsa_devlink_resource_register(struct dsa_switch *ds, + const char *resource_name, + u64 resource_size, + u64 resource_id, + u64 parent_resource_id, + const struct devlink_resource_size_params *size_params); + +void dsa_devlink_resources_unregister(struct dsa_switch *ds); + +void dsa_devlink_resource_occ_get_register(struct dsa_switch *ds, + u64 resource_id, + devlink_resource_occ_get_t *occ_get, + void *occ_get_priv); +void dsa_devlink_resource_occ_get_unregister(struct dsa_switch *ds, + u64 resource_id); + +struct dsa_devlink_priv { + struct dsa_switch *ds; }; struct dsa_switch_driver { @@ -569,7 +632,6 @@ static inline bool dsa_can_decode(const struct sk_buff *skb, return false; } -struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n); void dsa_unregister_switch(struct dsa_switch *ds); int dsa_register_switch(struct dsa_switch *ds); #ifdef CONFIG_PM_SLEEP diff --git a/include/net/dsfield.h b/include/net/dsfield.h index 1a245ee10c95..a59a57ffc546 100644 --- a/include/net/dsfield.h +++ b/include/net/dsfield.h @@ -21,7 +21,7 @@ static inline __u8 ipv4_get_dsfield(const struct iphdr *iph) static inline __u8 ipv6_get_dsfield(const struct ipv6hdr *ipv6h) { - return ntohs(*(const __be16 *)ipv6h) >> 4; + return ntohs(*(__force const __be16 *)ipv6h) >> 4; } diff --git a/include/net/dst.h b/include/net/dst.h index fe62fe2eb781..3448cf865ede 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -82,7 +82,7 @@ struct dst_entry { struct dst_metrics { u32 metrics[RTAX_MAX]; refcount_t refcnt; -}; +} __aligned(4); /* Low pointer bits contain DST_METRICS_FLAGS */ extern const struct dst_metrics dst_default_metrics; u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); @@ -516,7 +516,16 @@ static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu) struct dst_entry *dst = skb_dst(skb); if (dst && dst->ops->update_pmtu) - dst->ops->update_pmtu(dst, NULL, skb, mtu); + dst->ops->update_pmtu(dst, NULL, skb, mtu, true); +} + +/* update dst pmtu but not do neighbor confirm */ +static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu) +{ + struct dst_entry *dst = skb_dst(skb); + + if (dst && dst->ops->update_pmtu) + dst->ops->update_pmtu(dst, NULL, skb, mtu, false); } static inline void skb_tunnel_check_pmtu(struct sk_buff *skb, @@ -526,7 +535,7 @@ static inline void skb_tunnel_check_pmtu(struct sk_buff *skb, u32 encap_mtu = dst_mtu(encap_dst); if (skb->len > encap_mtu - headroom) - skb_dst_update_pmtu(skb, encap_mtu - headroom); + skb_dst_update_pmtu_no_confirm(skb, encap_mtu - headroom); } #endif /* _NET_DST_H */ diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 5ec645f27ee3..443863c7b8da 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -27,7 +27,8 @@ struct dst_ops { struct dst_entry * (*negative_advice)(struct dst_entry *); void (*link_failure)(struct sk_buff *); void (*update_pmtu)(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu); + struct sk_buff *skb, u32 mtu, + bool confirm_neigh); void (*redirect)(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); int (*local_out)(struct net *net, 
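
Every ->update_pmtu() implementation gains a confirm_neigh argument, and skb_dst_update_pmtu_no_confirm() passes false so tunnel PMTU updates stop confirming (and thereby keeping alive) possibly stale neighbour entries. A sketch of how an implementation might honour the flag; foo_set_cached_mtu() is a hypothetical placeholder for the dst's own MTU bookkeeping:

#include <linux/ip.h>
#include <net/dst.h>

static void foo_set_cached_mtu(struct dst_entry *dst, u32 mtu);

static void foo_update_pmtu(struct dst_entry *dst, struct sock *sk,
                            struct sk_buff *skb, u32 mtu,
                            bool confirm_neigh)
{
        foo_set_cached_mtu(dst, mtu);

        /* skb_dst_update_pmtu_no_confirm() reaches here with false */
        if (confirm_neigh)
                dst_confirm_neigh(dst, &ip_hdr(skb)->daddr);
}
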
struct sock *sk, struct sk_buff *skb); diff --git a/include/net/espintcp.h b/include/net/espintcp.h new file mode 100644 index 000000000000..dd7026a00066 --- /dev/null +++ b/include/net/espintcp.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NET_ESPINTCP_H +#define _NET_ESPINTCP_H + +#include <net/strparser.h> +#include <linux/skmsg.h> + +void __init espintcp_init(void); + +int espintcp_push_skb(struct sock *sk, struct sk_buff *skb); +int espintcp_queue_out(struct sock *sk, struct sk_buff *skb); +bool tcp_is_ulp_esp(struct sock *sk); + +struct espintcp_msg { + struct sk_buff *skb; + struct sk_msg skmsg; + int offset; + int len; +}; + +struct espintcp_ctx { + struct strparser strp; + struct sk_buff_head ike_queue; + struct sk_buff_head out_queue; + struct espintcp_msg partial; + void (*saved_data_ready)(struct sock *sk); + void (*saved_write_space)(struct sock *sk); + struct work_struct work; + bool tx_running; +}; + +static inline struct espintcp_ctx *espintcp_getctx(const struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + /* RCU is only needed for diag */ + return (__force void *)icsk->icsk_ulp_data; +} +#endif diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h index c49d7bfb5c30..6d59221ff05a 100644 --- a/include/net/fib_notifier.h +++ b/include/net/fib_notifier.h @@ -8,7 +8,6 @@ struct module; struct fib_notifier_info { - struct net *net; int family; struct netlink_ext_ack *extack; }; @@ -30,19 +29,21 @@ struct fib_notifier_ops { int family; struct list_head list; unsigned int (*fib_seq_read)(struct net *net); - int (*fib_dump)(struct net *net, struct notifier_block *nb); + int (*fib_dump)(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack); struct module *owner; struct rcu_head rcu; }; -int call_fib_notifier(struct notifier_block *nb, struct net *net, +int call_fib_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib_notifier_info *info); int call_fib_notifiers(struct net *net, enum fib_event_type event_type, struct fib_notifier_info *info); -int register_fib_notifier(struct notifier_block *nb, - void (*cb)(struct notifier_block *nb)); -int unregister_fib_notifier(struct notifier_block *nb); +int register_fib_notifier(struct net *net, struct notifier_block *nb, + void (*cb)(struct notifier_block *nb), + struct netlink_ext_ack *extack); +int unregister_fib_notifier(struct net *net, struct notifier_block *nb); struct fib_notifier_ops * fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net); void fib_notifier_ops_unregister(struct fib_notifier_ops *ops); diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 20dcadd8eed9..54e227e6b06a 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -194,7 +194,8 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table, u32 flags); bool fib_rule_matchall(const struct fib_rule *rule); -int fib_rules_dump(struct net *net, struct notifier_block *nb, int family); +int fib_rules_dump(struct net *net, struct notifier_block *nb, int family, + struct netlink_ext_ack *extack); unsigned int fib_rules_seq_read(struct net *net, int family); int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 90bd210be060..e9391e877f9a 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -4,8 +4,11 @@ #include 
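
register_fib_notifier() becomes per network namespace and takes an extack, while fib_notifier_info loses its net pointer since the namespace is now implied by the registration. A sketch of the new calling convention; all foo_* names are hypothetical:

static int foo_fib_event(struct notifier_block *nb, unsigned long event,
                         void *ptr)
{
        struct fib_notifier_info *info = ptr;   /* no ->net field anymore */

        return NOTIFY_DONE;
}

static void foo_fib_dump_flush(struct notifier_block *nb)
{
        /* flush driver state before the initial dump is replayed */
}

static int foo_register(struct net *net, struct notifier_block *nb,
                        struct netlink_ext_ack *extack)
{
        return register_fib_notifier(net, nb, foo_fib_dump_flush, extack);
}
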
<linux/types.h> #include <linux/in6.h> +#include <linux/siphash.h> #include <uapi/linux/if_ether.h> +struct sk_buff; + /** * struct flow_dissector_key_control: * @thoff: Transport header offset @@ -30,7 +33,6 @@ enum flow_dissect_ret { /** * struct flow_dissector_key_basic: - * @thoff: Transport header offset * @n_proto: Network header protocol (eg. IPv4/IPv6) * @ip_proto: Transport header protocol (eg. TCP/UDP) */ @@ -45,9 +47,14 @@ struct flow_dissector_key_tags { }; struct flow_dissector_key_vlan { - u16 vlan_id:12, - vlan_dei:1, - vlan_priority:3; + union { + struct { + u16 vlan_id:12, + vlan_dei:1, + vlan_priority:3; + }; + __be16 vlan_tci; + }; __be16 vlan_tpid; }; @@ -156,19 +163,16 @@ struct flow_dissector_key_ports { /** * flow_dissector_key_icmp: - * @ports: type and code of ICMP header - * icmp: ICMP type (high) and code (low) * type: ICMP type * code: ICMP code + * id: session identifier */ struct flow_dissector_key_icmp { - union { - __be16 icmp; - struct { - u8 type; - u8 code; - }; + struct { + u8 type; + u8 code; }; + u16 id; }; /** @@ -203,9 +207,11 @@ struct flow_dissector_key_ip { /** * struct flow_dissector_key_meta: * @ingress_ifindex: ingress ifindex + * @ingress_iftype: ingress interface type */ struct flow_dissector_key_meta { int ingress_ifindex; + u16 ingress_iftype; }; /** @@ -228,6 +234,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */ FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ + FLOW_DISSECTOR_KEY_PORTS_RANGE, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_ICMP, /* struct flow_dissector_key_icmp */ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ FLOW_DISSECTOR_KEY_TIPC, /* struct flow_dissector_key_tipc */ @@ -276,12 +283,14 @@ struct flow_keys_basic { struct flow_keys { struct flow_dissector_key_control control; #define FLOW_KEYS_HASH_START_FIELD basic - struct flow_dissector_key_basic basic; + struct flow_dissector_key_basic basic __aligned(SIPHASH_ALIGNMENT); struct flow_dissector_key_tags tags; struct flow_dissector_key_vlan vlan; struct flow_dissector_key_vlan cvlan; struct flow_dissector_key_keyid keyid; struct flow_dissector_key_ports ports; + struct flow_dissector_key_icmp icmp; + /* 'addrs' must be the last member */ struct flow_dissector_key_addrs addrs; }; @@ -315,6 +324,9 @@ static inline bool flow_keys_have_l4(const struct flow_keys *keys) } u32 flow_hash_from_keys(struct flow_keys *keys); +void skb_flow_get_icmp_tci(const struct sk_buff *skb, + struct flow_dissector_key_icmp *key_icmp, + void *data, int thoff, int hlen); static inline bool dissector_uses_key(const struct flow_dissector *flow_dissector, enum flow_dissector_key_id key_id) diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index b16d21636d69..c6f7bd22db60 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -4,6 +4,7 @@ #include <linux/kernel.h> #include <linux/list.h> #include <net/flow_dissector.h> +#include <linux/rhashtable.h> struct flow_match { struct flow_dissector *dissector; @@ -117,6 +118,8 @@ enum flow_action_id { FLOW_ACTION_GOTO, FLOW_ACTION_REDIRECT, FLOW_ACTION_MIRRED, + FLOW_ACTION_REDIRECT_INGRESS, + FLOW_ACTION_MIRRED_INGRESS, FLOW_ACTION_VLAN_PUSH, FLOW_ACTION_VLAN_POP, FLOW_ACTION_VLAN_MANGLE, @@ -126,11 +129,16 @@ enum flow_action_id { FLOW_ACTION_ADD, FLOW_ACTION_CSUM, FLOW_ACTION_MARK, + FLOW_ACTION_PTYPE, FLOW_ACTION_WAKE, 
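
The vlan key becomes a union so that the 12/1/3-bit fields can also be handled as one 16-bit TCI word, e.g. for masked matching where key, mask and target all share the same representation. A trivial sketch, assuming a hypothetical matcher:

static bool foo_vlan_match(const struct flow_dissector_key_vlan *key,
                           const struct flow_dissector_key_vlan *mask,
                           const struct flow_dissector_key_vlan *target)
{
        /* one masked compare instead of three bitfield compares */
        return !((key->vlan_tci ^ target->vlan_tci) & mask->vlan_tci);
}
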
FLOW_ACTION_QUEUE, FLOW_ACTION_SAMPLE, FLOW_ACTION_POLICE, FLOW_ACTION_CT, + FLOW_ACTION_MPLS_PUSH, + FLOW_ACTION_MPLS_POP, + FLOW_ACTION_MPLS_MANGLE, + NUM_FLOW_ACTIONS, }; /* This is mirroring enum pedit_header_type definition for easy mapping between @@ -146,8 +154,12 @@ enum flow_action_mangle_base { FLOW_ACT_MANGLE_HDR_TYPE_UDP, }; +typedef void (*action_destr)(void *priv); + struct flow_action_entry { enum flow_action_id id; + action_destr destructor; + void *destructor_priv; union { u32 chain_index; /* FLOW_ACTION_GOTO */ struct net_device *dev; /* FLOW_ACTION_REDIRECT */ @@ -162,9 +174,10 @@ struct flow_action_entry { u32 mask; u32 val; } mangle; - const struct ip_tunnel_info *tunnel; /* FLOW_ACTION_TUNNEL_ENCAP */ + struct ip_tunnel_info *tunnel; /* FLOW_ACTION_TUNNEL_ENCAP */ u32 csum_flags; /* FLOW_ACTION_CSUM */ u32 mark; /* FLOW_ACTION_MARK */ + u16 ptype; /* FLOW_ACTION_PTYPE */ struct { /* FLOW_ACTION_QUEUE */ u32 ctx; u32 index; @@ -184,6 +197,22 @@ struct flow_action_entry { int action; u16 zone; } ct; + struct { /* FLOW_ACTION_MPLS_PUSH */ + u32 label; + __be16 proto; + u8 tc; + u8 bos; + u8 ttl; + } mpls_push; + struct { /* FLOW_ACTION_MPLS_POP */ + __be16 proto; + } mpls_pop; + struct { /* FLOW_ACTION_MPLS_MANGLE */ + u32 label; + u8 tc; + u8 bos; + u8 ttl; + } mpls_mangle; }; }; @@ -259,6 +288,7 @@ struct flow_block_offload { enum flow_block_command command; enum flow_block_binder_type binder_type; bool block_shared; + bool unlocked_driver_cb; struct net *net; struct flow_block *block; struct list_head cb_list; @@ -347,4 +377,39 @@ static inline void flow_block_init(struct flow_block *flow_block) INIT_LIST_HEAD(&flow_block->cb_list); } +typedef int flow_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv, + enum tc_setup_type type, void *type_data); + +typedef void flow_indr_block_cmd_t(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, void *cb_priv, + enum flow_block_command command); + +struct flow_indr_block_entry { + flow_indr_block_cmd_t *cb; + struct list_head list; +}; + +void flow_indr_add_block_cb(struct flow_indr_block_entry *entry); + +void flow_indr_del_block_cb(struct flow_indr_block_entry *entry); + +int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv, + flow_indr_block_bind_cb_t *cb, + void *cb_ident); + +void __flow_indr_block_cb_unregister(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_ident); + +int flow_indr_block_cb_register(struct net_device *dev, void *cb_priv, + flow_indr_block_bind_cb_t *cb, void *cb_ident); + +void flow_indr_block_cb_unregister(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_ident); + +void flow_indr_block_call(struct net_device *dev, + struct flow_block_offload *bo, + enum flow_block_command command); + #endif /* _NET_FLOW_OFFLOAD_H */ diff --git a/include/net/fq.h b/include/net/fq.h index d126b5d20261..2ad85e683041 100644 --- a/include/net/fq.h +++ b/include/net/fq.h @@ -69,7 +69,7 @@ struct fq { struct list_head backlogs; spinlock_t lock; u32 flows_cnt; - u32 perturbation; + siphash_key_t perturbation; u32 limit; u32 memory_limit; u32 memory_usage; diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index be40a4b327e3..38a9a3d1222b 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -108,7 +108,7 @@ begin: static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb) { - u32 hash = skb_get_hash_perturb(skb, fq->perturbation); + u32 hash = skb_get_hash_perturb(skb, &fq->perturbation); return reciprocal_scale(hash, fq->flows_cnt); } @@ 
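
The flow_indr_block_* API lets a driver receive block bind/unbind events for netdevs it does not own (typically tunnel devices), so it can offload flows installed on them. A sketch of the registration side; foo_* names are hypothetical:

#include <linux/netdevice.h>
#include <net/flow_offload.h>

static int foo_indr_setup_tc(struct net_device *dev, void *cb_priv,
                             enum tc_setup_type type, void *type_data)
{
        switch (type) {
        case TC_SETUP_BLOCK:
                /* bind/unbind offload callbacks for flows on @dev */
                return 0;
        default:
                return -EOPNOTSUPP;
        }
}

static int foo_watch_tunnel_dev(struct net_device *dev, void *priv)
{
        return flow_indr_block_cb_register(dev, priv, foo_indr_setup_tc,
                                           priv);
}
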
-308,12 +308,12 @@ static int fq_init(struct fq *fq, int flows_cnt) INIT_LIST_HEAD(&fq->backlogs); spin_lock_init(&fq->lock); fq->flows_cnt = max_t(u32, flows_cnt, 1); - fq->perturbation = prandom_u32(); + get_random_bytes(&fq->perturbation, sizeof(fq->perturbation)); fq->quantum = 300; fq->limit = 8192; fq->memory_limit = 16 << 20; /* 16 MBytes */ - fq->flows = kcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL); + fq->flows = kvcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL); if (!fq->flows) return -ENOMEM; @@ -331,7 +331,7 @@ static void fq_reset(struct fq *fq, for (i = 0; i < fq->flows_cnt; i++) fq_flow_reset(fq, &fq->flows[i], free_func); - kfree(fq->flows); + kvfree(fq->flows); fq->flows = NULL; } diff --git a/include/net/garp.h b/include/net/garp.h index c41833bd4590..4d9a0c6a2e5f 100644 --- a/include/net/garp.h +++ b/include/net/garp.h @@ -37,7 +37,7 @@ struct garp_skb_cb { static inline struct garp_skb_cb *garp_cb(struct sk_buff *skb) { BUILD_BUG_ON(sizeof(struct garp_skb_cb) > - FIELD_SIZEOF(struct sk_buff, cb)); + sizeof_field(struct sk_buff, cb)); return (struct garp_skb_cb *)skb->cb; } diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index ca23860adbb9..1424e02cef90 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -7,6 +7,12 @@ #include <linux/rtnetlink.h> #include <linux/pkt_sched.h> +/* Note: this used to be in include/uapi/linux/gen_stats.h */ +struct gnet_stats_basic_packed { + __u64 bytes; + __u64 packets; +}; + struct gnet_stats_basic_cpu { struct gnet_stats_basic_packed bstats; struct u64_stats_sync syncp; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 9292f1c588b7..74950663bb00 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -75,8 +75,6 @@ struct genl_family { struct module *module; }; -struct nlattr **genl_family_attrbuf(const struct genl_family *family); - /** * struct genl_info - receiving information * @snd_seq: sending sequence number @@ -128,6 +126,24 @@ enum genl_validate_flags { }; /** + * struct genl_info - info that is available during dumpit op call + * @family: generic netlink family - for internal genl code usage + * @ops: generic netlink ops - for internal genl code usage + * @attrs: netlink attributes + */ +struct genl_dumpit_info { + const struct genl_family *family; + const struct genl_ops *ops; + struct nlattr **attrs; +}; + +static inline const struct genl_dumpit_info * +genl_dumpit_info(struct netlink_callback *cb) +{ + return cb->data; +} + +/** * struct genl_ops - generic netlink operations * @cmd: command identifier * @internal_flags: flags used by the family diff --git a/include/net/hwbm.h b/include/net/hwbm.h index 81643cf8a1c4..c81444611a22 100644 --- a/include/net/hwbm.h +++ b/include/net/hwbm.h @@ -21,9 +21,13 @@ void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf); int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp); int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num); #else -void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf) {} -int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp) { return 0; } -int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num) +static inline void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf) {} + +static inline int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp) +{ return 0; } + +static inline int hwbm_pool_add(struct hwbm_pool *bm_pool, + unsigned int buf_num) { return 0; } #endif /* CONFIG_HWBM */ #endif /* _HWBM_H */ diff --git a/include/net/icmp.h 
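
With genl_family_attrbuf() gone, dumpit handlers reach the attributes parsed by the genetlink core through genl_dumpit_info(cb). A minimal sketch; FOO_ATTR_FILTER and foo_nl_dump() are hypothetical:

#include <net/genetlink.h>

#define FOO_ATTR_FILTER 1       /* hypothetical attribute index */

static int foo_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        const struct genl_dumpit_info *info = genl_dumpit_info(cb);

        if (info->attrs[FOO_ATTR_FILTER])
                ;       /* restrict the dump based on the attribute */

        return 0;       /* 0 ends the dump; >0 means "call me again" */
}
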
b/include/net/icmp.h index 5d4bfdba9adf..9ac2d2672a93 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -43,6 +43,12 @@ static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 __icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt); } +#if IS_ENABLED(CONFIG_NF_NAT) +void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info); +#else +#define icmp_ndo_send icmp_send +#endif + int icmp_rcv(struct sk_buff *skb); int icmp_err(struct sk_buff *skb, u32 info); int icmp_init(void); diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 50037913c9b1..a01981d7108f 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -89,9 +89,9 @@ struct ip6_sf_socklist { struct ipv6_mc_socklist { struct in6_addr addr; int ifindex; + unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ struct ipv6_mc_socklist __rcu *next; rwlock_t sflock; - unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ struct ip6_sf_socklist *sflist; struct rcu_head rcu; }; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index c57d53e7e02c..895546058a20 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -97,7 +97,7 @@ struct inet_connection_sock { const struct tcp_congestion_ops *icsk_ca_ops; const struct inet_connection_sock_af_ops *icsk_af_ops; const struct tcp_ulp_ops *icsk_ulp_ops; - void *icsk_ulp_data; + void __rcu *icsk_ulp_data; void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq); struct hlist_node icsk_listen_portaddr_node; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index af2b4c065a04..d0019d3395cf 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -103,13 +103,19 @@ struct inet_bind_hashbucket { struct hlist_head chain; }; -/* - * Sockets can be hashed in established or listening table +/* Sockets can be hashed in established or listening table. + * We must use different 'nulls' end-of-chain value for all hash buckets : + * A socket might transition from ESTABLISH to LISTEN state without + * RCU grace period. A lookup in ehash table needs to handle this case. */ +#define LISTENING_NULLS_BASE (1U << 29) struct inet_listen_hashbucket { spinlock_t lock; unsigned int count; - struct hlist_head head; + union { + struct hlist_head head; + struct hlist_nulls_head nulls_head; + }; }; /* This is for listening sockets, thus all sockets which possess wildcards. 
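
icmp_ndo_send() is meant for ndo_start_xmit() context: with NF_NAT enabled it reverses any NAT on the packet before generating the ICMP error, and otherwise it falls back to plain icmp_send(). A simplified sketch of a tunnel driver using it; foo_start_xmit() and the MTU check are illustrative only:

#include <linux/icmp.h>
#include <linux/netdevice.h>
#include <net/icmp.h>

static netdev_tx_t foo_start_xmit(struct sk_buff *skb,
                                  struct net_device *dev)
{
        if (skb->len > dev->mtu) {      /* simplified "would fragment" check */
                icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
                              htonl(dev->mtu));
                kfree_skb(skb);
                return NETDEV_TX_OK;
        }

        /* a real driver would queue the skb to hardware here */
        kfree_skb(skb);
        return NETDEV_TX_OK;
}
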
*/ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 7769c9b36d75..34c4436fd18f 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -160,6 +160,7 @@ struct inet_cork { char priority; __u16 gso_size; u64 transmit_time; + u32 mark; }; struct inet_cork_full { diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index aef38c140014..dfd919b3119e 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -71,6 +71,7 @@ struct inet_timewait_sock { tw_pad : 2, /* 2 bits hole */ tw_tos : 8; u32 tw_txhash; + u32 tw_priority; struct timer_list tw_timer; struct inet_bind_bucket *tw_tb; }; diff --git a/include/net/ip.h b/include/net/ip.h index 29d89de39822..5b317c9f4470 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -88,6 +88,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm, { ipcm_init(ipcm); + ipcm->sockc.mark = inet->sk.sk_mark; ipcm->sockc.tsflags = inet->sk.sk_tsflags; ipcm->oif = inet->sk.sk_bound_dev_if; ipcm->addr = inet->inet_saddr; @@ -184,7 +185,7 @@ static inline struct sk_buff *ip_fraglist_next(struct ip_fraglist_iter *iter) } struct ip_frag_state { - struct iphdr *iph; + bool DF; unsigned int hlen; unsigned int ll_rs; unsigned int mtu; @@ -195,7 +196,7 @@ struct ip_frag_state { }; void ip_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int ll_rs, - unsigned int mtu, struct ip_frag_state *state); + unsigned int mtu, bool DF, struct ip_frag_state *state); struct sk_buff *ip_frag_next(struct sk_buff *skb, struct ip_frag_state *state); @@ -338,10 +339,10 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o void inet_get_local_port_range(struct net *net, int *low, int *high); #ifdef CONFIG_SYSCTL -static inline int inet_is_local_reserved_port(struct net *net, int port) +static inline bool inet_is_local_reserved_port(struct net *net, unsigned short port) { if (!net->ipv4.sysctl_local_reserved_ports) - return 0; + return false; return test_bit(port, net->ipv4.sysctl_local_reserved_ports); } @@ -350,20 +351,20 @@ static inline bool sysctl_dev_name_is_allowed(const char *name) return strcmp(name, "default") != 0 && strcmp(name, "all") != 0; } -static inline int inet_prot_sock(struct net *net) +static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port) { - return net->ipv4.sysctl_ip_prot_sock; + return port < net->ipv4.sysctl_ip_prot_sock; } #else -static inline int inet_is_local_reserved_port(struct net *net, int port) +static inline bool inet_is_local_reserved_port(struct net *net, unsigned short port) { - return 0; + return false; } -static inline int inet_prot_sock(struct net *net) +static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port) { - return PROT_SOCK; + return port < PROT_SOCK; } #endif @@ -759,4 +760,9 @@ int ip_misc_proc_init(void); int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, u8 family, struct netlink_ext_ack *extack); +static inline bool inetdev_valid_mtu(unsigned int mtu) +{ + return likely(mtu >= IPV4_MIN_MTU); +} + #endif /* _IP_H */ diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 4b5656c71abc..fd60a8ac02ee 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -90,7 +90,32 @@ struct fib6_gc_args { #ifndef CONFIG_IPV6_SUBTREES #define FIB6_SUBTREE(fn) NULL + +static inline bool fib6_routes_require_src(const struct net *net) +{ + return false; +} + +static inline void fib6_routes_require_src_inc(struct net *net) {} 
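
inet_prot_sock() is replaced by a predicate that performs the comparison itself, so callers no longer open-code "port < threshold". A sketch of the intended use; foo_may_bind() is hypothetical:

#include <linux/capability.h>
#include <net/ip.h>

static bool foo_may_bind(struct net *net, unsigned short port)
{
        /* replaces the old "port < inet_prot_sock(net)" comparison */
        if (inet_port_requires_bind_service(net, port) &&
            !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
                return false;

        return true;
}
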
+static inline void fib6_routes_require_src_dec(struct net *net) {} + #else + +static inline bool fib6_routes_require_src(const struct net *net) +{ + return net->ipv6.fib6_routes_require_src > 0; +} + +static inline void fib6_routes_require_src_inc(struct net *net) +{ + net->ipv6.fib6_routes_require_src++; +} + +static inline void fib6_routes_require_src_dec(struct net *net) +{ + net->ipv6.fib6_routes_require_src--; +} + #define FIB6_SUBTREE(fn) (rcu_dereference_protected((fn)->subtree, 1)) #endif @@ -167,7 +192,9 @@ struct fib6_info { dst_nopolicy:1, dst_host:1, fib6_destroying:1, - unused:3; + offload:1, + trap:1, + unused:1; struct rcu_head rcu; struct nexthop *nh; @@ -212,6 +239,11 @@ static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) return ((struct rt6_info *)dst)->rt6i_idev; } +static inline bool fib6_requires_src(const struct fib6_info *rt) +{ + return rt->fib6_src.plen > 0; +} + static inline void fib6_clean_expires(struct fib6_info *f6i) { f6i->fib6_flags &= ~RTF_EXPIRES; @@ -299,6 +331,13 @@ static inline void fib6_info_release(struct fib6_info *f6i) call_rcu(&f6i->rcu, fib6_info_destroy_rcu); } +static inline void fib6_info_hw_flags_set(struct fib6_info *f6i, bool offload, + bool trap) +{ + f6i->offload = offload; + f6i->trap = trap; +} + enum fib6_walk_state { #ifdef CONFIG_IPV6_SUBTREES FWS_S, @@ -457,6 +496,7 @@ int call_fib6_multipath_entry_notifiers(struct net *net, struct fib6_info *rt, unsigned int nsiblings, struct netlink_ext_ack *extack); +int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt); void fib6_rt_update(struct net *net, struct fib6_info *rt, struct nl_info *info); void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, @@ -478,7 +518,7 @@ struct ipv6_route_iter { extern const struct seq_operations ipv6_route_seq_ops; -int call_fib6_notifier(struct notifier_block *nb, struct net *net, +int call_fib6_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib_notifier_info *info); int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, @@ -488,7 +528,8 @@ int __net_init fib6_notifier_init(struct net *net); void __net_exit fib6_notifier_exit(struct net *net); unsigned int fib6_tables_seq_read(struct net *net); -int fib6_tables_dump(struct net *net, struct notifier_block *nb); +int fib6_tables_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack); void fib6_update_sernum(struct net *net, struct fib6_info *rt); void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt); @@ -501,10 +542,16 @@ static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric) } #ifdef CONFIG_IPV6_MULTIPLE_TABLES +static inline bool fib6_has_custom_rules(const struct net *net) +{ + return net->ipv6.fib6_has_custom_rules; +} + int fib6_rules_init(void); void fib6_rules_cleanup(void); bool fib6_rule_default(const struct fib_rule *rule); -int fib6_rules_dump(struct net *net, struct notifier_block *nb); +int fib6_rules_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack); unsigned int fib6_rules_seq_read(struct net *net); static inline bool fib6_rules_early_flow_dissect(struct net *net, @@ -525,6 +572,10 @@ static inline bool fib6_rules_early_flow_dissect(struct net *net, return true; } #else +static inline bool fib6_has_custom_rules(const struct net *net) +{ + return false; +} static inline int fib6_rules_init(void) { return 0; @@ -537,7 +588,8 @@ static inline bool fib6_rule_default(const struct fib_rule *rule) { 
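
The new offload/trap bits let a switchdev driver reflect the hardware state of an IPv6 route back to userspace (RTM_F_OFFLOAD / RTM_F_TRAP). A minimal sketch of the expected call after programming hardware; foo_fib6_programmed() is hypothetical:

static void foo_fib6_programmed(struct fib6_info *f6i, bool in_hw)
{
        /* offloaded routes are forwarded by hardware, trapped routes
         * are punted to the CPU instead
         */
        fib6_info_hw_flags_set(f6i, /* offload */ in_hw, /* trap */ !in_hw);
}
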
return true; } -static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb) +static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { return 0; } diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 4c81846ccce8..6a1ae49809de 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -204,6 +204,18 @@ __be32 fib_result_prefsrc(struct net *net, struct fib_result *res); #define FIB_RES_DEV(res) (FIB_RES_NHC(res)->nhc_dev) #define FIB_RES_OIF(res) (FIB_RES_NHC(res)->nhc_oif) +struct fib_rt_info { + struct fib_info *fi; + u32 tb_id; + __be32 dst; + int dst_len; + u8 tos; + u8 type; + u8 offload:1, + trap:1, + unused:6; +}; + struct fib_entry_notifier_info { struct fib_notifier_info info; /* must be first */ u32 dst; @@ -219,7 +231,7 @@ struct fib_nh_notifier_info { struct fib_nh *fib_nh; }; -int call_fib4_notifier(struct notifier_block *nb, struct net *net, +int call_fib4_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib_notifier_info *info); int call_fib4_notifiers(struct net *net, enum fib_event_type event_type, @@ -229,7 +241,8 @@ int __net_init fib4_notifier_init(struct net *net); void __net_exit fib4_notifier_exit(struct net *net); void fib_info_notify_update(struct net *net, struct nl_info *info); -void fib_notify(struct net *net, struct notifier_block *nb); +int fib_notify(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack); struct fib_table { struct hlist_node tb_hlist; @@ -310,12 +323,18 @@ static inline int fib_lookup(struct net *net, const struct flowi4 *flp, return err; } +static inline bool fib4_has_custom_rules(const struct net *net) +{ + return false; +} + static inline bool fib4_rule_default(const struct fib_rule *rule) { return true; } -static inline int fib4_rules_dump(struct net *net, struct notifier_block *nb) +static inline int fib4_rules_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { return 0; } @@ -376,8 +395,14 @@ out: return err; } +static inline bool fib4_has_custom_rules(const struct net *net) +{ + return net->ipv4.fib_has_custom_rules; +} + bool fib4_rule_default(const struct fib_rule *rule); -int fib4_rules_dump(struct net *net, struct notifier_block *nb); +int fib4_rules_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack); unsigned int fib4_rules_seq_read(struct net *net); static inline bool fib4_rules_early_flow_dissect(struct net *net, @@ -451,6 +476,7 @@ int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *fc_encap, void fib_nh_common_release(struct fib_nh_common *nhc); /* Exported by fib_trie.c */ +void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri); void fib_trie_init(void); struct fib_table *fib_trie_table(u32 id, struct fib_table *alias); @@ -513,7 +539,7 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, struct netlink_callback *cb); int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nh, - unsigned char *flags, bool skip_oif); + u8 rt_family, unsigned char *flags, bool skip_oif); int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nh, - int nh_weight); + int nh_weight, u8 rt_family); #endif /* _NET_FIB_H */ diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index af645604f328..236503a50759 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -33,8 +33,8 @@ /* Used to memset ipv4 address padding. 
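
fib_rt_info is the IPv4 counterpart: a driver describes exactly which alias it programmed and fib_alias_hw_flags_set() updates the offload/trap bits on it. A sketch, assuming a hypothetical foo_ driver:

static void foo_fib4_programmed(struct net *net, struct fib_info *fi,
                                u32 tb_id, __be32 dst, int dst_len,
                                u8 tos, u8 type, bool in_hw)
{
        struct fib_rt_info fri = {
                .fi      = fi,
                .tb_id   = tb_id,
                .dst     = dst,
                .dst_len = dst_len,
                .tos     = tos,
                .type    = type,
                .offload = in_hw,
                .trap    = !in_hw,
        };

        fib_alias_hw_flags_set(net, &fri);
}
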
*/ #define IP_TUNNEL_KEY_IPV4_PAD offsetofend(struct ip_tunnel_key, u.ipv4.dst) #define IP_TUNNEL_KEY_IPV4_PAD_LEN \ - (FIELD_SIZEOF(struct ip_tunnel_key, u) - \ - FIELD_SIZEOF(struct ip_tunnel_key, u.ipv4)) + (sizeof_field(struct ip_tunnel_key, u) - \ + sizeof_field(struct ip_tunnel_key, u.ipv4)) struct ip_tunnel_key { __be64 tun_id; @@ -63,7 +63,7 @@ struct ip_tunnel_key { /* Maximum tunnel options length. */ #define IP_TUNNEL_OPTS_MAX \ - GENMASK((FIELD_SIZEOF(struct ip_tunnel_info, \ + GENMASK((sizeof_field(struct ip_tunnel_info, \ options_len) * BITS_PER_BYTE) - 1, 0) struct ip_tunnel_info { diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 3759167f91f5..83be2d93b407 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -889,6 +889,7 @@ struct netns_ipvs { struct delayed_work defense_work; /* Work handler */ int drop_rate; int drop_counter; + int old_secure_tcp; atomic_t dropentry; /* locks in ctl.c */ spinlock_t dropentry_lock; /* drop entry handling */ @@ -1324,7 +1325,7 @@ void ip_vs_protocol_net_cleanup(struct netns_ipvs *ipvs); void ip_vs_control_net_cleanup(struct netns_ipvs *ipvs); void ip_vs_estimator_net_cleanup(struct netns_ipvs *ipvs); void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs); -void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs); +void ip_vs_service_nets_cleanup(struct list_head *net_list); /* IPVS application functions * (from ip_vs_app.c) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 8dfc65639aa4..cec1a54401f2 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -696,6 +696,11 @@ static inline bool ipv6_addr_v4mapped(const struct in6_addr *a) cpu_to_be32(0x0000ffff))) == 0UL; } +static inline bool ipv6_addr_v4mapped_loopback(const struct in6_addr *a) +{ + return ipv6_addr_v4mapped(a) && ipv4_is_loopback(a->s6_addr32[3]); +} + static inline u32 ipv6_portaddr_hash(const struct net *net, const struct in6_addr *addr6, unsigned int port) @@ -981,7 +986,7 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb); * upper-layer output functions */ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, - __u32 mark, struct ipv6_txoptions *opt, int tclass); + __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority); int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr); @@ -1017,7 +1022,7 @@ static inline struct sk_buff *ip6_finish_skb(struct sock *sk) int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6); -struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6, +struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst); struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst, @@ -1108,6 +1113,9 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int inet6_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk); +int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size); +int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags); /* * reassembly.c diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 5c93e942c50b..3e7d2c0e79ca 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -24,8 +24,10 @@ struct ipv6_stub { const struct in6_addr *addr); int (*ipv6_sock_mc_drop)(struct sock *sk, int ifindex, const struct in6_addr *addr); - int (*ipv6_dst_lookup)(struct net 
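
ipv6_addr_v4mapped_loopback() recognizes ::ffff:127.0.0.0/104 addresses, which same-host checks previously missed because they are neither ::1 nor plain IPv4. A trivial sketch of the kind of check it enables; foo_is_local_peer() is hypothetical:

#include <linux/in6.h>
#include <net/ipv6.h>

static bool foo_is_local_peer(const struct sockaddr_in6 *sin6)
{
        /* treat ::ffff:127.x.y.z the same as ::1 */
        return ipv6_addr_loopback(&sin6->sin6_addr) ||
               ipv6_addr_v4mapped_loopback(&sin6->sin6_addr);
}
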
*net, struct sock *sk, - struct dst_entry **dst, struct flowi6 *fl6); + struct dst_entry *(*ipv6_dst_lookup_flow)(struct net *net, + const struct sock *sk, + struct flowi6 *fl6, + const struct in6_addr *final_dst); int (*ipv6_route_input)(struct sk_buff *skb); struct fib6_table *(*fib6_get_table)(struct net *net, u32 id); diff --git a/include/net/ipx.h b/include/net/ipx.h index baf090390998..9d1342807b59 100644 --- a/include/net/ipx.h +++ b/include/net/ipx.h @@ -47,11 +47,6 @@ struct ipxhdr { /* From af_ipx.c */ extern int sysctl_ipx_pprop_broadcasting; -static __inline__ struct ipxhdr *ipx_hdr(struct sk_buff *skb) -{ - return (struct ipxhdr *)skb_transport_header(skb); -} - struct ipx_interface { /* IPX address */ __be32 if_netnum; diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index df528a623548..ea985aa7a6c5 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -104,7 +104,7 @@ void llc_sk_reset(struct sock *sk); /* Access to a connection */ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb); -int llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb); +void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb); void llc_conn_rtn_pdu(struct sock *sk, struct sk_buff *skb); void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit); void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d26da013f7c0..77e6b5a83b06 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -312,9 +312,10 @@ struct ieee80211_vif_chanctx_switch { * @BSS_CHANGED_KEEP_ALIVE: keep alive options (idle period or protected * keep alive) changed. * @BSS_CHANGED_MCAST_RATE: Multicast Rate setting changed for this interface - * @BSS_CHANGED_FTM_RESPONDER: fime timing reasurement request responder + * @BSS_CHANGED_FTM_RESPONDER: fine timing measurement request responder * functionality changed for this BSS (AP mode). * @BSS_CHANGED_TWT: TWT status changed + * @BSS_CHANGED_HE_OBSS_PD: OBSS Packet Detection status changed. * */ enum ieee80211_bss_change { @@ -346,6 +347,7 @@ enum ieee80211_bss_change { BSS_CHANGED_MCAST_RATE = 1<<25, BSS_CHANGED_FTM_RESPONDER = 1<<26, BSS_CHANGED_TWT = 1<<27, + BSS_CHANGED_HE_OBSS_PD = 1<<28, /* when adding here, make sure to change ieee80211_reconfig */ }; @@ -600,6 +602,8 @@ struct ieee80211_ftm_responder_params { * nontransmitted BSSIDs * @profile_periodicity: the least number of beacon frames need to be received * in order to discover all the nontransmitted BSSIDs in the set. + * @he_operation: HE operation information of the AP we are connected to + * @he_obss_pd: OBSS Packet Detection parameters. 
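
The stub moves from the int-plus-output-pointer ipv6_dst_lookup() style to the flow-based lookup that returns a dst or an ERR_PTR (when IPv6 is unavailable the fallback stub returns an error pointer rather than crashing). A sketch of a caller adapting to the new shape; foo_route_v6() is hypothetical:

static struct dst_entry *foo_route_v6(struct net *net, struct sock *sk,
                                      struct flowi6 *fl6)
{
        struct dst_entry *dst;

        /* ERR_PTR() instead of the old "int err + struct dst_entry **" */
        dst = ipv6_stub->ipv6_dst_lookup_flow(net, sk, fl6, NULL);
        if (IS_ERR(dst))
                return NULL;

        return dst;
}
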
*/ struct ieee80211_bss_conf { const u8 *bssid; @@ -661,6 +665,8 @@ struct ieee80211_bss_conf { u8 bssid_indicator; bool ema_ap; u8 profile_periodicity; + struct ieee80211_he_operation he_operation; + struct ieee80211_he_obss_pd he_obss_pd; }; /** @@ -961,6 +967,7 @@ ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate) * @band: the band to transmit on (use for checking for races) * @hw_queue: HW queue to put the frame on, skb_get_queue_mapping() gives the AC * @ack_frame_id: internal frame ID for TX status, used internally + * @tx_time_est: TX time estimate in units of 4us, used internally * @control: union part for control data * @control.rates: TX rates array to try * @control.rts_cts_rate_idx: rate for RTS or CTS @@ -997,11 +1004,11 @@ ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate) struct ieee80211_tx_info { /* common information */ u32 flags; - u8 band; - - u8 hw_queue; - - u16 ack_frame_id; + u32 band:3, + ack_frame_id:13, + hw_queue:4, + tx_time_est:10; + /* 2 free bits */ union { struct { @@ -1052,17 +1059,35 @@ struct ieee80211_tx_info { }; }; +static inline u16 +ieee80211_info_set_tx_time_est(struct ieee80211_tx_info *info, u16 tx_time_est) +{ + /* We only have 10 bits in tx_time_est, so store airtime + * in increments of 4us and clamp the maximum to 2**12-1 + */ + info->tx_time_est = min_t(u16, tx_time_est, 4095) >> 2; + return info->tx_time_est << 2; +} + +static inline u16 +ieee80211_info_get_tx_time_est(struct ieee80211_tx_info *info) +{ + return info->tx_time_est << 2; +} + /** - * struct ieee80211_tx_status - extended tx staus info for rate control + * struct ieee80211_tx_status - extended tx status info for rate control * * @sta: Station that the packet was transmitted for * @info: Basic tx status information * @skb: Packet skb (can be NULL if not provided by the driver) + * @rate: The TX rate that was used when sending the packet */ struct ieee80211_tx_status { struct ieee80211_sta *sta; struct ieee80211_tx_info *info; struct sk_buff *skb; + struct rate_info *rate; }; /** @@ -1694,7 +1719,7 @@ struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif); * %IEEE80211_KEY_FLAG_SW_MGMT_TX flag to encrypt such frames in SW. * @IEEE80211_KEY_FLAG_GENERATE_IV_MGMT: This flag should be set by the * driver for a CCMP/GCMP key to indicate that is requires IV generation - * only for managment frames (MFP). + * only for management frames (MFP). * @IEEE80211_KEY_FLAG_RESERVE_TAILROOM: This flag should be set by the * driver for a key to indicate that sufficient tailroom must always * be reserved for ICV or MIC, even when HW encryption is enabled. @@ -1702,6 +1727,9 @@ struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif); * a TKIP key if it only requires MIC space. Do not set together with * @IEEE80211_KEY_FLAG_GENERATE_MMIC on the same key. * @IEEE80211_KEY_FLAG_NO_AUTO_TX: Key needs explicit Tx activation. 
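
The tx_time_est field is only 10 bits, so the setter clamps the estimate to 4095 us and stores it in 4 us units; the value actually recorded is returned, e.g. a 1337 us estimate is stored as 1336 us. A trivial sketch; foo_store_airtime() is hypothetical:

static u16 foo_store_airtime(struct ieee80211_tx_info *info, u16 est_us)
{
        /* returns the rounded/clamped value that was actually stored,
         * which equals ieee80211_info_get_tx_time_est(info)
         */
        return ieee80211_info_set_tx_time_est(info, est_us);
}
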
+ * @IEEE80211_KEY_FLAG_GENERATE_MMIE: This flag should be set by the driver + * for a AES_CMAC key to indicate that it requires sequence number + * generation only */ enum ieee80211_key_flags { IEEE80211_KEY_FLAG_GENERATE_IV_MGMT = BIT(0), @@ -1714,6 +1742,7 @@ enum ieee80211_key_flags { IEEE80211_KEY_FLAG_RESERVE_TAILROOM = BIT(7), IEEE80211_KEY_FLAG_PUT_MIC_SPACE = BIT(8), IEEE80211_KEY_FLAG_NO_AUTO_TX = BIT(9), + IEEE80211_KEY_FLAG_GENERATE_MMIE = BIT(10), }; /** @@ -1986,7 +2015,7 @@ struct ieee80211_sta { * * * If the skb is transmitted as part of a BA agreement, the * A-MSDU maximal size is min(max_amsdu_len, 4065) bytes. - * * If the skb is not part of a BA aggreement, the A-MSDU maximal + * * If the skb is not part of a BA agreement, the A-MSDU maximal * size is min(max_amsdu_len, 7935) bytes. * * Both additional HT limits must be enforced by the low level @@ -2268,11 +2297,9 @@ struct ieee80211_txq { * @IEEE80211_HW_SUPPORTS_ONLY_HE_MULTI_BSSID: Hardware supports multi BSSID * only for HE APs. Applies if @IEEE80211_HW_SUPPORTS_MULTI_BSSID is set. * - * @IEEE80211_HW_EXT_KEY_ID_NATIVE: Driver and hardware are supporting Extended - * Key ID and can handle two unicast keys per station for Rx and Tx. - * - * @IEEE80211_HW_NO_AMPDU_KEYBORDER_SUPPORT: The card/driver can't handle - * active Tx A-MPDU sessions with Extended Key IDs during rekey. + * @IEEE80211_HW_AMPDU_KEYBORDER_SUPPORT: The card and driver is only + * aggregating MPDUs with the same keyid, allowing mac80211 to keep Tx + * A-MPDU sessions active while rekeying with Extended Key ID. * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ @@ -2325,8 +2352,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_TX_STATUS_NO_AMPDU_LEN, IEEE80211_HW_SUPPORTS_MULTI_BSSID, IEEE80211_HW_SUPPORTS_ONLY_HE_MULTI_BSSID, - IEEE80211_HW_EXT_KEY_ID_NATIVE, - IEEE80211_HW_NO_AMPDU_KEYBORDER_SUPPORT, + IEEE80211_HW_AMPDU_KEYBORDER_SUPPORT, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -2454,6 +2480,8 @@ enum ieee80211_hw_flags { * * @weight_multiplier: Driver specific airtime weight multiplier used while * refilling deficit of each TXQ. + * + * @max_mtu: the max mtu could be set. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -2491,6 +2519,7 @@ struct ieee80211_hw { u8 max_nan_de_entries; u8 tx_sk_pacing_shift; u8 weight_multiplier; + u32 max_mtu; }; static inline bool _ieee80211_hw_check(struct ieee80211_hw *hw, @@ -2614,7 +2643,7 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * @hw: the hardware * @skb: the skb * - * Free a transmit skb. Use this funtion when some failure + * Free a transmit skb. Use this function when some failure * to transmit happened and thus status cannot be reported. */ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); @@ -3083,7 +3112,9 @@ enum ieee80211_filter_flags { * * @IEEE80211_AMPDU_RX_START: start RX aggregation * @IEEE80211_AMPDU_RX_STOP: stop RX aggregation - * @IEEE80211_AMPDU_TX_START: start TX aggregation + * @IEEE80211_AMPDU_TX_START: start TX aggregation, the driver must either + * call ieee80211_start_tx_ba_cb_irqsafe() or return the special + * status %IEEE80211_AMPDU_TX_START_IMMEDIATE. * @IEEE80211_AMPDU_TX_OPERATIONAL: TX aggregation has become operational * @IEEE80211_AMPDU_TX_STOP_CONT: stop TX aggregation but continue transmitting * queued packets, now unaggregated. 
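
A driver that offloads the AES-CMAC computation but still needs mac80211 to build the MMIE and generate the IPN can now say so via the new key flag. A sketch of where it would be set, assuming a hypothetical foo_set_key() that offloads only this cipher:

static int foo_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
                       struct ieee80211_vif *vif, struct ieee80211_sta *sta,
                       struct ieee80211_key_conf *key)
{
        if (cmd == SET_KEY && key->cipher == WLAN_CIPHER_SUITE_AES_CMAC) {
                /* hardware computes the CMAC; mac80211 still builds the
                 * MMIE and generates sequence numbers
                 */
                key->flags |= IEEE80211_KEY_FLAG_GENERATE_MMIE;
                return 0;
        }

        return -EOPNOTSUPP;
}
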
After all packets are transmitted the @@ -3107,6 +3138,8 @@ enum ieee80211_ampdu_mlme_action { IEEE80211_AMPDU_TX_OPERATIONAL, }; +#define IEEE80211_AMPDU_TX_START_IMMEDIATE 1 + /** * struct ieee80211_ampdu_params - AMPDU action parameters * @@ -3171,13 +3204,13 @@ enum ieee80211_rate_control_changed { * * With the support for multi channel contexts and multi channel operations, * remain on channel operations might be limited/deferred/aborted by other - * flows/operations which have higher priority (and vise versa). + * flows/operations which have higher priority (and vice versa). * Specifying the ROC type can be used by devices to prioritize the ROC * operations compared to other operations/flows. * * @IEEE80211_ROC_TYPE_NORMAL: There are no special requirements for this ROC. * @IEEE80211_ROC_TYPE_MGMT_TX: The remain on channel request is required - * for sending managment frames offchannel. + * for sending management frames offchannel. */ enum ieee80211_roc_type { IEEE80211_ROC_TYPE_NORMAL = 0, @@ -3884,7 +3917,10 @@ struct ieee80211_ops { * * Even ``189`` would be wrong since 1 could be lost again. * - * Returns a negative error code on failure. + * Returns a negative error code on failure. The driver may return + * %IEEE80211_AMPDU_TX_START_IMMEDIATE for %IEEE80211_AMPDU_TX_START + * if the session can start immediately. + * * The callback can sleep. */ int (*ampdu_action)(struct ieee80211_hw *hw, @@ -3914,7 +3950,8 @@ struct ieee80211_ops { struct ieee80211_channel *chan, int duration, enum ieee80211_roc_type type); - int (*cancel_remain_on_channel)(struct ieee80211_hw *hw); + int (*cancel_remain_on_channel)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif); int (*set_ringparam)(struct ieee80211_hw *hw, u32 tx, u32 rx); void (*get_ringparam)(struct ieee80211_hw *hw, u32 *tx, u32 *tx_max, u32 *rx, u32 *rx_max); @@ -5544,6 +5581,18 @@ void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid, u32 tx_airtime, u32 rx_airtime); /** + * ieee80211_txq_airtime_check - check if a txq can send frame to device + * + * @hw: pointer obtained from ieee80211_alloc_hw() + * @txq: pointer obtained from station or virtual interface + * + * Return true if the AQL's airtime limit has not been reached and the txq can + * continue to send more packets to the device. Otherwise return false. + */ +bool +ieee80211_txq_airtime_check(struct ieee80211_hw *hw, struct ieee80211_txq *txq); + +/** * ieee80211_iter_keys - iterate keys programmed into the device * @hw: pointer obtained from ieee80211_alloc_hw() * @vif: virtual interface to iterate, may be %NULL for all @@ -5596,7 +5645,7 @@ void ieee80211_iter_keys_rcu(struct ieee80211_hw *hw, /** * ieee80211_iter_chan_contexts_atomic - iterate channel contexts - * @hw: pointre obtained from ieee80211_alloc_hw(). + * @hw: pointer obtained from ieee80211_alloc_hw(). * @iter: iterator function * @iter_data: data passed to iterator function * @@ -5945,7 +5994,6 @@ struct rate_control_ops { void (*add_sta_debugfs)(void *priv, void *priv_sta, struct dentry *dir); - void (*remove_sta_debugfs)(void *priv, void *priv_sta); u32 (*get_expected_throughput)(void *priv_sta); }; @@ -6234,11 +6282,37 @@ void ieee80211_unreserve_tid(struct ieee80211_sta *sta, u8 tid); * but for the duration of the frame handling. * However, also note that while in the wake_tx_queue() method, * rcu_read_lock() is already held. + * + * softirqs must also be disabled when this function is called. + * In process context, use ieee80211_tx_dequeue_ni() instead. 
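
Drivers whose hardware needs no handshake to start TX aggregation can now return the new status from ampdu_action() instead of calling ieee80211_start_tx_ba_cb_irqsafe() later. A minimal sketch; foo_ampdu_action() is hypothetical:

static int foo_ampdu_action(struct ieee80211_hw *hw,
                            struct ieee80211_vif *vif,
                            struct ieee80211_ampdu_params *params)
{
        switch (params->action) {
        case IEEE80211_AMPDU_TX_START:
                /* session can start right away, skip the
                 * ieee80211_start_tx_ba_cb_irqsafe() round trip
                 */
                return IEEE80211_AMPDU_TX_START_IMMEDIATE;
        default:
                return -EOPNOTSUPP;
        }
}
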
*/ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, struct ieee80211_txq *txq); /** + * ieee80211_tx_dequeue_ni - dequeue a packet from a software tx queue + * (in process context) + * + * Like ieee80211_tx_dequeue() but can be called in process context + * (internally disables bottom halves). + * + * @hw: pointer as obtained from ieee80211_alloc_hw() + * @txq: pointer obtained from station or virtual interface, or from + * ieee80211_next_txq() + */ +static inline struct sk_buff *ieee80211_tx_dequeue_ni(struct ieee80211_hw *hw, + struct ieee80211_txq *txq) +{ + struct sk_buff *skb; + + local_bh_disable(); + skb = ieee80211_tx_dequeue(hw, txq); + local_bh_enable(); + + return skb; +} + +/** * ieee80211_next_txq - get next tx queue to pull packets from * * @hw: pointer as obtained from ieee80211_alloc_hw() @@ -6319,7 +6393,7 @@ ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq, * again. * * The API ieee80211_txq_may_transmit() also ensures that TXQ list will be - * aligned aginst driver's own round-robin scheduler list. i.e it rotates + * aligned against driver's own round-robin scheduler list. i.e it rotates * the TXQ list till it makes the requested node becomes the first entry * in TXQ list. Thus both the TXQ list and driver's list are in sync. If this * function returns %true, the driver is expected to schedule packets @@ -6377,4 +6451,33 @@ void ieee80211_nan_func_match(struct ieee80211_vif *vif, struct cfg80211_nan_match_params *match, gfp_t gfp); +/** + * ieee80211_calc_rx_airtime - calculate estimated transmission airtime for RX. + * + * This function calculates the estimated airtime usage of a frame based on the + * rate information in the RX status struct and the frame length. + * + * @hw: pointer as obtained from ieee80211_alloc_hw() + * @status: &struct ieee80211_rx_status containing the transmission rate + * information. + * @len: frame length in bytes + */ +u32 ieee80211_calc_rx_airtime(struct ieee80211_hw *hw, + struct ieee80211_rx_status *status, + int len); + +/** + * ieee80211_calc_tx_airtime - calculate estimated transmission airtime for TX. + * + * This function calculates the estimated airtime usage of a frame based on the + * rate information in the TX info struct and the frame length. + * + * @hw: pointer as obtained from ieee80211_alloc_hw() + * @info: &struct ieee80211_tx_info of the frame. + * @len: frame length in bytes + */ +u32 ieee80211_calc_tx_airtime(struct ieee80211_hw *hw, + struct ieee80211_tx_info *info, + int len); + #endif /* MAC80211_H */ diff --git a/include/net/macsec.h b/include/net/macsec.h new file mode 100644 index 000000000000..92e43db8b566 --- /dev/null +++ b/include/net/macsec.h @@ -0,0 +1,224 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * MACsec netdev header, used for h/w accelerated implementations. 
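
Purely illustrative: one way a driver's process-context TX path might combine the AQL airtime check with the new _ni dequeue helper. The loop body is a placeholder, not how any real driver hands frames to hardware:

static void foo_pull_from_txq(struct ieee80211_hw *hw,
                              struct ieee80211_txq *txq)
{
        struct sk_buff *skb;

        /* stop pulling once the AQL airtime limit is reached */
        while (ieee80211_txq_airtime_check(hw, txq)) {
                skb = ieee80211_tx_dequeue_ni(hw, txq); /* process context */
                if (!skb)
                        break;

                /* placeholder: a real driver queues skb to hardware */
                ieee80211_free_txskb(hw, skb);
        }
}
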
+ * + * Copyright (c) 2015 Sabrina Dubroca <sd@queasysnail.net> + */ +#ifndef _NET_MACSEC_H_ +#define _NET_MACSEC_H_ + +#include <linux/u64_stats_sync.h> +#include <uapi/linux/if_link.h> +#include <uapi/linux/if_macsec.h> + +typedef u64 __bitwise sci_t; + +#define MACSEC_NUM_AN 4 /* 2 bits for the association number */ + +/** + * struct macsec_key - SA key + * @id: user-provided key identifier + * @tfm: crypto struct, key storage + */ +struct macsec_key { + u8 id[MACSEC_KEYID_LEN]; + struct crypto_aead *tfm; +}; + +struct macsec_rx_sc_stats { + __u64 InOctetsValidated; + __u64 InOctetsDecrypted; + __u64 InPktsUnchecked; + __u64 InPktsDelayed; + __u64 InPktsOK; + __u64 InPktsInvalid; + __u64 InPktsLate; + __u64 InPktsNotValid; + __u64 InPktsNotUsingSA; + __u64 InPktsUnusedSA; +}; + +struct macsec_rx_sa_stats { + __u32 InPktsOK; + __u32 InPktsInvalid; + __u32 InPktsNotValid; + __u32 InPktsNotUsingSA; + __u32 InPktsUnusedSA; +}; + +struct macsec_tx_sa_stats { + __u32 OutPktsProtected; + __u32 OutPktsEncrypted; +}; + +struct macsec_tx_sc_stats { + __u64 OutPktsProtected; + __u64 OutPktsEncrypted; + __u64 OutOctetsProtected; + __u64 OutOctetsEncrypted; +}; + +/** + * struct macsec_rx_sa - receive secure association + * @active: + * @next_pn: packet number expected for the next packet + * @lock: protects next_pn manipulations + * @key: key structure + * @stats: per-SA stats + */ +struct macsec_rx_sa { + struct macsec_key key; + spinlock_t lock; + u32 next_pn; + refcount_t refcnt; + bool active; + struct macsec_rx_sa_stats __percpu *stats; + struct macsec_rx_sc *sc; + struct rcu_head rcu; +}; + +struct pcpu_rx_sc_stats { + struct macsec_rx_sc_stats stats; + struct u64_stats_sync syncp; +}; + +struct pcpu_tx_sc_stats { + struct macsec_tx_sc_stats stats; + struct u64_stats_sync syncp; +}; + +/** + * struct macsec_rx_sc - receive secure channel + * @sci: secure channel identifier for this SC + * @active: channel is active + * @sa: array of secure associations + * @stats: per-SC stats + */ +struct macsec_rx_sc { + struct macsec_rx_sc __rcu *next; + sci_t sci; + bool active; + struct macsec_rx_sa __rcu *sa[MACSEC_NUM_AN]; + struct pcpu_rx_sc_stats __percpu *stats; + refcount_t refcnt; + struct rcu_head rcu_head; +}; + +/** + * struct macsec_tx_sa - transmit secure association + * @active: + * @next_pn: packet number to use for the next packet + * @lock: protects next_pn manipulations + * @key: key structure + * @stats: per-SA stats + */ +struct macsec_tx_sa { + struct macsec_key key; + spinlock_t lock; + u32 next_pn; + refcount_t refcnt; + bool active; + struct macsec_tx_sa_stats __percpu *stats; + struct rcu_head rcu; +}; + +/** + * struct macsec_tx_sc - transmit secure channel + * @active: + * @encoding_sa: association number of the SA currently in use + * @encrypt: encrypt packets on transmit, or authenticate only + * @send_sci: always include the SCI in the SecTAG + * @end_station: + * @scb: single copy broadcast flag + * @sa: array of secure associations + * @stats: stats for this TXSC + */ +struct macsec_tx_sc { + bool active; + u8 encoding_sa; + bool encrypt; + bool send_sci; + bool end_station; + bool scb; + struct macsec_tx_sa __rcu *sa[MACSEC_NUM_AN]; + struct pcpu_tx_sc_stats __percpu *stats; +}; + +/** + * struct macsec_secy - MACsec Security Entity + * @netdev: netdevice for this SecY + * @n_rx_sc: number of receive secure channels configured on this SecY + * @sci: secure channel identifier used for tx + * @key_len: length of keys used by the cipher suite + * @icv_len: length of ICV used 
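
As the kernel-doc notes, the per-SA spinlock protects next_pn. An oversimplified sketch of a consumer taking it; the real receive path also honours secy->replay_protect and replay_window and advances next_pn:

static bool foo_pn_acceptable(struct macsec_rx_sa *rx_sa, u32 pn)
{
        bool ok;

        spin_lock(&rx_sa->lock);        /* ->lock protects ->next_pn */
        ok = pn >= rx_sa->next_pn;
        spin_unlock(&rx_sa->lock);

        return ok;
}
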
by the cipher suite + * @validate_frames: validation mode + * @operational: MAC_Operational flag + * @protect_frames: enable protection for this SecY + * @replay_protect: enable packet number checks on receive + * @replay_window: size of the replay window + * @tx_sc: transmit secure channel + * @rx_sc: linked list of receive secure channels + */ +struct macsec_secy { + struct net_device *netdev; + unsigned int n_rx_sc; + sci_t sci; + u16 key_len; + u16 icv_len; + enum macsec_validation_type validate_frames; + bool operational; + bool protect_frames; + bool replay_protect; + u32 replay_window; + struct macsec_tx_sc tx_sc; + struct macsec_rx_sc __rcu *rx_sc; +}; + +/** + * struct macsec_context - MACsec context for hardware offloading + */ +struct macsec_context { + struct phy_device *phydev; + enum macsec_offload offload; + + struct macsec_secy *secy; + struct macsec_rx_sc *rx_sc; + struct { + unsigned char assoc_num; + u8 key[MACSEC_KEYID_LEN]; + union { + struct macsec_rx_sa *rx_sa; + struct macsec_tx_sa *tx_sa; + }; + } sa; + + u8 prepare:1; +}; + +/** + * struct macsec_ops - MACsec offloading operations + */ +struct macsec_ops { + /* Device wide */ + int (*mdo_dev_open)(struct macsec_context *ctx); + int (*mdo_dev_stop)(struct macsec_context *ctx); + /* SecY */ + int (*mdo_add_secy)(struct macsec_context *ctx); + int (*mdo_upd_secy)(struct macsec_context *ctx); + int (*mdo_del_secy)(struct macsec_context *ctx); + /* Security channels */ + int (*mdo_add_rxsc)(struct macsec_context *ctx); + int (*mdo_upd_rxsc)(struct macsec_context *ctx); + int (*mdo_del_rxsc)(struct macsec_context *ctx); + /* Security associations */ + int (*mdo_add_rxsa)(struct macsec_context *ctx); + int (*mdo_upd_rxsa)(struct macsec_context *ctx); + int (*mdo_del_rxsa)(struct macsec_context *ctx); + int (*mdo_add_txsa)(struct macsec_context *ctx); + int (*mdo_upd_txsa)(struct macsec_context *ctx); + int (*mdo_del_txsa)(struct macsec_context *ctx); +}; + +void macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa); + +#endif /* _NET_MACSEC_H_ */ diff --git a/include/net/mptcp.h b/include/net/mptcp.h new file mode 100644 index 000000000000..c971d25431ea --- /dev/null +++ b/include/net/mptcp.h @@ -0,0 +1,185 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Multipath TCP + * + * Copyright (c) 2017 - 2019, Intel Corporation. 
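
Each mdo_* operation is invoked twice, first with ctx->prepare set (capability check, no side effects) and then again to commit. A sketch of that two-phase pattern; both foo_hw_* helpers are hypothetical:

static int foo_hw_can_add_rxsa(const struct macsec_rx_sa *rx_sa);
static int foo_hw_write_rxsa(unsigned char an,
                             const struct macsec_rx_sa *rx_sa);

static int foo_mdo_add_rxsa(struct macsec_context *ctx)
{
        if (ctx->prepare)
                /* first pass: validate only, no hardware writes yet */
                return foo_hw_can_add_rxsa(ctx->sa.rx_sa);

        /* second pass: commit the SA to hardware */
        return foo_hw_write_rxsa(ctx->sa.assoc_num, ctx->sa.rx_sa);
}
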
+ */ + +#ifndef __NET_MPTCP_H +#define __NET_MPTCP_H + +#include <linux/skbuff.h> +#include <linux/tcp.h> +#include <linux/types.h> + +/* MPTCP sk_buff extension data */ +struct mptcp_ext { + u64 data_ack; + u64 data_seq; + u32 subflow_seq; + u16 data_len; + u8 use_map:1, + dsn64:1, + data_fin:1, + use_ack:1, + ack64:1, + mpc_map:1, + __unused:2; + /* one byte hole */ +}; + +struct mptcp_out_options { +#if IS_ENABLED(CONFIG_MPTCP) + u16 suboptions; + u64 sndr_key; + u64 rcvr_key; + struct mptcp_ext ext_copy; +#endif +}; + +#ifdef CONFIG_MPTCP + +void mptcp_init(void); + +static inline bool sk_is_mptcp(const struct sock *sk) +{ + return tcp_sk(sk)->is_mptcp; +} + +static inline bool rsk_is_mptcp(const struct request_sock *req) +{ + return tcp_rsk(req)->is_mptcp; +} + +void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr, + int opsize, struct tcp_options_received *opt_rx); +bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, + unsigned int *size, struct mptcp_out_options *opts); +void mptcp_rcv_synsent(struct sock *sk); +bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, + struct mptcp_out_options *opts); +bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, + unsigned int *size, unsigned int remaining, + struct mptcp_out_options *opts); +void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, + struct tcp_options_received *opt_rx); + +void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts); + +/* move the skb extension owership, with the assumption that 'to' is + * newly allocated + */ +static inline void mptcp_skb_ext_move(struct sk_buff *to, + struct sk_buff *from) +{ + if (!skb_ext_exist(from, SKB_EXT_MPTCP)) + return; + + if (WARN_ON_ONCE(to->active_extensions)) + skb_ext_put(to); + + to->active_extensions = from->active_extensions; + to->extensions = from->extensions; + from->active_extensions = 0; +} + +static inline bool mptcp_ext_matches(const struct mptcp_ext *to_ext, + const struct mptcp_ext *from_ext) +{ + /* MPTCP always clears the ext when adding it to the skb, so + * holes do not bother us here + */ + return !from_ext || + (to_ext && from_ext && + !memcmp(from_ext, to_ext, sizeof(struct mptcp_ext))); +} + +/* check if skbs can be collapsed. + * MPTCP collapse is allowed if neither @to or @from carry an mptcp data + * mapping, or if the extension of @to is the same as @from. + * Collapsing is not possible if @to lacks an extension, but @from carries one. 
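
The collapse helper gives TCP a single predicate for whether two skbs carry compatible MPTCP mappings. A sketch of a coalescing path consulting it before merging; foo_try_coalesce() is hypothetical:

#include <linux/skbuff.h>
#include <net/mptcp.h>

static bool foo_try_coalesce(struct sk_buff *to, struct sk_buff *from)
{
        bool fragstolen;
        int delta;

        /* refuse to merge skbs whose MPTCP data mappings differ */
        if (!mptcp_skb_can_collapse(to, from))
                return false;

        return skb_try_coalesce(to, from, &fragstolen, &delta);
}
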
+ */ +static inline bool mptcp_skb_can_collapse(const struct sk_buff *to, + const struct sk_buff *from) +{ + return mptcp_ext_matches(skb_ext_find(to, SKB_EXT_MPTCP), + skb_ext_find(from, SKB_EXT_MPTCP)); +} + +#else + +static inline void mptcp_init(void) +{ +} + +static inline bool sk_is_mptcp(const struct sock *sk) +{ + return false; +} + +static inline bool rsk_is_mptcp(const struct request_sock *req) +{ + return false; +} + +static inline void mptcp_parse_option(const struct sk_buff *skb, + const unsigned char *ptr, int opsize, + struct tcp_options_received *opt_rx) +{ +} + +static inline bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, + unsigned int *size, + struct mptcp_out_options *opts) +{ + return false; +} + +static inline void mptcp_rcv_synsent(struct sock *sk) +{ +} + +static inline bool mptcp_synack_options(const struct request_sock *req, + unsigned int *size, + struct mptcp_out_options *opts) +{ + return false; +} + +static inline bool mptcp_established_options(struct sock *sk, + struct sk_buff *skb, + unsigned int *size, + unsigned int remaining, + struct mptcp_out_options *opts) +{ + return false; +} + +static inline void mptcp_incoming_options(struct sock *sk, + struct sk_buff *skb, + struct tcp_options_received *opt_rx) +{ +} + +static inline void mptcp_skb_ext_move(struct sk_buff *to, + const struct sk_buff *from) +{ +} + +static inline bool mptcp_skb_can_collapse(const struct sk_buff *to, + const struct sk_buff *from) +{ + return true; +} + +#endif /* CONFIG_MPTCP */ + +#if IS_ENABLED(CONFIG_MPTCP_IPV6) +int mptcpv6_init(void); +void mptcpv6_handle_mapped(struct sock *sk, bool mapped); +#elif IS_ENABLED(CONFIG_IPV6) +static inline int mptcpv6_init(void) { return 0; } +static inline void mptcpv6_handle_mapped(struct sock *sk, bool mapped) { } +#endif + +#endif /* __NET_MPTCP_H */ diff --git a/include/net/mrp.h b/include/net/mrp.h index ef58b4a07190..1c308c034e1a 100644 --- a/include/net/mrp.h +++ b/include/net/mrp.h @@ -39,7 +39,7 @@ struct mrp_skb_cb { static inline struct mrp_skb_cb *mrp_cb(struct sk_buff *skb) { BUILD_BUG_ON(sizeof(struct mrp_skb_cb) > - FIELD_SIZEOF(struct sk_buff, cb)); + sizeof_field(struct sk_buff, cb)); return (struct mrp_skb_cb *)skb->cb; } diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 366150053043..b5ebeb3b0de0 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -40,6 +40,7 @@ enum { ND_OPT_RDNSS = 25, /* RFC5006 */ ND_OPT_DNSSL = 31, /* RFC6106 */ ND_OPT_6CO = 34, /* RFC6775 */ + ND_OPT_CAPTIVE_PORTAL = 37, /* RFC7710 */ __ND_OPT_MAX }; @@ -413,8 +414,8 @@ static inline void __ipv6_confirm_neigh(struct net_device *dev, unsigned long now = jiffies; /* avoid dirtying neighbour */ - if (n->confirmed != now) - n->confirmed = now; + if (READ_ONCE(n->confirmed) != now) + WRITE_ONCE(n->confirmed, now); } rcu_read_unlock_bh(); } @@ -430,8 +431,8 @@ static inline void __ipv6_confirm_neigh_stub(struct net_device *dev, unsigned long now = jiffies; /* avoid dirtying neighbour */ - if (n->confirmed != now) - n->confirmed = now; + if (READ_ONCE(n->confirmed) != now) + WRITE_ONCE(n->confirmed, now); } rcu_read_unlock_bh(); } diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 50a67bd6a434..8ec77bfdc1a4 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -72,7 +72,6 @@ struct neigh_parms { struct net_device *dev; struct list_head list; int (*neigh_setup)(struct neighbour *); - void (*neigh_cleanup)(struct neighbour *); struct neigh_table *tbl; void *sysctl_table; @@ -439,8 +438,8 
@@ static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { unsigned long now = jiffies; - if (neigh->used != now) - neigh->used = now; + if (READ_ONCE(neigh->used) != now) + WRITE_ONCE(neigh->used, now); if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) return __neigh_event_send(neigh, skb); return 0; @@ -468,7 +467,7 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb do { seq = read_seqbegin(&hh->hh_lock); - hh_len = hh->hh_len; + hh_len = READ_ONCE(hh->hh_len); if (likely(hh_len <= HH_DATA_MOD)) { hh_alen = HH_DATA_MOD; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index cb668bc2692d..854d39ef1ca3 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -36,6 +36,7 @@ #include <linux/ns_common.h> #include <linux/idr.h> #include <linux/skbuff.h> +#include <linux/notifier.h> struct user_namespace; struct proc_dir_entry; @@ -52,7 +53,10 @@ struct bpf_prog; #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) struct net { - refcount_t passive; /* To decided when the network + /* First cache line can often be dirtied. * Do not place read-mostly fields here. */ + refcount_t passive; /* To decide when the network * namespace should be freed. */ refcount_t count; /* To decide when the network @@ -60,7 +64,13 @@ struct net { */ spinlock_t rules_mod_lock; - u32 hash_mix; + unsigned int dev_unreg_count; + + unsigned int dev_base_seq; /* protected by rtnl_mutex */ + int ifindex; + + spinlock_t nsid_lock; + atomic_t fnhe_genid; struct list_head list; /* list of network namespaces */ struct list_head exit_list; /* Linked to call pernet exit @@ -76,11 +86,11 @@ struct net { #endif struct user_namespace *user_ns; /* Owning user namespace */ struct ucounts *ucounts; - spinlock_t nsid_lock; struct idr netns_ids; struct ns_common ns; + struct list_head dev_base_head; struct proc_dir_entry *proc_net; struct proc_dir_entry *proc_net_stat; @@ -93,20 +103,20 @@ struct net { struct uevent_sock *uevent_sock; /* uevent socket */ - struct list_head dev_base_head; struct hlist_head *dev_name_head; struct hlist_head *dev_index_head; - unsigned int dev_base_seq; /* protected by rtnl_mutex */ - int ifindex; - unsigned int dev_unreg_count; + struct raw_notifier_head netdev_chain; + + /* Note that @hash_mix can be read millions of times per second, + * it is critical that it is on a read_mostly cache line.
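+ *
+ * Editorial sketch (not part of the original patch): @hash_mix is the
+ * per-namespace salt returned by net_hash_mix() and folded into socket
+ * and flow hash functions on every lookup, roughly:
+ *
+ *	hash = (port + net_hash_mix(net)) & (hash_table_size - 1);
+ *
+ * where hash_table_size stands in for the real mask. Sharing its cache
+ * line with frequently written fields would add false sharing to every
+ * such computation, hence the placement below.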
+ */ + u32 hash_mix; + + struct net_device *loopback_dev; /* The loopback */ /* core fib_rules */ struct list_head rules_ops; - struct list_head fib_notifier_ops; /* Populated by - * register_pernet_subsys() - */ - struct net_device *loopback_dev; /* The loopback */ struct netns_core core; struct netns_mib mib; struct netns_packet packet; @@ -170,8 +180,10 @@ struct net { #ifdef CONFIG_XDP_SOCKETS struct netns_xdp xdp; #endif +#if IS_ENABLED(CONFIG_CRYPTO_USER) + struct sock *crypto_nlsk; +#endif struct sock *diag_nlsk; - atomic_t fnhe_genid; } __randomize_layout; #include <linux/seq_file_net.h> @@ -317,7 +329,8 @@ static inline struct net *read_pnet(const possible_net_t *pnet) /* Protected by net_rwsem */ #define for_each_net(VAR) \ list_for_each_entry(VAR, &net_namespace_list, list) - +#define for_each_net_continue_reverse(VAR) \ + list_for_each_entry_continue_reverse(VAR, &net_namespace_list, list) #define for_each_net_rcu(VAR) \ list_for_each_entry_rcu(VAR, &net_namespace_list, list) @@ -333,10 +346,10 @@ static inline struct net *read_pnet(const possible_net_t *pnet) #define __net_initconst __initconst #endif -int peernet2id_alloc(struct net *net, struct net *peer); -int peernet2id(struct net *net, struct net *peer); -bool peernet_has_id(struct net *net, struct net *peer); -struct net *get_net_ns_by_id(struct net *net, int id); +int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp); +int peernet2id(const struct net *net, struct net *peer); +bool peernet_has_id(const struct net *net, struct net *peer); +struct net *get_net_ns_by_id(const struct net *net, int id); struct pernet_operations { struct list_head list; @@ -414,7 +427,7 @@ static inline void unregister_net_sysctl_table(struct ctl_table_header *header) } #endif -static inline int rt_genid_ipv4(struct net *net) +static inline int rt_genid_ipv4(const struct net *net) { return atomic_read(&net->ipv4.rt_genid); } @@ -446,7 +459,7 @@ static inline void rt_genid_bump_all(struct net *net) rt_genid_bump_ipv6(net); } -static inline int fnhe_genid(struct net *net) +static inline int fnhe_genid(const struct net *net) { return atomic_read(&net->fnhe_genid); } diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h index 302fcd3aade2..371696ec11b2 100644 --- a/include/net/netfilter/br_netfilter.h +++ b/include/net/netfilter/br_netfilter.h @@ -2,16 +2,22 @@ #ifndef _BR_NETFILTER_H_ #define _BR_NETFILTER_H_ +#include <linux/netfilter.h> + #include "../../../net/bridge/br_private.h" static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) { +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) struct nf_bridge_info *b = skb_ext_add(skb, SKB_EXT_BRIDGE_NF); if (b) memset(b, 0, sizeof(*b)); return b; +#else + return NULL; +#endif } void nf_bridge_update_protocol(struct sk_buff *skb); @@ -36,10 +42,14 @@ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_ static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) { +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) struct net_bridge_port *port; port = br_port_get_rcu(dev); return port ? 
&port->br->fake_rtable : NULL; +#else + return NULL; +#endif } struct net_device *setup_pre_routing(struct sk_buff *skb, @@ -57,7 +67,7 @@ static inline int br_validate_ipv6(struct net *net, struct sk_buff *skb) } static inline unsigned int -br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb, +br_nf_pre_routing_ipv6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return NF_ACCEPT; diff --git a/include/net/netfilter/ipv4/nf_dup_ipv4.h b/include/net/netfilter/ipv4/nf_dup_ipv4.h index c962e0be3549..a2bc16cdbcd3 100644 --- a/include/net/netfilter/ipv4/nf_dup_ipv4.h +++ b/include/net/netfilter/ipv4/nf_dup_ipv4.h @@ -2,6 +2,9 @@ #ifndef _NF_DUP_IPV4_H_ #define _NF_DUP_IPV4_H_ +#include <linux/skbuff.h> +#include <uapi/linux/in.h> + void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, const struct in_addr *gw, int oif); diff --git a/include/net/netfilter/ipv6/nf_conntrack_icmpv6.h b/include/net/netfilter/ipv6/nf_conntrack_icmpv6.h deleted file mode 100644 index c86895bc5eb6..000000000000 --- a/include/net/netfilter/ipv6/nf_conntrack_icmpv6.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * ICMPv6 tracking. - * - * 21 Apl 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> - * - separated from nf_conntrack_icmp.h - * - * Derived from include/linux/netfiter_ipv4/ip_conntrack_icmp.h - */ - -#ifndef _NF_CONNTRACK_ICMPV6_H -#define _NF_CONNTRACK_ICMPV6_H - -#ifndef ICMPV6_NI_QUERY -#define ICMPV6_NI_QUERY 139 -#endif -#ifndef ICMPV6_NI_REPLY -#define ICMPV6_NI_REPLY 140 -#endif - -#endif /* _NF_CONNTRACK_ICMPV6_H */ diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h index 9d7e28736da9..6d31cd041143 100644 --- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h +++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h @@ -2,7 +2,9 @@ #ifndef _NF_DEFRAG_IPV6_H #define _NF_DEFRAG_IPV6_H -struct net; +#include <linux/skbuff.h> +#include <linux/types.h> + int nf_defrag_ipv6_enable(struct net *); int nf_ct_frag6_init(void); diff --git a/include/net/netfilter/ipv6/nf_dup_ipv6.h b/include/net/netfilter/ipv6/nf_dup_ipv6.h index caf0c2dd8ee7..f6312bb04a13 100644 --- a/include/net/netfilter/ipv6/nf_dup_ipv6.h +++ b/include/net/netfilter/ipv6/nf_dup_ipv6.h @@ -2,6 +2,8 @@ #ifndef _NF_DUP_IPV6_H_ #define _NF_DUP_IPV6_H_ +#include <linux/skbuff.h> + void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum, const struct in6_addr *gw, int oif); diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index c86657d99630..9f551f3b69c6 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -13,17 +13,14 @@ #ifndef _NF_CONNTRACK_H #define _NF_CONNTRACK_H -#include <linux/netfilter/nf_conntrack_common.h> - #include <linux/bitops.h> #include <linux/compiler.h> -#include <linux/atomic.h> +#include <linux/netfilter/nf_conntrack_common.h> #include <linux/netfilter/nf_conntrack_tcp.h> #include <linux/netfilter/nf_conntrack_dccp.h> #include <linux/netfilter/nf_conntrack_sctp.h> #include <linux/netfilter/nf_conntrack_proto_gre.h> -#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h> #include <net/netfilter/nf_conntrack_tuple.h> @@ -148,16 +145,14 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack); -#define NFCT_INFOMASK 7UL -#define NFCT_PTRMASK ~(NFCT_INFOMASK) - /* Return 
conntrack_info and tuple hash for given skb. */ static inline struct nf_conn * nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) { - *ctinfo = skb->_nfct & NFCT_INFOMASK; + unsigned long nfct = skb_get_nfct(skb); - return (struct nf_conn *)(skb->_nfct & NFCT_PTRMASK); + *ctinfo = nfct & NFCT_INFOMASK; + return (struct nf_conn *)(nfct & NFCT_PTRMASK); } /* decrement reference count on a conntrack */ @@ -321,7 +316,7 @@ u32 nf_ct_get_id(const struct nf_conn *ct); static inline void nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info) { - skb->_nfct = (unsigned long)ct | info; + skb_set_nfct(skb, (unsigned long)ct | info); } #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h index 1fee733c18a7..f7a060c6eb28 100644 --- a/include/net/netfilter/nf_conntrack_acct.h +++ b/include/net/netfilter/nf_conntrack_acct.h @@ -29,6 +29,7 @@ struct nf_conn_acct *nf_conn_acct_find(const struct nf_conn *ct) static inline struct nf_conn_acct *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp) { +#if IS_ENABLED(CONFIG_NF_CONNTRACK) struct net *net = nf_ct_net(ct); struct nf_conn_acct *acct; @@ -41,22 +42,32 @@ struct nf_conn_acct *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp) return acct; -}; +#else + return NULL; +#endif +} /* Check if connection tracking accounting is enabled */ static inline bool nf_ct_acct_enabled(struct net *net) { +#if IS_ENABLED(CONFIG_NF_CONNTRACK) return net->ct.sysctl_acct != 0; +#else + return false; +#endif } /* Enable/disable connection tracking accounting */ static inline void nf_ct_set_acct(struct net *net, bool enable) { +#if IS_ENABLED(CONFIG_NF_CONNTRACK) net->ct.sysctl_acct = enable; +#endif } void nf_conntrack_acct_pernet_init(struct net *net); int nf_conntrack_acct_init(void); void nf_conntrack_acct_fini(void); + #endif /* _NF_CONNTRACK_ACCT_H */ diff --git a/include/net/netfilter/nf_conntrack_bridge.h b/include/net/netfilter/nf_conntrack_bridge.h index 9a5514d5bc51..c564281ede5e 100644 --- a/include/net/netfilter/nf_conntrack_bridge.h +++ b/include/net/netfilter/nf_conntrack_bridge.h @@ -1,6 +1,12 @@ #ifndef NF_CONNTRACK_BRIDGE_ #define NF_CONNTRACK_BRIDGE_ +#include <linux/module.h> +#include <linux/types.h> +#include <uapi/linux/if_ether.h> + +struct nf_hook_ops; + struct nf_ct_bridge_info { struct nf_hook_ops *ops; unsigned int ops_size; @@ -10,11 +16,4 @@ struct nf_ct_bridge_info { void nf_ct_bridge_register(struct nf_ct_bridge_info *info); void nf_ct_bridge_unregister(struct nf_ct_bridge_info *info); -struct nf_ct_bridge_frag_data { - char mac[ETH_HLEN]; - bool vlan_present; - u16 vlan_tci; - __be16 vlan_proto; -}; - #endif diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index de10faf2ce91..09f2efea0b97 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -14,13 +14,16 @@ #define _NF_CONNTRACK_CORE_H #include <linux/netfilter.h> -#include <net/netfilter/nf_conntrack_l4proto.h> +#include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_ecache.h> +#include <net/netfilter/nf_conntrack_l4proto.h> /* This header is used to share core functionality between the standalone connection tracking module, and the compatibility layer's use of connection tracking. 
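 *
 * Editorial aside (not part of the original patch): nf_ct_get() and
 * nf_ct_set() above implement a tagged pointer -- skb->_nfct carries the
 * nf_conn pointer and the 3-bit ctinfo in a single word. A hedged sketch
 * of the round trip:
 *
 *	enum ip_conntrack_info ctinfo;
 *	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);	// unpack both
 *	...
 *	nf_ct_set(skb, ct, IP_CT_ESTABLISHED);		// pack them back
 *
 * This relies on nf_conn allocations being at least 8-byte aligned, so
 * the NFCT_INFOMASK low bits of the pointer are otherwise always zero.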
*/ -unsigned int nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state); + +unsigned int nf_conntrack_in(struct sk_buff *skb, + const struct nf_hook_state *state); int nf_conntrack_init_net(struct net *net); void nf_conntrack_cleanup_net(struct net *net); diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h index f32fc8289473..9645b47fa7e4 100644 --- a/include/net/netfilter/nf_conntrack_count.h +++ b/include/net/netfilter/nf_conntrack_count.h @@ -2,6 +2,9 @@ #define _NF_CONNTRACK_COUNT_H #include <linux/list.h> +#include <linux/spinlock.h> +#include <net/netfilter/nf_conntrack_tuple.h> +#include <net/netfilter/nf_conntrack_zones.h> struct nf_conncount_data; diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 52b44192b43f..eb81f9195e28 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -61,9 +61,10 @@ nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) #else return NULL; #endif -}; +} #ifdef CONFIG_NF_CONNTRACK_EVENTS + /* This structure is passed to event handler */ struct nf_ct_event { struct nf_conn *ct; @@ -84,9 +85,26 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct); int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, u32 portid, int report); +#else + +static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) +{ +} + +static inline int nf_conntrack_eventmask_report(unsigned int eventmask, + struct nf_conn *ct, + u32 portid, + int report) +{ + return 0; +} + +#endif + static inline void nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) { +#ifdef CONFIG_NF_CONNTRACK_EVENTS struct net *net = nf_ct_net(ct); struct nf_conntrack_ecache *e; @@ -98,31 +116,42 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) return; set_bit(event, &e->cache); +#endif } static inline int nf_conntrack_event_report(enum ip_conntrack_events event, struct nf_conn *ct, u32 portid, int report) { +#ifdef CONFIG_NF_CONNTRACK_EVENTS const struct net *net = nf_ct_net(ct); if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb)) return 0; return nf_conntrack_eventmask_report(1 << event, ct, portid, report); +#else + return 0; +#endif } static inline int nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) { +#ifdef CONFIG_NF_CONNTRACK_EVENTS const struct net *net = nf_ct_net(ct); if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb)) return 0; return nf_conntrack_eventmask_report(1 << event, ct, 0, 0); +#else + return 0; +#endif } +#ifdef CONFIG_NF_CONNTRACK_EVENTS + struct nf_exp_event { struct nf_conntrack_expect *exp; u32 portid; @@ -148,41 +177,18 @@ void nf_conntrack_ecache_pernet_fini(struct net *net); int nf_conntrack_ecache_init(void); void nf_conntrack_ecache_fini(void); -static inline void nf_conntrack_ecache_delayed_work(struct net *net) +#else /* CONFIG_NF_CONNTRACK_EVENTS */ + +static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e, + struct nf_conntrack_expect *exp, + u32 portid, + int report) { - if (!delayed_work_pending(&net->ct.ecache_dwork)) { - schedule_delayed_work(&net->ct.ecache_dwork, HZ); - net->ct.ecache_dwork_pending = true; - } } -static inline void nf_conntrack_ecache_work(struct net *net) +static inline void nf_conntrack_ecache_pernet_init(struct net *net) { - if (net->ct.ecache_dwork_pending) { - net->ct.ecache_dwork_pending = false; - 
mod_delayed_work(system_wq, &net->ct.ecache_dwork, 0); - } } -#else /* CONFIG_NF_CONNTRACK_EVENTS */ -static inline void nf_conntrack_event_cache(enum ip_conntrack_events event, - struct nf_conn *ct) {} -static inline int nf_conntrack_eventmask_report(unsigned int eventmask, - struct nf_conn *ct, - u32 portid, - int report) { return 0; } -static inline int nf_conntrack_event(enum ip_conntrack_events event, - struct nf_conn *ct) { return 0; } -static inline int nf_conntrack_event_report(enum ip_conntrack_events event, - struct nf_conn *ct, - u32 portid, - int report) { return 0; } -static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {} -static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e, - struct nf_conntrack_expect *exp, - u32 portid, - int report) {} - -static inline void nf_conntrack_ecache_pernet_init(struct net *net) {} static inline void nf_conntrack_ecache_pernet_fini(struct net *net) { @@ -197,14 +203,26 @@ static inline void nf_conntrack_ecache_fini(void) { } +#endif /* CONFIG_NF_CONNTRACK_EVENTS */ + static inline void nf_conntrack_ecache_delayed_work(struct net *net) { +#ifdef CONFIG_NF_CONNTRACK_EVENTS + if (!delayed_work_pending(&net->ct.ecache_dwork)) { + schedule_delayed_work(&net->ct.ecache_dwork, HZ); + net->ct.ecache_dwork_pending = true; + } +#endif } static inline void nf_conntrack_ecache_work(struct net *net) { +#ifdef CONFIG_NF_CONNTRACK_EVENTS + if (net->ct.ecache_dwork_pending) { + net->ct.ecache_dwork_pending = false; + mod_delayed_work(system_wq, &net->ct.ecache_dwork, 0); + } +#endif } -#endif /* CONFIG_NF_CONNTRACK_EVENTS */ #endif /*_NF_CONNTRACK_ECACHE_H*/ - diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 573429be4d59..0855b60fba17 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -126,7 +126,7 @@ void nf_ct_expect_init(struct nf_conntrack_expect *, unsigned int, u_int8_t, const union nf_inet_addr *, u_int8_t, const __be16 *, const __be16 *); void nf_ct_expect_put(struct nf_conntrack_expect *exp); -int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, +int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, u32 portid, int report, unsigned int flags); static inline int nf_ct_expect_related(struct nf_conntrack_expect *expect, unsigned int flags) diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 21f887c5058c..5ae5295aa46d 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -8,7 +8,7 @@ enum nf_ct_ext_id { NF_CT_EXT_HELPER, -#if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE) +#if IS_ENABLED(CONFIG_NF_NAT) NF_CT_EXT_NAT, #endif NF_CT_EXT_SEQADJ, @@ -43,7 +43,6 @@ enum nf_ct_ext_id { /* Extensions: optional stuff which isn't permanently in struct. */ struct nf_ct_ext { - struct rcu_head rcu; u8 offset[NF_CT_EXT_NUM]; u8 len; char data[0]; @@ -72,15 +71,6 @@ static inline void *__nf_ct_ext_find(const struct nf_conn *ct, u8 id) /* Destroy all relationships */ void nf_ct_ext_destroy(struct nf_conn *ct); -/* Free operation. If you want to free a object referred from private area, - * please implement __nf_ct_ext_free() and call it. - */ -static inline void nf_ct_ext_free(struct nf_conn *ct) -{ - if (ct->ext) - kfree_rcu(ct->ext, rcu); -} - /* Add this type, returns pointer to data or NULL. 
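 *
 * Editorial sketch (not part of the original patch), using the accounting
 * extension as an example; error handling is elided:
 *
 *	struct nf_conn_acct *acct;
 *
 *	acct = nf_ct_ext_add(ct, NF_CT_EXT_ACCT, GFP_ATOMIC);	// at creation
 *	...
 *	acct = nf_ct_ext_find(ct, NF_CT_EXT_ACCT);		// on the fast path
 *	if (acct)
 *		; // update the per-direction counters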
*/ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp); diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 44b5a00a9c64..37f0fbefb060 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -81,7 +81,7 @@ struct nf_conn_help { }; #define NF_CT_HELPER_BUILD_BUG_ON(structsize) \ - BUILD_BUG_ON((structsize) > FIELD_SIZEOF(struct nf_conn_help, data)) + BUILD_BUG_ON((structsize) > sizeof_field(struct nf_conn_help, data)) struct nf_conntrack_helper *__nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum); diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index a49edfdf47e8..4cad1f0a327a 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -176,42 +176,44 @@ void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, const char *fmt, ...) { } #endif /* CONFIG_SYSCTL */ +#if IS_ENABLED(CONFIG_NF_CONNTRACK) static inline struct nf_generic_net *nf_generic_pernet(struct net *net) { - return &net->ct.nf_ct_proto.generic; + return &net->ct.nf_ct_proto.generic; } static inline struct nf_tcp_net *nf_tcp_pernet(struct net *net) { - return &net->ct.nf_ct_proto.tcp; + return &net->ct.nf_ct_proto.tcp; } static inline struct nf_udp_net *nf_udp_pernet(struct net *net) { - return &net->ct.nf_ct_proto.udp; + return &net->ct.nf_ct_proto.udp; } static inline struct nf_icmp_net *nf_icmp_pernet(struct net *net) { - return &net->ct.nf_ct_proto.icmp; + return &net->ct.nf_ct_proto.icmp; } static inline struct nf_icmp_net *nf_icmpv6_pernet(struct net *net) { - return &net->ct.nf_ct_proto.icmpv6; + return &net->ct.nf_ct_proto.icmpv6; } +#endif #ifdef CONFIG_NF_CT_PROTO_DCCP static inline struct nf_dccp_net *nf_dccp_pernet(struct net *net) { - return &net->ct.nf_ct_proto.dccp; + return &net->ct.nf_ct_proto.dccp; } #endif #ifdef CONFIG_NF_CT_PROTO_SCTP static inline struct nf_sctp_net *nf_sctp_pernet(struct net *net) { - return &net->ct.nf_ct_proto.sctp; + return &net->ct.nf_ct_proto.sctp; } #endif diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index 4eacce6f3bcc..ba916411c4e1 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -1,11 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#include <linux/types.h> -#include <net/net_namespace.h> + +#ifndef _NF_CONNTRACK_LABELS_H +#define _NF_CONNTRACK_LABELS_H + #include <linux/netfilter/nf_conntrack_common.h> #include <linux/netfilter/nf_conntrack_tuple_common.h> +#include <linux/types.h> +#include <net/net_namespace.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_extend.h> - #include <uapi/linux/netfilter/xt_connlabel.h> #define NF_CT_LABELS_MAX_SIZE ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE) @@ -51,3 +54,5 @@ static inline void nf_conntrack_labels_fini(void) {} static inline int nf_connlabels_get(struct net *net, unsigned int bit) { return 0; } static inline void nf_connlabels_put(struct net *net) {} #endif + +#endif /* _NF_CONNTRACK_LABELS_H */ diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h index 44513b93bd55..6a3ab081e4bf 100644 --- a/include/net/netfilter/nf_conntrack_synproxy.h +++ b/include/net/netfilter/nf_conntrack_synproxy.h @@ -32,6 +32,7 @@ static inline struct nf_conn_synproxy *nfct_synproxy_ext_add(struct 
nf_conn *ct) static inline bool nf_ct_add_synproxy(struct nf_conn *ct, const struct nf_conn *tmpl) { +#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) if (tmpl && nfct_synproxy(tmpl)) { if (!nfct_seqadj_ext_add(ct)) return false; @@ -39,47 +40,9 @@ static inline bool nf_ct_add_synproxy(struct nf_conn *ct, if (!nfct_synproxy_ext_add(ct)) return false; } +#endif return true; } -struct synproxy_stats { - unsigned int syn_received; - unsigned int cookie_invalid; - unsigned int cookie_valid; - unsigned int cookie_retrans; - unsigned int conn_reopened; -}; - -struct synproxy_net { - struct nf_conn *tmpl; - struct synproxy_stats __percpu *stats; - unsigned int hook_ref4; - unsigned int hook_ref6; -}; - -extern unsigned int synproxy_net_id; -static inline struct synproxy_net *synproxy_pernet(struct net *net) -{ - return net_generic(net, synproxy_net_id); -} - -struct synproxy_options { - u8 options; - u8 wscale; - u16 mss; - u16 mss_encode; - u32 tsval; - u32 tsecr; -}; - -struct tcphdr; -struct nf_synproxy_info; -bool synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, - const struct tcphdr *th, - struct synproxy_options *opts); - -void synproxy_init_timestamp_cookie(const struct nf_synproxy_info *info, - struct synproxy_options *opts); - #endif /* _NF_CONNTRACK_SYNPROXY_H */ diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index 00a8fbb2d735..6dd72396f534 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -32,6 +32,7 @@ struct nf_conn_timeout { static inline unsigned int * nf_ct_timeout_data(const struct nf_conn_timeout *t) { +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT struct nf_ct_timeout *timeout; timeout = rcu_dereference(t->timeout); @@ -39,6 +40,9 @@ nf_ct_timeout_data(const struct nf_conn_timeout *t) return NULL; return (unsigned int *)timeout->data; +#else + return NULL; +#endif } static inline diff --git a/include/net/netfilter/nf_conntrack_timestamp.h b/include/net/netfilter/nf_conntrack_timestamp.h index 0ed617bf0a3d..820ea34b6029 100644 --- a/include/net/netfilter/nf_conntrack_timestamp.h +++ b/include/net/netfilter/nf_conntrack_timestamp.h @@ -38,16 +38,6 @@ struct nf_conn_tstamp *nf_ct_tstamp_ext_add(struct nf_conn *ct, gfp_t gfp) #endif }; -static inline bool nf_ct_tstamp_enabled(struct net *net) -{ - return net->ct.sysctl_tstamp != 0; -} - -static inline void nf_ct_set_tstamp(struct net *net, bool enable) -{ - net->ct.sysctl_tstamp = enable; -} - #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP void nf_conntrack_tstamp_pernet_init(struct net *net); diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index bf0444e111a6..9334371c94e2 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -123,7 +123,7 @@ struct nf_conntrack_tuple_hash { static inline bool __nf_ct_tuple_src_equal(const struct nf_conntrack_tuple *t1, const struct nf_conntrack_tuple *t2) -{ +{ return (nf_inet_addr_cmp(&t1->src.u3, &t2->src.u3) && t1->src.u.all == t2->src.u.all && t1->src.l3num == t2->src.l3num); diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h index 52950baa3ab5..48dbadb96fb3 100644 --- a/include/net/netfilter/nf_conntrack_zones.h +++ b/include/net/netfilter/nf_conntrack_zones.h @@ -3,9 +3,7 @@ #define _NF_CONNTRACK_ZONES_H #include <linux/netfilter/nf_conntrack_zones_common.h> - -#if IS_ENABLED(CONFIG_NF_CONNTRACK) -#include 
<net/netfilter/nf_conntrack_extend.h> +#include <net/netfilter/nf_conntrack.h> static inline const struct nf_conntrack_zone * nf_ct_zone(const struct nf_conn *ct) @@ -87,5 +85,5 @@ static inline bool nf_ct_zone_equal_any(const struct nf_conn *a, return true; #endif } -#endif /* IS_ENABLED(CONFIG_NF_CONNTRACK) */ + #endif /* _NF_CONNTRACK_ZONES_H */ diff --git a/include/net/netfilter/nf_dup_netdev.h b/include/net/netfilter/nf_dup_netdev.h index 2a6f6dcad3d9..b175d271aec9 100644 --- a/include/net/netfilter/nf_dup_netdev.h +++ b/include/net/netfilter/nf_dup_netdev.h @@ -2,7 +2,15 @@ #ifndef _NF_DUP_NETDEV_H_ #define _NF_DUP_NETDEV_H_ +#include <net/netfilter/nf_tables.h> + void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif); void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif); +struct nft_offload_ctx; +struct nft_flow_rule; + +int nft_fwd_dup_netdev_offload(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow, + enum flow_action_id id, int oif); #endif diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index d8c187936bec..e0f709d9d547 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -6,27 +6,52 @@ #include <linux/netdevice.h> #include <linux/rhashtable-types.h> #include <linux/rcupdate.h> +#include <linux/netfilter.h> #include <linux/netfilter/nf_conntrack_tuple_common.h> +#include <net/flow_offload.h> #include <net/dst.h> struct nf_flowtable; +struct nf_flow_rule; +struct flow_offload; +enum flow_offload_tuple_dir; struct nf_flowtable_type { struct list_head list; int family; int (*init)(struct nf_flowtable *ft); + int (*setup)(struct nf_flowtable *ft, + struct net_device *dev, + enum flow_block_command cmd); + int (*action)(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule); void (*free)(struct nf_flowtable *ft); nf_hookfn *hook; struct module *owner; }; +enum nf_flowtable_flags { + NF_FLOWTABLE_HW_OFFLOAD = 0x1, +}; + struct nf_flowtable { struct list_head list; struct rhashtable rhashtable; + int priority; const struct nf_flowtable_type *type; struct delayed_work gc_work; + unsigned int flags; + struct flow_block flow_block; + possible_net_t net; }; +static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable) +{ + return flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD; +} + enum flow_offload_tuple_dir { FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL, FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY, @@ -63,21 +88,37 @@ struct flow_offload_tuple_rhash { struct flow_offload_tuple tuple; }; -#define FLOW_OFFLOAD_SNAT 0x1 -#define FLOW_OFFLOAD_DNAT 0x2 -#define FLOW_OFFLOAD_DYING 0x4 -#define FLOW_OFFLOAD_TEARDOWN 0x8 +enum nf_flow_flags { + NF_FLOW_SNAT, + NF_FLOW_DNAT, + NF_FLOW_TEARDOWN, + NF_FLOW_HW, + NF_FLOW_HW_DYING, + NF_FLOW_HW_DEAD, + NF_FLOW_HW_REFRESH, +}; + +enum flow_offload_type { + NF_FLOW_OFFLOAD_UNSPEC = 0, + NF_FLOW_OFFLOAD_ROUTE, +}; struct flow_offload { struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX]; - u32 flags; - union { - /* Your private driver data here. 
*/ - u32 timeout; - }; + struct nf_conn *ct; + unsigned long flags; + u16 type; + u32 timeout; + struct rcu_head rcu_head; }; #define NF_FLOW_TIMEOUT (30 * HZ) +#define nf_flowtable_time_stamp (u32)jiffies + +static inline __s32 nf_flow_timeout_delta(unsigned int timeout) +{ + return (__s32)(timeout - nf_flowtable_time_stamp); +} struct nf_flow_route { struct { @@ -85,10 +126,12 @@ struct nf_flow_route { } tuple[FLOW_OFFLOAD_DIR_MAX]; }; -struct flow_offload *flow_offload_alloc(struct nf_conn *ct, - struct nf_flow_route *route); +struct flow_offload *flow_offload_alloc(struct nf_conn *ct); void flow_offload_free(struct flow_offload *flow); +int flow_offload_route_init(struct flow_offload *flow, + const struct nf_flow_route *route); + int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table, struct flow_offload_tuple *tuple); @@ -98,10 +141,6 @@ int nf_flow_table_init(struct nf_flowtable *flow_table); void nf_flow_table_free(struct nf_flowtable *flow_table); void flow_offload_teardown(struct flow_offload *flow); -static inline void flow_offload_dead(struct flow_offload *flow) -{ - flow->flags |= FLOW_OFFLOAD_DYING; -} int nf_flow_snat_port(const struct flow_offload *flow, struct sk_buff *skb, unsigned int thoff, @@ -122,4 +161,25 @@ unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, #define MODULE_ALIAS_NF_FLOWTABLE(family) \ MODULE_ALIAS("nf-flowtable-" __stringify(family)) -#endif /* _FLOW_OFFLOAD_H */ +void nf_flow_offload_add(struct nf_flowtable *flowtable, + struct flow_offload *flow); +void nf_flow_offload_del(struct nf_flowtable *flowtable, + struct flow_offload *flow); +void nf_flow_offload_stats(struct nf_flowtable *flowtable, + struct flow_offload *flow); + +void nf_flow_table_offload_flush(struct nf_flowtable *flowtable); +int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, + struct net_device *dev, + enum flow_block_command cmd); +int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule); +int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule); + +int nf_flow_table_offload_init(void); +void nf_flow_table_offload_exit(void); + +#endif /* _NF_FLOW_TABLE_H */ diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 423cda2c6542..0d412dd63707 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -1,9 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_NAT_H #define _NF_NAT_H + +#include <linux/list.h> #include <linux/netfilter_ipv4.h> -#include <linux/netfilter/nf_nat.h> +#include <linux/netfilter/nf_conntrack_pptp.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_tuple.h> +#include <uapi/linux/netfilter/nf_nat.h> enum nf_nat_manip_type { NF_NAT_MANIP_SRC, @@ -14,20 +19,14 @@ enum nf_nat_manip_type { #define HOOK2MANIP(hooknum) ((hooknum) != NF_INET_POST_ROUTING && \ (hooknum) != NF_INET_LOCAL_IN) -#include <linux/list.h> -#include <linux/netfilter/nf_conntrack_pptp.h> -#include <net/netfilter/nf_conntrack_extend.h> - /* per conntrack: nat application helper private data */ union nf_conntrack_nat_help { /* insert nat helper private data here */ -#if defined(CONFIG_NF_NAT_PPTP) || defined(CONFIG_NF_NAT_PPTP_MODULE) +#if 
IS_ENABLED(CONFIG_NF_NAT_PPTP) struct nf_nat_pptp nat_pptp_info; #endif }; -struct nf_conn; - /* The structure embedded in the conntrack structure. */ struct nf_conn_nat { union nf_conntrack_nat_help help; @@ -48,7 +47,7 @@ struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct); static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct) { -#if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE) +#if IS_ENABLED(CONFIG_NF_NAT) return nf_ct_ext_find(ct, NF_CT_EXT_NAT); #else return NULL; diff --git a/include/net/netfilter/nf_nat_helper.h b/include/net/netfilter/nf_nat_helper.h index 97d7033e93a4..efae84646353 100644 --- a/include/net/netfilter/nf_nat_helper.h +++ b/include/net/netfilter/nf_nat_helper.h @@ -3,9 +3,9 @@ #define _NF_NAT_HELPER_H /* NAT protocol helper routines. */ +#include <linux/skbuff.h> #include <net/netfilter/nf_conntrack.h> - -struct sk_buff; +#include <net/netfilter/nf_conntrack_expect.h> /* These return true or false. */ bool __nf_nat_mangle_tcp_packet(struct sk_buff *skb, struct nf_conn *ct, diff --git a/include/net/netfilter/nf_nat_masquerade.h b/include/net/netfilter/nf_nat_masquerade.h index 54a14d643c34..be7abc9d5f22 100644 --- a/include/net/netfilter/nf_nat_masquerade.h +++ b/include/net/netfilter/nf_nat_masquerade.h @@ -2,6 +2,7 @@ #ifndef _NF_NAT_MASQUERADE_H_ #define _NF_NAT_MASQUERADE_H_ +#include <linux/skbuff.h> #include <net/netfilter/nf_nat.h> unsigned int diff --git a/include/net/netfilter/nf_nat_redirect.h b/include/net/netfilter/nf_nat_redirect.h index c129aacc8ae8..2418653a66db 100644 --- a/include/net/netfilter/nf_nat_redirect.h +++ b/include/net/netfilter/nf_nat_redirect.h @@ -2,6 +2,9 @@ #ifndef _NF_NAT_REDIRECT_H_ #define _NF_NAT_REDIRECT_H_ +#include <linux/skbuff.h> +#include <uapi/linux/netfilter/nf_nat.h> + unsigned int nf_nat_redirect_ipv4(struct sk_buff *skb, const struct nf_nat_ipv4_multi_range_compat *mr, diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 3cb6dcf53a4e..47088083667b 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -5,6 +5,8 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/jhash.h> +#include <linux/netfilter.h> +#include <linux/skbuff.h> /* Each queued (to userspace) skbuff has one of these. 
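 *
 * Editorial aside (not part of the original patch): one entry is allocated
 * per packet handed to userspace, and the entry is completed once the
 * verdict comes back, along the lines of:
 *
 *	nf_reinject(entry, NF_ACCEPT);	// or NF_DROP, NF_REPEAT, ...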
*/ struct nf_queue_entry { @@ -121,4 +123,5 @@ nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family, int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, unsigned int index, unsigned int verdict); + #endif /* _NF_QUEUE_H */ diff --git a/include/net/netfilter/nf_reject.h b/include/net/netfilter/nf_reject.h index 221f877f29d1..9051c3a0c8e7 100644 --- a/include/net/netfilter/nf_reject.h +++ b/include/net/netfilter/nf_reject.h @@ -2,6 +2,9 @@ #ifndef _NF_REJECT_H #define _NF_REJECT_H +#include <linux/types.h> +#include <uapi/linux/in.h> + static inline bool nf_reject_verify_csum(__u8 proto) { /* Skip protocols that don't use 16-bit one's complement checksum diff --git a/include/net/netfilter/nf_synproxy.h b/include/net/netfilter/nf_synproxy.h index 87d73fb5279d..a336f9434e73 100644 --- a/include/net/netfilter/nf_synproxy.h +++ b/include/net/netfilter/nf_synproxy.h @@ -11,6 +11,44 @@ #include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_conntrack_synproxy.h> +struct synproxy_stats { + unsigned int syn_received; + unsigned int cookie_invalid; + unsigned int cookie_valid; + unsigned int cookie_retrans; + unsigned int conn_reopened; +}; + +struct synproxy_net { + struct nf_conn *tmpl; + struct synproxy_stats __percpu *stats; + unsigned int hook_ref4; + unsigned int hook_ref6; +}; + +extern unsigned int synproxy_net_id; +static inline struct synproxy_net *synproxy_pernet(struct net *net) +{ + return net_generic(net, synproxy_net_id); +} + +struct synproxy_options { + u8 options; + u8 wscale; + u16 mss_option; + u16 mss_encode; + u32 tsval; + u32 tsecr; +}; + +struct nf_synproxy_info; +bool synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, + const struct tcphdr *th, + struct synproxy_options *opts); + +void synproxy_init_timestamp_cookie(const struct nf_synproxy_info *info, + struct synproxy_options *opts); + void synproxy_send_client_synack(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts); @@ -20,6 +58,8 @@ bool synproxy_recv_client_ack(struct net *net, const struct tcphdr *th, struct synproxy_options *opts, u32 recv_seq); +struct nf_hook_state; + unsigned int ipv4_synproxy_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *nhs); int nf_synproxy_ipv4_init(struct synproxy_net *snet, struct net *net); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 475d6f28ca67..4170c033d461 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -2,6 +2,7 @@ #ifndef _NET_NF_TABLES_H #define _NET_NF_TABLES_H +#include <asm/unaligned.h> #include <linux/list.h> #include <linux/netfilter.h> #include <linux/netfilter/nfnetlink.h> @@ -100,33 +101,43 @@ struct nft_regs { }; }; -/* Store/load an u16 or u8 integer to/from the u32 data register. +/* Store/load a u8, u16 or u64 integer to/from the u32 data register. * * Note, when using concatenations, register allocation happens at 32-bit * level. So for store instructions, pad the remaining part with zeros to avoid * garbage values.
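 *
 * Editorial sketch (not part of the original patch): the store helpers
 * zero the full 32-bit register before writing the narrower value, so a
 * later whole-register comparison sees no stale bytes:
 *
 *	u32 reg;
 *	u16 val;
 *
 *	nft_reg_store16(&reg, 0x1234);	// reg == 0x00001234 on little endian
 *	val = nft_reg_load16(&reg);	// val == 0x1234 again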
*/ -static inline void nft_reg_store16(u32 *dreg, u16 val) +static inline void nft_reg_store8(u32 *dreg, u8 val) { *dreg = 0; - *(u16 *)dreg = val; + *(u8 *)dreg = val; } -static inline void nft_reg_store8(u32 *dreg, u8 val) +static inline u8 nft_reg_load8(const u32 *sreg) +{ + return *(u8 *)sreg; +} + +static inline void nft_reg_store16(u32 *dreg, u16 val) { *dreg = 0; - *(u8 *)dreg = val; + *(u16 *)dreg = val; } -static inline u16 nft_reg_load16(u32 *sreg) +static inline u16 nft_reg_load16(const u32 *sreg) { return *(u16 *)sreg; } -static inline u8 nft_reg_load8(u32 *sreg) +static inline void nft_reg_store64(u32 *dreg, u64 val) { - return *(u8 *)sreg; + put_unaligned(val, (u64 *)dreg); +} + +static inline u64 nft_reg_load64(const u32 *sreg) +{ + return get_unaligned((u64 *)sreg); } static inline void nft_data_copy(u32 *dst, const struct nft_data *src, @@ -220,6 +231,7 @@ struct nft_userdata { * struct nft_set_elem - generic representation of set elements * * @key: element key + * @key_end: closing element key * @priv: element private data and extensions */ struct nft_set_elem { @@ -227,6 +239,10 @@ struct nft_set_elem { u32 buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)]; struct nft_data val; } key; + union { + u32 buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)]; + struct nft_data val; + } key_end; void *priv; }; @@ -248,11 +264,15 @@ struct nft_set_iter { * @klen: key length * @dlen: data length * @size: number of set elements + * @field_len: length of each field in concatenation, bytes + * @field_count: number of concatenated fields in element */ struct nft_set_desc { unsigned int klen; unsigned int dlen; unsigned int size; + u8 field_len[NFT_REG32_COUNT]; + u8 field_count; }; /** @@ -289,17 +309,23 @@ struct nft_expr; * struct nft_set_ops - nf_tables set operations * * @lookup: look up an element within the set + * @update: update an element if exists, add it if doesn't exist + * @delete: delete an element * @insert: insert new element into set * @activate: activate new element in the next generation * @deactivate: lookup for element and deactivate it in the next generation * @flush: deactivate element in the next generation * @remove: remove element from set - * @walk: iterate over all set elemeennts + * @walk: iterate over all set elements * @get: get set elements * @privsize: function to return size of set private data * @init: initialize private data of new set instance * @destroy: destroy private data of set instance * @elemsize: element private size + * + * Operations lookup, update and delete have simpler interfaces, are faster + * and currently only used in the packet path. All the rest are slower, + * control plane functions. 
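+ *
+ * Editorial sketch (not part of the original patch): a datapath user is
+ * expected to stay on the fast operations, roughly:
+ *
+ *	const struct nft_set_ext *ext;
+ *
+ *	if (set->ops->lookup(net, set, key, &ext))
+ *		; // act on nft_set_ext_data(ext)
+ *
+ * while insert/remove/walk and friends run from the control plane under
+ * the per-netns commit mutex.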
*/ struct nft_set_ops { bool (*lookup)(const struct net *net, @@ -314,6 +340,8 @@ struct nft_set_ops { const struct nft_expr *expr, struct nft_regs *regs, const struct nft_set_ext **ext); + bool (*delete)(const struct nft_set *set, + const u32 *key); int (*insert)(const struct net *net, const struct nft_set *set, @@ -385,6 +413,8 @@ void nft_unregister_set(struct nft_set_type *type); * @dtype: data type (verdict or numeric type defined by userspace) * @objtype: object type (see NFT_OBJECT_* definitions) * @size: maximum set size + * @field_len: length of each field in concatenation, bytes + * @field_count: number of concatenated fields in element * @use: number of rules references to this set * @nelems: number of elements * @ndeact: number of deactivated elements queued for removal @@ -411,6 +441,8 @@ struct nft_set { u32 dtype; u32 objtype; u32 size; + u8 field_len[NFT_REG32_COUNT]; + u8 field_count; u32 use; atomic_t nelems; u32 ndeact; @@ -483,6 +515,7 @@ void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set); * enum nft_set_extensions - set extension type IDs * * @NFT_SET_EXT_KEY: element key + * @NFT_SET_EXT_KEY_END: upper bound element key, for ranges * @NFT_SET_EXT_DATA: mapping data * @NFT_SET_EXT_FLAGS: element flags * @NFT_SET_EXT_TIMEOUT: element timeout @@ -494,6 +527,7 @@ void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set); */ enum nft_set_extensions { NFT_SET_EXT_KEY, + NFT_SET_EXT_KEY_END, NFT_SET_EXT_DATA, NFT_SET_EXT_FLAGS, NFT_SET_EXT_TIMEOUT, @@ -587,6 +621,11 @@ static inline struct nft_data *nft_set_ext_key(const struct nft_set_ext *ext) return nft_set_ext(ext, NFT_SET_EXT_KEY); } +static inline struct nft_data *nft_set_ext_key_end(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_KEY_END); +} + static inline struct nft_data *nft_set_ext_data(const struct nft_set_ext *ext) { return nft_set_ext(ext, NFT_SET_EXT_DATA); @@ -636,7 +675,7 @@ static inline struct nft_object **nft_set_ext_obj(const struct nft_set_ext *ext) void *nft_set_elem_init(const struct nft_set *set, const struct nft_set_ext_tmpl *tmpl, - const u32 *key, const u32 *data, + const u32 *key, const u32 *key_end, const u32 *data, u64 timeout, u64 expiration, gfp_t gfp); void nft_set_elem_destroy(const struct nft_set *set, void *elem, bool destroy_expr); @@ -801,7 +840,8 @@ struct nft_expr_ops { */ struct nft_expr { const struct nft_expr_ops *ops; - unsigned char data[]; + unsigned char data[] + __attribute__((aligned(__alignof__(u64)))); }; static inline void *nft_expr_priv(const struct nft_expr *expr) @@ -870,6 +910,8 @@ enum nft_chain_flags { NFT_CHAIN_HW_OFFLOAD = 0x2, }; +#define NFT_CHAIN_POLICY_UNSET U8_MAX + /** * struct nft_chain - nf_tables chain * @@ -942,25 +984,31 @@ struct nft_stats { struct u64_stats_sync syncp; }; +struct nft_hook { + struct list_head list; + struct nf_hook_ops ops; + struct rcu_head rcu; +}; + /** * struct nft_base_chain - nf_tables base chain * * @ops: netfilter hook ops + * @hook_list: list of netfilter hooks (for NFPROTO_NETDEV family) * @type: chain type * @policy: default policy * @stats: per-cpu chain stats * @chain: the chain - * @dev_name: device name that this base chain is attached to (if any) * @flow_block: flow block (for hardware offload) */ struct nft_base_chain { struct nf_hook_ops ops; + struct list_head hook_list; const struct nft_chain_type *type; u8 policy; u8 flags; struct nft_stats __percpu *stats; struct nft_chain chain; - char dev_name[IFNAMSIZ]; struct flow_block flow_block; }; @@ -1102,6 
+1150,7 @@ struct nft_object_type { * @init: initialize object from netlink attributes * @destroy: release existing stateful object * @dump: netlink dump stateful object + * @update: update stateful object */ struct nft_object_ops { void (*eval)(struct nft_object *obj, @@ -1116,13 +1165,15 @@ struct nft_object_ops { int (*dump)(struct sk_buff *skb, struct nft_object *obj, bool reset); + void (*update)(struct nft_object *obj, + struct nft_object *newobj); const struct nft_object_type *type; }; int nft_register_obj(struct nft_object_type *obj_type); void nft_unregister_obj(struct nft_object_type *obj_type); -#define NFT_FLOWTABLE_DEVICE_MAX 8 +#define NFT_NETDEVICE_MAX 256 /** * struct nft_flowtable - nf_tables flow table @@ -1131,7 +1182,6 @@ void nft_unregister_obj(struct nft_object_type *obj_type); * @table: the table the flow table is contained in * @name: name of this flow table * @hooknum: hook number - * @priority: hook priority * @ops_len: number of hooks in array * @genmask: generation mask * @use: number of references to this flow table @@ -1145,13 +1195,12 @@ struct nft_flowtable { struct nft_table *table; char *name; int hooknum; - int priority; int ops_len; u32 genmask:2, use:30; u64 handle; /* runtime data below here */ - struct nf_hook_ops *ops ____cacheline_aligned; + struct list_head hook_list ____cacheline_aligned; struct nf_flowtable data; }; @@ -1159,6 +1208,10 @@ struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table, const struct nlattr *nla, u8 genmask); +void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx, + struct nft_flowtable *flowtable, + enum nft_trans_phase phase); + void nft_register_flowtable_type(struct nf_flowtable_type *type); void nft_unregister_flowtable_type(struct nf_flowtable_type *type); @@ -1206,6 +1259,8 @@ void nft_trace_notify(struct nft_traceinfo *info); #define MODULE_ALIAS_NFT_OBJ(type) \ MODULE_ALIAS("nft-obj-" __stringify(type)) +#if IS_ENABLED(CONFIG_NF_TABLES) + /* * The gencursor defines two generations, the currently active and the * next one. 
Objects contain a bitmask of 2 bits specifying the generations @@ -1279,6 +1334,8 @@ static inline void nft_set_elem_change_active(const struct net *net, ext->genmask ^= nft_genmask_next(net); } +#endif /* IS_ENABLED(CONFIG_NF_TABLES) */ + /* * We use a free bit in the genmask field to indicate the element * is busy, meaning it is currently being processed either by @@ -1398,10 +1455,16 @@ struct nft_trans_elem { struct nft_trans_obj { struct nft_object *obj; + struct nft_object *newobj; + bool update; }; #define nft_trans_obj(trans) \ (((struct nft_trans_obj *)trans->data)->obj) +#define nft_trans_obj_newobj(trans) \ + (((struct nft_trans_obj *)trans->data)->newobj) +#define nft_trans_obj_update(trans) \ + (((struct nft_trans_obj *)trans->data)->update) struct nft_trans_flowtable { struct nft_flowtable *flowtable; diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 7281895fa6d9..29e7e1021267 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -41,7 +41,7 @@ struct nft_immediate_expr { */ static inline u32 nft_cmp_fast_mask(unsigned int len) { - return cpu_to_le32(~0U >> (FIELD_SIZEOF(struct nft_cmp_fast_expr, + return cpu_to_le32(~0U >> (sizeof_field(struct nft_cmp_fast_expr, data) * BITS_PER_BYTE - len)); } @@ -74,6 +74,7 @@ extern struct nft_set_type nft_set_hash_type; extern struct nft_set_type nft_set_hash_fast_type; extern struct nft_set_type nft_set_rbtree_type; extern struct nft_set_type nft_set_bitmap_type; +extern struct nft_set_type nft_set_pipapo_type; struct nft_expr; struct nft_regs; diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index dabe6fdb553a..d0f1c537b017 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -4,6 +4,7 @@ #include <linux/netfilter_ipv6/ip6_tables.h> #include <net/ipv6.h> +#include <net/netfilter/nf_tables.h> static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, struct sk_buff *skb) diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h index c8b9dec376f5..ea7d1d78b92d 100644 --- a/include/net/netfilter/nf_tables_offload.h +++ b/include/net/netfilter/nf_tables_offload.h @@ -9,6 +9,7 @@ struct nft_offload_reg { u32 len; u32 base_offset; u32 offset; + struct nft_data data; struct nft_data mask; }; @@ -25,6 +26,7 @@ struct nft_offload_ctx { u8 protonum; } dep; unsigned int num_actions; + struct net *net; struct nft_offload_reg regs[NFT_REG32_15 + 1]; }; @@ -43,6 +45,7 @@ struct nft_flow_key { struct flow_dissector_key_ip ip; struct flow_dissector_key_vlan vlan; struct flow_dissector_key_eth_addrs eth_addrs; + struct flow_dissector_key_meta meta; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. 
*/ struct nft_flow_match { @@ -60,7 +63,7 @@ struct nft_flow_rule { #define NFT_OFFLOAD_F_ACTION (1 << 0) struct nft_rule; -struct nft_flow_rule *nft_flow_rule_create(const struct nft_rule *rule); +struct nft_flow_rule *nft_flow_rule_create(struct net *net, const struct nft_rule *rule); void nft_flow_rule_destroy(struct nft_flow_rule *flow); int nft_flow_rule_offload_commit(struct net *net); @@ -75,4 +78,7 @@ int nft_flow_rule_offload_commit(struct net *net); int nft_chain_offload_priority(struct nft_base_chain *basechain); +int nft_offload_init(void); +void nft_offload_exit(void); + #endif diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h index e4c4d8eaca8c..628b6fa579cd 100644 --- a/include/net/netfilter/nft_fib.h +++ b/include/net/netfilter/nft_fib.h @@ -2,6 +2,8 @@ #ifndef _NFT_FIB_H_ #define _NFT_FIB_H_ +#include <net/netfilter/nf_tables.h> + struct nft_fib { enum nft_registers dreg:8; u8 result; diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h index 5c69e9b09388..07e2fd507963 100644 --- a/include/net/netfilter/nft_meta.h +++ b/include/net/netfilter/nft_meta.h @@ -2,6 +2,8 @@ #ifndef _NFT_META_H_ #define _NFT_META_H_ +#include <net/netfilter/nf_tables.h> + struct nft_meta { enum nft_meta_keys key:8; union { diff --git a/include/net/netfilter/nft_reject.h b/include/net/netfilter/nft_reject.h index de80c50761f0..56b123a42220 100644 --- a/include/net/netfilter/nft_reject.h +++ b/include/net/netfilter/nft_reject.h @@ -2,6 +2,11 @@ #ifndef _NFT_REJECT_H_ #define _NFT_REJECT_H_ +#include <linux/types.h> +#include <net/netlink.h> +#include <net/netfilter/nf_tables.h> +#include <uapi/linux/netfilter/nf_tables.h> + struct nft_reject { enum nft_reject_types type:8; u8 icmp_code; diff --git a/include/net/netlink.h b/include/net/netlink.h index b140c8f1be22..56c365dc6dc7 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -1735,7 +1735,7 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) } /** - * nla_validate_nested - Validate a stream of nested attributes + * __nla_validate_nested - Validate a stream of nested attributes * @start: container attribute * @maxtype: maximum attribute type to be expected * @policy: validation policy @@ -1758,9 +1758,9 @@ static inline int __nla_validate_nested(const struct nlattr *start, int maxtype, } static inline int -nl80211_validate_nested(const struct nlattr *start, int maxtype, - const struct nla_policy *policy, - struct netlink_ext_ack *extack) +nla_validate_nested(const struct nlattr *start, int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) { return __nla_validate_nested(start, maxtype, policy, NL_VALIDATE_STRICT, extack); diff --git a/include/net/netns/can.h b/include/net/netns/can.h index ca9bd9fba5b5..b6ab7d1530d7 100644 --- a/include/net/netns/can.h +++ b/include/net/netns/can.h @@ -9,8 +9,8 @@ #include <linux/spinlock.h> struct can_dev_rcv_lists; -struct s_stats; -struct s_pstats; +struct can_pkg_stats; +struct can_rcv_lists_stats; struct netns_can { #if IS_ENABLED(CONFIG_PROC_FS) @@ -28,11 +28,11 @@ struct netns_can { #endif /* receive filters subscribed for 'all' CAN devices */ - struct can_dev_rcv_lists *can_rx_alldev_list; - spinlock_t can_rcvlists_lock; - struct timer_list can_stattimer;/* timer for statistics update */ - struct s_stats *can_stats; /* packet statistics */ - struct s_pstats *can_pstats; /* receive list statistics */ + struct can_dev_rcv_lists *rx_alldev_list; + spinlock_t rcvlists_lock; + struct 
timer_list stattimer; /* timer for statistics update */ + struct can_pkg_stats *pkg_stats; + struct can_rcv_lists_stats *rcv_lists_stats; /* CAN GW per-net gateway jobs */ struct hlist_head cgw_list; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index bc24a8ec1ce5..08b98414d94e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -116,6 +116,7 @@ struct netns_ipv4 { int sysctl_tcp_l3mdev_accept; #endif int sysctl_tcp_mtu_probing; + int sysctl_tcp_mtu_probe_floor; int sysctl_tcp_base_mss; int sysctl_tcp_min_snd_mss; int sysctl_tcp_probe_threshold; @@ -153,6 +154,7 @@ struct netns_ipv4 { int sysctl_tcp_adv_win_scale; int sysctl_tcp_frto; int sysctl_tcp_nometrics_save; + int sysctl_tcp_no_ssthresh_metrics_save; int sysctl_tcp_moderate_rcvbuf; int sysctl_tcp_tso_win_divisor; int sysctl_tcp_workaround_signed_windows; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 022a0fd1a5a4..5ec054473d81 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -83,6 +83,9 @@ struct netns_ipv6 { #ifdef CONFIG_IPV6_MULTIPLE_TABLES unsigned int fib6_rules_require_fldissect; bool fib6_has_custom_rules; +#ifdef CONFIG_IPV6_SUBTREES + unsigned int fib6_routes_require_src; +#endif struct rt6_info *ip6_prohibit_entry; struct rt6_info *ip6_blk_hole_entry; struct fib6_table *fib6_local_tbl; diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h index 830bdf345b17..b5fdb108d602 100644 --- a/include/net/netns/mib.h +++ b/include/net/netns/mib.h @@ -24,6 +24,9 @@ struct netns_mib { #ifdef CONFIG_XFRM_STATISTICS DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics); #endif +#if IS_ENABLED(CONFIG_TLS) + DEFINE_SNMP_STAT(struct linux_tls_mib, tls_statistics); +#endif }; #endif diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h index 286fd960896f..a1a8d45adb42 100644 --- a/include/net/netns/nftables.h +++ b/include/net/netns/nftables.h @@ -7,6 +7,7 @@ struct netns_nftables { struct list_head tables; struct list_head commit_list; + struct list_head module_list; struct mutex commit_mutex; unsigned int base_seq; u8 gencursor; diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index 0db7fb3e4e15..d8d02e4188d1 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -89,6 +89,12 @@ struct netns_sctp { */ int pf_retrans; + /* Primary.Switchover.Max.Retrans sysctl value + * taken from: + * https://tools.ietf.org/html/rfc7829 + */ + int ps_retrans; + /* * Disable Potentially-Failed feature, the feature is enabled by default * pf_enable - 0 : disable pf @@ -97,6 +103,14 @@ struct netns_sctp { int pf_enable; /* + * Disable Potentially-Failed state exposure, ignored by default + * pf_expose - 0 : compatible with old applications (by default) + * - 1 : disable pf state exposure + * - 2 : enable pf state exposure + */ + int pf_expose; + + /* * Policy for performing sctp/socket accounting * 0 - do socket level accounting, all assocs share sk_sndbuf * 1 - do sctp accounting, each asoc may use sk_sndbuf bytes @@ -128,6 +142,9 @@ struct netns_sctp { /* Flag to indicate if stream interleave is enabled */ int intl_enable; + /* Flag to indicate if ecn is enabled */ + int ecn_enable; + /* * Policy to control SCTP IPv4 address scoping * 0 - Disable IPv4 address scoping diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h index cfc9441ef074..dec7522b6ce1 100644 --- a/include/net/netprio_cgroup.h +++ b/include/net/netprio_cgroup.h @@ -26,7 +26,7 @@ static inline u32 task_netprioidx(struct
task_struct *p) rcu_read_lock(); css = task_css(p, net_prio_cgrp_id); - idx = css->cgroup->id; + idx = css->id; rcu_read_unlock(); return idx; } diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 25f1f9a8419b..331ebbc94fe7 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -141,12 +141,6 @@ static inline unsigned int nexthop_num_path(const struct nexthop *nh) nh_grp = rcu_dereference_rtnl(nh->nh_grp); rc = nh_grp->num_nh; - } else { - const struct nh_info *nhi; - - nhi = rcu_dereference_rtnl(nh->nh_info); - if (nhi->reject_nh) - rc = 0; } return rc; @@ -167,7 +161,8 @@ struct nexthop *nexthop_mpath_select(const struct nexthop *nh, int nhsel) } static inline -int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh) +int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh, + u8 rt_family) { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); int i; @@ -178,7 +173,7 @@ int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh) struct fib_nh_common *nhc = &nhi->fib_nhc; int weight = nhg->nh_entries[i].weight; - if (fib_add_nexthop(skb, nhc, weight) < 0) + if (fib_add_nexthop(skb, nhc, weight, rt_family) < 0) return -EMSGSIZE; } diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 2cbcdbdec254..cfbed00ba7ee 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -34,8 +34,18 @@ #include <linux/ptr_ring.h> #include <linux/dma-direction.h> -#define PP_FLAG_DMA_MAP 1 /* Should page_pool do the DMA map/unmap */ -#define PP_FLAG_ALL PP_FLAG_DMA_MAP +#define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA + * map/unmap + */ +#define PP_FLAG_DMA_SYNC_DEV BIT(1) /* If set all pages that the driver gets + * from page_pool will be + * DMA-synced-for-device according to + * the length provided by the device + * driver. + * Please note DMA-sync-for-CPU is still + * device driver responsibility + */ +#define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV) /* * Fast allocation side cache array/stack @@ -65,12 +75,19 @@ struct page_pool_params { int nid; /* Numa node id to allocate pages from */ struct device *dev; /* device, for DMA pre-mapping purposes */ enum dma_data_direction dma_dir; /* DMA mapping direction */ + unsigned int max_len; /* max DMA sync memory size */ + unsigned int offset; /* DMA addr offset */ }; struct page_pool { struct page_pool_params p; - u32 pages_state_hold_cnt; + struct delayed_work release_dw; + void (*disconnect)(void *); + unsigned long defer_start; + unsigned long defer_warn; + + u32 pages_state_hold_cnt; /* * Data structure for allocation side @@ -107,6 +124,8 @@ struct page_pool { * refcnt serves purpose is to simplify drivers error handling. */ refcount_t user_cnt; + + u64 destroy_cnt; }; struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); @@ -129,29 +148,23 @@ inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool) struct page_pool *page_pool_create(const struct page_pool_params *params); -void __page_pool_free(struct page_pool *pool); -static inline void page_pool_free(struct page_pool *pool) -{ - /* When page_pool isn't compiled-in, net/core/xdp.c doesn't - * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
- */ #ifdef CONFIG_PAGE_POOL - __page_pool_free(pool); -#endif -} - -/* Drivers use this instead of page_pool_free */ +void page_pool_destroy(struct page_pool *pool); +void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *)); +#else static inline void page_pool_destroy(struct page_pool *pool) { - if (!pool) - return; +} - page_pool_free(pool); +static inline void page_pool_use_xdp_mem(struct page_pool *pool, + void (*disconnect)(void *)) +{ } +#endif /* Never call this directly, use helpers below */ -void __page_pool_put_page(struct page_pool *pool, - struct page *page, bool allow_direct); +void __page_pool_put_page(struct page_pool *pool, struct page *page, + unsigned int dma_sync_size, bool allow_direct); static inline void page_pool_put_page(struct page_pool *pool, struct page *page, bool allow_direct) @@ -160,32 +173,14 @@ static inline void page_pool_put_page(struct page_pool *pool, * allow registering MEM_TYPE_PAGE_POOL, but shield linker. */ #ifdef CONFIG_PAGE_POOL - __page_pool_put_page(pool, page, allow_direct); + __page_pool_put_page(pool, page, -1, allow_direct); #endif } /* Very limited use-cases allow recycle direct */ static inline void page_pool_recycle_direct(struct page_pool *pool, struct page *page) { - __page_pool_put_page(pool, page, true); -} - -/* API user MUST have disconnected alloc-side (not allowed to call - * page_pool_alloc_pages()) before calling this. The free-side can - * still run concurrently, to handle in-flight packet-pages. - * - * A request to shutdown can fail (with false) if there are still - * in-flight packet-pages. - */ -bool __page_pool_request_shutdown(struct page_pool *pool); -static inline bool page_pool_request_shutdown(struct page_pool *pool) -{ - bool safe_to_remove = false; - -#ifdef CONFIG_PAGE_POOL - safe_to_remove = __page_pool_request_shutdown(pool); -#endif - return safe_to_remove; + __page_pool_put_page(pool, page, -1, true); } /* Disconnects a page (from a page_pool). API users can have a need @@ -216,14 +211,16 @@ static inline bool is_page_pool_compiled_in(void) #endif } -static inline void page_pool_get(struct page_pool *pool) -{ - refcount_inc(&pool->user_cnt); -} - static inline bool page_pool_put(struct page_pool *pool) { return refcount_dec_and_test(&pool->user_cnt); } +/* Caller must provide appropriate safe context, e.g. NAPI. 
*/ +void page_pool_update_nid(struct page_pool *pool, int new_nid); +static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) +{ + if (unlikely(pool->p.nid != new_nid)) + page_pool_update_nid(pool, new_nid); +} #endif /* _NET_PAGE_POOL_H */ diff --git a/include/net/pie.h b/include/net/pie.h new file mode 100644 index 000000000000..fd5a37cb7993 --- /dev/null +++ b/include/net/pie.h @@ -0,0 +1,138 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __NET_SCHED_PIE_H +#define __NET_SCHED_PIE_H + +#include <linux/ktime.h> +#include <linux/skbuff.h> +#include <linux/types.h> +#include <net/inet_ecn.h> +#include <net/pkt_sched.h> + +#define MAX_PROB U64_MAX +#define DTIME_INVALID U64_MAX +#define QUEUE_THRESHOLD 16384 +#define DQCOUNT_INVALID -1 +#define PIE_SCALE 8 + +/** + * struct pie_params - contains pie parameters + * @target: target delay in pschedtime + * @tupdate: interval at which drop probability is calculated + * @limit: total number of packets that can be in the queue + * @alpha: parameter to control drop probability + * @beta: parameter to control drop probability + * @ecn: is ECN marking of packets enabled + * @bytemode: is drop probability scaled based on pkt size + * @dq_rate_estimator: is Little's law used for qdelay calculation + */ +struct pie_params { + psched_time_t target; + u32 tupdate; + u32 limit; + u32 alpha; + u32 beta; + u8 ecn; + u8 bytemode; + u8 dq_rate_estimator; +}; + +/** + * struct pie_vars - contains pie variables + * @qdelay: current queue delay + * @qdelay_old: queue delay in previous qdelay calculation + * @burst_time: burst time allowance + * @dq_tstamp: timestamp at which dq rate was last calculated + * @prob: drop probability + * @accu_prob: accumulated drop probability + * @dq_count: number of bytes dequeued in a measurement cycle + * @avg_dq_rate: calculated average dq rate + * @qlen_old: queue length during previous qdelay calculation + * @accu_prob_overflows: number of times accu_prob overflows + */ +struct pie_vars { + psched_time_t qdelay; + psched_time_t qdelay_old; + psched_time_t burst_time; + psched_time_t dq_tstamp; + u64 prob; + u64 accu_prob; + u64 dq_count; + u32 avg_dq_rate; + u32 qlen_old; + u8 accu_prob_overflows; +}; + +/** + * struct pie_stats - contains pie stats + * @packets_in: total number of packets enqueued + * @dropped: packets dropped due to pie action + * @overlimit: packets dropped due to lack of space in queue + * @ecn_mark: packets marked with ECN + * @maxq: maximum queue size + */ +struct pie_stats { + u32 packets_in; + u32 dropped; + u32 overlimit; + u32 ecn_mark; + u32 maxq; +}; + +/** + * struct pie_skb_cb - contains private skb vars + * @enqueue_time: timestamp when the packet is enqueued + * @mem_usage: size of the skb during enqueue + */ +struct pie_skb_cb { + psched_time_t enqueue_time; + u32 mem_usage; +}; + +static inline void pie_params_init(struct pie_params *params) +{ + params->target = PSCHED_NS2TICKS(15 * NSEC_PER_MSEC); /* 15 ms */ + params->tupdate = usecs_to_jiffies(15 * USEC_PER_MSEC); /* 15 ms */ + params->limit = 1000; + params->alpha = 2; + params->beta = 20; + params->ecn = false; + params->bytemode = false; + params->dq_rate_estimator = false; +} + +static inline void pie_vars_init(struct pie_vars *vars) +{ + vars->burst_time = PSCHED_NS2TICKS(150 * NSEC_PER_MSEC); /* 150 ms */ + vars->dq_tstamp = DTIME_INVALID; + vars->accu_prob = 0; + vars->dq_count = DQCOUNT_INVALID; + vars->avg_dq_rate = 0; + vars->accu_prob_overflows = 0; +} + +static inline struct pie_skb_cb
*get_pie_cb(const struct sk_buff *skb) +{ + qdisc_cb_private_validate(skb, sizeof(struct pie_skb_cb)); + return (struct pie_skb_cb *)qdisc_skb_cb(skb)->data; +} + +static inline psched_time_t pie_get_enqueue_time(const struct sk_buff *skb) +{ + return get_pie_cb(skb)->enqueue_time; +} + +static inline void pie_set_enqueue_time(struct sk_buff *skb) +{ + get_pie_cb(skb)->enqueue_time = psched_get_time(); +} + +bool pie_drop_early(struct Qdisc *sch, struct pie_params *params, + struct pie_vars *vars, u32 qlen, u32 packet_size); + +void pie_process_dequeue(struct sk_buff *skb, struct pie_params *params, + struct pie_vars *vars, u32 qlen); + +void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars, + u32 qlen); + +#endif diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 98be18ef1ed3..a972244ab193 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -70,15 +70,6 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block) return block->q; } -int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident); -int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident); -void __tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident); -void tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident); - int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode); @@ -137,32 +128,6 @@ void tc_setup_cb_block_unregister(struct tcf_block *block, flow_setup_cb_t *cb, { } -static inline -int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - return 0; -} - -static inline -int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - return 0; -} - -static inline -void __tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ -} - -static inline -void tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ -} - static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { @@ -176,31 +141,38 @@ __cls_set_class(unsigned long *clp, unsigned long cl) return xchg(clp, cl); } -static inline unsigned long -cls_set_class(struct Qdisc *q, unsigned long *clp, unsigned long cl) +static inline void +__tcf_bind_filter(struct Qdisc *q, struct tcf_result *r, unsigned long base) { - unsigned long old_cl; + unsigned long cl; - sch_tree_lock(q); - old_cl = __cls_set_class(clp, cl); - sch_tree_unlock(q); - return old_cl; + cl = q->ops->cl_ops->bind_tcf(q, base, r->classid); + cl = __cls_set_class(&r->class, cl); + if (cl) + q->ops->cl_ops->unbind_tcf(q, cl); } static inline void tcf_bind_filter(struct tcf_proto *tp, struct tcf_result *r, unsigned long base) { struct Qdisc *q = tp->chain->block->q; - unsigned long cl; /* Check q as it is not set for shared blocks. In that case, * setting class is not supported. 
*/ if (!q) return; - cl = q->ops->cl_ops->bind_tcf(q, base, r->classid); - cl = cls_set_class(q, &r->class, cl); - if (cl) + sch_tree_lock(q); + __tcf_bind_filter(q, r, base); + sch_tree_unlock(q); +} + +static inline void +__tcf_unbind_filter(struct Qdisc *q, struct tcf_result *r) +{ + unsigned long cl; + + if ((cl = __cls_set_class(&r->class, 0)) != 0) q->ops->cl_ops->unbind_tcf(q, cl); } @@ -208,12 +180,10 @@ static inline void tcf_unbind_filter(struct tcf_proto *tp, struct tcf_result *r) { struct Qdisc *q = tp->chain->block->q; - unsigned long cl; if (!q) return; - if ((cl = __cls_set_class(&r->class, 0)) != 0) - q->ops->cl_ops->unbind_tcf(q, cl); + __tcf_unbind_filter(q, r); } struct tcf_exts { @@ -539,9 +509,26 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) } int tc_setup_flow_action(struct flow_action *flow_action, - const struct tcf_exts *exts); + const struct tcf_exts *exts, bool rtnl_held); +void tc_cleanup_flow_action(struct flow_action *flow_action); + int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, - void *type_data, bool err_stop); + void *type_data, bool err_stop, bool rtnl_held); +int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *flags, unsigned int *in_hw_count, bool rtnl_held); +int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *old_flags, unsigned int *old_in_hw_count, + u32 *new_flags, unsigned int *new_in_hw_count, + bool rtnl_held); +int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *flags, unsigned int *in_hw_count, bool rtnl_held); +int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp, + bool add, flow_setup_cb_t *cb, + enum tc_setup_type type, void *type_data, + void *cb_priv, u32 *flags, unsigned int *in_hw_count); unsigned int tcf_exts_num_actions(struct tcf_exts *exts); struct tc_cls_u32_knode { @@ -809,9 +796,8 @@ enum tc_prio_command { struct tc_prio_qopt_offload_params { int bands; u8 priomap[TC_PRIO_MAX + 1]; - /* In case that a prio qdisc is offloaded and now is changed to a - * non-offloadedable config, it needs to update the backlog & qlen - * values to negate the HW backlog & qlen values (and only them). + /* At the point of un-offloading the Qdisc, the reported backlog and + * qlen need to be reduced by the portion that is in HW. */ struct gnet_stats_queue *qstats; }; @@ -842,4 +828,57 @@ struct tc_root_qopt_offload { bool ingress; }; +enum tc_ets_command { + TC_ETS_REPLACE, + TC_ETS_DESTROY, + TC_ETS_STATS, + TC_ETS_GRAFT, +}; + +struct tc_ets_qopt_offload_replace_params { + unsigned int bands; + u8 priomap[TC_PRIO_MAX + 1]; + unsigned int quanta[TCQ_ETS_MAX_BANDS]; /* 0 for strict bands. 
*/ + unsigned int weights[TCQ_ETS_MAX_BANDS]; + struct gnet_stats_queue *qstats; +}; + +struct tc_ets_qopt_offload_graft_params { + u8 band; + u32 child_handle; +}; + +struct tc_ets_qopt_offload { + enum tc_ets_command command; + u32 handle; + u32 parent; + union { + struct tc_ets_qopt_offload_replace_params replace_params; + struct tc_qopt_offload_stats stats; + struct tc_ets_qopt_offload_graft_params graft_params; + }; +}; + +enum tc_tbf_command { + TC_TBF_REPLACE, + TC_TBF_DESTROY, + TC_TBF_STATS, +}; + +struct tc_tbf_qopt_offload_replace_params { + struct psched_ratecfg rate; + u32 max_size; + struct gnet_stats_queue *qstats; +}; + +struct tc_tbf_qopt_offload { + enum tc_tbf_command command; + u32 handle; + u32 parent; + union { + struct tc_tbf_qopt_offload_replace_params replace_params; + struct tc_qopt_offload_stats stats; + }; +}; + #endif diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index a16fbe9a2a67..6a70845bd9ab 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -118,7 +118,12 @@ void __qdisc_run(struct Qdisc *q); static inline void qdisc_run(struct Qdisc *q) { if (qdisc_run_begin(q)) { - __qdisc_run(q); + /* NOLOCK qdisc must check 'state' under the qdisc seqlock + * to avoid racing with dev_qdisc_reset() + */ + if (!(q->flags & TCQ_F_NOLOCK) || + likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) + __qdisc_run(q); qdisc_run_end(q); } } @@ -161,4 +166,27 @@ struct tc_etf_qopt_offload { s32 queue; }; +struct tc_taprio_sched_entry { + u8 command; /* TC_TAPRIO_CMD_* */ + + /* The gate_mask in the offloading side refers to traffic classes */ + u32 gate_mask; + u32 interval; +}; + +struct tc_taprio_qopt_offload { + u8 enable; + ktime_t base_time; + u64 cycle_time; + u64 cycle_time_extension; + + size_t num_entries; + struct tc_taprio_sched_entry entries[0]; +}; + +/* Reference counting */ +struct tc_taprio_qopt_offload *taprio_offload_get(struct tc_taprio_qopt_offload + *offload); +void taprio_offload_free(struct tc_taprio_qopt_offload *offload); + #endif diff --git a/include/net/psample.h b/include/net/psample.h index 37a4df2325b2..68ae16bb0a4a 100644 --- a/include/net/psample.h +++ b/include/net/psample.h @@ -11,9 +11,11 @@ struct psample_group { u32 group_num; u32 refcount; u32 seq; + struct rcu_head rcu; }; struct psample_group *psample_group_get(struct net *net, u32 group_num); +void psample_group_take(struct psample_group *group); void psample_group_put(struct psample_group *group); #if IS_ENABLED(CONFIG_PSAMPLE) diff --git a/include/net/request_sock.h b/include/net/request_sock.h index fd178d58fa84..cf8b33213bbc 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -185,7 +185,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, static inline bool reqsk_queue_empty(const struct request_sock_queue *queue) { - return queue->rskq_accept_head == NULL; + return READ_ONCE(queue->rskq_accept_head) == NULL; } static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue, @@ -197,7 +197,7 @@ static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue req = queue->rskq_accept_head; if (req) { sk_acceptq_removed(parent); - queue->rskq_accept_head = req->dl_next; + WRITE_ONCE(queue->rskq_accept_head, req->dl_next); if (queue->rskq_accept_head == NULL) queue->rskq_accept_tail = NULL; } diff --git a/include/net/route.h b/include/net/route.h index 630a0493f1f3..a9c60fc68e36 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -53,10 +53,11 @@ struct 
rtable { unsigned int rt_flags; __u16 rt_type; __u8 rt_is_input; - u8 rt_gw_family; + __u8 rt_uses_gateway; int rt_iif; + u8 rt_gw_family; /* Info on neighbour */ union { __be32 rt_gw4; @@ -184,6 +185,10 @@ int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src, u8 tos, struct net_device *devin, struct fib_result *res); +int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src, + u8 tos, struct net_device *devin, + const struct sk_buff *hint); + static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, u8 tos, struct net_device *devin) { @@ -233,7 +238,7 @@ void rt_del_uncached_list(struct rtable *rt); int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb, u32 table_id, struct fib_info *fi, - int *fa_index, int fa_start); + int *fa_index, int fa_start, unsigned int flags); static inline void ip_rt_put(struct rtable *rt) { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 6b6b01234dd9..151208704ed2 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -13,6 +13,9 @@ #include <linux/refcount.h> #include <linux/workqueue.h> #include <linux/mutex.h> +#include <linux/rwsem.h> +#include <linux/atomic.h> +#include <linux/hashtable.h> #include <net/gen_stats.h> #include <net/rtnetlink.h> #include <net/flow_offload.h> @@ -23,9 +26,6 @@ struct tcf_walker; struct module; struct bpf_flow_keys; -typedef int tc_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv, - enum tc_setup_type type, void *type_data); - struct qdisc_rate_table { struct tc_ratespec rate; u32 data[256]; @@ -149,8 +149,8 @@ static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) static inline bool qdisc_is_empty(const struct Qdisc *qdisc) { if (qdisc_is_percpu_stats(qdisc)) - return qdisc->empty; - return !qdisc->q.qlen; + return READ_ONCE(qdisc->empty); + return !READ_ONCE(qdisc->q.qlen); } static inline bool qdisc_run_begin(struct Qdisc *qdisc) @@ -158,7 +158,7 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) if (qdisc->flags & TCQ_F_NOLOCK) { if (!spin_trylock(&qdisc->seqlock)) return false; - qdisc->empty = false; + WRITE_ONCE(qdisc->empty, false); } else if (qdisc_is_running(qdisc)) { return false; } @@ -308,12 +308,18 @@ struct tcf_proto_ops { int (*delete)(struct tcf_proto *tp, void *arg, bool *last, bool rtnl_held, struct netlink_ext_ack *); + bool (*delete_empty)(struct tcf_proto *tp); void (*walk)(struct tcf_proto *tp, struct tcf_walker *arg, bool rtnl_held); int (*reoffload)(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, void *cb_priv, struct netlink_ext_ack *extack); - void (*bind_class)(void *, u32, unsigned long); + void (*hw_add)(struct tcf_proto *tp, + void *type_data); + void (*hw_del)(struct tcf_proto *tp, + void *type_data); + void (*bind_class)(void *, u32, unsigned long, + void *, unsigned long); void * (*tmplt_create)(struct net *net, struct tcf_chain *chain, struct nlattr **tca, @@ -332,6 +338,10 @@ struct tcf_proto_ops { int flags; }; +/* Classifiers setting TCF_PROTO_OPS_DOIT_UNLOCKED in tcf_proto_ops->flags + * are expected to implement tcf_proto_ops->delete_empty(), otherwise race + * conditions can occur when filters are inserted/deleted simultaneously. 
+ */ enum tcf_proto_ops_flags { TCF_PROTO_OPS_DOIT_UNLOCKED = 1, }; @@ -359,6 +369,7 @@ struct tcf_proto { bool deleting; refcount_t refcnt; struct rcu_head rcu; + struct hlist_node destroy_ht_node; }; struct qdisc_skb_cb { @@ -399,16 +410,20 @@ struct tcf_block { refcount_t refcnt; struct net *net; struct Qdisc *q; + struct rw_semaphore cb_lock; /* protects cb_list and offload counters */ struct flow_block flow_block; struct list_head owner_list; bool keep_dst; - unsigned int offloadcnt; /* Number of oddloaded filters */ + atomic_t offloadcnt; /* Number of offloaded filters */ unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */ + unsigned int lockeddevcnt; /* Number of devs that require rtnl lock. */ struct { struct tcf_chain *chain; struct list_head filter_chain_list; } chain0; struct rcu_head rcu; + DECLARE_HASHTABLE(proto_destroy_ht, 7); + struct mutex proto_destroy_lock; /* Lock for proto_destroy hashtable. */ }; #ifdef CONFIG_PROVE_LOCKING @@ -439,37 +454,6 @@ static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp) #define tcf_proto_dereference(p, tp) \ rcu_dereference_protected(p, lockdep_tcf_proto_is_locked(tp)) -static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) -{ - if (*flags & TCA_CLS_FLAGS_IN_HW) - return; - *flags |= TCA_CLS_FLAGS_IN_HW; - block->offloadcnt++; -} - -static inline void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) -{ - if (!(*flags & TCA_CLS_FLAGS_IN_HW)) - return; - *flags &= ~TCA_CLS_FLAGS_IN_HW; - block->offloadcnt--; -} - -static inline void -tc_cls_offload_cnt_update(struct tcf_block *block, u32 *cnt, - u32 *flags, bool add) -{ - if (add) { - if (!*cnt) - tcf_block_offload_inc(block, flags); - (*cnt)++; - } else { - (*cnt)--; - if (!*cnt) - tcf_block_offload_dec(block, flags); - } -} - static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) { struct qdisc_skb_cb *qcb; @@ -520,6 +504,11 @@ static inline struct Qdisc *qdisc_root(const struct Qdisc *qdisc) return q; } +static inline struct Qdisc *qdisc_root_bh(const struct Qdisc *qdisc) +{ + return rcu_dereference_bh(qdisc->dev_queue->qdisc); +} + static inline struct Qdisc *qdisc_root_sleeping(const struct Qdisc *qdisc) { return qdisc->dev_queue->qdisc_sleeping; } @@ -1307,17 +1296,9 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, struct mini_Qdisc __rcu **p_miniq); -static inline void skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res) +static inline int skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res) { - struct gnet_stats_queue *stats = res->qstats; - int ret; - - if (res->ingress) - ret = netif_receive_skb(skb); - else - ret = dev_queue_xmit(skb); - if (ret && stats) - qstats_overlimit_inc(res->qstats); + return res->ingress ?
netif_receive_skb(skb) : dev_queue_xmit(skb); } #endif diff --git a/include/net/sctp/auth.h b/include/net/sctp/auth.h index caaae2de9099..d4b3b2dcd15b 100644 --- a/include/net/sctp/auth.h +++ b/include/net/sctp/auth.h @@ -107,5 +107,7 @@ int sctp_auth_del_key_id(struct sctp_endpoint *ep, struct sctp_association *asoc, __u16 key_id); int sctp_auth_deact_key_id(struct sctp_endpoint *ep, struct sctp_association *asoc, __u16 key_id); +int sctp_auth_init(struct sctp_endpoint *ep, gfp_t gfp); +void sctp_auth_free(struct sctp_endpoint *ep); #endif diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 823afc42a3aa..15b4d9aec7ff 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -286,6 +286,18 @@ enum { SCTP_MAX_GABS = 16 }; * functions simpler to write. */ +/* These are the values for pf exposure, UNUSED is to keep compatible with old + * applications by default. + */ +enum { + SCTP_PF_EXPOSE_UNSET, + SCTP_PF_EXPOSE_DISABLE, + SCTP_PF_EXPOSE_ENABLE, +}; +#define SCTP_PF_EXPOSE_MAX SCTP_PF_EXPOSE_ENABLE + +#define SCTP_PS_RETRANS_MAX 0xffff + /* These return values describe the success or failure of a number of * routines which form the lower interface to SCTP_outqueue. */ diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 5d60f13d2347..3ab5c6bbb90b 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -610,4 +610,9 @@ static inline __u32 sctp_min_frag_point(struct sctp_sock *sp, __u16 datasize) return sctp_mtu_payload(sp, SCTP_DEFAULT_MINSEGMENT, datasize); } +static inline bool sctp_newsk_ready(const struct sock *sk) +{ + return sock_flag(sk, SOCK_DEAD) || sk->sk_socket; +} + #endif /* __net_sctp_h__ */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index ba5c4f6eede5..314a2fa21d6b 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -184,7 +184,8 @@ struct sctp_sock { __u32 flowlabel; __u8 dscp; - int pf_retrans; + __u16 pf_retrans; + __u16 ps_retrans; /* The initial Path MTU to use for new associations. */ __u32 pathmtu; @@ -215,6 +216,7 @@ struct sctp_sock { __u32 adaptation_ind; __u32 pd_point; __u16 nodelay:1, + pf_expose:2, reuse:1, disable_fragments:1, v4mapped:1, @@ -896,7 +898,9 @@ struct sctp_transport { * and will be initialized from the assocs value. This can be changed * using the SCTP_PEER_ADDR_THLDS socket option */ - int pf_retrans; + __u16 pf_retrans; + /* Used for primary path switchover. */ + __u16 ps_retrans; /* PMTU : The current known path MTU. */ __u32 pathmtu; @@ -1239,6 +1243,9 @@ struct sctp_ep_common { /* What socket does this endpoint belong to? */ struct sock *sk; + /* Cache netns and it won't change once set */ + struct net *net; + /* This is where we receive inbound chunks. */ struct sctp_inq inqueue; @@ -1322,9 +1329,11 @@ struct sctp_endpoint { /* SCTP-AUTH: endpoint shared keys */ struct list_head endpoint_shared_keys; __u16 active_key_id; - __u8 auth_enable:1, + __u8 ecn_enable:1, + auth_enable:1, intl_enable:1, prsctp_enable:1, + asconf_enable:1, reconf_enable:1; __u8 strreset_enable; @@ -1770,7 +1779,9 @@ struct sctp_association { * and will be initialized from the assocs value. This can be * changed using the SCTP_PEER_ADDR_THLDS socket option */ - int pf_retrans; + __u16 pf_retrans; + /* Used for primary path switchover. */ + __u16 ps_retrans; /* Maximum number of times the endpoint will retransmit INIT */ __u16 max_init_attempts; @@ -2051,6 +2062,7 @@ struct sctp_association { __u8 need_ecne:1, /* Need to send an ECNE Chunk? 
*/ temp:1, /* Is it a temporary association? */ + pf_expose:2, /* Expose pf state? */ force_delay:1; __u8 strreset_enable; diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index e1a92c4610f3..0b032b92da0b 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -80,13 +80,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_assoc_change( struct sctp_chunk *chunk, gfp_t gfp); -struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change( - const struct sctp_association *asoc, - const struct sockaddr_storage *aaddr, - int flags, - int state, - int error, - gfp_t gfp); +void sctp_ulpevent_nofity_peer_addr_change(struct sctp_transport *transport, + int state, int error); struct sctp_ulpevent *sctp_ulpevent_make_remote_error( const struct sctp_association *asoc, @@ -100,6 +95,13 @@ struct sctp_ulpevent *sctp_ulpevent_make_send_failed( __u32 error, gfp_t gfp); +struct sctp_ulpevent *sctp_ulpevent_make_send_failed_event( + const struct sctp_association *asoc, + struct sctp_chunk *chunk, + __u16 flags, + __u32 error, + gfp_t gfp); + struct sctp_ulpevent *sctp_ulpevent_make_shutdown_event( const struct sctp_association *asoc, __u16 flags, diff --git a/include/net/smc.h b/include/net/smc.h index bd9c0fb3b577..646feb4bc75f 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -37,6 +37,8 @@ struct smcd_dmb { #define ISM_EVENT_GID 1 #define ISM_EVENT_SWR 2 +#define ISM_ERROR 0xFFFF + struct smcd_event { u32 type; u32 code; @@ -75,6 +77,11 @@ struct smcd_dev { struct workqueue_struct *event_wq; u8 pnetid[SMC_MAX_PNETID_LEN]; bool pnetid_by_user; + struct list_head lgr_list; + spinlock_t lgr_lock; + atomic_t lgr_cnt; + wait_queue_head_t lgrs_deleted; + u8 going_away : 1; }; struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, diff --git a/include/net/snmp.h b/include/net/snmp.h index cb8ced4380a6..468a67836e2f 100644 --- a/include/net/snmp.h +++ b/include/net/snmp.h @@ -111,6 +111,12 @@ struct linux_xfrm_mib { unsigned long mibs[LINUX_MIB_XFRMMAX]; }; +/* Linux TLS */ +#define LINUX_MIB_TLSMAX __LINUX_MIB_TLSMAX +struct linux_tls_mib { + unsigned long mibs[LINUX_MIB_TLSMAX]; +}; + #define DEFINE_SNMP_STAT(type, name) \ __typeof__(type) __percpu *name #define DEFINE_SNMP_STAT_ATOMIC(type, name) \ diff --git a/include/net/sock.h b/include/net/sock.h index 2c53f1a1d905..02162b0378f7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -66,7 +66,6 @@ #include <net/checksum.h> #include <net/tcp_states.h> #include <linux/net_tstamp.h> -#include <net/smc.h> #include <net/l3mdev.h> /* @@ -437,31 +436,15 @@ struct sock { * Because of non atomicity rules, all * changes are protected by socket lock. 
*/ - unsigned int __sk_flags_offset[0]; -#ifdef __BIG_ENDIAN_BITFIELD -#define SK_FL_PROTO_SHIFT 16 -#define SK_FL_PROTO_MASK 0x00ff0000 - -#define SK_FL_TYPE_SHIFT 0 -#define SK_FL_TYPE_MASK 0x0000ffff -#else -#define SK_FL_PROTO_SHIFT 8 -#define SK_FL_PROTO_MASK 0x0000ff00 - -#define SK_FL_TYPE_SHIFT 16 -#define SK_FL_TYPE_MASK 0xffff0000 -#endif - - unsigned int sk_padding : 1, + u8 sk_padding : 1, sk_kern_sock : 1, sk_no_check_tx : 1, sk_no_check_rx : 1, - sk_userlocks : 4, - sk_protocol : 8, - sk_type : 16; -#define SK_PROTOCOL_MAX U8_MAX - u16 sk_gso_max_segs; + sk_userlocks : 4; u8 sk_pacing_shift; + u16 sk_type; + u16 sk_protocol; + u16 sk_gso_max_segs; unsigned long sk_lingertime; struct proto *sk_prot_creator; rwlock_t sk_callback_lock; @@ -723,6 +706,11 @@ static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_h hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); } +static inline void __sk_nulls_add_node_tail_rcu(struct sock *sk, struct hlist_nulls_head *list) +{ + hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list); +} + static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { sock_hold(sk); @@ -860,17 +848,17 @@ static inline gfp_t sk_gfp_mask(const struct sock *sk, gfp_t gfp_mask) static inline void sk_acceptq_removed(struct sock *sk) { - sk->sk_ack_backlog--; + WRITE_ONCE(sk->sk_ack_backlog, sk->sk_ack_backlog - 1); } static inline void sk_acceptq_added(struct sock *sk) { - sk->sk_ack_backlog++; + WRITE_ONCE(sk->sk_ack_backlog, sk->sk_ack_backlog + 1); } static inline bool sk_acceptq_is_full(const struct sock *sk) { - return sk->sk_ack_backlog > sk->sk_max_ack_backlog; + return READ_ONCE(sk->sk_ack_backlog) > READ_ONCE(sk->sk_max_ack_backlog); } /* @@ -878,12 +866,17 @@ static inline bool sk_acceptq_is_full(const struct sock *sk) */ static inline int sk_stream_min_wspace(const struct sock *sk) { - return sk->sk_wmem_queued >> 1; + return READ_ONCE(sk->sk_wmem_queued) >> 1; } static inline int sk_stream_wspace(const struct sock *sk) { - return sk->sk_sndbuf - sk->sk_wmem_queued; + return READ_ONCE(sk->sk_sndbuf) - READ_ONCE(sk->sk_wmem_queued); +} + +static inline void sk_wmem_queued_add(struct sock *sk, int val) +{ + WRITE_ONCE(sk->sk_wmem_queued, sk->sk_wmem_queued + val); } void sk_stream_write_space(struct sock *sk); @@ -895,11 +888,11 @@ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) skb_dst_force(skb); if (!sk->sk_backlog.tail) - sk->sk_backlog.head = skb; + WRITE_ONCE(sk->sk_backlog.head, skb); else sk->sk_backlog.tail->next = skb; - sk->sk_backlog.tail = skb; + WRITE_ONCE(sk->sk_backlog.tail, skb); skb->next = NULL; } @@ -949,8 +942,8 @@ static inline void sk_incoming_cpu_update(struct sock *sk) { int cpu = raw_smp_processor_id(); - if (unlikely(sk->sk_incoming_cpu != cpu)) - sk->sk_incoming_cpu = cpu; + if (unlikely(READ_ONCE(sk->sk_incoming_cpu) != cpu)) + WRITE_ONCE(sk->sk_incoming_cpu, cpu); } static inline void sock_rps_record_flow_hash(__u32 hash) @@ -1207,7 +1200,7 @@ static inline void sk_refcnt_debug_release(const struct sock *sk) static inline bool __sk_stream_memory_free(const struct sock *sk, int wake) { - if (sk->sk_wmem_queued >= sk->sk_sndbuf) + if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf)) return false; return sk->sk_prot->stream_memory_free ? 
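The sock.h hunks above and below follow one pattern: fields that are written under the socket lock but read locklessly (sk_ack_backlog, sk_wmem_queued, sk_sndbuf, sk_dst_pending_confirm, sk_pacing_shift, ...) get their racing accesses wrapped in READ_ONCE()/WRITE_ONCE(), so the compiler can neither tear, refetch, nor fuse a load or store that a peer observes without the lock. A minimal userspace sketch of the same pattern, assuming simplified stand-in macros (the kernel's real ones live in <linux/compiler.h>; the acceptq struct and function names below are illustrative only, loosely mirroring sk_acceptq_added()/sk_acceptq_is_full()):

#include <pthread.h>

/* Simplified stand-ins for the kernel's annotations: a volatile access
 * forces exactly one untorn load/store of the marked location. */
#define WRITE_ONCE(x, val) (*(volatile __typeof__(x) *)&(x) = (val))
#define READ_ONCE(x)       (*(volatile __typeof__(x) *)&(x))

struct acceptq {
	pthread_mutex_t lock;	/* serializes writers only */
	unsigned int backlog;	/* also read without the lock */
	unsigned int max_backlog;
};

/* Writer side: mutual exclusion still comes from the lock; WRITE_ONCE
 * merely marks the store for the benefit of lockless readers. */
static void acceptq_added(struct acceptq *q)
{
	pthread_mutex_lock(&q->lock);
	WRITE_ONCE(q->backlog, q->backlog + 1);
	pthread_mutex_unlock(&q->lock);
}

/* Reader side: runs without the lock, so every racing load is annotated. */
static int acceptq_is_full(const struct acceptq *q)
{
	return READ_ONCE(q->backlog) > READ_ONCE(q->max_backlog);
}

Note the once-accessors add no ordering or atomicity beyond a single aligned access; they only make the data race well-defined for the compiler, which is all these conversions are after.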
@@ -1467,10 +1460,11 @@ DECLARE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key); static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) { sock_set_flag(sk, SOCK_QUEUE_SHRUNK); - sk->sk_wmem_queued -= skb->truesize; + sk_wmem_queued_add(sk, -skb->truesize); sk_mem_uncharge(sk, skb->truesize); if (static_branch_unlikely(&tcp_tx_skb_cache_key) && !sk->sk_tx_skb_cache && !skb_cloned(skb)) { + skb_ext_reset(skb); skb_zcopy_clear(skb, true); sk->sk_tx_skb_cache = skb; return; @@ -1484,7 +1478,7 @@ static inline void sock_release_ownership(struct sock *sk) sk->sk_lock.owned = 0; /* The sk_lock has mutex_unlock() semantics: */ - mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); + mutex_release(&sk->sk_lock.dep_map, _RET_IP_); } } @@ -1935,8 +1929,8 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie); static inline void sk_dst_confirm(struct sock *sk) { - if (!sk->sk_dst_pending_confirm) - sk->sk_dst_pending_confirm = 1; + if (!READ_ONCE(sk->sk_dst_pending_confirm)) + WRITE_ONCE(sk->sk_dst_pending_confirm, 1); } static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n) @@ -1946,10 +1940,10 @@ static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n) unsigned long now = jiffies; /* avoid dirtying neighbour */ - if (n->confirmed != now) - n->confirmed = now; - if (sk && sk->sk_dst_pending_confirm) - sk->sk_dst_pending_confirm = 0; + if (READ_ONCE(n->confirmed) != now) + WRITE_ONCE(n->confirmed, now); + if (sk && READ_ONCE(sk->sk_dst_pending_confirm)) + WRITE_ONCE(sk->sk_dst_pending_confirm, 0); } } @@ -2014,7 +2008,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro skb->len += copy; skb->data_len += copy; skb->truesize += copy; - sk->sk_wmem_queued += copy; + sk_wmem_queued_add(sk, copy); sk_mem_charge(sk, copy); return 0; } @@ -2220,10 +2214,14 @@ static inline void sk_wake_async(const struct sock *sk, int how, int band) static inline void sk_stream_moderate_sndbuf(struct sock *sk) { - if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) { - sk->sk_sndbuf = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1); - sk->sk_sndbuf = max_t(u32, sk->sk_sndbuf, SOCK_MIN_SNDBUF); - } + u32 val; + + if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) + return; + + val = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1); + + WRITE_ONCE(sk->sk_sndbuf, max_t(u32, val, SOCK_MIN_SNDBUF)); } struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, @@ -2233,12 +2231,17 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, * sk_page_frag - return an appropriate page_frag * @sk: socket * - * If socket allocation mode allows current thread to sleep, it means its - * safe to use the per task page_frag instead of the per socket one. + * Use the per task page_frag instead of the per socket one for + * optimization when we know that we're in the normal context and own + * everything that's associated with %current. + * + * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest + * inside other socket operations and end up recursing into sk_page_frag() + * while it's already in use.
*/ static inline struct page_frag *sk_page_frag(struct sock *sk) { - if (gfpflags_allow_blocking(sk->sk_allocation)) + if (gfpflags_normal_context(sk->sk_allocation)) return &current->task_frag; return &sk->sk_frag; } @@ -2251,7 +2254,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag); */ static inline bool sock_writeable(const struct sock *sk) { - return refcount_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1); + return refcount_read(&sk->sk_wmem_alloc) < (READ_ONCE(sk->sk_sndbuf) >> 1); } static inline gfp_t gfp_any(void) @@ -2271,7 +2274,9 @@ static inline long sock_sndtimeo(const struct sock *sk, bool noblock) static inline int sock_rcvlowat(const struct sock *sk, int waitall, int len) { - return (waitall ? len : min_t(int, sk->sk_rcvlowat, len)) ? : 1; + int v = waitall ? len : min_t(int, READ_ONCE(sk->sk_rcvlowat), len); + + return v ?: 1; } /* Alas, with timeout socket operations are not restartable. @@ -2290,7 +2295,7 @@ struct sock_skb_cb { * using skb->cb[] would keep using it directly and utilize its * alignment guarantee. */ -#define SOCK_SKB_CB_OFFSET ((FIELD_SIZEOF(struct sk_buff, cb) - \ +#define SOCK_SKB_CB_OFFSET ((sizeof_field(struct sk_buff, cb) - \ sizeof(struct sock_skb_cb))) #define SOCK_SKB_CB(__skb) ((struct sock_skb_cb *)((__skb)->cb + \ @@ -2326,7 +2331,7 @@ static inline ktime_t sock_read_timestamp(struct sock *sk) return kt; #else - return sk->sk_stamp; + return READ_ONCE(sk->sk_stamp); #endif } @@ -2337,7 +2342,7 @@ static inline void sock_write_timestamp(struct sock *sk, ktime_t kt) sk->sk_stamp = kt; write_sequnlock(&sk->sk_stamp_seq); #else - sk->sk_stamp = kt; + WRITE_ONCE(sk->sk_stamp, kt); #endif } @@ -2512,7 +2517,7 @@ static inline bool sk_listener(const struct sock *sk) return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV); } -void sock_enable_timestamp(struct sock *sk, int flag); +void sock_enable_timestamp(struct sock *sk, enum sock_flags flag); int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, int type); @@ -2568,9 +2573,9 @@ static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto) */ static inline void sk_pacing_shift_update(struct sock *sk, int val) { - if (!sk || !sk_fullsock(sk) || sk->sk_pacing_shift == val) + if (!sk || !sk_fullsock(sk) || READ_ONCE(sk->sk_pacing_shift) == val) return; - sk->sk_pacing_shift = val; + WRITE_ONCE(sk->sk_pacing_shift, val); } /* if a socket is bound to a device, check that the given device @@ -2592,4 +2597,6 @@ static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif) return false; } +void sock_def_readable(struct sock *sk); + #endif /* _SOCK_H */ diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h index d9112de85261..43f4a818d88f 100644 --- a/include/net/sock_reuseport.h +++ b/include/net/sock_reuseport.h @@ -21,7 +21,8 @@ struct sock_reuseport { unsigned int synq_overflow_ts; /* ID stays the same even after the size of socks[] grows.
*/ unsigned int reuseport_id; - bool bind_inany; + unsigned int bind_inany:1; + unsigned int has_conns:1; struct bpf_prog __rcu *prog; /* optional BPF sock selector */ struct sock *socks[0]; /* array of sock pointers */ }; @@ -37,6 +38,23 @@ extern struct sock *reuseport_select_sock(struct sock *sk, extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog); extern int reuseport_detach_prog(struct sock *sk); +static inline bool reuseport_has_conns(struct sock *sk, bool set) +{ + struct sock_reuseport *reuse; + bool ret = false; + + rcu_read_lock(); + reuse = rcu_dereference(sk->sk_reuseport_cb); + if (reuse) { + if (set) + reuse->has_conns = 1; + ret = reuse->has_conns; + } + rcu_read_unlock(); + + return ret; +} + int reuseport_get_id(struct sock_reuseport *reuse); #endif /* _SOCK_REUSEPORT_H */ diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index c757585a05b0..1cace4c69e44 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -32,6 +32,24 @@ static inline bool is_tcf_mirred_egress_mirror(const struct tc_action *a) return false; } +static inline bool is_tcf_mirred_ingress_redirect(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + if (a->ops && a->ops->id == TCA_ID_MIRRED) + return to_mirred(a)->tcfm_eaction == TCA_INGRESS_REDIR; +#endif + return false; +} + +static inline bool is_tcf_mirred_ingress_mirror(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + if (a->ops && a->ops->id == TCA_ID_MIRRED) + return to_mirred(a)->tcfm_eaction == TCA_INGRESS_MIRROR; +#endif + return false; +} + static inline struct net_device *tcf_mirred_dev(const struct tc_action *a) { return rtnl_dereference(to_mirred(a)->tcfm_dev); diff --git a/include/net/tc_act/tc_mpls.h b/include/net/tc_act/tc_mpls.h index 4bc3d9250ef0..721de4f5733a 100644 --- a/include/net/tc_act/tc_mpls.h +++ b/include/net/tc_act/tc_mpls.h @@ -27,4 +27,79 @@ struct tcf_mpls { }; #define to_mpls(a) ((struct tcf_mpls *)a) +static inline bool is_tcf_mpls(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + if (a->ops && a->ops->id == TCA_ID_MPLS) + return true; +#endif + return false; +} + +static inline u32 tcf_mpls_action(const struct tc_action *a) +{ + u32 tcfm_action; + + rcu_read_lock(); + tcfm_action = rcu_dereference(to_mpls(a)->mpls_p)->tcfm_action; + rcu_read_unlock(); + + return tcfm_action; +} + +static inline __be16 tcf_mpls_proto(const struct tc_action *a) +{ + __be16 tcfm_proto; + + rcu_read_lock(); + tcfm_proto = rcu_dereference(to_mpls(a)->mpls_p)->tcfm_proto; + rcu_read_unlock(); + + return tcfm_proto; +} + +static inline u32 tcf_mpls_label(const struct tc_action *a) +{ + u32 tcfm_label; + + rcu_read_lock(); + tcfm_label = rcu_dereference(to_mpls(a)->mpls_p)->tcfm_label; + rcu_read_unlock(); + + return tcfm_label; +} + +static inline u8 tcf_mpls_tc(const struct tc_action *a) +{ + u8 tcfm_tc; + + rcu_read_lock(); + tcfm_tc = rcu_dereference(to_mpls(a)->mpls_p)->tcfm_tc; + rcu_read_unlock(); + + return tcfm_tc; +} + +static inline u8 tcf_mpls_bos(const struct tc_action *a) +{ + u8 tcfm_bos; + + rcu_read_lock(); + tcfm_bos = rcu_dereference(to_mpls(a)->mpls_p)->tcfm_bos; + rcu_read_unlock(); + + return tcfm_bos; +} + +static inline u8 tcf_mpls_ttl(const struct tc_action *a) +{ + u8 tcfm_ttl; + + rcu_read_lock(); + tcfm_ttl = rcu_dereference(to_mpls(a)->mpls_p)->tcfm_ttl; + rcu_read_unlock(); + + return tcfm_ttl; +} + #endif /* __NET_TC_MPLS_H */ diff --git a/include/net/tc_act/tc_sample.h b/include/net/tc_act/tc_sample.h index 
b4fce0fae645..b5d76305e854 100644 --- a/include/net/tc_act/tc_sample.h +++ b/include/net/tc_act/tc_sample.h @@ -41,10 +41,4 @@ static inline int tcf_sample_trunc_size(const struct tc_action *a) return to_sample(a)->trunc_size; } -static inline struct psample_group * -tcf_sample_psample_group(const struct tc_action *a) -{ - return rcu_dereference_rtnl(to_sample(a)->psample_group); -} - #endif /* __NET_TC_SAMPLE_H */ diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index 4c04e2985508..b22a1f641f02 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -54,4 +54,31 @@ static inline u32 tcf_skbedit_mark(const struct tc_action *a) return mark; } +/* Return true iff action is ptype */ +static inline bool is_tcf_skbedit_ptype(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + u32 flags; + + if (a->ops && a->ops->id == TCA_ID_SKBEDIT) { + rcu_read_lock(); + flags = rcu_dereference(to_skbedit(a)->params)->flags; + rcu_read_unlock(); + return flags == SKBEDIT_F_PTYPE; + } +#endif + return false; +} + +static inline u32 tcf_skbedit_ptype(const struct tc_action *a) +{ + u16 ptype; + + rcu_read_lock(); + ptype = rcu_dereference(to_skbedit(a)->params)->ptype; + rcu_read_unlock(); + + return ptype; +} + #endif /* __NET_TC_SKBEDIT_H */ diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h index 7c3f777c168c..0689d9bcdf84 100644 --- a/include/net/tc_act/tc_tunnel_key.h +++ b/include/net/tc_act/tc_tunnel_key.h @@ -59,4 +59,21 @@ static inline struct ip_tunnel_info *tcf_tunnel_info(const struct tc_action *a) return NULL; #endif } + +static inline struct ip_tunnel_info * +tcf_tunnel_info_copy(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + struct ip_tunnel_info *tun = tcf_tunnel_info(a); + + if (tun) { + size_t tun_size = sizeof(*tun) + tun->options_len; + struct ip_tunnel_info *tun_copy = kmemdup(tun, tun_size, + GFP_KERNEL); + + return tun_copy; + } +#endif + return NULL; +} #endif /* __NET_TC_TUNNEL_KEY_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 81e8ade1e6e4..a5ea27df3c2b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -39,6 +39,7 @@ #include <net/tcp_states.h> #include <net/inet_ecn.h> #include <net/dst.h> +#include <net/mptcp.h> #include <linux/seq_file.h> #include <linux/memcontrol.h> @@ -64,7 +65,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* Minimal accepted MSS. It is (60+60+8) - (20+20). 
*/ #define TCP_MIN_MSS 88U -/* The least MTU to use for probing */ +/* The initial MTU to use for probing */ #define TCP_BASE_MSS 1024 /* probing interval, default to 10 minutes as per RFC4821 */ @@ -182,6 +183,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOPT_SACK 5 /* SACK Block */ #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ +#define TCPOPT_MPTCP 30 /* Multipath TCP (RFC6824) */ #define TCPOPT_FASTOPEN 34 /* Fast open (RFC7413) */ #define TCPOPT_EXP 254 /* Experimental */ /* Magic number to be after the option value for sharing TCP @@ -258,7 +260,7 @@ static inline bool tcp_under_memory_pressure(const struct sock *sk) mem_cgroup_under_socket_pressure(sk->sk_memcg)) return true; - return tcp_memory_pressure; + return READ_ONCE(tcp_memory_pressure); } /* * The next routines deal with comparing 32 bit unsigned ints @@ -328,6 +330,9 @@ int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset, size_t size, int flags); ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, size_t size, int flags); +int tcp_send_mss(struct sock *sk, int *size_goal, int flags); +void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle, + int size_goal); void tcp_release_cb(struct sock *sk); void tcp_wfree(struct sk_buff *skb); void tcp_write_timer_handler(struct sock *sk); @@ -415,6 +420,16 @@ void tcp_parse_options(const struct net *net, const struct sk_buff *skb, const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); /* + * BPF SKB-less helpers + */ +u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph, + struct tcphdr *th, u32 *cookie); +u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, + struct tcphdr *th, u32 *cookie); +u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, + const struct tcp_request_sock_ops *af_ops, + struct sock *sk, struct tcphdr *th); +/* * TCP v4 functions exported for the inet6 API */ @@ -484,15 +499,16 @@ static inline void tcp_synq_overflow(const struct sock *sk) reuse = rcu_dereference(sk->sk_reuseport_cb); if (likely(reuse)) { last_overflow = READ_ONCE(reuse->synq_overflow_ts); - if (time_after32(now, last_overflow + HZ)) + if (!time_between32(now, last_overflow, + last_overflow + HZ)) WRITE_ONCE(reuse->synq_overflow_ts, now); return; } } - last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; - if (time_after32(now, last_overflow + HZ)) - tcp_sk(sk)->rx_opt.ts_recent_stamp = now; + last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp); + if (!time_between32(now, last_overflow, last_overflow + HZ)) + WRITE_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp, now); } /* syncookies: no recent synqueue overflow on this listening socket? */ @@ -507,13 +523,23 @@ static inline bool tcp_synq_no_recent_overflow(const struct sock *sk) reuse = rcu_dereference(sk->sk_reuseport_cb); if (likely(reuse)) { last_overflow = READ_ONCE(reuse->synq_overflow_ts); - return time_after32(now, last_overflow + - TCP_SYNCOOKIE_VALID); + return !time_between32(now, last_overflow - HZ, + last_overflow + + TCP_SYNCOOKIE_VALID); } } - last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; - return time_after32(now, last_overflow + TCP_SYNCOOKIE_VALID); + last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp); + + /* If last_overflow <= jiffies <= last_overflow + TCP_SYNCOOKIE_VALID, + * then we're under synflood. However, we have to use + * 'last_overflow - HZ' as lower bound. 
That's because a concurrent + * tcp_synq_overflow() could update .ts_recent_stamp after we read + * jiffies but before we store .ts_recent_stamp into last_overflow, + * which could lead to rejecting a valid syncookie. + */ + return !time_between32(now, last_overflow - HZ, + last_overflow + TCP_SYNCOOKIE_VALID); } static inline u32 tcp_cookie_time(void) @@ -527,7 +553,7 @@ static inline u32 tcp_cookie_time(void) u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, u16 *mssp); __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss); -u64 cookie_init_timestamp(struct request_sock *req); +u64 cookie_init_timestamp(struct request_sock *req, u64 now); bool cookie_timestamp_decode(const struct net *net, struct tcp_options_received *opt); bool cookie_ecn_ok(const struct tcp_options_received *opt, @@ -747,10 +773,16 @@ static inline u32 tcp_time_stamp(const struct tcp_sock *tp) return div_u64(tp->tcp_mstamp, USEC_PER_SEC / TCP_TS_HZ); } +/* Convert a nsec timestamp into TCP TSval timestamp (ms based currently) */ +static inline u32 tcp_ns_to_ts(u64 ns) +{ + return div_u64(ns, NSEC_PER_SEC / TCP_TS_HZ); +} + /* Could use tcp_clock_us() / 1000, but this version uses a single divide */ static inline u32 tcp_time_stamp_raw(void) { - return div_u64(tcp_clock_ns(), NSEC_PER_SEC / TCP_TS_HZ); + return tcp_ns_to_ts(tcp_clock_ns()); } void tcp_mstamp_refresh(struct tcp_sock *tp); @@ -762,7 +794,7 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0) static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) { - return div_u64(skb->skb_mstamp_ns, NSEC_PER_SEC / TCP_TS_HZ); + return tcp_ns_to_ts(skb->skb_mstamp_ns); } /* provide the departure time in us unit */ @@ -950,6 +982,13 @@ static inline bool tcp_skb_can_collapse_to(const struct sk_buff *skb) return likely(!TCP_SKB_CB(skb)->eor); } +static inline bool tcp_skb_can_collapse(const struct sk_buff *to, + const struct sk_buff *from) +{ + return likely(tcp_skb_can_collapse_to(to) && + mptcp_skb_can_collapse(to, from)); +} + /* Events passed to congestion control interface */ enum tcp_ca_event { CA_EVENT_TX_START, /* first transmit when no packets in flight */ @@ -980,6 +1019,7 @@ enum tcp_ca_ack_event_flags { #define TCP_CONG_NON_RESTRICTED 0x1 /* Requires ECN/ECT set on all packets */ #define TCP_CONG_NEEDS_ECN 0x2 +#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN) union tcp_cc_info; @@ -1074,6 +1114,7 @@ u32 tcp_reno_undo_cwnd(struct sock *sk); void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked); extern struct tcp_congestion_ops tcp_reno; +struct tcp_congestion_ops *tcp_ca_find(const char *name); struct tcp_congestion_ops *tcp_ca_find_key(u32 key); u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca); #ifdef CONFIG_INET @@ -1370,13 +1411,14 @@ static inline int tcp_win_from_space(const struct sock *sk, int space) /* Note: caller must be prepared to deal with negative returns */ static inline int tcp_space(const struct sock *sk) { - return tcp_win_from_space(sk, sk->sk_rcvbuf - sk->sk_backlog.len - + return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - + READ_ONCE(sk->sk_backlog.len) - atomic_read(&sk->sk_rmem_alloc)); } static inline int tcp_full_space(const struct sock *sk) { - return tcp_win_from_space(sk, sk->sk_rcvbuf); + return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf)); } extern void tcp_openreq_init_rwin(struct request_sock *req, @@ -1504,8 +1546,9 @@ struct tcp_md5sig_key { struct hlist_node node; u8 keylen; u8 family; /* AF_INET or AF_INET6 
*/ - union tcp_md5_addr addr; u8 prefixlen; + union tcp_md5_addr addr; + int l3index; /* set if key added with L3 scope */ u8 key[TCP_MD5SIG_MAXKEYLEN]; struct rcu_head rcu; }; @@ -1549,34 +1592,33 @@ struct tcp_md5sig_pool { int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, const struct sock *sk, const struct sk_buff *skb); int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, - int family, u8 prefixlen, const u8 *newkey, u8 newkeylen, - gfp_t gfp); + int family, u8 prefixlen, int l3index, + const u8 *newkey, u8 newkeylen, gfp_t gfp); int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, - int family, u8 prefixlen); + int family, u8 prefixlen, int l3index); struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, const struct sock *addr_sk); #ifdef CONFIG_TCP_MD5SIG #include <linux/jump_label.h> extern struct static_key_false tcp_md5_needed; -struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, +struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, const union tcp_md5_addr *addr, int family); static inline struct tcp_md5sig_key * -tcp_md5_do_lookup(const struct sock *sk, - const union tcp_md5_addr *addr, - int family) +tcp_md5_do_lookup(const struct sock *sk, int l3index, + const union tcp_md5_addr *addr, int family) { if (!static_branch_unlikely(&tcp_md5_needed)) return NULL; - return __tcp_md5_do_lookup(sk, addr, family); + return __tcp_md5_do_lookup(sk, l3index, addr, family); } #define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key) #else -static inline struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk, - const union tcp_md5_addr *addr, - int family) +static inline struct tcp_md5sig_key * +tcp_md5_do_lookup(const struct sock *sk, int l3index, + const union tcp_md5_addr *addr, int family) { return NULL; } @@ -1738,9 +1780,18 @@ static inline bool tcp_skb_is_last(const struct sock *sk, return skb_queue_is_last(&sk->sk_write_queue, skb); } +/** + * tcp_write_queue_empty - test if any payload (or FIN) is available in write queue + * @sk: socket + * + * Since the write queue can have a temporary empty skb in it, + * we must not use "return skb_queue_empty(&sk->sk_write_queue)" + */ static inline bool tcp_write_queue_empty(const struct sock *sk) { - return skb_queue_empty(&sk->sk_write_queue); + const struct tcp_sock *tp = tcp_sk(sk); + + return tp->write_seq == tp->snd_nxt; } static inline bool tcp_rtx_queue_empty(const struct sock *sk) @@ -1906,7 +1957,8 @@ static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) static inline bool tcp_stream_memory_free(const struct sock *sk, int wake) { const struct tcp_sock *tp = tcp_sk(sk); - u32 notsent_bytes = tp->write_seq - tp->snd_nxt; + u32 notsent_bytes = READ_ONCE(tp->write_seq) - + READ_ONCE(tp->snd_nxt); return (notsent_bytes << wake) < tcp_notsent_lowat(tp); } @@ -1964,6 +2016,11 @@ struct tcp_request_sock_ops { enum tcp_synack_type synack_type); }; +extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops; +#if IS_ENABLED(CONFIG_IPV6) +extern const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops; +#endif + #ifdef CONFIG_SYN_COOKIES static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops, const struct sock *sk, struct sk_buff *skb, @@ -2109,9 +2166,16 @@ struct tcp_ulp_ops { /* initialize ulp */ int (*init)(struct sock *sk); /* update ulp */ - void (*update)(struct sock *sk, struct proto *p); + void (*update)(struct sock *sk, struct proto *p, + void (*write_space)(struct sock *sk)); /* 
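The tcp_write_queue_empty() rewrite above is worth restating outside the diff: because a temporary zero-payload skb may sit in sk_write_queue, emptiness is defined in sequence space instead, and the same write_seq - snd_nxt difference feeds notsent_bytes in tcp_stream_memory_free(). A toy model, assuming nothing beyond the two fields the hunks touch (the struct and names here are illustrative, not kernel code):

#include <stdbool.h>
#include <stdint.h>

struct toy_tcp_sock {
	uint32_t write_seq;	/* one past the last byte queued by the app */
	uint32_t snd_nxt;	/* next sequence number to be transmitted */
};

/* "nothing unsent, not even a FIN" == the two sequence marks agree */
static inline bool toy_write_queue_empty(const struct toy_tcp_sock *tp)
{
	return tp->write_seq == tp->snd_nxt;
}

/* unsigned subtraction keeps this correct across sequence wraparound */
static inline uint32_t toy_notsent_bytes(const struct toy_tcp_sock *tp)
{
	return tp->write_seq - tp->snd_nxt;
}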
cleanup ulp */ void (*release)(struct sock *sk); + /* diagnostic */ + int (*get_info)(const struct sock *sk, struct sk_buff *skb); + size_t (*get_info_size)(const struct sock *sk); + /* clone ulp */ + void (*clone)(const struct request_sock *req, struct sock *newsk, + const gfp_t priority); char name[TCP_ULP_NAME_MAX]; struct module *owner; @@ -2121,7 +2185,8 @@ void tcp_unregister_ulp(struct tcp_ulp_ops *type); int tcp_set_ulp(struct sock *sk, const char *name); void tcp_get_available_ulp(char *buf, size_t len); void tcp_cleanup_ulp(struct sock *sk); -void tcp_update_ulp(struct sock *sk, struct proto *p); +void tcp_update_ulp(struct sock *sk, struct proto *p, + void (*write_space)(struct sock *sk)); #define MODULE_ALIAS_TCP_ULP(name) \ __MODULE_INFO(alias, alias_userspace, name); \ diff --git a/include/net/tls.h b/include/net/tls.h index 41b2d41bb1b8..bf9eb4823933 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -40,8 +40,11 @@ #include <linux/socket.h> #include <linux/tcp.h> #include <linux/skmsg.h> +#include <linux/mutex.h> #include <linux/netdevice.h> +#include <linux/rcupdate.h> +#include <net/net_namespace.h> #include <net/tcp.h> #include <net/strparser.h> #include <crypto/aead.h> @@ -59,7 +62,6 @@ #define TLS_RECORD_TYPE_DATA 0x17 #define TLS_AAD_SPACE_SIZE 13 -#define TLS_DEVICE_NAME_MAX 32 #define MAX_IV_SIZE 16 #define TLS_MAX_REC_SEQ_SIZE 8 @@ -73,36 +75,14 @@ */ #define TLS_AES_CCM_IV_B0_BYTE 2 -/* - * This structure defines the routines for Inline TLS driver. - * The following routines are optional and filled with a - * null pointer if not defined. - * - * @name: Its the name of registered Inline tls device - * @dev_list: Inline tls device list - * int (*feature)(struct tls_device *device); - * Called to return Inline TLS driver capability - * - * int (*hash)(struct tls_device *device, struct sock *sk); - * This function sets Inline driver for listen and program - * device specific functioanlity as required - * - * void (*unhash)(struct tls_device *device, struct sock *sk); - * This function cleans listen state set by Inline TLS driver - * - * void (*release)(struct kref *kref); - * Release the registered device and allocated resources - * @kref: Number of reference to tls_device - */ -struct tls_device { - char name[TLS_DEVICE_NAME_MAX]; - struct list_head dev_list; - int (*feature)(struct tls_device *device); - int (*hash)(struct tls_device *device, struct sock *sk); - void (*unhash)(struct tls_device *device, struct sock *sk); - void (*release)(struct kref *kref); - struct kref kref; -}; +#define __TLS_INC_STATS(net, field) \ + __SNMP_INC_STATS((net)->mib.tls_statistics, field) +#define TLS_INC_STATS(net, field) \ + SNMP_INC_STATS((net)->mib.tls_statistics, field) +#define __TLS_DEC_STATS(net, field) \ + __SNMP_DEC_STATS((net)->mib.tls_statistics, field) +#define TLS_DEC_STATS(net, field) \ + SNMP_DEC_STATS((net)->mib.tls_statistics, field) enum { TLS_BASE, @@ -120,7 +100,6 @@ struct tls_rec { struct list_head list; int tx_ready; int tx_flags; - int inplace_crypto; struct sk_msg msg_plaintext; struct sk_msg msg_encrypted; @@ -157,7 +136,7 @@ struct tls_sw_context_tx { struct list_head tx_list; atomic_t encrypt_pending; int async_notify; - int async_capable; + u8 async_capable:1; #define BIT_TX_SCHEDULED 0 #define BIT_TX_CLOSING 1 @@ -173,8 +152,8 @@ struct tls_sw_context_rx { struct sk_buff *recv_pkt; u8 control; - int async_capable; - bool decrypted; + u8 async_capable:1; + u8 decrypted:1; atomic_t decrypt_pending; bool async_notify; }; @@ -268,28 +247,23 @@ 
struct tls_context { bool in_tcp_sendpages; bool pending_open_record_frags; + + struct mutex tx_lock; /* protects partially_sent_* fields and + * per-type TX fields + */ unsigned long flags; /* cache cold stuff */ struct proto *sk_proto; void (*sk_destruct)(struct sock *sk); - void (*sk_proto_close)(struct sock *sk, long timeout); - - int (*setsockopt)(struct sock *sk, int level, - int optname, char __user *optval, - unsigned int optlen); - int (*getsockopt)(struct sock *sk, int level, - int optname, char __user *optval, - int __user *optlen); - int (*hash)(struct sock *sk); - void (*unhash)(struct sock *sk); union tls_crypto_context crypto_send; union tls_crypto_context crypto_recv; struct list_head list; refcount_t refcount; + struct rcu_head rcu; }; enum tls_offload_ctx_dir { @@ -348,7 +322,10 @@ struct tls_offload_context_rx { #define TLS_OFFLOAD_CONTEXT_SIZE_RX \ (sizeof(struct tls_offload_context_rx) + TLS_DRIVER_STATE_SIZE_RX) -void tls_ctx_free(struct tls_context *ctx); +struct tls_context *tls_ctx_create(struct sock *sk); +void tls_ctx_free(struct sock *sk, struct tls_context *ctx); +void update_sk_prot(struct sock *sk, struct tls_context *ctx); + int wait_on_pending_writer(struct sock *sk, long *timeo); int tls_sk_query(struct sock *sk, int optname, char __user *optval, int __user *optlen); @@ -359,6 +336,8 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); void tls_sw_strparser_done(struct tls_context *tls_ctx); int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); +int tls_sw_sendpage_locked(struct sock *sk, struct page *page, + int offset, size_t size, int flags); int tls_sw_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); void tls_sw_cancel_work_tx(struct tls_context *tls_ctx); @@ -374,13 +353,9 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); -int tls_set_device_offload(struct sock *sk, struct tls_context *ctx); int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tls_device_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); -void tls_device_free_resources_tx(struct sock *sk); -void tls_device_init(void); -void tls_device_cleanup(void); int tls_tx_records(struct sock *sk, int flags); struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context, @@ -401,7 +376,7 @@ int tls_push_sg(struct sock *sk, struct tls_context *ctx, int flags); int tls_push_partial_record(struct sock *sk, struct tls_context *ctx, int flags); -bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx); +void tls_free_partial_record(struct sock *sk, struct tls_context *ctx); static inline struct tls_msg *tls_msg(struct sk_buff *skb) { @@ -429,6 +404,23 @@ static inline bool is_tx_ready(struct tls_sw_context_tx *ctx) return READ_ONCE(rec->tx_ready); } +static inline u16 tls_user_config(struct tls_context *ctx, bool tx) +{ + u16 config = tx ? 
ctx->tx_conf : ctx->rx_conf; + + switch (config) { + case TLS_BASE: + return TLS_CONF_BASE; + case TLS_SW: + return TLS_CONF_SW; + case TLS_HW: + return TLS_CONF_HW; + case TLS_HW_RECORD: + return TLS_CONF_HW_RECORD; + } + return 0; +} + struct sk_buff * tls_validate_xmit_skb(struct sock *sk, struct net_device *dev, struct sk_buff *skb); @@ -467,7 +459,10 @@ static inline struct tls_context *tls_get_ctx(const struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); - return icsk->icsk_ulp_data; + /* Use RCU on icsk_ulp_data only for sock diag code, + * TLS data path doesn't need rcu_dereference(). + */ + return (__force void *)icsk->icsk_ulp_data; } static inline void tls_advance_record_sn(struct sock *sk, @@ -615,13 +610,6 @@ tls_offload_rx_resync_set_type(struct sock *sk, enum tls_offload_sync_type type) tls_offload_ctx_rx(tls_ctx)->resync_type = type; } -static inline void tls_offload_tx_resync_request(struct sock *sk) -{ - struct tls_context *tls_ctx = tls_get_ctx(sk); - - WARN_ON(test_and_set_bit(TLS_TX_SYNC_SCHED, &tls_ctx->flags)); -} - /* Driver's seq tracking has to be disabled until resync succeeded */ static inline bool tls_offload_tx_resync_pending(struct sock *sk) { @@ -633,11 +621,11 @@ static inline bool tls_offload_tx_resync_pending(struct sock *sk) return ret; } +int __net_init tls_proc_init(struct net *net); +void __net_exit tls_proc_fini(struct net *net); + int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg, unsigned char *record_type); -void tls_register_device(struct tls_device *device); -void tls_unregister_device(struct tls_device *device); -int tls_device_decrypted(struct sock *sk, struct sk_buff *skb); int decrypt_skb(struct sock *sk, struct sk_buff *skb, struct scatterlist *sgout); struct sk_buff *tls_encrypt_skb(struct sk_buff *skb); @@ -650,9 +638,53 @@ int tls_sw_fallback_init(struct sock *sk, struct tls_offload_context_tx *offload_ctx, struct tls_crypto_info *crypto_info); +#ifdef CONFIG_TLS_DEVICE +void tls_device_init(void); +void tls_device_cleanup(void); +void tls_device_sk_destruct(struct sock *sk); +int tls_set_device_offload(struct sock *sk, struct tls_context *ctx); +void tls_device_free_resources_tx(struct sock *sk); int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx); - void tls_device_offload_cleanup_rx(struct sock *sk); void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq); +void tls_offload_tx_resync_request(struct sock *sk, u32 got_seq, u32 exp_seq); +int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx, + struct sk_buff *skb, struct strp_msg *rxm); +static inline bool tls_is_sk_rx_device_offloaded(struct sock *sk) +{ + if (!sk_fullsock(sk) || + smp_load_acquire(&sk->sk_destruct) != tls_device_sk_destruct) + return false; + return tls_get_ctx(sk)->rx_conf == TLS_HW; +} +#else +static inline void tls_device_init(void) {} +static inline void tls_device_cleanup(void) {} + +static inline int +tls_set_device_offload(struct sock *sk, struct tls_context *ctx) +{ + return -EOPNOTSUPP; +} + +static inline void tls_device_free_resources_tx(struct sock *sk) {} + +static inline int +tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) +{ + return -EOPNOTSUPP; +} + +static inline void tls_device_offload_cleanup_rx(struct sock *sk) {} +static inline void +tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) {} + +static inline int +tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx, + struct sk_buff *skb, struct strp_msg *rxm) +{ + return 0; 
+} +#endif #endif /* _TLS_OFFLOAD_H */ diff --git a/include/net/tls_toe.h b/include/net/tls_toe.h new file mode 100644 index 000000000000..b3aa7593ce2c --- /dev/null +++ b/include/net/tls_toe.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. + * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kref.h> +#include <linux/list.h> + +struct sock; + +#define TLS_TOE_DEVICE_NAME_MAX 32 + +/* + * This structure defines the routines for Inline TLS driver. + * The following routines are optional and filled with a + * null pointer if not defined. 
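To make the ops documented in this comment (and declared just below) concrete, here is a hedged sketch of driver-side registration. The foo_* names are invented for illustration; only feature() and release() are filled in, since hash()/unhash() may legitimately stay NULL per the comment above:

#include <linux/kref.h>
#include <linux/printk.h>
#include <net/tls_toe.h>

static int foo_tls_feature(struct tls_toe_device *dev)
{
	return 1;	/* advertise Inline TLS capability */
}

static void foo_tls_release(struct kref *kref)
{
	struct tls_toe_device *dev =
		container_of(kref, struct tls_toe_device, kref);

	/* last reference dropped: free driver state behind dev here */
	pr_info("releasing TOE device %s\n", dev->name);
}

static struct tls_toe_device foo_tls_dev = {
	.name		= "foo-inline-tls",
	.feature	= foo_tls_feature,
	.release	= foo_tls_release,
};

static int foo_probe(void)
{
	tls_toe_register_device(&foo_tls_dev);
	return 0;
}

static void foo_remove(void)
{
	tls_toe_unregister_device(&foo_tls_dev);
}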
+ * + * @name: It's the name of registered Inline tls device + * @dev_list: Inline tls device list + * int (*feature)(struct tls_toe_device *device); + * Called to return Inline TLS driver capability + * + * int (*hash)(struct tls_toe_device *device, struct sock *sk); + * This function sets Inline driver for listen and program + * device specific functionality as required + * + * void (*unhash)(struct tls_toe_device *device, struct sock *sk); + * This function cleans listen state set by Inline TLS driver + * + * void (*release)(struct kref *kref); + * Release the registered device and allocated resources + * @kref: Number of reference to tls_toe_device + */ +struct tls_toe_device { + char name[TLS_TOE_DEVICE_NAME_MAX]; + struct list_head dev_list; + int (*feature)(struct tls_toe_device *device); + int (*hash)(struct tls_toe_device *device, struct sock *sk); + void (*unhash)(struct tls_toe_device *device, struct sock *sk); + void (*release)(struct kref *kref); + struct kref kref; +}; + +int tls_toe_bypass(struct sock *sk); +int tls_toe_hash(struct sock *sk); +void tls_toe_unhash(struct sock *sk); + +void tls_toe_register_device(struct tls_toe_device *device); +void tls_toe_unregister_device(struct tls_toe_device *device); diff --git a/include/net/udp.h b/include/net/udp.h index 79d141d2103b..e55d5f765807 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -167,7 +167,7 @@ typedef struct sock *(*udp_lookup_t)(struct sk_buff *skb, __be16 sport, __be16 dport); struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, - struct udphdr *uh, udp_lookup_t lookup); + struct udphdr *uh, struct sock *sk); int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, @@ -476,11 +476,21 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk, if (!inet_get_convert_csum(sk)) features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + /* UDP segmentation expects packets of type CHECKSUM_PARTIAL or + * CHECKSUM_NONE in __udp_gso_segment. UDP GRO indeed builds partial + * packets in udp_gro_complete_segment. As does UDP GSO, verified by + * udp_send_skb. But when those packets are looped in dev_loopback_xmit + * their ip_summed is set to CHECKSUM_UNNECESSARY. Reset in this + * specific case, where PARTIAL is both correct and required. 
+ */ + if (skb->pkt_type == PACKET_LOOPBACK) + skb->ip_summed = CHECKSUM_PARTIAL; + /* the GSO CB lays after the UDP one, no need to save and restore any * CB fragment */ segs = __skb_gso_segment(skb, features, false); - if (unlikely(IS_ERR_OR_NULL(segs))) { + if (IS_ERR_OR_NULL(segs)) { int segs_nr = skb_shinfo(skb)->gso_segs; atomic_add(segs_nr, &sk->sk_drops); diff --git a/include/net/vsock_addr.h b/include/net/vsock_addr.h index 57d2db5c4bdf..cf8cc140d68d 100644 --- a/include/net/vsock_addr.h +++ b/include/net/vsock_addr.h @@ -8,7 +8,7 @@ #ifndef _VSOCK_ADDR_H_ #define _VSOCK_ADDR_H_ -#include <linux/vm_sockets.h> +#include <uapi/linux/vm_sockets.h> void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port); int vsock_addr_validate(const struct sockaddr_vm *addr); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index dc1583a1fb8a..373aadcfea21 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -197,6 +197,7 @@ struct vxlan_rdst { u8 offloaded:1; __be32 remote_vni; u32 remote_ifindex; + struct net_device *remote_dev; struct list_head list; struct rcu_head rcu; struct dst_cache dst_cache; @@ -391,7 +392,7 @@ static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa) if (ipa->sa.sa_family == AF_INET6) return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr); else - return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr)); + return ipv4_is_multicast(ipa->sin.sin_addr.s_addr); } #else /* !IS_ENABLED(CONFIG_IPV6) */ @@ -403,7 +404,7 @@ static inline bool vxlan_addr_any(const union vxlan_addr *ipa) static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa) { - return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr)); + return ipv4_is_multicast(ipa->sin.sin_addr.s_addr); } #endif /* IS_ENABLED(CONFIG_IPV6) */ diff --git a/include/net/x25.h b/include/net/x25.h index ed1acc3044ac..d7d6c2b4ffa7 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -62,7 +62,8 @@ enum { X25_STATE_1, /* Awaiting Call Accepted */ X25_STATE_2, /* Awaiting Clear Confirmation */ X25_STATE_3, /* Data Transfer */ - X25_STATE_4 /* Awaiting Reset Confirmation */ + X25_STATE_4, /* Awaiting Reset Confirmation */ + X25_STATE_5 /* Call Accepted / Call Connected pending */ }; enum { diff --git a/include/net/xdp_priv.h b/include/net/xdp_priv.h index 6a8cba6ea79a..a9d5b7603b89 100644 --- a/include/net/xdp_priv.h +++ b/include/net/xdp_priv.h @@ -12,12 +12,8 @@ struct xdp_mem_allocator { struct page_pool *page_pool; struct zero_copy_allocator *zc_alloc; }; - int disconnect_cnt; - unsigned long defer_start; struct rhash_head node; struct rcu_head rcu; - struct delayed_work defer_wq; - unsigned long defer_warn; }; #endif /* __LINUX_NET_XDP_PRIV_H__ */ diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 69796d264f06..e86ec48ef627 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -16,6 +16,13 @@ struct net_device; struct xsk_queue; +/* Masks for xdp_umem_page flags. + * The low 12-bits of the addr will be 0 since this is the page address, so we + * can use them for flags. + */ +#define XSK_NEXT_PG_CONTIG_SHIFT 0 +#define XSK_NEXT_PG_CONTIG_MASK (1ULL << XSK_NEXT_PG_CONTIG_SHIFT) + struct xdp_umem_page { void *addr; dma_addr_t dma; @@ -27,6 +34,13 @@ struct xdp_umem_fq_reuse { u64 handles[]; }; +/* Flags for the umem flags field. + * + * The NEED_WAKEUP flag is 1 due to the reuse of the flags field for public + * flags. See include/uapi/linux/if_xdp.h. 
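Before the xdp_sock.h changes continue below, the address scheme they implement deserves a plain statement: in unaligned-chunk mode a 64-bit "address" is really a (base, offset) pair, base in the low 48 bits and offset in the top 16, which is what xsk_umem_extract_addr(), xsk_umem_extract_offset() and xsk_umem_add_offset_to_addr() further down decode. A standalone sketch; the 48-bit split mirrors XSK_UNALIGNED_BUF_OFFSET_SHIFT and XSK_UNALIGNED_BUF_ADDR_MASK from the uapi header, stated here from memory of that header, so treat the constants as an assumption:

#include <stdint.h>

#define BUF_OFFSET_SHIFT	48
#define BUF_ADDR_MASK		((1ULL << BUF_OFFSET_SHIFT) - 1)

/* pack a chunk base address and an intra-chunk offset into one u64,
 * as xsk_umem_adjust_offset() does under XDP_UMEM_UNALIGNED_CHUNK_FLAG
 */
static inline uint64_t xsk_addr_pack(uint64_t base, uint64_t offset)
{
	return (base & BUF_ADDR_MASK) | (offset << BUF_OFFSET_SHIFT);
}

/* recover the effective address, base + offset, mirroring
 * xsk_umem_add_offset_to_addr()
 */
static inline uint64_t xsk_addr_effective(uint64_t addr)
{
	return (addr & BUF_ADDR_MASK) + (addr >> BUF_OFFSET_SHIFT);
}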
+ */ +#define XDP_UMEM_USES_NEED_WAKEUP (1 << 1) + struct xdp_umem { struct xsk_queue *fq; struct xsk_queue *cq; @@ -41,15 +55,33 @@ struct xdp_umem { struct work_struct work; struct page **pgs; u32 npgs; + u16 queue_id; + u8 need_wakeup; + u8 flags; int id; struct net_device *dev; struct xdp_umem_fq_reuse *fq_reuse; - u16 queue_id; bool zc; spinlock_t xsk_list_lock; struct list_head xsk_list; }; +/* Nodes are linked in the struct xdp_sock map_list field, and used to + * track which maps a certain socket reside in. + */ + +struct xsk_map { + struct bpf_map map; + spinlock_t lock; /* Synchronize map updates */ + struct xdp_sock *xsk_map[]; +}; + +struct xsk_map_node { + struct list_head node; + struct xsk_map *map; + struct xdp_sock **map_entry; +}; + struct xdp_sock { /* struct sock must be the first member of struct xdp_sock */ struct sock sk; @@ -75,18 +107,19 @@ struct xdp_sock { /* Protects generic receive. */ spinlock_t rx_lock; u64 rx_dropped; + struct list_head map_list; + /* Protects map_list */ + spinlock_t map_list_lock; }; struct xdp_buff; #ifdef CONFIG_XDP_SOCKETS int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp); -int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp); -void xsk_flush(struct xdp_sock *xs); bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs); /* Used from netdev driver */ bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt); -u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr); -void xsk_umem_discard_addr(struct xdp_umem *umem); +bool xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr); +void xsk_umem_release_addr(struct xdp_umem *umem); void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries); bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc); void xsk_umem_consume_tx_done(struct xdp_umem *umem); @@ -95,15 +128,62 @@ struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem, struct xdp_umem_fq_reuse *newq); void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq); struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, u16 queue_id); +void xsk_set_rx_need_wakeup(struct xdp_umem *umem); +void xsk_set_tx_need_wakeup(struct xdp_umem *umem); +void xsk_clear_rx_need_wakeup(struct xdp_umem *umem); +void xsk_clear_tx_need_wakeup(struct xdp_umem *umem); +bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem); + +void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs, + struct xdp_sock **map_entry); +int xsk_map_inc(struct xsk_map *map); +void xsk_map_put(struct xsk_map *map); +int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp); +void __xsk_map_flush(void); + +static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, + u32 key) +{ + struct xsk_map *m = container_of(map, struct xsk_map, map); + struct xdp_sock *xs; + + if (key >= map->max_entries) + return NULL; + + xs = READ_ONCE(m->xsk_map[key]); + return xs; +} + +static inline u64 xsk_umem_extract_addr(u64 addr) +{ + return addr & XSK_UNALIGNED_BUF_ADDR_MASK; +} + +static inline u64 xsk_umem_extract_offset(u64 addr) +{ + return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT; +} + +static inline u64 xsk_umem_add_offset_to_addr(u64 addr) +{ + return xsk_umem_extract_addr(addr) + xsk_umem_extract_offset(addr); +} static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr) { - return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1)); + unsigned long page_addr; + + addr = xsk_umem_add_offset_to_addr(addr); + page_addr = (unsigned long)umem->pages[addr >> PAGE_SHIFT].addr; + + return (char *)(page_addr & 
PAGE_MASK) + (addr & ~PAGE_MASK); } static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr) { - return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1)); + addr = xsk_umem_add_offset_to_addr(addr); + + return umem->pages[addr >> PAGE_SHIFT].dma + (addr & ~PAGE_MASK); } /* Reuse-queue aware version of FILL queue helpers */ @@ -117,7 +197,7 @@ static inline bool xsk_umem_has_addrs_rq(struct xdp_umem *umem, u32 cnt) return xsk_umem_has_addrs(umem, cnt - rq->length); } -static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr) +static inline bool xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr) { struct xdp_umem_fq_reuse *rq = umem->fq_reuse; @@ -128,12 +208,12 @@ static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr) return addr; } -static inline void xsk_umem_discard_addr_rq(struct xdp_umem *umem) +static inline void xsk_umem_release_addr_rq(struct xdp_umem *umem) { struct xdp_umem_fq_reuse *rq = umem->fq_reuse; if (!rq->length) - xsk_umem_discard_addr(umem); + xsk_umem_release_addr(umem); else rq->length--; } @@ -144,19 +224,23 @@ static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr) rq->handles[rq->length++] = addr; } -#else -static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) -{ - return -ENOTSUPP; -} -static inline int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) +/* Handle the offset appropriately depending on aligned or unaligned mode. + * For unaligned mode, we store the offset in the upper 16-bits of the address. + * For aligned mode, we simply add the offset to the address. + */ +static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 address, + u64 offset) { - return -ENOTSUPP; + if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) + return address + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT); + else + return address + offset; } - -static inline void xsk_flush(struct xdp_sock *xs) +#else +static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) { + return -ENOTSUPP; } static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) @@ -174,7 +258,7 @@ static inline u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr) return NULL; } -static inline void xsk_umem_discard_addr(struct xdp_umem *umem) +static inline void xsk_umem_release_addr(struct xdp_umem *umem) { } @@ -213,6 +297,21 @@ static inline struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, return NULL; } +static inline u64 xsk_umem_extract_addr(u64 addr) +{ + return 0; +} + +static inline u64 xsk_umem_extract_offset(u64 addr) +{ + return 0; +} + +static inline u64 xsk_umem_add_offset_to_addr(u64 addr) +{ + return 0; +} + static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr) { return NULL; @@ -233,7 +332,7 @@ static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr) return NULL; } -static inline void xsk_umem_discard_addr_rq(struct xdp_umem *umem) +static inline void xsk_umem_release_addr_rq(struct xdp_umem *umem) { } @@ -241,6 +340,47 @@ static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr) { } +static inline void xsk_set_rx_need_wakeup(struct xdp_umem *umem) +{ +} + +static inline void xsk_set_tx_need_wakeup(struct xdp_umem *umem) +{ +} + +static inline void xsk_clear_rx_need_wakeup(struct xdp_umem *umem) +{ +} + +static inline void xsk_clear_tx_need_wakeup(struct xdp_umem *umem) +{ +} + +static inline bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem) +{ + return false; +} + +static inline u64 
xsk_umem_adjust_offset(struct xdp_umem *umem, u64 handle, + u64 offset) +{ + return 0; +} + +static inline int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp) +{ + return -EOPNOTSUPP; +} + +static inline void __xsk_map_flush(void) +{ +} + +static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, + u32 key) +{ + return NULL; +} #endif /* CONFIG_XDP_SOCKETS */ #endif /* _LINUX_XDP_SOCK_H */ diff --git a/include/net/xfrm.h b/include/net/xfrm.h index b22db30c3d88..8f71c111e65a 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -193,6 +193,7 @@ struct xfrm_state { /* Data for encapsulator */ struct xfrm_encap_tmpl *encap; + struct sock __rcu *encap_sk; /* Data for care-of address */ xfrm_address_t *coaddr; @@ -983,7 +984,6 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); struct xfrm_if_parms { - char name[IFNAMSIZ]; /* name of XFRM device */ int link; /* ifindex of underlying L2 interface */ u32 if_id; /* interface identifyer */ }; @@ -991,7 +991,6 @@ struct xfrm_if_parms { struct xfrm_if { struct xfrm_if __rcu *next; /* next interface in list */ struct net_device *dev; /* virtual device associated with interface */ - struct net_device *phydev; /* physical device */ struct net *net; /* netns for packet i/o */ struct xfrm_if_parms p; /* interface parms */ @@ -1549,6 +1548,9 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload); int xfrm_init_state(struct xfrm_state *x); int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); int xfrm_input_resume(struct sk_buff *skb, int nexthdr); +int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb, + int (*finish)(struct net *, struct sock *, + struct sk_buff *)); int xfrm_trans_queue(struct sk_buff *skb, int (*finish)(struct net *, struct sock *, struct sk_buff *)); @@ -1615,13 +1617,6 @@ static inline int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optv { return -ENOPROTOOPT; } - -static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) -{ - /* should not happen */ - kfree_skb(skb); - return 0; -} #endif struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, |
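Among the xfrm.h changes above, xfrm_trans_queue_net() merits a usage sketch: it is the per-netns variant of xfrm_trans_queue(), taking an explicit struct net instead of deriving it from the skb. A hedged illustration; example_finish() and example_defer() are invented names, and the assumption, based on the older xfrm_trans_queue() path, is that finish() runs later from the reinject context:

#include <linux/skbuff.h>
#include <net/xfrm.h>

/* completion callback with the usual (net, sk, skb) okfn shape */
static int example_finish(struct net *net, struct sock *sk,
			  struct sk_buff *skb)
{
	/* resume protocol input here; a real user would hand the skb
	 * back to the stack rather than drop it
	 */
	kfree_skb(skb);
	return 0;
}

static int example_defer(struct net *net, struct sk_buff *skb)
{
	/* queue the skb for asynchronous processing in the given netns */
	return xfrm_trans_queue_net(net, skb, example_finish);
}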