diff options
Diffstat (limited to 'include/net')
93 files changed, 2098 insertions, 650 deletions
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index b8eb51a661e5..beede1e1a919 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -336,6 +336,9 @@ enum p9_qid_t { #define P9_NOFID (u32)(~0) #define P9_MAXWELEM 16 +/* Minimal header size: size[4] type[1] tag[2] */ +#define P9_HDRSZ 7 + /* ample room for Twrite/Rread header */ #define P9_IOHDRSZ 24 @@ -558,19 +561,12 @@ struct p9_fcall { size_t offset; size_t capacity; + struct kmem_cache *cache; u8 *sdata; }; -struct p9_idpool; - int p9_errstr2errno(char *errstr, int len); -struct p9_idpool *p9_idpool_create(void); -void p9_idpool_destroy(struct p9_idpool *); -int p9_idpool_get(struct p9_idpool *p); -void p9_idpool_put(int id, struct p9_idpool *p); -int p9_idpool_check(int id, struct p9_idpool *p); - int p9_error_init(void); int p9_trans_fd_init(void); void p9_trans_fd_exit(void); diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 0fa0fbab33b0..947a570307a6 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -64,22 +64,15 @@ enum p9_trans_status { /** * enum p9_req_status_t - status of a request - * @REQ_STATUS_IDLE: request slot unused * @REQ_STATUS_ALLOC: request has been allocated but not sent * @REQ_STATUS_UNSENT: request waiting to be sent * @REQ_STATUS_SENT: request sent to server * @REQ_STATUS_RCVD: response received from server * @REQ_STATUS_FLSHD: request has been flushed * @REQ_STATUS_ERROR: request encountered an error on the client side - * - * The @REQ_STATUS_IDLE state is used to mark a request slot as unused - * but use is actually tracked by the idpool structure which handles tag - * id allocation. - * */ enum p9_req_status_t { - REQ_STATUS_IDLE, REQ_STATUS_ALLOC, REQ_STATUS_UNSENT, REQ_STATUS_SENT, @@ -92,70 +85,46 @@ enum p9_req_status_t { * struct p9_req_t - request slots * @status: status of this request slot * @t_err: transport error - * @flush_tag: tag of request being flushed (for flush requests) * @wq: wait_queue for the client to block on for this request * @tc: the request fcall structure * @rc: the response fcall structure * @aux: transport specific data (provided for trans_fd migration) * @req_list: link for higher level objects to chain requests - * - * Transport use an array to track outstanding requests - * instead of a list. While this may incurr overhead during initial - * allocation or expansion, it makes request lookup much easier as the - * tag id is a index into an array. (We use tag+1 so that we can accommodate - * the -1 tag for the T_VERSION request). - * This also has the nice effect of only having to allocate wait_queues - * once, instead of constantly allocating and freeing them. Its possible - * other resources could benefit from this scheme as well. - * */ - struct p9_req_t { int status; int t_err; + struct kref refcount; wait_queue_head_t wq; - struct p9_fcall *tc; - struct p9_fcall *rc; + struct p9_fcall tc; + struct p9_fcall rc; void *aux; - struct list_head req_list; }; /** * struct p9_client - per client instance state - * @lock: protect @fidlist + * @lock: protect @fids and @reqs * @msize: maximum data size negotiated by protocol - * @dotu: extension flags negotiated by protocol * @proto_version: 9P protocol version to use * @trans_mod: module API instantiated with this client + * @status: connection state * @trans: tranport instance state and API * @fids: All active FID handles - * @tagpool - transaction id accounting for session - * @reqs - 2D array of requests - * @max_tag - current maximum tag id allocated - * @name - node name used as client id + * @reqs: All active requests. + * @name: node name used as client id * * The client structure is used to keep track of various per-client * state that has been instantiated. - * In order to minimize per-transaction overhead we use a - * simple array to lookup requests instead of a hash table - * or linked list. In order to support larger number of - * transactions, we make this a 2D array, allocating new rows - * when we need to grow the total number of the transactions. - * - * Each row is 256 requests and we'll support up to 256 rows for - * a total of 64k concurrent requests per session. - * - * Bugs: duplicated data and potentially unnecessary elements. */ - struct p9_client { - spinlock_t lock; /* protect client structure */ + spinlock_t lock; unsigned int msize; unsigned char proto_version; struct p9_trans_module *trans_mod; enum p9_trans_status status; void *trans; + struct kmem_cache *fcall_cache; union { struct { @@ -170,10 +139,7 @@ struct p9_client { } trans_opts; struct idr fids; - - struct p9_idpool *tagpool; - struct p9_req_t *reqs[P9_ROW_MAXTAG]; - int max_tag; + struct idr reqs; char name[__NEW_UTS_LEN + 1]; }; @@ -266,7 +232,21 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode, kgid_t gid, struct p9_qid *); int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status); int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl); +void p9_fcall_fini(struct p9_fcall *fc); struct p9_req_t *p9_tag_lookup(struct p9_client *, u16); + +static inline void p9_req_get(struct p9_req_t *r) +{ + kref_get(&r->refcount); +} + +static inline int p9_req_try_get(struct p9_req_t *r) +{ + return kref_get_unless_zero(&r->refcount); +} + +int p9_req_put(struct p9_req_t *r); + void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status); int p9_parse_header(struct p9_fcall *, int32_t *, int8_t *, int16_t *, int); @@ -279,4 +259,7 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *, const char *, u64 *); int p9_client_xattrcreate(struct p9_fid *, const char *, u64, int); int p9_client_readlink(struct p9_fid *fid, char **target); +int p9_client_init(void); +void p9_client_exit(void); + #endif /* NET_9P_CLIENT_H */ diff --git a/include/net/act_api.h b/include/net/act_api.h index 1ad5b19e83a9..dbc795ec659e 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -13,7 +13,7 @@ #include <net/netns/generic.h> struct tcf_idrinfo { - spinlock_t lock; + struct mutex lock; struct idr action_idr; }; @@ -23,20 +23,20 @@ struct tc_action { const struct tc_action_ops *ops; __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ __u32 order; - struct list_head list; struct tcf_idrinfo *idrinfo; u32 tcfa_index; refcount_t tcfa_refcnt; atomic_t tcfa_bindcnt; - u32 tcfa_capab; int tcfa_action; struct tcf_t tcfa_tm; struct gnet_stats_basic_packed tcfa_bstats; + struct gnet_stats_basic_packed tcfa_bstats_hw; struct gnet_stats_queue tcfa_qstats; struct net_rate_estimator __rcu *tcfa_rate_est; spinlock_t tcfa_lock; struct gnet_stats_basic_cpu __percpu *cpu_bstats; + struct gnet_stats_basic_cpu __percpu *cpu_bstats_hw; struct gnet_stats_queue __percpu *cpu_qstats; struct tc_cookie __rcu *act_cookie; struct tcf_chain *goto_chain; @@ -44,7 +44,6 @@ struct tc_action { #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt #define tcf_bindcnt common.tcfa_bindcnt -#define tcf_capab common.tcfa_capab #define tcf_action common.tcfa_action #define tcf_tm common.tcfa_tm #define tcf_bstats common.tcfa_bstats @@ -88,8 +87,7 @@ struct tc_action_ops { struct tcf_result *); /* called under RCU BH lock*/ int (*dump)(struct sk_buff *, struct tc_action *, int, int); void (*cleanup)(struct tc_action *); - int (*lookup)(struct net *net, struct tc_action **a, u32 index, - struct netlink_ext_ack *extack); + int (*lookup)(struct net *net, struct tc_action **a, u32 index); int (*init)(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, int ovr, int bind, bool rtnl_held, @@ -98,11 +96,10 @@ struct tc_action_ops { struct netlink_callback *, int, const struct tc_action_ops *, struct netlink_ext_ack *); - void (*stats_update)(struct tc_action *, u64, u32, u64); + void (*stats_update)(struct tc_action *, u64, u32, u64, bool); size_t (*get_fill_size)(const struct tc_action *act); struct net_device *(*get_dev)(const struct tc_action *a); void (*put_dev)(struct net_device *dev); - int (*delete)(struct net *net, u32 index); }; struct tc_action_net { @@ -120,7 +117,7 @@ int tc_action_net_init(struct tc_action_net *tn, if (!tn->idrinfo) return -ENOMEM; tn->ops = ops; - spin_lock_init(&tn->idrinfo->lock); + mutex_init(&tn->idrinfo->lock); idr_init(&tn->idrinfo->action_idr); return err; } @@ -148,8 +145,6 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack); int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index); -bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a, - int bind); int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, int bind, bool cpustats); @@ -158,7 +153,6 @@ void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a); void tcf_idr_cleanup(struct tc_action_net *tn, u32 index); int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, struct tc_action **a, int bind); -int tcf_idr_delete_index(struct tc_action_net *tn, u32 index); int __tcf_idr_release(struct tc_action *a, bool bind, bool strict); static inline int tcf_idr_release(struct tc_action *a, bool bind) @@ -190,45 +184,15 @@ int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int); #endif /* CONFIG_NET_CLS_ACT */ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, - u64 packets, u64 lastuse) + u64 packets, u64 lastuse, bool hw) { #ifdef CONFIG_NET_CLS_ACT if (!a->ops->stats_update) return; - a->ops->stats_update(a, bytes, packets, lastuse); + a->ops->stats_update(a, bytes, packets, lastuse, hw); #endif } -#ifdef CONFIG_NET_CLS_ACT -int tc_setup_cb_egdev_register(const struct net_device *dev, - tc_setup_cb_t *cb, void *cb_priv); -void tc_setup_cb_egdev_unregister(const struct net_device *dev, - tc_setup_cb_t *cb, void *cb_priv); -int tc_setup_cb_egdev_call(const struct net_device *dev, - enum tc_setup_type type, void *type_data, - bool err_stop); -#else -static inline -int tc_setup_cb_egdev_register(const struct net_device *dev, - tc_setup_cb_t *cb, void *cb_priv) -{ - return 0; -} - -static inline -void tc_setup_cb_egdev_unregister(const struct net_device *dev, - tc_setup_cb_t *cb, void *cb_priv) -{ -} - -static inline -int tc_setup_cb_egdev_call(const struct net_device *dev, - enum tc_setup_type type, void *type_data, - bool err_stop) -{ - return 0; -} -#endif #endif diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 6def0351bcc3..1656c5978498 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -265,6 +265,11 @@ extern const struct ipv6_stub *ipv6_stub __read_mostly; struct ipv6_bpf_stub { int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len, bool force_bind_address_no_port, bool with_lock); + struct sock *(*udp6_lib_lookup)(struct net *net, + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, __be16 dport, + int dif, int sdif, struct udp_table *tbl, + struct sk_buff *skb); }; extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; @@ -312,6 +317,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr); bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, const struct in6_addr *addr); +int ipv6_anycast_init(void); +void ipv6_anycast_cleanup(void); /* Device notifier */ int register_inet6addr_notifier(struct notifier_block *nb); diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index f53edb3754bc..1adefe42c0a6 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -13,6 +13,7 @@ #define _NET_RXRPC_H #include <linux/rxrpc.h> +#include <linux/ktime.h> struct key; struct sock; @@ -76,6 +77,10 @@ int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *, struct key *); int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *, enum rxrpc_call_completion *, u32 *); -u32 rxrpc_kernel_check_life(struct socket *, struct rxrpc_call *); +u32 rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *); +void rxrpc_kernel_probe_life(struct socket *, struct rxrpc_call *); +u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *); +bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *, + ktime_t *); #endif /* _NET_RXRPC_H */ diff --git a/include/net/af_unix.h b/include/net/af_unix.h index a5ba41b3b867..ddbba838d048 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -13,7 +13,7 @@ void unix_notinflight(struct user_struct *user, struct file *fp); void unix_gc(void); void wait_for_unix_gc(void); struct sock *unix_get_socket(struct file *filp); -struct sock *unix_peer_get(struct sock *); +struct sock *unix_peer_get(struct sock *sk); #define UNIX_HASH_SIZE 256 #define UNIX_HASH_BITS 8 @@ -40,7 +40,7 @@ struct unix_skb_parms { u32 consumed; } __randomize_layout; -#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) +#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) #define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) #define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) @@ -52,7 +52,7 @@ struct unix_skb_parms { struct unix_sock { /* WARNING: sk has to be the first member */ struct sock sk; - struct unix_address *addr; + struct unix_address *addr; struct path path; struct mutex iolock, bindlock; struct sock *peer; @@ -63,7 +63,7 @@ struct unix_sock { #define UNIX_GC_CANDIDATE 0 #define UNIX_GC_MAYBE_CYCLE 1 struct socket_wq peer_wq; - wait_queue_entry_t peer_wake; + wait_queue_entry_t peer_wake; }; static inline struct unix_sock *unix_sk(const struct sock *sk) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index cdd9f1fe7cfa..c36dc1e20556 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1517,6 +1517,20 @@ struct hci_cp_le_write_def_data_len { __le16 tx_time; } __packed; +#define HCI_OP_LE_ADD_TO_RESOLV_LIST 0x2027 +struct hci_cp_le_add_to_resolv_list { + __u8 bdaddr_type; + bdaddr_t bdaddr; + __u8 peer_irk[16]; + __u8 local_irk[16]; +} __packed; + +#define HCI_OP_LE_DEL_FROM_RESOLV_LIST 0x2028 +struct hci_cp_le_del_from_resolv_list { + __u8 bdaddr_type; + bdaddr_t bdaddr; +} __packed; + #define HCI_OP_LE_CLEAR_RESOLV_LIST 0x2029 #define HCI_OP_LE_READ_RESOLV_LIST_SIZE 0x202a diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 0db1b9b428b7..e5ea633ea368 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -103,6 +103,14 @@ struct bdaddr_list { u8 bdaddr_type; }; +struct bdaddr_list_with_irk { + struct list_head list; + bdaddr_t bdaddr; + u8 bdaddr_type; + u8 peer_irk[16]; + u8 local_irk[16]; +}; + struct bt_uuid { struct list_head list; u8 uuid[16]; @@ -259,6 +267,8 @@ struct hci_dev { __u16 le_max_tx_time; __u16 le_max_rx_len; __u16 le_max_rx_time; + __u8 le_max_key_size; + __u8 le_min_key_size; __u16 discov_interleaved_timeout; __u16 conn_info_min_age; __u16 conn_info_max_age; @@ -1058,8 +1068,15 @@ int hci_inquiry(void __user *arg); struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *list, bdaddr_t *bdaddr, u8 type); +struct bdaddr_list_with_irk *hci_bdaddr_list_lookup_with_irk( + struct list_head *list, bdaddr_t *bdaddr, + u8 type); int hci_bdaddr_list_add(struct list_head *list, bdaddr_t *bdaddr, u8 type); +int hci_bdaddr_list_add_with_irk(struct list_head *list, bdaddr_t *bdaddr, + u8 type, u8 *peer_irk, u8 *local_irk); int hci_bdaddr_list_del(struct list_head *list, bdaddr_t *bdaddr, u8 type); +int hci_bdaddr_list_del_with_irk(struct list_head *list, bdaddr_t *bdaddr, + u8 type); void hci_bdaddr_list_clear(struct list_head *list); struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev, diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 0697fd413087..093aedebdf0c 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -277,12 +277,19 @@ struct l2cap_conn_rsp { #define L2CAP_CR_SEC_BLOCK 0x0003 #define L2CAP_CR_NO_MEM 0x0004 #define L2CAP_CR_BAD_AMP 0x0005 -#define L2CAP_CR_AUTHENTICATION 0x0005 -#define L2CAP_CR_AUTHORIZATION 0x0006 -#define L2CAP_CR_BAD_KEY_SIZE 0x0007 -#define L2CAP_CR_ENCRYPTION 0x0008 -#define L2CAP_CR_INVALID_SCID 0x0009 -#define L2CAP_CR_SCID_IN_USE 0x000A +#define L2CAP_CR_INVALID_SCID 0x0006 +#define L2CAP_CR_SCID_IN_USE 0x0007 + +/* credit based connect results */ +#define L2CAP_CR_LE_SUCCESS 0x0000 +#define L2CAP_CR_LE_BAD_PSM 0x0002 +#define L2CAP_CR_LE_NO_MEM 0x0004 +#define L2CAP_CR_LE_AUTHENTICATION 0x0005 +#define L2CAP_CR_LE_AUTHORIZATION 0x0006 +#define L2CAP_CR_LE_BAD_KEY_SIZE 0x0007 +#define L2CAP_CR_LE_ENCRYPTION 0x0008 +#define L2CAP_CR_LE_INVALID_SCID 0x0009 +#define L2CAP_CR_LE_SCID_IN_USE 0X000A /* connect/create channel status */ #define L2CAP_CS_NO_INFO 0x0000 @@ -455,9 +462,6 @@ struct l2cap_conn_param_update_rsp { #define L2CAP_CONN_PARAM_ACCEPTED 0x0000 #define L2CAP_CONN_PARAM_REJECTED 0x0001 -#define L2CAP_LE_MAX_CREDITS 10 -#define L2CAP_LE_DEFAULT_MPS 230 - struct l2cap_le_conn_req { __le16 psm; __le16 scid; diff --git a/include/net/bonding.h b/include/net/bonding.h index a2d058170ea3..b46d68acf701 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -139,12 +139,6 @@ struct bond_parm_tbl { int mode; }; -struct netdev_notify_work { - struct delayed_work work; - struct net_device *dev; - struct netdev_bonding_info bonding_info; -}; - struct slave { struct net_device *dev; /* first - useful for panic debug */ struct bonding *bond; /* our master */ @@ -172,6 +166,7 @@ struct slave { #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *np; #endif + struct delayed_work notify_work; struct kobject kobj; struct rtnl_link_stats64 slave_stats; }; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9a850973e09a..e0c41eb1c860 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -149,7 +149,7 @@ enum ieee80211_channel_flags { */ struct ieee80211_channel { enum nl80211_band band; - u16 center_freq; + u32 center_freq; u16 hw_value; u32 flags; int max_antenna_gain; @@ -775,6 +775,14 @@ struct cfg80211_crypto_settings { * @assocresp_ies_len: length of assocresp_ies in octets * @probe_resp_len: length of probe response template (@probe_resp) * @probe_resp: probe response template (AP mode only) + * @ftm_responder: enable FTM responder functionality; -1 for no change + * (which also implies no change in LCI/civic location data) + * @lci: Measurement Report element content, starting with Measurement Token + * (measurement type 8) + * @civicloc: Measurement Report element content, starting with Measurement + * Token (measurement type 11) + * @lci_len: LCI data length + * @civicloc_len: Civic location data length */ struct cfg80211_beacon_data { const u8 *head, *tail; @@ -782,12 +790,17 @@ struct cfg80211_beacon_data { const u8 *proberesp_ies; const u8 *assocresp_ies; const u8 *probe_resp; + const u8 *lci; + const u8 *civicloc; + s8 ftm_responder; size_t head_len, tail_len; size_t beacon_ies_len; size_t proberesp_ies_len; size_t assocresp_ies_len; size_t probe_resp_len; + size_t lci_len; + size_t civicloc_len; }; struct mac_address { @@ -849,6 +862,7 @@ struct cfg80211_bitrate_mask { * @beacon_rate: bitrate to be used for beacons * @ht_cap: HT capabilities (or %NULL if HT isn't enabled) * @vht_cap: VHT capabilities (or %NULL if VHT isn't enabled) + * @he_cap: HE capabilities (or %NULL if HE isn't enabled) * @ht_required: stations must support HT * @vht_required: stations must support VHT */ @@ -874,6 +888,7 @@ struct cfg80211_ap_settings { const struct ieee80211_ht_cap *ht_cap; const struct ieee80211_vht_cap *vht_cap; + const struct ieee80211_he_cap_elem *he_cap; bool ht_required, vht_required; }; @@ -1283,6 +1298,7 @@ struct cfg80211_tid_stats { * @rx_beacon: number of beacons received from this peer * @rx_beacon_signal_avg: signal strength average (in dBm) for beacons received * from this peer + * @connected_to_gate: true if mesh STA has a path to mesh gate * @rx_duration: aggregate PPDU duration(usecs) for all the frames from a peer * @pertid: per-TID statistics, see &struct cfg80211_tid_stats, using the last * (IEEE80211_NUM_TIDS) index for MSDUs not encapsulated in QoS-MPDUs. @@ -1290,6 +1306,10 @@ struct cfg80211_tid_stats { * @ack_signal: signal strength (in dBm) of the last ACK frame. * @avg_ack_signal: average rssi value of ack packet for the no of msdu's has * been sent. + * @rx_mpdu_count: number of MPDUs received from this station + * @fcs_err_count: number of packets (MPDUs) received from this station with + * an FCS error. This counter should be incremented only when TA of the + * received packet with an FCS error matches the peer MAC address. */ struct station_info { u64 filled; @@ -1333,9 +1353,14 @@ struct station_info { u64 rx_beacon; u64 rx_duration; u8 rx_beacon_signal_avg; + u8 connected_to_gate; + struct cfg80211_tid_stats *pertid; s8 ack_signal; s8 avg_ack_signal; + + u32 rx_mpdu_count; + u32 fcs_err_count; }; #if IS_ENABLED(CONFIG_CFG80211) @@ -1539,6 +1564,10 @@ struct bss_parameters { * @plink_timeout: If no tx activity is seen from a STA we've established * peering with for longer than this time (in seconds), then remove it * from the STA's list of peers. Default is 30 minutes. + * @dot11MeshConnectedToMeshGate: if set to true, advertise that this STA is + * connected to a mesh gate in mesh formation info. If false, the + * value in mesh formation is determined by the presence of root paths + * in the mesh path table */ struct mesh_config { u16 dot11MeshRetryTimeout; @@ -1558,6 +1587,7 @@ struct mesh_config { u16 dot11MeshHWMPperrMinInterval; u16 dot11MeshHWMPnetDiameterTraversalTime; u8 dot11MeshHWMPRootMode; + bool dot11MeshConnectedToMeshGate; u16 dot11MeshHWMPRannInterval; bool dot11MeshGateAnnouncementProtocol; bool dot11MeshForwarding; @@ -2795,6 +2825,224 @@ struct cfg80211_external_auth_params { }; /** + * struct cfg80211_ftm_responder_stats - FTM responder statistics + * + * @filled: bitflag of flags using the bits of &enum nl80211_ftm_stats to + * indicate the relevant values in this struct for them + * @success_num: number of FTM sessions in which all frames were successfully + * answered + * @partial_num: number of FTM sessions in which part of frames were + * successfully answered + * @failed_num: number of failed FTM sessions + * @asap_num: number of ASAP FTM sessions + * @non_asap_num: number of non-ASAP FTM sessions + * @total_duration_ms: total sessions durations - gives an indication + * of how much time the responder was busy + * @unknown_triggers_num: number of unknown FTM triggers - triggers from + * initiators that didn't finish successfully the negotiation phase with + * the responder + * @reschedule_requests_num: number of FTM reschedule requests - initiator asks + * for a new scheduling although it already has scheduled FTM slot + * @out_of_window_triggers_num: total FTM triggers out of scheduled window + */ +struct cfg80211_ftm_responder_stats { + u32 filled; + u32 success_num; + u32 partial_num; + u32 failed_num; + u32 asap_num; + u32 non_asap_num; + u64 total_duration_ms; + u32 unknown_triggers_num; + u32 reschedule_requests_num; + u32 out_of_window_triggers_num; +}; + +/** + * struct cfg80211_pmsr_ftm_result - FTM result + * @failure_reason: if this measurement failed (PMSR status is + * %NL80211_PMSR_STATUS_FAILURE), this gives a more precise + * reason than just "failure" + * @burst_index: if reporting partial results, this is the index + * in [0 .. num_bursts-1] of the burst that's being reported + * @num_ftmr_attempts: number of FTM request frames transmitted + * @num_ftmr_successes: number of FTM request frames acked + * @busy_retry_time: if failure_reason is %NL80211_PMSR_FTM_FAILURE_PEER_BUSY, + * fill this to indicate in how many seconds a retry is deemed possible + * by the responder + * @num_bursts_exp: actual number of bursts exponent negotiated + * @burst_duration: actual burst duration negotiated + * @ftms_per_burst: actual FTMs per burst negotiated + * @lci_len: length of LCI information (if present) + * @civicloc_len: length of civic location information (if present) + * @lci: LCI data (may be %NULL) + * @civicloc: civic location data (may be %NULL) + * @rssi_avg: average RSSI over FTM action frames reported + * @rssi_spread: spread of the RSSI over FTM action frames reported + * @tx_rate: bitrate for transmitted FTM action frame response + * @rx_rate: bitrate of received FTM action frame + * @rtt_avg: average of RTTs measured (must have either this or @dist_avg) + * @rtt_variance: variance of RTTs measured (note that standard deviation is + * the square root of the variance) + * @rtt_spread: spread of the RTTs measured + * @dist_avg: average of distances (mm) measured + * (must have either this or @rtt_avg) + * @dist_variance: variance of distances measured (see also @rtt_variance) + * @dist_spread: spread of distances measured (see also @rtt_spread) + * @num_ftmr_attempts_valid: @num_ftmr_attempts is valid + * @num_ftmr_successes_valid: @num_ftmr_successes is valid + * @rssi_avg_valid: @rssi_avg is valid + * @rssi_spread_valid: @rssi_spread is valid + * @tx_rate_valid: @tx_rate is valid + * @rx_rate_valid: @rx_rate is valid + * @rtt_avg_valid: @rtt_avg is valid + * @rtt_variance_valid: @rtt_variance is valid + * @rtt_spread_valid: @rtt_spread is valid + * @dist_avg_valid: @dist_avg is valid + * @dist_variance_valid: @dist_variance is valid + * @dist_spread_valid: @dist_spread is valid + */ +struct cfg80211_pmsr_ftm_result { + const u8 *lci; + const u8 *civicloc; + unsigned int lci_len; + unsigned int civicloc_len; + enum nl80211_peer_measurement_ftm_failure_reasons failure_reason; + u32 num_ftmr_attempts, num_ftmr_successes; + s16 burst_index; + u8 busy_retry_time; + u8 num_bursts_exp; + u8 burst_duration; + u8 ftms_per_burst; + s32 rssi_avg; + s32 rssi_spread; + struct rate_info tx_rate, rx_rate; + s64 rtt_avg; + s64 rtt_variance; + s64 rtt_spread; + s64 dist_avg; + s64 dist_variance; + s64 dist_spread; + + u16 num_ftmr_attempts_valid:1, + num_ftmr_successes_valid:1, + rssi_avg_valid:1, + rssi_spread_valid:1, + tx_rate_valid:1, + rx_rate_valid:1, + rtt_avg_valid:1, + rtt_variance_valid:1, + rtt_spread_valid:1, + dist_avg_valid:1, + dist_variance_valid:1, + dist_spread_valid:1; +}; + +/** + * struct cfg80211_pmsr_result - peer measurement result + * @addr: address of the peer + * @host_time: host time (use ktime_get_boottime() adjust to the time when the + * measurement was made) + * @ap_tsf: AP's TSF at measurement time + * @status: status of the measurement + * @final: if reporting partial results, mark this as the last one; if not + * reporting partial results always set this flag + * @ap_tsf_valid: indicates the @ap_tsf value is valid + * @type: type of the measurement reported, note that we only support reporting + * one type at a time, but you can report multiple results separately and + * they're all aggregated for userspace. + */ +struct cfg80211_pmsr_result { + u64 host_time, ap_tsf; + enum nl80211_peer_measurement_status status; + + u8 addr[ETH_ALEN]; + + u8 final:1, + ap_tsf_valid:1; + + enum nl80211_peer_measurement_type type; + + union { + struct cfg80211_pmsr_ftm_result ftm; + }; +}; + +/** + * struct cfg80211_pmsr_ftm_request_peer - FTM request data + * @requested: indicates FTM is requested + * @preamble: frame preamble to use + * @burst_period: burst period to use + * @asap: indicates to use ASAP mode + * @num_bursts_exp: number of bursts exponent + * @burst_duration: burst duration + * @ftms_per_burst: number of FTMs per burst + * @ftmr_retries: number of retries for FTM request + * @request_lci: request LCI information + * @request_civicloc: request civic location information + * + * See also nl80211 for the respective attribute documentation. + */ +struct cfg80211_pmsr_ftm_request_peer { + enum nl80211_preamble preamble; + u16 burst_period; + u8 requested:1, + asap:1, + request_lci:1, + request_civicloc:1; + u8 num_bursts_exp; + u8 burst_duration; + u8 ftms_per_burst; + u8 ftmr_retries; +}; + +/** + * struct cfg80211_pmsr_request_peer - peer data for a peer measurement request + * @addr: MAC address + * @chandef: channel to use + * @report_ap_tsf: report the associated AP's TSF + * @ftm: FTM data, see &struct cfg80211_pmsr_ftm_request_peer + */ +struct cfg80211_pmsr_request_peer { + u8 addr[ETH_ALEN]; + struct cfg80211_chan_def chandef; + u8 report_ap_tsf:1; + struct cfg80211_pmsr_ftm_request_peer ftm; +}; + +/** + * struct cfg80211_pmsr_request - peer measurement request + * @cookie: cookie, set by cfg80211 + * @nl_portid: netlink portid - used by cfg80211 + * @drv_data: driver data for this request, if required for aborting, + * not otherwise freed or anything by cfg80211 + * @mac_addr: MAC address used for (randomised) request + * @mac_addr_mask: MAC address mask used for randomisation, bits that + * are 0 in the mask should be randomised, bits that are 1 should + * be taken from the @mac_addr + * @list: used by cfg80211 to hold on to the request + * @timeout: timeout (in milliseconds) for the whole operation, if + * zero it means there's no timeout + * @n_peers: number of peers to do measurements with + * @peers: per-peer measurement request data + */ +struct cfg80211_pmsr_request { + u64 cookie; + void *drv_data; + u32 n_peers; + u32 nl_portid; + + u32 timeout; + + u8 mac_addr[ETH_ALEN] __aligned(2); + u8 mac_addr_mask[ETH_ALEN] __aligned(2); + + struct list_head list; + + struct cfg80211_pmsr_request_peer peers[]; +}; + +/** * struct cfg80211_ops - backend description for wireless configuration * * This struct is registered by fullmac card drivers and/or wireless stacks @@ -3126,6 +3374,11 @@ struct cfg80211_external_auth_params { * * @tx_control_port: TX a control port frame (EAPoL). The noencrypt parameter * tells the driver that the frame should not be encrypted. + * + * @get_ftm_responder_stats: Retrieve FTM responder statistics, if available. + * Statistics should be cumulative, currently no way to reset is provided. + * @start_pmsr: start peer measurement (e.g. FTM) + * @abort_pmsr: abort peer measurement */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -3431,6 +3684,15 @@ struct cfg80211_ops { const u8 *buf, size_t len, const u8 *dest, const __be16 proto, const bool noencrypt); + + int (*get_ftm_responder_stats)(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_ftm_responder_stats *ftm_stats); + + int (*start_pmsr)(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_pmsr_request *request); + void (*abort_pmsr)(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_pmsr_request *request); }; /* @@ -3803,6 +4065,42 @@ struct wiphy_iftype_ext_capab { }; /** + * struct cfg80211_pmsr_capabilities - cfg80211 peer measurement capabilities + * @max_peers: maximum number of peers in a single measurement + * @report_ap_tsf: can report assoc AP's TSF for radio resource measurement + * @randomize_mac_addr: can randomize MAC address for measurement + * @ftm.supported: FTM measurement is supported + * @ftm.asap: ASAP-mode is supported + * @ftm.non_asap: non-ASAP-mode is supported + * @ftm.request_lci: can request LCI data + * @ftm.request_civicloc: can request civic location data + * @ftm.preambles: bitmap of preambles supported (&enum nl80211_preamble) + * @ftm.bandwidths: bitmap of bandwidths supported (&enum nl80211_chan_width) + * @ftm.max_bursts_exponent: maximum burst exponent supported + * (set to -1 if not limited; note that setting this will necessarily + * forbid using the value 15 to let the responder pick) + * @ftm.max_ftms_per_burst: maximum FTMs per burst supported (set to 0 if + * not limited) + */ +struct cfg80211_pmsr_capabilities { + unsigned int max_peers; + u8 report_ap_tsf:1, + randomize_mac_addr:1; + + struct { + u32 preambles; + u32 bandwidths; + s8 max_bursts_exponent; + u8 max_ftms_per_burst; + u8 supported:1, + asap:1, + non_asap:1, + request_lci:1, + request_civicloc:1; + } ftm; +}; + +/** * struct wiphy - wireless hardware description * @reg_notifier: the driver's regulatory notification callback, * note that if your driver uses wiphy_apply_custom_regulatory() @@ -3958,7 +4256,6 @@ struct wiphy_iftype_ext_capab { * by the driver in the .connect() callback. The bit position maps to the * attribute indices defined in &enum nl80211_bss_select_attr. * - * @cookie_counter: unique generic cookie counter, used to identify objects. * @nan_supported_bands: bands supported by the device in NAN mode, a * bitmap of &enum nl80211_band values. For instance, for * NL80211_BAND_2GHZ, bit 0 would be set @@ -3967,6 +4264,8 @@ struct wiphy_iftype_ext_capab { * @txq_limit: configuration of internal TX queue frame limit * @txq_memory_limit: configuration internal TX queue memory limit * @txq_quantum: configuration of internal TX queue scheduler quantum + * + * @pmsr_capa: peer measurement capabilities */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -4097,14 +4396,14 @@ struct wiphy { u32 bss_select_support; - u64 cookie_counter; - u8 nan_supported_bands; u32 txq_limit; u32 txq_memory_limit; u32 txq_quantum; + const struct cfg80211_pmsr_capabilities *pmsr_capa; + char priv[0] __aligned(NETDEV_ALIGN); }; @@ -4307,6 +4606,9 @@ struct cfg80211_cqm_config; * @owner_nlportid: (private) owner socket port ID * @nl_owner_dead: (private) owner socket went away * @cqm_config: (private) nl80211 RSSI monitor state + * @pmsr_list: (private) peer measurement requests + * @pmsr_lock: (private) peer measurements requests/results lock + * @pmsr_free_wk: (private) peer measurements cleanup work */ struct wireless_dev { struct wiphy *wiphy; @@ -4378,6 +4680,10 @@ struct wireless_dev { #endif struct cfg80211_cqm_config *cqm_config; + + struct list_head pmsr_list; + spinlock_t pmsr_lock; + struct work_struct pmsr_free_wk; }; static inline u8 *wdev_address(struct wireless_dev *wdev) @@ -4733,6 +5039,17 @@ const u8 *cfg80211_find_vendor_ie(unsigned int oui, int oui_type, const u8 *ies, int len); /** + * cfg80211_send_layer2_update - send layer 2 update frame + * + * @dev: network device + * @addr: STA MAC address + * + * Wireless drivers can use this function to update forwarding tables in bridge + * devices upon STA association. + */ +void cfg80211_send_layer2_update(struct net_device *dev, const u8 *addr); + +/** * DOC: Regulatory enforcement infrastructure * * TODO @@ -4852,8 +5169,6 @@ const char *reg_initiator_name(enum nl80211_reg_initiator initiator); * * @alpha2: the ISO/IEC 3166 alpha2 wmm rule to be queried. * @freq: the freqency(in MHz) to be queried. - * @ptr: pointer where the regdb wmm data is to be stored (or %NULL if - * irrelevant). This can be used later for deduplication. * @rule: pointer to store the wmm rule from the regulatory db. * * Self-managed wireless drivers can use this function to query @@ -4865,8 +5180,8 @@ const char *reg_initiator_name(enum nl80211_reg_initiator initiator); * * Return: 0 on success. -ENODATA. */ -int reg_query_regdb_wmm(char *alpha2, int freq, u32 *ptr, - struct ieee80211_wmm_rule *rule); +int reg_query_regdb_wmm(char *alpha2, int freq, + struct ieee80211_reg_rule *rule); /* * callbacks for asynchronous cfg80211 methods, notification @@ -5261,7 +5576,8 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, * cfg80211 then sends a notification to userspace. */ void cfg80211_notify_new_peer_candidate(struct net_device *dev, - const u8 *macaddr, const u8 *ie, u8 ie_len, gfp_t gfp); + const u8 *macaddr, const u8 *ie, u8 ie_len, + int sig_dbm, gfp_t gfp); /** * DOC: RFkill integration @@ -6563,6 +6879,31 @@ int cfg80211_external_auth_request(struct net_device *netdev, struct cfg80211_external_auth_params *params, gfp_t gfp); +/** + * cfg80211_pmsr_report - report peer measurement result data + * @wdev: the wireless device reporting the measurement + * @req: the original measurement request + * @result: the result data + * @gfp: allocation flags + */ +void cfg80211_pmsr_report(struct wireless_dev *wdev, + struct cfg80211_pmsr_request *req, + struct cfg80211_pmsr_result *result, + gfp_t gfp); + +/** + * cfg80211_pmsr_complete - report peer measurement completed + * @wdev: the wireless device reporting the measurement + * @req: the original measurement request + * @gfp: allocation flags + * + * Report that the entire measurement completed, after this + * the request pointer will no longer be valid. + */ +void cfg80211_pmsr_complete(struct wireless_dev *wdev, + struct cfg80211_pmsr_request *req, + gfp_t gfp); + /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ diff --git a/include/net/checksum.h b/include/net/checksum.h index aef2b2bb6603..0f319e13be2c 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -30,7 +30,7 @@ static inline __wsum csum_and_copy_from_user (const void __user *src, void *dst, int len, __wsum sum, int *err_ptr) { - if (access_ok(VERIFY_READ, src, len)) + if (access_ok(src, len)) return csum_partial_copy_from_user(src, dst, len, sum, err_ptr); if (len) @@ -46,7 +46,7 @@ static __inline__ __wsum csum_and_copy_to_user { sum = csum_partial(src, len, sum); - if (access_ok(VERIFY_WRITE, dst, len)) { + if (access_ok(dst, len)) { if (copy_to_user(dst, src, len) == 0) return sum; } diff --git a/include/net/devlink.h b/include/net/devlink.h index b9b89d6604d4..67f4293bc970 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -298,7 +298,7 @@ struct devlink_resource { #define DEVLINK_RESOURCE_ID_PARENT_TOP 0 -#define DEVLINK_PARAM_MAX_STRING_VALUE 32 +#define __DEVLINK_PARAM_MAX_STRING_VALUE 32 enum devlink_param_type { DEVLINK_PARAM_TYPE_U8, DEVLINK_PARAM_TYPE_U16, @@ -311,7 +311,7 @@ union devlink_param_value { u8 vu8; u16 vu16; u32 vu32; - const char *vstr; + char vstr[__DEVLINK_PARAM_MAX_STRING_VALUE]; bool vbool; }; @@ -362,6 +362,10 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_MAX_MACS, DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV, DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT, + DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI, + DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX, + DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN, + DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -380,6 +384,18 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_NAME "region_snapshot_enable" #define DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_TYPE DEVLINK_PARAM_TYPE_BOOL +#define DEVLINK_PARAM_GENERIC_IGNORE_ARI_NAME "ignore_ari" +#define DEVLINK_PARAM_GENERIC_IGNORE_ARI_TYPE DEVLINK_PARAM_TYPE_BOOL + +#define DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_NAME "msix_vec_per_pf_max" +#define DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_TYPE DEVLINK_PARAM_TYPE_U32 + +#define DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_NAME "msix_vec_per_pf_min" +#define DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_TYPE DEVLINK_PARAM_TYPE_U32 + +#define DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME "fw_load_policy" +#define DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE DEVLINK_PARAM_TYPE_U8 + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ @@ -451,11 +467,14 @@ struct devlink_ops { u32 *p_cur, u32 *p_max); int (*eswitch_mode_get)(struct devlink *devlink, u16 *p_mode); - int (*eswitch_mode_set)(struct devlink *devlink, u16 mode); + int (*eswitch_mode_set)(struct devlink *devlink, u16 mode, + struct netlink_ext_ack *extack); int (*eswitch_inline_mode_get)(struct devlink *devlink, u8 *p_inline_mode); - int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode); + int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode, + struct netlink_ext_ack *extack); int (*eswitch_encap_mode_get)(struct devlink *devlink, u8 *p_encap_mode); - int (*eswitch_encap_mode_set)(struct devlink *devlink, u8 encap_mode); + int (*eswitch_encap_mode_set)(struct devlink *devlink, u8 encap_mode, + struct netlink_ext_ack *extack); }; static inline void *devlink_priv(struct devlink *devlink) @@ -553,6 +572,8 @@ int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, union devlink_param_value init_val); void devlink_param_value_changed(struct devlink *devlink, u32 param_id); +void devlink_param_value_str_fill(union devlink_param_value *dst_val, + const char *src); struct devlink_region *devlink_region_create(struct devlink *devlink, const char *region_name, u32 region_max_snapshots, @@ -789,6 +810,12 @@ devlink_param_value_changed(struct devlink *devlink, u32 param_id) { } +static inline void +devlink_param_value_str_fill(union devlink_param_value *dst_val, + const char *src) +{ +} + static inline struct devlink_region * devlink_region_create(struct devlink *devlink, const char *region_name, diff --git a/include/net/dsa.h b/include/net/dsa.h index 461e8a7661b7..b3eefe8e18fd 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -35,7 +35,8 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_BRCM_PREPEND, DSA_TAG_PROTO_DSA, DSA_TAG_PROTO_EDSA, - DSA_TAG_PROTO_KSZ, + DSA_TAG_PROTO_GSWIP, + DSA_TAG_PROTO_KSZ9477, DSA_TAG_PROTO_LAN9303, DSA_TAG_PROTO_MTK, DSA_TAG_PROTO_QCA, @@ -112,6 +113,7 @@ struct dsa_device_ops { struct packet_type *pt); int (*flow_dissect)(const struct sk_buff *skb, __be16 *proto, int *offset); + unsigned int overhead; }; struct dsa_switch_tree { diff --git a/include/net/dst.h b/include/net/dst.h index 7f735e76ca73..6cf0870414c7 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -527,4 +527,14 @@ static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu) dst->ops->update_pmtu(dst, NULL, skb, mtu); } +static inline void skb_tunnel_check_pmtu(struct sk_buff *skb, + struct dst_entry *encap_dst, + int headroom) +{ + u32 encap_mtu = dst_mtu(encap_dst); + + if (skb->len > encap_mtu - headroom) + skb_dst_update_pmtu(skb, encap_mtu - headroom); +} + #endif /* _NET_DST_H */ diff --git a/include/net/flow.h b/include/net/flow.h index 8ce21793094e..93f2c9a0f098 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -38,8 +38,8 @@ struct flowi_common { #define FLOWI_FLAG_KNOWN_NH 0x02 #define FLOWI_FLAG_SKIP_NH_OIF 0x04 __u32 flowic_secid; - struct flowi_tunnel flowic_tun_key; kuid_t flowic_uid; + struct flowi_tunnel flowic_tun_key; }; union flowi_uli { diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 6a4586dcdede..2b26979efb48 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -209,8 +209,8 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ FLOW_DISSECTOR_KEY_TIPC, /* struct flow_dissector_key_tipc */ FLOW_DISSECTOR_KEY_ARP, /* struct flow_dissector_key_arp */ - FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_flow_vlan */ - FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */ + FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_vlan */ + FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_tags */ FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_MPLS_ENTROPY, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_ENC_KEYID, /* struct flow_dissector_key_keyid */ @@ -221,7 +221,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_MPLS, /* struct flow_dissector_key_mpls */ FLOW_DISSECTOR_KEY_TCP, /* struct flow_dissector_key_tcp */ FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */ - FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_flow_vlan */ + FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_vlan */ FLOW_DISSECTOR_KEY_ENC_IP, /* struct flow_dissector_key_ip */ FLOW_DISSECTOR_KEY_ENC_OPTS, /* struct flow_dissector_key_enc_opts */ diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index 883bb9085f15..ca23860adbb9 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -10,7 +10,7 @@ struct gnet_stats_basic_cpu { struct gnet_stats_basic_packed bstats; struct u64_stats_sync syncp; -}; +} __aligned(2 * sizeof(u64)); struct net_rate_estimator; @@ -44,6 +44,10 @@ void __gnet_stats_copy_basic(const seqcount_t *running, struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b); +int gnet_stats_copy_basic_hw(const seqcount_t *running, + struct gnet_dump *d, + struct gnet_stats_basic_cpu __percpu *cpu, + struct gnet_stats_basic_packed *b); int gnet_stats_copy_rate_est(struct gnet_dump *d, struct net_rate_estimator __rcu **ptr); int gnet_stats_copy_queue(struct gnet_dump *d, diff --git a/include/net/genetlink.h b/include/net/genetlink.h index decf6012a401..aa2e5888f18d 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -112,7 +112,7 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) #define GENL_SET_ERR_MSG(info, msg) NL_SET_ERR_MSG((info)->extack, msg) static inline int genl_err_attr(struct genl_info *info, int err, - struct nlattr *attr) + const struct nlattr *attr) { info->extack->bad_attr = attr; diff --git a/include/net/geneve.h b/include/net/geneve.h index a7600ed55ea3..fc6a7e0a874a 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -60,6 +60,12 @@ struct genevehdr { struct geneve_opt options[]; }; +static inline bool netif_is_geneve(const struct net_device *dev) +{ + return dev->rtnl_link_ops && + !strcmp(dev->rtnl_link_ops->kind, "geneve"); +} + #ifdef CONFIG_INET struct net_device *geneve_dev_create_fb(struct net *net, const char *name, u8 name_assign_type, u16 dst_port); diff --git a/include/net/gre.h b/include/net/gre.h index 797142eee9cd..b60f212c16c6 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -37,8 +37,17 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, bool *csum_err, __be16 proto, int nhs); -bool is_gretap_dev(const struct net_device *dev); -bool is_ip6gretap_dev(const struct net_device *dev); +static inline bool netif_is_gretap(const struct net_device *dev) +{ + return dev->rtnl_link_ops && + !strcmp(dev->rtnl_link_ops->kind, "gretap"); +} + +static inline bool netif_is_ip6gretap(const struct net_device *dev) +{ + return dev->rtnl_link_ops && + !strcmp(dev->rtnl_link_ops->kind, "ip6gretap"); +} static inline int gre_calc_hlen(__be16 o_flags) { diff --git a/include/net/icmp.h b/include/net/icmp.h index 3ef2743a8eec..6ac3a5bd0117 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -41,7 +41,7 @@ struct net; void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info); int icmp_rcv(struct sk_buff *skb); -void icmp_err(struct sk_buff *skb, u32 info); +int icmp_err(struct sk_buff *skb, u32 info); int icmp_init(void); void icmp_out_count(struct net *net, unsigned char type); diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index feef706e1158..8014153bdd49 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -75,6 +75,8 @@ enum ieee80211_radiotap_presence { IEEE80211_RADIOTAP_TIMESTAMP = 22, IEEE80211_RADIOTAP_HE = 23, IEEE80211_RADIOTAP_HE_MU = 24, + IEEE80211_RADIOTAP_ZERO_LEN_PSDU = 26, + IEEE80211_RADIOTAP_LSIG = 27, /* valid in every it_present bitmap, even vendor namespaces */ IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE = 29, @@ -325,6 +327,25 @@ enum ieee80211_radiotap_he_mu_bits { IEEE80211_RADIOTAP_HE_MU_FLAGS2_CH2_CTR_26T_RU = 0x0800, }; +enum ieee80211_radiotap_lsig_data1 { + IEEE80211_RADIOTAP_LSIG_DATA1_RATE_KNOWN = 0x0001, + IEEE80211_RADIOTAP_LSIG_DATA1_LENGTH_KNOWN = 0x0002, +}; + +enum ieee80211_radiotap_lsig_data2 { + IEEE80211_RADIOTAP_LSIG_DATA2_RATE = 0x000f, + IEEE80211_RADIOTAP_LSIG_DATA2_LENGTH = 0xfff0, +}; + +struct ieee80211_radiotap_lsig { + __le16 data1, data2; +}; + +enum ieee80211_radiotap_zero_len_psdu_type { + IEEE80211_RADIOTAP_ZERO_LEN_PSDU_SOUNDING = 0, + IEEE80211_RADIOTAP_ZERO_LEN_PSDU_VENDOR = 0xff, +}; + /** * ieee80211_get_radiotap_len - get radiotap header length */ diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index d7578cf49c3a..c9c78c15bce0 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -146,10 +146,12 @@ struct ifacaddr6 { struct in6_addr aca_addr; struct fib6_info *aca_rt; struct ifacaddr6 *aca_next; + struct hlist_node aca_addr_lst; int aca_users; refcount_t aca_refcnt; unsigned long aca_cstamp; unsigned long aca_tstamp; + struct rcu_head rcu; }; #define IFA_HOST IPV6_ADDR_LOOPBACK diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 6e91e38a31da..9db98af46985 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -115,9 +115,8 @@ int inet6_hash(struct sock *sk); ((__sk)->sk_family == AF_INET6) && \ ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr)) && \ ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr)) && \ - (!(__sk)->sk_bound_dev_if || \ - ((__sk)->sk_bound_dev_if == (__dif)) || \ - ((__sk)->sk_bound_dev_if == (__sdif))) && \ + (((__sk)->sk_bound_dev_if == (__dif)) || \ + ((__sk)->sk_bound_dev_if == (__sdif))) && \ net_eq(sock_net(__sk), (__net))) #endif /* _INET6_HASHTABLES_H */ diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 3ca969cbd161..975901a95c0f 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -2,6 +2,8 @@ #ifndef _INET_COMMON_H #define _INET_COMMON_H +#include <linux/indirect_call_wrapper.h> + extern const struct proto_ops inet_stream_ops; extern const struct proto_ops inet_dgram_ops; @@ -54,4 +56,11 @@ static inline void inet_ctl_sock_destroy(struct sock *sk) sock_release(sk->sk_socket); } +#define indirect_call_gro_receive(f2, f1, cb, head, skb) \ +({ \ + unlikely(gro_recursion_inc_test(skb)) ? \ + NAPI_GRO_CB(skb)->flush |= 1, NULL : \ + INDIRECT_CALL_2(cb, f2, f1, head, skb); \ +}) + #endif diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index 482a1b705362..c8e2bebd8d93 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -183,8 +183,7 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb) * 1 if something is broken and should be logged (!!! above) * 2 if packet should be dropped */ -static inline int INET_ECN_decapsulate(struct sk_buff *skb, - __u8 outer, __u8 inner) +static inline int __INET_ECN_decapsulate(__u8 outer, __u8 inner, bool *set_ce) { if (INET_ECN_is_not_ect(inner)) { switch (outer & INET_ECN_MASK) { @@ -198,10 +197,21 @@ static inline int INET_ECN_decapsulate(struct sk_buff *skb, } } - if (INET_ECN_is_ce(outer)) + *set_ce = INET_ECN_is_ce(outer); + return 0; +} + +static inline int INET_ECN_decapsulate(struct sk_buff *skb, + __u8 outer, __u8 inner) +{ + bool set_ce = false; + int rc; + + rc = __INET_ECN_decapsulate(outer, inner, &set_ce); + if (!rc && set_ce) INET_ECN_set_ce(skb); - return 0; + return rc; } static inline int IP_ECN_decapsulate(const struct iphdr *oiph, diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 9141e95529e7..babb14136705 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -79,6 +79,7 @@ struct inet_ehash_bucket { struct inet_bind_bucket { possible_net_t ib_net; + int l3mdev; unsigned short port; signed char fastreuse; signed char fastreuseport; @@ -188,10 +189,21 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) hashinfo->ehash_locks = NULL; } +static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if, + int dif, int sdif) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + return inet_bound_dev_eq(!!net->ipv4.sysctl_tcp_l3mdev_accept, + bound_dev_if, dif, sdif); +#else + return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); +#endif +} + struct inet_bind_bucket * inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net, struct inet_bind_hashbucket *head, - const unsigned short snum); + const unsigned short snum, int l3mdev); void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb); @@ -225,6 +237,7 @@ void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, unsigned long numentries, int scale, unsigned long low_limit, unsigned long high_limit); +int inet_hashinfo2_init_mod(struct inet_hashinfo *h); bool inet_ehash_insert(struct sock *sk, struct sock *osk); bool inet_ehash_nolisten(struct sock *sk, struct sock *osk); @@ -282,9 +295,8 @@ static inline struct sock *inet_lookup_listener(struct net *net, #define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \ (((__sk)->sk_portpair == (__ports)) && \ ((__sk)->sk_addrpair == (__cookie)) && \ - (!(__sk)->sk_bound_dev_if || \ - ((__sk)->sk_bound_dev_if == (__dif)) || \ - ((__sk)->sk_bound_dev_if == (__sdif))) && \ + (((__sk)->sk_bound_dev_if == (__dif)) || \ + ((__sk)->sk_bound_dev_if == (__sdif))) && \ net_eq(sock_net(__sk), (__net))) #else /* 32-bit arch */ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ @@ -294,9 +306,8 @@ static inline struct sock *inet_lookup_listener(struct net *net, (((__sk)->sk_portpair == (__ports)) && \ ((__sk)->sk_daddr == (__saddr)) && \ ((__sk)->sk_rcv_saddr == (__daddr)) && \ - (!(__sk)->sk_bound_dev_if || \ - ((__sk)->sk_bound_dev_if == (__dif)) || \ - ((__sk)->sk_bound_dev_if == (__sdif))) && \ + (((__sk)->sk_bound_dev_if == (__dif)) || \ + ((__sk)->sk_bound_dev_if == (__sdif))) && \ net_eq(sock_net(__sk), (__net))) #endif /* 64-bit arch */ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index e03b93360f33..e8eef85006aa 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -130,10 +130,25 @@ static inline int inet_request_bound_dev_if(const struct sock *sk, return sk->sk_bound_dev_if; } -static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq) +static inline int inet_sk_bound_l3mdev(const struct sock *sk) { - return rcu_dereference_check(ireq->ireq_opt, - refcount_read(&ireq->req.rsk_refcnt) > 0); +#ifdef CONFIG_NET_L3_MASTER_DEV + struct net *net = sock_net(sk); + + if (!net->ipv4.sysctl_tcp_l3mdev_accept) + return l3mdev_master_ifindex_by_index(net, + sk->sk_bound_dev_if); +#endif + + return 0; +} + +static inline bool inet_bound_dev_eq(bool l3mdev_accept, int bound_dev_if, + int dif, int sdif) +{ + if (!bound_dev_if) + return !sdif || l3mdev_accept; + return bound_dev_if == dif || bound_dev_if == sdif; } struct inet_cork { diff --git a/include/net/ip.h b/include/net/ip.h index e44b1a44f67a..8866bfce6121 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -155,6 +155,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, void ip_list_rcv(struct list_head *head, struct packet_type *pt, struct net_device *orig_dev); int ip_local_deliver(struct sk_buff *skb); +void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int proto); int ip_mr_input(struct sk_buff *skb); int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb); int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb); @@ -420,8 +421,36 @@ static inline unsigned int ip_skb_dst_mtu(struct sock *sk, return min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU); } -int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len, - u32 *metrics); +struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx, + int fc_mx_len, + struct netlink_ext_ack *extack); +static inline void ip_fib_metrics_put(struct dst_metrics *fib_metrics) +{ + if (fib_metrics != &dst_default_metrics && + refcount_dec_and_test(&fib_metrics->refcnt)) + kfree(fib_metrics); +} + +/* ipv4 and ipv6 both use refcounted metrics if it is not the default */ +static inline +void ip_dst_init_metrics(struct dst_entry *dst, struct dst_metrics *fib_metrics) +{ + dst_init_metrics(dst, fib_metrics->metrics, true); + + if (fib_metrics != &dst_default_metrics) { + dst->_metrics |= DST_METRICS_REFCOUNTED; + refcount_inc(&fib_metrics->refcnt); + } +} + +static inline +void ip_dst_metrics_put(struct dst_entry *dst) +{ + struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); + + if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt)) + kfree(p); +} u32 ip_idents_reserve(u32 hash, int segs); void __ip_select_ident(struct net *net, struct iphdr *iph, int segs); diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 3d4930528db0..84097010237c 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -159,6 +159,10 @@ struct fib6_info { struct rt6_info * __percpu *rt6i_pcpu; struct rt6_exception_bucket __rcu *rt6i_exception_bucket; +#ifdef CONFIG_IPV6_ROUTER_PREF + unsigned long last_probe; +#endif + u32 fib6_metric; u8 fib6_protocol; u8 fib6_type; @@ -182,7 +186,6 @@ struct rt6_info { struct in6_addr rt6i_gateway; struct inet6_dev *rt6i_idev; u32 rt6i_flags; - struct rt6key rt6i_prefsrc; struct list_head rt6i_uncached; struct uncached_list *rt6i_uncached_list; @@ -408,11 +411,33 @@ struct fib6_node *fib6_locate(struct fib6_node *root, void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *arg), void *arg); +void fib6_clean_all_skip_notify(struct net *net, + int (*func)(struct fib6_info *, void *arg), + void *arg); int fib6_add(struct fib6_node *root, struct fib6_info *rt, struct nl_info *info, struct netlink_ext_ack *extack); int fib6_del(struct fib6_info *rt, struct nl_info *info); +static inline +void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr) +{ + const struct fib6_info *from; + + rcu_read_lock(); + + from = rcu_dereference(rt->from); + if (from) { + *addr = from->fib6_prefsrc.addr; + } else { + struct in6_addr in6_zero = {}; + + *addr = in6_zero; + } + + rcu_read_unlock(); +} + static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i) { return f6i->fib6_nh.nh_dev; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 7b9c82de11cc..7ab119936e69 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -165,8 +165,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif, void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu); void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, kuid_t uid); -void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, - u32 mark); +void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif); void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk); struct netlink_callback; @@ -175,6 +174,7 @@ struct rt6_rtnl_dump_arg { struct sk_buff *skb; struct netlink_callback *cb; struct net *net; + struct fib_dump_filter filter; }; int rt6_dump_route(struct fib6_info *f6i, void *p_arg); diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 236e40ba06bf..69b4bcf880c9 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -69,6 +69,8 @@ struct ip6_tnl_encap_ops { size_t (*encap_hlen)(struct ip_tunnel_encap *e); int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, struct flowi6 *fl6); + int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info); }; #ifdef CONFIG_INET diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 69c91d1934c1..c5969762a8f4 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -222,6 +222,17 @@ struct fib_table { unsigned long __data[0]; }; +struct fib_dump_filter { + u32 table_id; + /* filter_set is an optimization that an entry is set */ + bool filter_set; + bool dump_all_families; + unsigned char protocol; + unsigned char rt_type; + unsigned int flags; + struct net_device *dev; +}; + int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct fib_result *res, int fib_flags); int fib_table_insert(struct net *, struct fib_table *, struct fib_config *, @@ -229,7 +240,7 @@ int fib_table_insert(struct net *, struct fib_table *, struct fib_config *, int fib_table_delete(struct net *, struct fib_table *, struct fib_config *, struct netlink_ext_ack *extack); int fib_table_dump(struct fib_table *table, struct sk_buff *skb, - struct netlink_callback *cb); + struct netlink_callback *cb, struct fib_dump_filter *filter); int fib_table_flush(struct net *net, struct fib_table *table); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); void fib_table_flush_external(struct fib_table *table); @@ -373,6 +384,7 @@ static inline bool fib4_rules_early_flow_dissect(struct net *net, extern const struct nla_policy rtm_ipv4_policy[]; void ip_fib_init(void); __be32 fib_compute_spec_dst(struct sk_buff *skb); +bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev); int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, struct in_device *idev, u32 *itag); @@ -394,6 +406,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev); int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); int fib_sync_down_addr(struct net_device *dev, __be32 local); int fib_sync_up(struct net_device *dev, unsigned int nh_flags); +void fib_sync_mtu(struct net_device *dev, u32 orig_mtu); #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, @@ -451,4 +464,7 @@ static inline void fib_proc_exit(struct net *net) u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr); +int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, + struct fib_dump_filter *filter, + struct netlink_callback *cb); #endif /* _NET_FIB_H */ diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index b0d022ff6ea1..34f019650941 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -144,25 +144,6 @@ struct ip_tunnel { bool ignore_df; }; -#define TUNNEL_CSUM __cpu_to_be16(0x01) -#define TUNNEL_ROUTING __cpu_to_be16(0x02) -#define TUNNEL_KEY __cpu_to_be16(0x04) -#define TUNNEL_SEQ __cpu_to_be16(0x08) -#define TUNNEL_STRICT __cpu_to_be16(0x10) -#define TUNNEL_REC __cpu_to_be16(0x20) -#define TUNNEL_VERSION __cpu_to_be16(0x40) -#define TUNNEL_NO_KEY __cpu_to_be16(0x80) -#define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100) -#define TUNNEL_OAM __cpu_to_be16(0x0200) -#define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400) -#define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800) -#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000) -#define TUNNEL_NOCACHE __cpu_to_be16(0x2000) -#define TUNNEL_ERSPAN_OPT __cpu_to_be16(0x4000) - -#define TUNNEL_OPTIONS_PRESENT \ - (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT) - struct tnl_ptk_info { __be16 flags; __be16 proto; @@ -311,6 +292,7 @@ struct ip_tunnel_encap_ops { size_t (*encap_hlen)(struct ip_tunnel_encap *e); int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, struct flowi4 *fl4); + int (*err_handler)(struct sk_buff *skb, u32 info); }; #define MAX_IPTUN_ENCAP_OPS 8 @@ -326,6 +308,26 @@ int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op, int ip_tunnel_encap_setup(struct ip_tunnel *t, struct ip_tunnel_encap *ipencap); +static inline bool pskb_inet_may_pull(struct sk_buff *skb) +{ + int nhlen; + + switch (skb->protocol) { +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + nhlen = sizeof(struct ipv6hdr); + break; +#endif + case htons(ETH_P_IP): + nhlen = sizeof(struct iphdr); + break; + default: + nhlen = 0; + } + + return pskb_network_may_pull(skb, nhlen); +} + static inline int ip_encap_hlen(struct ip_tunnel_encap *e) { const struct ip_tunnel_encap_ops *ops; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index ff33f498c137..daf80863d3a5 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -975,6 +975,8 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb); int ip6_forward(struct sk_buff *skb); int ip6_input(struct sk_buff *skb); int ip6_mc_input(struct sk_buff *skb); +void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, + bool have_final); int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); @@ -1089,8 +1091,6 @@ static inline int snmp6_unregister_dev(struct inet6_dev *idev) { return 0; } #endif #ifdef CONFIG_SYSCTL -extern struct ctl_table ipv6_route_table_template[]; - struct ctl_table *ipv6_icmp_sysctl_init(struct net *net); struct ctl_table *ipv6_route_sysctl_init(struct net *net); int ipv6_sysctl_register(void); diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index f4c21b5a1242..14a490246be9 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -80,6 +80,11 @@ struct af_iucv_trans_hdr { u8 pad; /* total 104 bytes */ } __packed; +static inline struct af_iucv_trans_hdr *iucv_trans_hdr(struct sk_buff *skb) +{ + return (struct af_iucv_trans_hdr *)skb_network_header(skb); +} + enum iucv_tx_notify { /* transmission of skb is completed and was successful */ TX_NOTIFY_OK = 0, diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 3832099289c5..78fa0ac4613c 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -101,6 +101,17 @@ struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) return master; } +int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex); +static inline +int l3mdev_master_upper_ifindex_by_index(struct net *net, int ifindex) +{ + rcu_read_lock(); + ifindex = l3mdev_master_upper_ifindex_by_index_rcu(net, ifindex); + rcu_read_unlock(); + + return ifindex; +} + u32 l3mdev_fib_table_rcu(const struct net_device *dev); u32 l3mdev_fib_table_by_index(struct net *net, int ifindex); static inline u32 l3mdev_fib_table(const struct net_device *dev) @@ -208,6 +219,17 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) } static inline +int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex) +{ + return 0; +} +static inline +int l3mdev_master_upper_ifindex_by_index(struct net *net, int ifindex) +{ + return 0; +} + +static inline struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev) { return NULL; diff --git a/include/net/llc.h b/include/net/llc.h index 890a87318014..df282d9b4017 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -66,6 +66,7 @@ struct llc_sap { int sk_count; struct hlist_nulls_head sk_laddr_hash[LLC_SK_LADDR_HASH_ENTRIES]; struct hlist_head sk_dev_hash[LLC_SK_DEV_HASH_ENTRIES]; + struct rcu_head rcu; }; static inline diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5790f55c241d..88219cc137c3 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -101,8 +101,9 @@ * Drivers indicate that they use this model by implementing the .wake_tx_queue * driver operation. * - * Intermediate queues (struct ieee80211_txq) are kept per-sta per-tid, with a - * single per-vif queue for multicast data frames. + * Intermediate queues (struct ieee80211_txq) are kept per-sta per-tid, with + * another per-sta for non-data/non-mgmt and bufferable management frames, and + * a single per-vif queue for multicast data frames. * * The driver is expected to initialize its private per-queue data for stations * and interfaces in the .add_interface and .sta_add ops. @@ -308,6 +309,8 @@ struct ieee80211_vif_chanctx_switch { * @BSS_CHANGED_KEEP_ALIVE: keep alive options (idle period or protected * keep alive) changed. * @BSS_CHANGED_MCAST_RATE: Multicast Rate setting changed for this interface + * @BSS_CHANGED_FTM_RESPONDER: fime timing reasurement request responder + * functionality changed for this BSS (AP mode). * */ enum ieee80211_bss_change { @@ -337,6 +340,7 @@ enum ieee80211_bss_change { BSS_CHANGED_MU_GROUPS = 1<<23, BSS_CHANGED_KEEP_ALIVE = 1<<24, BSS_CHANGED_MCAST_RATE = 1<<25, + BSS_CHANGED_FTM_RESPONDER = 1<<26, /* when adding here, make sure to change ieee80211_reconfig */ }; @@ -463,6 +467,21 @@ struct ieee80211_mu_group_data { }; /** + * struct ieee80211_ftm_responder_params - FTM responder parameters + * + * @lci: LCI subelement content + * @civicloc: CIVIC location subelement content + * @lci_len: LCI data length + * @civicloc_len: Civic data length + */ +struct ieee80211_ftm_responder_params { + const u8 *lci; + const u8 *civicloc; + size_t lci_len; + size_t civicloc_len; +}; + +/** * struct ieee80211_bss_conf - holds the BSS's changing parameters * * This structure keeps information about a BSS (and an association @@ -477,6 +496,8 @@ struct ieee80211_mu_group_data { * @uora_ocw_range: UORA element's OCW Range field * @frame_time_rts_th: HE duration RTS threshold, in units of 32us * @he_support: does this BSS support HE + * @twt_requester: does this BSS support TWT requester (relevant for managed + * mode only, set if the AP advertises TWT responder role) * @assoc: association status * @ibss_joined: indicates whether this station is part of an IBSS * or not @@ -561,6 +582,9 @@ struct ieee80211_mu_group_data { * @protected_keep_alive: if set, indicates that the station should send an RSN * protected frame to the AP to reset the idle timer at the AP for the * station. + * @ftm_responder: whether to enable or disable fine timing measurement FTM + * responder functionality. + * @ftmr_params: configurable lci/civic parameter when enabling FTM responder. */ struct ieee80211_bss_conf { const u8 *bssid; @@ -572,6 +596,7 @@ struct ieee80211_bss_conf { u8 uora_ocw_range; u16 frame_time_rts_th; bool he_support; + bool twt_requester; /* association related data */ bool assoc, ibss_joined; bool ibss_creator; @@ -611,6 +636,8 @@ struct ieee80211_bss_conf { bool allow_p2p_go_ps; u16 max_idle_period; bool protected_keep_alive; + bool ftm_responder; + struct ieee80211_ftm_responder_params *ftmr_params; }; /** @@ -1140,6 +1167,11 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * from the RX info data, so leave those zeroed when building this data) * @RX_FLAG_RADIOTAP_HE_MU: HE MU radiotap data is present * (&struct ieee80211_radiotap_he_mu) + * @RX_FLAG_RADIOTAP_LSIG: L-SIG radiotap data is present + * @RX_FLAG_NO_PSDU: use the frame only for radiotap reporting, with + * the "0-length PSDU" field included there. The value for it is + * in &struct ieee80211_rx_status. Note that if this value isn't + * known the frame shouldn't be reported. */ enum mac80211_rx_flags { RX_FLAG_MMIC_ERROR = BIT(0), @@ -1170,6 +1202,8 @@ enum mac80211_rx_flags { RX_FLAG_AMPDU_EOF_BIT_KNOWN = BIT(25), RX_FLAG_RADIOTAP_HE = BIT(26), RX_FLAG_RADIOTAP_HE_MU = BIT(27), + RX_FLAG_RADIOTAP_LSIG = BIT(28), + RX_FLAG_NO_PSDU = BIT(29), }; /** @@ -1242,6 +1276,7 @@ enum mac80211_rx_encoding { * @ampdu_reference: A-MPDU reference number, must be a different value for * each A-MPDU but the same for each subframe within one A-MPDU * @ampdu_delimiter_crc: A-MPDU delimiter CRC + * @zero_length_psdu_type: radiotap type of the 0-length PSDU */ struct ieee80211_rx_status { u64 mactime; @@ -1262,6 +1297,7 @@ struct ieee80211_rx_status { u8 chains; s8 chain_signal[IEEE80211_MAX_CHAINS]; u8 ampdu_delimiter_crc; + u8 zero_length_psdu_type; }; /** @@ -1504,6 +1540,8 @@ enum ieee80211_vif_flags { * @drv_priv: data area for driver use, will always be aligned to * sizeof(void \*). * @txq: the multicast data TX queue (if driver uses the TXQ abstraction) + * @txqs_stopped: per AC flag to indicate that intermediate TXQs are stopped, + * protected by fq->lock. */ struct ieee80211_vif { enum nl80211_iftype type; @@ -1528,6 +1566,8 @@ struct ieee80211_vif { unsigned int probe_req_reg; + bool txqs_stopped[IEEE80211_NUM_ACS]; + /* must be last */ u8 drv_priv[0] __aligned(sizeof(void *)); }; @@ -1839,7 +1879,9 @@ struct ieee80211_sta_rates { * unlimited. * @support_p2p_ps: indicates whether the STA supports P2P PS mechanism or not. * @max_rc_amsdu_len: Maximum A-MSDU size in bytes recommended by rate control. - * @txq: per-TID data TX queues (if driver uses the TXQ abstraction) + * @max_tid_amsdu_len: Maximum A-MSDU size in bytes for this TID + * @txq: per-TID data TX queues (if driver uses the TXQ abstraction); note that + * the last entry (%IEEE80211_NUM_TIDS) is used for non-data frames */ struct ieee80211_sta { u32 supp_rates[NUM_NL80211_BANDS]; @@ -1879,8 +1921,9 @@ struct ieee80211_sta { u16 max_amsdu_len; bool support_p2p_ps; u16 max_rc_amsdu_len; + u16 max_tid_amsdu_len[IEEE80211_NUM_TIDS]; - struct ieee80211_txq *txq[IEEE80211_NUM_TIDS]; + struct ieee80211_txq *txq[IEEE80211_NUM_TIDS + 1]; /* must be last */ u8 drv_priv[0] __aligned(sizeof(void *)); @@ -1914,7 +1957,8 @@ struct ieee80211_tx_control { * * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @sta: station table entry, %NULL for per-vif queue - * @tid: the TID for this queue (unused for per-vif queue) + * @tid: the TID for this queue (unused for per-vif queue), + * %IEEE80211_NUM_TIDS for non-data (if enabled) * @ac: the AC for this queue * @drv_priv: driver private area, sized by hw->txq_data_size * @@ -2127,6 +2171,19 @@ struct ieee80211_txq { * @IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP: The driver (or firmware) doesn't * support QoS NDP for AP probing - that's most likely a driver bug. * + * @IEEE80211_HW_BUFF_MMPDU_TXQ: use the TXQ for bufferable MMPDUs, this of + * course requires the driver to use TXQs to start with. + * + * @IEEE80211_HW_SUPPORTS_VHT_EXT_NSS_BW: (Hardware) rate control supports VHT + * extended NSS BW (dot11VHTExtendedNSSBWCapable). This flag will be set if + * the selected rate control algorithm sets %RATE_CTRL_CAPA_VHT_EXT_NSS_BW + * but if the rate control is built-in then it must be set by the driver. + * See also the documentation for that flag. + * + * @IEEE80211_HW_STA_MMPDU_TXQ: use the extra non-TID per-station TXQ for all + * MMPDUs on station interfaces. This of course requires the driver to use + * TXQs to start with. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2172,6 +2229,9 @@ enum ieee80211_hw_flags { IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA, IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP, IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP, + IEEE80211_HW_BUFF_MMPDU_TXQ, + IEEE80211_HW_SUPPORTS_VHT_EXT_NSS_BW, + IEEE80211_HW_STA_MMPDU_TXQ, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -2290,6 +2350,10 @@ enum ieee80211_hw_flags { * supported by HW. * @max_nan_de_entries: maximum number of NAN DE functions supported by the * device. + * + * @tx_sk_pacing_shift: Pacing shift to set on TCP sockets when frames from + * them are encountered. The default should typically not be changed, + * unless the driver has good reasons for needing more buffers. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -2325,6 +2389,7 @@ struct ieee80211_hw { u8 n_cipher_schemes; const struct ieee80211_cipher_scheme *cipher_schemes; u8 max_nan_de_entries; + u8 tx_sk_pacing_shift; }; static inline bool _ieee80211_hw_check(struct ieee80211_hw *hw, @@ -2506,6 +2571,19 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * The set_default_unicast_key() call updates the default WEP key index * configured to the hardware for WEP encryption type. This is required * for devices that support offload of data packets (e.g. ARP responses). + * + * Mac80211 drivers should set the @NL80211_EXT_FEATURE_CAN_REPLACE_PTK0 flag + * when they are able to replace in-use PTK keys according to to following + * requirements: + * 1) They do not hand over frames decrypted with the old key to + mac80211 once the call to set_key() with command %DISABLE_KEY has been + completed when also setting @IEEE80211_KEY_FLAG_GENERATE_IV for any key, + 2) either drop or continue to use the old key for any outgoing frames queued + at the time of the key deletion (including re-transmits), + 3) never send out a frame queued prior to the set_key() %SET_KEY command + encrypted with the new key and + 4) never send out a frame unencrypted when it should be encrypted. + Mac80211 will not queue any new frames for a deleted key to the driver. */ /** @@ -3164,6 +3242,11 @@ enum ieee80211_reconfig_type { * When the scan finishes, ieee80211_scan_completed() must be called; * note that it also must be called when the scan cannot finish due to * any error unless this callback returned a negative error code. + * This callback is also allowed to return the special return value 1, + * this indicates that hardware scan isn't desirable right now and a + * software scan should be done instead. A driver wishing to use this + * capability must ensure its (hardware) scan capabilities aren't + * advertised as more capable than mac80211's software scan is. * The callback can sleep. * * @cancel_hw_scan: Ask the low-level tp cancel the active hw scan. @@ -3542,6 +3625,15 @@ enum ieee80211_reconfig_type { * @del_nan_func: Remove a NAN function. The driver must call * ieee80211_nan_func_terminated() with * NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST reason code upon removal. + * @can_aggregate_in_amsdu: Called in order to determine if HW supports + * aggregating two specific frames in the same A-MSDU. The relation + * between the skbs should be symmetric and transitive. Note that while + * skb is always a real frame, head may or may not be an A-MSDU. + * @get_ftm_responder_stats: Retrieve FTM responder statistics, if available. + * Statistics should be cumulative, currently no way to reset is provided. + * + * @start_pmsr: start peer measurement (e.g. FTM) (this call can sleep) + * @abort_pmsr: abort peer measurement (this call can sleep) */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -3824,6 +3916,16 @@ struct ieee80211_ops { void (*del_nan_func)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u8 instance_id); + bool (*can_aggregate_in_amsdu)(struct ieee80211_hw *hw, + struct sk_buff *head, + struct sk_buff *skb); + int (*get_ftm_responder_stats)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct cfg80211_ftm_responder_stats *ftm_stats); + int (*start_pmsr)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + struct cfg80211_pmsr_request *request); + void (*abort_pmsr)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + struct cfg80211_pmsr_request *request); }; /** @@ -4293,6 +4395,21 @@ void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta, u32 thr); /** + * ieee80211_tx_rate_update - transmit rate update callback + * + * Drivers should call this functions with a non-NULL pub sta + * This function can be used in drivers that does not have provision + * in updating the tx rate in data path. + * + * @hw: the hardware the frame was transmitted by + * @pubsta: the station to update the tx rate for. + * @info: tx status information + */ +void ieee80211_tx_rate_update(struct ieee80211_hw *hw, + struct ieee80211_sta *pubsta, + struct ieee80211_tx_info *info); + +/** * ieee80211_tx_status - transmit status callback * * Call this function for all transmitted frames after they have been @@ -5644,7 +5761,22 @@ struct ieee80211_tx_rate_control { bool bss; }; +/** + * enum rate_control_capabilities - rate control capabilities + */ +enum rate_control_capabilities { + /** + * @RATE_CTRL_CAPA_VHT_EXT_NSS_BW: + * Support for extended NSS BW support (dot11VHTExtendedNSSCapable) + * Note that this is only looked at if the minimum number of chains + * that the AP uses is < the number of TX chains the hardware has, + * otherwise the NSS difference doesn't bother us. + */ + RATE_CTRL_CAPA_VHT_EXT_NSS_BW = BIT(0), +}; + struct rate_control_ops { + unsigned long capa; const char *name; void *(*alloc)(struct ieee80211_hw *hw, struct dentry *debugfsdir); void (*free)(void *priv); @@ -5974,6 +6106,14 @@ void ieee80211_unreserve_tid(struct ieee80211_sta *sta, u8 tid); * @txq: pointer obtained from station or virtual interface * * Returns the skb if successful, %NULL if no frame was available. + * + * Note that this must be called in an rcu_read_lock() critical section, + * which can only be released after the SKB was handled. Some pointers in + * skb->cb, e.g. the key pointer, are protected by by RCU and thus the + * critical section must persist not just for the duration of this call + * but for the duration of the frame handling. + * However, also note that while in the wake_tx_queue() method, + * rcu_read_lock() is already held. */ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, struct ieee80211_txq *txq); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 6c1eecd56a4d..7c1ab9edba03 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -140,8 +140,8 @@ struct neighbour { unsigned long updated; rwlock_t lock; refcount_t refcnt; - struct sk_buff_head arp_queue; unsigned int arp_queue_len_bytes; + struct sk_buff_head arp_queue; struct timer_list timer; unsigned long used; atomic_t probes; @@ -149,11 +149,13 @@ struct neighbour { __u8 nud_state; __u8 type; __u8 dead; + u8 protocol; seqlock_t ha_lock; - unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))]; + unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))] __aligned(8); struct hh_cache hh; int (*output)(struct neighbour *, struct sk_buff *); const struct neigh_ops *ops; + struct list_head gc_list; struct rcu_head rcu; struct net_device *dev; u8 primary_key[0]; @@ -172,6 +174,7 @@ struct pneigh_entry { possible_net_t net; struct net_device *dev; u8 flags; + u8 protocol; u8 key[0]; }; @@ -214,6 +217,8 @@ struct neigh_table { struct timer_list proxy_timer; struct sk_buff_head proxy_queue; atomic_t entries; + atomic_t gc_entries; + struct list_head gc_list; rwlock_t lock; unsigned long last_rand; struct neigh_statistics __percpu *stats; @@ -250,6 +255,7 @@ static inline void *neighbour_priv(const struct neighbour *n) #define NEIGH_UPDATE_F_ISROUTER 0x40000000 #define NEIGH_UPDATE_F_ADMIN 0x80000000 +extern const struct nla_policy nda_policy[]; static inline bool neigh_key_eq16(const struct neighbour *n, const void *pkey) { @@ -323,6 +329,7 @@ void __neigh_set_probe_once(struct neighbour *neigh); bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl); void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev); +int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev); int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb); int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb); int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb); @@ -453,6 +460,7 @@ static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb) { + unsigned int hh_alen = 0; unsigned int seq; unsigned int hh_len; @@ -460,16 +468,33 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb seq = read_seqbegin(&hh->hh_lock); hh_len = hh->hh_len; if (likely(hh_len <= HH_DATA_MOD)) { - /* this is inlined by gcc */ - memcpy(skb->data - HH_DATA_MOD, hh->hh_data, HH_DATA_MOD); + hh_alen = HH_DATA_MOD; + + /* skb_push() would proceed silently if we have room for + * the unaligned size but not for the aligned size: + * check headroom explicitly. + */ + if (likely(skb_headroom(skb) >= HH_DATA_MOD)) { + /* this is inlined by gcc */ + memcpy(skb->data - HH_DATA_MOD, hh->hh_data, + HH_DATA_MOD); + } } else { - unsigned int hh_alen = HH_DATA_ALIGN(hh_len); + hh_alen = HH_DATA_ALIGN(hh_len); - memcpy(skb->data - hh_alen, hh->hh_data, hh_alen); + if (likely(skb_headroom(skb) >= hh_alen)) { + memcpy(skb->data - hh_alen, hh->hh_data, + hh_alen); + } } } while (read_seqretry(&hh->hh_lock, seq)); - skb_push(skb, hh_len); + if (WARN_ON_ONCE(skb_headroom(skb) < hh_alen)) { + kfree_skb(skb); + return NET_XMIT_DROP; + } + + __skb_push(skb, hh_len); return dev_queue_xmit(skb); } @@ -527,20 +552,17 @@ static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n, } while (read_seqretry(&n->ha_lock, seq)); } -static inline void neigh_update_ext_learned(struct neighbour *neigh, u32 flags, - int *notify) +static inline void neigh_update_is_router(struct neighbour *neigh, u32 flags, + int *notify) { u8 ndm_flags = 0; - if (!(flags & NEIGH_UPDATE_F_ADMIN)) - return; - - ndm_flags |= (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0; - if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) { - if (ndm_flags & NTF_EXT_LEARNED) - neigh->flags |= NTF_EXT_LEARNED; + ndm_flags |= (flags & NEIGH_UPDATE_F_ISROUTER) ? NTF_ROUTER : 0; + if ((neigh->flags ^ ndm_flags) & NTF_ROUTER) { + if (ndm_flags & NTF_ROUTER) + neigh->flags |= NTF_ROUTER; else - neigh->flags &= ~NTF_EXT_LEARNED; + neigh->flags &= ~NTF_ROUTER; *notify = 1; } } diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 9b5fdc50519a..99d4148e0f90 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -43,6 +43,7 @@ struct ctl_table_header; struct net_generic; struct uevent_sock; struct netns_ipvs; +struct bpf_prog; #define NETDEV_HASHBITS 8 @@ -145,6 +146,8 @@ struct net { #endif struct net_generic __rcu *gen; + struct bpf_prog __rcu *flow_dissector_prog; + /* Note : following structs are cache line aligned */ #ifdef CONFIG_XFRM struct netns_xfrm xfrm; diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h index 74af19c3a8f7..4cd56808ac4e 100644 --- a/include/net/netfilter/br_netfilter.h +++ b/include/net/netfilter/br_netfilter.h @@ -6,12 +6,12 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) { - skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC); + struct nf_bridge_info *b = skb_ext_add(skb, SKB_EXT_BRIDGE_NF); - if (likely(skb->nf_bridge)) - refcount_set(&(skb->nf_bridge->use), 1); + if (b) + memset(b, 0, sizeof(*b)); - return skb->nf_bridge; + return b; } void nf_bridge_update_protocol(struct sk_buff *skb); @@ -22,12 +22,6 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, struct sock *sk, int (*okfn)(struct net *, struct sock *, struct sk_buff *)); -static inline struct nf_bridge_info * -nf_bridge_info_get(const struct sk_buff *skb) -{ - return skb->nf_bridge; -} - unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb); static inline void nf_bridge_push_encap_header(struct sk_buff *skb) diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index c84b51682f08..135ee702c7b0 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -10,20 +10,17 @@ #ifndef _NF_CONNTRACK_IPV4_H #define _NF_CONNTRACK_IPV4_H -extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4; -extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp; extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; #ifdef CONFIG_NF_CT_PROTO_DCCP -extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp; #endif #ifdef CONFIG_NF_CT_PROTO_SCTP -extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp; #endif #ifdef CONFIG_NF_CT_PROTO_UDPLITE -extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite; #endif -int nf_conntrack_ipv4_compat_init(void); -void nf_conntrack_ipv4_compat_fini(void); - #endif /*_NF_CONNTRACK_IPV4_H*/ diff --git a/include/net/netfilter/ipv4/nf_nat_masquerade.h b/include/net/netfilter/ipv4/nf_nat_masquerade.h index cd24be4c4a99..13d55206bb9f 100644 --- a/include/net/netfilter/ipv4/nf_nat_masquerade.h +++ b/include/net/netfilter/ipv4/nf_nat_masquerade.h @@ -9,7 +9,7 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, const struct nf_nat_range2 *range, const struct net_device *out); -void nf_nat_masquerade_ipv4_register_notifier(void); +int nf_nat_masquerade_ipv4_register_notifier(void); void nf_nat_masquerade_ipv4_unregister_notifier(void); #endif /*_NF_NAT_MASQUERADE_IPV4_H_ */ diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index effa8dfba68c..7b3c873f8839 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -2,20 +2,7 @@ #ifndef _NF_CONNTRACK_IPV6_H #define _NF_CONNTRACK_IPV6_H -extern const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; - -extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6; -extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6; extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; -#ifdef CONFIG_NF_CT_PROTO_DCCP -extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6; -#endif -#ifdef CONFIG_NF_CT_PROTO_SCTP -extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6; -#endif -#ifdef CONFIG_NF_CT_PROTO_UDPLITE -extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6; -#endif #include <linux/sysctl.h> extern struct ctl_table nf_ct_ipv6_sysctl_table[]; diff --git a/include/net/netfilter/ipv6/nf_nat_masquerade.h b/include/net/netfilter/ipv6/nf_nat_masquerade.h index 0c3b5ebf0bb8..2917bf95c437 100644 --- a/include/net/netfilter/ipv6/nf_nat_masquerade.h +++ b/include/net/netfilter/ipv6/nf_nat_masquerade.h @@ -5,7 +5,7 @@ unsigned int nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, const struct net_device *out); -void nf_nat_masquerade_ipv6_register_notifier(void); +int nf_nat_masquerade_ipv6_register_notifier(void); void nf_nat_masquerade_ipv6_unregister_notifier(void); #endif /* _NF_NAT_MASQUERADE_IPV6_H_ */ diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 7e012312cd61..249d0a5b12b8 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -27,12 +27,17 @@ #include <net/netfilter/nf_conntrack_tuple.h> +struct nf_ct_udp { + unsigned long stream_ts; +}; + /* per conntrack: protocol private data */ union nf_conntrack_proto { /* insert conntrack proto private data here */ struct nf_ct_dccp dccp; struct ip_ct_sctp sctp; struct ip_ct_tcp tcp; + struct nf_ct_udp udp; struct nf_ct_gre gre; unsigned int tmpl_padto; }; diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h index 79d8d16732b4..bc6745d3010e 100644 --- a/include/net/netfilter/nf_conntrack_acct.h +++ b/include/net/netfilter/nf_conntrack_acct.h @@ -46,9 +46,6 @@ struct nf_conn_acct *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp) return acct; }; -unsigned int seq_print_acct(struct seq_file *s, const struct nf_conn *ct, - int dir); - /* Check if connection tracking accounting is enabled */ static inline bool nf_ct_acct_enabled(struct net *net) { @@ -61,8 +58,7 @@ static inline void nf_ct_set_acct(struct net *net, bool enable) net->ct.sysctl_acct = enable; } -int nf_conntrack_acct_pernet_init(struct net *net); -void nf_conntrack_acct_pernet_fini(struct net *net); +void nf_conntrack_acct_pernet_init(struct net *net); int nf_conntrack_acct_init(void); void nf_conntrack_acct_fini(void); diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 2a3e0974a6af..afc9b3620473 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -20,8 +20,7 @@ /* This header is used to share core functionality between the standalone connection tracking module, and the compatibility layer's use of connection tracking. */ -unsigned int nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, - struct sk_buff *skb); +unsigned int nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state); int nf_conntrack_init_net(struct net *net); void nf_conntrack_cleanup_net(struct net *net); diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h index 4b2b2baf8ab4..f32fc8289473 100644 --- a/include/net/netfilter/nf_conntrack_count.h +++ b/include/net/netfilter/nf_conntrack_count.h @@ -5,17 +5,10 @@ struct nf_conncount_data; -enum nf_conncount_list_add { - NF_CONNCOUNT_ADDED, /* list add was ok */ - NF_CONNCOUNT_ERR, /* -ENOMEM, must drop skb */ - NF_CONNCOUNT_SKIP, /* list is already reclaimed by gc */ -}; - struct nf_conncount_list { spinlock_t list_lock; struct list_head head; /* connections with the same filtering key */ unsigned int count; /* length of list */ - bool dead; }; struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family, @@ -29,18 +22,12 @@ unsigned int nf_conncount_count(struct net *net, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_zone *zone); -void nf_conncount_lookup(struct net *net, struct nf_conncount_list *list, - const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_zone *zone, - bool *addit); +int nf_conncount_add(struct net *net, struct nf_conncount_list *list, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone); void nf_conncount_list_init(struct nf_conncount_list *list); -enum nf_conncount_list_add -nf_conncount_add(struct nf_conncount_list *list, - const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_zone *zone); - bool nf_conncount_gc_list(struct net *net, struct nf_conncount_list *list); diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 3f1ce9a8776e..52b44192b43f 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -142,7 +142,7 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, struct nf_conntrack_expect *exp, u32 portid, int report); -int nf_conntrack_ecache_pernet_init(struct net *net); +void nf_conntrack_ecache_pernet_init(struct net *net); void nf_conntrack_ecache_pernet_fini(struct net *net); int nf_conntrack_ecache_init(void); @@ -182,10 +182,7 @@ static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e, u32 portid, int report) {} -static inline int nf_conntrack_ecache_pernet_init(struct net *net) -{ - return 0; -} +static inline void nf_conntrack_ecache_pernet_init(struct net *net) {} static inline void nf_conntrack_ecache_pernet_fini(struct net *net) { diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 2492120b8097..ec52a8dc32fd 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -124,8 +124,7 @@ static inline void *nfct_help_data(const struct nf_conn *ct) return (void *)help->data; } -int nf_conntrack_helper_pernet_init(struct net *net); -void nf_conntrack_helper_pernet_fini(struct net *net); +void nf_conntrack_helper_pernet_init(struct net *net); int nf_conntrack_helper_init(void); void nf_conntrack_helper_fini(void); diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 8465263b297d..ae7b86f587f2 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -18,9 +18,6 @@ struct seq_file; struct nf_conntrack_l4proto { - /* L3 Protocol number. */ - u_int16_t l3proto; - /* L4 Protocol number. */ u_int8_t l4proto; @@ -43,22 +40,14 @@ struct nf_conntrack_l4proto { /* Returns verdict for packet, or -1 for invalid. */ int (*packet)(struct nf_conn *ct, - const struct sk_buff *skb, + struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info ctinfo); - - /* Called when a new connection for this protocol found; - * returns TRUE if it's OK. If so, packet() called next. */ - bool (*new)(struct nf_conn *ct, const struct sk_buff *skb, - unsigned int dataoff); + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); /* Called when a conntrack entry is destroyed */ void (*destroy)(struct nf_conn *ct); - int (*error)(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, - unsigned int dataoff, - u_int8_t pf, unsigned int hooknum); - /* called by gc worker if table is full */ bool (*can_early_drop)(const struct nf_conn *ct); @@ -92,7 +81,7 @@ struct nf_conntrack_l4proto { #endif unsigned int *net_id; /* Init l4proto pernet data */ - int (*init_net)(struct net *net, u_int16_t proto); + int (*init_net)(struct net *net); /* Return the per-net protocol part. */ struct nf_proto_net *(*get_net_proto)(struct net *net); @@ -101,16 +90,23 @@ struct nf_conntrack_l4proto { struct module *me; }; +int nf_conntrack_icmpv4_error(struct nf_conn *tmpl, + struct sk_buff *skb, + unsigned int dataoff, + const struct nf_hook_state *state); + +int nf_conntrack_icmpv6_error(struct nf_conn *tmpl, + struct sk_buff *skb, + unsigned int dataoff, + const struct nf_hook_state *state); /* Existing built-in generic protocol */ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; -#define MAX_NF_CT_PROTO 256 +#define MAX_NF_CT_PROTO IPPROTO_UDPLITE -const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto, - u_int8_t l4proto); +const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u8 l4proto); -const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto, - u_int8_t l4proto); +const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u8 l4proto); void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p); /* Protocol pernet registration. */ @@ -157,4 +153,43 @@ void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, const char *fmt, ...) { } #endif /* CONFIG_SYSCTL */ +static inline struct nf_generic_net *nf_generic_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.generic; +} + +static inline struct nf_tcp_net *nf_tcp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.tcp; +} + +static inline struct nf_udp_net *nf_udp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.udp; +} + +static inline struct nf_icmp_net *nf_icmp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.icmp; +} + +static inline struct nf_icmp_net *nf_icmpv6_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.icmpv6; +} + +#ifdef CONFIG_NF_CT_PROTO_DCCP +static inline struct nf_dccp_net *nf_dccp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.dccp; +} +#endif + +#ifdef CONFIG_NF_CT_PROTO_SCTP +static inline struct nf_sctp_net *nf_sctp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.sctp; +} +#endif + #endif /*_NF_CONNTRACK_PROTOCOL_H*/ diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index d5f62cc6c2ae..3394d75e1c80 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -30,7 +30,7 @@ struct nf_conn_timeout { }; static inline unsigned int * -nf_ct_timeout_data(struct nf_conn_timeout *t) +nf_ct_timeout_data(const struct nf_conn_timeout *t) { struct nf_ct_timeout *timeout; diff --git a/include/net/netfilter/nf_conntrack_timestamp.h b/include/net/netfilter/nf_conntrack_timestamp.h index 3b661986be8f..0ed617bf0a3d 100644 --- a/include/net/netfilter/nf_conntrack_timestamp.h +++ b/include/net/netfilter/nf_conntrack_timestamp.h @@ -49,21 +49,12 @@ static inline void nf_ct_set_tstamp(struct net *net, bool enable) } #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP -int nf_conntrack_tstamp_pernet_init(struct net *net); -void nf_conntrack_tstamp_pernet_fini(struct net *net); +void nf_conntrack_tstamp_pernet_init(struct net *net); int nf_conntrack_tstamp_init(void); void nf_conntrack_tstamp_fini(void); #else -static inline int nf_conntrack_tstamp_pernet_init(struct net *net) -{ - return 0; -} - -static inline void nf_conntrack_tstamp_pernet_fini(struct net *net) -{ - return; -} +static inline void nf_conntrack_tstamp_pernet_init(struct net *net) {} static inline int nf_conntrack_tstamp_init(void) { diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 0e355f4a3d76..7d5cda7ce32a 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -95,11 +95,7 @@ void flow_offload_free(struct flow_offload *flow); int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table, struct flow_offload_tuple *tuple); -int nf_flow_table_iterate(struct nf_flowtable *flow_table, - void (*iter)(struct flow_offload *flow, void *data), - void *data); - -void nf_flow_table_cleanup(struct net *net, struct net_device *dev); +void nf_flow_table_cleanup(struct net_device *dev); int nf_flow_table_init(struct nf_flowtable *flow_table); void nf_flow_table_free(struct nf_flowtable *flow_table); diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h index d300b8f03972..d774ca0c4c5e 100644 --- a/include/net/netfilter/nf_nat_l3proto.h +++ b/include/net/netfilter/nf_nat_l3proto.h @@ -2,18 +2,11 @@ #ifndef _NF_NAT_L3PROTO_H #define _NF_NAT_L3PROTO_H -struct nf_nat_l4proto; struct nf_nat_l3proto { u8 l3proto; - bool (*in_range)(const struct nf_conntrack_tuple *t, - const struct nf_nat_range2 *range); - - u32 (*secure_port)(const struct nf_conntrack_tuple *t, __be16); - bool (*manip_pkt)(struct sk_buff *skb, unsigned int iphdroff, - const struct nf_nat_l4proto *l4proto, const struct nf_conntrack_tuple *target, enum nf_nat_manip_type maniptype); diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h index b4d6b29bca62..95a4655bd1ad 100644 --- a/include/net/netfilter/nf_nat_l4proto.h +++ b/include/net/netfilter/nf_nat_l4proto.h @@ -5,78 +5,12 @@ #include <net/netfilter/nf_nat.h> #include <linux/netfilter/nfnetlink_conntrack.h> -struct nf_nat_range; struct nf_nat_l3proto; -struct nf_nat_l4proto { - /* Protocol number. */ - u8 l4proto; - - /* Translate a packet to the target according to manip type. - * Return true if succeeded. - */ - bool (*manip_pkt)(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype); - - /* Is the manipable part of the tuple between min and max incl? */ - bool (*in_range)(const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype, - const union nf_conntrack_man_proto *min, - const union nf_conntrack_man_proto *max); - - /* Alter the per-proto part of the tuple (depending on - * maniptype), to give a unique tuple in the given range if - * possible. Per-protocol part of tuple is initialized to the - * incoming packet. - */ - void (*unique_tuple)(const struct nf_nat_l3proto *l3proto, - struct nf_conntrack_tuple *tuple, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct); - - int (*nlattr_to_range)(struct nlattr *tb[], - struct nf_nat_range2 *range); -}; - -/* Protocol registration. */ -int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto); -void nf_nat_l4proto_unregister(u8 l3proto, - const struct nf_nat_l4proto *l4proto); - -const struct nf_nat_l4proto *__nf_nat_l4proto_find(u8 l3proto, u8 l4proto); - -/* Built-in protocols. */ -extern const struct nf_nat_l4proto nf_nat_l4proto_tcp; -extern const struct nf_nat_l4proto nf_nat_l4proto_udp; -extern const struct nf_nat_l4proto nf_nat_l4proto_icmp; -extern const struct nf_nat_l4proto nf_nat_l4proto_icmpv6; -extern const struct nf_nat_l4proto nf_nat_l4proto_unknown; -#ifdef CONFIG_NF_NAT_PROTO_DCCP -extern const struct nf_nat_l4proto nf_nat_l4proto_dccp; -#endif -#ifdef CONFIG_NF_NAT_PROTO_SCTP -extern const struct nf_nat_l4proto nf_nat_l4proto_sctp; -#endif -#ifdef CONFIG_NF_NAT_PROTO_UDPLITE -extern const struct nf_nat_l4proto nf_nat_l4proto_udplite; -#endif - -bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype, - const union nf_conntrack_man_proto *min, - const union nf_conntrack_man_proto *max); - -void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto, - struct nf_conntrack_tuple *tuple, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct, u16 *rover); - -int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[], - struct nf_nat_range2 *range); - +/* Translate a packet to the target according to manip type. Return on success. */ +bool nf_nat_l4proto_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype); #endif /*_NF_NAT_L4PROTO_H*/ diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 0f39ac487012..841835a387e1 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -470,6 +470,9 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding); void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding); +void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding); +void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set); /** * enum nft_set_extensions - set extension type IDs @@ -724,7 +727,9 @@ struct nft_expr_type { * @eval: Expression evaluation function * @size: full expression size, including private data size * @init: initialization function - * @destroy: destruction function + * @activate: activate expression in the next generation + * @deactivate: deactivate expression in next generation + * @destroy: destruction function, called after synchronize_rcu * @dump: function to dump parameters * @type: expression type * @validate: validate expression, called during loop detection @@ -1293,12 +1298,14 @@ static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext) * * @list: used internally * @msg_type: message type + * @put_net: ctx->net needs to be put * @ctx: transaction context * @data: internal information related to the transaction */ struct nft_trans { struct list_head list; int msg_type; + bool put_net; struct nft_ctx ctx; char data[0]; }; diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 8da837d2aaf9..2046d104f323 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -16,6 +16,10 @@ extern struct nft_expr_type nft_meta_type; extern struct nft_expr_type nft_rt_type; extern struct nft_expr_type nft_exthdr_type; +#ifdef CONFIG_NETWORK_SECMARK +extern struct nft_object_type nft_secmark_obj_type; +#endif + int nf_tables_core_module_init(void); void nf_tables_core_module_exit(void); diff --git a/include/net/netfilter/nfnetlink_log.h b/include/net/netfilter/nfnetlink_log.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/include/net/netfilter/nfnetlink_log.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/include/net/netlink.h b/include/net/netlink.h index 0c154f98e987..4c1e99303b5a 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -153,7 +153,7 @@ * nla_find() find attribute in stream of attributes * nla_find_nested() find attribute in nested attributes * nla_parse() parse and validate stream of attrs - * nla_parse_nested() parse nested attribuets + * nla_parse_nested() parse nested attributes * nla_for_each_attr() loop over all attributes * nla_for_each_nested() loop over the nested attributes *========================================================================= @@ -172,7 +172,7 @@ enum { NLA_FLAG, NLA_MSECS, NLA_NESTED, - NLA_NESTED_COMPAT, + NLA_NESTED_ARRAY, NLA_NUL_STRING, NLA_BINARY, NLA_S8, @@ -180,14 +180,28 @@ enum { NLA_S32, NLA_S64, NLA_BITFIELD32, + NLA_REJECT, + NLA_EXACT_LEN, + NLA_EXACT_LEN_WARN, __NLA_TYPE_MAX, }; #define NLA_TYPE_MAX (__NLA_TYPE_MAX - 1) +enum nla_policy_validation { + NLA_VALIDATE_NONE, + NLA_VALIDATE_RANGE, + NLA_VALIDATE_MIN, + NLA_VALIDATE_MAX, + NLA_VALIDATE_FUNCTION, +}; + /** * struct nla_policy - attribute validation policy * @type: Type of attribute or NLA_UNSPEC + * @validation_type: type of attribute validation done in addition to + * type-specific validation (e.g. range, function call), see + * &enum nla_policy_validation * @len: Type specific length of payload * * Policies are defined as arrays of this struct, the array must be @@ -198,9 +212,11 @@ enum { * NLA_NUL_STRING Maximum length of string (excluding NUL) * NLA_FLAG Unused * NLA_BINARY Maximum length of attribute payload - * NLA_NESTED Don't use `len' field -- length verification is - * done by checking len of nested header (or empty) - * NLA_NESTED_COMPAT Minimum length of structure payload + * NLA_NESTED, + * NLA_NESTED_ARRAY Length verification is done by checking len of + * nested header (or empty); len field is used if + * validation_data is also used, for the max attr + * number in the nested policy. * NLA_U8, NLA_U16, * NLA_U32, NLA_U64, * NLA_S8, NLA_S16, @@ -208,9 +224,59 @@ enum { * NLA_MSECS Leaving the length field zero will verify the * given type fits, using it verifies minimum length * just like "All other" - * NLA_BITFIELD32 A 32-bit bitmap/bitselector attribute + * NLA_BITFIELD32 Unused + * NLA_REJECT Unused + * NLA_EXACT_LEN Attribute must have exactly this length, otherwise + * it is rejected. + * NLA_EXACT_LEN_WARN Attribute should have exactly this length, a warning + * is logged if it is longer, shorter is rejected. * All other Minimum length of attribute payload * + * Meaning of `validation_data' field: + * NLA_BITFIELD32 This is a 32-bit bitmap/bitselector attribute and + * validation data must point to a u32 value of valid + * flags + * NLA_REJECT This attribute is always rejected and validation data + * may point to a string to report as the error instead + * of the generic one in extended ACK. + * NLA_NESTED Points to a nested policy to validate, must also set + * `len' to the max attribute number. + * Note that nla_parse() will validate, but of course not + * parse, the nested sub-policies. + * NLA_NESTED_ARRAY Points to a nested policy to validate, must also set + * `len' to the max attribute number. The difference to + * NLA_NESTED is the structure - NLA_NESTED has the + * nested attributes directly inside, while an array has + * the nested attributes at another level down and the + * attributes directly in the nesting don't matter. + * All other Unused - but note that it's a union + * + * Meaning of `min' and `max' fields, use via NLA_POLICY_MIN, NLA_POLICY_MAX + * and NLA_POLICY_RANGE: + * NLA_U8, + * NLA_U16, + * NLA_U32, + * NLA_U64, + * NLA_S8, + * NLA_S16, + * NLA_S32, + * NLA_S64 These are used depending on the validation_type + * field, if that is min/max/range then the minimum, + * maximum and both are used (respectively) to check + * the value of the integer attribute. + * Note that in the interest of code simplicity and + * struct size both limits are s16, so you cannot + * enforce a range that doesn't fall within the range + * of s16 - do that as usual in the code instead. + * All other Unused - but note that it's a union + * + * Meaning of `validate' field, use via NLA_POLICY_VALIDATE_FN: + * NLA_BINARY Validation function called for the attribute, + * not compatible with use of the validation_data + * as in NLA_BITFIELD32, NLA_REJECT, NLA_NESTED and + * NLA_NESTED_ARRAY. + * All other Unused - but note that it's a union + * * Example: * static const struct nla_policy my_policy[ATTR_MAX+1] = { * [ATTR_FOO] = { .type = NLA_U16 }, @@ -220,11 +286,69 @@ enum { * }; */ struct nla_policy { - u16 type; + u8 type; + u8 validation_type; u16 len; - void *validation_data; + union { + const void *validation_data; + struct { + s16 min, max; + }; + int (*validate)(const struct nlattr *attr, + struct netlink_ext_ack *extack); + }; }; +#define NLA_POLICY_EXACT_LEN(_len) { .type = NLA_EXACT_LEN, .len = _len } +#define NLA_POLICY_EXACT_LEN_WARN(_len) { .type = NLA_EXACT_LEN_WARN, \ + .len = _len } + +#define NLA_POLICY_ETH_ADDR NLA_POLICY_EXACT_LEN(ETH_ALEN) +#define NLA_POLICY_ETH_ADDR_COMPAT NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN) + +#define NLA_POLICY_NESTED(maxattr, policy) \ + { .type = NLA_NESTED, .validation_data = policy, .len = maxattr } +#define NLA_POLICY_NESTED_ARRAY(maxattr, policy) \ + { .type = NLA_NESTED_ARRAY, .validation_data = policy, .len = maxattr } + +#define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition)) +#define NLA_ENSURE_INT_TYPE(tp) \ + (__NLA_ENSURE(tp == NLA_S8 || tp == NLA_U8 || \ + tp == NLA_S16 || tp == NLA_U16 || \ + tp == NLA_S32 || tp == NLA_U32 || \ + tp == NLA_S64 || tp == NLA_U64) + tp) +#define NLA_ENSURE_NO_VALIDATION_PTR(tp) \ + (__NLA_ENSURE(tp != NLA_BITFIELD32 && \ + tp != NLA_REJECT && \ + tp != NLA_NESTED && \ + tp != NLA_NESTED_ARRAY) + tp) + +#define NLA_POLICY_RANGE(tp, _min, _max) { \ + .type = NLA_ENSURE_INT_TYPE(tp), \ + .validation_type = NLA_VALIDATE_RANGE, \ + .min = _min, \ + .max = _max \ +} + +#define NLA_POLICY_MIN(tp, _min) { \ + .type = NLA_ENSURE_INT_TYPE(tp), \ + .validation_type = NLA_VALIDATE_MIN, \ + .min = _min, \ +} + +#define NLA_POLICY_MAX(tp, _max) { \ + .type = NLA_ENSURE_INT_TYPE(tp), \ + .validation_type = NLA_VALIDATE_MAX, \ + .max = _max, \ +} + +#define NLA_POLICY_VALIDATE_FN(tp, fn, ...) { \ + .type = NLA_ENSURE_NO_VALIDATION_PTR(tp), \ + .validation_type = NLA_VALIDATE_FUNCTION, \ + .validate = fn, \ + .len = __VA_ARGS__ + 0, \ +} + /** * struct nl_info - netlink source information * @nlh: Netlink message header of original request @@ -249,6 +373,9 @@ int nla_validate(const struct nlattr *head, int len, int maxtype, int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, int len, const struct nla_policy *policy, struct netlink_ext_ack *extack); +int nla_parse_strict(struct nlattr **tb, int maxtype, const struct nlattr *head, + int len, const struct nla_policy *policy, + struct netlink_ext_ack *extack); int nla_policy_len(const struct nla_policy *, int); struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype); size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize); @@ -392,13 +519,29 @@ static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, const struct nla_policy *policy, struct netlink_ext_ack *extack) { - if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) + if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) { + NL_SET_ERR_MSG(extack, "Invalid header length"); return -EINVAL; + } return nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen), nlmsg_attrlen(nlh, hdrlen), policy, extack); } +static inline int nlmsg_parse_strict(const struct nlmsghdr *nlh, int hdrlen, + struct nlattr *tb[], int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) { + NL_SET_ERR_MSG(extack, "Invalid header length"); + return -EINVAL; + } + + return nla_parse_strict(tb, maxtype, nlmsg_attrdata(nlh, hdrlen), + nlmsg_attrlen(nlh, hdrlen), policy, extack); +} + /** * nlmsg_find_attr - find a specific attribute in a netlink message * @nlh: netlink message header diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 9795d628a127..51cba0b8adf5 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -97,18 +97,14 @@ struct netns_ct { struct delayed_work ecache_dwork; bool ecache_dwork_pending; #endif + bool auto_assign_helper_warned; #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_header; - struct ctl_table_header *acct_sysctl_header; - struct ctl_table_header *tstamp_sysctl_header; - struct ctl_table_header *event_sysctl_header; - struct ctl_table_header *helper_sysctl_header; #endif unsigned int sysctl_log_invalid; /* Log invalid packets */ int sysctl_events; int sysctl_acct; int sysctl_auto_assign_helper; - bool auto_assign_helper_warned; int sysctl_tstamp; int sysctl_checksum; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index e47503b4e4d1..104a6669e344 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -103,6 +103,9 @@ struct netns_ipv4 { /* Shall we try to damage output packets if routing dev changes? */ int sysctl_ip_dynaddr; int sysctl_ip_early_demux; +#ifdef CONFIG_NET_L3_MASTER_DEV + int sysctl_raw_l3mdev_accept; +#endif int sysctl_tcp_early_demux; int sysctl_udp_early_demux; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index f0e396ab9bec..ef1ed529f33c 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -45,6 +45,7 @@ struct netns_sysctl_ipv6 { int max_dst_opts_len; int max_hbh_opts_len; int seg6_flowlabel; + bool skip_notify_on_dev_down; }; struct netns_ipv6 { diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 9991e5ef52cc..59f45b1e9dac 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -5,6 +5,7 @@ #include <linux/list.h> #include <linux/wait.h> #include <linux/workqueue.h> +#include <linux/rhashtable-types.h> #include <linux/xfrm.h> #include <net/dst_ops.h> @@ -53,6 +54,7 @@ struct netns_xfrm { unsigned int policy_count[XFRM_POLICY_MAX * 2]; struct work_struct policy_hash_work; struct xfrm_policy_hthresh policy_hthresh; + struct list_head inexact_bins; struct sock *nlsk; diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index 316694dafa5b..008f466d1da7 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -87,7 +87,7 @@ struct nfc_hci_pipe { * According to specification 102 622 chapter 4.4 Pipes, * the pipe identifier is 7 bits long. */ -#define NFC_HCI_MAX_PIPES 127 +#define NFC_HCI_MAX_PIPES 128 struct nfc_hci_init_data { u8 gate_count; struct nfc_hci_gate gates[NFC_HCI_MAX_CUSTOM_GATES]; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index ef727f71336e..40965fbbcd31 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -65,11 +65,6 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block) return block->q; } -static inline struct net_device *tcf_block_dev(struct tcf_block *block) -{ - return tcf_block_q(block)->dev_queue->dev; -} - void *tcf_block_cb_priv(struct tcf_block_cb *block_cb); struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block, tc_setup_cb_t *cb, void *cb_ident); @@ -86,6 +81,14 @@ void __tcf_block_cb_unregister(struct tcf_block *block, struct tcf_block_cb *block_cb); void tcf_block_cb_unregister(struct tcf_block *block, tc_setup_cb_t *cb, void *cb_ident); +int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, + tc_indr_block_bind_cb_t *cb, void *cb_ident); +int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, + tc_indr_block_bind_cb_t *cb, void *cb_ident); +void __tc_indr_block_cb_unregister(struct net_device *dev, + tc_indr_block_bind_cb_t *cb, void *cb_ident); +void tc_indr_block_cb_unregister(struct net_device *dev, + tc_indr_block_bind_cb_t *cb, void *cb_ident); int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode); @@ -122,11 +125,6 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block) return NULL; } -static inline struct net_device *tcf_block_dev(struct tcf_block *block) -{ - return NULL; -} - static inline int tc_setup_cb_block_register(struct tcf_block *block, tc_setup_cb_t *cb, void *cb_priv) @@ -193,6 +191,32 @@ void tcf_block_cb_unregister(struct tcf_block *block, { } +static inline +int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, + tc_indr_block_bind_cb_t *cb, void *cb_ident) +{ + return 0; +} + +static inline +int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, + tc_indr_block_bind_cb_t *cb, void *cb_ident) +{ + return 0; +} + +static inline +void __tc_indr_block_cb_unregister(struct net_device *dev, + tc_indr_block_bind_cb_t *cb, void *cb_ident) +{ +} + +static inline +void tc_indr_block_cb_unregister(struct net_device *dev, + tc_indr_block_bind_cb_t *cb, void *cb_ident) +{ +} + static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { @@ -298,19 +322,13 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts) #endif } -static inline void tcf_exts_to_list(const struct tcf_exts *exts, - struct list_head *actions) -{ #ifdef CONFIG_NET_CLS_ACT - int i; - - for (i = 0; i < exts->nr_actions; i++) { - struct tc_action *a = exts->actions[i]; - - list_add_tail(&a->list, actions); - } +#define tcf_exts_for_each_action(i, a, exts) \ + for (i = 0; i < TCA_ACT_MAX_PRIO && ((a) = (exts)->actions[i]); i++) +#else +#define tcf_exts_for_each_action(i, a, exts) \ + for (; 0; (void)(i), (void)(a), (void)(exts)) #endif -} static inline void tcf_exts_stats_update(const struct tcf_exts *exts, @@ -324,7 +342,7 @@ tcf_exts_stats_update(const struct tcf_exts *exts, for (i = 0; i < exts->nr_actions; i++) { struct tc_action *a = exts->actions[i]; - tcf_action_stats_update(a, bytes, packets, lastuse); + tcf_action_stats_update(a, bytes, packets, lastuse, true); } preempt_enable(); @@ -361,6 +379,15 @@ static inline bool tcf_exts_has_one_action(struct tcf_exts *exts) #endif } +static inline struct tc_action *tcf_exts_first_action(struct tcf_exts *exts) +{ +#ifdef CONFIG_NET_CLS_ACT + return exts->actions[0]; +#else + return NULL; +#endif +} + /** * tcf_exts_exec - execute tc filter extensions * @skb: socket buffer @@ -592,8 +619,8 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) } #endif /* CONFIG_NET_CLS_IND */ -int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts, - enum tc_setup_type type, void *type_data, bool err_stop); +int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, + void *type_data, bool err_stop); enum tc_block_command { TC_BLOCK_BIND, @@ -616,6 +643,7 @@ struct tc_cls_common_offload { struct tc_cls_u32_knode { struct tcf_exts *exts; + struct tcf_result *res; struct tc_u32_sel *sel; u32 handle; u32 val; @@ -794,12 +822,21 @@ enum tc_mq_command { TC_MQ_CREATE, TC_MQ_DESTROY, TC_MQ_STATS, + TC_MQ_GRAFT, +}; + +struct tc_mq_opt_offload_graft_params { + unsigned long queue; + u32 child_handle; }; struct tc_mq_qopt_offload { enum tc_mq_command command; u32 handle; - struct tc_qopt_offload_stats stats; + union { + struct tc_qopt_offload_stats stats; + struct tc_mq_opt_offload_graft_params graft_params; + }; }; enum tc_red_command { @@ -807,13 +844,16 @@ enum tc_red_command { TC_RED_DESTROY, TC_RED_STATS, TC_RED_XSTATS, + TC_RED_GRAFT, }; struct tc_red_qopt_offload_params { u32 min; u32 max; u32 probability; + u32 limit; bool is_ecn; + bool is_harddrop; struct gnet_stats_queue *qstats; }; @@ -825,6 +865,51 @@ struct tc_red_qopt_offload { struct tc_red_qopt_offload_params set; struct tc_qopt_offload_stats stats; struct red_stats *xstats; + u32 child_handle; + }; +}; + +enum tc_gred_command { + TC_GRED_REPLACE, + TC_GRED_DESTROY, + TC_GRED_STATS, +}; + +struct tc_gred_vq_qopt_offload_params { + bool present; + u32 limit; + u32 prio; + u32 min; + u32 max; + bool is_ecn; + bool is_harddrop; + u32 probability; + /* Only need backlog, see struct tc_prio_qopt_offload_params */ + u32 *backlog; +}; + +struct tc_gred_qopt_offload_params { + bool grio_on; + bool wred_on; + unsigned int dp_cnt; + unsigned int dp_def; + struct gnet_stats_queue *qstats; + struct tc_gred_vq_qopt_offload_params tab[MAX_DPs]; +}; + +struct tc_gred_qopt_offload_stats { + struct gnet_stats_basic_packed bstats[MAX_DPs]; + struct gnet_stats_queue qstats[MAX_DPs]; + struct red_stats *xstats[MAX_DPs]; +}; + +struct tc_gred_qopt_offload { + enum tc_gred_command command; + u32 handle; + u32 parent; + union { + struct tc_gred_qopt_offload_params set; + struct tc_gred_qopt_offload_stats stats; }; }; @@ -861,4 +946,14 @@ struct tc_prio_qopt_offload { }; }; +enum tc_root_command { + TC_ROOT_GRAFT, +}; + +struct tc_root_qopt_offload { + enum tc_root_command command; + u32 handle; + bool ingress; +}; + #endif diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 7dc769e5452b..a16fbe9a2a67 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -102,6 +102,7 @@ int qdisc_set_default(const char *id); void qdisc_hash_add(struct Qdisc *q, bool invisible); void qdisc_hash_del(struct Qdisc *q); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle); +struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle); struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab, struct netlink_ext_ack *extack); diff --git a/include/net/protocol.h b/include/net/protocol.h index 4fc75f7ae23b..92b3eaad6088 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -42,7 +42,10 @@ struct net_protocol { int (*early_demux)(struct sk_buff *skb); int (*early_demux_handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); - void (*err_handler)(struct sk_buff *skb, u32 info); + + /* This returns an error if we weren't able to handle the error. */ + int (*err_handler)(struct sk_buff *skb, u32 info); + unsigned int no_policy:1, netns_ok:1, /* does the protocol do more stringent @@ -58,10 +61,12 @@ struct inet6_protocol { void (*early_demux_handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); - void (*err_handler)(struct sk_buff *skb, + /* This returns an error if we weren't able to handle the error. */ + int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info); + unsigned int flags; /* INET6_PROTO_xxx */ }; diff --git a/include/net/raw.h b/include/net/raw.h index 9c9fa98a91a4..821ff4887f77 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -17,7 +17,7 @@ #ifndef _RAW_H #define _RAW_H - +#include <net/inet_sock.h> #include <net/protocol.h> #include <linux/icmp.h> @@ -61,6 +61,7 @@ void raw_seq_stop(struct seq_file *seq, void *v); int raw_hash_sk(struct sock *sk); void raw_unhash_sk(struct sock *sk); +void raw_init(void); struct raw_sock { /* inet_sock has to be the first member */ @@ -74,4 +75,15 @@ static inline struct raw_sock *raw_sk(const struct sock *sk) return (struct raw_sock *)sk; } +static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if, + int dif, int sdif) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + return inet_bound_dev_eq(!!net->ipv4.sysctl_raw_l3mdev_accept, + bound_dev_if, dif, sdif); +#else + return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); +#endif +} + #endif /* _RAW_H */ diff --git a/include/net/regulatory.h b/include/net/regulatory.h index 60f8cc86a447..3469750df0f4 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -217,15 +217,15 @@ struct ieee80211_wmm_rule { struct ieee80211_reg_rule { struct ieee80211_freq_range freq_range; struct ieee80211_power_rule power_rule; - struct ieee80211_wmm_rule *wmm_rule; + struct ieee80211_wmm_rule wmm_rule; u32 flags; u32 dfs_cac_ms; + bool has_wmm; }; struct ieee80211_regdomain { struct rcu_head rcu_head; u32 n_reg_rules; - u32 n_wmm_rules; char alpha2[3]; enum nl80211_dfs_regions dfs_region; struct ieee80211_reg_rule reg_rules[]; diff --git a/include/net/route.h b/include/net/route.h index bb53cdba38dc..9883dc82f723 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -201,10 +201,9 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, } void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif, - u32 mark, u8 protocol, int flow_flags); + u8 protocol); void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu); -void ipv4_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, - u8 protocol, int flow_flags); +void ipv4_redirect(struct sk_buff *skb, struct net *net, int oif, u8 protocol); void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk); void ip_rt_send_redirect(struct sk_buff *skb); diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 0bbaa5488423..e2091bb2b3a8 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -159,12 +159,14 @@ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]); struct net_device *rtnl_create_link(struct net *net, const char *ifname, unsigned char name_assign_type, const struct rtnl_link_ops *ops, - struct nlattr *tb[]); + struct nlattr *tb[], + struct netlink_ext_ack *extack); int rtnl_delete_link(struct net_device *dev); int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm); int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len, struct netlink_ext_ack *exterr); +struct net *rtnl_get_net_ns_capable(struct sock *sk, int netnsid); #define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index a6d00093f35e..9481f2c142e2 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -19,10 +19,14 @@ struct Qdisc_ops; struct qdisc_walker; struct tcf_walker; struct module; +struct bpf_flow_keys; typedef int tc_setup_cb_t(enum tc_setup_type type, void *type_data, void *cb_priv); +typedef int tc_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv, + enum tc_setup_type type, void *type_data); + struct qdisc_rate_table { struct tc_ratespec rate; u32 data[256]; @@ -105,6 +109,7 @@ struct Qdisc { spinlock_t busylock ____cacheline_aligned_in_smp; spinlock_t seqlock; + struct rcu_head rcu; }; static inline void qdisc_refcount_inc(struct Qdisc *qdisc) @@ -114,6 +119,19 @@ static inline void qdisc_refcount_inc(struct Qdisc *qdisc) refcount_inc(&qdisc->refcnt); } +/* Intended to be used by unlocked users, when concurrent qdisc release is + * possible. + */ + +static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc) +{ + if (qdisc->flags & TCQ_F_BUILTIN) + return qdisc; + if (refcount_inc_not_zero(&qdisc->refcnt)) + return qdisc; + return NULL; +} + static inline bool qdisc_is_running(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) @@ -307,9 +325,14 @@ struct tcf_proto { }; struct qdisc_skb_cb { - unsigned int pkt_len; - u16 slave_dev_queue_mapping; - u16 tc_classid; + union { + struct { + unsigned int pkt_len; + u16 slave_dev_queue_mapping; + u16 tc_classid; + }; + struct bpf_flow_keys *flow_keys; + }; #define QDISC_CB_PRIV_LEN 20 unsigned char data[QDISC_CB_PRIV_LEN]; }; @@ -331,7 +354,7 @@ struct tcf_chain { struct tcf_block { struct list_head chain_list; u32 index; /* block index for shared blocks */ - unsigned int refcnt; + refcount_t refcnt; struct net *net; struct Qdisc *q; struct list_head cb_list; @@ -343,6 +366,7 @@ struct tcf_block { struct tcf_chain *chain; struct list_head filter_chain_list; } chain0; + struct rcu_head rcu; }; static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) @@ -362,7 +386,7 @@ static inline void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) } static inline void -tc_cls_offload_cnt_update(struct tcf_block *block, unsigned int *cnt, +tc_cls_offload_cnt_update(struct tcf_block *block, u32 *cnt, u32 *flags, bool add) { if (add) { @@ -554,9 +578,34 @@ void dev_deactivate_many(struct list_head *head); struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *qdisc); void qdisc_reset(struct Qdisc *qdisc); -void qdisc_destroy(struct Qdisc *qdisc); +void qdisc_put(struct Qdisc *qdisc); +void qdisc_put_unlocked(struct Qdisc *qdisc); void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n, unsigned int len); +#ifdef CONFIG_NET_SCHED +int qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type, + void *type_data); +void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch, + struct Qdisc *new, struct Qdisc *old, + enum tc_setup_type type, void *type_data, + struct netlink_ext_ack *extack); +#else +static inline int +qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type, + void *type_data) +{ + q->flags &= ~TCQ_F_OFFLOADED; + return 0; +} + +static inline void +qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch, + struct Qdisc *new, struct Qdisc *old, + enum tc_setup_type type, void *type_data, + struct netlink_ext_ack *extack) +{ +} +#endif struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, const struct Qdisc_ops *ops, struct netlink_ext_ack *extack); @@ -828,8 +877,8 @@ static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh) qh->qlen = 0; } -static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, - struct qdisc_skb_head *qh) +static inline void __qdisc_enqueue_tail(struct sk_buff *skb, + struct qdisc_skb_head *qh) { struct sk_buff *last = qh->tail; @@ -842,14 +891,24 @@ static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, qh->head = skb; } qh->qlen++; - qdisc_qstats_backlog_inc(sch, skb); +} +static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch) +{ + __qdisc_enqueue_tail(skb, &sch->q); + qdisc_qstats_backlog_inc(sch, skb); return NET_XMIT_SUCCESS; } -static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch) +static inline void __qdisc_enqueue_head(struct sk_buff *skb, + struct qdisc_skb_head *qh) { - return __qdisc_enqueue_tail(skb, sch, &sch->q); + skb->next = qh->head; + + if (!qh->head) + qh->tail = skb; + qh->head = skb; + qh->qlen++; } static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh) diff --git a/include/net/scm.h b/include/net/scm.h index 903771c8d4e3..1ce365f4c256 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -8,6 +8,7 @@ #include <linux/security.h> #include <linux/pid.h> #include <linux/nsproxy.h> +#include <linux/sched/signal.h> /* Well, we should have at least one descriptor open * to accept passed FDs 8) diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 86f034b524d4..4588bdc2b8f0 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -71,7 +71,7 @@ enum { SCTP_DEFAULT_INSTREAMS = SCTP_MAX_STREAM }; SCTP_NUM_AUTH_CHUNK_TYPES) /* These are the different flavours of event. */ -enum sctp_event { +enum sctp_event_type { SCTP_EVENT_T_CHUNK = 1, SCTP_EVENT_T_TIMEOUT, SCTP_EVENT_T_OTHER, @@ -148,11 +148,6 @@ SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, enum sctp_event_primitive, primitive) #define sctp_chunk_is_data(a) (a->chunk_hdr->type == SCTP_CID_DATA || \ a->chunk_hdr->type == SCTP_CID_I_DATA) -/* Calculate the actual data size in a data chunk */ -#define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end) - \ - (unsigned long)(c->chunk_hdr) - \ - sctp_datachk_len(&c->asoc->stream))) - /* Internal error codes */ enum sctp_ierror { SCTP_IERROR_NO_ERROR = 0, diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 8c2caa370e0f..1d13ec3f2707 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -151,8 +151,8 @@ int sctp_primitive_RECONF(struct net *net, struct sctp_association *asoc, * sctp/input.c */ int sctp_rcv(struct sk_buff *skb); -void sctp_v4_err(struct sk_buff *skb, u32 info); -void sctp_hash_endpoint(struct sctp_endpoint *); +int sctp_v4_err(struct sk_buff *skb, u32 info); +int sctp_hash_endpoint(struct sctp_endpoint *ep); void sctp_unhash_endpoint(struct sctp_endpoint *); struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *, struct sctphdr *, struct sctp_association **, @@ -608,4 +608,21 @@ static inline __u32 sctp_dst_mtu(const struct dst_entry *dst) SCTP_DEFAULT_MINSEGMENT)); } +static inline bool sctp_transport_pmtu_check(struct sctp_transport *t) +{ + __u32 pmtu = sctp_dst_mtu(t->dst); + + if (t->pathmtu == pmtu) + return true; + + t->pathmtu = pmtu; + + return false; +} + +static inline __u32 sctp_min_frag_point(struct sctp_sock *sp, __u16 datasize) +{ + return sctp_mtu_payload(sp, SCTP_DEFAULT_MINSEGMENT, datasize); +} + #endif /* __net_sctp_h__ */ diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 5ef1bad81ef5..24825a81829e 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -173,7 +173,7 @@ sctp_state_fn_t sctp_sf_autoclose_timer_expire; __u8 sctp_get_chunk_type(struct sctp_chunk *chunk); const struct sctp_sm_table_entry *sctp_sm_lookup_event( struct net *net, - enum sctp_event event_type, + enum sctp_event_type event_type, enum sctp_state state, union sctp_subtype event_subtype); int sctp_chunk_iif(const struct sctp_chunk *); @@ -313,7 +313,7 @@ struct sctp_chunk *sctp_process_strreset_resp( /* Prototypes for statetable processing. */ -int sctp_do_sm(struct net *net, enum sctp_event event_type, +int sctp_do_sm(struct net *net, enum sctp_event_type event_type, union sctp_subtype subtype, enum sctp_state state, struct sctp_endpoint *ep, struct sctp_association *asoc, void *event_arg, gfp_t gfp); @@ -347,7 +347,7 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk) __u16 size; size = ntohs(chunk->chunk_hdr->length); - size -= sctp_datahdr_len(&chunk->asoc->stream); + size -= sctp_datachk_len(&chunk->asoc->stream); return size; } diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 28a7c8e44636..003020eb6e66 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -96,7 +96,9 @@ struct sctp_stream; struct sctp_bind_bucket { unsigned short port; - unsigned short fastreuse; + signed char fastreuse; + signed char fastreuseport; + kuid_t fastuid; struct hlist_node node; struct hlist_head owner; struct net *net; @@ -215,7 +217,7 @@ struct sctp_sock { * These two structures must be grouped together for the usercopy * whitelist region. */ - struct sctp_event_subscribe subscribe; + __u16 subscribe; struct sctp_initmsg initmsg; int user_frag; @@ -876,6 +878,8 @@ struct sctp_transport { unsigned long sackdelay; __u32 sackfreq; + atomic_t mtu_info; + /* When was the last time that we heard from this transport? We use * this to pick new active and retran paths. */ @@ -1188,6 +1192,8 @@ int sctp_bind_addr_conflict(struct sctp_bind_addr *, const union sctp_addr *, struct sctp_sock *, struct sctp_sock *); int sctp_bind_addr_state(const struct sctp_bind_addr *bp, const union sctp_addr *addr); +int sctp_bind_addrs_check(struct sctp_sock *sp, + struct sctp_sock *sp2, int cnt2); union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp, const union sctp_addr *addrs, int addrcnt, @@ -2071,8 +2077,12 @@ struct sctp_association { int sent_cnt_removable; + __u16 subscribe; + __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; + + struct rcu_head rcu; }; diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index 51b4e0626c34..bd922a0fe914 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -164,30 +164,39 @@ void sctp_ulpevent_read_nxtinfo(const struct sctp_ulpevent *event, __u16 sctp_ulpevent_get_notification_type(const struct sctp_ulpevent *event); +static inline void sctp_ulpevent_type_set(__u16 *subscribe, + __u16 sn_type, __u8 on) +{ + if (sn_type > SCTP_SN_TYPE_MAX) + return; + + if (on) + *subscribe |= (1 << (sn_type - SCTP_SN_TYPE_BASE)); + else + *subscribe &= ~(1 << (sn_type - SCTP_SN_TYPE_BASE)); +} + /* Is this event type enabled? */ -static inline int sctp_ulpevent_type_enabled(__u16 sn_type, - struct sctp_event_subscribe *mask) +static inline bool sctp_ulpevent_type_enabled(__u16 subscribe, __u16 sn_type) { - int offset = sn_type - SCTP_SN_TYPE_BASE; - char *amask = (char *) mask; + if (sn_type > SCTP_SN_TYPE_MAX) + return false; - if (offset >= sizeof(struct sctp_event_subscribe)) - return 0; - return amask[offset]; + return subscribe & (1 << (sn_type - SCTP_SN_TYPE_BASE)); } /* Given an event subscription, is this event enabled? */ -static inline int sctp_ulpevent_is_enabled(const struct sctp_ulpevent *event, - struct sctp_event_subscribe *mask) +static inline bool sctp_ulpevent_is_enabled(const struct sctp_ulpevent *event, + __u16 subscribe) { __u16 sn_type; - int enabled = 1; - if (sctp_ulpevent_is_notification(event)) { - sn_type = sctp_ulpevent_get_notification_type(event); - enabled = sctp_ulpevent_type_enabled(sn_type, mask); - } - return enabled; + if (!sctp_ulpevent_is_notification(event)) + return true; + + sn_type = sctp_ulpevent_get_notification_type(event); + + return sctp_ulpevent_type_enabled(subscribe, sn_type); } #endif /* __sctp_ulpevent_h__ */ diff --git a/include/net/seg6.h b/include/net/seg6.h index 2567941a2f32..8b2dc6869fd1 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -16,7 +16,6 @@ #include <linux/net.h> #include <linux/ipv6.h> -#include <net/lwtunnel.h> #include <linux/seg6.h> #include <linux/rhashtable-types.h> diff --git a/include/net/sock.h b/include/net/sock.h index 433f45fc2d68..2b229f7be8eb 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -298,6 +298,7 @@ struct sock_common { * @sk_filter: socket filtering instructions * @sk_timer: sock cleanup timer * @sk_stamp: time stamp of last packet received + * @sk_stamp_seq: lock for accessing sk_stamp on 32 bit architectures only * @sk_tsflags: SO_TIMESTAMPING socket options * @sk_tskey: counter to disambiguate concurrent tstamp requests * @sk_zckey: counter to order MSG_ZEROCOPY notifications @@ -422,8 +423,8 @@ struct sock { struct timer_list sk_timer; __u32 sk_priority; __u32 sk_mark; - u32 sk_pacing_rate; /* bytes per second */ - u32 sk_max_pacing_rate; + unsigned long sk_pacing_rate; /* bytes per second */ + unsigned long sk_max_pacing_rate; struct page_frag sk_frag; netdev_features_t sk_route_caps; netdev_features_t sk_route_nocaps; @@ -474,6 +475,9 @@ struct sock { const struct cred *sk_peer_cred; long sk_rcvtimeo; ktime_t sk_stamp; +#if BITS_PER_LONG==32 + seqlock_t sk_stamp_seq; +#endif u16 sk_tsflags; u8 sk_shutdown; u32 sk_tskey; @@ -800,6 +804,7 @@ enum sock_flags { SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */ SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */ SOCK_TXTIME, + SOCK_XDP, /* XDP is attached */ }; #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) @@ -1109,7 +1114,7 @@ struct proto { unsigned int inuse_idx; #endif - bool (*stream_memory_free)(const struct sock *sk); + bool (*stream_memory_free)(const struct sock *sk, int wake); bool (*stream_memory_read)(const struct sock *sk); /* Memory pressure */ void (*enter_memory_pressure)(struct sock *sk); @@ -1191,19 +1196,29 @@ static inline void sk_refcnt_debug_release(const struct sock *sk) #define sk_refcnt_debug_release(sk) do { } while (0) #endif /* SOCK_REFCNT_DEBUG */ -static inline bool sk_stream_memory_free(const struct sock *sk) +static inline bool __sk_stream_memory_free(const struct sock *sk, int wake) { if (sk->sk_wmem_queued >= sk->sk_sndbuf) return false; return sk->sk_prot->stream_memory_free ? - sk->sk_prot->stream_memory_free(sk) : true; + sk->sk_prot->stream_memory_free(sk, wake) : true; } -static inline bool sk_stream_is_writeable(const struct sock *sk) +static inline bool sk_stream_memory_free(const struct sock *sk) +{ + return __sk_stream_memory_free(sk, 0); +} + +static inline bool __sk_stream_is_writeable(const struct sock *sk, int wake) { return sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && - sk_stream_memory_free(sk); + __sk_stream_memory_free(sk, wake); +} + +static inline bool sk_stream_is_writeable(const struct sock *sk) +{ + return __sk_stream_is_writeable(sk, 0); } static inline int sk_under_cgroup_hierarchy(struct sock *sk, @@ -1491,6 +1506,7 @@ static inline void lock_sock(struct sock *sk) lock_sock_nested(sk, 0); } +void __release_sock(struct sock *sk); void release_sock(struct sock *sk); /* BH context may only use the following locking interface. */ @@ -2057,14 +2073,20 @@ static inline bool skwq_has_sleeper(struct socket_wq *wq) /** * sock_poll_wait - place memory barrier behind the poll_wait call. * @filp: file + * @sock: socket to wait on * @p: poll_table * * See the comments in the wq_has_sleeper function. + * + * Do not derive sock from filp->private_data here. An SMC socket establishes + * an internal TCP socket that is used in the fallback case. All socket + * operations on the SMC socket are then forwarded to the TCP socket. In case of + * poll, the filp->private_data pointer references the SMC socket because the + * TCP socket has no file assigned. */ -static inline void sock_poll_wait(struct file *filp, poll_table *p) +static inline void sock_poll_wait(struct file *filp, struct socket *sock, + poll_table *p) { - struct socket *sock = filp->private_data; - if (!poll_does_not_wait(p)) { poll_wait(filp, &sock->wq->wait, p); /* We need to be sure we are in sync with the @@ -2212,10 +2234,6 @@ static inline struct page_frag *sk_page_frag(struct sock *sk) bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag); -int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg, - int sg_start, int *sg_curr, unsigned int *sg_size, - int first_coalesce); - /* * Default write policy as shown to user space via poll/select/SIGIO */ @@ -2283,6 +2301,34 @@ static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb) atomic_add(segs, &sk->sk_drops); } +static inline ktime_t sock_read_timestamp(struct sock *sk) +{ +#if BITS_PER_LONG==32 + unsigned int seq; + ktime_t kt; + + do { + seq = read_seqbegin(&sk->sk_stamp_seq); + kt = sk->sk_stamp; + } while (read_seqretry(&sk->sk_stamp_seq, seq)); + + return kt; +#else + return sk->sk_stamp; +#endif +} + +static inline void sock_write_timestamp(struct sock *sk, ktime_t kt) +{ +#if BITS_PER_LONG==32 + write_seqlock(&sk->sk_stamp_seq); + sk->sk_stamp = kt; + write_sequnlock(&sk->sk_stamp_seq); +#else + sk->sk_stamp = kt; +#endif +} + void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb); void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, @@ -2307,7 +2353,7 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE))) __sock_recv_timestamp(msg, sk, skb); else - sk->sk_stamp = kt; + sock_write_timestamp(sk, kt); if (sock_flag(sk, SOCK_WIFI_STATUS) && skb->wifi_acked_valid) __sock_recv_wifi_status(msg, sk, skb); @@ -2328,30 +2374,47 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY) __sock_recv_ts_and_drops(msg, sk, skb); else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP))) - sk->sk_stamp = skb->tstamp; + sock_write_timestamp(sk, skb->tstamp); else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP)) - sk->sk_stamp = 0; + sock_write_timestamp(sk, 0); } void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags); /** - * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped + * _sock_tx_timestamp - checks whether the outgoing packet is to be time stamped * @sk: socket sending this packet * @tsflags: timestamping flags to use * @tx_flags: completed with instructions for time stamping + * @tskey: filled in with next sk_tskey (not for TCP, which uses seqno) * * Note: callers should take care of initial ``*tx_flags`` value (usually 0) */ -static inline void sock_tx_timestamp(const struct sock *sk, __u16 tsflags, - __u8 *tx_flags) +static inline void _sock_tx_timestamp(struct sock *sk, __u16 tsflags, + __u8 *tx_flags, __u32 *tskey) { - if (unlikely(tsflags)) + if (unlikely(tsflags)) { __sock_tx_timestamp(tsflags, tx_flags); + if (tsflags & SOF_TIMESTAMPING_OPT_ID && tskey && + tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) + *tskey = sk->sk_tskey++; + } if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS))) *tx_flags |= SKBTX_WIFI_STATUS; } +static inline void sock_tx_timestamp(struct sock *sk, __u16 tsflags, + __u8 *tx_flags) +{ + _sock_tx_timestamp(sk, tsflags, tx_flags, NULL); +} + +static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) +{ + _sock_tx_timestamp(skb->sk, tsflags, &skb_shinfo(skb)->tx_flags, + &skb_shinfo(skb)->tskey); +} + /** * sk_eat_skb - Release a skb if it is no longer needed * @sk: socket to eat this skb from diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d574ce63bf22..a7fdab5ee6c3 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -95,8 +95,8 @@ struct switchdev_obj_port_vlan { u16 vid_end; }; -#define SWITCHDEV_OBJ_PORT_VLAN(obj) \ - container_of(obj, struct switchdev_obj_port_vlan, obj) +#define SWITCHDEV_OBJ_PORT_VLAN(OBJ) \ + container_of((OBJ), struct switchdev_obj_port_vlan, obj) /* SWITCHDEV_OBJ_ID_PORT_MDB */ struct switchdev_obj_port_mdb { @@ -105,8 +105,8 @@ struct switchdev_obj_port_mdb { u16 vid; }; -#define SWITCHDEV_OBJ_PORT_MDB(obj) \ - container_of(obj, struct switchdev_obj_port_mdb, obj) +#define SWITCHDEV_OBJ_PORT_MDB(OBJ) \ + container_of((OBJ), struct switchdev_obj_port_mdb, obj) void switchdev_trans_item_enqueue(struct switchdev_trans *trans, void *data, void (*destructor)(void const *), @@ -121,10 +121,6 @@ typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj); * @switchdev_port_attr_get: Get a port attribute (see switchdev_attr). * * @switchdev_port_attr_set: Set a port attribute (see switchdev_attr). - * - * @switchdev_port_obj_add: Add an object to port (see switchdev_obj_*). - * - * @switchdev_port_obj_del: Delete an object from port (see switchdev_obj_*). */ struct switchdev_ops { int (*switchdev_port_attr_get)(struct net_device *dev, @@ -132,11 +128,6 @@ struct switchdev_ops { int (*switchdev_port_attr_set)(struct net_device *dev, const struct switchdev_attr *attr, struct switchdev_trans *trans); - int (*switchdev_port_obj_add)(struct net_device *dev, - const struct switchdev_obj *obj, - struct switchdev_trans *trans); - int (*switchdev_port_obj_del)(struct net_device *dev, - const struct switchdev_obj *obj); }; enum switchdev_notifier_type { @@ -145,17 +136,35 @@ enum switchdev_notifier_type { SWITCHDEV_FDB_ADD_TO_DEVICE, SWITCHDEV_FDB_DEL_TO_DEVICE, SWITCHDEV_FDB_OFFLOADED, + + SWITCHDEV_PORT_OBJ_ADD, /* Blocking. */ + SWITCHDEV_PORT_OBJ_DEL, /* Blocking. */ + + SWITCHDEV_VXLAN_FDB_ADD_TO_BRIDGE, + SWITCHDEV_VXLAN_FDB_DEL_TO_BRIDGE, + SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE, + SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE, + SWITCHDEV_VXLAN_FDB_OFFLOADED, }; struct switchdev_notifier_info { struct net_device *dev; + struct netlink_ext_ack *extack; }; struct switchdev_notifier_fdb_info { struct switchdev_notifier_info info; /* must be first */ const unsigned char *addr; u16 vid; - bool added_by_user; + u8 added_by_user:1, + offloaded:1; +}; + +struct switchdev_notifier_port_obj_info { + struct switchdev_notifier_info info; /* must be first */ + const struct switchdev_obj *obj; + struct switchdev_trans *trans; + bool handled; }; static inline struct net_device * @@ -164,6 +173,12 @@ switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info) return info->dev; } +static inline struct netlink_ext_ack * +switchdev_notifier_info_to_extack(const struct switchdev_notifier_info *info) +{ + return info->extack; +} + #ifdef CONFIG_NET_SWITCHDEV void switchdev_deferred_process(void); @@ -172,13 +187,22 @@ int switchdev_port_attr_get(struct net_device *dev, int switchdev_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr); int switchdev_port_obj_add(struct net_device *dev, - const struct switchdev_obj *obj); + const struct switchdev_obj *obj, + struct netlink_ext_ack *extack); int switchdev_port_obj_del(struct net_device *dev, const struct switchdev_obj *obj); + int register_switchdev_notifier(struct notifier_block *nb); int unregister_switchdev_notifier(struct notifier_block *nb); int call_switchdev_notifiers(unsigned long val, struct net_device *dev, struct switchdev_notifier_info *info); + +int register_switchdev_blocking_notifier(struct notifier_block *nb); +int unregister_switchdev_blocking_notifier(struct notifier_block *nb); +int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev, + struct switchdev_notifier_info *info, + struct netlink_ext_ack *extack); + void switchdev_port_fwd_mark_set(struct net_device *dev, struct net_device *group_dev, bool joining); @@ -186,6 +210,19 @@ void switchdev_port_fwd_mark_set(struct net_device *dev, bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b); +int switchdev_handle_port_obj_add(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + bool (*check_cb)(const struct net_device *dev), + int (*add_cb)(struct net_device *dev, + const struct switchdev_obj *obj, + struct switchdev_trans *trans, + struct netlink_ext_ack *extack)); +int switchdev_handle_port_obj_del(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + bool (*check_cb)(const struct net_device *dev), + int (*del_cb)(struct net_device *dev, + const struct switchdev_obj *obj)); + #define SWITCHDEV_SET_OPS(netdev, ops) ((netdev)->switchdev_ops = (ops)) #else @@ -206,7 +243,8 @@ static inline int switchdev_port_attr_set(struct net_device *dev, } static inline int switchdev_port_obj_add(struct net_device *dev, - const struct switchdev_obj *obj) + const struct switchdev_obj *obj, + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } @@ -234,12 +272,55 @@ static inline int call_switchdev_notifiers(unsigned long val, return NOTIFY_DONE; } +static inline int +register_switchdev_blocking_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline int +unregister_switchdev_blocking_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline int +call_switchdev_blocking_notifiers(unsigned long val, + struct net_device *dev, + struct switchdev_notifier_info *info, + struct netlink_ext_ack *extack) +{ + return NOTIFY_DONE; +} + static inline bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b) { return false; } +static inline int +switchdev_handle_port_obj_add(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + bool (*check_cb)(const struct net_device *dev), + int (*add_cb)(struct net_device *dev, + const struct switchdev_obj *obj, + struct switchdev_trans *trans, + struct netlink_ext_ack *extack)) +{ + return 0; +} + +static inline int +switchdev_handle_port_obj_del(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + bool (*check_cb)(const struct net_device *dev), + int (*del_cb)(struct net_device *dev, + const struct switchdev_obj *obj)) +{ + return 0; +} + #define SWITCHDEV_SET_OPS(netdev, ops) do {} while (0) #endif diff --git a/include/net/tcp.h b/include/net/tcp.h index 770917d0caa7..e0a65c067662 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -313,7 +313,7 @@ extern struct proto tcp_prot; void tcp_tasklet_init(void); -void tcp_v4_err(struct sk_buff *skb, u32); +int tcp_v4_err(struct sk_buff *skb, u32); void tcp_shutdown(struct sock *sk, int how); @@ -732,7 +732,7 @@ void tcp_send_window_probe(struct sock *sk); static inline u64 tcp_clock_ns(void) { - return local_clock(); + return ktime_get_ns(); } static inline u64 tcp_clock_us(void) @@ -752,17 +752,7 @@ static inline u32 tcp_time_stamp_raw(void) return div_u64(tcp_clock_ns(), NSEC_PER_SEC / TCP_TS_HZ); } - -/* Refresh 1us clock of a TCP socket, - * ensuring monotically increasing values. - */ -static inline void tcp_mstamp_refresh(struct tcp_sock *tp) -{ - u64 val = tcp_clock_us(); - - if (val > tp->tcp_mstamp) - tp->tcp_mstamp = val; -} +void tcp_mstamp_refresh(struct tcp_sock *tp); static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0) { @@ -771,7 +761,13 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0) static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) { - return div_u64(skb->skb_mstamp, USEC_PER_SEC / TCP_TS_HZ); + return div_u64(skb->skb_mstamp_ns, NSEC_PER_SEC / TCP_TS_HZ); +} + +/* provide the departure time in us unit */ +static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb) +{ + return div_u64(skb->skb_mstamp_ns, NSEC_PER_USEC); } @@ -817,7 +813,7 @@ struct tcp_skb_cb { #define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */ #define TCPCB_LOST 0x04 /* SKB is lost */ #define TCPCB_TAGBITS 0x07 /* All tag bits */ -#define TCPCB_REPAIRED 0x10 /* SKB repaired (no skb_mstamp) */ +#define TCPCB_REPAIRED 0x10 /* SKB repaired (no skb_mstamp_ns) */ #define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */ #define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS| \ TCPCB_REPAIRED) @@ -862,6 +858,21 @@ static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb) TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb); } +static inline bool tcp_skb_bpf_ingress(const struct sk_buff *skb) +{ + return TCP_SKB_CB(skb)->bpf.flags & BPF_F_INGRESS; +} + +static inline struct sock *tcp_skb_bpf_redirect_fetch(struct sk_buff *skb) +{ + return TCP_SKB_CB(skb)->bpf.sk_redir; +} + +static inline void tcp_skb_bpf_redirect_clear(struct sk_buff *skb) +{ + TCP_SKB_CB(skb)->bpf.sk_redir = NULL; +} + #if IS_ENABLED(CONFIG_IPV6) /* This is the variant of inet6_iif() that must be used by TCP, * as TCP moves IP6CB into a different location in skb->cb[] @@ -1113,7 +1124,7 @@ void tcp_rate_check_app_limited(struct sock *sk); */ static inline int tcp_is_sack(const struct tcp_sock *tp) { - return tp->rx_opt.sack_ok; + return likely(tp->rx_opt.sack_ok); } static inline bool tcp_is_reno(const struct tcp_sock *tp) @@ -1234,8 +1245,31 @@ static inline bool tcp_needs_internal_pacing(const struct sock *sk) return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED; } +/* Return in jiffies the delay before one skb is sent. + * If @skb is NULL, we look at EDT for next packet being sent on the socket. + */ +static inline unsigned long tcp_pacing_delay(const struct sock *sk, + const struct sk_buff *skb) +{ + s64 pacing_delay = skb ? skb->tstamp : tcp_sk(sk)->tcp_wstamp_ns; + + pacing_delay -= tcp_sk(sk)->tcp_clock_cache; + + return pacing_delay > 0 ? nsecs_to_jiffies(pacing_delay) : 0; +} + +static inline void tcp_reset_xmit_timer(struct sock *sk, + const int what, + unsigned long when, + const unsigned long max_when, + const struct sk_buff *skb) +{ + inet_csk_reset_xmit_timer(sk, what, when + tcp_pacing_delay(sk, skb), + max_when); +} + /* Something is really bad, we could not queue an additional packet, - * because qdisc is full or receiver sent a 0 window. + * because qdisc is full or receiver sent a 0 window, or we are paced. * We do not want to add fuel to the fire, or abort too early, * so make sure the timer we arm now is at least 200ms in the future, * regardless of current icsk_rto value (as it could be ~2ms) @@ -1257,8 +1291,9 @@ static inline unsigned long tcp_probe0_when(const struct sock *sk, static inline void tcp_check_probe_timer(struct sock *sk) { if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending) - inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, - tcp_probe0_base(sk), TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, + tcp_probe0_base(sk), TCP_RTO_MAX, + NULL); } static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq) @@ -1280,33 +1315,16 @@ static inline __sum16 tcp_v4_check(int len, __be32 saddr, return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base); } -static inline __sum16 __tcp_checksum_complete(struct sk_buff *skb) -{ - return __skb_checksum_complete(skb); -} - static inline bool tcp_checksum_complete(struct sk_buff *skb) { return !skb_csum_unnecessary(skb) && - __tcp_checksum_complete(skb); + __skb_checksum_complete(skb); } bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb); int tcp_filter(struct sock *sk, struct sk_buff *skb); - -#undef STATE_TRACE - -#ifdef STATE_TRACE -static const char *statename[]={ - "Unused","Established","Syn Sent","Syn Recv", - "Fin Wait 1","Fin Wait 2","Time Wait", "Close", - "Close Wait","Last ACK","Listen","Closing" -}; -#endif void tcp_set_state(struct sock *sk, int state); - void tcp_done(struct sock *sk); - int tcp_abort(struct sock *sk, int err); static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) @@ -1350,7 +1368,7 @@ static inline int tcp_win_from_space(const struct sock *sk, int space) /* Note: caller must be prepared to deal with negative returns */ static inline int tcp_space(const struct sock *sk) { - return tcp_win_from_space(sk, sk->sk_rcvbuf - + return tcp_win_from_space(sk, sk->sk_rcvbuf - sk->sk_backlog.len - atomic_read(&sk->sk_rmem_alloc)); } @@ -1537,9 +1555,21 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, const struct sock *addr_sk); #ifdef CONFIG_TCP_MD5SIG -struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk, - const union tcp_md5_addr *addr, - int family); +#include <linux/jump_label.h> +extern struct static_key tcp_md5_needed; +struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, + const union tcp_md5_addr *addr, + int family); +static inline struct tcp_md5sig_key * +tcp_md5_do_lookup(const struct sock *sk, + const union tcp_md5_addr *addr, + int family) +{ + if (!static_key_false(&tcp_md5_needed)) + return NULL; + return __tcp_md5_do_lookup(sk, addr, family); +} + #define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key) #else static inline struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk, @@ -1840,12 +1870,16 @@ static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat; } -static inline bool tcp_stream_memory_free(const struct sock *sk) +/* @wake is one when sk_stream_write_space() calls us. + * This sends EPOLLOUT only if notsent_bytes is half the limit. + * This mimics the strategy used in sock_def_write_space(). + */ +static inline bool tcp_stream_memory_free(const struct sock *sk, int wake) { const struct tcp_sock *tp = tcp_sk(sk); u32 notsent_bytes = tp->write_seq - tp->snd_nxt; - return notsent_bytes < tcp_notsent_lowat(tp); + return (notsent_bytes << wake) < tcp_notsent_lowat(tp); } #ifdef CONFIG_PROC_FS @@ -1940,7 +1974,7 @@ static inline s64 tcp_rto_delta_us(const struct sock *sk) { const struct sk_buff *skb = tcp_rtx_queue_head(sk); u32 rto = inet_csk(sk)->icsk_rto; - u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto); + u64 rto_time_stamp_us = tcp_skb_timestamp_us(skb) + jiffies_to_usecs(rto); return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp; } @@ -2040,11 +2074,6 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer); #define TCP_ULP_MAX 128 #define TCP_ULP_BUF_MAX (TCP_ULP_NAME_MAX*TCP_ULP_MAX) -enum { - TCP_ULP_TLS, - TCP_ULP_BPF, -}; - struct tcp_ulp_ops { struct list_head list; @@ -2053,15 +2082,12 @@ struct tcp_ulp_ops { /* cleanup ulp */ void (*release)(struct sock *sk); - int uid; char name[TCP_ULP_NAME_MAX]; - bool user_visible; struct module *owner; }; int tcp_register_ulp(struct tcp_ulp_ops *type); void tcp_unregister_ulp(struct tcp_ulp_ops *type); int tcp_set_ulp(struct sock *sk, const char *name); -int tcp_set_ulp_id(struct sock *sk, const int ulp); void tcp_get_available_ulp(char *buf, size_t len); void tcp_cleanup_ulp(struct sock *sk); @@ -2069,6 +2095,18 @@ void tcp_cleanup_ulp(struct sock *sk); __MODULE_INFO(alias, alias_userspace, name); \ __MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name) +struct sk_msg; +struct sk_psock; + +int tcp_bpf_init(struct sock *sk); +void tcp_bpf_reinit(struct sock *sk); +int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes, + int flags); +int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int nonblock, int flags, int *addr_len); +int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, + struct msghdr *msg, int len, int flags); + /* Call BPF_SOCK_OPS program that returns an int. If the return value * is < 0, then the BPF op failed (for example if the loaded BPF * program does not support the chosen operation or there is no BPF diff --git a/include/net/tls.h b/include/net/tls.h index d5c683e8bb22..2a6ac8d642af 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -39,9 +39,11 @@ #include <linux/crypto.h> #include <linux/socket.h> #include <linux/tcp.h> +#include <linux/skmsg.h> + #include <net/tcp.h> #include <net/strparser.h> - +#include <crypto/aead.h> #include <uapi/linux/tls.h> @@ -74,6 +76,10 @@ * * void (*unhash)(struct tls_device *device, struct sock *sk); * This function cleans listen state set by Inline TLS driver + * + * void (*release)(struct kref *kref); + * Release the registered device and allocated resources + * @kref: Number of reference to tls_device */ struct tls_device { char name[TLS_DEVICE_NAME_MAX]; @@ -81,6 +87,8 @@ struct tls_device { int (*feature)(struct tls_device *device); int (*hash)(struct tls_device *device, struct sock *sk); void (*unhash)(struct tls_device *device, struct sock *sk); + void (*release)(struct kref *kref); + struct kref kref; }; enum { @@ -93,24 +101,45 @@ enum { TLS_NUM_CONFIG, }; -struct tls_sw_context_tx { - struct crypto_aead *aead_send; - struct crypto_wait async_wait; - - char aad_space[TLS_AAD_SPACE_SIZE]; - - unsigned int sg_plaintext_size; - int sg_plaintext_num_elem; - struct scatterlist sg_plaintext_data[MAX_SKB_FRAGS]; +/* TLS records are maintained in 'struct tls_rec'. It stores the memory pages + * allocated or mapped for each TLS record. After encryption, the records are + * stores in a linked list. + */ +struct tls_rec { + struct list_head list; + int tx_ready; + int tx_flags; + int inplace_crypto; - unsigned int sg_encrypted_size; - int sg_encrypted_num_elem; - struct scatterlist sg_encrypted_data[MAX_SKB_FRAGS]; + struct sk_msg msg_plaintext; + struct sk_msg msg_encrypted; - /* AAD | sg_plaintext_data | sg_tag */ + /* AAD | msg_plaintext.sg.data | sg_tag */ struct scatterlist sg_aead_in[2]; - /* AAD | sg_encrypted_data (data contain overhead for hdr&iv&tag) */ + /* AAD | msg_encrypted.sg.data (data contains overhead for hdr & iv & tag) */ struct scatterlist sg_aead_out[2]; + + char aad_space[TLS_AAD_SPACE_SIZE]; + struct aead_request aead_req; + u8 aead_req_ctx[]; +}; + +struct tx_work { + struct delayed_work work; + struct sock *sk; +}; + +struct tls_sw_context_tx { + struct crypto_aead *aead_send; + struct crypto_wait async_wait; + struct tx_work tx_work; + struct tls_rec *open_rec; + struct list_head tx_list; + atomic_t encrypt_pending; + int async_notify; + +#define BIT_TX_SCHEDULED 0 + unsigned long tx_bitmask; }; struct tls_sw_context_rx { @@ -119,11 +148,12 @@ struct tls_sw_context_rx { struct strparser strp; void (*saved_data_ready)(struct sock *sk); - unsigned int (*sk_poll)(struct file *file, struct socket *sock, - struct poll_table_struct *wait); + struct sk_buff *recv_pkt; u8 control; bool decrypted; + atomic_t decrypt_pending; + bool async_notify; }; struct tls_record_info { @@ -171,15 +201,14 @@ struct cipher_context { char *rec_seq; }; +union tls_crypto_context { + struct tls_crypto_info info; + struct tls12_crypto_info_aes_gcm_128 aes_gcm_128; +}; + struct tls_context { - union { - struct tls_crypto_info crypto_send; - struct tls12_crypto_info_aes_gcm_128 crypto_send_aes_gcm_128; - }; - union { - struct tls_crypto_info crypto_recv; - struct tls12_crypto_info_aes_gcm_128 crypto_recv_aes_gcm_128; - }; + union tls_crypto_context crypto_send; + union tls_crypto_context crypto_recv; struct list_head list; struct net_device *netdev; @@ -196,10 +225,11 @@ struct tls_context { struct scatterlist *partially_sent_record; u16 partially_sent_offset; + unsigned long flags; bool in_tcp_sendpages; + bool pending_open_record_frags; - u16 pending_open_record_frags; int (*push_pending_record)(struct sock *sk, int flags); void (*sk_write_space)(struct sock *sk); @@ -247,8 +277,7 @@ void tls_sw_free_resources_rx(struct sock *sk); void tls_sw_release_resources_rx(struct sock *sk); int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); -unsigned int tls_sw_poll(struct file *file, struct socket *sock, - struct poll_table_struct *wait); +bool tls_sw_stream_read(const struct sock *sk); ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); @@ -260,6 +289,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page, void tls_device_sk_destruct(struct sock *sk); void tls_device_init(void); void tls_device_cleanup(void); +int tls_tx_records(struct sock *sk, int flags); struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context, u32 seq, u64 *p_record_sn); @@ -278,6 +308,9 @@ void tls_sk_destruct(struct sock *sk, struct tls_context *ctx); int tls_push_sg(struct sock *sk, struct tls_context *ctx, struct scatterlist *sg, u16 first_offset, int flags); +int tls_push_partial_record(struct sock *sk, struct tls_context *ctx, + int flags); + int tls_push_pending_closed_record(struct sock *sk, struct tls_context *ctx, int flags, long *timeo); @@ -311,6 +344,17 @@ static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx) return tls_ctx->pending_open_record_frags; } +static inline bool is_tx_ready(struct tls_sw_context_tx *ctx) +{ + struct tls_rec *rec; + + rec = list_first_entry(&ctx->tx_list, struct tls_rec, list); + if (!rec) + return false; + + return READ_ONCE(rec->tx_ready); +} + struct sk_buff * tls_validate_xmit_skb(struct sock *sk, struct net_device *dev, struct sk_buff *skb); @@ -367,8 +411,8 @@ static inline void tls_fill_prepend(struct tls_context *ctx, * size KTLS_DTLS_HEADER_SIZE + KTLS_DTLS_NONCE_EXPLICIT_SIZE */ buf[0] = record_type; - buf[1] = TLS_VERSION_MINOR(ctx->crypto_send.version); - buf[2] = TLS_VERSION_MAJOR(ctx->crypto_send.version); + buf[1] = TLS_VERSION_MINOR(ctx->crypto_send.info.version); + buf[2] = TLS_VERSION_MAJOR(ctx->crypto_send.info.version); /* we can use IV for nonce explicit according to spec */ buf[3] = pkt_len >> 8; buf[4] = pkt_len & 0xFF; @@ -416,6 +460,15 @@ tls_offload_ctx_tx(const struct tls_context *tls_ctx) return (struct tls_offload_context_tx *)tls_ctx->priv_ctx_tx; } +static inline bool tls_sw_has_ctx_tx(const struct sock *sk) +{ + struct tls_context *ctx = tls_get_ctx(sk); + + if (!ctx) + return false; + return !!tls_sw_ctx_tx(ctx); +} + static inline struct tls_offload_context_rx * tls_offload_ctx_rx(const struct tls_context *tls_ctx) { diff --git a/include/net/udp.h b/include/net/udp.h index 8482a990b0bb..fd6d948755c8 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -252,6 +252,17 @@ static inline int udp_rqueue_get(struct sock *sk) return sk_rmem_alloc_get(sk) - READ_ONCE(udp_sk(sk)->forward_deficit); } +static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if, + int dif, int sdif) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept, + bound_dev_if, dif, sdif); +#else + return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); +#endif +} + /* net/ipv4/udp.c */ void udp_destruct_sock(struct sock *sk); void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); @@ -272,7 +283,7 @@ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst); int udp_get_port(struct sock *sk, unsigned short snum, int (*saddr_cmp)(const struct sock *, const struct sock *)); -void udp_err(struct sk_buff *, u32); +int udp_err(struct sk_buff *, u32); int udp_abort(struct sock *sk, int err); int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); int udp_push_pending_frames(struct sock *sk); @@ -406,17 +417,24 @@ static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, } while(0) #if IS_ENABLED(CONFIG_IPV6) -#define __UDPX_INC_STATS(sk, field) \ -do { \ - if ((sk)->sk_family == AF_INET) \ - __UDP_INC_STATS(sock_net(sk), field, 0); \ - else \ - __UDP6_INC_STATS(sock_net(sk), field, 0); \ -} while (0) +#define __UDPX_MIB(sk, ipv4) \ +({ \ + ipv4 ? (IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_statistics : \ + sock_net(sk)->mib.udp_statistics) : \ + (IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_stats_in6 : \ + sock_net(sk)->mib.udp_stats_in6); \ +}) #else -#define __UDPX_INC_STATS(sk, field) __UDP_INC_STATS(sock_net(sk), field, 0) +#define __UDPX_MIB(sk, ipv4) \ +({ \ + IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_statistics : \ + sock_net(sk)->mib.udp_statistics; \ +}) #endif +#define __UDPX_INC_STATS(sk, field) \ + __SNMP_INC_STATS(__UDPX_MIB(sk, (sk)->sk_family == AF_INET), field) + #ifdef CONFIG_PROC_FS struct udp_seq_afinfo { sa_family_t family; @@ -443,9 +461,33 @@ int udpv4_offload_init(void); void udp_init(void); +DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key); void udp_encap_enable(void); #if IS_ENABLED(CONFIG_IPV6) +DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); void udpv6_encap_enable(void); #endif +static inline struct sk_buff *udp_rcv_segment(struct sock *sk, + struct sk_buff *skb, bool ipv4) +{ + struct sk_buff *segs; + + /* the GSO CB lays after the UDP one, no need to save and restore any + * CB fragment + */ + segs = __skb_gso_segment(skb, NETIF_F_SG, false); + if (unlikely(IS_ERR_OR_NULL(segs))) { + int segs_nr = skb_shinfo(skb)->gso_segs; + + atomic_add(segs_nr, &sk->sk_drops); + SNMP_ADD_STATS(__UDPX_MIB(sk, ipv4), UDP_MIB_INERRORS, segs_nr); + kfree_skb(skb); + return NULL; + } + + consume_skb(skb); + return segs; +} + #endif /* _UDP_H */ diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index fe680ab6b15a..b8137953fea3 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -30,6 +30,7 @@ struct udp_port_cfg { __be16 local_udp_port; __be16 peer_udp_port; + int bind_ifindex; unsigned int use_udp_checksums:1, use_udp6_tx_checksums:1, use_udp6_rx_checksums:1, @@ -64,6 +65,8 @@ static inline int udp_sock_create(struct net *net, } typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb); +typedef int (*udp_tunnel_encap_err_lookup_t)(struct sock *sk, + struct sk_buff *skb); typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk); typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk, struct list_head *head, @@ -76,6 +79,7 @@ struct udp_tunnel_sock_cfg { /* Used for setting up udp_sock fields, see udp.h for details */ __u8 encap_type; udp_tunnel_encap_rcv_t encap_rcv; + udp_tunnel_encap_err_lookup_t encap_err_lookup; udp_tunnel_encap_destroy_t encap_destroy; udp_tunnel_gro_receive_t gro_receive; udp_tunnel_gro_complete_t gro_complete; @@ -165,6 +169,12 @@ static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) static inline void udp_tunnel_encap_enable(struct socket *sock) { + struct udp_sock *up = udp_sk(sock->sk); + + if (up->encap_enabled) + return; + + up->encap_enabled = 1; #if IS_ENABLED(CONFIG_IPV6) if (sock->sk->sk_family == PF_INET6) ipv6_stub->udpv6_encap_enable(); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index b99a02ae3934..236403eb5ba6 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -5,7 +5,8 @@ #include <linux/if_vlan.h> #include <net/udp_tunnel.h> #include <net/dst_metadata.h> -#include <net/udp_tunnel.h> +#include <net/rtnetlink.h> +#include <net/switchdev.h> /* VXLAN protocol (RFC 7348) header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -191,6 +192,7 @@ union vxlan_addr { struct vxlan_rdst { union vxlan_addr remote_ip; __be16 remote_port; + u8 offloaded:1; __be32 remote_vni; u32 remote_ifindex; struct list_head list; @@ -214,6 +216,7 @@ struct vxlan_config { unsigned long age_interval; unsigned int addrmax; bool no_share; + enum ifla_vxlan_df df; }; struct vxlan_dev_node { @@ -371,4 +374,81 @@ static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs) return vs->sock->sk->sk_family; } +#if IS_ENABLED(CONFIG_IPV6) + +static inline bool vxlan_addr_any(const union vxlan_addr *ipa) +{ + if (ipa->sa.sa_family == AF_INET6) + return ipv6_addr_any(&ipa->sin6.sin6_addr); + else + return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY); +} + +static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa) +{ + if (ipa->sa.sa_family == AF_INET6) + return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr); + else + return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr)); +} + +#else /* !IS_ENABLED(CONFIG_IPV6) */ + +static inline bool vxlan_addr_any(const union vxlan_addr *ipa) +{ + return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY); +} + +static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa) +{ + return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr)); +} + +#endif /* IS_ENABLED(CONFIG_IPV6) */ + +static inline bool netif_is_vxlan(const struct net_device *dev) +{ + return dev->rtnl_link_ops && + !strcmp(dev->rtnl_link_ops->kind, "vxlan"); +} + +struct switchdev_notifier_vxlan_fdb_info { + struct switchdev_notifier_info info; /* must be first */ + union vxlan_addr remote_ip; + __be16 remote_port; + __be32 remote_vni; + u32 remote_ifindex; + u8 eth_addr[ETH_ALEN]; + __be32 vni; + bool offloaded; + bool added_by_user; +}; + +#if IS_ENABLED(CONFIG_VXLAN) +int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni, + struct switchdev_notifier_vxlan_fdb_info *fdb_info); +int vxlan_fdb_replay(const struct net_device *dev, __be32 vni, + struct notifier_block *nb); +void vxlan_fdb_clear_offload(const struct net_device *dev, __be32 vni); + +#else +static inline int +vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni, + struct switchdev_notifier_vxlan_fdb_info *fdb_info) +{ + return -ENOENT; +} + +static inline int vxlan_fdb_replay(const struct net_device *dev, __be32 vni, + struct notifier_block *nb) +{ + return -EOPNOTSUPP; +} + +static inline void +vxlan_fdb_clear_offload(const struct net_device *dev, __be32 vni) +{ +} +#endif + #endif diff --git a/include/net/xdp.h b/include/net/xdp.h index 76b95256c266..0f25b3675c5c 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -91,6 +91,8 @@ static inline void xdp_scrub_frame(struct xdp_frame *frame) frame->dev_rx = NULL; } +struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp); + /* Convert xdp_buff to xdp_frame */ static inline struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp) @@ -99,9 +101,8 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp) int metasize; int headroom; - /* TODO: implement clone, copy, use "native" MEM_TYPE */ if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) - return NULL; + return xdp_convert_zc_to_xdp_frame(xdp); /* Assure headroom is available for storing info */ headroom = xdp->data - xdp->data_hard_start; @@ -135,6 +136,7 @@ void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq); bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq); int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, enum xdp_mem_type type, void *allocator); +void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq); /* Drivers not supporting XDP metadata can use this helper, which * rejects any room expansion for metadata as a result. diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 7161856bcf9c..13acb9803a6d 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -16,21 +16,23 @@ struct net_device; struct xsk_queue; -struct xdp_umem_props { - u64 chunk_mask; - u64 size; -}; - struct xdp_umem_page { void *addr; dma_addr_t dma; }; +struct xdp_umem_fq_reuse { + u32 nentries; + u32 length; + u64 handles[]; +}; + struct xdp_umem { struct xsk_queue *fq; struct xsk_queue *cq; struct xdp_umem_page *pages; - struct xdp_umem_props props; + u64 chunk_mask; + u64 size; u32 headroom; u32 chunk_size_nohr; struct user_struct *user; @@ -41,6 +43,7 @@ struct xdp_umem { struct page **pgs; u32 npgs; struct net_device *dev; + struct xdp_umem_fq_reuse *fq_reuse; u16 queue_id; bool zc; spinlock_t xsk_list_lock; @@ -79,6 +82,50 @@ void xsk_umem_discard_addr(struct xdp_umem *umem); void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries); bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len); void xsk_umem_consume_tx_done(struct xdp_umem *umem); +struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries); +struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem, + struct xdp_umem_fq_reuse *newq); +void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq); +struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, u16 queue_id); + +static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr) +{ + return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1)); +} + +static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr) +{ + return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1)); +} + +/* Reuse-queue aware version of FILL queue helpers */ +static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr) +{ + struct xdp_umem_fq_reuse *rq = umem->fq_reuse; + + if (!rq->length) + return xsk_umem_peek_addr(umem, addr); + + *addr = rq->handles[rq->length - 1]; + return addr; +} + +static inline void xsk_umem_discard_addr_rq(struct xdp_umem *umem) +{ + struct xdp_umem_fq_reuse *rq = umem->fq_reuse; + + if (!rq->length) + xsk_umem_discard_addr(umem); + else + rq->length--; +} + +static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr) +{ + struct xdp_umem_fq_reuse *rq = umem->fq_reuse; + + rq->handles[rq->length++] = addr; +} #else static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) { @@ -98,6 +145,74 @@ static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) { return false; } + +static inline u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr) +{ + return NULL; +} + +static inline void xsk_umem_discard_addr(struct xdp_umem *umem) +{ +} + +static inline void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries) +{ +} + +static inline bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, + u32 *len) +{ + return false; +} + +static inline void xsk_umem_consume_tx_done(struct xdp_umem *umem) +{ +} + +static inline struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries) +{ + return NULL; +} + +static inline struct xdp_umem_fq_reuse *xsk_reuseq_swap( + struct xdp_umem *umem, + struct xdp_umem_fq_reuse *newq) +{ + return NULL; +} +static inline void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq) +{ +} + +static inline struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, + u16 queue_id) +{ + return NULL; +} + +static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr) +{ + return NULL; +} + +static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr) +{ + return 0; +} + +static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr) +{ + return NULL; +} + +static inline void xsk_umem_discard_addr_rq(struct xdp_umem *umem) +{ +} + +static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr) +{ +} + #endif /* CONFIG_XDP_SOCKETS */ #endif /* _LINUX_XDP_SOCK_H */ diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 0eb390c205af..7298a53b9702 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -577,6 +577,7 @@ struct xfrm_policy { /* This lock only affects elements except for entry. */ rwlock_t lock; refcount_t refcnt; + u32 pos; struct timer_list timer; atomic_t genid; @@ -589,6 +590,7 @@ struct xfrm_policy { struct xfrm_lifetime_cur curlft; struct xfrm_policy_walk_entry walk; struct xfrm_policy_queue polq; + bool bydst_reinsert; u8 type; u8 action; u8 flags; @@ -596,6 +598,7 @@ struct xfrm_policy { u16 family; struct xfrm_sec_ctx *security; struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH]; + struct hlist_node bydst_inexact_list; struct rcu_head rcu; }; @@ -1093,7 +1096,6 @@ struct xfrm_offload { }; struct sec_path { - refcount_t refcnt; int len; int olen; @@ -1101,41 +1103,13 @@ struct sec_path { struct xfrm_offload ovec[XFRM_MAX_OFFLOAD_DEPTH]; }; -static inline int secpath_exists(struct sk_buff *skb) -{ -#ifdef CONFIG_XFRM - return skb->sp != NULL; -#else - return 0; -#endif -} - -static inline struct sec_path * -secpath_get(struct sec_path *sp) -{ - if (sp) - refcount_inc(&sp->refcnt); - return sp; -} - -void __secpath_destroy(struct sec_path *sp); - -static inline void -secpath_put(struct sec_path *sp) -{ - if (sp && refcount_dec_and_test(&sp->refcnt)) - __secpath_destroy(sp); -} - -struct sec_path *secpath_dup(struct sec_path *src); -int secpath_set(struct sk_buff *skb); +struct sec_path *secpath_set(struct sk_buff *skb); static inline void secpath_reset(struct sk_buff *skb) { #ifdef CONFIG_XFRM - secpath_put(skb->sp); - skb->sp = NULL; + skb_ext_del(skb, SKB_EXT_SEC_PATH); #endif } @@ -1191,7 +1165,7 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir, if (sk && sk->sk_policy[XFRM_POLICY_IN]) return __xfrm_policy_check(sk, ndir, skb, family); - return (!net->xfrm.policy_count[dir] && !skb->sp) || + return (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) || (skb_dst(skb)->flags & DST_NOPOLICY) || __xfrm_policy_check(sk, ndir, skb, family); } @@ -1552,6 +1526,7 @@ int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *); void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net); struct xfrm_state *xfrm_state_alloc(struct net *net); +void xfrm_state_free(struct xfrm_state *x); struct xfrm_state *xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, const struct flowi *fl, @@ -1902,14 +1877,16 @@ static inline void xfrm_states_delete(struct xfrm_state **states, int n) #ifdef CONFIG_XFRM static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb) { - return skb->sp->xvec[skb->sp->len - 1]; + struct sec_path *sp = skb_sec_path(skb); + + return sp->xvec[sp->len - 1]; } #endif static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb) { #ifdef CONFIG_XFRM - struct sec_path *sp = skb->sp; + struct sec_path *sp = skb_sec_path(skb); if (!sp || !sp->olen || sp->len != sp->olen) return NULL; @@ -1967,7 +1944,7 @@ static inline void xfrm_dev_state_delete(struct xfrm_state *x) static inline void xfrm_dev_state_free(struct xfrm_state *x) { struct xfrm_state_offload *xso = &x->xso; - struct net_device *dev = xso->dev; + struct net_device *dev = xso->dev; if (dev && dev->xfrmdev_ops) { if (dev->xfrmdev_ops->xdo_dev_state_free) |