Diffstat (limited to 'net')
-rw-r--r-- net/802/mrp.c | 6
-rw-r--r-- net/8021q/vlan_dev.c | 4
-rw-r--r-- net/Kconfig | 7
-rw-r--r-- net/Makefile | 2
-rw-r--r-- net/atm/lec.c | 2
-rw-r--r-- net/batman-adv/main.c | 2
-rw-r--r-- net/bluetooth/bnep/netdev.c | 2
-rw-r--r-- net/bridge/br.c | 2
-rw-r--r-- net/bridge/br_netfilter_hooks.c | 3
-rw-r--r-- net/bridge/br_netlink.c | 13
-rw-r--r-- net/bridge/br_private.h | 2
-rw-r--r-- net/bridge/br_stp.c | 15
-rw-r--r-- net/bridge/br_stp_bpdu.c | 4
-rw-r--r-- net/can/j1939/socket.c | 10
-rw-r--r-- net/core/Makefile | 2
-rw-r--r-- net/core/dev.c | 2
-rw-r--r-- net/core/dev_ioctl.c | 1
-rw-r--r-- net/core/filter.c | 141
-rw-r--r-- net/core/flow_dissector.c | 10
-rw-r--r-- net/core/neighbour.c | 3
-rw-r--r-- net/core/net-sysfs.c | 7
-rw-r--r-- net/core/rtnetlink.c | 5
-rw-r--r-- net/core/skbuff.c | 10
-rw-r--r-- net/core/sock.c | 2
-rw-r--r-- net/core/sysctl_net_core.c | 2
-rw-r--r-- net/core/timestamping.c | 20
-rw-r--r-- net/core/xdp.c | 4
-rw-r--r-- net/dccp/proto.c | 2
-rw-r--r-- net/dsa/Kconfig | 6
-rw-r--r-- net/dsa/Makefile | 1
-rw-r--r-- net/dsa/dsa2.c | 3
-rw-r--r-- net/dsa/dsa_priv.h | 16
-rw-r--r-- net/dsa/port.c | 38
-rw-r--r-- net/dsa/tag_ar9331.c | 96
-rw-r--r-- net/dsa/tag_ksz.c | 8
-rw-r--r-- net/ethtool/Makefile | 3
-rw-r--r-- net/ethtool/common.c | 171
-rw-r--r-- net/ethtool/common.h | 22
-rw-r--r-- net/ethtool/ioctl.c (renamed from net/core/ethtool.c) | 94
-rw-r--r-- net/ipv4/fib_trie.c | 121
-rw-r--r-- net/ipv4/inet_diag.c | 3
-rw-r--r-- net/ipv4/inet_hashtables.c | 16
-rw-r--r-- net/ipv4/ip_gre.c | 4
-rw-r--r-- net/ipv4/ip_vti.c | 4
-rw-r--r-- net/ipv4/tcp.c | 10
-rw-r--r-- net/ipv4/tcp_bbr.c | 3
-rw-r--r-- net/ipv4/tcp_ipv4.c | 7
-rw-r--r-- net/ipv4/tcp_output.c | 17
-rw-r--r-- net/ipv6/addrconf.c | 8
-rw-r--r-- net/ipv6/ip6_fib.c | 108
-rw-r--r-- net/ipv6/ip6_gre.c | 4
-rw-r--r-- net/ipv6/route.c | 86
-rw-r--r-- net/iucv/af_iucv.c | 2
-rw-r--r-- net/llc/llc_station.c | 4
-rw-r--r-- net/mac80211/airtime.c | 2
-rw-r--r-- net/mac80211/debugfs_sta.c | 76
-rw-r--r-- net/mac80211/main.c | 4
-rw-r--r-- net/mac80211/sta_info.c | 3
-rw-r--r-- net/mac80211/sta_info.h | 1
-rw-r--r-- net/mac80211/tx.c | 13
-rw-r--r-- net/netfilter/nf_conntrack_core.c | 7
-rw-r--r-- net/netfilter/nf_conntrack_netlink.c | 3
-rw-r--r-- net/netfilter/nf_flow_table_offload.c | 83
-rw-r--r-- net/netfilter/nf_queue.c | 2
-rw-r--r-- net/netfilter/nf_tables_api.c | 22
-rw-r--r-- net/netfilter/nf_tables_offload.c | 6
-rw-r--r-- net/netfilter/nfnetlink_cthelper.c | 2
-rw-r--r-- net/netfilter/nft_bitwise.c | 4
-rw-r--r-- net/netfilter/nft_cmp.c | 6
-rw-r--r-- net/netfilter/nft_ct.c | 12
-rw-r--r-- net/netfilter/nft_masq.c | 2
-rw-r--r-- net/netfilter/nft_nat.c | 6
-rw-r--r-- net/netfilter/nft_range.c | 10
-rw-r--r-- net/netfilter/nft_redir.c | 2
-rw-r--r-- net/netfilter/nft_set_rbtree.c | 21
-rw-r--r-- net/netfilter/nft_tproxy.c | 4
-rw-r--r-- net/netfilter/xt_RATEEST.c | 2
-rw-r--r-- net/netlink/af_netlink.c | 2
-rw-r--r-- net/nfc/nci/uart.c | 2
-rw-r--r-- net/openvswitch/actions.c | 30
-rw-r--r-- net/openvswitch/datapath.c | 2
-rw-r--r-- net/openvswitch/flow.h | 4
-rw-r--r-- net/openvswitch/flow_netlink.c | 34
-rw-r--r-- net/packet/af_packet.c | 31
-rw-r--r-- net/rfkill/core.c | 7
-rw-r--r-- net/rxrpc/af_rxrpc.c | 2
-rw-r--r-- net/sched/Kconfig | 17
-rw-r--r-- net/sched/Makefile | 1
-rw-r--r-- net/sched/act_ct.c | 4
-rw-r--r-- net/sched/cls_flower.c | 2
-rw-r--r-- net/sched/cls_u32.c | 25
-rw-r--r-- net/sched/sch_cake.c | 59
-rw-r--r-- net/sched/sch_ets.c | 828
-rw-r--r-- net/sched/sch_generic.c | 2
-rw-r--r-- net/sctp/outqueue.c | 6
-rw-r--r-- net/sctp/protocol.c | 5
-rw-r--r-- net/sctp/stream.c | 8
-rw-r--r-- net/smc/af_smc.c | 14
-rw-r--r-- net/smc/smc_core.c | 5
-rw-r--r-- net/socket.c | 16
-rw-r--r-- net/tipc/bcast.c | 24
-rw-r--r-- net/tipc/discover.c | 6
-rw-r--r-- net/tipc/link.c | 8
-rw-r--r-- net/tipc/name_table.c | 279
-rw-r--r-- net/tipc/net.c | 56
-rw-r--r-- net/tipc/net.h | 1
-rw-r--r-- net/tipc/netlink.c | 6
-rw-r--r-- net/tipc/socket.c | 32
-rw-r--r-- net/tls/tls_device.c | 5
-rw-r--r-- net/unix/af_unix.c | 58
-rw-r--r-- net/vmw_vsock/Kconfig | 12
-rw-r--r-- net/vmw_vsock/Makefile | 1
-rw-r--r-- net/vmw_vsock/af_vsock.c | 45
-rw-r--r-- net/vmw_vsock/virtio_transport.c | 61
-rw-r--r-- net/vmw_vsock/virtio_transport_common.c | 28
-rw-r--r-- net/vmw_vsock/vmci_transport.c | 2
-rw-r--r-- net/vmw_vsock/vsock_loopback.c | 180
-rw-r--r-- net/wireless/core.c | 1
-rw-r--r-- net/wireless/nl80211.c | 3
-rw-r--r-- net/x25/af_x25.c | 8
-rw-r--r-- net/x25/x25_in.c | 32
-rw-r--r-- net/xdp/xsk.c | 22
122 files changed, 2655 insertions(+), 754 deletions(-)
diff --git a/net/802/mrp.c b/net/802/mrp.c
index 2cfdfbfbb2ed..bea6e43d45a0 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -523,7 +523,7 @@ int mrp_request_join(const struct net_device *dev,
struct mrp_attr *attr;
if (sizeof(struct mrp_skb_cb) + len >
- FIELD_SIZEOF(struct sk_buff, cb))
+ sizeof_field(struct sk_buff, cb))
return -ENOMEM;
spin_lock_bh(&app->lock);
@@ -548,7 +548,7 @@ void mrp_request_leave(const struct net_device *dev,
struct mrp_attr *attr;
if (sizeof(struct mrp_skb_cb) + len >
- FIELD_SIZEOF(struct sk_buff, cb))
+ sizeof_field(struct sk_buff, cb))
return;
spin_lock_bh(&app->lock);
@@ -692,7 +692,7 @@ static int mrp_pdu_parse_vecattr(struct mrp_applicant *app,
* advance to the next event in its Vector.
*/
if (sizeof(struct mrp_skb_cb) + mrp_cb(skb)->mh->attrlen >
- FIELD_SIZEOF(struct sk_buff, cb))
+ sizeof_field(struct sk_buff, cb))
return -1;
if (skb_copy_bits(skb, *offset, mrp_cb(skb)->attrvalue,
mrp_cb(skb)->mh->attrlen) < 0)
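Note: the FIELD_SIZEOF() -> sizeof_field() conversions that recur throughout this series are purely mechanical; both macros yield the size of a struct member at compile time without needing an instance. For reference, sizeof_field() lives in include/linux/stddef.h and is defined essentially as:

    #define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))

    /* Example: compile-time check that a control block fits in skb->cb,
     * mirroring the BUILD_BUG_ON() usage elsewhere in this series.
     */
    BUILD_BUG_ON(sizeof(struct mrp_skb_cb) > sizeof_field(struct sk_buff, cb));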
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index e5bff5cc6f97..5ff8059837b4 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -646,8 +646,8 @@ static int vlan_ethtool_get_ts_info(struct net_device *dev,
const struct ethtool_ops *ops = vlan->real_dev->ethtool_ops;
struct phy_device *phydev = vlan->real_dev->phydev;
- if (phydev && phydev->drv && phydev->drv->ts_info) {
- return phydev->drv->ts_info(phydev, info);
+ if (phy_has_tsinfo(phydev)) {
+ return phy_ts_info(phydev, info);
} else if (ops->get_ts_info) {
return ops->get_ts_info(vlan->real_dev, info);
} else {
diff --git a/net/Kconfig b/net/Kconfig
index bd191f978a23..52af65e5d28c 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -108,9 +108,10 @@ config NETWORK_PHY_TIMESTAMPING
bool "Timestamping in PHY devices"
select NET_PTP_CLASSIFY
help
- This allows timestamping of network packets by PHYs with
- hardware timestamping capabilities. This option adds some
- overhead in the transmit and receive paths.
+ This allows timestamping of network packets by PHYs (or
+ other MII bus snooping devices) with hardware timestamping
+ capabilities. This option adds some overhead in the transmit
+ and receive paths.
If you are unsure how to answer this question, answer N.
diff --git a/net/Makefile b/net/Makefile
index 449fc0b221f8..848303d98d3d 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -13,7 +13,7 @@ obj-$(CONFIG_NET) += $(tmp-y)
# LLC has to be linked before the files in net/802/
obj-$(CONFIG_LLC) += llc/
-obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ bpf/
+obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ bpf/ ethtool/
obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_INET) += ipv4/
obj-$(CONFIG_TLS) += tls/
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 5a77c235a212..b57368e70aab 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -194,7 +194,7 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb)
dev->stats.tx_bytes += skb->len;
}
-static void lec_tx_timeout(struct net_device *dev)
+static void lec_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
pr_info("%s\n", dev->name);
netif_trans_update(dev);
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 4a89177def64..4811ec65bc43 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -548,7 +548,7 @@ static void batadv_recv_handler_init(void)
BUILD_BUG_ON(sizeof(struct batadv_tvlv_tt_change) != 12);
BUILD_BUG_ON(sizeof(struct batadv_tvlv_roam_adv) != 8);
- i = FIELD_SIZEOF(struct sk_buff, cb);
+ i = sizeof_field(struct sk_buff, cb);
BUILD_BUG_ON(sizeof(struct batadv_skb_cb) > i);
/* broadcast packet */
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index 1d4d7d415730..cc1cff63194f 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -112,7 +112,7 @@ static int bnep_net_set_mac_addr(struct net_device *dev, void *arg)
return 0;
}
-static void bnep_net_timeout(struct net_device *dev)
+static void bnep_net_timeout(struct net_device *dev, unsigned int txqueue)
{
BT_DBG("net_timeout");
netif_wake_queue(dev);
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 8a8f9e5f264f..b6fe30e3768f 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -312,7 +312,7 @@ static int __init br_init(void)
{
int err;
- BUILD_BUG_ON(sizeof(struct br_input_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
+ BUILD_BUG_ON(sizeof(struct br_input_skb_cb) > sizeof_field(struct sk_buff, cb));
err = stp_proto_register(&br_stp_proto);
if (err < 0) {
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index af7800103e51..59980ecfc962 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -662,6 +662,9 @@ static unsigned int br_nf_forward_arp(void *priv,
nf_bridge_pull_encap_header(skb);
}
+ if (unlikely(!pskb_may_pull(skb, sizeof(struct arphdr))))
+ return NF_DROP;
+
if (arp_hdr(skb)->ar_pln != 4) {
if (is_vlan_arp(skb, state->net))
nf_bridge_push_encap_header(skb);
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index a0a54482aabc..60136575aea4 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1607,6 +1607,19 @@ static int br_fill_linkxstats(struct sk_buff *skb,
br_multicast_get_stats(br, p, nla_data(nla));
}
#endif
+
+ if (p) {
+ nla = nla_reserve_64bit(skb, BRIDGE_XSTATS_STP,
+ sizeof(p->stp_xstats),
+ BRIDGE_XSTATS_PAD);
+ if (!nla)
+ goto nla_put_failure;
+
+ spin_lock_bh(&br->lock);
+ memcpy(nla_data(nla), &p->stp_xstats, sizeof(p->stp_xstats));
+ spin_unlock_bh(&br->lock);
+ }
+
nla_nest_end(skb, nest);
*prividx = 0;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 36b0367ca1e0..f540f3bdf294 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -283,6 +283,8 @@ struct net_bridge_port {
#endif
u16 group_fwd_mask;
u16 backup_redirected_cnt;
+
+ struct bridge_stp_xstats stp_xstats;
};
#define kobj_to_brport(obj) container_of(obj, struct net_bridge_port, kobj)
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 1f1410f8d312..6856a6d9282b 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -45,6 +45,17 @@ void br_set_state(struct net_bridge_port *p, unsigned int state)
br_info(p->br, "port %u(%s) entered %s state\n",
(unsigned int) p->port_no, p->dev->name,
br_port_state_names[p->state]);
+
+ if (p->br->stp_enabled == BR_KERNEL_STP) {
+ switch (p->state) {
+ case BR_STATE_BLOCKING:
+ p->stp_xstats.transition_blk++;
+ break;
+ case BR_STATE_FORWARDING:
+ p->stp_xstats.transition_fwd++;
+ break;
+ }
+ }
}
/* called under bridge lock */
@@ -484,6 +495,8 @@ void br_received_config_bpdu(struct net_bridge_port *p,
struct net_bridge *br;
int was_root;
+ p->stp_xstats.rx_bpdu++;
+
br = p->br;
was_root = br_is_root_bridge(br);
@@ -517,6 +530,8 @@ void br_received_config_bpdu(struct net_bridge_port *p,
/* called under bridge lock */
void br_received_tcn_bpdu(struct net_bridge_port *p)
{
+ p->stp_xstats.rx_tcn++;
+
if (br_is_designated_port(p)) {
br_info(p->br, "port %u(%s) received tcn bpdu\n",
(unsigned int) p->port_no, p->dev->name);
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index 7796dd9d42d7..0e4572f31330 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -118,6 +118,8 @@ void br_send_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu)
br_set_ticks(buf+33, bpdu->forward_delay);
br_send_bpdu(p, buf, 35);
+
+ p->stp_xstats.tx_bpdu++;
}
/* called under bridge lock */
@@ -133,6 +135,8 @@ void br_send_tcn_bpdu(struct net_bridge_port *p)
buf[2] = 0;
buf[3] = BPDU_TYPE_TCN;
br_send_bpdu(p, buf, 4);
+
+ p->stp_xstats.tx_tcn++;
}
/*
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index de09b0a65791..f7587428febd 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -423,9 +423,9 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
{
struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
struct j1939_sock *jsk = j1939_sk(sock->sk);
- struct j1939_priv *priv = jsk->priv;
- struct sock *sk = sock->sk;
- struct net *net = sock_net(sk);
+ struct j1939_priv *priv;
+ struct sock *sk;
+ struct net *net;
int ret = 0;
ret = j1939_sk_sanity_check(addr, len);
@@ -434,6 +434,10 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
lock_sock(sock->sk);
+ priv = jsk->priv;
+ sk = sock->sk;
+ net = sock_net(sk);
+
/* Already bound to an interface? */
if (jsk->state & J1939_SOCK_BOUND) {
/* A re-bind() to a different interface is not
diff --git a/net/core/Makefile b/net/core/Makefile
index a104dc8faafc..3e2c378e5f31 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -8,7 +8,7 @@ obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \
obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
-obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
+obj-y += dev.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
fib_notifier.o xdp.o flow_offload.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 255d3cf35360..f1b8afcfbc0f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10182,7 +10182,7 @@ static struct hlist_head * __net_init netdev_create_hash(void)
static int __net_init netdev_init(struct net *net)
{
BUILD_BUG_ON(GRO_HASH_BUCKETS >
- 8 * FIELD_SIZEOF(struct napi_struct, gro_bitmask));
+ 8 * sizeof_field(struct napi_struct, gro_bitmask));
if (net != &init_net)
INIT_LIST_HEAD(&net->dev_base_head);
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 5163d900bb4f..dbaebbe573f0 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -187,6 +187,7 @@ static int net_hwtstamp_validate(struct ifreq *ifr)
case HWTSTAMP_TX_OFF:
case HWTSTAMP_TX_ON:
case HWTSTAMP_TX_ONESTEP_SYNC:
+ case HWTSTAMP_TX_ONESTEP_P2P:
tx_type_valid = 1;
break;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 217af9974c86..42fd17c48c5f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -274,7 +274,7 @@ static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
switch (skb_field) {
case SKF_AD_MARK:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4);
*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
offsetof(struct sk_buff, mark));
@@ -289,14 +289,14 @@ static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
break;
case SKF_AD_QUEUE:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, queue_mapping) != 2);
*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
offsetof(struct sk_buff, queue_mapping));
break;
case SKF_AD_VLAN_TAG:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_tci) != 2);
/* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
@@ -322,7 +322,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
switch (fp->k) {
case SKF_AD_OFF + SKF_AD_PROTOCOL:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, protocol) != 2);
/* A = *(u16 *) (CTX + offsetof(protocol)) */
*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
@@ -338,8 +338,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
case SKF_AD_OFF + SKF_AD_IFINDEX:
case SKF_AD_OFF + SKF_AD_HATYPE:
- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
+ BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != 4);
+ BUILD_BUG_ON(sizeof_field(struct net_device, type) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
BPF_REG_TMP, BPF_REG_CTX,
@@ -361,7 +361,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
break;
case SKF_AD_OFF + SKF_AD_RXHASH:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4);
*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
offsetof(struct sk_buff, hash));
@@ -385,7 +385,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
break;
case SKF_AD_OFF + SKF_AD_VLAN_TPID:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_proto) != 2);
/* A = *(u16 *) (CTX + offsetof(vlan_proto)) */
*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
@@ -2055,6 +2055,7 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
}
skb->dev = dev;
+ skb->tstamp = 0;
dev_xmit_recursion_inc();
ret = dev_queue_xmit(skb);
@@ -5548,8 +5549,8 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
#define BPF_TCP_SOCK_GET_COMMON(FIELD) \
do { \
- BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD) > \
- FIELD_SIZEOF(struct bpf_tcp_sock, FIELD)); \
+ BUILD_BUG_ON(sizeof_field(struct tcp_sock, FIELD) > \
+ sizeof_field(struct bpf_tcp_sock, FIELD)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\
si->dst_reg, si->src_reg, \
offsetof(struct tcp_sock, FIELD)); \
@@ -5557,9 +5558,9 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
#define BPF_INET_SOCK_GET_COMMON(FIELD) \
do { \
- BUILD_BUG_ON(FIELD_SIZEOF(struct inet_connection_sock, \
+ BUILD_BUG_ON(sizeof_field(struct inet_connection_sock, \
FIELD) > \
- FIELD_SIZEOF(struct bpf_tcp_sock, FIELD)); \
+ sizeof_field(struct bpf_tcp_sock, FIELD)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
struct inet_connection_sock, \
FIELD), \
@@ -5574,7 +5575,7 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
switch (si->off) {
case offsetof(struct bpf_tcp_sock, rtt_min):
- BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
+ BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) !=
sizeof(struct minmax));
BUILD_BUG_ON(sizeof(struct minmax) <
sizeof(struct minmax_sample));
@@ -5739,8 +5740,8 @@ u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
#define BPF_XDP_SOCK_GET(FIELD) \
do { \
- BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_sock, FIELD) > \
- FIELD_SIZEOF(struct bpf_xdp_sock, FIELD)); \
+ BUILD_BUG_ON(sizeof_field(struct xdp_sock, FIELD) > \
+ sizeof_field(struct bpf_xdp_sock, FIELD)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_sock, FIELD),\
si->dst_reg, si->src_reg, \
offsetof(struct xdp_sock, FIELD)); \
@@ -7303,7 +7304,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct __sk_buff, cb[0]) ...
offsetofend(struct __sk_buff, cb[4]) - 1:
- BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
+ BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, data) < 20);
BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
offsetof(struct qdisc_skb_cb, data)) %
sizeof(__u64));
@@ -7322,7 +7323,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct __sk_buff, tc_classid):
- BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, tc_classid) != 2);
+ BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, tc_classid) != 2);
off = si->off;
off -= offsetof(struct __sk_buff, tc_classid);
@@ -7393,7 +7394,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
#endif
break;
case offsetof(struct __sk_buff, family):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
si->dst_reg, si->src_reg,
@@ -7404,7 +7405,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
2, target_size));
break;
case offsetof(struct __sk_buff, remote_ip4):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
si->dst_reg, si->src_reg,
@@ -7415,7 +7416,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
4, target_size));
break;
case offsetof(struct __sk_buff, local_ip4):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_field(struct sock_common,
skc_rcv_saddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
@@ -7429,7 +7430,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct __sk_buff, remote_ip6[0]) ...
offsetof(struct __sk_buff, remote_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_field(struct sock_common,
skc_v6_daddr.s6_addr32[0]) != 4);
off = si->off;
@@ -7449,7 +7450,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct __sk_buff, local_ip6[0]) ...
offsetof(struct __sk_buff, local_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_field(struct sock_common,
skc_v6_rcv_saddr.s6_addr32[0]) != 4);
off = si->off;
@@ -7468,7 +7469,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct __sk_buff, remote_port):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
si->dst_reg, si->src_reg,
@@ -7483,7 +7484,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct __sk_buff, local_port):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
si->dst_reg, si->src_reg,
@@ -7494,7 +7495,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct __sk_buff, tstamp):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8);
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, tstamp) != 8);
if (type == BPF_WRITE)
*insn++ = BPF_STX_MEM(BPF_DW,
@@ -7532,7 +7533,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
target_size));
break;
case offsetof(struct __sk_buff, wire_len):
- BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4);
+ BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, pkt_len) != 4);
off = si->off;
off -= offsetof(struct __sk_buff, wire_len);
@@ -7562,7 +7563,7 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
switch (si->off) {
case offsetof(struct bpf_sock, bound_dev_if):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4);
+ BUILD_BUG_ON(sizeof_field(struct sock, sk_bound_dev_if) != 4);
if (type == BPF_WRITE)
*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
@@ -7573,7 +7574,7 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock, mark):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_mark) != 4);
+ BUILD_BUG_ON(sizeof_field(struct sock, sk_mark) != 4);
if (type == BPF_WRITE)
*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
@@ -7584,7 +7585,7 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock, priority):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_priority) != 4);
+ BUILD_BUG_ON(sizeof_field(struct sock, sk_priority) != 4);
if (type == BPF_WRITE)
*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
@@ -7600,7 +7601,7 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common,
skc_family,
- FIELD_SIZEOF(struct sock_common,
+ sizeof_field(struct sock_common,
skc_family),
target_size));
break;
@@ -7627,7 +7628,7 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_LDX_MEM(
BPF_SIZE(si->code), si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common, skc_rcv_saddr,
- FIELD_SIZEOF(struct sock_common,
+ sizeof_field(struct sock_common,
skc_rcv_saddr),
target_size));
break;
@@ -7636,7 +7637,7 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_LDX_MEM(
BPF_SIZE(si->code), si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common, skc_daddr,
- FIELD_SIZEOF(struct sock_common,
+ sizeof_field(struct sock_common,
skc_daddr),
target_size));
break;
@@ -7650,7 +7651,7 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
bpf_target_off(
struct sock_common,
skc_v6_rcv_saddr.s6_addr32[0],
- FIELD_SIZEOF(struct sock_common,
+ sizeof_field(struct sock_common,
skc_v6_rcv_saddr.s6_addr32[0]),
target_size) + off);
#else
@@ -7667,7 +7668,7 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
BPF_SIZE(si->code), si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common,
skc_v6_daddr.s6_addr32[0],
- FIELD_SIZEOF(struct sock_common,
+ sizeof_field(struct sock_common,
skc_v6_daddr.s6_addr32[0]),
target_size) + off);
#else
@@ -7681,7 +7682,7 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
BPF_FIELD_SIZEOF(struct sock_common, skc_num),
si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common, skc_num,
- FIELD_SIZEOF(struct sock_common,
+ sizeof_field(struct sock_common,
skc_num),
target_size));
break;
@@ -7691,7 +7692,7 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
BPF_FIELD_SIZEOF(struct sock_common, skc_dport),
si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common, skc_dport,
- FIELD_SIZEOF(struct sock_common,
+ sizeof_field(struct sock_common,
skc_dport),
target_size));
break;
@@ -7701,7 +7702,7 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
BPF_FIELD_SIZEOF(struct sock_common, skc_state),
si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common, skc_state,
- FIELD_SIZEOF(struct sock_common,
+ sizeof_field(struct sock_common,
skc_state),
target_size));
break;
@@ -7796,7 +7797,7 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
si->src_reg, offsetof(S, F)); \
*insn++ = BPF_LDX_MEM( \
SIZE, si->dst_reg, si->dst_reg, \
- bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
+ bpf_target_off(NS, NF, sizeof_field(NS, NF), \
target_size) \
+ OFF); \
} while (0)
@@ -7827,7 +7828,7 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \
si->dst_reg, offsetof(S, F)); \
*insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg, \
- bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
+ bpf_target_off(NS, NF, sizeof_field(NS, NF), \
target_size) \
+ OFF); \
*insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \
@@ -7889,8 +7890,8 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
*/
BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) !=
offsetof(struct sockaddr_in6, sin6_port));
- BUILD_BUG_ON(FIELD_SIZEOF(struct sockaddr_in, sin_port) !=
- FIELD_SIZEOF(struct sockaddr_in6, sin6_port));
+ BUILD_BUG_ON(sizeof_field(struct sockaddr_in, sin_port) !=
+ sizeof_field(struct sockaddr_in6, sin6_port));
SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(struct bpf_sock_addr_kern,
struct sockaddr_in6, uaddr,
sin6_port, tmp_reg);
@@ -7956,8 +7957,8 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
/* Helper macro for adding read access to tcp_sock or sock fields. */
#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
do { \
- BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
- FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
+ BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) > \
+ sizeof_field(struct bpf_sock_ops, BPF_FIELD)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
struct bpf_sock_ops_kern, \
is_fullsock), \
@@ -7990,8 +7991,8 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
do { \
int reg = BPF_REG_9; \
- BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
- FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
+ BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) > \
+ sizeof_field(struct bpf_sock_ops, BPF_FIELD)); \
if (si->dst_reg == reg || si->src_reg == reg) \
reg--; \
if (si->dst_reg == reg || si->src_reg == reg) \
@@ -8032,12 +8033,12 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
switch (si->off) {
case offsetof(struct bpf_sock_ops, op) ...
offsetof(struct bpf_sock_ops, replylong[3]):
- BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, op) !=
- FIELD_SIZEOF(struct bpf_sock_ops_kern, op));
- BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, reply) !=
- FIELD_SIZEOF(struct bpf_sock_ops_kern, reply));
- BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, replylong) !=
- FIELD_SIZEOF(struct bpf_sock_ops_kern, replylong));
+ BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, op) !=
+ sizeof_field(struct bpf_sock_ops_kern, op));
+ BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, reply) !=
+ sizeof_field(struct bpf_sock_ops_kern, reply));
+ BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, replylong) !=
+ sizeof_field(struct bpf_sock_ops_kern, replylong));
off = si->off;
off -= offsetof(struct bpf_sock_ops, op);
off += offsetof(struct bpf_sock_ops_kern, op);
@@ -8050,7 +8051,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock_ops, family):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct bpf_sock_ops_kern, sk),
@@ -8061,7 +8062,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock_ops, remote_ip4):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct bpf_sock_ops_kern, sk),
@@ -8072,7 +8073,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock_ops, local_ip4):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_field(struct sock_common,
skc_rcv_saddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
@@ -8087,7 +8088,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct bpf_sock_ops, remote_ip6[0]) ...
offsetof(struct bpf_sock_ops, remote_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_field(struct sock_common,
skc_v6_daddr.s6_addr32[0]) != 4);
off = si->off;
@@ -8108,7 +8109,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct bpf_sock_ops, local_ip6[0]) ...
offsetof(struct bpf_sock_ops, local_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_field(struct sock_common,
skc_v6_rcv_saddr.s6_addr32[0]) != 4);
off = si->off;
@@ -8127,7 +8128,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock_ops, remote_port):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct bpf_sock_ops_kern, sk),
@@ -8141,7 +8142,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock_ops, local_port):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct bpf_sock_ops_kern, sk),
@@ -8161,7 +8162,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock_ops, state):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_state) != 1);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_state) != 1);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct bpf_sock_ops_kern, sk),
@@ -8172,7 +8173,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock_ops, rtt_min):
- BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
+ BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) !=
sizeof(struct minmax));
BUILD_BUG_ON(sizeof(struct minmax) <
sizeof(struct minmax_sample));
@@ -8183,7 +8184,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
offsetof(struct bpf_sock_ops_kern, sk));
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
offsetof(struct tcp_sock, rtt_min) +
- FIELD_SIZEOF(struct minmax_sample, t));
+ sizeof_field(struct minmax_sample, t));
break;
case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
@@ -8325,7 +8326,7 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
offsetof(struct sk_msg, data_end));
break;
case offsetof(struct sk_msg_md, family):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct sk_msg, sk),
@@ -8336,7 +8337,7 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct sk_msg_md, remote_ip4):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct sk_msg, sk),
@@ -8347,7 +8348,7 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct sk_msg_md, local_ip4):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_field(struct sock_common,
skc_rcv_saddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
@@ -8362,7 +8363,7 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct sk_msg_md, remote_ip6[0]) ...
offsetof(struct sk_msg_md, remote_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_field(struct sock_common,
skc_v6_daddr.s6_addr32[0]) != 4);
off = si->off;
@@ -8383,7 +8384,7 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct sk_msg_md, local_ip6[0]) ...
offsetof(struct sk_msg_md, local_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_field(struct sock_common,
skc_v6_rcv_saddr.s6_addr32[0]) != 4);
off = si->off;
@@ -8402,7 +8403,7 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct sk_msg_md, remote_port):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct sk_msg, sk),
@@ -8416,7 +8417,7 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct sk_msg_md, local_port):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct sk_msg, sk),
@@ -8806,7 +8807,7 @@ sk_reuseport_is_valid_access(int off, int size,
/* Fields that allow narrowing */
case bpf_ctx_range(struct sk_reuseport_md, eth_protocol):
- if (size < FIELD_SIZEOF(struct sk_buff, protocol))
+ if (size < sizeof_field(struct sk_buff, protocol))
return false;
/* fall through */
case bpf_ctx_range(struct sk_reuseport_md, ip_protocol):
@@ -8824,7 +8825,7 @@ sk_reuseport_is_valid_access(int off, int size,
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_reuseport_kern, F), \
si->dst_reg, si->src_reg, \
bpf_target_off(struct sk_reuseport_kern, F, \
- FIELD_SIZEOF(struct sk_reuseport_kern, F), \
+ sizeof_field(struct sk_reuseport_kern, F), \
target_size)); \
})
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index d524a693e00f..2dbbb030fbed 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -599,8 +599,8 @@ __skb_flow_dissect_gre(const struct sk_buff *skb,
offset += sizeof(struct gre_base_hdr);
if (hdr->flags & GRE_CSUM)
- offset += FIELD_SIZEOF(struct gre_full_hdr, csum) +
- FIELD_SIZEOF(struct gre_full_hdr, reserved1);
+ offset += sizeof_field(struct gre_full_hdr, csum) +
+ sizeof_field(struct gre_full_hdr, reserved1);
if (hdr->flags & GRE_KEY) {
const __be32 *keyid;
@@ -622,11 +622,11 @@ __skb_flow_dissect_gre(const struct sk_buff *skb,
else
key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
}
- offset += FIELD_SIZEOF(struct gre_full_hdr, key);
+ offset += sizeof_field(struct gre_full_hdr, key);
}
if (hdr->flags & GRE_SEQ)
- offset += FIELD_SIZEOF(struct pptp_gre_header, seq);
+ offset += sizeof_field(struct pptp_gre_header, seq);
if (gre_ver == 0) {
if (*p_proto == htons(ETH_P_TEB)) {
@@ -653,7 +653,7 @@ __skb_flow_dissect_gre(const struct sk_buff *skb,
u8 *ppp_hdr;
if (hdr->flags & GRE_ACK)
- offset += FIELD_SIZEOF(struct pptp_gre_header, ack);
+ offset += sizeof_field(struct pptp_gre_header, ack);
ppp_hdr = __skb_header_pointer(skb, *p_nhoff + offset,
sizeof(_ppp_hdr),
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 652da6369037..920784a9b7ff 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -98,9 +98,6 @@ static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
- if (neigh->parms->neigh_cleanup)
- neigh->parms->neigh_cleanup(neigh);
-
trace_neigh_cleanup_and_release(neigh, 0);
__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 5c4624298996..4c826b8bf9b1 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -919,14 +919,17 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
struct kobject *kobj = &queue->kobj;
int error = 0;
+ /* A later kobject_put() will trigger the rx_queue_release() call, which
+ * decreases the dev refcount: take that reference here.
+ */
+ dev_hold(queue->dev);
+
kobj->kset = dev->queues_kset;
error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
"rx-%u", index);
if (error)
goto err;
- dev_hold(queue->dev);
-
if (dev->sysfs_rx_queue_group) {
error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
if (error)
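Note: the reordering above is about the error path. If kobject_init_and_add() fails, cleanup ends in kobject_put(), which invokes rx_queue_release() and thus dev_put(); taking the device reference before the first failure point keeps hold/put balanced either way. A condensed sketch of the lifetime rule (illustrative only, cleanup details elided):

    dev_hold(queue->dev);                     /* ref taken up front */
    error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, "rx-%u", index);
    if (error)
        kobject_put(kobj);                    /* -> rx_queue_release()   */
                                              /* -> dev_put(), balancing */
                                              /*    the dev_hold() above */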
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 02916f43bf63..20bc406f3871 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1041,6 +1041,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(4) /* IFLA_MIN_MTU */
+ nla_total_size(4) /* IFLA_MAX_MTU */
+ rtnl_prop_list_size(dev)
+ + nla_total_size(MAX_ADDR_LEN) /* IFLA_PERM_ADDRESS */
+ 0;
}
@@ -1757,6 +1758,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
nla_put_s32(skb, IFLA_NEW_IFINDEX, new_ifindex) < 0)
goto nla_put_failure;
+ if (memchr_inv(dev->perm_addr, '\0', dev->addr_len) &&
+ nla_put(skb, IFLA_PERM_ADDRESS, dev->addr_len, dev->perm_addr))
+ goto nla_put_failure;
rcu_read_lock();
if (rtnl_fill_link_af(skb, dev, ext_filter_mask))
@@ -1822,6 +1826,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_PROP_LIST] = { .type = NLA_NESTED },
[IFLA_ALT_IFNAME] = { .type = NLA_STRING,
.len = ALTIFNAMSIZ - 1 },
+ [IFLA_PERM_ADDRESS] = { .type = NLA_REJECT },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 973a71f4bc89..44b0894d8ae1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5472,12 +5472,15 @@ static void skb_mod_eth_type(struct sk_buff *skb, struct ethhdr *hdr,
}
/**
- * skb_mpls_push() - push a new MPLS header after the mac header
+ * skb_mpls_push() - push a new MPLS header after mac_len bytes from start of
+ * the packet
*
* @skb: buffer
* @mpls_lse: MPLS label stack entry to push
* @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848)
* @mac_len: length of the MAC header
+ * @ethernet: flag to indicate if the resulting packet after skb_mpls_push is
+ * ethernet
*
* Expects skb->data at mac header.
*
@@ -5501,7 +5504,7 @@ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
return err;
if (!skb->inner_protocol) {
- skb_set_inner_network_header(skb, mac_len);
+ skb_set_inner_network_header(skb, skb_network_offset(skb));
skb_set_inner_protocol(skb, skb->protocol);
}
@@ -5510,6 +5513,7 @@ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
mac_len);
skb_reset_mac_header(skb);
skb_set_network_header(skb, mac_len);
+ skb_reset_mac_len(skb);
lse = mpls_hdr(skb);
lse->label_stack_entry = mpls_lse;
@@ -5529,7 +5533,7 @@ EXPORT_SYMBOL_GPL(skb_mpls_push);
* @skb: buffer
* @next_proto: ethertype of header after popped MPLS header
* @mac_len: length of the MAC header
- * @ethernet: flag to indicate if ethernet header is present in packet
+ * @ethernet: flag to indicate if the packet is ethernet
*
* Expects skb->data at mac header.
*
diff --git a/net/core/sock.c b/net/core/sock.c
index 043db3ce023e..8459ad579f73 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2916,7 +2916,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_max_pacing_rate = ~0UL;
sk->sk_pacing_rate = ~0UL;
- sk->sk_pacing_shift = 10;
+ WRITE_ONCE(sk->sk_pacing_shift, 10);
sk->sk_incoming_cpu = -1;
sk_rx_queue_clear(sk);
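Note: this WRITE_ONCE() pairs with lockless readers of sk_pacing_shift elsewhere in the stack (the TCP transmit path reads it without the socket lock); the annotations keep the compiler from tearing or fusing the accesses. A minimal sketch of the paired pattern, assuming a reader along the lines of the TSO autosizing code:

    /* writer */
    WRITE_ONCE(sk->sk_pacing_shift, 10);

    /* lockless reader */
    limit = max(2 * skb->truesize,
                sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift));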
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index eb29e5adc84d..9f9e00ba3ad7 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -288,6 +288,7 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
return ret;
}
+# ifdef CONFIG_HAVE_EBPF_JIT
static int
proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
@@ -298,6 +299,7 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
+# endif /* CONFIG_HAVE_EBPF_JIT */
static int
proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write,
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 7911235706a9..04840697fe79 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -13,7 +13,7 @@
static unsigned int classify(const struct sk_buff *skb)
{
if (likely(skb->dev && skb->dev->phydev &&
- skb->dev->phydev->drv))
+ skb->dev->phydev->mii_ts))
return ptp_classify_raw(skb);
else
return PTP_CLASS_NONE;
@@ -21,7 +21,7 @@ static unsigned int classify(const struct sk_buff *skb)
void skb_clone_tx_timestamp(struct sk_buff *skb)
{
- struct phy_device *phydev;
+ struct mii_timestamper *mii_ts;
struct sk_buff *clone;
unsigned int type;
@@ -32,22 +32,22 @@ void skb_clone_tx_timestamp(struct sk_buff *skb)
if (type == PTP_CLASS_NONE)
return;
- phydev = skb->dev->phydev;
- if (likely(phydev->drv->txtstamp)) {
+ mii_ts = skb->dev->phydev->mii_ts;
+ if (likely(mii_ts->txtstamp)) {
clone = skb_clone_sk(skb);
if (!clone)
return;
- phydev->drv->txtstamp(phydev, clone, type);
+ mii_ts->txtstamp(mii_ts, clone, type);
}
}
EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp);
bool skb_defer_rx_timestamp(struct sk_buff *skb)
{
- struct phy_device *phydev;
+ struct mii_timestamper *mii_ts;
unsigned int type;
- if (!skb->dev || !skb->dev->phydev || !skb->dev->phydev->drv)
+ if (!skb->dev || !skb->dev->phydev || !skb->dev->phydev->mii_ts)
return false;
if (skb_headroom(skb) < ETH_HLEN)
@@ -62,9 +62,9 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb)
if (type == PTP_CLASS_NONE)
return false;
- phydev = skb->dev->phydev;
- if (likely(phydev->drv->rxtstamp))
- return phydev->drv->rxtstamp(phydev, skb, type);
+ mii_ts = skb->dev->phydev->mii_ts;
+ if (likely(mii_ts->rxtstamp))
+ return mii_ts->rxtstamp(mii_ts, skb, type);
return false;
}
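Note: these conversions replace direct use of the PHY driver's timestamping callbacks with a dedicated mii_timestamper object hung off the phy_device. From the call sites in this hunk, the ops take the mii_timestamper itself as context; roughly (a sketch inferred from usage, member set abridged -- see include/linux/mii_timestamper.h):

    struct mii_timestamper {
        bool (*rxtstamp)(struct mii_timestamper *mii_ts,
                         struct sk_buff *skb, int type);
        void (*txtstamp)(struct mii_timestamper *mii_ts,
                         struct sk_buff *skb, int type);
        /* plus hwtstamp/ts_info hooks for configuration and capabilities */
    };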
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 7c8390ad4dc6..8310714c47fd 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -36,7 +36,7 @@ static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
const u32 *k = data;
const u32 key = *k;
- BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id)
+ BUILD_BUG_ON(sizeof_field(struct xdp_mem_allocator, mem.id)
!= sizeof(u32));
/* Use cyclic increasing ID as direct hash key */
@@ -56,7 +56,7 @@ static const struct rhashtable_params mem_id_rht_params = {
.nelem_hint = 64,
.head_offset = offsetof(struct xdp_mem_allocator, node),
.key_offset = offsetof(struct xdp_mem_allocator, mem.id),
- .key_len = FIELD_SIZEOF(struct xdp_mem_allocator, mem.id),
+ .key_len = sizeof_field(struct xdp_mem_allocator, mem.id),
.max_size = MEM_ID_MAX,
.min_size = 8,
.automatic_shrinking = true,
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index a52e8ba1ced0..4af8a98fe784 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1132,7 +1132,7 @@ static int __init dccp_init(void)
int rc;
BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
- FIELD_SIZEOF(struct sk_buff, cb));
+ sizeof_field(struct sk_buff, cb));
rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
if (rc)
goto out_fail;
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 1e6c3cac11e6..92663dcb3aa2 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -29,6 +29,12 @@ config NET_DSA_TAG_8021Q
Drivers which use these helpers should select this as dependency.
+config NET_DSA_TAG_AR9331
+ tristate "Tag driver for Atheros AR9331 SoC with built-in switch"
+ help
+ Say Y or M if you want to enable support for tagging frames for
+ the Atheros AR9331 SoC with built-in switch.
+
config NET_DSA_TAG_BRCM_COMMON
tristate
default n
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 9a482c38bdb1..108486cfdeef 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -5,6 +5,7 @@ dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o
# tagging formats
obj-$(CONFIG_NET_DSA_TAG_8021Q) += tag_8021q.o
+obj-$(CONFIG_NET_DSA_TAG_AR9331) += tag_ar9331.o
obj-$(CONFIG_NET_DSA_TAG_BRCM_COMMON) += tag_brcm.o
obj-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 9ef2caa13f27..c66abbed4daf 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -124,7 +124,8 @@ static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst,
return NULL;
}
-struct dsa_link *dsa_link_touch(struct dsa_port *dp, struct dsa_port *link_dp)
+static struct dsa_link *dsa_link_touch(struct dsa_port *dp,
+ struct dsa_port *link_dp)
{
struct dsa_switch *ds = dp->ds;
struct dsa_switch_tree *dst;
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 2dd86d9bcda9..09ea2fd78c74 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -150,22 +150,6 @@ int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags);
int dsa_port_vid_del(struct dsa_port *dp, u16 vid);
int dsa_port_link_register_of(struct dsa_port *dp);
void dsa_port_link_unregister_of(struct dsa_port *dp);
-void dsa_port_phylink_validate(struct phylink_config *config,
- unsigned long *supported,
- struct phylink_link_state *state);
-void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config,
- struct phylink_link_state *state);
-void dsa_port_phylink_mac_config(struct phylink_config *config,
- unsigned int mode,
- const struct phylink_link_state *state);
-void dsa_port_phylink_mac_an_restart(struct phylink_config *config);
-void dsa_port_phylink_mac_link_down(struct phylink_config *config,
- unsigned int mode,
- phy_interface_t interface);
-void dsa_port_phylink_mac_link_up(struct phylink_config *config,
- unsigned int mode,
- phy_interface_t interface,
- struct phy_device *phydev);
extern const struct phylink_mac_ops dsa_port_phylink_mac_ops;
/* slave.c */
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 46ac9ba21987..ffb5601f7ed6 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -415,9 +415,9 @@ static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp)
return phydev;
}
-void dsa_port_phylink_validate(struct phylink_config *config,
- unsigned long *supported,
- struct phylink_link_state *state)
+static void dsa_port_phylink_validate(struct phylink_config *config,
+ unsigned long *supported,
+ struct phylink_link_state *state)
{
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
struct dsa_switch *ds = dp->ds;
@@ -427,10 +427,9 @@ void dsa_port_phylink_validate(struct phylink_config *config,
ds->ops->phylink_validate(ds, dp->index, supported, state);
}
-EXPORT_SYMBOL_GPL(dsa_port_phylink_validate);
-void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config,
- struct phylink_link_state *state)
+static void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config,
+ struct phylink_link_state *state)
{
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
struct dsa_switch *ds = dp->ds;
@@ -444,11 +443,10 @@ void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config,
if (ds->ops->phylink_mac_link_state(ds, dp->index, state) < 0)
state->link = 0;
}
-EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_pcs_get_state);
-void dsa_port_phylink_mac_config(struct phylink_config *config,
- unsigned int mode,
- const struct phylink_link_state *state)
+static void dsa_port_phylink_mac_config(struct phylink_config *config,
+ unsigned int mode,
+ const struct phylink_link_state *state)
{
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
struct dsa_switch *ds = dp->ds;
@@ -458,9 +456,8 @@ void dsa_port_phylink_mac_config(struct phylink_config *config,
ds->ops->phylink_mac_config(ds, dp->index, mode, state);
}
-EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_config);
-void dsa_port_phylink_mac_an_restart(struct phylink_config *config)
+static void dsa_port_phylink_mac_an_restart(struct phylink_config *config)
{
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
struct dsa_switch *ds = dp->ds;
@@ -470,11 +467,10 @@ void dsa_port_phylink_mac_an_restart(struct phylink_config *config)
ds->ops->phylink_mac_an_restart(ds, dp->index);
}
-EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_an_restart);
-void dsa_port_phylink_mac_link_down(struct phylink_config *config,
- unsigned int mode,
- phy_interface_t interface)
+static void dsa_port_phylink_mac_link_down(struct phylink_config *config,
+ unsigned int mode,
+ phy_interface_t interface)
{
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
struct phy_device *phydev = NULL;
@@ -491,12 +487,11 @@ void dsa_port_phylink_mac_link_down(struct phylink_config *config,
ds->ops->phylink_mac_link_down(ds, dp->index, mode, interface);
}
-EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_link_down);
-void dsa_port_phylink_mac_link_up(struct phylink_config *config,
- unsigned int mode,
- phy_interface_t interface,
- struct phy_device *phydev)
+static void dsa_port_phylink_mac_link_up(struct phylink_config *config,
+ unsigned int mode,
+ phy_interface_t interface,
+ struct phy_device *phydev)
{
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
struct dsa_switch *ds = dp->ds;
@@ -509,7 +504,6 @@ void dsa_port_phylink_mac_link_up(struct phylink_config *config,
ds->ops->phylink_mac_link_up(ds, dp->index, mode, interface, phydev);
}
-EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_link_up);
const struct phylink_mac_ops dsa_port_phylink_mac_ops = {
.validate = dsa_port_phylink_validate,
diff --git a/net/dsa/tag_ar9331.c b/net/dsa/tag_ar9331.c
new file mode 100644
index 000000000000..466ffa92a474
--- /dev/null
+++ b/net/dsa/tag_ar9331.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 Pengutronix, Oleksij Rempel <kernel@pengutronix.de>
+ */
+
+
+#include <linux/bitfield.h>
+#include <linux/etherdevice.h>
+
+#include "dsa_priv.h"
+
+#define AR9331_HDR_LEN 2
+#define AR9331_HDR_VERSION 1
+
+#define AR9331_HDR_VERSION_MASK GENMASK(15, 14)
+#define AR9331_HDR_PRIORITY_MASK GENMASK(13, 12)
+#define AR9331_HDR_TYPE_MASK GENMASK(10, 8)
+#define AR9331_HDR_BROADCAST BIT(7)
+#define AR9331_HDR_FROM_CPU BIT(6)
+/* AR9331_HDR_RESERVED - not used, or may be a version field.
+ * According to the AR8216 doc it should be 0b10. On AR9331 it is 0b11 on the
+ * RX path and should be set to 0b11 to make it work.
+ */
+#define AR9331_HDR_RESERVED_MASK GENMASK(5, 4)
+#define AR9331_HDR_PORT_NUM_MASK GENMASK(3, 0)
+
+static struct sk_buff *ar9331_tag_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ __le16 *phdr;
+ u16 hdr;
+
+ if (skb_cow_head(skb, 0) < 0)
+ return NULL;
+
+ phdr = skb_push(skb, AR9331_HDR_LEN);
+
+ hdr = FIELD_PREP(AR9331_HDR_VERSION_MASK, AR9331_HDR_VERSION);
+ hdr |= AR9331_HDR_FROM_CPU | dp->index;
+ /* 0b10 for AR8216 and 0b11 for AR9331 */
+ hdr |= AR9331_HDR_RESERVED_MASK;
+
+ phdr[0] = cpu_to_le16(hdr);
+
+ return skb;
+}
+
+static struct sk_buff *ar9331_tag_rcv(struct sk_buff *skb,
+ struct net_device *ndev,
+ struct packet_type *pt)
+{
+ u8 ver, port;
+ u16 hdr;
+
+ if (unlikely(!pskb_may_pull(skb, AR9331_HDR_LEN)))
+ return NULL;
+
+ hdr = le16_to_cpu(*(__le16 *)skb_mac_header(skb));
+
+ ver = FIELD_GET(AR9331_HDR_VERSION_MASK, hdr);
+ if (unlikely(ver != AR9331_HDR_VERSION)) {
+ netdev_warn_once(ndev, "%s:%i wrong header version 0x%2x\n",
+ __func__, __LINE__, hdr);
+ return NULL;
+ }
+
+ if (unlikely(hdr & AR9331_HDR_FROM_CPU)) {
+ netdev_warn_once(ndev, "%s:%i packet should not be from cpu 0x%2x\n",
+ __func__, __LINE__, hdr);
+ return NULL;
+ }
+
+ skb_pull_rcsum(skb, AR9331_HDR_LEN);
+
+ /* Get source port information */
+ port = FIELD_GET(AR9331_HDR_PORT_NUM_MASK, hdr);
+
+ skb->dev = dsa_master_find_slave(ndev, 0, port);
+ if (!skb->dev)
+ return NULL;
+
+ return skb;
+}
+
+static const struct dsa_device_ops ar9331_netdev_ops = {
+ .name = "ar9331",
+ .proto = DSA_TAG_PROTO_AR9331,
+ .xmit = ar9331_tag_xmit,
+ .rcv = ar9331_tag_rcv,
+ .overhead = AR9331_HDR_LEN,
+};
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_AR9331);
+module_dsa_tag_driver(ar9331_netdev_ops);
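Note: to make the 16-bit header layout above concrete, here is a worked example (values checked by hand against the driver's own masks) of tagging a frame from the CPU to switch port 2, and recovering the source port on receive:

    u16 hdr;

    /* xmit: version 0b01, FROM_CPU, reserved 0b11, port 2 */
    hdr  = FIELD_PREP(AR9331_HDR_VERSION_MASK, AR9331_HDR_VERSION); /* 0x4000 */
    hdr |= AR9331_HDR_FROM_CPU | 2;                                 /* 0x0042 */
    hdr |= AR9331_HDR_RESERVED_MASK;                                /* 0x0030 */
    /* hdr == 0x4072 */

    /* rcv: the source port sits in the low nibble */
    u8 port = FIELD_GET(AR9331_HDR_PORT_NUM_MASK, hdr);             /* == 2 */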
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index 73605bcbb385..90d055c4df9e 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -84,8 +84,6 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
* (eg, 0x00=port1, 0x02=port3, 0x06=port7)
*/
-#define KSZ8795_INGRESS_TAG_LEN 1
-
#define KSZ8795_TAIL_TAG_OVERRIDE BIT(6)
#define KSZ8795_TAIL_TAG_LOOKUP BIT(7)
@@ -96,12 +94,12 @@ static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev)
u8 *tag;
u8 *addr;
- nskb = ksz_common_xmit(skb, dev, KSZ8795_INGRESS_TAG_LEN);
+ nskb = ksz_common_xmit(skb, dev, KSZ_INGRESS_TAG_LEN);
if (!nskb)
return NULL;
/* Tag encoding */
- tag = skb_put(nskb, KSZ8795_INGRESS_TAG_LEN);
+ tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN);
addr = skb_mac_header(nskb);
*tag = 1 << dp->index;
@@ -124,7 +122,7 @@ static const struct dsa_device_ops ksz8795_netdev_ops = {
.proto = DSA_TAG_PROTO_KSZ8795,
.xmit = ksz8795_xmit,
.rcv = ksz8795_rcv,
- .overhead = KSZ8795_INGRESS_TAG_LEN,
+ .overhead = KSZ_INGRESS_TAG_LEN,
};
DSA_TAG_DRIVER(ksz8795_netdev_ops);
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
new file mode 100644
index 000000000000..f68387618973
--- /dev/null
+++ b/net/ethtool/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-y += ioctl.o common.o
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
new file mode 100644
index 000000000000..0a8728565356
--- /dev/null
+++ b/net/ethtool/common.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "common.h"
+
+const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
+ [NETIF_F_SG_BIT] = "tx-scatter-gather",
+ [NETIF_F_IP_CSUM_BIT] = "tx-checksum-ipv4",
+ [NETIF_F_HW_CSUM_BIT] = "tx-checksum-ip-generic",
+ [NETIF_F_IPV6_CSUM_BIT] = "tx-checksum-ipv6",
+ [NETIF_F_HIGHDMA_BIT] = "highdma",
+ [NETIF_F_FRAGLIST_BIT] = "tx-scatter-gather-fraglist",
+ [NETIF_F_HW_VLAN_CTAG_TX_BIT] = "tx-vlan-hw-insert",
+
+ [NETIF_F_HW_VLAN_CTAG_RX_BIT] = "rx-vlan-hw-parse",
+ [NETIF_F_HW_VLAN_CTAG_FILTER_BIT] = "rx-vlan-filter",
+ [NETIF_F_HW_VLAN_STAG_TX_BIT] = "tx-vlan-stag-hw-insert",
+ [NETIF_F_HW_VLAN_STAG_RX_BIT] = "rx-vlan-stag-hw-parse",
+ [NETIF_F_HW_VLAN_STAG_FILTER_BIT] = "rx-vlan-stag-filter",
+ [NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged",
+ [NETIF_F_GSO_BIT] = "tx-generic-segmentation",
+ [NETIF_F_LLTX_BIT] = "tx-lockless",
+ [NETIF_F_NETNS_LOCAL_BIT] = "netns-local",
+ [NETIF_F_GRO_BIT] = "rx-gro",
+ [NETIF_F_GRO_HW_BIT] = "rx-gro-hw",
+ [NETIF_F_LRO_BIT] = "rx-lro",
+
+ [NETIF_F_TSO_BIT] = "tx-tcp-segmentation",
+ [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust",
+ [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation",
+ [NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation",
+ [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation",
+ [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation",
+ [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation",
+ [NETIF_F_GSO_GRE_CSUM_BIT] = "tx-gre-csum-segmentation",
+ [NETIF_F_GSO_IPXIP4_BIT] = "tx-ipxip4-segmentation",
+ [NETIF_F_GSO_IPXIP6_BIT] = "tx-ipxip6-segmentation",
+ [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
+ [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation",
+ [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial",
+ [NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation",
+ [NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation",
+ [NETIF_F_GSO_UDP_L4_BIT] = "tx-udp-segmentation",
+
+ [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
+ [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
+ [NETIF_F_FCOE_MTU_BIT] = "fcoe-mtu",
+ [NETIF_F_NTUPLE_BIT] = "rx-ntuple-filter",
+ [NETIF_F_RXHASH_BIT] = "rx-hashing",
+ [NETIF_F_RXCSUM_BIT] = "rx-checksum",
+ [NETIF_F_NOCACHE_COPY_BIT] = "tx-nocache-copy",
+ [NETIF_F_LOOPBACK_BIT] = "loopback",
+ [NETIF_F_RXFCS_BIT] = "rx-fcs",
+ [NETIF_F_RXALL_BIT] = "rx-all",
+ [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
+ [NETIF_F_HW_TC_BIT] = "hw-tc-offload",
+ [NETIF_F_HW_ESP_BIT] = "esp-hw-offload",
+ [NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload",
+ [NETIF_F_RX_UDP_TUNNEL_PORT_BIT] = "rx-udp_tunnel-port-offload",
+ [NETIF_F_HW_TLS_RECORD_BIT] = "tls-hw-record",
+ [NETIF_F_HW_TLS_TX_BIT] = "tls-hw-tx-offload",
+ [NETIF_F_HW_TLS_RX_BIT] = "tls-hw-rx-offload",
+};
+
+const char
+rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = {
+ [ETH_RSS_HASH_TOP_BIT] = "toeplitz",
+ [ETH_RSS_HASH_XOR_BIT] = "xor",
+ [ETH_RSS_HASH_CRC32_BIT] = "crc32",
+};
+
+const char
+tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
+ [ETHTOOL_ID_UNSPEC] = "Unspec",
+ [ETHTOOL_RX_COPYBREAK] = "rx-copybreak",
+ [ETHTOOL_TX_COPYBREAK] = "tx-copybreak",
+ [ETHTOOL_PFC_PREVENTION_TOUT] = "pfc-prevention-tout",
+};
+
+const char
+phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
+ [ETHTOOL_ID_UNSPEC] = "Unspec",
+ [ETHTOOL_PHY_DOWNSHIFT] = "phy-downshift",
+ [ETHTOOL_PHY_FAST_LINK_DOWN] = "phy-fast-link-down",
+ [ETHTOOL_PHY_EDPD] = "phy-energy-detect-power-down",
+};
+
+#define __LINK_MODE_NAME(speed, type, duplex) \
+ #speed "base" #type "/" #duplex
+#define __DEFINE_LINK_MODE_NAME(speed, type, duplex) \
+ [ETHTOOL_LINK_MODE(speed, type, duplex)] = \
+ __LINK_MODE_NAME(speed, type, duplex)
+#define __DEFINE_SPECIAL_MODE_NAME(_mode, _name) \
+ [ETHTOOL_LINK_MODE_ ## _mode ## _BIT] = _name
+
+const char link_mode_names[][ETH_GSTRING_LEN] = {
+ __DEFINE_LINK_MODE_NAME(10, T, Half),
+ __DEFINE_LINK_MODE_NAME(10, T, Full),
+ __DEFINE_LINK_MODE_NAME(100, T, Half),
+ __DEFINE_LINK_MODE_NAME(100, T, Full),
+ __DEFINE_LINK_MODE_NAME(1000, T, Half),
+ __DEFINE_LINK_MODE_NAME(1000, T, Full),
+ __DEFINE_SPECIAL_MODE_NAME(Autoneg, "Autoneg"),
+ __DEFINE_SPECIAL_MODE_NAME(TP, "TP"),
+ __DEFINE_SPECIAL_MODE_NAME(AUI, "AUI"),
+ __DEFINE_SPECIAL_MODE_NAME(MII, "MII"),
+ __DEFINE_SPECIAL_MODE_NAME(FIBRE, "FIBRE"),
+ __DEFINE_SPECIAL_MODE_NAME(BNC, "BNC"),
+ __DEFINE_LINK_MODE_NAME(10000, T, Full),
+ __DEFINE_SPECIAL_MODE_NAME(Pause, "Pause"),
+ __DEFINE_SPECIAL_MODE_NAME(Asym_Pause, "Asym_Pause"),
+ __DEFINE_LINK_MODE_NAME(2500, X, Full),
+ __DEFINE_SPECIAL_MODE_NAME(Backplane, "Backplane"),
+ __DEFINE_LINK_MODE_NAME(1000, KX, Full),
+ __DEFINE_LINK_MODE_NAME(10000, KX4, Full),
+ __DEFINE_LINK_MODE_NAME(10000, KR, Full),
+ __DEFINE_SPECIAL_MODE_NAME(10000baseR_FEC, "10000baseR_FEC"),
+ __DEFINE_LINK_MODE_NAME(20000, MLD2, Full),
+ __DEFINE_LINK_MODE_NAME(20000, KR2, Full),
+ __DEFINE_LINK_MODE_NAME(40000, KR4, Full),
+ __DEFINE_LINK_MODE_NAME(40000, CR4, Full),
+ __DEFINE_LINK_MODE_NAME(40000, SR4, Full),
+ __DEFINE_LINK_MODE_NAME(40000, LR4, Full),
+ __DEFINE_LINK_MODE_NAME(56000, KR4, Full),
+ __DEFINE_LINK_MODE_NAME(56000, CR4, Full),
+ __DEFINE_LINK_MODE_NAME(56000, SR4, Full),
+ __DEFINE_LINK_MODE_NAME(56000, LR4, Full),
+ __DEFINE_LINK_MODE_NAME(25000, CR, Full),
+ __DEFINE_LINK_MODE_NAME(25000, KR, Full),
+ __DEFINE_LINK_MODE_NAME(25000, SR, Full),
+ __DEFINE_LINK_MODE_NAME(50000, CR2, Full),
+ __DEFINE_LINK_MODE_NAME(50000, KR2, Full),
+ __DEFINE_LINK_MODE_NAME(100000, KR4, Full),
+ __DEFINE_LINK_MODE_NAME(100000, SR4, Full),
+ __DEFINE_LINK_MODE_NAME(100000, CR4, Full),
+ __DEFINE_LINK_MODE_NAME(100000, LR4_ER4, Full),
+ __DEFINE_LINK_MODE_NAME(50000, SR2, Full),
+ __DEFINE_LINK_MODE_NAME(1000, X, Full),
+ __DEFINE_LINK_MODE_NAME(10000, CR, Full),
+ __DEFINE_LINK_MODE_NAME(10000, SR, Full),
+ __DEFINE_LINK_MODE_NAME(10000, LR, Full),
+ __DEFINE_LINK_MODE_NAME(10000, LRM, Full),
+ __DEFINE_LINK_MODE_NAME(10000, ER, Full),
+ __DEFINE_LINK_MODE_NAME(2500, T, Full),
+ __DEFINE_LINK_MODE_NAME(5000, T, Full),
+ __DEFINE_SPECIAL_MODE_NAME(FEC_NONE, "None"),
+ __DEFINE_SPECIAL_MODE_NAME(FEC_RS, "RS"),
+ __DEFINE_SPECIAL_MODE_NAME(FEC_BASER, "BASER"),
+ __DEFINE_LINK_MODE_NAME(50000, KR, Full),
+ __DEFINE_LINK_MODE_NAME(50000, SR, Full),
+ __DEFINE_LINK_MODE_NAME(50000, CR, Full),
+ __DEFINE_LINK_MODE_NAME(50000, LR_ER_FR, Full),
+ __DEFINE_LINK_MODE_NAME(50000, DR, Full),
+ __DEFINE_LINK_MODE_NAME(100000, KR2, Full),
+ __DEFINE_LINK_MODE_NAME(100000, SR2, Full),
+ __DEFINE_LINK_MODE_NAME(100000, CR2, Full),
+ __DEFINE_LINK_MODE_NAME(100000, LR2_ER2_FR2, Full),
+ __DEFINE_LINK_MODE_NAME(100000, DR2, Full),
+ __DEFINE_LINK_MODE_NAME(200000, KR4, Full),
+ __DEFINE_LINK_MODE_NAME(200000, SR4, Full),
+ __DEFINE_LINK_MODE_NAME(200000, LR4_ER4_FR4, Full),
+ __DEFINE_LINK_MODE_NAME(200000, DR4, Full),
+ __DEFINE_LINK_MODE_NAME(200000, CR4, Full),
+ __DEFINE_LINK_MODE_NAME(100, T1, Full),
+ __DEFINE_LINK_MODE_NAME(1000, T1, Full),
+ __DEFINE_LINK_MODE_NAME(400000, KR8, Full),
+ __DEFINE_LINK_MODE_NAME(400000, SR8, Full),
+ __DEFINE_LINK_MODE_NAME(400000, LR8_ER8_FR8, Full),
+ __DEFINE_LINK_MODE_NAME(400000, DR8, Full),
+ __DEFINE_LINK_MODE_NAME(400000, CR8, Full),
+};
+static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
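The three helper macros above are dense, so it may help to trace one expansion. For __DEFINE_LINK_MODE_NAME(100, T, Full), ETHTOOL_LINK_MODE() (declared in common.h below) pastes the arguments into the uapi enum name while __LINK_MODE_NAME() stringifies the same triplet:

/* __DEFINE_LINK_MODE_NAME(100, T, Full) first becomes: */
[ETHTOOL_LINK_MODE(100, T, Full)] = __LINK_MODE_NAME(100, T, Full)
/* ... and then the designated initializer actually compiled: */
[ETHTOOL_LINK_MODE_100baseT_Full_BIT] = "100baseT/Full"

The static_assert() at the end keeps the table in lockstep with __ETHTOOL_LINK_MODE_MASK_NBITS, so adding an enum bit without a corresponding name entry fails at build time.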
diff --git a/net/ethtool/common.h b/net/ethtool/common.h
new file mode 100644
index 000000000000..bbb788908cb1
--- /dev/null
+++ b/net/ethtool/common.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ETHTOOL_COMMON_H
+#define _ETHTOOL_COMMON_H
+
+#include <linux/ethtool.h>
+
+/* compose link mode index from speed, type and duplex */
+#define ETHTOOL_LINK_MODE(speed, type, duplex) \
+ ETHTOOL_LINK_MODE_ ## speed ## base ## type ## _ ## duplex ## _BIT
+
+extern const char
+netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN];
+extern const char
+rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN];
+extern const char
+tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN];
+extern const char
+phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN];
+extern const char link_mode_names[][ETH_GSTRING_LEN];
+
+#endif /* _ETHTOOL_COMMON_H */
diff --git a/net/core/ethtool.c b/net/ethtool/ioctl.c
index cd9bc67381b2..88f7cddf5a6f 100644
--- a/net/core/ethtool.c
+++ b/net/ethtool/ioctl.c
@@ -27,6 +27,8 @@
#include <net/xdp_sock.h>
#include <net/flow_offload.h>
+#include "common.h"
+
/*
* Some useful ethtool_ops methods that're device independent.
* If we find that all drivers want to do the same thing here,
@@ -54,88 +56,6 @@ EXPORT_SYMBOL(ethtool_op_get_ts_info);
#define ETHTOOL_DEV_FEATURE_WORDS ((NETDEV_FEATURE_COUNT + 31) / 32)
-static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
- [NETIF_F_SG_BIT] = "tx-scatter-gather",
- [NETIF_F_IP_CSUM_BIT] = "tx-checksum-ipv4",
- [NETIF_F_HW_CSUM_BIT] = "tx-checksum-ip-generic",
- [NETIF_F_IPV6_CSUM_BIT] = "tx-checksum-ipv6",
- [NETIF_F_HIGHDMA_BIT] = "highdma",
- [NETIF_F_FRAGLIST_BIT] = "tx-scatter-gather-fraglist",
- [NETIF_F_HW_VLAN_CTAG_TX_BIT] = "tx-vlan-hw-insert",
-
- [NETIF_F_HW_VLAN_CTAG_RX_BIT] = "rx-vlan-hw-parse",
- [NETIF_F_HW_VLAN_CTAG_FILTER_BIT] = "rx-vlan-filter",
- [NETIF_F_HW_VLAN_STAG_TX_BIT] = "tx-vlan-stag-hw-insert",
- [NETIF_F_HW_VLAN_STAG_RX_BIT] = "rx-vlan-stag-hw-parse",
- [NETIF_F_HW_VLAN_STAG_FILTER_BIT] = "rx-vlan-stag-filter",
- [NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged",
- [NETIF_F_GSO_BIT] = "tx-generic-segmentation",
- [NETIF_F_LLTX_BIT] = "tx-lockless",
- [NETIF_F_NETNS_LOCAL_BIT] = "netns-local",
- [NETIF_F_GRO_BIT] = "rx-gro",
- [NETIF_F_GRO_HW_BIT] = "rx-gro-hw",
- [NETIF_F_LRO_BIT] = "rx-lro",
-
- [NETIF_F_TSO_BIT] = "tx-tcp-segmentation",
- [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust",
- [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation",
- [NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation",
- [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation",
- [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation",
- [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation",
- [NETIF_F_GSO_GRE_CSUM_BIT] = "tx-gre-csum-segmentation",
- [NETIF_F_GSO_IPXIP4_BIT] = "tx-ipxip4-segmentation",
- [NETIF_F_GSO_IPXIP6_BIT] = "tx-ipxip6-segmentation",
- [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
- [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation",
- [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial",
- [NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation",
- [NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation",
- [NETIF_F_GSO_UDP_L4_BIT] = "tx-udp-segmentation",
-
- [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
- [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
- [NETIF_F_FCOE_MTU_BIT] = "fcoe-mtu",
- [NETIF_F_NTUPLE_BIT] = "rx-ntuple-filter",
- [NETIF_F_RXHASH_BIT] = "rx-hashing",
- [NETIF_F_RXCSUM_BIT] = "rx-checksum",
- [NETIF_F_NOCACHE_COPY_BIT] = "tx-nocache-copy",
- [NETIF_F_LOOPBACK_BIT] = "loopback",
- [NETIF_F_RXFCS_BIT] = "rx-fcs",
- [NETIF_F_RXALL_BIT] = "rx-all",
- [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
- [NETIF_F_HW_TC_BIT] = "hw-tc-offload",
- [NETIF_F_HW_ESP_BIT] = "esp-hw-offload",
- [NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload",
- [NETIF_F_RX_UDP_TUNNEL_PORT_BIT] = "rx-udp_tunnel-port-offload",
- [NETIF_F_HW_TLS_RECORD_BIT] = "tls-hw-record",
- [NETIF_F_HW_TLS_TX_BIT] = "tls-hw-tx-offload",
- [NETIF_F_HW_TLS_RX_BIT] = "tls-hw-rx-offload",
-};
-
-static const char
-rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = {
- [ETH_RSS_HASH_TOP_BIT] = "toeplitz",
- [ETH_RSS_HASH_XOR_BIT] = "xor",
- [ETH_RSS_HASH_CRC32_BIT] = "crc32",
-};
-
-static const char
-tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
- [ETHTOOL_ID_UNSPEC] = "Unspec",
- [ETHTOOL_RX_COPYBREAK] = "rx-copybreak",
- [ETHTOOL_TX_COPYBREAK] = "tx-copybreak",
- [ETHTOOL_PFC_PREVENTION_TOUT] = "pfc-prevention-tout",
-};
-
-static const char
-phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
- [ETHTOOL_ID_UNSPEC] = "Unspec",
- [ETHTOOL_PHY_DOWNSHIFT] = "phy-downshift",
- [ETHTOOL_PHY_FAST_LINK_DOWN] = "phy-fast-link-down",
- [ETHTOOL_PHY_EDPD] = "phy-energy-detect-power-down",
-};
-
static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
{
struct ethtool_gfeatures cmd = {
@@ -234,6 +154,9 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset)
!ops->get_ethtool_phy_stats)
return phy_ethtool_get_sset_count(dev->phydev);
+ if (sset == ETH_SS_LINK_MODES)
+ return __ETHTOOL_LINK_MODE_MASK_NBITS;
+
if (ops->get_sset_count && ops->get_strings)
return ops->get_sset_count(dev, sset);
else
@@ -258,6 +181,9 @@ static void __ethtool_get_strings(struct net_device *dev,
else if (stringset == ETH_SS_PHY_STATS && dev->phydev &&
!ops->get_ethtool_phy_stats)
phy_ethtool_get_strings(dev->phydev, data);
+ else if (stringset == ETH_SS_LINK_MODES)
+ memcpy(data, link_mode_names,
+ __ETHTOOL_LINK_MODE_MASK_NBITS * ETH_GSTRING_LEN);
else
/* ops->get_strings is valid because checked earlier */
ops->get_strings(dev, stringset, data);
@@ -2170,8 +2096,8 @@ static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr)
memset(&info, 0, sizeof(info));
info.cmd = ETHTOOL_GET_TS_INFO;
- if (phydev && phydev->drv && phydev->drv->ts_info) {
- err = phydev->drv->ts_info(phydev, &info);
+ if (phy_has_tsinfo(phydev)) {
+ err = phy_ts_info(phydev, &info);
} else if (ops->get_ts_info) {
err = ops->get_ts_info(dev, &info);
} else {
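The last hunk replaces open-coded pokes at phydev->drv->ts_info with the phy_has_tsinfo()/phy_ts_info() accessors. These are thin wrappers from include/linux/phy.h introduced by the mii_timestamper rework; the shape below is a sketch from that era, not quoted from this series:

static inline bool phy_has_tsinfo(struct phy_device *phydev)
{
	return phydev && phydev->mii_ts && phydev->mii_ts->ts_info;
}

static inline int phy_ts_info(struct phy_device *phydev,
			      struct ethtool_ts_info *tsinfo)
{
	return phydev->mii_ts->ts_info(phydev->mii_ts, tsinfo);
}

The point of the indirection is that ethtool no longer cares whether timestamping is implemented by the PHY driver itself or by a separate MII time stamper.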
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index b9df9c09b84e..b92a42433a7d 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -980,9 +980,12 @@ static struct key_vector *fib_find_node(struct trie *t,
/* Return the first fib alias matching TOS with
* priority less than or equal to PRIO.
+ * If 'find_first' is set, return the first matching
+ * fib alias, regardless of TOS and priority.
*/
static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
- u8 tos, u32 prio, u32 tb_id)
+ u8 tos, u32 prio, u32 tb_id,
+ bool find_first)
{
struct fib_alias *fa;
@@ -998,6 +1001,8 @@ static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
continue;
if (fa->tb_id != tb_id)
break;
+ if (find_first)
+ return fa;
if (fa->fa_tos > tos)
continue;
if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos)
@@ -1063,9 +1068,6 @@ noleaf:
return -ENOMEM;
}
-/* fib notifier for ADD is sent before calling fib_insert_alias with
- * the expectation that the only possible failure ENOMEM
- */
static int fib_insert_alias(struct trie *t, struct key_vector *tp,
struct key_vector *l, struct fib_alias *new,
struct fib_alias *fa, t_key key)
@@ -1118,11 +1120,13 @@ static bool fib_valid_key_len(u32 key, u8 plen, struct netlink_ext_ack *extack)
return true;
}
+static void fib_remove_alias(struct trie *t, struct key_vector *tp,
+ struct key_vector *l, struct fib_alias *old);
+
/* Caller must hold RTNL. */
int fib_table_insert(struct net *net, struct fib_table *tb,
struct fib_config *cfg, struct netlink_ext_ack *extack)
{
- enum fib_event_type event = FIB_EVENT_ENTRY_ADD;
struct trie *t = (struct trie *)tb->tb_data;
struct fib_alias *fa, *new_fa;
struct key_vector *l, *tp;
@@ -1149,7 +1153,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
l = fib_find_node(t, &tp, key);
fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority,
- tb->tb_id) : NULL;
+ tb->tb_id, false) : NULL;
/* Now fa, if non-NULL, points to the first fib alias
* with the same keys [prefix,tos,priority], if such key already
@@ -1217,12 +1221,17 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
new_fa->tb_id = tb->tb_id;
new_fa->fa_default = -1;
- err = call_fib_entry_notifiers(net,
- FIB_EVENT_ENTRY_REPLACE,
- key, plen, new_fa,
- extack);
- if (err)
- goto out_free_new_fa;
+ if (fib_find_alias(&l->leaf, fa->fa_slen, 0, 0,
+ tb->tb_id, true) == fa) {
+ enum fib_event_type fib_event;
+
+ fib_event = FIB_EVENT_ENTRY_REPLACE;
+ err = call_fib_entry_notifiers(net, fib_event,
+ key, plen,
+ new_fa, extack);
+ if (err)
+ goto out_free_new_fa;
+ }
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
tb->tb_id, &cfg->fc_nlinfo, nlflags);
@@ -1244,12 +1253,10 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
if (fa_match)
goto out;
- if (cfg->fc_nlflags & NLM_F_APPEND) {
- event = FIB_EVENT_ENTRY_APPEND;
+ if (cfg->fc_nlflags & NLM_F_APPEND)
nlflags |= NLM_F_APPEND;
- } else {
+ else
fa = fa_first;
- }
}
err = -ENOENT;
if (!(cfg->fc_nlflags & NLM_F_CREATE))
@@ -1269,14 +1276,26 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
new_fa->tb_id = tb->tb_id;
new_fa->fa_default = -1;
- err = call_fib_entry_notifiers(net, event, key, plen, new_fa, extack);
- if (err)
- goto out_free_new_fa;
-
/* Insert new entry to the list. */
err = fib_insert_alias(t, tp, l, new_fa, fa, key);
if (err)
- goto out_fib_notif;
+ goto out_free_new_fa;
+
+ /* The alias was already inserted, so the node must exist. */
+ l = l ? l : fib_find_node(t, &tp, key);
+ if (WARN_ON_ONCE(!l))
+ goto out_free_new_fa;
+
+ if (fib_find_alias(&l->leaf, new_fa->fa_slen, 0, 0, tb->tb_id, true) ==
+ new_fa) {
+ enum fib_event_type fib_event;
+
+ fib_event = FIB_EVENT_ENTRY_REPLACE;
+ err = call_fib_entry_notifiers(net, fib_event, key, plen,
+ new_fa, extack);
+ if (err)
+ goto out_remove_new_fa;
+ }
if (!plen)
tb->tb_num_default++;
@@ -1287,14 +1306,8 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
succeeded:
return 0;
-out_fib_notif:
- /* notifier was sent that entry would be added to trie, but
- * the add failed and need to recover. Only failure for
- * fib_insert_alias is ENOMEM.
- */
- NL_SET_ERR_MSG(extack, "Failed to insert route into trie");
- call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key,
- plen, new_fa, NULL);
+out_remove_new_fa:
+ fib_remove_alias(t, tp, l, new_fa);
out_free_new_fa:
kmem_cache_free(fn_alias_kmem, new_fa);
out:
@@ -1545,6 +1558,36 @@ static void fib_remove_alias(struct trie *t, struct key_vector *tp,
node_pull_suffix(tp, fa->fa_slen);
}
+static void fib_notify_alias_delete(struct net *net, u32 key,
+ struct hlist_head *fah,
+ struct fib_alias *fa_to_delete,
+ struct netlink_ext_ack *extack)
+{
+ struct fib_alias *fa_next, *fa_to_notify;
+ u32 tb_id = fa_to_delete->tb_id;
+ u8 slen = fa_to_delete->fa_slen;
+ enum fib_event_type fib_event;
+
+ /* Do not notify if we do not care about the route. */
+ if (fib_find_alias(fah, slen, 0, 0, tb_id, true) != fa_to_delete)
+ return;
+
+ /* Determine if the route should be replaced by the next route in the
+ * list.
+ */
+ fa_next = hlist_entry_safe(fa_to_delete->fa_list.next,
+ struct fib_alias, fa_list);
+ if (fa_next && fa_next->fa_slen == slen && fa_next->tb_id == tb_id) {
+ fib_event = FIB_EVENT_ENTRY_REPLACE;
+ fa_to_notify = fa_next;
+ } else {
+ fib_event = FIB_EVENT_ENTRY_DEL;
+ fa_to_notify = fa_to_delete;
+ }
+ call_fib_entry_notifiers(net, fib_event, key, KEYLENGTH - slen,
+ fa_to_notify, extack);
+}
+
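The helper above captures the new contract with in-kernel FIB listeners such as route-offloading switch drivers: they only ever hear about the alias they should actually program, i.e. the first one for a given {prefix, prefix length, table}. A worked example over a hypothetical leaf (addresses and metrics are illustrative):

/*
 * Leaf for 198.51.100.0/24 in one table, best alias first:
 *
 *	fa1 (priority 10)	<- what listeners have offloaded
 *	fa2 (priority 20)
 *	fa3 (priority 30)
 *
 * Deleting fa2 or fa3: fib_find_alias(..., find_first = true) returns
 * fa1 != fa_to_delete, so no notification is emitted at all.
 *
 * Deleting fa1: fa2 follows with the same fa_slen/tb_id, so listeners
 * get FIB_EVENT_ENTRY_REPLACE carrying fa2 and switch over atomically;
 * only when nothing suitable follows is FIB_EVENT_ENTRY_DEL sent.
 */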
/* Caller must hold RTNL. */
int fib_table_delete(struct net *net, struct fib_table *tb,
struct fib_config *cfg, struct netlink_ext_ack *extack)
@@ -1566,7 +1609,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
if (!l)
return -ESRCH;
- fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id);
+ fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id, false);
if (!fa)
return -ESRCH;
@@ -1598,8 +1641,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
if (!fa_to_delete)
return -ESRCH;
- call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen,
- fa_to_delete, extack);
+ fib_notify_alias_delete(net, key, &l->leaf, fa_to_delete, extack);
rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
&cfg->fc_nlinfo, 0);
@@ -1923,10 +1965,8 @@ int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all)
continue;
}
- call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
- n->key,
- KEYLENGTH - fa->fa_slen, fa,
- NULL);
+ fib_notify_alias_delete(net, n->key, &n->leaf, fa,
+ NULL);
hlist_del_rcu(&fa->fa_list);
fib_release_info(fa->fa_info);
alias_free_mem_rcu(fa);
@@ -2022,6 +2062,7 @@ static int fib_leaf_notify(struct key_vector *l, struct fib_table *tb,
struct netlink_ext_ack *extack)
{
struct fib_alias *fa;
+ int last_slen = -1;
int err;
hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
@@ -2036,8 +2077,12 @@ static int fib_leaf_notify(struct key_vector *l, struct fib_table *tb,
if (tb->tb_id != fa->tb_id)
continue;
- err = call_fib_entry_notifier(nb, FIB_EVENT_ENTRY_ADD, l->key,
- KEYLENGTH - fa->fa_slen,
+ if (fa->fa_slen == last_slen)
+ continue;
+
+ last_slen = fa->fa_slen;
+ err = call_fib_entry_notifier(nb, FIB_EVENT_ENTRY_REPLACE,
+ l->key, KEYLENGTH - fa->fa_slen,
fa, extack);
if (err)
return err;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index af154977904c..f11e997e517b 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -911,11 +911,12 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
struct inet_listen_hashbucket *ilb;
+ struct hlist_nulls_node *node;
num = 0;
ilb = &hashinfo->listening_hash[i];
spin_lock(&ilb->lock);
- sk_for_each(sk, &ilb->head) {
+ sk_nulls_for_each(sk, node, &ilb->nulls_head) {
struct inet_sock *inet = inet_sk(sk);
if (!net_eq(sock_net(sk), net))
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 83fb00153018..2bbaaf0c7176 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -516,10 +516,11 @@ static int inet_reuseport_add_sock(struct sock *sk,
struct inet_listen_hashbucket *ilb)
{
struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash;
+ const struct hlist_nulls_node *node;
struct sock *sk2;
kuid_t uid = sock_i_uid(sk);
- sk_for_each_rcu(sk2, &ilb->head) {
+ sk_nulls_for_each_rcu(sk2, node, &ilb->nulls_head) {
if (sk2 != sk &&
sk2->sk_family == sk->sk_family &&
ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
@@ -555,9 +556,9 @@ int __inet_hash(struct sock *sk, struct sock *osk)
}
if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
sk->sk_family == AF_INET6)
- hlist_add_tail_rcu(&sk->sk_node, &ilb->head);
+ __sk_nulls_add_node_tail_rcu(sk, &ilb->nulls_head);
else
- hlist_add_head_rcu(&sk->sk_node, &ilb->head);
+ __sk_nulls_add_node_rcu(sk, &ilb->nulls_head);
inet_hash2(hashinfo, sk);
ilb->count++;
sock_set_flag(sk, SOCK_RCU_FREE);
@@ -606,11 +607,9 @@ void inet_unhash(struct sock *sk)
reuseport_detach_sock(sk);
if (ilb) {
inet_unhash2(hashinfo, sk);
- __sk_del_node_init(sk);
- ilb->count--;
- } else {
- __sk_nulls_del_node_init_rcu(sk);
+ ilb->count--;
}
+ __sk_nulls_del_node_init_rcu(sk);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
unlock:
spin_unlock_bh(lock);
@@ -750,7 +749,8 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
for (i = 0; i < INET_LHTABLE_SIZE; i++) {
spin_lock_init(&h->listening_hash[i].lock);
- INIT_HLIST_HEAD(&h->listening_hash[i].head);
+ INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].nulls_head,
+ i + LISTENING_NULLS_BASE);
h->listening_hash[i].count = 0;
}
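Context for the hlist to hlist_nulls conversion above: listening sockets are freed with SLAB_TYPESAFE_BY_RCU, so a lockless reader can chase a socket that is freed and recycled onto a different chain mid-walk. A nulls list terminates every chain with a marker that encodes the bucket it belongs to, letting the reader detect the drift and restart. A sketch of the canonical lookup pattern (listener_matches() is a hypothetical predicate standing in for the real match logic):

static struct sock *lookup_listener_sketch(struct inet_listen_hashbucket *ilb,
					   unsigned int slot)
{
	const struct hlist_nulls_node *node;
	struct sock *sk;

begin:
	sk_nulls_for_each_rcu(sk, node, &ilb->nulls_head) {
		if (listener_matches(sk))
			return sk;
	}
	/*
	 * Walked to a nulls marker: if it is not the one seeded for this
	 * slot in inet_hashinfo_init(), the traversal was migrated onto
	 * another chain by a concurrent free/reuse -- start over.
	 */
	if (get_nulls_value(node) != (slot + LISTENING_NULLS_BASE))
		goto begin;
	return NULL;
}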
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 572b6307a2df..8274f98c511c 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1464,8 +1464,8 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
[IFLA_GRE_IKEY] = { .type = NLA_U32 },
[IFLA_GRE_OKEY] = { .type = NLA_U32 },
- [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
- [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+ [IFLA_GRE_LOCAL] = { .len = sizeof_field(struct iphdr, saddr) },
+ [IFLA_GRE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) },
[IFLA_GRE_TTL] = { .type = NLA_U8 },
[IFLA_GRE_TOS] = { .type = NLA_U8 },
[IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
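Throughout this series FIELD_SIZEOF() gives way to sizeof_field(), a pure rename. The macro (include/linux/stddef.h) measures a struct member without needing an instance:

#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))

/* the policy entry above therefore pins the attribute length to
 * sizeof(((struct iphdr *)0)->saddr), i.e. 4 bytes for an IPv4 address */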
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index cfb025606793..9b153c7fcbb4 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -580,8 +580,8 @@ static const struct nla_policy vti_policy[IFLA_VTI_MAX + 1] = {
[IFLA_VTI_LINK] = { .type = NLA_U32 },
[IFLA_VTI_IKEY] = { .type = NLA_U32 },
[IFLA_VTI_OKEY] = { .type = NLA_U32 },
- [IFLA_VTI_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
- [IFLA_VTI_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+ [IFLA_VTI_LOCAL] = { .len = sizeof_field(struct iphdr, saddr) },
+ [IFLA_VTI_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) },
[IFLA_VTI_FWMARK] = { .type = NLA_U32 },
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 09e2cae92956..f09fbc85b108 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1085,8 +1085,7 @@ do_error:
goto out;
out_err:
/* make sure we wake any epoll edge trigger waiter */
- if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
- err == -EAGAIN)) {
+ if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) {
sk->sk_write_space(sk);
tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
}
@@ -1417,8 +1416,7 @@ out_err:
sock_zerocopy_put_abort(uarg, true);
err = sk_stream_error(sk, flags, err);
/* make sure we wake any epoll edge trigger waiter */
- if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
- err == -EAGAIN)) {
+ if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) {
sk->sk_write_space(sk);
tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
}
@@ -1778,6 +1776,8 @@ static int tcp_zerocopy_receive(struct sock *sk,
while (length + PAGE_SIZE <= zc->length) {
if (zc->recv_skip_hint < PAGE_SIZE) {
if (skb) {
+ if (zc->recv_skip_hint > 0)
+ break;
skb = skb->next;
offset = seq - TCP_SKB_CB(skb)->seq;
} else {
@@ -3947,7 +3947,7 @@ void __init tcp_init(void)
BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE);
BUILD_BUG_ON(sizeof(struct tcp_skb_cb) >
- FIELD_SIZEOF(struct sk_buff, cb));
+ sizeof_field(struct sk_buff, cb));
percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
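Both EPOLLOUT hunks above tighten the wakeup condition from "the write queue is empty" to "both the write and retransmit queues are empty": if the rtx queue still holds data, incoming ACKs will eventually open write space and wake the application through the regular path, so an early edge-triggered wakeup is unwarranted. The helper lives in include/net/tcp.h, roughly as:

static inline bool tcp_rtx_queue_empty(const struct sock *sk)
{
	return RB_EMPTY_ROOT(&sk->tcp_rtx_queue);
}

static inline bool tcp_rtx_and_write_queues_empty(const struct sock *sk)
{
	return tcp_rtx_queue_empty(sk) && tcp_write_queue_empty(sk);
}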
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 32772d6ded4e..a6545ef0d27b 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -306,7 +306,8 @@ static u32 bbr_tso_segs_goal(struct sock *sk)
/* Sort of tcp_tso_autosize() but ignoring
* driver provided sk_gso_max_size.
*/
- bytes = min_t(unsigned long, sk->sk_pacing_rate >> sk->sk_pacing_shift,
+ bytes = min_t(unsigned long,
+ sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
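sk_pacing_shift can be updated locklessly (wifi drivers tune it via sk_pacing_shift_update() without holding the socket lock), so this read, like the two in tcp_output.c below, gains a READ_ONCE() to pair with the writer and avoid load tearing. For reference, a sketch of the writer-side helper from include/net/sock.h (shape from memory, hedged):

static inline void sk_pacing_shift_update(struct sock *sk, int val)
{
	if (!sk || !sk_fullsock(sk) || READ_ONCE(sk->sk_pacing_shift) == val)
		return;
	WRITE_ONCE(sk->sk_pacing_shift, val);
}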
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 26637fce324d..6f52e5288a6f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2147,13 +2147,14 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
struct tcp_iter_state *st = seq->private;
struct net *net = seq_file_net(seq);
struct inet_listen_hashbucket *ilb;
+ struct hlist_nulls_node *node;
struct sock *sk = cur;
if (!sk) {
get_head:
ilb = &tcp_hashinfo.listening_hash[st->bucket];
spin_lock(&ilb->lock);
- sk = sk_head(&ilb->head);
+ sk = sk_nulls_head(&ilb->nulls_head);
st->offset = 0;
goto get_sk;
}
@@ -2161,9 +2162,9 @@ get_head:
++st->num;
++st->offset;
- sk = sk_next(sk);
+ sk = sk_nulls_next(sk);
get_sk:
- sk_for_each_from(sk) {
+ sk_nulls_for_each_from(sk, node) {
if (!net_eq(sock_net(sk), net))
continue;
if (sk->sk_family == afinfo->family)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b184f03d7437..1f7735ca8f22 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1725,7 +1725,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
u32 bytes, segs;
bytes = min_t(unsigned long,
- sk->sk_pacing_rate >> sk->sk_pacing_shift,
+ sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
/* Goal is to send at least one packet per ms,
@@ -2260,7 +2260,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
limit = max_t(unsigned long,
2 * skb->truesize,
- sk->sk_pacing_rate >> sk->sk_pacing_shift);
+ sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift));
if (sk->sk_pacing_status == SK_PACING_NONE)
limit = min_t(unsigned long, limit,
sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
@@ -2438,6 +2438,14 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
if (tcp_small_queue_check(sk, skb, 0))
break;
+ /* Argh, we hit an empty skb(), presumably a thread
+ * is sleeping in sendmsg()/sk_stream_wait_memory().
+ * We do not want to send a pure-ack packet and have
+ * a strange looking rtx queue with empty packet(s).
+ */
+ if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq)
+ break;
+
if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
break;
@@ -3121,7 +3129,7 @@ void sk_forced_mem_schedule(struct sock *sk, int size)
*/
void tcp_send_fin(struct sock *sk)
{
- struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk);
+ struct sk_buff *skb, *tskb, *tail = tcp_write_queue_tail(sk);
struct tcp_sock *tp = tcp_sk(sk);
/* Optimization, tack on the FIN if we have one skb in write queue and
@@ -3129,6 +3137,7 @@ void tcp_send_fin(struct sock *sk)
* Note: in the latter case, FIN packet will be sent after a timeout,
* as TCP stack thinks it has already been transmitted.
*/
+ tskb = tail;
if (!tskb && tcp_under_memory_pressure(sk))
tskb = skb_rb_last(&sk->tcp_rtx_queue);
@@ -3136,7 +3145,7 @@ void tcp_send_fin(struct sock *sk)
TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
TCP_SKB_CB(tskb)->end_seq++;
tp->write_seq++;
- if (tcp_write_queue_empty(sk)) {
+ if (!tail) {
/* This means tskb was already sent.
* Pretend we included the FIN on previous transmit.
* We need to set tp->snd_nxt to the value it would have
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 98d82305d6de..39d861d00377 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5231,16 +5231,16 @@ static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb,
return -EINVAL;
}
+ if (!netlink_strict_get_check(skb))
+ return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
+ ifa_ipv6_policy, extack);
+
ifm = nlmsg_data(nlh);
if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get address request");
return -EINVAL;
}
- if (!netlink_strict_get_check(skb))
- return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
- ifa_ipv6_policy, extack);
-
err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
ifa_ipv6_policy, extack);
if (err)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 7bae6a91b487..b1e9a10e1133 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -370,6 +370,21 @@ static int call_fib6_entry_notifier(struct notifier_block *nb,
return call_fib6_notifier(nb, event_type, &info.info);
}
+static int call_fib6_multipath_entry_notifier(struct notifier_block *nb,
+ enum fib_event_type event_type,
+ struct fib6_info *rt,
+ unsigned int nsiblings,
+ struct netlink_ext_ack *extack)
+{
+ struct fib6_entry_notifier_info info = {
+ .info.extack = extack,
+ .rt = rt,
+ .nsiblings = nsiblings,
+ };
+
+ return call_fib6_notifier(nb, event_type, &info.info);
+}
+
int call_fib6_entry_notifiers(struct net *net,
enum fib_event_type event_type,
struct fib6_info *rt,
@@ -400,6 +415,17 @@ int call_fib6_multipath_entry_notifiers(struct net *net,
return call_fib6_notifiers(net, event_type, &info.info);
}
+int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt)
+{
+ struct fib6_entry_notifier_info info = {
+ .rt = rt,
+ .nsiblings = rt->fib6_nsiblings,
+ };
+
+ rt->fib6_table->fib_seq++;
+ return call_fib6_notifiers(net, FIB_EVENT_ENTRY_REPLACE, &info.info);
+}
+
struct fib6_dump_arg {
struct net *net;
struct notifier_block *nb;
@@ -408,22 +434,29 @@ struct fib6_dump_arg {
static int fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
{
- if (rt == arg->net->ipv6.fib6_null_entry)
+ enum fib_event_type fib_event = FIB_EVENT_ENTRY_REPLACE;
+ int err;
+
+ if (!rt || rt == arg->net->ipv6.fib6_null_entry)
return 0;
- return call_fib6_entry_notifier(arg->nb, FIB_EVENT_ENTRY_ADD,
- rt, arg->extack);
+
+ if (rt->fib6_nsiblings)
+ err = call_fib6_multipath_entry_notifier(arg->nb, fib_event,
+ rt,
+ rt->fib6_nsiblings,
+ arg->extack);
+ else
+ err = call_fib6_entry_notifier(arg->nb, fib_event, rt,
+ arg->extack);
+
+ return err;
}
static int fib6_node_dump(struct fib6_walker *w)
{
- struct fib6_info *rt;
- int err = 0;
+ int err;
- for_each_fib6_walker_rt(w) {
- err = fib6_rt_dump(rt, w->args);
- if (err)
- break;
- }
+ err = fib6_rt_dump(w->leaf, w->args);
w->leaf = NULL;
return err;
}
@@ -1039,6 +1072,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
(info->nlh->nlmsg_flags & NLM_F_CREATE));
int found = 0;
bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
+ bool notify_sibling_rt = false;
u16 nlflags = NLM_F_EXCL;
int err;
@@ -1130,6 +1164,7 @@ next_iter:
/* Find the first route that have the same metric */
sibling = leaf;
+ notify_sibling_rt = true;
while (sibling) {
if (sibling->fib6_metric == rt->fib6_metric &&
rt6_qualify_for_ecmp(sibling)) {
@@ -1139,6 +1174,7 @@ next_iter:
}
sibling = rcu_dereference_protected(sibling->fib6_next,
lockdep_is_held(&rt->fib6_table->tb6_lock));
+ notify_sibling_rt = false;
}
/* For each sibling in the list, increment the counter of
	 * siblings. BUG() if counters do not match, list of siblings
@@ -1165,10 +1201,21 @@ next_iter:
add:
nlflags |= NLM_F_CREATE;
- if (!info->skip_notify_kernel) {
+ /* The route should only be notified if it is the first
+ * route in the node or if it is added as a sibling
+ * route to the first route in the node.
+ */
+ if (!info->skip_notify_kernel &&
+ (notify_sibling_rt || ins == &fn->leaf)) {
+ enum fib_event_type fib_event;
+
+ if (notify_sibling_rt)
+ fib_event = FIB_EVENT_ENTRY_APPEND;
+ else
+ fib_event = FIB_EVENT_ENTRY_REPLACE;
err = call_fib6_entry_notifiers(info->nl_net,
- FIB_EVENT_ENTRY_ADD,
- rt, extack);
+ fib_event, rt,
+ extack);
if (err) {
struct fib6_info *sibling, *next_sibling;
@@ -1212,7 +1259,7 @@ add:
return -ENOENT;
}
- if (!info->skip_notify_kernel) {
+ if (!info->skip_notify_kernel && ins == &fn->leaf) {
err = call_fib6_entry_notifiers(info->nl_net,
FIB_EVENT_ENTRY_REPLACE,
rt, extack);
@@ -1845,13 +1892,29 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
struct fib6_info __rcu **rtp, struct nl_info *info)
{
+ struct fib6_info *leaf, *replace_rt = NULL;
struct fib6_walker *w;
struct fib6_info *rt = rcu_dereference_protected(*rtp,
lockdep_is_held(&table->tb6_lock));
struct net *net = info->nl_net;
+ bool notify_del = false;
RT6_TRACE("fib6_del_route\n");
+ /* If the deleted route is the first in the node and it is not part of
+ * a multipath route, then we need to replace it with the next route
+	 * in the node, if one exists.
+ */
+ leaf = rcu_dereference_protected(fn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ if (leaf == rt && !rt->fib6_nsiblings) {
+ if (rcu_access_pointer(rt->fib6_next))
+ replace_rt = rcu_dereference_protected(rt->fib6_next,
+ lockdep_is_held(&table->tb6_lock));
+ else
+ notify_del = true;
+ }
+
/* Unlink it */
*rtp = rt->fib6_next;
rt->fib6_node = NULL;
@@ -1869,6 +1932,14 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
if (rt->fib6_nsiblings) {
struct fib6_info *sibling, *next_sibling;
+ /* The route is deleted from a multipath route. If this
+ * multipath route is the first route in the node, then we need
+ * to emit a delete notification. Otherwise, we need to skip
+ * the notification.
+ */
+ if (rt->fib6_metric == leaf->fib6_metric &&
+ rt6_qualify_for_ecmp(leaf))
+ notify_del = true;
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings, fib6_siblings)
sibling->fib6_nsiblings--;
@@ -1904,8 +1975,13 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
fib6_purge_rt(rt, fn, net);
- if (!info->skip_notify_kernel)
- call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
+ if (!info->skip_notify_kernel) {
+ if (notify_del)
+ call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
+ rt, NULL);
+ else if (replace_rt)
+ call_fib6_entry_notifiers_replace(net, replace_rt);
+ }
if (!info->skip_notify)
inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 923034c52ce4..9d0965252ddf 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -2170,8 +2170,8 @@ static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
[IFLA_GRE_IKEY] = { .type = NLA_U32 },
[IFLA_GRE_OKEY] = { .type = NLA_U32 },
- [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
- [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct ipv6hdr, daddr) },
+ [IFLA_GRE_LOCAL] = { .len = sizeof_field(struct ipv6hdr, saddr) },
+ [IFLA_GRE_REMOTE] = { .len = sizeof_field(struct ipv6hdr, daddr) },
[IFLA_GRE_TTL] = { .type = NLA_U8 },
[IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 },
[IFLA_GRE_FLOWINFO] = { .type = NLA_U32 },
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b59940416cb5..4b8659e077d3 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3749,6 +3749,7 @@ static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
struct fib6_info *sibling, *next_sibling;
+ struct fib6_node *fn;
/* prefer to send a single notification with all hops */
skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
@@ -3764,12 +3765,32 @@ static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
info->skip_notify = 1;
}
+ /* 'rt' points to the first sibling route. If it is not the
+ * leaf, then we do not need to send a notification. Otherwise,
+ * we need to check if the last sibling has a next route or not
+ * and emit a replace or delete notification, respectively.
+ */
info->skip_notify_kernel = 1;
- call_fib6_multipath_entry_notifiers(net,
- FIB_EVENT_ENTRY_DEL,
- rt,
- rt->fib6_nsiblings,
- NULL);
+ fn = rcu_dereference_protected(rt->fib6_node,
+ lockdep_is_held(&table->tb6_lock));
+ if (rcu_access_pointer(fn->leaf) == rt) {
+ struct fib6_info *last_sibling, *replace_rt;
+
+ last_sibling = list_last_entry(&rt->fib6_siblings,
+ struct fib6_info,
+ fib6_siblings);
+ replace_rt = rcu_dereference_protected(
+ last_sibling->fib6_next,
+ lockdep_is_held(&table->tb6_lock));
+ if (replace_rt)
+ call_fib6_entry_notifiers_replace(net,
+ replace_rt);
+ else
+ call_fib6_multipath_entry_notifiers(net,
+ FIB_EVENT_ENTRY_DEL,
+ rt, rt->fib6_nsiblings,
+ NULL);
+ }
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings,
fib6_siblings) {
@@ -5017,12 +5038,37 @@ static void ip6_route_mpath_notify(struct fib6_info *rt,
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
}
+static bool ip6_route_mpath_should_notify(const struct fib6_info *rt)
+{
+ bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
+ bool should_notify = false;
+ struct fib6_info *leaf;
+ struct fib6_node *fn;
+
+ rcu_read_lock();
+ fn = rcu_dereference(rt->fib6_node);
+ if (!fn)
+ goto out;
+
+ leaf = rcu_dereference(fn->leaf);
+ if (!leaf)
+ goto out;
+
+ if (rt == leaf ||
+ (rt_can_ecmp && rt->fib6_metric == leaf->fib6_metric &&
+ rt6_qualify_for_ecmp(leaf)))
+ should_notify = true;
+out:
+ rcu_read_unlock();
+
+ return should_notify;
+}
+
static int ip6_route_multipath_add(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
struct fib6_info *rt_notif = NULL, *rt_last = NULL;
struct nl_info *info = &cfg->fc_nlinfo;
- enum fib_event_type event_type;
struct fib6_config r_cfg;
struct rtnexthop *rtnh;
struct fib6_info *rt;
@@ -5147,13 +5193,27 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
nhn++;
}
- event_type = replace ? FIB_EVENT_ENTRY_REPLACE : FIB_EVENT_ENTRY_ADD;
- err = call_fib6_multipath_entry_notifiers(info->nl_net, event_type,
- rt_notif, nhn - 1, extack);
- if (err) {
- /* Delete all the siblings that were just added */
- err_nh = NULL;
- goto add_errout;
+ /* An in-kernel notification should only be sent in case the new
+ * multipath route is added as the first route in the node, or if
+ * it was appended to it. We pass 'rt_notif' since it is the first
+ * sibling and might allow us to skip some checks in the replace case.
+ */
+ if (ip6_route_mpath_should_notify(rt_notif)) {
+ enum fib_event_type fib_event;
+
+ if (rt_notif->fib6_nsiblings != nhn - 1)
+ fib_event = FIB_EVENT_ENTRY_APPEND;
+ else
+ fib_event = FIB_EVENT_ENTRY_REPLACE;
+
+ err = call_fib6_multipath_entry_notifiers(info->nl_net,
+ fib_event, rt_notif,
+ nhn - 1, extack);
+ if (err) {
+ /* Delete all the siblings that were just added */
+ err_nh = NULL;
+ goto add_errout;
+ }
}
/* success ... tell user about new route */
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index ebb62a4ebe30..c4bdcbc84b07 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -50,7 +50,7 @@ static struct iucv_interface *pr_iucv;
static const u8 iprm_shutdown[8] =
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01};
-#define TRGCLS_SIZE FIELD_SIZEOF(struct iucv_message, class)
+#define TRGCLS_SIZE sizeof_field(struct iucv_message, class)
#define __iucv_sock_wait(sk, condition, timeo, ret) \
do { \
diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c
index 204a8351efff..c29170e767a8 100644
--- a/net/llc/llc_station.c
+++ b/net/llc/llc_station.c
@@ -32,7 +32,7 @@ static int llc_stat_ev_rx_null_dsap_xid_c(struct sk_buff *skb)
return LLC_PDU_IS_CMD(pdu) && /* command PDU */
LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */
LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_XID &&
- !pdu->dsap ? 0 : 1; /* NULL DSAP value */
+ !pdu->dsap; /* NULL DSAP value */
}
static int llc_stat_ev_rx_null_dsap_test_c(struct sk_buff *skb)
@@ -42,7 +42,7 @@ static int llc_stat_ev_rx_null_dsap_test_c(struct sk_buff *skb)
return LLC_PDU_IS_CMD(pdu) && /* command PDU */
LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */
LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_TEST &&
- !pdu->dsap ? 0 : 1; /* NULL DSAP */
+ !pdu->dsap; /* NULL DSAP */
}
static int llc_station_ac_send_xid_r(struct sk_buff *skb)
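The two llc_station fixes above are worth dwelling on: `?:` binds looser than `&&`, so the old expressions parsed as `(all conditions) ? 0 : 1` and returned 0 precisely when the frame matched, inverting the predicate the comments describe. A standalone demonstration of the precedence trap (plain userspace C, names illustrative):

#include <stdio.h>

int main(void)
{
	int is_cmd = 1, is_u_type = 1, is_xid = 1, dsap = 0;

	/* old form: the whole conjunction becomes the ?: condition,
	 * so a fully matching frame yields 0, i.e. "no match" */
	int old = is_cmd && is_u_type && is_xid && !dsap ? 0 : 1;

	/* fixed form: the conjunction itself is the result */
	int new = is_cmd && is_u_type && is_xid && !dsap;

	printf("old=%d new=%d\n", old, new);	/* prints old=0 new=1 */
	return 0;
}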
diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c
index 63cb0028b02d..9fc2968856c0 100644
--- a/net/mac80211/airtime.c
+++ b/net/mac80211/airtime.c
@@ -442,7 +442,7 @@ u32 ieee80211_calc_rx_airtime(struct ieee80211_hw *hw,
return 0;
sband = hw->wiphy->bands[status->band];
- if (!sband || status->rate_idx > sband->n_bitrates)
+ if (!sband || status->rate_idx >= sband->n_bitrates)
return 0;
rate = &sband->bitrates[status->rate_idx];
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index b3c9001d1f43..c80b1e163ea4 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -201,8 +201,6 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf;
u64 rx_airtime = 0, tx_airtime = 0;
s64 deficit[IEEE80211_NUM_ACS];
- u32 q_depth[IEEE80211_NUM_ACS];
- u32 q_limit_l[IEEE80211_NUM_ACS], q_limit_h[IEEE80211_NUM_ACS];
ssize_t rv;
int ac;
@@ -214,6 +212,56 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
rx_airtime += sta->airtime[ac].rx_airtime;
tx_airtime += sta->airtime[ac].tx_airtime;
deficit[ac] = sta->airtime[ac].deficit;
+ spin_unlock_bh(&local->active_txq_lock[ac]);
+ }
+
+ p += scnprintf(p, bufsz + buf - p,
+ "RX: %llu us\nTX: %llu us\nWeight: %u\n"
+ "Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n",
+ rx_airtime, tx_airtime, sta->airtime_weight,
+ deficit[0], deficit[1], deficit[2], deficit[3]);
+
+ rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+ kfree(buf);
+ return rv;
+}
+
+static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ struct sta_info *sta = file->private_data;
+ struct ieee80211_local *local = sta->sdata->local;
+ int ac;
+
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+ spin_lock_bh(&local->active_txq_lock[ac]);
+ sta->airtime[ac].rx_airtime = 0;
+ sta->airtime[ac].tx_airtime = 0;
+ sta->airtime[ac].deficit = sta->airtime_weight;
+ spin_unlock_bh(&local->active_txq_lock[ac]);
+ }
+
+ return count;
+}
+STA_OPS_RW(airtime);
+
+static ssize_t sta_aql_read(struct file *file, char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ struct sta_info *sta = file->private_data;
+ struct ieee80211_local *local = sta->sdata->local;
+ size_t bufsz = 400;
+ char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf;
+ u32 q_depth[IEEE80211_NUM_ACS];
+ u32 q_limit_l[IEEE80211_NUM_ACS], q_limit_h[IEEE80211_NUM_ACS];
+ ssize_t rv;
+ int ac;
+
+ if (!buf)
+ return -ENOMEM;
+
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+ spin_lock_bh(&local->active_txq_lock[ac]);
q_limit_l[ac] = sta->airtime[ac].aql_limit_low;
q_limit_h[ac] = sta->airtime[ac].aql_limit_high;
spin_unlock_bh(&local->active_txq_lock[ac]);
@@ -221,12 +269,8 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
}
p += scnprintf(p, bufsz + buf - p,
- "RX: %llu us\nTX: %llu us\nWeight: %u\n"
- "Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n"
"Q depth: VO: %u us VI: %u us BE: %u us BK: %u us\n"
"Q limit[low/high]: VO: %u/%u VI: %u/%u BE: %u/%u BK: %u/%u\n",
- rx_airtime, tx_airtime, sta->airtime_weight,
- deficit[0], deficit[1], deficit[2], deficit[3],
q_depth[0], q_depth[1], q_depth[2], q_depth[3],
q_limit_l[0], q_limit_h[0], q_limit_l[1], q_limit_h[1],
 		q_limit_l[2], q_limit_h[2], q_limit_l[3], q_limit_h[3]);
@@ -236,11 +280,10 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
return rv;
}
-static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf,
+static ssize_t sta_aql_write(struct file *file, const char __user *userbuf,
size_t count, loff_t *ppos)
{
struct sta_info *sta = file->private_data;
- struct ieee80211_local *local = sta->sdata->local;
u32 ac, q_limit_l, q_limit_h;
char _buf[100] = {}, *buf = _buf;
@@ -251,7 +294,7 @@ static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf,
return -EFAULT;
buf[sizeof(_buf) - 1] = '\0';
- if (sscanf(buf, "queue limit %u %u %u", &ac, &q_limit_l, &q_limit_h)
+ if (sscanf(buf, "limit %u %u %u", &ac, &q_limit_l, &q_limit_h)
!= 3)
return -EINVAL;
@@ -261,17 +304,10 @@ static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf,
sta->airtime[ac].aql_limit_low = q_limit_l;
sta->airtime[ac].aql_limit_high = q_limit_h;
- for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
- spin_lock_bh(&local->active_txq_lock[ac]);
- sta->airtime[ac].rx_airtime = 0;
- sta->airtime[ac].tx_airtime = 0;
- sta->airtime[ac].deficit = sta->airtime_weight;
- spin_unlock_bh(&local->active_txq_lock[ac]);
- }
-
return count;
}
-STA_OPS_RW(airtime);
+STA_OPS_RW(aql);
+
static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos)
@@ -996,6 +1032,10 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)
NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
DEBUGFS_ADD(airtime);
+ if (wiphy_ext_feature_isset(local->hw.wiphy,
+ NL80211_EXT_FEATURE_AQL))
+ DEBUGFS_ADD(aql);
+
debugfs_create_xul("driver_buffered_tids", 0400, sta->debugfs_dir,
&sta->driver_buffered_tids);
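With the split, per-station airtime statistics stay under the "airtime" debugfs file while the new "aql" file carries queue depths and limits, and its write handler now expects "limit <ac> <low> <high>" (the leading "queue" word is gone). A hypothetical userspace snippet driving it; the path follows the usual mac80211 debugfs layout, with phy name and MAC purely illustrative:

#include <stdio.h>

int main(void)
{
	/* /sys/kernel/debug/ieee80211/<phy>/netdev:<if>/stations/<mac>/aql */
	FILE *f = fopen("/sys/kernel/debug/ieee80211/phy0/netdev:wlan0/"
			"stations/02:00:00:00:01:00/aql", "w");

	if (!f)
		return 1;
	/* AC 0 (VO): low limit 5000, high limit 12000, matching the
	 * "limit %u %u %u" sscanf() in sta_aql_write() */
	fprintf(f, "limit 0 5000 12000\n");
	return fclose(f) ? 1 : 0;
}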
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 6cca0853f183..4c2b5ba3ac09 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -672,9 +672,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H;
}
- local->airtime_flags = AIRTIME_USE_TX |
- AIRTIME_USE_RX |
- AIRTIME_USE_AQL;
+ local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
local->aql_threshold = IEEE80211_AQL_THRESHOLD;
atomic_set(&local->aql_total_pending_airtime, 0);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 8eafd81e97b4..0f5f40678885 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1916,6 +1916,9 @@ void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
{
int tx_pending;
+ if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL))
+ return;
+
if (!tx_completed) {
if (sta)
atomic_add(tx_airtime,
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index ad5d8a4ae56d..c00e28585f9d 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -127,7 +127,6 @@ enum ieee80211_agg_stop_reason {
/* Debugfs flags to enable/disable use of RX/TX airtime in scheduler */
#define AIRTIME_USE_TX BIT(0)
#define AIRTIME_USE_RX BIT(1)
-#define AIRTIME_USE_AQL BIT(2)
struct airtime_info {
u64 rx_airtime;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index b696b9136f4c..a8a7306a1f56 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2256,6 +2256,15 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
payload[7]);
}
+ /*
+ * Initialize skb->priority for QoS frames. This is put in the TID field
+ * of the frame before passing it to the driver.
+ */
+ if (ieee80211_is_data_qos(hdr->frame_control)) {
+ u8 *p = ieee80211_get_qos_ctl(hdr);
+ skb->priority = *p & IEEE80211_QOS_CTL_TAG1D_MASK;
+ }
+
memset(info, 0, sizeof(*info));
info->flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
@@ -3668,7 +3677,7 @@ begin:
IEEE80211_SKB_CB(skb)->control.vif = vif;
- if (local->airtime_flags & AIRTIME_USE_AQL) {
+ if (wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) {
u32 airtime;
airtime = ieee80211_calc_expected_tx_airtime(hw, vif, txq->sta,
@@ -3790,7 +3799,7 @@ bool ieee80211_txq_airtime_check(struct ieee80211_hw *hw,
struct sta_info *sta;
struct ieee80211_local *local = hw_to_local(hw);
- if (!(local->airtime_flags & AIRTIME_USE_AQL))
+ if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL))
return true;
if (!txq->sta)
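These tx.c hunks complete the move from the debugfs-toggleable AIRTIME_USE_AQL flag to a proper driver capability: AQL only engages when the driver advertises NL80211_EXT_FEATURE_AQL. A sketch of the opt-in using the standard cfg80211 helper (driver and function names are illustrative):

static int example_wifi_register(struct ieee80211_hw *hw)
{
	/* declare support for Airtime Queue Limits before registration */
	wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_AQL);

	return ieee80211_register_hw(hw);
}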
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0af1898af2b8..f475fec84536 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -895,9 +895,10 @@ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
}
/* Resolve race on insertion if this protocol allows this. */
-static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
- enum ip_conntrack_info ctinfo,
- struct nf_conntrack_tuple_hash *h)
+static __cold noinline int
+nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo,
+ struct nf_conntrack_tuple_hash *h)
{
/* This is the conntrack entry already in hashes that won race. */
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
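A micro-optimization note on the nf_ct_resolve_clash() change: clash resolution only runs when two CPUs race to confirm the same tuple, so __cold plus noinline pushes it out of the hot insertion path -- out of line, into a cold text section, with branches to it predicted unlikely. The general shape of such an annotation (the kernel's compiler.h provides the macros; shown expanded for illustration):

#define __cold		__attribute__((__cold__))
#define noinline	__attribute__((__noinline__))

static __cold noinline int rare_recovery_path(int err)
{
	/* seldom-executed work kept out of the caller's i-cache */
	return err;
}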
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index d8d33ef52ce0..6a1c8f1f6171 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -3626,6 +3626,9 @@ static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list)
list_for_each_entry(net, net_exit_list, exit_list)
ctnetlink_net_exit(net);
+
+ /* wait for other cpus until they are done with ctnl_notifiers */
+ synchronize_rcu();
}
static struct pernet_operations ctnetlink_net_ops = {
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index c54c9a6cc981..de7a0d1e15c8 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -28,6 +28,7 @@ struct nf_flow_key {
struct flow_dissector_key_basic basic;
union {
struct flow_dissector_key_ipv4_addrs ipv4;
+ struct flow_dissector_key_ipv6_addrs ipv6;
};
struct flow_dissector_key_tcp tcp;
struct flow_dissector_key_ports tp;
@@ -57,6 +58,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);
@@ -69,9 +71,18 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
key->ipv4.dst = tuple->dst_v4.s_addr;
mask->ipv4.dst = 0xffffffff;
break;
+ case AF_INET6:
+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ key->basic.n_proto = htons(ETH_P_IPV6);
+ key->ipv6.src = tuple->src_v6;
+ memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
+ key->ipv6.dst = tuple->dst_v6;
+ memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
+ break;
default:
return -EOPNOTSUPP;
}
+ match->dissector.used_keys |= BIT(key->control.addr_type);
mask->basic.n_proto = 0xffff;
switch (tuple->l4proto) {
@@ -96,14 +107,13 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CONTROL) |
BIT(FLOW_DISSECTOR_KEY_BASIC) |
- BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
BIT(FLOW_DISSECTOR_KEY_PORTS);
return 0;
}
static void flow_offload_mangle(struct flow_action_entry *entry,
- enum flow_action_mangle_base htype,
- u32 offset, u8 *value, u8 *mask)
+ enum flow_action_mangle_base htype, u32 offset,
+ const __be32 *value, const __be32 *mask)
{
entry->id = FLOW_ACTION_MANGLE;
entry->mangle.htype = htype;
@@ -140,12 +150,12 @@ static int flow_offload_eth_src(struct net *net,
memcpy(&val16, dev->dev_addr, 2);
val = val16 << 16;
flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
- (u8 *)&val, (u8 *)&mask);
+ &val, &mask);
mask = ~0xffffffff;
memcpy(&val, dev->dev_addr + 2, 4);
flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
- (u8 *)&val, (u8 *)&mask);
+ &val, &mask);
dev_put(dev);
return 0;
@@ -170,13 +180,13 @@ static int flow_offload_eth_dst(struct net *net,
mask = ~0xffffffff;
memcpy(&val, n->ha, 4);
flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
- (u8 *)&val, (u8 *)&mask);
+ &val, &mask);
mask = ~0x0000ffff;
memcpy(&val16, n->ha + 4, 2);
val = val16;
flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
- (u8 *)&val, (u8 *)&mask);
+ &val, &mask);
neigh_release(n);
return 0;
@@ -206,7 +216,7 @@ static void flow_offload_ipv4_snat(struct net *net,
}
flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
- (u8 *)&addr, (u8 *)&mask);
+ &addr, &mask);
}
static void flow_offload_ipv4_dnat(struct net *net,
@@ -233,12 +243,12 @@ static void flow_offload_ipv4_dnat(struct net *net,
}
flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
- (u8 *)&addr, (u8 *)&mask);
+ &addr, &mask);
}
static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
unsigned int offset,
- u8 *addr, u8 *mask)
+ const __be32 *addr, const __be32 *mask)
{
struct flow_action_entry *entry;
int i;
@@ -246,8 +256,7 @@ static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32)) {
entry = flow_action_entry_next(flow_rule);
flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
- offset + i,
- &addr[i], mask);
+ offset + i, &addr[i], mask);
}
}
@@ -257,23 +266,23 @@ static void flow_offload_ipv6_snat(struct net *net,
struct nf_flow_rule *flow_rule)
{
u32 mask = ~htonl(0xffffffff);
- const u8 *addr;
+ const __be32 *addr;
u32 offset;
switch (dir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
- addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr;
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
offset = offsetof(struct ipv6hdr, saddr);
break;
case FLOW_OFFLOAD_DIR_REPLY:
- addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr;
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
offset = offsetof(struct ipv6hdr, daddr);
break;
default:
return;
}
- flow_offload_ipv6_mangle(flow_rule, offset, (u8 *)addr, (u8 *)&mask);
+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}
static void flow_offload_ipv6_dnat(struct net *net,
@@ -282,23 +291,23 @@ static void flow_offload_ipv6_dnat(struct net *net,
struct nf_flow_rule *flow_rule)
{
u32 mask = ~htonl(0xffffffff);
- const u8 *addr;
+ const __be32 *addr;
u32 offset;
switch (dir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
- addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr;
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
offset = offsetof(struct ipv6hdr, daddr);
break;
case FLOW_OFFLOAD_DIR_REPLY:
- addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr;
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
offset = offsetof(struct ipv6hdr, saddr);
break;
default:
return;
}
- flow_offload_ipv6_mangle(flow_rule, offset, (u8 *)addr, (u8 *)&mask);
+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}
static int flow_offload_l4proto(const struct flow_offload *flow)
@@ -326,25 +335,24 @@ static void flow_offload_port_snat(struct net *net,
struct nf_flow_rule *flow_rule)
{
struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
- u32 mask = ~htonl(0xffff0000);
- __be16 port;
+ u32 mask = ~htonl(0xffff0000), port;
u32 offset;
switch (dir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
- port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
offset = 0; /* offsetof(struct tcphdr, source); */
break;
case FLOW_OFFLOAD_DIR_REPLY:
- port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
offset = 0; /* offsetof(struct tcphdr, dest); */
break;
default:
- break;
+ return;
}
-
+ port = htonl(port << 16);
flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
- (u8 *)&port, (u8 *)&mask);
+ &port, &mask);
}
static void flow_offload_port_dnat(struct net *net,
@@ -353,25 +361,24 @@ static void flow_offload_port_dnat(struct net *net,
struct nf_flow_rule *flow_rule)
{
struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
- u32 mask = ~htonl(0xffff);
- __be16 port;
+ u32 mask = ~htonl(0xffff), port;
u32 offset;
switch (dir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
- port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
offset = 0; /* offsetof(struct tcphdr, source); */
break;
case FLOW_OFFLOAD_DIR_REPLY:
- port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
offset = 0; /* offsetof(struct tcphdr, dest); */
break;
default:
- break;
+ return;
}
-
+ port = htonl(port);
flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
- (u8 *)&port, (u8 *)&mask);
+ &port, &mask);
}
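The two port rewrites above illustrate the mangle granularity: FLOW_ACT_MANGLE_HDR_TYPE_TCP/UDP actions operate on aligned 32-bit words, and the first word of a TCP or UDP header holds both ports, source in the upper 16 bits. Hence the fixed packing (a walkthrough, with mask bits marking what is preserved, as in the ethernet mangles earlier in this file):

/*
 * word 0 of a tcp/udp header, network order: [ source port | dest port ]
 *
 * SNAT (rewrite the source port):
 *	port = ntohs(tuple->dst_port);	 host-order value
 *	val  = htonl(port << 16);	 lands in the upper 16 bits
 *	mask = ~htonl(0xffff0000);	 keep the lower half intact
 *
 * DNAT (rewrite the destination port):
 *	val  = htonl(port);		 lands in the lower 16 bits
 *	mask = ~htonl(0xffff);		 keep the upper half intact
 *
 * The old code handed a bare __be16 to a helper that consumes a full
 * 32-bit word, so value and mask did not line up within the word.
 */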
static void flow_offload_ipv4_checksum(struct net *net,
@@ -574,7 +581,7 @@ static int flow_offload_tuple_add(struct flow_offload_work *offload,
cls_flow.rule = flow_rule->rule;
list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list) {
- err = block_cb->cb(TC_SETUP_FT, &cls_flow,
+ err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
block_cb->cb_priv);
if (err < 0)
continue;
@@ -599,7 +606,7 @@ static void flow_offload_tuple_del(struct flow_offload_work *offload,
&offload->flow->tuplehash[dir].tuple, &extack);
list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list)
- block_cb->cb(TC_SETUP_FT, &cls_flow, block_cb->cb_priv);
+ block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow, block_cb->cb_priv);
offload->flow->flags |= FLOW_OFFLOAD_HW_DEAD;
}
@@ -656,7 +663,7 @@ static void flow_offload_tuple_stats(struct flow_offload_work *offload,
&offload->flow->tuplehash[dir].tuple, &extack);
list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list)
- block_cb->cb(TC_SETUP_FT, &cls_flow, block_cb->cb_priv);
+ block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow, block_cb->cb_priv);
memcpy(stats, &cls_flow.stats, sizeof(*stats));
}
@@ -822,7 +829,7 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
bo.extack = &extack;
INIT_LIST_HEAD(&bo.cb_list);
- err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, &bo);
if (err < 0)
return err;
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index a2b58de82600..f8f52ff99cfb 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -189,7 +189,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
goto err;
}
- if (!skb_dst_force(skb) && state->hook != NF_INET_PRE_ROUTING) {
+ if (skb_dst(skb) && !skb_dst_force(skb)) {
status = -ENETDOWN;
goto err;
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 062b73a83af0..273f3838318b 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4519,8 +4519,10 @@ static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
return err;
err = -EINVAL;
- if (desc.type != NFT_DATA_VALUE || desc.len != set->klen)
+ if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) {
+ nft_data_release(&elem.key.val, desc.type);
return err;
+ }
priv = set->ops->get(ctx->net, set, &elem, flags);
if (IS_ERR(priv))
@@ -4756,14 +4758,20 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (nla[NFTA_SET_ELEM_DATA] == NULL &&
!(flags & NFT_SET_ELEM_INTERVAL_END))
return -EINVAL;
- if (nla[NFTA_SET_ELEM_DATA] != NULL &&
- flags & NFT_SET_ELEM_INTERVAL_END)
- return -EINVAL;
} else {
if (nla[NFTA_SET_ELEM_DATA] != NULL)
return -EINVAL;
}
+ if ((flags & NFT_SET_ELEM_INTERVAL_END) &&
+ (nla[NFTA_SET_ELEM_DATA] ||
+ nla[NFTA_SET_ELEM_OBJREF] ||
+ nla[NFTA_SET_ELEM_TIMEOUT] ||
+ nla[NFTA_SET_ELEM_EXPIRATION] ||
+ nla[NFTA_SET_ELEM_USERDATA] ||
+ nla[NFTA_SET_ELEM_EXPR]))
+ return -EINVAL;
+
timeout = 0;
if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) {
if (!(set->flags & NFT_SET_TIMEOUT))
@@ -5476,7 +5484,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- type = nft_obj_type_get(net, objtype);
+ type = __nft_obj_type_get(objtype);
nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
return nf_tables_updobj(&ctx, type, nla[NFTA_OBJ_DATA], obj);
@@ -7595,7 +7603,7 @@ int nft_validate_register_load(enum nft_registers reg, unsigned int len)
return -EINVAL;
if (len == 0)
return -EINVAL;
- if (reg * NFT_REG32_SIZE + len > FIELD_SIZEOF(struct nft_regs, data))
+ if (reg * NFT_REG32_SIZE + len > sizeof_field(struct nft_regs, data))
return -ERANGE;
return 0;
@@ -7643,7 +7651,7 @@ int nft_validate_register_store(const struct nft_ctx *ctx,
if (len == 0)
return -EINVAL;
if (reg * NFT_REG32_SIZE + len >
- FIELD_SIZEOF(struct nft_regs, data))
+ sizeof_field(struct nft_regs, data))
return -ERANGE;
if (data != NULL && type != NFT_DATA_VALUE)
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 431f3b803bfb..a9ea29afb09f 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -44,6 +44,9 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net,
expr = nft_expr_next(expr);
}
+ if (num_actions == 0)
+ return ERR_PTR(-EOPNOTSUPP);
+
flow = nft_flow_rule_alloc(num_actions);
if (!flow)
return ERR_PTR(-ENOMEM);
@@ -577,6 +580,9 @@ static int nft_offload_netdev_event(struct notifier_block *this,
struct net *net = dev_net(dev);
struct nft_chain *chain;
+ if (event != NETDEV_UNREGISTER)
+ return NOTIFY_DONE;
+
mutex_lock(&net->nft.commit_mutex);
chain = __nft_offload_get_chain(dev);
if (chain)
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 7525063c25f5..de3a9596b7f1 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -236,7 +236,7 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
nla_strlcpy(helper->name,
tb[NFCTH_NAME], NF_CT_HELPER_NAME_LEN);
size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN]));
- if (size > FIELD_SIZEOF(struct nf_conn_help, data)) {
+ if (size > sizeof_field(struct nf_conn_help, data)) {
ret = -ENOMEM;
goto err2;
}
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index 02afa752dd2e..10e9d50e4e19 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -80,7 +80,7 @@ static int nft_bitwise_init(const struct nft_ctx *ctx,
tb[NFTA_BITWISE_MASK]);
if (err < 0)
return err;
- if (d1.len != priv->len) {
+ if (d1.type != NFT_DATA_VALUE || d1.len != priv->len) {
err = -EINVAL;
goto err1;
}
@@ -89,7 +89,7 @@ static int nft_bitwise_init(const struct nft_ctx *ctx,
tb[NFTA_BITWISE_XOR]);
if (err < 0)
goto err1;
- if (d2.len != priv->len) {
+ if (d2.type != NFT_DATA_VALUE || d2.len != priv->len) {
err = -EINVAL;
goto err2;
}
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index b8092069f868..8a28c127effc 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -81,6 +81,12 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
if (err < 0)
return err;
+ if (desc.type != NFT_DATA_VALUE) {
+ err = -EINVAL;
+ nft_data_release(&priv->data, desc.type);
+ return err;
+ }
+
priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]);
err = nft_validate_register_load(priv->sreg, desc.len);
if (err < 0)
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 46ca8bcca1bd..faea72c2df32 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -440,12 +440,12 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
switch (ctx->family) {
case NFPROTO_IPV4:
- len = FIELD_SIZEOF(struct nf_conntrack_tuple,
+ len = sizeof_field(struct nf_conntrack_tuple,
src.u3.ip);
break;
case NFPROTO_IPV6:
case NFPROTO_INET:
- len = FIELD_SIZEOF(struct nf_conntrack_tuple,
+ len = sizeof_field(struct nf_conntrack_tuple,
src.u3.ip6);
break;
default:
@@ -457,20 +457,20 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
if (tb[NFTA_CT_DIRECTION] == NULL)
return -EINVAL;
- len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u3.ip);
+ len = sizeof_field(struct nf_conntrack_tuple, src.u3.ip);
break;
case NFT_CT_SRC_IP6:
case NFT_CT_DST_IP6:
if (tb[NFTA_CT_DIRECTION] == NULL)
return -EINVAL;
- len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u3.ip6);
+ len = sizeof_field(struct nf_conntrack_tuple, src.u3.ip6);
break;
case NFT_CT_PROTO_SRC:
case NFT_CT_PROTO_DST:
if (tb[NFTA_CT_DIRECTION] == NULL)
return -EINVAL;
- len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u.all);
+ len = sizeof_field(struct nf_conntrack_tuple, src.u.all);
break;
case NFT_CT_BYTES:
case NFT_CT_PKTS:
@@ -551,7 +551,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
case NFT_CT_MARK:
if (tb[NFTA_CT_DIRECTION])
return -EINVAL;
- len = FIELD_SIZEOF(struct nf_conn, mark);
+ len = sizeof_field(struct nf_conn, mark);
break;
#endif
#ifdef CONFIG_NF_CONNTRACK_LABELS
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 39dc94f2491e..bc9fd98c5d6d 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -43,7 +43,7 @@ static int nft_masq_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
- u32 plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all);
+ u32 plen = sizeof_field(struct nf_nat_range, min_addr.all);
struct nft_masq *priv = nft_expr_priv(expr);
int err;
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index c3c93e95b46e..8b44a4de5329 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -141,10 +141,10 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
switch (family) {
case NFPROTO_IPV4:
- alen = FIELD_SIZEOF(struct nf_nat_range, min_addr.ip);
+ alen = sizeof_field(struct nf_nat_range, min_addr.ip);
break;
case NFPROTO_IPV6:
- alen = FIELD_SIZEOF(struct nf_nat_range, min_addr.ip6);
+ alen = sizeof_field(struct nf_nat_range, min_addr.ip6);
break;
default:
return -EAFNOSUPPORT;
@@ -171,7 +171,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
}
}
- plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all);
+ plen = sizeof_field(struct nf_nat_range, min_addr.all);
if (tb[NFTA_NAT_REG_PROTO_MIN]) {
priv->sreg_proto_min =
nft_parse_register(tb[NFTA_NAT_REG_PROTO_MIN]);
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
index 4701fa8a45e7..89efcc5a533d 100644
--- a/net/netfilter/nft_range.c
+++ b/net/netfilter/nft_range.c
@@ -66,11 +66,21 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr
if (err < 0)
return err;
+ if (desc_from.type != NFT_DATA_VALUE) {
+ err = -EINVAL;
+ goto err1;
+ }
+
err = nft_data_init(NULL, &priv->data_to, sizeof(priv->data_to),
&desc_to, tb[NFTA_RANGE_TO_DATA]);
if (err < 0)
goto err1;
+ if (desc_to.type != NFT_DATA_VALUE) {
+ err = -EINVAL;
+ goto err2;
+ }
+
if (desc_from.len != desc_to.len) {
err = -EINVAL;
goto err2;
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index 43eeb1f609f1..5b779171565c 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -48,7 +48,7 @@ static int nft_redir_init(const struct nft_ctx *ctx,
unsigned int plen;
int err;
- plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all);
+ plen = sizeof_field(struct nf_nat_range, min_addr.all);
if (tb[NFTA_REDIR_REG_PROTO_MIN]) {
priv->sreg_proto_min =
nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MIN]);
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 57123259452f..a9f804f7a04a 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -74,8 +74,13 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
parent = rcu_dereference_raw(parent->rb_left);
continue;
}
- if (nft_rbtree_interval_end(rbe))
- goto out;
+ if (nft_rbtree_interval_end(rbe)) {
+ if (nft_set_is_anonymous(set))
+ return false;
+ parent = rcu_dereference_raw(parent->rb_left);
+ interval = NULL;
+ continue;
+ }
*ext = &rbe->ext;
return true;
@@ -88,7 +93,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
*ext = &interval->ext;
return true;
}
-out:
+
return false;
}
@@ -139,8 +144,10 @@ static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
if (flags & NFT_SET_ELEM_INTERVAL_END)
interval = rbe;
} else {
- if (!nft_set_elem_active(&rbe->ext, genmask))
+ if (!nft_set_elem_active(&rbe->ext, genmask)) {
parent = rcu_dereference_raw(parent->rb_left);
+ continue;
+ }
if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) ||
(*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) ==
@@ -148,7 +155,11 @@ static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
*elem = rbe;
return true;
}
- return false;
+
+ if (nft_rbtree_interval_end(rbe))
+ interval = NULL;
+
+ parent = rcu_dereference_raw(parent->rb_left);
}
}
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index f92a82c73880..4c33dfc9dab5 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -218,14 +218,14 @@ static int nft_tproxy_init(const struct nft_ctx *ctx,
switch (priv->family) {
case NFPROTO_IPV4:
- alen = FIELD_SIZEOF(union nf_inet_addr, in);
+ alen = sizeof_field(union nf_inet_addr, in);
err = nf_defrag_ipv4_enable(ctx->net);
if (err)
return err;
break;
#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
case NFPROTO_IPV6:
- alen = FIELD_SIZEOF(union nf_inet_addr, in6);
+ alen = sizeof_field(union nf_inet_addr, in6);
err = nf_defrag_ipv6_enable(ctx->net);
if (err)
return err;
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 2236455b10a3..37253d399c6b 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -30,7 +30,7 @@ static unsigned int jhash_rnd __read_mostly;
static unsigned int xt_rateest_hash(const char *name)
{
- return jhash(name, FIELD_SIZEOF(struct xt_rateest, name), jhash_rnd) &
+ return jhash(name, sizeof_field(struct xt_rateest, name), jhash_rnd) &
(RATEEST_HSIZE - 1);
}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 90b2ab9dd449..4e31721e7293 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2755,7 +2755,7 @@ static int __init netlink_proto_init(void)
if (err != 0)
goto out;
- BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
+ BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof_field(struct sk_buff, cb));
nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
if (!nl_table)
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index 78fe622eba65..11b554ce07ff 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -346,7 +346,7 @@ static int nci_uart_default_recv_buf(struct nci_uart *nu, const u8 *data,
nu->rx_packet_len = -1;
nu->rx_skb = nci_skb_alloc(nu->ndev,
NCI_MAX_PACKET_SIZE,
- GFP_KERNEL);
+ GFP_ATOMIC);
if (!nu->rx_skb)
return -ENOMEM;
}
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 4c8395462303..7fbfe2adfffa 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -161,16 +161,17 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
const struct nlattr *attr, int len);
static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ovs_action_push_mpls *mpls)
+ __be32 mpls_lse, __be16 mpls_ethertype, __u16 mac_len)
{
int err;
- err = skb_mpls_push(skb, mpls->mpls_lse, mpls->mpls_ethertype,
- skb->mac_len,
- ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET);
+ err = skb_mpls_push(skb, mpls_lse, mpls_ethertype, mac_len, !!mac_len);
if (err)
return err;
+ if (!mac_len)
+ key->mac_proto = MAC_PROTO_NONE;
+
invalidate_flow_key(key);
return 0;
}
@@ -185,6 +186,9 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
if (err)
return err;
+ if (ethertype == htons(ETH_P_TEB))
+ key->mac_proto = MAC_PROTO_ETHERNET;
+
invalidate_flow_key(key);
return 0;
}
@@ -1229,10 +1233,24 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
execute_hash(skb, key, a);
break;
- case OVS_ACTION_ATTR_PUSH_MPLS:
- err = push_mpls(skb, key, nla_data(a));
+ case OVS_ACTION_ATTR_PUSH_MPLS: {
+ struct ovs_action_push_mpls *mpls = nla_data(a);
+
+ err = push_mpls(skb, key, mpls->mpls_lse,
+ mpls->mpls_ethertype, skb->mac_len);
break;
+ }
+ case OVS_ACTION_ATTR_ADD_MPLS: {
+ struct ovs_action_add_mpls *mpls = nla_data(a);
+ __u16 mac_len = 0;
+
+ if (mpls->tun_flags & OVS_MPLS_L3_TUNNEL_FLAG_MASK)
+ mac_len = skb->mac_len;
+ err = push_mpls(skb, key, mpls->mpls_lse,
+ mpls->mpls_ethertype, mac_len);
+ break;
+ }
case OVS_ACTION_ATTR_POP_MPLS:
err = pop_mpls(skb, key, nla_get_be16(a));
break;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 1047e8043084..e3a37d22539c 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -2497,7 +2497,7 @@ static int __init dp_init(void)
{
int err;
- BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
+ BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof_field(struct sk_buff, cb));
pr_info("Open vSwitch switching datapath\n");
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index fd8ed766bdd1..758a8c77f736 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -37,7 +37,7 @@ enum sw_flow_mac_proto {
* matching for small options.
*/
#define TUN_METADATA_OFFSET(opt_len) \
- (FIELD_SIZEOF(struct sw_flow_key, tun_opts) - opt_len)
+ (sizeof_field(struct sw_flow_key, tun_opts) - opt_len)
#define TUN_METADATA_OPTS(flow_key, opt_len) \
((void *)((flow_key)->tun_opts + TUN_METADATA_OFFSET(opt_len)))
@@ -52,7 +52,7 @@ struct vlan_head {
#define OVS_SW_FLOW_KEY_METADATA_SIZE \
(offsetof(struct sw_flow_key, recirc_id) + \
- FIELD_SIZEOF(struct sw_flow_key, recirc_id))
+ sizeof_field(struct sw_flow_key, recirc_id))
struct ovs_key_nsh {
struct ovs_nsh_key_base base;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 65c2e3458ff5..7da4230627f5 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -79,6 +79,7 @@ static bool actions_may_change_flow(const struct nlattr *actions)
case OVS_ACTION_ATTR_SET_MASKED:
case OVS_ACTION_ATTR_METER:
case OVS_ACTION_ATTR_CHECK_PKT_LEN:
+ case OVS_ACTION_ATTR_ADD_MPLS:
default:
return true;
}
@@ -3005,6 +3006,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
[OVS_ACTION_ATTR_METER] = sizeof(u32),
[OVS_ACTION_ATTR_CLONE] = (u32)-1,
[OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
+ [OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls),
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
@@ -3072,6 +3074,33 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
case OVS_ACTION_ATTR_RECIRC:
break;
+ case OVS_ACTION_ATTR_ADD_MPLS: {
+ const struct ovs_action_add_mpls *mpls = nla_data(a);
+
+ if (!eth_p_mpls(mpls->mpls_ethertype))
+ return -EINVAL;
+
+ if (mpls->tun_flags & OVS_MPLS_L3_TUNNEL_FLAG_MASK) {
+ if (vlan_tci & htons(VLAN_CFI_MASK) ||
+ (eth_type != htons(ETH_P_IP) &&
+ eth_type != htons(ETH_P_IPV6) &&
+ eth_type != htons(ETH_P_ARP) &&
+ eth_type != htons(ETH_P_RARP) &&
+ !eth_p_mpls(eth_type)))
+ return -EINVAL;
+ mpls_label_count++;
+ } else {
+ if (mac_proto == MAC_PROTO_ETHERNET) {
+ mpls_label_count = 1;
+ mac_proto = MAC_PROTO_NONE;
+ } else {
+ mpls_label_count++;
+ }
+ }
+ eth_type = mpls->mpls_ethertype;
+ break;
+ }
+
case OVS_ACTION_ATTR_PUSH_MPLS: {
const struct ovs_action_push_mpls *mpls = nla_data(a);
@@ -3109,6 +3138,11 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
* recirculation.
*/
proto = nla_get_be16(a);
+
+ if (proto == htons(ETH_P_TEB) &&
+ mac_proto != MAC_PROTO_NONE)
+ return -EINVAL;
+
mpls_label_count--;
if (!eth_p_mpls(proto) || !mpls_label_count)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 53c1d41fb1c9..3bec515ccde3 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -520,7 +520,7 @@ static int prb_calc_retire_blk_tmo(struct packet_sock *po,
int blk_size_in_bytes)
{
struct net_device *dev;
- unsigned int mbits = 0, msec = 0, div = 0, tmo = 0;
+ unsigned int mbits, div;
struct ethtool_link_ksettings ecmd;
int err;
@@ -532,30 +532,25 @@ static int prb_calc_retire_blk_tmo(struct packet_sock *po,
}
err = __ethtool_get_link_ksettings(dev, &ecmd);
rtnl_unlock();
- if (!err) {
- /*
- * If the link speed is so slow you don't really
- * need to worry about perf anyways
- */
- if (ecmd.base.speed < SPEED_1000 ||
- ecmd.base.speed == SPEED_UNKNOWN) {
- return DEFAULT_PRB_RETIRE_TOV;
- } else {
- msec = 1;
- div = ecmd.base.speed / 1000;
- }
- }
+ if (err)
+ return DEFAULT_PRB_RETIRE_TOV;
+ /* If the link speed is so slow you don't really
+ * need to worry about perf anyway
+ */
+ if (ecmd.base.speed < SPEED_1000 ||
+ ecmd.base.speed == SPEED_UNKNOWN)
+ return DEFAULT_PRB_RETIRE_TOV;
+
+ div = ecmd.base.speed / 1000;
mbits = (blk_size_in_bytes * 8) / (1024 * 1024);
if (div)
mbits /= div;
- tmo = mbits * msec;
-
if (div)
- return tmo+1;
- return tmo;
+ return mbits + 1;
+ return mbits;
}
static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 461d75274fb3..971c73c7d34c 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -1002,10 +1002,13 @@ static void rfkill_sync_work(struct work_struct *work)
int __must_check rfkill_register(struct rfkill *rfkill)
{
static unsigned long rfkill_no;
- struct device *dev = &rfkill->dev;
+ struct device *dev;
int error;
- BUG_ON(!rfkill);
+ if (!rfkill)
+ return -EINVAL;
+
+ dev = &rfkill->dev;
mutex_lock(&rfkill_global_mutex);
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index d72ddb67bb74..9d3c4d2d893a 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -972,7 +972,7 @@ static int __init af_rxrpc_init(void)
int ret = -1;
unsigned int tmp;
- BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > FIELD_SIZEOF(struct sk_buff, cb));
+ BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > sizeof_field(struct sk_buff, cb));
get_random_bytes(&tmp, sizeof(tmp));
tmp &= 0x3fffffff;
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2985509147a2..b1e7ec726958 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -409,6 +409,23 @@ config NET_SCH_PLUG
To compile this code as a module, choose M here: the
module will be called sch_plug.
+config NET_SCH_ETS
+ tristate "Enhanced transmission selection scheduler (ETS)"
+ help
+ The Enhanced Transmission Selection scheduler is a classful
+ queuing discipline that merges the functionality of the PRIO and
+ DRR qdiscs into one scheduler. ETS makes it easy to configure a
+ set of strict and bandwidth-sharing bands to implement the
+ transmission selection described in 802.1Qaz.
+
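+ An illustrative setup with the iproute2 tc tool (syntax per the
+ tc-ets(8) man page; the device name and numbers are examples
+ only), using two strict and two bandwidth-sharing bands:
+
+   tc qdisc add dev eth0 root handle 1: \
+      ets strict 2 quanta 3000 1500 priomap 3 3 3 3 2 2 2 2
+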
+ Say Y here if you want to use the ETS packet scheduling
+ algorithm.
+
+ To compile this code as a module, choose M here: the module
+ will be called sch_ets.
+
+ If unsure, say N.
+
menuconfig NET_SCH_DEFAULT
bool "Allow override default queue discipline"
---help---
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 415d1e1f237e..bc8856b865ff 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o
obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o
obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o
+obj-$(CONFIG_NET_SCH_ETS) += sch_ets.o
obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o
obj-$(CONFIG_NET_SCH_SKBPRIO) += sch_skbprio.o
obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index bf2d69335d4b..f685c0d73708 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -312,7 +312,7 @@ static void tcf_ct_act_set_labels(struct nf_conn *ct,
u32 *labels_m)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)
- size_t labels_sz = FIELD_SIZEOF(struct tcf_ct_params, labels);
+ size_t labels_sz = sizeof_field(struct tcf_ct_params, labels);
if (!memchr_inv(labels_m, 0, labels_sz))
return;
@@ -936,7 +936,7 @@ static struct tc_action_ops act_ct_ops = {
static __net_init int ct_init_net(struct net *net)
{
- unsigned int n_bits = FIELD_SIZEOF(struct tcf_ct_params, labels) * 8;
+ unsigned int n_bits = sizeof_field(struct tcf_ct_params, labels) * 8;
struct tc_ct_action_net *tn = net_generic(net, ct_net_id);
if (nf_connlabels_get(net, n_bits - 1)) {
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 6c68971d99df..0d125de54285 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1481,7 +1481,7 @@ static int fl_init_mask_hashtable(struct fl_flow_mask *mask)
}
#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
-#define FL_KEY_MEMBER_SIZE(member) FIELD_SIZEOF(struct fl_flow_key, member)
+#define FL_KEY_MEMBER_SIZE(member) sizeof_field(struct fl_flow_key, member)
#define FL_KEY_IS_MASKED(mask, member) \
memchr_inv(((char *)mask) + FL_KEY_MEMBER_OFFSET(member), \
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index a0e6fac613de..66c6bcec16cb 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -1108,10 +1108,33 @@ erridr:
return err;
}
+static bool u32_hnode_empty(struct tc_u_hnode *ht, bool *non_root_ht)
+{
+ int i;
+
+ if (!ht)
+ return true;
+ if (!ht->is_root) {
+ *non_root_ht = true;
+ return false;
+ }
+ if (*non_root_ht)
+ return false;
+ if (ht->refcnt < 2)
+ return true;
+
+ for (i = 0; i <= ht->divisor; i++) {
+ if (rtnl_dereference(ht->ht[i]))
+ return false;
+ }
+ return true;
+}
+
static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg,
bool rtnl_held)
{
struct tc_u_common *tp_c = tp->data;
+ bool non_root_ht = false;
struct tc_u_hnode *ht;
struct tc_u_knode *n;
unsigned int h;
@@ -1124,6 +1147,8 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg,
ht = rtnl_dereference(ht->next)) {
if (ht->prio != tp->prio)
continue;
+ if (u32_hnode_empty(ht, &non_root_ht))
+ return;
if (arg->count >= arg->skip) {
if (arg->fn(tp, ht, arg) < 0) {
arg->stop = 1;
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index e0f40400f679..6cc3ab145513 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -173,8 +173,7 @@ struct cake_tin_data {
u64 tin_rate_bps;
u16 tin_rate_shft;
- u16 tin_quantum_prio;
- u16 tin_quantum_band;
+ u16 tin_quantum;
s32 tin_deficit;
u32 tin_backlog;
u32 tin_dropped;
@@ -1919,7 +1918,7 @@ begin:
while (b->tin_deficit < 0 ||
!(b->sparse_flow_count + b->bulk_flow_count)) {
if (b->tin_deficit <= 0)
- b->tin_deficit += b->tin_quantum_band;
+ b->tin_deficit += b->tin_quantum;
if (b->sparse_flow_count + b->bulk_flow_count)
empty = false;
@@ -2241,8 +2240,7 @@ static int cake_config_besteffort(struct Qdisc *sch)
cake_set_rate(b, rate, mtu,
us_to_ns(q->target), us_to_ns(q->interval));
- b->tin_quantum_band = 65535;
- b->tin_quantum_prio = 65535;
+ b->tin_quantum = 65535;
return 0;
}
@@ -2253,8 +2251,7 @@ static int cake_config_precedence(struct Qdisc *sch)
struct cake_sched_data *q = qdisc_priv(sch);
u32 mtu = psched_mtu(qdisc_dev(sch));
u64 rate = q->rate_bps;
- u32 quantum1 = 256;
- u32 quantum2 = 256;
+ u32 quantum = 256;
u32 i;
q->tin_cnt = 8;
@@ -2267,18 +2264,14 @@ static int cake_config_precedence(struct Qdisc *sch)
cake_set_rate(b, rate, mtu, us_to_ns(q->target),
us_to_ns(q->interval));
- b->tin_quantum_prio = max_t(u16, 1U, quantum1);
- b->tin_quantum_band = max_t(u16, 1U, quantum2);
+ b->tin_quantum = max_t(u16, 1U, quantum);
/* calculate next class's parameters */
rate *= 7;
rate >>= 3;
- quantum1 *= 3;
- quantum1 >>= 1;
-
- quantum2 *= 7;
- quantum2 >>= 3;
+ quantum *= 7;
+ quantum >>= 3;
}
return 0;
@@ -2347,8 +2340,7 @@ static int cake_config_diffserv8(struct Qdisc *sch)
struct cake_sched_data *q = qdisc_priv(sch);
u32 mtu = psched_mtu(qdisc_dev(sch));
u64 rate = q->rate_bps;
- u32 quantum1 = 256;
- u32 quantum2 = 256;
+ u32 quantum = 256;
u32 i;
q->tin_cnt = 8;
@@ -2364,18 +2356,14 @@ static int cake_config_diffserv8(struct Qdisc *sch)
cake_set_rate(b, rate, mtu, us_to_ns(q->target),
us_to_ns(q->interval));
- b->tin_quantum_prio = max_t(u16, 1U, quantum1);
- b->tin_quantum_band = max_t(u16, 1U, quantum2);
+ b->tin_quantum = max_t(u16, 1U, quantum);
/* calculate next class's parameters */
rate *= 7;
rate >>= 3;
- quantum1 *= 3;
- quantum1 >>= 1;
-
- quantum2 *= 7;
- quantum2 >>= 3;
+ quantum *= 7;
+ quantum >>= 3;
}
return 0;
@@ -2414,17 +2402,11 @@ static int cake_config_diffserv4(struct Qdisc *sch)
cake_set_rate(&q->tins[3], rate >> 2, mtu,
us_to_ns(q->target), us_to_ns(q->interval));
- /* priority weights */
- q->tins[0].tin_quantum_prio = quantum;
- q->tins[1].tin_quantum_prio = quantum >> 4;
- q->tins[2].tin_quantum_prio = quantum << 2;
- q->tins[3].tin_quantum_prio = quantum << 4;
-
/* bandwidth-sharing weights */
- q->tins[0].tin_quantum_band = quantum;
- q->tins[1].tin_quantum_band = quantum >> 4;
- q->tins[2].tin_quantum_band = quantum >> 1;
- q->tins[3].tin_quantum_band = quantum >> 2;
+ q->tins[0].tin_quantum = quantum;
+ q->tins[1].tin_quantum = quantum >> 4;
+ q->tins[2].tin_quantum = quantum >> 1;
+ q->tins[3].tin_quantum = quantum >> 2;
return 0;
}
@@ -2455,15 +2437,10 @@ static int cake_config_diffserv3(struct Qdisc *sch)
cake_set_rate(&q->tins[2], rate >> 2, mtu,
us_to_ns(q->target), us_to_ns(q->interval));
- /* priority weights */
- q->tins[0].tin_quantum_prio = quantum;
- q->tins[1].tin_quantum_prio = quantum >> 4;
- q->tins[2].tin_quantum_prio = quantum << 4;
-
/* bandwidth-sharing weights */
- q->tins[0].tin_quantum_band = quantum;
- q->tins[1].tin_quantum_band = quantum >> 4;
- q->tins[2].tin_quantum_band = quantum >> 2;
+ q->tins[0].tin_quantum = quantum;
+ q->tins[1].tin_quantum = quantum >> 4;
+ q->tins[2].tin_quantum = quantum >> 2;
return 0;
}
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
new file mode 100644
index 000000000000..a87e9159338c
--- /dev/null
+++ b/net/sched/sch_ets.c
@@ -0,0 +1,828 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * net/sched/sch_ets.c Enhanced Transmission Selection scheduler
+ *
+ * Description
+ * -----------
+ *
+ * The Enhanced Transmission Selection scheduler is a classful queuing
+ * discipline that merges the functionality of the PRIO and DRR qdiscs into
+ * one scheduler. ETS makes it easy to configure a set of strict and
+ * bandwidth-sharing bands to implement the transmission selection described
+ * in 802.1Qaz.
+ *
+ * Although ETS is technically classful, it is not possible to add and remove
+ * classes at will. Instead, one specifies the number of classes, how many are
+ * PRIO-like and how many DRR-like, and the quanta for the latter.
+ *
+ * Algorithm
+ * ---------
+ *
+ * The strict classes, if any, are tried for traffic first: first band 0; if
+ * it has no traffic, then band 1, and so on.
+ *
+ * When there is no traffic in any of the strict queues, the bandwidth-sharing
+ * ones are tried next. Each band is assigned a deficit counter, initialized
+ * to the "quantum" of that band. ETS maintains a list of active
+ * bandwidth-sharing bands whose qdiscs are non-empty. A packet is dequeued
+ * from the band at the head of the list if the packet size is smaller than
+ * or equal to the deficit counter; the deficit is then decreased by the
+ * packet size. If the counter is too small, it is increased by "quantum" and
+ * the scheduler moves on to the next band in the active list.
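+ *
+ * A worked example (numbers are illustrative, not mandated by 802.1Qaz):
+ * take two bandwidth-sharing bands with quanta of 3000 and 1500 bytes, both
+ * backlogged with 1500-byte packets. Band 0 starts with a deficit of 3000
+ * and dequeues two packets (3000 -> 1500 -> 0); the third packet no longer
+ * fits, so the deficit is increased back to 3000 and band 0 moves to the
+ * tail of the active list. Band 1 then dequeues a single packet the same
+ * way, so in steady state the two bands share bandwidth in a 2:1 ratio.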
+ */
+
+#include <linux/module.h>
+#include <net/gen_stats.h>
+#include <net/netlink.h>
+#include <net/pkt_cls.h>
+#include <net/pkt_sched.h>
+#include <net/sch_generic.h>
+
+struct ets_class {
+ struct list_head alist; /* In struct ets_sched.active. */
+ struct Qdisc *qdisc;
+ u32 quantum;
+ u32 deficit;
+ struct gnet_stats_basic_packed bstats;
+ struct gnet_stats_queue qstats;
+};
+
+struct ets_sched {
+ struct list_head active;
+ struct tcf_proto __rcu *filter_list;
+ struct tcf_block *block;
+ unsigned int nbands;
+ unsigned int nstrict;
+ u8 prio2band[TC_PRIO_MAX + 1];
+ struct ets_class classes[TCQ_ETS_MAX_BANDS];
+};
+
+static const struct nla_policy ets_policy[TCA_ETS_MAX + 1] = {
+ [TCA_ETS_NBANDS] = { .type = NLA_U8 },
+ [TCA_ETS_NSTRICT] = { .type = NLA_U8 },
+ [TCA_ETS_QUANTA] = { .type = NLA_NESTED },
+ [TCA_ETS_PRIOMAP] = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy ets_priomap_policy[TCA_ETS_MAX + 1] = {
+ [TCA_ETS_PRIOMAP_BAND] = { .type = NLA_U8 },
+};
+
+static const struct nla_policy ets_quanta_policy[TCA_ETS_MAX + 1] = {
+ [TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy ets_class_policy[TCA_ETS_MAX + 1] = {
+ [TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 },
+};
+
+static int ets_quantum_parse(struct Qdisc *sch, const struct nlattr *attr,
+ unsigned int *quantum,
+ struct netlink_ext_ack *extack)
+{
+ *quantum = nla_get_u32(attr);
+ if (!*quantum) {
+ NL_SET_ERR_MSG(extack, "ETS quantum cannot be zero");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static struct ets_class *
+ets_class_from_arg(struct Qdisc *sch, unsigned long arg)
+{
+ struct ets_sched *q = qdisc_priv(sch);
+
+ return &q->classes[arg - 1];
+}
+
+static u32 ets_class_id(struct Qdisc *sch, const struct ets_class *cl)
+{
+ struct ets_sched *q = qdisc_priv(sch);
+ int band = cl - q->classes;
+
+ return TC_H_MAKE(sch->handle, band + 1);
+}
+
+static void ets_offload_change(struct Qdisc *sch)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct ets_sched *q = qdisc_priv(sch);
+ struct tc_ets_qopt_offload qopt;
+ unsigned int w_psum_prev = 0;
+ unsigned int q_psum = 0;
+ unsigned int q_sum = 0;
+ unsigned int quantum;
+ unsigned int w_psum;
+ unsigned int weight;
+ unsigned int i;
+
+ if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+ return;
+
+ qopt.command = TC_ETS_REPLACE;
+ qopt.handle = sch->handle;
+ qopt.parent = sch->parent;
+ qopt.replace_params.bands = q->nbands;
+ qopt.replace_params.qstats = &sch->qstats;
+ memcpy(&qopt.replace_params.priomap,
+ q->prio2band, sizeof(q->prio2band));
+
+ for (i = 0; i < q->nbands; i++)
+ q_sum += q->classes[i].quantum;
+
+ for (i = 0; i < q->nbands; i++) {
+ quantum = q->classes[i].quantum;
+ q_psum += quantum;
+ w_psum = quantum ? q_psum * 100 / q_sum : 0;
+ weight = w_psum - w_psum_prev;
+ w_psum_prev = w_psum;
+
+ qopt.replace_params.quanta[i] = quantum;
+ qopt.replace_params.weights[i] = weight;
+ }
+
+ dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETS, &qopt);
+}
+
+static void ets_offload_destroy(struct Qdisc *sch)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct tc_ets_qopt_offload qopt;
+
+ if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+ return;
+
+ qopt.command = TC_ETS_DESTROY;
+ qopt.handle = sch->handle;
+ qopt.parent = sch->parent;
+ dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETS, &qopt);
+}
+
+static void ets_offload_graft(struct Qdisc *sch, struct Qdisc *new,
+ struct Qdisc *old, unsigned long arg,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct tc_ets_qopt_offload qopt;
+
+ qopt.command = TC_ETS_GRAFT;
+ qopt.handle = sch->handle;
+ qopt.parent = sch->parent;
+ qopt.graft_params.band = arg - 1;
+ qopt.graft_params.child_handle = new->handle;
+
+ qdisc_offload_graft_helper(dev, sch, new, old, TC_SETUP_QDISC_ETS,
+ &qopt, extack);
+}
+
+static int ets_offload_dump(struct Qdisc *sch)
+{
+ struct tc_ets_qopt_offload qopt;
+
+ qopt.command = TC_ETS_STATS;
+ qopt.handle = sch->handle;
+ qopt.parent = sch->parent;
+ qopt.stats.bstats = &sch->bstats;
+ qopt.stats.qstats = &sch->qstats;
+
+ return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_ETS, &qopt);
+}
+
+static bool ets_class_is_strict(struct ets_sched *q, const struct ets_class *cl)
+{
+ unsigned int band = cl - q->classes;
+
+ return band < q->nstrict;
+}
+
+static int ets_class_change(struct Qdisc *sch, u32 classid, u32 parentid,
+ struct nlattr **tca, unsigned long *arg,
+ struct netlink_ext_ack *extack)
+{
+ struct ets_class *cl = ets_class_from_arg(sch, *arg);
+ struct ets_sched *q = qdisc_priv(sch);
+ struct nlattr *opt = tca[TCA_OPTIONS];
+ struct nlattr *tb[TCA_ETS_MAX + 1];
+ unsigned int quantum;
+ int err;
+
+ /* Classes can be added and removed only through the Qdisc_ops.change
+ * interface.
+ */
+ if (!cl) {
+ NL_SET_ERR_MSG(extack, "Fine-grained class addition and removal is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!opt) {
+ NL_SET_ERR_MSG(extack, "ETS options are required for this operation");
+ return -EINVAL;
+ }
+
+ err = nla_parse_nested(tb, TCA_ETS_MAX, opt, ets_class_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (!tb[TCA_ETS_QUANTA_BAND])
+ /* Nothing to configure. */
+ return 0;
+
+ if (ets_class_is_strict(q, cl)) {
+ NL_SET_ERR_MSG(extack, "Strict bands do not have a configurable quantum");
+ return -EINVAL;
+ }
+
+ err = ets_quantum_parse(sch, tb[TCA_ETS_QUANTA_BAND], &quantum,
+ extack);
+ if (err)
+ return err;
+
+ sch_tree_lock(sch);
+ cl->quantum = quantum;
+ sch_tree_unlock(sch);
+
+ ets_offload_change(sch);
+ return 0;
+}
+
+static int ets_class_graft(struct Qdisc *sch, unsigned long arg,
+ struct Qdisc *new, struct Qdisc **old,
+ struct netlink_ext_ack *extack)
+{
+ struct ets_class *cl = ets_class_from_arg(sch, arg);
+
+ if (!new) {
+ new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+ ets_class_id(sch, cl), NULL);
+ if (!new)
+ new = &noop_qdisc;
+ else
+ qdisc_hash_add(new, true);
+ }
+
+ *old = qdisc_replace(sch, new, &cl->qdisc);
+ ets_offload_graft(sch, new, *old, arg, extack);
+ return 0;
+}
+
+static struct Qdisc *ets_class_leaf(struct Qdisc *sch, unsigned long arg)
+{
+ struct ets_class *cl = ets_class_from_arg(sch, arg);
+
+ return cl->qdisc;
+}
+
+static unsigned long ets_class_find(struct Qdisc *sch, u32 classid)
+{
+ unsigned long band = TC_H_MIN(classid);
+ struct ets_sched *q = qdisc_priv(sch);
+
+ if (band - 1 >= q->nbands)
+ return 0;
+ return band;
+}
+
+static void ets_class_qlen_notify(struct Qdisc *sch, unsigned long arg)
+{
+ struct ets_class *cl = ets_class_from_arg(sch, arg);
+ struct ets_sched *q = qdisc_priv(sch);
+
+ /* We get notified about zero-length child Qdiscs as well if they are
+ * offloaded. Those aren't on the active list though, so don't attempt
+ * to remove them.
+ */
+ if (!ets_class_is_strict(q, cl) && sch->q.qlen)
+ list_del(&cl->alist);
+}
+
+static int ets_class_dump(struct Qdisc *sch, unsigned long arg,
+ struct sk_buff *skb, struct tcmsg *tcm)
+{
+ struct ets_class *cl = ets_class_from_arg(sch, arg);
+ struct ets_sched *q = qdisc_priv(sch);
+ struct nlattr *nest;
+
+ tcm->tcm_parent = TC_H_ROOT;
+ tcm->tcm_handle = ets_class_id(sch, cl);
+ tcm->tcm_info = cl->qdisc->handle;
+
+ nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
+ if (!nest)
+ goto nla_put_failure;
+ if (!ets_class_is_strict(q, cl)) {
+ if (nla_put_u32(skb, TCA_ETS_QUANTA_BAND, cl->quantum))
+ goto nla_put_failure;
+ }
+ return nla_nest_end(skb, nest);
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static int ets_class_dump_stats(struct Qdisc *sch, unsigned long arg,
+ struct gnet_dump *d)
+{
+ struct ets_class *cl = ets_class_from_arg(sch, arg);
+ struct Qdisc *cl_q = cl->qdisc;
+
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &cl_q->bstats) < 0 ||
+ qdisc_qstats_copy(d, cl_q) < 0)
+ return -1;
+
+ return 0;
+}
+
+static void ets_qdisc_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+ struct ets_sched *q = qdisc_priv(sch);
+ int i;
+
+ if (arg->stop)
+ return;
+
+ for (i = 0; i < q->nbands; i++) {
+ if (arg->count < arg->skip) {
+ arg->count++;
+ continue;
+ }
+ if (arg->fn(sch, i + 1, arg) < 0) {
+ arg->stop = 1;
+ break;
+ }
+ arg->count++;
+ }
+}
+
+static struct tcf_block *
+ets_qdisc_tcf_block(struct Qdisc *sch, unsigned long cl,
+ struct netlink_ext_ack *extack)
+{
+ struct ets_sched *q = qdisc_priv(sch);
+
+ if (cl) {
+ NL_SET_ERR_MSG(extack, "ETS classid must be zero");
+ return NULL;
+ }
+
+ return q->block;
+}
+
+static unsigned long ets_qdisc_bind_tcf(struct Qdisc *sch, unsigned long parent,
+ u32 classid)
+{
+ return ets_class_find(sch, classid);
+}
+
+static void ets_qdisc_unbind_tcf(struct Qdisc *sch, unsigned long arg)
+{
+}
+
+static struct ets_class *ets_classify(struct sk_buff *skb, struct Qdisc *sch,
+ int *qerr)
+{
+ struct ets_sched *q = qdisc_priv(sch);
+ u32 band = skb->priority;
+ struct tcf_result res;
+ struct tcf_proto *fl;
+ int err;
+
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+ if (TC_H_MAJ(skb->priority) != sch->handle) {
+ fl = rcu_dereference_bh(q->filter_list);
+ err = tcf_classify(skb, fl, &res, false);
+#ifdef CONFIG_NET_CLS_ACT
+ switch (err) {
+ case TC_ACT_STOLEN:
+ case TC_ACT_QUEUED:
+ case TC_ACT_TRAP:
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
+ case TC_ACT_SHOT:
+ return NULL;
+ }
+#endif
+ if (!fl || err < 0) {
+ if (TC_H_MAJ(band))
+ band = 0;
+ return &q->classes[q->prio2band[band & TC_PRIO_MAX]];
+ }
+ band = res.classid;
+ }
+ band = TC_H_MIN(band) - 1;
+ if (band >= q->nbands)
+ return &q->classes[q->prio2band[0]];
+ return &q->classes[band];
+}
+
+static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
+{
+ unsigned int len = qdisc_pkt_len(skb);
+ struct ets_sched *q = qdisc_priv(sch);
+ struct ets_class *cl;
+ int err = 0;
+ bool first;
+
+ cl = ets_classify(skb, sch, &err);
+ if (!cl) {
+ if (err & __NET_XMIT_BYPASS)
+ qdisc_qstats_drop(sch);
+ __qdisc_drop(skb, to_free);
+ return err;
+ }
+
+ first = !cl->qdisc->q.qlen;
+ err = qdisc_enqueue(skb, cl->qdisc, to_free);
+ if (unlikely(err != NET_XMIT_SUCCESS)) {
+ if (net_xmit_drop_count(err)) {
+ cl->qstats.drops++;
+ qdisc_qstats_drop(sch);
+ }
+ return err;
+ }
+
+ if (first && !ets_class_is_strict(q, cl)) {
+ list_add_tail(&cl->alist, &q->active);
+ cl->deficit = cl->quantum;
+ }
+
+ sch->qstats.backlog += len;
+ sch->q.qlen++;
+ return err;
+}
+
+static struct sk_buff *
+ets_qdisc_dequeue_skb(struct Qdisc *sch, struct sk_buff *skb)
+{
+ qdisc_bstats_update(sch, skb);
+ qdisc_qstats_backlog_dec(sch, skb);
+ sch->q.qlen--;
+ return skb;
+}
+
+static struct sk_buff *ets_qdisc_dequeue(struct Qdisc *sch)
+{
+ struct ets_sched *q = qdisc_priv(sch);
+ struct ets_class *cl;
+ struct sk_buff *skb;
+ unsigned int band;
+ unsigned int len;
+
+ while (1) {
+ for (band = 0; band < q->nstrict; band++) {
+ cl = &q->classes[band];
+ skb = qdisc_dequeue_peeked(cl->qdisc);
+ if (skb)
+ return ets_qdisc_dequeue_skb(sch, skb);
+ }
+
+ if (list_empty(&q->active))
+ goto out;
+
+ cl = list_first_entry(&q->active, struct ets_class, alist);
+ skb = cl->qdisc->ops->peek(cl->qdisc);
+ if (!skb) {
+ qdisc_warn_nonwc(__func__, cl->qdisc);
+ goto out;
+ }
+
+ len = qdisc_pkt_len(skb);
+ if (len <= cl->deficit) {
+ cl->deficit -= len;
+ skb = qdisc_dequeue_peeked(cl->qdisc);
+ if (unlikely(!skb))
+ goto out;
+ if (cl->qdisc->q.qlen == 0)
+ list_del(&cl->alist);
+ return ets_qdisc_dequeue_skb(sch, skb);
+ }
+
+ cl->deficit += cl->quantum;
+ list_move_tail(&cl->alist, &q->active);
+ }
+out:
+ return NULL;
+}
+
+static int ets_qdisc_priomap_parse(struct nlattr *priomap_attr,
+ unsigned int nbands, u8 *priomap,
+ struct netlink_ext_ack *extack)
+{
+ const struct nlattr *attr;
+ int prio = 0;
+ u8 band;
+ int rem;
+ int err;
+
+ err = __nla_validate_nested(priomap_attr, TCA_ETS_MAX,
+ ets_priomap_policy, NL_VALIDATE_STRICT,
+ extack);
+ if (err)
+ return err;
+
+ nla_for_each_nested(attr, priomap_attr, rem) {
+ switch (nla_type(attr)) {
+ case TCA_ETS_PRIOMAP_BAND:
+ if (prio > TC_PRIO_MAX) {
+ NL_SET_ERR_MSG_MOD(extack, "Too many priorities in ETS priomap");
+ return -EINVAL;
+ }
+ band = nla_get_u8(attr);
+ if (band >= nbands) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid band number in ETS priomap");
+ return -EINVAL;
+ }
+ priomap[prio++] = band;
+ break;
+ default:
+ WARN_ON_ONCE(1); /* Validate should have caught this. */
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int ets_qdisc_quanta_parse(struct Qdisc *sch, struct nlattr *quanta_attr,
+ unsigned int nbands, unsigned int nstrict,
+ unsigned int *quanta,
+ struct netlink_ext_ack *extack)
+{
+ const struct nlattr *attr;
+ int band = nstrict;
+ int rem;
+ int err;
+
+ err = __nla_validate_nested(quanta_attr, TCA_ETS_MAX,
+ ets_quanta_policy, NL_VALIDATE_STRICT,
+ extack);
+ if (err < 0)
+ return err;
+
+ nla_for_each_nested(attr, quanta_attr, rem) {
+ switch (nla_type(attr)) {
+ case TCA_ETS_QUANTA_BAND:
+ if (band >= nbands) {
+ NL_SET_ERR_MSG_MOD(extack, "ETS quanta has more values than bands");
+ return -EINVAL;
+ }
+ err = ets_quantum_parse(sch, attr, &quanta[band++],
+ extack);
+ if (err)
+ return err;
+ break;
+ default:
+ WARN_ON_ONCE(1); /* Validate should have caught this. */
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ unsigned int quanta[TCQ_ETS_MAX_BANDS] = {0};
+ struct Qdisc *queues[TCQ_ETS_MAX_BANDS];
+ struct ets_sched *q = qdisc_priv(sch);
+ struct nlattr *tb[TCA_ETS_MAX + 1];
+ unsigned int oldbands = q->nbands;
+ u8 priomap[TC_PRIO_MAX + 1];
+ unsigned int nstrict = 0;
+ unsigned int nbands;
+ unsigned int i;
+ int err;
+
+ if (!opt) {
+ NL_SET_ERR_MSG(extack, "ETS options are required for this operation");
+ return -EINVAL;
+ }
+
+ err = nla_parse_nested(tb, TCA_ETS_MAX, opt, ets_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (!tb[TCA_ETS_NBANDS]) {
+ NL_SET_ERR_MSG_MOD(extack, "Number of bands is a required argument");
+ return -EINVAL;
+ }
+ nbands = nla_get_u8(tb[TCA_ETS_NBANDS]);
+ if (nbands < 1 || nbands > TCQ_ETS_MAX_BANDS) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid number of bands");
+ return -EINVAL;
+ }
+ /* Unless overridden, traffic goes to the last band. */
+ memset(priomap, nbands - 1, sizeof(priomap));
+
+ if (tb[TCA_ETS_NSTRICT]) {
+ nstrict = nla_get_u8(tb[TCA_ETS_NSTRICT]);
+ if (nstrict > nbands) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid number of strict bands");
+ return -EINVAL;
+ }
+ }
+
+ if (tb[TCA_ETS_PRIOMAP]) {
+ err = ets_qdisc_priomap_parse(tb[TCA_ETS_PRIOMAP],
+ nbands, priomap, extack);
+ if (err)
+ return err;
+ }
+
+ if (tb[TCA_ETS_QUANTA]) {
+ err = ets_qdisc_quanta_parse(sch, tb[TCA_ETS_QUANTA],
+ nbands, nstrict, quanta, extack);
+ if (err)
+ return err;
+ }
+ /* If there are more bands than strict + quanta provided, the remaining
+ * ones are ETS bands with a quantum of one MTU. Initialize the missing
+ * values here.
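+ * E.g. with nbands = 4, nstrict = 1 and only one quantum supplied, bands
+ * 2 and 3 both default to the device MTU as given by psched_mtu().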
+ */
+ for (i = nstrict; i < nbands; i++) {
+ if (!quanta[i])
+ quanta[i] = psched_mtu(qdisc_dev(sch));
+ }
+
+ /* Before commit, make sure we can allocate all new qdiscs */
+ for (i = oldbands; i < nbands; i++) {
+ queues[i] = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+ ets_class_id(sch, &q->classes[i]),
+ extack);
+ if (!queues[i]) {
+ while (i > oldbands)
+ qdisc_put(queues[--i]);
+ return -ENOMEM;
+ }
+ }
+
+ sch_tree_lock(sch);
+
+ q->nbands = nbands;
+ q->nstrict = nstrict;
+ memcpy(q->prio2band, priomap, sizeof(priomap));
+
+ for (i = q->nbands; i < oldbands; i++)
+ qdisc_tree_flush_backlog(q->classes[i].qdisc);
+
+ for (i = 0; i < q->nbands; i++)
+ q->classes[i].quantum = quanta[i];
+
+ for (i = oldbands; i < q->nbands; i++) {
+ q->classes[i].qdisc = queues[i];
+ if (q->classes[i].qdisc != &noop_qdisc)
+ qdisc_hash_add(q->classes[i].qdisc, true);
+ }
+
+ sch_tree_unlock(sch);
+
+ ets_offload_change(sch);
+ for (i = q->nbands; i < oldbands; i++) {
+ qdisc_put(q->classes[i].qdisc);
+ memset(&q->classes[i], 0, sizeof(q->classes[i]));
+ }
+ return 0;
+}
+
+static int ets_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct ets_sched *q = qdisc_priv(sch);
+ int err;
+
+ if (!opt)
+ return -EINVAL;
+
+ err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
+ if (err)
+ return err;
+
+ INIT_LIST_HEAD(&q->active);
+ return ets_qdisc_change(sch, opt, extack);
+}
+
+static void ets_qdisc_reset(struct Qdisc *sch)
+{
+ struct ets_sched *q = qdisc_priv(sch);
+ int band;
+
+ for (band = q->nstrict; band < q->nbands; band++) {
+ if (q->classes[band].qdisc->q.qlen)
+ list_del(&q->classes[band].alist);
+ }
+ for (band = 0; band < q->nbands; band++)
+ qdisc_reset(q->classes[band].qdisc);
+ sch->qstats.backlog = 0;
+ sch->q.qlen = 0;
+}
+
+static void ets_qdisc_destroy(struct Qdisc *sch)
+{
+ struct ets_sched *q = qdisc_priv(sch);
+ int band;
+
+ ets_offload_destroy(sch);
+ tcf_block_put(q->block);
+ for (band = 0; band < q->nbands; band++)
+ qdisc_put(q->classes[band].qdisc);
+}
+
+static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct ets_sched *q = qdisc_priv(sch);
+ struct nlattr *opts;
+ struct nlattr *nest;
+ int band;
+ int prio;
+ int err;
+
+ err = ets_offload_dump(sch);
+ if (err)
+ return err;
+
+ opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
+ if (!opts)
+ goto nla_err;
+
+ if (nla_put_u8(skb, TCA_ETS_NBANDS, q->nbands))
+ goto nla_err;
+
+ if (q->nstrict &&
+ nla_put_u8(skb, TCA_ETS_NSTRICT, q->nstrict))
+ goto nla_err;
+
+ if (q->nbands > q->nstrict) {
+ nest = nla_nest_start(skb, TCA_ETS_QUANTA);
+ if (!nest)
+ goto nla_err;
+
+ for (band = q->nstrict; band < q->nbands; band++) {
+ if (nla_put_u32(skb, TCA_ETS_QUANTA_BAND,
+ q->classes[band].quantum))
+ goto nla_err;
+ }
+
+ nla_nest_end(skb, nest);
+ }
+
+ nest = nla_nest_start(skb, TCA_ETS_PRIOMAP);
+ if (!nest)
+ goto nla_err;
+
+ for (prio = 0; prio <= TC_PRIO_MAX; prio++) {
+ if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND, q->prio2band[prio]))
+ goto nla_err;
+ }
+
+ nla_nest_end(skb, nest);
+
+ return nla_nest_end(skb, opts);
+
+nla_err:
+ nla_nest_cancel(skb, opts);
+ return -EMSGSIZE;
+}
+
+static const struct Qdisc_class_ops ets_class_ops = {
+ .change = ets_class_change,
+ .graft = ets_class_graft,
+ .leaf = ets_class_leaf,
+ .find = ets_class_find,
+ .qlen_notify = ets_class_qlen_notify,
+ .dump = ets_class_dump,
+ .dump_stats = ets_class_dump_stats,
+ .walk = ets_qdisc_walk,
+ .tcf_block = ets_qdisc_tcf_block,
+ .bind_tcf = ets_qdisc_bind_tcf,
+ .unbind_tcf = ets_qdisc_unbind_tcf,
+};
+
+static struct Qdisc_ops ets_qdisc_ops __read_mostly = {
+ .cl_ops = &ets_class_ops,
+ .id = "ets",
+ .priv_size = sizeof(struct ets_sched),
+ .enqueue = ets_qdisc_enqueue,
+ .dequeue = ets_qdisc_dequeue,
+ .peek = qdisc_peek_dequeued,
+ .change = ets_qdisc_change,
+ .init = ets_qdisc_init,
+ .reset = ets_qdisc_reset,
+ .destroy = ets_qdisc_destroy,
+ .dump = ets_qdisc_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init ets_init(void)
+{
+ return register_qdisc(&ets_qdisc_ops);
+}
+
+static void __exit ets_exit(void)
+{
+ unregister_qdisc(&ets_qdisc_ops);
+}
+
+module_init(ets_init);
+module_exit(ets_exit);
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 5ab696efca95..6c9595f1048a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -441,7 +441,7 @@ static void dev_watchdog(struct timer_list *t)
trace_net_dev_xmit_timeout(dev, i);
WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
dev->name, netdev_drivername(dev), i);
- dev->netdev_ops->ndo_tx_timeout(dev);
+ dev->netdev_ops->ndo_tx_timeout(dev, i);
}
if (!mod_timer(&dev->watchdog_timer,
round_jiffies(jiffies +
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index a031d1134c60..6b0b3bad4daa 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -36,6 +36,7 @@
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
#include <net/sctp/stream_sched.h>
+#include <trace/events/sctp.h>
/* Declare internal functions here. */
static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn);
@@ -1238,6 +1239,11 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk)
/* Grab the association's destination address list. */
transport_list = &asoc->peer.transport_addr_list;
+ /* SCTP path tracepoint for congestion control debugging. */
+ list_for_each_entry(transport, transport_list, transports) {
+ trace_sctp_probe_path(transport, asoc);
+ }
+
sack_ctsn = ntohl(sack->cum_tsn_ack);
gap_ack_blocks = ntohs(sack->num_gap_ack_blocks);
asoc->stats.gapcnt += gap_ack_blocks;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index fbbf19128c2d..78af2fcf90cc 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -227,6 +227,7 @@ static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb,
sa->sin_port = sh->dest;
sa->sin_addr.s_addr = ip_hdr(skb)->daddr;
}
+ memset(sa->sin_zero, 0, sizeof(sa->sin_zero));
}
/* Initialize an sctp_addr from a socket. */
@@ -235,6 +236,7 @@ static void sctp_v4_from_sk(union sctp_addr *addr, struct sock *sk)
addr->v4.sin_family = AF_INET;
addr->v4.sin_port = 0;
addr->v4.sin_addr.s_addr = inet_sk(sk)->inet_rcv_saddr;
+ memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero));
}
/* Initialize sk->sk_rcv_saddr from sctp_addr. */
@@ -257,6 +259,7 @@ static void sctp_v4_from_addr_param(union sctp_addr *addr,
addr->v4.sin_family = AF_INET;
addr->v4.sin_port = port;
addr->v4.sin_addr.s_addr = param->v4.addr.s_addr;
+ memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero));
}
/* Initialize an address parameter from a sctp_addr and return the length
@@ -281,6 +284,7 @@ static void sctp_v4_dst_saddr(union sctp_addr *saddr, struct flowi4 *fl4,
saddr->v4.sin_family = AF_INET;
saddr->v4.sin_port = port;
saddr->v4.sin_addr.s_addr = fl4->saddr;
+ memset(saddr->v4.sin_zero, 0, sizeof(saddr->v4.sin_zero));
}
/* Compare two addresses exactly. */
@@ -303,6 +307,7 @@ static void sctp_v4_inaddr_any(union sctp_addr *addr, __be16 port)
addr->v4.sin_family = AF_INET;
addr->v4.sin_addr.s_addr = htonl(INADDR_ANY);
addr->v4.sin_port = port;
+ memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero));
}
/* Is this a wildcard address? */
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index df60b5ef24cb..e0b01bf912b3 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -84,8 +84,10 @@ static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt,
return 0;
ret = genradix_prealloc(&stream->out, outcnt, gfp);
- if (ret)
+ if (ret) {
+ genradix_free(&stream->out);
return ret;
+ }
stream->outcnt = outcnt;
return 0;
@@ -100,8 +102,10 @@ static int sctp_stream_alloc_in(struct sctp_stream *stream, __u16 incnt,
return 0;
ret = genradix_prealloc(&stream->in, incnt, gfp);
- if (ret)
+ if (ret) {
+ genradix_free(&stream->in);
return ret;
+ }
stream->incnt = incnt;
return 0;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index b997072c72e5..cee5bf4a9bb9 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -857,6 +857,8 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
goto out;
sock_hold(&smc->sk); /* sock put in passive closing */
+ if (smc->use_fallback)
+ goto out;
if (flags & O_NONBLOCK) {
if (schedule_work(&smc->connect_work))
smc->connect_nonblock = 1;
@@ -1721,8 +1723,6 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
sk->sk_err = smc->clcsock->sk->sk_err;
sk->sk_error_report(sk);
}
- if (rc)
- return rc;
if (optlen < sizeof(int))
return -EINVAL;
@@ -1730,6 +1730,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
return -EFAULT;
lock_sock(sk);
+ if (rc || smc->use_fallback)
+ goto out;
switch (optname) {
case TCP_ULP:
case TCP_FASTOPEN:
@@ -1741,15 +1743,14 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
smc_switch_to_fallback(smc);
smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
} else {
- if (!smc->use_fallback)
- rc = -EINVAL;
+ rc = -EINVAL;
}
break;
case TCP_NODELAY:
if (sk->sk_state != SMC_INIT &&
sk->sk_state != SMC_LISTEN &&
sk->sk_state != SMC_CLOSED) {
- if (val && !smc->use_fallback)
+ if (val)
mod_delayed_work(system_wq, &smc->conn.tx_work,
0);
}
@@ -1758,7 +1759,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
if (sk->sk_state != SMC_INIT &&
sk->sk_state != SMC_LISTEN &&
sk->sk_state != SMC_CLOSED) {
- if (!val && !smc->use_fallback)
+ if (!val)
mod_delayed_work(system_wq, &smc->conn.tx_work,
0);
}
@@ -1769,6 +1770,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
default:
break;
}
+out:
release_sock(sk);
return rc;
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index bb92c7c6214c..2249de5379ee 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -41,7 +41,7 @@ static struct smc_lgr_list smc_lgr_list = { /* established link groups */
.num = 0,
};
-static atomic_t lgr_cnt; /* number of existing link groups */
+static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
@@ -1287,7 +1287,7 @@ static int smc_core_reboot_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
smc_lgrs_shutdown();
-
+ smc_ib_unregister_client();
return 0;
}
@@ -1297,7 +1297,6 @@ static struct notifier_block smc_reboot_notifier = {
int __init smc_core_init(void)
{
- atomic_set(&lgr_cnt, 0);
return register_reboot_notifier(&smc_reboot_notifier);
}
diff --git a/net/socket.c b/net/socket.c
index 4d38d49d6ad9..51bf34995bcb 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -128,6 +128,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags);
+static void sock_show_fdinfo(struct seq_file *m, struct file *f);
/*
* Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
@@ -150,6 +151,9 @@ static const struct file_operations socket_file_ops = {
.sendpage = sock_sendpage,
.splice_write = generic_splice_sendpage,
.splice_read = sock_splice_read,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = sock_show_fdinfo,
+#endif
};
/*
@@ -957,7 +961,7 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
.msg_iocb = iocb};
ssize_t res;
- if (file->f_flags & O_NONBLOCK)
+ if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
msg.msg_flags = MSG_DONTWAIT;
if (iocb->ki_pos != 0)
@@ -982,7 +986,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (iocb->ki_pos != 0)
return -ESPIPE;
- if (file->f_flags & O_NONBLOCK)
+ if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
msg.msg_flags = MSG_DONTWAIT;
if (sock->type == SOCK_SEQPACKET)
@@ -993,6 +997,14 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
return res;
}
+static void sock_show_fdinfo(struct seq_file *m, struct file *f)
+{
+ struct socket *sock = f->private_data;
+
+ if (sock->ops->show_fdinfo)
+ sock->ops->show_fdinfo(m, sock);
+}
+
/*
* Atomic setting of ioctl hooks to avoid race
* with module unload.
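
The sock_read_iter()/sock_write_iter() hunks above let IOCB_NOWAIT (set by preadv2/pwritev2 with RWF_NOWAIT) behave like O_NONBLOCK on sockets. A small userspace sketch of the visible effect; the socketpair and buffer size are illustrative:

	#define _GNU_SOURCE
	#include <stdio.h>
	#include <errno.h>
	#include <string.h>
	#include <sys/uio.h>
	#include <sys/socket.h>

	int main(void)
	{
		int sv[2];
		char buf[16];
		struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };

		if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv))
			return 1;

		/* offset must be -1: sockets are not seekable (-ESPIPE above) */
		if (preadv2(sv[0], &iov, 1, -1, RWF_NOWAIT) < 0)
			printf("preadv2: %s\n", strerror(errno)); /* EAGAIN, not a hang */
		return 0;
	}
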
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 42e01e9cf893..4c20be08b9c4 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -305,17 +305,17 @@ static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts,
* @skb: socket buffer to copy
* @method: send method to be used
* @dests: destination nodes for message.
- * @cong_link_cnt: returns number of encountered congested destination links
* Returns 0 on success, otherwise errno
*/
static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb,
struct tipc_mc_method *method,
- struct tipc_nlist *dests,
- u16 *cong_link_cnt)
+ struct tipc_nlist *dests)
{
struct tipc_msg *hdr, *_hdr;
struct sk_buff_head tmpq;
struct sk_buff *_skb;
+ u16 cong_link_cnt;
+ int rc = 0;
/* Does the cluster support the new capabilities? */
if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL))
@@ -343,18 +343,19 @@ static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb,
_hdr = buf_msg(_skb);
msg_set_size(_hdr, MCAST_H_SIZE);
msg_set_is_rcast(_hdr, !msg_is_rcast(hdr));
+ msg_set_errcode(_hdr, TIPC_ERR_NO_PORT);
__skb_queue_head_init(&tmpq);
__skb_queue_tail(&tmpq, _skb);
if (method->rcast)
- tipc_bcast_xmit(net, &tmpq, cong_link_cnt);
+ rc = tipc_bcast_xmit(net, &tmpq, &cong_link_cnt);
else
- tipc_rcast_xmit(net, &tmpq, dests, cong_link_cnt);
+ rc = tipc_rcast_xmit(net, &tmpq, dests, &cong_link_cnt);
/* This queue should normally be empty by now */
__skb_queue_purge(&tmpq);
- return 0;
+ return rc;
}
/* tipc_mcast_xmit - deliver message to indicated destination nodes
@@ -396,9 +397,14 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts,
msg_set_is_rcast(hdr, method->rcast);
/* Switch method ? */
- if (rcast != method->rcast)
- tipc_mcast_send_sync(net, skb, method,
- dests, cong_link_cnt);
+ if (rcast != method->rcast) {
+ rc = tipc_mcast_send_sync(net, skb, method, dests);
+ if (unlikely(rc)) {
+ pr_err("Unable to send SYN: method %d, rc %d\n",
+ rcast, rc);
+ goto exit;
+ }
+ }
if (method->rcast)
rc = tipc_rcast_xmit(net, pkts, dests, cong_link_cnt);
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index b043e8c6397a..bfe43da127c0 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -194,6 +194,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
{
struct tipc_net *tn = tipc_net(net);
struct tipc_msg *hdr = buf_msg(skb);
+ u32 pnet_hash = msg_peer_net_hash(hdr);
u16 caps = msg_node_capabilities(hdr);
bool legacy = tn->legacy_addr_format;
u32 sugg = msg_sugg_node_addr(hdr);
@@ -242,9 +243,8 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
return;
if (!tipc_in_scope(legacy, b->domain, src))
return;
- tipc_node_check_dest(net, src, peer_id, b, caps, signature,
- msg_peer_net_hash(hdr), &maddr, &respond,
- &dupl_addr);
+ tipc_node_check_dest(net, src, peer_id, b, caps, signature, pnet_hash,
+ &maddr, &respond, &dupl_addr);
if (dupl_addr)
disc_dupl_alert(b, src, &maddr);
if (!respond)
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 94dd48cd70a3..467c53a1fb5c 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -250,7 +250,7 @@ static int tipc_link_build_nack_msg(struct tipc_link *l,
static void tipc_link_build_bc_init_msg(struct tipc_link *l,
struct sk_buff_head *xmitq);
static int tipc_link_release_pkts(struct tipc_link *l, u16 to);
-static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data);
+static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data, u16 gap);
static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap,
struct tipc_gap_ack_blks *ga,
struct sk_buff_head *xmitq);
@@ -1423,14 +1423,14 @@ static int tipc_link_release_pkts(struct tipc_link *l, u16 acked)
*
* returns the actual allocated memory size
*/
-static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data)
+static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data, u16 gap)
{
struct sk_buff *skb = skb_peek(&l->deferdq);
struct tipc_gap_ack_blks *ga = data;
u16 len, expect, seqno = 0;
u8 n = 0;
- if (!skb)
+ if (!skb || !gap)
goto exit;
expect = buf_seqno(skb);
@@ -1739,7 +1739,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
msg_set_probe(hdr, probe);
msg_set_is_keepalive(hdr, probe || probe_reply);
if (l->peer_caps & TIPC_GAP_ACK_BLOCK)
- glen = tipc_build_gap_ack_blks(l, data);
+ glen = tipc_build_gap_ack_blks(l, data, rcvgap);
tipc_mon_prep(l->net, data + glen, &dlen, mstate, l->bearer_id);
msg_set_size(hdr, INT_H_SIZE + glen + dlen);
skb_trim(skb, INT_H_SIZE + glen + dlen);
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 92d04dc2a44b..359b2bc888cf 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -36,6 +36,7 @@
#include <net/sock.h>
#include <linux/list_sort.h>
+#include <linux/rbtree_augmented.h>
#include "core.h"
#include "netlink.h"
#include "name_table.h"
@@ -51,6 +52,7 @@
* @lower: service range lower bound
* @upper: service range upper bound
* @tree_node: member of service range RB tree
+ * @max: largest 'upper' in this node subtree
* @local_publ: list of identical publications made from this node
* Used by closest_first lookup and multicast lookup algorithm
* @all_publ: all publications identical to this one, whatever node and scope
@@ -60,6 +62,7 @@ struct service_range {
u32 lower;
u32 upper;
struct rb_node tree_node;
+ u32 max;
struct list_head local_publ;
struct list_head all_publ;
};
@@ -84,6 +87,130 @@ struct tipc_service {
struct rcu_head rcu;
};
+#define service_range_upper(sr) ((sr)->upper)
+RB_DECLARE_CALLBACKS_MAX(static, sr_callbacks,
+ struct service_range, tree_node, u32, max,
+ service_range_upper)
+
+#define service_range_entry(rbtree_node) \
+ (container_of(rbtree_node, struct service_range, tree_node))
+
+#define service_range_overlap(sr, start, end) \
+ ((sr)->lower <= (end) && (sr)->upper >= (start))
+
+/**
+ * service_range_foreach_match - iterate over tipc service rbtree for each
+ * range match
+ * @sr: the service range pointer as a loop cursor
+ * @sc: the pointer to tipc service which holds the service range rbtree
+ * @start, end: the range (end >= start) for matching
+ */
+#define service_range_foreach_match(sr, sc, start, end) \
+ for (sr = service_range_match_first((sc)->ranges.rb_node, \
+ start, \
+ end); \
+ sr; \
+ sr = service_range_match_next(&(sr)->tree_node, \
+ start, \
+ end))
+
+/**
+ * service_range_match_first - find first service range matching a range
+ * @n: the root node of service range rbtree for searching
+ * @start, end: the range (end >= start) for matching
+ *
+ * Return: the leftmost service range node in the rbtree that overlaps the
+ * specific range if any. Otherwise, returns NULL.
+ */
+static struct service_range *service_range_match_first(struct rb_node *n,
+ u32 start, u32 end)
+{
+ struct service_range *sr;
+ struct rb_node *l, *r;
+
+ /* No overlap anywhere in the tree? */
+ if (!n || service_range_entry(n)->max < start)
+ return NULL;
+
+ while (n) {
+ l = n->rb_left;
+ if (l && service_range_entry(l)->max >= start) {
+ /* A leftmost overlap range node must be one in the left
+ * subtree. If not, it has lower > end, then nodes on
+ * the right side cannot satisfy the condition either.
+ */
+ n = l;
+ continue;
+ }
+
+ /* No one in the left subtree can match, return if this node is
+ * an overlap i.e. leftmost.
+ */
+ sr = service_range_entry(n);
+ if (service_range_overlap(sr, start, end))
+ return sr;
+
+ /* Ok, try to lookup on the right side */
+ r = n->rb_right;
+ if (sr->lower <= end &&
+ r && service_range_entry(r)->max >= start) {
+ n = r;
+ continue;
+ }
+ break;
+ }
+
+ return NULL;
+}
+
+/**
+ * service_range_match_next - find next service range matching a range
+ * @n: a node in service range rbtree from which the searching starts
+ * @start, end: the range (end >= start) for matching
+ *
+ * Return: the next service range node to the given node in the rbtree that
+ * overlaps the specific range if any. Otherwise, returns NULL.
+ */
+static struct service_range *service_range_match_next(struct rb_node *n,
+ u32 start, u32 end)
+{
+ struct service_range *sr;
+ struct rb_node *p, *r;
+
+ while (n) {
+ r = n->rb_right;
+ if (r && service_range_entry(r)->max >= start)
+ /* A next overlap range node must be one in the right
+ * subtree. If not, it has lower > end, then any next
+ * successor (- an ancestor) of this node cannot
+ * satisfy the condition either.
+ */
+ return service_range_match_first(r, start, end);
+
+ /* No one in the right subtree can match, go up to find an
+ * ancestor of this node which is parent of a left-hand child.
+ */
+ while ((p = rb_parent(n)) && n == p->rb_right)
+ n = p;
+ if (!p)
+ break;
+
+ /* Return if this ancestor is an overlap */
+ sr = service_range_entry(p);
+ if (service_range_overlap(sr, start, end))
+ return sr;
+
+ /* Ok, try to lookup more from this ancestor */
+ if (sr->lower <= end) {
+ n = p;
+ continue;
+ }
+ break;
+ }
+
+ return NULL;
+}
+
static int hash(int x)
{
return x & (TIPC_NAMETBL_SIZE - 1);
@@ -139,84 +266,51 @@ static struct tipc_service *tipc_service_create(u32 type, struct hlist_head *hd)
return service;
}
-/**
- * tipc_service_first_range - find first service range in tree matching instance
- *
- * Very time-critical, so binary search through range rb tree
- */
-static struct service_range *tipc_service_first_range(struct tipc_service *sc,
- u32 instance)
-{
- struct rb_node *n = sc->ranges.rb_node;
- struct service_range *sr;
-
- while (n) {
- sr = container_of(n, struct service_range, tree_node);
- if (sr->lower > instance)
- n = n->rb_left;
- else if (sr->upper < instance)
- n = n->rb_right;
- else
- return sr;
- }
- return NULL;
-}
-
/* tipc_service_find_range - find service range matching publication parameters
*/
static struct service_range *tipc_service_find_range(struct tipc_service *sc,
u32 lower, u32 upper)
{
- struct rb_node *n = sc->ranges.rb_node;
struct service_range *sr;
- sr = tipc_service_first_range(sc, lower);
- if (!sr)
- return NULL;
-
- /* Look for exact match */
- for (n = &sr->tree_node; n; n = rb_next(n)) {
- sr = container_of(n, struct service_range, tree_node);
- if (sr->upper == upper)
- break;
+ service_range_foreach_match(sr, sc, lower, upper) {
+ /* Look for exact match */
+ if (sr->lower == lower && sr->upper == upper)
+ return sr;
}
- if (!n || sr->lower != lower || sr->upper != upper)
- return NULL;
- return sr;
+ return NULL;
}
static struct service_range *tipc_service_create_range(struct tipc_service *sc,
u32 lower, u32 upper)
{
struct rb_node **n, *parent = NULL;
- struct service_range *sr, *tmp;
+ struct service_range *sr;
n = &sc->ranges.rb_node;
while (*n) {
- tmp = container_of(*n, struct service_range, tree_node);
parent = *n;
- tmp = container_of(parent, struct service_range, tree_node);
- if (lower < tmp->lower)
- n = &(*n)->rb_left;
- else if (lower > tmp->lower)
- n = &(*n)->rb_right;
- else if (upper < tmp->upper)
- n = &(*n)->rb_left;
- else if (upper > tmp->upper)
- n = &(*n)->rb_right;
+ sr = service_range_entry(parent);
+ if (lower == sr->lower && upper == sr->upper)
+ return sr;
+ if (sr->max < upper)
+ sr->max = upper;
+ if (lower <= sr->lower)
+ n = &parent->rb_left;
else
- return tmp;
+ n = &parent->rb_right;
}
sr = kzalloc(sizeof(*sr), GFP_ATOMIC);
if (!sr)
return NULL;
sr->lower = lower;
sr->upper = upper;
+ sr->max = upper;
INIT_LIST_HEAD(&sr->local_publ);
INIT_LIST_HEAD(&sr->all_publ);
rb_link_node(&sr->tree_node, parent, n);
- rb_insert_color(&sr->tree_node, &sc->ranges);
+ rb_insert_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks);
return sr;
}
@@ -310,7 +404,6 @@ static void tipc_service_subscribe(struct tipc_service *service,
struct list_head publ_list;
struct service_range *sr;
struct tipc_name_seq ns;
- struct rb_node *n;
u32 filter;
ns.type = tipc_sub_read(sb, seq.type);
@@ -325,13 +418,7 @@ static void tipc_service_subscribe(struct tipc_service *service,
return;
INIT_LIST_HEAD(&publ_list);
- for (n = rb_first(&service->ranges); n; n = rb_next(n)) {
- sr = container_of(n, struct service_range, tree_node);
- if (sr->lower > ns.upper)
- break;
- if (!tipc_sub_check_overlap(&ns, sr->lower, sr->upper))
- continue;
-
+ service_range_foreach_match(sr, service, ns.lower, ns.upper) {
first = NULL;
list_for_each_entry(p, &sr->all_publ, all_publ) {
if (filter & TIPC_SUB_PORTS)
@@ -425,7 +512,7 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
/* Remove service range item if this was its last publication */
if (list_empty(&sr->all_publ)) {
- rb_erase(&sr->tree_node, &sc->ranges);
+ rb_erase_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks);
kfree(sr);
}
@@ -473,34 +560,39 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *dnode)
rcu_read_lock();
sc = tipc_service_find(net, type);
if (unlikely(!sc))
- goto not_found;
+ goto exit;
spin_lock_bh(&sc->lock);
- sr = tipc_service_first_range(sc, instance);
- if (unlikely(!sr))
- goto no_match;
-
- /* Select lookup algorithm: local, closest-first or round-robin */
- if (*dnode == self) {
- list = &sr->local_publ;
- if (list_empty(list))
- goto no_match;
- p = list_first_entry(list, struct publication, local_publ);
- list_move_tail(&p->local_publ, &sr->local_publ);
- } else if (legacy && !*dnode && !list_empty(&sr->local_publ)) {
- list = &sr->local_publ;
- p = list_first_entry(list, struct publication, local_publ);
- list_move_tail(&p->local_publ, &sr->local_publ);
- } else {
- list = &sr->all_publ;
- p = list_first_entry(list, struct publication, all_publ);
- list_move_tail(&p->all_publ, &sr->all_publ);
+ service_range_foreach_match(sr, sc, instance, instance) {
+ /* Select lookup algo: local, closest-first or round-robin */
+ if (*dnode == self) {
+ list = &sr->local_publ;
+ if (list_empty(list))
+ continue;
+ p = list_first_entry(list, struct publication,
+ local_publ);
+ list_move_tail(&p->local_publ, &sr->local_publ);
+ } else if (legacy && !*dnode && !list_empty(&sr->local_publ)) {
+ list = &sr->local_publ;
+ p = list_first_entry(list, struct publication,
+ local_publ);
+ list_move_tail(&p->local_publ, &sr->local_publ);
+ } else {
+ list = &sr->all_publ;
+ p = list_first_entry(list, struct publication,
+ all_publ);
+ list_move_tail(&p->all_publ, &sr->all_publ);
+ }
+ port = p->port;
+ node = p->node;
+ /* Todo: as in the legacy code, pick only the first matching range;
+ * a "true" round-robin will be performed as needed.
+ */
+ break;
}
- port = p->port;
- node = p->node;
-no_match:
spin_unlock_bh(&sc->lock);
-not_found:
+
+exit:
rcu_read_unlock();
*dnode = node;
return port;
@@ -523,7 +615,8 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
spin_lock_bh(&sc->lock);
- sr = tipc_service_first_range(sc, instance);
+ /* Todo: do a full search, i.e. service_range_foreach_match(), instead? */
+ sr = service_range_match_first(sc->ranges.rb_node, instance, instance);
if (!sr)
goto no_match;
@@ -552,7 +645,6 @@ void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
struct service_range *sr;
struct tipc_service *sc;
struct publication *p;
- struct rb_node *n;
rcu_read_lock();
sc = tipc_service_find(net, type);
@@ -560,13 +652,7 @@ void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
goto exit;
spin_lock_bh(&sc->lock);
-
- for (n = rb_first(&sc->ranges); n; n = rb_next(n)) {
- sr = container_of(n, struct service_range, tree_node);
- if (sr->upper < lower)
- continue;
- if (sr->lower > upper)
- break;
+ service_range_foreach_match(sr, sc, lower, upper) {
list_for_each_entry(p, &sr->local_publ, local_publ) {
if (p->scope == scope || (!exact && p->scope < scope))
tipc_dest_push(dports, 0, p->port);
@@ -587,7 +673,6 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
struct service_range *sr;
struct tipc_service *sc;
struct publication *p;
- struct rb_node *n;
rcu_read_lock();
sc = tipc_service_find(net, type);
@@ -595,13 +680,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
goto exit;
spin_lock_bh(&sc->lock);
-
- for (n = rb_first(&sc->ranges); n; n = rb_next(n)) {
- sr = container_of(n, struct service_range, tree_node);
- if (sr->upper < lower)
- continue;
- if (sr->lower > upper)
- break;
+ service_range_foreach_match(sr, sc, lower, upper) {
list_for_each_entry(p, &sr->all_publ, all_publ) {
tipc_nlist_add(nodes, p->node);
}
@@ -799,7 +878,7 @@ static void tipc_service_delete(struct net *net, struct tipc_service *sc)
tipc_service_remove_publ(sr, p->node, p->key);
kfree_rcu(p, rcu);
}
- rb_erase(&sr->tree_node, &sc->ranges);
+ rb_erase_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks);
kfree(sr);
}
hlist_del_init_rcu(&sc->service_list);
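
The bulk of the name_table.c changes replace point lookups with an interval search over an rbtree augmented by a per-subtree 'max' upper bound (RB_DECLARE_CALLBACKS_MAX). Below is a standalone sketch of the same leftmost-overlap walk on a plain, unbalanced BST; names and sample ranges are illustrative, not TIPC code:

	#include <stdio.h>
	#include <stdlib.h>

	struct range {
		unsigned int lo, hi;	/* the interval itself */
		unsigned int max;	/* largest 'hi' in this subtree */
		struct range *l, *r;
	};

	static struct range *insert(struct range *n, unsigned int lo,
				    unsigned int hi)
	{
		if (!n) {
			n = calloc(1, sizeof(*n));
			n->lo = lo;
			n->hi = hi;
			n->max = hi;
			return n;
		}
		if (hi > n->max)	/* maintain the subtree maximum */
			n->max = hi;
		if (lo <= n->lo)
			n->l = insert(n->l, lo, hi);
		else
			n->r = insert(n->r, lo, hi);
		return n;
	}

	/* Leftmost node overlapping [start, end], same walk as
	 * service_range_match_first() above.
	 */
	static struct range *match_first(struct range *n, unsigned int start,
					 unsigned int end)
	{
		if (!n || n->max < start)	/* nothing here can overlap */
			return NULL;
		while (n) {
			if (n->l && n->l->max >= start) {
				n = n->l;	/* leftmost match is down left */
				continue;
			}
			if (n->lo <= end && n->hi >= start)
				return n;	/* leftmost overlap found */
			if (n->lo <= end && n->r && n->r->max >= start) {
				n = n->r;	/* only the right side remains */
				continue;
			}
			break;
		}
		return NULL;
	}

	int main(void)
	{
		struct range *root = NULL, *m;

		root = insert(root, 10, 20);
		root = insert(root, 30, 40);
		root = insert(root, 15, 35);

		m = match_first(root, 18, 32);
		if (m)
			printf("first overlap: [%u, %u]\n", m->lo, m->hi); /* [10, 20] */
		return 0;
	}
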
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 2de3cec9929d..85400e4242de 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -302,3 +302,59 @@ int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
return err;
}
+
+static int __tipc_nl_addr_legacy_get(struct net *net, struct tipc_nl_msg *msg)
+{
+ struct tipc_net *tn = tipc_net(net);
+ struct nlattr *attrs;
+ void *hdr;
+
+ hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
+ 0, TIPC_NL_ADDR_LEGACY_GET);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ attrs = nla_nest_start(msg->skb, TIPC_NLA_NET);
+ if (!attrs)
+ goto msg_full;
+
+ if (tn->legacy_addr_format)
+ if (nla_put_flag(msg->skb, TIPC_NLA_NET_ADDR_LEGACY))
+ goto attr_msg_full;
+
+ nla_nest_end(msg->skb, attrs);
+ genlmsg_end(msg->skb, hdr);
+
+ return 0;
+
+attr_msg_full:
+ nla_nest_cancel(msg->skb, attrs);
+msg_full:
+ genlmsg_cancel(msg->skb, hdr);
+
+ return -EMSGSIZE;
+}
+
+int tipc_nl_net_addr_legacy_get(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = sock_net(skb->sk);
+ struct tipc_nl_msg msg;
+ struct sk_buff *rep;
+ int err;
+
+ rep = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!rep)
+ return -ENOMEM;
+
+ msg.skb = rep;
+ msg.portid = info->snd_portid;
+ msg.seq = info->snd_seq;
+
+ err = __tipc_nl_addr_legacy_get(net, &msg);
+ if (err) {
+ nlmsg_free(msg.skb);
+ return err;
+ }
+
+ return genlmsg_reply(msg.skb, info);
+}
diff --git a/net/tipc/net.h b/net/tipc/net.h
index b7f2e364eb99..6740d97c706e 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -47,5 +47,6 @@ void tipc_net_stop(struct net *net);
int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_net_addr_legacy_get(struct sk_buff *skb, struct genl_info *info);
#endif
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index e53231bd23b4..7c35094c20b8 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -83,6 +83,7 @@ const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = {
[TIPC_NLA_NET_ADDR] = { .type = NLA_U32 },
[TIPC_NLA_NET_NODEID] = { .type = NLA_U64 },
[TIPC_NLA_NET_NODEID_W1] = { .type = NLA_U64 },
+ [TIPC_NLA_NET_ADDR_LEGACY] = { .type = NLA_FLAG }
};
const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = {
@@ -273,6 +274,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
.doit = tipc_nl_node_flush_key,
},
#endif
+ {
+ .cmd = TIPC_NL_ADDR_LEGACY_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = tipc_nl_net_addr_legacy_get,
+ },
};
struct genl_family tipc_genl_family __ro_after_init = {
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 41688da233ab..6552f986774c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1364,8 +1364,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
struct tipc_msg *hdr = &tsk->phdr;
struct tipc_name_seq *seq;
struct sk_buff_head pkts;
- u32 dport, dnode = 0;
- u32 type, inst;
+ u32 dport = 0, dnode = 0;
+ u32 type = 0, inst = 0;
int mtu, rc;
if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
@@ -1418,23 +1418,11 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
type = dest->addr.name.name.type;
inst = dest->addr.name.name.instance;
dnode = dest->addr.name.domain;
- msg_set_type(hdr, TIPC_NAMED_MSG);
- msg_set_hdr_sz(hdr, NAMED_H_SIZE);
- msg_set_nametype(hdr, type);
- msg_set_nameinst(hdr, inst);
- msg_set_lookup_scope(hdr, tipc_node2scope(dnode));
dport = tipc_nametbl_translate(net, type, inst, &dnode);
- msg_set_destnode(hdr, dnode);
- msg_set_destport(hdr, dport);
if (unlikely(!dport && !dnode))
return -EHOSTUNREACH;
} else if (dest->addrtype == TIPC_ADDR_ID) {
dnode = dest->addr.id.node;
- msg_set_type(hdr, TIPC_DIRECT_MSG);
- msg_set_lookup_scope(hdr, 0);
- msg_set_destnode(hdr, dnode);
- msg_set_destport(hdr, dest->addr.id.ref);
- msg_set_hdr_sz(hdr, BASIC_H_SIZE);
} else {
return -EINVAL;
}
@@ -1445,6 +1433,22 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
if (unlikely(rc))
return rc;
+ if (dest->addrtype == TIPC_ADDR_NAME) {
+ msg_set_type(hdr, TIPC_NAMED_MSG);
+ msg_set_hdr_sz(hdr, NAMED_H_SIZE);
+ msg_set_nametype(hdr, type);
+ msg_set_nameinst(hdr, inst);
+ msg_set_lookup_scope(hdr, tipc_node2scope(dnode));
+ msg_set_destnode(hdr, dnode);
+ msg_set_destport(hdr, dport);
+ } else { /* TIPC_ADDR_ID */
+ msg_set_type(hdr, TIPC_DIRECT_MSG);
+ msg_set_lookup_scope(hdr, 0);
+ msg_set_destnode(hdr, dnode);
+ msg_set_destport(hdr, dest->addr.id.ref);
+ msg_set_hdr_sz(hdr, BASIC_H_SIZE);
+ }
+
__skb_queue_head_init(&pkts);
mtu = tipc_node_get_mtu(net, dnode, tsk->portid, false);
rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index cd91ad812291..1ba5a92832bb 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -178,7 +178,7 @@ static void tls_icsk_clean_acked(struct sock *sk, u32 acked_seq)
* socket and no in-flight SKBs associated with this
* socket, so it is safe to free all the resources.
*/
-static void tls_device_sk_destruct(struct sock *sk)
+void tls_device_sk_destruct(struct sock *sk)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
@@ -196,6 +196,7 @@ static void tls_device_sk_destruct(struct sock *sk)
if (refcount_dec_and_test(&tls_ctx->refcount))
tls_device_queue_ctx_destruction(tls_ctx);
}
+EXPORT_SYMBOL_GPL(tls_device_sk_destruct);
void tls_device_free_resources_tx(struct sock *sk)
{
@@ -903,7 +904,7 @@ static void tls_device_attach(struct tls_context *ctx, struct sock *sk,
spin_unlock_irq(&tls_device_lock);
ctx->sk_destruct = sk->sk_destruct;
- sk->sk_destruct = tls_device_sk_destruct;
+ smp_store_release(&sk->sk_destruct, tls_device_sk_destruct);
}
}
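
Replacing the plain store with smp_store_release() above orders the tls_ctx publication: a lockless reader of sk->sk_destruct must pair it with an acquire load to see a fully initialized context. A portable C11 sketch of that pairing, with illustrative types and names:

	#include <stdatomic.h>

	struct ctx {
		int ready;
	};

	static _Atomic(struct ctx *) published;

	void publish(struct ctx *c)
	{
		c->ready = 1;	/* ordinary initialization stores first ... */
		atomic_store_explicit(&published, c,
				      memory_order_release); /* ... then release */
	}

	struct ctx *get(void)
	{
		/* pairs with the release store above: a reader that sees the
		 * pointer also sees the initialization of *c
		 */
		return atomic_load_explicit(&published, memory_order_acquire);
	}
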
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 7cfdce10de36..6756a3ccc392 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -676,6 +676,16 @@ static int unix_set_peek_off(struct sock *sk, int val)
return 0;
}
+static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+ struct unix_sock *u;
+
+ if (sk) {
+ u = unix_sk(sock->sk);
+ seq_printf(m, "scm_fds: %u\n", READ_ONCE(u->scm_stat.nr_fds));
+ }
+}
static const struct proto_ops unix_stream_ops = {
.family = PF_UNIX,
@@ -701,6 +711,7 @@ static const struct proto_ops unix_stream_ops = {
.sendpage = unix_stream_sendpage,
.splice_read = unix_stream_splice_read,
.set_peek_off = unix_set_peek_off,
+ .show_fdinfo = unix_show_fdinfo,
};
static const struct proto_ops unix_dgram_ops = {
@@ -726,6 +737,7 @@ static const struct proto_ops unix_dgram_ops = {
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
.set_peek_off = unix_set_peek_off,
+ .show_fdinfo = unix_show_fdinfo,
};
static const struct proto_ops unix_seqpacket_ops = {
@@ -751,6 +763,7 @@ static const struct proto_ops unix_seqpacket_ops = {
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
.set_peek_off = unix_set_peek_off,
+ .show_fdinfo = unix_show_fdinfo,
};
static struct proto unix_proto = {
@@ -788,6 +801,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
mutex_init(&u->bindlock); /* single task binding lock */
init_waitqueue_head(&u->peer_wait);
init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
+ memset(&u->scm_stat, 0, sizeof(struct scm_stat));
unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
if (sk == NULL)
@@ -1572,6 +1586,28 @@ static bool unix_skb_scm_eq(struct sk_buff *skb,
unix_secdata_eq(scm, skb);
}
+static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
+{
+ struct scm_fp_list *fp = UNIXCB(skb).fp;
+ struct unix_sock *u = unix_sk(sk);
+
+ lockdep_assert_held(&sk->sk_receive_queue.lock);
+
+ if (unlikely(fp && fp->count))
+ u->scm_stat.nr_fds += fp->count;
+}
+
+static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
+{
+ struct scm_fp_list *fp = UNIXCB(skb).fp;
+ struct unix_sock *u = unix_sk(sk);
+
+ lockdep_assert_held(&sk->sk_receive_queue.lock);
+
+ if (unlikely(fp && fp->count))
+ u->scm_stat.nr_fds -= fp->count;
+}
+
/*
* Send AF_UNIX data.
*/
@@ -1757,7 +1793,10 @@ restart_locked:
if (sock_flag(other, SOCK_RCVTSTAMP))
__net_timestamp(skb);
maybe_add_creds(skb, sock, other);
- skb_queue_tail(&other->sk_receive_queue, skb);
+ spin_lock(&other->sk_receive_queue.lock);
+ scm_stat_add(other, skb);
+ __skb_queue_tail(&other->sk_receive_queue, skb);
+ spin_unlock(&other->sk_receive_queue.lock);
unix_state_unlock(other);
other->sk_data_ready(other);
sock_put(other);
@@ -1859,7 +1898,10 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
goto pipe_err_free;
maybe_add_creds(skb, sock, other);
- skb_queue_tail(&other->sk_receive_queue, skb);
+ spin_lock(&other->sk_receive_queue.lock);
+ scm_stat_add(other, skb);
+ __skb_queue_tail(&other->sk_receive_queue, skb);
+ spin_unlock(&other->sk_receive_queue.lock);
unix_state_unlock(other);
other->sk_data_ready(other);
sent += size;
@@ -2058,8 +2100,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
mutex_lock(&u->iolock);
skip = sk_peek_offset(sk, flags);
- skb = __skb_try_recv_datagram(sk, flags, NULL, &skip, &err,
- &last);
+ skb = __skb_try_recv_datagram(sk, flags, scm_stat_del,
+ &skip, &err, &last);
if (skb)
break;
@@ -2353,8 +2395,12 @@ unlock:
sk_peek_offset_bwd(sk, chunk);
- if (UNIXCB(skb).fp)
+ if (UNIXCB(skb).fp) {
+ spin_lock(&sk->sk_receive_queue.lock);
+ scm_stat_del(sk, skb);
+ spin_unlock(&sk->sk_receive_queue.lock);
unix_detach_fds(&scm, skb);
+ }
if (unix_skb_len(skb))
break;
@@ -2865,7 +2911,7 @@ static int __init af_unix_init(void)
{
int rc = -1;
- BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
+ BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
rc = proto_register(&unix_proto, 1);
if (rc != 0) {
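
Together with the socket.c plumbing earlier, unix_show_fdinfo() makes the count of in-flight SCM_RIGHTS descriptors visible in procfs. A hypothetical reader of the new field; fd 3 standing for some AF_UNIX socket is an assumption:

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char line[256];
		FILE *f = fopen("/proc/self/fdinfo/3", "r"); /* fd 3: a unix socket */

		if (!f)
			return 1;
		while (fgets(line, sizeof(line), f))
			if (!strncmp(line, "scm_fds:", 8))
				fputs(line, stdout); /* fds queued via SCM_RIGHTS */
		fclose(f);
		return 0;
	}
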
diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig
index 8abcb815af2d..56356d2980c8 100644
--- a/net/vmw_vsock/Kconfig
+++ b/net/vmw_vsock/Kconfig
@@ -26,6 +26,18 @@ config VSOCKETS_DIAG
Enable this module so userspace applications can query open sockets.
+config VSOCKETS_LOOPBACK
+ tristate "Virtual Sockets loopback transport"
+ depends on VSOCKETS
+ default y
+ select VIRTIO_VSOCKETS_COMMON
+ help
+ This module implements a loopback transport for Virtual Sockets,
+ using vmw_vsock_virtio_transport_common.
+
+ To compile this driver as a module, choose M here: the module
+ will be called vsock_loopback. If unsure, say N.
+
config VMWARE_VMCI_VSOCKETS
tristate "VMware VMCI transport for Virtual Sockets"
depends on VSOCKETS && VMWARE_VMCI
diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile
index 7c6f9a0b67b0..6a943ec95c4a 100644
--- a/net/vmw_vsock/Makefile
+++ b/net/vmw_vsock/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o
obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o
obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o
obj-$(CONFIG_HYPERV_VSOCKETS) += hv_sock.o
+obj-$(CONFIG_VSOCKETS_LOOPBACK) += vsock_loopback.o
vsock-y += af_vsock.o af_vsock_tap.o vsock_addr.o
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 74db4cd637a7..9c5b2a91baad 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -136,6 +136,8 @@ static const struct vsock_transport *transport_h2g;
static const struct vsock_transport *transport_g2h;
/* Transport used for DGRAM communication */
static const struct vsock_transport *transport_dgram;
+/* Transport used for local communication */
+static const struct vsock_transport *transport_local;
static DEFINE_MUTEX(vsock_register_mutex);
/**** UTILS ****/
@@ -386,6 +388,21 @@ void vsock_enqueue_accept(struct sock *listener, struct sock *connected)
}
EXPORT_SYMBOL_GPL(vsock_enqueue_accept);
+static bool vsock_use_local_transport(unsigned int remote_cid)
+{
+ if (!transport_local)
+ return false;
+
+ if (remote_cid == VMADDR_CID_LOCAL)
+ return true;
+
+ if (transport_g2h) {
+ return remote_cid == transport_g2h->get_local_cid();
+ } else {
+ return remote_cid == VMADDR_CID_HOST;
+ }
+}
+
static void vsock_deassign_transport(struct vsock_sock *vsk)
{
if (!vsk->transport)
@@ -402,9 +419,9 @@ static void vsock_deassign_transport(struct vsock_sock *vsk)
* (e.g. during the connect() or when a connection request on a listener
* socket is received).
* The vsk->remote_addr is used to decide which transport to use:
+ * - remote CID == VMADDR_CID_LOCAL, or == g2h->local_cid, or == VMADDR_CID_HOST
+ *   when g2h is not loaded: use the local transport;
* - remote CID <= VMADDR_CID_HOST will use guest->host transport;
- * - remote CID == local_cid (guest->host transport) will use guest->host
- * transport for loopback (host->guest transports don't support loopback);
* - remote CID > VMADDR_CID_HOST will use host->guest transport;
*/
int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
@@ -419,9 +436,9 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
new_transport = transport_dgram;
break;
case SOCK_STREAM:
- if (remote_cid <= VMADDR_CID_HOST ||
- (transport_g2h &&
- remote_cid == transport_g2h->get_local_cid()))
+ if (vsock_use_local_transport(remote_cid))
+ new_transport = transport_local;
+ else if (remote_cid <= VMADDR_CID_HOST)
new_transport = transport_g2h;
else
new_transport = transport_h2g;
@@ -464,6 +481,9 @@ bool vsock_find_cid(unsigned int cid)
if (transport_h2g && cid == VMADDR_CID_HOST)
return true;
+ if (transport_local && cid == VMADDR_CID_LOCAL)
+ return true;
+
return false;
}
EXPORT_SYMBOL_GPL(vsock_find_cid);
@@ -2137,7 +2157,7 @@ EXPORT_SYMBOL_GPL(vsock_core_get_transport);
int vsock_core_register(const struct vsock_transport *t, int features)
{
- const struct vsock_transport *t_h2g, *t_g2h, *t_dgram;
+ const struct vsock_transport *t_h2g, *t_g2h, *t_dgram, *t_local;
int err = mutex_lock_interruptible(&vsock_register_mutex);
if (err)
@@ -2146,6 +2166,7 @@ int vsock_core_register(const struct vsock_transport *t, int features)
t_h2g = transport_h2g;
t_g2h = transport_g2h;
t_dgram = transport_dgram;
+ t_local = transport_local;
if (features & VSOCK_TRANSPORT_F_H2G) {
if (t_h2g) {
@@ -2171,9 +2192,18 @@ int vsock_core_register(const struct vsock_transport *t, int features)
t_dgram = t;
}
+ if (features & VSOCK_TRANSPORT_F_LOCAL) {
+ if (t_local) {
+ err = -EBUSY;
+ goto err_busy;
+ }
+ t_local = t;
+ }
+
transport_h2g = t_h2g;
transport_g2h = t_g2h;
transport_dgram = t_dgram;
+ transport_local = t_local;
err_busy:
mutex_unlock(&vsock_register_mutex);
@@ -2194,6 +2224,9 @@ void vsock_core_unregister(const struct vsock_transport *t)
if (transport_dgram == t)
transport_dgram = NULL;
+ if (transport_local == t)
+ transport_local = NULL;
+
mutex_unlock(&vsock_register_mutex);
}
EXPORT_SYMBOL_GPL(vsock_core_unregister);
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 1458c5c8b64d..dfbaf6bd8b1c 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -44,10 +44,6 @@ struct virtio_vsock {
spinlock_t send_pkt_list_lock;
struct list_head send_pkt_list;
- struct work_struct loopback_work;
- spinlock_t loopback_list_lock; /* protects loopback_list */
- struct list_head loopback_list;
-
atomic_t queued_replies;
/* The following fields are protected by rx_lock. vqs[VSOCK_VQ_RX]
@@ -86,20 +82,6 @@ out_rcu:
return ret;
}
-static int virtio_transport_send_pkt_loopback(struct virtio_vsock *vsock,
- struct virtio_vsock_pkt *pkt)
-{
- int len = pkt->len;
-
- spin_lock_bh(&vsock->loopback_list_lock);
- list_add_tail(&pkt->list, &vsock->loopback_list);
- spin_unlock_bh(&vsock->loopback_list_lock);
-
- queue_work(virtio_vsock_workqueue, &vsock->loopback_work);
-
- return len;
-}
-
static void
virtio_transport_send_pkt_work(struct work_struct *work)
{
@@ -194,7 +176,8 @@ virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt)
}
if (le64_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid) {
- len = virtio_transport_send_pkt_loopback(vsock, pkt);
+ virtio_transport_free_pkt(pkt);
+ len = -ENODEV;
goto out_rcu;
}
@@ -502,33 +485,6 @@ static struct virtio_transport virtio_transport = {
.send_pkt = virtio_transport_send_pkt,
};
-static void virtio_transport_loopback_work(struct work_struct *work)
-{
- struct virtio_vsock *vsock =
- container_of(work, struct virtio_vsock, loopback_work);
- LIST_HEAD(pkts);
-
- spin_lock_bh(&vsock->loopback_list_lock);
- list_splice_init(&vsock->loopback_list, &pkts);
- spin_unlock_bh(&vsock->loopback_list_lock);
-
- mutex_lock(&vsock->rx_lock);
-
- if (!vsock->rx_run)
- goto out;
-
- while (!list_empty(&pkts)) {
- struct virtio_vsock_pkt *pkt;
-
- pkt = list_first_entry(&pkts, struct virtio_vsock_pkt, list);
- list_del_init(&pkt->list);
-
- virtio_transport_recv_pkt(&virtio_transport, pkt);
- }
-out:
- mutex_unlock(&vsock->rx_lock);
-}
-
static void virtio_transport_rx_work(struct work_struct *work)
{
struct virtio_vsock *vsock =
@@ -633,13 +589,10 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
mutex_init(&vsock->event_lock);
spin_lock_init(&vsock->send_pkt_list_lock);
INIT_LIST_HEAD(&vsock->send_pkt_list);
- spin_lock_init(&vsock->loopback_list_lock);
- INIT_LIST_HEAD(&vsock->loopback_list);
INIT_WORK(&vsock->rx_work, virtio_transport_rx_work);
INIT_WORK(&vsock->tx_work, virtio_transport_tx_work);
INIT_WORK(&vsock->event_work, virtio_transport_event_work);
INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work);
- INIT_WORK(&vsock->loopback_work, virtio_transport_loopback_work);
mutex_lock(&vsock->tx_lock);
vsock->tx_run = true;
@@ -720,22 +673,12 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
}
spin_unlock_bh(&vsock->send_pkt_list_lock);
- spin_lock_bh(&vsock->loopback_list_lock);
- while (!list_empty(&vsock->loopback_list)) {
- pkt = list_first_entry(&vsock->loopback_list,
- struct virtio_vsock_pkt, list);
- list_del(&pkt->list);
- virtio_transport_free_pkt(pkt);
- }
- spin_unlock_bh(&vsock->loopback_list_lock);
-
/* Delete virtqueues and flush outstanding callbacks if any */
vdev->config->del_vqs(vdev);
/* Other works can be queued before 'config->del_vqs()', so we flush
* all works before to free the vsock object to avoid use after free.
*/
- flush_work(&vsock->loopback_work);
flush_work(&vsock->rx_work);
flush_work(&vsock->tx_work);
flush_work(&vsock->event_work);
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index e5ea29c6bca7..d9f0c9c5425a 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -11,9 +11,6 @@
#include <linux/sched/signal.h>
#include <linux/ctype.h>
#include <linux/list.h>
-#include <linux/virtio.h>
-#include <linux/virtio_ids.h>
-#include <linux/virtio_config.h>
#include <linux/virtio_vsock.h>
#include <uapi/linux/vsockmon.h>
@@ -34,6 +31,9 @@ virtio_transport_get_ops(struct vsock_sock *vsk)
{
const struct vsock_transport *t = vsock_core_get_transport(vsk);
+ if (WARN_ON(!t))
+ return NULL;
+
return container_of(t, struct virtio_transport, transport);
}
@@ -161,15 +161,25 @@ void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt)
}
EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
+/* This function can only be used on connecting/connected sockets,
+ * since a socket assigned to a transport is required.
+ *
+ * Do not use on listener sockets!
+ */
static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
struct virtio_vsock_pkt_info *info)
{
u32 src_cid, src_port, dst_cid, dst_port;
+ const struct virtio_transport *t_ops;
struct virtio_vsock_sock *vvs;
struct virtio_vsock_pkt *pkt;
u32 pkt_len = info->pkt_len;
- src_cid = virtio_transport_get_ops(vsk)->transport.get_local_cid();
+ t_ops = virtio_transport_get_ops(vsk);
+ if (unlikely(!t_ops))
+ return -EFAULT;
+
+ src_cid = t_ops->transport.get_local_cid();
src_port = vsk->local_addr.svm_port;
if (!info->remote_cid) {
dst_cid = vsk->remote_addr.svm_cid;
@@ -202,7 +212,7 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
virtio_transport_inc_tx_pkt(vvs, pkt);
- return virtio_transport_get_ops(vsk)->send_pkt(pkt);
+ return t_ops->send_pkt(pkt);
}
static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
@@ -1021,18 +1031,18 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt,
int ret;
if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_REQUEST) {
- virtio_transport_reset(vsk, pkt);
+ virtio_transport_reset_no_sock(t, pkt);
return -EINVAL;
}
if (sk_acceptq_is_full(sk)) {
- virtio_transport_reset(vsk, pkt);
+ virtio_transport_reset_no_sock(t, pkt);
return -ENOMEM;
}
child = vsock_create_connected(sk);
if (!child) {
- virtio_transport_reset(vsk, pkt);
+ virtio_transport_reset_no_sock(t, pkt);
return -ENOMEM;
}
@@ -1054,7 +1064,7 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt,
*/
if (ret || vchild->transport != &t->transport) {
release_sock(child);
- virtio_transport_reset(vsk, pkt);
+ virtio_transport_reset_no_sock(t, pkt);
sock_put(child);
return ret;
}
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 644d32e43d23..4b8b1150a738 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -648,7 +648,7 @@ static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg)
static bool vmci_transport_stream_allow(u32 cid, u32 port)
{
static const u32 non_socket_contexts[] = {
- VMADDR_CID_RESERVED,
+ VMADDR_CID_LOCAL,
};
int i;
diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c
new file mode 100644
index 000000000000..a45f7ffca8c5
--- /dev/null
+++ b/net/vmw_vsock/vsock_loopback.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* loopback transport for vsock using virtio_transport_common APIs
+ *
+ * Copyright (C) 2013-2019 Red Hat, Inc.
+ * Authors: Asias He <asias@redhat.com>
+ * Stefan Hajnoczi <stefanha@redhat.com>
+ * Stefano Garzarella <sgarzare@redhat.com>
+ *
+ */
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/virtio_vsock.h>
+
+struct vsock_loopback {
+ struct workqueue_struct *workqueue;
+
+ spinlock_t pkt_list_lock; /* protects pkt_list */
+ struct list_head pkt_list;
+ struct work_struct pkt_work;
+};
+
+static struct vsock_loopback the_vsock_loopback;
+
+static u32 vsock_loopback_get_local_cid(void)
+{
+ return VMADDR_CID_LOCAL;
+}
+
+static int vsock_loopback_send_pkt(struct virtio_vsock_pkt *pkt)
+{
+ struct vsock_loopback *vsock = &the_vsock_loopback;
+ int len = pkt->len;
+
+ spin_lock_bh(&vsock->pkt_list_lock);
+ list_add_tail(&pkt->list, &vsock->pkt_list);
+ spin_unlock_bh(&vsock->pkt_list_lock);
+
+ queue_work(vsock->workqueue, &vsock->pkt_work);
+
+ return len;
+}
+
+static int vsock_loopback_cancel_pkt(struct vsock_sock *vsk)
+{
+ struct vsock_loopback *vsock = &the_vsock_loopback;
+ struct virtio_vsock_pkt *pkt, *n;
+ LIST_HEAD(freeme);
+
+ spin_lock_bh(&vsock->pkt_list_lock);
+ list_for_each_entry_safe(pkt, n, &vsock->pkt_list, list) {
+ if (pkt->vsk != vsk)
+ continue;
+ list_move(&pkt->list, &freeme);
+ }
+ spin_unlock_bh(&vsock->pkt_list_lock);
+
+ list_for_each_entry_safe(pkt, n, &freeme, list) {
+ list_del(&pkt->list);
+ virtio_transport_free_pkt(pkt);
+ }
+
+ return 0;
+}
+
+static struct virtio_transport loopback_transport = {
+ .transport = {
+ .module = THIS_MODULE,
+
+ .get_local_cid = vsock_loopback_get_local_cid,
+
+ .init = virtio_transport_do_socket_init,
+ .destruct = virtio_transport_destruct,
+ .release = virtio_transport_release,
+ .connect = virtio_transport_connect,
+ .shutdown = virtio_transport_shutdown,
+ .cancel_pkt = vsock_loopback_cancel_pkt,
+
+ .dgram_bind = virtio_transport_dgram_bind,
+ .dgram_dequeue = virtio_transport_dgram_dequeue,
+ .dgram_enqueue = virtio_transport_dgram_enqueue,
+ .dgram_allow = virtio_transport_dgram_allow,
+
+ .stream_dequeue = virtio_transport_stream_dequeue,
+ .stream_enqueue = virtio_transport_stream_enqueue,
+ .stream_has_data = virtio_transport_stream_has_data,
+ .stream_has_space = virtio_transport_stream_has_space,
+ .stream_rcvhiwat = virtio_transport_stream_rcvhiwat,
+ .stream_is_active = virtio_transport_stream_is_active,
+ .stream_allow = virtio_transport_stream_allow,
+
+ .notify_poll_in = virtio_transport_notify_poll_in,
+ .notify_poll_out = virtio_transport_notify_poll_out,
+ .notify_recv_init = virtio_transport_notify_recv_init,
+ .notify_recv_pre_block = virtio_transport_notify_recv_pre_block,
+ .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue,
+ .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
+ .notify_send_init = virtio_transport_notify_send_init,
+ .notify_send_pre_block = virtio_transport_notify_send_pre_block,
+ .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
+ .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
+ .notify_buffer_size = virtio_transport_notify_buffer_size,
+ },
+
+ .send_pkt = vsock_loopback_send_pkt,
+};
+
+static void vsock_loopback_work(struct work_struct *work)
+{
+ struct vsock_loopback *vsock =
+ container_of(work, struct vsock_loopback, pkt_work);
+ LIST_HEAD(pkts);
+
+ spin_lock_bh(&vsock->pkt_list_lock);
+ list_splice_init(&vsock->pkt_list, &pkts);
+ spin_unlock_bh(&vsock->pkt_list_lock);
+
+ while (!list_empty(&pkts)) {
+ struct virtio_vsock_pkt *pkt;
+
+ pkt = list_first_entry(&pkts, struct virtio_vsock_pkt, list);
+ list_del_init(&pkt->list);
+
+ virtio_transport_deliver_tap_pkt(pkt);
+ virtio_transport_recv_pkt(&loopback_transport, pkt);
+ }
+}
+
+static int __init vsock_loopback_init(void)
+{
+ struct vsock_loopback *vsock = &the_vsock_loopback;
+ int ret;
+
+ vsock->workqueue = alloc_workqueue("vsock-loopback", 0, 0);
+ if (!vsock->workqueue)
+ return -ENOMEM;
+
+ spin_lock_init(&vsock->pkt_list_lock);
+ INIT_LIST_HEAD(&vsock->pkt_list);
+ INIT_WORK(&vsock->pkt_work, vsock_loopback_work);
+
+ ret = vsock_core_register(&loopback_transport.transport,
+ VSOCK_TRANSPORT_F_LOCAL);
+ if (ret)
+ goto out_wq;
+
+ return 0;
+
+out_wq:
+ destroy_workqueue(vsock->workqueue);
+ return ret;
+}
+
+static void __exit vsock_loopback_exit(void)
+{
+ struct vsock_loopback *vsock = &the_vsock_loopback;
+ struct virtio_vsock_pkt *pkt;
+
+ vsock_core_unregister(&loopback_transport.transport);
+
+ flush_work(&vsock->pkt_work);
+
+ spin_lock_bh(&vsock->pkt_list_lock);
+ while (!list_empty(&vsock->pkt_list)) {
+ pkt = list_first_entry(&vsock->pkt_list,
+ struct virtio_vsock_pkt, list);
+ list_del(&pkt->list);
+ virtio_transport_free_pkt(pkt);
+ }
+ spin_unlock_bh(&vsock->pkt_list_lock);
+
+ destroy_workqueue(vsock->workqueue);
+}
+
+module_init(vsock_loopback_init);
+module_exit(vsock_loopback_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Stefano Garzarella <sgarzare@redhat.com>");
+MODULE_DESCRIPTION("loopback transport for vsock");
+MODULE_ALIAS_NETPROTO(PF_VSOCK);
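
With vsock_loopback registered for VSOCK_TRANSPORT_F_LOCAL, AF_VSOCK traffic addressed to the new VMADDR_CID_LOCAL well-known CID stays on the host. A minimal client sketch; the port is arbitrary and a listener on it is assumed:

	#include <stdio.h>
	#include <sys/socket.h>
	#include <linux/vm_sockets.h>

	int main(void)
	{
		struct sockaddr_vm addr = {
			.svm_family = AF_VSOCK,
			.svm_cid = VMADDR_CID_LOCAL,	/* loopback CID (1) */
			.svm_port = 1234,		/* illustrative port */
		};
		int fd = socket(AF_VSOCK, SOCK_STREAM, 0);

		if (fd < 0 || connect(fd, (struct sockaddr *)&addr, sizeof(addr)))
			perror("vsock loopback connect");
		return 0;
	}
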
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 350513744575..3e25229a059d 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1102,6 +1102,7 @@ static void __cfg80211_unregister_wdev(struct wireless_dev *wdev, bool sync)
#ifdef CONFIG_CFG80211_WEXT
kzfree(wdev->wext.keys);
+ wdev->wext.keys = NULL;
#endif
/* only initialized if we have a netdev */
if (wdev->netdev)
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index da5262b2298b..fa3526592c51 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -12900,8 +12900,7 @@ static int nl80211_vendor_check_policy(const struct wiphy_vendor_command *vcmd,
return -EINVAL;
}
- return nl80211_validate_nested(attr, vcmd->maxattr, vcmd->policy,
- extack);
+ return nla_validate_nested(attr, vcmd->maxattr, vcmd->policy, extack);
}
static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info)
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index c34f7d077604..2efe44a34644 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -659,6 +659,12 @@ static int x25_release(struct socket *sock)
sock_set_flag(sk, SOCK_DEAD);
sock_set_flag(sk, SOCK_DESTROY);
break;
+
+ case X25_STATE_5:
+ x25_write_internal(sk, X25_CLEAR_REQUEST);
+ x25_disconnect(sk, 0, 0, 0);
+ __x25_destroy_socket(sk);
+ goto out;
}
sock_orphan(sk);
@@ -1054,6 +1060,8 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
if (test_bit(X25_ACCPT_APPRV_FLAG, &makex25->flags)) {
x25_write_internal(make, X25_CALL_ACCEPTED);
makex25->state = X25_STATE_3;
+ } else {
+ makex25->state = X25_STATE_5;
}
/*
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index f97c43344e95..4d3bb46aaae0 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -382,6 +382,35 @@ out_clear:
return 0;
}
+/*
+ * State machine for state 5, Call Accepted / Call Connected pending (X25_ACCPT_APPRV_FLAG).
+ * The handling of the timer(s) is in file x25_timer.c
+ * Handling of state 0 and connection release is in af_x25.c.
+ */
+static int x25_state5_machine(struct sock *sk, struct sk_buff *skb, int frametype)
+{
+ struct x25_sock *x25 = x25_sk(sk);
+
+ switch (frametype) {
+ case X25_CLEAR_REQUEST:
+ if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2)) {
+ x25_write_internal(sk, X25_CLEAR_REQUEST);
+ x25->state = X25_STATE_2;
+ x25_start_t23timer(sk);
+ return 0;
+ }
+
+ x25_write_internal(sk, X25_CLEAR_CONFIRMATION);
+ x25_disconnect(sk, 0, skb->data[3], skb->data[4]);
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
/* Higher level upcall for a LAPB frame */
int x25_process_rx_frame(struct sock *sk, struct sk_buff *skb)
{
@@ -406,6 +435,9 @@ int x25_process_rx_frame(struct sock *sk, struct sk_buff *skb)
case X25_STATE_4:
queued = x25_state4_machine(sk, skb, frametype);
break;
+ case X25_STATE_5:
+ queued = x25_state5_machine(sk, skb, frametype);
+ break;
}
x25_kick(sk);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 5560d60c1ff5..02ada7ab8c6e 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -338,12 +338,21 @@ out:
}
EXPORT_SYMBOL(xsk_umem_consume_tx);
-static int xsk_zc_xmit(struct xdp_sock *xs)
+static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
{
struct net_device *dev = xs->dev;
+ int err;
+
+ rcu_read_lock();
+ err = dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
+ rcu_read_unlock();
+
+ return err;
+}
- return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
- XDP_WAKEUP_TX);
+static int xsk_zc_xmit(struct xdp_sock *xs)
+{
+ return xsk_wakeup(xs, XDP_WAKEUP_TX);
}
static void xsk_destruct_skb(struct sk_buff *skb)
@@ -462,19 +471,16 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
__poll_t mask = datagram_poll(file, sock, wait);
struct sock *sk = sock->sk;
struct xdp_sock *xs = xdp_sk(sk);
- struct net_device *dev;
struct xdp_umem *umem;
if (unlikely(!xsk_is_bound(xs)))
return mask;
- dev = xs->dev;
umem = xs->umem;
if (umem->need_wakeup) {
- if (dev->netdev_ops->ndo_xsk_wakeup)
- dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
- umem->need_wakeup);
+ if (xs->zc)
+ xsk_wakeup(xs, umem->need_wakeup);
else
/* Poll needs to drive Tx also in copy mode */
__xsk_sendmsg(sk);
OpenPOWER on IntegriCloud