summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c6
-rw-r--r--net/atm/mpc.c2
-rw-r--r--net/bridge/br_input.c2
-rw-r--r--net/bridge/br_mdb.c54
-rw-r--r--net/bridge/br_multicast.c18
-rw-r--r--net/bridge/br_netlink.c1
-rw-r--r--net/bridge/br_private.h2
-rw-r--r--net/bridge/netfilter/ebtables.c3
-rw-r--r--net/ceph/pagevec.c4
-rw-r--r--net/core/dev.c66
-rw-r--r--net/core/fib_notifier.c10
-rw-r--r--net/core/fib_rules.c6
-rw-r--r--net/core/flow_dissector.c30
-rw-r--r--net/core/net-sysfs.c17
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/core/pktgen.c16
-rw-r--r--net/core/rtnetlink.c2
-rw-r--r--net/core/skbuff.c10
-rw-r--r--net/core/sock.c35
-rw-r--r--net/decnet/dn_route.c2
-rw-r--r--net/decnet/dn_table.c1
-rw-r--r--net/dsa/Kconfig4
-rw-r--r--net/dsa/Makefile1
-rw-r--r--net/dsa/dsa.c3
-rw-r--r--net/dsa/dsa2.c585
-rw-r--r--net/dsa/dsa_priv.h9
-rw-r--r--net/dsa/legacy.c22
-rw-r--r--net/dsa/master.c30
-rw-r--r--net/dsa/port.c16
-rw-r--r--net/dsa/slave.c20
-rw-r--r--net/dsa/switch.c6
-rw-r--r--net/dsa/tag_brcm.c70
-rw-r--r--net/dsa/tag_dsa.c2
-rw-r--r--net/dsa/tag_edsa.c2
-rw-r--r--net/dsa/tag_lan9303.c13
-rw-r--r--net/ipv4/ah4.c2
-rw-r--r--net/ipv4/esp4.c2
-rw-r--r--net/ipv4/fib_semantics.c4
-rw-r--r--net/ipv4/inet_fragment.c2
-rw-r--r--net/ipv4/inet_timewait_sock.c3
-rw-r--r--net/ipv4/ip_gre.c58
-rw-r--r--net/ipv4/netfilter/arp_tables.c22
-rw-r--r--net/ipv4/netfilter/ip_tables.c23
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c28
-rw-r--r--net/ipv4/proc.c1
-rw-r--r--net/ipv4/route.c16
-rw-r--r--net/ipv4/sysctl_net_ipv4.c51
-rw-r--r--net/ipv4/tcp.c24
-rw-r--r--net/ipv4/tcp_cong.c76
-rw-r--r--net/ipv4/tcp_input.c236
-rw-r--r--net/ipv4/tcp_ipv4.c22
-rw-r--r--net/ipv4/tcp_metrics.c4
-rw-r--r--net/ipv4/tcp_minisocks.c6
-rw-r--r--net/ipv4/tcp_offload.c12
-rw-r--r--net/ipv4/tcp_output.c38
-rw-r--r--net/ipv4/udp.c4
-rw-r--r--net/ipv6/addrconf.c25
-rw-r--r--net/ipv6/ah6.c2
-rw-r--r--net/ipv6/esp6.c2
-rw-r--r--net/ipv6/ila/ila.h12
-rw-r--r--net/ipv6/ila/ila_common.c104
-rw-r--r--net/ipv6/ila/ila_lwt.c111
-rw-r--r--net/ipv6/ila/ila_xlat.c26
-rw-r--r--net/ipv6/ip6_flowlabel.c2
-rw-r--r--net/ipv6/ip6_gre.c22
-rw-r--r--net/ipv6/ip6_tunnel.c72
-rw-r--r--net/ipv6/ip6mr.c4
-rw-r--r--net/ipv6/ndisc.c9
-rw-r--r--net/ipv6/netfilter/ip6_tables.c22
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c24
-rw-r--r--net/ipv6/output_core.c31
-rw-r--r--net/ipv6/route.c3
-rw-r--r--net/ipv6/tcp_ipv6.c4
-rw-r--r--net/ipv6/udp.c4
-rw-r--r--net/ipv6/xfrm6_tunnel.c8
-rw-r--r--net/key/af_key.c2
-rw-r--r--net/l2tp/l2tp_core.c6
-rw-r--r--net/l2tp/l2tp_eth.c2
-rw-r--r--net/l2tp/l2tp_ip.c24
-rw-r--r--net/l2tp/l2tp_ip6.c24
-rw-r--r--net/l2tp/l2tp_ppp.c76
-rw-r--r--net/llc/llc_input.c4
-rw-r--r--net/mac80211/sta_info.c2
-rw-r--r--net/ncsi/ncsi-aen.c15
-rw-r--r--net/ncsi/ncsi-manage.c76
-rw-r--r--net/ncsi/ncsi-rsp.c41
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c2
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c5
-rw-r--r--net/netfilter/ipset/pfxlen.c395
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c15
-rw-r--r--net/netfilter/ipvs/ip_vs_est.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c11
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c11
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c17
-rw-r--r--net/netfilter/nf_conntrack_h323_asn1.c81
-rw-r--r--net/netfilter/nf_conntrack_netlink.c12
-rw-r--r--net/netfilter/nf_conntrack_proto.c86
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c21
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c1
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c1
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c62
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c41
-rw-r--r--net/netfilter/nf_nat_core.c9
-rw-r--r--net/netfilter/nf_nat_ftp.c2
-rw-r--r--net/netfilter/nf_nat_irc.c2
-rw-r--r--net/netfilter/nf_tables_api.c195
-rw-r--r--net/netfilter/nfnetlink_queue.c4
-rw-r--r--net/netfilter/nft_ct.c39
-rw-r--r--net/netfilter/nft_set_bitmap.c18
-rw-r--r--net/netfilter/nft_set_hash.c41
-rw-r--r--net/netfilter/nft_set_rbtree.c73
-rw-r--r--net/netfilter/x_tables.c21
-rw-r--r--net/netfilter/xt_connlimit.c55
-rw-r--r--net/netlabel/netlabel_addrlist.h4
-rw-r--r--net/netlabel/netlabel_calipso.c2
-rw-r--r--net/netlink/af_netlink.c18
-rw-r--r--net/netlink/af_netlink.h1
-rw-r--r--net/netrom/nr_loopback.c2
-rw-r--r--net/nfc/core.c10
-rw-r--r--net/nfc/digital_core.c1
-rw-r--r--net/nfc/hci/core.c7
-rw-r--r--net/nfc/hci/llc_shdlc.c23
-rw-r--r--net/nfc/llcp_core.c14
-rw-r--r--net/nfc/netlink.c35
-rw-r--r--net/nsh/nsh.c60
-rw-r--r--net/openvswitch/Kconfig1
-rw-r--r--net/openvswitch/Makefile1
-rw-r--r--net/openvswitch/actions.c122
-rw-r--r--net/openvswitch/datapath.c43
-rw-r--r--net/openvswitch/datapath.h35
-rw-r--r--net/openvswitch/flow.c51
-rw-r--r--net/openvswitch/flow.h7
-rw-r--r--net/openvswitch/flow_netlink.c349
-rw-r--r--net/openvswitch/flow_netlink.h5
-rw-r--r--net/openvswitch/meter.c597
-rw-r--r--net/openvswitch/meter.h54
-rw-r--r--net/packet/af_packet.c1
-rw-r--r--net/phonet/pn_dev.c3
-rw-r--r--net/qrtr/qrtr.c2
-rw-r--r--net/rds/ib_fmr.c4
-rw-r--r--net/rds/ib_recv.c10
-rw-r--r--net/rxrpc/af_rxrpc.c16
-rw-r--r--net/rxrpc/ar-internal.h1
-rw-r--r--net/rxrpc/call_event.c2
-rw-r--r--net/rxrpc/call_object.c1
-rw-r--r--net/rxrpc/input.c2
-rw-r--r--net/rxrpc/output.c19
-rw-r--r--net/rxrpc/recvmsg.c2
-rw-r--r--net/sched/act_api.c2
-rw-r--r--net/sched/act_bpf.c2
-rw-r--r--net/sched/act_connmark.c2
-rw-r--r--net/sched/act_csum.c2
-rw-r--r--net/sched/act_gact.c2
-rw-r--r--net/sched/act_ife.c2
-rw-r--r--net/sched/act_ipt.c4
-rw-r--r--net/sched/act_mirred.c2
-rw-r--r--net/sched/act_nat.c2
-rw-r--r--net/sched/act_pedit.c2
-rw-r--r--net/sched/act_police.c2
-rw-r--r--net/sched/act_sample.c2
-rw-r--r--net/sched/act_simple.c2
-rw-r--r--net/sched/act_skbedit.c2
-rw-r--r--net/sched/act_skbmod.c2
-rw-r--r--net/sched/act_tunnel_key.c2
-rw-r--r--net/sched/act_vlan.c83
-rw-r--r--net/sched/cls_api.c1
-rw-r--r--net/sched/cls_basic.c20
-rw-r--r--net/sched/cls_bpf.c7
-rw-r--r--net/sched/cls_cgroup.c24
-rw-r--r--net/sched/cls_flow.c24
-rw-r--r--net/sched/cls_flower.c16
-rw-r--r--net/sched/cls_fw.c17
-rw-r--r--net/sched/cls_matchall.c15
-rw-r--r--net/sched/cls_route.c17
-rw-r--r--net/sched/cls_rsvp.h15
-rw-r--r--net/sched/cls_tcindex.c33
-rw-r--r--net/sched/cls_u32.c8
-rw-r--r--net/sched/sch_cbs.c4
-rw-r--r--net/sched/sch_mqprio.c5
-rw-r--r--net/sched/sch_netem.c139
-rw-r--r--net/sched/sch_red.c83
-rw-r--r--net/sctp/ipv6.c5
-rw-r--r--net/sctp/sm_make_chunk.c2
-rw-r--r--net/sctp/socket.c67
-rw-r--r--net/sctp/stream.c32
-rw-r--r--net/socket.c1
-rw-r--r--net/sunrpc/clnt.c14
-rw-r--r--net/sunrpc/rpc_pipe.c8
-rw-r--r--net/sunrpc/rpcb_clnt.c6
-rw-r--r--net/sunrpc/sched.c3
-rw-r--r--net/sunrpc/sunrpc_syms.c3
-rw-r--r--net/sunrpc/svc.c4
-rw-r--r--net/sunrpc/xprt.c1
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c6
-rw-r--r--net/sunrpc/xprtrdma/fmr_ops.c19
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c27
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c363
-rw-r--r--net/sunrpc/xprtrdma/transport.c19
-rw-r--r--net/sunrpc/xprtrdma/verbs.c236
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h119
-rw-r--r--net/sunrpc/xprtsock.c4
-rw-r--r--net/switchdev/switchdev.c2
-rw-r--r--net/tipc/link.c26
-rw-r--r--net/tipc/msg.c24
-rw-r--r--net/tipc/msg.h17
-rw-r--r--net/tipc/node.c2
-rw-r--r--net/tls/tls_main.c96
-rw-r--r--net/tls/tls_sw.c24
-rw-r--r--net/wireless/nl80211.c6
-rw-r--r--net/xfrm/xfrm_input.c4
-rw-r--r--net/xfrm/xfrm_policy.c43
216 files changed, 4573 insertions, 2373 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index a0103500cc6d..8dfdd94e430f 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -379,6 +379,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
dev->name);
vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
}
+ if (event == NETDEV_DOWN &&
+ (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+ vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
vlan_info = rtnl_dereference(dev->vlan_info);
if (!vlan_info)
@@ -426,9 +429,6 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
struct net_device *tmp;
LIST_HEAD(close_list);
- if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
- vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
-
/* Put all VLANs for this dev in the down state too. */
vlan_group_for_each_dev(grp, i, vlandev) {
flgs = vlandev->flags;
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 883d25778fa4..e882d8b5db05 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -121,7 +121,7 @@ static struct notifier_block mpoa_notifier = {
struct mpoa_client *mpcs = NULL; /* FIXME */
static struct atm_mpoa_qos *qos_head = NULL;
-static DEFINE_TIMER(mpc_timer, NULL, 0, 0);
+static DEFINE_TIMER(mpc_timer, NULL);
static struct mpoa_client *find_mpc_by_itfnum(int itf)
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index a096d3e189da..7f98a7d25866 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -137,7 +137,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
mdst = br_mdb_get(br, skb, vid);
if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
br_multicast_querier_exists(br, eth_hdr(skb))) {
- if ((mdst && mdst->mglist) ||
+ if ((mdst && mdst->host_joined) ||
br_multicast_is_router(br)) {
local_rcv = true;
br->dev->stats.multicast++;
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 31ddff22563e..b0f4c734900b 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -292,6 +292,46 @@ err:
kfree(priv);
}
+static void br_mdb_switchdev_host_port(struct net_device *dev,
+ struct net_device *lower_dev,
+ struct br_mdb_entry *entry, int type)
+{
+ struct switchdev_obj_port_mdb mdb = {
+ .obj = {
+ .id = SWITCHDEV_OBJ_ID_HOST_MDB,
+ .flags = SWITCHDEV_F_DEFER,
+ },
+ .vid = entry->vid,
+ };
+
+ if (entry->addr.proto == htons(ETH_P_IP))
+ ip_eth_mc_map(entry->addr.u.ip4, mdb.addr);
+#if IS_ENABLED(CONFIG_IPV6)
+ else
+ ipv6_eth_mc_map(&entry->addr.u.ip6, mdb.addr);
+#endif
+
+ mdb.obj.orig_dev = dev;
+ switch (type) {
+ case RTM_NEWMDB:
+ switchdev_port_obj_add(lower_dev, &mdb.obj);
+ break;
+ case RTM_DELMDB:
+ switchdev_port_obj_del(lower_dev, &mdb.obj);
+ break;
+ }
+}
+
+static void br_mdb_switchdev_host(struct net_device *dev,
+ struct br_mdb_entry *entry, int type)
+{
+ struct net_device *lower_dev;
+ struct list_head *iter;
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter)
+ br_mdb_switchdev_host_port(dev, lower_dev, entry, type);
+}
+
static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
struct br_mdb_entry *entry, int type)
{
@@ -317,7 +357,7 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
#endif
mdb.obj.orig_dev = port_dev;
- if (port_dev && type == RTM_NEWMDB) {
+ if (p && port_dev && type == RTM_NEWMDB) {
complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC);
if (complete_info) {
complete_info->port = p;
@@ -327,10 +367,13 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
if (switchdev_port_obj_add(port_dev, &mdb.obj))
kfree(complete_info);
}
- } else if (port_dev && type == RTM_DELMDB) {
+ } else if (p && port_dev && type == RTM_DELMDB) {
switchdev_port_obj_del(port_dev, &mdb.obj);
}
+ if (!p)
+ br_mdb_switchdev_host(dev, entry, type);
+
skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC);
if (!skb)
goto errout;
@@ -353,7 +396,10 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
struct br_mdb_entry entry;
memset(&entry, 0, sizeof(entry));
- entry.ifindex = port->dev->ifindex;
+ if (port)
+ entry.ifindex = port->dev->ifindex;
+ else
+ entry.ifindex = dev->ifindex;
entry.addr.proto = group->proto;
entry.addr.u.ip4 = group->u.ip4;
#if IS_ENABLED(CONFIG_IPV6)
@@ -655,7 +701,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
call_rcu_bh(&p->rcu, br_multicast_free_pg);
err = 0;
- if (!mp->ports && !mp->mglist &&
+ if (!mp->ports && !mp->host_joined &&
netif_running(br->dev))
mod_timer(&mp->timer, jiffies);
break;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 5f7f0e9d446c..cb4729539b82 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -249,7 +249,8 @@ static void br_multicast_group_expired(struct timer_list *t)
if (!netif_running(br->dev) || timer_pending(&mp->timer))
goto out;
- mp->mglist = false;
+ mp->host_joined = false;
+ br_mdb_notify(br->dev, NULL, &mp->addr, RTM_DELMDB, 0);
if (mp->ports)
goto out;
@@ -292,7 +293,7 @@ static void br_multicast_del_pg(struct net_bridge *br,
p->flags);
call_rcu_bh(&p->rcu, br_multicast_free_pg);
- if (!mp->ports && !mp->mglist &&
+ if (!mp->ports && !mp->host_joined &&
netif_running(br->dev))
mod_timer(&mp->timer, jiffies);
@@ -773,7 +774,10 @@ static int br_multicast_add_group(struct net_bridge *br,
goto err;
if (!port) {
- mp->mglist = true;
+ if (!mp->host_joined) {
+ mp->host_joined = true;
+ br_mdb_notify(br->dev, NULL, &mp->addr, RTM_NEWMDB, 0);
+ }
mod_timer(&mp->timer, now + br->multicast_membership_interval);
goto out;
}
@@ -1477,7 +1481,7 @@ static int br_ip4_multicast_query(struct net_bridge *br,
max_delay *= br->multicast_last_member_count;
- if (mp->mglist &&
+ if (mp->host_joined &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, now + max_delay) :
try_to_del_timer_sync(&mp->timer) >= 0))
@@ -1561,7 +1565,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
goto out;
max_delay *= br->multicast_last_member_count;
- if (mp->mglist &&
+ if (mp->host_joined &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, now + max_delay) :
try_to_del_timer_sync(&mp->timer) >= 0))
@@ -1622,7 +1626,7 @@ br_multicast_leave_group(struct net_bridge *br,
br_mdb_notify(br->dev, port, group, RTM_DELMDB,
p->flags);
- if (!mp->ports && !mp->mglist &&
+ if (!mp->ports && !mp->host_joined &&
netif_running(br->dev))
mod_timer(&mp->timer, jiffies);
}
@@ -1662,7 +1666,7 @@ br_multicast_leave_group(struct net_bridge *br,
br->multicast_last_member_interval;
if (!port) {
- if (mp->mglist &&
+ if (mp->host_joined &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, time) :
try_to_del_timer_sync(&mp->timer) >= 0)) {
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 67bae0f11c67..d0ef0a8e8831 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -657,6 +657,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
[IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NLA_U8 },
[IFLA_BRPORT_MCAST_FLOOD] = { .type = NLA_U8 },
[IFLA_BRPORT_BCAST_FLOOD] = { .type = NLA_U8 },
+ [IFLA_BRPORT_VLAN_TUNNEL] = { .type = NLA_U8 },
[IFLA_BRPORT_GROUP_FWD_MASK] = { .type = NLA_U16 },
[IFLA_BRPORT_NEIGH_SUPPRESS] = { .type = NLA_U8 },
};
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 40553d832b6e..1312b8d20ec3 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -209,7 +209,7 @@ struct net_bridge_mdb_entry
struct rcu_head rcu;
struct timer_list timer;
struct br_ip addr;
- bool mglist;
+ bool host_joined;
};
struct net_bridge_mdb_htable
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 3b3dcf719e07..37817d25b63d 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -2112,9 +2112,8 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
for (i = 0, j = 1 ; j < 4 ; j++, i++) {
struct compat_ebt_entry_mwt *match32;
unsigned int size;
- char *buf = buf_start;
+ char *buf = buf_start + offsets[i];
- buf = buf_start + offsets[i];
if (offsets[i] > offsets[j])
return -EINVAL;
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index ee43bc13221c..a3d0adc828e6 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -25,9 +25,9 @@ struct page **ceph_get_direct_page_vector(const void __user *data,
return ERR_PTR(-ENOMEM);
while (got < num_pages) {
- rc = get_user_pages_unlocked(
+ rc = get_user_pages_fast(
(unsigned long)data + ((unsigned long)got * PAGE_SIZE),
- num_pages - got, pages + got, write_page ? FOLL_WRITE : 0);
+ num_pages - got, write_page, pages + got);
if (rc < 0)
break;
BUG_ON(rc == 0);
diff --git a/net/core/dev.c b/net/core/dev.c
index 30b5fe32c525..8ee29f4f5fa9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1064,7 +1064,10 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
unsigned long *inuse;
struct net_device *d;
- p = strnchr(name, IFNAMSIZ-1, '%');
+ if (!dev_valid_name(name))
+ return -EINVAL;
+
+ p = strchr(name, '%');
if (p) {
/*
* Verify the string as this thing may have come from
@@ -1095,8 +1098,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
free_page((unsigned long) inuse);
}
- if (buf != name)
- snprintf(buf, IFNAMSIZ, name, i);
+ snprintf(buf, IFNAMSIZ, name, i);
if (!__dev_get_by_name(net, buf))
return i;
@@ -1104,7 +1106,21 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
* when the name is long and there isn't enough space left
* for the digits, or if all bits are used.
*/
- return -ENFILE;
+ return p ? -ENFILE : -EEXIST;
+}
+
+static int dev_alloc_name_ns(struct net *net,
+ struct net_device *dev,
+ const char *name)
+{
+ char buf[IFNAMSIZ];
+ int ret;
+
+ BUG_ON(!net);
+ ret = __dev_alloc_name(net, name, buf);
+ if (ret >= 0)
+ strlcpy(dev->name, buf, IFNAMSIZ);
+ return ret;
}
/**
@@ -1123,48 +1139,14 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
int dev_alloc_name(struct net_device *dev, const char *name)
{
- char buf[IFNAMSIZ];
- struct net *net;
- int ret;
-
- BUG_ON(!dev_net(dev));
- net = dev_net(dev);
- ret = __dev_alloc_name(net, name, buf);
- if (ret >= 0)
- strlcpy(dev->name, buf, IFNAMSIZ);
- return ret;
+ return dev_alloc_name_ns(dev_net(dev), dev, name);
}
EXPORT_SYMBOL(dev_alloc_name);
-static int dev_alloc_name_ns(struct net *net,
- struct net_device *dev,
- const char *name)
-{
- char buf[IFNAMSIZ];
- int ret;
-
- ret = __dev_alloc_name(net, name, buf);
- if (ret >= 0)
- strlcpy(dev->name, buf, IFNAMSIZ);
- return ret;
-}
-
int dev_get_valid_name(struct net *net, struct net_device *dev,
const char *name)
{
- BUG_ON(!net);
-
- if (!dev_valid_name(name))
- return -EINVAL;
-
- if (strchr(name, '%'))
- return dev_alloc_name_ns(net, dev, name);
- else if (__dev_get_by_name(net, name))
- return -EEXIST;
- else if (dev->name != name)
- strlcpy(dev->name, name, IFNAMSIZ);
-
- return 0;
+ return dev_alloc_name_ns(net, dev, name);
}
EXPORT_SYMBOL(dev_get_valid_name);
@@ -3754,7 +3736,7 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
flow_table = rcu_dereference(rxqueue->rps_flow_table);
if (flow_table && flow_id <= flow_table->mask) {
rflow = &flow_table->flows[flow_id];
- cpu = ACCESS_ONCE(rflow->cpu);
+ cpu = READ_ONCE(rflow->cpu);
if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
((int)(per_cpu(softnet_data, cpu).input_queue_head -
rflow->last_qtail) <
@@ -8667,6 +8649,8 @@ static void __net_exit netdev_exit(struct net *net)
{
kfree(net->dev_name_head);
kfree(net->dev_index_head);
+ if (net != &init_net)
+ WARN_ON_ONCE(!list_empty(&net->dev_base_head));
}
static struct pernet_operations __net_initdata netdev_net_ops = {
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
index 4fc202dbdfb6..0c048bdeb016 100644
--- a/net/core/fib_notifier.c
+++ b/net/core/fib_notifier.c
@@ -34,12 +34,14 @@ static unsigned int fib_seq_sum(void)
rtnl_lock();
for_each_net(net) {
- list_for_each_entry(ops, &net->fib_notifier_ops, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
if (!try_module_get(ops->owner))
continue;
fib_seq += ops->fib_seq_read(net);
module_put(ops->owner);
}
+ rcu_read_unlock();
}
rtnl_unlock();
@@ -161,8 +163,14 @@ static int __net_init fib_notifier_net_init(struct net *net)
return 0;
}
+static void __net_exit fib_notifier_net_exit(struct net *net)
+{
+ WARN_ON_ONCE(!list_empty(&net->fib_notifier_ops));
+}
+
static struct pernet_operations fib_notifier_net_ops = {
.init = fib_notifier_net_init,
+ .exit = fib_notifier_net_exit,
};
static int __init fib_notifier_init(void)
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index fafd0a41e3f7..98e1066c3d55 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -1022,8 +1022,14 @@ static int __net_init fib_rules_net_init(struct net *net)
return 0;
}
+static void __net_exit fib_rules_net_exit(struct net *net)
+{
+ WARN_ON_ONCE(!list_empty(&net->rules_ops));
+}
+
static struct pernet_operations fib_rules_net_ops = {
.init = fib_rules_net_init,
+ .exit = fib_rules_net_exit,
};
static int __init fib_rules_init(void)
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 1f5caafb4492..15ce30063765 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -10,6 +10,7 @@
#include <net/ipv6.h>
#include <net/gre.h>
#include <net/pptp.h>
+#include <net/tipc.h>
#include <linux/igmp.h>
#include <linux/icmp.h>
#include <linux/sctp.h>
@@ -772,23 +773,22 @@ proto_again:
break;
}
case htons(ETH_P_TIPC): {
- struct {
- __be32 pre[3];
- __be32 srcnode;
- } *hdr, _hdr;
- hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
+ struct tipc_basic_hdr *hdr, _hdr;
+
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr),
+ data, hlen, &_hdr);
if (!hdr) {
fdret = FLOW_DISSECT_RET_OUT_BAD;
break;
}
if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_TIPC_ADDRS)) {
+ FLOW_DISSECTOR_KEY_TIPC)) {
key_addrs = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_TIPC_ADDRS,
+ FLOW_DISSECTOR_KEY_TIPC,
target_container);
- key_addrs->tipcaddrs.srcnode = hdr->srcnode;
- key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC_ADDRS;
+ key_addrs->tipckey.key = tipc_hdr_rps_key(hdr);
+ key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC;
}
fdret = FLOW_DISSECT_RET_OUT_GOOD;
break;
@@ -1024,8 +1024,8 @@ static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
diff -= sizeof(flow->addrs.v6addrs);
break;
- case FLOW_DISSECTOR_KEY_TIPC_ADDRS:
- diff -= sizeof(flow->addrs.tipcaddrs);
+ case FLOW_DISSECTOR_KEY_TIPC:
+ diff -= sizeof(flow->addrs.tipckey);
break;
}
return (sizeof(*flow) - diff) / sizeof(u32);
@@ -1039,8 +1039,8 @@ __be32 flow_get_u32_src(const struct flow_keys *flow)
case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
return (__force __be32)ipv6_addr_hash(
&flow->addrs.v6addrs.src);
- case FLOW_DISSECTOR_KEY_TIPC_ADDRS:
- return flow->addrs.tipcaddrs.srcnode;
+ case FLOW_DISSECTOR_KEY_TIPC:
+ return flow->addrs.tipckey.key;
default:
return 0;
}
@@ -1321,8 +1321,8 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = {
.offset = offsetof(struct flow_keys, addrs.v6addrs),
},
{
- .key_id = FLOW_DISSECTOR_KEY_TIPC_ADDRS,
- .offset = offsetof(struct flow_keys, addrs.tipcaddrs),
+ .key_id = FLOW_DISSECTOR_KEY_TIPC,
+ .offset = offsetof(struct flow_keys, addrs.tipckey),
},
{
.key_id = FLOW_DISSECTOR_KEY_PORTS,
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 51d5836d8fb9..799b75268291 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -382,7 +382,7 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
struct net_device *netdev = to_net_dev(dev);
struct net *net = dev_net(netdev);
size_t count = len;
- ssize_t ret;
+ ssize_t ret = 0;
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
@@ -391,9 +391,20 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
if (len > 0 && buf[len - 1] == '\n')
--count;
- ret = dev_set_alias(netdev, buf, count);
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ if (dev_isalive(netdev)) {
+ ret = dev_set_alias(netdev, buf, count);
+ if (ret < 0)
+ goto err;
+ ret = len;
+ netdev_state_change(netdev);
+ }
+err:
+ rtnl_unlock();
- return ret < 0 ? ret : len;
+ return ret;
}
static ssize_t ifalias_show(struct device *dev,
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 912731bed7b7..57557a6a950c 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -334,7 +334,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
/* It is up to the caller to keep npinfo alive. */
struct netpoll_info *npinfo;
- WARN_ON_ONCE(!irqs_disabled());
+ lockdep_assert_irqs_disabled();
npinfo = rcu_dereference_bh(np->dev->npinfo);
if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index e3fa53a07d34..f95a15086225 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2711,7 +2711,7 @@ static inline __be16 build_tci(unsigned int id, unsigned int cfi,
static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
int datalen)
{
- struct timeval timestamp;
+ struct timespec64 timestamp;
struct pktgen_hdr *pgh;
pgh = skb_put(skb, sizeof(*pgh));
@@ -2773,9 +2773,17 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
pgh->tv_sec = 0;
pgh->tv_usec = 0;
} else {
- do_gettimeofday(&timestamp);
+ /*
+ * pgh->tv_sec wraps in y2106 when interpreted as unsigned
+ * as done by wireshark, or y2038 when interpreted as signed.
+ * This is probably harmless, but if anyone wants to improve
+ * it, we could introduce a variant that puts 64-bit nanoseconds
+ * into the respective header bytes.
+ * This would also be slightly faster to read.
+ */
+ ktime_get_real_ts64(&timestamp);
pgh->tv_sec = htonl(timestamp.tv_sec);
- pgh->tv_usec = htonl(timestamp.tv_usec);
+ pgh->tv_usec = htonl(timestamp.tv_nsec / NSEC_PER_USEC);
}
}
@@ -3377,7 +3385,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
static void pktgen_xmit(struct pktgen_dev *pkt_dev)
{
- unsigned int burst = ACCESS_ONCE(pkt_dev->burst);
+ unsigned int burst = READ_ONCE(pkt_dev->burst);
struct net_device *odev = pkt_dev->odev;
struct netdev_queue *txq;
struct sk_buff *skb;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index dc5ad84ac096..dabba2a91fc8 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -920,7 +920,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ rtnl_xdp_size() /* IFLA_XDP */
+ nla_total_size(4) /* IFLA_EVENT */
+ nla_total_size(4) /* IFLA_NEW_NETNSID */
- + nla_total_size(1); /* IFLA_PROTO_DOWN */
+ + nla_total_size(1) /* IFLA_PROTO_DOWN */
+ nla_total_size(4) /* IFLA_IF_NETNSID */
+ 0;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 97e604d55d55..6b0ff396fa9d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -41,7 +41,6 @@
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
-#include <linux/kmemcheck.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/in.h>
@@ -234,14 +233,12 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
shinfo = skb_shinfo(skb);
memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
atomic_set(&shinfo->dataref, 1);
- kmemcheck_annotate_variable(shinfo->destructor_arg);
if (flags & SKB_ALLOC_FCLONE) {
struct sk_buff_fclones *fclones;
fclones = container_of(skb, struct sk_buff_fclones, skb1);
- kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
skb->fclone = SKB_FCLONE_ORIG;
refcount_set(&fclones->fclone_ref, 1);
@@ -301,7 +298,6 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size)
shinfo = skb_shinfo(skb);
memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
atomic_set(&shinfo->dataref, 1);
- kmemcheck_annotate_variable(shinfo->destructor_arg);
return skb;
}
@@ -357,7 +353,7 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
*/
void *netdev_alloc_frag(unsigned int fragsz)
{
- return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+ return __netdev_alloc_frag(fragsz, GFP_ATOMIC);
}
EXPORT_SYMBOL(netdev_alloc_frag);
@@ -370,7 +366,7 @@ static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
void *napi_alloc_frag(unsigned int fragsz)
{
- return __napi_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+ return __napi_alloc_frag(fragsz, GFP_ATOMIC);
}
EXPORT_SYMBOL(napi_alloc_frag);
@@ -1283,7 +1279,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
if (!n)
return NULL;
- kmemcheck_annotate_bitfield(n, flags1);
n->fclone = SKB_FCLONE_UNAVAILABLE;
}
@@ -4869,6 +4864,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
if (!xnet)
return;
+ ipvs_reset(skb);
skb_orphan(skb);
skb->mark = 0;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 759400053110..c0b5b2f17412 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1469,8 +1469,6 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
sk = kmalloc(prot->obj_size, priority);
if (sk != NULL) {
- kmemcheck_annotate_bitfield(sk, flags);
-
if (security_sk_alloc(sk, family, priority))
goto out_free;
@@ -2346,16 +2344,18 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
/* guarantee minimum buffer size under pressure */
if (kind == SK_MEM_RECV) {
- if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
+ if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot))
return 1;
} else { /* SK_MEM_SEND */
+ int wmem0 = sk_get_wmem0(sk, prot);
+
if (sk->sk_type == SOCK_STREAM) {
- if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
+ if (sk->sk_wmem_queued < wmem0)
return 1;
- } else if (refcount_read(&sk->sk_wmem_alloc) <
- prot->sysctl_wmem[0])
+ } else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) {
return 1;
+ }
}
if (sk_has_memory_pressure(sk)) {
@@ -2744,6 +2744,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_max_pacing_rate = ~0U;
sk->sk_pacing_rate = ~0U;
+ sk->sk_pacing_shift = 10;
sk->sk_incoming_cpu = -1;
/*
* Before updating sk_refcnt, we must commit prior changes to memory
@@ -3042,7 +3043,6 @@ struct prot_inuse {
static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
-#ifdef CONFIG_NET_NS
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
@@ -3086,27 +3086,6 @@ static __init int net_inuse_init(void)
}
core_initcall(net_inuse_init);
-#else
-static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
-
-void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
-{
- __this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
-}
-EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
-
-int sock_prot_inuse_get(struct net *net, struct proto *prot)
-{
- int cpu, idx = prot->inuse_idx;
- int res = 0;
-
- for_each_possible_cpu(cpu)
- res += per_cpu(prot_inuse, cpu).val[idx];
-
- return res >= 0 ? res : 0;
-}
-EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
-#endif
static void assign_proto_idx(struct proto *prot)
{
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index bff5ab88cdbb..b36dceab0dc1 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -131,7 +131,7 @@ static struct dn_rt_hash_bucket *dn_rt_hash_table;
static unsigned int dn_rt_hash_mask;
static struct timer_list dn_route_timer;
-static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush, 0, 0);
+static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush);
int decnet_dst_gc_interval = 2;
static struct dst_ops dn_dst_ops = {
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 08667f68e601..f0710b5d037d 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -156,6 +156,7 @@ static void dn_rehash_zone(struct dn_zone *dz)
default:
printk(KERN_DEBUG "DECnet: dn_rehash_zone: BUG! %d\n",
old_divisor);
+ /* fall through */
case 256:
new_divisor = 1024;
new_hashmask = 0x3FF;
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index cc5f8f971689..03c3bdf25468 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -7,6 +7,7 @@ config HAVE_NET_DSA
config NET_DSA
tristate "Distributed Switch Architecture"
depends on HAVE_NET_DSA && MAY_USE_DEVLINK
+ depends on BRIDGE || BRIDGE=n
select NET_SWITCHDEV
select PHYLIB
---help---
@@ -19,6 +20,9 @@ if NET_DSA
config NET_DSA_TAG_BRCM
bool
+config NET_DSA_TAG_BRCM_PREPEND
+ bool
+
config NET_DSA_TAG_DSA
bool
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index e9a4a0f33e86..0e13c1f95d13 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -5,6 +5,7 @@ dsa_core-y += dsa.o dsa2.o legacy.o master.o port.o slave.o switch.o
# tagging formats
dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
+dsa_core-$(CONFIG_NET_DSA_TAG_BRCM_PREPEND) += tag_brcm.o
dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
dsa_core-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index b8f2d9f7c3ed..6a9d0f50fbee 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -44,6 +44,9 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
#ifdef CONFIG_NET_DSA_TAG_BRCM
[DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops,
#endif
+#ifdef CONFIG_NET_DSA_TAG_BRCM_PREPEND
+ [DSA_TAG_PROTO_BRCM_PREPEND] = &brcm_prepend_netdev_ops,
+#endif
#ifdef CONFIG_NET_DSA_TAG_DSA
[DSA_TAG_PROTO_DSA] = &dsa_netdev_ops,
#endif
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 283104e5ca6a..44e3fb7dec8c 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -94,14 +94,6 @@ static void dsa_tree_put(struct dsa_switch_tree *dst)
kref_put(&dst->refcount, dsa_tree_release);
}
-/* For platform data configurations, we need to have a valid name argument to
- * differentiate a disabled port from an enabled one
- */
-static bool dsa_port_is_valid(struct dsa_port *port)
-{
- return port->type != DSA_PORT_TYPE_UNUSED;
-}
-
static bool dsa_port_is_dsa(struct dsa_port *port)
{
return port->type == DSA_PORT_TYPE_DSA;
@@ -112,197 +104,214 @@ static bool dsa_port_is_cpu(struct dsa_port *port)
return port->type == DSA_PORT_TYPE_CPU;
}
-static bool dsa_ds_find_port_dn(struct dsa_switch *ds,
- struct device_node *port)
+static bool dsa_port_is_user(struct dsa_port *dp)
{
- u32 index;
-
- for (index = 0; index < ds->num_ports; index++)
- if (ds->ports[index].dn == port)
- return true;
- return false;
+ return dp->type == DSA_PORT_TYPE_USER;
}
-static struct dsa_switch *dsa_dst_find_port_dn(struct dsa_switch_tree *dst,
- struct device_node *port)
+static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst,
+ struct device_node *dn)
{
struct dsa_switch *ds;
- u32 index;
+ struct dsa_port *dp;
+ int device, port;
- for (index = 0; index < DSA_MAX_SWITCHES; index++) {
- ds = dst->ds[index];
+ for (device = 0; device < DSA_MAX_SWITCHES; device++) {
+ ds = dst->ds[device];
if (!ds)
continue;
- if (dsa_ds_find_port_dn(ds, port))
- return ds;
+ for (port = 0; port < ds->num_ports; port++) {
+ dp = &ds->ports[port];
+
+ if (dp->dn == dn)
+ return dp;
+ }
}
return NULL;
}
-static int dsa_port_complete(struct dsa_switch_tree *dst,
- struct dsa_switch *src_ds,
- struct dsa_port *port,
- u32 src_port)
+static bool dsa_port_setup_routing_table(struct dsa_port *dp)
{
- struct device_node *link;
- int index;
- struct dsa_switch *dst_ds;
-
- for (index = 0;; index++) {
- link = of_parse_phandle(port->dn, "link", index);
- if (!link)
- break;
-
- dst_ds = dsa_dst_find_port_dn(dst, link);
- of_node_put(link);
+ struct dsa_switch *ds = dp->ds;
+ struct dsa_switch_tree *dst = ds->dst;
+ struct device_node *dn = dp->dn;
+ struct of_phandle_iterator it;
+ struct dsa_port *link_dp;
+ int err;
- if (!dst_ds)
- return 1;
+ of_for_each_phandle(&it, err, dn, "link", NULL, 0) {
+ link_dp = dsa_tree_find_port_by_node(dst, it.node);
+ if (!link_dp) {
+ of_node_put(it.node);
+ return false;
+ }
- src_ds->rtable[dst_ds->index] = src_port;
+ ds->rtable[link_dp->ds->index] = dp->index;
}
- return 0;
+ return true;
}
-/* A switch is complete if all the DSA ports phandles point to ports
- * known in the tree. A return value of 1 means the tree is not
- * complete. This is not an error condition. A value of 0 is
- * success.
- */
-static int dsa_ds_complete(struct dsa_switch_tree *dst, struct dsa_switch *ds)
+static bool dsa_switch_setup_routing_table(struct dsa_switch *ds)
{
- struct dsa_port *port;
- u32 index;
- int err;
+ bool complete = true;
+ struct dsa_port *dp;
+ int i;
- for (index = 0; index < ds->num_ports; index++) {
- port = &ds->ports[index];
- if (!dsa_port_is_valid(port))
- continue;
+ for (i = 0; i < DSA_MAX_SWITCHES; i++)
+ ds->rtable[i] = DSA_RTABLE_NONE;
- if (!dsa_port_is_dsa(port))
- continue;
+ for (i = 0; i < ds->num_ports; i++) {
+ dp = &ds->ports[i];
- err = dsa_port_complete(dst, ds, port, index);
- if (err != 0)
- return err;
+ if (dsa_port_is_dsa(dp)) {
+ complete = dsa_port_setup_routing_table(dp);
+ if (!complete)
+ break;
+ }
}
- return 0;
+ return complete;
}
-/* A tree is complete if all the DSA ports phandles point to ports
- * known in the tree. A return value of 1 means the tree is not
- * complete. This is not an error condition. A value of 0 is
- * success.
- */
-static int dsa_dst_complete(struct dsa_switch_tree *dst)
+static bool dsa_tree_setup_routing_table(struct dsa_switch_tree *dst)
{
struct dsa_switch *ds;
- u32 index;
- int err;
+ bool complete = true;
+ int device;
- for (index = 0; index < DSA_MAX_SWITCHES; index++) {
- ds = dst->ds[index];
+ for (device = 0; device < DSA_MAX_SWITCHES; device++) {
+ ds = dst->ds[device];
if (!ds)
continue;
- err = dsa_ds_complete(dst, ds);
- if (err != 0)
- return err;
+ complete = dsa_switch_setup_routing_table(ds);
+ if (!complete)
+ break;
}
- return 0;
+ return complete;
}
-static int dsa_dsa_port_apply(struct dsa_port *port)
+static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst)
{
- struct dsa_switch *ds = port->ds;
- int err;
+ struct dsa_switch *ds;
+ struct dsa_port *dp;
+ int device, port;
- err = dsa_port_fixed_link_register_of(port);
- if (err) {
- dev_warn(ds->dev, "Failed to setup dsa port %d: %d\n",
- port->index, err);
- return err;
- }
+ for (device = 0; device < DSA_MAX_SWITCHES; device++) {
+ ds = dst->ds[device];
+ if (!ds)
+ continue;
- memset(&port->devlink_port, 0, sizeof(port->devlink_port));
+ for (port = 0; port < ds->num_ports; port++) {
+ dp = &ds->ports[port];
- return devlink_port_register(ds->devlink, &port->devlink_port,
- port->index);
-}
+ if (dsa_port_is_cpu(dp))
+ return dp;
+ }
+ }
-static void dsa_dsa_port_unapply(struct dsa_port *port)
-{
- devlink_port_unregister(&port->devlink_port);
- dsa_port_fixed_link_unregister_of(port);
+ return NULL;
}
-static int dsa_cpu_port_apply(struct dsa_port *port)
+static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
{
- struct dsa_switch *ds = port->ds;
- int err;
+ struct dsa_switch *ds;
+ struct dsa_port *dp;
+ int device, port;
- err = dsa_port_fixed_link_register_of(port);
- if (err) {
- dev_warn(ds->dev, "Failed to setup cpu port %d: %d\n",
- port->index, err);
- return err;
+ /* DSA currently only supports a single CPU port */
+ dst->cpu_dp = dsa_tree_find_first_cpu(dst);
+ if (!dst->cpu_dp) {
+ pr_warn("Tree has no master device\n");
+ return -EINVAL;
}
- memset(&port->devlink_port, 0, sizeof(port->devlink_port));
- err = devlink_port_register(ds->devlink, &port->devlink_port,
- port->index);
- return err;
+ /* Assign the default CPU port to all ports of the fabric */
+ for (device = 0; device < DSA_MAX_SWITCHES; device++) {
+ ds = dst->ds[device];
+ if (!ds)
+ continue;
+
+ for (port = 0; port < ds->num_ports; port++) {
+ dp = &ds->ports[port];
+
+ if (dsa_port_is_user(dp))
+ dp->cpu_dp = dst->cpu_dp;
+ }
+ }
+
+ return 0;
}
-static void dsa_cpu_port_unapply(struct dsa_port *port)
+static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst)
{
- devlink_port_unregister(&port->devlink_port);
- dsa_port_fixed_link_unregister_of(port);
+ /* DSA currently only supports a single CPU port */
+ dst->cpu_dp = NULL;
}
-static int dsa_user_port_apply(struct dsa_port *port)
+static int dsa_port_setup(struct dsa_port *dp)
{
- struct dsa_switch *ds = port->ds;
+ struct dsa_switch *ds = dp->ds;
int err;
- err = dsa_slave_create(port);
- if (err) {
- dev_warn(ds->dev, "Failed to create slave %d: %d\n",
- port->index, err);
- port->slave = NULL;
- return err;
- }
+ memset(&dp->devlink_port, 0, sizeof(dp->devlink_port));
- memset(&port->devlink_port, 0, sizeof(port->devlink_port));
- err = devlink_port_register(ds->devlink, &port->devlink_port,
- port->index);
+ err = devlink_port_register(ds->devlink, &dp->devlink_port, dp->index);
if (err)
return err;
- devlink_port_type_eth_set(&port->devlink_port, port->slave);
+ switch (dp->type) {
+ case DSA_PORT_TYPE_UNUSED:
+ break;
+ case DSA_PORT_TYPE_CPU:
+ case DSA_PORT_TYPE_DSA:
+ err = dsa_port_fixed_link_register_of(dp);
+ if (err) {
+ dev_err(ds->dev, "failed to register fixed link for port %d.%d\n",
+ ds->index, dp->index);
+ return err;
+ }
+
+ break;
+ case DSA_PORT_TYPE_USER:
+ err = dsa_slave_create(dp);
+ if (err)
+ dev_err(ds->dev, "failed to create slave for port %d.%d\n",
+ ds->index, dp->index);
+ else
+ devlink_port_type_eth_set(&dp->devlink_port, dp->slave);
+ break;
+ }
return 0;
}
-static void dsa_user_port_unapply(struct dsa_port *port)
+static void dsa_port_teardown(struct dsa_port *dp)
{
- devlink_port_unregister(&port->devlink_port);
- if (port->slave) {
- dsa_slave_destroy(port->slave);
- port->slave = NULL;
+ devlink_port_unregister(&dp->devlink_port);
+
+ switch (dp->type) {
+ case DSA_PORT_TYPE_UNUSED:
+ break;
+ case DSA_PORT_TYPE_CPU:
+ case DSA_PORT_TYPE_DSA:
+ dsa_port_fixed_link_unregister_of(dp);
+ break;
+ case DSA_PORT_TYPE_USER:
+ if (dp->slave) {
+ dsa_slave_destroy(dp->slave);
+ dp->slave = NULL;
+ }
+ break;
}
}
-static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
+static int dsa_switch_setup(struct dsa_switch *ds)
{
- struct dsa_port *port;
- u32 index;
int err;
/* Initialize ds->phys_mii_mask before registering the slave MDIO bus
@@ -343,136 +352,145 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
return err;
}
- for (index = 0; index < ds->num_ports; index++) {
- port = &ds->ports[index];
- if (!dsa_port_is_valid(port))
- continue;
+ return 0;
+}
- if (dsa_port_is_dsa(port)) {
- err = dsa_dsa_port_apply(port);
- if (err)
- return err;
+static void dsa_switch_teardown(struct dsa_switch *ds)
+{
+ if (ds->slave_mii_bus && ds->ops->phy_read)
+ mdiobus_unregister(ds->slave_mii_bus);
+
+ dsa_switch_unregister_notifier(ds);
+
+ if (ds->devlink) {
+ devlink_unregister(ds->devlink);
+ devlink_free(ds->devlink);
+ ds->devlink = NULL;
+ }
+
+}
+
+static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
+{
+ struct dsa_switch *ds;
+ struct dsa_port *dp;
+ int device, port;
+ int err;
+
+ for (device = 0; device < DSA_MAX_SWITCHES; device++) {
+ ds = dst->ds[device];
+ if (!ds)
continue;
- }
- if (dsa_port_is_cpu(port)) {
- err = dsa_cpu_port_apply(port);
+ err = dsa_switch_setup(ds);
+ if (err)
+ return err;
+
+ for (port = 0; port < ds->num_ports; port++) {
+ dp = &ds->ports[port];
+
+ err = dsa_port_setup(dp);
if (err)
return err;
- continue;
}
-
- err = dsa_user_port_apply(port);
- if (err)
- continue;
}
return 0;
}
-static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
+static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst)
{
- struct dsa_port *port;
- u32 index;
+ struct dsa_switch *ds;
+ struct dsa_port *dp;
+ int device, port;
- for (index = 0; index < ds->num_ports; index++) {
- port = &ds->ports[index];
- if (!dsa_port_is_valid(port))
+ for (device = 0; device < DSA_MAX_SWITCHES; device++) {
+ ds = dst->ds[device];
+ if (!ds)
continue;
- if (dsa_port_is_dsa(port)) {
- dsa_dsa_port_unapply(port);
- continue;
- }
+ for (port = 0; port < ds->num_ports; port++) {
+ dp = &ds->ports[port];
- if (dsa_port_is_cpu(port)) {
- dsa_cpu_port_unapply(port);
- continue;
+ dsa_port_teardown(dp);
}
- dsa_user_port_unapply(port);
+ dsa_switch_teardown(ds);
}
+}
- if (ds->slave_mii_bus && ds->ops->phy_read)
- mdiobus_unregister(ds->slave_mii_bus);
+static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *cpu_dp = dst->cpu_dp;
+ struct net_device *master = cpu_dp->master;
- dsa_switch_unregister_notifier(ds);
+ /* DSA currently supports a single pair of CPU port and master device */
+ return dsa_master_setup(master, cpu_dp);
+}
- if (ds->devlink) {
- devlink_unregister(ds->devlink);
- devlink_free(ds->devlink);
- ds->devlink = NULL;
- }
+static void dsa_tree_teardown_master(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *cpu_dp = dst->cpu_dp;
+ struct net_device *master = cpu_dp->master;
+ return dsa_master_teardown(master);
}
-static int dsa_dst_apply(struct dsa_switch_tree *dst)
+static int dsa_tree_setup(struct dsa_switch_tree *dst)
{
- struct dsa_switch *ds;
- u32 index;
+ bool complete;
int err;
- for (index = 0; index < DSA_MAX_SWITCHES; index++) {
- ds = dst->ds[index];
- if (!ds)
- continue;
-
- err = dsa_ds_apply(dst, ds);
- if (err)
- return err;
+ if (dst->setup) {
+ pr_err("DSA: tree %d already setup! Disjoint trees?\n",
+ dst->index);
+ return -EEXIST;
}
- /* If we use a tagging format that doesn't have an ethertype
- * field, make sure that all packets from this point on get
- * sent to the tag format's receive function.
- */
- wmb();
- dst->cpu_dp->master->dsa_ptr = dst->cpu_dp;
+ complete = dsa_tree_setup_routing_table(dst);
+ if (!complete)
+ return 0;
+
+ err = dsa_tree_setup_default_cpu(dst);
+ if (err)
+ return err;
- err = dsa_master_ethtool_setup(dst->cpu_dp->master);
+ err = dsa_tree_setup_switches(dst);
if (err)
return err;
- dst->applied = true;
+ err = dsa_tree_setup_master(dst);
+ if (err)
+ return err;
+
+ dst->setup = true;
+
+ pr_info("DSA: tree %d setup\n", dst->index);
return 0;
}
-static void dsa_dst_unapply(struct dsa_switch_tree *dst)
+static void dsa_tree_teardown(struct dsa_switch_tree *dst)
{
- struct dsa_switch *ds;
- u32 index;
-
- if (!dst->applied)
+ if (!dst->setup)
return;
- dsa_master_ethtool_restore(dst->cpu_dp->master);
+ dsa_tree_teardown_master(dst);
- dst->cpu_dp->master->dsa_ptr = NULL;
+ dsa_tree_teardown_switches(dst);
- /* If we used a tagging format that doesn't have an ethertype
- * field, make sure that all packets from this point get sent
- * without the tag and go through the regular receive path.
- */
- wmb();
+ dsa_tree_teardown_default_cpu(dst);
- for (index = 0; index < DSA_MAX_SWITCHES; index++) {
- ds = dst->ds[index];
- if (!ds)
- continue;
+ pr_info("DSA: tree %d torn down\n", dst->index);
- dsa_ds_unapply(dst, ds);
- }
-
- dst->cpu_dp = NULL;
-
- pr_info("DSA: tree %d unapplied\n", dst->index);
- dst->applied = false;
+ dst->setup = false;
}
static void dsa_tree_remove_switch(struct dsa_switch_tree *dst,
unsigned int index)
{
+ dsa_tree_teardown(dst);
+
dst->ds[index] = NULL;
dsa_tree_put(dst);
}
@@ -481,6 +499,7 @@ static int dsa_tree_add_switch(struct dsa_switch_tree *dst,
struct dsa_switch *ds)
{
unsigned int index = ds->index;
+ int err;
if (dst->ds[index])
return -EBUSY;
@@ -488,7 +507,11 @@ static int dsa_tree_add_switch(struct dsa_switch_tree *dst,
dsa_tree_get(dst);
dst->ds[index] = ds;
- return 0;
+ err = dsa_tree_setup(dst);
+ if (err)
+ dsa_tree_remove_switch(dst, index);
+
+ return err;
}
static int dsa_port_parse_user(struct dsa_port *dp, const char *name)
@@ -516,7 +539,7 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master)
const struct dsa_device_ops *tag_ops;
enum dsa_tag_protocol tag_protocol;
- tag_protocol = ds->ops->get_tag_protocol(ds);
+ tag_protocol = ds->ops->get_tag_protocol(ds, dp->index);
tag_ops = dsa_resolve_tag_protocol(tag_protocol);
if (IS_ERR(tag_ops)) {
dev_warn(ds->dev, "No tagger for this switch\n");
@@ -532,86 +555,6 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master)
return 0;
}
-static int dsa_cpu_parse(struct dsa_port *port, u32 index,
- struct dsa_switch_tree *dst,
- struct dsa_switch *ds)
-{
- if (!dst->cpu_dp)
- dst->cpu_dp = port;
-
- return 0;
-}
-
-static int dsa_ds_parse(struct dsa_switch_tree *dst, struct dsa_switch *ds)
-{
- struct dsa_port *port;
- u32 index;
- int err;
-
- for (index = 0; index < ds->num_ports; index++) {
- port = &ds->ports[index];
- if (!dsa_port_is_valid(port) ||
- dsa_port_is_dsa(port))
- continue;
-
- if (dsa_port_is_cpu(port)) {
- err = dsa_cpu_parse(port, index, dst, ds);
- if (err)
- return err;
- }
-
- }
-
- pr_info("DSA: switch %d %d parsed\n", dst->index, ds->index);
-
- return 0;
-}
-
-static int dsa_dst_parse(struct dsa_switch_tree *dst)
-{
- struct dsa_switch *ds;
- struct dsa_port *dp;
- u32 index;
- int port;
- int err;
-
- for (index = 0; index < DSA_MAX_SWITCHES; index++) {
- ds = dst->ds[index];
- if (!ds)
- continue;
-
- err = dsa_ds_parse(dst, ds);
- if (err)
- return err;
- }
-
- if (!dst->cpu_dp) {
- pr_warn("Tree has no master device\n");
- return -EINVAL;
- }
-
- /* Assign the default CPU port to all ports of the fabric */
- for (index = 0; index < DSA_MAX_SWITCHES; index++) {
- ds = dst->ds[index];
- if (!ds)
- continue;
-
- for (port = 0; port < ds->num_ports; port++) {
- dp = &ds->ports[port];
- if (!dsa_port_is_valid(dp) ||
- dsa_port_is_dsa(dp) ||
- dsa_port_is_cpu(dp))
- continue;
-
- dp->cpu_dp = dst->cpu_dp;
- }
- }
-
- pr_info("DSA: tree %d parsed\n", dst->index);
-
- return 0;
-}
-
static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn)
{
struct device_node *ethernet = of_parse_phandle(dn, "ethernet", 0);
@@ -768,13 +711,18 @@ static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd)
return dsa_switch_parse_ports(ds, cd);
}
-static int _dsa_register_switch(struct dsa_switch *ds)
+static int dsa_switch_add(struct dsa_switch *ds)
+{
+ struct dsa_switch_tree *dst = ds->dst;
+
+ return dsa_tree_add_switch(dst, ds);
+}
+
+static int dsa_switch_probe(struct dsa_switch *ds)
{
struct dsa_chip_data *pdata = ds->dev->platform_data;
struct device_node *np = ds->dev->of_node;
- struct dsa_switch_tree *dst;
- unsigned int index;
- int i, err;
+ int err;
if (np)
err = dsa_switch_parse_of(ds, np);
@@ -786,46 +734,7 @@ static int _dsa_register_switch(struct dsa_switch *ds)
if (err)
return err;
- index = ds->index;
- dst = ds->dst;
-
- /* Initialize the routing table */
- for (i = 0; i < DSA_MAX_SWITCHES; ++i)
- ds->rtable[i] = DSA_RTABLE_NONE;
-
- err = dsa_tree_add_switch(dst, ds);
- if (err)
- return err;
-
- err = dsa_dst_complete(dst);
- if (err < 0)
- goto out_del_dst;
-
- /* Not all switches registered yet */
- if (err == 1)
- return 0;
-
- if (dst->applied) {
- pr_info("DSA: Disjoint trees?\n");
- return -EINVAL;
- }
-
- err = dsa_dst_parse(dst);
- if (err)
- goto out_del_dst;
-
- err = dsa_dst_apply(dst);
- if (err) {
- dsa_dst_unapply(dst);
- goto out_del_dst;
- }
-
- return 0;
-
-out_del_dst:
- dsa_tree_remove_switch(dst, index);
-
- return err;
+ return dsa_switch_add(ds);
}
struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
@@ -855,27 +764,25 @@ int dsa_register_switch(struct dsa_switch *ds)
int err;
mutex_lock(&dsa2_mutex);
- err = _dsa_register_switch(ds);
+ err = dsa_switch_probe(ds);
mutex_unlock(&dsa2_mutex);
return err;
}
EXPORT_SYMBOL_GPL(dsa_register_switch);
-static void _dsa_unregister_switch(struct dsa_switch *ds)
+static void dsa_switch_remove(struct dsa_switch *ds)
{
struct dsa_switch_tree *dst = ds->dst;
unsigned int index = ds->index;
- dsa_dst_unapply(dst);
-
dsa_tree_remove_switch(dst, index);
}
void dsa_unregister_switch(struct dsa_switch *ds)
{
mutex_lock(&dsa2_mutex);
- _dsa_unregister_switch(ds);
+ dsa_switch_remove(ds);
mutex_unlock(&dsa2_mutex);
}
EXPORT_SYMBOL_GPL(dsa_unregister_switch);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 253a613c40cd..7d036696e8c4 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -108,8 +108,8 @@ int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
const unsigned char *addr, u16 vid);
/* master.c */
-int dsa_master_ethtool_setup(struct net_device *dev);
-void dsa_master_ethtool_restore(struct net_device *dev);
+int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp);
+void dsa_master_teardown(struct net_device *dev);
static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
int device, int port)
@@ -147,10 +147,10 @@ int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
u16 vid);
int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data);
-int dsa_port_mdb_add(struct dsa_port *dp,
+int dsa_port_mdb_add(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb,
struct switchdev_trans *trans);
-int dsa_port_mdb_del(struct dsa_port *dp,
+int dsa_port_mdb_del(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb);
int dsa_port_vlan_add(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan,
@@ -191,6 +191,7 @@ void dsa_switch_unregister_notifier(struct dsa_switch *ds);
/* tag_brcm.c */
extern const struct dsa_device_ops brcm_netdev_ops;
+extern const struct dsa_device_ops brcm_prepend_netdev_ops;
/* tag_dsa.c */
extern const struct dsa_device_ops dsa_netdev_ops;
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
index 0511fe2feff7..84611d7fcfa2 100644
--- a/net/dsa/legacy.c
+++ b/net/dsa/legacy.c
@@ -151,7 +151,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
const struct dsa_device_ops *tag_ops;
enum dsa_tag_protocol tag_protocol;
- tag_protocol = ops->get_tag_protocol(ds);
+ tag_protocol = ops->get_tag_protocol(ds, dst->cpu_dp->index);
tag_ops = dsa_resolve_tag_protocol(tag_protocol);
if (IS_ERR(tag_ops))
return PTR_ERR(tag_ops);
@@ -593,15 +593,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
if (!configured)
return -EPROBE_DEFER;
- /*
- * If we use a tagging format that doesn't have an ethertype
- * field, make sure that all packets from this point on get
- * sent to the tag format's receive function.
- */
- wmb();
- dev->dsa_ptr = dst->cpu_dp;
-
- return dsa_master_ethtool_setup(dst->cpu_dp->master);
+ return dsa_master_setup(dst->cpu_dp->master, dst->cpu_dp);
}
static int dsa_probe(struct platform_device *pdev)
@@ -666,15 +658,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
{
int i;
- dsa_master_ethtool_restore(dst->cpu_dp->master);
-
- dst->cpu_dp->master->dsa_ptr = NULL;
-
- /* If we used a tagging format that doesn't have an ethertype
- * field, make sure that all packets from this point get sent
- * without the tag and go through the regular receive path.
- */
- wmb();
+ dsa_master_teardown(dst->cpu_dp->master);
for (i = 0; i < dst->pd->nr_chips; i++) {
struct dsa_switch *ds = dst->ds[i];
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 5f3f57e372e0..00589147f042 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -85,7 +85,7 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
}
}
-int dsa_master_ethtool_setup(struct net_device *dev)
+static int dsa_master_ethtool_setup(struct net_device *dev)
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
struct dsa_switch *ds = cpu_dp->ds;
@@ -108,10 +108,36 @@ int dsa_master_ethtool_setup(struct net_device *dev)
return 0;
}
-void dsa_master_ethtool_restore(struct net_device *dev)
+static void dsa_master_ethtool_teardown(struct net_device *dev)
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
dev->ethtool_ops = cpu_dp->orig_ethtool_ops;
cpu_dp->orig_ethtool_ops = NULL;
}
+
+int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
+{
+ /* If we use a tagging format that doesn't have an ethertype
+ * field, make sure that all packets from this point on get
+ * sent to the tag format's receive function.
+ */
+ wmb();
+
+ dev->dsa_ptr = cpu_dp;
+
+ return dsa_master_ethtool_setup(dev);
+}
+
+void dsa_master_teardown(struct net_device *dev)
+{
+ dsa_master_ethtool_teardown(dev);
+
+ dev->dsa_ptr = NULL;
+
+ /* If we used a tagging format that doesn't have an ethertype
+ * field, make sure that all packets from this point get sent
+ * without the tag and go through the regular receive path.
+ */
+ wmb();
+}
diff --git a/net/dsa/port.c b/net/dsa/port.c
index bb30b1a7de3a..bb4be2679904 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -17,7 +17,7 @@
#include "dsa_priv.h"
-static int dsa_port_notify(struct dsa_port *dp, unsigned long e, void *v)
+static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v)
{
struct raw_notifier_head *nh = &dp->ds->dst->nh;
int err;
@@ -215,7 +215,7 @@ int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data)
return ds->ops->port_fdb_dump(ds, port, cb, data);
}
-int dsa_port_mdb_add(struct dsa_port *dp,
+int dsa_port_mdb_add(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb,
struct switchdev_trans *trans)
{
@@ -229,7 +229,7 @@ int dsa_port_mdb_add(struct dsa_port *dp,
return dsa_port_notify(dp, DSA_NOTIFIER_MDB_ADD, &info);
}
-int dsa_port_mdb_del(struct dsa_port *dp,
+int dsa_port_mdb_del(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb)
{
struct dsa_notifier_mdb_info info = {
@@ -252,7 +252,10 @@ int dsa_port_vlan_add(struct dsa_port *dp,
.vlan = vlan,
};
- return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
+ if (br_vlan_enabled(dp->bridge_dev))
+ return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
+
+ return 0;
}
int dsa_port_vlan_del(struct dsa_port *dp,
@@ -264,7 +267,10 @@ int dsa_port_vlan_del(struct dsa_port *dp,
.vlan = vlan,
};
- return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
+ if (br_vlan_enabled(dp->bridge_dev))
+ return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
+
+ return 0;
}
int dsa_port_fixed_link_register_of(struct dsa_port *dp)
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 814ced75a0cc..d6e7a642493b 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -304,6 +304,13 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
case SWITCHDEV_OBJ_ID_PORT_MDB:
err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans);
break;
+ case SWITCHDEV_OBJ_ID_HOST_MDB:
+ /* DSA can directly translate this to a normal MDB add,
+ * but on the CPU port.
+ */
+ err = dsa_port_mdb_add(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj),
+ trans);
+ break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
err = dsa_port_vlan_add(dp, SWITCHDEV_OBJ_PORT_VLAN(obj),
trans);
@@ -326,6 +333,12 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
case SWITCHDEV_OBJ_ID_PORT_MDB:
err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
break;
+ case SWITCHDEV_OBJ_ID_HOST_MDB:
+ /* DSA can directly translate this to a normal MDB add,
+ * but on the CPU port.
+ */
+ err = dsa_port_mdb_del(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj));
+ break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
err = dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj));
break;
@@ -342,11 +355,12 @@ static int dsa_slave_port_attr_get(struct net_device *dev,
{
struct dsa_port *dp = dsa_slave_to_port(dev);
struct dsa_switch *ds = dp->ds;
+ struct dsa_switch_tree *dst = ds->dst;
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
- attr->u.ppid.id_len = sizeof(ds->index);
- memcpy(&attr->u.ppid.id, &ds->index, attr->u.ppid.id_len);
+ attr->u.ppid.id_len = sizeof(dst->index);
+ memcpy(&attr->u.ppid.id, &dst->index, attr->u.ppid.id_len);
break;
case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
attr->u.brport_flags_support = 0;
@@ -1147,7 +1161,7 @@ static void dsa_slave_notify(struct net_device *dev, unsigned long val)
int dsa_slave_create(struct dsa_port *port)
{
- struct dsa_port *cpu_dp = port->cpu_dp;
+ const struct dsa_port *cpu_dp = port->cpu_dp;
struct net_device *master = cpu_dp->master;
struct dsa_switch *ds = port->ds;
const char *name = port->name;
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index e6c06aa349a6..29608d087a7c 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -121,7 +121,7 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
if (ds->index == info->sw_index)
set_bit(info->port, group);
for (port = 0; port < ds->num_ports; port++)
- if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
+ if (dsa_is_dsa_port(ds, port))
set_bit(port, group);
if (switchdev_trans_ph_prepare(trans)) {
@@ -133,6 +133,8 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
if (err)
return err;
}
+
+ return 0;
}
for_each_set_bit(port, group, ds->num_ports)
@@ -180,6 +182,8 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds,
if (err)
return err;
}
+
+ return 0;
}
for_each_set_bit(port, members, ds->num_ports)
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 9e082bae3cb0..e6e0b7b6025c 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -59,7 +59,9 @@
#define BRCM_EG_TC_MASK 0x7
#define BRCM_EG_PID_MASK 0x1f
-static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev)
+static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb,
+ struct net_device *dev,
+ unsigned int offset)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
u16 queue = skb_get_queue_mapping(skb);
@@ -70,10 +72,10 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev
skb_push(skb, BRCM_TAG_LEN);
- memmove(skb->data, skb->data + BRCM_TAG_LEN, 2 * ETH_ALEN);
+ if (offset)
+ memmove(skb->data, skb->data + BRCM_TAG_LEN, offset);
- /* Build the tag after the MAC Source Address */
- brcm_tag = skb->data + 2 * ETH_ALEN;
+ brcm_tag = skb->data + offset;
/* Set the ingress opcode, traffic class, tag enforcment is
* deprecated
@@ -94,8 +96,10 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev
return skb;
}
-static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb,
+ struct net_device *dev,
+ struct packet_type *pt,
+ unsigned int offset)
{
int source_port;
u8 *brcm_tag;
@@ -103,8 +107,7 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
if (unlikely(!pskb_may_pull(skb, BRCM_TAG_LEN)))
return NULL;
- /* skb->data points to the EtherType, the tag is right before it */
- brcm_tag = skb->data - 2;
+ brcm_tag = skb->data - offset;
/* The opcode should never be different than 0b000 */
if (unlikely((brcm_tag[0] >> BRCM_OPCODE_SHIFT) & BRCM_OPCODE_MASK))
@@ -126,15 +129,60 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
/* Remove Broadcom tag and update checksum */
skb_pull_rcsum(skb, BRCM_TAG_LEN);
+ return skb;
+}
+
+#ifdef CONFIG_NET_DSA_TAG_BRCM
+static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ /* Build the tag after the MAC Source Address */
+ return brcm_tag_xmit_ll(skb, dev, 2 * ETH_ALEN);
+}
+
+
+static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt)
+{
+ struct sk_buff *nskb;
+
+ /* skb->data points to the EtherType, the tag is right before it */
+ nskb = brcm_tag_rcv_ll(skb, dev, pt, 2);
+ if (!nskb)
+ return nskb;
+
/* Move the Ethernet DA and SA */
- memmove(skb->data - ETH_HLEN,
- skb->data - ETH_HLEN - BRCM_TAG_LEN,
+ memmove(nskb->data - ETH_HLEN,
+ nskb->data - ETH_HLEN - BRCM_TAG_LEN,
2 * ETH_ALEN);
- return skb;
+ return nskb;
}
const struct dsa_device_ops brcm_netdev_ops = {
.xmit = brcm_tag_xmit,
.rcv = brcm_tag_rcv,
};
+#endif
+
+#ifdef CONFIG_NET_DSA_TAG_BRCM_PREPEND
+static struct sk_buff *brcm_tag_xmit_prepend(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ /* tag is prepended to the packet */
+ return brcm_tag_xmit_ll(skb, dev, 0);
+}
+
+static struct sk_buff *brcm_tag_rcv_prepend(struct sk_buff *skb,
+ struct net_device *dev,
+ struct packet_type *pt)
+{
+ /* tag is prepended to the packet */
+ return brcm_tag_rcv_ll(skb, dev, pt, ETH_HLEN);
+}
+
+const struct dsa_device_ops brcm_prepend_netdev_ops = {
+ .xmit = brcm_tag_xmit_prepend,
+ .rcv = brcm_tag_rcv_prepend,
+};
+#endif
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index dbbcdafed8c3..cd13cfc542ce 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -141,6 +141,8 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
2 * ETH_ALEN);
}
+ skb->offload_fwd_mark = 1;
+
return skb;
}
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index f38a626b3a05..4083326b806e 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -160,6 +160,8 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
2 * ETH_ALEN);
}
+ skb->offload_fwd_mark = 1;
+
return skb;
}
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index e526c8967b98..548c00254c07 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -42,6 +42,10 @@
#define LAN9303_TAG_LEN 4
# define LAN9303_TAG_TX_USE_ALR BIT(3)
# define LAN9303_TAG_TX_STP_OVERRIDE BIT(4)
+# define LAN9303_TAG_RX_IGMP BIT(3)
+# define LAN9303_TAG_RX_STP BIT(4)
+# define LAN9303_TAG_RX_TRAPPED_TO_CPU (LAN9303_TAG_RX_IGMP | \
+ LAN9303_TAG_RX_STP)
/* Decide whether to transmit using ALR lookup, or transmit directly to
* port using tag. ALR learning is performed only when using ALR lookup.
@@ -88,9 +92,10 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
}
static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+ struct packet_type *pt)
{
u16 *lan9303_tag;
+ u16 lan9303_tag1;
unsigned int source_port;
if (unlikely(!pskb_may_pull(skb, LAN9303_TAG_LEN))) {
@@ -112,7 +117,8 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
return NULL;
}
- source_port = ntohs(lan9303_tag[1]) & 0x3;
+ lan9303_tag1 = ntohs(lan9303_tag[1]);
+ source_port = lan9303_tag1 & 0x3;
skb->dev = dsa_master_find_slave(dev, 0, source_port);
if (!skb->dev) {
@@ -126,8 +132,7 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
skb_pull_rcsum(skb, 2 + 2);
memmove(skb->data - ETH_HLEN, skb->data - (ETH_HLEN + LAN9303_TAG_LEN),
2 * ETH_ALEN);
- skb->offload_fwd_mark = !ether_addr_equal(skb->data - ETH_HLEN,
- eth_stp_addr);
+ skb->offload_fwd_mark = !(lan9303_tag1 & LAN9303_TAG_RX_TRAPPED_TO_CPU);
return skb;
}
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 37db44f60718..4dd95cdd8070 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -240,7 +240,7 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
if (err == -EINPROGRESS)
goto out;
- if (err == -EBUSY)
+ if (err == -ENOSPC)
err = NET_XMIT_DROP;
goto out_free;
}
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index b00e4a43b4dc..d57aa64fa7c7 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -432,7 +432,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
case -EINPROGRESS:
goto error;
- case -EBUSY:
+ case -ENOSPC:
err = NET_XMIT_DROP;
break;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 589caaa90613..f04d944f8abe 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -710,7 +710,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
bool ecn_ca = false;
nla_strlcpy(tmp, nla, sizeof(tmp));
- val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
+ val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
} else {
val = nla_get_u32(nla);
}
@@ -1030,7 +1030,7 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
char tmp[TCP_CA_NAME_MAX];
nla_strlcpy(tmp, nla, sizeof(tmp));
- val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
+ val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
if (val == TCP_CA_UNSPEC)
return -EINVAL;
} else {
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 7f3ef5c287a1..26a3d0315728 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -164,7 +164,7 @@ static void inet_frag_worker(struct work_struct *work)
local_bh_disable();
- for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) {
+ for (i = READ_ONCE(f->next_bucket); budget; --budget) {
evicted += inet_evict_bucket(f, &f->hash[i]);
i = (i + 1) & (INETFRAGS_HASHSZ - 1);
if (evicted > INETFRAGS_EVICT_MAX)
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index a4bab81f1462..c690cd0d9b3f 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -9,7 +9,6 @@
*/
#include <linux/kernel.h>
-#include <linux/kmemcheck.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <net/inet_hashtables.h>
@@ -167,8 +166,6 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
if (tw) {
const struct inet_sock *inet = inet_sk(sk);
- kmemcheck_annotate_bitfield(tw, flags);
-
tw->tw_dr = dr;
/* Give us an identity. */
tw->tw_daddr = inet->inet_daddr;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index c105a315b1a3..bb6239169b1a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -773,20 +773,46 @@ free_skb:
return NETDEV_TX_OK;
}
+static void ipgre_link_update(struct net_device *dev, bool set_mtu)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ int len;
+
+ len = tunnel->tun_hlen;
+ tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
+ len = tunnel->tun_hlen - len;
+ tunnel->hlen = tunnel->hlen + len;
+
+ dev->needed_headroom = dev->needed_headroom + len;
+ if (set_mtu)
+ dev->mtu = max_t(int, dev->mtu - len, 68);
+
+ if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
+ if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
+ tunnel->encap.type == TUNNEL_ENCAP_NONE) {
+ dev->features |= NETIF_F_GSO_SOFTWARE;
+ dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+ }
+ dev->features |= NETIF_F_LLTX;
+ }
+}
+
static int ipgre_tunnel_ioctl(struct net_device *dev,
struct ifreq *ifr, int cmd)
{
- int err;
struct ip_tunnel_parm p;
+ int err;
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
return -EFAULT;
+
if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
- p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
- ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
+ p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
+ ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
return -EINVAL;
}
+
p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
@@ -794,11 +820,22 @@ static int ipgre_tunnel_ioctl(struct net_device *dev,
if (err)
return err;
+ if (cmd == SIOCCHGTUNNEL) {
+ struct ip_tunnel *t = netdev_priv(dev);
+
+ t->parms.i_flags = p.i_flags;
+ t->parms.o_flags = p.o_flags;
+
+ if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
+ ipgre_link_update(dev, true);
+ }
+
p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);
if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
return -EFAULT;
+
return 0;
}
@@ -1307,9 +1344,9 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
struct ip_tunnel *t = netdev_priv(dev);
- struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
__u32 fwmark = t->fwmark;
+ struct ip_tunnel_parm p;
int err;
if (ipgre_netlink_encap_parms(data, &ipencap)) {
@@ -1322,7 +1359,18 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
if (err < 0)
return err;
- return ip_tunnel_changelink(dev, tb, &p, fwmark);
+
+ err = ip_tunnel_changelink(dev, tb, &p, fwmark);
+ if (err < 0)
+ return err;
+
+ t->parms.i_flags = p.i_flags;
+ t->parms.o_flags = p.o_flags;
+
+ if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
+ ipgre_link_update(dev, !tb[IFLA_MTU]);
+
+ return 0;
}
static size_t ipgre_get_size(const struct net_device *dev)
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 9e2770fd00be..f88221aebc9d 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -634,6 +634,25 @@ static void get_counters(const struct xt_table_info *t,
}
}
+static void get_old_counters(const struct xt_table_info *t,
+ struct xt_counters counters[])
+{
+ struct arpt_entry *iter;
+ unsigned int cpu, i;
+
+ for_each_possible_cpu(cpu) {
+ i = 0;
+ xt_entry_foreach(iter, t->entries, t->size) {
+ struct xt_counters *tmp;
+
+ tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
+ ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt);
+ ++i;
+ }
+ cond_resched();
+ }
+}
+
static struct xt_counters *alloc_counters(const struct xt_table *table)
{
unsigned int countersize;
@@ -910,8 +929,7 @@ static int __do_replace(struct net *net, const char *name,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
- /* Get the old counters, and synchronize with replace */
- get_counters(oldinfo, counters);
+ get_old_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
loc_cpu_old_entry = oldinfo->entries;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 39286e543ee6..4cbe5e80f3bf 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -781,6 +781,26 @@ get_counters(const struct xt_table_info *t,
}
}
+static void get_old_counters(const struct xt_table_info *t,
+ struct xt_counters counters[])
+{
+ struct ipt_entry *iter;
+ unsigned int cpu, i;
+
+ for_each_possible_cpu(cpu) {
+ i = 0;
+ xt_entry_foreach(iter, t->entries, t->size) {
+ const struct xt_counters *tmp;
+
+ tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
+ ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt);
+ ++i; /* macro does multi eval of i */
+ }
+
+ cond_resched();
+ }
+}
+
static struct xt_counters *alloc_counters(const struct xt_table *table)
{
unsigned int countersize;
@@ -1070,8 +1090,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
- /* Get the old counters, and synchronize with replace */
- get_counters(oldinfo, counters);
+ get_old_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index fe374da4bc13..89af9d88ca21 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -344,7 +344,7 @@ static void ipv4_hooks_unregister(struct net *net)
mutex_unlock(&register_ipv4_hooks);
}
-struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
+const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
.l3proto = PF_INET,
.pkt_to_tuple = ipv4_pkt_to_tuple,
.invert_tuple = ipv4_invert_tuple,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index a046c298413a..1849fedd9b81 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -81,7 +81,6 @@ static int icmp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
- u_int8_t pf,
unsigned int *timeout)
{
/* Do not immediately delete the connection after the first
@@ -165,6 +164,12 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
return NF_ACCEPT;
}
+static void icmp_error_log(const struct sk_buff *skb, struct net *net,
+ u8 pf, const char *msg)
+{
+ nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMP, "%s", msg);
+}
+
/* Small and modified version of icmp_rcv */
static int
icmp_error(struct net *net, struct nf_conn *tmpl,
@@ -177,18 +182,14 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
/* Not enough header? */
icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
if (icmph == NULL) {
- if (LOG_INVALID(net, IPPROTO_ICMP))
- nf_log_packet(net, PF_INET, 0, skb, NULL, NULL,
- NULL, "nf_ct_icmp: short packet ");
+ icmp_error_log(skb, net, pf, "short packet");
return -NF_ACCEPT;
}
/* See ip_conntrack_proto_tcp.c */
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_ip_checksum(skb, hooknum, dataoff, 0)) {
- if (LOG_INVALID(net, IPPROTO_ICMP))
- nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
- "nf_ct_icmp: bad HW ICMP checksum ");
+ icmp_error_log(skb, net, pf, "bad hw icmp checksum");
return -NF_ACCEPT;
}
@@ -199,9 +200,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
* discarded.
*/
if (icmph->type > NR_ICMP_TYPES) {
- if (LOG_INVALID(net, IPPROTO_ICMP))
- nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
- "nf_ct_icmp: invalid ICMP type ");
+ icmp_error_log(skb, net, pf, "invalid icmp type");
return -NF_ACCEPT;
}
@@ -259,9 +258,14 @@ static int icmp_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
-static int icmp_nlattr_tuple_size(void)
+static unsigned int icmp_nlattr_tuple_size(void)
{
- return nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1);
+ static unsigned int size __read_mostly;
+
+ if (!size)
+ size = nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1);
+
+ return size;
}
#endif
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 127153f1ed8a..9f37c4727861 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -212,7 +212,6 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPRenoRecovery", LINUX_MIB_TCPRENORECOVERY),
SNMP_MIB_ITEM("TCPSackRecovery", LINUX_MIB_TCPSACKRECOVERY),
SNMP_MIB_ITEM("TCPSACKReneging", LINUX_MIB_TCPSACKRENEGING),
- SNMP_MIB_ITEM("TCPFACKReorder", LINUX_MIB_TCPFACKREORDER),
SNMP_MIB_ITEM("TCPSACKReorder", LINUX_MIB_TCPSACKREORDER),
SNMP_MIB_ITEM("TCPRenoReorder", LINUX_MIB_TCPRENOREORDER),
SNMP_MIB_ITEM("TCPTSReorder", LINUX_MIB_TCPTSREORDER),
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index bc40bd411196..43b69af242e1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -495,7 +495,7 @@ u32 ip_idents_reserve(u32 hash, int segs)
{
u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
- u32 old = ACCESS_ONCE(*p_tstamp);
+ u32 old = READ_ONCE(*p_tstamp);
u32 now = (u32)jiffies;
u32 new, delta = 0;
@@ -651,9 +651,12 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
struct fnhe_hash_bucket *hash;
struct fib_nh_exception *fnhe;
struct rtable *rt;
+ u32 genid, hval;
unsigned int i;
int depth;
- u32 hval = fnhe_hashfun(daddr);
+
+ genid = fnhe_genid(dev_net(nh->nh_dev));
+ hval = fnhe_hashfun(daddr);
spin_lock_bh(&fnhe_lock);
@@ -676,12 +679,13 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
}
if (fnhe) {
+ if (fnhe->fnhe_genid != genid)
+ fnhe->fnhe_genid = genid;
if (gw)
fnhe->fnhe_gw = gw;
- if (pmtu) {
+ if (pmtu)
fnhe->fnhe_pmtu = pmtu;
- fnhe->fnhe_expires = max(1UL, expires);
- }
+ fnhe->fnhe_expires = max(1UL, expires);
/* Update all cached dsts too */
rt = rcu_dereference(fnhe->fnhe_rth_input);
if (rt)
@@ -700,7 +704,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
fnhe->fnhe_next = hash->chain;
rcu_assign_pointer(hash->chain, fnhe);
}
- fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
+ fnhe->fnhe_genid = genid;
fnhe->fnhe_daddr = daddr;
fnhe->fnhe_gw = gw;
fnhe->fnhe_pmtu = pmtu;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index a82b44038308..93e172118a94 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -201,6 +201,8 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write,
static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
+ struct net *net = container_of(ctl->data, struct net,
+ ipv4.tcp_congestion_control);
char val[TCP_CA_NAME_MAX];
struct ctl_table tbl = {
.data = val,
@@ -208,11 +210,11 @@ static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
};
int ret;
- tcp_get_default_congestion_control(val);
+ tcp_get_default_congestion_control(net, val);
ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
if (write && ret == 0)
- ret = tcp_set_default_congestion_control(val);
+ ret = tcp_set_default_congestion_control(net, val);
return ret;
}
@@ -441,34 +443,12 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_doulongvec_minmax,
},
{
- .procname = "tcp_wmem",
- .data = &sysctl_tcp_wmem,
- .maxlen = sizeof(sysctl_tcp_wmem),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- },
- {
- .procname = "tcp_rmem",
- .data = &sysctl_tcp_rmem,
- .maxlen = sizeof(sysctl_tcp_rmem),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- },
- {
.procname = "tcp_low_latency",
.data = &sysctl_tcp_low_latency,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
- {
- .procname = "tcp_congestion_control",
- .mode = 0644,
- .maxlen = TCP_CA_NAME_MAX,
- .proc_handler = proc_tcp_congestion_control,
- },
#ifdef CONFIG_NETLABEL
{
.procname = "cipso_cache_enable",
@@ -780,6 +760,13 @@ static struct ctl_table ipv4_net_table[] = {
},
#endif
{
+ .procname = "tcp_congestion_control",
+ .data = &init_net.ipv4.tcp_congestion_control,
+ .mode = 0644,
+ .maxlen = TCP_CA_NAME_MAX,
+ .proc_handler = proc_tcp_congestion_control,
+ },
+ {
.procname = "tcp_keepalive_time",
.data = &init_net.ipv4.sysctl_tcp_keepalive_time,
.maxlen = sizeof(int),
@@ -1164,6 +1151,22 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = &zero,
.extra2 = &thousand,
},
+ {
+ .procname = "tcp_wmem",
+ .data = &init_net.ipv4.sysctl_tcp_wmem,
+ .maxlen = sizeof(init_net.ipv4.sysctl_tcp_wmem),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ },
+ {
+ .procname = "tcp_rmem",
+ .data = &init_net.ipv4.sysctl_tcp_rmem,
+ .maxlen = sizeof(init_net.ipv4.sysctl_tcp_rmem),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ },
{ }
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c4cb19ed4628..bf97317e6c97 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -289,12 +289,7 @@ struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);
long sysctl_tcp_mem[3] __read_mostly;
-int sysctl_tcp_wmem[3] __read_mostly;
-int sysctl_tcp_rmem[3] __read_mostly;
-
EXPORT_SYMBOL(sysctl_tcp_mem);
-EXPORT_SYMBOL(sysctl_tcp_rmem);
-EXPORT_SYMBOL(sysctl_tcp_wmem);
atomic_long_t tcp_memory_allocated; /* Current allocated memory. */
EXPORT_SYMBOL(tcp_memory_allocated);
@@ -456,8 +451,8 @@ void tcp_init_sock(struct sock *sk)
icsk->icsk_sync_mss = tcp_sync_mss;
- sk->sk_sndbuf = sysctl_tcp_wmem[1];
- sk->sk_rcvbuf = sysctl_tcp_rmem[1];
+ sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
+ sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
sk_sockets_allocated_inc(sk);
}
@@ -2514,8 +2509,6 @@ static int tcp_repair_options_est(struct sock *sk,
return -EINVAL;
tp->rx_opt.sack_ok |= TCP_SACK_SEEN;
- if (sock_net(sk)->ipv4.sysctl_tcp_fack)
- tcp_enable_fack(tp);
break;
case TCPOPT_TIMESTAMP:
if (opt.opt_val != 0)
@@ -2984,7 +2977,6 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_lost = tp->lost_out;
info->tcpi_retrans = tp->retrans_out;
- info->tcpi_fackets = tp->fackets_out;
now = tcp_jiffies32;
info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
@@ -3636,13 +3628,13 @@ void __init tcp_init(void)
max_wshare = min(4UL*1024*1024, limit);
max_rshare = min(6UL*1024*1024, limit);
- sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
- sysctl_tcp_wmem[1] = 16*1024;
- sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
+ init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
+ init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024;
+ init_net.ipv4.sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
- sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
- sysctl_tcp_rmem[1] = 87380;
- sysctl_tcp_rmem[2] = max(87380, max_rshare);
+ init_net.ipv4.sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
+ init_net.ipv4.sysctl_tcp_rmem[1] = 87380;
+ init_net.ipv4.sysctl_tcp_rmem[2] = max(87380, max_rshare);
pr_info("Hash tables configured (established %u bind %u)\n",
tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 2f26124fd160..bc6c02f16243 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -33,9 +33,11 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name)
}
/* Must be called with rcu lock held */
-static const struct tcp_congestion_ops *__tcp_ca_find_autoload(const char *name)
+static struct tcp_congestion_ops *tcp_ca_find_autoload(struct net *net,
+ const char *name)
{
- const struct tcp_congestion_ops *ca = tcp_ca_find(name);
+ struct tcp_congestion_ops *ca = tcp_ca_find(name);
+
#ifdef CONFIG_MODULES
if (!ca && capable(CAP_NET_ADMIN)) {
rcu_read_unlock();
@@ -115,7 +117,7 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
}
EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
-u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca)
+u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca)
{
const struct tcp_congestion_ops *ca;
u32 key = TCP_CA_UNSPEC;
@@ -123,7 +125,7 @@ u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca)
might_sleep();
rcu_read_lock();
- ca = __tcp_ca_find_autoload(name);
+ ca = tcp_ca_find_autoload(net, name);
if (ca) {
key = ca->key;
*ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN;
@@ -153,23 +155,18 @@ EXPORT_SYMBOL_GPL(tcp_ca_get_name_by_key);
/* Assign choice of congestion control. */
void tcp_assign_congestion_control(struct sock *sk)
{
+ struct net *net = sock_net(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
- struct tcp_congestion_ops *ca;
+ const struct tcp_congestion_ops *ca;
rcu_read_lock();
- list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
- if (likely(try_module_get(ca->owner))) {
- icsk->icsk_ca_ops = ca;
- goto out;
- }
- /* Fallback to next available. The last really
- * guaranteed fallback is Reno from this list.
- */
- }
-out:
+ ca = rcu_dereference(net->ipv4.tcp_congestion_control);
+ if (unlikely(!try_module_get(ca->owner)))
+ ca = &tcp_reno;
+ icsk->icsk_ca_ops = ca;
rcu_read_unlock();
- memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
+ memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
if (ca->flags & TCP_CONG_NEEDS_ECN)
INET_ECN_xmit(sk);
else
@@ -214,29 +211,27 @@ void tcp_cleanup_congestion_control(struct sock *sk)
}
/* Used by sysctl to change default congestion control */
-int tcp_set_default_congestion_control(const char *name)
+int tcp_set_default_congestion_control(struct net *net, const char *name)
{
struct tcp_congestion_ops *ca;
- int ret = -ENOENT;
-
- spin_lock(&tcp_cong_list_lock);
- ca = tcp_ca_find(name);
-#ifdef CONFIG_MODULES
- if (!ca && capable(CAP_NET_ADMIN)) {
- spin_unlock(&tcp_cong_list_lock);
+ const struct tcp_congestion_ops *prev;
+ int ret;
- request_module("tcp_%s", name);
- spin_lock(&tcp_cong_list_lock);
- ca = tcp_ca_find(name);
- }
-#endif
+ rcu_read_lock();
+ ca = tcp_ca_find_autoload(net, name);
+ if (!ca) {
+ ret = -ENOENT;
+ } else if (!try_module_get(ca->owner)) {
+ ret = -EBUSY;
+ } else {
+ prev = xchg(&net->ipv4.tcp_congestion_control, ca);
+ if (prev)
+ module_put(prev->owner);
- if (ca) {
- ca->flags |= TCP_CONG_NON_RESTRICTED; /* default is always allowed */
- list_move(&ca->list, &tcp_cong_list);
+ ca->flags |= TCP_CONG_NON_RESTRICTED;
ret = 0;
}
- spin_unlock(&tcp_cong_list_lock);
+ rcu_read_unlock();
return ret;
}
@@ -244,7 +239,8 @@ int tcp_set_default_congestion_control(const char *name)
/* Set default value from kernel configuration at bootup */
static int __init tcp_congestion_default(void)
{
- return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG);
+ return tcp_set_default_congestion_control(&init_net,
+ CONFIG_DEFAULT_TCP_CONG);
}
late_initcall(tcp_congestion_default);
@@ -264,14 +260,12 @@ void tcp_get_available_congestion_control(char *buf, size_t maxlen)
}
/* Get current default congestion control */
-void tcp_get_default_congestion_control(char *name)
+void tcp_get_default_congestion_control(struct net *net, char *name)
{
- struct tcp_congestion_ops *ca;
- /* We will always have reno... */
- BUG_ON(list_empty(&tcp_cong_list));
+ const struct tcp_congestion_ops *ca;
rcu_read_lock();
- ca = list_entry(tcp_cong_list.next, struct tcp_congestion_ops, list);
+ ca = rcu_dereference(net->ipv4.tcp_congestion_control);
strncpy(name, ca->name, TCP_CA_NAME_MAX);
rcu_read_unlock();
}
@@ -351,12 +345,14 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, boo
if (!load)
ca = tcp_ca_find(name);
else
- ca = __tcp_ca_find_autoload(name);
+ ca = tcp_ca_find_autoload(sock_net(sk), name);
+
/* No change asking for existing value */
if (ca == icsk->icsk_ca_ops) {
icsk->icsk_ca_setsockopt = 1;
goto out;
}
+
if (!ca) {
err = -ENOENT;
} else if (!load) {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0ada8bfc2ebd..734cfc8ff76e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -100,7 +100,7 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
-#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE)
+#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK)
#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
@@ -320,7 +320,7 @@ static void tcp_sndbuf_expand(struct sock *sk)
sndmem *= nr_segs * per_mss;
if (sk->sk_sndbuf < sndmem)
- sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
+ sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]);
}
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -354,7 +354,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
/* Optimize this! */
int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
- int window = tcp_win_from_space(sk, sysctl_tcp_rmem[2]) >> 1;
+ int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
while (tp->rcv_ssthresh <= window) {
if (truesize <= skb->len)
@@ -409,7 +409,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
rcvmem <<= 2;
if (sk->sk_rcvbuf < rcvmem)
- sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
+ sk->sk_rcvbuf = min(rcvmem, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
}
/* 4. Try to fixup all. It is made immediately after connection enters
@@ -457,15 +457,16 @@ static void tcp_clamp_window(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct net *net = sock_net(sk);
icsk->icsk_ack.quick = 0;
- if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
+ if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
!tcp_under_memory_pressure(sk) &&
sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
- sysctl_tcp_rmem[2]);
+ net->ipv4.sysctl_tcp_rmem[2]);
}
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
@@ -623,7 +624,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
rcvmem += 128;
- rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]);
+ rcvbuf = min(rcvwin / tp->advmss * rcvmem,
+ sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
if (rcvbuf > sk->sk_rcvbuf) {
sk->sk_rcvbuf = rcvbuf;
@@ -793,12 +795,12 @@ static void tcp_update_pacing_rate(struct sock *sk)
if (likely(tp->srtt_us))
do_div(rate, tp->srtt_us);
- /* ACCESS_ONCE() is needed because sch_fq fetches sk_pacing_rate
+ /* WRITE_ONCE() is needed because sch_fq fetches sk_pacing_rate
* without any lock. We want to make sure compiler wont store
* intermediate values in this location.
*/
- ACCESS_ONCE(sk->sk_pacing_rate) = min_t(u64, rate,
- sk->sk_max_pacing_rate);
+ WRITE_ONCE(sk->sk_pacing_rate, min_t(u64, rate,
+ sk->sk_max_pacing_rate));
}
/* Calculate rto without backoff. This is the second half of Van Jacobson's
@@ -840,18 +842,6 @@ __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
}
-/*
- * Packet counting of FACK is based on in-order assumptions, therefore TCP
- * disables it when reordering is detected
- */
-void tcp_disable_fack(struct tcp_sock *tp)
-{
- /* RFC3517 uses different metric in lost marker => reset on change */
- if (tcp_is_fack(tp))
- tp->lost_skb_hint = NULL;
- tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED;
-}
-
/* Take a notice that peer is sending D-SACKs */
static void tcp_dsack_seen(struct tcp_sock *tp)
{
@@ -859,42 +849,39 @@ static void tcp_dsack_seen(struct tcp_sock *tp)
tp->rack.dsack_seen = 1;
}
-static void tcp_update_reordering(struct sock *sk, const int metric,
- const int ts)
+/* It's reordering when higher sequence was delivered (i.e. sacked) before
+ * some lower never-retransmitted sequence ("low_seq"). The maximum reordering
+ * distance is approximated in full-mss packet distance ("reordering").
+ */
+static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
+ const int ts)
{
struct tcp_sock *tp = tcp_sk(sk);
- int mib_idx;
+ const u32 mss = tp->mss_cache;
+ u32 fack, metric;
- if (WARN_ON_ONCE(metric < 0))
+ fack = tcp_highest_sack_seq(tp);
+ if (!before(low_seq, fack))
return;
- if (metric > tp->reordering) {
- tp->reordering = min(sock_net(sk)->ipv4.sysctl_tcp_max_reordering, metric);
-
+ metric = fack - low_seq;
+ if ((metric > tp->reordering * mss) && mss) {
#if FASTRETRANS_DEBUG > 1
pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
tp->reordering,
- tp->fackets_out,
+ 0,
tp->sacked_out,
tp->undo_marker ? tp->undo_retrans : 0);
#endif
- tcp_disable_fack(tp);
+ tp->reordering = min_t(u32, (metric + mss - 1) / mss,
+ sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
}
tp->rack.reord = 1;
-
/* This exciting event is worth to be remembered. 8) */
- if (ts)
- mib_idx = LINUX_MIB_TCPTSREORDER;
- else if (tcp_is_reno(tp))
- mib_idx = LINUX_MIB_TCPRENOREORDER;
- else if (tcp_is_fack(tp))
- mib_idx = LINUX_MIB_TCPFACKREORDER;
- else
- mib_idx = LINUX_MIB_TCPSACKREORDER;
-
- NET_INC_STATS(sock_net(sk), mib_idx);
+ NET_INC_STATS(sock_net(sk),
+ ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
}
/* This must be called before lost_out is incremented */
@@ -968,7 +955,6 @@ void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb)
* 3. Loss detection event of two flavors:
* A. Scoreboard estimator decided the packet is lost.
* A'. Reno "three dupacks" marks head of queue lost.
- * A''. Its FACK modification, head until snd.fack is lost.
* B. SACK arrives sacking SND.NXT at the moment, when the
* segment was retransmitted.
* 4. D-SACK added new rule: D-SACK changes any tag to S.
@@ -1111,8 +1097,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
}
struct tcp_sacktag_state {
- int reord;
- int fack_count;
+ u32 reord;
/* Timestamps for earliest and latest never-retransmitted segment
* that was SACKed. RTO needs the earliest RTT to stay conservative,
* but congestion control should still get an accurate delay signal.
@@ -1188,15 +1173,15 @@ static u8 tcp_sacktag_one(struct sock *sk,
u64 xmit_time)
{
struct tcp_sock *tp = tcp_sk(sk);
- int fack_count = state->fack_count;
/* Account D-SACK for retransmitted packet. */
if (dup_sack && (sacked & TCPCB_RETRANS)) {
if (tp->undo_marker && tp->undo_retrans > 0 &&
after(end_seq, tp->undo_marker))
tp->undo_retrans--;
- if (sacked & TCPCB_SACKED_ACKED)
- state->reord = min(fack_count, state->reord);
+ if ((sacked & TCPCB_SACKED_ACKED) &&
+ before(start_seq, state->reord))
+ state->reord = start_seq;
}
/* Nothing to do; acked frame is about to be dropped (was ACKed). */
@@ -1222,9 +1207,10 @@ static u8 tcp_sacktag_one(struct sock *sk,
* which was in hole. It is reordering.
*/
if (before(start_seq,
- tcp_highest_sack_seq(tp)))
- state->reord = min(fack_count,
- state->reord);
+ tcp_highest_sack_seq(tp)) &&
+ before(start_seq, state->reord))
+ state->reord = start_seq;
+
if (!after(end_seq, tp->high_seq))
state->flag |= FLAG_ORIG_SACK_ACKED;
if (state->first_sackt == 0)
@@ -1243,15 +1229,10 @@ static u8 tcp_sacktag_one(struct sock *sk,
tp->sacked_out += pcount;
tp->delivered += pcount; /* Out-of-order packets delivered */
- fack_count += pcount;
-
/* Lost marker hint past SACKed? Tweak RFC3517 cnt */
- if (!tcp_is_fack(tp) && tp->lost_skb_hint &&
+ if (tp->lost_skb_hint &&
before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
tp->lost_cnt_hint += pcount;
-
- if (fack_count > tp->fackets_out)
- tp->fackets_out = fack_count;
}
/* D-SACK. We can detect redundant retransmission in S|R and plain R
@@ -1498,7 +1479,6 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
}
out:
- state->fack_count += pcount;
return prev;
noop:
@@ -1577,8 +1557,6 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
tcp_highest_sack_seq(tp)))
tcp_advance_highest_sack(sk, skb);
}
-
- state->fack_count += tcp_skb_pcount(skb);
}
return skb;
}
@@ -1589,7 +1567,6 @@ static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk,
{
struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
struct sk_buff *skb;
- int unack_bytes;
while (*p) {
parent = *p;
@@ -1602,12 +1579,6 @@ static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk,
p = &parent->rb_right;
continue;
}
-
- state->fack_count = 0;
- unack_bytes = TCP_SKB_CB(skb)->seq - tcp_sk(sk)->snd_una;
- if (state->mss_now && unack_bytes > 0)
- state->fack_count = unack_bytes / state->mss_now;
-
return skb;
}
return NULL;
@@ -1665,13 +1636,10 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
int first_sack_index;
state->flag = 0;
- state->reord = tp->packets_out;
+ state->reord = tp->snd_nxt;
- if (!tp->sacked_out) {
- if (WARN_ON(tp->fackets_out))
- tp->fackets_out = 0;
+ if (!tp->sacked_out)
tcp_highest_sack_reset(sk);
- }
found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
num_sacks, prior_snd_una);
@@ -1743,7 +1711,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
}
state->mss_now = tcp_current_mss(sk);
- state->fack_count = 0;
skb = NULL;
i = 0;
@@ -1801,7 +1768,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
skb = tcp_highest_sack(sk);
if (!skb)
break;
- state->fack_count = tp->fackets_out;
cache++;
goto walk;
}
@@ -1816,7 +1782,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
skb = tcp_highest_sack(sk);
if (!skb)
break;
- state->fack_count = tp->fackets_out;
}
skb = tcp_sacktag_skip(skb, sk, state, start_seq);
@@ -1836,9 +1801,8 @@ advance_sp:
for (j = 0; j < used_sacks; j++)
tp->recv_sack_cache[i++] = sp[j];
- if ((state->reord < tp->fackets_out) &&
- ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
- tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
+ if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss || tp->undo_marker)
+ tcp_check_sack_reordering(sk, state->reord, 0);
tcp_verify_left_out(tp);
out:
@@ -1876,8 +1840,13 @@ static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
static void tcp_check_reno_reordering(struct sock *sk, const int addend)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (tcp_limit_reno_sacked(tp))
- tcp_update_reordering(sk, tp->packets_out + addend, 0);
+
+ if (!tcp_limit_reno_sacked(tp))
+ return;
+
+ tp->reordering = min_t(u32, tp->packets_out + addend,
+ sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
}
/* Emulate SACKs for SACKless connection: account for a new dupack. */
@@ -1923,7 +1892,6 @@ void tcp_clear_retrans(struct tcp_sock *tp)
tp->lost_out = 0;
tp->undo_marker = 0;
tp->undo_retrans = -1;
- tp->fackets_out = 0;
tp->sacked_out = 0;
}
@@ -1973,7 +1941,6 @@ void tcp_enter_loss(struct sock *sk)
if (is_reneg) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
tp->sacked_out = 0;
- tp->fackets_out = 0;
}
tcp_clear_all_retrans_hints(tp);
@@ -2040,19 +2007,10 @@ static bool tcp_check_sack_reneging(struct sock *sk, int flag)
return false;
}
-static inline int tcp_fackets_out(const struct tcp_sock *tp)
-{
- return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
-}
-
/* Heurestics to calculate number of duplicate ACKs. There's no dupACKs
* counter when SACK is enabled (without SACK, sacked_out is used for
* that purpose).
*
- * Instead, with FACK TCP uses fackets_out that includes both SACKed
- * segments up to the highest received SACK block so far and holes in
- * between them.
- *
* With reordering, holes may still be in flight, so RFC3517 recovery
* uses pure sacked_out (total number of SACKed segments) even though
* it violates the RFC that uses duplicate ACKs, often these are equal
@@ -2062,10 +2020,10 @@ static inline int tcp_fackets_out(const struct tcp_sock *tp)
*/
static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
{
- return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
+ return tp->sacked_out + 1;
}
-/* Linux NewReno/SACK/FACK/ECN state machine.
+/* Linux NewReno/SACK/ECN state machine.
* --------------------------------------
*
* "Open" Normal state, no dubious events, fast path.
@@ -2130,16 +2088,6 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
* dynamically measured and adjusted. This is implemented in
* tcp_rack_mark_lost.
*
- * FACK (Disabled by default. Subsumbed by RACK):
- * It is the simplest heuristics. As soon as we decided
- * that something is lost, we decide that _all_ not SACKed
- * packets until the most forward SACK are lost. I.e.
- * lost_out = fackets_out - sacked_out and left_out = fackets_out.
- * It is absolutely correct estimate, if network does not reorder
- * packets. And it loses any connection to reality when reordering
- * takes place. We use FACK by default until reordering
- * is suspected on the path to this destination.
- *
* If the receiver does not support SACK:
*
* NewReno (RFC6582): in Recovery we assume that one segment
@@ -2188,7 +2136,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
}
/* Detect loss in event "A" above by marking head of queue up as lost.
- * For FACK or non-SACK(Reno) senders, the first "packets" number of segments
+ * For non-SACK(Reno) senders, the first "packets" number of segments
* are considered lost. For RFC3517 SACK, a segment is considered lost if it
* has at least tp->reordering SACKed seqments above it; "packets" refers to
* the maximum SACKed segments to pass before reaching this limit.
@@ -2224,12 +2172,12 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
break;
oldcnt = cnt;
- if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
+ if (tcp_is_reno(tp) ||
(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
cnt += tcp_skb_pcount(skb);
if (cnt > packets) {
- if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
+ if (tcp_is_sack(tp) ||
(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
(oldcnt >= packets))
break;
@@ -2260,11 +2208,6 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
if (tcp_is_reno(tp)) {
tcp_mark_head_lost(sk, 1, 1);
- } else if (tcp_is_fack(tp)) {
- int lost = tp->fackets_out - tp->reordering;
- if (lost <= 0)
- lost = 1;
- tcp_mark_head_lost(sk, lost, 0);
} else {
int sacked_upto = tp->sacked_out - tp->reordering;
if (sacked_upto >= 0)
@@ -2611,7 +2554,6 @@ void tcp_simple_retransmit(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
unsigned int mss = tcp_current_mss(sk);
- u32 prior_lost = tp->lost_out;
skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
if (tcp_skb_seglen(skb) > mss &&
@@ -2626,7 +2568,7 @@ void tcp_simple_retransmit(struct sock *sk)
tcp_clear_retrans_hints_partial(tp);
- if (prior_lost == tp->lost_out)
+ if (!tp->lost_out)
return;
if (tcp_is_reno(tp))
@@ -2734,15 +2676,15 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
}
/* Undo during fast recovery after partial ACK. */
-static bool tcp_try_undo_partial(struct sock *sk, const int acked)
+static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
{
struct tcp_sock *tp = tcp_sk(sk);
if (tp->undo_marker && tcp_packet_delayed(tp)) {
/* Plain luck! Hole if filled with delayed
- * packet, rather than with a retransmit.
+ * packet, rather than with a retransmit. Check reordering.
*/
- tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
+ tcp_check_sack_reordering(sk, prior_snd_una, 1);
/* We are getting evidence that the reordering degree is higher
* than we realized. If there are no retransmits out then we
@@ -2778,6 +2720,14 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag)
}
}
+static bool tcp_force_fast_retransmit(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ return after(tcp_highest_sack_seq(tp),
+ tp->snd_una + tp->reordering * tp->mss_cache);
+}
+
/* Process an event, which can update packets-in-flight not trivially.
* Main goal of this function is to calculate new estimate for left_out,
* taking into account both packets sitting in receiver's buffer and
@@ -2790,19 +2740,17 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag)
* It does _not_ decide what to send, it is made in function
* tcp_xmit_retransmit_queue().
*/
-static void tcp_fastretrans_alert(struct sock *sk, const int acked,
+static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
bool is_dupack, int *ack_flag, int *rexmit)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int fast_rexmit = 0, flag = *ack_flag;
bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
- (tcp_fackets_out(tp) > tp->reordering));
+ tcp_force_fast_retransmit(sk));
if (!tp->packets_out && tp->sacked_out)
tp->sacked_out = 0;
- if (!tp->sacked_out && tp->fackets_out)
- tp->fackets_out = 0;
/* Now state machine starts.
* A. ECE, hence prohibit cwnd undoing, the reduction is required. */
@@ -2849,11 +2797,11 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
if (tcp_is_reno(tp) && is_dupack)
tcp_add_reno_sack(sk);
} else {
- if (tcp_try_undo_partial(sk, acked))
+ if (tcp_try_undo_partial(sk, prior_snd_una))
return;
/* Partial ACK arrived. Force fast retransmit. */
do_lost = tcp_is_reno(tp) ||
- tcp_fackets_out(tp) > tp->reordering;
+ tcp_force_fast_retransmit(sk);
}
if (tcp_try_undo_dsack(sk)) {
tcp_try_keep_open(sk);
@@ -3016,7 +2964,7 @@ void tcp_rearm_rto(struct sock *sk)
/* Try to schedule a loss probe; if that doesn't work, then schedule an RTO. */
static void tcp_set_xmit_timer(struct sock *sk)
{
- if (!tcp_schedule_loss_probe(sk))
+ if (!tcp_schedule_loss_probe(sk, true))
tcp_rearm_rto(sk);
}
@@ -3063,15 +3011,15 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
* is before the ack sequence we can discard it as it's confirmed to have
* arrived at the other end.
*/
-static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
- u32 prior_snd_una, int *acked,
+static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
+ u32 prior_snd_una,
struct tcp_sacktag_state *sack)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
u64 first_ackt, last_ackt;
struct tcp_sock *tp = tcp_sk(sk);
u32 prior_sacked = tp->sacked_out;
- u32 reord = tp->packets_out;
+ u32 reord = tp->snd_nxt; /* lowest acked un-retx un-sacked seq */
struct sk_buff *skb, *next;
bool fully_acked = true;
long sack_rtt_us = -1L;
@@ -3086,6 +3034,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+ const u32 start_seq = scb->seq;
u8 sacked = scb->sacked;
u32 acked_pcount;
@@ -3116,7 +3065,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
first_ackt = last_ackt;
last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
- reord = min(pkts_acked, reord);
+ if (before(start_seq, reord))
+ reord = start_seq;
if (!after(scb->end_seq, tp->high_seq))
flag |= FLAG_ORIG_SACK_ACKED;
}
@@ -3194,16 +3144,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
int delta;
/* Non-retransmitted hole got filled? That's reordering */
- if (reord < prior_fackets && reord <= tp->fackets_out)
- tcp_update_reordering(sk, tp->fackets_out - reord, 0);
+ if (before(reord, prior_fack))
+ tcp_check_sack_reordering(sk, reord, 0);
- delta = tcp_is_fack(tp) ? pkts_acked :
- prior_sacked - tp->sacked_out;
+ delta = prior_sacked - tp->sacked_out;
tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
}
-
- tp->fackets_out -= min(pkts_acked, tp->fackets_out);
-
} else if (skb && rtt_update && sack_rtt_us >= 0 &&
sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) {
/* Do not re-arm RTO if the sack RTT is measured from data sent
@@ -3244,7 +3190,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
}
}
#endif
- *acked = pkts_acked;
return flag;
}
@@ -3553,12 +3498,11 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
bool is_dupack = false;
- u32 prior_fackets;
int prior_packets = tp->packets_out;
u32 delivered = tp->delivered;
u32 lost = tp->lost;
- int acked = 0; /* Number of packets newly acked */
int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
+ u32 prior_fack;
sack_state.first_sackt = 0;
sack_state.rate = &rs;
@@ -3590,7 +3534,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
icsk->icsk_retransmits = 0;
}
- prior_fackets = tp->fackets_out;
+ prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
rs.prior_in_flight = tcp_packets_in_flight(tp);
/* ts_recent update must be made after we are sure that the packet
@@ -3646,8 +3590,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
goto no_queue;
/* See if we can take anything off of the retransmit queue. */
- flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
- &sack_state);
+ flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state);
tcp_rack_update_reo_wnd(sk, &rs);
@@ -3659,7 +3602,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (tcp_ack_is_dubious(sk, flag)) {
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
- tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+ tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+ &rexmit);
}
if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
@@ -3675,7 +3619,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
no_queue:
/* If data was DSACKed, see if we can undo a cwnd reduction. */
if (flag & FLAG_DSACKING_ACK)
- tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+ tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+ &rexmit);
/* If this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission.
@@ -3697,7 +3642,8 @@ old_ack:
if (TCP_SKB_CB(skb)->sacked) {
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
&sack_state);
- tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+ tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+ &rexmit);
tcp_xmit_recovery(sk, rexmit);
}
@@ -5706,9 +5652,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tp->tcp_header_len = sizeof(struct tcphdr);
}
- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_fack)
- tcp_enable_fack(tp);
-
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);
@@ -6187,7 +6130,6 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
if (req) {
struct inet_request_sock *ireq = inet_rsk(req);
- kmemcheck_annotate_bitfield(ireq, flags);
ireq->ireq_opt = NULL;
#if IS_ENABLED(CONFIG_IPV6)
ireq->pktopts = NULL;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0162c577bb9c..c6bc0c4d19c6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2409,8 +2409,8 @@ struct proto tcp_prot = {
.memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure,
.sysctl_mem = sysctl_tcp_mem,
- .sysctl_wmem = sysctl_tcp_wmem,
- .sysctl_rmem = sysctl_tcp_rmem,
+ .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
+ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp_sock),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
@@ -2430,6 +2430,8 @@ static void __net_exit tcp_sk_exit(struct net *net)
{
int cpu;
+ module_put(net->ipv4.tcp_congestion_control->owner);
+
for_each_possible_cpu(cpu)
inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
free_percpu(net->ipv4.tcp_sk);
@@ -2509,12 +2511,26 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2;
net->ipv4.sysctl_tcp_pacing_ss_ratio = 200;
net->ipv4.sysctl_tcp_pacing_ca_ratio = 120;
-
+ if (net != &init_net) {
+ memcpy(net->ipv4.sysctl_tcp_rmem,
+ init_net.ipv4.sysctl_tcp_rmem,
+ sizeof(init_net.ipv4.sysctl_tcp_rmem));
+ memcpy(net->ipv4.sysctl_tcp_wmem,
+ init_net.ipv4.sysctl_tcp_wmem,
+ sizeof(init_net.ipv4.sysctl_tcp_wmem));
+ }
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
atomic_set(&net->ipv4.tfo_active_disable_times, 0);
+ /* Reno is always built in */
+ if (!net_eq(net, &init_net) &&
+ try_module_get(init_net.ipv4.tcp_congestion_control->owner))
+ net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control;
+ else
+ net->ipv4.tcp_congestion_control = &tcp_reno;
+
return 0;
fail:
tcp_sk_exit(net);
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 9d5ddebfd831..7097f92d16e5 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -470,10 +470,8 @@ void tcp_init_metrics(struct sock *sk)
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
}
val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
- if (val && tp->reordering != val) {
- tcp_disable_fack(tp);
+ if (val && tp->reordering != val)
tp->reordering = val;
- }
crtt = tcp_metric_get(tm, TCP_METRIC_RTT);
rcu_read_unlock();
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 4bb86580decd..e36eff0403f4 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -475,7 +475,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->packets_out = 0;
newtp->retrans_out = 0;
newtp->sacked_out = 0;
- newtp->fackets_out = 0;
newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
newtp->tlp_high_seq = 0;
newtp->lsndtime = tcp_jiffies32;
@@ -509,10 +508,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
keepalive_time_when(newtp));
newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
- if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
- if (sock_net(sk)->ipv4.sysctl_tcp_fack)
- tcp_enable_fack(newtp);
- }
+ newtp->rx_opt.sack_ok = ireq->sack_ok;
newtp->window_clamp = req->rsk_window_clamp;
newtp->rcv_ssthresh = req->rsk_rcv_wnd;
newtp->rcv_wnd = req->rsk_rcv_wnd;
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 11f69bbf9307..b6a2aa1dcf56 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -149,11 +149,19 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
* is freed by GSO engine
*/
if (copy_destructor) {
+ int delta;
+
swap(gso_skb->sk, skb->sk);
swap(gso_skb->destructor, skb->destructor);
sum_truesize += skb->truesize;
- refcount_add(sum_truesize - gso_skb->truesize,
- &skb->sk->sk_wmem_alloc);
+ delta = sum_truesize - gso_skb->truesize;
+ /* In some pathological cases, delta can be negative.
+ * We need to either use refcount_add() or refcount_sub_and_test()
+ */
+ if (likely(delta >= 0))
+ refcount_add(delta, &skb->sk->sk_wmem_alloc);
+ else
+ WARN_ON_ONCE(refcount_sub_and_test(-delta, &skb->sk->sk_wmem_alloc));
}
delta = htonl(oldlen + (skb_tail_pointer(skb) -
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a9d917e4dad5..a4d214c7b506 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -220,7 +220,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
(*rcv_wscale) = 0;
if (wscale_ok) {
/* Set window scaling on max possible window */
- space = max_t(u32, space, sysctl_tcp_rmem[2]);
+ space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
space = max_t(u32, space, sysctl_rmem_max);
space = min_t(u32, space, *window_clamp);
while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) {
@@ -1218,21 +1218,6 @@ static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
}
}
-/* When a modification to fackets out becomes necessary, we need to check
- * skb is counted to fackets_out or not.
- */
-static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
- int decr)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (!tp->sacked_out || tcp_is_reno(tp))
- return;
-
- if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
- tp->fackets_out -= decr;
-}
-
/* Pcount in the middle of the write queue got changed, we need to do various
* tweaks to fix counters
*/
@@ -1253,11 +1238,9 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de
if (tcp_is_reno(tp) && decr > 0)
tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
- tcp_adjust_fackets_out(sk, skb, decr);
-
if (tp->lost_skb_hint &&
before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
- (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)))
+ (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
tp->lost_cnt_hint -= decr;
tcp_verify_left_out(tp);
@@ -1737,7 +1720,7 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
{
u32 bytes, segs;
- bytes = min(sk->sk_pacing_rate >> 10,
+ bytes = min(sk->sk_pacing_rate >> sk->sk_pacing_shift,
sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
/* Goal is to send at least one packet per ms,
@@ -1978,7 +1961,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
goto send_now;
- win_divisor = ACCESS_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor);
+ win_divisor = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor);
if (win_divisor) {
u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
@@ -2215,7 +2198,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
{
unsigned int limit;
- limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
+ limit = max(2 * skb->truesize, sk->sk_pacing_rate >> sk->sk_pacing_shift);
limit = min_t(u32, limit,
sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
limit <<= factor;
@@ -2408,7 +2391,7 @@ repair:
/* Send one loss probe per tail loss episode. */
if (push_one != 2)
- tcp_schedule_loss_probe(sk);
+ tcp_schedule_loss_probe(sk, false);
is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
tcp_cwnd_validate(sk, is_cwnd_limited);
return false;
@@ -2416,7 +2399,7 @@ repair:
return !tp->packets_out && !tcp_write_queue_empty(sk);
}
-bool tcp_schedule_loss_probe(struct sock *sk)
+bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -2457,7 +2440,9 @@ bool tcp_schedule_loss_probe(struct sock *sk)
}
/* If the RTO formula yields an earlier time, then use that time. */
- rto_delta_us = tcp_rto_delta_us(sk); /* How far in future is RTO? */
+ rto_delta_us = advancing_rto ?
+ jiffies_to_usecs(inet_csk(sk)->icsk_rto) :
+ tcp_rto_delta_us(sk); /* How far in future is RTO? */
if (rto_delta_us > 0)
timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us));
@@ -2961,9 +2946,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
* retransmitted data is acknowledged. It tries to continue
* resending the rest of the retransmit queue, until either
* we've sent it all or the congestion window limit is reached.
- * If doing SACK, the first ACK which comes back for a timeout
- * based retransmit packet might feed us FACK information again.
- * If so, we use it to avoid unnecessarily retransmissions.
*/
void tcp_xmit_retransmit_queue(struct sock *sk)
{
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index a6699af05539..e4ff25c947c5 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1852,7 +1852,7 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
*/
/* if we're overly short, let UDP handle it */
- encap_rcv = ACCESS_ONCE(up->encap_rcv);
+ encap_rcv = READ_ONCE(up->encap_rcv);
if (encap_rcv) {
int ret;
@@ -2297,7 +2297,7 @@ void udp_destroy_sock(struct sock *sk)
unlock_sock_fast(sk, slow);
if (static_key_false(&udp_encap_needed) && up->encap_type) {
void (*encap_destroy)(struct sock *sk);
- encap_destroy = ACCESS_ONCE(up->encap_destroy);
+ encap_destroy = READ_ONCE(up->encap_destroy);
if (encap_destroy)
encap_destroy(sk);
}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 66d8c3d912fd..a0ae1c9d37df 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -231,7 +231,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.proxy_ndp = 0,
.accept_source_route = 0, /* we do not accept RH0 by default. */
.disable_ipv6 = 0,
- .accept_dad = 1,
+ .accept_dad = 0,
.suppress_frag_ndisc = 1,
.accept_ra_mtu = 1,
.stable_secret = {
@@ -1267,7 +1267,9 @@ out:
in6_ifa_put(ifp);
}
-static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift)
+static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp,
+ struct inet6_ifaddr *ift,
+ bool block)
{
struct inet6_dev *idev = ifp->idev;
struct in6_addr addr, *tmpaddr;
@@ -1371,7 +1373,7 @@ retry:
ift = ipv6_add_addr(idev, &addr, NULL, tmp_plen,
ipv6_addr_scope(&addr), addr_flags,
- tmp_valid_lft, tmp_prefered_lft, true, NULL);
+ tmp_valid_lft, tmp_prefered_lft, block, NULL);
if (IS_ERR(ift)) {
in6_ifa_put(ifp);
in6_dev_put(idev);
@@ -1956,7 +1958,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
if (ifpub) {
in6_ifa_hold(ifpub);
spin_unlock_bh(&ifp->lock);
- ipv6_create_tempaddr(ifpub, ifp);
+ ipv6_create_tempaddr(ifpub, ifp, true);
in6_ifa_put(ifpub);
} else {
spin_unlock_bh(&ifp->lock);
@@ -2456,7 +2458,7 @@ static void manage_tempaddrs(struct inet6_dev *idev,
* no temporary address currently exists.
*/
read_unlock_bh(&idev->lock);
- ipv6_create_tempaddr(ifp, NULL);
+ ipv6_create_tempaddr(ifp, NULL, false);
} else {
read_unlock_bh(&idev->lock);
}
@@ -4351,7 +4353,7 @@ restart:
spin_lock(&ifpub->lock);
ifpub->regen_count = 0;
spin_unlock(&ifpub->lock);
- ipv6_create_tempaddr(ifpub, ifp);
+ ipv6_create_tempaddr(ifpub, ifp, true);
in6_ifa_put(ifpub);
in6_ifa_put(ifp);
goto restart;
@@ -5057,6 +5059,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad;
array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode;
array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy;
+ array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass;
}
static inline size_t inet6_ifla6_size(void)
@@ -5984,6 +5987,7 @@ int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write,
}
static int minus_one = -1;
+static const int zero = 0;
static const int one = 1;
static const int two_five_five = 255;
@@ -6355,6 +6359,15 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = addrconf_sysctl_disable_policy,
},
{
+ .procname = "ndisc_tclass",
+ .data = &ipv6_devconf.ndisc_tclass,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&zero,
+ .extra2 = (void *)&two_five_five,
+ },
+ {
/* sentinel */
}
};
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 37bb33fbc742..78c974391567 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -444,7 +444,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
if (err == -EINPROGRESS)
goto out;
- if (err == -EBUSY)
+ if (err == -ENOSPC)
err = NET_XMIT_DROP;
goto out_free;
}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 4000b71bfdc5..a902ff8f59be 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -396,7 +396,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
case -EINPROGRESS:
goto error;
- case -EBUSY:
+ case -ENOSPC:
err = NET_XMIT_DROP;
break;
diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h
index e0170f62bc39..3c7a11b62334 100644
--- a/net/ipv6/ila/ila.h
+++ b/net/ipv6/ila/ila.h
@@ -55,17 +55,6 @@ struct ila_identifier {
};
};
-enum {
- ILA_ATYPE_IID = 0,
- ILA_ATYPE_LUID,
- ILA_ATYPE_VIRT_V4,
- ILA_ATYPE_VIRT_UNI_V6,
- ILA_ATYPE_VIRT_MULTI_V6,
- ILA_ATYPE_RSVD_1,
- ILA_ATYPE_RSVD_2,
- ILA_ATYPE_RSVD_3,
-};
-
#define CSUM_NEUTRAL_FLAG htonl(0x10000000)
struct ila_addr {
@@ -93,6 +82,7 @@ struct ila_params {
struct ila_locator locator_match;
__wsum csum_diff;
u8 csum_mode;
+ u8 ident_type;
};
static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to)
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index aba0998ddbfb..8c88ecf29b93 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -13,30 +13,37 @@
#include <uapi/linux/ila.h>
#include "ila.h"
-static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p)
+void ila_init_saved_csum(struct ila_params *p)
{
- struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
+ if (!p->locator_match.v64)
+ return;
+ p->csum_diff = compute_csum_diff8(
+ (__be32 *)&p->locator,
+ (__be32 *)&p->locator_match);
+}
+
+static __wsum get_csum_diff_iaddr(struct ila_addr *iaddr, struct ila_params *p)
+{
if (p->locator_match.v64)
return p->csum_diff;
else
- return compute_csum_diff8((__be32 *)&iaddr->loc,
- (__be32 *)&p->locator);
+ return compute_csum_diff8((__be32 *)&p->locator,
+ (__be32 *)&iaddr->loc);
}
-static void ila_csum_do_neutral(struct ila_addr *iaddr,
- struct ila_params *p)
+static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p)
+{
+ return get_csum_diff_iaddr(ila_a2i(&ip6h->daddr), p);
+}
+
+static void ila_csum_do_neutral_fmt(struct ila_addr *iaddr,
+ struct ila_params *p)
{
__sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3];
__wsum diff, fval;
- /* Check if checksum adjust value has been cached */
- if (p->locator_match.v64) {
- diff = p->csum_diff;
- } else {
- diff = compute_csum_diff8((__be32 *)&p->locator,
- (__be32 *)iaddr);
- }
+ diff = get_csum_diff_iaddr(iaddr, p);
fval = (__force __wsum)(ila_csum_neutral_set(iaddr->ident) ?
CSUM_NEUTRAL_FLAG : ~CSUM_NEUTRAL_FLAG);
@@ -53,13 +60,23 @@ static void ila_csum_do_neutral(struct ila_addr *iaddr,
iaddr->ident.csum_neutral ^= 1;
}
-static void ila_csum_adjust_transport(struct sk_buff *skb,
+static void ila_csum_do_neutral_nofmt(struct ila_addr *iaddr,
struct ila_params *p)
{
+ __sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3];
__wsum diff;
- struct ipv6hdr *ip6h = ipv6_hdr(skb);
- struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
+
+ diff = get_csum_diff_iaddr(iaddr, p);
+
+ *adjust = ~csum_fold(csum_add(diff, csum_unfold(*adjust)));
+}
+
+static void ila_csum_adjust_transport(struct sk_buff *skb,
+ struct ila_params *p)
+{
size_t nhoff = sizeof(struct ipv6hdr);
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ __wsum diff;
switch (ip6h->nexthdr) {
case NEXTHDR_TCP:
@@ -98,52 +115,45 @@ static void ila_csum_adjust_transport(struct sk_buff *skb,
}
break;
}
-
- /* Now change destination address */
- iaddr->loc = p->locator;
}
void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p,
- bool set_csum_neutral)
+ bool sir2ila)
{
struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
- /* First deal with the transport checksum */
- if (ila_csum_neutral_set(iaddr->ident)) {
- /* C-bit is set in the locator indicating that this
- * is a locator being translated to a SIR address.
- * Perform (receiver) checksum-neutral translation.
- */
- if (!set_csum_neutral)
- ila_csum_do_neutral(iaddr, p);
- } else {
- switch (p->csum_mode) {
- case ILA_CSUM_ADJUST_TRANSPORT:
- ila_csum_adjust_transport(skb, p);
- break;
- case ILA_CSUM_NEUTRAL_MAP:
- ila_csum_do_neutral(iaddr, p);
- break;
- case ILA_CSUM_NO_ACTION:
+ switch (p->csum_mode) {
+ case ILA_CSUM_ADJUST_TRANSPORT:
+ ila_csum_adjust_transport(skb, p);
+ break;
+ case ILA_CSUM_NEUTRAL_MAP:
+ if (sir2ila) {
+ if (WARN_ON(ila_csum_neutral_set(iaddr->ident))) {
+ /* Checksum flag should never be
+ * set in a formatted SIR address.
+ */
+ break;
+ }
+ } else if (!ila_csum_neutral_set(iaddr->ident)) {
+ /* ILA to SIR translation and C-bit isn't
+ * set so we're good.
+ */
break;
}
+ ila_csum_do_neutral_fmt(iaddr, p);
+ break;
+ case ILA_CSUM_NEUTRAL_MAP_AUTO:
+ ila_csum_do_neutral_nofmt(iaddr, p);
+ break;
+ case ILA_CSUM_NO_ACTION:
+ break;
}
/* Now change destination address */
iaddr->loc = p->locator;
}
-void ila_init_saved_csum(struct ila_params *p)
-{
- if (!p->locator_match.v64)
- return;
-
- p->csum_diff = compute_csum_diff8(
- (__be32 *)&p->locator,
- (__be32 *)&p->locator_match);
-}
-
static int __init ila_init(void)
{
int ret;
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index 696281b4bca2..3d56a2fb6f86 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -20,6 +20,7 @@ struct ila_lwt {
struct ila_params p;
struct dst_cache dst_cache;
u32 connected : 1;
+ u32 lwt_output : 1;
};
static inline struct ila_lwt *ila_lwt_lwtunnel(
@@ -45,8 +46,10 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
- ila_update_ipv6_locator(skb, ila_params_lwtunnel(orig_dst->lwtstate),
- true);
+ if (ilwt->lwt_output)
+ ila_update_ipv6_locator(skb,
+ ila_params_lwtunnel(orig_dst->lwtstate),
+ true);
if (rt->rt6i_flags & (RTF_GATEWAY | RTF_CACHE)) {
/* Already have a next hop address in route, no need for
@@ -98,11 +101,15 @@ drop:
static int ila_input(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
+ struct ila_lwt *ilwt = ila_lwt_lwtunnel(dst->lwtstate);
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
- ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate), false);
+ if (!ilwt->lwt_output)
+ ila_update_ipv6_locator(skb,
+ ila_params_lwtunnel(dst->lwtstate),
+ false);
return dst->lwtstate->orig_input(skb);
@@ -114,6 +121,8 @@ drop:
static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
[ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
[ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
+ [ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, },
+ [ILA_ATTR_HOOK_TYPE] = { .type = NLA_U8, },
};
static int ila_build_state(struct nlattr *nla,
@@ -127,33 +136,84 @@ static int ila_build_state(struct nlattr *nla,
struct lwtunnel_state *newts;
const struct fib6_config *cfg6 = cfg;
struct ila_addr *iaddr;
+ u8 ident_type = ILA_ATYPE_USE_FORMAT;
+ u8 hook_type = ILA_HOOK_ROUTE_OUTPUT;
+ u8 csum_mode = ILA_CSUM_NO_ACTION;
+ bool lwt_output = true;
+ u8 eff_ident_type;
int ret;
if (family != AF_INET6)
return -EINVAL;
- if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) {
- /* Need to have full locator and at least type field
- * included in destination
- */
+ ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack);
+ if (ret < 0)
+ return ret;
+
+ if (!tb[ILA_ATTR_LOCATOR])
return -EINVAL;
- }
iaddr = (struct ila_addr *)&cfg6->fc_dst;
- if (!ila_addr_is_ila(iaddr) || ila_csum_neutral_set(iaddr->ident)) {
- /* Don't allow translation for a non-ILA address or checksum
- * neutral flag to be set.
+ if (tb[ILA_ATTR_IDENT_TYPE])
+ ident_type = nla_get_u8(tb[ILA_ATTR_IDENT_TYPE]);
+
+ if (ident_type == ILA_ATYPE_USE_FORMAT) {
+ /* Infer identifier type from type field in formatted
+ * identifier.
*/
+
+ if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) {
+ /* Need to have full locator and at least type field
+ * included in destination
+ */
+ return -EINVAL;
+ }
+
+ eff_ident_type = iaddr->ident.type;
+ } else {
+ eff_ident_type = ident_type;
+ }
+
+ switch (eff_ident_type) {
+ case ILA_ATYPE_IID:
+ /* Don't allow ILA for IID type */
+ return -EINVAL;
+ case ILA_ATYPE_LUID:
+ break;
+ case ILA_ATYPE_VIRT_V4:
+ case ILA_ATYPE_VIRT_UNI_V6:
+ case ILA_ATYPE_VIRT_MULTI_V6:
+ case ILA_ATYPE_NONLOCAL_ADDR:
+ /* These ILA formats are not supported yet. */
+ default:
return -EINVAL;
}
- ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack);
- if (ret < 0)
- return ret;
+ if (tb[ILA_ATTR_HOOK_TYPE])
+ hook_type = nla_get_u8(tb[ILA_ATTR_HOOK_TYPE]);
+
+ switch (hook_type) {
+ case ILA_HOOK_ROUTE_OUTPUT:
+ lwt_output = true;
+ break;
+ case ILA_HOOK_ROUTE_INPUT:
+ lwt_output = false;
+ break;
+ default:
+ return -EINVAL;
+ }
- if (!tb[ILA_ATTR_LOCATOR])
+ if (tb[ILA_ATTR_CSUM_MODE])
+ csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]);
+
+ if (csum_mode == ILA_CSUM_NEUTRAL_MAP &&
+ ila_csum_neutral_set(iaddr->ident)) {
+ /* Don't allow translation if checksum neutral bit is
+ * configured and it's set in the SIR address.
+ */
return -EINVAL;
+ }
newts = lwtunnel_state_alloc(sizeof(*ilwt));
if (!newts)
@@ -166,19 +226,18 @@ static int ila_build_state(struct nlattr *nla,
return ret;
}
+ ilwt->lwt_output = !!lwt_output;
+
p = ila_params_lwtunnel(newts);
+ p->csum_mode = csum_mode;
+ p->ident_type = ident_type;
p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]);
/* Precompute checksum difference for translation since we
* know both the old locator and the new one.
*/
p->locator_match = iaddr->loc;
- p->csum_diff = compute_csum_diff8(
- (__be32 *)&p->locator_match, (__be32 *)&p->locator);
-
- if (tb[ILA_ATTR_CSUM_MODE])
- p->csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]);
ila_init_saved_csum(p);
@@ -203,13 +262,23 @@ static int ila_fill_encap_info(struct sk_buff *skb,
struct lwtunnel_state *lwtstate)
{
struct ila_params *p = ila_params_lwtunnel(lwtstate);
+ struct ila_lwt *ilwt = ila_lwt_lwtunnel(lwtstate);
if (nla_put_u64_64bit(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator.v64,
ILA_ATTR_PAD))
goto nla_put_failure;
+
if (nla_put_u8(skb, ILA_ATTR_CSUM_MODE, (__force u8)p->csum_mode))
goto nla_put_failure;
+ if (nla_put_u8(skb, ILA_ATTR_IDENT_TYPE, (__force u8)p->ident_type))
+ goto nla_put_failure;
+
+ if (nla_put_u8(skb, ILA_ATTR_HOOK_TYPE,
+ ilwt->lwt_output ? ILA_HOOK_ROUTE_OUTPUT :
+ ILA_HOOK_ROUTE_INPUT))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -220,6 +289,8 @@ static int ila_encap_nlsize(struct lwtunnel_state *lwtstate)
{
return nla_total_size_64bit(sizeof(u64)) + /* ILA_ATTR_LOCATOR */
nla_total_size(sizeof(u8)) + /* ILA_ATTR_CSUM_MODE */
+ nla_total_size(sizeof(u8)) + /* ILA_ATTR_IDENT_TYPE */
+ nla_total_size(sizeof(u8)) + /* ILA_ATTR_HOOK_TYPE */
0;
}
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 3123b9de91b5..6eb5e68f112a 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -121,6 +121,7 @@ static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
[ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, },
[ILA_ATTR_IFINDEX] = { .type = NLA_U32, },
[ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
+ [ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, },
};
static int parse_nl_config(struct genl_info *info,
@@ -138,6 +139,14 @@ static int parse_nl_config(struct genl_info *info,
if (info->attrs[ILA_ATTR_CSUM_MODE])
xp->ip.csum_mode = nla_get_u8(info->attrs[ILA_ATTR_CSUM_MODE]);
+ else
+ xp->ip.csum_mode = ILA_CSUM_NO_ACTION;
+
+ if (info->attrs[ILA_ATTR_IDENT_TYPE])
+ xp->ip.ident_type = nla_get_u8(
+ info->attrs[ILA_ATTR_IDENT_TYPE]);
+ else
+ xp->ip.ident_type = ILA_ATYPE_USE_FORMAT;
if (info->attrs[ILA_ATTR_IFINDEX])
xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]);
@@ -198,7 +207,7 @@ static void ila_free_cb(void *ptr, void *arg)
}
}
-static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral);
+static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila);
static unsigned int
ila_nf_input(void *priv,
@@ -396,7 +405,8 @@ static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg)
(__force u64)ila->xp.ip.locator_match.v64,
ILA_ATTR_PAD) ||
nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->xp.ifindex) ||
- nla_put_u32(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode))
+ nla_put_u8(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode) ||
+ nla_put_u8(msg, ILA_ATTR_IDENT_TYPE, ila->xp.ip.ident_type))
return -1;
return 0;
@@ -607,7 +617,7 @@ static struct pernet_operations ila_net_ops = {
.size = sizeof(struct ila_net),
};
-static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral)
+static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila)
{
struct ila_map *ila;
struct ipv6hdr *ip6h = ipv6_hdr(skb);
@@ -617,16 +627,16 @@ static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral)
/* Assumes skb contains a valid IPv6 header that is pulled */
- if (!ila_addr_is_ila(iaddr)) {
- /* Type indicates this is not an ILA address */
- return 0;
- }
+ /* No check here that ILA type in the mapping matches what is in the
+ * address. We assume that whatever sender gaves us can be translated.
+ * The checksum mode however is relevant.
+ */
rcu_read_lock();
ila = ila_lookup_wildcards(iaddr, skb->dev->ifindex, ilan);
if (ila)
- ila_update_ipv6_locator(skb, &ila->xp.ip, set_csum_neutral);
+ ila_update_ipv6_locator(skb, &ila->xp.ip, sir2ila);
rcu_read_unlock();
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 15535ee327c5..9f2e73c71768 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -47,7 +47,7 @@ static atomic_t fl_size = ATOMIC_INIT(0);
static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1];
static void ip6_fl_gc(unsigned long dummy);
-static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc, 0, 0);
+static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc);
/* FL hash table lock: it protects only of GC */
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 3e10c51e7e0c..4cfd8e0696fe 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -369,6 +369,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
+ struct net *net = dev_net(skb->dev);
const struct gre_base_hdr *greh;
const struct ipv6hdr *ipv6h;
int grehlen = sizeof(*greh);
@@ -402,9 +403,8 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return;
switch (type) {
- __u32 teli;
struct ipv6_tlv_tnl_enc_lim *tel;
- __u32 mtu;
+ __u32 teli;
case ICMPV6_DEST_UNREACH:
net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
t->parms.name);
@@ -435,12 +435,11 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
return;
case ICMPV6_PKT_TOOBIG:
- mtu = be32_to_cpu(info) - offset - t->tun_hlen;
- if (t->dev->type == ARPHRD_ETHER)
- mtu -= ETH_HLEN;
- if (mtu < IPV6_MIN_MTU)
- mtu = IPV6_MIN_MTU;
- t->dev->mtu = mtu;
+ ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
+ return;
+ case NDISC_REDIRECT:
+ ip6_redirect(skb, net, skb->dev->ifindex, 0,
+ sock_net_uid(net, NULL));
return;
}
@@ -461,7 +460,7 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
&ipv6h->saddr, &ipv6h->daddr, tpi->key,
tpi->proto);
if (tunnel) {
- ip6_tnl_rcv(tunnel, skb, tpi, NULL, false);
+ ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
return PACKET_RCVD;
}
@@ -503,7 +502,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
__u32 *pmtu, __be16 proto)
{
struct ip6_tnl *tunnel = netdev_priv(dev);
- struct dst_entry *dst = skb_dst(skb);
__be16 protocol;
if (dev->type == ARPHRD_ETHER)
@@ -522,10 +520,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
- /* TooBig packet may have updated dst->dev's mtu */
- if (dst && dst_mtu(dst) > dst->dev->mtu)
- dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
-
return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
NEXTHDR_GRE);
}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 439d65f7e094..3d3092adf1d2 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -471,15 +471,16 @@ static int
ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
u8 *type, u8 *code, int *msg, __u32 *info, int offset)
{
- const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) skb->data;
- struct ip6_tnl *t;
- int rel_msg = 0;
+ const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
+ struct net *net = dev_net(skb->dev);
u8 rel_type = ICMPV6_DEST_UNREACH;
u8 rel_code = ICMPV6_ADDR_UNREACH;
- u8 tproto;
__u32 rel_info = 0;
- __u16 len;
+ struct ip6_tnl *t;
int err = -ENOENT;
+ int rel_msg = 0;
+ u8 tproto;
+ __u16 len;
/* If the packet doesn't contain the original IPv6 header we are
in trouble since we might need the source address for further
@@ -490,16 +491,15 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
if (!t)
goto out;
- tproto = ACCESS_ONCE(t->parms.proto);
+ tproto = READ_ONCE(t->parms.proto);
if (tproto != ipproto && tproto != 0)
goto out;
err = 0;
switch (*type) {
- __u32 teli;
struct ipv6_tlv_tnl_enc_lim *tel;
- __u32 mtu;
+ __u32 mtu, teli;
case ICMPV6_DEST_UNREACH:
net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
t->parms.name);
@@ -530,11 +530,11 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
}
break;
case ICMPV6_PKT_TOOBIG:
+ ip6_update_pmtu(skb, net, htonl(*info), 0, 0,
+ sock_net_uid(net, NULL));
mtu = *info - offset;
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- t->dev->mtu = mtu;
-
len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len);
if (len > mtu) {
rel_type = ICMPV6_PKT_TOOBIG;
@@ -543,6 +543,10 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
rel_msg = 1;
}
break;
+ case NDISC_REDIRECT:
+ ip6_redirect(skb, net, skb->dev->ifindex, 0,
+ sock_net_uid(net, NULL));
+ break;
}
*type = rel_type;
@@ -559,13 +563,12 @@ static int
ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
- int rel_msg = 0;
- u8 rel_type = type;
- u8 rel_code = code;
__u32 rel_info = ntohl(info);
- int err;
- struct sk_buff *skb2;
const struct iphdr *eiph;
+ struct sk_buff *skb2;
+ int err, rel_msg = 0;
+ u8 rel_type = type;
+ u8 rel_code = code;
struct rtable *rt;
struct flowi4 fl4;
@@ -590,10 +593,6 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
rel_type = ICMP_DEST_UNREACH;
rel_code = ICMP_FRAG_NEEDED;
break;
- case NDISC_REDIRECT:
- rel_type = ICMP_REDIRECT;
- rel_code = ICMP_REDIR_HOST;
- /* fall through */
default:
return 0;
}
@@ -612,33 +611,26 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
eiph = ip_hdr(skb2);
/* Try to guess incoming interface */
- rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
- eiph->saddr, 0,
- 0, 0,
- IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
+ rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr,
+ 0, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
if (IS_ERR(rt))
goto out;
skb2->dev = rt->dst.dev;
+ ip_rt_put(rt);
/* route "incoming" packet */
if (rt->rt_flags & RTCF_LOCAL) {
- ip_rt_put(rt);
- rt = NULL;
rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
- eiph->daddr, eiph->saddr,
- 0, 0,
- IPPROTO_IPIP,
- RT_TOS(eiph->tos), 0);
- if (IS_ERR(rt) ||
- rt->dst.dev->type != ARPHRD_TUNNEL) {
+ eiph->daddr, eiph->saddr, 0, 0,
+ IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
+ if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) {
if (!IS_ERR(rt))
ip_rt_put(rt);
goto out;
}
skb_dst_set(skb2, &rt->dst);
} else {
- ip_rt_put(rt);
if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
skb2->dev) ||
skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
@@ -650,10 +642,9 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (rel_info > dst_mtu(skb_dst(skb2)))
goto out;
- skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info);
+ skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2,
+ rel_info);
}
- if (rel_type == ICMP_REDIRECT)
- skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2);
icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
@@ -666,11 +657,10 @@ static int
ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
- int rel_msg = 0;
+ __u32 rel_info = ntohl(info);
+ int err, rel_msg = 0;
u8 rel_type = type;
u8 rel_code = code;
- __u32 rel_info = ntohl(info);
- int err;
err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
&rel_msg, &rel_info, offset);
@@ -901,7 +891,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
if (t) {
- u8 tproto = ACCESS_ONCE(t->parms.proto);
+ u8 tproto = READ_ONCE(t->parms.proto);
if (tproto != ipproto && tproto != 0)
goto drop;
@@ -1236,7 +1226,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- tproto = ACCESS_ONCE(t->parms.proto);
+ tproto = READ_ONCE(t->parms.proto);
if (tproto != IPPROTO_IPIP && tproto != 0)
return -1;
@@ -1306,7 +1296,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
u8 tproto;
int err;
- tproto = ACCESS_ONCE(t->parms.proto);
+ tproto = READ_ONCE(t->parms.proto);
if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
ip6_tnl_addr_conflict(t, ipv6h))
return -1;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 59fad81e5f7a..9c24b85949c1 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1617,6 +1617,10 @@ int ip6mr_sk_done(struct sock *sk)
struct net *net = sock_net(sk);
struct mr6_table *mrt;
+ if (sk->sk_type != SOCK_RAW ||
+ inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
+ return err;
+
rtnl_lock();
ip6mr_for_each_table(mrt, net) {
if (sk == mrt->mroute6_sk) {
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index f9c3ffe04382..b3cea200c85e 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -427,12 +427,19 @@ static void ip6_nd_hdr(struct sk_buff *skb,
int hop_limit, int len)
{
struct ipv6hdr *hdr;
+ struct inet6_dev *idev;
+ unsigned tclass;
+
+ rcu_read_lock();
+ idev = __in6_dev_get(skb->dev);
+ tclass = idev ? idev->cnf.ndisc_tclass : 0;
+ rcu_read_unlock();
skb_push(skb, sizeof(*hdr));
skb_reset_network_header(skb);
hdr = ipv6_hdr(skb);
- ip6_flow_hdr(hdr, 0, 0);
+ ip6_flow_hdr(hdr, tclass, 0);
hdr->payload_len = htons(len);
hdr->nexthdr = IPPROTO_ICMPV6;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 01bd3ee5ebc6..f06e25065a34 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -800,6 +800,25 @@ get_counters(const struct xt_table_info *t,
}
}
+static void get_old_counters(const struct xt_table_info *t,
+ struct xt_counters counters[])
+{
+ struct ip6t_entry *iter;
+ unsigned int cpu, i;
+
+ for_each_possible_cpu(cpu) {
+ i = 0;
+ xt_entry_foreach(iter, t->entries, t->size) {
+ const struct xt_counters *tmp;
+
+ tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
+ ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt);
+ ++i;
+ }
+ cond_resched();
+ }
+}
+
static struct xt_counters *alloc_counters(const struct xt_table *table)
{
unsigned int countersize;
@@ -1090,8 +1109,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
- /* Get the old counters, and synchronize with replace */
- get_counters(oldinfo, counters);
+ get_old_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index fe01dc953c56..3b80a38f62b8 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -339,7 +339,7 @@ static void ipv6_hooks_unregister(struct net *net)
mutex_unlock(&register_ipv6_hooks);
}
-struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
+const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
.l3proto = PF_INET6,
.pkt_to_tuple = ipv6_pkt_to_tuple,
.invert_tuple = ipv6_invert_tuple,
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index a9e1fd1a8536..3ac0d826afc4 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -94,7 +94,6 @@ static int icmpv6_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
- u_int8_t pf,
unsigned int *timeout)
{
/* Do not immediately delete the connection after the first
@@ -176,6 +175,12 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
return NF_ACCEPT;
}
+static void icmpv6_error_log(const struct sk_buff *skb, struct net *net,
+ u8 pf, const char *msg)
+{
+ nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMPV6, "%s", msg);
+}
+
static int
icmpv6_error(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb, unsigned int dataoff,
@@ -187,17 +192,13 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
if (icmp6h == NULL) {
- if (LOG_INVALID(net, IPPROTO_ICMPV6))
- nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
- "nf_ct_icmpv6: short packet ");
+ icmpv6_error_log(skb, net, pf, "short packet");
return -NF_ACCEPT;
}
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
- if (LOG_INVALID(net, IPPROTO_ICMPV6))
- nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
- "nf_ct_icmpv6: ICMPv6 checksum failed ");
+ icmpv6_error_log(skb, net, pf, "ICMPv6 checksum failed");
return -NF_ACCEPT;
}
@@ -258,9 +259,14 @@ static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
-static int icmpv6_nlattr_tuple_size(void)
+static unsigned int icmpv6_nlattr_tuple_size(void)
{
- return nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1);
+ static unsigned int size __read_mostly;
+
+ if (!size)
+ size = nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1);
+
+ return size;
}
#endif
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index a338bbc33cf3..4a7e5ffa5108 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -31,37 +31,6 @@ static u32 __ipv6_select_ident(struct net *net, u32 hashrnd,
return id;
}
-/* This function exists only for tap drivers that must support broken
- * clients requesting UFO without specifying an IPv6 fragment ID.
- *
- * This is similar to ipv6_select_ident() but we use an independent hash
- * seed to limit information leakage.
- *
- * The network header must be set before calling this.
- */
-void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
-{
- static u32 ip6_proxy_idents_hashrnd __read_mostly;
- struct in6_addr buf[2];
- struct in6_addr *addrs;
- u32 id;
-
- addrs = skb_header_pointer(skb,
- skb_network_offset(skb) +
- offsetof(struct ipv6hdr, saddr),
- sizeof(buf), buf);
- if (!addrs)
- return;
-
- net_get_random_once(&ip6_proxy_idents_hashrnd,
- sizeof(ip6_proxy_idents_hashrnd));
-
- id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd,
- &addrs[1], &addrs[0]);
- skb_shinfo(skb)->ip6_frag_id = htonl(id);
-}
-EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident);
-
__be32 ipv6_select_ident(struct net *net,
const struct in6_addr *daddr,
const struct in6_addr *saddr)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 70d9659fc1e9..05eb7bc36156 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2378,6 +2378,7 @@ out:
static int ip6_convert_metrics(struct mx6_config *mxc,
const struct fib6_config *cfg)
{
+ struct net *net = cfg->fc_nlinfo.nl_net;
bool ecn_ca = false;
struct nlattr *nla;
int remaining;
@@ -2403,7 +2404,7 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
char tmp[TCP_CA_NAME_MAX];
nla_strlcpy(tmp, nla, sizeof(tmp));
- val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
+ val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca);
if (val == TCP_CA_UNSPEC)
goto err;
} else {
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0e2529958b52..6bb98c93edfe 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1940,8 +1940,8 @@ struct proto tcpv6_prot = {
.memory_pressure = &tcp_memory_pressure,
.orphan_count = &tcp_orphan_count,
.sysctl_mem = sysctl_tcp_mem,
- .sysctl_wmem = sysctl_tcp_wmem,
- .sysctl_rmem = sysctl_tcp_rmem,
+ .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
+ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp6_sock),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 40d7234c27b9..3f30fa313bf2 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -606,7 +606,7 @@ static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
*/
/* if we're overly short, let UDP handle it */
- encap_rcv = ACCESS_ONCE(up->encap_rcv);
+ encap_rcv = READ_ONCE(up->encap_rcv);
if (encap_rcv) {
int ret;
@@ -1432,7 +1432,7 @@ void udpv6_destroy_sock(struct sock *sk)
if (static_key_false(&udpv6_encap_needed) && up->encap_type) {
void (*encap_destroy)(struct sock *sk);
- encap_destroy = ACCESS_ONCE(up->encap_destroy);
+ encap_destroy = READ_ONCE(up->encap_destroy);
if (encap_destroy)
encap_destroy(sk);
}
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 4e438bc7ee87..f85f0d7480ac 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -338,6 +338,14 @@ static int __net_init xfrm6_tunnel_net_init(struct net *net)
static void __net_exit xfrm6_tunnel_net_exit(struct net *net)
{
+ struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
+ unsigned int i;
+
+ for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
+ WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i]));
+
+ for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++)
+ WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byspi[i]));
}
static struct pernet_operations xfrm6_tunnel_net_ops = {
diff --git a/net/key/af_key.c b/net/key/af_key.c
index a00d607e7224..3dffb892d52c 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3845,7 +3845,7 @@ static void __net_exit pfkey_net_exit(struct net *net)
struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
pfkey_exit_proc(net);
- BUG_ON(!hlist_empty(&net_pfkey->table));
+ WARN_ON(!hlist_empty(&net_pfkey->table));
}
static struct pernet_operations pfkey_net_ops = {
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 7c8d1eb757a5..115918ad8eca 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1246,8 +1246,6 @@ static void l2tp_tunnel_destruct(struct sock *sk)
list_del_rcu(&tunnel->list);
spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
- l2tp_tunnel_closeall(tunnel);
-
tunnel->sock = NULL;
l2tp_tunnel_dec_refcount(tunnel);
@@ -1835,6 +1833,7 @@ static __net_exit void l2tp_exit_net(struct net *net)
{
struct l2tp_net *pn = l2tp_pernet(net);
struct l2tp_tunnel *tunnel = NULL;
+ int hash;
rcu_read_lock_bh();
list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
@@ -1844,6 +1843,9 @@ static __net_exit void l2tp_exit_net(struct net *net)
flush_workqueue(l2tp_wq);
rcu_barrier();
+
+ for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
+ WARN_ON_ONCE(!hlist_empty(&pn->l2tp_session_hlist[hash]));
}
static struct pernet_operations l2tp_net_ops = {
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 3e2dec1fb0f5..5c366ecfa1cb 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -41,7 +41,6 @@
/* via netdev_priv() */
struct l2tp_eth {
- struct sock *tunnel_sock;
struct l2tp_session *session;
atomic_long_t tx_bytes;
atomic_long_t tx_packets;
@@ -313,7 +312,6 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
priv = netdev_priv(dev);
priv->session = session;
- priv->tunnel_sock = tunnel->sock;
session->recv_skb = l2tp_eth_dev_recv;
session->session_close = l2tp_eth_delete;
#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 6dbe450400a2..ff61124fdf59 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -123,6 +123,7 @@ static int l2tp_ip_recv(struct sk_buff *skb)
unsigned char *ptr, *optr;
struct l2tp_session *session;
struct l2tp_tunnel *tunnel = NULL;
+ struct iphdr *iph;
int length;
if (!pskb_may_pull(skb, 4))
@@ -178,24 +179,17 @@ pass_up:
goto discard;
tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
- tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (tunnel) {
- sk = tunnel->sock;
- sock_hold(sk);
- } else {
- struct iphdr *iph = (struct iphdr *) skb_network_header(skb);
-
- read_lock_bh(&l2tp_ip_lock);
- sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr,
- inet_iif(skb), tunnel_id);
- if (!sk) {
- read_unlock_bh(&l2tp_ip_lock);
- goto discard;
- }
+ iph = (struct iphdr *)skb_network_header(skb);
- sock_hold(sk);
+ read_lock_bh(&l2tp_ip_lock);
+ sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr, inet_iif(skb),
+ tunnel_id);
+ if (!sk) {
read_unlock_bh(&l2tp_ip_lock);
+ goto discard;
}
+ sock_hold(sk);
+ read_unlock_bh(&l2tp_ip_lock);
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_put;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 59ebb6e4f735..192344688c06 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -136,6 +136,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
unsigned char *ptr, *optr;
struct l2tp_session *session;
struct l2tp_tunnel *tunnel = NULL;
+ struct ipv6hdr *iph;
int length;
if (!pskb_may_pull(skb, 4))
@@ -192,24 +193,17 @@ pass_up:
goto discard;
tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
- tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (tunnel) {
- sk = tunnel->sock;
- sock_hold(sk);
- } else {
- struct ipv6hdr *iph = ipv6_hdr(skb);
-
- read_lock_bh(&l2tp_ip6_lock);
- sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr,
- inet6_iif(skb), tunnel_id);
- if (!sk) {
- read_unlock_bh(&l2tp_ip6_lock);
- goto discard;
- }
+ iph = ipv6_hdr(skb);
- sock_hold(sk);
+ read_lock_bh(&l2tp_ip6_lock);
+ sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr,
+ inet6_iif(skb), tunnel_id);
+ if (!sk) {
read_unlock_bh(&l2tp_ip6_lock);
+ goto discard;
}
+ sock_hold(sk);
+ read_unlock_bh(&l2tp_ip6_lock);
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_put;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 5f5c78b632d0..b412fc3351dc 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -127,8 +127,6 @@ struct pppol2tp_session {
* PPPoX socket */
struct sock *__sk; /* Copy of .sk, for cleanup */
struct rcu_head rcu; /* For asynchronous release */
- struct sock *tunnel_sock; /* Pointer to the tunnel UDP
- * socket */
int flags; /* accessed by PPPIOCGFLAGS.
* Unused. */
};
@@ -295,7 +293,6 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
int error;
struct l2tp_session *session;
struct l2tp_tunnel *tunnel;
- struct pppol2tp_session *ps;
int uhlen;
error = -ENOTCONN;
@@ -308,10 +305,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
if (session == NULL)
goto error;
- ps = l2tp_session_priv(session);
- tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
- if (tunnel == NULL)
- goto error_put_sess;
+ tunnel = session->tunnel;
uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
@@ -322,7 +316,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
2 + total_len, /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */
0, GFP_KERNEL);
if (!skb)
- goto error_put_sess_tun;
+ goto error_put_sess;
/* Reserve space for headers. */
skb_reserve(skb, NET_SKB_PAD);
@@ -340,20 +334,17 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
error = memcpy_from_msg(skb_put(skb, total_len), m, total_len);
if (error < 0) {
kfree_skb(skb);
- goto error_put_sess_tun;
+ goto error_put_sess;
}
local_bh_disable();
l2tp_xmit_skb(session, skb, session->hdr_len);
local_bh_enable();
- sock_put(ps->tunnel_sock);
sock_put(sk);
return total_len;
-error_put_sess_tun:
- sock_put(ps->tunnel_sock);
error_put_sess:
sock_put(sk);
error:
@@ -377,10 +368,8 @@ error:
static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
{
struct sock *sk = (struct sock *) chan->private;
- struct sock *sk_tun;
struct l2tp_session *session;
struct l2tp_tunnel *tunnel;
- struct pppol2tp_session *ps;
int uhlen, headroom;
if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
@@ -391,13 +380,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
if (session == NULL)
goto abort;
- ps = l2tp_session_priv(session);
- sk_tun = ps->tunnel_sock;
- if (sk_tun == NULL)
- goto abort_put_sess;
- tunnel = l2tp_sock_to_tunnel(sk_tun);
- if (tunnel == NULL)
- goto abort_put_sess;
+ tunnel = session->tunnel;
uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
headroom = NET_SKB_PAD +
@@ -406,7 +389,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
session->hdr_len + /* L2TP header */
2; /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */
if (skb_cow_head(skb, headroom))
- goto abort_put_sess_tun;
+ goto abort_put_sess;
/* Setup PPP header */
__skb_push(skb, 2);
@@ -417,12 +400,10 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
l2tp_xmit_skb(session, skb, session->hdr_len);
local_bh_enable();
- sock_put(sk_tun);
sock_put(sk);
+
return 1;
-abort_put_sess_tun:
- sock_put(sk_tun);
abort_put_sess:
sock_put(sk);
abort:
@@ -609,7 +590,6 @@ static void pppol2tp_session_init(struct l2tp_session *session)
ps = l2tp_session_priv(session);
mutex_init(&ps->sk_lock);
- ps->tunnel_sock = session->tunnel->sock;
ps->owner = current->pid;
/* If PMTU discovery was enabled, use the MTU that was discovered */
@@ -760,13 +740,6 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
error = -EEXIST;
goto end;
}
-
- /* consistency checks */
- if (ps->tunnel_sock != tunnel->sock) {
- mutex_unlock(&ps->sk_lock);
- error = -EEXIST;
- goto end;
- }
} else {
/* Default MTU must allow space for UDP/L2TP/PPP headers */
cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
@@ -919,9 +892,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
goto end;
pls = l2tp_session_priv(session);
- tunnel = l2tp_sock_to_tunnel(pls->tunnel_sock);
- if (tunnel == NULL)
- goto end_put_sess;
+ tunnel = session->tunnel;
inet = inet_sk(tunnel->sock);
if ((tunnel->version == 2) && (tunnel->sock->sk_family == AF_INET)) {
@@ -1001,8 +972,6 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
*usockaddr_len = len;
error = 0;
- sock_put(pls->tunnel_sock);
-end_put_sess:
sock_put(sk);
end:
return error;
@@ -1241,7 +1210,6 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
struct sock *sk = sock->sk;
struct l2tp_session *session;
struct l2tp_tunnel *tunnel;
- struct pppol2tp_session *ps;
int err;
if (!sk)
@@ -1265,16 +1233,10 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
/* Special case: if session's session_id is zero, treat ioctl as a
* tunnel ioctl
*/
- ps = l2tp_session_priv(session);
if ((session->session_id == 0) &&
(session->peer_session_id == 0)) {
- err = -EBADF;
- tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
- if (tunnel == NULL)
- goto end_put_sess;
-
+ tunnel = session->tunnel;
err = pppol2tp_tunnel_ioctl(tunnel, cmd, arg);
- sock_put(ps->tunnel_sock);
goto end_put_sess;
}
@@ -1400,7 +1362,6 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
struct sock *sk = sock->sk;
struct l2tp_session *session;
struct l2tp_tunnel *tunnel;
- struct pppol2tp_session *ps;
int val;
int err;
@@ -1425,20 +1386,14 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
/* Special case: if session_id == 0x0000, treat as operation on tunnel
*/
- ps = l2tp_session_priv(session);
if ((session->session_id == 0) &&
(session->peer_session_id == 0)) {
- err = -EBADF;
- tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
- if (tunnel == NULL)
- goto end_put_sess;
-
+ tunnel = session->tunnel;
err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val);
- sock_put(ps->tunnel_sock);
- } else
+ } else {
err = pppol2tp_session_setsockopt(sk, session, optname, val);
+ }
-end_put_sess:
sock_put(sk);
end:
return err;
@@ -1526,7 +1481,6 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
struct l2tp_tunnel *tunnel;
int val, len;
int err;
- struct pppol2tp_session *ps;
if (level != SOL_PPPOL2TP)
return -EINVAL;
@@ -1550,16 +1504,10 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
goto end;
/* Special case: if session_id == 0x0000, treat as operation on tunnel */
- ps = l2tp_session_priv(session);
if ((session->session_id == 0) &&
(session->peer_session_id == 0)) {
- err = -EBADF;
- tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
- if (tunnel == NULL)
- goto end_put_sess;
-
+ tunnel = session->tunnel;
err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val);
- sock_put(ps->tunnel_sock);
if (err)
goto end_put_sess;
} else {
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index dd3e83328ad5..82cb93f66b9b 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -193,7 +193,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
*/
rcv = rcu_dereference(sap->rcv_func);
dest = llc_pdu_type(skb);
- sap_handler = dest ? ACCESS_ONCE(llc_type_handlers[dest - 1]) : NULL;
+ sap_handler = dest ? READ_ONCE(llc_type_handlers[dest - 1]) : NULL;
if (unlikely(!sap_handler)) {
if (rcv)
rcv(skb, dev, pt, orig_dev);
@@ -214,7 +214,7 @@ drop:
kfree_skb(skb);
goto out;
handle_station:
- sta_handler = ACCESS_ONCE(llc_station_handler);
+ sta_handler = READ_ONCE(llc_station_handler);
if (!sta_handler)
goto drop;
sta_handler(skb);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 9673e157bf8f..a3060e55122c 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -2017,7 +2017,7 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate,
static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
{
- u16 rate = ACCESS_ONCE(sta_get_last_rx_stats(sta)->last_rate);
+ u16 rate = READ_ONCE(sta_get_last_rx_stats(sta)->last_rate);
if (rate == STA_STATS_RATE_INVALID)
return -EINVAL;
diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
index f135938bf781..67e708e98ccf 100644
--- a/net/ncsi/ncsi-aen.c
+++ b/net/ncsi/ncsi-aen.c
@@ -73,6 +73,9 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
ncm->data[2] = data;
ncm->data[4] = ntohl(lsc->oem_status);
+ netdev_info(ndp->ndev.dev, "NCSI: LSC AEN - channel %u state %s\n",
+ nc->id, data & 0x1 ? "up" : "down");
+
chained = !list_empty(&nc->link);
state = nc->state;
spin_unlock_irqrestore(&nc->lock, flags);
@@ -145,6 +148,8 @@ static int ncsi_aen_handler_hncdsc(struct ncsi_dev_priv *ndp,
ncm = &nc->modes[NCSI_MODE_LINK];
hncdsc = (struct ncsi_aen_hncdsc_pkt *)h;
ncm->data[3] = ntohl(hncdsc->status);
+ netdev_info(ndp->ndev.dev, "NCSI: HNCDSC AEN - channel %u state %s\n",
+ nc->id, ncm->data[3] & 0x3 ? "up" : "down");
if (!list_empty(&nc->link) ||
nc->state != NCSI_CHANNEL_ACTIVE) {
spin_unlock_irqrestore(&nc->lock, flags);
@@ -212,10 +217,18 @@ int ncsi_aen_handler(struct ncsi_dev_priv *ndp, struct sk_buff *skb)
}
ret = ncsi_validate_aen_pkt(h, nah->payload);
- if (ret)
+ if (ret) {
+ netdev_warn(ndp->ndev.dev,
+ "NCSI: 'bad' packet ignored for AEN type 0x%x\n",
+ h->type);
goto out;
+ }
ret = nah->handler(ndp, h);
+ if (ret)
+ netdev_err(ndp->ndev.dev,
+ "NCSI: Handler for AEN type 0x%x returned %d\n",
+ h->type, ret);
out:
consume_skb(skb);
return ret;
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 47baf914eec2..a2b904a718c6 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -229,6 +229,8 @@ static void ncsi_channel_monitor(unsigned long data)
case NCSI_CHANNEL_MONITOR_WAIT ... NCSI_CHANNEL_MONITOR_WAIT_MAX:
break;
default:
+ netdev_err(ndp->ndev.dev, "NCSI Channel %d timed out!\n",
+ nc->id);
if (!(ndp->flags & NCSI_DEV_HWA)) {
ncsi_report_link(ndp, true);
ndp->flags |= NCSI_DEV_RESHUFFLE;
@@ -682,7 +684,7 @@ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
data = ncsi_get_filter(nc, NCSI_FILTER_VLAN, index);
if (!data) {
netdev_err(ndp->ndev.dev,
- "ncsi: failed to retrieve filter %d\n", index);
+ "NCSI: failed to retrieve filter %d\n", index);
/* Set the VLAN id to 0 - this will still disable the entry in
* the filter table, but we won't know what it was.
*/
@@ -692,7 +694,7 @@ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
}
netdev_printk(KERN_DEBUG, ndp->ndev.dev,
- "ncsi: removed vlan tag %u at index %d\n",
+ "NCSI: removed vlan tag %u at index %d\n",
vid, index + 1);
ncsi_remove_filter(nc, NCSI_FILTER_VLAN, index);
@@ -718,7 +720,7 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
if (index < 0) {
/* New tag to add */
netdev_printk(KERN_DEBUG, ndp->ndev.dev,
- "ncsi: new vlan id to set: %u\n",
+ "NCSI: new vlan id to set: %u\n",
vlan->vid);
break;
}
@@ -745,7 +747,7 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
}
netdev_printk(KERN_DEBUG, ndp->ndev.dev,
- "ncsi: set vid %u in packet, index %u\n",
+ "NCSI: set vid %u in packet, index %u\n",
vlan->vid, index + 1);
nca->type = NCSI_PKT_CMD_SVF;
nca->words[1] = vlan->vid;
@@ -784,8 +786,11 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
nca.package = np->id;
nca.channel = NCSI_RESERVED_CHANNEL;
ret = ncsi_xmit_cmd(&nca);
- if (ret)
+ if (ret) {
+ netdev_err(ndp->ndev.dev,
+ "NCSI: Failed to transmit CMD_SP\n");
goto error;
+ }
nd->state = ncsi_dev_state_config_cis;
break;
@@ -797,8 +802,11 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
nca.package = np->id;
nca.channel = nc->id;
ret = ncsi_xmit_cmd(&nca);
- if (ret)
+ if (ret) {
+ netdev_err(ndp->ndev.dev,
+ "NCSI: Failed to transmit CMD_CIS\n");
goto error;
+ }
nd->state = ncsi_dev_state_config_clear_vids;
break;
@@ -895,10 +903,16 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
}
ret = ncsi_xmit_cmd(&nca);
- if (ret)
+ if (ret) {
+ netdev_err(ndp->ndev.dev,
+ "NCSI: Failed to transmit CMD %x\n",
+ nca.type);
goto error;
+ }
break;
case ncsi_dev_state_config_done:
+ netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+ "NCSI: channel %u config done\n", nc->id);
spin_lock_irqsave(&nc->lock, flags);
if (nc->reconfigure_needed) {
/* This channel's configuration has been updated
@@ -925,6 +939,9 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
} else {
hot_nc = NULL;
nc->state = NCSI_CHANNEL_INACTIVE;
+ netdev_warn(ndp->ndev.dev,
+ "NCSI: channel %u link down after config\n",
+ nc->id);
}
spin_unlock_irqrestore(&nc->lock, flags);
@@ -937,8 +954,8 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
ncsi_process_next_channel(ndp);
break;
default:
- netdev_warn(dev, "Wrong NCSI state 0x%x in config\n",
- nd->state);
+ netdev_alert(dev, "Wrong NCSI state 0x%x in config\n",
+ nd->state);
}
return;
@@ -990,10 +1007,17 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
}
if (!found) {
+ netdev_warn(ndp->ndev.dev,
+ "NCSI: No channel found with link\n");
ncsi_report_link(ndp, true);
return -ENODEV;
}
+ ncm = &found->modes[NCSI_MODE_LINK];
+ netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+ "NCSI: Channel %u added to queue (link %s)\n",
+ found->id, ncm->data[2] & 0x1 ? "up" : "down");
+
out:
spin_lock_irqsave(&ndp->lock, flags);
list_add_tail_rcu(&found->link, &ndp->channel_queue);
@@ -1055,6 +1079,8 @@ static int ncsi_enable_hwa(struct ncsi_dev_priv *ndp)
/* We can have no channels in extremely case */
if (list_empty(&ndp->channel_queue)) {
+ netdev_err(ndp->ndev.dev,
+ "NCSI: No available channels for HWA\n");
ncsi_report_link(ndp, false);
return -ENOENT;
}
@@ -1223,6 +1249,9 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
return;
error:
+ netdev_err(ndp->ndev.dev,
+ "NCSI: Failed to transmit cmd 0x%x during probe\n",
+ nca.type);
ncsi_report_link(ndp, true);
}
@@ -1276,10 +1305,14 @@ int ncsi_process_next_channel(struct ncsi_dev_priv *ndp)
switch (old_state) {
case NCSI_CHANNEL_INACTIVE:
ndp->ndev.state = ncsi_dev_state_config;
+ netdev_info(ndp->ndev.dev, "NCSI: configuring channel %u\n",
+ nc->id);
ncsi_configure_channel(ndp);
break;
case NCSI_CHANNEL_ACTIVE:
ndp->ndev.state = ncsi_dev_state_suspend;
+ netdev_info(ndp->ndev.dev, "NCSI: suspending channel %u\n",
+ nc->id);
ncsi_suspend_channel(ndp);
break;
default:
@@ -1299,6 +1332,8 @@ out:
return ncsi_choose_active_channel(ndp);
}
+ netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+ "NCSI: No more channels to process\n");
ncsi_report_link(ndp, false);
return -ENODEV;
}
@@ -1390,7 +1425,7 @@ static int ncsi_kick_channels(struct ncsi_dev_priv *ndp)
ncsi_dev_state_config ||
!list_empty(&nc->link)) {
netdev_printk(KERN_DEBUG, nd->dev,
- "ncsi: channel %p marked dirty\n",
+ "NCSI: channel %p marked dirty\n",
nc);
nc->reconfigure_needed = true;
}
@@ -1410,7 +1445,7 @@ static int ncsi_kick_channels(struct ncsi_dev_priv *ndp)
spin_unlock_irqrestore(&ndp->lock, flags);
netdev_printk(KERN_DEBUG, nd->dev,
- "ncsi: kicked channel %p\n", nc);
+ "NCSI: kicked channel %p\n", nc);
n++;
}
}
@@ -1431,7 +1466,7 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
nd = ncsi_find_dev(dev);
if (!nd) {
- netdev_warn(dev, "ncsi: No net_device?\n");
+ netdev_warn(dev, "NCSI: No net_device?\n");
return 0;
}
@@ -1442,7 +1477,7 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
n_vids++;
if (vlan->vid == vid) {
netdev_printk(KERN_DEBUG, dev,
- "vid %u already registered\n", vid);
+ "NCSI: vid %u already registered\n", vid);
return 0;
}
}
@@ -1461,7 +1496,7 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
vlan->vid = vid;
list_add_rcu(&vlan->list, &ndp->vlan_vids);
- netdev_printk(KERN_DEBUG, dev, "Added new vid %u\n", vid);
+ netdev_printk(KERN_DEBUG, dev, "NCSI: Added new vid %u\n", vid);
found = ncsi_kick_channels(ndp) != 0;
@@ -1481,7 +1516,7 @@ int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
nd = ncsi_find_dev(dev);
if (!nd) {
- netdev_warn(dev, "ncsi: no net_device?\n");
+ netdev_warn(dev, "NCSI: no net_device?\n");
return 0;
}
@@ -1491,14 +1526,14 @@ int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
list_for_each_entry_safe(vlan, tmp, &ndp->vlan_vids, list)
if (vlan->vid == vid) {
netdev_printk(KERN_DEBUG, dev,
- "vid %u found, removing\n", vid);
+ "NCSI: vid %u found, removing\n", vid);
list_del_rcu(&vlan->list);
found = true;
kfree(vlan);
}
if (!found) {
- netdev_err(dev, "ncsi: vid %u wasn't registered!\n", vid);
+ netdev_err(dev, "NCSI: vid %u wasn't registered!\n", vid);
return -EINVAL;
}
@@ -1581,10 +1616,12 @@ int ncsi_start_dev(struct ncsi_dev *nd)
return 0;
}
- if (ndp->flags & NCSI_DEV_HWA)
+ if (ndp->flags & NCSI_DEV_HWA) {
+ netdev_info(ndp->ndev.dev, "NCSI: Enabling HWA mode\n");
ret = ncsi_enable_hwa(ndp);
- else
+ } else {
ret = ncsi_choose_active_channel(ndp);
+ }
return ret;
}
@@ -1615,6 +1652,7 @@ void ncsi_stop_dev(struct ncsi_dev *nd)
}
}
+ netdev_printk(KERN_DEBUG, ndp->ndev.dev, "NCSI: Stopping device\n");
ncsi_report_link(ndp, true);
}
EXPORT_SYMBOL_GPL(ncsi_stop_dev);
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 927dad4759d1..efd933ff5570 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -146,7 +146,7 @@ static int ncsi_rsp_handler_ec(struct ncsi_request *nr)
ncm = &nc->modes[NCSI_MODE_ENABLE];
if (ncm->enable)
- return -EBUSY;
+ return 0;
ncm->enable = 1;
return 0;
@@ -173,7 +173,7 @@ static int ncsi_rsp_handler_dc(struct ncsi_request *nr)
ncm = &nc->modes[NCSI_MODE_ENABLE];
if (!ncm->enable)
- return -EBUSY;
+ return 0;
ncm->enable = 0;
return 0;
@@ -217,7 +217,7 @@ static int ncsi_rsp_handler_ecnt(struct ncsi_request *nr)
ncm = &nc->modes[NCSI_MODE_TX_ENABLE];
if (ncm->enable)
- return -EBUSY;
+ return 0;
ncm->enable = 1;
return 0;
@@ -239,7 +239,7 @@ static int ncsi_rsp_handler_dcnt(struct ncsi_request *nr)
ncm = &nc->modes[NCSI_MODE_TX_ENABLE];
if (!ncm->enable)
- return -EBUSY;
+ return 0;
ncm->enable = 1;
return 0;
@@ -263,7 +263,7 @@ static int ncsi_rsp_handler_ae(struct ncsi_request *nr)
/* Check if the AEN has been enabled */
ncm = &nc->modes[NCSI_MODE_AEN];
if (ncm->enable)
- return -EBUSY;
+ return 0;
/* Update to AEN configuration */
cmd = (struct ncsi_cmd_ae_pkt *)skb_network_header(nr->cmd);
@@ -382,7 +382,7 @@ static int ncsi_rsp_handler_ev(struct ncsi_request *nr)
/* Check if VLAN mode has been enabled */
ncm = &nc->modes[NCSI_MODE_VLAN];
if (ncm->enable)
- return -EBUSY;
+ return 0;
/* Update to VLAN mode */
cmd = (struct ncsi_cmd_ev_pkt *)skb_network_header(nr->cmd);
@@ -409,7 +409,7 @@ static int ncsi_rsp_handler_dv(struct ncsi_request *nr)
/* Check if VLAN mode has been enabled */
ncm = &nc->modes[NCSI_MODE_VLAN];
if (!ncm->enable)
- return -EBUSY;
+ return 0;
/* Update to VLAN mode */
ncm->enable = 0;
@@ -455,13 +455,10 @@ static int ncsi_rsp_handler_sma(struct ncsi_request *nr)
bitmap = &ncf->bitmap;
if (cmd->at_e & 0x1) {
- if (test_and_set_bit(cmd->index, bitmap))
- return -EBUSY;
+ set_bit(cmd->index, bitmap);
memcpy(ncf->data + 6 * cmd->index, cmd->mac, 6);
} else {
- if (!test_and_clear_bit(cmd->index, bitmap))
- return -EBUSY;
-
+ clear_bit(cmd->index, bitmap);
memset(ncf->data + 6 * cmd->index, 0, 6);
}
@@ -485,7 +482,7 @@ static int ncsi_rsp_handler_ebf(struct ncsi_request *nr)
/* Check if broadcast filter has been enabled */
ncm = &nc->modes[NCSI_MODE_BC];
if (ncm->enable)
- return -EBUSY;
+ return 0;
/* Update to broadcast filter mode */
cmd = (struct ncsi_cmd_ebf_pkt *)skb_network_header(nr->cmd);
@@ -511,7 +508,7 @@ static int ncsi_rsp_handler_dbf(struct ncsi_request *nr)
/* Check if broadcast filter isn't enabled */
ncm = &nc->modes[NCSI_MODE_BC];
if (!ncm->enable)
- return -EBUSY;
+ return 0;
/* Update to broadcast filter mode */
ncm->enable = 0;
@@ -538,7 +535,7 @@ static int ncsi_rsp_handler_egmf(struct ncsi_request *nr)
/* Check if multicast filter has been enabled */
ncm = &nc->modes[NCSI_MODE_MC];
if (ncm->enable)
- return -EBUSY;
+ return 0;
/* Update to multicast filter mode */
cmd = (struct ncsi_cmd_egmf_pkt *)skb_network_header(nr->cmd);
@@ -564,7 +561,7 @@ static int ncsi_rsp_handler_dgmf(struct ncsi_request *nr)
/* Check if multicast filter has been enabled */
ncm = &nc->modes[NCSI_MODE_MC];
if (!ncm->enable)
- return -EBUSY;
+ return 0;
/* Update to multicast filter mode */
ncm->enable = 0;
@@ -591,7 +588,7 @@ static int ncsi_rsp_handler_snfc(struct ncsi_request *nr)
/* Check if flow control has been enabled */
ncm = &nc->modes[NCSI_MODE_FC];
if (ncm->enable)
- return -EBUSY;
+ return 0;
/* Update to flow control mode */
cmd = (struct ncsi_cmd_snfc_pkt *)skb_network_header(nr->cmd);
@@ -1032,11 +1029,19 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev,
if (payload < 0)
payload = ntohs(hdr->length);
ret = ncsi_validate_rsp_pkt(nr, payload);
- if (ret)
+ if (ret) {
+ netdev_warn(ndp->ndev.dev,
+ "NCSI: 'bad' packet ignored for type 0x%x\n",
+ hdr->type);
goto out;
+ }
/* Process the packet */
ret = nrh->handler(nr);
+ if (ret)
+ netdev_err(ndp->ndev.dev,
+ "NCSI: Handler for packet type 0x%x returned %d\n",
+ hdr->type, ret);
out:
ncsi_free_request(nr);
return ret;
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index a2f19b9906e9..0f164e986bf1 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -434,7 +434,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
if (unlikely(tb[IPSET_ATTR_CIDR])) {
- u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
if (cidr != HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index c9b4e05ad940..e864681b8dc5 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -454,7 +454,6 @@ static size_t
list_set_memsize(const struct list_set *map, size_t dsize)
{
struct set_elem *e;
- size_t memsize;
u32 n = 0;
rcu_read_lock();
@@ -462,9 +461,7 @@ list_set_memsize(const struct list_set *map, size_t dsize)
n++;
rcu_read_unlock();
- memsize = sizeof(*map) + n * dsize;
-
- return memsize;
+ return (sizeof(*map) + n * dsize);
}
static int
diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c
index 1c8a42c1056c..d5be9c25fad6 100644
--- a/net/netfilter/ipset/pfxlen.c
+++ b/net/netfilter/ipset/pfxlen.c
@@ -3,6 +3,141 @@
/* Prefixlen maps for fast conversions, by Jan Engelhardt. */
+#ifdef E
+#undef E
+#endif
+
+#define PREFIXES_MAP \
+ E(0x00000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0x80000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xC0000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xE0000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xF0000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xF8000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFC000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFE000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFF000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFF800000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
+
#define E(a, b, c, d) \
{.ip6 = { \
htonl(a), htonl(b), \
@@ -13,135 +148,7 @@
* just use prefixlen_netmask_map[prefixlength].ip.
*/
const union nf_inet_addr ip_set_netmask_map[] = {
- E(0x00000000, 0x00000000, 0x00000000, 0x00000000),
- E(0x80000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xC0000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xE0000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xF0000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xF8000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFC000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFE000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFF000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFF800000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
+ PREFIXES_MAP
};
EXPORT_SYMBOL_GPL(ip_set_netmask_map);
@@ -155,135 +162,7 @@ EXPORT_SYMBOL_GPL(ip_set_netmask_map);
* just use prefixlen_hostmask_map[prefixlength].ip.
*/
const union nf_inet_addr ip_set_hostmask_map[] = {
- E(0x00000000, 0x00000000, 0x00000000, 0x00000000),
- E(0x80000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xC0000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xE0000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xF0000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xF8000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFC000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFE000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFF000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFF800000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
+ PREFIXES_MAP
};
EXPORT_SYMBOL_GPL(ip_set_hostmask_map);
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 3d2ac71a83ec..3e053cb30070 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -104,7 +104,7 @@ static inline void ct_write_unlock_bh(unsigned int key)
spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
}
-static void ip_vs_conn_expire(unsigned long data);
+static void ip_vs_conn_expire(struct timer_list *t);
/*
* Returns hash value for IPVS connection entry
@@ -185,7 +185,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]);
ret = 1;
} else {
- pr_err("%s(): request for already hashed, called from %pF\n",
+ pr_err("%s(): request for already hashed, called from %pS\n",
__func__, __builtin_return_address(0));
ret = 0;
}
@@ -457,7 +457,7 @@ EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto);
static void __ip_vs_conn_put_notimer(struct ip_vs_conn *cp)
{
__ip_vs_conn_put(cp);
- ip_vs_conn_expire((unsigned long)cp);
+ ip_vs_conn_expire(&cp->timer);
}
/*
@@ -817,9 +817,9 @@ static void ip_vs_conn_rcu_free(struct rcu_head *head)
kmem_cache_free(ip_vs_conn_cachep, cp);
}
-static void ip_vs_conn_expire(unsigned long data)
+static void ip_vs_conn_expire(struct timer_list *t)
{
- struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
+ struct ip_vs_conn *cp = from_timer(cp, t, timer);
struct netns_ipvs *ipvs = cp->ipvs;
/*
@@ -909,7 +909,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
}
INIT_HLIST_NODE(&cp->c_list);
- setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
+ timer_setup(&cp->timer, ip_vs_conn_expire, 0);
cp->ipvs = ipvs;
cp->af = p->af;
cp->daf = dest_af;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 4f940d7eb2f7..fff213eacf2a 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -300,7 +300,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
unsigned int hash;
if (svc->flags & IP_VS_SVC_F_HASHED) {
- pr_err("%s(): request for already hashed, called from %pF\n",
+ pr_err("%s(): request for already hashed, called from %pS\n",
__func__, __builtin_return_address(0));
return 0;
}
@@ -334,7 +334,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
static int ip_vs_svc_unhash(struct ip_vs_service *svc)
{
if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
- pr_err("%s(): request for unhash flagged, called from %pF\n",
+ pr_err("%s(): request for unhash flagged, called from %pS\n",
__func__, __builtin_return_address(0));
return 0;
}
@@ -1146,9 +1146,9 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
return 0;
}
-static void ip_vs_dest_trash_expire(unsigned long data)
+static void ip_vs_dest_trash_expire(struct timer_list *t)
{
- struct netns_ipvs *ipvs = (struct netns_ipvs *)data;
+ struct netns_ipvs *ipvs = from_timer(ipvs, t, dest_trash_timer);
struct ip_vs_dest *dest, *next;
unsigned long now = jiffies;
@@ -2034,12 +2034,16 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
seq_puts(seq,
" -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
} else {
+ struct net *net = seq_file_net(seq);
+ struct netns_ipvs *ipvs = net_ipvs(net);
const struct ip_vs_service *svc = v;
const struct ip_vs_iter *iter = seq->private;
const struct ip_vs_dest *dest;
struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
char *sched_name = sched ? sched->name : "none";
+ if (svc->ipvs != ipvs)
+ return 0;
if (iter->table == ip_vs_svc_table) {
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6)
@@ -4019,8 +4023,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
INIT_LIST_HEAD(&ipvs->dest_trash);
spin_lock_init(&ipvs->dest_trash_lock);
- setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire,
- (unsigned long) ipvs);
+ timer_setup(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 0);
atomic_set(&ipvs->ftpsvc_counter, 0);
atomic_set(&ipvs->nullsvc_counter, 0);
atomic_set(&ipvs->conn_out_counter, 0);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 457c6c193e13..489055091a9b 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -97,12 +97,12 @@ static void ip_vs_read_cpu_stats(struct ip_vs_kstats *sum,
}
-static void estimation_timer(unsigned long arg)
+static void estimation_timer(struct timer_list *t)
{
struct ip_vs_estimator *e;
struct ip_vs_stats *s;
u64 rate;
- struct netns_ipvs *ipvs = (struct netns_ipvs *)arg;
+ struct netns_ipvs *ipvs = from_timer(ipvs, t, est_timer);
spin_lock(&ipvs->est_lock);
list_for_each_entry(e, &ipvs->est_list, list) {
@@ -192,7 +192,7 @@ int __net_init ip_vs_estimator_net_init(struct netns_ipvs *ipvs)
{
INIT_LIST_HEAD(&ipvs->est_list);
spin_lock_init(&ipvs->est_lock);
- setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)ipvs);
+ timer_setup(&ipvs->est_timer, estimation_timer, 0);
mod_timer(&ipvs->est_timer, jiffies + 2 * HZ);
return 0;
}
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index b6aa4a970c6e..d625179de485 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -106,6 +106,7 @@ struct ip_vs_lblc_table {
struct rcu_head rcu_head;
struct hlist_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */
struct timer_list periodic_timer; /* collect stale entries */
+ struct ip_vs_service *svc; /* pointer back to service */
atomic_t entries; /* number of entries */
int max_size; /* maximum size of entries */
int rover; /* rover for expire check */
@@ -294,10 +295,10 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
* of the table.
* The full expiration check is for this purpose now.
*/
-static void ip_vs_lblc_check_expire(unsigned long data)
+static void ip_vs_lblc_check_expire(struct timer_list *t)
{
- struct ip_vs_service *svc = (struct ip_vs_service *) data;
- struct ip_vs_lblc_table *tbl = svc->sched_data;
+ struct ip_vs_lblc_table *tbl = from_timer(tbl, t, periodic_timer);
+ struct ip_vs_service *svc = tbl->svc;
unsigned long now = jiffies;
int goal;
int i, j;
@@ -369,12 +370,12 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
tbl->rover = 0;
tbl->counter = 1;
tbl->dead = 0;
+ tbl->svc = svc;
/*
* Hook periodic timer for garbage collection
*/
- setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire,
- (unsigned long)svc);
+ timer_setup(&tbl->periodic_timer, ip_vs_lblc_check_expire, 0);
mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
return 0;
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index c13ff575f9f7..84c57b62a588 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -278,6 +278,7 @@ struct ip_vs_lblcr_table {
atomic_t entries; /* number of entries */
int max_size; /* maximum size of entries */
struct timer_list periodic_timer; /* collect stale entries */
+ struct ip_vs_service *svc; /* pointer back to service */
int rover; /* rover for expire check */
int counter; /* counter for no expire */
bool dead;
@@ -458,10 +459,10 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
* of the table.
* The full expiration check is for this purpose now.
*/
-static void ip_vs_lblcr_check_expire(unsigned long data)
+static void ip_vs_lblcr_check_expire(struct timer_list *t)
{
- struct ip_vs_service *svc = (struct ip_vs_service *) data;
- struct ip_vs_lblcr_table *tbl = svc->sched_data;
+ struct ip_vs_lblcr_table *tbl = from_timer(tbl, t, periodic_timer);
+ struct ip_vs_service *svc = tbl->svc;
unsigned long now = jiffies;
int goal;
int i, j;
@@ -532,12 +533,12 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
tbl->rover = 0;
tbl->counter = 1;
tbl->dead = 0;
+ tbl->svc = svc;
/*
* Hook periodic timer for garbage collection
*/
- setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire,
- (unsigned long)svc);
+ timer_setup(&tbl->periodic_timer, ip_vs_lblcr_check_expire, 0);
mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
return 0;
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 13f740875507..9ee71cb276d7 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -458,7 +458,7 @@ static inline bool in_persistence(struct ip_vs_conn *cp)
static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
struct ip_vs_conn *cp, int pkts)
{
- unsigned long orig = ACCESS_ONCE(cp->sync_endtime);
+ unsigned long orig = READ_ONCE(cp->sync_endtime);
unsigned long now = jiffies;
unsigned long n = (now + cp->timeout) & ~3UL;
unsigned int sync_refresh_period;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 01130392b7c0..85f643c1e227 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1083,7 +1083,7 @@ static void gc_worker(struct work_struct *work)
next_run = gc_work->next_gc_run;
gc_work->last_bucket = i;
gc_work->early_drop = false;
- queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
+ queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
}
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
@@ -1419,7 +1419,7 @@ repeat:
/* Decide what timeout policy we want to apply to this flow. */
timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
- ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, timeouts);
+ ret = l4proto->packet(ct, skb, dataoff, ctinfo, timeouts);
if (ret <= 0) {
/* Invalid: inverse of the return code tells
* the netfilter core what to do */
@@ -1563,9 +1563,14 @@ int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
}
EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
-int nf_ct_port_nlattr_tuple_size(void)
+unsigned int nf_ct_port_nlattr_tuple_size(void)
{
- return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
+ static unsigned int size __read_mostly;
+
+ if (!size)
+ size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
+
+ return size;
}
EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size);
#endif
@@ -1940,7 +1945,7 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
return 0;
}
-int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
+int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
{
unsigned int hashsize;
int rc;
@@ -2084,7 +2089,7 @@ int nf_conntrack_init_start(void)
goto err_proto;
conntrack_gc_work_init(&conntrack_gc_work);
- queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ);
+ queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ);
return 0;
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index 89b2e46925c4..cf1bf2605c10 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -91,41 +91,41 @@ typedef struct field_t {
} field_t;
/* Bit Stream */
-typedef struct {
+struct bitstr {
unsigned char *buf;
unsigned char *beg;
unsigned char *end;
unsigned char *cur;
unsigned int bit;
-} bitstr_t;
+};
/* Tool Functions */
#define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;}
#define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;}
#define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;}
#define CHECK_BOUND(bs,n) if((bs)->cur+(n)>(bs)->end)return(H323_ERROR_BOUND)
-static unsigned int get_len(bitstr_t *bs);
-static unsigned int get_bit(bitstr_t *bs);
-static unsigned int get_bits(bitstr_t *bs, unsigned int b);
-static unsigned int get_bitmap(bitstr_t *bs, unsigned int b);
-static unsigned int get_uint(bitstr_t *bs, int b);
+static unsigned int get_len(struct bitstr *bs);
+static unsigned int get_bit(struct bitstr *bs);
+static unsigned int get_bits(struct bitstr *bs, unsigned int b);
+static unsigned int get_bitmap(struct bitstr *bs, unsigned int b);
+static unsigned int get_uint(struct bitstr *bs, int b);
/* Decoder Functions */
-static int decode_nul(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_bool(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_oid(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_int(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_enum(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_bitstr(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_numstr(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_octstr(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_bmpstr(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_seq(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_seqof(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_choice(bitstr_t *bs, const struct field_t *f, char *base, int level);
+static int decode_nul(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_bool(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_oid(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_int(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_enum(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_bitstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_numstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_octstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_bmpstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_seq(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_seqof(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_choice(struct bitstr *bs, const struct field_t *f, char *base, int level);
/* Decoder Functions Vector */
-typedef int (*decoder_t)(bitstr_t *, const struct field_t *, char *, int);
+typedef int (*decoder_t)(struct bitstr *, const struct field_t *, char *, int);
static const decoder_t Decoders[] = {
decode_nul,
decode_bool,
@@ -150,7 +150,7 @@ static const decoder_t Decoders[] = {
* Functions
****************************************************************************/
/* Assume bs is aligned && v < 16384 */
-static unsigned int get_len(bitstr_t *bs)
+static unsigned int get_len(struct bitstr *bs)
{
unsigned int v;
@@ -166,7 +166,7 @@ static unsigned int get_len(bitstr_t *bs)
}
/****************************************************************************/
-static unsigned int get_bit(bitstr_t *bs)
+static unsigned int get_bit(struct bitstr *bs)
{
unsigned int b = (*bs->cur) & (0x80 >> bs->bit);
@@ -177,7 +177,7 @@ static unsigned int get_bit(bitstr_t *bs)
/****************************************************************************/
/* Assume b <= 8 */
-static unsigned int get_bits(bitstr_t *bs, unsigned int b)
+static unsigned int get_bits(struct bitstr *bs, unsigned int b)
{
unsigned int v, l;
@@ -203,7 +203,7 @@ static unsigned int get_bits(bitstr_t *bs, unsigned int b)
/****************************************************************************/
/* Assume b <= 32 */
-static unsigned int get_bitmap(bitstr_t *bs, unsigned int b)
+static unsigned int get_bitmap(struct bitstr *bs, unsigned int b)
{
unsigned int v, l, shift, bytes;
@@ -242,7 +242,7 @@ static unsigned int get_bitmap(bitstr_t *bs, unsigned int b)
/****************************************************************************
* Assume bs is aligned and sizeof(unsigned int) == 4
****************************************************************************/
-static unsigned int get_uint(bitstr_t *bs, int b)
+static unsigned int get_uint(struct bitstr *bs, int b)
{
unsigned int v = 0;
@@ -264,7 +264,7 @@ static unsigned int get_uint(bitstr_t *bs, int b)
}
/****************************************************************************/
-static int decode_nul(bitstr_t *bs, const struct field_t *f,
+static int decode_nul(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
@@ -273,7 +273,7 @@ static int decode_nul(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_bool(bitstr_t *bs, const struct field_t *f,
+static int decode_bool(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
@@ -285,7 +285,7 @@ static int decode_bool(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_oid(bitstr_t *bs, const struct field_t *f,
+static int decode_oid(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
int len;
@@ -302,7 +302,7 @@ static int decode_oid(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_int(bitstr_t *bs, const struct field_t *f,
+static int decode_int(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
unsigned int len;
@@ -346,7 +346,7 @@ static int decode_int(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_enum(bitstr_t *bs, const struct field_t *f,
+static int decode_enum(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
@@ -362,7 +362,7 @@ static int decode_enum(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_bitstr(bitstr_t *bs, const struct field_t *f,
+static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
unsigned int len;
@@ -396,7 +396,7 @@ static int decode_bitstr(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_numstr(bitstr_t *bs, const struct field_t *f,
+static int decode_numstr(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
unsigned int len;
@@ -414,7 +414,7 @@ static int decode_numstr(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_octstr(bitstr_t *bs, const struct field_t *f,
+static int decode_octstr(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
unsigned int len;
@@ -463,7 +463,7 @@ static int decode_octstr(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_bmpstr(bitstr_t *bs, const struct field_t *f,
+static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
unsigned int len;
@@ -489,7 +489,7 @@ static int decode_bmpstr(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_seq(bitstr_t *bs, const struct field_t *f,
+static int decode_seq(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
unsigned int ext, bmp, i, opt, len = 0, bmp2, bmp2_len;
@@ -606,7 +606,7 @@ static int decode_seq(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_seqof(bitstr_t *bs, const struct field_t *f,
+static int decode_seqof(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
unsigned int count, effective_count = 0, i, len = 0;
@@ -696,7 +696,7 @@ static int decode_seqof(bitstr_t *bs, const struct field_t *f,
/****************************************************************************/
-static int decode_choice(bitstr_t *bs, const struct field_t *f,
+static int decode_choice(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
unsigned int type, ext, len = 0;
@@ -772,7 +772,7 @@ int DecodeRasMessage(unsigned char *buf, size_t sz, RasMessage *ras)
FNAME("RasMessage") CHOICE, 5, 24, 32, DECODE | EXT,
0, _RasMessage
};
- bitstr_t bs;
+ struct bitstr bs;
bs.buf = bs.beg = bs.cur = buf;
bs.end = buf + sz;
@@ -789,7 +789,7 @@ static int DecodeH323_UserInformation(unsigned char *buf, unsigned char *beg,
FNAME("H323-UserInformation") SEQ, 1, 2, 2, DECODE | EXT,
0, _H323_UserInformation
};
- bitstr_t bs;
+ struct bitstr bs;
bs.buf = buf;
bs.beg = bs.cur = beg;
@@ -808,7 +808,7 @@ int DecodeMultimediaSystemControlMessage(unsigned char *buf, size_t sz,
FNAME("MultimediaSystemControlMessage") CHOICE, 2, 4, 4,
DECODE | EXT, 0, _MultimediaSystemControlMessage
};
- bitstr_t bs;
+ struct bitstr bs;
bs.buf = bs.beg = bs.cur = buf;
bs.end = buf + sz;
@@ -877,6 +877,7 @@ int DecodeQ931(unsigned char *buf, size_t sz, Q931 *q931)
if (sz < 1)
break;
len = *p++;
+ sz--;
if (sz < len)
break;
p += len;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index de4053d84364..59c08997bfdf 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -533,11 +533,12 @@ nla_put_failure:
return -1;
}
-static inline size_t ctnetlink_proto_size(const struct nf_conn *ct)
+#if defined(CONFIG_NETFILTER_NETLINK_GLUE_CT) || defined(CONFIG_NF_CONNTRACK_EVENTS)
+static size_t ctnetlink_proto_size(const struct nf_conn *ct)
{
const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
- size_t len;
+ size_t len, len4 = 0;
l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
len = l3proto->nla_size;
@@ -545,9 +546,14 @@ static inline size_t ctnetlink_proto_size(const struct nf_conn *ct)
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
len += l4proto->nla_size;
+ if (l4proto->nlattr_tuple_size) {
+ len4 = l4proto->nlattr_tuple_size();
+ len4 *= 3u; /* ORIG, REPLY, MASTER */
+ }
- return len;
+ return len + len4;
}
+#endif
static inline size_t ctnetlink_acct_size(const struct nf_conn *ct)
{
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index b3e489c859ec..c8e9c9503a08 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -27,6 +27,7 @@
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_log.h>
static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly;
struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO] __read_mostly;
@@ -63,6 +64,52 @@ nf_ct_unregister_sysctl(struct ctl_table_header **header,
*header = NULL;
*table = NULL;
}
+
+__printf(5, 6)
+void nf_l4proto_log_invalid(const struct sk_buff *skb,
+ struct net *net,
+ u16 pf, u8 protonum,
+ const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ if (net->ct.sysctl_log_invalid != protonum ||
+ net->ct.sysctl_log_invalid != IPPROTO_RAW)
+ return;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
+ "nf_ct_proto_%d: %pV ", protonum, &vaf);
+ va_end(args);
+}
+EXPORT_SYMBOL_GPL(nf_l4proto_log_invalid);
+
+__printf(3, 4)
+void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
+ const struct nf_conn *ct,
+ const char *fmt, ...)
+{
+ struct va_format vaf;
+ struct net *net;
+ va_list args;
+
+ net = nf_ct_net(ct);
+ if (likely(net->ct.sysctl_log_invalid == 0))
+ return;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ nf_l4proto_log_invalid(skb, net, nf_ct_l3num(ct),
+ nf_ct_protonum(ct), "%pV", &vaf);
+ va_end(args);
+}
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid);
#endif
const struct nf_conntrack_l4proto *
@@ -125,7 +172,7 @@ void nf_ct_l3proto_module_put(unsigned short l3proto)
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put);
-int nf_ct_netns_get(struct net *net, u8 nfproto)
+static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
{
const struct nf_conntrack_l3proto *l3proto;
int ret;
@@ -150,9 +197,33 @@ int nf_ct_netns_get(struct net *net, u8 nfproto)
return ret;
}
+
+int nf_ct_netns_get(struct net *net, u8 nfproto)
+{
+ int err;
+
+ if (nfproto == NFPROTO_INET) {
+ err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
+ if (err < 0)
+ goto err1;
+ err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
+ if (err < 0)
+ goto err2;
+ } else {
+ err = nf_ct_netns_do_get(net, nfproto);
+ if (err < 0)
+ goto err1;
+ }
+ return 0;
+
+err2:
+ nf_ct_netns_put(net, NFPROTO_IPV4);
+err1:
+ return err;
+}
EXPORT_SYMBOL_GPL(nf_ct_netns_get);
-void nf_ct_netns_put(struct net *net, u8 nfproto)
+static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
{
const struct nf_conntrack_l3proto *l3proto;
@@ -171,6 +242,15 @@ void nf_ct_netns_put(struct net *net, u8 nfproto)
nf_ct_l3proto_module_put(nfproto);
}
+
+void nf_ct_netns_put(struct net *net, uint8_t nfproto)
+{
+ if (nfproto == NFPROTO_INET) {
+ nf_ct_netns_do_put(net, NFPROTO_IPV4);
+ nf_ct_netns_do_put(net, NFPROTO_IPV6);
+ } else
+ nf_ct_netns_do_put(net, nfproto);
+}
EXPORT_SYMBOL_GPL(nf_ct_netns_put);
const struct nf_conntrack_l4proto *
@@ -351,8 +431,6 @@ int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto)
l4proto->nla_size = 0;
if (l4proto->nlattr_size)
l4proto->nla_size += l4proto->nlattr_size();
- if (l4proto->nlattr_tuple_size)
- l4proto->nla_size += 3 * l4proto->nlattr_tuple_size();
rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
l4proto);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 0f5a4d79f6b8..2a446f4a554c 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -428,13 +428,13 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
default:
dn = dccp_pernet(net);
if (dn->dccp_loose == 0) {
- msg = "nf_ct_dccp: not picking up existing connection ";
+ msg = "not picking up existing connection ";
goto out_invalid;
}
case CT_DCCP_REQUEST:
break;
case CT_DCCP_INVALID:
- msg = "nf_ct_dccp: invalid state transition ";
+ msg = "invalid state transition ";
goto out_invalid;
}
@@ -447,9 +447,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
return true;
out_invalid:
- if (LOG_INVALID(net, IPPROTO_DCCP))
- nf_log_packet(net, nf_ct_l3num(ct), 0, skb, NULL, NULL,
- NULL, "%s", msg);
+ nf_ct_l4proto_log_invalid(skb, ct, "%s", msg);
return false;
}
@@ -469,10 +467,8 @@ static unsigned int *dccp_get_timeouts(struct net *net)
static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, enum ip_conntrack_info ctinfo,
- u_int8_t pf,
unsigned int *timeouts)
{
- struct net *net = nf_ct_net(ct);
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
struct dccp_hdr _dh, *dh;
u_int8_t type, old_state, new_state;
@@ -534,15 +530,11 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
ct->proto.dccp.last_pkt = type;
spin_unlock_bh(&ct->lock);
- if (LOG_INVALID(net, IPPROTO_DCCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_dccp: invalid packet ignored ");
+ nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid packet");
return NF_ACCEPT;
case CT_DCCP_INVALID:
spin_unlock_bh(&ct->lock);
- if (LOG_INVALID(net, IPPROTO_DCCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_dccp: invalid state transition ");
+ nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid state transition");
return -NF_ACCEPT;
}
@@ -604,8 +596,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl,
return NF_ACCEPT;
out_invalid:
- if (LOG_INVALID(net, IPPROTO_DCCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "%s", msg);
+ nf_l4proto_log_invalid(skb, net, pf, IPPROTO_DCCP, "%s", msg);
return -NF_ACCEPT;
}
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 9cd40700842e..1f86ddf6649a 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -60,7 +60,6 @@ static int generic_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
- u_int8_t pf,
unsigned int *timeout)
{
nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 09a90484c27d..a2503005d80b 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -244,7 +244,6 @@ static int gre_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
- u_int8_t pf,
unsigned int *timeouts)
{
/* If we've seen traffic both ways, this is a GRE connection.
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 6303a88af12b..80faf04ddf15 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -306,7 +306,6 @@ static int sctp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
- u_int8_t pf,
unsigned int *timeouts)
{
enum sctp_conntrack new_state, old_state;
@@ -522,8 +521,7 @@ static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb,
}
return NF_ACCEPT;
out_invalid:
- if (LOG_INVALID(net, IPPROTO_SCTP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "%s", logmsg);
+ nf_l4proto_log_invalid(skb, net, pf, IPPROTO_SCTP, "%s", logmsg);
return -NF_ACCEPT;
}
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index cba1c6ffe51a..b12fc07111d0 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -493,8 +493,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
unsigned int index,
const struct sk_buff *skb,
unsigned int dataoff,
- const struct tcphdr *tcph,
- u_int8_t pf)
+ const struct tcphdr *tcph)
{
struct net *net = nf_ct_net(ct);
struct nf_tcp_net *tn = tcp_pernet(net);
@@ -702,9 +701,9 @@ static bool tcp_in_window(const struct nf_conn *ct,
if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
tn->tcp_be_liberal)
res = true;
- if (!res && LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: %s ",
+ if (!res) {
+ nf_ct_l4proto_log_invalid(skb, ct,
+ "%s",
before(seq, sender->td_maxend + 1) ?
in_recv_win ?
before(sack, receiver->td_end + 1) ?
@@ -713,6 +712,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
: "ACK is over the upper bound (ACKed data not seen yet)"
: "SEQ is under the lower bound (already ACKed data retransmitted)"
: "SEQ is over the upper bound (over the window of the receiver)");
+ }
}
pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
@@ -738,6 +738,12 @@ static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
[TCPHDR_ACK|TCPHDR_URG] = 1,
};
+static void tcp_error_log(const struct sk_buff *skb, struct net *net,
+ u8 pf, const char *msg)
+{
+ nf_l4proto_log_invalid(skb, net, pf, IPPROTO_TCP, "%s", msg);
+}
+
/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
static int tcp_error(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb,
@@ -753,17 +759,13 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
/* Smaller that minimal TCP header? */
th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
if (th == NULL) {
- if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: short packet ");
+ tcp_error_log(skb, net, pf, "short packet");
return -NF_ACCEPT;
}
/* Not whole TCP header or malformed packet */
if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
- if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: truncated/malformed packet ");
+ tcp_error_log(skb, net, pf, "truncated packet");
return -NF_ACCEPT;
}
@@ -774,18 +776,14 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
/* FIXME: Source route IP option packets --RR */
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
- if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: bad TCP checksum ");
+ tcp_error_log(skb, net, pf, "bad checksum");
return -NF_ACCEPT;
}
/* Check TCP flags. */
tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
if (!tcp_valid_flags[tcpflags]) {
- if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: invalid TCP flag combination ");
+ tcp_error_log(skb, net, pf, "invalid tcp flag combination");
return -NF_ACCEPT;
}
@@ -802,7 +800,6 @@ static int tcp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
- u_int8_t pf,
unsigned int *timeouts)
{
struct net *net = nf_ct_net(ct);
@@ -939,10 +936,8 @@ static int tcp_packet(struct nf_conn *ct,
IP_CT_EXP_CHALLENGE_ACK;
}
spin_unlock_bh(&ct->lock);
- if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: invalid packet ignored in "
- "state %s ", tcp_conntrack_names[old_state]);
+ nf_ct_l4proto_log_invalid(skb, ct, "invalid packet ignored in "
+ "state %s ", tcp_conntrack_names[old_state]);
return NF_ACCEPT;
case TCP_CONNTRACK_MAX:
/* Special case for SYN proxy: when the SYN to the server or
@@ -964,9 +959,7 @@ static int tcp_packet(struct nf_conn *ct,
pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
dir, get_conntrack_index(th), old_state);
spin_unlock_bh(&ct->lock);
- if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: invalid state ");
+ nf_ct_l4proto_log_invalid(skb, ct, "invalid state");
return -NF_ACCEPT;
case TCP_CONNTRACK_TIME_WAIT:
/* RFC5961 compliance cause stack to send "challenge-ACK"
@@ -981,9 +974,7 @@ static int tcp_packet(struct nf_conn *ct,
/* Detected RFC5961 challenge ACK */
ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
spin_unlock_bh(&ct->lock);
- if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: challenge-ACK ignored ");
+ nf_ct_l4proto_log_invalid(skb, ct, "challenge-ack ignored");
return NF_ACCEPT; /* Don't change state */
}
break;
@@ -993,9 +984,7 @@ static int tcp_packet(struct nf_conn *ct,
&& before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
/* Invalid RST */
spin_unlock_bh(&ct->lock);
- if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL,
- NULL, "nf_ct_tcp: invalid RST ");
+ nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
return -NF_ACCEPT;
}
if (index == TCP_RST_SET
@@ -1022,7 +1011,7 @@ static int tcp_packet(struct nf_conn *ct,
}
if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
- skb, dataoff, th, pf)) {
+ skb, dataoff, th)) {
spin_unlock_bh(&ct->lock);
return -NF_ACCEPT;
}
@@ -1288,9 +1277,14 @@ static int tcp_nlattr_size(void)
+ nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
}
-static int tcp_nlattr_tuple_size(void)
+static unsigned int tcp_nlattr_tuple_size(void)
{
- return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
+ static unsigned int size __read_mostly;
+
+ if (!size)
+ size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
+
+ return size;
}
#endif
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 8af734cd1a94..3a5f727103af 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -73,7 +73,6 @@ static int udp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
- u_int8_t pf,
unsigned int *timeouts)
{
/* If we've seen traffic both ways, this is some kind of UDP
@@ -99,6 +98,12 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb,
}
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+static void udplite_error_log(const struct sk_buff *skb, struct net *net,
+ u8 pf, const char *msg)
+{
+ nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDPLITE, "%s", msg);
+}
+
static int udplite_error(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb,
unsigned int dataoff,
@@ -112,9 +117,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
/* Header is too small? */
hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (!hdr) {
- if (LOG_INVALID(net, IPPROTO_UDPLITE))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_udplite: short packet ");
+ udplite_error_log(skb, net, pf, "short packet");
return -NF_ACCEPT;
}
@@ -122,17 +125,13 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
if (cscov == 0) {
cscov = udplen;
} else if (cscov < sizeof(*hdr) || cscov > udplen) {
- if (LOG_INVALID(net, IPPROTO_UDPLITE))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_udplite: invalid checksum coverage ");
+ udplite_error_log(skb, net, pf, "invalid checksum coverage");
return -NF_ACCEPT;
}
/* UDPLITE mandates checksums */
if (!hdr->check) {
- if (LOG_INVALID(net, IPPROTO_UDPLITE))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_udplite: checksum missing ");
+ udplite_error_log(skb, net, pf, "checksum missing");
return -NF_ACCEPT;
}
@@ -140,9 +139,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP,
pf)) {
- if (LOG_INVALID(net, IPPROTO_UDPLITE))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_udplite: bad UDPLite checksum ");
+ udplite_error_log(skb, net, pf, "bad checksum");
return -NF_ACCEPT;
}
@@ -150,6 +147,12 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
}
#endif
+static void udp_error_log(const struct sk_buff *skb, struct net *net,
+ u8 pf, const char *msg)
+{
+ nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDP, "%s", msg);
+}
+
static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
unsigned int dataoff,
u_int8_t pf,
@@ -162,17 +165,13 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
/* Header is too small? */
hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (hdr == NULL) {
- if (LOG_INVALID(net, IPPROTO_UDP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_udp: short packet ");
+ udp_error_log(skb, net, pf, "short packet");
return -NF_ACCEPT;
}
/* Truncated/malformed packets */
if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
- if (LOG_INVALID(net, IPPROTO_UDP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_udp: truncated/malformed packet ");
+ udp_error_log(skb, net, pf, "truncated/malformed packet");
return -NF_ACCEPT;
}
@@ -186,9 +185,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
* FIXME: Source route IP option packets --RR */
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
- if (LOG_INVALID(net, IPPROTO_UDP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_udp: bad UDP checksum ");
+ udp_error_log(skb, net, pf, "bad checksum");
return -NF_ACCEPT;
}
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index af8345fc4fbd..6c38421e31f9 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -542,17 +542,14 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
if (nf_nat_proto_remove(ct, data))
return 1;
- if ((ct->status & IPS_SRC_NAT_DONE) == 0)
- return 0;
-
- /* This netns is being destroyed, and conntrack has nat null binding.
+ /* This module is being removed and conntrack has nat null binding.
* Remove it from bysource hash, as the table will be freed soon.
*
* Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack()
* will delete entry from already-freed table.
*/
- clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
- __nf_nat_cleanup_conntrack(ct);
+ if (test_and_clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status))
+ __nf_nat_cleanup_conntrack(ct);
/* don't delete conntrack. Although that would make things a lot
* simpler, we'd end up flushing all conntracks on nat rmmod.
diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c
index e84a578dbe35..d76afafdc699 100644
--- a/net/netfilter/nf_nat_ftp.c
+++ b/net/netfilter/nf_nat_ftp.c
@@ -134,7 +134,7 @@ static int __init nf_nat_ftp_init(void)
}
/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
-static int warn_set(const char *val, struct kernel_param *kp)
+static int warn_set(const char *val, const struct kernel_param *kp)
{
printk(KERN_INFO KBUILD_MODNAME
": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c
index 0648cb096bd8..dcb5f6375d9d 100644
--- a/net/netfilter/nf_nat_irc.c
+++ b/net/netfilter/nf_nat_irc.c
@@ -106,7 +106,7 @@ static int __init nf_nat_irc_init(void)
}
/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
-static int warn_set(const char *val, struct kernel_param *kp)
+static int warn_set(const char *val, const struct kernel_param *kp)
{
printk(KERN_INFO KBUILD_MODNAME
": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 64e1ee091225..d8327b43e4dc 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2549,14 +2549,9 @@ nft_select_set_ops(const struct nft_ctx *ctx,
case NFT_SET_POL_PERFORMANCE:
if (est.lookup < best.lookup)
break;
- if (est.lookup == best.lookup) {
- if (!desc->size) {
- if (est.space < best.space)
- break;
- } else if (est.size < best.size) {
- break;
- }
- }
+ if (est.lookup == best.lookup &&
+ est.space < best.space)
+ break;
continue;
case NFT_SET_POL_MEMORY:
if (!desc->size) {
@@ -3593,45 +3588,6 @@ static int nf_tables_dump_set_done(struct netlink_callback *cb)
return 0;
}
-static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
-{
- u8 genmask = nft_genmask_cur(net);
- const struct nft_set *set;
- struct nft_ctx ctx;
- int err;
-
- err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
- if (err < 0)
- return err;
-
- set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
- genmask);
- if (IS_ERR(set))
- return PTR_ERR(set);
-
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
- struct netlink_dump_control c = {
- .dump = nf_tables_dump_set,
- .done = nf_tables_dump_set_done,
- };
- struct nft_set_dump_ctx *dump_ctx;
-
- dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL);
- if (!dump_ctx)
- return -ENOMEM;
-
- dump_ctx->set = set;
- dump_ctx->ctx = ctx;
-
- c.data = dump_ctx;
- return netlink_dump_start(nlsk, skb, nlh, &c);
- }
- return -EOPNOTSUPP;
-}
-
static int nf_tables_fill_setelem_info(struct sk_buff *skb,
const struct nft_ctx *ctx, u32 seq,
u32 portid, int event, u16 flags,
@@ -3677,6 +3633,135 @@ nla_put_failure:
return -1;
}
+static int nft_setelem_parse_flags(const struct nft_set *set,
+ const struct nlattr *attr, u32 *flags)
+{
+ if (attr == NULL)
+ return 0;
+
+ *flags = ntohl(nla_get_be32(attr));
+ if (*flags & ~NFT_SET_ELEM_INTERVAL_END)
+ return -EINVAL;
+ if (!(set->flags & NFT_SET_INTERVAL) &&
+ *flags & NFT_SET_ELEM_INTERVAL_END)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+ const struct nlattr *attr)
+{
+ struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
+ const struct nft_set_ext *ext;
+ struct nft_data_desc desc;
+ struct nft_set_elem elem;
+ struct sk_buff *skb;
+ uint32_t flags = 0;
+ void *priv;
+ int err;
+
+ err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
+ nft_set_elem_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (!nla[NFTA_SET_ELEM_KEY])
+ return -EINVAL;
+
+ err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags);
+ if (err < 0)
+ return err;
+
+ err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc,
+ nla[NFTA_SET_ELEM_KEY]);
+ if (err < 0)
+ return err;
+
+ err = -EINVAL;
+ if (desc.type != NFT_DATA_VALUE || desc.len != set->klen)
+ return err;
+
+ priv = set->ops->get(ctx->net, set, &elem, flags);
+ if (IS_ERR(priv))
+ return PTR_ERR(priv);
+
+ elem.priv = priv;
+ ext = nft_set_elem_ext(set, &elem);
+
+ err = -ENOMEM;
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ goto err1;
+
+ err = nf_tables_fill_setelem_info(skb, ctx, ctx->seq, ctx->portid,
+ NFT_MSG_NEWSETELEM, 0, set, &elem);
+ if (err < 0)
+ goto err2;
+
+ err = nfnetlink_unicast(skb, ctx->net, ctx->portid, MSG_DONTWAIT);
+ /* This avoids a loop in nfnetlink. */
+ if (err < 0)
+ goto err1;
+
+ return 0;
+err2:
+ kfree_skb(skb);
+err1:
+ /* this avoids a loop in nfnetlink. */
+ return err == -EAGAIN ? -ENOBUFS : err;
+}
+
+static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
+{
+ u8 genmask = nft_genmask_cur(net);
+ struct nft_set *set;
+ struct nlattr *attr;
+ struct nft_ctx ctx;
+ int rem, err = 0;
+
+ err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
+ if (err < 0)
+ return err;
+
+ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
+ genmask);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+
+ if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = nf_tables_dump_set,
+ .done = nf_tables_dump_set_done,
+ };
+ struct nft_set_dump_ctx *dump_ctx;
+
+ dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL);
+ if (!dump_ctx)
+ return -ENOMEM;
+
+ dump_ctx->set = set;
+ dump_ctx->ctx = ctx;
+
+ c.data = dump_ctx;
+ return netlink_dump_start(nlsk, skb, nlh, &c);
+ }
+
+ if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS])
+ return -EINVAL;
+
+ nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
+ err = nft_get_set_elem(&ctx, set, attr);
+ if (err < 0)
+ break;
+ }
+
+ return err;
+}
+
static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
const struct nft_set *set,
const struct nft_set_elem *elem,
@@ -3777,22 +3862,6 @@ static void nf_tables_set_elem_destroy(const struct nft_set *set, void *elem)
kfree(elem);
}
-static int nft_setelem_parse_flags(const struct nft_set *set,
- const struct nlattr *attr, u32 *flags)
-{
- if (attr == NULL)
- return 0;
-
- *flags = ntohl(nla_get_be32(attr));
- if (*flags & ~NFT_SET_ELEM_INTERVAL_END)
- return -EINVAL;
- if (!(set->flags & NFT_SET_INTERVAL) &&
- *flags & NFT_SET_ELEM_INTERVAL_END)
- return -EINVAL;
-
- return 0;
-}
-
static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr, u32 nlmsg_flags)
{
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index c9796629858f..a16356cacec3 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -401,7 +401,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
outdev = entry->state.out;
- switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) {
+ switch ((enum nfqnl_config_mode)READ_ONCE(queue->copy_mode)) {
case NFQNL_COPY_META:
case NFQNL_COPY_NONE:
break;
@@ -412,7 +412,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
skb_checksum_help(entskb))
return NULL;
- data_len = ACCESS_ONCE(queue->copy_range);
+ data_len = READ_ONCE(queue->copy_range);
if (data_len > entskb->len)
data_len = entskb->len;
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index bd0975d7dd6f..2647b895f4b0 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -312,39 +312,6 @@ static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = {
[NFTA_CT_SREG] = { .type = NLA_U32 },
};
-static int nft_ct_netns_get(struct net *net, uint8_t family)
-{
- int err;
-
- if (family == NFPROTO_INET) {
- err = nf_ct_netns_get(net, NFPROTO_IPV4);
- if (err < 0)
- goto err1;
- err = nf_ct_netns_get(net, NFPROTO_IPV6);
- if (err < 0)
- goto err2;
- } else {
- err = nf_ct_netns_get(net, family);
- if (err < 0)
- goto err1;
- }
- return 0;
-
-err2:
- nf_ct_netns_put(net, NFPROTO_IPV4);
-err1:
- return err;
-}
-
-static void nft_ct_netns_put(struct net *net, uint8_t family)
-{
- if (family == NFPROTO_INET) {
- nf_ct_netns_put(net, NFPROTO_IPV4);
- nf_ct_netns_put(net, NFPROTO_IPV6);
- } else
- nf_ct_netns_put(net, family);
-}
-
#ifdef CONFIG_NF_CONNTRACK_ZONES
static void nft_ct_tmpl_put_pcpu(void)
{
@@ -489,7 +456,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
- err = nft_ct_netns_get(ctx->net, ctx->afi->family);
+ err = nf_ct_netns_get(ctx->net, ctx->afi->family);
if (err < 0)
return err;
@@ -583,7 +550,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
if (err < 0)
goto err1;
- err = nft_ct_netns_get(ctx->net, ctx->afi->family);
+ err = nf_ct_netns_get(ctx->net, ctx->afi->family);
if (err < 0)
goto err1;
@@ -606,7 +573,7 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx,
struct nft_ct *priv = nft_expr_priv(expr);
__nft_ct_set_destroy(ctx, priv);
- nft_ct_netns_put(ctx->net, ctx->afi->family);
+ nf_ct_netns_put(ctx->net, ctx->afi->family);
}
static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 734989c40579..45fb2752fb63 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -106,6 +106,23 @@ nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this,
return NULL;
}
+static void *nft_bitmap_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
+{
+ const struct nft_bitmap *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_cur(net);
+ struct nft_bitmap_elem *be;
+
+ list_for_each_entry_rcu(be, &priv->list, head) {
+ if (memcmp(nft_set_ext_key(&be->ext), elem->key.val.data, set->klen) ||
+ !nft_set_elem_active(&be->ext, genmask))
+ continue;
+
+ return be;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
static int nft_bitmap_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
struct nft_set_ext **ext)
@@ -294,6 +311,7 @@ static struct nft_set_ops nft_bitmap_ops __read_mostly = {
.activate = nft_bitmap_activate,
.lookup = nft_bitmap_lookup,
.walk = nft_bitmap_walk,
+ .get = nft_bitmap_get,
};
static struct nft_set_type nft_bitmap_type __read_mostly = {
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 9c0d5a7ce5f9..f8166c1d5430 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -95,6 +95,24 @@ static bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
return !!he;
}
+static void *nft_rhash_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
+{
+ struct nft_rhash *priv = nft_set_priv(set);
+ struct nft_rhash_elem *he;
+ struct nft_rhash_cmp_arg arg = {
+ .genmask = nft_genmask_cur(net),
+ .set = set,
+ .key = elem->key.val.data,
+ };
+
+ he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
+ if (he != NULL)
+ return he;
+
+ return ERR_PTR(-ENOENT);
+}
+
static bool nft_rhash_update(struct nft_set *set, const u32 *key,
void *(*new)(struct nft_set *,
const struct nft_expr *,
@@ -409,6 +427,24 @@ static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
return false;
}
+static void *nft_hash_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_cur(net);
+ struct nft_hash_elem *he;
+ u32 hash;
+
+ hash = jhash(elem->key.val.data, set->klen, priv->seed);
+ hash = reciprocal_scale(hash, priv->buckets);
+ hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
+ if (!memcmp(nft_set_ext_key(&he->ext), elem->key.val.data, set->klen) &&
+ nft_set_elem_active(&he->ext, genmask))
+ return he;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
/* nft_hash_select_ops() makes sure key size can be either 2 or 4 bytes . */
static inline u32 nft_hash_key(const u32 *key, u32 klen)
{
@@ -494,7 +530,7 @@ static void *nft_hash_deactivate(const struct net *net,
hash = reciprocal_scale(hash, priv->buckets);
hlist_for_each_entry(he, &priv->table[hash], node) {
if (!memcmp(nft_set_ext_key(&this->ext), &elem->key.val,
- set->klen) ||
+ set->klen) &&
nft_set_elem_active(&he->ext, genmask)) {
nft_set_elem_change_active(net, set, &he->ext);
return he;
@@ -600,6 +636,7 @@ static struct nft_set_ops nft_rhash_ops __read_mostly = {
.lookup = nft_rhash_lookup,
.update = nft_rhash_update,
.walk = nft_rhash_walk,
+ .get = nft_rhash_get,
.features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
};
@@ -617,6 +654,7 @@ static struct nft_set_ops nft_hash_ops __read_mostly = {
.remove = nft_hash_remove,
.lookup = nft_hash_lookup,
.walk = nft_hash_walk,
+ .get = nft_hash_get,
.features = NFT_SET_MAP | NFT_SET_OBJECT,
};
@@ -634,6 +672,7 @@ static struct nft_set_ops nft_hash_fast_ops __read_mostly = {
.remove = nft_hash_remove,
.lookup = nft_hash_lookup_fast,
.walk = nft_hash_walk,
+ .get = nft_hash_get,
.features = NFT_SET_MAP | NFT_SET_OBJECT,
};
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index d83a4ec5900d..e6f08bc5f359 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -113,6 +113,78 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
return ret;
}
+static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
+ const u32 *key, struct nft_rbtree_elem **elem,
+ unsigned int seq, unsigned int flags, u8 genmask)
+{
+ struct nft_rbtree_elem *rbe, *interval = NULL;
+ struct nft_rbtree *priv = nft_set_priv(set);
+ const struct rb_node *parent;
+ const void *this;
+ int d;
+
+ parent = rcu_dereference_raw(priv->root.rb_node);
+ while (parent != NULL) {
+ if (read_seqcount_retry(&priv->count, seq))
+ return false;
+
+ rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+
+ this = nft_set_ext_key(&rbe->ext);
+ d = memcmp(this, key, set->klen);
+ if (d < 0) {
+ parent = rcu_dereference_raw(parent->rb_left);
+ interval = rbe;
+ } else if (d > 0) {
+ parent = rcu_dereference_raw(parent->rb_right);
+ } else {
+ if (!nft_set_elem_active(&rbe->ext, genmask))
+ parent = rcu_dereference_raw(parent->rb_left);
+
+ if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) ||
+ (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) ==
+ (flags & NFT_SET_ELEM_INTERVAL_END)) {
+ *elem = rbe;
+ return true;
+ }
+ return false;
+ }
+ }
+
+ if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
+ nft_set_elem_active(&interval->ext, genmask) &&
+ !nft_rbtree_interval_end(interval)) {
+ *elem = interval;
+ return true;
+ }
+
+ return false;
+}
+
+static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+ unsigned int seq = read_seqcount_begin(&priv->count);
+ struct nft_rbtree_elem *rbe = ERR_PTR(-ENOENT);
+ const u32 *key = (const u32 *)&elem->key.val;
+ u8 genmask = nft_genmask_cur(net);
+ bool ret;
+
+ ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
+ if (ret || !read_seqcount_retry(&priv->count, seq))
+ return rbe;
+
+ read_lock_bh(&priv->lock);
+ seq = read_seqcount_begin(&priv->count);
+ ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
+ if (!ret)
+ rbe = ERR_PTR(-ENOENT);
+ read_unlock_bh(&priv->lock);
+
+ return rbe;
+}
+
static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *new,
struct nft_set_ext **ext)
@@ -336,6 +408,7 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = {
.activate = nft_rbtree_activate,
.lookup = nft_rbtree_lookup,
.walk = nft_rbtree_walk,
+ .get = nft_rbtree_get,
.features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT,
};
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index d8571f414208..a77dd514297c 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1153,6 +1153,7 @@ xt_replace_table(struct xt_table *table,
int *error)
{
struct xt_table_info *private;
+ unsigned int cpu;
int ret;
ret = xt_jumpstack_alloc(newinfo);
@@ -1182,14 +1183,28 @@ xt_replace_table(struct xt_table *table,
smp_wmb();
table->private = newinfo;
+ /* make sure all cpus see new ->private value */
+ smp_wmb();
+
/*
* Even though table entries have now been swapped, other CPU's
- * may still be using the old entries. This is okay, because
- * resynchronization happens because of the locking done
- * during the get_counters() routine.
+ * may still be using the old entries...
*/
local_bh_enable();
+ /* ... so wait for even xt_recseq on all cpus */
+ for_each_possible_cpu(cpu) {
+ seqcount_t *s = &per_cpu(xt_recseq, cpu);
+ u32 seq = raw_read_seqcount(s);
+
+ if (seq & 1) {
+ do {
+ cond_resched();
+ cpu_relax();
+ } while (seq == raw_read_seqcount(s));
+ }
+ }
+
#ifdef CONFIG_AUDIT
if (audit_enabled) {
audit_log(current->audit_context, GFP_KERNEL,
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index ffa8eec980e9..a6214f235333 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -46,7 +46,6 @@
struct xt_connlimit_conn {
struct hlist_node node;
struct nf_conntrack_tuple tuple;
- union nf_inet_addr addr;
};
struct xt_connlimit_rb {
@@ -72,16 +71,9 @@ static inline unsigned int connlimit_iphash(__be32 addr)
}
static inline unsigned int
-connlimit_iphash6(const union nf_inet_addr *addr,
- const union nf_inet_addr *mask)
+connlimit_iphash6(const union nf_inet_addr *addr)
{
- union nf_inet_addr res;
- unsigned int i;
-
- for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i)
- res.ip6[i] = addr->ip6[i] & mask->ip6[i];
-
- return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6),
+ return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6),
connlimit_rnd) % CONNLIMIT_SLOTS;
}
@@ -95,24 +87,13 @@ static inline bool already_closed(const struct nf_conn *conn)
}
static int
-same_source_net(const union nf_inet_addr *addr,
- const union nf_inet_addr *mask,
- const union nf_inet_addr *u3, u_int8_t family)
+same_source(const union nf_inet_addr *addr,
+ const union nf_inet_addr *u3, u_int8_t family)
{
- if (family == NFPROTO_IPV4) {
- return ntohl(addr->ip & mask->ip) -
- ntohl(u3->ip & mask->ip);
- } else {
- union nf_inet_addr lh, rh;
- unsigned int i;
-
- for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) {
- lh.ip6[i] = addr->ip6[i] & mask->ip6[i];
- rh.ip6[i] = u3->ip6[i] & mask->ip6[i];
- }
+ if (family == NFPROTO_IPV4)
+ return ntohl(addr->ip) - ntohl(u3->ip);
- return memcmp(&lh.ip6, &rh.ip6, sizeof(lh.ip6));
- }
+ return memcmp(addr->ip6, u3->ip6, sizeof(addr->ip6));
}
static bool add_hlist(struct hlist_head *head,
@@ -125,7 +106,6 @@ static bool add_hlist(struct hlist_head *head,
if (conn == NULL)
return false;
conn->tuple = *tuple;
- conn->addr = *addr;
hlist_add_head(&conn->node, head);
return true;
}
@@ -196,7 +176,7 @@ static void tree_nodes_free(struct rb_root *root,
static unsigned int
count_tree(struct net *net, struct rb_root *root,
const struct nf_conntrack_tuple *tuple,
- const union nf_inet_addr *addr, const union nf_inet_addr *mask,
+ const union nf_inet_addr *addr,
u8 family, const struct nf_conntrack_zone *zone)
{
struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES];
@@ -217,7 +197,7 @@ count_tree(struct net *net, struct rb_root *root,
rbconn = rb_entry(*rbnode, struct xt_connlimit_rb, node);
parent = *rbnode;
- diff = same_source_net(addr, mask, &rbconn->addr, family);
+ diff = same_source(addr, &rbconn->addr, family);
if (diff < 0) {
rbnode = &((*rbnode)->rb_left);
} else if (diff > 0) {
@@ -270,7 +250,6 @@ count_tree(struct net *net, struct rb_root *root,
}
conn->tuple = *tuple;
- conn->addr = *addr;
rbconn->addr = *addr;
INIT_HLIST_HEAD(&rbconn->hhead);
@@ -285,7 +264,6 @@ static int count_them(struct net *net,
struct xt_connlimit_data *data,
const struct nf_conntrack_tuple *tuple,
const union nf_inet_addr *addr,
- const union nf_inet_addr *mask,
u_int8_t family,
const struct nf_conntrack_zone *zone)
{
@@ -294,14 +272,14 @@ static int count_them(struct net *net,
u32 hash;
if (family == NFPROTO_IPV6)
- hash = connlimit_iphash6(addr, mask);
+ hash = connlimit_iphash6(addr);
else
- hash = connlimit_iphash(addr->ip & mask->ip);
+ hash = connlimit_iphash(addr->ip);
root = &data->climit_root[hash];
spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
- count = count_tree(net, root, tuple, addr, mask, family, zone);
+ count = count_tree(net, root, tuple, addr, family, zone);
spin_unlock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
@@ -332,16 +310,23 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (xt_family(par) == NFPROTO_IPV6) {
const struct ipv6hdr *iph = ipv6_hdr(skb);
+ unsigned int i;
+
memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ?
&iph->daddr : &iph->saddr, sizeof(addr.ip6));
+
+ for (i = 0; i < ARRAY_SIZE(addr.ip6); ++i)
+ addr.ip6[i] &= info->mask.ip6[i];
} else {
const struct iphdr *iph = ip_hdr(skb);
addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ?
iph->daddr : iph->saddr;
+
+ addr.ip &= info->mask.ip;
}
connections = count_them(net, info->data, tuple_ptr, &addr,
- &info->mask, xt_family(par), zone);
+ xt_family(par), zone);
if (connections == 0)
/* kmalloc failed, drop it entirely */
goto hotdrop;
diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h
index d0f38bc9af6d..ac709f0f197b 100644
--- a/net/netlabel/netlabel_addrlist.h
+++ b/net/netlabel/netlabel_addrlist.h
@@ -87,7 +87,7 @@ static inline struct netlbl_af4list *__af4list_valid_rcu(struct list_head *s,
struct list_head *i = s;
struct netlbl_af4list *n = __af4list_entry(s);
while (i != h && !n->valid) {
- i = rcu_dereference(i->next);
+ i = rcu_dereference(list_next_rcu(i));
n = __af4list_entry(i);
}
return n;
@@ -154,7 +154,7 @@ static inline struct netlbl_af6list *__af6list_valid_rcu(struct list_head *s,
struct list_head *i = s;
struct netlbl_af6list *n = __af6list_entry(s);
while (i != h && !n->valid) {
- i = rcu_dereference(i->next);
+ i = rcu_dereference(list_next_rcu(i));
n = __af6list_entry(i);
}
return n;
diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c
index d177dd066504..4d748975117d 100644
--- a/net/netlabel/netlabel_calipso.c
+++ b/net/netlabel/netlabel_calipso.c
@@ -393,7 +393,7 @@ EXPORT_SYMBOL(netlbl_calipso_ops_register);
static const struct netlbl_calipso_ops *netlbl_calipso_ops_get(void)
{
- return ACCESS_ONCE(calipso_ops);
+ return READ_ONCE(calipso_ops);
}
/**
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 26ded4239611..b9e0ee4e22f5 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -128,7 +128,6 @@ static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = {
};
static int netlink_dump(struct sock *sk);
-static void netlink_skb_destructor(struct sk_buff *skb);
/* nl_table locking explained:
* Lookup and traversal are protected with an RCU read-side lock. Insertion
@@ -2136,7 +2135,7 @@ static int netlink_dump(struct sock *sk)
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
struct module *module;
- int len, err = -ENOBUFS;
+ int err = -ENOBUFS;
int alloc_min_size;
int alloc_size;
@@ -2183,9 +2182,11 @@ static int netlink_dump(struct sock *sk)
skb_reserve(skb, skb_tailroom(skb) - alloc_size);
netlink_skb_set_owner_r(skb, sk);
- len = cb->dump(skb, cb);
+ if (nlk->dump_done_errno > 0)
+ nlk->dump_done_errno = cb->dump(skb, cb);
- if (len > 0) {
+ if (nlk->dump_done_errno > 0 ||
+ skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) {
mutex_unlock(nlk->cb_mutex);
if (sk_filter(sk, skb))
@@ -2195,13 +2196,15 @@ static int netlink_dump(struct sock *sk)
return 0;
}
- nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
- if (!nlh)
+ nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE,
+ sizeof(nlk->dump_done_errno), NLM_F_MULTI);
+ if (WARN_ON(!nlh))
goto errout_skb;
nl_dump_check_consistent(cb, nlh);
- memcpy(nlmsg_data(nlh), &len, sizeof(len));
+ memcpy(nlmsg_data(nlh), &nlk->dump_done_errno,
+ sizeof(nlk->dump_done_errno));
if (sk_filter(sk, skb))
kfree_skb(skb);
@@ -2273,6 +2276,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
}
nlk->cb_running = true;
+ nlk->dump_done_errno = INT_MAX;
mutex_unlock(nlk->cb_mutex);
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 028188597eaa..962de7b3c023 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -34,6 +34,7 @@ struct netlink_sock {
wait_queue_head_t wait;
bool bound;
bool cb_running;
+ int dump_done_errno;
struct netlink_callback cb;
struct mutex *cb_mutex;
struct mutex cb_def_mutex;
diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c
index 94d4e922af53..989ae647825e 100644
--- a/net/netrom/nr_loopback.c
+++ b/net/netrom/nr_loopback.c
@@ -18,7 +18,7 @@
static void nr_loopback_timer(unsigned long);
static struct sk_buff_head loopback_queue;
-static DEFINE_TIMER(loopback_timer, nr_loopback_timer, 0, 0);
+static DEFINE_TIMER(loopback_timer, nr_loopback_timer);
void __init nr_loopback_init(void)
{
diff --git a/net/nfc/core.c b/net/nfc/core.c
index e5e23c2cbe74..947a470f929d 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -1015,9 +1015,9 @@ exit:
device_unlock(&dev->dev);
}
-static void nfc_check_pres_timeout(unsigned long data)
+static void nfc_check_pres_timeout(struct timer_list *t)
{
- struct nfc_dev *dev = (struct nfc_dev *)data;
+ struct nfc_dev *dev = from_timer(dev, t, check_pres_timer);
schedule_work(&dev->check_pres_work);
}
@@ -1094,9 +1094,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
dev->targets_generation = 1;
if (ops->check_presence) {
- setup_timer(&dev->check_pres_timer, nfc_check_pres_timeout,
- (unsigned long)dev);
-
+ timer_setup(&dev->check_pres_timer, nfc_check_pres_timeout, 0);
INIT_WORK(&dev->check_pres_work, nfc_check_pres_work);
}
@@ -1105,7 +1103,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
err_free_dev:
kfree(dev);
- return ERR_PTR(rc);
+ return NULL;
}
EXPORT_SYMBOL(nfc_allocate_device);
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index de6dd37d04c7..ec0a8998e52d 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -650,6 +650,7 @@ static void digital_deactivate_target(struct nfc_dev *nfc_dev,
return;
}
+ digital_abort_cmd(ddev);
ddev->curr_protocol = 0;
}
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index a8a6e7814e09..ac8030c4bcf8 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -428,9 +428,9 @@ exit_noskb:
nfc_hci_driver_failure(hdev, r);
}
-static void nfc_hci_cmd_timeout(unsigned long data)
+static void nfc_hci_cmd_timeout(struct timer_list *t)
{
- struct nfc_hci_dev *hdev = (struct nfc_hci_dev *)data;
+ struct nfc_hci_dev *hdev = from_timer(hdev, t, cmd_timer);
schedule_work(&hdev->msg_tx_work);
}
@@ -1004,8 +1004,7 @@ int nfc_hci_register_device(struct nfc_hci_dev *hdev)
INIT_WORK(&hdev->msg_tx_work, nfc_hci_msg_tx_work);
- setup_timer(&hdev->cmd_timer, nfc_hci_cmd_timeout,
- (unsigned long)hdev);
+ timer_setup(&hdev->cmd_timer, nfc_hci_cmd_timeout, 0);
skb_queue_head_init(&hdev->rx_hcp_frags);
diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c
index 58df37eae1e8..fe988936ad92 100644
--- a/net/nfc/hci/llc_shdlc.c
+++ b/net/nfc/hci/llc_shdlc.c
@@ -580,27 +580,27 @@ static void llc_shdlc_handle_send_queue(struct llc_shdlc *shdlc)
}
}
-static void llc_shdlc_connect_timeout(unsigned long data)
+static void llc_shdlc_connect_timeout(struct timer_list *t)
{
- struct llc_shdlc *shdlc = (struct llc_shdlc *)data;
+ struct llc_shdlc *shdlc = from_timer(shdlc, t, connect_timer);
pr_debug("\n");
schedule_work(&shdlc->sm_work);
}
-static void llc_shdlc_t1_timeout(unsigned long data)
+static void llc_shdlc_t1_timeout(struct timer_list *t)
{
- struct llc_shdlc *shdlc = (struct llc_shdlc *)data;
+ struct llc_shdlc *shdlc = from_timer(shdlc, t, t1_timer);
pr_debug("SoftIRQ: need to send ack\n");
schedule_work(&shdlc->sm_work);
}
-static void llc_shdlc_t2_timeout(unsigned long data)
+static void llc_shdlc_t2_timeout(struct timer_list *t)
{
- struct llc_shdlc *shdlc = (struct llc_shdlc *)data;
+ struct llc_shdlc *shdlc = from_timer(shdlc, t, t2_timer);
pr_debug("SoftIRQ: need to retransmit\n");
@@ -763,14 +763,9 @@ static void *llc_shdlc_init(struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv,
mutex_init(&shdlc->state_mutex);
shdlc->state = SHDLC_DISCONNECTED;
- setup_timer(&shdlc->connect_timer, llc_shdlc_connect_timeout,
- (unsigned long)shdlc);
-
- setup_timer(&shdlc->t1_timer, llc_shdlc_t1_timeout,
- (unsigned long)shdlc);
-
- setup_timer(&shdlc->t2_timer, llc_shdlc_t2_timeout,
- (unsigned long)shdlc);
+ timer_setup(&shdlc->connect_timer, llc_shdlc_connect_timeout, 0);
+ timer_setup(&shdlc->t1_timer, llc_shdlc_t1_timeout, 0);
+ timer_setup(&shdlc->t2_timer, llc_shdlc_t2_timeout, 0);
shdlc->w = SHDLC_MAX_WINDOW;
shdlc->srej_support = SHDLC_SREJ_SUPPORT;
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index 7988185072e5..ef4026a23e80 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -242,9 +242,9 @@ static void nfc_llcp_timeout_work(struct work_struct *work)
nfc_dep_link_down(local->dev);
}
-static void nfc_llcp_symm_timer(unsigned long data)
+static void nfc_llcp_symm_timer(struct timer_list *t)
{
- struct nfc_llcp_local *local = (struct nfc_llcp_local *) data;
+ struct nfc_llcp_local *local = from_timer(local, t, link_timer);
pr_err("SYMM timeout\n");
@@ -285,9 +285,9 @@ static void nfc_llcp_sdreq_timeout_work(struct work_struct *work)
nfc_genl_llc_send_sdres(local->dev, &nl_sdres_list);
}
-static void nfc_llcp_sdreq_timer(unsigned long data)
+static void nfc_llcp_sdreq_timer(struct timer_list *t)
{
- struct nfc_llcp_local *local = (struct nfc_llcp_local *) data;
+ struct nfc_llcp_local *local = from_timer(local, t, sdreq_timer);
schedule_work(&local->sdreq_timeout_work);
}
@@ -1573,8 +1573,7 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
INIT_LIST_HEAD(&local->list);
kref_init(&local->ref);
mutex_init(&local->sdp_lock);
- setup_timer(&local->link_timer, nfc_llcp_symm_timer,
- (unsigned long)local);
+ timer_setup(&local->link_timer, nfc_llcp_symm_timer, 0);
skb_queue_head_init(&local->tx_queue);
INIT_WORK(&local->tx_work, nfc_llcp_tx_work);
@@ -1600,8 +1599,7 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
mutex_init(&local->sdreq_lock);
INIT_HLIST_HEAD(&local->pending_sdreqs);
- setup_timer(&local->sdreq_timer, nfc_llcp_sdreq_timer,
- (unsigned long)local);
+ timer_setup(&local->sdreq_timer, nfc_llcp_sdreq_timer, 0);
INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work);
list_add(&local->list, &llcp_devices);
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index b251fb936a27..c0b83dc9d993 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -75,7 +75,7 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target,
if (!hdr)
return -EMSGSIZE;
- genl_dump_check_consistent(cb, hdr, &nfc_genl_family);
+ genl_dump_check_consistent(cb, hdr);
if (nla_put_u32(msg, NFC_ATTR_TARGET_INDEX, target->idx) ||
nla_put_u32(msg, NFC_ATTR_PROTOCOLS, target->supported_protocols) ||
@@ -603,7 +603,7 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev,
return -EMSGSIZE;
if (cb)
- genl_dump_check_consistent(cb, hdr, &nfc_genl_family);
+ genl_dump_check_consistent(cb, hdr);
if (nfc_genl_setup_device_added(dev, msg))
goto nla_put_failure;
@@ -928,6 +928,30 @@ static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info)
return rc;
}
+static int nfc_genl_deactivate_target(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct nfc_dev *dev;
+ u32 device_idx, target_idx;
+ int rc;
+
+ if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
+ return -EINVAL;
+
+ device_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
+
+ dev = nfc_get_device(device_idx);
+ if (!dev)
+ return -ENODEV;
+
+ target_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]);
+
+ rc = nfc_deactivate_target(dev, target_idx, NFC_TARGET_MODE_SLEEP);
+
+ nfc_put_device(dev);
+ return rc;
+}
+
static int nfc_genl_dep_link_up(struct sk_buff *skb, struct genl_info *info)
{
struct nfc_dev *dev;
@@ -1332,7 +1356,7 @@ static int nfc_genl_send_se(struct sk_buff *msg, struct nfc_dev *dev,
goto nla_put_failure;
if (cb)
- genl_dump_check_consistent(cb, hdr, &nfc_genl_family);
+ genl_dump_check_consistent(cb, hdr);
if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
nla_put_u32(msg, NFC_ATTR_SE_INDEX, se->idx) ||
@@ -1751,6 +1775,11 @@ static const struct genl_ops nfc_genl_ops[] = {
.doit = nfc_genl_vendor_cmd,
.policy = nfc_genl_policy,
},
+ {
+ .cmd = NFC_CMD_DEACTIVATE_TARGET,
+ .doit = nfc_genl_deactivate_target,
+ .policy = nfc_genl_policy,
+ },
};
static struct genl_family nfc_genl_family __ro_after_init = {
diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
index 58fb827439a8..d7da99a0b0b8 100644
--- a/net/nsh/nsh.c
+++ b/net/nsh/nsh.c
@@ -14,6 +14,66 @@
#include <net/nsh.h>
#include <net/tun_proto.h>
+int nsh_push(struct sk_buff *skb, const struct nshhdr *pushed_nh)
+{
+ struct nshhdr *nh;
+ size_t length = nsh_hdr_len(pushed_nh);
+ u8 next_proto;
+
+ if (skb->mac_len) {
+ next_proto = TUN_P_ETHERNET;
+ } else {
+ next_proto = tun_p_from_eth_p(skb->protocol);
+ if (!next_proto)
+ return -EAFNOSUPPORT;
+ }
+
+ /* Add the NSH header */
+ if (skb_cow_head(skb, length) < 0)
+ return -ENOMEM;
+
+ skb_push(skb, length);
+ nh = (struct nshhdr *)(skb->data);
+ memcpy(nh, pushed_nh, length);
+ nh->np = next_proto;
+ skb_postpush_rcsum(skb, nh, length);
+
+ skb->protocol = htons(ETH_P_NSH);
+ skb_reset_mac_header(skb);
+ skb_reset_network_header(skb);
+ skb_reset_mac_len(skb);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nsh_push);
+
+int nsh_pop(struct sk_buff *skb)
+{
+ struct nshhdr *nh;
+ size_t length;
+ __be16 inner_proto;
+
+ if (!pskb_may_pull(skb, NSH_BASE_HDR_LEN))
+ return -ENOMEM;
+ nh = (struct nshhdr *)(skb->data);
+ length = nsh_hdr_len(nh);
+ inner_proto = tun_p_to_eth_p(nh->np);
+ if (!pskb_may_pull(skb, length))
+ return -ENOMEM;
+
+ if (!inner_proto)
+ return -EAFNOSUPPORT;
+
+ skb_pull_rcsum(skb, length);
+ skb_reset_mac_header(skb);
+ skb_reset_network_header(skb);
+ skb_reset_mac_len(skb);
+ skb->protocol = inner_proto;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nsh_pop);
+
static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index ce947292ae77..2650205cdaf9 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -14,6 +14,7 @@ config OPENVSWITCH
select MPLS
select NET_MPLS_GSO
select DST_CACHE
+ select NET_NSH
---help---
Open vSwitch is a multilayer Ethernet switch targeted at virtualized
environments. In addition to supporting a variety of features
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 299f4476cf44..41109c326f3a 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -12,6 +12,7 @@ openvswitch-y := \
flow.o \
flow_netlink.o \
flow_table.o \
+ meter.o \
vport.o \
vport-internal_dev.o \
vport-netdev.o
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index a551232daf61..30a5df27116e 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -43,6 +43,7 @@
#include "flow.h"
#include "conntrack.h"
#include "vport.h"
+#include "flow_netlink.h"
struct deferred_action {
struct sk_buff *skb;
@@ -380,6 +381,38 @@ static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
return 0;
}
+static int push_nsh(struct sk_buff *skb, struct sw_flow_key *key,
+ const struct nshhdr *nh)
+{
+ int err;
+
+ err = nsh_push(skb, nh);
+ if (err)
+ return err;
+
+ /* safe right before invalidate_flow_key */
+ key->mac_proto = MAC_PROTO_NONE;
+ invalidate_flow_key(key);
+ return 0;
+}
+
+static int pop_nsh(struct sk_buff *skb, struct sw_flow_key *key)
+{
+ int err;
+
+ err = nsh_pop(skb);
+ if (err)
+ return err;
+
+ /* safe right before invalidate_flow_key */
+ if (skb->protocol == htons(ETH_P_TEB))
+ key->mac_proto = MAC_PROTO_ETHERNET;
+ else
+ key->mac_proto = MAC_PROTO_NONE;
+ invalidate_flow_key(key);
+ return 0;
+}
+
static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
__be32 addr, __be32 new_addr)
{
@@ -602,6 +635,69 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
return 0;
}
+static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key,
+ const struct nlattr *a)
+{
+ struct nshhdr *nh;
+ size_t length;
+ int err;
+ u8 flags;
+ u8 ttl;
+ int i;
+
+ struct ovs_key_nsh key;
+ struct ovs_key_nsh mask;
+
+ err = nsh_key_from_nlattr(a, &key, &mask);
+ if (err)
+ return err;
+
+ /* Make sure the NSH base header is there */
+ if (!pskb_may_pull(skb, skb_network_offset(skb) + NSH_BASE_HDR_LEN))
+ return -ENOMEM;
+
+ nh = nsh_hdr(skb);
+ length = nsh_hdr_len(nh);
+
+ /* Make sure the whole NSH header is there */
+ err = skb_ensure_writable(skb, skb_network_offset(skb) +
+ length);
+ if (unlikely(err))
+ return err;
+
+ nh = nsh_hdr(skb);
+ skb_postpull_rcsum(skb, nh, length);
+ flags = nsh_get_flags(nh);
+ flags = OVS_MASKED(flags, key.base.flags, mask.base.flags);
+ flow_key->nsh.base.flags = flags;
+ ttl = nsh_get_ttl(nh);
+ ttl = OVS_MASKED(ttl, key.base.ttl, mask.base.ttl);
+ flow_key->nsh.base.ttl = ttl;
+ nsh_set_flags_and_ttl(nh, flags, ttl);
+ nh->path_hdr = OVS_MASKED(nh->path_hdr, key.base.path_hdr,
+ mask.base.path_hdr);
+ flow_key->nsh.base.path_hdr = nh->path_hdr;
+ switch (nh->mdtype) {
+ case NSH_M_TYPE1:
+ for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) {
+ nh->md1.context[i] =
+ OVS_MASKED(nh->md1.context[i], key.context[i],
+ mask.context[i]);
+ }
+ memcpy(flow_key->nsh.context, nh->md1.context,
+ sizeof(nh->md1.context));
+ break;
+ case NSH_M_TYPE2:
+ memset(flow_key->nsh.context, 0,
+ sizeof(flow_key->nsh.context));
+ break;
+ default:
+ return -EINVAL;
+ }
+ skb_postpush_rcsum(skb, nh, length);
+ return 0;
+}
+
/* Must follow skb_ensure_writable() since that can move the skb data. */
static void set_tp_port(struct sk_buff *skb, __be16 *port,
__be16 new_port, __sum16 *check)
@@ -1024,6 +1120,10 @@ static int execute_masked_set_action(struct sk_buff *skb,
get_mask(a, struct ovs_key_ethernet *));
break;
+ case OVS_KEY_ATTR_NSH:
+ err = set_nsh(skb, flow_key, a);
+ break;
+
case OVS_KEY_ATTR_IPV4:
err = set_ipv4(skb, flow_key, nla_data(a),
get_mask(a, struct ovs_key_ipv4 *));
@@ -1214,6 +1314,28 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
case OVS_ACTION_ATTR_POP_ETH:
err = pop_eth(skb, key);
break;
+
+ case OVS_ACTION_ATTR_PUSH_NSH: {
+ u8 buffer[NSH_HDR_MAX_LEN];
+ struct nshhdr *nh = (struct nshhdr *)buffer;
+
+ err = nsh_hdr_from_nlattr(nla_data(a), nh,
+ NSH_HDR_MAX_LEN);
+ if (unlikely(err))
+ break;
+ err = push_nsh(skb, key, nh);
+ break;
+ }
+
+ case OVS_ACTION_ATTR_POP_NSH:
+ err = pop_nsh(skb, key);
+ break;
+
+ case OVS_ACTION_ATTR_METER:
+ if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) {
+ consume_skb(skb);
+ return 0;
+ }
}
if (unlikely(err)) {
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 4d38ac044cee..0dab33fb9844 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -55,6 +55,7 @@
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
+#include "meter.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"
@@ -142,35 +143,6 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
const struct dp_upcall_info *,
uint32_t cutlen);
-/* Must be called with rcu_read_lock. */
-static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
-{
- struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
-
- if (dev) {
- struct vport *vport = ovs_internal_dev_get_vport(dev);
- if (vport)
- return vport->dp;
- }
-
- return NULL;
-}
-
-/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
- * returned dp pointer valid.
- */
-static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
-{
- struct datapath *dp;
-
- WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
- rcu_read_lock();
- dp = get_dp_rcu(net, dp_ifindex);
- rcu_read_unlock();
-
- return dp;
-}
-
/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
@@ -203,6 +175,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
ovs_flow_tbl_destroy(&dp->table);
free_percpu(dp->stats_percpu);
kfree(dp->ports);
+ ovs_meters_exit(dp);
kfree(dp);
}
@@ -1601,6 +1574,10 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
INIT_HLIST_HEAD(&dp->ports[i]);
+ err = ovs_meters_init(dp);
+ if (err)
+ goto err_destroy_ports_array;
+
/* Set up our datapath device. */
parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
parms.type = OVS_VPORT_TYPE_INTERNAL;
@@ -1629,7 +1606,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_dp_reset_user_features(skb, info);
}
- goto err_destroy_ports_array;
+ goto err_destroy_meters;
}
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
@@ -1644,8 +1621,10 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_notify(&dp_datapath_genl_family, reply, info);
return 0;
-err_destroy_ports_array:
+err_destroy_meters:
ovs_unlock();
+ ovs_meters_exit(dp);
+err_destroy_ports_array:
kfree(dp->ports);
err_destroy_percpu:
free_percpu(dp->stats_percpu);
@@ -2294,6 +2273,7 @@ static struct genl_family * const dp_genl_families[] = {
&dp_vport_genl_family,
&dp_flow_genl_family,
&dp_packet_genl_family,
+ &dp_meter_genl_family,
};
static void dp_unregister_genl(int n_families)
@@ -2474,3 +2454,4 @@ MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
+MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 4a104ef9e12c..523d65526766 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -30,6 +30,8 @@
#include "conntrack.h"
#include "flow.h"
#include "flow_table.h"
+#include "meter.h"
+#include "vport-internal_dev.h"
#define DP_MAX_PORTS USHRT_MAX
#define DP_VPORT_HASH_BUCKETS 1024
@@ -91,6 +93,9 @@ struct datapath {
u32 user_features;
u32 max_headroom;
+
+ /* Switch meters. */
+ struct hlist_head *meters;
};
/**
@@ -190,6 +195,36 @@ static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_n
return ovs_lookup_vport(dp, port_no);
}
+/* Must be called with rcu_read_lock. */
+static inline struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
+{
+ struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
+
+ if (dev) {
+ struct vport *vport = ovs_internal_dev_get_vport(dev);
+
+ if (vport)
+ return vport->dp;
+ }
+
+ return NULL;
+}
+
+/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
+ * returned dp pointer valid.
+ */
+static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
+{
+ struct datapath *dp;
+
+ WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
+ rcu_read_lock();
+ dp = get_dp_rcu(net, dp_ifindex);
+ rcu_read_unlock();
+
+ return dp;
+}
+
extern struct notifier_block ovs_dp_device_notifier;
extern struct genl_family dp_vport_genl_family;
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 8c94cef25a72..864ddb1e3642 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -46,6 +46,7 @@
#include <net/ipv6.h>
#include <net/mpls.h>
#include <net/ndisc.h>
+#include <net/nsh.h>
#include "conntrack.h"
#include "datapath.h"
@@ -490,6 +491,52 @@ invalid:
return 0;
}
+static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key)
+{
+ struct nshhdr *nh;
+ unsigned int nh_ofs = skb_network_offset(skb);
+ u8 version, length;
+ int err;
+
+ err = check_header(skb, nh_ofs + NSH_BASE_HDR_LEN);
+ if (unlikely(err))
+ return err;
+
+ nh = nsh_hdr(skb);
+ version = nsh_get_ver(nh);
+ length = nsh_hdr_len(nh);
+
+ if (version != 0)
+ return -EINVAL;
+
+ err = check_header(skb, nh_ofs + length);
+ if (unlikely(err))
+ return err;
+
+ nh = nsh_hdr(skb);
+ key->nsh.base.flags = nsh_get_flags(nh);
+ key->nsh.base.ttl = nsh_get_ttl(nh);
+ key->nsh.base.mdtype = nh->mdtype;
+ key->nsh.base.np = nh->np;
+ key->nsh.base.path_hdr = nh->path_hdr;
+ switch (key->nsh.base.mdtype) {
+ case NSH_M_TYPE1:
+ if (length != NSH_M_TYPE1_LEN)
+ return -EINVAL;
+ memcpy(key->nsh.context, nh->md1.context,
+ sizeof(nh->md1));
+ break;
+ case NSH_M_TYPE2:
+ memset(key->nsh.context, 0,
+ sizeof(nh->md1));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/**
* key_extract - extracts a flow key from an Ethernet frame.
* @skb: sk_buff that contains the frame, with skb->data pointing to the
@@ -735,6 +782,10 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
memset(&key->tp, 0, sizeof(key->tp));
}
}
+ } else if (key->eth.type == htons(ETH_P_NSH)) {
+ error = parse_nsh(skb, key);
+ if (error)
+ return error;
}
return 0;
}
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 1875bba4f865..c670dd24b8b7 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -35,6 +35,7 @@
#include <net/inet_ecn.h>
#include <net/ip_tunnels.h>
#include <net/dst_metadata.h>
+#include <net/nsh.h>
struct sk_buff;
@@ -66,6 +67,11 @@ struct vlan_head {
(offsetof(struct sw_flow_key, recirc_id) + \
FIELD_SIZEOF(struct sw_flow_key, recirc_id))
+struct ovs_key_nsh {
+ struct ovs_nsh_key_base base;
+ __be32 context[NSH_MD1_CONTEXT_SIZE];
+};
+
struct sw_flow_key {
u8 tun_opts[IP_TUNNEL_OPTS_MAX];
u8 tun_opts_len;
@@ -143,6 +149,7 @@ struct sw_flow_key {
} nd;
};
} ipv6;
+ struct ovs_key_nsh nsh; /* network service header */
};
struct {
/* Connection tracking fields not packed above. */
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index dc0d79092e74..dc424798ba6f 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -48,6 +48,7 @@
#include <net/ndisc.h>
#include <net/mpls.h>
#include <net/vxlan.h>
+#include <net/tun_proto.h>
#include <net/erspan.h>
#include "flow_netlink.h"
@@ -80,13 +81,16 @@ static bool actions_may_change_flow(const struct nlattr *actions)
case OVS_ACTION_ATTR_HASH:
case OVS_ACTION_ATTR_POP_ETH:
case OVS_ACTION_ATTR_POP_MPLS:
+ case OVS_ACTION_ATTR_POP_NSH:
case OVS_ACTION_ATTR_POP_VLAN:
case OVS_ACTION_ATTR_PUSH_ETH:
case OVS_ACTION_ATTR_PUSH_MPLS:
+ case OVS_ACTION_ATTR_PUSH_NSH:
case OVS_ACTION_ATTR_PUSH_VLAN:
case OVS_ACTION_ATTR_SAMPLE:
case OVS_ACTION_ATTR_SET:
case OVS_ACTION_ATTR_SET_MASKED:
+ case OVS_ACTION_ATTR_METER:
default:
return true;
}
@@ -175,7 +179,8 @@ static bool match_validate(const struct sw_flow_match *match,
| (1 << OVS_KEY_ATTR_ICMPV6)
| (1 << OVS_KEY_ATTR_ARP)
| (1 << OVS_KEY_ATTR_ND)
- | (1 << OVS_KEY_ATTR_MPLS));
+ | (1 << OVS_KEY_ATTR_MPLS)
+ | (1 << OVS_KEY_ATTR_NSH));
/* Always allowed mask fields. */
mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
@@ -284,6 +289,14 @@ static bool match_validate(const struct sw_flow_match *match,
}
}
+ if (match->key->eth.type == htons(ETH_P_NSH)) {
+ key_expected |= 1 << OVS_KEY_ATTR_NSH;
+ if (match->mask &&
+ match->mask->key.eth.type == htons(0xffff)) {
+ mask_allowed |= 1 << OVS_KEY_ATTR_NSH;
+ }
+ }
+
if ((key_attrs & key_expected) != key_expected) {
/* Key attributes check failed. */
OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)",
@@ -325,12 +338,25 @@ size_t ovs_tun_key_attr_size(void)
+ nla_total_size(4); /* OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS */
}
+static size_t ovs_nsh_key_attr_size(void)
+{
+ /* Whenever adding new OVS_NSH_KEY_ FIELDS, we should consider
+ * updating this function.
+ */
+ return nla_total_size(NSH_BASE_HDR_LEN) /* OVS_NSH_KEY_ATTR_BASE */
+ /* OVS_NSH_KEY_ATTR_MD1 and OVS_NSH_KEY_ATTR_MD2 are
+ * mutually exclusive, so the bigger one can cover
+ * the small one.
+ */
+ + nla_total_size(NSH_CTX_HDRS_MAX_LEN);
+}
+
size_t ovs_key_attr_size(void)
{
/* Whenever adding new OVS_KEY_ FIELDS, we should consider
* updating this function.
*/
- BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 28);
+ BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 29);
return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
+ nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
@@ -344,6 +370,8 @@ size_t ovs_key_attr_size(void)
+ nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */
+ nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */
+ nla_total_size(40) /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */
+ + nla_total_size(0) /* OVS_KEY_ATTR_NSH */
+ + ovs_nsh_key_attr_size()
+ nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
+ nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
+ nla_total_size(4) /* OVS_KEY_ATTR_VLAN */
@@ -377,6 +405,13 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1]
[OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = sizeof(u32) },
};
+static const struct ovs_len_tbl
+ovs_nsh_key_attr_lens[OVS_NSH_KEY_ATTR_MAX + 1] = {
+ [OVS_NSH_KEY_ATTR_BASE] = { .len = sizeof(struct ovs_nsh_key_base) },
+ [OVS_NSH_KEY_ATTR_MD1] = { .len = sizeof(struct ovs_nsh_key_md1) },
+ [OVS_NSH_KEY_ATTR_MD2] = { .len = OVS_ATTR_VARIABLE },
+};
+
/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_ENCAP] = { .len = OVS_ATTR_NESTED },
@@ -409,6 +444,8 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
.len = sizeof(struct ovs_key_ct_tuple_ipv4) },
[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = {
.len = sizeof(struct ovs_key_ct_tuple_ipv6) },
+ [OVS_KEY_ATTR_NSH] = { .len = OVS_ATTR_NESTED,
+ .next = ovs_nsh_key_attr_lens, },
};
static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
@@ -1227,6 +1264,221 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
return 0;
}
+int nsh_hdr_from_nlattr(const struct nlattr *attr,
+ struct nshhdr *nh, size_t size)
+{
+ struct nlattr *a;
+ int rem;
+ u8 flags = 0;
+ u8 ttl = 0;
+ int mdlen = 0;
+
+ /* validate_nsh has check this, so we needn't do duplicate check here
+ */
+ if (size < NSH_BASE_HDR_LEN)
+ return -ENOBUFS;
+
+ nla_for_each_nested(a, attr, rem) {
+ int type = nla_type(a);
+
+ switch (type) {
+ case OVS_NSH_KEY_ATTR_BASE: {
+ const struct ovs_nsh_key_base *base = nla_data(a);
+
+ flags = base->flags;
+ ttl = base->ttl;
+ nh->np = base->np;
+ nh->mdtype = base->mdtype;
+ nh->path_hdr = base->path_hdr;
+ break;
+ }
+ case OVS_NSH_KEY_ATTR_MD1:
+ mdlen = nla_len(a);
+ if (mdlen > size - NSH_BASE_HDR_LEN)
+ return -ENOBUFS;
+ memcpy(&nh->md1, nla_data(a), mdlen);
+ break;
+
+ case OVS_NSH_KEY_ATTR_MD2:
+ mdlen = nla_len(a);
+ if (mdlen > size - NSH_BASE_HDR_LEN)
+ return -ENOBUFS;
+ memcpy(&nh->md2, nla_data(a), mdlen);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+ }
+
+ /* nsh header length = NSH_BASE_HDR_LEN + mdlen */
+ nh->ver_flags_ttl_len = 0;
+ nsh_set_flags_ttl_len(nh, flags, ttl, NSH_BASE_HDR_LEN + mdlen);
+
+ return 0;
+}
+
+int nsh_key_from_nlattr(const struct nlattr *attr,
+ struct ovs_key_nsh *nsh, struct ovs_key_nsh *nsh_mask)
+{
+ struct nlattr *a;
+ int rem;
+
+ /* validate_nsh has check this, so we needn't do duplicate check here
+ */
+ nla_for_each_nested(a, attr, rem) {
+ int type = nla_type(a);
+
+ switch (type) {
+ case OVS_NSH_KEY_ATTR_BASE: {
+ const struct ovs_nsh_key_base *base = nla_data(a);
+ const struct ovs_nsh_key_base *base_mask = base + 1;
+
+ nsh->base = *base;
+ nsh_mask->base = *base_mask;
+ break;
+ }
+ case OVS_NSH_KEY_ATTR_MD1: {
+ const struct ovs_nsh_key_md1 *md1 = nla_data(a);
+ const struct ovs_nsh_key_md1 *md1_mask = md1 + 1;
+
+ memcpy(nsh->context, md1->context, sizeof(*md1));
+ memcpy(nsh_mask->context, md1_mask->context,
+ sizeof(*md1_mask));
+ break;
+ }
+ case OVS_NSH_KEY_ATTR_MD2:
+ /* Not supported yet */
+ return -ENOTSUPP;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int nsh_key_put_from_nlattr(const struct nlattr *attr,
+ struct sw_flow_match *match, bool is_mask,
+ bool is_push_nsh, bool log)
+{
+ struct nlattr *a;
+ int rem;
+ bool has_base = false;
+ bool has_md1 = false;
+ bool has_md2 = false;
+ u8 mdtype = 0;
+ int mdlen = 0;
+
+ if (WARN_ON(is_push_nsh && is_mask))
+ return -EINVAL;
+
+ nla_for_each_nested(a, attr, rem) {
+ int type = nla_type(a);
+ int i;
+
+ if (type > OVS_NSH_KEY_ATTR_MAX) {
+ OVS_NLERR(log, "nsh attr %d is out of range max %d",
+ type, OVS_NSH_KEY_ATTR_MAX);
+ return -EINVAL;
+ }
+
+ if (!check_attr_len(nla_len(a),
+ ovs_nsh_key_attr_lens[type].len)) {
+ OVS_NLERR(
+ log,
+ "nsh attr %d has unexpected len %d expected %d",
+ type,
+ nla_len(a),
+ ovs_nsh_key_attr_lens[type].len
+ );
+ return -EINVAL;
+ }
+
+ switch (type) {
+ case OVS_NSH_KEY_ATTR_BASE: {
+ const struct ovs_nsh_key_base *base = nla_data(a);
+
+ has_base = true;
+ mdtype = base->mdtype;
+ SW_FLOW_KEY_PUT(match, nsh.base.flags,
+ base->flags, is_mask);
+ SW_FLOW_KEY_PUT(match, nsh.base.ttl,
+ base->ttl, is_mask);
+ SW_FLOW_KEY_PUT(match, nsh.base.mdtype,
+ base->mdtype, is_mask);
+ SW_FLOW_KEY_PUT(match, nsh.base.np,
+ base->np, is_mask);
+ SW_FLOW_KEY_PUT(match, nsh.base.path_hdr,
+ base->path_hdr, is_mask);
+ break;
+ }
+ case OVS_NSH_KEY_ATTR_MD1: {
+ const struct ovs_nsh_key_md1 *md1 = nla_data(a);
+
+ has_md1 = true;
+ for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++)
+ SW_FLOW_KEY_PUT(match, nsh.context[i],
+ md1->context[i], is_mask);
+ break;
+ }
+ case OVS_NSH_KEY_ATTR_MD2:
+ if (!is_push_nsh) /* Not supported MD type 2 yet */
+ return -ENOTSUPP;
+
+ has_md2 = true;
+ mdlen = nla_len(a);
+ if (mdlen > NSH_CTX_HDRS_MAX_LEN || mdlen <= 0) {
+ OVS_NLERR(
+ log,
+ "Invalid MD length %d for MD type %d",
+ mdlen,
+ mdtype
+ );
+ return -EINVAL;
+ }
+ break;
+ default:
+ OVS_NLERR(log, "Unknown nsh attribute %d",
+ type);
+ return -EINVAL;
+ }
+ }
+
+ if (rem > 0) {
+ OVS_NLERR(log, "nsh attribute has %d unknown bytes.", rem);
+ return -EINVAL;
+ }
+
+ if (has_md1 && has_md2) {
+ OVS_NLERR(
+ 1,
+ "invalid nsh attribute: md1 and md2 are exclusive."
+ );
+ return -EINVAL;
+ }
+
+ if (!is_mask) {
+ if ((has_md1 && mdtype != NSH_M_TYPE1) ||
+ (has_md2 && mdtype != NSH_M_TYPE2)) {
+ OVS_NLERR(1, "nsh attribute has unmatched MD type %d.",
+ mdtype);
+ return -EINVAL;
+ }
+
+ if (is_push_nsh &&
+ (!has_base || (!has_md1 && !has_md2))) {
+ OVS_NLERR(
+ 1,
+ "push_nsh: missing base or metadata attributes"
+ );
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
u64 attrs, const struct nlattr **a,
bool is_mask, bool log)
@@ -1354,6 +1606,13 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
attrs &= ~(1 << OVS_KEY_ATTR_ARP);
}
+ if (attrs & (1 << OVS_KEY_ATTR_NSH)) {
+ if (nsh_key_put_from_nlattr(a[OVS_KEY_ATTR_NSH], match,
+ is_mask, false, log) < 0)
+ return -EINVAL;
+ attrs &= ~(1 << OVS_KEY_ATTR_NSH);
+ }
+
if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
const struct ovs_key_mpls *mpls_key;
@@ -1670,6 +1929,34 @@ static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh,
return 0;
}
+static int nsh_key_to_nlattr(const struct ovs_key_nsh *nsh, bool is_mask,
+ struct sk_buff *skb)
+{
+ struct nlattr *start;
+
+ start = nla_nest_start(skb, OVS_KEY_ATTR_NSH);
+ if (!start)
+ return -EMSGSIZE;
+
+ if (nla_put(skb, OVS_NSH_KEY_ATTR_BASE, sizeof(nsh->base), &nsh->base))
+ goto nla_put_failure;
+
+ if (is_mask || nsh->base.mdtype == NSH_M_TYPE1) {
+ if (nla_put(skb, OVS_NSH_KEY_ATTR_MD1,
+ sizeof(nsh->context), nsh->context))
+ goto nla_put_failure;
+ }
+
+ /* Don't support MD type 2 yet */
+
+ nla_nest_end(skb, start);
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
const struct sw_flow_key *output, bool is_mask,
struct sk_buff *skb)
@@ -1798,6 +2085,9 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
ipv6_key->ipv6_tclass = output->ip.tos;
ipv6_key->ipv6_hlimit = output->ip.ttl;
ipv6_key->ipv6_frag = output->ip.frag;
+ } else if (swkey->eth.type == htons(ETH_P_NSH)) {
+ if (nsh_key_to_nlattr(&output->nsh, is_mask, skb))
+ goto nla_put_failure;
} else if (swkey->eth.type == htons(ETH_P_ARP) ||
swkey->eth.type == htons(ETH_P_RARP)) {
struct ovs_key_arp *arp_key;
@@ -2292,6 +2582,19 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
return err;
}
+static bool validate_nsh(const struct nlattr *attr, bool is_mask,
+ bool is_push_nsh, bool log)
+{
+ struct sw_flow_match match;
+ struct sw_flow_key key;
+ int ret = 0;
+
+ ovs_match_init(&match, &key, true, NULL);
+ ret = nsh_key_put_from_nlattr(attr, &match, is_mask,
+ is_push_nsh, log);
+ return !ret;
+}
+
/* Return false if there are any non-masked bits set.
* Mask follows data immediately, before any netlink padding.
*/
@@ -2434,6 +2737,13 @@ static int validate_set(const struct nlattr *a,
break;
+ case OVS_KEY_ATTR_NSH:
+ if (eth_type != htons(ETH_P_NSH))
+ return -EINVAL;
+ if (!validate_nsh(nla_data(a), masked, false, log))
+ return -EINVAL;
+ break;
+
default:
return -EINVAL;
}
@@ -2533,6 +2843,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
[OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
[OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
[OVS_ACTION_ATTR_POP_ETH] = 0,
+ [OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1,
+ [OVS_ACTION_ATTR_POP_NSH] = 0,
+ [OVS_ACTION_ATTR_METER] = sizeof(u32),
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
@@ -2690,6 +3003,38 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
mac_proto = MAC_PROTO_ETHERNET;
break;
+ case OVS_ACTION_ATTR_PUSH_NSH:
+ if (mac_proto != MAC_PROTO_ETHERNET) {
+ u8 next_proto;
+
+ next_proto = tun_p_from_eth_p(eth_type);
+ if (!next_proto)
+ return -EINVAL;
+ }
+ mac_proto = MAC_PROTO_NONE;
+ if (!validate_nsh(nla_data(a), false, true, true))
+ return -EINVAL;
+ break;
+
+ case OVS_ACTION_ATTR_POP_NSH: {
+ __be16 inner_proto;
+
+ if (eth_type != htons(ETH_P_NSH))
+ return -EINVAL;
+ inner_proto = tun_p_to_eth_p(key->nsh.base.np);
+ if (!inner_proto)
+ return -EINVAL;
+ if (key->nsh.base.np == TUN_P_ETHERNET)
+ mac_proto = MAC_PROTO_ETHERNET;
+ else
+ mac_proto = MAC_PROTO_NONE;
+ break;
+ }
+
+ case OVS_ACTION_ATTR_METER:
+ /* Non-existent meters are simply ignored. */
+ break;
+
default:
OVS_NLERR(log, "Unknown Action type %d", type);
return -EINVAL;
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index 929c665ac3aa..6657606b2b47 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -79,4 +79,9 @@ int ovs_nla_put_actions(const struct nlattr *attr,
void ovs_nla_free_flow_actions(struct sw_flow_actions *);
void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *);
+int nsh_key_from_nlattr(const struct nlattr *attr, struct ovs_key_nsh *nsh,
+ struct ovs_key_nsh *nsh_mask);
+int nsh_hdr_from_nlattr(const struct nlattr *attr, struct nshhdr *nh,
+ size_t size);
+
#endif /* flow_netlink.h */
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
new file mode 100644
index 000000000000..3fbfc78991ac
--- /dev/null
+++ b/net/openvswitch/meter.c
@@ -0,0 +1,597 @@
+/*
+ * Copyright (c) 2017 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/if.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/kernel.h>
+#include <linux/openvswitch.h>
+#include <linux/netlink.h>
+#include <linux/rculist.h>
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "datapath.h"
+#include "meter.h"
+
+#define METER_HASH_BUCKETS 1024
+
+static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
+ [OVS_METER_ATTR_ID] = { .type = NLA_U32, },
+ [OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
+ [OVS_METER_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
+ [OVS_METER_ATTR_BANDS] = { .type = NLA_NESTED },
+ [OVS_METER_ATTR_USED] = { .type = NLA_U64 },
+ [OVS_METER_ATTR_CLEAR] = { .type = NLA_FLAG },
+ [OVS_METER_ATTR_MAX_METERS] = { .type = NLA_U32 },
+ [OVS_METER_ATTR_MAX_BANDS] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = {
+ [OVS_BAND_ATTR_TYPE] = { .type = NLA_U32, },
+ [OVS_BAND_ATTR_RATE] = { .type = NLA_U32, },
+ [OVS_BAND_ATTR_BURST] = { .type = NLA_U32, },
+ [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
+};
+
+static void ovs_meter_free(struct dp_meter *meter)
+{
+ if (!meter)
+ return;
+
+ kfree_rcu(meter, rcu);
+}
+
+static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
+ u32 meter_id)
+{
+ return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
+}
+
+/* Call with ovs_mutex or RCU read lock. */
+static struct dp_meter *lookup_meter(const struct datapath *dp,
+ u32 meter_id)
+{
+ struct dp_meter *meter;
+ struct hlist_head *head;
+
+ head = meter_hash_bucket(dp, meter_id);
+ hlist_for_each_entry_rcu(meter, head, dp_hash_node) {
+ if (meter->id == meter_id)
+ return meter;
+ }
+ return NULL;
+}
+
+static void attach_meter(struct datapath *dp, struct dp_meter *meter)
+{
+ struct hlist_head *head = meter_hash_bucket(dp, meter->id);
+
+ hlist_add_head_rcu(&meter->dp_hash_node, head);
+}
+
+static void detach_meter(struct dp_meter *meter)
+{
+ ASSERT_OVSL();
+ if (meter)
+ hlist_del_rcu(&meter->dp_hash_node);
+}
+
+static struct sk_buff *
+ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd,
+ struct ovs_header **ovs_reply_header)
+{
+ struct sk_buff *skb;
+ struct ovs_header *ovs_header = info->userhdr;
+
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
+
+ *ovs_reply_header = genlmsg_put(skb, info->snd_portid,
+ info->snd_seq,
+ &dp_meter_genl_family, 0, cmd);
+ if (!*ovs_reply_header) {
+ nlmsg_free(skb);
+ return ERR_PTR(-EMSGSIZE);
+ }
+ (*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex;
+
+ return skb;
+}
+
+static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
+ struct dp_meter *meter)
+{
+ struct nlattr *nla;
+ struct dp_meter_band *band;
+ u16 i;
+
+ if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id))
+ goto error;
+
+ if (!meter)
+ return 0;
+
+ if (nla_put(reply, OVS_METER_ATTR_STATS,
+ sizeof(struct ovs_flow_stats), &meter->stats) ||
+ nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
+ OVS_METER_ATTR_PAD))
+ goto error;
+
+ nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS);
+ if (!nla)
+ goto error;
+
+ band = meter->bands;
+
+ for (i = 0; i < meter->n_bands; ++i, ++band) {
+ struct nlattr *band_nla;
+
+ band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC);
+ if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS,
+ sizeof(struct ovs_flow_stats),
+ &band->stats))
+ goto error;
+ nla_nest_end(reply, band_nla);
+ }
+ nla_nest_end(reply, nla);
+
+ return 0;
+error:
+ return -EMSGSIZE;
+}
+
+static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
+{
+ struct sk_buff *reply;
+ struct ovs_header *ovs_reply_header;
+ struct nlattr *nla, *band_nla;
+ int err;
+
+ reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES,
+ &ovs_reply_header);
+ if (IS_ERR(reply))
+ return PTR_ERR(reply);
+
+ if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) ||
+ nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
+ goto nla_put_failure;
+
+ nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS);
+ if (!nla)
+ goto nla_put_failure;
+
+ band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC);
+ if (!band_nla)
+ goto nla_put_failure;
+ /* Currently only DROP band type is supported. */
+ if (nla_put_u32(reply, OVS_BAND_ATTR_TYPE, OVS_METER_BAND_TYPE_DROP))
+ goto nla_put_failure;
+ nla_nest_end(reply, band_nla);
+ nla_nest_end(reply, nla);
+
+ genlmsg_end(reply, ovs_reply_header);
+ return genlmsg_reply(reply, info);
+
+nla_put_failure:
+ nlmsg_free(reply);
+ err = -EMSGSIZE;
+ return err;
+}
+
+static struct dp_meter *dp_meter_create(struct nlattr **a)
+{
+ struct nlattr *nla;
+ int rem;
+ u16 n_bands = 0;
+ struct dp_meter *meter;
+ struct dp_meter_band *band;
+ int err;
+
+ /* Validate attributes, count the bands. */
+ if (!a[OVS_METER_ATTR_BANDS])
+ return ERR_PTR(-EINVAL);
+
+ nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem)
+ if (++n_bands > DP_MAX_BANDS)
+ return ERR_PTR(-EINVAL);
+
+ /* Allocate and set up the meter before locking anything. */
+ meter = kzalloc(n_bands * sizeof(struct dp_meter_band) +
+ sizeof(*meter), GFP_KERNEL);
+ if (!meter)
+ return ERR_PTR(-ENOMEM);
+
+ meter->used = div_u64(ktime_get_ns(), 1000 * 1000);
+ meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0;
+ meter->keep_stats = !a[OVS_METER_ATTR_CLEAR];
+ spin_lock_init(&meter->lock);
+ if (meter->keep_stats && a[OVS_METER_ATTR_STATS]) {
+ meter->stats = *(struct ovs_flow_stats *)
+ nla_data(a[OVS_METER_ATTR_STATS]);
+ }
+ meter->n_bands = n_bands;
+
+ /* Set up meter bands. */
+ band = meter->bands;
+ nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) {
+ struct nlattr *attr[OVS_BAND_ATTR_MAX + 1];
+ u32 band_max_delta_t;
+
+ err = nla_parse((struct nlattr **)&attr, OVS_BAND_ATTR_MAX,
+ nla_data(nla), nla_len(nla), band_policy,
+ NULL);
+ if (err)
+ goto exit_free_meter;
+
+ if (!attr[OVS_BAND_ATTR_TYPE] ||
+ !attr[OVS_BAND_ATTR_RATE] ||
+ !attr[OVS_BAND_ATTR_BURST]) {
+ err = -EINVAL;
+ goto exit_free_meter;
+ }
+
+ band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]);
+ band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]);
+ band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]);
+ /* Figure out max delta_t that is enough to fill any bucket.
+ * Keep max_delta_t size to the bucket units:
+ * pkts => 1/1000 packets, kilobits => bits.
+ */
+ band_max_delta_t = (band->burst_size + band->rate) * 1000;
+ /* Start with a full bucket. */
+ band->bucket = band_max_delta_t;
+ if (band_max_delta_t > meter->max_delta_t)
+ meter->max_delta_t = band_max_delta_t;
+ band++;
+ }
+
+ return meter;
+
+exit_free_meter:
+ kfree(meter);
+ return ERR_PTR(err);
+}
+
+static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr **a = info->attrs;
+ struct dp_meter *meter, *old_meter;
+ struct sk_buff *reply;
+ struct ovs_header *ovs_reply_header;
+ struct ovs_header *ovs_header = info->userhdr;
+ struct datapath *dp;
+ int err;
+ u32 meter_id;
+ bool failed;
+
+ meter = dp_meter_create(a);
+ if (IS_ERR_OR_NULL(meter))
+ return PTR_ERR(meter);
+
+ reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET,
+ &ovs_reply_header);
+ if (IS_ERR(reply)) {
+ err = PTR_ERR(reply);
+ goto exit_free_meter;
+ }
+
+ ovs_lock();
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ if (!dp) {
+ err = -ENODEV;
+ goto exit_unlock;
+ }
+
+ if (!a[OVS_METER_ATTR_ID]) {
+ err = -ENODEV;
+ goto exit_unlock;
+ }
+
+ meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
+
+ /* Cannot fail after this. */
+ old_meter = lookup_meter(dp, meter_id);
+ detach_meter(old_meter);
+ attach_meter(dp, meter);
+ ovs_unlock();
+
+ /* Build response with the meter_id and stats from
+ * the old meter, if any.
+ */
+ failed = nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id);
+ WARN_ON(failed);
+ if (old_meter) {
+ spin_lock_bh(&old_meter->lock);
+ if (old_meter->keep_stats) {
+ err = ovs_meter_cmd_reply_stats(reply, meter_id,
+ old_meter);
+ WARN_ON(err);
+ }
+ spin_unlock_bh(&old_meter->lock);
+ ovs_meter_free(old_meter);
+ }
+
+ genlmsg_end(reply, ovs_reply_header);
+ return genlmsg_reply(reply, info);
+
+exit_unlock:
+ ovs_unlock();
+ nlmsg_free(reply);
+exit_free_meter:
+ kfree(meter);
+ return err;
+}
+
+static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr **a = info->attrs;
+ u32 meter_id;
+ struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_reply_header;
+ struct datapath *dp;
+ int err;
+ struct sk_buff *reply;
+ struct dp_meter *meter;
+
+ if (!a[OVS_METER_ATTR_ID])
+ return -EINVAL;
+
+ meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
+
+ reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_GET,
+ &ovs_reply_header);
+ if (IS_ERR(reply))
+ return PTR_ERR(reply);
+
+ ovs_lock();
+
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ if (!dp) {
+ err = -ENODEV;
+ goto exit_unlock;
+ }
+
+ /* Locate meter, copy stats. */
+ meter = lookup_meter(dp, meter_id);
+ if (!meter) {
+ err = -ENOENT;
+ goto exit_unlock;
+ }
+
+ spin_lock_bh(&meter->lock);
+ err = ovs_meter_cmd_reply_stats(reply, meter_id, meter);
+ spin_unlock_bh(&meter->lock);
+ if (err)
+ goto exit_unlock;
+
+ ovs_unlock();
+
+ genlmsg_end(reply, ovs_reply_header);
+ return genlmsg_reply(reply, info);
+
+exit_unlock:
+ ovs_unlock();
+ nlmsg_free(reply);
+ return err;
+}
+
+static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr **a = info->attrs;
+ u32 meter_id;
+ struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_reply_header;
+ struct datapath *dp;
+ int err;
+ struct sk_buff *reply;
+ struct dp_meter *old_meter;
+
+ if (!a[OVS_METER_ATTR_ID])
+ return -EINVAL;
+ meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
+
+ reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL,
+ &ovs_reply_header);
+ if (IS_ERR(reply))
+ return PTR_ERR(reply);
+
+ ovs_lock();
+
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ if (!dp) {
+ err = -ENODEV;
+ goto exit_unlock;
+ }
+
+ old_meter = lookup_meter(dp, meter_id);
+ if (old_meter) {
+ spin_lock_bh(&old_meter->lock);
+ err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
+ WARN_ON(err);
+ spin_unlock_bh(&old_meter->lock);
+ detach_meter(old_meter);
+ }
+ ovs_unlock();
+ ovs_meter_free(old_meter);
+ genlmsg_end(reply, ovs_reply_header);
+ return genlmsg_reply(reply, info);
+
+exit_unlock:
+ ovs_unlock();
+ nlmsg_free(reply);
+ return err;
+}
+
+/* Meter action execution.
+ *
+ * Return true 'meter_id' drop band is triggered. The 'skb' should be
+ * dropped by the caller'.
+ */
+bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
+ struct sw_flow_key *key, u32 meter_id)
+{
+ struct dp_meter *meter;
+ struct dp_meter_band *band;
+ long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000);
+ long long int long_delta_ms;
+ u32 delta_ms;
+ u32 cost;
+ int i, band_exceeded_max = -1;
+ u32 band_exceeded_rate = 0;
+
+ meter = lookup_meter(dp, meter_id);
+ /* Do not drop the packet when there is no meter. */
+ if (!meter)
+ return false;
+
+ /* Lock the meter while using it. */
+ spin_lock(&meter->lock);
+
+ long_delta_ms = (now_ms - meter->used); /* ms */
+
+ /* Make sure delta_ms will not be too large, so that bucket will not
+ * wrap around below.
+ */
+ delta_ms = (long_delta_ms > (long long int)meter->max_delta_t)
+ ? meter->max_delta_t : (u32)long_delta_ms;
+
+ /* Update meter statistics.
+ */
+ meter->used = now_ms;
+ meter->stats.n_packets += 1;
+ meter->stats.n_bytes += skb->len;
+
+ /* Bucket rate is either in kilobits per second, or in packets per
+ * second. We maintain the bucket in the units of either bits or
+ * 1/1000th of a packet, correspondingly.
+ * Then, when rate is multiplied with milliseconds, we get the
+ * bucket units:
+ * msec * kbps = bits, and
+ * msec * packets/sec = 1/1000 packets.
+ *
+ * 'cost' is the number of bucket units in this packet.
+ */
+ cost = (meter->kbps) ? skb->len * 8 : 1000;
+
+ /* Update all bands and find the one hit with the highest rate. */
+ for (i = 0; i < meter->n_bands; ++i) {
+ long long int max_bucket_size;
+
+ band = &meter->bands[i];
+ max_bucket_size = (band->burst_size + band->rate) * 1000;
+
+ band->bucket += delta_ms * band->rate;
+ if (band->bucket > max_bucket_size)
+ band->bucket = max_bucket_size;
+
+ if (band->bucket >= cost) {
+ band->bucket -= cost;
+ } else if (band->rate > band_exceeded_rate) {
+ band_exceeded_rate = band->rate;
+ band_exceeded_max = i;
+ }
+ }
+
+ if (band_exceeded_max >= 0) {
+ /* Update band statistics. */
+ band = &meter->bands[band_exceeded_max];
+ band->stats.n_packets += 1;
+ band->stats.n_bytes += skb->len;
+
+ /* Drop band triggered, let the caller drop the 'skb'. */
+ if (band->type == OVS_METER_BAND_TYPE_DROP) {
+ spin_unlock(&meter->lock);
+ return true;
+ }
+ }
+
+ spin_unlock(&meter->lock);
+ return false;
+}
+
+static struct genl_ops dp_meter_genl_ops[] = {
+ { .cmd = OVS_METER_CMD_FEATURES,
+ .flags = 0, /* OK for unprivileged users. */
+ .policy = meter_policy,
+ .doit = ovs_meter_cmd_features
+ },
+ { .cmd = OVS_METER_CMD_SET,
+ .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
+ * privilege.
+ */
+ .policy = meter_policy,
+ .doit = ovs_meter_cmd_set,
+ },
+ { .cmd = OVS_METER_CMD_GET,
+ .flags = 0, /* OK for unprivileged users. */
+ .policy = meter_policy,
+ .doit = ovs_meter_cmd_get,
+ },
+ { .cmd = OVS_METER_CMD_DEL,
+ .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
+ * privilege.
+ */
+ .policy = meter_policy,
+ .doit = ovs_meter_cmd_del
+ },
+};
+
+static const struct genl_multicast_group ovs_meter_multicast_group = {
+ .name = OVS_METER_MCGROUP,
+};
+
+struct genl_family dp_meter_genl_family __ro_after_init = {
+ .hdrsize = sizeof(struct ovs_header),
+ .name = OVS_METER_FAMILY,
+ .version = OVS_METER_VERSION,
+ .maxattr = OVS_METER_ATTR_MAX,
+ .netnsok = true,
+ .parallel_ops = true,
+ .ops = dp_meter_genl_ops,
+ .n_ops = ARRAY_SIZE(dp_meter_genl_ops),
+ .mcgrps = &ovs_meter_multicast_group,
+ .n_mcgrps = 1,
+ .module = THIS_MODULE,
+};
+
+int ovs_meters_init(struct datapath *dp)
+{
+ int i;
+
+ dp->meters = kmalloc_array(METER_HASH_BUCKETS,
+ sizeof(struct hlist_head), GFP_KERNEL);
+
+ if (!dp->meters)
+ return -ENOMEM;
+
+ for (i = 0; i < METER_HASH_BUCKETS; i++)
+ INIT_HLIST_HEAD(&dp->meters[i]);
+
+ return 0;
+}
+
+void ovs_meters_exit(struct datapath *dp)
+{
+ int i;
+
+ for (i = 0; i < METER_HASH_BUCKETS; i++) {
+ struct hlist_head *head = &dp->meters[i];
+ struct dp_meter *meter;
+ struct hlist_node *n;
+
+ hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
+ kfree(meter);
+ }
+
+ kfree(dp->meters);
+}
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
new file mode 100644
index 000000000000..964ace2650f8
--- /dev/null
+++ b/net/openvswitch/meter.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2017 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#ifndef METER_H
+#define METER_H 1
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/netlink.h>
+#include <linux/openvswitch.h>
+#include <linux/genetlink.h>
+#include <linux/skbuff.h>
+
+#include "flow.h"
+struct datapath;
+
+#define DP_MAX_BANDS 1
+
+struct dp_meter_band {
+ u32 type;
+ u32 rate;
+ u32 burst_size;
+ u32 bucket; /* 1/1000 packets, or in bits */
+ struct ovs_flow_stats stats;
+};
+
+struct dp_meter {
+ spinlock_t lock; /* Per meter lock */
+ struct rcu_head rcu;
+ struct hlist_node dp_hash_node; /*Element in datapath->meters
+ * hash table.
+ */
+ u32 id;
+ u16 kbps:1, keep_stats:1;
+ u16 n_bands;
+ u32 max_delta_t;
+ u64 used;
+ struct ovs_flow_stats stats;
+ struct dp_meter_band bands[];
+};
+
+extern struct genl_family dp_meter_genl_family;
+int ovs_meters_init(struct datapath *dp);
+void ovs_meters_exit(struct datapath *dp);
+bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
+ struct sw_flow_key *key, u32 meter_id);
+
+#endif /* meter.h */
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9603f6ff17a4..737092ca9b4e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4560,6 +4560,7 @@ static int __net_init packet_net_init(struct net *net)
static void __net_exit packet_net_exit(struct net *net)
{
remove_proc_entry("packet", net->proc_net);
+ WARN_ON_ONCE(!hlist_empty(&net->packet.sklist));
}
static struct pernet_operations packet_net_ops = {
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 2cb4c5dfad6f..77787512fc32 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -331,7 +331,10 @@ static int __net_init phonet_init_net(struct net *net)
static void __net_exit phonet_exit_net(struct net *net)
{
+ struct phonet_net *pnn = phonet_pernet(net);
+
remove_proc_entry("phonet", net->proc_net);
+ WARN_ON_ONCE(!list_empty(&pnn->pndevs.list));
}
static struct pernet_operations phonet_net_ops = {
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index e458ece96d3d..77ab05e23001 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -1120,7 +1120,7 @@ static int __init qrtr_proto_init(void)
return 0;
}
-module_init(qrtr_proto_init);
+postcore_initcall(qrtr_proto_init);
static void __exit qrtr_proto_fini(void)
{
diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c
index 86ef907067bb..e0f70c4051b6 100644
--- a/net/rds/ib_fmr.c
+++ b/net/rds/ib_fmr.c
@@ -139,8 +139,8 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev,
return -EINVAL;
}
- dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC,
- rdsibdev_to_node(rds_ibdev));
+ dma_pages = kmalloc_array_node(sizeof(u64), page_cnt, GFP_ATOMIC,
+ rdsibdev_to_node(rds_ibdev));
if (!dma_pages) {
ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
return -ENOMEM;
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 9722bf839d9d..b4e421aa9727 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -410,14 +410,14 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
break;
}
- /* XXX when can this fail? */
- ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
- rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv,
+ rdsdebug("recv %p ibinc %p page %p addr %lu\n", recv,
recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
(long) ib_sg_dma_address(
ic->i_cm_id->device,
- &recv->r_frag->f_sg),
- ret);
+ &recv->r_frag->f_sg));
+
+ /* XXX when can this fail? */
+ ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
if (ret) {
rds_ib_conn_error(conn, "recv post on "
"%pI4 returned %d, disconnecting and "
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 344b2dcad52d..9b5c46b052fd 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -322,6 +322,14 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
}
EXPORT_SYMBOL(rxrpc_kernel_begin_call);
+/*
+ * Dummy function used to stop the notifier talking to recvmsg().
+ */
+static void rxrpc_dummy_notify_rx(struct sock *sk, struct rxrpc_call *rxcall,
+ unsigned long call_user_ID)
+{
+}
+
/**
* rxrpc_kernel_end_call - Allow a kernel service to end a call it was using
* @sock: The socket the call is on
@@ -336,6 +344,14 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
mutex_lock(&call->user_mutex);
rxrpc_release_call(rxrpc_sk(sock->sk), call);
+
+ /* Make sure we're not going to call back into a kernel service */
+ if (call->notify_rx) {
+ spin_lock_bh(&call->notify_lock);
+ call->notify_rx = rxrpc_dummy_notify_rx;
+ spin_unlock_bh(&call->notify_lock);
+ }
+
mutex_unlock(&call->user_mutex);
rxrpc_put_call(call, rxrpc_call_put_kernel);
}
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index ea5600b747cc..b2151993d384 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -525,6 +525,7 @@ struct rxrpc_call {
unsigned long flags;
unsigned long events;
spinlock_t lock;
+ spinlock_t notify_lock; /* Kernel notification lock */
rwlock_t state_lock; /* lock for state transition */
u32 abort_code; /* Local/remote abort code */
int error; /* Local error incurred */
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 7a77844aab16..3574508baf9a 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -386,7 +386,7 @@ recheck_state:
now = ktime_get_real();
if (ktime_before(call->expire_at, now)) {
- rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME);
+ rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME);
set_bit(RXRPC_CALL_EV_ABORT, &call->events);
goto recheck_state;
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index fcdd6555a820..4c7fbc6dcce7 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -124,6 +124,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
INIT_LIST_HEAD(&call->sock_link);
init_waitqueue_head(&call->waitq);
spin_lock_init(&call->lock);
+ spin_lock_init(&call->notify_lock);
rwlock_init(&call->state_lock);
atomic_set(&call->usage, 1);
call->debug_id = atomic_inc_return(&rxrpc_debug_id);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 1e37eb1c0c66..1b592073ec96 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -298,8 +298,6 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
write_unlock(&call->state_lock);
if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) {
- rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, true,
- rxrpc_propose_ack_client_tx_end);
trace_rxrpc_transmit(call, rxrpc_transmit_await_reply);
} else {
trace_rxrpc_transmit(call, rxrpc_transmit_end);
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 71e6f713fbe7..f47659c7b224 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -35,7 +35,8 @@ struct rxrpc_abort_buffer {
/*
* Fill out an ACK packet.
*/
-static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
+static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
+ struct rxrpc_call *call,
struct rxrpc_ack_buffer *pkt,
rxrpc_seq_t *_hard_ack,
rxrpc_seq_t *_top,
@@ -77,8 +78,8 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
} while (before_eq(seq, top));
}
- mtu = call->conn->params.peer->if_mtu;
- mtu -= call->conn->params.peer->hdrsize;
+ mtu = conn->params.peer->if_mtu;
+ mtu -= conn->params.peer->hdrsize;
jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max;
pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu);
pkt->ackinfo.maxMTU = htonl(mtu);
@@ -148,7 +149,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping)
}
call->ackr_reason = 0;
}
- n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top, reason);
+ n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason);
spin_unlock_bh(&call->lock);
@@ -221,6 +222,16 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
rxrpc_serial_t serial;
int ret;
+ /* Don't bother sending aborts for a client call once the server has
+ * hard-ACK'd all of its request data. After that point, we're not
+ * going to stop the operation proceeding, and whilst we might limit
+ * the reply, it's not worth it if we can send a new call on the same
+ * channel instead, thereby closing off this call.
+ */
+ if (rxrpc_is_client_call(call) &&
+ test_bit(RXRPC_CALL_TX_LAST, &call->flags))
+ return 0;
+
spin_lock_bh(&call->lock);
if (call->conn)
conn = rxrpc_get_connection_maybe(call->conn);
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index e4937b3f3685..8510a98b87e1 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -40,7 +40,9 @@ void rxrpc_notify_socket(struct rxrpc_call *call)
sk = &rx->sk;
if (rx && sk->sk_state < RXRPC_CLOSE) {
if (call->notify_rx) {
+ spin_lock_bh(&call->notify_lock);
call->notify_rx(sk, call, call->user_call_ID);
+ spin_unlock_bh(&call->notify_lock);
} else {
write_lock_bh(&rx->recvmsg_lock);
if (list_empty(&call->recvmsg_link)) {
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index ff4d69082376..4d33a50a8a6d 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -80,7 +80,6 @@ static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p)
spin_lock_bh(&idrinfo->lock);
idr_remove_ext(&idrinfo->action_idr, p->tcfa_index);
spin_unlock_bh(&idrinfo->lock);
- put_net(idrinfo->net);
gen_kill_estimator(&p->tcfa_rate_est);
free_tcf(p);
}
@@ -339,7 +338,6 @@ err3:
p->idrinfo = idrinfo;
p->ops = ops;
INIT_LIST_HEAD(&p->list);
- get_net(idrinfo->net);
*a = p;
return 0;
}
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 035ed268290b..5ef8ce8c83d4 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -398,7 +398,7 @@ static __net_init int bpf_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
- return tc_action_net_init(tn, &act_bpf_ops, net);
+ return tc_action_net_init(tn, &act_bpf_ops);
}
static void __net_exit bpf_exit_net(struct net *net)
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 34e52d01a5dd..10b7a8855a6c 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -206,7 +206,7 @@ static __net_init int connmark_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
- return tc_action_net_init(tn, &act_connmark_ops, net);
+ return tc_action_net_init(tn, &act_connmark_ops);
}
static void __net_exit connmark_exit_net(struct net *net)
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 35171df2ebef..1c40caadcff9 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -626,7 +626,7 @@ static __net_init int csum_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
- return tc_action_net_init(tn, &act_csum_ops, net);
+ return tc_action_net_init(tn, &act_csum_ops);
}
static void __net_exit csum_exit_net(struct net *net)
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index ef7f7f39d26d..e29a48ef7fc3 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -232,7 +232,7 @@ static __net_init int gact_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
- return tc_action_net_init(tn, &act_gact_ops, net);
+ return tc_action_net_init(tn, &act_gact_ops);
}
static void __net_exit gact_exit_net(struct net *net)
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index eca272533418..3007cb1310ea 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -855,7 +855,7 @@ static __net_init int ife_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
- return tc_action_net_init(tn, &act_ife_ops, net);
+ return tc_action_net_init(tn, &act_ife_ops);
}
static void __net_exit ife_exit_net(struct net *net)
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index dbdf3b2470d5..d9e399a7e3d5 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -334,7 +334,7 @@ static __net_init int ipt_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, ipt_net_id);
- return tc_action_net_init(tn, &act_ipt_ops, net);
+ return tc_action_net_init(tn, &act_ipt_ops);
}
static void __net_exit ipt_exit_net(struct net *net)
@@ -384,7 +384,7 @@ static __net_init int xt_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, xt_net_id);
- return tc_action_net_init(tn, &act_xt_ops, net);
+ return tc_action_net_init(tn, &act_xt_ops);
}
static void __net_exit xt_exit_net(struct net *net)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 8df5775bbf22..8b3e59388480 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -340,7 +340,7 @@ static __net_init int mirred_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
- return tc_action_net_init(tn, &act_mirred_ops, net);
+ return tc_action_net_init(tn, &act_mirred_ops);
}
static void __net_exit mirred_exit_net(struct net *net)
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 7eeaaf9217b6..c365d01b99c8 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -307,7 +307,7 @@ static __net_init int nat_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
- return tc_action_net_init(tn, &act_nat_ops, net);
+ return tc_action_net_init(tn, &act_nat_ops);
}
static void __net_exit nat_exit_net(struct net *net)
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index b3d82c334a5f..491fe5deb09e 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -450,7 +450,7 @@ static __net_init int pedit_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
- return tc_action_net_init(tn, &act_pedit_ops, net);
+ return tc_action_net_init(tn, &act_pedit_ops);
}
static void __net_exit pedit_exit_net(struct net *net)
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 9ec42b26e4b9..3bb2ebf9e9ae 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -331,7 +331,7 @@ static __net_init int police_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, police_net_id);
- return tc_action_net_init(tn, &act_police_ops, net);
+ return tc_action_net_init(tn, &act_police_ops);
}
static void __net_exit police_exit_net(struct net *net)
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index e69a1e3a39bf..8b5abcd2f32f 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -240,7 +240,7 @@ static __net_init int sample_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
- return tc_action_net_init(tn, &act_sample_ops, net);
+ return tc_action_net_init(tn, &act_sample_ops);
}
static void __net_exit sample_exit_net(struct net *net)
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index a8d0ea95f894..e7b57e5071a3 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -201,7 +201,7 @@ static __net_init int simp_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
- return tc_action_net_init(tn, &act_simp_ops, net);
+ return tc_action_net_init(tn, &act_simp_ops);
}
static void __net_exit simp_exit_net(struct net *net)
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index fbac62472e09..59949d61f20d 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -238,7 +238,7 @@ static __net_init int skbedit_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
- return tc_action_net_init(tn, &act_skbedit_ops, net);
+ return tc_action_net_init(tn, &act_skbedit_ops);
}
static void __net_exit skbedit_exit_net(struct net *net)
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 8e12d8897d2f..b642ad3d39dd 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -263,7 +263,7 @@ static __net_init int skbmod_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
- return tc_action_net_init(tn, &act_skbmod_ops, net);
+ return tc_action_net_init(tn, &act_skbmod_ops);
}
static void __net_exit skbmod_exit_net(struct net *net)
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index c33faa373cf2..30c96274c638 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -322,7 +322,7 @@ static __net_init int tunnel_key_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
- return tc_action_net_init(tn, &act_tunnel_key_ops, net);
+ return tc_action_net_init(tn, &act_tunnel_key_ops);
}
static void __net_exit tunnel_key_exit_net(struct net *net)
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 115fc33cc6d8..97f717a13ad5 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -26,14 +26,13 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_vlan *v = to_vlan(a);
+ struct tcf_vlan_params *p;
int action;
int err;
u16 tci;
- spin_lock(&v->tcf_lock);
tcf_lastuse_update(&v->tcf_tm);
- bstats_update(&v->tcf_bstats, skb);
- action = v->tcf_action;
+ bstats_cpu_update(this_cpu_ptr(v->common.cpu_bstats), skb);
/* Ensure 'data' points at mac_header prior calling vlan manipulating
* functions.
@@ -41,15 +40,21 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
if (skb_at_tc_ingress(skb))
skb_push_rcsum(skb, skb->mac_len);
- switch (v->tcfv_action) {
+ rcu_read_lock();
+
+ action = READ_ONCE(v->tcf_action);
+
+ p = rcu_dereference(v->vlan_p);
+
+ switch (p->tcfv_action) {
case TCA_VLAN_ACT_POP:
err = skb_vlan_pop(skb);
if (err)
goto drop;
break;
case TCA_VLAN_ACT_PUSH:
- err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid |
- (v->tcfv_push_prio << VLAN_PRIO_SHIFT));
+ err = skb_vlan_push(skb, p->tcfv_push_proto, p->tcfv_push_vid |
+ (p->tcfv_push_prio << VLAN_PRIO_SHIFT));
if (err)
goto drop;
break;
@@ -68,14 +73,14 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
goto drop;
}
/* replace the vid */
- tci = (tci & ~VLAN_VID_MASK) | v->tcfv_push_vid;
+ tci = (tci & ~VLAN_VID_MASK) | p->tcfv_push_vid;
/* replace prio bits, if tcfv_push_prio specified */
- if (v->tcfv_push_prio) {
+ if (p->tcfv_push_prio) {
tci &= ~VLAN_PRIO_MASK;
- tci |= v->tcfv_push_prio << VLAN_PRIO_SHIFT;
+ tci |= p->tcfv_push_prio << VLAN_PRIO_SHIFT;
}
/* put updated tci as hwaccel tag */
- __vlan_hwaccel_put_tag(skb, v->tcfv_push_proto, tci);
+ __vlan_hwaccel_put_tag(skb, p->tcfv_push_proto, tci);
break;
default:
BUG();
@@ -85,12 +90,13 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
drop:
action = TC_ACT_SHOT;
- v->tcf_qstats.drops++;
+ qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats));
+
unlock:
+ rcu_read_unlock();
if (skb_at_tc_ingress(skb))
skb_pull_rcsum(skb, skb->mac_len);
- spin_unlock(&v->tcf_lock);
return action;
}
@@ -107,6 +113,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
struct nlattr *tb[TCA_VLAN_MAX + 1];
+ struct tcf_vlan_params *p, *p_old;
struct tc_vlan *parm;
struct tcf_vlan *v;
int action;
@@ -172,7 +179,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, parm->index, est, a,
- &act_vlan_ops, bind, false);
+ &act_vlan_ops, bind, true);
if (ret)
return ret;
@@ -185,46 +192,67 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
v = to_vlan(*a);
- spin_lock_bh(&v->tcf_lock);
-
- v->tcfv_action = action;
- v->tcfv_push_vid = push_vid;
- v->tcfv_push_prio = push_prio;
- v->tcfv_push_proto = push_proto;
+ ASSERT_RTNL();
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p) {
+ if (ovr)
+ tcf_idr_release(*a, bind);
+ return -ENOMEM;
+ }
v->tcf_action = parm->action;
- spin_unlock_bh(&v->tcf_lock);
+ p_old = rtnl_dereference(v->vlan_p);
+
+ p->tcfv_action = action;
+ p->tcfv_push_vid = push_vid;
+ p->tcfv_push_prio = push_prio;
+ p->tcfv_push_proto = push_proto;
+
+ rcu_assign_pointer(v->vlan_p, p);
+
+ if (p_old)
+ kfree_rcu(p_old, rcu);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
}
+static void tcf_vlan_cleanup(struct tc_action *a, int bind)
+{
+ struct tcf_vlan *v = to_vlan(a);
+ struct tcf_vlan_params *p;
+
+ p = rcu_dereference_protected(v->vlan_p, 1);
+ kfree_rcu(p, rcu);
+}
+
static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_vlan *v = to_vlan(a);
+ struct tcf_vlan_params *p = rtnl_dereference(v->vlan_p);
struct tc_vlan opt = {
.index = v->tcf_index,
.refcnt = v->tcf_refcnt - ref,
.bindcnt = v->tcf_bindcnt - bind,
.action = v->tcf_action,
- .v_action = v->tcfv_action,
+ .v_action = p->tcfv_action,
};
struct tcf_t t;
if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
- if ((v->tcfv_action == TCA_VLAN_ACT_PUSH ||
- v->tcfv_action == TCA_VLAN_ACT_MODIFY) &&
- (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) ||
+ if ((p->tcfv_action == TCA_VLAN_ACT_PUSH ||
+ p->tcfv_action == TCA_VLAN_ACT_MODIFY) &&
+ (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, p->tcfv_push_vid) ||
nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL,
- v->tcfv_push_proto) ||
+ p->tcfv_push_proto) ||
(nla_put_u8(skb, TCA_VLAN_PUSH_VLAN_PRIORITY,
- v->tcfv_push_prio))))
+ p->tcfv_push_prio))))
goto nla_put_failure;
tcf_tm_dump(&t, &v->tcf_tm);
@@ -260,6 +288,7 @@ static struct tc_action_ops act_vlan_ops = {
.act = tcf_vlan,
.dump = tcf_vlan_dump,
.init = tcf_vlan_init,
+ .cleanup = tcf_vlan_cleanup,
.walk = tcf_vlan_walker,
.lookup = tcf_vlan_search,
.size = sizeof(struct tcf_vlan),
@@ -269,7 +298,7 @@ static __net_init int vlan_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
- return tc_action_net_init(tn, &act_vlan_ops, net);
+ return tc_action_net_init(tn, &act_vlan_ops);
}
static void __net_exit vlan_exit_net(struct net *net)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 206e19f4fc01..ab255b421781 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1110,6 +1110,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
exts->actions[i++] = act;
exts->nr_actions = i;
}
+ exts->net = net;
}
#else
if ((exts->action && tb[exts->action]) ||
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 871351358c10..5f169ded347e 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -87,16 +87,21 @@ static int basic_init(struct tcf_proto *tp)
return 0;
}
+static void __basic_delete_filter(struct basic_filter *f)
+{
+ tcf_exts_destroy(&f->exts);
+ tcf_em_tree_destroy(&f->ematches);
+ tcf_exts_put_net(&f->exts);
+ kfree(f);
+}
+
static void basic_delete_filter_work(struct work_struct *work)
{
struct basic_filter *f = container_of(work, struct basic_filter, work);
rtnl_lock();
- tcf_exts_destroy(&f->exts);
- tcf_em_tree_destroy(&f->ematches);
+ __basic_delete_filter(f);
rtnl_unlock();
-
- kfree(f);
}
static void basic_delete_filter(struct rcu_head *head)
@@ -116,7 +121,10 @@ static void basic_destroy(struct tcf_proto *tp)
list_del_rcu(&f->link);
tcf_unbind_filter(tp, &f->res);
idr_remove_ext(&head->handle_idr, f->handle);
- call_rcu(&f->rcu, basic_delete_filter);
+ if (tcf_exts_get_net(&f->exts))
+ call_rcu(&f->rcu, basic_delete_filter);
+ else
+ __basic_delete_filter(f);
}
idr_destroy(&head->handle_idr);
kfree_rcu(head, rcu);
@@ -130,6 +138,7 @@ static int basic_delete(struct tcf_proto *tp, void *arg, bool *last)
list_del_rcu(&f->link);
tcf_unbind_filter(tp, &f->res);
idr_remove_ext(&head->handle_idr, f->handle);
+ tcf_exts_get_net(&f->exts);
call_rcu(&f->rcu, basic_delete_filter);
*last = list_empty(&head->flist);
return 0;
@@ -225,6 +234,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
idr_replace_ext(&head->handle_idr, fnew, fnew->handle);
list_replace_rcu(&fold->link, &fnew->link);
tcf_unbind_filter(tp, &fold->res);
+ tcf_exts_get_net(&fold->exts);
call_rcu(&fold->rcu, basic_delete_filter);
} else {
list_add_rcu(&fnew->link, &head->flist);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index dc9bd9a0070b..fb680dafac5a 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -261,6 +261,7 @@ static int cls_bpf_init(struct tcf_proto *tp)
static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
{
tcf_exts_destroy(&prog->exts);
+ tcf_exts_put_net(&prog->exts);
if (cls_bpf_is_ebpf(prog))
bpf_prog_put(prog->filter);
@@ -297,7 +298,10 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
cls_bpf_stop_offload(tp, prog);
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
- call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu);
+ if (tcf_exts_get_net(&prog->exts))
+ call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu);
+ else
+ __cls_bpf_delete_prog(prog);
}
static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last)
@@ -526,6 +530,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
idr_replace_ext(&head->handle_idr, prog, handle);
list_replace_rcu(&oldprog->link, &prog->link);
tcf_unbind_filter(tp, &oldprog->res);
+ tcf_exts_get_net(&oldprog->exts);
call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu);
} else {
list_add_rcu(&prog->link, &head->plist);
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index a97e069bee89..309d5899265f 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -60,15 +60,21 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
[TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED },
};
+static void __cls_cgroup_destroy(struct cls_cgroup_head *head)
+{
+ tcf_exts_destroy(&head->exts);
+ tcf_em_tree_destroy(&head->ematches);
+ tcf_exts_put_net(&head->exts);
+ kfree(head);
+}
+
static void cls_cgroup_destroy_work(struct work_struct *work)
{
struct cls_cgroup_head *head = container_of(work,
struct cls_cgroup_head,
work);
rtnl_lock();
- tcf_exts_destroy(&head->exts);
- tcf_em_tree_destroy(&head->ematches);
- kfree(head);
+ __cls_cgroup_destroy(head);
rtnl_unlock();
}
@@ -124,8 +130,10 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
goto errout;
rcu_assign_pointer(tp->root, new);
- if (head)
+ if (head) {
+ tcf_exts_get_net(&head->exts);
call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
+ }
return 0;
errout:
tcf_exts_destroy(&new->exts);
@@ -138,8 +146,12 @@ static void cls_cgroup_destroy(struct tcf_proto *tp)
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
/* Head can still be NULL due to cls_cgroup_init(). */
- if (head)
- call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
+ if (head) {
+ if (tcf_exts_get_net(&head->exts))
+ call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
+ else
+ __cls_cgroup_destroy(head);
+ }
}
static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last)
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 9c08fcdaf41c..25c2a888e1f0 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -372,15 +372,21 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
[TCA_FLOW_PERTURB] = { .type = NLA_U32 },
};
-static void flow_destroy_filter_work(struct work_struct *work)
+static void __flow_destroy_filter(struct flow_filter *f)
{
- struct flow_filter *f = container_of(work, struct flow_filter, work);
-
- rtnl_lock();
del_timer_sync(&f->perturb_timer);
tcf_exts_destroy(&f->exts);
tcf_em_tree_destroy(&f->ematches);
+ tcf_exts_put_net(&f->exts);
kfree(f);
+}
+
+static void flow_destroy_filter_work(struct work_struct *work)
+{
+ struct flow_filter *f = container_of(work, struct flow_filter, work);
+
+ rtnl_lock();
+ __flow_destroy_filter(f);
rtnl_unlock();
}
@@ -554,8 +560,10 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
*arg = fnew;
- if (fold)
+ if (fold) {
+ tcf_exts_get_net(&fold->exts);
call_rcu(&fold->rcu, flow_destroy_filter);
+ }
return 0;
err2:
@@ -572,6 +580,7 @@ static int flow_delete(struct tcf_proto *tp, void *arg, bool *last)
struct flow_filter *f = arg;
list_del_rcu(&f->list);
+ tcf_exts_get_net(&f->exts);
call_rcu(&f->rcu, flow_destroy_filter);
*last = list_empty(&head->filters);
return 0;
@@ -596,7 +605,10 @@ static void flow_destroy(struct tcf_proto *tp)
list_for_each_entry_safe(f, next, &head->filters, list) {
list_del_rcu(&f->list);
- call_rcu(&f->rcu, flow_destroy_filter);
+ if (tcf_exts_get_net(&f->exts))
+ call_rcu(&f->rcu, flow_destroy_filter);
+ else
+ __flow_destroy_filter(f);
}
kfree_rcu(head, rcu);
}
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index c99fa9e5be46..543a3e875d05 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -193,13 +193,19 @@ static int fl_init(struct tcf_proto *tp)
return 0;
}
+static void __fl_destroy_filter(struct cls_fl_filter *f)
+{
+ tcf_exts_destroy(&f->exts);
+ tcf_exts_put_net(&f->exts);
+ kfree(f);
+}
+
static void fl_destroy_filter_work(struct work_struct *work)
{
struct cls_fl_filter *f = container_of(work, struct cls_fl_filter, work);
rtnl_lock();
- tcf_exts_destroy(&f->exts);
- kfree(f);
+ __fl_destroy_filter(f);
rtnl_unlock();
}
@@ -282,7 +288,10 @@ static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
if (!tc_skip_hw(f->flags))
fl_hw_destroy_filter(tp, f);
tcf_unbind_filter(tp, &f->res);
- call_rcu(&f->rcu, fl_destroy_filter);
+ if (tcf_exts_get_net(&f->exts))
+ call_rcu(&f->rcu, fl_destroy_filter);
+ else
+ __fl_destroy_filter(f);
}
static void fl_destroy_sleepable(struct work_struct *work)
@@ -952,6 +961,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
idr_replace_ext(&head->handle_idr, fnew, fnew->handle);
list_replace_rcu(&fold->list, &fnew->list);
tcf_unbind_filter(tp, &fold->res);
+ tcf_exts_get_net(&fold->exts);
call_rcu(&fold->rcu, fl_destroy_filter);
} else {
list_add_tail_rcu(&fnew->list, &head->filters);
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 5908f56f76da..20f0de1a960a 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -125,13 +125,19 @@ static int fw_init(struct tcf_proto *tp)
return 0;
}
+static void __fw_delete_filter(struct fw_filter *f)
+{
+ tcf_exts_destroy(&f->exts);
+ tcf_exts_put_net(&f->exts);
+ kfree(f);
+}
+
static void fw_delete_filter_work(struct work_struct *work)
{
struct fw_filter *f = container_of(work, struct fw_filter, work);
rtnl_lock();
- tcf_exts_destroy(&f->exts);
- kfree(f);
+ __fw_delete_filter(f);
rtnl_unlock();
}
@@ -157,7 +163,10 @@ static void fw_destroy(struct tcf_proto *tp)
RCU_INIT_POINTER(head->ht[h],
rtnl_dereference(f->next));
tcf_unbind_filter(tp, &f->res);
- call_rcu(&f->rcu, fw_delete_filter);
+ if (tcf_exts_get_net(&f->exts))
+ call_rcu(&f->rcu, fw_delete_filter);
+ else
+ __fw_delete_filter(f);
}
}
kfree_rcu(head, rcu);
@@ -182,6 +191,7 @@ static int fw_delete(struct tcf_proto *tp, void *arg, bool *last)
if (pfp == f) {
RCU_INIT_POINTER(*fp, rtnl_dereference(f->next));
tcf_unbind_filter(tp, &f->res);
+ tcf_exts_get_net(&f->exts);
call_rcu(&f->rcu, fw_delete_filter);
ret = 0;
break;
@@ -302,6 +312,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
RCU_INIT_POINTER(fnew->next, rtnl_dereference(pfp->next));
rcu_assign_pointer(*fp, fnew);
tcf_unbind_filter(tp, &f->res);
+ tcf_exts_get_net(&f->exts);
call_rcu(&f->rcu, fw_delete_filter);
*arg = fnew;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 95dc997873e8..66d4e0099158 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -44,13 +44,19 @@ static int mall_init(struct tcf_proto *tp)
return 0;
}
+static void __mall_destroy(struct cls_mall_head *head)
+{
+ tcf_exts_destroy(&head->exts);
+ tcf_exts_put_net(&head->exts);
+ kfree(head);
+}
+
static void mall_destroy_work(struct work_struct *work)
{
struct cls_mall_head *head = container_of(work, struct cls_mall_head,
work);
rtnl_lock();
- tcf_exts_destroy(&head->exts);
- kfree(head);
+ __mall_destroy(head);
rtnl_unlock();
}
@@ -116,7 +122,10 @@ static void mall_destroy(struct tcf_proto *tp)
if (!tc_skip_hw(head->flags))
mall_destroy_hw_filter(tp, head, (unsigned long) head);
- call_rcu(&head->rcu, mall_destroy_rcu);
+ if (tcf_exts_get_net(&head->exts))
+ call_rcu(&head->rcu, mall_destroy_rcu);
+ else
+ __mall_destroy(head);
}
static void *mall_get(struct tcf_proto *tp, u32 handle)
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 4b14ccd8b8f2..ac9a5b8825b9 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -257,13 +257,19 @@ static int route4_init(struct tcf_proto *tp)
return 0;
}
+static void __route4_delete_filter(struct route4_filter *f)
+{
+ tcf_exts_destroy(&f->exts);
+ tcf_exts_put_net(&f->exts);
+ kfree(f);
+}
+
static void route4_delete_filter_work(struct work_struct *work)
{
struct route4_filter *f = container_of(work, struct route4_filter, work);
rtnl_lock();
- tcf_exts_destroy(&f->exts);
- kfree(f);
+ __route4_delete_filter(f);
rtnl_unlock();
}
@@ -297,7 +303,10 @@ static void route4_destroy(struct tcf_proto *tp)
next = rtnl_dereference(f->next);
RCU_INIT_POINTER(b->ht[h2], next);
tcf_unbind_filter(tp, &f->res);
- call_rcu(&f->rcu, route4_delete_filter);
+ if (tcf_exts_get_net(&f->exts))
+ call_rcu(&f->rcu, route4_delete_filter);
+ else
+ __route4_delete_filter(f);
}
}
RCU_INIT_POINTER(head->table[h1], NULL);
@@ -338,6 +347,7 @@ static int route4_delete(struct tcf_proto *tp, void *arg, bool *last)
/* Delete it */
tcf_unbind_filter(tp, &f->res);
+ tcf_exts_get_net(&f->exts);
call_rcu(&f->rcu, route4_delete_filter);
/* Strip RTNL protected tree */
@@ -541,6 +551,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
*arg = f;
if (fold) {
tcf_unbind_filter(tp, &fold->res);
+ tcf_exts_get_net(&fold->exts);
call_rcu(&fold->rcu, route4_delete_filter);
}
return 0;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index bdbc541787f8..cf325625c99d 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -285,13 +285,19 @@ static int rsvp_init(struct tcf_proto *tp)
return -ENOBUFS;
}
+static void __rsvp_delete_filter(struct rsvp_filter *f)
+{
+ tcf_exts_destroy(&f->exts);
+ tcf_exts_put_net(&f->exts);
+ kfree(f);
+}
+
static void rsvp_delete_filter_work(struct work_struct *work)
{
struct rsvp_filter *f = container_of(work, struct rsvp_filter, work);
rtnl_lock();
- tcf_exts_destroy(&f->exts);
- kfree(f);
+ __rsvp_delete_filter(f);
rtnl_unlock();
}
@@ -310,7 +316,10 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
* grace period, since converted-to-rcu actions are relying on that
* in cleanup() callback
*/
- call_rcu(&f->rcu, rsvp_delete_filter_rcu);
+ if (tcf_exts_get_net(&f->exts))
+ call_rcu(&f->rcu, rsvp_delete_filter_rcu);
+ else
+ __rsvp_delete_filter(f);
}
static void rsvp_destroy(struct tcf_proto *tp)
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index d6abfa6757f2..67467ae24c97 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -142,13 +142,19 @@ static int tcindex_init(struct tcf_proto *tp)
return 0;
}
+static void __tcindex_destroy_rexts(struct tcindex_filter_result *r)
+{
+ tcf_exts_destroy(&r->exts);
+ tcf_exts_put_net(&r->exts);
+}
+
static void tcindex_destroy_rexts_work(struct work_struct *work)
{
struct tcindex_filter_result *r;
r = container_of(work, struct tcindex_filter_result, work);
rtnl_lock();
- tcf_exts_destroy(&r->exts);
+ __tcindex_destroy_rexts(r);
rtnl_unlock();
}
@@ -161,14 +167,20 @@ static void tcindex_destroy_rexts(struct rcu_head *head)
tcf_queue_work(&r->work);
}
+static void __tcindex_destroy_fexts(struct tcindex_filter *f)
+{
+ tcf_exts_destroy(&f->result.exts);
+ tcf_exts_put_net(&f->result.exts);
+ kfree(f);
+}
+
static void tcindex_destroy_fexts_work(struct work_struct *work)
{
struct tcindex_filter *f = container_of(work, struct tcindex_filter,
work);
rtnl_lock();
- tcf_exts_destroy(&f->result.exts);
- kfree(f);
+ __tcindex_destroy_fexts(f);
rtnl_unlock();
}
@@ -213,10 +225,17 @@ found:
* grace period, since converted-to-rcu actions are relying on that
* in cleanup() callback
*/
- if (f)
- call_rcu(&f->rcu, tcindex_destroy_fexts);
- else
- call_rcu(&r->rcu, tcindex_destroy_rexts);
+ if (f) {
+ if (tcf_exts_get_net(&f->result.exts))
+ call_rcu(&f->rcu, tcindex_destroy_fexts);
+ else
+ __tcindex_destroy_fexts(f);
+ } else {
+ if (tcf_exts_get_net(&r->exts))
+ call_rcu(&r->rcu, tcindex_destroy_rexts);
+ else
+ __tcindex_destroy_rexts(r);
+ }
*last = false;
return 0;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 2737b71854c9..ac152b4f4247 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -399,6 +399,7 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
bool free_pf)
{
tcf_exts_destroy(&n->exts);
+ tcf_exts_put_net(&n->exts);
if (n->ht_down)
n->ht_down->refcnt--;
#ifdef CONFIG_CLS_U32_PERF
@@ -476,6 +477,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
RCU_INIT_POINTER(*kp, key->next);
tcf_unbind_filter(tp, &key->res);
+ tcf_exts_get_net(&key->exts);
call_rcu(&key->rcu, u32_delete_key_freepf_rcu);
return 0;
}
@@ -590,7 +592,10 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
tcf_unbind_filter(tp, &n->res);
u32_remove_hw_knode(tp, n->handle);
idr_remove_ext(&ht->handle_idr, n->handle);
- call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
+ if (tcf_exts_get_net(&n->exts))
+ call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
+ else
+ u32_destroy_key(n->tp, n, true);
}
}
}
@@ -949,6 +954,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
u32_replace_knode(tp, tp_c, new);
tcf_unbind_filter(tp, &n->res);
+ tcf_exts_get_net(&n->exts);
call_rcu(&n->rcu, u32_delete_key_rcu);
return 0;
}
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index bdb533b7fb8c..7a72980c1509 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -212,7 +212,7 @@ static void cbs_disable_offload(struct net_device *dev,
cbs.queue = q->queue;
cbs.enable = 0;
- err = ops->ndo_setup_tc(dev, TC_SETUP_CBS, &cbs);
+ err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs);
if (err < 0)
pr_warn("Couldn't disable CBS offload for queue %d\n",
cbs.queue);
@@ -236,7 +236,7 @@ static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q,
cbs.idleslope = opt->idleslope;
cbs.sendslope = opt->sendslope;
- err = ops->ndo_setup_tc(dev, TC_SETUP_CBS, &cbs);
+ err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs);
if (err < 0)
return err;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 4d5ed45123f0..b85885a9d8a1 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -50,7 +50,8 @@ static void mqprio_destroy(struct Qdisc *sch)
switch (priv->mode) {
case TC_MQPRIO_MODE_DCB:
case TC_MQPRIO_MODE_CHANNEL:
- dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO,
+ dev->netdev_ops->ndo_setup_tc(dev,
+ TC_SETUP_QDISC_MQPRIO,
&mqprio);
break;
default:
@@ -265,7 +266,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
return -EINVAL;
}
err = dev->netdev_ops->ndo_setup_tc(dev,
- TC_SETUP_MQPRIO,
+ TC_SETUP_QDISC_MQPRIO,
&mqprio);
if (err)
return err;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index db0228a65e8c..dd70924cbcdf 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -77,8 +77,8 @@ struct netem_sched_data {
struct qdisc_watchdog watchdog;
- psched_tdiff_t latency;
- psched_tdiff_t jitter;
+ s64 latency;
+ s64 jitter;
u32 loss;
u32 ecn;
@@ -135,6 +135,13 @@ struct netem_sched_data {
u32 a5; /* p23 used only in 4-states */
} clg;
+ struct tc_netem_slot slot_config;
+ struct slotstate {
+ u64 slot_next;
+ s32 packets_left;
+ s32 bytes_left;
+ } slot;
+
};
/* Time stamp put into socket buffer control block
@@ -145,7 +152,7 @@ struct netem_sched_data {
* we save skb->tstamp value in skb->cb[] before destroying it.
*/
struct netem_skb_cb {
- psched_time_t time_to_send;
+ u64 time_to_send;
};
static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
@@ -305,11 +312,11 @@ static bool loss_event(struct netem_sched_data *q)
* std deviation sigma. Uses table lookup to approximate the desired
* distribution, and a uniformly-distributed pseudo-random source.
*/
-static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
- struct crndstate *state,
- const struct disttable *dist)
+static s64 tabledist(s64 mu, s32 sigma,
+ struct crndstate *state,
+ const struct disttable *dist)
{
- psched_tdiff_t x;
+ s64 x;
long t;
u32 rnd;
@@ -320,7 +327,7 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
/* default uniform distribution */
if (dist == NULL)
- return (rnd % (2*sigma)) - sigma + mu;
+ return (rnd % (2 * sigma)) - sigma + mu;
t = dist->table[rnd % dist->size];
x = (sigma % NETEM_DIST_SCALE) * t;
@@ -332,10 +339,8 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}
-static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
+static u64 packet_time_ns(u64 len, const struct netem_sched_data *q)
{
- u64 ticks;
-
len += q->packet_overhead;
if (q->cell_size) {
@@ -346,10 +351,7 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche
len = cells * (q->cell_size + q->cell_overhead);
}
- ticks = (u64)len * NSEC_PER_SEC;
-
- do_div(ticks, q->rate);
- return PSCHED_NS2TICKS(ticks);
+ return div64_u64(len * NSEC_PER_SEC, q->rate);
}
static void tfifo_reset(struct Qdisc *sch)
@@ -369,7 +371,7 @@ static void tfifo_reset(struct Qdisc *sch)
static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
- psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
+ u64 tnext = netem_skb_cb(nskb)->time_to_send;
struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
while (*p) {
@@ -515,13 +517,13 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (q->gap == 0 || /* not doing reordering */
q->counter < q->gap - 1 || /* inside last reordering gap */
q->reorder < get_crandom(&q->reorder_cor)) {
- psched_time_t now;
- psched_tdiff_t delay;
+ u64 now;
+ s64 delay;
delay = tabledist(q->latency, q->jitter,
&q->delay_cor, q->delay_dist);
- now = psched_get_time();
+ now = ktime_get_ns();
if (q->rate) {
struct netem_skb_cb *last = NULL;
@@ -547,11 +549,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
* from delay.
*/
delay -= last->time_to_send - now;
- delay = max_t(psched_tdiff_t, 0, delay);
+ delay = max_t(s64, 0, delay);
now = last->time_to_send;
}
- delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q);
+ delay += packet_time_ns(qdisc_pkt_len(skb), q);
}
cb->time_to_send = now + delay;
@@ -562,7 +564,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
* Do re-ordering by putting one out of N packets at the front
* of the queue.
*/
- cb->time_to_send = psched_get_time();
+ cb->time_to_send = ktime_get_ns();
q->counter = 0;
netem_enqueue_skb_head(&sch->q, skb);
@@ -593,6 +595,20 @@ finish_segs:
return NET_XMIT_SUCCESS;
}
+/* Delay the next round with a new future slot with a
+ * correct number of bytes and packets.
+ */
+
+static void get_slot_next(struct netem_sched_data *q, u64 now)
+{
+ q->slot.slot_next = now + q->slot_config.min_delay +
+ (prandom_u32() *
+ (q->slot_config.max_delay -
+ q->slot_config.min_delay) >> 32);
+ q->slot.packets_left = q->slot_config.max_packets;
+ q->slot.bytes_left = q->slot_config.max_bytes;
+}
+
static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
@@ -609,15 +625,18 @@ deliver:
}
p = rb_first(&q->t_root);
if (p) {
- psched_time_t time_to_send;
+ u64 time_to_send;
+ u64 now = ktime_get_ns();
skb = rb_to_skb(p);
/* if more time remaining? */
time_to_send = netem_skb_cb(skb)->time_to_send;
- if (time_to_send <= psched_get_time()) {
- rb_erase(p, &q->t_root);
+ if (q->slot.slot_next && q->slot.slot_next < time_to_send)
+ get_slot_next(q, now);
+ if (time_to_send <= now && q->slot.slot_next <= now) {
+ rb_erase(p, &q->t_root);
sch->q.qlen--;
qdisc_qstats_backlog_dec(sch, skb);
skb->next = NULL;
@@ -636,6 +655,14 @@ deliver:
skb->tstamp = 0;
#endif
+ if (q->slot.slot_next) {
+ q->slot.packets_left--;
+ q->slot.bytes_left -= qdisc_pkt_len(skb);
+ if (q->slot.packets_left <= 0 ||
+ q->slot.bytes_left <= 0)
+ get_slot_next(q, now);
+ }
+
if (q->qdisc) {
unsigned int pkt_len = qdisc_pkt_len(skb);
struct sk_buff *to_free = NULL;
@@ -659,7 +686,10 @@ deliver:
if (skb)
goto deliver;
}
- qdisc_watchdog_schedule(&q->watchdog, time_to_send);
+
+ qdisc_watchdog_schedule_ns(&q->watchdog,
+ max(time_to_send,
+ q->slot.slot_next));
}
if (q->qdisc) {
@@ -690,6 +720,7 @@ static void dist_free(struct disttable *d)
* Distribution data is a variable size payload containing
* signed 16 bit values.
*/
+
static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
{
struct netem_sched_data *q = qdisc_priv(sch);
@@ -720,6 +751,23 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
return 0;
}
+static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
+{
+ const struct tc_netem_slot *c = nla_data(attr);
+
+ q->slot_config = *c;
+ if (q->slot_config.max_packets == 0)
+ q->slot_config.max_packets = INT_MAX;
+ if (q->slot_config.max_bytes == 0)
+ q->slot_config.max_bytes = INT_MAX;
+ q->slot.packets_left = q->slot_config.max_packets;
+ q->slot.bytes_left = q->slot_config.max_bytes;
+ if (q->slot_config.min_delay | q->slot_config.max_delay)
+ q->slot.slot_next = ktime_get_ns();
+ else
+ q->slot.slot_next = 0;
+}
+
static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
{
const struct tc_netem_corr *c = nla_data(attr);
@@ -821,6 +869,9 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
[TCA_NETEM_LOSS] = { .type = NLA_NESTED },
[TCA_NETEM_ECN] = { .type = NLA_U32 },
[TCA_NETEM_RATE64] = { .type = NLA_U64 },
+ [TCA_NETEM_LATENCY64] = { .type = NLA_S64 },
+ [TCA_NETEM_JITTER64] = { .type = NLA_S64 },
+ [TCA_NETEM_SLOT] = { .len = sizeof(struct tc_netem_slot) },
};
static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
@@ -888,8 +939,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
sch->limit = qopt->limit;
- q->latency = qopt->latency;
- q->jitter = qopt->jitter;
+ q->latency = PSCHED_TICKS2NS(qopt->latency);
+ q->jitter = PSCHED_TICKS2NS(qopt->jitter);
q->limit = qopt->limit;
q->gap = qopt->gap;
q->counter = 0;
@@ -918,9 +969,18 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
q->rate = max_t(u64, q->rate,
nla_get_u64(tb[TCA_NETEM_RATE64]));
+ if (tb[TCA_NETEM_LATENCY64])
+ q->latency = nla_get_s64(tb[TCA_NETEM_LATENCY64]);
+
+ if (tb[TCA_NETEM_JITTER64])
+ q->jitter = nla_get_s64(tb[TCA_NETEM_JITTER64]);
+
if (tb[TCA_NETEM_ECN])
q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
+ if (tb[TCA_NETEM_SLOT])
+ get_slot(q, tb[TCA_NETEM_SLOT]);
+
return ret;
}
@@ -1010,9 +1070,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
struct tc_netem_reorder reorder;
struct tc_netem_corrupt corrupt;
struct tc_netem_rate rate;
+ struct tc_netem_slot slot;
- qopt.latency = q->latency;
- qopt.jitter = q->jitter;
+ qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency),
+ UINT_MAX);
+ qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter),
+ UINT_MAX);
qopt.limit = q->limit;
qopt.loss = q->loss;
qopt.gap = q->gap;
@@ -1020,6 +1083,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
goto nla_put_failure;
+ if (nla_put(skb, TCA_NETEM_LATENCY64, sizeof(q->latency), &q->latency))
+ goto nla_put_failure;
+
+ if (nla_put(skb, TCA_NETEM_JITTER64, sizeof(q->jitter), &q->jitter))
+ goto nla_put_failure;
+
cor.delay_corr = q->delay_cor.rho;
cor.loss_corr = q->loss_cor.rho;
cor.dup_corr = q->dup_cor.rho;
@@ -1056,6 +1125,16 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
if (dump_loss_model(q, skb) != 0)
goto nla_put_failure;
+ if (q->slot_config.min_delay | q->slot_config.max_delay) {
+ slot = q->slot_config;
+ if (slot.max_packets == INT_MAX)
+ slot.max_packets = 0;
+ if (slot.max_bytes == INT_MAX)
+ slot.max_bytes = 0;
+ if (nla_put(skb, TCA_NETEM_SLOT, sizeof(slot), &slot))
+ goto nla_put_failure;
+ }
+
return nla_nest_end(skb, nla);
nla_put_failure:
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index fdfdb56aaae2..7f8ea9e297c3 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -19,6 +19,7 @@
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>
@@ -148,11 +149,37 @@ static void red_reset(struct Qdisc *sch)
red_restart(&q->vars);
}
+static int red_offload(struct Qdisc *sch, bool enable)
+{
+ struct red_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct tc_red_qopt_offload opt = {
+ .handle = sch->handle,
+ .parent = sch->parent,
+ };
+
+ if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+ return -EOPNOTSUPP;
+
+ if (enable) {
+ opt.command = TC_RED_REPLACE;
+ opt.set.min = q->parms.qth_min >> q->parms.Wlog;
+ opt.set.max = q->parms.qth_max >> q->parms.Wlog;
+ opt.set.probability = q->parms.max_P;
+ opt.set.is_ecn = red_use_ecn(q);
+ } else {
+ opt.command = TC_RED_DESTROY;
+ }
+
+ return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
+}
+
static void red_destroy(struct Qdisc *sch)
{
struct red_sched_data *q = qdisc_priv(sch);
del_timer_sync(&q->adapt_timer);
+ red_offload(sch, false);
qdisc_destroy(q->qdisc);
}
@@ -219,6 +246,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
red_start_of_idle_period(&q->vars);
sch_tree_unlock(sch);
+ red_offload(sch, true);
return 0;
}
@@ -244,6 +272,35 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt)
return red_change(sch, opt);
}
+static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct tc_red_qopt_offload hw_stats = {
+ .command = TC_RED_STATS,
+ .handle = sch->handle,
+ .parent = sch->parent,
+ {
+ .stats.bstats = &sch->bstats,
+ .stats.qstats = &sch->qstats,
+ },
+ };
+ int err;
+
+ opt->flags &= ~TC_RED_OFFLOADED;
+ if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+ return 0;
+
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
+ &hw_stats);
+ if (err == -EOPNOTSUPP)
+ return 0;
+
+ if (!err)
+ opt->flags |= TC_RED_OFFLOADED;
+
+ return err;
+}
+
static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct red_sched_data *q = qdisc_priv(sch);
@@ -257,8 +314,13 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
.Plog = q->parms.Plog,
.Scell_log = q->parms.Scell_log,
};
+ int err;
sch->qstats.backlog = q->qdisc->qstats.backlog;
+ err = red_dump_offload(sch, &opt);
+ if (err)
+ goto nla_put_failure;
+
opts = nla_nest_start(skb, TCA_OPTIONS);
if (opts == NULL)
goto nla_put_failure;
@@ -275,6 +337,7 @@ nla_put_failure:
static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
struct red_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
struct tc_red_xstats st = {
.early = q->stats.prob_drop + q->stats.forced_drop,
.pdrop = q->stats.pdrop,
@@ -282,6 +345,26 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
.marked = q->stats.prob_mark + q->stats.forced_mark,
};
+ if (tc_can_offload(dev) && dev->netdev_ops->ndo_setup_tc) {
+ struct red_stats hw_stats = {0};
+ struct tc_red_qopt_offload hw_stats_request = {
+ .command = TC_RED_XSTATS,
+ .handle = sch->handle,
+ .parent = sch->parent,
+ {
+ .xstats = &hw_stats,
+ },
+ };
+ if (!dev->netdev_ops->ndo_setup_tc(dev,
+ TC_SETUP_QDISC_RED,
+ &hw_stats_request)) {
+ st.early += hw_stats.prob_drop + hw_stats.forced_drop;
+ st.pdrop += hw_stats.pdrop;
+ st.other += hw_stats.other;
+ st.marked += hw_stats.prob_mark + hw_stats.forced_mark;
+ }
+ }
+
return gnet_stats_copy_app(d, &st, sizeof(st));
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index a6dfa86c0201..3b18085e3b10 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -807,9 +807,10 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname,
addr->v6.sin6_flowinfo = 0;
addr->v6.sin6_port = sh->source;
addr->v6.sin6_addr = ipv6_hdr(skb)->saddr;
- if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) {
+ if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
addr->v6.sin6_scope_id = sctp_v6_skb_iif(skb);
- }
+ else
+ addr->v6.sin6_scope_id = 0;
}
*addr_len = sctp_v6_addr_to_user(sctp_sk(skb->sk), addr);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 514465b03829..9bf575f2e8ed 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -3594,8 +3594,8 @@ struct sctp_chunk *sctp_make_strreset_req(
__u16 stream_num, __be16 *stream_list,
bool out, bool in)
{
+ __u16 stream_len = stream_num * sizeof(__u16);
struct sctp_strreset_outreq outreq;
- __u16 stream_len = stream_num * 2;
struct sctp_strreset_inreq inreq;
struct sctp_chunk *retval;
__u16 outlen, inlen;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index b029757bea03..3204a9b29407 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -84,8 +84,8 @@
/* Forward declarations for internal helper functions. */
static int sctp_writeable(struct sock *sk);
static void sctp_wfree(struct sk_buff *skb);
-static int sctp_wait_for_sndbuf(struct sctp_association *, long *timeo_p,
- size_t msg_len);
+static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
+ size_t msg_len, struct sock **orig_sk);
static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p);
static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p);
static int sctp_wait_for_accept(struct sock *sk, long timeo);
@@ -1970,9 +1970,16 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
if (!sctp_wspace(asoc)) {
- err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
- if (err)
+ /* sk can be changed by peel off when waiting for buf. */
+ err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk);
+ if (err) {
+ if (err == -ESRCH) {
+ /* asoc is already dead. */
+ new_asoc = NULL;
+ err = -EPIPE;
+ }
goto out_free;
+ }
}
/* If an address is passed with the sendto/sendmsg call, it is used
@@ -3133,9 +3140,9 @@ static int sctp_setsockopt_mappedv4(struct sock *sk, char __user *optval, unsign
*/
static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int optlen)
{
+ struct sctp_sock *sp = sctp_sk(sk);
struct sctp_assoc_value params;
struct sctp_association *asoc;
- struct sctp_sock *sp = sctp_sk(sk);
int val;
if (optlen == sizeof(int)) {
@@ -3151,26 +3158,35 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
if (copy_from_user(&params, optval, optlen))
return -EFAULT;
val = params.assoc_value;
- } else
+ } else {
return -EINVAL;
+ }
- if ((val != 0) && ((val < 8) || (val > SCTP_MAX_CHUNK_LEN)))
- return -EINVAL;
+ if (val) {
+ int min_len, max_len;
- asoc = sctp_id2assoc(sk, params.assoc_id);
- if (!asoc && params.assoc_id && sctp_style(sk, UDP))
- return -EINVAL;
+ min_len = SCTP_DEFAULT_MINSEGMENT - sp->pf->af->net_header_len;
+ min_len -= sizeof(struct sctphdr) +
+ sizeof(struct sctp_data_chunk);
+
+ max_len = SCTP_MAX_CHUNK_LEN - sizeof(struct sctp_data_chunk);
+ if (val < min_len || val > max_len)
+ return -EINVAL;
+ }
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
if (asoc) {
if (val == 0) {
- val = asoc->pathmtu;
- val -= sp->pf->af->net_header_len;
+ val = asoc->pathmtu - sp->pf->af->net_header_len;
val -= sizeof(struct sctphdr) +
- sizeof(struct sctp_data_chunk);
+ sizeof(struct sctp_data_chunk);
}
asoc->user_frag = val;
asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu);
} else {
+ if (params.assoc_id && sctp_style(sk, UDP))
+ return -EINVAL;
sp->user_frag = val;
}
@@ -5015,12 +5031,6 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
if (!asoc)
return -EINVAL;
- /* If there is a thread waiting on more sndbuf space for
- * sending on this asoc, it cannot be peeled.
- */
- if (waitqueue_active(&asoc->wait))
- return -EBUSY;
-
/* An association cannot be branched off from an already peeled-off
* socket, nor is this supported for tcp style sockets.
*/
@@ -7989,7 +7999,7 @@ void sctp_sock_rfree(struct sk_buff *skb)
/* Helper function to wait for space in the sndbuf. */
static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
- size_t msg_len)
+ size_t msg_len, struct sock **orig_sk)
{
struct sock *sk = asoc->base.sk;
int err = 0;
@@ -8006,10 +8016,11 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
for (;;) {
prepare_to_wait_exclusive(&asoc->wait, &wait,
TASK_INTERRUPTIBLE);
+ if (asoc->base.dead)
+ goto do_dead;
if (!*timeo_p)
goto do_nonblock;
- if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING ||
- asoc->base.dead)
+ if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING)
goto do_error;
if (signal_pending(current))
goto do_interrupted;
@@ -8022,11 +8033,17 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
release_sock(sk);
current_timeo = schedule_timeout(current_timeo);
lock_sock(sk);
+ if (sk != asoc->base.sk) {
+ release_sock(sk);
+ sk = asoc->base.sk;
+ lock_sock(sk);
+ }
*timeo_p = current_timeo;
}
out:
+ *orig_sk = sk;
finish_wait(&asoc->wait, &wait);
/* Release the association's refcnt. */
@@ -8034,6 +8051,10 @@ out:
return err;
+do_dead:
+ err = -ESRCH;
+ goto out;
+
do_error:
err = -EPIPE;
goto out;
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index b8c8cabb1a58..a11db21dc8a0 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -282,15 +282,31 @@ int sctp_send_reset_streams(struct sctp_association *asoc,
str_nums = params->srs_number_streams;
str_list = params->srs_stream_list;
- if (out && str_nums)
- for (i = 0; i < str_nums; i++)
- if (str_list[i] >= stream->outcnt)
- goto out;
+ if (str_nums) {
+ int param_len = 0;
- if (in && str_nums)
- for (i = 0; i < str_nums; i++)
- if (str_list[i] >= stream->incnt)
- goto out;
+ if (out) {
+ for (i = 0; i < str_nums; i++)
+ if (str_list[i] >= stream->outcnt)
+ goto out;
+
+ param_len = str_nums * sizeof(__u16) +
+ sizeof(struct sctp_strreset_outreq);
+ }
+
+ if (in) {
+ for (i = 0; i < str_nums; i++)
+ if (str_list[i] >= stream->incnt)
+ goto out;
+
+ param_len += str_nums * sizeof(__u16) +
+ sizeof(struct sctp_strreset_inreq);
+ }
+
+ if (param_len > SCTP_MAX_CHUNK_LEN -
+ sizeof(struct sctp_reconf_chunk))
+ goto out;
+ }
nstr_list = kcalloc(str_nums, sizeof(__be16), GFP_KERNEL);
if (!nstr_list) {
diff --git a/net/socket.c b/net/socket.c
index c729625eb5d3..42d8e9c9ccd5 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -568,7 +568,6 @@ struct socket *sock_alloc(void)
sock = SOCKET_I(inode);
- kmemcheck_annotate_bitfield(sock, type);
inode->i_ino = get_next_ino();
inode->i_mode = S_IFSOCK | S_IRWXUGO;
inode->i_uid = current_fsuid();
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 2ad827db2704..a801da812f86 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1491,7 +1491,6 @@ rpc_restart_call(struct rpc_task *task)
}
EXPORT_SYMBOL_GPL(rpc_restart_call);
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
const char
*rpc_proc_name(const struct rpc_task *task)
{
@@ -1505,7 +1504,6 @@ const char
} else
return "no proc";
}
-#endif
/*
* 0. Initial state
@@ -1519,6 +1517,7 @@ call_start(struct rpc_task *task)
struct rpc_clnt *clnt = task->tk_client;
int idx = task->tk_msg.rpc_proc->p_statidx;
+ trace_rpc_request(task);
dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid,
clnt->cl_program->name, clnt->cl_vers,
rpc_proc_name(task),
@@ -1586,6 +1585,7 @@ call_reserveresult(struct rpc_task *task)
switch (status) {
case -ENOMEM:
rpc_delay(task, HZ >> 2);
+ /* fall through */
case -EAGAIN: /* woken up; retry */
task->tk_action = call_retry_reserve;
return;
@@ -1647,10 +1647,13 @@ call_refreshresult(struct rpc_task *task)
/* Use rate-limiting and a max number of retries if refresh
* had status 0 but failed to update the cred.
*/
+ /* fall through */
case -ETIMEDOUT:
rpc_delay(task, 3*HZ);
+ /* fall through */
case -EAGAIN:
status = -EACCES;
+ /* fall through */
case -EKEYEXPIRED:
if (!task->tk_cred_retry)
break;
@@ -1911,6 +1914,7 @@ call_connect_status(struct rpc_task *task)
task->tk_action = call_bind;
return;
}
+ /* fall through */
case -ECONNRESET:
case -ECONNABORTED:
case -ENETUNREACH:
@@ -1924,6 +1928,7 @@ call_connect_status(struct rpc_task *task)
break;
/* retry with existing socket, after a delay */
rpc_delay(task, 3*HZ);
+ /* fall through */
case -EAGAIN:
/* Check for timeouts before looping back to call_bind */
case -ETIMEDOUT:
@@ -2025,6 +2030,7 @@ call_transmit_status(struct rpc_task *task)
rpc_exit(task, task->tk_status);
break;
}
+ /* fall through */
case -ECONNRESET:
case -ECONNABORTED:
case -EADDRINUSE:
@@ -2145,6 +2151,7 @@ call_status(struct rpc_task *task)
* were a timeout.
*/
rpc_delay(task, 3*HZ);
+ /* fall through */
case -ETIMEDOUT:
task->tk_action = call_timeout;
break;
@@ -2152,14 +2159,17 @@ call_status(struct rpc_task *task)
case -ECONNRESET:
case -ECONNABORTED:
rpc_force_rebind(clnt);
+ /* fall through */
case -EADDRINUSE:
rpc_delay(task, 3*HZ);
+ /* fall through */
case -EPIPE:
case -ENOTCONN:
task->tk_action = call_bind;
break;
case -ENOBUFS:
rpc_delay(task, HZ>>2);
+ /* fall through */
case -EAGAIN:
task->tk_action = call_transmit;
break;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 61a504fb1ae2..7803f3b6aa53 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1410,8 +1410,8 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
return PTR_ERR(gssd_dentry);
}
- dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n",
- net, NET_NAME(net));
+ dprintk("RPC: sending pipefs MOUNT notification for net %x%s\n",
+ net->ns.inum, NET_NAME(net));
mutex_lock(&sn->pipefs_sb_lock);
sn->pipefs_sb = sb;
err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
@@ -1462,8 +1462,8 @@ static void rpc_kill_sb(struct super_block *sb)
goto out;
}
sn->pipefs_sb = NULL;
- dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n",
- net, NET_NAME(net));
+ dprintk("RPC: sending pipefs UMOUNT notification for net %x%s\n",
+ net->ns.inum, NET_NAME(net));
blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
RPC_PIPEFS_UMOUNT,
sb);
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index ea0676f199c8..c526f8fb37c9 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -216,9 +216,9 @@ static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt,
smp_wmb();
sn->rpcb_users = 1;
dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: "
- "%p, rpcb_local_clnt4: %p) for net %p%s\n",
- sn->rpcb_local_clnt, sn->rpcb_local_clnt4,
- net, (net == &init_net) ? " (init_net)" : "");
+ "%p, rpcb_local_clnt4: %p) for net %x%s\n",
+ sn->rpcb_local_clnt, sn->rpcb_local_clnt4,
+ net->ns.inum, (net == &init_net) ? " (init_net)" : "");
}
/*
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 5dea47eb31bb..b1b49edd7c4d 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -274,10 +274,9 @@ static inline void rpc_task_set_debuginfo(struct rpc_task *task)
static void rpc_set_active(struct rpc_task *task)
{
- trace_rpc_task_begin(task->tk_client, task, NULL);
-
rpc_task_set_debuginfo(task);
set_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
+ trace_rpc_task_begin(task->tk_client, task, NULL);
}
/*
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index c73de181467a..56f9eff74150 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -65,10 +65,13 @@ err_proc:
static __net_exit void sunrpc_exit_net(struct net *net)
{
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
rpc_pipefs_exit_net(net);
unix_gid_cache_destroy(net);
ip_map_cache_destroy(net);
rpc_proc_exit(net);
+ WARN_ON_ONCE(!list_empty(&sn->all_clients));
}
static struct pernet_operations sunrpc_net_ops = {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 33f4ae68426d..387cc4add6f6 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -50,7 +50,7 @@ EXPORT_SYMBOL_GPL(svc_pool_map);
static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
static int
-param_set_pool_mode(const char *val, struct kernel_param *kp)
+param_set_pool_mode(const char *val, const struct kernel_param *kp)
{
int *ip = (int *)kp->arg;
struct svc_pool_map *m = &svc_pool_map;
@@ -80,7 +80,7 @@ out:
}
static int
-param_get_pool_mode(char *buf, struct kernel_param *kp)
+param_get_pool_mode(char *buf, const struct kernel_param *kp)
{
int *ip = (int *)kp->arg;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 6160d17a31c4..333b9d697ae5 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1139,6 +1139,7 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
case -EAGAIN:
xprt_add_backlog(xprt, task);
dprintk("RPC: waiting for request slot\n");
+ /* fall through */
default:
task->tk_status = -EAGAIN;
}
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 823a781ec89c..8b818bb3518a 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -43,7 +43,7 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
req = rpcrdma_create_req(r_xprt);
if (IS_ERR(req))
return PTR_ERR(req);
- req->rl_backchannel = true;
+ __set_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags);
rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
DMA_TO_DEVICE, GFP_KERNEL);
@@ -223,8 +223,8 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
*p++ = xdr_zero;
*p = xdr_zero;
- if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, RPCRDMA_HDRLEN_MIN,
- &rqst->rq_snd_buf, rpcrdma_noch))
+ if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN,
+ &rqst->rq_snd_buf, rpcrdma_noch))
return -EIO;
return 0;
}
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index fa759dd2b0f3..29fc84c7ff98 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -306,28 +306,9 @@ out_reset:
}
}
-/* Use a slow, safe mechanism to invalidate all memory regions
- * that were registered for "req".
- */
-static void
-fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
- bool sync)
-{
- struct rpcrdma_mw *mw;
-
- while (!list_empty(&req->rl_registered)) {
- mw = rpcrdma_pop_mw(&req->rl_registered);
- if (sync)
- fmr_op_recover_mr(mw);
- else
- rpcrdma_defer_mr_recovery(mw);
- }
-}
-
const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
.ro_map = fmr_op_map,
.ro_unmap_sync = fmr_op_unmap_sync,
- .ro_unmap_safe = fmr_op_unmap_safe,
.ro_recover_mr = fmr_op_recover_mr,
.ro_open = fmr_op_open,
.ro_maxpages = fmr_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 35d7517ef0e6..773e66e10a15 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -420,7 +420,6 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ;
- rpcrdma_set_signaled(&r_xprt->rx_ep, &reg_wr->wr);
rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
if (rc)
goto out_senderr;
@@ -508,12 +507,6 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
f->fr_cqe.done = frwr_wc_localinv_wake;
reinit_completion(&f->fr_linv_done);
- /* Initialize CQ count, since there is always a signaled
- * WR being posted here. The new cqcount depends on how
- * many SQEs are about to be consumed.
- */
- rpcrdma_init_cqcount(&r_xprt->rx_ep, count);
-
/* Transport disconnect drains the receive CQ before it
* replaces the QP. The RPC reply handler won't call us
* unless ri_id->qp is a valid pointer.
@@ -546,7 +539,6 @@ reset_mrs:
/* Find and reset the MRs in the LOCAL_INV WRs that did not
* get posted.
*/
- rpcrdma_init_cqcount(&r_xprt->rx_ep, -count);
while (bad_wr) {
f = container_of(bad_wr, struct rpcrdma_frmr,
fr_invwr);
@@ -559,28 +551,9 @@ reset_mrs:
goto unmap;
}
-/* Use a slow, safe mechanism to invalidate all memory regions
- * that were registered for "req".
- */
-static void
-frwr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
- bool sync)
-{
- struct rpcrdma_mw *mw;
-
- while (!list_empty(&req->rl_registered)) {
- mw = rpcrdma_pop_mw(&req->rl_registered);
- if (sync)
- frwr_op_recover_mr(mw);
- else
- rpcrdma_defer_mr_recovery(mw);
- }
-}
-
const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
.ro_map = frwr_op_map,
.ro_unmap_sync = frwr_op_unmap_sync,
- .ro_unmap_safe = frwr_op_unmap_safe,
.ro_recover_mr = frwr_op_recover_mr,
.ro_open = frwr_op_open,
.ro_maxpages = frwr_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index f1889f4d4803..ed34dc0f144c 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -75,11 +76,11 @@ static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs)
/* Maximum Read list size */
maxsegs += 2; /* segment for head and tail buffers */
- size = maxsegs * sizeof(struct rpcrdma_read_chunk);
+ size = maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32);
/* Minimal Read chunk size */
size += sizeof(__be32); /* segment count */
- size += sizeof(struct rpcrdma_segment);
+ size += rpcrdma_segment_maxsz * sizeof(__be32);
size += sizeof(__be32); /* list discriminator */
dprintk("RPC: %s: max call header size = %u\n",
@@ -102,7 +103,7 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
/* Maximum Write list size */
maxsegs += 2; /* segment for head and tail buffers */
size = sizeof(__be32); /* segment count */
- size += maxsegs * sizeof(struct rpcrdma_segment);
+ size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32);
size += sizeof(__be32); /* list discriminator */
dprintk("RPC: %s: max reply header size = %u\n",
@@ -511,27 +512,60 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
return 0;
}
-/* Prepare the RPC-over-RDMA header SGE.
+/**
+ * rpcrdma_unmap_sendctx - DMA-unmap Send buffers
+ * @sc: sendctx containing SGEs to unmap
+ *
+ */
+void
+rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc)
+{
+ struct rpcrdma_ia *ia = &sc->sc_xprt->rx_ia;
+ struct ib_sge *sge;
+ unsigned int count;
+
+ dprintk("RPC: %s: unmapping %u sges for sc=%p\n",
+ __func__, sc->sc_unmap_count, sc);
+
+ /* The first two SGEs contain the transport header and
+ * the inline buffer. These are always left mapped so
+ * they can be cheaply re-used.
+ */
+ sge = &sc->sc_sges[2];
+ for (count = sc->sc_unmap_count; count; ++sge, --count)
+ ib_dma_unmap_page(ia->ri_device,
+ sge->addr, sge->length, DMA_TO_DEVICE);
+
+ if (test_and_clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &sc->sc_req->rl_flags)) {
+ smp_mb__after_atomic();
+ wake_up_bit(&sc->sc_req->rl_flags, RPCRDMA_REQ_F_TX_RESOURCES);
+ }
+}
+
+/* Prepare an SGE for the RPC-over-RDMA transport header.
*/
static bool
rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
u32 len)
{
+ struct rpcrdma_sendctx *sc = req->rl_sendctx;
struct rpcrdma_regbuf *rb = req->rl_rdmabuf;
- struct ib_sge *sge = &req->rl_send_sge[0];
+ struct ib_sge *sge = sc->sc_sges;
- if (unlikely(!rpcrdma_regbuf_is_mapped(rb))) {
- if (!__rpcrdma_dma_map_regbuf(ia, rb))
- return false;
- sge->addr = rdmab_addr(rb);
- sge->lkey = rdmab_lkey(rb);
- }
+ if (!rpcrdma_dma_map_regbuf(ia, rb))
+ goto out_regbuf;
+ sge->addr = rdmab_addr(rb);
sge->length = len;
+ sge->lkey = rdmab_lkey(rb);
ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr,
sge->length, DMA_TO_DEVICE);
- req->rl_send_wr.num_sge++;
+ sc->sc_wr.num_sge++;
return true;
+
+out_regbuf:
+ pr_err("rpcrdma: failed to DMA map a Send buffer\n");
+ return false;
}
/* Prepare the Send SGEs. The head and tail iovec, and each entry
@@ -541,10 +575,11 @@ static bool
rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
{
+ struct rpcrdma_sendctx *sc = req->rl_sendctx;
unsigned int sge_no, page_base, len, remaining;
struct rpcrdma_regbuf *rb = req->rl_sendbuf;
struct ib_device *device = ia->ri_device;
- struct ib_sge *sge = req->rl_send_sge;
+ struct ib_sge *sge = sc->sc_sges;
u32 lkey = ia->ri_pd->local_dma_lkey;
struct page *page, **ppages;
@@ -552,7 +587,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
* DMA-mapped. Sync the content that has changed.
*/
if (!rpcrdma_dma_map_regbuf(ia, rb))
- return false;
+ goto out_regbuf;
sge_no = 1;
sge[sge_no].addr = rdmab_addr(rb);
sge[sge_no].length = xdr->head[0].iov_len;
@@ -607,7 +642,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
sge[sge_no].length = len;
sge[sge_no].lkey = lkey;
- req->rl_mapped_sges++;
+ sc->sc_unmap_count++;
ppages++;
remaining -= len;
page_base = 0;
@@ -633,56 +668,61 @@ map_tail:
goto out_mapping_err;
sge[sge_no].length = len;
sge[sge_no].lkey = lkey;
- req->rl_mapped_sges++;
+ sc->sc_unmap_count++;
}
out:
- req->rl_send_wr.num_sge = sge_no + 1;
+ sc->sc_wr.num_sge += sge_no;
+ if (sc->sc_unmap_count)
+ __set_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
return true;
+out_regbuf:
+ pr_err("rpcrdma: failed to DMA map a Send buffer\n");
+ return false;
+
out_mapping_overflow:
+ rpcrdma_unmap_sendctx(sc);
pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no);
return false;
out_mapping_err:
+ rpcrdma_unmap_sendctx(sc);
pr_err("rpcrdma: Send mapping error\n");
return false;
}
-bool
-rpcrdma_prepare_send_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
- u32 hdrlen, struct xdr_buf *xdr,
- enum rpcrdma_chunktype rtype)
+/**
+ * rpcrdma_prepare_send_sges - Construct SGEs for a Send WR
+ * @r_xprt: controlling transport
+ * @req: context of RPC Call being marshalled
+ * @hdrlen: size of transport header, in bytes
+ * @xdr: xdr_buf containing RPC Call
+ * @rtype: chunk type being encoded
+ *
+ * Returns 0 on success; otherwise a negative errno is returned.
+ */
+int
+rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
+ struct rpcrdma_req *req, u32 hdrlen,
+ struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
{
- req->rl_send_wr.num_sge = 0;
- req->rl_mapped_sges = 0;
-
- if (!rpcrdma_prepare_hdr_sge(ia, req, hdrlen))
- goto out_map;
+ req->rl_sendctx = rpcrdma_sendctx_get_locked(&r_xprt->rx_buf);
+ if (!req->rl_sendctx)
+ return -ENOBUFS;
+ req->rl_sendctx->sc_wr.num_sge = 0;
+ req->rl_sendctx->sc_unmap_count = 0;
+ req->rl_sendctx->sc_req = req;
+ __clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
+
+ if (!rpcrdma_prepare_hdr_sge(&r_xprt->rx_ia, req, hdrlen))
+ return -EIO;
if (rtype != rpcrdma_areadch)
- if (!rpcrdma_prepare_msg_sges(ia, req, xdr, rtype))
- goto out_map;
-
- return true;
-
-out_map:
- pr_err("rpcrdma: failed to DMA map a Send buffer\n");
- return false;
-}
-
-void
-rpcrdma_unmap_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
-{
- struct ib_device *device = ia->ri_device;
- struct ib_sge *sge;
- int count;
+ if (!rpcrdma_prepare_msg_sges(&r_xprt->rx_ia, req, xdr, rtype))
+ return -EIO;
- sge = &req->rl_send_sge[2];
- for (count = req->rl_mapped_sges; count--; sge++)
- ib_dma_unmap_page(device, sge->addr, sge->length,
- DMA_TO_DEVICE);
- req->rl_mapped_sges = 0;
+ return 0;
}
/**
@@ -833,12 +873,10 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
transfertypes[rtype], transfertypes[wtype],
xdr_stream_pos(xdr));
- if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req,
- xdr_stream_pos(xdr),
- &rqst->rq_snd_buf, rtype)) {
- ret = -EIO;
+ ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr),
+ &rqst->rq_snd_buf, rtype);
+ if (ret)
goto out_err;
- }
return 0;
out_err:
@@ -970,14 +1008,13 @@ rpcrdma_mark_remote_invalidation(struct list_head *mws,
* straightforward to check the RPC header's direction field.
*/
static bool
-rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
- __be32 xid, __be32 proc)
+rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
{
struct xdr_stream *xdr = &rep->rr_stream;
__be32 *p;
- if (proc != rdma_msg)
+ if (rep->rr_proc != rdma_msg)
return false;
/* Peek at stream contents without advancing. */
@@ -992,7 +1029,7 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
return false;
/* RPC header */
- if (*p++ != xid)
+ if (*p++ != rep->rr_xid)
return false;
if (*p != cpu_to_be32(RPC_CALL))
return false;
@@ -1212,105 +1249,170 @@ rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
return -EREMOTEIO;
}
+/* Perform XID lookup, reconstruction of the RPC reply, and
+ * RPC completion while holding the transport lock to ensure
+ * the rep, rqst, and rq_task pointers remain stable.
+ */
+void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
+{
+ struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
+ struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+ struct rpc_rqst *rqst = rep->rr_rqst;
+ unsigned long cwnd;
+ int status;
+
+ xprt->reestablish_timeout = 0;
+
+ switch (rep->rr_proc) {
+ case rdma_msg:
+ status = rpcrdma_decode_msg(r_xprt, rep, rqst);
+ break;
+ case rdma_nomsg:
+ status = rpcrdma_decode_nomsg(r_xprt, rep);
+ break;
+ case rdma_error:
+ status = rpcrdma_decode_error(r_xprt, rep, rqst);
+ break;
+ default:
+ status = -EIO;
+ }
+ if (status < 0)
+ goto out_badheader;
+
+out:
+ spin_lock(&xprt->recv_lock);
+ cwnd = xprt->cwnd;
+ xprt->cwnd = r_xprt->rx_buf.rb_credits << RPC_CWNDSHIFT;
+ if (xprt->cwnd > cwnd)
+ xprt_release_rqst_cong(rqst->rq_task);
+
+ xprt_complete_rqst(rqst->rq_task, status);
+ xprt_unpin_rqst(rqst);
+ spin_unlock(&xprt->recv_lock);
+ return;
+
+/* If the incoming reply terminated a pending RPC, the next
+ * RPC call will post a replacement receive buffer as it is
+ * being marshaled.
+ */
+out_badheader:
+ dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n",
+ rqst->rq_task->tk_pid, __func__, be32_to_cpu(rep->rr_proc));
+ r_xprt->rx_stats.bad_reply_count++;
+ status = -EIO;
+ goto out;
+}
+
+void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+{
+ /* Invalidate and unmap the data payloads before waking
+ * the waiting application. This guarantees the memory
+ * regions are properly fenced from the server before the
+ * application accesses the data. It also ensures proper
+ * send flow control: waking the next RPC waits until this
+ * RPC has relinquished all its Send Queue entries.
+ */
+ if (!list_empty(&req->rl_registered))
+ r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
+ &req->rl_registered);
+
+ /* Ensure that any DMA mapped pages associated with
+ * the Send of the RPC Call have been unmapped before
+ * allowing the RPC to complete. This protects argument
+ * memory not controlled by the RPC client from being
+ * re-used before we're done with it.
+ */
+ if (test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
+ r_xprt->rx_stats.reply_waits_for_send++;
+ out_of_line_wait_on_bit(&req->rl_flags,
+ RPCRDMA_REQ_F_TX_RESOURCES,
+ bit_wait,
+ TASK_UNINTERRUPTIBLE);
+ }
+}
+
+/* Reply handling runs in the poll worker thread. Anything that
+ * might wait is deferred to a separate workqueue.
+ */
+void rpcrdma_deferred_completion(struct work_struct *work)
+{
+ struct rpcrdma_rep *rep =
+ container_of(work, struct rpcrdma_rep, rr_work);
+ struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst);
+
+ rpcrdma_mark_remote_invalidation(&req->rl_registered, rep);
+ rpcrdma_release_rqst(rep->rr_rxprt, req);
+ rpcrdma_complete_rqst(rep);
+}
+
/* Process received RPC/RDMA messages.
*
* Errors must result in the RPC task either being awakened, or
* allowed to timeout, to discover the errors at that time.
*/
-void
-rpcrdma_reply_handler(struct work_struct *work)
+void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
{
- struct rpcrdma_rep *rep =
- container_of(work, struct rpcrdma_rep, rr_work);
struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
- struct xdr_stream *xdr = &rep->rr_stream;
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_req *req;
struct rpc_rqst *rqst;
- __be32 *p, xid, vers, proc;
- unsigned long cwnd;
- int status;
+ u32 credits;
+ __be32 *p;
dprintk("RPC: %s: incoming rep %p\n", __func__, rep);
if (rep->rr_hdrbuf.head[0].iov_len == 0)
goto out_badstatus;
- xdr_init_decode(xdr, &rep->rr_hdrbuf,
+ xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
rep->rr_hdrbuf.head[0].iov_base);
/* Fixed transport header fields */
- p = xdr_inline_decode(xdr, 4 * sizeof(*p));
+ p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
if (unlikely(!p))
goto out_shortreply;
- xid = *p++;
- vers = *p++;
- p++; /* credits */
- proc = *p++;
+ rep->rr_xid = *p++;
+ rep->rr_vers = *p++;
+ credits = be32_to_cpu(*p++);
+ rep->rr_proc = *p++;
+
+ if (rep->rr_vers != rpcrdma_version)
+ goto out_badversion;
- if (rpcrdma_is_bcall(r_xprt, rep, xid, proc))
+ if (rpcrdma_is_bcall(r_xprt, rep))
return;
/* Match incoming rpcrdma_rep to an rpcrdma_req to
* get context for handling any incoming chunks.
*/
spin_lock(&xprt->recv_lock);
- rqst = xprt_lookup_rqst(xprt, xid);
+ rqst = xprt_lookup_rqst(xprt, rep->rr_xid);
if (!rqst)
goto out_norqst;
xprt_pin_rqst(rqst);
+
+ if (credits == 0)
+ credits = 1; /* don't deadlock */
+ else if (credits > buf->rb_max_requests)
+ credits = buf->rb_max_requests;
+ buf->rb_credits = credits;
+
spin_unlock(&xprt->recv_lock);
+
req = rpcr_to_rdmar(rqst);
req->rl_reply = rep;
+ rep->rr_rqst = rqst;
+ clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n",
- __func__, rep, req, be32_to_cpu(xid));
-
- /* Invalidate and unmap the data payloads before waking the
- * waiting application. This guarantees the memory regions
- * are properly fenced from the server before the application
- * accesses the data. It also ensures proper send flow control:
- * waking the next RPC waits until this RPC has relinquished
- * all its Send Queue entries.
- */
- if (!list_empty(&req->rl_registered)) {
- rpcrdma_mark_remote_invalidation(&req->rl_registered, rep);
- r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
- &req->rl_registered);
- }
-
- xprt->reestablish_timeout = 0;
- if (vers != rpcrdma_version)
- goto out_badversion;
+ __func__, rep, req, be32_to_cpu(rep->rr_xid));
- switch (proc) {
- case rdma_msg:
- status = rpcrdma_decode_msg(r_xprt, rep, rqst);
- break;
- case rdma_nomsg:
- status = rpcrdma_decode_nomsg(r_xprt, rep);
- break;
- case rdma_error:
- status = rpcrdma_decode_error(r_xprt, rep, rqst);
- break;
- default:
- status = -EIO;
- }
- if (status < 0)
- goto out_badheader;
-
-out:
- spin_lock(&xprt->recv_lock);
- cwnd = xprt->cwnd;
- xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT;
- if (xprt->cwnd > cwnd)
- xprt_release_rqst_cong(rqst->rq_task);
-
- xprt_complete_rqst(rqst->rq_task, status);
- xprt_unpin_rqst(rqst);
- spin_unlock(&xprt->recv_lock);
- dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
- __func__, xprt, rqst, status);
+ if (list_empty(&req->rl_registered) &&
+ !test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags))
+ rpcrdma_complete_rqst(rep);
+ else
+ queue_work(rpcrdma_receive_wq, &rep->rr_work);
return;
out_badstatus:
@@ -1321,37 +1423,22 @@ out_badstatus:
}
return;
-/* If the incoming reply terminated a pending RPC, the next
- * RPC call will post a replacement receive buffer as it is
- * being marshaled.
- */
out_badversion:
dprintk("RPC: %s: invalid version %d\n",
- __func__, be32_to_cpu(vers));
- status = -EIO;
- r_xprt->rx_stats.bad_reply_count++;
- goto out;
-
-out_badheader:
- dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n",
- rqst->rq_task->tk_pid, __func__, be32_to_cpu(proc));
- r_xprt->rx_stats.bad_reply_count++;
- status = -EIO;
- goto out;
+ __func__, be32_to_cpu(rep->rr_vers));
+ goto repost;
-/* The req was still available, but by the time the recv_lock
- * was acquired, the rqst and task had been released. Thus the RPC
- * has already been terminated.
+/* The RPC transaction has already been terminated, or the header
+ * is corrupt.
*/
out_norqst:
spin_unlock(&xprt->recv_lock);
dprintk("RPC: %s: no match for incoming xid 0x%08x\n",
- __func__, be32_to_cpu(xid));
+ __func__, be32_to_cpu(rep->rr_xid));
goto repost;
out_shortreply:
dprintk("RPC: %s: short/invalid reply\n", __func__);
- goto repost;
/* If no pending RPC transaction was matched, post a replacement
* receive buffer before returning.
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index c84e2b644e13..646c24494ea7 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -678,16 +679,14 @@ xprt_rdma_free(struct rpc_task *task)
struct rpc_rqst *rqst = task->tk_rqstp;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
- struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- if (req->rl_backchannel)
+ if (test_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags))
return;
dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
- if (!list_empty(&req->rl_registered))
- ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task));
- rpcrdma_unmap_sges(ia, req);
+ if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
+ rpcrdma_release_rqst(r_xprt, req);
rpcrdma_buffer_put(req);
}
@@ -728,7 +727,8 @@ xprt_rdma_send_request(struct rpc_task *task)
/* On retransmit, remove any previously registered chunks */
if (unlikely(!list_empty(&req->rl_registered)))
- r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false);
+ r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
+ &req->rl_registered);
rc = rpcrdma_marshal_req(r_xprt, rqst);
if (rc < 0)
@@ -742,6 +742,7 @@ xprt_rdma_send_request(struct rpc_task *task)
goto drop_connection;
req->rl_connect_cookie = xprt->connect_cookie;
+ set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
goto drop_connection;
@@ -789,11 +790,13 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
r_xprt->rx_stats.failed_marshal_count,
r_xprt->rx_stats.bad_reply_count,
r_xprt->rx_stats.nomsg_call_count);
- seq_printf(seq, "%lu %lu %lu %lu\n",
+ seq_printf(seq, "%lu %lu %lu %lu %lu %lu\n",
r_xprt->rx_stats.mrs_recovered,
r_xprt->rx_stats.mrs_orphaned,
r_xprt->rx_stats.mrs_allocated,
- r_xprt->rx_stats.local_inv_needed);
+ r_xprt->rx_stats.local_inv_needed,
+ r_xprt->rx_stats.empty_sendctx_q,
+ r_xprt->rx_stats.reply_waits_for_send);
}
static int
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 11a1fbf7e59e..710b3f77db82 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -49,9 +50,10 @@
#include <linux/interrupt.h>
#include <linux/slab.h>
-#include <linux/prefetch.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/svc_rdma.h>
+
+#include <asm-generic/barrier.h>
#include <asm/bitops.h>
#include <rdma/ib_cm.h>
@@ -73,7 +75,7 @@ static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf);
static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
-static struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
+struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
int
rpcrdma_alloc_wq(void)
@@ -126,30 +128,17 @@ rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
static void
rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
{
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct rpcrdma_sendctx *sc =
+ container_of(cqe, struct rpcrdma_sendctx, sc_cqe);
+
/* WARNING: Only wr_cqe and status are reliable at this point */
if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status),
wc->status, wc->vendor_err);
-}
-
-/* Perform basic sanity checking to avoid using garbage
- * to update the credit grant value.
- */
-static void
-rpcrdma_update_granted_credits(struct rpcrdma_rep *rep)
-{
- struct rpcrdma_buffer *buffer = &rep->rr_rxprt->rx_buf;
- __be32 *p = rep->rr_rdmabuf->rg_base;
- u32 credits;
- credits = be32_to_cpup(p + 2);
- if (credits == 0)
- credits = 1; /* don't deadlock */
- else if (credits > buffer->rb_max_requests)
- credits = buffer->rb_max_requests;
-
- atomic_set(&buffer->rb_credits, credits);
+ rpcrdma_sendctx_put_locked(sc);
}
/**
@@ -181,11 +170,8 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
rdmab_addr(rep->rr_rdmabuf),
wc->byte_len, DMA_FROM_DEVICE);
- if (wc->byte_len >= RPCRDMA_HDRLEN_ERR)
- rpcrdma_update_granted_credits(rep);
-
out_schedule:
- queue_work(rpcrdma_receive_wq, &rep->rr_work);
+ rpcrdma_reply_handler(rep);
return;
out_fail:
@@ -295,7 +281,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
case RDMA_CM_EVENT_DISCONNECTED:
connstate = -ECONNABORTED;
connected:
- atomic_set(&xprt->rx_buf.rb_credits, 1);
+ xprt->rx_buf.rb_credits = 1;
ep->rep_connected = connstate;
rpcrdma_conn_func(ep);
wake_up_all(&ep->rep_connect_wait);
@@ -564,16 +550,15 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.cap.max_recv_sge);
/* set trigger for requesting send completion */
- ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
- if (ep->rep_cqinit <= 2)
- ep->rep_cqinit = 0; /* always signal? */
- rpcrdma_init_cqcount(ep, 0);
+ ep->rep_send_batch = min_t(unsigned int, RPCRDMA_MAX_SEND_BATCH,
+ cdata->max_requests >> 2);
+ ep->rep_send_count = ep->rep_send_batch;
init_waitqueue_head(&ep->rep_connect_wait);
INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
sendcq = ib_alloc_cq(ia->ri_device, NULL,
ep->rep_attr.cap.max_send_wr + 1,
- 0, IB_POLL_SOFTIRQ);
+ 1, IB_POLL_WORKQUEUE);
if (IS_ERR(sendcq)) {
rc = PTR_ERR(sendcq);
dprintk("RPC: %s: failed to create send CQ: %i\n",
@@ -583,7 +568,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
recvcq = ib_alloc_cq(ia->ri_device, NULL,
ep->rep_attr.cap.max_recv_wr + 1,
- 0, IB_POLL_SOFTIRQ);
+ 0, IB_POLL_WORKQUEUE);
if (IS_ERR(recvcq)) {
rc = PTR_ERR(recvcq);
dprintk("RPC: %s: failed to create recv CQ: %i\n",
@@ -846,6 +831,168 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
ib_drain_qp(ia->ri_id->qp);
}
+/* Fixed-size circular FIFO queue. This implementation is wait-free and
+ * lock-free.
+ *
+ * Consumer is the code path that posts Sends. This path dequeues a
+ * sendctx for use by a Send operation. Multiple consumer threads
+ * are serialized by the RPC transport lock, which allows only one
+ * ->send_request call at a time.
+ *
+ * Producer is the code path that handles Send completions. This path
+ * enqueues a sendctx that has been completed. Multiple producer
+ * threads are serialized by the ib_poll_cq() function.
+ */
+
+/* rpcrdma_sendctxs_destroy() assumes caller has already quiesced
+ * queue activity, and ib_drain_qp has flushed all remaining Send
+ * requests.
+ */
+static void rpcrdma_sendctxs_destroy(struct rpcrdma_buffer *buf)
+{
+ unsigned long i;
+
+ for (i = 0; i <= buf->rb_sc_last; i++)
+ kfree(buf->rb_sc_ctxs[i]);
+ kfree(buf->rb_sc_ctxs);
+}
+
+static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ia *ia)
+{
+ struct rpcrdma_sendctx *sc;
+
+ sc = kzalloc(sizeof(*sc) +
+ ia->ri_max_send_sges * sizeof(struct ib_sge),
+ GFP_KERNEL);
+ if (!sc)
+ return NULL;
+
+ sc->sc_wr.wr_cqe = &sc->sc_cqe;
+ sc->sc_wr.sg_list = sc->sc_sges;
+ sc->sc_wr.opcode = IB_WR_SEND;
+ sc->sc_cqe.done = rpcrdma_wc_send;
+ return sc;
+}
+
+static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt)
+{
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+ struct rpcrdma_sendctx *sc;
+ unsigned long i;
+
+ /* Maximum number of concurrent outstanding Send WRs. Capping
+ * the circular queue size stops Send Queue overflow by causing
+ * the ->send_request call to fail temporarily before too many
+ * Sends are posted.
+ */
+ i = buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS;
+ dprintk("RPC: %s: allocating %lu send_ctxs\n", __func__, i);
+ buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL);
+ if (!buf->rb_sc_ctxs)
+ return -ENOMEM;
+
+ buf->rb_sc_last = i - 1;
+ for (i = 0; i <= buf->rb_sc_last; i++) {
+ sc = rpcrdma_sendctx_create(&r_xprt->rx_ia);
+ if (!sc)
+ goto out_destroy;
+
+ sc->sc_xprt = r_xprt;
+ buf->rb_sc_ctxs[i] = sc;
+ }
+
+ return 0;
+
+out_destroy:
+ rpcrdma_sendctxs_destroy(buf);
+ return -ENOMEM;
+}
+
+/* The sendctx queue is not guaranteed to have a size that is a
+ * power of two, thus the helpers in circ_buf.h cannot be used.
+ * The other option is to use modulus (%), which can be expensive.
+ */
+static unsigned long rpcrdma_sendctx_next(struct rpcrdma_buffer *buf,
+ unsigned long item)
+{
+ return likely(item < buf->rb_sc_last) ? item + 1 : 0;
+}
+
+/**
+ * rpcrdma_sendctx_get_locked - Acquire a send context
+ * @buf: transport buffers from which to acquire an unused context
+ *
+ * Returns pointer to a free send completion context; or NULL if
+ * the queue is empty.
+ *
+ * Usage: Called to acquire an SGE array before preparing a Send WR.
+ *
+ * The caller serializes calls to this function (per rpcrdma_buffer),
+ * and provides an effective memory barrier that flushes the new value
+ * of rb_sc_head.
+ */
+struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf)
+{
+ struct rpcrdma_xprt *r_xprt;
+ struct rpcrdma_sendctx *sc;
+ unsigned long next_head;
+
+ next_head = rpcrdma_sendctx_next(buf, buf->rb_sc_head);
+
+ if (next_head == READ_ONCE(buf->rb_sc_tail))
+ goto out_emptyq;
+
+ /* ORDER: item must be accessed _before_ head is updated */
+ sc = buf->rb_sc_ctxs[next_head];
+
+ /* Releasing the lock in the caller acts as a memory
+ * barrier that flushes rb_sc_head.
+ */
+ buf->rb_sc_head = next_head;
+
+ return sc;
+
+out_emptyq:
+ /* The queue is "empty" if there have not been enough Send
+ * completions recently. This is a sign the Send Queue is
+ * backing up. Cause the caller to pause and try again.
+ */
+ dprintk("RPC: %s: empty sendctx queue\n", __func__);
+ r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf);
+ r_xprt->rx_stats.empty_sendctx_q++;
+ return NULL;
+}
+
+/**
+ * rpcrdma_sendctx_put_locked - Release a send context
+ * @sc: send context to release
+ *
+ * Usage: Called from Send completion to return a sendctxt
+ * to the queue.
+ *
+ * The caller serializes calls to this function (per rpcrdma_buffer).
+ */
+void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
+{
+ struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf;
+ unsigned long next_tail;
+
+ /* Unmap SGEs of previously completed by unsignaled
+ * Sends by walking up the queue until @sc is found.
+ */
+ next_tail = buf->rb_sc_tail;
+ do {
+ next_tail = rpcrdma_sendctx_next(buf, next_tail);
+
+ /* ORDER: item must be accessed _before_ tail is updated */
+ rpcrdma_unmap_sendctx(buf->rb_sc_ctxs[next_tail]);
+
+ } while (buf->rb_sc_ctxs[next_tail] != sc);
+
+ /* Paired with READ_ONCE */
+ smp_store_release(&buf->rb_sc_tail, next_tail);
+}
+
static void
rpcrdma_mr_recovery_worker(struct work_struct *work)
{
@@ -941,13 +1088,8 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
spin_lock(&buffer->rb_reqslock);
list_add(&req->rl_all, &buffer->rb_allreqs);
spin_unlock(&buffer->rb_reqslock);
- req->rl_cqe.done = rpcrdma_wc_send;
req->rl_buffer = &r_xprt->rx_buf;
INIT_LIST_HEAD(&req->rl_registered);
- req->rl_send_wr.next = NULL;
- req->rl_send_wr.wr_cqe = &req->rl_cqe;
- req->rl_send_wr.sg_list = req->rl_send_sge;
- req->rl_send_wr.opcode = IB_WR_SEND;
return req;
}
@@ -974,7 +1116,7 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
rep->rr_cqe.done = rpcrdma_wc_receive;
rep->rr_rxprt = r_xprt;
- INIT_WORK(&rep->rr_work, rpcrdma_reply_handler);
+ INIT_WORK(&rep->rr_work, rpcrdma_deferred_completion);
rep->rr_recv_wr.next = NULL;
rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
@@ -995,7 +1137,6 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
buf->rb_max_requests = r_xprt->rx_data.max_requests;
buf->rb_bc_srv_max_requests = 0;
- atomic_set(&buf->rb_credits, 1);
spin_lock_init(&buf->rb_mwlock);
spin_lock_init(&buf->rb_lock);
spin_lock_init(&buf->rb_recovery_lock);
@@ -1022,7 +1163,6 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
rc = PTR_ERR(req);
goto out;
}
- req->rl_backchannel = false;
list_add(&req->rl_list, &buf->rb_send_bufs);
}
@@ -1040,6 +1180,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
list_add(&rep->rr_list, &buf->rb_recv_bufs);
}
+ rc = rpcrdma_sendctxs_create(r_xprt);
+ if (rc)
+ goto out;
+
return 0;
out:
rpcrdma_buffer_destroy(buf);
@@ -1116,6 +1260,8 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
cancel_delayed_work_sync(&buf->rb_recovery_worker);
cancel_delayed_work_sync(&buf->rb_refresh_worker);
+ rpcrdma_sendctxs_destroy(buf);
+
while (!list_empty(&buf->rb_recv_bufs)) {
struct rpcrdma_rep *rep;
@@ -1231,7 +1377,6 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
struct rpcrdma_buffer *buffers = req->rl_buffer;
struct rpcrdma_rep *rep = req->rl_reply;
- req->rl_send_wr.num_sge = 0;
req->rl_reply = NULL;
spin_lock(&buffers->rb_lock);
@@ -1363,7 +1508,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
struct rpcrdma_ep *ep,
struct rpcrdma_req *req)
{
- struct ib_send_wr *send_wr = &req->rl_send_wr;
+ struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr;
struct ib_send_wr *send_wr_fail;
int rc;
@@ -1377,7 +1522,14 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
dprintk("RPC: %s: posting %d s/g entries\n",
__func__, send_wr->num_sge);
- rpcrdma_set_signaled(ep, send_wr);
+ if (!ep->rep_send_count ||
+ test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
+ send_wr->send_flags |= IB_SEND_SIGNALED;
+ ep->rep_send_count = ep->rep_send_batch;
+ } else {
+ send_wr->send_flags &= ~IB_SEND_SIGNALED;
+ --ep->rep_send_count;
+ }
rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
if (rc)
goto out_postsend_err;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index e26a97d2f922..51686d9eac5f 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -93,8 +94,8 @@ enum {
*/
struct rpcrdma_ep {
- atomic_t rep_cqcount;
- int rep_cqinit;
+ unsigned int rep_send_count;
+ unsigned int rep_send_batch;
int rep_connected;
struct ib_qp_init_attr rep_attr;
wait_queue_head_t rep_connect_wait;
@@ -104,25 +105,6 @@ struct rpcrdma_ep {
struct delayed_work rep_connect_worker;
};
-static inline void
-rpcrdma_init_cqcount(struct rpcrdma_ep *ep, int count)
-{
- atomic_set(&ep->rep_cqcount, ep->rep_cqinit - count);
-}
-
-/* To update send queue accounting, provider must take a
- * send completion every now and then.
- */
-static inline void
-rpcrdma_set_signaled(struct rpcrdma_ep *ep, struct ib_send_wr *send_wr)
-{
- send_wr->send_flags = 0;
- if (unlikely(atomic_sub_return(1, &ep->rep_cqcount) <= 0)) {
- rpcrdma_init_cqcount(ep, 0);
- send_wr->send_flags = IB_SEND_SIGNALED;
- }
-}
-
/* Pre-allocate extra Work Requests for handling backward receives
* and sends. This is a fixed value because the Work Queues are
* allocated when the forward channel is set up.
@@ -164,12 +146,6 @@ rdmab_lkey(struct rpcrdma_regbuf *rb)
return rb->rg_iov.lkey;
}
-static inline struct rpcrdma_msg *
-rdmab_to_msg(struct rpcrdma_regbuf *rb)
-{
- return (struct rpcrdma_msg *)rb->rg_base;
-}
-
static inline struct ib_device *
rdmab_device(struct rpcrdma_regbuf *rb)
{
@@ -202,22 +178,24 @@ enum {
};
/*
- * struct rpcrdma_rep -- this structure encapsulates state required to recv
- * and complete a reply, asychronously. It needs several pieces of
- * state:
- * o recv buffer (posted to provider)
- * o ib_sge (also donated to provider)
- * o status of reply (length, success or not)
- * o bookkeeping state to get run by reply handler (list, etc)
+ * struct rpcrdma_rep -- this structure encapsulates state required
+ * to receive and complete an RPC Reply, asychronously. It needs
+ * several pieces of state:
*
- * These are allocated during initialization, per-transport instance.
+ * o receive buffer and ib_sge (donated to provider)
+ * o status of receive (success or not, length, inv rkey)
+ * o bookkeeping state to get run by reply handler (XDR stream)
*
- * N of these are associated with a transport instance, and stored in
- * struct rpcrdma_buffer. N is the max number of outstanding requests.
+ * These structures are allocated during transport initialization.
+ * N of these are associated with a transport instance, managed by
+ * struct rpcrdma_buffer. N is the max number of outstanding RPCs.
*/
struct rpcrdma_rep {
struct ib_cqe rr_cqe;
+ __be32 rr_xid;
+ __be32 rr_vers;
+ __be32 rr_proc;
int rr_wc_flags;
u32 rr_inv_rkey;
struct rpcrdma_regbuf *rr_rdmabuf;
@@ -225,10 +203,34 @@ struct rpcrdma_rep {
struct work_struct rr_work;
struct xdr_buf rr_hdrbuf;
struct xdr_stream rr_stream;
+ struct rpc_rqst *rr_rqst;
struct list_head rr_list;
struct ib_recv_wr rr_recv_wr;
};
+/* struct rpcrdma_sendctx - DMA mapped SGEs to unmap after Send completes
+ */
+struct rpcrdma_req;
+struct rpcrdma_xprt;
+struct rpcrdma_sendctx {
+ struct ib_send_wr sc_wr;
+ struct ib_cqe sc_cqe;
+ struct rpcrdma_xprt *sc_xprt;
+ struct rpcrdma_req *sc_req;
+ unsigned int sc_unmap_count;
+ struct ib_sge sc_sges[];
+};
+
+/* Limit the number of SGEs that can be unmapped during one
+ * Send completion. This caps the amount of work a single
+ * completion can do before returning to the provider.
+ *
+ * Setting this to zero disables Send completion batching.
+ */
+enum {
+ RPCRDMA_MAX_SEND_BATCH = 7,
+};
+
/*
* struct rpcrdma_mw - external memory region metadata
*
@@ -340,26 +342,30 @@ enum {
struct rpcrdma_buffer;
struct rpcrdma_req {
struct list_head rl_list;
- unsigned int rl_mapped_sges;
unsigned int rl_connect_cookie;
struct rpcrdma_buffer *rl_buffer;
struct rpcrdma_rep *rl_reply;
struct xdr_stream rl_stream;
struct xdr_buf rl_hdrbuf;
- struct ib_send_wr rl_send_wr;
- struct ib_sge rl_send_sge[RPCRDMA_MAX_SEND_SGES];
+ struct rpcrdma_sendctx *rl_sendctx;
struct rpcrdma_regbuf *rl_rdmabuf; /* xprt header */
struct rpcrdma_regbuf *rl_sendbuf; /* rq_snd_buf */
struct rpcrdma_regbuf *rl_recvbuf; /* rq_rcv_buf */
- struct ib_cqe rl_cqe;
struct list_head rl_all;
- bool rl_backchannel;
+ unsigned long rl_flags;
struct list_head rl_registered; /* registered segments */
struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];
};
+/* rl_flags */
+enum {
+ RPCRDMA_REQ_F_BACKCHANNEL = 0,
+ RPCRDMA_REQ_F_PENDING,
+ RPCRDMA_REQ_F_TX_RESOURCES,
+};
+
static inline void
rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req)
{
@@ -399,12 +405,17 @@ struct rpcrdma_buffer {
struct list_head rb_mws;
struct list_head rb_all;
+ unsigned long rb_sc_head;
+ unsigned long rb_sc_tail;
+ unsigned long rb_sc_last;
+ struct rpcrdma_sendctx **rb_sc_ctxs;
+
spinlock_t rb_lock; /* protect buf lists */
int rb_send_count, rb_recv_count;
struct list_head rb_send_bufs;
struct list_head rb_recv_bufs;
u32 rb_max_requests;
- atomic_t rb_credits; /* most recent credit grant */
+ u32 rb_credits; /* most recent credit grant */
u32 rb_bc_srv_max_requests;
spinlock_t rb_reqslock; /* protect rb_allreqs */
@@ -453,10 +464,12 @@ struct rpcrdma_stats {
unsigned long mrs_recovered;
unsigned long mrs_orphaned;
unsigned long mrs_allocated;
+ unsigned long empty_sendctx_q;
/* accessed when receiving a reply */
unsigned long long total_rdma_reply;
unsigned long long fixup_copy_count;
+ unsigned long reply_waits_for_send;
unsigned long local_inv_needed;
unsigned long nomsg_call_count;
unsigned long bcall_count;
@@ -473,8 +486,6 @@ struct rpcrdma_memreg_ops {
struct rpcrdma_mw **);
void (*ro_unmap_sync)(struct rpcrdma_xprt *,
struct list_head *);
- void (*ro_unmap_safe)(struct rpcrdma_xprt *,
- struct rpcrdma_req *, bool);
void (*ro_recover_mr)(struct rpcrdma_mw *);
int (*ro_open)(struct rpcrdma_ia *,
struct rpcrdma_ep *,
@@ -532,6 +543,8 @@ void rpcrdma_ia_close(struct rpcrdma_ia *);
bool frwr_is_supported(struct rpcrdma_ia *);
bool fmr_is_supported(struct rpcrdma_ia *);
+extern struct workqueue_struct *rpcrdma_receive_wq;
+
/*
* Endpoint calls - xprtrdma/verbs.c
*/
@@ -554,6 +567,8 @@ struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *);
void rpcrdma_destroy_req(struct rpcrdma_req *);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
+struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
+void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *);
void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
@@ -610,12 +625,18 @@ enum rpcrdma_chunktype {
rpcrdma_replych
};
-bool rpcrdma_prepare_send_sges(struct rpcrdma_ia *, struct rpcrdma_req *,
- u32, struct xdr_buf *, enum rpcrdma_chunktype);
-void rpcrdma_unmap_sges(struct rpcrdma_ia *, struct rpcrdma_req *);
+int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
+ struct rpcrdma_req *req, u32 hdrlen,
+ struct xdr_buf *xdr,
+ enum rpcrdma_chunktype rtype);
+void rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc);
int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst);
void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
-void rpcrdma_reply_handler(struct work_struct *work);
+void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
+void rpcrdma_reply_handler(struct rpcrdma_rep *rep);
+void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt,
+ struct rpcrdma_req *req);
+void rpcrdma_deferred_completion(struct work_struct *work);
static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
{
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 4dad5da388d6..9cc850c2719e 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -552,6 +552,7 @@ static int xs_local_send_request(struct rpc_task *task)
default:
dprintk("RPC: sendmsg returned unrecognized error %d\n",
-status);
+ /* fall through */
case -EPIPE:
xs_close(xprt);
status = -ENOTCONN;
@@ -1611,6 +1612,7 @@ static void xs_tcp_state_change(struct sock *sk)
xprt->connect_cookie++;
clear_bit(XPRT_CONNECTED, &xprt->state);
xs_tcp_force_close(xprt);
+ /* fall through */
case TCP_CLOSING:
/*
* If the server closed down the connection, make sure that
@@ -2368,6 +2370,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
switch (ret) {
case 0:
xs_set_srcport(transport, sock);
+ /* fall through */
case -EINPROGRESS:
/* SYN_SENT! */
if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
@@ -2419,6 +2422,7 @@ static void xs_tcp_setup_socket(struct work_struct *work)
default:
printk("%s: connect returned unhandled error %d\n",
__func__, status);
+ /* fall through */
case -EADDRNOTAVAIL:
/* We're probably in TIME_WAIT. Get rid of existing socket,
* and retry
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 0531b41d1f2d..74b9d916a58b 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -345,6 +345,8 @@ static size_t switchdev_obj_size(const struct switchdev_obj *obj)
return sizeof(struct switchdev_obj_port_vlan);
case SWITCHDEV_OBJ_ID_PORT_MDB:
return sizeof(struct switchdev_obj_port_mdb);
+ case SWITCHDEV_OBJ_ID_HOST_MDB:
+ return sizeof(struct switchdev_obj_port_mdb);
default:
BUG();
}
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 870b9b8f877a..6bce0b1117bd 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -239,7 +239,8 @@ static int link_is_up(struct tipc_link *l)
static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
struct sk_buff_head *xmitq);
static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
- u16 rcvgap, int tolerance, int priority,
+ bool probe_reply, u16 rcvgap,
+ int tolerance, int priority,
struct sk_buff_head *xmitq);
static void link_print(struct tipc_link *l, const char *str);
static int tipc_link_build_nack_msg(struct tipc_link *l,
@@ -773,7 +774,7 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
}
if (state || probe || setup)
- tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, xmitq);
+ tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, 0, xmitq);
return rc;
}
@@ -1174,7 +1175,7 @@ int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
/* Unicast ACK */
l->rcv_unacked = 0;
l->stats.sent_acks++;
- tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq);
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq);
return 0;
}
@@ -1188,7 +1189,7 @@ void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
if (l->state == LINK_ESTABLISHING)
mtyp = ACTIVATE_MSG;
- tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, xmitq);
+ tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, 0, xmitq);
/* Inform peer that this endpoint is going down if applicable */
skb = skb_peek_tail(xmitq);
@@ -1215,7 +1216,7 @@ static int tipc_link_build_nack_msg(struct tipc_link *l,
}
if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV))
- tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq);
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq);
return 0;
}
@@ -1289,7 +1290,8 @@ drop:
}
static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
- u16 rcvgap, int tolerance, int priority,
+ bool probe_reply, u16 rcvgap,
+ int tolerance, int priority,
struct sk_buff_head *xmitq)
{
struct tipc_link *bcl = l->bc_rcvlink;
@@ -1337,6 +1339,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
msg_set_seq_gap(hdr, rcvgap);
msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl));
msg_set_probe(hdr, probe);
+ msg_set_is_keepalive(hdr, probe || probe_reply);
tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id);
msg_set_size(hdr, INT_H_SIZE + dlen);
skb_trim(skb, INT_H_SIZE + dlen);
@@ -1442,6 +1445,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
u16 rcv_nxt = l->rcv_nxt;
u16 dlen = msg_data_sz(hdr);
int mtyp = msg_type(hdr);
+ bool reply = msg_probe(hdr);
void *data;
char *if_name;
int rc = 0;
@@ -1528,9 +1532,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
/* Send NACK if peer has sent pkts we haven't received yet */
if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l))
rcvgap = peers_snd_nxt - l->rcv_nxt;
- if (rcvgap || (msg_probe(hdr)))
- tipc_link_build_proto_msg(l, STATE_MSG, 0, rcvgap,
- 0, 0, xmitq);
+ if (rcvgap || reply)
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, reply,
+ rcvgap, 0, 0, xmitq);
tipc_link_release_pkts(l, ack);
/* If NACK, retransmit will now start at right position */
@@ -2122,14 +2126,14 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
struct sk_buff_head *xmitq)
{
l->tolerance = tol;
- tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, tol, 0, xmitq);
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq);
}
void tipc_link_set_prio(struct tipc_link *l, u32 prio,
struct sk_buff_head *xmitq)
{
l->priority = prio;
- tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, prio, xmitq);
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, prio, xmitq);
}
void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit)
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 1649d456e22d..b0d07b35909d 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -174,7 +174,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
if (fragid == LAST_FRAGMENT) {
TIPC_SKB_CB(head)->validated = false;
- if (unlikely(!tipc_msg_validate(head)))
+ if (unlikely(!tipc_msg_validate(&head)))
goto err;
*buf = head;
TIPC_SKB_CB(head)->tail = NULL;
@@ -201,11 +201,21 @@ err:
* TIPC will ignore the excess, under the assumption that it is optional info
* introduced by a later release of the protocol.
*/
-bool tipc_msg_validate(struct sk_buff *skb)
+bool tipc_msg_validate(struct sk_buff **_skb)
{
- struct tipc_msg *msg;
+ struct sk_buff *skb = *_skb;
+ struct tipc_msg *hdr;
int msz, hsz;
+ /* Ensure that flow control ratio condition is satisfied */
+ if (unlikely(skb->truesize / buf_roundup_len(skb) > 4)) {
+ skb = skb_copy(skb, GFP_ATOMIC);
+ if (!skb)
+ return false;
+ kfree_skb(*_skb);
+ *_skb = skb;
+ }
+
if (unlikely(TIPC_SKB_CB(skb)->validated))
return true;
if (unlikely(!pskb_may_pull(skb, MIN_H_SIZE)))
@@ -217,11 +227,11 @@ bool tipc_msg_validate(struct sk_buff *skb)
if (unlikely(!pskb_may_pull(skb, hsz)))
return false;
- msg = buf_msg(skb);
- if (unlikely(msg_version(msg) != TIPC_VERSION))
+ hdr = buf_msg(skb);
+ if (unlikely(msg_version(hdr) != TIPC_VERSION))
return false;
- msz = msg_size(msg);
+ msz = msg_size(hdr);
if (unlikely(msz < hsz))
return false;
if (unlikely((msz - hsz) > TIPC_MAX_USER_MSG_SIZE))
@@ -411,7 +421,7 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos)
skb_pull(*iskb, offset);
imsz = msg_size(buf_msg(*iskb));
skb_trim(*iskb, imsz);
- if (unlikely(!tipc_msg_validate(*iskb)))
+ if (unlikely(!tipc_msg_validate(iskb)))
goto none;
*pos += align(imsz);
return true;
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index cedf811317fb..3e4384c222f7 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -226,6 +226,16 @@ static inline void msg_set_dest_droppable(struct tipc_msg *m, u32 d)
msg_set_bits(m, 0, 19, 1, d);
}
+static inline int msg_is_keepalive(struct tipc_msg *m)
+{
+ return msg_bits(m, 0, 19, 1);
+}
+
+static inline void msg_set_is_keepalive(struct tipc_msg *m, u32 d)
+{
+ msg_set_bits(m, 0, 19, 1, d);
+}
+
static inline int msg_src_droppable(struct tipc_msg *m)
{
return msg_bits(m, 0, 18, 1);
@@ -916,7 +926,7 @@ static inline bool msg_is_reset(struct tipc_msg *hdr)
}
struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp);
-bool tipc_msg_validate(struct sk_buff *skb);
+bool tipc_msg_validate(struct sk_buff **_skb);
bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err);
void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb,
struct sk_buff_head *xmitq);
@@ -944,6 +954,11 @@ static inline u16 buf_seqno(struct sk_buff *skb)
return msg_seqno(buf_msg(skb));
}
+static inline int buf_roundup_len(struct sk_buff *skb)
+{
+ return (skb->len / 1024 + 1) * 1024;
+}
+
/* tipc_skb_peek(): peek and reserve first buffer in list
* @list: list to be peeked in
* Returns pointer to first buffer in list, if any
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 009a81631280..507017fe0f1b 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1539,7 +1539,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
__skb_queue_head_init(&xmitq);
/* Ensure message is well-formed before touching the header */
- if (unlikely(!tipc_msg_validate(skb)))
+ if (unlikely(!tipc_msg_validate(&skb)))
goto discard;
hdr = buf_msg(skb);
usr = msg_user(hdr);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 60aff60e30ad..e07ee3ae0023 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -45,8 +45,18 @@ MODULE_AUTHOR("Mellanox Technologies");
MODULE_DESCRIPTION("Transport Layer Security Support");
MODULE_LICENSE("Dual BSD/GPL");
-static struct proto tls_base_prot;
-static struct proto tls_sw_prot;
+enum {
+ TLS_BASE_TX,
+ TLS_SW_TX,
+ TLS_NUM_CONFIG,
+};
+
+static struct proto tls_prots[TLS_NUM_CONFIG];
+
+static inline void update_sk_prot(struct sock *sk, struct tls_context *ctx)
+{
+ sk->sk_prot = &tls_prots[ctx->tx_conf];
+}
int wait_on_pending_writer(struct sock *sk, long *timeo)
{
@@ -216,6 +226,12 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
void (*sk_proto_close)(struct sock *sk, long timeout);
lock_sock(sk);
+ sk_proto_close = ctx->sk_proto_close;
+
+ if (ctx->tx_conf == TLS_BASE_TX) {
+ kfree(ctx);
+ goto skip_tx_cleanup;
+ }
if (!tls_complete_pending_work(sk, ctx, 0, &timeo))
tls_handle_open_record(sk, 0);
@@ -232,13 +248,14 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
sg++;
}
}
- ctx->free_resources(sk);
+
kfree(ctx->rec_seq);
kfree(ctx->iv);
- sk_proto_close = ctx->sk_proto_close;
- kfree(ctx);
+ if (ctx->tx_conf == TLS_SW_TX)
+ tls_sw_free_tx_resources(sk);
+skip_tx_cleanup:
release_sock(sk);
sk_proto_close(sk, timeout);
}
@@ -338,46 +355,41 @@ static int tls_getsockopt(struct sock *sk, int level, int optname,
static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
unsigned int optlen)
{
- struct tls_crypto_info *crypto_info, tmp_crypto_info;
+ struct tls_crypto_info *crypto_info;
struct tls_context *ctx = tls_get_ctx(sk);
- struct proto *prot = NULL;
int rc = 0;
+ int tx_conf;
if (!optval || (optlen < sizeof(*crypto_info))) {
rc = -EINVAL;
goto out;
}
- rc = copy_from_user(&tmp_crypto_info, optval, sizeof(*crypto_info));
+ crypto_info = &ctx->crypto_send;
+ /* Currently we don't support set crypto info more than one time */
+ if (TLS_CRYPTO_INFO_READY(crypto_info))
+ goto out;
+
+ rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info));
if (rc) {
rc = -EFAULT;
goto out;
}
/* check version */
- if (tmp_crypto_info.version != TLS_1_2_VERSION) {
+ if (crypto_info->version != TLS_1_2_VERSION) {
rc = -ENOTSUPP;
- goto out;
+ goto err_crypto_info;
}
- /* get user crypto info */
- crypto_info = &ctx->crypto_send;
-
- /* Currently we don't support set crypto info more than one time */
- if (TLS_CRYPTO_INFO_READY(crypto_info))
- goto out;
-
- switch (tmp_crypto_info.cipher_type) {
+ switch (crypto_info->cipher_type) {
case TLS_CIPHER_AES_GCM_128: {
if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) {
rc = -EINVAL;
goto out;
}
- rc = copy_from_user(
- crypto_info,
- optval,
- sizeof(struct tls12_crypto_info_aes_gcm_128));
-
+ rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info),
+ optlen - sizeof(*crypto_info));
if (rc) {
rc = -EFAULT;
goto err_crypto_info;
@@ -389,18 +401,16 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
goto out;
}
- ctx->sk_write_space = sk->sk_write_space;
- sk->sk_write_space = tls_write_space;
-
- ctx->sk_proto_close = sk->sk_prot->close;
-
/* currently SW is default, we will have ethtool in future */
rc = tls_set_sw_offload(sk, ctx);
- prot = &tls_sw_prot;
+ tx_conf = TLS_SW_TX;
if (rc)
goto err_crypto_info;
- sk->sk_prot = prot;
+ ctx->tx_conf = tx_conf;
+ update_sk_prot(sk, ctx);
+ ctx->sk_write_space = sk->sk_write_space;
+ sk->sk_write_space = tls_write_space;
goto out;
err_crypto_info:
@@ -453,7 +463,10 @@ static int tls_init(struct sock *sk)
icsk->icsk_ulp_data = ctx;
ctx->setsockopt = sk->sk_prot->setsockopt;
ctx->getsockopt = sk->sk_prot->getsockopt;
- sk->sk_prot = &tls_base_prot;
+ ctx->sk_proto_close = sk->sk_prot->close;
+
+ ctx->tx_conf = TLS_BASE_TX;
+ update_sk_prot(sk, ctx);
out:
return rc;
}
@@ -464,16 +477,21 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
.init = tls_init,
};
+static void build_protos(struct proto *prot, struct proto *base)
+{
+ prot[TLS_BASE_TX] = *base;
+ prot[TLS_BASE_TX].setsockopt = tls_setsockopt;
+ prot[TLS_BASE_TX].getsockopt = tls_getsockopt;
+ prot[TLS_BASE_TX].close = tls_sk_proto_close;
+
+ prot[TLS_SW_TX] = prot[TLS_BASE_TX];
+ prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg;
+ prot[TLS_SW_TX].sendpage = tls_sw_sendpage;
+}
+
static int __init tls_register(void)
{
- tls_base_prot = tcp_prot;
- tls_base_prot.setsockopt = tls_setsockopt;
- tls_base_prot.getsockopt = tls_getsockopt;
-
- tls_sw_prot = tls_base_prot;
- tls_sw_prot.sendmsg = tls_sw_sendmsg;
- tls_sw_prot.sendpage = tls_sw_sendpage;
- tls_sw_prot.close = tls_sk_proto_close;
+ build_protos(tls_prots, &tcp_prot);
tcp_register_ulp(&tcp_tls_ulp_ops);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 7d80040a37b6..73d19210dd49 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -39,22 +39,6 @@
#include <net/tls.h>
-static inline void tls_make_aad(int recv,
- char *buf,
- size_t size,
- char *record_sequence,
- int record_sequence_size,
- unsigned char record_type)
-{
- memcpy(buf, record_sequence, record_sequence_size);
-
- buf[8] = record_type;
- buf[9] = TLS_1_2_VERSION_MAJOR;
- buf[10] = TLS_1_2_VERSION_MINOR;
- buf[11] = size >> 8;
- buf[12] = size & 0xFF;
-}
-
static void trim_sg(struct sock *sk, struct scatterlist *sg,
int *sg_num_elem, unsigned int *sg_size, int target_size)
{
@@ -219,7 +203,7 @@ static int tls_do_encryption(struct tls_context *tls_ctx,
struct aead_request *aead_req;
int rc;
- aead_req = kmalloc(req_size, flags);
+ aead_req = kzalloc(req_size, flags);
if (!aead_req)
return -ENOMEM;
@@ -249,7 +233,7 @@ static int tls_push_record(struct sock *sk, int flags,
sg_mark_end(ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem - 1);
sg_mark_end(ctx->sg_encrypted_data + ctx->sg_encrypted_num_elem - 1);
- tls_make_aad(0, ctx->aad_space, ctx->sg_plaintext_size,
+ tls_make_aad(ctx->aad_space, ctx->sg_plaintext_size,
tls_ctx->rec_seq, tls_ctx->rec_seq_size,
record_type);
@@ -639,7 +623,7 @@ sendpage_end:
return ret;
}
-static void tls_sw_free_resources(struct sock *sk)
+void tls_sw_free_tx_resources(struct sock *sk)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
@@ -650,6 +634,7 @@ static void tls_sw_free_resources(struct sock *sk)
tls_free_both_sg(sk);
kfree(ctx);
+ kfree(tls_ctx);
}
int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
@@ -679,7 +664,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
}
ctx->priv_ctx = (struct tls_offload_context *)sw_ctx;
- ctx->free_resources = tls_sw_free_resources;
crypto_info = &ctx->crypto_send;
switch (crypto_info->cipher_type) {
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index fce2cbe6a193..a0e1951227fa 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -6291,7 +6291,7 @@ static int nl80211_send_regdom(struct sk_buff *msg, struct netlink_callback *cb,
if (!hdr)
return -1;
- genl_dump_check_consistent(cb, hdr, &nl80211_fam);
+ genl_dump_check_consistent(cb, hdr);
if (nl80211_put_regdom(regdom, msg))
goto nla_put_failure;
@@ -7722,7 +7722,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
if (!hdr)
return -1;
- genl_dump_check_consistent(cb, hdr, &nl80211_fam);
+ genl_dump_check_consistent(cb, hdr);
if (nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->bss_generation))
goto nla_put_failure;
@@ -14274,7 +14274,7 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct sk_buff *msg;
void *hdr;
- u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid);
+ u32 nlportid = READ_ONCE(wdev->ap_unexpected_nlportid);
if (!nlportid)
return false;
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 82d20ee34581..347ab31574d5 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -266,8 +266,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
goto lock;
}
- daddr = (xfrm_address_t *)(skb_network_header(skb) +
- XFRM_SPI_SKB_CB(skb)->daddroff);
family = XFRM_SPI_SKB_CB(skb)->family;
/* if tunnel is present override skb->mark value with tunnel i_key */
@@ -294,6 +292,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
goto drop;
}
+ daddr = (xfrm_address_t *)(skb_network_header(skb) +
+ XFRM_SPI_SKB_CB(skb)->daddroff);
do {
if (skb->sp->len == XFRM_MAX_DEPTH) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f02b1743b239..9542975eb2f9 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1305,6 +1305,7 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
newp->xfrm_nr = old->xfrm_nr;
newp->index = old->index;
newp->type = old->type;
+ newp->family = old->family;
memcpy(newp->xfrm_vec, old->xfrm_vec,
newp->xfrm_nr*sizeof(struct xfrm_tmpl));
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
@@ -1786,19 +1787,23 @@ void xfrm_policy_cache_flush(void)
put_online_cpus();
}
-static bool xfrm_pol_dead(struct xfrm_dst *xdst)
+static bool xfrm_xdst_can_reuse(struct xfrm_dst *xdst,
+ struct xfrm_state * const xfrm[],
+ int num)
{
- unsigned int num_pols = xdst->num_pols;
- unsigned int pol_dead = 0, i;
+ const struct dst_entry *dst = &xdst->u.dst;
+ int i;
- for (i = 0; i < num_pols; i++)
- pol_dead |= xdst->pols[i]->walk.dead;
+ if (xdst->num_xfrms != num)
+ return false;
- /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
- if (pol_dead)
- xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
+ for (i = 0; i < num; i++) {
+ if (!dst || dst->xfrm != xfrm[i])
+ return false;
+ dst = dst->child;
+ }
- return pol_dead;
+ return xfrm_bundle_ok(xdst);
}
static struct xfrm_dst *
@@ -1812,26 +1817,28 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
struct dst_entry *dst;
int err;
+ /* Try to instantiate a bundle */
+ err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
+ if (err <= 0) {
+ if (err != 0 && err != -EAGAIN)
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
+ return ERR_PTR(err);
+ }
+
xdst = this_cpu_read(xfrm_last_dst);
if (xdst &&
xdst->u.dst.dev == dst_orig->dev &&
xdst->num_pols == num_pols &&
- !xfrm_pol_dead(xdst) &&
memcmp(xdst->pols, pols,
sizeof(struct xfrm_policy *) * num_pols) == 0 &&
- xfrm_bundle_ok(xdst)) {
+ xfrm_xdst_can_reuse(xdst, xfrm, err)) {
dst_hold(&xdst->u.dst);
+ while (err > 0)
+ xfrm_state_put(xfrm[--err]);
return xdst;
}
old = xdst;
- /* Try to instantiate a bundle */
- err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
- if (err <= 0) {
- if (err != 0 && err != -EAGAIN)
- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
- return ERR_PTR(err);
- }
dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
if (IS_ERR(dst)) {
OpenPOWER on IntegriCloud