From 1b5c5493e3e68181be344cb51bf9df192d05ffc2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Jun 2016 20:21:50 -0700 Subject: net_sched: add the ability to defer skb freeing qdisc are changed under RTNL protection and often while blocking BH and root qdisc spinlock. When lots of skbs need to be dropped, we free them under these locks causing TX/RX freezes, and more generally latency spikes. This commit adds rtnl_kfree_skbs(), used to queue skbs for deferred freeing. Actual freeing happens right after RTNL is released, with appropriate scheduling points. rtnl_qdisc_drop() can also be used in place of disc_drop() when RTNL is held. qdisc_reset_queue() and __qdisc_reset_queue() get the new behavior, so standard qdiscs like pfifo, pfifo_fast... have their ->reset() method automatically handled. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'net/core/rtnetlink.c') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d69c4644f8f2..eb49ca24274a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -71,9 +71,31 @@ void rtnl_lock(void) } EXPORT_SYMBOL(rtnl_lock); +static struct sk_buff *defer_kfree_skb_list; +void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail) +{ + if (head && tail) { + tail->next = defer_kfree_skb_list; + defer_kfree_skb_list = head; + } +} +EXPORT_SYMBOL(rtnl_kfree_skbs); + void __rtnl_unlock(void) { + struct sk_buff *head = defer_kfree_skb_list; + + defer_kfree_skb_list = NULL; + mutex_unlock(&rtnl_mutex); + + while (head) { + struct sk_buff *next = head->next; + + kfree_skb(head); + cond_resched(); + head = next; + } } void rtnl_unlock(void) -- cgit v1.2.1 From 80e73cc563c4359be809a03bcb8e7e28141a813a Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 28 Jun 2016 16:57:05 +0200 Subject: net: rtnetlink: add support for the IFLA_STATS_LINK_XSTATS_SLAVE attribute This patch adds support for the IFLA_STATS_LINK_XSTATS_SLAVE attribute which allows to export per-slave statistics if the master device supports the linkxstats callback. The attribute is passed down to the linkxstats callback and it is up to the callback user to use it (an example has been added to the only current user - the bridge). This allows us to query only specific slaves of master devices like bridge ports and export only what we're interested in instead of having to dump all ports and searching only for a single one. This will be used to export per-port IGMP/MLD stats and also per-port vlan stats in the future, possibly other statistics as well. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) (limited to 'net/core/rtnetlink.c') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index eb49ca24274a..cfed7bc14ee6 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3519,7 +3519,32 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, if (!attr) goto nla_put_failure; - err = ops->fill_linkxstats(skb, dev, prividx); + err = ops->fill_linkxstats(skb, dev, prividx, *idxattr); + nla_nest_end(skb, attr); + if (err) + goto nla_put_failure; + *idxattr = 0; + } + } + + if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS_SLAVE, + *idxattr)) { + const struct rtnl_link_ops *ops = NULL; + const struct net_device *master; + + master = netdev_master_upper_dev_get(dev); + if (master) + ops = master->rtnl_link_ops; + if (ops && ops->fill_linkxstats) { + int err; + + *idxattr = IFLA_STATS_LINK_XSTATS_SLAVE; + attr = nla_nest_start(skb, + IFLA_STATS_LINK_XSTATS_SLAVE); + if (!attr) + goto nla_put_failure; + + err = ops->fill_linkxstats(skb, dev, prividx, *idxattr); nla_nest_end(skb, attr); if (err) goto nla_put_failure; @@ -3555,14 +3580,35 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev, if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS, 0)) { const struct rtnl_link_ops *ops = dev->rtnl_link_ops; + int attr = IFLA_STATS_LINK_XSTATS; if (ops && ops->get_linkxstats_size) { - size += nla_total_size(ops->get_linkxstats_size(dev)); + size += nla_total_size(ops->get_linkxstats_size(dev, + attr)); /* for IFLA_STATS_LINK_XSTATS */ size += nla_total_size(0); } } + if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS_SLAVE, 0)) { + struct net_device *_dev = (struct net_device *)dev; + const struct rtnl_link_ops *ops = NULL; + const struct net_device *master; + + /* netdev_master_upper_dev_get can't take const */ + master = netdev_master_upper_dev_get(_dev); + if (master) + ops = master->rtnl_link_ops; + if (ops && ops->get_linkxstats_size) { + int attr = IFLA_STATS_LINK_XSTATS_SLAVE; + + size += nla_total_size(ops->get_linkxstats_size(dev, + attr)); + /* for IFLA_STATS_LINK_XSTATS_SLAVE */ + size += nla_total_size(0); + } + } + return size; } -- cgit v1.2.1 From 08294a26e15d7baf1e14ee569e9f2bc82a7ae768 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 30 Jun 2016 14:45:35 +0800 Subject: net: introduce NETDEV_CHANGE_TX_QUEUE_LEN This patch introduces a new event - NETDEV_CHANGE_TX_QUEUE_LEN, this will be triggered when tx_queue_len. It could be used by net device who want to do some processing at that time. An example is tun who may want to resize tx array when tx_queue_len is changed. Cc: John Fastabend Signed-off-by: Jason Wang Acked-by: John Fastabend Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'net/core/rtnetlink.c') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index cfed7bc14ee6..a9e3805af739 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1927,11 +1927,19 @@ static int do_setlink(const struct sk_buff *skb, if (tb[IFLA_TXQLEN]) { unsigned long value = nla_get_u32(tb[IFLA_TXQLEN]); - - if (dev->tx_queue_len ^ value) + unsigned long orig_len = dev->tx_queue_len; + + if (dev->tx_queue_len ^ value) { + dev->tx_queue_len = value; + err = call_netdevice_notifiers( + NETDEV_CHANGE_TX_QUEUE_LEN, dev); + err = notifier_to_errno(err); + if (err) { + dev->tx_queue_len = orig_len; + goto errout; + } status |= DO_SETLINK_NOTIFY; - - dev->tx_queue_len = value; + } } if (tb[IFLA_OPERSTATE]) -- cgit v1.2.1 From d1fdd9138682e0f272beee0cb08b6328c5478b26 Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Tue, 19 Jul 2016 12:16:49 -0700 Subject: rtnl: add option for setting link xdp prog Sets the bpf program represented by fd as an early filter in the rx path of the netdev. The fd must have been created as BPF_PROG_TYPE_XDP. Providing a negative value as fd clears the program. Getting the fd back via rtnl is not possible, therefore reading of this value merely provides a bool whether the program is valid on the link or not. Signed-off-by: Brenden Blanco Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'net/core/rtnetlink.c') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a9e3805af739..eba2b8260dbd 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -891,6 +891,16 @@ static size_t rtnl_port_size(const struct net_device *dev, return port_self_size; } +static size_t rtnl_xdp_size(const struct net_device *dev) +{ + size_t xdp_size = nla_total_size(1); /* XDP_ATTACHED */ + + if (!dev->netdev_ops->ndo_xdp) + return 0; + else + return xdp_size; +} + static noinline size_t if_nlmsg_size(const struct net_device *dev, u32 ext_filter_mask) { @@ -927,6 +937,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */ + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */ + + rtnl_xdp_size(dev) /* IFLA_XDP */ + nla_total_size(1); /* IFLA_PROTO_DOWN */ } @@ -1211,6 +1222,33 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) return 0; } +static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) +{ + struct netdev_xdp xdp_op = {}; + struct nlattr *xdp; + int err; + + if (!dev->netdev_ops->ndo_xdp) + return 0; + xdp = nla_nest_start(skb, IFLA_XDP); + if (!xdp) + return -EMSGSIZE; + xdp_op.command = XDP_QUERY_PROG; + err = dev->netdev_ops->ndo_xdp(dev, &xdp_op); + if (err) + goto err_cancel; + err = nla_put_u8(skb, IFLA_XDP_ATTACHED, xdp_op.prog_attached); + if (err) + goto err_cancel; + + nla_nest_end(skb, xdp); + return 0; + +err_cancel: + nla_nest_cancel(skb, xdp); + return err; +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags, u32 ext_filter_mask) @@ -1307,6 +1345,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (rtnl_port_fill(skb, dev, ext_filter_mask)) goto nla_put_failure; + if (rtnl_xdp_fill(skb, dev)) + goto nla_put_failure; + if (dev->rtnl_link_ops || rtnl_have_link_slave_info(dev)) { if (rtnl_link_fill(skb, dev) < 0) goto nla_put_failure; @@ -1392,6 +1433,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, [IFLA_LINK_NETNSID] = { .type = NLA_S32 }, [IFLA_PROTO_DOWN] = { .type = NLA_U8 }, + [IFLA_XDP] = { .type = NLA_NESTED }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -1429,6 +1471,11 @@ static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { [IFLA_PORT_RESPONSE] = { .type = NLA_U16, }, }; +static const struct nla_policy ifla_xdp_policy[IFLA_XDP_MAX + 1] = { + [IFLA_XDP_FD] = { .type = NLA_S32 }, + [IFLA_XDP_ATTACHED] = { .type = NLA_U8 }, +}; + static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla) { const struct rtnl_link_ops *ops = NULL; @@ -2054,6 +2101,23 @@ static int do_setlink(const struct sk_buff *skb, status |= DO_SETLINK_NOTIFY; } + if (tb[IFLA_XDP]) { + struct nlattr *xdp[IFLA_XDP_MAX + 1]; + + err = nla_parse_nested(xdp, IFLA_XDP_MAX, tb[IFLA_XDP], + ifla_xdp_policy); + if (err < 0) + goto errout; + + if (xdp[IFLA_XDP_FD]) { + err = dev_change_xdp_fd(dev, + nla_get_s32(xdp[IFLA_XDP_FD])); + if (err) + goto errout; + status |= DO_SETLINK_NOTIFY; + } + } + errout: if (status & DO_SETLINK_MODIFIED) { if (status & DO_SETLINK_NOTIFY) -- cgit v1.2.1 From 262d8625045e0c81b7859ecd192e9811710f19da Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Wed, 20 Jul 2016 17:22:34 -0700 Subject: rtnl: protect do_setlink from IFLA_XDP_ATTACHED The IFLA_XDP_ATTACHED nested attribute is meant for read-only, and while do_setlink properly ignores it, it should be more paranoid and reject commands that try to set it. Signed-off-by: Brenden Blanco Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/core/rtnetlink.c') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index eba2b8260dbd..189cc78c77eb 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2109,6 +2109,10 @@ static int do_setlink(const struct sk_buff *skb, if (err < 0) goto errout; + if (xdp[IFLA_XDP_ATTACHED]) { + err = -EINVAL; + goto errout; + } if (xdp[IFLA_XDP_FD]) { err = dev_change_xdp_fd(dev, nla_get_s32(xdp[IFLA_XDP_FD])); -- cgit v1.2.1