summary | refs | log | tree | commit | diff | stats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r-- net/core/bpf_sk_storage.c 3
-rw-r--r-- net/core/dev.c 42
-rw-r--r-- net/core/dev_addr_lists.c 6
-rw-r--r-- net/core/devlink.c 226
-rw-r--r-- net/core/dst_cache.c 6
-rw-r--r-- net/core/ethtool.c 16
-rw-r--r-- net/core/fib_rules.c 11
-rw-r--r-- net/core/filter.c 35
-rw-r--r-- net/core/flow_dissector.c 26
-rw-r--r-- net/core/flow_offload.c 7
-rw-r--r-- net/core/gen_estimator.c 6
-rw-r--r-- net/core/gen_stats.c 6
-rw-r--r-- net/core/hwbm.c 21
-rw-r--r-- net/core/link_watch.c 7
-rw-r--r-- net/core/lwt_bpf.c 10
-rw-r--r-- net/core/lwtunnel.c 7
-rw-r--r-- net/core/neighbour.c 13
-rw-r--r-- net/core/net-sysfs.c 6
-rw-r--r-- net/core/net-traces.c 4
-rw-r--r-- net/core/net_namespace.c 28
-rw-r--r-- net/core/netclassid_cgroup.c 6
-rw-r--r-- net/core/netevent.c 6
-rw-r--r-- net/core/netpoll.c 10
-rw-r--r-- net/core/netprio_cgroup.c 6
-rw-r--r-- net/core/page_pool.c 95
-rw-r--r-- net/core/pktgen.c 28
-rw-r--r-- net/core/ptp_classifier.c 10
-rw-r--r-- net/core/request_sock.c 6
-rw-r--r-- net/core/rtnetlink.c 11
-rw-r--r-- net/core/scm.c 6
-rw-r--r-- net/core/skbuff.c 100
-rw-r--r-- net/core/sock.c 15
-rw-r--r-- net/core/sysctl_net_core.c 7
-rw-r--r-- net/core/utils.c 6
-rw-r--r-- net/core/xdp.c 122
35 files changed, 657 insertions, 263 deletions
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index f40e3d35fd9c..94c7f77ecb6b 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -634,7 +634,8 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&smap->map, attr);
- smap->bucket_log = ilog2(roundup_pow_of_two(num_possible_cpus()));
+ /* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
+ smap->bucket_log = max_t(u32, 1, ilog2(roundup_pow_of_two(num_possible_cpus())));
nbuckets = 1U << smap->bucket_log;
cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap);
diff --git a/net/core/dev.c b/net/core/dev.c
index 66f7508825bd..d6edd218babd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* NET3 Protocol independent device support routines.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Derived from the non IP parts of dev.c 1.0.19
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -4927,8 +4923,36 @@ skip_classify:
}
if (unlikely(skb_vlan_tag_present(skb))) {
- if (skb_vlan_tag_get_id(skb))
+check_vlan_id:
+ if (skb_vlan_tag_get_id(skb)) {
+ /* Vlan id is non 0 and vlan_do_receive() above couldn't
+ * find vlan device.
+ */
skb->pkt_type = PACKET_OTHERHOST;
+ } else if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
+ skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
+ /* Outer header is 802.1P with vlan 0, inner header is
+ * 802.1Q or 802.1AD and vlan_do_receive() above could
+ * not find vlan dev for vlan id 0.
+ */
+ __vlan_hwaccel_clear_tag(skb);
+ skb = skb_vlan_untag(skb);
+ if (unlikely(!skb))
+ goto out;
+ if (vlan_do_receive(&skb))
+ /* After stripping off 802.1P header with vlan 0
+ * vlan dev is found for inner header.
+ */
+ goto another_round;
+ else if (unlikely(!skb))
+ goto out;
+ else
+ /* We have stripped outer 802.1P vlan 0 header.
+ * But could not find vlan dev.
+ * check again for vlan id to set OTHERHOST.
+ */
+ goto check_vlan_id;
+ }
/* Note: we might in the future use prio bits
* and set skb->priority like in vlan_do_receive()
* For the time being, just ignore Priority Code Point
@@ -5025,12 +5049,12 @@ static inline void __netif_receive_skb_list_ptype(struct list_head *head,
if (list_empty(head))
return;
if (pt_prev->list_func != NULL)
- pt_prev->list_func(head, pt_prev, orig_dev);
+ INDIRECT_CALL_INET(pt_prev->list_func, ipv6_list_rcv,
+ ip_list_rcv, head, pt_prev, orig_dev);
else
list_for_each_entry_safe(skb, next, head, list) {
skb_list_del_init(skb);
- INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb,
- skb->dev, pt_prev, orig_dev);
+ pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}
}
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index a6723b306717..6393ba930097 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* net/core/dev_addr_lists.c - Functions for handling net device lists
* Copyright (c) 2010 Jiri Pirko <jpirko@redhat.com>
*
* This file contains functions for working with unicast, multicast and device
* addresses lists.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/netdevice.h>
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 9716a7f382cb..4baf716e535e 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* net/core/devlink.c - Network physical/parent device Netlink interface
*
* Heavily inspired by net/wireless/
* Copyright (c) 2016 Mellanox Technologies. All rights reserved.
* Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/kernel.h>
@@ -2673,6 +2669,108 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
return devlink->ops->reload(devlink, info->extack);
}
+static int devlink_nl_flash_update_fill(struct sk_buff *msg,
+ struct devlink *devlink,
+ enum devlink_command cmd,
+ const char *status_msg,
+ const char *component,
+ unsigned long done, unsigned long total)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(msg, 0, 0, &devlink_nl_family, 0, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS)
+ goto out;
+
+ if (status_msg &&
+ nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_MSG,
+ status_msg))
+ goto nla_put_failure;
+ if (component &&
+ nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_COMPONENT,
+ component))
+ goto nla_put_failure;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE,
+ done, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL,
+ total, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+out:
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static void __devlink_flash_update_notify(struct devlink *devlink,
+ enum devlink_command cmd,
+ const char *status_msg,
+ const char *component,
+ unsigned long done,
+ unsigned long total)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_FLASH_UPDATE &&
+ cmd != DEVLINK_CMD_FLASH_UPDATE_END &&
+ cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_flash_update_fill(msg, devlink, cmd, status_msg,
+ component, done, total);
+ if (err)
+ goto out_free_msg;
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ return;
+
+out_free_msg:
+ nlmsg_free(msg);
+}
+
+void devlink_flash_update_begin_notify(struct devlink *devlink)
+{
+ __devlink_flash_update_notify(devlink,
+ DEVLINK_CMD_FLASH_UPDATE,
+ NULL, NULL, 0, 0);
+}
+EXPORT_SYMBOL_GPL(devlink_flash_update_begin_notify);
+
+void devlink_flash_update_end_notify(struct devlink *devlink)
+{
+ __devlink_flash_update_notify(devlink,
+ DEVLINK_CMD_FLASH_UPDATE_END,
+ NULL, NULL, 0, 0);
+}
+EXPORT_SYMBOL_GPL(devlink_flash_update_end_notify);
+
+void devlink_flash_update_status_notify(struct devlink *devlink,
+ const char *status_msg,
+ const char *component,
+ unsigned long done,
+ unsigned long total)
+{
+ __devlink_flash_update_notify(devlink,
+ DEVLINK_CMD_FLASH_UPDATE_STATUS,
+ status_msg, component, done, total);
+}
+EXPORT_SYMBOL_GPL(devlink_flash_update_status_notify);
+
static int devlink_nl_cmd_flash_update(struct sk_buff *skb,
struct genl_info *info)
{
@@ -4420,6 +4518,35 @@ nla_put_failure:
return err;
}
+static int devlink_fmsg_dumpit(struct devlink_fmsg *fmsg, struct sk_buff *skb,
+ struct netlink_callback *cb,
+ enum devlink_command cmd)
+{
+ int index = cb->args[0];
+ int tmp_index = index;
+ void *hdr;
+ int err;
+
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI, cmd);
+ if (!hdr) {
+ err = -EMSGSIZE;
+ goto nla_put_failure;
+ }
+
+ err = devlink_fmsg_prepare_skb(fmsg, skb, &index);
+ if ((err && err != -EMSGSIZE) || tmp_index == index)
+ goto nla_put_failure;
+
+ cb->args[0] = index;
+ genlmsg_end(skb, hdr);
+ return skb->len;
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ return err;
+}
+
struct devlink_health_reporter {
struct list_head list;
void *priv;
@@ -4652,17 +4779,16 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
EXPORT_SYMBOL_GPL(devlink_health_report);
static struct devlink_health_reporter *
-devlink_health_reporter_get_from_info(struct devlink *devlink,
- struct genl_info *info)
+devlink_health_reporter_get_from_attrs(struct devlink *devlink,
+ struct nlattr **attrs)
{
struct devlink_health_reporter *reporter;
char *reporter_name;
- if (!info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME])
+ if (!attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME])
return NULL;
- reporter_name =
- nla_data(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]);
+ reporter_name = nla_data(attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]);
mutex_lock(&devlink->reporters_lock);
reporter = devlink_health_reporter_find_by_name(devlink, reporter_name);
if (reporter)
@@ -4671,6 +4797,48 @@ devlink_health_reporter_get_from_info(struct devlink *devlink,
return reporter;
}
+static struct devlink_health_reporter *
+devlink_health_reporter_get_from_info(struct devlink *devlink,
+ struct genl_info *info)
+{
+ return devlink_health_reporter_get_from_attrs(devlink, info->attrs);
+}
+
+static struct devlink_health_reporter *
+devlink_health_reporter_get_from_cb(struct netlink_callback *cb)
+{
+ struct devlink_health_reporter *reporter;
+ struct devlink *devlink;
+ struct nlattr **attrs;
+ int err;
+
+ attrs = kmalloc_array(DEVLINK_ATTR_MAX + 1, sizeof(*attrs), GFP_KERNEL);
+ if (!attrs)
+ return NULL;
+
+ err = nlmsg_parse_deprecated(cb->nlh,
+ GENL_HDRLEN + devlink_nl_family.hdrsize,
+ attrs, DEVLINK_ATTR_MAX,
+ devlink_nl_family.policy, cb->extack);
+ if (err)
+ goto free;
+
+ mutex_lock(&devlink_mutex);
+ devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
+ if (IS_ERR(devlink))
+ goto unlock;
+
+ reporter = devlink_health_reporter_get_from_attrs(devlink, attrs);
+ mutex_unlock(&devlink_mutex);
+ kfree(attrs);
+ return reporter;
+unlock:
+ mutex_unlock(&devlink_mutex);
+free:
+ kfree(attrs);
+ return NULL;
+}
+
static void
devlink_health_reporter_put(struct devlink_health_reporter *reporter)
{
@@ -4906,32 +5074,40 @@ out:
return err;
}
-static int devlink_nl_cmd_health_reporter_dump_get_doit(struct sk_buff *skb,
- struct genl_info *info)
+static int
+devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
{
- struct devlink *devlink = info->user_ptr[0];
struct devlink_health_reporter *reporter;
+ u64 start = cb->args[0];
int err;
- reporter = devlink_health_reporter_get_from_info(devlink, info);
+ reporter = devlink_health_reporter_get_from_cb(cb);
if (!reporter)
return -EINVAL;
if (!reporter->ops->dump) {
- devlink_health_reporter_put(reporter);
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+ goto out;
}
-
mutex_lock(&reporter->dump_lock);
- err = devlink_health_do_dump(reporter, NULL);
- if (err)
- goto out;
-
- err = devlink_fmsg_snd(reporter->dump_fmsg, info,
- DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, 0);
+ if (!start) {
+ err = devlink_health_do_dump(reporter, NULL);
+ if (err)
+ goto unlock;
+ cb->args[1] = reporter->dump_ts;
+ }
+ if (!reporter->dump_fmsg || cb->args[1] != reporter->dump_ts) {
+ NL_SET_ERR_MSG_MOD(cb->extack, "Dump trampled, please retry");
+ err = -EAGAIN;
+ goto unlock;
+ }
-out:
+ err = devlink_fmsg_dumpit(reporter->dump_fmsg, skb, cb,
+ DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET);
+unlock:
mutex_unlock(&reporter->dump_lock);
+out:
devlink_health_reporter_put(reporter);
return err;
}
@@ -5268,7 +5444,7 @@ static const struct genl_ops devlink_nl_ops[] = {
{
.cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_health_reporter_dump_get_doit,
+ .dumpit = devlink_nl_cmd_health_reporter_dump_get_dumpit,
.flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
DEVLINK_NL_FLAG_NO_LOCK,
diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
index 64cef977484a..be74ab4551c2 100644
--- a/net/core/dst_cache.c
+++ b/net/core/dst_cache.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* net/core/dst_cache.c - dst entry cache
*
* Copyright (c) 2016 Paolo Abeni <pabeni@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/kernel.h>
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 43e9add58340..4d1011b2e24f 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* net/core/ethtool.c - Ethtool ioctl handler
* Copyright (c) 2003 Matthew Wilcox <matthew@wil.cx>
*
* This file is where we call all the ethtool_ops commands to get
* the information ethtool needs.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/module.h>
@@ -1359,13 +1355,16 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
if (!regbuf)
return -ENOMEM;
+ if (regs.len < reglen)
+ reglen = regs.len;
+
ops->get_regs(dev, &regs, regbuf);
ret = -EFAULT;
if (copy_to_user(useraddr, &regs, sizeof(regs)))
goto out;
useraddr += offsetof(struct ethtool_regs, data);
- if (regbuf && copy_to_user(useraddr, regbuf, regs.len))
+ if (copy_to_user(useraddr, regbuf, reglen))
goto out;
ret = 0;
@@ -3021,6 +3020,11 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
match->mask.vlan.vlan_id =
ntohs(ext_m_spec->vlan_tci) & 0x0fff;
+ match->key.vlan.vlan_dei =
+ !!(ext_h_spec->vlan_tci & htons(0x1000));
+ match->mask.vlan.vlan_dei =
+ !!(ext_m_spec->vlan_tci & htons(0x1000));
+
match->key.vlan.vlan_priority =
(ntohs(ext_h_spec->vlan_tci) & 0xe000) >> 13;
match->mask.vlan.vlan_priority =
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 43f0115cce9c..dd220ce7ca7a 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* net/core/fib_rules.c Generic Routing Rules
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation, version 2.
- *
* Authors: Thomas Graf <tgraf@suug.ch>
*/
@@ -757,9 +754,9 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err)
goto errout;
- if (rule_exists(ops, frh, tb, rule)) {
- if (nlh->nlmsg_flags & NLM_F_EXCL)
- err = -EEXIST;
+ if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
+ rule_exists(ops, frh, tb, rule)) {
+ err = -EEXIST;
goto errout_free;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 8c18f2781afa..2014d76e0d2a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Linux Socket Filter - Kernel level socket filtering
*
@@ -12,11 +13,6 @@
* Alexei Starovoitov <ast@plumgrid.com>
* Daniel Borkmann <dborkman@redhat.com>
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Andi Kleen - Fix a few bad bugs and races.
* Kris Katterjohn - Added many additional checks in bpf_check_classic()
*/
@@ -66,6 +62,7 @@
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/ip_fib.h>
+#include <net/nexthop.h>
#include <net/flow.h>
#include <net/arp.h>
#include <net/ipv6.h>
@@ -4674,7 +4671,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (res.type != RTN_UNICAST)
return BPF_FIB_LKUP_RET_NOT_FWDED;
- if (res.fi->fib_nhs > 1)
+ if (fib_info_num_path(res.fi) > 1)
fib_select_path(net, &res, &fl4, NULL);
if (check_mtu) {
@@ -5304,7 +5301,13 @@ __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
struct net *net;
int sdif;
- family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6;
+ if (len == sizeof(tuple->ipv4))
+ family = AF_INET;
+ else if (len == sizeof(tuple->ipv6))
+ family = AF_INET6;
+ else
+ return NULL;
+
if (unlikely(family == AF_UNSPEC || flags ||
!((s32)netns_id < 0 || netns_id <= S32_MAX)))
goto out;
@@ -5337,8 +5340,14 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net,
ifindex, proto, netns_id, flags);
- if (sk)
+ if (sk) {
sk = sk_to_full_sk(sk);
+ if (!sk_fullsock(sk)) {
+ if (!sock_flag(sk, SOCK_RCU_FREE))
+ sock_gen_put(sk);
+ return NULL;
+ }
+ }
return sk;
}
@@ -5369,8 +5378,14 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
struct sock *sk = bpf_skc_lookup(skb, tuple, len, proto, netns_id,
flags);
- if (sk)
+ if (sk) {
sk = sk_to_full_sk(sk);
+ if (!sk_fullsock(sk)) {
+ if (!sock_flag(sk, SOCK_RCU_FREE))
+ sock_gen_put(sk);
+ return NULL;
+ }
+ }
return sk;
}
@@ -6786,6 +6801,7 @@ static bool sock_addr_is_valid_access(int off, int size,
case BPF_CGROUP_INET4_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_UDP4_SENDMSG:
+ case BPF_CGROUP_UDP4_RECVMSG:
break;
default:
return false;
@@ -6796,6 +6812,7 @@ static bool sock_addr_is_valid_access(int off, int size,
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET6_CONNECT:
case BPF_CGROUP_UDP6_SENDMSG:
+ case BPF_CGROUP_UDP6_RECVMSG:
break;
default:
return false;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index edd622956083..01ad60b5aa75 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -199,6 +199,22 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
}
EXPORT_SYMBOL(__skb_flow_get_ports);
+void skb_flow_dissect_meta(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container)
+{
+ struct flow_dissector_key_meta *meta;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_META))
+ return;
+
+ meta = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_META,
+ target_container);
+ meta->ingress_ifindex = skb->skb_iif;
+}
+EXPORT_SYMBOL(skb_flow_dissect_meta);
+
static void
skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type,
struct flow_dissector *flow_dissector,
@@ -757,7 +773,7 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
* @nhoff: network header offset, if @data is NULL use skb_network_offset(skb)
* @hlen: packet header length, if @data is NULL use skb_headlen(skb)
* @flags: flags that control the dissection process, e.g.
- * FLOW_DISSECTOR_F_STOP_AT_L3.
+ * FLOW_DISSECTOR_F_STOP_AT_ENCAP.
*
* The function will try to retrieve individual keys into target specified
* by flow_dissector from either the skbuff or a raw buffer specified by the
@@ -922,11 +938,6 @@ proto_again:
__skb_flow_dissect_ipv4(skb, flow_dissector,
target_container, data, iph);
- if (flags & FLOW_DISSECTOR_F_STOP_AT_L3) {
- fdret = FLOW_DISSECT_RET_OUT_GOOD;
- break;
- }
-
break;
}
case htons(ETH_P_IPV6): {
@@ -975,9 +986,6 @@ proto_again:
__skb_flow_dissect_ipv6(skb, flow_dissector,
target_container, data, iph);
- if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
- fdret = FLOW_DISSECT_RET_OUT_GOOD;
-
break;
}
case htons(ETH_P_8021AD):
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 3d93e51b83e0..f52fe0bc4017 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -25,6 +25,13 @@ EXPORT_SYMBOL(flow_rule_alloc);
(__out)->key = skb_flow_dissector_target(__d, __type, (__m)->key); \
(__out)->mask = skb_flow_dissector_target(__d, __type, (__m)->mask); \
+void flow_rule_match_meta(const struct flow_rule *rule,
+ struct flow_match_meta *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_META, out);
+}
+EXPORT_SYMBOL(flow_rule_match_meta);
+
void flow_rule_match_basic(const struct flow_rule *rule,
struct flow_match_basic *out)
{
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index e4e442d70c2d..bfe7bdd4c340 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* net/sched/gen_estimator.c Simple rate estimator.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
* Eric Dumazet <edumazet@google.com>
*
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 9bf1b9ad1780..36888f5e09eb 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* net/core/gen_stats.c
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Authors: Thomas Graf <tgraf@suug.ch>
* Jamal Hadi Salim
* Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
diff --git a/net/core/hwbm.c b/net/core/hwbm.c
index 2cab489ae62e..ac1a66df9adc 100644
--- a/net/core/hwbm.c
+++ b/net/core/hwbm.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* Support for hardware buffer manager.
*
* Copyright (C) 2016 Marvell
*
* Gregory CLEMENT <gregory.clement@free-electrons.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/printk.h>
@@ -47,34 +43,33 @@ int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp)
}
EXPORT_SYMBOL_GPL(hwbm_pool_refill);
-int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp)
+int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num)
{
int err, i;
- unsigned long flags;
- spin_lock_irqsave(&bm_pool->lock, flags);
+ mutex_lock(&bm_pool->buf_lock);
if (bm_pool->buf_num == bm_pool->size) {
pr_warn("pool already filled\n");
- spin_unlock_irqrestore(&bm_pool->lock, flags);
+ mutex_unlock(&bm_pool->buf_lock);
return bm_pool->buf_num;
}
if (buf_num + bm_pool->buf_num > bm_pool->size) {
pr_warn("cannot allocate %d buffers for pool\n",
buf_num);
- spin_unlock_irqrestore(&bm_pool->lock, flags);
+ mutex_unlock(&bm_pool->buf_lock);
return 0;
}
if ((buf_num + bm_pool->buf_num) < bm_pool->buf_num) {
pr_warn("Adding %d buffers to the %d current buffers will overflow\n",
buf_num, bm_pool->buf_num);
- spin_unlock_irqrestore(&bm_pool->lock, flags);
+ mutex_unlock(&bm_pool->buf_lock);
return 0;
}
for (i = 0; i < buf_num; i++) {
- err = hwbm_pool_refill(bm_pool, gfp);
+ err = hwbm_pool_refill(bm_pool, GFP_KERNEL);
if (err < 0)
break;
}
@@ -83,7 +78,7 @@ int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp)
bm_pool->buf_num += i;
pr_debug("hwpm pool: %d of %d buffers added\n", i, buf_num);
- spin_unlock_irqrestore(&bm_pool->lock, flags);
+ mutex_unlock(&bm_pool->buf_lock);
return i;
}
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 7f51efb2b3ab..04fdc9535772 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -1,14 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Linux network device link state notification
*
* Author:
* Stefan Rompf <sux@loplof.de>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <linux/module.h>
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 1c94f529f4a1..f93785e5833c 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -1,13 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016 Thomas Graf <tgraf@tgraf.ch>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
*/
#include <linux/kernel.h>
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 69e249fbc02f..2f9c0de533c7 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -1,13 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* lwtunnel Infrastructure for light weight tunnels like mpls
*
* Authors: Roopa Prabhu, <roopa@cumulusnetworks.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <linux/capability.h>
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a5556e4d3f96..742cea4ce72e 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Generic address resolution entity
*
@@ -5,11 +6,6 @@
* Pedro Roque <roque@di.fc.ul.pt>
* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Fixes:
* Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
* Harald Welte Add neighbour cache statistics like rtstat
@@ -3209,6 +3205,7 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
}
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
+ __acquires(tbl->lock)
__acquires(rcu_bh)
{
struct neigh_seq_state *state = seq->private;
@@ -3219,6 +3216,7 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl
rcu_read_lock_bh();
state->nht = rcu_dereference_bh(tbl->nht);
+ read_lock(&tbl->lock);
return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
@@ -3252,8 +3250,13 @@ out:
EXPORT_SYMBOL(neigh_seq_next);
void neigh_seq_stop(struct seq_file *seq, void *v)
+ __releases(tbl->lock)
__releases(rcu_bh)
{
+ struct neigh_seq_state *state = seq->private;
+ struct neigh_table *tbl = state->tbl;
+
+ read_unlock(&tbl->lock);
rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index d9c4360257ce..865ba6ca16eb 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* net-sysfs.c - network device class and attributes
*
* Copyright (c) 2003 Stephen Hemminger <shemminger@osdl.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/capability.h>
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 470b179d599e..283ddb2dbc7d 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -43,6 +43,10 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(fdb_delete);
EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update);
#endif
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+#include <trace/events/page_pool.h>
+#endif
+
#include <trace/events/neigh.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_update);
EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_update_done);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 15f68842ac6b..89dc99a28978 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -145,6 +145,17 @@ static void ops_free(const struct pernet_operations *ops, struct net *net)
}
}
+static void ops_pre_exit_list(const struct pernet_operations *ops,
+ struct list_head *net_exit_list)
+{
+ struct net *net;
+
+ if (ops->pre_exit) {
+ list_for_each_entry(net, net_exit_list, exit_list)
+ ops->pre_exit(net);
+ }
+}
+
static void ops_exit_list(const struct pernet_operations *ops,
struct list_head *net_exit_list)
{
@@ -330,6 +341,12 @@ out_undo:
list_add(&net->exit_list, &net_exit_list);
saved_ops = ops;
list_for_each_entry_continue_reverse(ops, &pernet_list, list)
+ ops_pre_exit_list(ops, &net_exit_list);
+
+ synchronize_rcu();
+
+ saved_ops = ops;
+ list_for_each_entry_continue_reverse(ops, &pernet_list, list)
ops_exit_list(ops, &net_exit_list);
ops = saved_ops;
@@ -541,10 +558,15 @@ static void cleanup_net(struct work_struct *work)
list_add_tail(&net->exit_list, &net_exit_list);
}
+ /* Run all of the network namespace pre_exit methods */
+ list_for_each_entry_reverse(ops, &pernet_list, list)
+ ops_pre_exit_list(ops, &net_exit_list);
+
/*
* Another CPU might be rcu-iterating the list, wait for it.
* This needs to be before calling the exit() notifiers, so
* the rcu_barrier() below isn't sufficient alone.
+ * Also the pre_exit() and exit() methods need this barrier.
*/
synchronize_rcu();
@@ -1101,6 +1123,8 @@ static int __register_pernet_operations(struct list_head *list,
out_undo:
/* If I have an error cleanup all namespaces I initialized */
list_del(&ops->list);
+ ops_pre_exit_list(ops, &net_exit_list);
+ synchronize_rcu();
ops_exit_list(ops, &net_exit_list);
ops_free_list(ops, &net_exit_list);
return error;
@@ -1115,6 +1139,8 @@ static void __unregister_pernet_operations(struct pernet_operations *ops)
/* See comment in __register_pernet_operations() */
for_each_net(net)
list_add_tail(&net->exit_list, &net_exit_list);
+ ops_pre_exit_list(ops, &net_exit_list);
+ synchronize_rcu();
ops_exit_list(ops, &net_exit_list);
ops_free_list(ops, &net_exit_list);
}
@@ -1139,6 +1165,8 @@ static void __unregister_pernet_operations(struct pernet_operations *ops)
} else {
LIST_HEAD(net_exit_list);
list_add(&init_net.exit_list, &net_exit_list);
+ ops_pre_exit_list(ops, &net_exit_list);
+ synchronize_rcu();
ops_exit_list(ops, &net_exit_list);
ops_free_list(ops, &net_exit_list);
}
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 7bf833598615..0642f91c4038 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* net/core/netclassid_cgroup.c Classid Cgroupfs Handling
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Authors: Thomas Graf <tgraf@suug.ch>
*/
diff --git a/net/core/netevent.c b/net/core/netevent.c
index 8b3bc4fac613..d76ed7739c70 100644
--- a/net/core/netevent.c
+++ b/net/core/netevent.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Network event notifiers
*
@@ -5,11 +6,6 @@
* Tom Tucker <tom@opengridcomputing.com>
* Steve Wise <swise@opengridcomputing.com>
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Fixes:
*/
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index dd8b1a460d64..2cf27da1baeb 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -696,16 +696,22 @@ int netpoll_setup(struct netpoll *np)
if (!np->local_ip.ip) {
if (!np->ipv6) {
+ const struct in_ifaddr *ifa;
+
in_dev = __in_dev_get_rtnl(ndev);
+ if (!in_dev)
+ goto put_noaddr;
- if (!in_dev || !in_dev->ifa_list) {
+ ifa = rtnl_dereference(in_dev->ifa_list);
+ if (!ifa) {
+put_noaddr:
np_err(np, "no IP address for %s, aborting\n",
np->dev_name);
err = -EDESTADDRREQ;
goto put;
}
- np->local_ip.ip = in_dev->ifa_list->ifa_local;
+ np->local_ip.ip = ifa->ifa_local;
np_info(np, "local IP %pI4\n", &np->local_ip.ip);
} else {
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 7e3d0d99dfae..256b7954b720 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* net/core/netprio_cgroup.c Priority Control Group
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Authors: Neil Horman <nhorman@tuxdriver.com>
*/
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 5b2252c6d49b..b366f59885c1 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -4,9 +4,11 @@
* Author: Jesper Dangaard Brouer <netoptimizer@brouer.com>
* Copyright (C) 2016 Red Hat, Inc.
*/
+
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
+#include <linux/device.h>
#include <net/page_pool.h>
#include <linux/dma-direction.h>
@@ -14,6 +16,8 @@
#include <linux/page-flags.h>
#include <linux/mm.h> /* for __put_page() */
+#include <trace/events/page_pool.h>
+
static int page_pool_init(struct page_pool *pool,
const struct page_pool_params *params)
{
@@ -43,6 +47,11 @@ static int page_pool_init(struct page_pool *pool,
if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
return -ENOMEM;
+ atomic_set(&pool->pages_state_release_cnt, 0);
+
+ if (pool->p.flags & PP_FLAG_DMA_MAP)
+ get_device(pool->p.dev);
+
return 0;
}
@@ -151,6 +160,11 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
page->dma_addr = dma;
skip_dma_map:
+ /* Track how many pages are held 'in-flight' */
+ pool->pages_state_hold_cnt++;
+
+ trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt);
+
/* When page just alloc'ed is should/must have refcnt 1. */
return page;
}
@@ -173,6 +187,33 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
}
EXPORT_SYMBOL(page_pool_alloc_pages);
+/* Calculate distance between two u32 values, valid if distance is below 2^(31)
+ * https://en.wikipedia.org/wiki/Serial_number_arithmetic#General_Solution
+ */
+#define _distance(a, b) (s32)((a) - (b))
+
+static s32 page_pool_inflight(struct page_pool *pool)
+{
+ u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
+ u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
+ s32 distance;
+
+ distance = _distance(hold_cnt, release_cnt);
+
+ trace_page_pool_inflight(pool, distance, hold_cnt, release_cnt);
+ return distance;
+}
+
+static bool __page_pool_safe_to_destroy(struct page_pool *pool)
+{
+ s32 inflight = page_pool_inflight(pool);
+
+ /* The distance should not be able to become negative */
+ WARN(inflight < 0, "Negative(%d) inflight packet-pages", inflight);
+
+ return (inflight == 0);
+}
+
/* Cleanup page_pool state from page */
static void __page_pool_clean_page(struct page_pool *pool,
struct page *page)
@@ -180,7 +221,7 @@ static void __page_pool_clean_page(struct page_pool *pool,
dma_addr_t dma;
if (!(pool->p.flags & PP_FLAG_DMA_MAP))
- return;
+ goto skip_dma_unmap;
dma = page->dma_addr;
/* DMA unmap */
@@ -188,12 +229,27 @@ static void __page_pool_clean_page(struct page_pool *pool,
PAGE_SIZE << pool->p.order, pool->p.dma_dir,
DMA_ATTR_SKIP_CPU_SYNC);
page->dma_addr = 0;
+skip_dma_unmap:
+ atomic_inc(&pool->pages_state_release_cnt);
+ trace_page_pool_state_release(pool, page,
+ atomic_read(&pool->pages_state_release_cnt));
}
+/* unmap the page and clean our state */
+void page_pool_unmap_page(struct page_pool *pool, struct page *page)
+{
+ /* When page is unmapped, this implies page will not be
+ * returned to page_pool.
+ */
+ __page_pool_clean_page(pool, page);
+}
+EXPORT_SYMBOL(page_pool_unmap_page);
+
/* Return a page to the page allocator, cleaning up our state */
static void __page_pool_return_page(struct page_pool *pool, struct page *page)
{
__page_pool_clean_page(pool, page);
+
put_page(page);
/* An optimization would be to call __free_pages(page, pool->p.order)
* knowing page is not part of page-cache (thus avoiding a
@@ -285,21 +341,41 @@ static void __page_pool_empty_ring(struct page_pool *pool)
}
}
-static void __page_pool_destroy_rcu(struct rcu_head *rcu)
+static void __warn_in_flight(struct page_pool *pool)
{
- struct page_pool *pool;
+ u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
+ u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
+ s32 distance;
- pool = container_of(rcu, struct page_pool, rcu);
+ distance = _distance(hold_cnt, release_cnt);
+ /* Drivers should fix this, but only problematic when DMA is used */
+ WARN(1, "Still in-flight pages:%d hold:%u released:%u",
+ distance, hold_cnt, release_cnt);
+}
+
+void __page_pool_free(struct page_pool *pool)
+{
WARN(pool->alloc.count, "API usage violation");
+ WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty");
+
+ /* Can happen due to forced shutdown */
+ if (!__page_pool_safe_to_destroy(pool))
+ __warn_in_flight(pool);
- __page_pool_empty_ring(pool);
ptr_ring_cleanup(&pool->ring, NULL);
+
+ if (pool->p.flags & PP_FLAG_DMA_MAP)
+ put_device(pool->p.dev);
+
kfree(pool);
}
+EXPORT_SYMBOL(__page_pool_free);
-/* Cleanup and release resources */
-void page_pool_destroy(struct page_pool *pool)
+/* Request to shutdown: release pages cached by page_pool, and check
+ * for in-flight pages
+ */
+bool __page_pool_request_shutdown(struct page_pool *pool)
{
struct page *page;
@@ -317,7 +393,6 @@ void page_pool_destroy(struct page_pool *pool)
*/
__page_pool_empty_ring(pool);
- /* An xdp_mem_allocator can still ref page_pool pointer */
- call_rcu(&pool->rcu, __page_pool_destroy_rcu);
+ return __page_pool_safe_to_destroy(pool);
}
-EXPORT_SYMBOL(page_pool_destroy);
+EXPORT_SYMBOL(__page_pool_request_shutdown);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 319ad5490fb3..bb9915291644 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Authors:
* Copyright 2001, 2002 by Robert Olsson <robert.olsson@its.uu.se>
@@ -8,12 +9,6 @@
* Ben Greear <greearb@candelatech.com>
* Jens Låås <jens.laas@data.slu.se>
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- *
* A tool for loading the network with preconfigurated packets.
* The tool is implemented as a linux module. Parameters are output
* device, delay (to hard_xmit), number of packets, and whether
@@ -60,7 +55,6 @@
*
* Integrated to 2.5.x 021029 --Lucio Maciel (luciomaciel@zipmail.com.br)
*
- *
* 021124 Finished major redesign and rewrite for new functionality.
* See Documentation/networking/pktgen.txt for how to use this.
*
@@ -114,7 +108,6 @@
*
* Fixed src_mac command to set source mac of packet to value specified in
* command by Adit Ranadive <adit.262@gmail.com>
- *
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -2125,9 +2118,11 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
rcu_read_lock();
in_dev = __in_dev_get_rcu(pkt_dev->odev);
if (in_dev) {
- if (in_dev->ifa_list) {
- pkt_dev->saddr_min =
- in_dev->ifa_list->ifa_address;
+ const struct in_ifaddr *ifa;
+
+ ifa = rcu_dereference(in_dev->ifa_list);
+ if (ifa) {
+ pkt_dev->saddr_min = ifa->ifa_address;
pkt_dev->saddr_max = pkt_dev->saddr_min;
}
}
@@ -3066,7 +3061,13 @@ static int pktgen_wait_thread_run(struct pktgen_thread *t)
{
while (thread_is_running(t)) {
+ /* note: 't' will still be around even after the unlock/lock
+ * cycle because pktgen_thread threads are only cleared at
+ * net exit
+ */
+ mutex_unlock(&pktgen_thread_lock);
msleep_interruptible(100);
+ mutex_lock(&pktgen_thread_lock);
if (signal_pending(current))
goto signal;
@@ -3081,6 +3082,10 @@ static int pktgen_wait_all_threads_run(struct pktgen_net *pn)
struct pktgen_thread *t;
int sig = 1;
+ /* prevent racing with rmmod */
+ if (!try_module_get(THIS_MODULE))
+ return sig;
+
mutex_lock(&pktgen_thread_lock);
list_for_each_entry(t, &pn->pktgen_threads, th_list) {
@@ -3094,6 +3099,7 @@ static int pktgen_wait_all_threads_run(struct pktgen_net *pn)
t->control |= (T_STOP);
mutex_unlock(&pktgen_thread_lock);
+ module_put(THIS_MODULE);
return sig;
}
diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c
index 7109c168b5e0..d964a5147f22 100644
--- a/net/core/ptp_classifier.c
+++ b/net/core/ptp_classifier.c
@@ -1,13 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* PTP classifier
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
*/
/* The below program is the bpf_asm (tools/net/) representation of
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 9b8727c67b58..c9bb00008528 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* NET Generic infrastructure for Network protocols.
*
* Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* From code originally in include/net/tcp.h
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index adcc045952c2..8ac81630ab5c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -7,11 +8,6 @@
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Fixes:
* Vitaly E. Lavrov RTA_OK arithmetics was wrong.
*/
@@ -912,6 +908,7 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
size += num_vfs *
(nla_total_size(0) +
nla_total_size(sizeof(struct ifla_vf_mac)) +
+ nla_total_size(sizeof(struct ifla_vf_broadcast)) +
nla_total_size(sizeof(struct ifla_vf_vlan)) +
nla_total_size(0) + /* nest IFLA_VF_VLAN_LIST */
nla_total_size(MAX_VLAN_LIST_LEN *
@@ -1201,6 +1198,7 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
struct ifla_vf_vlan vf_vlan;
struct ifla_vf_rate vf_rate;
struct ifla_vf_mac vf_mac;
+ struct ifla_vf_broadcast vf_broadcast;
struct ifla_vf_info ivi;
memset(&ivi, 0, sizeof(ivi));
@@ -1235,6 +1233,7 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
vf_trust.vf = ivi.vf;
memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
+ memcpy(vf_broadcast.broadcast, dev->broadcast, dev->addr_len);
vf_vlan.vlan = ivi.vlan;
vf_vlan.qos = ivi.qos;
vf_vlan_info.vlan = ivi.vlan;
@@ -1251,6 +1250,7 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
if (!vf)
goto nla_put_vfinfo_failure;
if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
+ nla_put(skb, IFLA_VF_BROADCAST, sizeof(vf_broadcast), &vf_broadcast) ||
nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
&vf_rate) ||
@@ -1757,6 +1757,7 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
[IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) },
+ [IFLA_VF_BROADCAST] = { .type = NLA_REJECT },
[IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) },
[IFLA_VF_VLAN_LIST] = { .type = NLA_NESTED },
[IFLA_VF_TX_RATE] = { .len = sizeof(struct ifla_vf_tx_rate) },
diff --git a/net/core/scm.c b/net/core/scm.c
index 52ef219cf6df..31a38239c92f 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* scm.c - Socket level control messages processing.
*
* Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
* Alignment and value checking mods by Craig Metz
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4a712a00243a..5323441a12cc 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Routines having to do with the 'struct sk_buff' memory handlers.
*
@@ -25,11 +26,6 @@
* disabled, or you better be *real* sure that the operation is atomic
* with respect to whatever list is being frobbed (e.g. via lock_sock()
* or via disabling bottom half handlers, etc).
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
/*
@@ -370,19 +366,21 @@ struct napi_alloc_cache {
static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
-static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
+static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{
- struct page_frag_cache *nc;
- unsigned long flags;
- void *data;
+ struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
- local_irq_save(flags);
- nc = this_cpu_ptr(&netdev_alloc_cache);
- data = page_frag_alloc(nc, fragsz, gfp_mask);
- local_irq_restore(flags);
- return data;
+ return page_frag_alloc(&nc->page, fragsz, gfp_mask);
}
+void *napi_alloc_frag(unsigned int fragsz)
+{
+ fragsz = SKB_DATA_ALIGN(fragsz);
+
+ return __napi_alloc_frag(fragsz, GFP_ATOMIC);
+}
+EXPORT_SYMBOL(napi_alloc_frag);
+
/**
* netdev_alloc_frag - allocate a page fragment
* @fragsz: fragment size
@@ -392,26 +390,21 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
*/
void *netdev_alloc_frag(unsigned int fragsz)
{
- fragsz = SKB_DATA_ALIGN(fragsz);
-
- return __netdev_alloc_frag(fragsz, GFP_ATOMIC);
-}
-EXPORT_SYMBOL(netdev_alloc_frag);
-
-static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
-{
- struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
-
- return page_frag_alloc(&nc->page, fragsz, gfp_mask);
-}
+ struct page_frag_cache *nc;
+ void *data;
-void *napi_alloc_frag(unsigned int fragsz)
-{
fragsz = SKB_DATA_ALIGN(fragsz);
-
- return __napi_alloc_frag(fragsz, GFP_ATOMIC);
+ if (in_irq() || irqs_disabled()) {
+ nc = this_cpu_ptr(&netdev_alloc_cache);
+ data = page_frag_alloc(nc, fragsz, GFP_ATOMIC);
+ } else {
+ local_bh_disable();
+ data = __napi_alloc_frag(fragsz, GFP_ATOMIC);
+ local_bh_enable();
+ }
+ return data;
}
-EXPORT_SYMBOL(napi_alloc_frag);
+EXPORT_SYMBOL(netdev_alloc_frag);
/**
* __netdev_alloc_skb - allocate an skbuff for rx on a specific device
@@ -430,7 +423,6 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
gfp_t gfp_mask)
{
struct page_frag_cache *nc;
- unsigned long flags;
struct sk_buff *skb;
bool pfmemalloc;
void *data;
@@ -451,13 +443,17 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
if (sk_memalloc_socks())
gfp_mask |= __GFP_MEMALLOC;
- local_irq_save(flags);
-
- nc = this_cpu_ptr(&netdev_alloc_cache);
- data = page_frag_alloc(nc, len, gfp_mask);
- pfmemalloc = nc->pfmemalloc;
-
- local_irq_restore(flags);
+ if (in_irq() || irqs_disabled()) {
+ nc = this_cpu_ptr(&netdev_alloc_cache);
+ data = page_frag_alloc(nc, len, gfp_mask);
+ pfmemalloc = nc->pfmemalloc;
+ } else {
+ local_bh_disable();
+ nc = this_cpu_ptr(&napi_alloc_cache.page);
+ data = page_frag_alloc(nc, len, gfp_mask);
+ pfmemalloc = nc->pfmemalloc;
+ local_bh_enable();
+ }
if (unlikely(!data))
return NULL;
@@ -914,6 +910,31 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
}
/**
+ * alloc_skb_for_msg() - allocate sk_buff to wrap frag list forming a msg
+ * @first: first sk_buff of the msg
+ */
+struct sk_buff *alloc_skb_for_msg(struct sk_buff *first)
+{
+ struct sk_buff *n;
+
+ n = alloc_skb(0, GFP_ATOMIC);
+ if (!n)
+ return NULL;
+
+ n->len = first->len;
+ n->data_len = first->len;
+ n->truesize = first->truesize;
+
+ skb_shinfo(n)->frag_list = first;
+
+ __copy_skb_header(n, first);
+ n->destructor = NULL;
+
+ return n;
+}
+EXPORT_SYMBOL_GPL(alloc_skb_for_msg);
+
+/**
* skb_morph - morph one skb into another
* @dst: the skb to receive the contents
* @src: the skb to supply the contents
@@ -2342,6 +2363,7 @@ do_frag_list:
kv.iov_base = skb->data + offset;
kv.iov_len = slen;
memset(&msg, 0, sizeof(msg));
+ msg.msg_flags = MSG_DONTWAIT;
ret = kernel_sendmsg_locked(sk, &msg, &kv, 1, slen);
if (ret <= 0)
diff --git a/net/core/sock.c b/net/core/sock.c
index 06be30737b69..ef471f643c95 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -6,7 +7,6 @@
* Generic socket support routines. Memory allocators, socket lock/release
* handler for protocols to use and generic option handler.
*
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Florian La Roche, <flla@stud.uni-sb.de>
@@ -81,12 +81,6 @@
* Arnaldo C. Melo : cleanups, use skb_queue_purge
*
* To Fix:
- *
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -1860,6 +1854,9 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
goto out;
}
RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
+#ifdef CONFIG_BPF_SYSCALL
+ RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
+#endif
newsk->sk_err = 0;
newsk->sk_err_soft = 0;
@@ -2330,6 +2327,7 @@ static void sk_leave_memory_pressure(struct sock *sk)
/* On 32bit arches, an skb frag is limited to 2^15 */
#define SKB_FRAG_PAGE_ORDER get_order(32768)
+DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
/**
* skb_page_frag_refill - check that a page_frag contains enough room
@@ -2354,7 +2352,8 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
}
pfrag->offset = 0;
- if (SKB_FRAG_PAGE_ORDER) {
+ if (SKB_FRAG_PAGE_ORDER &&
+ !static_branch_unlikely(&net_high_order_alloc_disable_key)) {
/* Avoid direct reclaim but allow kswapd to wake */
pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
__GFP_COMP | __GFP_NOWARN |
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 1a2685694abd..f9204719aeee 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -562,6 +562,13 @@ static struct ctl_table net_core_table[] = {
.extra1 = &zero,
.extra2 = &two,
},
+ {
+ .procname = "high_order_alloc_disable",
+ .data = &net_high_order_alloc_disable_key.key,
+ .maxlen = sizeof(net_high_order_alloc_disable_key),
+ .mode = 0644,
+ .proc_handler = proc_do_static_key,
+ },
{ }
};
diff --git a/net/core/utils.c b/net/core/utils.c
index 2a597ac7808e..6b6e51db9f3b 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Generic address resultion entity
*
@@ -7,11 +8,6 @@
* in{4,6}_pton YOSHIFUJI Hideaki, Copyright (C)2006 USAGI/WIDE Project
*
* Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 4b2b194f4f1f..b29d7b513a18 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* net/core/xdp.c
*
* Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
- * Released under terms in GPL version 2. See COPYING.
*/
#include <linux/bpf.h>
#include <linux/filter.h>
@@ -14,6 +14,8 @@
#include <net/page_pool.h>
#include <net/xdp.h>
+#include <net/xdp_priv.h> /* struct xdp_mem_allocator */
+#include <trace/events/xdp.h>
#define REG_STATE_NEW 0x0
#define REG_STATE_REGISTERED 0x1
@@ -29,17 +31,6 @@ static int mem_id_next = MEM_ID_MIN;
static bool mem_id_init; /* false */
static struct rhashtable *mem_id_ht;
-struct xdp_mem_allocator {
- struct xdp_mem_info mem;
- union {
- void *allocator;
- struct page_pool *page_pool;
- struct zero_copy_allocator *zc_alloc;
- };
- struct rhash_head node;
- struct rcu_head rcu;
-};
-
static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
{
const u32 *k = data;
@@ -79,13 +70,13 @@ static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
xa = container_of(rcu, struct xdp_mem_allocator, rcu);
+ /* Allocator has indicated it is safe to remove before this is called */
+ if (xa->mem.type == MEM_TYPE_PAGE_POOL)
+ page_pool_free(xa->page_pool);
+
/* Allow this ID to be reused */
ida_simple_remove(&mem_id_pool, xa->mem.id);
- /* Notice, driver is expected to free the *allocator,
- * e.g. page_pool, and MUST also use RCU free.
- */
-
/* Poison memory */
xa->mem.id = 0xFFFF;
xa->mem.type = 0xF0F0;
@@ -94,6 +85,64 @@ static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
kfree(xa);
}
+bool __mem_id_disconnect(int id, bool force)
+{
+ struct xdp_mem_allocator *xa;
+ bool safe_to_remove = true;
+
+ mutex_lock(&mem_id_lock);
+
+ xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
+ if (!xa) {
+ mutex_unlock(&mem_id_lock);
+ WARN(1, "Request remove non-existing id(%d), driver bug?", id);
+ return true;
+ }
+ xa->disconnect_cnt++;
+
+ /* Detects in-flight packet-pages for page_pool */
+ if (xa->mem.type == MEM_TYPE_PAGE_POOL)
+ safe_to_remove = page_pool_request_shutdown(xa->page_pool);
+
+ trace_mem_disconnect(xa, safe_to_remove, force);
+
+ if ((safe_to_remove || force) &&
+ !rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
+ call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
+
+ mutex_unlock(&mem_id_lock);
+ return (safe_to_remove|force);
+}
+
+#define DEFER_TIME (msecs_to_jiffies(1000))
+#define DEFER_WARN_INTERVAL (30 * HZ)
+#define DEFER_MAX_RETRIES 120
+
+static void mem_id_disconnect_defer_retry(struct work_struct *wq)
+{
+ struct delayed_work *dwq = to_delayed_work(wq);
+ struct xdp_mem_allocator *xa = container_of(dwq, typeof(*xa), defer_wq);
+ bool force = false;
+
+ if (xa->disconnect_cnt > DEFER_MAX_RETRIES)
+ force = true;
+
+ if (__mem_id_disconnect(xa->mem.id, force))
+ return;
+
+ /* Periodic warning */
+ if (time_after_eq(jiffies, xa->defer_warn)) {
+ int sec = (s32)((u32)jiffies - (u32)xa->defer_start) / HZ;
+
+ pr_warn("%s() stalled mem.id=%u shutdown %d attempts %d sec\n",
+ __func__, xa->mem.id, xa->disconnect_cnt, sec);
+ xa->defer_warn = jiffies + DEFER_WARN_INTERVAL;
+ }
+
+ /* Still not ready to be disconnected, retry later */
+ schedule_delayed_work(&xa->defer_wq, DEFER_TIME);
+}
+
void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
{
struct xdp_mem_allocator *xa;
@@ -112,16 +161,30 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
if (id == 0)
return;
+ if (__mem_id_disconnect(id, false))
+ return;
+
+ /* Could not disconnect, defer new disconnect attempt to later */
mutex_lock(&mem_id_lock);
xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
- if (xa && !rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
- call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
+ if (!xa) {
+ mutex_unlock(&mem_id_lock);
+ return;
+ }
+ xa->defer_start = jiffies;
+ xa->defer_warn = jiffies + DEFER_WARN_INTERVAL;
+ INIT_DELAYED_WORK(&xa->defer_wq, mem_id_disconnect_defer_retry);
mutex_unlock(&mem_id_lock);
+ schedule_delayed_work(&xa->defer_wq, DEFER_TIME);
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model);
+/* This unregister operation will also cleanup and destroy the
+ * allocator. The page_pool_free() operation is not called until it's
+ * safe to remove, possibly deferred to a workqueue.
+ */
void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
{
/* Simplify driver cleanup code paths, allow unreg "unused" */
@@ -301,12 +364,15 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
/* Insert allocator into ID lookup table */
ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
if (IS_ERR(ptr)) {
+ ida_simple_remove(&mem_id_pool, xdp_rxq->mem.id);
+ xdp_rxq->mem.id = 0;
errno = PTR_ERR(ptr);
goto err;
}
mutex_unlock(&mem_id_lock);
+ trace_mem_connect(xdp_alloc, xdp_rxq);
return 0;
err:
mutex_unlock(&mem_id_lock);
@@ -333,10 +399,13 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
page = virt_to_head_page(data);
- if (xa) {
+ if (likely(xa)) {
napi_direct &= !xdp_return_frame_no_direct();
page_pool_put_page(xa->page_pool, page, napi_direct);
} else {
+ /* Hopefully the stack trace shows who to blame for the late return */
+ WARN_ONCE(1, "page_pool gone mem.id=%d", mem->id);
+ trace_mem_return_failed(mem, page);
put_page(page);
}
rcu_read_unlock();
@@ -379,6 +448,21 @@ void xdp_return_buff(struct xdp_buff *xdp)
}
EXPORT_SYMBOL_GPL(xdp_return_buff);
+/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */
+void __xdp_release_frame(void *data, struct xdp_mem_info *mem)
+{
+ struct xdp_mem_allocator *xa;
+ struct page *page;
+
+ rcu_read_lock();
+ xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
+ page = virt_to_head_page(data);
+ if (xa)
+ page_pool_release_page(xa->page_pool, page);
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(__xdp_release_frame);
+
int xdp_attachment_query(struct xdp_attachment_info *info,
struct netdev_bpf *bpf)
{
OpenPOWER on IntegriCloud