summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c6
-rw-r--r--net/ipv4/arp.c6
-rw-r--r--net/ipv4/datagram.c6
-rw-r--r--net/ipv4/devinet.c149
-rw-r--r--net/ipv4/esp4_offload.c5
-rw-r--r--net/ipv4/fib_frontend.c64
-rw-r--r--net/ipv4/fib_lookup.h1
-rw-r--r--net/ipv4/fib_rules.c14
-rw-r--r--net/ipv4/fib_semantics.c353
-rw-r--r--net/ipv4/fib_trie.c62
-rw-r--r--net/ipv4/gre_demux.c7
-rw-r--r--net/ipv4/gre_offload.c6
-rw-r--r--net/ipv4/icmp.c9
-rw-r--r--net/ipv4/igmp.c11
-rw-r--r--net/ipv4/inet_connection_sock.c11
-rw-r--r--net/ipv4/inet_diag.c6
-rw-r--r--net/ipv4/inet_fragment.c25
-rw-r--r--net/ipv4/inet_hashtables.c8
-rw-r--r--net/ipv4/ip_fragment.c14
-rw-r--r--net/ipv4/ip_gre.c7
-rw-r--r--net/ipv4/ip_input.c9
-rw-r--r--net/ipv4/ip_output.c10
-rw-r--r--net/ipv4/ip_tunnel.c15
-rw-r--r--net/ipv4/ip_tunnel_core.c15
-rw-r--r--net/ipv4/ip_vti.c7
-rw-r--r--net/ipv4/ipcomp.c6
-rw-r--r--net/ipv4/ipip.c7
-rw-r--r--net/ipv4/ipmr.c7
-rw-r--r--net/ipv4/netfilter/arpt_mangle.c2
-rw-r--r--net/ipv4/netfilter/ipt_ECN.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic_main.c2
-rw-r--r--net/ipv4/netfilter/nf_tproxy_ipv4.c9
-rw-r--r--net/ipv4/nexthop.c355
-rw-r--r--net/ipv4/ping.c7
-rw-r--r--net/ipv4/proc.c7
-rw-r--r--net/ipv4/protocol.c6
-rw-r--r--net/ipv4/raw.c6
-rw-r--r--net/ipv4/route.c52
-rw-r--r--net/ipv4/syncookies.c6
-rw-r--r--net/ipv4/sysctl_net_ipv4.c25
-rw-r--r--net/ipv4/tcp.c36
-rw-r--r--net/ipv4/tcp_dctcp.c6
-rw-r--r--net/ipv4/tcp_diag.c6
-rw-r--r--net/ipv4/tcp_fastopen.c97
-rw-r--r--net/ipv4/tcp_input.c30
-rw-r--r--net/ipv4/tcp_ipv4.c32
-rw-r--r--net/ipv4/tcp_minisocks.c3
-rw-r--r--net/ipv4/tcp_offload.c6
-rw-r--r--net/ipv4/tcp_output.c33
-rw-r--r--net/ipv4/tcp_timer.c1
-rw-r--r--net/ipv4/udp.c44
-rw-r--r--net/ipv4/udp_diag.c6
-rw-r--r--net/ipv4/udp_offload.c8
-rw-r--r--net/ipv4/udplite.c5
-rw-r--r--net/ipv4/xfrm4_output.c6
-rw-r--r--net/ipv4/xfrm4_protocol.c6
57 files changed, 1074 insertions, 580 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index aff93e7cdb31..52bdb881a506 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -58,11 +59,6 @@
* Some other random speedups.
* Cyrus Durgin : Cleaned up file for kmod hacks.
* Andi Kleen : Fix inet_stream_connect TCP race.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#define pr_fmt(fmt) "IPv4: " fmt
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 850a6f13a082..05eb42f347e8 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* linux/net/ipv4/arp.c
*
* Copyright (C) 1994 by Florian La Roche
@@ -7,11 +8,6 @@
* high-level addresses) into a low-level hardware address (like an Ethernet
* address).
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Fixes:
* Alan Cox : Removed the Ethernet assumptions in
* Florian's code
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 300921417f89..7bd29e694603 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* common UDP/RAW code
* Linux INET implementation
*
* Authors:
* Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/types.h>
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 701c5d113a34..914ccc7f192a 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* NET3 IP device support routines.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Derived from the IP parts of dev.c 1.0.19
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -194,7 +190,8 @@ static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
-static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+static void inet_del_ifa(struct in_device *in_dev,
+ struct in_ifaddr __rcu **ifap,
int destroy);
#ifdef CONFIG_SYSCTL
static int devinet_sysctl_register(struct in_device *idev);
@@ -300,8 +297,8 @@ static void in_dev_rcu_put(struct rcu_head *head)
static void inetdev_destroy(struct in_device *in_dev)
{
- struct in_ifaddr *ifa;
struct net_device *dev;
+ struct in_ifaddr *ifa;
ASSERT_RTNL();
@@ -311,7 +308,7 @@ static void inetdev_destroy(struct in_device *in_dev)
ip_mc_destroy_dev(in_dev);
- while ((ifa = in_dev->ifa_list) != NULL) {
+ while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
inet_free_ifa(ifa);
}
@@ -327,30 +324,35 @@ static void inetdev_destroy(struct in_device *in_dev)
int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
{
+ const struct in_ifaddr *ifa;
+
rcu_read_lock();
- for_primary_ifa(in_dev) {
+ in_dev_for_each_ifa_rcu(ifa, in_dev) {
if (inet_ifa_match(a, ifa)) {
if (!b || inet_ifa_match(b, ifa)) {
rcu_read_unlock();
return 1;
}
}
- } endfor_ifa(in_dev);
+ }
rcu_read_unlock();
return 0;
}
-static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
- int destroy, struct nlmsghdr *nlh, u32 portid)
+static void __inet_del_ifa(struct in_device *in_dev,
+ struct in_ifaddr __rcu **ifap,
+ int destroy, struct nlmsghdr *nlh, u32 portid)
{
struct in_ifaddr *promote = NULL;
- struct in_ifaddr *ifa, *ifa1 = *ifap;
- struct in_ifaddr *last_prim = in_dev->ifa_list;
+ struct in_ifaddr *ifa, *ifa1;
+ struct in_ifaddr *last_prim;
struct in_ifaddr *prev_prom = NULL;
int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
ASSERT_RTNL();
+ ifa1 = rtnl_dereference(*ifap);
+ last_prim = rtnl_dereference(in_dev->ifa_list);
if (in_dev->dead)
goto no_promotions;
@@ -359,9 +361,9 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
**/
if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
- struct in_ifaddr **ifap1 = &ifa1->ifa_next;
+ struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
- while ((ifa = *ifap1) != NULL) {
+ while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
ifa1->ifa_scope <= ifa->ifa_scope)
last_prim = ifa;
@@ -394,7 +396,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
* and later to add them back with new prefsrc. Do this
* while all addresses are on the device list.
*/
- for (ifa = promote; ifa; ifa = ifa->ifa_next) {
+ for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
if (ifa1->ifa_mask == ifa->ifa_mask &&
inet_ifa_match(ifa1->ifa_address, ifa))
fib_del_ifaddr(ifa, ifa1);
@@ -420,19 +422,24 @@ no_promotions:
blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
if (promote) {
- struct in_ifaddr *next_sec = promote->ifa_next;
+ struct in_ifaddr *next_sec;
+ next_sec = rtnl_dereference(promote->ifa_next);
if (prev_prom) {
- prev_prom->ifa_next = promote->ifa_next;
- promote->ifa_next = last_prim->ifa_next;
- last_prim->ifa_next = promote;
+ struct in_ifaddr *last_sec;
+
+ last_sec = rtnl_dereference(last_prim->ifa_next);
+ rcu_assign_pointer(prev_prom->ifa_next, next_sec);
+ rcu_assign_pointer(promote->ifa_next, last_sec);
+ rcu_assign_pointer(last_prim->ifa_next, promote);
}
promote->ifa_flags &= ~IFA_F_SECONDARY;
rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
blocking_notifier_call_chain(&inetaddr_chain,
NETDEV_UP, promote);
- for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
+ for (ifa = next_sec; ifa;
+ ifa = rtnl_dereference(ifa->ifa_next)) {
if (ifa1->ifa_mask != ifa->ifa_mask ||
!inet_ifa_match(ifa1->ifa_address, ifa))
continue;
@@ -444,7 +451,8 @@ no_promotions:
inet_free_ifa(ifa1);
}
-static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+static void inet_del_ifa(struct in_device *in_dev,
+ struct in_ifaddr __rcu **ifap,
int destroy)
{
__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
@@ -457,9 +465,10 @@ static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
u32 portid, struct netlink_ext_ack *extack)
{
+ struct in_ifaddr __rcu **last_primary, **ifap;
struct in_device *in_dev = ifa->ifa_dev;
- struct in_ifaddr *ifa1, **ifap, **last_primary;
struct in_validator_info ivi;
+ struct in_ifaddr *ifa1;
int ret;
ASSERT_RTNL();
@@ -472,8 +481,10 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
ifa->ifa_flags &= ~IFA_F_SECONDARY;
last_primary = &in_dev->ifa_list;
- for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
- ifap = &ifa1->ifa_next) {
+ ifap = &in_dev->ifa_list;
+ ifa1 = rtnl_dereference(*ifap);
+
+ while (ifa1) {
if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
ifa->ifa_scope <= ifa1->ifa_scope)
last_primary = &ifa1->ifa_next;
@@ -489,6 +500,9 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
}
ifa->ifa_flags |= IFA_F_SECONDARY;
}
+
+ ifap = &ifa1->ifa_next;
+ ifa1 = rtnl_dereference(*ifap);
}
/* Allow any devices that wish to register ifaddr validtors to weigh
@@ -514,8 +528,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
ifap = last_primary;
}
- ifa->ifa_next = *ifap;
- *ifap = ifa;
+ rcu_assign_pointer(ifa->ifa_next, *ifap);
+ rcu_assign_pointer(*ifap, ifa);
inet_hash_insert(dev_net(in_dev->dev), ifa);
@@ -580,12 +594,14 @@ EXPORT_SYMBOL(inetdev_by_index);
struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
__be32 mask)
{
+ struct in_ifaddr *ifa;
+
ASSERT_RTNL();
- for_primary_ifa(in_dev) {
+ in_dev_for_each_ifa_rtnl(ifa, in_dev) {
if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
return ifa;
- } endfor_ifa(in_dev);
+ }
return NULL;
}
@@ -613,10 +629,12 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
+ struct in_ifaddr __rcu **ifap;
struct nlattr *tb[IFA_MAX+1];
struct in_device *in_dev;
struct ifaddrmsg *ifm;
- struct in_ifaddr *ifa, **ifap;
+ struct in_ifaddr *ifa;
+
int err = -EINVAL;
ASSERT_RTNL();
@@ -633,7 +651,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout;
}
- for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
+ for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
ifap = &ifa->ifa_next) {
if (tb[IFA_LOCAL] &&
ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
@@ -721,15 +739,19 @@ static void check_lifetime(struct work_struct *work)
if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
age >= ifa->ifa_valid_lft) {
- struct in_ifaddr **ifap;
+ struct in_ifaddr __rcu **ifap;
+ struct in_ifaddr *tmp;
- for (ifap = &ifa->ifa_dev->ifa_list;
- *ifap != NULL; ifap = &(*ifap)->ifa_next) {
- if (*ifap == ifa) {
+ ifap = &ifa->ifa_dev->ifa_list;
+ tmp = rtnl_dereference(*ifap);
+ while (tmp) {
+ if (tmp == ifa) {
inet_del_ifa(ifa->ifa_dev,
ifap, 1);
break;
}
+ ifap = &tmp->ifa_next;
+ tmp = rtnl_dereference(*ifap);
}
} else if (ifa->ifa_preferred_lft !=
INFINITY_LIFE_TIME &&
@@ -873,13 +895,12 @@ errout:
static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
{
struct in_device *in_dev = ifa->ifa_dev;
- struct in_ifaddr *ifa1, **ifap;
+ struct in_ifaddr *ifa1;
if (!ifa->ifa_local)
return NULL;
- for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
- ifap = &ifa1->ifa_next) {
+ in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
if (ifa1->ifa_mask == ifa->ifa_mask &&
inet_ifa_match(ifa1->ifa_address, ifa) &&
ifa1->ifa_local == ifa->ifa_local)
@@ -974,8 +995,8 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
{
struct sockaddr_in sin_orig;
struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
+ struct in_ifaddr __rcu **ifap = NULL;
struct in_device *in_dev;
- struct in_ifaddr **ifap = NULL;
struct in_ifaddr *ifa = NULL;
struct net_device *dev;
char *colon;
@@ -1046,7 +1067,9 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
/* note: we only do this for a limited set of ioctls
and only if the original address family was AF_INET.
This is checked above. */
- for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
+
+ for (ifap = &in_dev->ifa_list;
+ (ifa = rtnl_dereference(*ifap)) != NULL;
ifap = &ifa->ifa_next) {
if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
sin_orig.sin_addr.s_addr ==
@@ -1059,7 +1082,8 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
4.3BSD-style and passed in junk so we fall back to
comparing just the label */
if (!ifa) {
- for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
+ for (ifap = &in_dev->ifa_list;
+ (ifa = rtnl_dereference(*ifap)) != NULL;
ifap = &ifa->ifa_next)
if (!strcmp(ifr->ifr_name, ifa->ifa_label))
break;
@@ -1208,7 +1232,7 @@ out:
static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
{
struct in_device *in_dev = __in_dev_get_rtnl(dev);
- struct in_ifaddr *ifa;
+ const struct in_ifaddr *ifa;
struct ifreq ifr;
int done = 0;
@@ -1218,7 +1242,7 @@ static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int s
if (!in_dev)
goto out;
- for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
+ in_dev_for_each_ifa_rtnl(ifa, in_dev) {
if (!buf) {
done += size;
continue;
@@ -1246,17 +1270,22 @@ out:
static __be32 in_dev_select_addr(const struct in_device *in_dev,
int scope)
{
- for_primary_ifa(in_dev) {
+ const struct in_ifaddr *ifa;
+
+ in_dev_for_each_ifa_rcu(ifa, in_dev) {
+ if (ifa->ifa_flags & IFA_F_SECONDARY)
+ continue;
if (ifa->ifa_scope != RT_SCOPE_LINK &&
ifa->ifa_scope <= scope)
return ifa->ifa_local;
- } endfor_ifa(in_dev);
+ }
return 0;
}
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
+ const struct in_ifaddr *ifa;
__be32 addr = 0;
struct in_device *in_dev;
struct net *net = dev_net(dev);
@@ -1267,7 +1296,9 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
if (!in_dev)
goto no_in_dev;
- for_primary_ifa(in_dev) {
+ in_dev_for_each_ifa_rcu(ifa, in_dev) {
+ if (ifa->ifa_flags & IFA_F_SECONDARY)
+ continue;
if (ifa->ifa_scope > scope)
continue;
if (!dst || inet_ifa_match(dst, ifa)) {
@@ -1276,7 +1307,7 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
}
if (!addr)
addr = ifa->ifa_local;
- } endfor_ifa(in_dev);
+ }
if (addr)
goto out_unlock;
@@ -1321,10 +1352,11 @@ EXPORT_SYMBOL(inet_select_addr);
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
__be32 local, int scope)
{
- int same = 0;
+ const struct in_ifaddr *ifa;
__be32 addr = 0;
+ int same = 0;
- for_ifa(in_dev) {
+ in_dev_for_each_ifa_rcu(ifa, in_dev) {
if (!addr &&
(local == ifa->ifa_local || !local) &&
ifa->ifa_scope <= scope) {
@@ -1350,7 +1382,7 @@ static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
same = 0;
}
}
- } endfor_ifa(in_dev);
+ }
return same ? addr : 0;
}
@@ -1424,7 +1456,7 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
struct in_ifaddr *ifa;
int named = 0;
- for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
+ in_dev_for_each_ifa_rtnl(ifa, in_dev) {
char old[IFNAMSIZ], *dot;
memcpy(old, ifa->ifa_label, IFNAMSIZ);
@@ -1454,10 +1486,9 @@ static void inetdev_send_gratuitous_arp(struct net_device *dev,
struct in_device *in_dev)
{
- struct in_ifaddr *ifa;
+ const struct in_ifaddr *ifa;
- for (ifa = in_dev->ifa_list; ifa;
- ifa = ifa->ifa_next) {
+ in_dev_for_each_ifa_rtnl(ifa, in_dev) {
arp_send(ARPOP_REQUEST, ETH_P_ARP,
ifa->ifa_local, dev,
ifa->ifa_local, NULL,
@@ -1727,15 +1758,17 @@ static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
int ip_idx = 0;
int err;
- for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next, ip_idx++) {
- if (ip_idx < s_ip_idx)
+ in_dev_for_each_ifa_rtnl(ifa, in_dev) {
+ if (ip_idx < s_ip_idx) {
+ ip_idx++;
continue;
-
+ }
err = inet_fill_ifaddr(skb, ifa, fillargs);
if (err < 0)
goto done;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ ip_idx++;
}
err = 0;
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 8edcfa66d1e5..2e5e377f50a1 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* IPV4 GSO/GRO offload support
* Linux INET implementation
@@ -5,10 +6,6 @@
* Copyright (C) 2016 secunet Security Networks AG
* Author: Steffen Klassert <steffen.klassert@secunet.com>
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
* ESP GRO support
*/
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 76055c66326a..108191667531 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -6,11 +7,6 @@
* IPv4 Forwarding Information Base: FIB frontend.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
@@ -43,6 +39,7 @@
#include <net/sock.h>
#include <net/arp.h>
#include <net/ip_fib.h>
+#include <net/nexthop.h>
#include <net/rtnetlink.h>
#include <net/xfrm.h>
#include <net/l3mdev.h>
@@ -234,7 +231,9 @@ static inline unsigned int __inet_dev_addr_type(struct net *net,
if (table) {
ret = RTN_UNICAST;
if (!fib_table_lookup(table, &fl4, &res, FIB_LOOKUP_NOREF)) {
- if (!dev || dev == res.fi->fib_dev)
+ struct fib_nh_common *nhc = fib_info_nhc(res.fi, 0);
+
+ if (!dev || dev == nhc->nhc_dev)
ret = res.type;
}
}
@@ -321,19 +320,19 @@ bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev)
#ifdef CONFIG_IP_ROUTE_MULTIPATH
int ret;
- for (ret = 0; ret < fi->fib_nhs; ret++) {
- struct fib_nh *nh = &fi->fib_nh[ret];
+ for (ret = 0; ret < fib_info_num_path(fi); ret++) {
+ const struct fib_nh_common *nhc = fib_info_nhc(fi, ret);
- if (nh->fib_nh_dev == dev) {
+ if (nhc->nhc_dev == dev) {
dev_match = true;
break;
- } else if (l3mdev_master_ifindex_rcu(nh->fib_nh_dev) == dev->ifindex) {
+ } else if (l3mdev_master_ifindex_rcu(nhc->nhc_dev) == dev->ifindex) {
dev_match = true;
break;
}
}
#else
- if (fi->fib_nh[0].fib_nh_dev == dev)
+ if (fib_info_nhc(fi, 0)->nhc_dev == dev)
dev_match = true;
#endif
@@ -540,14 +539,22 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
cfg->fc_oif = dev->ifindex;
cfg->fc_table = l3mdev_fib_table(dev);
if (colon) {
- struct in_ifaddr *ifa;
- struct in_device *in_dev = __in_dev_get_rtnl(dev);
+ const struct in_ifaddr *ifa;
+ struct in_device *in_dev;
+
+ in_dev = __in_dev_get_rtnl(dev);
if (!in_dev)
return -ENODEV;
+
*colon = ':';
- for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
+
+ rcu_read_lock();
+ in_dev_for_each_ifa_rcu(ifa, in_dev) {
if (strcmp(ifa->ifa_label, devname) == 0)
break;
+ }
+ rcu_read_unlock();
+
if (!ifa)
return -ENODEV;
cfg->fc_prefsrc = ifa->ifa_local;
@@ -664,6 +671,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
[RTA_IP_PROTO] = { .type = NLA_U8 },
[RTA_SPORT] = { .type = NLA_U16 },
[RTA_DPORT] = { .type = NLA_U16 },
+ [RTA_NH_ID] = { .type = NLA_U32 },
};
int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla,
@@ -801,6 +809,18 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
if (err < 0)
goto errout;
break;
+ case RTA_NH_ID:
+ cfg->fc_nh_id = nla_get_u32(attr);
+ break;
+ }
+ }
+
+ if (cfg->fc_nh_id) {
+ if (cfg->fc_oif || cfg->fc_gw_family ||
+ cfg->fc_encap || cfg->fc_mp) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop specification and nexthop id are mutually exclusive");
+ return -EINVAL;
}
}
@@ -827,6 +847,12 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
goto errout;
+ if (cfg.fc_nh_id && !nexthop_find_by_id(net, cfg.fc_nh_id)) {
+ NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
+ err = -EINVAL;
+ goto errout;
+ }
+
tb = fib_get_table(net, cfg.fc_table);
if (!tb) {
NL_SET_ERR_MSG(extack, "FIB table does not exist");
@@ -1177,8 +1203,8 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
*
* Scan address list to be sure that addresses are really gone.
*/
-
- for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
+ rcu_read_lock();
+ in_dev_for_each_ifa_rcu(ifa1, in_dev) {
if (ifa1 == ifa) {
/* promotion, keep the IP */
gone = 0;
@@ -1246,6 +1272,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
}
}
}
+ rcu_read_unlock();
no_promotions:
if (!(ok & BRD_OK))
@@ -1415,6 +1442,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
struct netdev_notifier_info_ext *info_ext = ptr;
struct in_device *in_dev;
struct net *net = dev_net(dev);
+ struct in_ifaddr *ifa;
unsigned int flags;
if (event == NETDEV_UNREGISTER) {
@@ -1429,9 +1457,9 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
switch (event) {
case NETDEV_UP:
- for_ifa(in_dev) {
+ in_dev_for_each_ifa_rtnl(ifa, in_dev) {
fib_add_ifaddr(ifa);
- } endfor_ifa(in_dev);
+ }
#ifdef CONFIG_IP_ROUTE_MULTIPATH
fib_sync_up(dev, RTNH_F_DEAD);
#endif
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 7945f0534db7..a68b5e21ec51 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -5,6 +5,7 @@
#include <linux/types.h>
#include <linux/list.h>
#include <net/ip_fib.h>
+#include <net/nexthop.h>
struct fib_alias {
struct hlist_node fa_list;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index cfec3af54c8d..b43a7ba5c6a4 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -8,11 +9,6 @@
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
* Thomas Graf <tgraf@suug.ch>
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Fixes:
* Rani Assaf : local_rule cannot be deleted
* Marc Boucher : routing by fwmark
@@ -31,6 +27,7 @@
#include <net/route.h>
#include <net/tcp.h>
#include <net/ip_fib.h>
+#include <net/nexthop.h>
#include <net/fib_rules.h>
struct fib4_rule {
@@ -145,8 +142,11 @@ static bool fib4_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg
struct fib_result *result = (struct fib_result *) arg->result;
struct net_device *dev = NULL;
- if (result->fi)
- dev = result->fi->fib_dev;
+ if (result->fi) {
+ struct fib_nh_common *nhc = fib_info_nhc(result->fi, 0);
+
+ dev = nhc->nhc_dev;
+ }
/* do not accept result if the route does
* not meet the required prefix length
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 78648072783e..2db089e10ba0 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -6,11 +7,6 @@
* IPv4 Forwarding Information Base: semantics.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/uaccess.h>
@@ -42,6 +38,7 @@
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
+#include <net/nexthop.h>
#include <net/netlink.h>
#include <net/rtnh.h>
#include <net/lwtunnel.h>
@@ -60,18 +57,21 @@ static unsigned int fib_info_cnt;
#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
+/* for_nexthops and change_nexthops only used when nexthop object
+ * is not set in a fib_info. The logic within can reference fib_nh.
+ */
#ifdef CONFIG_IP_ROUTE_MULTIPATH
#define for_nexthops(fi) { \
int nhsel; const struct fib_nh *nh; \
for (nhsel = 0, nh = (fi)->fib_nh; \
- nhsel < (fi)->fib_nhs; \
+ nhsel < fib_info_num_path((fi)); \
nh++, nhsel++)
#define change_nexthops(fi) { \
int nhsel; struct fib_nh *nexthop_nh; \
for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \
- nhsel < (fi)->fib_nhs; \
+ nhsel < fib_info_num_path((fi)); \
nexthop_nh++, nhsel++)
#else /* CONFIG_IP_ROUTE_MULTIPATH */
@@ -232,9 +232,13 @@ static void free_fib_info_rcu(struct rcu_head *head)
{
struct fib_info *fi = container_of(head, struct fib_info, rcu);
- change_nexthops(fi) {
- fib_nh_release(fi->fib_net, nexthop_nh);
- } endfor_nexthops(fi);
+ if (fi->nh) {
+ nexthop_put(fi->nh);
+ } else {
+ change_nexthops(fi) {
+ fib_nh_release(fi->fib_net, nexthop_nh);
+ } endfor_nexthops(fi);
+ }
ip_fib_metrics_put(fi->fib_metrics);
@@ -260,22 +264,34 @@ void fib_release_info(struct fib_info *fi)
hlist_del(&fi->fib_hash);
if (fi->fib_prefsrc)
hlist_del(&fi->fib_lhash);
- change_nexthops(fi) {
- if (!nexthop_nh->fib_nh_dev)
- continue;
- hlist_del(&nexthop_nh->nh_hash);
- } endfor_nexthops(fi)
+ if (fi->nh) {
+ list_del(&fi->nh_list);
+ } else {
+ change_nexthops(fi) {
+ if (!nexthop_nh->fib_nh_dev)
+ continue;
+ hlist_del(&nexthop_nh->nh_hash);
+ } endfor_nexthops(fi)
+ }
fi->fib_dead = 1;
fib_info_put(fi);
}
spin_unlock_bh(&fib_info_lock);
}
-static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
+static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
{
- const struct fib_nh *onh = ofi->fib_nh;
+ const struct fib_nh *onh;
+
+ if (fi->nh || ofi->nh)
+ return nexthop_cmp(fi->nh, ofi->nh) ? 0 : -1;
+
+ if (ofi->fib_nhs == 0)
+ return 0;
for_nexthops(fi) {
+ onh = fib_info_nh(ofi, nhsel);
+
if (nh->fib_nh_oif != onh->fib_nh_oif ||
nh->fib_nh_gw_family != onh->fib_nh_gw_family ||
nh->fib_nh_scope != onh->fib_nh_scope ||
@@ -296,8 +312,6 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
if (nh->fib_nh_gw_family == AF_INET6 &&
ipv6_addr_cmp(&nh->fib_nh_gw6, &onh->fib_nh_gw6))
return -1;
-
- onh++;
} endfor_nexthops(fi);
return 0;
}
@@ -311,22 +325,78 @@ static inline unsigned int fib_devindex_hashfn(unsigned int val)
(val >> (DEVINDEX_HASHBITS * 2))) & mask;
}
-static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
+static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
+ u32 prefsrc, u32 priority)
{
- unsigned int mask = (fib_info_hash_size - 1);
- unsigned int val = fi->fib_nhs;
+ unsigned int val = init_val;
- val ^= (fi->fib_protocol << 8) | fi->fib_scope;
- val ^= (__force u32)fi->fib_prefsrc;
- val ^= fi->fib_priority;
- for_nexthops(fi) {
- val ^= fib_devindex_hashfn(nh->fib_nh_oif);
- } endfor_nexthops(fi)
+ val ^= (protocol << 8) | scope;
+ val ^= prefsrc;
+ val ^= priority;
+
+ return val;
+}
+
+static unsigned int fib_info_hashfn_result(unsigned int val)
+{
+ unsigned int mask = (fib_info_hash_size - 1);
return (val ^ (val >> 7) ^ (val >> 12)) & mask;
}
-static struct fib_info *fib_find_info(const struct fib_info *nfi)
+static inline unsigned int fib_info_hashfn(struct fib_info *fi)
+{
+ unsigned int val;
+
+ val = fib_info_hashfn_1(fi->fib_nhs, fi->fib_protocol,
+ fi->fib_scope, (__force u32)fi->fib_prefsrc,
+ fi->fib_priority);
+
+ if (fi->nh) {
+ val ^= fib_devindex_hashfn(fi->nh->id);
+ } else {
+ for_nexthops(fi) {
+ val ^= fib_devindex_hashfn(nh->fib_nh_oif);
+ } endfor_nexthops(fi)
+ }
+
+ return fib_info_hashfn_result(val);
+}
+
+/* no metrics, only nexthop id */
+static struct fib_info *fib_find_info_nh(struct net *net,
+ const struct fib_config *cfg)
+{
+ struct hlist_head *head;
+ struct fib_info *fi;
+ unsigned int hash;
+
+ hash = fib_info_hashfn_1(fib_devindex_hashfn(cfg->fc_nh_id),
+ cfg->fc_protocol, cfg->fc_scope,
+ (__force u32)cfg->fc_prefsrc,
+ cfg->fc_priority);
+ hash = fib_info_hashfn_result(hash);
+ head = &fib_info_hash[hash];
+
+ hlist_for_each_entry(fi, head, fib_hash) {
+ if (!net_eq(fi->fib_net, net))
+ continue;
+ if (!fi->nh || fi->nh->id != cfg->fc_nh_id)
+ continue;
+ if (cfg->fc_protocol == fi->fib_protocol &&
+ cfg->fc_scope == fi->fib_scope &&
+ cfg->fc_prefsrc == fi->fib_prefsrc &&
+ cfg->fc_priority == fi->fib_priority &&
+ cfg->fc_type == fi->fib_type &&
+ cfg->fc_table == fi->fib_tb_id &&
+ !((cfg->fc_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK))
+ return fi;
+ }
+
+ return NULL;
+}
+
+static struct fib_info *fib_find_info(struct fib_info *nfi)
{
struct hlist_head *head;
struct fib_info *fi;
@@ -348,7 +418,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
memcmp(nfi->fib_metrics, fi->fib_metrics,
sizeof(u32) * RTAX_MAX) == 0 &&
!((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
- (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
+ nh_comp(fi, nfi) == 0)
return fi;
}
@@ -390,34 +460,40 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
+ nla_total_size(4) /* RTA_PRIORITY */
+ nla_total_size(4) /* RTA_PREFSRC */
+ nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */
+ unsigned int nhs = fib_info_num_path(fi);
/* space for nested metrics */
payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
- if (fi->fib_nhs) {
+ if (fi->nh)
+ payload += nla_total_size(4); /* RTA_NH_ID */
+
+ if (nhs) {
size_t nh_encapsize = 0;
- /* Also handles the special case fib_nhs == 1 */
+ /* Also handles the special case nhs == 1 */
/* each nexthop is packed in an attribute */
size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
+ unsigned int i;
/* may contain flow and gateway attribute */
nhsize += 2 * nla_total_size(4);
/* grab encap info */
- for_nexthops(fi) {
- if (nh->fib_nh_lws) {
+ for (i = 0; i < fib_info_num_path(fi); i++) {
+ struct fib_nh_common *nhc = fib_info_nhc(fi, i);
+
+ if (nhc->nhc_lwtstate) {
/* RTA_ENCAP_TYPE */
nh_encapsize += lwtunnel_get_encap_size(
- nh->fib_nh_lws);
+ nhc->nhc_lwtstate);
/* RTA_ENCAP */
nh_encapsize += nla_total_size(2);
}
- } endfor_nexthops(fi);
+ }
/* all nexthops are packed in a nested attribute */
- payload += nla_total_size((fi->fib_nhs * nhsize) +
- nh_encapsize);
+ payload += nla_total_size((nhs * nhsize) + nh_encapsize);
}
@@ -578,12 +654,14 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining,
return nhs;
}
+/* only called when fib_nh is integrated into fib_info */
static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
int remaining, struct fib_config *cfg,
struct netlink_ext_ack *extack)
{
struct net *net = fi->fib_net;
struct fib_config fib_cfg;
+ struct fib_nh *nh;
int ret;
change_nexthops(fi) {
@@ -646,24 +724,25 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
} endfor_nexthops(fi);
ret = -EINVAL;
- if (cfg->fc_oif && fi->fib_nh->fib_nh_oif != cfg->fc_oif) {
+ nh = fib_info_nh(fi, 0);
+ if (cfg->fc_oif && nh->fib_nh_oif != cfg->fc_oif) {
NL_SET_ERR_MSG(extack,
"Nexthop device index does not match RTA_OIF");
goto errout;
}
if (cfg->fc_gw_family) {
- if (cfg->fc_gw_family != fi->fib_nh->fib_nh_gw_family ||
+ if (cfg->fc_gw_family != nh->fib_nh_gw_family ||
(cfg->fc_gw_family == AF_INET &&
- fi->fib_nh->fib_nh_gw4 != cfg->fc_gw4) ||
+ nh->fib_nh_gw4 != cfg->fc_gw4) ||
(cfg->fc_gw_family == AF_INET6 &&
- ipv6_addr_cmp(&fi->fib_nh->fib_nh_gw6, &cfg->fc_gw6))) {
+ ipv6_addr_cmp(&nh->fib_nh_gw6, &cfg->fc_gw6))) {
NL_SET_ERR_MSG(extack,
"Nexthop gateway does not match RTA_GATEWAY or RTA_VIA");
goto errout;
}
}
#ifdef CONFIG_IP_ROUTE_CLASSID
- if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) {
+ if (cfg->fc_flow && nh->nh_tclassid != cfg->fc_flow) {
NL_SET_ERR_MSG(extack,
"Nexthop class id does not match RTA_FLOW");
goto errout;
@@ -674,12 +753,13 @@ errout:
return ret;
}
+/* only called when fib_nh is integrated into fib_info */
static void fib_rebalance(struct fib_info *fi)
{
int total;
int w;
- if (fi->fib_nhs < 2)
+ if (fib_info_num_path(fi) < 2)
return;
total = 0;
@@ -760,28 +840,36 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
return 1;
+ if (cfg->fc_nh_id) {
+ if (fi->nh && cfg->fc_nh_id == fi->nh->id)
+ return 0;
+ return 1;
+ }
+
if (cfg->fc_oif || cfg->fc_gw_family) {
+ struct fib_nh *nh = fib_info_nh(fi, 0);
+
if (cfg->fc_encap) {
if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap,
- fi->fib_nh, cfg, extack))
+ nh, cfg, extack))
return 1;
}
#ifdef CONFIG_IP_ROUTE_CLASSID
if (cfg->fc_flow &&
- cfg->fc_flow != fi->fib_nh->nh_tclassid)
+ cfg->fc_flow != nh->nh_tclassid)
return 1;
#endif
- if ((cfg->fc_oif && cfg->fc_oif != fi->fib_nh->fib_nh_oif) ||
+ if ((cfg->fc_oif && cfg->fc_oif != nh->fib_nh_oif) ||
(cfg->fc_gw_family &&
- cfg->fc_gw_family != fi->fib_nh->fib_nh_gw_family))
+ cfg->fc_gw_family != nh->fib_nh_gw_family))
return 1;
if (cfg->fc_gw_family == AF_INET &&
- cfg->fc_gw4 != fi->fib_nh->fib_nh_gw4)
+ cfg->fc_gw4 != nh->fib_nh_gw4)
return 1;
if (cfg->fc_gw_family == AF_INET6 &&
- ipv6_addr_cmp(&cfg->fc_gw6, &fi->fib_nh->fib_nh_gw6))
+ ipv6_addr_cmp(&cfg->fc_gw6, &nh->fib_nh_gw6))
return 1;
return 0;
@@ -968,7 +1056,7 @@ static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table,
{
struct net_device *dev;
struct fib_result res;
- int err;
+ int err = 0;
if (nh->fib_nh_flags & RTNH_F_ONLINK) {
unsigned int addr_type;
@@ -1189,9 +1277,15 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
fib_info_hash_free(old_laddrhash, bytes);
}
-__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh,
- unsigned char scope)
+__be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc,
+ unsigned char scope)
{
+ struct fib_nh *nh;
+
+ if (nhc->nhc_family != AF_INET)
+ return inet_select_addr(nhc->nhc_dev, 0, scope);
+
+ nh = container_of(nhc, struct fib_nh, nh_common);
nh->nh_saddr = inet_select_addr(nh->fib_nh_dev, nh->fib_nh_gw4, scope);
nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);
@@ -1201,16 +1295,19 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh,
__be32 fib_result_prefsrc(struct net *net, struct fib_result *res)
{
struct fib_nh_common *nhc = res->nhc;
- struct fib_nh *nh;
if (res->fi->fib_prefsrc)
return res->fi->fib_prefsrc;
- nh = container_of(nhc, struct fib_nh, nh_common);
- if (nh->nh_saddr_genid == atomic_read(&net->ipv4.dev_addr_genid))
- return nh->nh_saddr;
+ if (nhc->nhc_family == AF_INET) {
+ struct fib_nh *nh;
+
+ nh = container_of(nhc, struct fib_nh, nh_common);
+ if (nh->nh_saddr_genid == atomic_read(&net->ipv4.dev_addr_genid))
+ return nh->nh_saddr;
+ }
- return fib_info_update_nh_saddr(net, nh, res->fi->fib_scope);
+ return fib_info_update_nhc_saddr(net, nhc, res->fi->fib_scope);
}
static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
@@ -1242,6 +1339,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
{
int err;
struct fib_info *fi = NULL;
+ struct nexthop *nh = NULL;
struct fib_info *ofi;
int nhs = 1;
struct net *net = cfg->fc_nlinfo.nl_net;
@@ -1261,6 +1359,23 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
goto err_inval;
}
+ if (cfg->fc_nh_id) {
+ if (!cfg->fc_mx) {
+ fi = fib_find_info_nh(net, cfg);
+ if (fi) {
+ fi->fib_treeref++;
+ return fi;
+ }
+ }
+
+ nh = nexthop_find_by_id(net, cfg->fc_nh_id);
+ if (!nh) {
+ NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
+ goto err_inval;
+ }
+ nhs = 0;
+ }
+
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (cfg->fc_mp) {
nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack);
@@ -1296,7 +1411,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
goto failure;
fi->fib_metrics = ip_fib_metrics_init(fi->fib_net, cfg->fc_mx,
cfg->fc_mx_len, extack);
- if (unlikely(IS_ERR(fi->fib_metrics))) {
+ if (IS_ERR(fi->fib_metrics)) {
err = PTR_ERR(fi->fib_metrics);
kfree(fi);
return ERR_PTR(err);
@@ -1313,14 +1428,25 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
fi->fib_tb_id = cfg->fc_table;
fi->fib_nhs = nhs;
- change_nexthops(fi) {
- nexthop_nh->nh_parent = fi;
- } endfor_nexthops(fi)
+ if (nh) {
+ if (!nexthop_get(nh)) {
+ NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
+ err = -EINVAL;
+ } else {
+ err = 0;
+ fi->nh = nh;
+ }
+ } else {
+ change_nexthops(fi) {
+ nexthop_nh->nh_parent = fi;
+ } endfor_nexthops(fi)
- if (cfg->fc_mp)
- err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack);
- else
- err = fib_nh_init(net, fi->fib_nh, cfg, 1, extack);
+ if (cfg->fc_mp)
+ err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg,
+ extack);
+ else
+ err = fib_nh_init(net, fi->fib_nh, cfg, 1, extack);
+ }
if (err != 0)
goto failure;
@@ -1351,7 +1477,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
goto err_inval;
}
- if (cfg->fc_scope == RT_SCOPE_HOST) {
+ if (fi->nh) {
+ err = fib_check_nexthop(fi->nh, cfg->fc_scope, extack);
+ if (err)
+ goto failure;
+ } else if (cfg->fc_scope == RT_SCOPE_HOST) {
struct fib_nh *nh = fi->fib_nh;
/* Local address is added. */
@@ -1366,7 +1496,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
goto err_inval;
}
nh->fib_nh_scope = RT_SCOPE_NOWHERE;
- nh->fib_nh_dev = dev_get_by_index(net, fi->fib_nh->fib_nh_oif);
+ nh->fib_nh_dev = dev_get_by_index(net, nh->fib_nh_oif);
err = -ENODEV;
if (!nh->fib_nh_dev)
goto failure;
@@ -1391,13 +1521,16 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
goto err_inval;
}
- change_nexthops(fi) {
- fib_info_update_nh_saddr(net, nexthop_nh, fi->fib_scope);
- if (nexthop_nh->fib_nh_gw_family == AF_INET6)
- fi->fib_nh_is_v6 = true;
- } endfor_nexthops(fi)
+ if (!fi->nh) {
+ change_nexthops(fi) {
+ fib_info_update_nhc_saddr(net, &nexthop_nh->nh_common,
+ fi->fib_scope);
+ if (nexthop_nh->fib_nh_gw_family == AF_INET6)
+ fi->fib_nh_is_v6 = true;
+ } endfor_nexthops(fi)
- fib_rebalance(fi);
+ fib_rebalance(fi);
+ }
link_it:
ofi = fib_find_info(fi);
@@ -1419,16 +1552,20 @@ link_it:
head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
hlist_add_head(&fi->fib_lhash, head);
}
- change_nexthops(fi) {
- struct hlist_head *head;
- unsigned int hash;
+ if (fi->nh) {
+ list_add(&fi->nh_list, &nh->fi_list);
+ } else {
+ change_nexthops(fi) {
+ struct hlist_head *head;
+ unsigned int hash;
- if (!nexthop_nh->fib_nh_dev)
- continue;
- hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex);
- head = &fib_info_devhash[hash];
- hlist_add_head(&nexthop_nh->nh_hash, head);
- } endfor_nexthops(fi)
+ if (!nexthop_nh->fib_nh_dev)
+ continue;
+ hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex);
+ head = &fib_info_devhash[hash];
+ hlist_add_head(&nexthop_nh->nh_hash, head);
+ } endfor_nexthops(fi)
+ }
spin_unlock_bh(&fib_info_lock);
return fi;
@@ -1555,6 +1692,12 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
if (!mp)
goto nla_put_failure;
+ if (unlikely(fi->nh)) {
+ if (nexthop_mpath_fill_node(skb, fi->nh) < 0)
+ goto nla_put_failure;
+ goto mp_end;
+ }
+
for_nexthops(fi) {
if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight) < 0)
goto nla_put_failure;
@@ -1565,6 +1708,7 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
#endif
} endfor_nexthops(fi);
+mp_end:
nla_nest_end(skb, mp);
return 0;
@@ -1583,6 +1727,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos,
struct fib_info *fi, unsigned int flags)
{
+ unsigned int nhs = fib_info_num_path(fi);
struct nlmsghdr *nlh;
struct rtmsg *rtm;
@@ -1618,18 +1763,31 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
if (fi->fib_prefsrc &&
nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
goto nla_put_failure;
- if (fi->fib_nhs == 1) {
- struct fib_nh *nh = &fi->fib_nh[0];
+
+ if (fi->nh) {
+ if (nla_put_u32(skb, RTA_NH_ID, fi->nh->id))
+ goto nla_put_failure;
+ if (nexthop_is_blackhole(fi->nh))
+ rtm->rtm_type = RTN_BLACKHOLE;
+ }
+
+ if (nhs == 1) {
+ const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
unsigned char flags = 0;
- if (fib_nexthop_info(skb, &nh->nh_common, &flags, false) < 0)
+ if (fib_nexthop_info(skb, nhc, &flags, false) < 0)
goto nla_put_failure;
rtm->rtm_flags = flags;
#ifdef CONFIG_IP_ROUTE_CLASSID
- if (nh->nh_tclassid &&
- nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
- goto nla_put_failure;
+ if (nhc->nhc_family == AF_INET) {
+ struct fib_nh *nh;
+
+ nh = container_of(nhc, struct fib_nh, nh_common);
+ if (nh->nh_tclassid &&
+ nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
+ goto nla_put_failure;
+ }
#endif
} else {
if (fib_add_multipath(skb, fi) < 0)
@@ -1757,6 +1915,8 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
* NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host
* NETDEV_DOWN 1 LINKDOWN|DEAD Last address removed
* NETDEV_UNREGISTER 1 LINKDOWN|DEAD Device removed
+ *
+ * only used when fib_nh is built into fib_info
*/
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
{
@@ -1838,6 +1998,7 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
struct fib_info *next_fi = fa->fa_info;
+ struct fib_nh *nh;
if (fa->fa_slen != slen)
continue;
@@ -1859,8 +2020,9 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
if (next_fi->fib_scope != res->scope ||
fa->fa_type != RTN_UNICAST)
continue;
- if (!next_fi->fib_nh[0].fib_nh_gw4 ||
- next_fi->fib_nh[0].fib_nh_scope != RT_SCOPE_LINK)
+
+ nh = fib_info_nh(next_fi, 0);
+ if (!nh->fib_nh_gw4 || nh->fib_nh_scope != RT_SCOPE_LINK)
continue;
fib_alias_accessed(fa);
@@ -1902,6 +2064,8 @@ out:
/*
* Dead device goes up. We wake up dead nexthops.
* It takes sense only on multipath routes.
+ *
+ * only used when fib_nh is built into fib_info
*/
int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
{
@@ -1996,6 +2160,11 @@ void fib_select_multipath(struct fib_result *res, int hash)
struct net *net = fi->fib_net;
bool first = false;
+ if (unlikely(res->fi->nh)) {
+ nexthop_path_fib_result(res, hash);
+ return;
+ }
+
change_nexthops(fi) {
if (net->ipv4.sysctl_fib_multipath_use_neigh) {
if (!fib_good_nh(nexthop_nh))
@@ -2024,7 +2193,7 @@ void fib_select_path(struct net *net, struct fib_result *res,
goto check_saddr;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (res->fi->fib_nhs > 1) {
+ if (fib_info_num_path(res->fi) > 1) {
int h = fib_multipath_hash(net, fl4, skb, NULL);
fib_select_multipath(res, h);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index b53ecef89d59..90f0fc8c87bd 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1,8 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*
* Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
* & Swedish University of Agricultural Sciences.
@@ -18,28 +15,19 @@
* Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
* http://www.csc.kth.se/~snilsson/software/dyntrie2/
*
- *
* IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
* IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
*
- *
* Code from fib_hash has been reused which includes the following header:
*
- *
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* IPv4 FIB: lookup engine and maintenance routines.
*
- *
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Substantial contributions to this work comes from:
*
* David S. Miller, <davem@davemloft.net>
@@ -350,12 +338,18 @@ static struct tnode *tnode_alloc(int bits)
static inline void empty_child_inc(struct key_vector *n)
{
- ++tn_info(n)->empty_children ? : ++tn_info(n)->full_children;
+ tn_info(n)->empty_children++;
+
+ if (!tn_info(n)->empty_children)
+ tn_info(n)->full_children++;
}
static inline void empty_child_dec(struct key_vector *n)
{
- tn_info(n)->empty_children-- ? : tn_info(n)->full_children--;
+ if (!tn_info(n)->empty_children)
+ tn_info(n)->full_children--;
+
+ tn_info(n)->empty_children--;
}
static struct key_vector *leaf_new(t_key key, struct fib_alias *fa)
@@ -1461,6 +1455,7 @@ found:
fib_alias_accessed(fa);
err = fib_props[fa->fa_type].error;
if (unlikely(err < 0)) {
+out_reject:
#ifdef CONFIG_IP_FIB_TRIE_STATS
this_cpu_inc(stats->semantic_match_passed);
#endif
@@ -1469,7 +1464,13 @@ found:
}
if (fi->fib_flags & RTNH_F_DEAD)
continue;
- for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
+
+ if (unlikely(fi->nh && nexthop_is_blackhole(fi->nh))) {
+ err = fib_props[RTN_BLACKHOLE].error;
+ goto out_reject;
+ }
+
+ for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel);
if (nhc->nhc_flags & RTNH_F_DEAD)
@@ -2717,14 +2718,18 @@ static void fib_route_seq_stop(struct seq_file *seq, void *v)
rcu_read_unlock();
}
-static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
+static unsigned int fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
{
unsigned int flags = 0;
if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
flags = RTF_REJECT;
- if (fi && fi->fib_nh->fib_nh_gw4)
- flags |= RTF_GATEWAY;
+ if (fi) {
+ const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
+
+ if (nhc->nhc_gw.ipv4)
+ flags |= RTF_GATEWAY;
+ }
if (mask == htonl(0xFFFFFFFF))
flags |= RTF_HOST;
flags |= RTF_UP;
@@ -2755,7 +2760,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
prefix = htonl(l->key);
hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
- const struct fib_info *fi = fa->fa_info;
+ struct fib_info *fi = fa->fa_info;
__be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen);
unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
@@ -2768,26 +2773,31 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
seq_setwidth(seq, 127);
- if (fi)
+ if (fi) {
+ struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
+ __be32 gw = 0;
+
+ if (nhc->nhc_gw_family == AF_INET)
+ gw = nhc->nhc_gw.ipv4;
+
seq_printf(seq,
"%s\t%08X\t%08X\t%04X\t%d\t%u\t"
"%d\t%08X\t%d\t%u\t%u",
- fi->fib_dev ? fi->fib_dev->name : "*",
- prefix,
- fi->fib_nh->fib_nh_gw4, flags, 0, 0,
+ nhc->nhc_dev ? nhc->nhc_dev->name : "*",
+ prefix, gw, flags, 0, 0,
fi->fib_priority,
mask,
(fi->fib_advmss ?
fi->fib_advmss + 40 : 0),
fi->fib_window,
fi->fib_rtt >> 3);
- else
+ } else {
seq_printf(seq,
"*\t%08X\t%08X\t%04X\t%d\t%u\t"
"%d\t%08X\t%d\t%u\t%u",
prefix, 0, flags, 0, 0, 0,
mask, 0, 0, 0);
-
+ }
seq_pad(seq, '\n');
}
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 7c4a41dc04bb..293acfb36376 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -1,13 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* GRE over IPv4 demultiplexer driver
*
* Authors: Dmitry Kozlov (xeb@mail.ru)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 6c63524f598a..4de7e962d3da 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* IPV4 GSO/GRO offload support
* Linux INET implementation
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* GRE GSO support
*/
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index f3a5893b1e86..1510e951f451 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* NET3: Implementation of the ICMP protocol layer.
*
* Alan Cox, <alan@lxorguk.ukuu.org.uk>
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Some of the function names and the icmp unreach table for this
* module were derived from [icmp.c 1.0.11 06/02/93] by
* Ross Biro, Fred N. van Kempen, Mark Evans, Alan Cox, Gerhard Koerting.
@@ -59,7 +55,6 @@
*
* - Should use skb_pull() instead of all the manual checking.
* This would also greatly simply some upper layer error handlers. --AK
- *
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -206,7 +201,7 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
*/
static struct sock *icmp_sk(struct net *net)
{
- return *this_cpu_ptr(net->ipv4.icmp_sk);
+ return this_cpu_read(*net->ipv4.icmp_sk);
}
/* Called with BH disabled */
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index eb03153dfe12..9a206931a342 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Linux NET3: Internet Group Management Protocol [IGMP]
*
@@ -11,11 +12,6 @@
* Authors:
* Alan Cox <alan@lxorguk.ukuu.org.uk>
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Fixes:
*
* Alan Cox : Added lots of __inline__ to optimise
@@ -336,14 +332,15 @@ static __be32 igmpv3_get_srcaddr(struct net_device *dev,
const struct flowi4 *fl4)
{
struct in_device *in_dev = __in_dev_get_rcu(dev);
+ const struct in_ifaddr *ifa;
if (!in_dev)
return htonl(INADDR_ANY);
- for_ifa(in_dev) {
+ in_dev_for_each_ifa_rcu(ifa, in_dev) {
if (fl4->saddr == ifa->ifa_local)
return fl4->saddr;
- } endfor_ifa(in_dev);
+ }
return htonl(INADDR_ANY);
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index a175e3e7ae97..fb0b4b0994ec 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -6,11 +7,6 @@
* Support for INET connection oriented protocols.
*
* Authors: See the TCP sources
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or(at your option) any later version.
*/
#include <linux/module.h>
@@ -653,8 +649,7 @@ int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
EXPORT_SYMBOL(inet_rtx_syn_ack);
/* return true if req was found in the ehash table */
-static bool reqsk_queue_unlink(struct request_sock_queue *queue,
- struct request_sock *req)
+static bool reqsk_queue_unlink(struct request_sock *req)
{
struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo;
bool found = false;
@@ -673,7 +668,7 @@ static bool reqsk_queue_unlink(struct request_sock_queue *queue,
void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
{
- if (reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req)) {
+ if (reqsk_queue_unlink(req)) {
reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
reqsk_put(req);
}
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 5731670c560b..bbb005eb5218 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* inet_diag.c Module for monitoring INET transport protocols sockets.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 35e9784fab4e..d666756be5f1 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* inet fragments management
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Authors: Pavel Emelyanov <xemul@openvz.org>
* Started as consolidation of ipv4/ip_fragment.c,
* ipv6/reassembly. and ipv6 nf conntrack reassembly
@@ -149,10 +145,9 @@ static void inet_frags_free_cb(void *ptr, void *arg)
inet_frag_destroy(fq);
}
-static void fqdir_rwork_fn(struct work_struct *work)
+static void fqdir_work_fn(struct work_struct *work)
{
- struct fqdir *fqdir = container_of(to_rcu_work(work),
- struct fqdir, destroy_rwork);
+ struct fqdir *fqdir = container_of(work, struct fqdir, destroy_work);
struct inet_frags *f = fqdir->f;
rhashtable_free_and_destroy(&fqdir->rhashtable, inet_frags_free_cb, NULL);
@@ -191,18 +186,8 @@ EXPORT_SYMBOL(fqdir_init);
void fqdir_exit(struct fqdir *fqdir)
{
- fqdir->high_thresh = 0; /* prevent creation of new frags */
-
- fqdir->dead = true;
-
- /* call_rcu is supposed to provide memory barrier semantics,
- * separating the setting of fqdir->dead with the destruction
- * work. This implicit barrier is paired with inet_frag_kill().
- */
-
- INIT_RCU_WORK(&fqdir->destroy_rwork, fqdir_rwork_fn);
- queue_rcu_work(system_wq, &fqdir->destroy_rwork);
-
+ INIT_WORK(&fqdir->destroy_work, fqdir_work_fn);
+ queue_work(system_wq, &fqdir->destroy_work);
}
EXPORT_SYMBOL(fqdir_exit);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 942265d65eb3..97824864e40d 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -6,11 +7,6 @@
* Generic INET transport hashtables
*
* Authors: Lotsa people, from code originally in tcp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
@@ -320,7 +316,7 @@ struct sock *__inet_lookup_listener(struct net *net,
saddr, sport, htonl(INADDR_ANY), hnum,
dif, sdif);
done:
- if (unlikely(IS_ERR(result)))
+ if (IS_ERR(result))
return NULL;
return result;
}
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 1ffaec056821..4385eb9e781f 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -143,6 +143,10 @@ static void ip_expire(struct timer_list *t)
net = qp->q.fqdir->net;
rcu_read_lock();
+
+ if (qp->q.fqdir->dead)
+ goto out_rcu_unlock;
+
spin_lock(&qp->q.lock);
if (qp->q.flags & INET_FRAG_COMPLETE)
@@ -676,6 +680,11 @@ static int __net_init ipv4_frags_init_net(struct net *net)
return res;
}
+static void __net_exit ipv4_frags_pre_exit_net(struct net *net)
+{
+ fqdir_pre_exit(net->ipv4.fqdir);
+}
+
static void __net_exit ipv4_frags_exit_net(struct net *net)
{
ip4_frags_ns_ctl_unregister(net);
@@ -683,8 +692,9 @@ static void __net_exit ipv4_frags_exit_net(struct net *net)
}
static struct pernet_operations ip4_frags_ops = {
- .init = ipv4_frags_init_net,
- .exit = ipv4_frags_exit_net,
+ .init = ipv4_frags_init_net,
+ .pre_exit = ipv4_frags_pre_exit_net,
+ .exit = ipv4_frags_exit_net,
};
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 4b0526441476..a53a543fe055 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1,13 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Linux NET3: GRE over IP protocol decoder.
*
* Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index ed97724c5e33..1e2392b7c64e 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -14,7 +15,6 @@
* Jorge Cwik, <jorge@laser.satlink.net>
* Arnt Gulbrandsen, <agulbra@nvg.unit.no>
*
- *
* Fixes:
* Alan Cox : Commented a couple of minor bits of surplus code
* Alan Cox : Undefining IP_FORWARD doesn't include the code
@@ -96,8 +96,6 @@
* Jos Vos : Do accounting *before* call_in_firewall
* Willy Konynenberg : Transparent proxying support
*
- *
- *
* To Fix:
* IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient
* and could be made very efficient with the addition of some virtual memory hacks to permit
@@ -106,11 +104,6 @@
* interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet
* output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause
* fragmentation anyway.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#define pr_fmt(fmt) "IPv4: " fmt
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index ceca5285d9b4..cdd6c3418b9e 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -575,8 +575,7 @@ void ip_fraglist_init(struct sk_buff *skb, struct iphdr *iph,
{
unsigned int first_len = skb_pagelen(skb);
- iter->frag_list = skb_shinfo(skb)->frag_list;
- iter->frag = iter->frag_list;
+ iter->frag = skb_shinfo(skb)->frag_list;
skb_frag_list_init(skb);
iter->offset = 0;
@@ -845,7 +844,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
return 0;
}
- kfree_skb_list(iter.frag_list);
+ kfree_skb_list(iter.frag);
IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
return err;
@@ -995,7 +994,7 @@ static int __ip_append_data(struct sock *sk,
uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
if (!uarg)
return -ENOBUFS;
- extra_uref = !skb; /* only extra ref if !MSG_MORE */
+ extra_uref = !skb_zcopy(skb); /* only ref on new uarg */
if (rt->dst.dev->features & NETIF_F_SG &&
csummode == CHECKSUM_PARTIAL) {
paged = true;
@@ -1633,7 +1632,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
const struct ip_options *sopt,
__be32 daddr, __be32 saddr,
const struct ip_reply_arg *arg,
- unsigned int len)
+ unsigned int len, u64 transmit_time)
{
struct ip_options_data replyopts;
struct ipcm_cookie ipc;
@@ -1649,6 +1648,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
ipcm_init(&ipc);
ipc.addr = daddr;
+ ipc.sockc.transmit_time = transmit_time;
if (replyopts.opt.opt.optlen) {
ipc.opt = &replyopts.opt;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index a5d8cad18ead..38c02bb62e2c 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2013 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 30c1c264bdfc..9e3846388fb3 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2013 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 254a42e83ff9..cfb025606793 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -1,15 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Linux NET3: IP/IP protocol decoder modified to support
* virtual tunnel interface
*
* Authors:
* Saurabh Mohan (saurabh.mohan@vyatta.com) 05/07/2012
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
/*
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 9119d012ba46..2f4cdcc13d53 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* IP Payload Compression Protocol (IPComp) - RFC3173.
*
* Copyright (c) 2003 James Morris <jmorris@intercode.com.au>
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
* Todo:
* - Tunable compression parameters.
* - Compression stats.
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index fe10b9a2efc8..43adfc1641ba 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Linux NET3: IP/IP protocol decoder.
*
@@ -16,12 +17,6 @@
* Carlos Picoto : GRE over IP support
* Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
* I do not want to merge them together.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
/* tunnel.c: an IP tunnel driver
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 2c61e10a60e3..c07bc82cbbe9 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* IP multicast routing support for mrouted 3.6/3.8
*
* (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
* Linux Consultancy and Custom Driver Development
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Fixes:
* Michael Chastain : Incorrect size of copying.
* Alan Cox : Added the cache manager code
@@ -23,7 +19,6 @@
* Carlos Picoto : PIMv1 Support
* Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
* Relax this requirement to work with older peers.
- *
*/
#include <linux/uaccess.h>
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index 87ca2c42359b..a4e07e5e9c11 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -17,7 +17,7 @@ target(struct sk_buff *skb, const struct xt_action_param *par)
unsigned char *arpptr;
int pln, hln;
- if (!skb_make_writable(skb, skb->len))
+ if (skb_ensure_writable(skb, skb->len))
return NF_DROP;
arp = arp_hdr(skb);
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index aaaf9a81fbc9..9f6751893660 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -32,7 +32,7 @@ set_ect_ip(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
__u8 oldtos;
- if (!skb_make_writable(skb, sizeof(struct iphdr)))
+ if (skb_ensure_writable(skb, sizeof(struct iphdr)))
return false;
iph = ip_hdr(skb);
oldtos = iph->tos;
@@ -61,7 +61,7 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
tcph->cwr == einfo->proto.tcp.cwr))
return true;
- if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph)))
+ if (skb_ensure_writable(skb, ip_hdrlen(skb) + sizeof(*tcph)))
return false;
tcph = (void *)ip_hdr(skb) + ip_hdrlen(skb);
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 7875c98072eb..15f2b2604890 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -59,7 +59,7 @@ static int set_addr(struct sk_buff *skb, unsigned int protoff,
net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_udp_packet error\n");
return -1;
}
- /* nf_nat_mangle_udp_packet uses skb_make_writable() to copy
+ /* nf_nat_mangle_udp_packet uses skb_ensure_writable() to copy
* or pull everything in a linear buffer, so we can safely
* use the skb pointers now */
*data = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
index 657d2dcec3cc..717b726504fe 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
@@ -186,7 +186,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
return NF_DROP;
}
- if (!skb_make_writable(skb, skb->len)) {
+ if (skb_ensure_writable(skb, skb->len)) {
nf_ct_helper_log(skb, ct, "cannot mangle packet");
return NF_DROP;
}
diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c
index 164714104965..40c93b3bd731 100644
--- a/net/ipv4/netfilter/nf_tproxy_ipv4.c
+++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c
@@ -53,6 +53,7 @@ EXPORT_SYMBOL_GPL(nf_tproxy_handle_time_wait4);
__be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
{
+ const struct in_ifaddr *ifa;
struct in_device *indev;
__be32 laddr;
@@ -61,10 +62,14 @@ __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
laddr = 0;
indev = __in_dev_get_rcu(skb->dev);
- for_primary_ifa(indev) {
+
+ in_dev_for_each_ifa_rcu(ifa, indev) {
+ if (ifa->ifa_flags & IFA_F_SECONDARY)
+ continue;
+
laddr = ifa->ifa_local;
break;
- } endfor_ifa(indev);
+ }
return laddr ? laddr : daddr;
}
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 7a5a3d08fec3..5fe5a3981d43 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -105,6 +105,8 @@ static struct nexthop *nexthop_alloc(void)
nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL);
if (nh) {
+ INIT_LIST_HEAD(&nh->fi_list);
+ INIT_LIST_HEAD(&nh->f6i_list);
INIT_LIST_HEAD(&nh->grp_list);
}
return nh;
@@ -515,6 +517,162 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
}
EXPORT_SYMBOL_GPL(nexthop_select_path);
+int nexthop_for_each_fib6_nh(struct nexthop *nh,
+ int (*cb)(struct fib6_nh *nh, void *arg),
+ void *arg)
+{
+ struct nh_info *nhi;
+ int err;
+
+ if (nh->is_group) {
+ struct nh_group *nhg;
+ int i;
+
+ nhg = rcu_dereference_rtnl(nh->nh_grp);
+ for (i = 0; i < nhg->num_nh; i++) {
+ struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+
+ nhi = rcu_dereference_rtnl(nhge->nh->nh_info);
+ err = cb(&nhi->fib6_nh, arg);
+ if (err)
+ return err;
+ }
+ } else {
+ nhi = rcu_dereference_rtnl(nh->nh_info);
+ err = cb(&nhi->fib6_nh, arg);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nexthop_for_each_fib6_nh);
+
+static int check_src_addr(const struct in6_addr *saddr,
+ struct netlink_ext_ack *extack)
+{
+ if (!ipv6_addr_any(saddr)) {
+ NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
+{
+ struct nh_info *nhi;
+
+ /* fib6_src is unique to a fib6_info and limits the ability to cache
+ * routes in fib6_nh within a nexthop that is potentially shared
+ * across multiple fib entries. If the config wants to use source
+ * routing it can not use nexthop objects. mlxsw also does not allow
+ * fib6_src on routes.
+ */
+ if (cfg && check_src_addr(&cfg->fc_src, extack) < 0)
+ return -EINVAL;
+
+ if (nh->is_group) {
+ struct nh_group *nhg;
+
+ nhg = rtnl_dereference(nh->nh_grp);
+ if (nhg->has_v4)
+ goto no_v4_nh;
+ } else {
+ nhi = rtnl_dereference(nh->nh_info);
+ if (nhi->family == AF_INET)
+ goto no_v4_nh;
+ }
+
+ return 0;
+no_v4_nh:
+ NL_SET_ERR_MSG(extack, "IPv6 routes can not use an IPv4 nexthop");
+ return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(fib6_check_nexthop);
+
+/* if existing nexthop has ipv6 routes linked to it, need
+ * to verify this new spec works with ipv6
+ */
+static int fib6_check_nh_list(struct nexthop *old, struct nexthop *new,
+ struct netlink_ext_ack *extack)
+{
+ struct fib6_info *f6i;
+
+ if (list_empty(&old->f6i_list))
+ return 0;
+
+ list_for_each_entry(f6i, &old->f6i_list, nh_list) {
+ if (check_src_addr(&f6i->fib6_src.addr, extack) < 0)
+ return -EINVAL;
+ }
+
+ return fib6_check_nexthop(new, NULL, extack);
+}
+
+static int nexthop_check_scope(struct nexthop *nh, u8 scope,
+ struct netlink_ext_ack *extack)
+{
+ struct nh_info *nhi;
+
+ nhi = rtnl_dereference(nh->nh_info);
+ if (scope == RT_SCOPE_HOST && nhi->fib_nhc.nhc_gw_family) {
+ NL_SET_ERR_MSG(extack,
+ "Route with host scope can not have a gateway");
+ return -EINVAL;
+ }
+
+ if (nhi->fib_nhc.nhc_flags & RTNH_F_ONLINK && scope >= RT_SCOPE_LINK) {
+ NL_SET_ERR_MSG(extack, "Scope mismatch with nexthop");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Invoked by fib add code to verify nexthop by id is ok with
+ * config for prefix; parts of fib_check_nh not done when nexthop
+ * object is used.
+ */
+int fib_check_nexthop(struct nexthop *nh, u8 scope,
+ struct netlink_ext_ack *extack)
+{
+ int err = 0;
+
+ if (nh->is_group) {
+ struct nh_group *nhg;
+
+ if (scope == RT_SCOPE_HOST) {
+ NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops");
+ err = -EINVAL;
+ goto out;
+ }
+
+ nhg = rtnl_dereference(nh->nh_grp);
+ /* all nexthops in a group have the same scope */
+ err = nexthop_check_scope(nhg->nh_entries[0].nh, scope, extack);
+ } else {
+ err = nexthop_check_scope(nh, scope, extack);
+ }
+out:
+ return err;
+}
+
+static int fib_check_nh_list(struct nexthop *old, struct nexthop *new,
+ struct netlink_ext_ack *extack)
+{
+ struct fib_info *fi;
+
+ list_for_each_entry(fi, &old->fi_list, nh_list) {
+ int err;
+
+ err = fib_check_nexthop(new, fi->fib_scope, extack);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
static void nh_group_rebalance(struct nh_group *nhg)
{
int total = 0;
@@ -607,9 +765,33 @@ static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
}
}
+/* not called for nexthop replace */
+static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
+{
+ struct fib6_info *f6i, *tmp;
+ bool do_flush = false;
+ struct fib_info *fi;
+
+ list_for_each_entry(fi, &nh->fi_list, nh_list) {
+ fi->fib_flags |= RTNH_F_DEAD;
+ do_flush = true;
+ }
+ if (do_flush)
+ fib_flush(net);
+
+ /* ip6_del_rt removes the entry from this list hence the _safe */
+ list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) {
+ /* __ip6_del_rt does a release, so do a hold here */
+ fib6_info_hold(f6i);
+ ipv6_stub->ip6_del_rt(net, f6i);
+ }
+}
+
static void __remove_nexthop(struct net *net, struct nexthop *nh,
struct nl_info *nlinfo)
{
+ __remove_nexthop_fib(net, nh);
+
if (nh->is_group) {
remove_nexthop_group(nh, nlinfo);
} else {
@@ -638,10 +820,171 @@ static void remove_nexthop(struct net *net, struct nexthop *nh,
nexthop_put(nh);
}
+/* if any FIB entries reference this nexthop, any dst entries
+ * need to be regenerated
+ */
+static void nh_rt_cache_flush(struct net *net, struct nexthop *nh)
+{
+ struct fib6_info *f6i;
+
+ if (!list_empty(&nh->fi_list))
+ rt_cache_flush(net);
+
+ list_for_each_entry(f6i, &nh->f6i_list, nh_list)
+ ipv6_stub->fib6_update_sernum(net, f6i);
+}
+
+static int replace_nexthop_grp(struct net *net, struct nexthop *old,
+ struct nexthop *new,
+ struct netlink_ext_ack *extack)
+{
+ struct nh_group *oldg, *newg;
+ int i;
+
+ if (!new->is_group) {
+ NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with a nexthop.");
+ return -EINVAL;
+ }
+
+ oldg = rtnl_dereference(old->nh_grp);
+ newg = rtnl_dereference(new->nh_grp);
+
+ /* update parents - used by nexthop code for cleanup */
+ for (i = 0; i < newg->num_nh; i++)
+ newg->nh_entries[i].nh_parent = old;
+
+ rcu_assign_pointer(old->nh_grp, newg);
+
+ for (i = 0; i < oldg->num_nh; i++)
+ oldg->nh_entries[i].nh_parent = new;
+
+ rcu_assign_pointer(new->nh_grp, oldg);
+
+ return 0;
+}
+
+static int replace_nexthop_single(struct net *net, struct nexthop *old,
+ struct nexthop *new,
+ struct netlink_ext_ack *extack)
+{
+ struct nh_info *oldi, *newi;
+
+ if (new->is_group) {
+ NL_SET_ERR_MSG(extack, "Can not replace a nexthop with a nexthop group.");
+ return -EINVAL;
+ }
+
+ oldi = rtnl_dereference(old->nh_info);
+ newi = rtnl_dereference(new->nh_info);
+
+ newi->nh_parent = old;
+ oldi->nh_parent = new;
+
+ old->protocol = new->protocol;
+ old->nh_flags = new->nh_flags;
+
+ rcu_assign_pointer(old->nh_info, newi);
+ rcu_assign_pointer(new->nh_info, oldi);
+
+ return 0;
+}
+
+static void __nexthop_replace_notify(struct net *net, struct nexthop *nh,
+ struct nl_info *info)
+{
+ struct fib6_info *f6i;
+
+ if (!list_empty(&nh->fi_list)) {
+ struct fib_info *fi;
+
+ /* expectation is a few fib_info per nexthop and then
+ * a lot of routes per fib_info. So mark the fib_info
+ * and then walk the fib tables once
+ */
+ list_for_each_entry(fi, &nh->fi_list, nh_list)
+ fi->nh_updated = true;
+
+ fib_info_notify_update(net, info);
+
+ list_for_each_entry(fi, &nh->fi_list, nh_list)
+ fi->nh_updated = false;
+ }
+
+ list_for_each_entry(f6i, &nh->f6i_list, nh_list)
+ ipv6_stub->fib6_rt_update(net, f6i, info);
+}
+
+/* send RTM_NEWROUTE with REPLACE flag set for all FIB entries
+ * linked to this nexthop and for all groups that the nexthop
+ * is a member of
+ */
+static void nexthop_replace_notify(struct net *net, struct nexthop *nh,
+ struct nl_info *info)
+{
+ struct nh_grp_entry *nhge;
+
+ __nexthop_replace_notify(net, nh, info);
+
+ list_for_each_entry(nhge, &nh->grp_list, nh_list)
+ __nexthop_replace_notify(net, nhge->nh_parent, info);
+}
+
static int replace_nexthop(struct net *net, struct nexthop *old,
struct nexthop *new, struct netlink_ext_ack *extack)
{
- return -EEXIST;
+ bool new_is_reject = false;
+ struct nh_grp_entry *nhge;
+ int err;
+
+ /* check that existing FIB entries are ok with the
+ * new nexthop definition
+ */
+ err = fib_check_nh_list(old, new, extack);
+ if (err)
+ return err;
+
+ err = fib6_check_nh_list(old, new, extack);
+ if (err)
+ return err;
+
+ if (!new->is_group) {
+ struct nh_info *nhi = rtnl_dereference(new->nh_info);
+
+ new_is_reject = nhi->reject_nh;
+ }
+
+ list_for_each_entry(nhge, &old->grp_list, nh_list) {
+ /* if new nexthop is a blackhole, any groups using this
+ * nexthop cannot have more than 1 path
+ */
+ if (new_is_reject &&
+ nexthop_num_path(nhge->nh_parent) > 1) {
+ NL_SET_ERR_MSG(extack, "Blackhole nexthop can not be a member of a group with more than one path");
+ return -EINVAL;
+ }
+
+ err = fib_check_nh_list(nhge->nh_parent, new, extack);
+ if (err)
+ return err;
+
+ err = fib6_check_nh_list(nhge->nh_parent, new, extack);
+ if (err)
+ return err;
+ }
+
+ if (old->is_group)
+ err = replace_nexthop_grp(net, old, new, extack);
+ else
+ err = replace_nexthop_single(net, old, new, extack);
+
+ if (!err) {
+ nh_rt_cache_flush(net, old);
+
+ __remove_nexthop(net, new, NULL);
+ nexthop_put(new);
+ }
+
+ return err;
}
/* called with rtnl_lock held */
@@ -653,6 +996,7 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh,
bool replace = !!(cfg->nlflags & NLM_F_REPLACE);
bool create = !!(cfg->nlflags & NLM_F_CREATE);
u32 new_id = new_nh->id;
+ int replace_notify = 0;
int rc = -EEXIST;
pp = &root->rb_node;
@@ -672,8 +1016,10 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh,
pp = &next->rb_right;
} else if (replace) {
rc = replace_nexthop(net, nh, new_nh, extack);
- if (!rc)
+ if (!rc) {
new_nh = nh; /* send notification with old nh */
+ replace_notify = 1;
+ }
goto out;
} else {
/* id already exists and not a replace */
@@ -694,6 +1040,8 @@ out:
if (!rc) {
nh_base_seq_inc(net);
nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo);
+ if (replace_notify)
+ nexthop_replace_notify(net, new_nh, &cfg->nlinfo);
}
return rc;
@@ -815,7 +1163,8 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh,
err = fib_check_nh(net, fib_nh, tb_id, 0, extack);
if (!err) {
nh->nh_flags = fib_nh->fib_nh_flags;
- fib_info_update_nh_saddr(net, fib_nh, fib_nh->fib_nh_scope);
+ fib_info_update_nhc_saddr(net, &fib_nh->nh_common,
+ fib_nh->fib_nh_scope);
} else {
fib_nh_release(net, fib_nh);
}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 834be7daeb32..9d24ef5c5d8f 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -5,11 +6,6 @@
*
* "Ping" sockets
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Based on ipv4/udp.c code.
*
* Authors: Vasiliy Kulikov / Openwall (for Linux 2.6),
@@ -17,7 +13,6 @@
*
* Pavel gave all rights to bugs to Vasiliy,
* none of the bugs are Pavel's now.
- *
*/
#include <linux/uaccess.h>
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 4746f963c439..cc90243ccf76 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -25,11 +26,6 @@
* split functions for more readibility.
* Andi Kleen : Add support for /proc/net/netstat
* Arnaldo C. Melo : Convert to seq_file
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/types.h>
#include <net/net_namespace.h>
@@ -291,6 +287,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED),
SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP),
SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP),
+ SNMP_MIB_ITEM("TCPWqueueTooBig", LINUX_MIB_TCPWQUEUETOOBIG),
SNMP_MIB_ITEM("TCPFastOpenPassiveAltKey", LINUX_MIB_TCPFASTOPENPASSIVEALTKEY),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 92d249e053be..9a8c0892622b 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -16,11 +17,6 @@
* Richard Colella : Hang on hash collision
* Vince Laviano : Modified inet_del_protocol() to correctly
* maintain copy bit.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/cache.h>
#include <linux/module.h>
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 0e482f07b37f..0b8e06ca75d6 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -30,11 +31,6 @@
* Alan Cox : Added IP_HDRINCL option.
* Alan Cox : Skip broadcast check if BSDism set.
* David S. Miller : New socket lookup architecture.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/types.h>
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 11ddc276776e..66cbe8a7a168 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -55,11 +56,6 @@
* Eric Dumazet : hashed spinlocks and rt_check_expire() fixes.
* Ilia Sotnikov : Ignore TOS on PMTUD and Redirect
* Ilia Sotnikov : Removed TOS from hash calculations
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#define pr_fmt(fmt) "IPv4: " fmt
@@ -99,6 +95,7 @@
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ip_fib.h>
+#include <net/nexthop.h>
#include <net/arp.h>
#include <net/tcp.h>
#include <net/icmp.h>
@@ -1584,7 +1581,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
ip_dst_init_metrics(&rt->dst, fi->fib_metrics);
#ifdef CONFIG_IP_ROUTE_CLASSID
- {
+ if (nhc->nhc_family == AF_INET) {
struct fib_nh *nh;
nh = container_of(nhc, struct fib_nh, nh_common);
@@ -1933,6 +1930,23 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
hash_keys.basic.ip_proto = fl4->flowi4_proto;
}
break;
+ case 2:
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ /* skb is currently provided only when forwarding */
+ if (skb) {
+ struct flow_keys keys;
+
+ skb_flow_dissect_flow_keys(skb, &keys, 0);
+
+ hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+ hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
+ } else {
+ /* Same as case 0 */
+ hash_keys.addrs.v4addrs.src = fl4->saddr;
+ hash_keys.addrs.v4addrs.dst = fl4->daddr;
+ }
+ break;
}
mhash = flow_hash_from_keys(&hash_keys);
@@ -1950,7 +1964,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
struct flow_keys *hkeys)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (res->fi && res->fi->fib_nhs > 1) {
+ if (res->fi && fib_info_num_path(res->fi) > 1) {
int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
fib_select_multipath(res, h);
@@ -1985,7 +1999,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
u32 itag = 0;
struct rtable *rth;
struct flowi4 fl4;
- bool do_cache;
+ bool do_cache = true;
/* IP on this device is disabled. */
@@ -2062,6 +2076,9 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (res->type == RTN_BROADCAST) {
if (IN_DEV_BFORWARD(in_dev))
goto make_route;
+ /* not do cache if bc_forwarding is enabled */
+ if (IPV4_DEVCONF_ALL(net, BC_FORWARDING))
+ do_cache = false;
goto brd_input;
}
@@ -2099,18 +2116,15 @@ brd_input:
RT_CACHE_STAT_INC(in_brd);
local_input:
- do_cache = false;
- if (res->fi) {
- if (!itag) {
- struct fib_nh_common *nhc = FIB_RES_NHC(*res);
+ do_cache &= res->fi && !itag;
+ if (do_cache) {
+ struct fib_nh_common *nhc = FIB_RES_NHC(*res);
- rth = rcu_dereference(nhc->nhc_rth_input);
- if (rt_cache_valid(rth)) {
- skb_dst_set_noref(skb, &rth->dst);
- err = 0;
- goto out;
- }
- do_cache = true;
+ rth = rcu_dereference(nhc->nhc_rth_input);
+ if (rt_cache_valid(rth)) {
+ skb_dst_set_noref(skb, &rth->dst);
+ err = 0;
+ goto out;
}
}
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 008545f63667..535b69326f66 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Syncookies implementation for the Linux kernel
*
* Copyright (C) 1997 Andi Kleen
* Based on ideas by D.J.Bernstein and Eric Schenk.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/tcp.h>
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 90f09e47198b..7d802acde040 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -39,6 +39,8 @@ static int ip_local_port_range_min[] = { 1, 1 };
static int ip_local_port_range_max[] = { 65535, 65535 };
static int tcp_adv_win_scale_min = -31;
static int tcp_adv_win_scale_max = 31;
+static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS;
+static int tcp_min_snd_mss_max = 65535;
static int ip_privileged_port_min;
static int ip_privileged_port_max = 65535;
static int ip_ttl_min = 1;
@@ -601,6 +603,18 @@ static struct ctl_table ipv4_table[] = {
.extra1 = &sysctl_fib_sync_mem_min,
.extra2 = &sysctl_fib_sync_mem_max,
},
+ {
+ .procname = "tcp_rx_skb_cache",
+ .data = &tcp_rx_skb_cache_key.key,
+ .mode = 0644,
+ .proc_handler = proc_do_static_key,
+ },
+ {
+ .procname = "tcp_tx_skb_cache",
+ .data = &tcp_tx_skb_cache_key.key,
+ .mode = 0644,
+ .proc_handler = proc_do_static_key,
+ },
{ }
};
@@ -800,6 +814,15 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "tcp_min_snd_mss",
+ .data = &init_net.ipv4.sysctl_tcp_min_snd_mss,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &tcp_min_snd_mss_min,
+ .extra2 = &tcp_min_snd_mss_max,
+ },
+ {
.procname = "tcp_probe_threshold",
.data = &init_net.ipv4.sysctl_tcp_probe_threshold,
.maxlen = sizeof(int),
@@ -1008,7 +1031,7 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_fib_multipath_hash_policy,
.extra1 = &zero,
- .extra2 = &one,
+ .extra2 = &two,
},
#endif
{
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 27ce13ece510..efd7f2b1d1f0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -205,11 +206,6 @@
* Hirokazu Takahashi : Use copy_from_user() instead of
* csum_and_copy_from_user() if possible.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or(at your option) any later version.
- *
* Description of States:
*
* TCP_SYN_SENT sent a connection request, waiting for ack
@@ -321,6 +317,11 @@ struct tcp_splice_state {
unsigned long tcp_memory_pressure __read_mostly;
EXPORT_SYMBOL_GPL(tcp_memory_pressure);
+DEFINE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key);
+EXPORT_SYMBOL(tcp_rx_skb_cache_key);
+
+DEFINE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key);
+
void tcp_enter_memory_pressure(struct sock *sk)
{
unsigned long val;
@@ -2740,6 +2741,21 @@ static int tcp_repair_options_est(struct sock *sk,
return 0;
}
+DEFINE_STATIC_KEY_FALSE(tcp_tx_delay_enabled);
+EXPORT_SYMBOL(tcp_tx_delay_enabled);
+
+static void tcp_enable_tx_delay(void)
+{
+ if (!static_branch_unlikely(&tcp_tx_delay_enabled)) {
+ static int __tcp_tx_delay_enabled = 0;
+
+ if (cmpxchg(&__tcp_tx_delay_enabled, 0, 1) == 0) {
+ static_branch_enable(&tcp_tx_delay_enabled);
+ pr_info("TCP_TX_DELAY enabled\n");
+ }
+ }
+}
+
/*
* Socket option code for TCP.
*/
@@ -3091,6 +3107,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
else
tp->recvmsg_inq = val;
break;
+ case TCP_TX_DELAY:
+ if (val)
+ tcp_enable_tx_delay();
+ tp->tcp_tx_delay = val;
+ break;
default:
err = -ENOPROTOOPT;
break;
@@ -3550,6 +3571,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
val = tp->fastopen_no_cookie;
break;
+ case TCP_TX_DELAY:
+ val = tp->tcp_tx_delay;
+ break;
+
case TCP_TIMESTAMP:
val = tcp_time_stamp_raw() + tp->tsoffset;
break;
@@ -3883,6 +3908,7 @@ void __init tcp_init(void)
unsigned long limit;
unsigned int i;
+ BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE);
BUILD_BUG_ON(sizeof(struct tcp_skb_cb) >
FIELD_SIZEOF(struct sk_buff, cb));
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 477cb4aa456c..79f705450c16 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* DataCenter TCP (DCTCP) congestion control.
*
* http://simula.stanford.edu/~alizade/Site/DCTCP.html
@@ -33,11 +34,6 @@
* Daniel Borkmann <dborkman@redhat.com>
* Florian Westphal <fw@strlen.de>
* Glenn Judd <glenn.judd@morganstanley.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
*/
#include <linux/module.h>
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 81148f7a2323..a3a386236d93 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* tcp_diag.c Module for monitoring TCP transport protocols sockets.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 8e1580485c9e..46b67128e1ca 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -7,6 +7,7 @@
#include <linux/tcp.h>
#include <linux/rcupdate.h>
#include <linux/rculist.h>
+#include <linux/siphash.h>
#include <net/inetpeer.h>
#include <net/tcp.h>
@@ -37,14 +38,8 @@ static void tcp_fastopen_ctx_free(struct rcu_head *head)
{
struct tcp_fastopen_context *ctx =
container_of(head, struct tcp_fastopen_context, rcu);
- int i;
- /* We own ctx, thus no need to hold the Fastopen-lock */
- for (i = 0; i < TCP_FASTOPEN_KEY_MAX; i++) {
- if (ctx->tfm[i])
- crypto_free_cipher(ctx->tfm[i]);
- }
- kfree(ctx);
+ kzfree(ctx);
}
void tcp_fastopen_destroy_cipher(struct sock *sk)
@@ -72,41 +67,6 @@ void tcp_fastopen_ctx_destroy(struct net *net)
call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free);
}
-struct tcp_fastopen_context *tcp_fastopen_alloc_ctx(void *primary_key,
- void *backup_key,
- unsigned int len)
-{
- struct tcp_fastopen_context *new_ctx;
- void *key = primary_key;
- int err, i;
-
- new_ctx = kmalloc(sizeof(*new_ctx), GFP_KERNEL);
- if (!new_ctx)
- return ERR_PTR(-ENOMEM);
- for (i = 0; i < TCP_FASTOPEN_KEY_MAX; i++)
- new_ctx->tfm[i] = NULL;
- for (i = 0; i < (backup_key ? 2 : 1); i++) {
- new_ctx->tfm[i] = crypto_alloc_cipher("aes", 0, 0);
- if (IS_ERR(new_ctx->tfm[i])) {
- err = PTR_ERR(new_ctx->tfm[i]);
- new_ctx->tfm[i] = NULL;
- pr_err("TCP: TFO aes cipher alloc error: %d\n", err);
- goto out;
- }
- err = crypto_cipher_setkey(new_ctx->tfm[i], key, len);
- if (err) {
- pr_err("TCP: TFO cipher key error: %d\n", err);
- goto out;
- }
- memcpy(&new_ctx->key[i * TCP_FASTOPEN_KEY_LENGTH], key, len);
- key = backup_key;
- }
- return new_ctx;
-out:
- tcp_fastopen_ctx_free(&new_ctx->rcu);
- return ERR_PTR(err);
-}
-
int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
void *primary_key, void *backup_key,
unsigned int len)
@@ -115,11 +75,20 @@ int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
struct fastopen_queue *q;
int err = 0;
- ctx = tcp_fastopen_alloc_ctx(primary_key, backup_key, len);
- if (IS_ERR(ctx)) {
- err = PTR_ERR(ctx);
+ ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx) {
+ err = -ENOMEM;
goto out;
}
+
+ memcpy(ctx->key[0], primary_key, len);
+ if (backup_key) {
+ memcpy(ctx->key[1], backup_key, len);
+ ctx->num = 2;
+ } else {
+ ctx->num = 1;
+ }
+
spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
if (sk) {
q = &inet_csk(sk)->icsk_accept_queue.fastopenq;
@@ -141,31 +110,30 @@ out:
static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req,
struct sk_buff *syn,
- struct crypto_cipher *tfm,
+ const u8 *key,
struct tcp_fastopen_cookie *foc)
{
+ BUILD_BUG_ON(TCP_FASTOPEN_KEY_LENGTH != sizeof(siphash_key_t));
+ BUILD_BUG_ON(TCP_FASTOPEN_COOKIE_SIZE != sizeof(u64));
+
if (req->rsk_ops->family == AF_INET) {
const struct iphdr *iph = ip_hdr(syn);
- __be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
- crypto_cipher_encrypt_one(tfm, foc->val, (void *)path);
+ foc->val[0] = siphash(&iph->saddr,
+ sizeof(iph->saddr) +
+ sizeof(iph->daddr),
+ (const siphash_key_t *)key);
foc->len = TCP_FASTOPEN_COOKIE_SIZE;
return true;
}
-
#if IS_ENABLED(CONFIG_IPV6)
if (req->rsk_ops->family == AF_INET6) {
const struct ipv6hdr *ip6h = ipv6_hdr(syn);
- struct tcp_fastopen_cookie tmp;
- struct in6_addr *buf;
- int i;
-
- crypto_cipher_encrypt_one(tfm, tmp.val,
- (void *)&ip6h->saddr);
- buf = &tmp.addr;
- for (i = 0; i < 4; i++)
- buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
- crypto_cipher_encrypt_one(tfm, foc->val, (void *)buf);
+
+ foc->val[0] = siphash(&ip6h->saddr,
+ sizeof(ip6h->saddr) +
+ sizeof(ip6h->daddr),
+ (const siphash_key_t *)key);
foc->len = TCP_FASTOPEN_COOKIE_SIZE;
return true;
}
@@ -173,11 +141,8 @@ static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req,
return false;
}
-/* Generate the fastopen cookie by doing aes128 encryption on both
- * the source and destination addresses. Pad 0s for IPv4 or IPv4-mapped-IPv6
- * addresses. For the longer IPv6 addresses use CBC-MAC.
- *
- * XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE.
+/* Generate the fastopen cookie by applying SipHash to both the source and
+ * destination addresses.
*/
static void tcp_fastopen_cookie_gen(struct sock *sk,
struct request_sock *req,
@@ -189,7 +154,7 @@ static void tcp_fastopen_cookie_gen(struct sock *sk,
rcu_read_lock();
ctx = tcp_fastopen_get_ctx(sk);
if (ctx)
- __tcp_fastopen_cookie_gen_cipher(req, syn, ctx->tfm[0], foc);
+ __tcp_fastopen_cookie_gen_cipher(req, syn, ctx->key[0], foc);
rcu_read_unlock();
}
@@ -253,7 +218,7 @@ static int tcp_fastopen_cookie_gen_check(struct sock *sk,
if (!ctx)
goto out;
for (i = 0; i < tcp_fastopen_context_len(ctx); i++) {
- __tcp_fastopen_cookie_gen_cipher(req, syn, ctx->tfm[i], foc);
+ __tcp_fastopen_cookie_gen_cipher(req, syn, ctx->key[i], foc);
if (tcp_fastopen_cookie_match(foc, orig)) {
ret = i + 1;
goto out;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 08a477e74cf3..b71efeb0ae5b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -119,7 +119,7 @@ void clean_acked_data_enable(struct inet_connection_sock *icsk,
void (*cad)(struct sock *sk, u32 ack_seq))
{
icsk->icsk_clean_acked = cad;
- static_branch_inc(&clean_acked_data_enabled.key);
+ static_branch_deferred_inc(&clean_acked_data_enabled);
}
EXPORT_SYMBOL_GPL(clean_acked_data_enable);
@@ -1302,7 +1302,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
TCP_SKB_CB(skb)->seq += shifted;
tcp_skb_pcount_add(prev, pcount);
- BUG_ON(tcp_skb_pcount(skb) < pcount);
+ WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
tcp_skb_pcount_add(skb, -pcount);
/* When we're adding to gso_segs == 1, gso_size will be zero,
@@ -1368,6 +1368,21 @@ static int skb_can_shift(const struct sk_buff *skb)
return !skb_headlen(skb) && skb_is_nonlinear(skb);
}
+int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
+ int pcount, int shiftlen)
+{
+ /* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE)
+ * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need
+ * to make sure not storing more than 65535 * 8 bytes per skb,
+ * even if current MSS is bigger.
+ */
+ if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
+ return 0;
+ if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
+ return 0;
+ return skb_shift(to, from, shiftlen);
+}
+
/* Try collapsing SACK blocks spanning across multiple skbs to a single
* skb.
*/
@@ -1473,7 +1488,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
goto fallback;
- if (!skb_shift(prev, skb, len))
+ if (!tcp_skb_shift(prev, skb, pcount, len))
goto fallback;
if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
goto out;
@@ -1491,11 +1506,10 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
goto out;
len = skb->len;
- if (skb_shift(prev, skb, len)) {
- pcount += tcp_skb_pcount(skb);
- tcp_shifted_skb(sk, prev, skb, state, tcp_skb_pcount(skb),
+ pcount = tcp_skb_pcount(skb);
+ if (tcp_skb_shift(prev, skb, pcount, len))
+ tcp_shifted_skb(sk, prev, skb, state, pcount,
len, mss, 0);
- }
out:
return prev;
@@ -2648,7 +2662,7 @@ static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
struct tcp_sock *tp = tcp_sk(sk);
bool recovered = !before(tp->snd_una, tp->high_seq);
- if ((flag & FLAG_SND_UNA_ADVANCED) &&
+ if ((flag & FLAG_SND_UNA_ADVANCED || tp->fastopen_rsk) &&
tcp_try_undo_loss(sk, false))
return;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index af81e4a6a8d8..d57641cb3477 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -7,18 +8,12 @@
*
* IPv4 specific functions
*
- *
* code split from:
* linux/ipv4/tcp.c
* linux/ipv4/tcp_input.c
* linux/ipv4/tcp_output.c
*
* See tcp.c for author information
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
/*
@@ -667,8 +662,9 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
int genhash;
struct sock *sk1 = NULL;
#endif
- struct net *net;
+ u64 transmit_time = 0;
struct sock *ctl_sk;
+ struct net *net;
/* Never send a reset in response to a reset. */
if (th->rst)
@@ -771,14 +767,17 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
arg.tos = ip_hdr(skb)->tos;
arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
local_bh_disable();
- ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
- if (sk)
+ ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
+ if (sk) {
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_mark : sk->sk_mark;
+ transmit_time = tcp_transmit_time(sk);
+ }
ip_send_unicast_reply(ctl_sk,
skb, &TCP_SKB_CB(skb)->header.h4.opt,
ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
- &arg, arg.iov[0].iov_len);
+ &arg, arg.iov[0].iov_len,
+ transmit_time);
ctl_sk->sk_mark = 0;
__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
@@ -813,6 +812,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
struct net *net = sock_net(sk);
struct ip_reply_arg arg;
struct sock *ctl_sk;
+ u64 transmit_time;
memset(&rep.th, 0, sizeof(struct tcphdr));
memset(&arg, 0, sizeof(arg));
@@ -863,14 +863,15 @@ static void tcp_v4_send_ack(const struct sock *sk,
arg.tos = tos;
arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
local_bh_disable();
- ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
- if (sk)
- ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
- inet_twsk(sk)->tw_mark : sk->sk_mark;
+ ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
+ ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
+ inet_twsk(sk)->tw_mark : sk->sk_mark;
+ transmit_time = tcp_transmit_time(sk);
ip_send_unicast_reply(ctl_sk,
skb, &TCP_SKB_CB(skb)->header.h4.opt,
ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
- &arg, arg.iov[0].iov_len);
+ &arg, arg.iov[0].iov_len,
+ transmit_time);
ctl_sk->sk_mark = 0;
__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
@@ -2633,6 +2634,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_ecn_fallback = 1;
net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
+ net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 7c35731816e2..8bcaf2586b68 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -274,7 +274,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
tcptw->tw_ts_offset = tp->tsoffset;
tcptw->tw_last_oow_ack_time = 0;
-
+ tcptw->tw_tx_delay = tp->tcp_tx_delay;
#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == PF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -283,6 +283,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
tw->tw_tclass = np->tclass;
tw->tw_flowlabel = be32_to_cpu(np->flow_label & IPV6_FLOWLABEL_MASK);
+ tw->tw_txhash = sk->sk_txhash;
tw->tw_ipv6only = sk->sk_ipv6only;
}
#endif
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 0fbf7d4df9da..e09147ac9a99 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* IPV4 GSO/GRO offload support
* Linux INET implementation
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* TCPv4 GSO/GRO support
*/
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f429e856e263..f016bb516dd6 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1153,6 +1153,8 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
sizeof(struct inet6_skb_parm)));
+ tcp_add_tx_delay(skb, tp);
+
err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
if (unlikely(err > 0)) {
@@ -1296,6 +1298,11 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
if (nsize < 0)
nsize = 0;
+ if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf)) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
+ return -ENOMEM;
+ }
+
if (skb_unclone(skb, gfp))
return -ENOMEM;
@@ -1454,8 +1461,7 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
mss_now -= icsk->icsk_ext_hdr_len;
/* Then reserve room for full set of TCP options and 8 bytes of data */
- if (mss_now < 48)
- mss_now = 48;
+ mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss);
return mss_now;
}
@@ -2234,6 +2240,18 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
limit <<= factor;
+ if (static_branch_unlikely(&tcp_tx_delay_enabled) &&
+ tcp_sk(sk)->tcp_tx_delay) {
+ u64 extra_bytes = (u64)sk->sk_pacing_rate * tcp_sk(sk)->tcp_tx_delay;
+
+ /* TSQ is based on skb truesize sum (sk_wmem_alloc), so we
+ * approximate our needs assuming an ~100% skb->truesize overhead.
+ * USEC_PER_SEC is approximated by 2^20.
+ * do_div(extra_bytes, USEC_PER_SEC/2) is replaced by a right shift.
+ */
+ extra_bytes >>= (20 - 1);
+ limit += extra_bytes;
+ }
if (refcount_read(&sk->sk_wmem_alloc) > limit) {
/* Always send skb if rtx queue is empty.
* No need to wait for TX completion to call us back,
@@ -2747,7 +2765,7 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
if (next_skb_size <= skb_availroom(skb))
skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
next_skb_size);
- else if (!skb_shift(skb, next_skb, next_skb_size))
+ else if (!tcp_skb_shift(skb, next_skb, 1, next_skb_size))
return false;
}
tcp_highest_sack_replace(sk, next_skb, skb);
@@ -3212,6 +3230,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
int tcp_header_size;
struct tcphdr *th;
int mss;
+ u64 now;
skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
if (unlikely(!skb)) {
@@ -3243,13 +3262,14 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
memset(&opts, 0, sizeof(opts));
+ now = tcp_clock_ns();
#ifdef CONFIG_SYN_COOKIES
if (unlikely(req->cookie_ts))
skb->skb_mstamp_ns = cookie_init_timestamp(req);
else
#endif
{
- skb->skb_mstamp_ns = tcp_clock_ns();
+ skb->skb_mstamp_ns = now;
if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */
tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb);
}
@@ -3292,8 +3312,9 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
rcu_read_unlock();
#endif
- /* Do not fool tcpdump (if any), clean our debris */
- skb->tstamp = 0;
+ skb->skb_mstamp_ns = now;
+ tcp_add_tx_delay(skb, tp);
+
return skb;
}
EXPORT_SYMBOL(tcp_make_synack);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 5bad937ce779..c801cd37cc2a 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -155,6 +155,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len);
+ mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
}
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8fb250ed53d4..1b971bd95786 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -69,12 +70,6 @@
* a single port at the same time.
* Derek Atkins <derek@ihtfp.com>: Add Encapulation Support
* James Chapman : Add L2TP encapsulation type.
- *
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#define pr_fmt(fmt) "UDP: " fmt
@@ -130,17 +125,6 @@ EXPORT_SYMBOL(udp_memory_allocated);
#define MAX_UDP_PORTS 65536
#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)
-/* IPCB reference means this can not be used from early demux */
-static bool udp_lib_exact_dif_match(struct net *net, struct sk_buff *skb)
-{
-#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
- if (!net->ipv4.sysctl_udp_l3mdev_accept &&
- skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
- return true;
-#endif
- return false;
-}
-
static int udp_lib_lport_inuse(struct net *net, __u16 num,
const struct udp_hslot *hslot,
unsigned long *bitmap,
@@ -369,7 +353,7 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum)
static int compute_score(struct sock *sk, struct net *net,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned short hnum,
- int dif, int sdif, bool exact_dif)
+ int dif, int sdif)
{
int score;
struct inet_sock *inet;
@@ -425,7 +409,7 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
static struct sock *udp4_lib_lookup2(struct net *net,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned int hnum,
- int dif, int sdif, bool exact_dif,
+ int dif, int sdif,
struct udp_hslot *hslot2,
struct sk_buff *skb)
{
@@ -437,7 +421,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
badness = 0;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
score = compute_score(sk, net, saddr, sport,
- daddr, hnum, dif, sdif, exact_dif);
+ daddr, hnum, dif, sdif);
if (score > badness) {
if (sk->sk_reuseport) {
hash = udp_ehashfn(net, daddr, hnum,
@@ -465,7 +449,6 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2;
struct udp_hslot *hslot2;
- bool exact_dif = udp_lib_exact_dif_match(net, skb);
hash2 = ipv4_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
@@ -473,7 +456,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
result = udp4_lib_lookup2(net, saddr, sport,
daddr, hnum, dif, sdif,
- exact_dif, hslot2, skb);
+ hslot2, skb);
if (!result) {
hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
slot2 = hash2 & udptable->mask;
@@ -481,9 +464,9 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
result = udp4_lib_lookup2(net, saddr, sport,
htonl(INADDR_ANY), hnum, dif, sdif,
- exact_dif, hslot2, skb);
+ hslot2, skb);
}
- if (unlikely(IS_ERR(result)))
+ if (IS_ERR(result))
return NULL;
return result;
}
@@ -503,7 +486,11 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
__be16 sport, __be16 dport)
{
- return __udp4_lib_lookup_skb(skb, sport, dport, &udp_table);
+ const struct iphdr *iph = ip_hdr(skb);
+
+ return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
+ iph->daddr, dport, inet_iif(skb),
+ inet_sdif(skb), &udp_table, NULL);
}
EXPORT_SYMBOL_GPL(udp4_lib_lookup_skb);
@@ -538,8 +525,7 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
(inet->inet_dport != rmt_port && inet->inet_dport) ||
(inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) ||
ipv6_only_sock(sk) ||
- (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
- sk->sk_bound_dev_if != sdif))
+ !udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
return false;
if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif, sdif))
return false;
@@ -1779,6 +1765,10 @@ try_again:
sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
*addr_len = sizeof(*sin);
+
+ if (cgroup_bpf_enabled)
+ BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk,
+ (struct sockaddr *)sin);
}
if (udp_sk(sk)->gro_enabled)
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 5cbb9be05295..910555a4d9fe 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* udp_diag.c Module for monitoring UDP transport protocols sockets.
*
* Authors: Pavel Emelyanov, <xemul@parallels.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 065334b41d57..0112f64faf69 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* IPV4 GSO/GRO offload support
* Linux INET implementation
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* UDPv4 GSO support
*/
@@ -212,7 +208,7 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
gso_skb->destructor = NULL;
segs = skb_segment(gso_skb, features);
- if (unlikely(IS_ERR_OR_NULL(segs))) {
+ if (IS_ERR_OR_NULL(segs)) {
if (copy_dtor)
gso_skb->destructor = sock_wfree;
return segs;
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 3c94b8f0ff27..5936d66d1ce2 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* UDPLITE An implementation of the UDP-Lite protocol (RFC 3828).
*
@@ -5,10 +6,6 @@
*
* Changes:
* Fixes:
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#define pr_fmt(fmt) "UDPLite: " fmt
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 9bb8905088c7..ecff3fce9807 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* xfrm4_output.c - Common IPsec encapsulation code for IPv4.
* Copyright (c) 2004 Herbert Xu <herbert@gondor.apana.org.au>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/if_ether.h>
diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c
index bcab48944c15..8a4285712808 100644
--- a/net/ipv4/xfrm4_protocol.c
+++ b/net/ipv4/xfrm4_protocol.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* xfrm4_protocol.c - Generic xfrm protocol multiplexer.
*
* Copyright (C) 2013 secunet Security Networks AG
@@ -7,11 +8,6 @@
*
* Based on:
* net/ipv4/tunnel4.c
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/init.h>
OpenPOWER on IntegriCloud