summaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/addrconf.c54
-rw-r--r--net/ipv6/addrconf_core.c12
-rw-r--r--net/ipv6/af_inet6.c6
-rw-r--r--net/ipv6/datagram.c4
-rw-r--r--net/ipv6/esp6_offload.c2
-rw-r--r--net/ipv6/exthdrs_core.c4
-rw-r--r--net/ipv6/fib6_notifier.c11
-rw-r--r--net/ipv6/fib6_rules.c8
-rw-r--r--net/ipv6/icmp.c22
-rw-r--r--net/ipv6/inet6_connection_sock.c8
-rw-r--r--net/ipv6/inet6_hashtables.c2
-rw-r--r--net/ipv6/ip6_fib.c155
-rw-r--r--net/ipv6/ip6_gre.c16
-rw-r--r--net/ipv6/ip6_icmp.c34
-rw-r--r--net/ipv6/ip6_input.c45
-rw-r--r--net/ipv6/ip6_output.c22
-rw-r--r--net/ipv6/ip6_tunnel.c76
-rw-r--r--net/ipv6/ip6_vti.c15
-rw-r--r--net/ipv6/ip6mr.c17
-rw-r--r--net/ipv6/ipv6_sockglue.c4
-rw-r--r--net/ipv6/mcast.c5
-rw-r--r--net/ipv6/ndisc.c1
-rw-r--r--net/ipv6/netfilter.c7
-rw-r--r--net/ipv6/netfilter/Kconfig38
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c4
-rw-r--r--net/ipv6/netfilter/ip6t_ipv6header.c4
-rw-r--r--net/ipv6/netfilter/nf_dup_ipv6.c2
-rw-r--r--net/ipv6/netfilter/nf_flow_table_ipv6.c2
-rw-r--r--net/ipv6/netfilter/nf_log_ipv6.c4
-rw-r--r--net/ipv6/netfilter/nf_socket_ipv6.c1
-rw-r--r--net/ipv6/netfilter/nf_tproxy_ipv6.c2
-rw-r--r--net/ipv6/ping.c2
-rw-r--r--net/ipv6/raw.c8
-rw-r--r--net/ipv6/route.c179
-rw-r--r--net/ipv6/seg6_local.c37
-rw-r--r--net/ipv6/sit.c2
-rw-r--r--net/ipv6/syncookies.c5
-rw-r--r--net/ipv6/tcp_ipv6.c187
-rw-r--r--net/ipv6/udp.c22
-rw-r--r--net/ipv6/udp_offload.c29
-rw-r--r--net/ipv6/xfrm6_output.c2
-rw-r--r--net/ipv6/xfrm6_policy.c5
42 files changed, 770 insertions, 295 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index dc73888c7859..cb493e15959c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -478,7 +478,7 @@ static struct inet6_dev *ipv6_find_idev(struct net_device *dev)
if (!idev) {
idev = ipv6_add_dev(dev);
if (IS_ERR(idev))
- return NULL;
+ return idev;
}
if (dev->flags&IFF_UP)
@@ -1045,7 +1045,8 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
int err = 0;
if (addr_type == IPV6_ADDR_ANY ||
- addr_type & IPV6_ADDR_MULTICAST ||
+ (addr_type & IPV6_ADDR_MULTICAST &&
+ !(cfg->ifa_flags & IFA_F_MCAUTOJOIN)) ||
(!(idev->dev->flags & IFF_LOOPBACK) &&
!netif_is_l3_master(idev->dev) &&
addr_type & IPV6_ADDR_LOOPBACK))
@@ -2465,8 +2466,8 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
ASSERT_RTNL();
idev = ipv6_find_idev(dev);
- if (!idev)
- return ERR_PTR(-ENOBUFS);
+ if (IS_ERR(idev))
+ return idev;
if (idev->cnf.disable_ipv6)
return ERR_PTR(-EACCES);
@@ -3158,7 +3159,7 @@ static void init_loopback(struct net_device *dev)
ASSERT_RTNL();
idev = ipv6_find_idev(dev);
- if (!idev) {
+ if (IS_ERR(idev)) {
pr_debug("%s: add_dev failed\n", __func__);
return;
}
@@ -3373,7 +3374,7 @@ static void addrconf_sit_config(struct net_device *dev)
*/
idev = ipv6_find_idev(dev);
- if (!idev) {
+ if (IS_ERR(idev)) {
pr_debug("%s: add_dev failed\n", __func__);
return;
}
@@ -3398,7 +3399,7 @@ static void addrconf_gre_config(struct net_device *dev)
ASSERT_RTNL();
idev = ipv6_find_idev(dev);
- if (!idev) {
+ if (IS_ERR(idev)) {
pr_debug("%s: add_dev failed\n", __func__);
return;
}
@@ -4772,8 +4773,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
idev = ipv6_find_idev(dev);
- if (!idev)
- return -ENOBUFS;
+ if (IS_ERR(idev))
+ return PTR_ERR(idev);
if (!ipv6_allow_optimistic_dad(net, idev))
cfg.ifa_flags &= ~IFA_F_OPTIMISTIC;
@@ -5230,16 +5231,16 @@ static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb,
return -EINVAL;
}
+ if (!netlink_strict_get_check(skb))
+ return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
+ ifa_ipv6_policy, extack);
+
ifm = nlmsg_data(nlh);
if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get address request");
return -EINVAL;
}
- if (!netlink_strict_get_check(skb))
- return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
- ifa_ipv6_policy, extack);
-
err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
ifa_ipv6_policy, extack);
if (err)
@@ -5551,14 +5552,13 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr));
if (!nla)
goto nla_put_failure;
-
- if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode))
- goto nla_put_failure;
-
read_lock_bh(&idev->lock);
memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla));
read_unlock_bh(&idev->lock);
+ if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -5718,6 +5718,9 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla)
struct nlattr *tb[IFLA_INET6_MAX + 1];
int err;
+ if (!idev)
+ return -EAFNOSUPPORT;
+
if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0)
BUG();
@@ -5963,13 +5966,20 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
switch (event) {
case RTM_NEWADDR:
/*
- * If the address was optimistic
- * we inserted the route at the start of
- * our DAD process, so we don't need
- * to do it again
+ * If the address was optimistic we inserted the route at the
+ * start of our DAD process, so we don't need to do it again.
+ * If the device was taken down in the middle of the DAD
+ * cycle there is a race where we could get here without a
+ * host route, so nothing to insert. That will be fixed when
+ * the device is brought up.
*/
- if (!rcu_access_pointer(ifp->rt->fib6_node))
+ if (ifp->rt && !rcu_access_pointer(ifp->rt->fib6_node)) {
ip6_ins_rt(net, ifp->rt);
+ } else if (!ifp->rt && (ifp->idev->dev->flags & IFF_UP)) {
+ pr_warn("BUG: Address %pI6c on device %s is missing its host route.\n",
+ &ifp->addr, ifp->idev->dev->name);
+ }
+
if (ifp->idev->cnf.forwarding)
addrconf_join_anycast(ifp);
if (!ipv6_addr_any(&ifp->peer_addr))
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 783f3c1466da..ea00ce3d4117 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -7,6 +7,7 @@
#include <linux/export.h>
#include <net/ipv6.h>
#include <net/ipv6_stubs.h>
+#include <net/addrconf.h>
#include <net/ip.h>
/* if ipv6 module registers this function is used by xfrm to force all
@@ -128,11 +129,12 @@ int inet6addr_validator_notifier_call_chain(unsigned long val, void *v)
}
EXPORT_SYMBOL(inet6addr_validator_notifier_call_chain);
-static int eafnosupport_ipv6_dst_lookup(struct net *net, struct sock *u1,
- struct dst_entry **u2,
- struct flowi6 *u3)
+static struct dst_entry *eafnosupport_ipv6_dst_lookup_flow(struct net *net,
+ const struct sock *sk,
+ struct flowi6 *fl6,
+ const struct in6_addr *final_dst)
{
- return -EAFNOSUPPORT;
+ return ERR_PTR(-EAFNOSUPPORT);
}
static int eafnosupport_ipv6_route_input(struct sk_buff *skb)
@@ -189,7 +191,7 @@ static int eafnosupport_ip6_del_rt(struct net *net, struct fib6_info *rt)
}
const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
- .ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup,
+ .ipv6_dst_lookup_flow = eafnosupport_ipv6_dst_lookup_flow,
.ipv6_route_input = eafnosupport_ipv6_route_input,
.fib6_get_table = eafnosupport_fib6_get_table,
.fib6_table_lookup = eafnosupport_fib6_table_lookup,
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index ef37e0574f54..d727c3b41495 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -292,7 +292,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
return -EINVAL;
snum = ntohs(addr->sin6_port);
- if (snum && snum < inet_prot_sock(net) &&
+ if (snum && inet_port_requires_bind_service(net, snum) &&
!ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
return -EACCES;
@@ -765,7 +765,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
&final);
rcu_read_unlock();
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+ dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
if (IS_ERR(dst)) {
sk->sk_route_caps = 0;
sk->sk_err_soft = -PTR_ERR(dst);
@@ -946,7 +946,7 @@ static int ipv6_route_input(struct sk_buff *skb)
static const struct ipv6_stub ipv6_stub_impl = {
.ipv6_sock_mc_join = ipv6_sock_mc_join,
.ipv6_sock_mc_drop = ipv6_sock_mc_drop,
- .ipv6_dst_lookup = ip6_dst_lookup,
+ .ipv6_dst_lookup_flow = ip6_dst_lookup_flow,
.ipv6_route_input = ipv6_route_input,
.fib6_get_table = fib6_get_table,
.fib6_table_lookup = fib6_table_lookup,
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 9ab897ded4df..390bedde21a5 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -27,6 +27,7 @@
#include <net/ip6_route.h>
#include <net/tcp_states.h>
#include <net/dsfield.h>
+#include <net/sock_reuseport.h>
#include <linux/errqueue.h>
#include <linux/uaccess.h>
@@ -84,7 +85,7 @@ int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr)
final_p = fl6_update_dst(&fl6, opt, &final);
rcu_read_unlock();
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+ dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
goto out;
@@ -254,6 +255,7 @@ ipv4_connected:
goto out;
}
+ reuseport_has_conns(sk, true);
sk->sk_state = TCP_ESTABLISHED;
sk_set_txhash(sk);
out:
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index e31626ffccd1..fd535053245b 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -79,6 +79,8 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
if (!x)
goto out_reset;
+ skb->mark = xfrm_smark_get(skb->mark, x);
+
sp->xvec[sp->len++] = x;
sp->olen++;
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index b358f1a4dd08..da46c4284676 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -197,10 +197,8 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
struct ipv6hdr _ip6, *ip6;
ip6 = skb_header_pointer(skb, *offset, sizeof(_ip6), &_ip6);
- if (!ip6 || (ip6->version != 6)) {
- printk(KERN_ERR "IPv6 header not found\n");
+ if (!ip6 || (ip6->version != 6))
return -EBADMSG;
- }
start = *offset + sizeof(struct ipv6hdr);
nexthdr = ip6->nexthdr;
}
diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c
index 05f82baaa99e..f87ae33e1d01 100644
--- a/net/ipv6/fib6_notifier.c
+++ b/net/ipv6/fib6_notifier.c
@@ -7,12 +7,12 @@
#include <net/netns/ipv6.h>
#include <net/ip6_fib.h>
-int call_fib6_notifier(struct notifier_block *nb, struct net *net,
+int call_fib6_notifier(struct notifier_block *nb,
enum fib_event_type event_type,
struct fib_notifier_info *info)
{
info->family = AF_INET6;
- return call_fib_notifier(nb, net, event_type, info);
+ return call_fib_notifier(nb, event_type, info);
}
int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
@@ -27,15 +27,16 @@ static unsigned int fib6_seq_read(struct net *net)
return fib6_tables_seq_read(net) + fib6_rules_seq_read(net);
}
-static int fib6_dump(struct net *net, struct notifier_block *nb)
+static int fib6_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
int err;
- err = fib6_rules_dump(net, nb);
+ err = fib6_rules_dump(net, nb, extack);
if (err)
return err;
- return fib6_tables_dump(net, nb);
+ return fib6_tables_dump(net, nb, extack);
}
static const struct fib_notifier_ops fib6_notifier_ops_template = {
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index d22b6c140f23..fafe556d21e0 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -47,9 +47,10 @@ bool fib6_rule_default(const struct fib_rule *rule)
}
EXPORT_SYMBOL_GPL(fib6_rule_default);
-int fib6_rules_dump(struct net *net, struct notifier_block *nb)
+int fib6_rules_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
- return fib_rules_dump(net, nb, AF_INET6);
+ return fib_rules_dump(net, nb, AF_INET6, extack);
}
unsigned int fib6_rules_seq_read(struct net *net)
@@ -287,7 +288,8 @@ static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg
return false;
suppress_route:
- ip6_rt_put(rt);
+ if (!(arg->flags & FIB_LOOKUP_NOREF))
+ ip6_rt_put(rt);
return true;
}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 62c997201970..ef408a5090a2 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -516,13 +516,29 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
mip6_addr_swap(skb);
+ sk = icmpv6_xmit_lock(net);
+ if (!sk)
+ goto out_bh_enable;
+
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_ICMPV6;
fl6.daddr = hdr->saddr;
if (force_saddr)
saddr = force_saddr;
- if (saddr)
+ if (saddr) {
fl6.saddr = *saddr;
+ } else {
+ /* select a more meaningful saddr from input if */
+ struct net_device *in_netdev;
+
+ in_netdev = dev_get_by_index(net, IP6CB(skb)->iif);
+ if (in_netdev) {
+ ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
+ inet6_sk(sk)->srcprefs,
+ &fl6.saddr);
+ dev_put(in_netdev);
+ }
+ }
fl6.flowi6_mark = mark;
fl6.flowi6_oif = iif;
fl6.fl6_icmp_type = type;
@@ -531,10 +547,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
- sk = icmpv6_xmit_lock(net);
- if (!sk)
- goto out_bh_enable;
-
sk->sk_mark = mark;
np = inet6_sk(sk);
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 4da24aa6c696..e315526fa244 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -48,7 +48,7 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk,
fl6->flowi6_uid = sk->sk_uid;
security_req_classify_flow(req, flowi6_to_flowi(fl6));
- dst = ip6_dst_lookup_flow(sk, fl6, final_p);
+ dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p);
if (IS_ERR(dst))
return NULL;
@@ -103,7 +103,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
dst = __inet6_csk_dst_check(sk, np->dst_cookie);
if (!dst) {
- dst = ip6_dst_lookup_flow(sk, fl6, final_p);
+ dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p);
if (!IS_ERR(dst))
ip6_dst_store(sk, dst, NULL, NULL);
@@ -133,7 +133,7 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused
fl6.daddr = sk->sk_v6_daddr;
res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt),
- np->tclass);
+ np->tclass, sk->sk_priority);
rcu_read_unlock();
return res;
}
@@ -146,7 +146,7 @@ struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu)
if (IS_ERR(dst))
return NULL;
- dst->ops->update_pmtu(dst, sk, NULL, mtu);
+ dst->ops->update_pmtu(dst, sk, NULL, mtu, true);
dst = inet6_csk_route_socket(sk, &fl6);
return IS_ERR(dst) ? NULL : dst;
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index cf60fae9533b..fbe9d4295eac 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -105,7 +105,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
return -1;
score = 1;
- if (sk->sk_incoming_cpu == raw_smp_processor_id())
+ if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
score++;
}
return score;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 87f47bc55c5e..58fbde244381 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -318,7 +318,7 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
if (rt->dst.error == -EAGAIN) {
ip6_rt_put_flags(rt, flags);
rt = net->ipv6.ip6_null_entry;
- if (!(flags | RT6_LOOKUP_F_DST_NOREF))
+ if (!(flags & RT6_LOOKUP_F_DST_NOREF))
dst_hold(&rt->dst);
}
@@ -357,15 +357,32 @@ unsigned int fib6_tables_seq_read(struct net *net)
return fib_seq;
}
-static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
+static int call_fib6_entry_notifier(struct notifier_block *nb,
enum fib_event_type event_type,
- struct fib6_info *rt)
+ struct fib6_info *rt,
+ struct netlink_ext_ack *extack)
+{
+ struct fib6_entry_notifier_info info = {
+ .info.extack = extack,
+ .rt = rt,
+ };
+
+ return call_fib6_notifier(nb, event_type, &info.info);
+}
+
+static int call_fib6_multipath_entry_notifier(struct notifier_block *nb,
+ enum fib_event_type event_type,
+ struct fib6_info *rt,
+ unsigned int nsiblings,
+ struct netlink_ext_ack *extack)
{
struct fib6_entry_notifier_info info = {
+ .info.extack = extack,
.rt = rt,
+ .nsiblings = nsiblings,
};
- return call_fib6_notifier(nb, net, event_type, &info.info);
+ return call_fib6_notifier(nb, event_type, &info.info);
}
int call_fib6_entry_notifiers(struct net *net,
@@ -398,43 +415,72 @@ int call_fib6_multipath_entry_notifiers(struct net *net,
return call_fib6_notifiers(net, event_type, &info.info);
}
+int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt)
+{
+ struct fib6_entry_notifier_info info = {
+ .rt = rt,
+ .nsiblings = rt->fib6_nsiblings,
+ };
+
+ rt->fib6_table->fib_seq++;
+ return call_fib6_notifiers(net, FIB_EVENT_ENTRY_REPLACE, &info.info);
+}
+
struct fib6_dump_arg {
struct net *net;
struct notifier_block *nb;
+ struct netlink_ext_ack *extack;
};
-static void fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
+static int fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
{
- if (rt == arg->net->ipv6.fib6_null_entry)
- return;
- call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt);
+ enum fib_event_type fib_event = FIB_EVENT_ENTRY_REPLACE;
+ int err;
+
+ if (!rt || rt == arg->net->ipv6.fib6_null_entry)
+ return 0;
+
+ if (rt->fib6_nsiblings)
+ err = call_fib6_multipath_entry_notifier(arg->nb, fib_event,
+ rt,
+ rt->fib6_nsiblings,
+ arg->extack);
+ else
+ err = call_fib6_entry_notifier(arg->nb, fib_event, rt,
+ arg->extack);
+
+ return err;
}
static int fib6_node_dump(struct fib6_walker *w)
{
- struct fib6_info *rt;
+ int err;
- for_each_fib6_walker_rt(w)
- fib6_rt_dump(rt, w->args);
+ err = fib6_rt_dump(w->leaf, w->args);
w->leaf = NULL;
- return 0;
+ return err;
}
-static void fib6_table_dump(struct net *net, struct fib6_table *tb,
- struct fib6_walker *w)
+static int fib6_table_dump(struct net *net, struct fib6_table *tb,
+ struct fib6_walker *w)
{
+ int err;
+
w->root = &tb->tb6_root;
spin_lock_bh(&tb->tb6_lock);
- fib6_walk(net, w);
+ err = fib6_walk(net, w);
spin_unlock_bh(&tb->tb6_lock);
+ return err;
}
/* Called with rcu_read_lock() */
-int fib6_tables_dump(struct net *net, struct notifier_block *nb)
+int fib6_tables_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
struct fib6_dump_arg arg;
struct fib6_walker *w;
unsigned int h;
+ int err = 0;
w = kzalloc(sizeof(*w), GFP_ATOMIC);
if (!w)
@@ -443,19 +489,24 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb)
w->func = fib6_node_dump;
arg.net = net;
arg.nb = nb;
+ arg.extack = extack;
w->args = &arg;
for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
struct hlist_head *head = &net->ipv6.fib_table_hash[h];
struct fib6_table *tb;
- hlist_for_each_entry_rcu(tb, head, tb6_hlist)
- fib6_table_dump(net, tb, w);
+ hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
+ err = fib6_table_dump(net, tb, w);
+ if (err < 0)
+ goto out;
+ }
}
+out:
kfree(w);
- return 0;
+ return err;
}
static int fib6_dump_node(struct fib6_walker *w)
@@ -1021,6 +1072,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
(info->nlh->nlmsg_flags & NLM_F_CREATE));
int found = 0;
bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
+ bool notify_sibling_rt = false;
u16 nlflags = NLM_F_EXCL;
int err;
@@ -1112,6 +1164,7 @@ next_iter:
/* Find the first route that have the same metric */
sibling = leaf;
+ notify_sibling_rt = true;
while (sibling) {
if (sibling->fib6_metric == rt->fib6_metric &&
rt6_qualify_for_ecmp(sibling)) {
@@ -1121,6 +1174,7 @@ next_iter:
}
sibling = rcu_dereference_protected(sibling->fib6_next,
lockdep_is_held(&rt->fib6_table->tb6_lock));
+ notify_sibling_rt = false;
}
/* For each sibling in the list, increment the counter of
* siblings. BUG() if counters does not match, list of siblings
@@ -1147,10 +1201,21 @@ next_iter:
add:
nlflags |= NLM_F_CREATE;
- if (!info->skip_notify_kernel) {
+ /* The route should only be notified if it is the first
+ * route in the node or if it is added as a sibling
+ * route to the first route in the node.
+ */
+ if (!info->skip_notify_kernel &&
+ (notify_sibling_rt || ins == &fn->leaf)) {
+ enum fib_event_type fib_event;
+
+ if (notify_sibling_rt)
+ fib_event = FIB_EVENT_ENTRY_APPEND;
+ else
+ fib_event = FIB_EVENT_ENTRY_REPLACE;
err = call_fib6_entry_notifiers(info->nl_net,
- FIB_EVENT_ENTRY_ADD,
- rt, extack);
+ fib_event, rt,
+ extack);
if (err) {
struct fib6_info *sibling, *next_sibling;
@@ -1194,7 +1259,7 @@ add:
return -ENOENT;
}
- if (!info->skip_notify_kernel) {
+ if (!info->skip_notify_kernel && ins == &fn->leaf) {
err = call_fib6_entry_notifiers(info->nl_net,
FIB_EVENT_ENTRY_REPLACE,
rt, extack);
@@ -1443,6 +1508,8 @@ out:
}
#endif
goto failure;
+ } else if (fib6_requires_src(rt)) {
+ fib6_routes_require_src_inc(info->nl_net);
}
return err;
@@ -1825,13 +1892,29 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
struct fib6_info __rcu **rtp, struct nl_info *info)
{
+ struct fib6_info *leaf, *replace_rt = NULL;
struct fib6_walker *w;
struct fib6_info *rt = rcu_dereference_protected(*rtp,
lockdep_is_held(&table->tb6_lock));
struct net *net = info->nl_net;
+ bool notify_del = false;
RT6_TRACE("fib6_del_route\n");
+ /* If the deleted route is the first in the node and it is not part of
+ * a multipath route, then we need to replace it with the next route
+ * in the node, if exists.
+ */
+ leaf = rcu_dereference_protected(fn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ if (leaf == rt && !rt->fib6_nsiblings) {
+ if (rcu_access_pointer(rt->fib6_next))
+ replace_rt = rcu_dereference_protected(rt->fib6_next,
+ lockdep_is_held(&table->tb6_lock));
+ else
+ notify_del = true;
+ }
+
/* Unlink it */
*rtp = rt->fib6_next;
rt->fib6_node = NULL;
@@ -1849,6 +1932,14 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
if (rt->fib6_nsiblings) {
struct fib6_info *sibling, *next_sibling;
+ /* The route is deleted from a multipath route. If this
+ * multipath route is the first route in the node, then we need
+ * to emit a delete notification. Otherwise, we need to skip
+ * the notification.
+ */
+ if (rt->fib6_metric == leaf->fib6_metric &&
+ rt6_qualify_for_ecmp(leaf))
+ notify_del = true;
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings, fib6_siblings)
sibling->fib6_nsiblings--;
@@ -1884,8 +1975,13 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
fib6_purge_rt(rt, fn, net);
- if (!info->skip_notify_kernel)
- call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
+ if (!info->skip_notify_kernel) {
+ if (notify_del)
+ call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
+ rt, NULL);
+ else if (replace_rt)
+ call_fib6_entry_notifiers_replace(net, replace_rt);
+ }
if (!info->skip_notify)
inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
@@ -1915,6 +2011,8 @@ int fib6_del(struct fib6_info *rt, struct nl_info *info)
struct fib6_info *cur = rcu_dereference_protected(*rtp,
lockdep_is_held(&table->tb6_lock));
if (rt == cur) {
+ if (fib6_requires_src(cur))
+ fib6_routes_require_src_dec(info->nl_net);
fib6_del_route(table, fn, rtp, info);
return 0;
}
@@ -2473,14 +2571,13 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
struct net *net = seq_file_net(seq);
struct ipv6_route_iter *iter = seq->private;
+ ++(*pos);
if (!v)
goto iter_table;
n = rcu_dereference_bh(((struct fib6_info *)v)->fib6_next);
- if (n) {
- ++*pos;
+ if (n)
return n;
- }
iter_table:
ipv6_route_check_sernum(iter);
@@ -2488,8 +2585,6 @@ iter_table:
r = fib6_walk_continue(&iter->w);
spin_unlock_bh(&iter->tbl->tb6_lock);
if (r > 0) {
- if (v)
- ++*pos;
return iter->w.leaf;
} else if (r < 0) {
fib6_walker_unlink(net, &iter->w);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index dd2d0b963260..55bfc5149d0c 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -968,7 +968,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
if (unlikely(!tun_info ||
!(tun_info->mode & IP_TUNNEL_INFO_TX) ||
ip_tunnel_info_af(tun_info) != AF_INET6))
- return -EINVAL;
+ goto tx_err;
key = &tun_info->key;
memset(&fl6, 0, sizeof(fl6));
@@ -980,9 +980,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
dsfield = key->tos;
if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
goto tx_err;
- md = ip_tunnel_info_opts(tun_info);
- if (!md)
+ if (tun_info->options_len < sizeof(*md))
goto tx_err;
+ md = ip_tunnel_info_opts(tun_info);
tun_id = tunnel_id_to_key32(key->tun_id);
if (md->version == 1) {
@@ -1040,7 +1040,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
/* TooBig packet may have updated dst->dev's mtu */
if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
- dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
+ dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, false);
err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
NEXTHDR_GRE);
@@ -1466,7 +1466,6 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
dev->mtu -= 8;
if (tunnel->parms.collect_md) {
- dev->features |= NETIF_F_NETNS_LOCAL;
netif_keep_dst(dev);
}
ip6gre_tnl_init_features(dev);
@@ -1894,7 +1893,6 @@ static void ip6gre_tap_setup(struct net_device *dev)
dev->needs_free_netdev = true;
dev->priv_destructor = ip6gre_dev_free;
- dev->features |= NETIF_F_NETNS_LOCAL;
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
netif_keep_dst(dev);
@@ -2170,8 +2168,8 @@ static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
[IFLA_GRE_IKEY] = { .type = NLA_U32 },
[IFLA_GRE_OKEY] = { .type = NLA_U32 },
- [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
- [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct ipv6hdr, daddr) },
+ [IFLA_GRE_LOCAL] = { .len = sizeof_field(struct ipv6hdr, saddr) },
+ [IFLA_GRE_REMOTE] = { .len = sizeof_field(struct ipv6hdr, daddr) },
[IFLA_GRE_TTL] = { .type = NLA_U8 },
[IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 },
[IFLA_GRE_FLOWINFO] = { .type = NLA_U32 },
@@ -2192,11 +2190,11 @@ static void ip6erspan_tap_setup(struct net_device *dev)
{
ether_setup(dev);
+ dev->max_mtu = 0;
dev->netdev_ops = &ip6erspan_netdev_ops;
dev->needs_free_netdev = true;
dev->priv_destructor = ip6gre_dev_free;
- dev->features |= NETIF_F_NETNS_LOCAL;
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
netif_keep_dst(dev);
diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
index 02045494c24c..e0086758b6ee 100644
--- a/net/ipv6/ip6_icmp.c
+++ b/net/ipv6/ip6_icmp.c
@@ -45,4 +45,38 @@ out:
rcu_read_unlock();
}
EXPORT_SYMBOL(icmpv6_send);
+
+#if IS_ENABLED(CONFIG_NF_NAT)
+#include <net/netfilter/nf_conntrack.h>
+void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
+{
+ struct sk_buff *cloned_skb = NULL;
+ enum ip_conntrack_info ctinfo;
+ struct in6_addr orig_ip;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb_in, &ctinfo);
+ if (!ct || !(ct->status & IPS_SRC_NAT)) {
+ icmpv6_send(skb_in, type, code, info);
+ return;
+ }
+
+ if (skb_shared(skb_in))
+ skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC);
+
+ if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head ||
+ (skb_network_header(skb_in) + sizeof(struct ipv6hdr)) >
+ skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in,
+ skb_network_offset(skb_in) + sizeof(struct ipv6hdr))))
+ goto out;
+
+ orig_ip = ipv6_hdr(skb_in)->saddr;
+ ipv6_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.in6;
+ icmpv6_send(skb_in, type, code, info);
+ ipv6_hdr(skb_in)->saddr = orig_ip;
+out:
+ consume_skb(cloned_skb);
+}
+EXPORT_SYMBOL(icmpv6_ndo_send);
+#endif
#endif
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index fa014d5f1732..7b089d0ac8cd 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -80,15 +80,33 @@ static void ip6_sublist_rcv_finish(struct list_head *head)
{
struct sk_buff *skb, *next;
- list_for_each_entry_safe(skb, next, head, list)
+ list_for_each_entry_safe(skb, next, head, list) {
+ skb_list_del_init(skb);
dst_input(skb);
+ }
+}
+
+static bool ip6_can_use_hint(const struct sk_buff *skb,
+ const struct sk_buff *hint)
+{
+ return hint && !skb_dst(skb) &&
+ ipv6_addr_equal(&ipv6_hdr(hint)->daddr, &ipv6_hdr(skb)->daddr);
+}
+
+static struct sk_buff *ip6_extract_route_hint(const struct net *net,
+ struct sk_buff *skb)
+{
+ if (fib6_routes_require_src(net) || fib6_has_custom_rules(net))
+ return NULL;
+
+ return skb;
}
static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
struct list_head *head)
{
+ struct sk_buff *skb, *next, *hint = NULL;
struct dst_entry *curr_dst = NULL;
- struct sk_buff *skb, *next;
struct list_head sublist;
INIT_LIST_HEAD(&sublist);
@@ -102,9 +120,15 @@ static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
skb = l3mdev_ip6_rcv(skb);
if (!skb)
continue;
- ip6_rcv_finish_core(net, sk, skb);
+
+ if (ip6_can_use_hint(skb, hint))
+ skb_dst_copy(skb, hint);
+ else
+ ip6_rcv_finish_core(net, sk, skb);
dst = skb_dst(skb);
if (curr_dst != dst) {
+ hint = ip6_extract_route_hint(net, skb);
+
/* dispatch old sublist */
if (!list_empty(&sublist))
ip6_sublist_rcv_finish(&sublist);
@@ -221,6 +245,16 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
if (ipv6_addr_is_multicast(&hdr->saddr))
goto err;
+ /* While RFC4291 is not explicit about v4mapped addresses
+ * in IPv6 headers, it seems clear linux dual-stack
+ * model can not deal properly with these.
+ * Security models could be fooled by ::ffff:127.0.0.1 for example.
+ *
+ * https://tools.ietf.org/html/draft-itojun-v6ops-v4mapped-harmful-02
+ */
+ if (ipv6_addr_v4mapped(&hdr->saddr))
+ goto err;
+
skb->transport_header = skb->network_header + sizeof(*hdr);
IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
@@ -313,7 +347,8 @@ void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
list_add_tail(&skb->list, &sublist);
}
/* dispatch final sublist */
- ip6_sublist_rcv(&sublist, curr_dev, curr_net);
+ if (!list_empty(&sublist))
+ ip6_sublist_rcv(&sublist, curr_dev, curr_net);
}
INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
@@ -369,7 +404,7 @@ resubmit_final:
/* Free reference early: we don't need it any more,
and it may hold ip_conntrack module loaded
indefinitely. */
- nf_reset(skb);
+ nf_reset_ct(skb);
skb_postpull_rcsum(skb, skb_network_header(skb),
skb_network_header_len(skb));
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 8e49fd62eea9..087304427bbb 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -160,7 +160,7 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct net_device *dev = skb_dst(skb)->dev;
+ struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
skb->protocol = htons(ETH_P_IPV6);
@@ -173,7 +173,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
}
return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
- net, sk, skb, NULL, dev,
+ net, sk, skb, indev, dev,
ip6_finish_output,
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
@@ -193,7 +193,7 @@ bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
* which are using proper atomic operations or spinlocks.
*/
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
- __u32 mark, struct ipv6_txoptions *opt, int tclass)
+ __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
struct net *net = sock_net(sk);
const struct ipv6_pinfo *np = inet6_sk(sk);
@@ -258,7 +258,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
hdr->daddr = *first_hop;
skb->protocol = htons(ETH_P_IPV6);
- skb->priority = sk->sk_priority;
+ skb->priority = priority;
skb->mark = mark;
mtu = dst_mtu(dst);
@@ -768,6 +768,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
inet6_sk(skb->sk) : NULL;
struct ip6_frag_state state;
unsigned int mtu, hlen, nexthdr_offset;
+ ktime_t tstamp = skb->tstamp;
int hroom, err = 0;
__be32 frag_id;
u8 *prevhdr, nexthdr = 0;
@@ -855,6 +856,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (iter.frag)
ip6_fraglist_prepare(skb, &iter);
+ skb->tstamp = tstamp;
err = output(net, sk, skb);
if (!err)
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
@@ -913,6 +915,7 @@ slow_path:
/*
* Put this fragment into the sending queue.
*/
+ frag->tstamp = tstamp;
err = output(net, sk, frag);
if (err)
goto fail;
@@ -1141,19 +1144,19 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup);
* It returns a valid dst pointer on success, or a pointer encoded
* error code.
*/
-struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
+struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
const struct in6_addr *final_dst)
{
struct dst_entry *dst = NULL;
int err;
- err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
+ err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
if (err)
return ERR_PTR(err);
if (final_dst)
fl6->daddr = *final_dst;
- return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
+ return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
@@ -1185,7 +1188,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
if (dst)
return dst;
- dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
+ dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
if (connected && !IS_ERR(dst))
ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
@@ -1294,6 +1297,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
cork->base.fragsize = mtu;
cork->base.gso_size = ipc6->gso_size;
cork->base.tx_flags = 0;
+ cork->base.mark = ipc6->sockc.mark;
sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
if (dst_allfrag(xfrm_dst_path(&rt->dst)))
@@ -1764,7 +1768,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
hdr->daddr = *final_dst;
skb->priority = sk->sk_priority;
- skb->mark = sk->sk_mark;
+ skb->mark = cork->base.mark;
skb->tstamp = cork->base.transmit_time;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 754a484d35df..5d65436ad5ad 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -121,6 +121,7 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
/**
* ip6_tnl_lookup - fetch tunnel matching the end-point addresses
+ * @link: ifindex of underlying interface
* @remote: the address of the tunnel exit-point
* @local: the address of the tunnel entry-point
*
@@ -134,37 +135,56 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
static struct ip6_tnl *
-ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
+ip6_tnl_lookup(struct net *net, int link,
+ const struct in6_addr *remote, const struct in6_addr *local)
{
unsigned int hash = HASH(remote, local);
- struct ip6_tnl *t;
+ struct ip6_tnl *t, *cand = NULL;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
struct in6_addr any;
for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
- if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_equal(remote, &t->parms.raddr) &&
- (t->dev->flags & IFF_UP))
+ if (!ipv6_addr_equal(local, &t->parms.laddr) ||
+ !ipv6_addr_equal(remote, &t->parms.raddr) ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (link == t->parms.link)
return t;
+ else
+ cand = t;
}
memset(&any, 0, sizeof(any));
hash = HASH(&any, local);
for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
- if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_any(&t->parms.raddr) &&
- (t->dev->flags & IFF_UP))
+ if (!ipv6_addr_equal(local, &t->parms.laddr) ||
+ !ipv6_addr_any(&t->parms.raddr) ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (link == t->parms.link)
return t;
+ else if (!cand)
+ cand = t;
}
hash = HASH(remote, &any);
for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
- if (ipv6_addr_equal(remote, &t->parms.raddr) &&
- ipv6_addr_any(&t->parms.laddr) &&
- (t->dev->flags & IFF_UP))
+ if (!ipv6_addr_equal(remote, &t->parms.raddr) ||
+ !ipv6_addr_any(&t->parms.laddr) ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (link == t->parms.link)
return t;
+ else if (!cand)
+ cand = t;
}
+ if (cand)
+ return cand;
+
t = rcu_dereference(ip6n->collect_md_tun);
if (t && t->dev->flags & IFF_UP)
return t;
@@ -351,7 +371,8 @@ static struct ip6_tnl *ip6_tnl_locate(struct net *net,
(t = rtnl_dereference(*tp)) != NULL;
tp = &t->next) {
if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_equal(remote, &t->parms.raddr)) {
+ ipv6_addr_equal(remote, &t->parms.raddr) &&
+ p->link == t->parms.link) {
if (create)
return ERR_PTR(-EEXIST);
@@ -485,7 +506,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
processing of the error. */
rcu_read_lock();
- t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, &ipv6h->saddr);
+ t = ip6_tnl_lookup(dev_net(skb->dev), skb->dev->ifindex, &ipv6h->daddr, &ipv6h->saddr);
if (!t)
goto out;
@@ -640,7 +661,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (rel_info > dst_mtu(skb_dst(skb2)))
goto out;
- skb_dst_update_pmtu(skb2, rel_info);
+ skb_dst_update_pmtu_no_confirm(skb2, rel_info);
}
icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
@@ -887,7 +908,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
int ret = -1;
rcu_read_lock();
- t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
+ t = ip6_tnl_lookup(dev_net(skb->dev), skb->dev->ifindex, &ipv6h->saddr, &ipv6h->daddr);
if (t) {
u8 tproto = READ_ONCE(t->parms.proto);
@@ -1132,7 +1153,7 @@ route_lookup:
mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ?
IPV6_MIN_MTU : IPV4_MIN_MTU);
- skb_dst_update_pmtu(skb, mtu);
+ skb_dst_update_pmtu_no_confirm(skb, mtu);
if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
*pmtu = mtu;
err = -EMSGSIZE;
@@ -1420,8 +1441,10 @@ tx_err:
static void ip6_tnl_link_config(struct ip6_tnl *t)
{
struct net_device *dev = t->dev;
+ struct net_device *tdev = NULL;
struct __ip6_tnl_parm *p = &t->parms;
struct flowi6 *fl6 = &t->fl.u.ip6;
+ unsigned int mtu;
int t_hlen;
memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
@@ -1457,22 +1480,25 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
p->link, NULL, strict);
+ if (rt) {
+ tdev = rt->dst.dev;
+ ip6_rt_put(rt);
+ }
- if (!rt)
- return;
+ if (!tdev && p->link)
+ tdev = __dev_get_by_index(t->net, p->link);
- if (rt->dst.dev) {
- dev->hard_header_len = rt->dst.dev->hard_header_len +
- t_hlen;
+ if (tdev) {
+ dev->hard_header_len = tdev->hard_header_len + t_hlen;
+ mtu = min_t(unsigned int, tdev->mtu, IP6_MAX_MTU);
- dev->mtu = rt->dst.dev->mtu - t_hlen;
+ dev->mtu = mtu - t_hlen;
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
if (dev->mtu < IPV6_MIN_MTU)
dev->mtu = IPV6_MIN_MTU;
}
- ip6_rt_put(rt);
}
}
@@ -1877,10 +1903,8 @@ static int ip6_tnl_dev_init(struct net_device *dev)
if (err)
return err;
ip6_tnl_link_config(t);
- if (t->parms.collect_md) {
- dev->features |= NETIF_F_NETNS_LOCAL;
+ if (t->parms.collect_md)
netif_keep_dst(dev);
- }
return 0;
}
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 024db17386d2..524006aa0d78 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -449,8 +449,17 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
int err = -1;
int mtu;
- if (!dst)
- goto tx_err_link_failure;
+ if (!dst) {
+ fl->u.ip6.flowi6_oif = dev->ifindex;
+ fl->u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC;
+ dst = ip6_route_output(dev_net(dev), NULL, &fl->u.ip6);
+ if (dst->error) {
+ dst_release(dst);
+ dst = NULL;
+ goto tx_err_link_failure;
+ }
+ skb_dst_set(skb, dst);
+ }
dst_hold(dst);
dst = xfrm_lookup(t->net, dst, fl, NULL, 0);
@@ -479,7 +488,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
mtu = dst_mtu(dst);
if (skb->len > mtu) {
- skb_dst_update_pmtu(skb, mtu);
+ skb_dst_update_pmtu_no_confirm(skb, mtu);
if (skb->protocol == htons(ETH_P_IPV6)) {
if (mtu < IPV6_MIN_MTU)
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index e80d36c5073d..bfa49ff70531 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -265,9 +265,10 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
rtnl_unlock();
}
-static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
+static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
- return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
+ return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
}
static unsigned int ip6mr_rules_seq_read(struct net *net)
@@ -324,7 +325,8 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
rtnl_unlock();
}
-static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
+static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
return 0;
}
@@ -1148,8 +1150,8 @@ static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
* Create a new entry if allowable
*/
- if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
- (c = ip6mr_cache_alloc_unres()) == NULL) {
+ c = ip6mr_cache_alloc_unres();
+ if (!c) {
spin_unlock_bh(&mfc_unres_lock);
kfree_skb(skb);
@@ -1256,10 +1258,11 @@ static unsigned int ip6mr_seq_read(struct net *net)
return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
}
-static int ip6mr_dump(struct net *net, struct notifier_block *nb)
+static int ip6mr_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
- ip6mr_mr_table_iter, &mrt_lock);
+ ip6mr_mr_table_iter, &mrt_lock, extack);
}
static struct notifier_block ip6_mr_notifier = {
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 264c292e7dcc..79fc012dd2ca 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -363,8 +363,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_TRANSPARENT:
- if (valbool && !ns_capable(net->user_ns, CAP_NET_ADMIN) &&
- !ns_capable(net->user_ns, CAP_NET_RAW)) {
+ if (valbool && !ns_capable(net->user_ns, CAP_NET_RAW) &&
+ !ns_capable(net->user_ns, CAP_NET_ADMIN)) {
retv = -EPERM;
break;
}
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 7f3f13c37916..eaa4c2cc2fbb 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -787,14 +787,15 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
if (pmc) {
im->idev = pmc->idev;
if (im->mca_sfmode == MCAST_INCLUDE) {
- im->mca_tomb = pmc->mca_tomb;
- im->mca_sources = pmc->mca_sources;
+ swap(im->mca_tomb, pmc->mca_tomb);
+ swap(im->mca_sources, pmc->mca_sources);
for (psf = im->mca_sources; psf; psf = psf->sf_next)
psf->sf_crcount = idev->mc_qrv;
} else {
im->mca_crcount = idev->mc_qrv;
}
in6_dev_put(pmc->idev);
+ ip6_mc_clear_src(pmc);
kfree(pmc);
}
spin_unlock_bh(&im->mca_lock);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 083cc1c94cd3..53caf59c591e 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -196,6 +196,7 @@ static inline int ndisc_is_useropt(const struct net_device *dev,
{
return opt->nd_opt_type == ND_OPT_RDNSS ||
opt->nd_opt_type == ND_OPT_DNSSL ||
+ opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL ||
ndisc_ops_is_useropt(dev, opt->nd_opt_type);
}
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 61819ed858b1..409e79b84a83 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -113,12 +113,13 @@ int __nf_ip6_route(struct net *net, struct dst_entry **dst,
EXPORT_SYMBOL_GPL(__nf_ip6_route);
int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
- struct nf_ct_bridge_frag_data *data,
+ struct nf_bridge_frag_data *data,
int (*output)(struct net *, struct sock *sk,
- const struct nf_ct_bridge_frag_data *data,
+ const struct nf_bridge_frag_data *data,
struct sk_buff *))
{
int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
+ ktime_t tstamp = skb->tstamp;
struct ip6_frag_state state;
u8 *prevhdr, nexthdr = 0;
unsigned int mtu, hlen;
@@ -183,6 +184,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (iter.frag)
ip6_fraglist_prepare(skb, &iter);
+ skb->tstamp = tstamp;
err = output(net, sk, data, skb);
if (err || !iter.frag)
break;
@@ -215,6 +217,7 @@ slow_path:
goto blackhole;
}
+ skb2->tstamp = tstamp;
err = output(net, sk, data, skb2);
if (err)
goto blackhole;
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 6120a7800975..0594131fa46d 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -128,9 +128,9 @@ config IP6_NF_MATCH_HL
depends on NETFILTER_ADVANCED
select NETFILTER_XT_MATCH_HL
---help---
- This is a backwards-compat option for the user's convenience
- (e.g. when running oldconfig). It selects
- CONFIG_NETFILTER_XT_MATCH_HL.
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_MATCH_HL.
config IP6_NF_MATCH_IPV6HEADER
tristate '"ipv6header" IPv6 Extension Headers Match'
@@ -170,13 +170,13 @@ config IP6_NF_MATCH_RT
To compile it as a module, choose M here. If unsure, say N.
config IP6_NF_MATCH_SRH
- tristate '"srh" Segment Routing header match support'
- depends on NETFILTER_ADVANCED
- help
- srh matching allows you to match packets based on the segment
+ tristate '"srh" Segment Routing header match support'
+ depends on NETFILTER_ADVANCED
+ help
+ srh matching allows you to match packets based on the segment
routing header of the packet.
- To compile it as a module, choose M here. If unsure, say N.
+ To compile it as a module, choose M here. If unsure, say N.
# The targets
config IP6_NF_TARGET_HL
@@ -184,9 +184,9 @@ config IP6_NF_TARGET_HL
depends on NETFILTER_ADVANCED && IP6_NF_MANGLE
select NETFILTER_XT_TARGET_HL
---help---
- This is a backwards-compatible option for the user's convenience
- (e.g. when running oldconfig). It selects
- CONFIG_NETFILTER_XT_TARGET_HL.
+ This is a backwards-compatible option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_TARGET_HL.
config IP6_NF_FILTER
tristate "Packet filtering"
@@ -245,14 +245,14 @@ config IP6_NF_RAW
# security table for MAC policy
config IP6_NF_SECURITY
- tristate "Security table"
- depends on SECURITY
- depends on NETFILTER_ADVANCED
- help
- This option adds a `security' table to iptables, for use
- with Mandatory Access Control (MAC) policy.
-
- If unsure, say N.
+ tristate "Security table"
+ depends on SECURITY
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `security' table to iptables, for use
+ with Mandatory Access Control (MAC) policy.
+
+ If unsure, say N.
config IP6_NF_NAT
tristate "ip6tables NAT support"
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 5cdb4a69d277..fd1f52a21bf1 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -36,8 +36,8 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
opts.options |= XT_SYNPROXY_OPT_ECN;
opts.options &= info->options;
- opts.mss_encode = opts.mss;
- opts.mss = info->mss;
+ opts.mss_encode = opts.mss_option;
+ opts.mss_option = info->mss;
if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
synproxy_init_timestamp_cookie(info, &opts);
else
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 0fc6326ef499..c52ff929c93b 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -16,7 +16,7 @@
#include <net/ipv6.h>
#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_ipv6/ip6t_ipv6header.h>
MODULE_LICENSE("GPL");
@@ -42,7 +42,7 @@ ipv6header_mt6(const struct sk_buff *skb, struct xt_action_param *par)
len = skb->len - ptr;
temp = 0;
- while (ip6t_ext_hdr(nexthdr)) {
+ while (nf_ip6_ext_hdr(nexthdr)) {
const struct ipv6_opt_hdr *hp;
struct ipv6_opt_hdr _hdr;
int hdrlen;
diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c
index e6c9da9866b1..a0a2de30be3e 100644
--- a/net/ipv6/netfilter/nf_dup_ipv6.c
+++ b/net/ipv6/netfilter/nf_dup_ipv6.c
@@ -54,7 +54,7 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
return;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
- nf_reset(skb);
+ nf_reset_ct(skb);
nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
#endif
if (hooknum == NF_INET_PRE_ROUTING ||
diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
index f6d9a48c7a2a..a8566ee12e83 100644
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -10,6 +10,8 @@
static struct nf_flowtable_type flowtable_ipv6 = {
.family = NFPROTO_IPV6,
.init = nf_flow_table_init,
+ .setup = nf_flow_table_offload_setup,
+ .action = nf_flow_rule_route_ipv6,
.free = nf_flow_table_free,
.hook = nf_flow_offload_ipv6_hook,
.owner = THIS_MODULE,
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index f53bd8f01219..22b80db6d882 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -18,7 +18,7 @@
#include <net/route.h>
#include <linux/netfilter.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6.h>
#include <linux/netfilter/xt_LOG.h>
#include <net/netfilter/nf_log.h>
@@ -70,7 +70,7 @@ static void dump_ipv6_packet(struct net *net, struct nf_log_buf *m,
fragment = 0;
ptr = ip6hoff + sizeof(struct ipv6hdr);
currenthdr = ih->nexthdr;
- while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) {
+ while (currenthdr != NEXTHDR_NONE && nf_ip6_ext_hdr(currenthdr)) {
struct ipv6_opt_hdr _hdr;
const struct ipv6_opt_hdr *hp;
diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c
index 437d95545c31..b9df879c48d3 100644
--- a/net/ipv6/netfilter/nf_socket_ipv6.c
+++ b/net/ipv6/netfilter/nf_socket_ipv6.c
@@ -12,7 +12,6 @@
#include <net/sock.h>
#include <net/inet_sock.h>
#include <net/inet6_hashtables.h>
-#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <net/netfilter/nf_socket.h>
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack.h>
diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c
index 34d51cd426b0..6bac68fb27a3 100644
--- a/net/ipv6/netfilter/nf_tproxy_ipv6.c
+++ b/net/ipv6/netfilter/nf_tproxy_ipv6.c
@@ -150,4 +150,4 @@ EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v6);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Balazs Scheidler, Krisztian Kovacs");
-MODULE_DESCRIPTION("Netfilter IPv4 transparent proxy support");
+MODULE_DESCRIPTION("Netfilter IPv6 transparent proxy support");
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 87d2d8c1db7c..98ac32b49d8c 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -223,7 +223,7 @@ static int __net_init ping_v6_proc_init_net(struct net *net)
return 0;
}
-static void __net_init ping_v6_proc_exit_net(struct net *net)
+static void __net_exit ping_v6_proc_exit_net(struct net *net)
{
remove_proc_entry("icmp6", net->proc_net);
}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 8a6131991e38..dfe5e603ffe1 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -215,7 +215,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
/* Not releasing hash table! */
if (clone) {
- nf_reset(clone);
+ nf_reset_ct(clone);
rawv6_rcv(sk, clone);
}
}
@@ -646,7 +646,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb->protocol = htons(ETH_P_IPV6);
skb->priority = sk->sk_priority;
- skb->mark = sk->sk_mark;
+ skb->mark = sockc->mark;
skb->tstamp = sockc->transmit_time;
skb_put(skb, length);
@@ -810,6 +810,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipcm6_init(&ipc6);
ipc6.sockc.tsflags = sk->sk_tsflags;
+ ipc6.sockc.mark = sk->sk_mark;
if (sin6) {
if (addr_len < SIN6_LEN_RFC2133)
@@ -891,6 +892,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
opt = ipv6_fixup_options(&opt_space, opt);
fl6.flowi6_proto = proto;
+ fl6.flowi6_mark = ipc6.sockc.mark;
if (!hdrincl) {
rfv.msg = msg;
@@ -923,7 +925,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+ dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
goto out;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index fd059e08785a..4fbdc60b4e07 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -95,7 +95,8 @@ static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb, u32 mtu);
+ struct sk_buff *skb, u32 mtu,
+ bool confirm_neigh);
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
@@ -227,7 +228,7 @@ static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
struct net_device *dev = dst->dev;
struct rt6_info *rt = (struct rt6_info *)dst;
- daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
+ daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr);
if (!daddr)
return;
if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
@@ -264,7 +265,8 @@ static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
}
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb, u32 mtu)
+ struct sk_buff *skb, u32 mtu,
+ bool confirm_neigh)
{
}
@@ -621,6 +623,7 @@ static void rt6_probe(struct fib6_nh *fib6_nh)
{
struct __rt6_probe_work *work = NULL;
const struct in6_addr *nh_gw;
+ unsigned long last_probe;
struct neighbour *neigh;
struct net_device *dev;
struct inet6_dev *idev;
@@ -633,12 +636,13 @@ static void rt6_probe(struct fib6_nh *fib6_nh)
* Router Reachability Probe MUST be rate-limited
* to no more than one per minute.
*/
- if (fib6_nh->fib_nh_gw_family)
+ if (!fib6_nh->fib_nh_gw_family)
return;
nh_gw = &fib6_nh->fib_nh_gw6;
dev = fib6_nh->fib_nh_dev;
rcu_read_lock_bh();
+ last_probe = READ_ONCE(fib6_nh->last_probe);
idev = __in6_dev_get(dev);
neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
if (neigh) {
@@ -654,13 +658,15 @@ static void rt6_probe(struct fib6_nh *fib6_nh)
__neigh_set_probe_once(neigh);
}
write_unlock(&neigh->lock);
- } else if (time_after(jiffies, fib6_nh->last_probe +
+ } else if (time_after(jiffies, last_probe +
idev->cnf.rtr_probe_interval)) {
work = kmalloc(sizeof(*work), GFP_ATOMIC);
}
- if (work) {
- fib6_nh->last_probe = jiffies;
+ if (!work || cmpxchg(&fib6_nh->last_probe,
+ last_probe, jiffies) != last_probe) {
+ kfree(work);
+ } else {
INIT_WORK(&work->work, rt6_probe_deferred);
work->target = *nh_gw;
dev_hold(dev);
@@ -1475,11 +1481,11 @@ static u32 rt6_exception_hash(const struct in6_addr *dst,
u32 val;
net_get_random_once(&seed, sizeof(seed));
- val = jhash(dst, sizeof(*dst), seed);
+ val = jhash2((const u32 *)dst, sizeof(*dst)/sizeof(u32), seed);
#ifdef CONFIG_IPV6_SUBTREES
if (src)
- val = jhash(src, sizeof(*src), val);
+ val = jhash2((const u32 *)src, sizeof(*src)/sizeof(u32), val);
#endif
return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
}
@@ -2291,10 +2297,7 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb,
if (!icmph)
goto out;
- if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
- icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
- icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
- icmph->icmp6_type != ICMPV6_PARAMPROB)
+ if (!icmpv6_is_err(icmph->icmp6_type))
goto out;
inner_iph = skb_header_pointer(skb,
@@ -2691,7 +2694,8 @@ static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
}
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
- const struct ipv6hdr *iph, u32 mtu)
+ const struct ipv6hdr *iph, u32 mtu,
+ bool confirm_neigh)
{
const struct in6_addr *daddr, *saddr;
struct rt6_info *rt6 = (struct rt6_info *)dst;
@@ -2709,7 +2713,10 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
daddr = NULL;
saddr = NULL;
}
- dst_confirm_neigh(dst, daddr);
+
+ if (confirm_neigh)
+ dst_confirm_neigh(dst, daddr);
+
mtu = max_t(u32, mtu, IPV6_MIN_MTU);
if (mtu >= dst_mtu(dst))
return;
@@ -2725,10 +2732,9 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
rcu_read_lock();
res.f6i = rcu_dereference(rt6->from);
- if (!res.f6i) {
- rcu_read_unlock();
- return;
- }
+ if (!res.f6i)
+ goto out_unlock;
+
res.fib6_flags = res.f6i->fib6_flags;
res.fib6_type = res.f6i->fib6_type;
@@ -2744,10 +2750,8 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
/* fib6_info uses a nexthop that does not have fib6_nh
* using the dst->dev + gw. Should be impossible.
*/
- if (!arg.match) {
- rcu_read_unlock();
- return;
- }
+ if (!arg.match)
+ goto out_unlock;
res.nh = arg.match;
} else {
@@ -2760,14 +2764,17 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
if (rt6_insert_exception(nrt6, &res))
dst_release_immediate(&nrt6->dst);
}
+out_unlock:
rcu_read_unlock();
}
}
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb, u32 mtu)
+ struct sk_buff *skb, u32 mtu,
+ bool confirm_neigh)
{
- __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
+ __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu,
+ confirm_neigh);
}
void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
@@ -2786,7 +2793,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
dst = ip6_route_output(net, NULL, &fl6);
if (!dst->error)
- __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
+ __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true);
dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);
@@ -3385,6 +3392,9 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
int err;
fib6_nh->fib_nh_family = AF_INET6;
+#ifdef CONFIG_IPV6_ROUTER_PREF
+ fib6_nh->last_probe = jiffies;
+#endif
err = -ENODEV;
if (cfg->fc_ifindex) {
@@ -3747,6 +3757,7 @@ static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
struct fib6_info *sibling, *next_sibling;
+ struct fib6_node *fn;
/* prefer to send a single notification with all hops */
skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
@@ -3762,12 +3773,32 @@ static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
info->skip_notify = 1;
}
+ /* 'rt' points to the first sibling route. If it is not the
+ * leaf, then we do not need to send a notification. Otherwise,
+ * we need to check if the last sibling has a next route or not
+ * and emit a replace or delete notification, respectively.
+ */
info->skip_notify_kernel = 1;
- call_fib6_multipath_entry_notifiers(net,
- FIB_EVENT_ENTRY_DEL,
- rt,
- rt->fib6_nsiblings,
- NULL);
+ fn = rcu_dereference_protected(rt->fib6_node,
+ lockdep_is_held(&table->tb6_lock));
+ if (rcu_access_pointer(fn->leaf) == rt) {
+ struct fib6_info *last_sibling, *replace_rt;
+
+ last_sibling = list_last_entry(&rt->fib6_siblings,
+ struct fib6_info,
+ fib6_siblings);
+ replace_rt = rcu_dereference_protected(
+ last_sibling->fib6_next,
+ lockdep_is_held(&table->tb6_lock));
+ if (replace_rt)
+ call_fib6_entry_notifiers_replace(net,
+ replace_rt);
+ else
+ call_fib6_multipath_entry_notifiers(net,
+ FIB_EVENT_ENTRY_DEL,
+ rt, rt->fib6_nsiblings,
+ NULL);
+ }
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings,
fib6_siblings) {
@@ -4388,13 +4419,14 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net,
struct fib6_config cfg = {
.fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
.fc_ifindex = idev->dev->ifindex,
- .fc_flags = RTF_UP | RTF_ADDRCONF | RTF_NONEXTHOP,
+ .fc_flags = RTF_UP | RTF_NONEXTHOP,
.fc_dst = *addr,
.fc_dst_len = 128,
.fc_protocol = RTPROT_KERNEL,
.fc_nlinfo.nl_net = net,
.fc_ignore_dev_down = true,
};
+ struct fib6_info *f6i;
if (anycast) {
cfg.fc_type = RTN_ANYCAST;
@@ -4404,7 +4436,10 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net,
cfg.fc_flags |= RTF_LOCAL;
}
- return ip6_route_info_create(&cfg, gfp_flags, NULL);
+ f6i = ip6_route_info_create(&cfg, gfp_flags, NULL);
+ if (!IS_ERR(f6i))
+ f6i->dst_nocount = true;
+ return f6i;
}
/* remove deleted ip from prefsrc entries */
@@ -5011,12 +5046,37 @@ static void ip6_route_mpath_notify(struct fib6_info *rt,
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
}
+static bool ip6_route_mpath_should_notify(const struct fib6_info *rt)
+{
+ bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
+ bool should_notify = false;
+ struct fib6_info *leaf;
+ struct fib6_node *fn;
+
+ rcu_read_lock();
+ fn = rcu_dereference(rt->fib6_node);
+ if (!fn)
+ goto out;
+
+ leaf = rcu_dereference(fn->leaf);
+ if (!leaf)
+ goto out;
+
+ if (rt == leaf ||
+ (rt_can_ecmp && rt->fib6_metric == leaf->fib6_metric &&
+ rt6_qualify_for_ecmp(leaf)))
+ should_notify = true;
+out:
+ rcu_read_unlock();
+
+ return should_notify;
+}
+
static int ip6_route_multipath_add(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
struct fib6_info *rt_notif = NULL, *rt_last = NULL;
struct nl_info *info = &cfg->fc_nlinfo;
- enum fib_event_type event_type;
struct fib6_config r_cfg;
struct rtnexthop *rtnh;
struct fib6_info *rt;
@@ -5141,13 +5201,27 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
nhn++;
}
- event_type = replace ? FIB_EVENT_ENTRY_REPLACE : FIB_EVENT_ENTRY_ADD;
- err = call_fib6_multipath_entry_notifiers(info->nl_net, event_type,
- rt_notif, nhn - 1, extack);
- if (err) {
- /* Delete all the siblings that were just added */
- err_nh = NULL;
- goto add_errout;
+ /* An in-kernel notification should only be sent in case the new
+ * multipath route is added as the first route in the node, or if
+ * it was appended to it. We pass 'rt_notif' since it is the first
+ * sibling and might allow us to skip some checks in the replace case.
+ */
+ if (ip6_route_mpath_should_notify(rt_notif)) {
+ enum fib_event_type fib_event;
+
+ if (rt_notif->fib6_nsiblings != nhn - 1)
+ fib_event = FIB_EVENT_ENTRY_APPEND;
+ else
+ fib_event = FIB_EVENT_ENTRY_REPLACE;
+
+ err = call_fib6_multipath_entry_notifiers(info->nl_net,
+ fib_event, rt_notif,
+ nhn - 1, extack);
+ if (err) {
+ /* Delete all the siblings that were just added */
+ err_nh = NULL;
+ goto add_errout;
+ }
}
/* success ... tell user about new route */
@@ -5325,11 +5399,11 @@ static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh,
if (nexthop_is_multipath(nh)) {
struct nlattr *mp;
- mp = nla_nest_start(skb, RTA_MULTIPATH);
+ mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
if (!mp)
goto nla_put_failure;
- if (nexthop_mpath_fill_node(skb, nh))
+ if (nexthop_mpath_fill_node(skb, nh, AF_INET6))
goto nla_put_failure;
nla_nest_end(skb, mp);
@@ -5337,7 +5411,7 @@ static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh,
struct fib6_nh *fib6_nh;
fib6_nh = nexthop_fib6_nh(nh);
- if (fib_nexthop_info(skb, &fib6_nh->nh_common,
+ if (fib_nexthop_info(skb, &fib6_nh->nh_common, AF_INET6,
flags, false) < 0)
goto nla_put_failure;
}
@@ -5466,13 +5540,14 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
goto nla_put_failure;
if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common,
- rt->fib6_nh->fib_nh_weight) < 0)
+ rt->fib6_nh->fib_nh_weight, AF_INET6) < 0)
goto nla_put_failure;
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings, fib6_siblings) {
if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
- sibling->fib6_nh->fib_nh_weight) < 0)
+ sibling->fib6_nh->fib_nh_weight,
+ AF_INET6) < 0)
goto nla_put_failure;
}
@@ -5489,7 +5564,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
rtm->rtm_flags |= nh_flags;
} else {
- if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common,
+ if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common, AF_INET6,
&nh_flags, false) < 0)
goto nla_put_failure;
@@ -5501,6 +5576,13 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
expires -= jiffies;
}
+ if (!dst) {
+ if (rt->offload)
+ rtm->rtm_flags |= RTM_F_OFFLOAD;
+ if (rt->trap)
+ rtm->rtm_flags |= RTM_F_TRAP;
+ }
+
if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
goto nla_put_failure;
@@ -6192,6 +6274,9 @@ static int __net_init ip6_route_net_init(struct net *net)
dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
ip6_template_metrics, true);
INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->rt6i_uncached);
+#ifdef CONFIG_IPV6_SUBTREES
+ net->ipv6.fib6_routes_require_src = 0;
+#endif
#endif
net->ipv6.sysctl.flush_delay = 0;
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 9d4f75e0d33a..7cbc19731997 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -23,6 +23,7 @@
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/dst_cache.h>
+#include <net/ip_tunnels.h>
#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
#endif
@@ -81,6 +82,11 @@ static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb)
if (!pskb_may_pull(skb, srhoff + len))
return NULL;
+ /* note that pskb_may_pull may change pointers in header;
+ * for this reason it is necessary to reload them when needed.
+ */
+ srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+
if (!seg6_validate_srh(srh, len))
return NULL;
@@ -130,7 +136,8 @@ static bool decap_and_validate(struct sk_buff *skb, int proto)
skb_reset_network_header(skb);
skb_reset_transport_header(skb);
- skb->encapsulation = 0;
+ if (iptunnel_pull_offloads(skb))
+ return false;
return true;
}
@@ -144,8 +151,9 @@ static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
*daddr = *addr;
}
-int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
- u32 tbl_id)
+static int
+seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
+ u32 tbl_id, bool local_delivery)
{
struct net *net = dev_net(skb->dev);
struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -153,6 +161,7 @@ int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
struct dst_entry *dst = NULL;
struct rt6_info *rt;
struct flowi6 fl6;
+ int dev_flags = 0;
fl6.flowi6_iif = skb->dev->ifindex;
fl6.daddr = nhaddr ? *nhaddr : hdr->daddr;
@@ -177,7 +186,13 @@ int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
dst = &rt->dst;
}
- if (dst && dst->dev->flags & IFF_LOOPBACK && !dst->error) {
+ /* we want to discard traffic destined for local packet processing,
+ * if @local_delivery is set to false.
+ */
+ if (!local_delivery)
+ dev_flags |= IFF_LOOPBACK;
+
+ if (dst && (dst->dev->flags & dev_flags) && !dst->error) {
dst_release(dst);
dst = NULL;
}
@@ -194,6 +209,12 @@ out:
return dst->error;
}
+int seg6_lookup_nexthop(struct sk_buff *skb,
+ struct in6_addr *nhaddr, u32 tbl_id)
+{
+ return seg6_lookup_any_nexthop(skb, nhaddr, tbl_id, false);
+}
+
/* regular endpoint function */
static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
@@ -336,6 +357,8 @@ static int input_action_end_dx6(struct sk_buff *skb,
if (!ipv6_addr_any(&slwt->nh6))
nhaddr = &slwt->nh6;
+ skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+
seg6_lookup_nexthop(skb, nhaddr, 0);
return dst_input(skb);
@@ -365,6 +388,8 @@ static int input_action_end_dx4(struct sk_buff *skb,
skb_dst_drop(skb);
+ skb_set_transport_header(skb, sizeof(struct iphdr));
+
err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
if (err)
goto drop;
@@ -385,7 +410,9 @@ static int input_action_end_dt6(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto drop;
- seg6_lookup_nexthop(skb, NULL, slwt->table);
+ skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+
+ seg6_lookup_any_nexthop(skb, NULL, slwt->table, true);
return dst_input(skb);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index b2ccbc473127..98954830c40b 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -944,7 +944,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
}
if (tunnel->parms.iph.daddr)
- skb_dst_update_pmtu(skb, mtu);
+ skb_dst_update_pmtu_no_confirm(skb, mtu);
if (skb->len > mtu && !skb_is_gso(skb)) {
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 16632e02e9b0..13235a012388 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -178,6 +178,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
treq = tcp_rsk(req);
treq->tfo_listener = false;
+ if (IS_ENABLED(CONFIG_MPTCP))
+ treq->is_mptcp = 0;
+
if (security_inet_conn_request(sk, skb, req))
goto out_free;
@@ -235,7 +238,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
fl6.flowi6_uid = sk->sk_uid;
security_req_classify_flow(req, flowi6_to_flowi(&fl6));
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+ dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
if (IS_ERR(dst))
goto out_free;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5da069e91cac..eaf09e6b7844 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -75,13 +75,14 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static const struct inet_connection_sock_af_ops ipv6_mapped;
-static const struct inet_connection_sock_af_ops ipv6_specific;
+const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
- const struct in6_addr *addr)
+ const struct in6_addr *addr,
+ int l3index)
{
return NULL;
}
@@ -215,7 +216,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
!ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
tp->rx_opt.ts_recent = 0;
tp->rx_opt.ts_recent_stamp = 0;
- tp->write_seq = 0;
+ WRITE_ONCE(tp->write_seq, 0);
}
sk->sk_v6_daddr = usin->sin6_addr;
@@ -237,6 +238,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
icsk->icsk_af_ops = &ipv6_mapped;
+ if (sk_is_mptcp(sk))
+ mptcpv6_handle_mapped(sk, true);
sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
tp->af_specific = &tcp_sock_ipv6_mapped_specific;
@@ -247,6 +250,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (err) {
icsk->icsk_ext_hdr_len = exthdrlen;
icsk->icsk_af_ops = &ipv6_specific;
+ if (sk_is_mptcp(sk))
+ mptcpv6_handle_mapped(sk, false);
sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
tp->af_specific = &tcp_sock_ipv6_specific;
@@ -275,7 +280,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+ dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
goto failure;
@@ -311,10 +316,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (likely(!tp->repair)) {
if (!tp->write_seq)
- tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32,
- sk->sk_v6_daddr.s6_addr32,
- inet->inet_sport,
- inet->inet_dport);
+ WRITE_ONCE(tp->write_seq,
+ secure_tcpv6_seq(np->saddr.s6_addr32,
+ sk->sk_v6_daddr.s6_addr32,
+ inet->inet_sport,
+ inet->inet_dport));
tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
np->saddr.s6_addr32,
sk->sk_v6_daddr.s6_addr32);
@@ -406,7 +412,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
tp = tcp_sk(sk);
/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
- fastopen = tp->fastopen_rsk;
+ fastopen = rcu_dereference(tp->fastopen_rsk);
snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
if (sk->sk_state != TCP_LISTEN &&
!between(seq, snd_una, tp->snd_nxt)) {
@@ -512,7 +518,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
opt = ireq->ipv6_opt;
if (!opt)
opt = rcu_dereference(np->opt);
- err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass);
+ err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass,
+ sk->sk_priority);
rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -530,15 +537,22 @@ static void tcp_v6_reqsk_destructor(struct request_sock *req)
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
- const struct in6_addr *addr)
+ const struct in6_addr *addr,
+ int l3index)
{
- return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
+ return tcp_md5_do_lookup(sk, l3index,
+ (union tcp_md5_addr *)addr, AF_INET6);
}
static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
const struct sock *addr_sk)
{
- return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
+ int l3index;
+
+ l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
+ addr_sk->sk_bound_dev_if);
+ return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
+ l3index);
}
static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
@@ -546,6 +560,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
{
struct tcp_md5sig cmd;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
+ int l3index = 0;
u8 prefixlen;
if (optlen < sizeof(cmd))
@@ -567,12 +582,30 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
}
+ if (optname == TCP_MD5SIG_EXT &&
+ cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
+ struct net_device *dev;
+
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
+ if (dev && netif_is_l3_master(dev))
+ l3index = dev->ifindex;
+ rcu_read_unlock();
+
+ /* ok to reference set/not set outside of rcu;
+ * right now device MUST be an L3 master
+ */
+ if (!dev || !l3index)
+ return -EINVAL;
+ }
+
if (!cmd.tcpm_keylen) {
if (ipv6_addr_v4mapped(&sin6->sin6_addr))
return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
- AF_INET, prefixlen);
+ AF_INET, prefixlen,
+ l3index);
return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
- AF_INET6, prefixlen);
+ AF_INET6, prefixlen, l3index);
}
if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
@@ -580,12 +613,13 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
if (ipv6_addr_v4mapped(&sin6->sin6_addr))
return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
- AF_INET, prefixlen, cmd.tcpm_key,
- cmd.tcpm_keylen, GFP_KERNEL);
+ AF_INET, prefixlen, l3index,
+ cmd.tcpm_key, cmd.tcpm_keylen,
+ GFP_KERNEL);
return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
- AF_INET6, prefixlen, cmd.tcpm_key,
- cmd.tcpm_keylen, GFP_KERNEL);
+ AF_INET6, prefixlen, l3index,
+ cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}
static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
@@ -696,17 +730,23 @@ clear_hash_noput:
#endif
static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
- const struct sk_buff *skb)
+ const struct sk_buff *skb,
+ int dif, int sdif)
{
#ifdef CONFIG_TCP_MD5SIG
const __u8 *hash_location = NULL;
struct tcp_md5sig_key *hash_expected;
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
const struct tcphdr *th = tcp_hdr(skb);
- int genhash;
+ int genhash, l3index;
u8 newhash[16];
- hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
+ /* sdif set, means packet ingressed via a device
+ * in an L3 domain and dif is set to the l3mdev
+ */
+ l3index = sdif ? dif : 0;
+
+ hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
hash_location = tcp_parse_md5sig_option(th);
/* We've parsed the options - do we have a hash? */
@@ -730,10 +770,10 @@ static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
if (genhash || memcmp(hash_location, newhash, 16) != 0) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
- net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
+ net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
genhash ? "failed" : "mismatch",
&ip6h->saddr, ntohs(th->source),
- &ip6h->daddr, ntohs(th->dest));
+ &ip6h->daddr, ntohs(th->dest), l3index);
return true;
}
#endif
@@ -783,7 +823,7 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
.syn_ack_timeout = tcp_syn_ack_timeout,
};
-static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
+const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
@@ -803,7 +843,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr,
int oif, struct tcp_md5sig_key *key, int rst,
- u8 tclass, __be32 label)
+ u8 tclass, __be32 label, u32 priority)
{
const struct tcphdr *th = tcp_hdr(skb);
struct tcphdr *t1;
@@ -904,10 +944,11 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
* Underlying function will use this to retrieve the network
* namespace
*/
- dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
+ dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
if (!IS_ERR(dst)) {
skb_dst_set(buff, dst);
- ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass);
+ ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass,
+ priority);
TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
if (rst)
TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
@@ -930,6 +971,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
struct sock *sk1 = NULL;
#endif
__be32 label = 0;
+ u32 priority = 0;
struct net *net;
int oif = 0;
@@ -947,8 +989,18 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
rcu_read_lock();
hash_location = tcp_parse_md5sig_option(th);
if (sk && sk_fullsock(sk)) {
- key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
+ int l3index;
+
+ /* sdif set, means packet ingressed via a device
+ * in an L3 domain and inet_iif is set to it.
+ */
+ l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
+ key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
} else if (hash_location) {
+ int dif = tcp_v6_iif_l3_slave(skb);
+ int sdif = tcp_v6_sdif(skb);
+ int l3index;
+
/*
* active side is lost. Try to find listening socket through
* source port, and then find md5 key through listening socket.
@@ -960,13 +1012,16 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
&tcp_hashinfo, NULL, 0,
&ipv6h->saddr,
th->source, &ipv6h->daddr,
- ntohs(th->source),
- tcp_v6_iif_l3_slave(skb),
- tcp_v6_sdif(skb));
+ ntohs(th->source), dif, sdif);
if (!sk1)
goto out;
- key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
+ /* sdif set, means packet ingressed via a device
+ * in an L3 domain and dif is set to it.
+ */
+ l3index = tcp_v6_sdif(skb) ? dif : 0;
+
+ key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
if (!key)
goto out;
@@ -990,16 +1045,19 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
trace_tcp_send_reset(sk, skb);
if (np->repflow)
label = ip6_flowlabel(ipv6h);
+ priority = sk->sk_priority;
}
- if (sk->sk_state == TCP_TIME_WAIT)
+ if (sk->sk_state == TCP_TIME_WAIT) {
label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
+ priority = inet_twsk(sk)->tw_priority;
+ }
} else {
if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
label = ip6_flowlabel(ipv6h);
}
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0,
- label);
+ label, priority);
#ifdef CONFIG_TCP_MD5SIG
out:
@@ -1010,10 +1068,10 @@ out:
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key, u8 tclass,
- __be32 label)
+ __be32 label, u32 priority)
{
tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
- tclass, label);
+ tclass, label, priority);
}
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
@@ -1025,7 +1083,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp_raw() + tcptw->tw_ts_offset,
tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
- tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
+ tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
inet_twsk_put(tw);
}
@@ -1033,6 +1091,10 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
{
+ int l3index;
+
+ l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
+
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
*/
@@ -1047,8 +1109,8 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
req->ts_recent, sk->sk_bound_dev_if,
- tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
- 0, 0);
+ tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
+ 0, 0, sk->sk_priority);
}
@@ -1063,6 +1125,21 @@ static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
return sk;
}
+u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
+ struct tcphdr *th, u32 *cookie)
+{
+ u16 mss = 0;
+#ifdef CONFIG_SYN_COOKIES
+ mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
+ &tcp_request_sock_ipv6_ops, sk, th);
+ if (mss) {
+ *cookie = __cookie_v6_init_sequence(iph, th, &mss);
+ tcp_synq_overflow(sk);
+ }
+#endif
+ return mss;
+}
+
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
if (skb->protocol == htons(ETH_P_IP))
@@ -1104,6 +1181,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *key;
+ int l3index;
#endif
struct flowi6 fl6;
@@ -1129,6 +1207,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newnp->saddr = newsk->sk_v6_rcv_saddr;
inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
+ if (sk_is_mptcp(newsk))
+ mptcpv6_handle_mapped(newsk, true);
newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
@@ -1247,8 +1327,10 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
#ifdef CONFIG_TCP_MD5SIG
+ l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
+
/* Copy over the MD5 key from the original socket */
- key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
+ key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
if (key) {
/* We're using one, so create a matching key
* on the newsk structure. If we fail to get
@@ -1256,7 +1338,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
* across. Shucks.
*/
tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
- AF_INET6, 128, key->key, key->keylen,
+ AF_INET6, 128, l3index, key->key, key->keylen,
sk_gfp_mask(sk, GFP_ATOMIC));
}
#endif
@@ -1458,6 +1540,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
struct sk_buff *skb_to_free;
int sdif = inet6_sdif(skb);
+ int dif = inet6_iif(skb);
const struct tcphdr *th;
const struct ipv6hdr *hdr;
bool refcounted;
@@ -1506,7 +1589,7 @@ process:
struct sock *nsk;
sk = req->rsk_listener;
- if (tcp_v6_inbound_md5_hash(sk, skb)) {
+ if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
sk_drops_add(sk, skb);
reqsk_put(req);
goto discard_it;
@@ -1561,7 +1644,7 @@ process:
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
- if (tcp_v6_inbound_md5_hash(sk, skb))
+ if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
goto discard_and_relse;
if (tcp_filter(sk, skb))
@@ -1717,7 +1800,7 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = {
.twsk_destructor = tcp_twsk_destructor,
};
-static const struct inet_connection_sock_af_ops ipv6_specific = {
+const struct inet_connection_sock_af_ops ipv6_specific = {
.queue_xmit = inet6_csk_xmit,
.send_check = tcp_v6_send_check,
.rebuild_header = inet6_sk_rebuild_header,
@@ -1869,12 +1952,13 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
state = inet_sk_state_load(sp);
if (state == TCP_LISTEN)
- rx_queue = sp->sk_ack_backlog;
+ rx_queue = READ_ONCE(sp->sk_ack_backlog);
else
/* Because we don't lock the socket,
* we might find a transient negative value.
*/
- rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
+ rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
+ READ_ONCE(tp->copied_seq), 0);
seq_printf(seq,
"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
@@ -1885,7 +1969,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
dest->s6_addr32[0], dest->s6_addr32[1],
dest->s6_addr32[2], dest->s6_addr32[3], destp,
state,
- tp->write_seq - tp->snd_una,
+ READ_ONCE(tp->write_seq) - tp->snd_una,
rx_queue,
timer_active,
jiffies_delta_to_clock_t(timer_expires - jiffies),
@@ -2085,9 +2169,16 @@ int __init tcpv6_init(void)
ret = register_pernet_subsys(&tcpv6_net_ops);
if (ret)
goto out_tcpv6_protosw;
+
+ ret = mptcpv6_init();
+ if (ret)
+ goto out_tcpv6_pernet_subsys;
+
out:
return ret;
+out_tcpv6_pernet_subsys:
+ unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 827fe7385078..5dc439a391fe 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -135,7 +135,7 @@ static int compute_score(struct sock *sk, struct net *net,
return -1;
score++;
- if (sk->sk_incoming_cpu == raw_smp_processor_id())
+ if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
score++;
return score;
@@ -158,13 +158,14 @@ static struct sock *udp6_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif);
if (score > badness) {
- if (sk->sk_reuseport) {
+ if (sk->sk_reuseport &&
+ sk->sk_state != TCP_ESTABLISHED) {
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
- if (result)
+ if (result && !reuseport_has_conns(sk, false))
return result;
}
result = sk;
@@ -689,8 +690,7 @@ static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
__skb_push(skb, -skb_mac_offset(skb));
segs = udp_rcv_segment(sk, skb, false);
- for (skb = segs; skb; skb = next) {
- next = skb->next;
+ skb_list_walk_safe(segs, skb, next) {
__skb_pull(skb, skb_transport_offset(skb));
ret = udpv6_queue_rcv_one_skb(sk, skb);
@@ -1108,6 +1108,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
__wsum csum = 0;
int offset = skb_transport_offset(skb);
int len = skb->len - offset;
+ int datalen = len - sizeof(*uh);
/*
* Create a UDP header
@@ -1140,8 +1141,12 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
return -EIO;
}
- skb_shinfo(skb)->gso_size = cork->gso_size;
- skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
+ if (datalen > cork->gso_size) {
+ skb_shinfo(skb)->gso_size = cork->gso_size;
+ skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
+ skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(datalen,
+ cork->gso_size);
+ }
goto csum_partial;
}
@@ -1230,6 +1235,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipcm6_init(&ipc6);
ipc6.gso_size = up->gso_size;
ipc6.sockc.tsflags = sk->sk_tsflags;
+ ipc6.sockc.mark = sk->sk_mark;
/* destination address check */
if (sin6) {
@@ -1352,7 +1358,7 @@ do_udp_sendmsg:
if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
- fl6.flowi6_mark = sk->sk_mark;
+ fl6.flowi6_mark = ipc6.sockc.mark;
fl6.flowi6_uid = sk->sk_uid;
if (msg->msg_controllen) {
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 64b8f05d6735..584157a07759 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -115,8 +115,10 @@ INDIRECT_CALLABLE_SCOPE
struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
{
struct udphdr *uh = udp_gro_udphdr(skb);
+ struct sk_buff *pp;
+ struct sock *sk;
- if (unlikely(!uh) || !static_branch_unlikely(&udpv6_encap_needed_key))
+ if (unlikely(!uh))
goto flush;
/* Don't bother verifying checksum if we're going to flush anyway. */
@@ -127,12 +129,16 @@ struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
ip6_gro_compute_pseudo))
goto flush;
else if (uh->check)
- skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
+ skb_gro_checksum_try_convert(skb, IPPROTO_UDP,
ip6_gro_compute_pseudo);
skip:
NAPI_GRO_CB(skb)->is_ipv6 = 1;
- return udp_gro_receive(head, skb, uh, udp6_lib_lookup_skb);
+ rcu_read_lock();
+ sk = static_branch_unlikely(&udpv6_encap_needed_key) ? udp6_lib_lookup_skb(skb, uh->source, uh->dest) : NULL;
+ pp = udp_gro_receive(head, skb, uh, sk);
+ rcu_read_unlock();
+ return pp;
flush:
NAPI_GRO_CB(skb)->flush = 1;
@@ -144,6 +150,23 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
+ if (NAPI_GRO_CB(skb)->is_flist) {
+ uh->len = htons(skb->len - nhoff);
+
+ skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+ if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+ if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
+ skb->csum_level++;
+ } else {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->csum_level = 0;
+ }
+
+ return 0;
+ }
+
if (uh->check)
uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr,
&ipv6h->daddr, 0);
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index eecac1b7148e..fbe51d40bd7e 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -187,7 +187,7 @@ skip_frag:
int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
- net, sk, skb, NULL, skb_dst(skb)->dev,
+ net, sk, skb, skb->dev, skb_dst(skb)->dev,
__xfrm6_output,
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 699e0730ce8e..af7a4b8b1e9c 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -98,12 +98,13 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
}
static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb, u32 mtu)
+ struct sk_buff *skb, u32 mtu,
+ bool confirm_neigh)
{
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
struct dst_entry *path = xdst->route;
- path->ops->update_pmtu(path, sk, skb, mtu);
+ path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh);
}
static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk,
OpenPOWER on IntegriCloud