summaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/addrconf.c321
-rw-r--r--net/ipv6/addrlabel.c34
-rw-r--r--net/ipv6/af_inet6.c23
-rw-r--r--net/ipv6/anycast.c80
-rw-r--r--net/ipv6/esp6.c7
-rw-r--r--net/ipv6/ip6_checksum.c20
-rw-r--r--net/ipv6/ip6_fib.c80
-rw-r--r--net/ipv6/ip6_gre.c27
-rw-r--r--net/ipv6/ip6_input.c7
-rw-r--r--net/ipv6/ip6_offload.c1
-rw-r--r--net/ipv6/ip6_output.c50
-rw-r--r--net/ipv6/ip6_tunnel.c29
-rw-r--r--net/ipv6/ip6_udp_tunnel.c3
-rw-r--r--net/ipv6/ip6_vti.c5
-rw-r--r--net/ipv6/ip6mr.c81
-rw-r--r--net/ipv6/ipv6_sockglue.c11
-rw-r--r--net/ipv6/mcast.c18
-rw-r--r--net/ipv6/ndisc.c7
-rw-r--r--net/ipv6/netfilter.c3
-rw-r--r--net/ipv6/netfilter/ip6t_MASQUERADE.c8
-rw-r--r--net/ipv6/netfilter/ip6t_ipv6header.c5
-rw-r--r--net/ipv6/netfilter/ip6t_rt.c10
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c24
-rw-r--r--net/ipv6/netfilter/nf_nat_masquerade_ipv6.c68
-rw-r--r--net/ipv6/netfilter/nft_masq_ipv6.c4
-rw-r--r--net/ipv6/raw.c31
-rw-r--r--net/ipv6/reassembly.c22
-rw-r--r--net/ipv6/route.c322
-rw-r--r--net/ipv6/seg6_iptunnel.c1
-rw-r--r--net/ipv6/sit.c6
-rw-r--r--net/ipv6/udp.c67
-rw-r--r--net/ipv6/udp_offload.c2
-rw-r--r--net/ipv6/xfrm6_input.c1
-rw-r--r--net/ipv6/xfrm6_mode_transport.c4
-rw-r--r--net/ipv6/xfrm6_output.c2
-rw-r--r--net/ipv6/xfrm6_policy.c4
36 files changed, 960 insertions, 428 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2fac4ad74867..045597b9a7c0 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -179,7 +179,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp);
static void addrconf_dad_work(struct work_struct *w);
static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
bool send_na);
-static void addrconf_dad_run(struct inet6_dev *idev);
+static void addrconf_dad_run(struct inet6_dev *idev, bool restart);
static void addrconf_rs_timer(struct timer_list *t);
static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
@@ -666,6 +666,7 @@ errout:
static int inet6_netconf_dump_devconf(struct sk_buff *skb,
struct netlink_callback *cb)
{
+ const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
int h, s_h;
int idx, s_idx;
@@ -673,6 +674,21 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb,
struct inet6_dev *idev;
struct hlist_head *head;
+ if (cb->strict_check) {
+ struct netlink_ext_ack *extack = cb->extack;
+ struct netconfmsg *ncm;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid header for netconf dump request");
+ return -EINVAL;
+ }
+
+ if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid data after header in netconf dump request");
+ return -EINVAL;
+ }
+ }
+
s_h = cb->args[0];
s_idx = idx = cb->args[1];
@@ -692,7 +708,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb,
if (inet6_netconf_fill_devconf(skb, dev->ifindex,
&idev->cnf,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
+ nlh->nlmsg_seq,
RTM_NEWNETCONF,
NLM_F_MULTI,
NETCONFA_ALL) < 0) {
@@ -709,7 +725,7 @@ cont:
if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
+ nlh->nlmsg_seq,
RTM_NEWNETCONF, NLM_F_MULTI,
NETCONFA_ALL) < 0)
goto done;
@@ -720,7 +736,7 @@ cont:
if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
net->ipv6.devconf_dflt,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
+ nlh->nlmsg_seq,
RTM_NEWNETCONF, NLM_F_MULTI,
NETCONFA_ALL) < 0)
goto done;
@@ -997,6 +1013,7 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
if (addr_type == IPV6_ADDR_ANY ||
addr_type & IPV6_ADDR_MULTICAST ||
(!(idev->dev->flags & IFF_LOOPBACK) &&
+ !netif_is_l3_master(idev->dev) &&
addr_type & IPV6_ADDR_LOOPBACK))
return ERR_PTR(-EADDRNOTAVAIL);
@@ -2398,7 +2415,7 @@ static void addrconf_add_mroute(struct net_device *dev)
ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);
- ip6_route_add(&cfg, GFP_ATOMIC, NULL);
+ ip6_route_add(&cfg, GFP_KERNEL, NULL);
}
static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
@@ -3062,7 +3079,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
if (addr.s6_addr32[3]) {
add_addr(idev, &addr, plen, scope);
addrconf_prefix_route(&addr, plen, 0, idev->dev, 0, pflags,
- GFP_ATOMIC);
+ GFP_KERNEL);
return;
}
@@ -3087,7 +3104,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
add_addr(idev, &addr, plen, flag);
addrconf_prefix_route(&addr, plen, 0, idev->dev,
- 0, pflags, GFP_ATOMIC);
+ 0, pflags, GFP_KERNEL);
}
}
}
@@ -3422,6 +3439,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct netdev_notifier_change_info *change_info;
struct netdev_notifier_changeupper_info *info;
struct inet6_dev *idev = __in6_dev_get(dev);
struct net *net = dev_net(dev);
@@ -3496,7 +3514,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
break;
}
- if (idev) {
+ if (!IS_ERR_OR_NULL(idev)) {
if (idev->if_flags & IF_READY) {
/* device is already configured -
* but resend MLD reports, we might
@@ -3504,6 +3522,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
* multicast snooping switches
*/
ipv6_mc_up(idev);
+ change_info = ptr;
+ if (change_info->flags_changed & IFF_NOARP)
+ addrconf_dad_run(idev, true);
rt6_sync_up(dev, RTNH_F_LINKDOWN);
break;
}
@@ -3538,7 +3559,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (!IS_ERR_OR_NULL(idev)) {
if (run_pending)
- addrconf_dad_run(idev);
+ addrconf_dad_run(idev, false);
/* Device has an address by now */
rt6_sync_up(dev, RTNH_F_DEAD);
@@ -4156,16 +4177,19 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
addrconf_verify_rtnl();
}
-static void addrconf_dad_run(struct inet6_dev *idev)
+static void addrconf_dad_run(struct inet6_dev *idev, bool restart)
{
struct inet6_ifaddr *ifp;
read_lock_bh(&idev->lock);
list_for_each_entry(ifp, &idev->addr_list, if_list) {
spin_lock(&ifp->lock);
- if (ifp->flags & IFA_F_TENTATIVE &&
- ifp->state == INET6_IFADDR_STATE_DAD)
+ if ((ifp->flags & IFA_F_TENTATIVE &&
+ ifp->state == INET6_IFADDR_STATE_DAD) || restart) {
+ if (restart)
+ ifp->state = INET6_IFADDR_STATE_PREDAD;
addrconf_dad_kick(ifp);
+ }
spin_unlock(&ifp->lock);
}
read_unlock_bh(&idev->lock);
@@ -4201,7 +4225,6 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos)
p++;
continue;
}
- state->offset++;
return ifa;
}
@@ -4225,13 +4248,12 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
return ifa;
}
+ state->offset = 0;
while (++state->bucket < IN6_ADDR_HSIZE) {
- state->offset = 0;
hlist_for_each_entry_rcu(ifa,
&inet6_addr_lst[state->bucket], addr_lst) {
if (!net_eq(dev_net(ifa->idev->dev), net))
continue;
- state->offset++;
return ifa;
}
}
@@ -4491,6 +4513,7 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {
[IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
[IFA_FLAGS] = { .len = sizeof(u32) },
[IFA_RT_PRIORITY] = { .len = sizeof(u32) },
+ [IFA_TARGET_NETNSID] = { .type = NLA_S32 },
};
static int
@@ -4793,19 +4816,40 @@ static inline int inet6_ifaddr_msgsize(void)
+ nla_total_size(4) /* IFA_RT_PRIORITY */;
}
+enum addr_type_t {
+ UNICAST_ADDR,
+ MULTICAST_ADDR,
+ ANYCAST_ADDR,
+};
+
+struct inet6_fill_args {
+ u32 portid;
+ u32 seq;
+ int event;
+ unsigned int flags;
+ int netnsid;
+ int ifindex;
+ enum addr_type_t type;
+};
+
static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
- u32 portid, u32 seq, int event, unsigned int flags)
+ struct inet6_fill_args *args)
{
struct nlmsghdr *nlh;
u32 preferred, valid;
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
+ nlh = nlmsg_put(skb, args->portid, args->seq, args->event,
+ sizeof(struct ifaddrmsg), args->flags);
if (!nlh)
return -EMSGSIZE;
put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope),
ifa->idev->dev->ifindex);
+ if (args->netnsid >= 0 &&
+ nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
+ goto error;
+
if (!((ifa->flags&IFA_F_PERMANENT) &&
(ifa->prefered_lft == INFINITY_LIFE_TIME))) {
preferred = ifa->prefered_lft;
@@ -4855,7 +4899,7 @@ error:
}
static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
- u32 portid, u32 seq, int event, u16 flags)
+ struct inet6_fill_args *args)
{
struct nlmsghdr *nlh;
u8 scope = RT_SCOPE_UNIVERSE;
@@ -4864,10 +4908,15 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
scope = RT_SCOPE_SITE;
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
+ nlh = nlmsg_put(skb, args->portid, args->seq, args->event,
+ sizeof(struct ifaddrmsg), args->flags);
if (!nlh)
return -EMSGSIZE;
+ if (args->netnsid >= 0 &&
+ nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
+ return -EMSGSIZE;
+
put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
if (nla_put_in6_addr(skb, IFA_MULTICAST, &ifmca->mca_addr) < 0 ||
put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp,
@@ -4881,7 +4930,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
}
static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
- u32 portid, u32 seq, int event, unsigned int flags)
+ struct inet6_fill_args *args)
{
struct net_device *dev = fib6_info_nh_dev(ifaca->aca_rt);
int ifindex = dev ? dev->ifindex : 1;
@@ -4891,10 +4940,15 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
scope = RT_SCOPE_SITE;
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
+ nlh = nlmsg_put(skb, args->portid, args->seq, args->event,
+ sizeof(struct ifaddrmsg), args->flags);
if (!nlh)
return -EMSGSIZE;
+ if (args->netnsid >= 0 &&
+ nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
+ return -EMSGSIZE;
+
put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
if (nla_put_in6_addr(skb, IFA_ANYCAST, &ifaca->aca_addr) < 0 ||
put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp,
@@ -4907,68 +4961,56 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
return 0;
}
-enum addr_type_t {
- UNICAST_ADDR,
- MULTICAST_ADDR,
- ANYCAST_ADDR,
-};
-
/* called with rcu_read_lock() */
static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
- struct netlink_callback *cb, enum addr_type_t type,
- int s_ip_idx, int *p_ip_idx)
+ struct netlink_callback *cb, int s_ip_idx,
+ struct inet6_fill_args *fillargs)
{
struct ifmcaddr6 *ifmca;
struct ifacaddr6 *ifaca;
+ int ip_idx = 0;
int err = 1;
- int ip_idx = *p_ip_idx;
read_lock_bh(&idev->lock);
- switch (type) {
+ switch (fillargs->type) {
case UNICAST_ADDR: {
struct inet6_ifaddr *ifa;
+ fillargs->event = RTM_NEWADDR;
/* unicast address incl. temp addr */
list_for_each_entry(ifa, &idev->addr_list, if_list) {
- if (++ip_idx < s_ip_idx)
- continue;
- err = inet6_fill_ifaddr(skb, ifa,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWADDR,
- NLM_F_MULTI);
+ if (ip_idx < s_ip_idx)
+ goto next;
+ err = inet6_fill_ifaddr(skb, ifa, fillargs);
if (err < 0)
break;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+next:
+ ip_idx++;
}
break;
}
case MULTICAST_ADDR:
+ fillargs->event = RTM_GETMULTICAST;
+
/* multicast address */
for (ifmca = idev->mc_list; ifmca;
ifmca = ifmca->next, ip_idx++) {
if (ip_idx < s_ip_idx)
continue;
- err = inet6_fill_ifmcaddr(skb, ifmca,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_GETMULTICAST,
- NLM_F_MULTI);
+ err = inet6_fill_ifmcaddr(skb, ifmca, fillargs);
if (err < 0)
break;
}
break;
case ANYCAST_ADDR:
+ fillargs->event = RTM_GETANYCAST;
/* anycast address */
for (ifaca = idev->ac_list; ifaca;
ifaca = ifaca->aca_next, ip_idx++) {
if (ip_idx < s_ip_idx)
continue;
- err = inet6_fill_ifacaddr(skb, ifaca,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_GETANYCAST,
- NLM_F_MULTI);
+ err = inet6_fill_ifacaddr(skb, ifaca, fillargs);
if (err < 0)
break;
}
@@ -4977,42 +5019,128 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
break;
}
read_unlock_bh(&idev->lock);
- *p_ip_idx = ip_idx;
+ cb->args[2] = ip_idx;
return err;
}
+static int inet6_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
+ struct inet6_fill_args *fillargs,
+ struct net **tgt_net, struct sock *sk,
+ struct netlink_callback *cb)
+{
+ struct netlink_ext_ack *extack = cb->extack;
+ struct nlattr *tb[IFA_MAX+1];
+ struct ifaddrmsg *ifm;
+ int err, i;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid header for address dump request");
+ return -EINVAL;
+ }
+
+ ifm = nlmsg_data(nlh);
+ if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for address dump request");
+ return -EINVAL;
+ }
+
+ fillargs->ifindex = ifm->ifa_index;
+ if (fillargs->ifindex) {
+ cb->answer_flags |= NLM_F_DUMP_FILTERED;
+ fillargs->flags |= NLM_F_DUMP_FILTERED;
+ }
+
+ err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
+ ifa_ipv6_policy, extack);
+ if (err < 0)
+ return err;
+
+ for (i = 0; i <= IFA_MAX; ++i) {
+ if (!tb[i])
+ continue;
+
+ if (i == IFA_TARGET_NETNSID) {
+ struct net *net;
+
+ fillargs->netnsid = nla_get_s32(tb[i]);
+ net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
+ if (IS_ERR(net)) {
+ fillargs->netnsid = -1;
+ NL_SET_ERR_MSG_MOD(extack, "Invalid target network namespace id");
+ return PTR_ERR(net);
+ }
+ *tgt_net = net;
+ } else {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in dump request");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
enum addr_type_t type)
{
+ const struct nlmsghdr *nlh = cb->nlh;
+ struct inet6_fill_args fillargs = {
+ .portid = NETLINK_CB(cb->skb).portid,
+ .seq = cb->nlh->nlmsg_seq,
+ .flags = NLM_F_MULTI,
+ .netnsid = -1,
+ .type = type,
+ };
struct net *net = sock_net(skb->sk);
+ struct net *tgt_net = net;
+ int idx, s_idx, s_ip_idx;
int h, s_h;
- int idx, ip_idx;
- int s_idx, s_ip_idx;
struct net_device *dev;
struct inet6_dev *idev;
struct hlist_head *head;
+ int err = 0;
s_h = cb->args[0];
s_idx = idx = cb->args[1];
- s_ip_idx = ip_idx = cb->args[2];
+ s_ip_idx = cb->args[2];
+
+ if (cb->strict_check) {
+ err = inet6_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
+ skb->sk, cb);
+ if (err < 0)
+ goto put_tgt_net;
+
+ err = 0;
+ if (fillargs.ifindex) {
+ dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
+ if (!dev) {
+ err = -ENODEV;
+ goto put_tgt_net;
+ }
+ idev = __in6_dev_get(dev);
+ if (idev) {
+ err = in6_dump_addrs(idev, skb, cb, s_ip_idx,
+ &fillargs);
+ }
+ goto put_tgt_net;
+ }
+ }
rcu_read_lock();
- cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ net->dev_base_seq;
+ cb->seq = atomic_read(&tgt_net->ipv6.dev_addr_genid) ^ tgt_net->dev_base_seq;
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
- head = &net->dev_index_head[h];
+ head = &tgt_net->dev_index_head[h];
hlist_for_each_entry_rcu(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
if (h > s_h || idx > s_idx)
s_ip_idx = 0;
- ip_idx = 0;
idev = __in6_dev_get(dev);
if (!idev)
goto cont;
- if (in6_dump_addrs(idev, skb, cb, type,
- s_ip_idx, &ip_idx) < 0)
+ if (in6_dump_addrs(idev, skb, cb, s_ip_idx,
+ &fillargs) < 0)
goto done;
cont:
idx++;
@@ -5022,9 +5150,11 @@ done:
rcu_read_unlock();
cb->args[0] = h;
cb->args[1] = idx;
- cb->args[2] = ip_idx;
+put_tgt_net:
+ if (fillargs.netnsid >= 0)
+ put_net(tgt_net);
- return skb->len;
+ return err < 0 ? err : skb->len;
}
static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
@@ -5053,6 +5183,14 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(in_skb->sk);
+ struct inet6_fill_args fillargs = {
+ .portid = NETLINK_CB(in_skb).portid,
+ .seq = nlh->nlmsg_seq,
+ .event = RTM_NEWADDR,
+ .flags = 0,
+ .netnsid = -1,
+ };
+ struct net *tgt_net = net;
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
struct in6_addr *addr = NULL, *peer;
@@ -5066,15 +5204,24 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
+ if (tb[IFA_TARGET_NETNSID]) {
+ fillargs.netnsid = nla_get_s32(tb[IFA_TARGET_NETNSID]);
+
+ tgt_net = rtnl_get_net_ns_capable(NETLINK_CB(in_skb).sk,
+ fillargs.netnsid);
+ if (IS_ERR(tgt_net))
+ return PTR_ERR(tgt_net);
+ }
+
addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer);
if (!addr)
return -EINVAL;
ifm = nlmsg_data(nlh);
if (ifm->ifa_index)
- dev = dev_get_by_index(net, ifm->ifa_index);
+ dev = dev_get_by_index(tgt_net, ifm->ifa_index);
- ifa = ipv6_get_ifaddr(net, addr, dev, 1);
+ ifa = ipv6_get_ifaddr(tgt_net, addr, dev, 1);
if (!ifa) {
err = -EADDRNOTAVAIL;
goto errout;
@@ -5086,20 +5233,22 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
goto errout_ifa;
}
- err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq, RTM_NEWADDR, 0);
+ err = inet6_fill_ifaddr(skb, ifa, &fillargs);
if (err < 0) {
/* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */
WARN_ON(err == -EMSGSIZE);
kfree_skb(skb);
goto errout_ifa;
}
- err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
+ err = rtnl_unicast(skb, tgt_net, NETLINK_CB(in_skb).portid);
errout_ifa:
in6_ifa_put(ifa);
errout:
if (dev)
dev_put(dev);
+ if (fillargs.netnsid >= 0)
+ put_net(tgt_net);
+
return err;
}
@@ -5107,13 +5256,20 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
{
struct sk_buff *skb;
struct net *net = dev_net(ifa->idev->dev);
+ struct inet6_fill_args fillargs = {
+ .portid = 0,
+ .seq = 0,
+ .event = event,
+ .flags = 0,
+ .netnsid = -1,
+ };
int err = -ENOBUFS;
skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC);
if (!skb)
goto errout;
- err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0);
+ err = inet6_fill_ifaddr(skb, ifa, &fillargs);
if (err < 0) {
/* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */
WARN_ON(err == -EMSGSIZE);
@@ -5529,6 +5685,31 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int inet6_valid_dump_ifinfo(const struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct ifinfomsg *ifm;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid header for link dump request");
+ return -EINVAL;
+ }
+
+ if (nlmsg_attrlen(nlh, sizeof(*ifm))) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid data after header");
+ return -EINVAL;
+ }
+
+ ifm = nlmsg_data(nlh);
+ if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags ||
+ ifm->ifi_change || ifm->ifi_index) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for dump request");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
@@ -5538,6 +5719,16 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
struct inet6_dev *idev;
struct hlist_head *head;
+ /* only requests using strict checking can pass data to
+ * influence the dump
+ */
+ if (cb->strict_check) {
+ int err = inet6_valid_dump_ifinfo(cb->nlh, cb->extack);
+
+ if (err < 0)
+ return err;
+ }
+
s_h = cb->args[0];
s_idx = cb->args[1];
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 1d6ced37ad71..0d1ee82ee55b 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -458,20 +458,52 @@ static int ip6addrlbl_fill(struct sk_buff *skb,
return 0;
}
+static int ip6addrlbl_valid_dump_req(const struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct ifaddrlblmsg *ifal;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifal))) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid header for address label dump request");
+ return -EINVAL;
+ }
+
+ ifal = nlmsg_data(nlh);
+ if (ifal->__ifal_reserved || ifal->ifal_prefixlen ||
+ ifal->ifal_flags || ifal->ifal_index || ifal->ifal_seq) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for address label dump request");
+ return -EINVAL;
+ }
+
+ if (nlmsg_attrlen(nlh, sizeof(*ifal))) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid data after header for address label dump requewst");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
+ const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
struct ip6addrlbl_entry *p;
int idx = 0, s_idx = cb->args[0];
int err;
+ if (cb->strict_check) {
+ err = ip6addrlbl_valid_dump_req(nlh, cb->extack);
+ if (err < 0)
+ return err;
+ }
+
rcu_read_lock();
hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) {
if (idx >= s_idx) {
err = ip6addrlbl_fill(skb, p,
net->ipv6.ip6addrlbl_table.seq,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
+ nlh->nlmsg_seq,
RTM_NEWADDRLABEL,
NLM_F_MULTI);
if (err < 0)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 673bba31eb18..f0cd291034f0 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -209,6 +209,7 @@ lookup_protocol:
np->hop_limit = -1;
np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
np->mc_loop = 1;
+ np->mc_all = 1;
np->pmtudisc = IPV6_PMTUDISC_WANT;
np->repflow = net->ipv6.sysctl.flowlabel_reflect;
sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
@@ -467,12 +468,10 @@ void inet6_destroy_sock(struct sock *sk)
/* Release rx options */
skb = xchg(&np->pktoptions, NULL);
- if (skb)
- kfree_skb(skb);
+ kfree_skb(skb);
skb = xchg(&np->rxpmtu, NULL);
- if (skb)
- kfree_skb(skb);
+ kfree_skb(skb);
/* Free flowlabels */
fl6_free_socklist(sk);
@@ -902,6 +901,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
.inet6_bind = __inet6_bind,
+ .udp6_lib_lookup = __udp6_lib_lookup,
};
static int __init inet6_init(void)
@@ -938,14 +938,14 @@ static int __init inet6_init(void)
err = proto_register(&pingv6_prot, 1);
if (err)
- goto out_unregister_ping_proto;
+ goto out_unregister_raw_proto;
/* We MUST register RAW sockets before we create the ICMP6,
* IGMP6, or NDISC control sockets.
*/
err = rawv6_init();
if (err)
- goto out_unregister_raw_proto;
+ goto out_unregister_ping_proto;
/* Register the family here so that the init calls below will
* be able to create sockets. (?? is this dangerous ??)
@@ -1001,6 +1001,9 @@ static int __init inet6_init(void)
err = ip6_flowlabel_init();
if (err)
goto ip6_flowlabel_fail;
+ err = ipv6_anycast_init();
+ if (err)
+ goto ipv6_anycast_fail;
err = addrconf_init();
if (err)
goto addrconf_fail;
@@ -1091,6 +1094,8 @@ ipv6_frag_fail:
ipv6_exthdrs_fail:
addrconf_cleanup();
addrconf_fail:
+ ipv6_anycast_cleanup();
+ipv6_anycast_fail:
ip6_flowlabel_cleanup();
ip6_flowlabel_fail:
ndisc_late_cleanup();
@@ -1113,11 +1118,11 @@ netfilter_fail:
igmp_fail:
ndisc_cleanup();
ndisc_fail:
- ip6_mr_cleanup();
+ icmpv6_cleanup();
icmp_fail:
- unregister_pernet_subsys(&inet6_net_ops);
+ ip6_mr_cleanup();
ipmr_fail:
- icmpv6_cleanup();
+ unregister_pernet_subsys(&inet6_net_ops);
register_pernet_fail:
sock_unregister(PF_INET6);
rtnl_unregister_all(PF_INET6);
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 4e0ff7031edd..94999058e110 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -44,8 +44,22 @@
#include <net/checksum.h>
+#define IN6_ADDR_HSIZE_SHIFT 8
+#define IN6_ADDR_HSIZE BIT(IN6_ADDR_HSIZE_SHIFT)
+/* anycast address hash table
+ */
+static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
+static DEFINE_SPINLOCK(acaddr_hash_lock);
+
static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
+static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+ u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+
+ return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
+}
+
/*
* socket join an anycast group
*/
@@ -204,16 +218,39 @@ void ipv6_sock_ac_close(struct sock *sk)
rtnl_unlock();
}
+static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca)
+{
+ unsigned int hash = inet6_acaddr_hash(net, &aca->aca_addr);
+
+ spin_lock(&acaddr_hash_lock);
+ hlist_add_head_rcu(&aca->aca_addr_lst, &inet6_acaddr_lst[hash]);
+ spin_unlock(&acaddr_hash_lock);
+}
+
+static void ipv6_del_acaddr_hash(struct ifacaddr6 *aca)
+{
+ spin_lock(&acaddr_hash_lock);
+ hlist_del_init_rcu(&aca->aca_addr_lst);
+ spin_unlock(&acaddr_hash_lock);
+}
+
static void aca_get(struct ifacaddr6 *aca)
{
refcount_inc(&aca->aca_refcnt);
}
+static void aca_free_rcu(struct rcu_head *h)
+{
+ struct ifacaddr6 *aca = container_of(h, struct ifacaddr6, rcu);
+
+ fib6_info_release(aca->aca_rt);
+ kfree(aca);
+}
+
static void aca_put(struct ifacaddr6 *ac)
{
if (refcount_dec_and_test(&ac->aca_refcnt)) {
- fib6_info_release(ac->aca_rt);
- kfree(ac);
+ call_rcu(&ac->rcu, aca_free_rcu);
}
}
@@ -229,6 +266,7 @@ static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i,
aca->aca_addr = *addr;
fib6_info_hold(f6i);
aca->aca_rt = f6i;
+ INIT_HLIST_NODE(&aca->aca_addr_lst);
aca->aca_users = 1;
/* aca_tstamp should be updated upon changes */
aca->aca_cstamp = aca->aca_tstamp = jiffies;
@@ -285,6 +323,8 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
aca_get(aca);
write_unlock_bh(&idev->lock);
+ ipv6_add_acaddr_hash(net, aca);
+
ip6_ins_rt(net, f6i);
addrconf_join_solict(idev->dev, &aca->aca_addr);
@@ -325,6 +365,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
else
idev->ac_list = aca->aca_next;
write_unlock_bh(&idev->lock);
+ ipv6_del_acaddr_hash(aca);
addrconf_leave_solict(idev, &aca->aca_addr);
ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
@@ -352,6 +393,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
idev->ac_list = aca->aca_next;
write_unlock_bh(&idev->lock);
+ ipv6_del_acaddr_hash(aca);
+
addrconf_leave_solict(idev, &aca->aca_addr);
ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
@@ -390,17 +433,25 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad
bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
const struct in6_addr *addr)
{
+ unsigned int hash = inet6_acaddr_hash(net, addr);
+ struct net_device *nh_dev;
+ struct ifacaddr6 *aca;
bool found = false;
rcu_read_lock();
if (dev)
found = ipv6_chk_acast_dev(dev, addr);
else
- for_each_netdev_rcu(net, dev)
- if (ipv6_chk_acast_dev(dev, addr)) {
+ hlist_for_each_entry_rcu(aca, &inet6_acaddr_lst[hash],
+ aca_addr_lst) {
+ nh_dev = fib6_info_nh_dev(aca->aca_rt);
+ if (!nh_dev || !net_eq(dev_net(nh_dev), net))
+ continue;
+ if (ipv6_addr_equal(&aca->aca_addr, addr)) {
found = true;
break;
}
+ }
rcu_read_unlock();
return found;
}
@@ -540,3 +591,24 @@ void ac6_proc_exit(struct net *net)
remove_proc_entry("anycast6", net->proc_net);
}
#endif
+
+/* Init / cleanup code
+ */
+int __init ipv6_anycast_init(void)
+{
+ int i;
+
+ for (i = 0; i < IN6_ADDR_HSIZE; i++)
+ INIT_HLIST_HEAD(&inet6_acaddr_lst[i]);
+ return 0;
+}
+
+void ipv6_anycast_cleanup(void)
+{
+ int i;
+
+ spin_lock(&acaddr_hash_lock);
+ for (i = 0; i < IN6_ADDR_HSIZE; i++)
+ WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
+ spin_unlock(&acaddr_hash_lock);
+}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 88a7579c23bd..63b2b66f9dfa 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -601,12 +601,11 @@ static void esp_input_done_esn(struct crypto_async_request *base, int err)
static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
{
- struct ip_esp_hdr *esph;
struct crypto_aead *aead = x->data;
struct aead_request *req;
struct sk_buff *trailer;
int ivlen = crypto_aead_ivsize(aead);
- int elen = skb->len - sizeof(*esph) - ivlen;
+ int elen = skb->len - sizeof(struct ip_esp_hdr) - ivlen;
int nfrags;
int assoclen;
int seqhilen;
@@ -616,7 +615,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
u8 *iv;
struct scatterlist *sg;
- if (!pskb_may_pull(skb, sizeof(*esph) + ivlen)) {
+ if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + ivlen)) {
ret = -EINVAL;
goto out;
}
@@ -626,7 +625,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
goto out;
}
- assoclen = sizeof(*esph);
+ assoclen = sizeof(struct ip_esp_hdr);
seqhilen = 0;
if (x->props.flags & XFRM_STATE_ESN) {
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index 547515e8450a..377717045f8f 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -88,8 +88,24 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
* Note, we are only interested in != 0 or == 0, thus the
* force to int.
*/
- return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
- ip6_compute_pseudo);
+ err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
+ ip6_compute_pseudo);
+ if (err)
+ return err;
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) {
+ /* If SW calculated the value, we know it's bad */
+ if (skb->csum_complete_sw)
+ return 1;
+
+ /* HW says the value is bad. Let's validate that.
+ * skb->csum is no longer the full packet checksum,
+ * so don't treat is as such.
+ */
+ skb_checksum_complete_unset(skb);
+ }
+
+ return 0;
}
EXPORT_SYMBOL(udp6_csum_init);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index d212738e9d10..ae3786132c23 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -29,6 +29,7 @@
#include <linux/list.h>
#include <linux/slab.h>
+#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
@@ -46,6 +47,7 @@ struct fib6_cleaner {
int (*func)(struct fib6_info *, void *arg);
int sernum;
void *arg;
+ bool skip_notify;
};
#ifdef CONFIG_IPV6_SUBTREES
@@ -160,8 +162,6 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
}
INIT_LIST_HEAD(&f6i->fib6_siblings);
- f6i->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
-
atomic_inc(&f6i->fib6_ref);
return f6i;
@@ -171,7 +171,6 @@ void fib6_info_destroy_rcu(struct rcu_head *head)
{
struct fib6_info *f6i = container_of(head, struct fib6_info, rcu);
struct rt6_exception_bucket *bucket;
- struct dst_metrics *m;
WARN_ON(f6i->fib6_node);
@@ -196,14 +195,16 @@ void fib6_info_destroy_rcu(struct rcu_head *head)
*ppcpu_rt = NULL;
}
}
+
+ free_percpu(f6i->rt6i_pcpu);
}
+ lwtstate_put(f6i->fib6_nh.nh_lwtstate);
+
if (f6i->fib6_nh.nh_dev)
dev_put(f6i->fib6_nh.nh_dev);
- m = f6i->fib6_metrics;
- if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
- kfree(m);
+ ip_fib_metrics_put(f6i->fib6_metrics);
kfree(f6i);
}
@@ -566,17 +567,31 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
+ const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
+ struct rt6_rtnl_dump_arg arg = {};
unsigned int h, s_h;
unsigned int e = 0, s_e;
- struct rt6_rtnl_dump_arg arg;
struct fib6_walker *w;
struct fib6_table *tb;
struct hlist_head *head;
int res = 0;
- s_h = cb->args[0];
- s_e = cb->args[1];
+ if (cb->strict_check) {
+ int err;
+
+ err = ip_valid_fib_dump_req(net, nlh, &arg.filter, cb);
+ if (err < 0)
+ return err;
+ } else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
+ struct rtmsg *rtm = nlmsg_data(nlh);
+
+ arg.filter.flags = rtm->rtm_flags & (RTM_F_PREFIX|RTM_F_CLONED);
+ }
+
+ /* fib entries are never clones */
+ if (arg.filter.flags & RTM_F_CLONED)
+ goto out;
w = (void *)cb->args[2];
if (!w) {
@@ -602,6 +617,23 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
arg.net = net;
w->args = &arg;
+ if (arg.filter.table_id) {
+ tb = fib6_get_table(net, arg.filter.table_id);
+ if (!tb) {
+ if (arg.filter.dump_all_families)
+ goto out;
+
+ NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist");
+ return -ENOENT;
+ }
+
+ res = fib6_dump_table(tb, skb, cb);
+ goto out;
+ }
+
+ s_h = cb->args[0];
+ s_e = cb->args[1];
+
rcu_read_lock();
for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
e = 0;
@@ -611,16 +643,16 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
goto next;
res = fib6_dump_table(tb, skb, cb);
if (res != 0)
- goto out;
+ goto out_unlock;
next:
e++;
}
}
-out:
+out_unlock:
rcu_read_unlock();
cb->args[1] = e;
cb->args[0] = h;
-
+out:
res = res < 0 ? res : skb->len;
if (res <= 0)
fib6_dump_end(cb);
@@ -987,7 +1019,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
fib6_clean_expires(iter);
else
fib6_set_expires(iter, rt->expires);
- fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu);
+
+ if (rt->fib6_pmtu)
+ fib6_metric_set(iter, RTAX_MTU,
+ rt->fib6_pmtu);
return -EEXIST;
}
/* If we have the same destination and the same metric,
@@ -1947,6 +1982,7 @@ static int fib6_clean_node(struct fib6_walker *w)
struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w);
struct nl_info info = {
.nl_net = c->net,
+ .skip_notify = c->skip_notify,
};
if (c->sernum != FIB6_NO_SERNUM_CHANGE &&
@@ -1998,7 +2034,7 @@ static int fib6_clean_node(struct fib6_walker *w)
static void fib6_clean_tree(struct net *net, struct fib6_node *root,
int (*func)(struct fib6_info *, void *arg),
- int sernum, void *arg)
+ int sernum, void *arg, bool skip_notify)
{
struct fib6_cleaner c;
@@ -2010,13 +2046,14 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root,
c.sernum = sernum;
c.arg = arg;
c.net = net;
+ c.skip_notify = skip_notify;
fib6_walk(net, &c.w);
}
static void __fib6_clean_all(struct net *net,
int (*func)(struct fib6_info *, void *),
- int sernum, void *arg)
+ int sernum, void *arg, bool skip_notify)
{
struct fib6_table *table;
struct hlist_head *head;
@@ -2028,7 +2065,7 @@ static void __fib6_clean_all(struct net *net,
hlist_for_each_entry_rcu(table, head, tb6_hlist) {
spin_lock_bh(&table->tb6_lock);
fib6_clean_tree(net, &table->tb6_root,
- func, sernum, arg);
+ func, sernum, arg, skip_notify);
spin_unlock_bh(&table->tb6_lock);
}
}
@@ -2038,14 +2075,21 @@ static void __fib6_clean_all(struct net *net,
void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *),
void *arg)
{
- __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg);
+ __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, false);
+}
+
+void fib6_clean_all_skip_notify(struct net *net,
+ int (*func)(struct fib6_info *, void *),
+ void *arg)
+{
+ __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, true);
}
static void fib6_flush_trees(struct net *net)
{
int new_sernum = fib6_new_sernum(net);
- __fib6_clean_all(net, NULL, new_sernum, NULL);
+ __fib6_clean_all(net, NULL, new_sernum, NULL, false);
}
/*
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 18a3794b0f52..515adbdba1d2 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -427,35 +427,17 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
- const struct gre_base_hdr *greh;
const struct ipv6hdr *ipv6h;
- int grehlen = sizeof(*greh);
+ struct tnl_ptk_info tpi;
struct ip6_tnl *t;
- int key_off = 0;
- __be16 flags;
- __be32 key;
- if (!pskb_may_pull(skb, offset + grehlen))
- return;
- greh = (const struct gre_base_hdr *)(skb->data + offset);
- flags = greh->flags;
- if (flags & (GRE_VERSION | GRE_ROUTING))
+ if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IPV6),
+ offset) < 0)
return;
- if (flags & GRE_CSUM)
- grehlen += 4;
- if (flags & GRE_KEY) {
- key_off = grehlen + offset;
- grehlen += 4;
- }
- if (!pskb_may_pull(skb, offset + grehlen))
- return;
ipv6h = (const struct ipv6hdr *)skb->data;
- greh = (const struct gre_base_hdr *)(skb->data + offset);
- key = key_off ? *(__be32 *)(skb->data + key_off) : 0;
-
t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr,
- key, greh->protocol);
+ tpi.key, tpi.proto);
if (!t)
return;
@@ -1778,6 +1760,7 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
if (data[IFLA_GRE_COLLECT_METADATA])
parms->collect_md = true;
+ parms->erspan_ver = 1;
if (data[IFLA_GRE_ERSPAN_VER])
parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 6242682be876..c1d85830c906 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -95,7 +95,7 @@ static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
list_for_each_entry_safe(skb, next, head, list) {
struct dst_entry *dst;
- list_del(&skb->list);
+ skb_list_del_init(skb);
/* if ingress device is enslaved to an L3 master device pass the
* skb to its handler for processing
*/
@@ -178,7 +178,8 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
*/
if ((ipv6_addr_loopback(&hdr->saddr) ||
ipv6_addr_loopback(&hdr->daddr)) &&
- !(dev->flags & IFF_LOOPBACK))
+ !(dev->flags & IFF_LOOPBACK) &&
+ !netif_is_l3_master(dev))
goto err;
/* RFC4291 Errata ID: 3480
@@ -295,7 +296,7 @@ void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
struct net_device *dev = skb->dev;
struct net *net = dev_net(dev);
- list_del(&skb->list);
+ skb_list_del_init(skb);
skb = ip6_rcv_core(skb, dev, net);
if (skb == NULL)
continue;
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 37ff4805b20c..c7e495f12011 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -115,6 +115,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
payload_len = skb->len - nhoff - sizeof(*ipv6h);
ipv6h->payload_len = htons(payload_len);
skb->network_header = (u8 *)ipv6h - skb->head;
+ skb_reset_mac_len(skb);
if (udpfrag) {
int err = ip6_find_1stfragopt(skb, &prevhdr);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 16f200f06500..4591ca4bdbe8 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -195,39 +195,37 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
const struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_addr *first_hop = &fl6->daddr;
struct dst_entry *dst = skb_dst(skb);
+ unsigned int head_room;
struct ipv6hdr *hdr;
u8 proto = fl6->flowi6_proto;
int seg_len = skb->len;
int hlimit = -1;
u32 mtu;
- if (opt) {
- unsigned int head_room;
+ head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
+ if (opt)
+ head_room += opt->opt_nflen + opt->opt_flen;
- /* First: exthdrs may take lots of space (~8K for now)
- MAX_HEADER is not enough.
- */
- head_room = opt->opt_nflen + opt->opt_flen;
- seg_len += head_room;
- head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
-
- if (skb_headroom(skb) < head_room) {
- struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
- if (!skb2) {
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUTDISCARDS);
- kfree_skb(skb);
- return -ENOBUFS;
- }
- consume_skb(skb);
- skb = skb2;
- /* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
- * it is safe to call in our context (socket lock not held)
- */
- skb_set_owner_w(skb, (struct sock *)sk);
+ if (unlikely(skb_headroom(skb) < head_room)) {
+ struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
+ if (!skb2) {
+ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_OUTDISCARDS);
+ kfree_skb(skb);
+ return -ENOBUFS;
}
+ if (skb->sk)
+ skb_set_owner_w(skb2, skb->sk);
+ consume_skb(skb);
+ skb = skb2;
+ }
+
+ if (opt) {
+ seg_len += opt->opt_nflen + opt->opt_flen;
+
if (opt->opt_flen)
ipv6_push_frag_opts(skb, opt, &proto);
+
if (opt->opt_nflen)
ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
&fl6->saddr);
@@ -380,6 +378,7 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
+ skb->tstamp = 0;
return dst_output(net, sk, skb);
}
@@ -727,7 +726,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
skb = frag;
frag = skb->next;
- skb->next = NULL;
+ skb_mark_not_on_list(skb);
}
kfree(tmp_hdr);
@@ -1356,7 +1355,7 @@ emsgsize:
unsigned int fraglen;
unsigned int fraggap;
unsigned int alloclen;
- unsigned int pagedlen = 0;
+ unsigned int pagedlen;
alloc_new_skb:
/* There's no room in the current skb */
if (skb)
@@ -1380,6 +1379,7 @@ alloc_new_skb:
if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
fraglen = datalen + fragheaderlen;
+ pagedlen = 0;
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 5df2a58d945c..a9d06d4dd057 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1184,11 +1184,14 @@ route_lookup:
}
skb_dst_set(skb, dst);
- if (encap_limit >= 0) {
- init_tel_txopt(&opt, encap_limit);
- ipv6_push_frag_opts(skb, &opt.ops, &proto);
+ if (hop_limit == 0) {
+ if (skb->protocol == htons(ETH_P_IP))
+ hop_limit = ip_hdr(skb)->ttl;
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ hop_limit = ipv6_hdr(skb)->hop_limit;
+ else
+ hop_limit = ip6_dst_hoplimit(dst);
}
- hop_limit = hop_limit ? : ip6_dst_hoplimit(dst);
/* Calculate max headroom for all the headers and adjust
* needed_headroom if necessary.
@@ -1202,6 +1205,11 @@ route_lookup:
if (err)
return err;
+ if (encap_limit >= 0) {
+ init_tel_txopt(&opt, encap_limit);
+ ipv6_push_frag_opts(skb, &opt.ops, &proto);
+ }
+
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
ipv6h = ipv6_hdr(skb);
@@ -1226,7 +1234,7 @@ static inline int
ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- const struct iphdr *iph = ip_hdr(skb);
+ const struct iphdr *iph;
int encap_limit = -1;
struct flowi6 fl6;
__u8 dsfield;
@@ -1234,6 +1242,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
u8 tproto;
int err;
+ /* ensure we can access the full inner ip header */
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ return -1;
+
+ iph = ip_hdr(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
tproto = READ_ONCE(t->parms.proto);
@@ -1298,7 +1311,7 @@ static inline int
ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct ipv6hdr *ipv6h;
int encap_limit = -1;
__u16 offset;
struct flowi6 fl6;
@@ -1307,6 +1320,10 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
u8 tproto;
int err;
+ if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
+ return -1;
+
+ ipv6h = ipv6_hdr(skb);
tproto = READ_ONCE(t->parms.proto);
if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
ip6_tnl_addr_conflict(t, ipv6h))
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index b283f293ee4a..caad40d6e74d 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -15,7 +15,7 @@
int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
struct socket **sockp)
{
- struct sockaddr_in6 udp6_addr;
+ struct sockaddr_in6 udp6_addr = {};
int err;
struct socket *sock = NULL;
@@ -42,6 +42,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
goto error;
if (cfg->peer_udp_port) {
+ memset(&udp6_addr, 0, sizeof(udp6_addr));
udp6_addr.sin6_family = AF_INET6;
memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
sizeof(udp6_addr.sin6_addr));
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 38dec9da90d3..eeaf7455d51e 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -481,7 +481,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
}
mtu = dst_mtu(dst);
- if (!skb->ignore_df && skb->len > mtu) {
+ if (skb->len > mtu) {
skb_dst_update_pmtu(skb, mtu);
if (skb->protocol == htons(ETH_P_IPV6)) {
@@ -1094,7 +1094,8 @@ static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n,
}
t = rtnl_dereference(ip6n->tnls_wc[0]);
- unregister_netdevice_queue(t->dev, list);
+ if (t)
+ unregister_netdevice_queue(t->dev, list);
}
static int __net_init vti6_init_net(struct net *net)
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index d0b7e0249c13..377a2ee5d9ad 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -52,6 +52,8 @@
#include <net/ip6_checksum.h>
#include <linux/netconf.h>
+#include <linux/nospec.h>
+
struct ip6mr_rule {
struct fib_rule common;
};
@@ -85,7 +87,8 @@ static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr_table *mrt);
static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
- struct sk_buff *skb, struct mfc6_cache *cache);
+ struct net_device *dev, struct sk_buff *skb,
+ struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert);
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
@@ -138,6 +141,9 @@ static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
.flags = FIB_LOOKUP_NOREF,
};
+ /* update flow if oif or iif point to device enslaved to l3mdev */
+ l3mdev_update_flow(net, flowi6_to_flowi(flp6));
+
err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
flowi6_to_flowi(flp6), 0, &arg);
if (err < 0)
@@ -164,7 +170,9 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
return -EINVAL;
}
- mrt = ip6mr_get_table(rule->fr_net, rule->table);
+ arg->table = fib_rule_get_table(rule, arg);
+
+ mrt = ip6mr_get_table(rule->fr_net, arg->table);
if (!mrt)
return -EAGAIN;
res->mrt = mrt;
@@ -1014,7 +1022,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
}
rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
} else
- ip6_mr_forward(net, mrt, skb, c);
+ ip6_mr_forward(net, mrt, skb->dev, skb, c);
}
}
@@ -1120,7 +1128,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
/* Queue a packet for resolution. It gets locked cache entry! */
static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
- struct sk_buff *skb)
+ struct sk_buff *skb, struct net_device *dev)
{
struct mfc6_cache *c;
bool found = false;
@@ -1180,6 +1188,10 @@ static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
kfree_skb(skb);
err = -ENOBUFS;
} else {
+ if (dev) {
+ skb->dev = dev;
+ skb->skb_iif = dev->ifindex;
+ }
skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
err = 0;
}
@@ -1831,6 +1843,7 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
return -EFAULT;
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
+ vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
read_lock(&mrt_lock);
vif = &mrt->vif_table[vr.mifi];
if (VIF_EXISTS(mrt, vr.mifi)) {
@@ -1905,6 +1918,7 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
return -EFAULT;
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
+ vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
read_lock(&mrt_lock);
vif = &mrt->vif_table[vr.mifi];
if (VIF_EXISTS(mrt, vr.mifi)) {
@@ -2043,11 +2057,12 @@ static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
}
static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
- struct sk_buff *skb, struct mfc6_cache *c)
+ struct net_device *dev, struct sk_buff *skb,
+ struct mfc6_cache *c)
{
int psend = -1;
int vif, ct;
- int true_vifi = ip6mr_find_vif(mrt, skb->dev);
+ int true_vifi = ip6mr_find_vif(mrt, dev);
vif = c->_c.mfc_parent;
c->_c.mfc_un.res.pkt++;
@@ -2073,7 +2088,7 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
/*
* Wrong interface: drop packet and (maybe) send PIM assert.
*/
- if (mrt->vif_table[vif].dev != skb->dev) {
+ if (mrt->vif_table[vif].dev != dev) {
c->_c.mfc_un.res.wrong_if++;
if (true_vifi >= 0 && mrt->mroute_do_assert &&
@@ -2154,6 +2169,19 @@ int ip6_mr_input(struct sk_buff *skb)
.flowi6_mark = skb->mark,
};
int err;
+ struct net_device *dev;
+
+ /* skb->dev passed in is the master dev for vrfs.
+ * Get the proper interface that does have a vif associated with it.
+ */
+ dev = skb->dev;
+ if (netif_is_l3_master(skb->dev)) {
+ dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
+ if (!dev) {
+ kfree_skb(skb);
+ return -ENODEV;
+ }
+ }
err = ip6mr_fib_lookup(net, &fl6, &mrt);
if (err < 0) {
@@ -2165,7 +2193,7 @@ int ip6_mr_input(struct sk_buff *skb)
cache = ip6mr_cache_find(mrt,
&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
if (!cache) {
- int vif = ip6mr_find_vif(mrt, skb->dev);
+ int vif = ip6mr_find_vif(mrt, dev);
if (vif >= 0)
cache = ip6mr_cache_find_any(mrt,
@@ -2179,9 +2207,9 @@ int ip6_mr_input(struct sk_buff *skb)
if (!cache) {
int vif;
- vif = ip6mr_find_vif(mrt, skb->dev);
+ vif = ip6mr_find_vif(mrt, dev);
if (vif >= 0) {
- int err = ip6mr_cache_unresolved(mrt, vif, skb);
+ int err = ip6mr_cache_unresolved(mrt, vif, skb, dev);
read_unlock(&mrt_lock);
return err;
@@ -2191,7 +2219,7 @@ int ip6_mr_input(struct sk_buff *skb)
return -ENODEV;
}
- ip6_mr_forward(net, mrt, skb, cache);
+ ip6_mr_forward(net, mrt, dev, skb, cache);
read_unlock(&mrt_lock);
@@ -2257,7 +2285,7 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
iph->saddr = rt->rt6i_src.addr;
iph->daddr = rt->rt6i_dst.addr;
- err = ip6mr_cache_unresolved(mrt, vif, skb2);
+ err = ip6mr_cache_unresolved(mrt, vif, skb2, dev);
read_unlock(&mrt_lock);
return err;
@@ -2433,6 +2461,33 @@ errout:
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
+ const struct nlmsghdr *nlh = cb->nlh;
+ struct fib_dump_filter filter = {};
+ int err;
+
+ if (cb->strict_check) {
+ err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh,
+ &filter, cb);
+ if (err < 0)
+ return err;
+ }
+
+ if (filter.table_id) {
+ struct mr_table *mrt;
+
+ mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id);
+ if (!mrt) {
+ if (filter.dump_all_families)
+ return skb->len;
+
+ NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
+ return -ENOENT;
+ }
+ err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute,
+ &mfc_unres_lock, &filter);
+ return skb->len ? : err;
+ }
+
return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
- _ip6mr_fill_mroute, &mfc_unres_lock);
+ _ip6mr_fill_mroute, &mfc_unres_lock, &filter);
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index c0cac9cc3a28..381ce38940ae 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -674,6 +674,13 @@ done:
retv = ipv6_sock_ac_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr);
break;
}
+ case IPV6_MULTICAST_ALL:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ np->mc_all = valbool;
+ retv = 0;
+ break;
+
case MCAST_JOIN_GROUP:
case MCAST_LEAVE_GROUP:
{
@@ -1266,6 +1273,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
val = np->mcast_oif;
break;
+ case IPV6_MULTICAST_ALL:
+ val = np->mc_all;
+ break;
+
case IPV6_UNICAST_IF:
val = (__force int)htonl((__u32) np->ucast_oif);
break;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 4ae54aaca373..21f6deb2aec9 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -636,7 +636,7 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
}
if (!mc) {
rcu_read_unlock();
- return true;
+ return np->mc_all;
}
read_lock(&mc->sflock);
psl = mc->sflist;
@@ -2436,17 +2436,17 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
{
int err;
- /* callers have the socket lock and rtnl lock
- * so no other readers or writers of iml or its sflist
- */
+ write_lock_bh(&iml->sflock);
if (!iml->sflist) {
/* any-source empty exclude case */
- return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0);
+ err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0);
+ } else {
+ err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode,
+ iml->sflist->sl_count, iml->sflist->sl_addr, 0);
+ sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max));
+ iml->sflist = NULL;
}
- err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode,
- iml->sflist->sl_count, iml->sflist->sl_addr, 0);
- sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max));
- iml->sflist = NULL;
+ write_unlock_bh(&iml->sflock);
return err;
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 0ec273997d1d..659ecf4e4b3c 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1533,7 +1533,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
if (!ndopts.nd_opts_rh) {
ip6_redirect_no_header(skb, dev_net(skb->dev),
- skb->dev->ifindex, 0);
+ skb->dev->ifindex);
return;
}
@@ -1732,10 +1732,9 @@ int ndisc_rcv(struct sk_buff *skb)
return 0;
}
- memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
-
switch (msg->icmph.icmp6_type) {
case NDISC_NEIGHBOUR_SOLICITATION:
+ memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
ndisc_recv_ns(skb);
break;
@@ -1784,6 +1783,8 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
change_info = ptr;
if (change_info->flags_changed & IFF_NOARP)
neigh_changeaddr(&nd_tbl, dev);
+ if (!netif_carrier_ok(dev))
+ neigh_carrier_down(&nd_tbl, dev);
break;
case NETDEV_DOWN:
neigh_ifdown(&nd_tbl, dev);
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 5ae8e1c51079..8b075f0bc351 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -24,7 +24,8 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
unsigned int hh_len;
struct dst_entry *dst;
struct flowi6 fl6 = {
- .flowi6_oif = sk ? sk->sk_bound_dev_if : 0,
+ .flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if :
+ rt6_need_strict(&iph->daddr) ? skb_dst(skb)->dev->ifindex : 0,
.flowi6_mark = skb->mark,
.flowi6_uid = sock_net_uid(net, sk),
.daddr = iph->daddr,
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 491f808e356a..29c7f1915a96 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -58,8 +58,12 @@ static int __init masquerade_tg6_init(void)
int err;
err = xt_register_target(&masquerade_tg6_reg);
- if (err == 0)
- nf_nat_masquerade_ipv6_register_notifier();
+ if (err)
+ return err;
+
+ err = nf_nat_masquerade_ipv6_register_notifier();
+ if (err)
+ xt_unregister_target(&masquerade_tg6_reg);
return err;
}
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 8b147440fbdc..af737b47b9b5 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -65,7 +65,10 @@ ipv6header_mt6(const struct sk_buff *skb, struct xt_action_param *par)
}
hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
- BUG_ON(hp == NULL);
+ if (!hp) {
+ par->hotdrop = true;
+ return false;
+ }
/* Calculate the header length */
if (nexthdr == NEXTHDR_FRAGMENT)
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 2c99b94eeca3..21bf6bf04323 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -137,7 +137,10 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
sizeof(_addr),
&_addr);
- BUG_ON(ap == NULL);
+ if (ap == NULL) {
+ par->hotdrop = true;
+ return false;
+ }
if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) {
pr_debug("i=%d temp=%d;\n", i, temp);
@@ -166,7 +169,10 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
+ temp * sizeof(_addr),
sizeof(_addr),
&_addr);
- BUG_ON(ap == NULL);
+ if (ap == NULL) {
+ par->hotdrop = true;
+ return false;
+ }
if (!ipv6_addr_equal(ap, &rtinfo->addrs[temp]))
break;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 2a14d8b65924..181da2c40f9a 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -341,7 +341,7 @@ static bool
nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev)
{
struct sk_buff *fp, *head = fq->q.fragments;
- int payload_len;
+ int payload_len, delta;
u8 ecn;
inet_frag_kill(&fq->q);
@@ -363,10 +363,16 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic
return false;
}
+ delta = - head->truesize;
+
/* Head of list must not be cloned. */
if (skb_unclone(head, GFP_ATOMIC))
return false;
+ delta += head->truesize;
+ if (delta)
+ add_frag_mem_limit(fq->q.net, delta);
+
/* If the first fragment is fragmented itself, we split
* it to two chunks: the first with data and paged part
* and the second, holding only fragments. */
@@ -445,11 +451,12 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic
else if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
+ fp->sk = NULL;
}
sub_frag_mem_limit(fq->q.net, head->truesize);
head->ignore_df = 1;
- head->next = NULL;
+ skb_mark_not_on_list(head);
head->dev = dev;
head->tstamp = fq->q.stamp;
ipv6_hdr(head)->payload_len = htons(payload_len);
@@ -586,11 +593,16 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
*/
ret = -EINPROGRESS;
if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
- fq->q.meat == fq->q.len &&
- nf_ct_frag6_reasm(fq, skb, dev))
- ret = 0;
- else
+ fq->q.meat == fq->q.len) {
+ unsigned long orefdst = skb->_skb_refdst;
+
+ skb->_skb_refdst = 0UL;
+ if (nf_ct_frag6_reasm(fq, skb, dev))
+ ret = 0;
+ skb->_skb_refdst = orefdst;
+ } else {
skb_dst_drop(skb);
+ }
out_unlock:
spin_unlock_bh(&fq->q.lock);
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index e6eb7cf9b54f..0ad0da5a2600 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -87,18 +87,30 @@ static struct notifier_block masq_dev_notifier = {
struct masq_dev_work {
struct work_struct work;
struct net *net;
+ struct in6_addr addr;
int ifindex;
};
+static int inet_cmp(struct nf_conn *ct, void *work)
+{
+ struct masq_dev_work *w = (struct masq_dev_work *)work;
+ struct nf_conntrack_tuple *tuple;
+
+ if (!device_cmp(ct, (void *)(long)w->ifindex))
+ return 0;
+
+ tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+ return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6);
+}
+
static void iterate_cleanup_work(struct work_struct *work)
{
struct masq_dev_work *w;
- long index;
w = container_of(work, struct masq_dev_work, work);
- index = w->ifindex;
- nf_ct_iterate_cleanup_net(w->net, device_cmp, (void *)index, 0, 0);
+ nf_ct_iterate_cleanup_net(w->net, inet_cmp, (void *)w, 0, 0);
put_net(w->net);
kfree(w);
@@ -120,8 +132,8 @@ static void iterate_cleanup_work(struct work_struct *work)
* of ipv6 addresses being deleted), we also need to add an upper
* limit to the number of queued work items.
*/
-static int masq_inet_event(struct notifier_block *this,
- unsigned long event, void *ptr)
+static int masq_inet6_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
{
struct inet6_ifaddr *ifa = ptr;
const struct net_device *dev;
@@ -147,6 +159,7 @@ static int masq_inet_event(struct notifier_block *this,
INIT_WORK(&w->work, iterate_cleanup_work);
w->ifindex = dev->ifindex;
w->net = net;
+ w->addr = ifa->addr;
schedule_work(&w->work);
return NOTIFY_DONE;
@@ -158,30 +171,53 @@ static int masq_inet_event(struct notifier_block *this,
return NOTIFY_DONE;
}
-static struct notifier_block masq_inet_notifier = {
- .notifier_call = masq_inet_event,
+static struct notifier_block masq_inet6_notifier = {
+ .notifier_call = masq_inet6_event,
};
-static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0);
+static int masq_refcnt;
+static DEFINE_MUTEX(masq_mutex);
-void nf_nat_masquerade_ipv6_register_notifier(void)
+int nf_nat_masquerade_ipv6_register_notifier(void)
{
+ int ret = 0;
+
+ mutex_lock(&masq_mutex);
/* check if the notifier is already set */
- if (atomic_inc_return(&masquerade_notifier_refcount) > 1)
- return;
+ if (++masq_refcnt > 1)
+ goto out_unlock;
+
+ ret = register_netdevice_notifier(&masq_dev_notifier);
+ if (ret)
+ goto err_dec;
- register_netdevice_notifier(&masq_dev_notifier);
- register_inet6addr_notifier(&masq_inet_notifier);
+ ret = register_inet6addr_notifier(&masq_inet6_notifier);
+ if (ret)
+ goto err_unregister;
+
+ mutex_unlock(&masq_mutex);
+ return ret;
+
+err_unregister:
+ unregister_netdevice_notifier(&masq_dev_notifier);
+err_dec:
+ masq_refcnt--;
+out_unlock:
+ mutex_unlock(&masq_mutex);
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier);
void nf_nat_masquerade_ipv6_unregister_notifier(void)
{
+ mutex_lock(&masq_mutex);
/* check if the notifier still has clients */
- if (atomic_dec_return(&masquerade_notifier_refcount) > 0)
- return;
+ if (--masq_refcnt > 0)
+ goto out_unlock;
- unregister_inet6addr_notifier(&masq_inet_notifier);
+ unregister_inet6addr_notifier(&masq_inet6_notifier);
unregister_netdevice_notifier(&masq_dev_notifier);
+out_unlock:
+ mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier);
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
index dd0122f3cffe..e06c82e9dfcd 100644
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -70,7 +70,9 @@ static int __init nft_masq_ipv6_module_init(void)
if (ret < 0)
return ret;
- nf_nat_masquerade_ipv6_register_notifier();
+ ret = nf_nat_masquerade_ipv6_register_notifier();
+ if (ret)
+ nft_unregister_expr(&nft_masq_ipv6_type);
return ret;
}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 413d98bf24f4..fc2b5e845fdf 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -651,8 +651,6 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
skb->tstamp = sockc->transmit_time;
- skb_dst_set(skb, &rt->dst);
- *dstp = NULL;
skb_put(skb, length);
skb_reset_network_header(skb);
@@ -660,13 +658,21 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb->ip_summed = CHECKSUM_NONE;
+ skb_setup_tx_timestamp(skb, sockc->tsflags);
+
if (flags & MSG_CONFIRM)
skb_set_dst_pending_confirm(skb, 1);
skb->transport_header = skb->network_header;
err = memcpy_from_msg(iph, msg, length);
- if (err)
- goto error_fault;
+ if (err) {
+ err = -EFAULT;
+ kfree_skb(skb);
+ goto error;
+ }
+
+ skb_dst_set(skb, &rt->dst);
+ *dstp = NULL;
/* if egress device is enslaved to an L3 master device pass the
* skb to its handler for processing
@@ -675,21 +681,28 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
if (unlikely(!skb))
return 0;
+ /* Acquire rcu_read_lock() in case we need to use rt->rt6i_idev
+ * in the error path. Since skb has been freed, the dst could
+ * have been queued for deletion.
+ */
+ rcu_read_lock();
IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
NULL, rt->dst.dev, dst_output);
if (err > 0)
err = net_xmit_errno(err);
- if (err)
- goto error;
+ if (err) {
+ IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
+ rcu_read_unlock();
+ goto error_check;
+ }
+ rcu_read_unlock();
out:
return 0;
-error_fault:
- err = -EFAULT;
- kfree_skb(skb);
error:
IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
+error_check:
if (err == -ENOBUFS && !np->recverr)
err = 0;
return err;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 5c5b4f79296e..aa26c45486d9 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -145,7 +145,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
*/
if (end < fq->q.len ||
((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len))
- goto err;
+ goto discard_fq;
fq->q.flags |= INET_FRAG_LAST_IN;
fq->q.len = end;
} else {
@@ -162,20 +162,20 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
if (end > fq->q.len) {
/* Some bits beyond end -> corruption. */
if (fq->q.flags & INET_FRAG_LAST_IN)
- goto err;
+ goto discard_fq;
fq->q.len = end;
}
}
if (end == offset)
- goto err;
+ goto discard_fq;
/* Point into the IP datagram 'data' part. */
if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data))
- goto err;
+ goto discard_fq;
if (pskb_trim_rcsum(skb, end - offset))
- goto err;
+ goto discard_fq;
/* Find out which fragments are in front and at the back of us
* in the chain of fragments so far. We must know where to put
@@ -281,7 +281,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
{
struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
struct sk_buff *fp, *head = fq->q.fragments;
- int payload_len;
+ int payload_len, delta;
unsigned int nhoff;
int sum_truesize;
u8 ecn;
@@ -322,10 +322,16 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
if (payload_len > IPV6_MAXPLEN)
goto out_oversize;
+ delta = - head->truesize;
+
/* Head of list must not be cloned. */
if (skb_unclone(head, GFP_ATOMIC))
goto out_oom;
+ delta += head->truesize;
+ if (delta)
+ add_frag_mem_limit(fq->q.net, delta);
+
/* If the first fragment is fragmented itself, we split
* it to two chunks: the first with data and paged part
* and the second, holding only fragments. */
@@ -388,7 +394,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
}
sub_frag_mem_limit(fq->q.net, sum_truesize);
- head->next = NULL;
+ skb_mark_not_on_list(head);
head->dev = dev;
head->tstamp = fq->q.stamp;
ipv6_hdr(head)->payload_len = htons(payload_len);
@@ -418,6 +424,7 @@ out_fail:
rcu_read_lock();
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
rcu_read_unlock();
+ inet_frag_kill(&fq->q);
return -1;
}
@@ -553,7 +560,6 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
table[0].data = &net->ipv6.frags.high_thresh;
table[0].extra1 = &net->ipv6.frags.low_thresh;
- table[0].extra2 = &init_net.ipv6.frags.high_thresh;
table[1].data = &net->ipv6.frags.low_thresh;
table[1].extra2 = &net->ipv6.frags.high_thresh;
table[2].data = &net->ipv6.frags.timeout;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7208c16302f6..059f0531f7c1 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -368,7 +368,7 @@ static void ip6_dst_destroy(struct dst_entry *dst)
struct fib6_info *from;
struct inet6_dev *idev;
- dst_destroy_metrics_generic(dst);
+ ip_dst_metrics_put(dst);
rt6_uncached_list_del(rt);
idev = rt->rt6i_idev;
@@ -517,10 +517,11 @@ static void rt6_probe_deferred(struct work_struct *w)
static void rt6_probe(struct fib6_info *rt)
{
- struct __rt6_probe_work *work;
+ struct __rt6_probe_work *work = NULL;
const struct in6_addr *nh_gw;
struct neighbour *neigh;
struct net_device *dev;
+ struct inet6_dev *idev;
/*
* Okay, this does not seem to be appropriate
@@ -536,15 +537,12 @@ static void rt6_probe(struct fib6_info *rt)
nh_gw = &rt->fib6_nh.nh_gw;
dev = rt->fib6_nh.nh_dev;
rcu_read_lock_bh();
+ idev = __in6_dev_get(dev);
neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
if (neigh) {
- struct inet6_dev *idev;
-
if (neigh->nud_state & NUD_VALID)
goto out;
- idev = __in6_dev_get(dev);
- work = NULL;
write_lock(&neigh->lock);
if (!(neigh->nud_state & NUD_VALID) &&
time_after(jiffies,
@@ -554,11 +552,13 @@ static void rt6_probe(struct fib6_info *rt)
__neigh_set_probe_once(neigh);
}
write_unlock(&neigh->lock);
- } else {
+ } else if (time_after(jiffies, rt->last_probe +
+ idev->cnf.rtr_probe_interval)) {
work = kmalloc(sizeof(*work), GFP_ATOMIC);
}
if (work) {
+ rt->last_probe = jiffies;
INIT_WORK(&work->work, rt6_probe_deferred);
work->target = *nh_gw;
dev_hold(dev);
@@ -946,8 +946,6 @@ static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
{
- rt->dst.flags |= fib6_info_dst_flags(ort);
-
if (ort->fib6_flags & RTF_REJECT) {
ip6_rt_init_dst_reject(rt, ort);
return;
@@ -956,7 +954,7 @@ static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
rt->dst.error = 0;
rt->dst.output = ip6_output;
- if (ort->fib6_type == RTN_LOCAL) {
+ if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
rt->dst.input = ip6_input;
} else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
rt->dst.input = ip6_mc_input;
@@ -977,7 +975,7 @@ static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
{
rt->rt6i_flags &= ~RTF_EXPIRES;
rcu_assign_pointer(rt->from, from);
- dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
+ ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
}
/* Caller must already hold reference to @ort */
@@ -995,8 +993,6 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
#ifdef CONFIG_IPV6_SUBTREES
rt->rt6i_src = ort->fib6_src;
#endif
- rt->rt6i_prefsrc = ort->fib6_prefsrc;
- rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
}
static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
@@ -1450,11 +1446,6 @@ static int rt6_insert_exception(struct rt6_info *nrt,
if (ort->fib6_src.plen)
src_key = &nrt->rt6i_src.addr;
#endif
-
- /* Update rt6i_prefsrc as it could be changed
- * in rt6_remove_prefsrc()
- */
- nrt->rt6i_prefsrc = ort->fib6_prefsrc;
/* rt6_mtu_change() might lower mtu on ort.
* Only insert this exception route if its mtu
* is less than ort's mtu value.
@@ -1636,25 +1627,6 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
rcu_read_unlock();
}
-static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt)
-{
- struct rt6_exception_bucket *bucket;
- struct rt6_exception *rt6_ex;
- int i;
-
- bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
- lockdep_is_held(&rt6_exception_lock));
-
- if (bucket) {
- for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
- hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
- rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
- }
- bucket++;
- }
- }
-}
-
static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
struct rt6_info *rt, int mtu)
{
@@ -2099,7 +2071,8 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
{
bool any_src;
- if (rt6_need_strict(&fl6->daddr)) {
+ if (ipv6_addr_type(&fl6->daddr) &
+ (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
struct dst_entry *dst;
dst = l3mdev_link_scope_lookup(net, fl6);
@@ -2259,8 +2232,7 @@ static void ip6_link_failure(struct sk_buff *skb)
if (rt) {
rcu_read_lock();
if (rt->rt6i_flags & RTF_CACHE) {
- if (dst_hold_safe(&rt->dst))
- rt6_remove_exception_rt(rt);
+ rt6_remove_exception_rt(rt);
} else {
struct fib6_info *from;
struct fib6_node *fn;
@@ -2369,15 +2341,14 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
{
const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
struct dst_entry *dst;
- struct flowi6 fl6;
-
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_oif = oif;
- fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
- fl6.daddr = iph->daddr;
- fl6.saddr = iph->saddr;
- fl6.flowlabel = ip6_flowinfo(iph);
- fl6.flowi6_uid = uid;
+ struct flowi6 fl6 = {
+ .flowi6_oif = oif,
+ .flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
+ .daddr = iph->daddr,
+ .saddr = iph->saddr,
+ .flowlabel = ip6_flowinfo(iph),
+ .flowi6_uid = uid,
+ };
dst = ip6_route_output(net, NULL, &fl6);
if (!dst->error)
@@ -2388,10 +2359,13 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu);
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
+ int oif = sk->sk_bound_dev_if;
struct dst_entry *dst;
- ip6_update_pmtu(skb, sock_net(sk), mtu,
- sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
+ if (!oif && skb->dev)
+ oif = l3mdev_master_ifindex(skb->dev);
+
+ ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
dst = __sk_dst_get(sk);
if (!dst || !dst->obsolete ||
@@ -2528,16 +2502,15 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
{
const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
struct dst_entry *dst;
- struct flowi6 fl6;
-
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_iif = LOOPBACK_IFINDEX;
- fl6.flowi6_oif = oif;
- fl6.flowi6_mark = mark;
- fl6.daddr = iph->daddr;
- fl6.saddr = iph->saddr;
- fl6.flowlabel = ip6_flowinfo(iph);
- fl6.flowi6_uid = uid;
+ struct flowi6 fl6 = {
+ .flowi6_iif = LOOPBACK_IFINDEX,
+ .flowi6_oif = oif,
+ .flowi6_mark = mark,
+ .daddr = iph->daddr,
+ .saddr = iph->saddr,
+ .flowlabel = ip6_flowinfo(iph),
+ .flowi6_uid = uid,
+ };
dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
rt6_do_redirect(dst, NULL, skb);
@@ -2545,21 +2518,18 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
}
EXPORT_SYMBOL_GPL(ip6_redirect);
-void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
- u32 mark)
+void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
struct dst_entry *dst;
- struct flowi6 fl6;
-
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_iif = LOOPBACK_IFINDEX;
- fl6.flowi6_oif = oif;
- fl6.flowi6_mark = mark;
- fl6.daddr = msg->dest;
- fl6.saddr = iph->daddr;
- fl6.flowi6_uid = sock_net_uid(net, NULL);
+ struct flowi6 fl6 = {
+ .flowi6_iif = LOOPBACK_IFINDEX,
+ .flowi6_oif = oif,
+ .daddr = msg->dest,
+ .saddr = iph->daddr,
+ .flowi6_uid = sock_net_uid(net, NULL),
+ };
dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
rt6_do_redirect(dst, NULL, skb);
@@ -2730,24 +2700,6 @@ out:
return entries > rt_max_size;
}
-static int ip6_convert_metrics(struct net *net, struct fib6_info *rt,
- struct fib6_config *cfg)
-{
- struct dst_metrics *p;
-
- if (!cfg->fc_mx)
- return 0;
-
- p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL);
- if (unlikely(!p))
- return -ENOMEM;
-
- refcount_set(&p->refcnt, 1);
- rt->fib6_metrics = p;
-
- return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics);
-}
-
static struct rt6_info *ip6_nh_lookup_table(struct net *net,
struct fib6_config *cfg,
const struct in6_addr *gw_addr,
@@ -2795,6 +2747,8 @@ static int ip6_route_check_nh_onlink(struct net *net,
grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
if (grt) {
if (!grt->dst.error &&
+ /* ignore match if it is the default route */
+ grt->from && !ipv6_addr_any(&grt->from->fib6_dst.addr) &&
(grt->rt6i_flags & flags || dev != grt->dst.dev)) {
NL_SET_ERR_MSG(extack,
"Nexthop has invalid gateway or device mismatch");
@@ -3023,13 +2977,17 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
if (!rt)
goto out;
+ rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len);
+ if (IS_ERR(rt->fib6_metrics)) {
+ err = PTR_ERR(rt->fib6_metrics);
+ /* Do not leave garbage there. */
+ rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
+ goto out;
+ }
+
if (cfg->fc_flags & RTF_ADDRCONF)
rt->dst_nocount = true;
- err = ip6_convert_metrics(net, rt, cfg);
- if (err < 0)
- goto out;
-
if (cfg->fc_flags & RTF_EXPIRES)
fib6_set_expires(rt, jiffies +
clock_t_to_jiffies(cfg->fc_expires));
@@ -3138,8 +3096,6 @@ install_route:
rt->fib6_nh.nh_dev = dev;
rt->fib6_table = table;
- cfg->fc_nlinfo.nl_net = dev_net(dev);
-
if (idev)
in6_dev_put(idev);
@@ -3260,8 +3216,8 @@ static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
if (cfg->fc_flags & RTF_GATEWAY &&
!ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
goto out;
- if (dst_hold_safe(&rt->dst))
- rc = rt6_remove_exception_rt(rt);
+
+ rc = rt6_remove_exception_rt(rt);
out:
return rc;
}
@@ -3631,23 +3587,23 @@ static void rtmsg_to_fib6_config(struct net *net,
struct in6_rtmsg *rtmsg,
struct fib6_config *cfg)
{
- memset(cfg, 0, sizeof(*cfg));
+ *cfg = (struct fib6_config){
+ .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
+ : RT6_TABLE_MAIN,
+ .fc_ifindex = rtmsg->rtmsg_ifindex,
+ .fc_metric = rtmsg->rtmsg_metric,
+ .fc_expires = rtmsg->rtmsg_info,
+ .fc_dst_len = rtmsg->rtmsg_dst_len,
+ .fc_src_len = rtmsg->rtmsg_src_len,
+ .fc_flags = rtmsg->rtmsg_flags,
+ .fc_type = rtmsg->rtmsg_type,
- cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
- : RT6_TABLE_MAIN;
- cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
- cfg->fc_metric = rtmsg->rtmsg_metric;
- cfg->fc_expires = rtmsg->rtmsg_info;
- cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
- cfg->fc_src_len = rtmsg->rtmsg_src_len;
- cfg->fc_flags = rtmsg->rtmsg_flags;
- cfg->fc_type = rtmsg->rtmsg_type;
-
- cfg->fc_nlinfo.nl_net = net;
+ .fc_nlinfo.nl_net = net,
- cfg->fc_dst = rtmsg->rtmsg_dst;
- cfg->fc_src = rtmsg->rtmsg_src;
- cfg->fc_gateway = rtmsg->rtmsg_gateway;
+ .fc_dst = rtmsg->rtmsg_dst,
+ .fc_src = rtmsg->rtmsg_src,
+ .fc_gateway = rtmsg->rtmsg_gateway,
+ };
}
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
@@ -3754,6 +3710,7 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net,
if (!f6i)
return ERR_PTR(-ENOMEM);
+ f6i->fib6_metrics = ip_fib_metrics_init(net, NULL, 0);
f6i->dst_nocount = true;
f6i->dst_host = true;
f6i->fib6_protocol = RTPROT_KERNEL;
@@ -3796,8 +3753,6 @@ static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
spin_lock_bh(&rt6_exception_lock);
/* remove prefsrc entry */
rt->fib6_prefsrc.plen = 0;
- /* need to update cache as well */
- rt6_exceptions_remove_prefsrc(rt);
spin_unlock_bh(&rt6_exception_lock);
}
return 0;
@@ -4075,8 +4030,12 @@ void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
.event = event,
},
};
+ struct net *net = dev_net(dev);
- fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
+ if (net->ipv6.sysctl.skip_notify_on_dev_down)
+ fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
+ else
+ fib6_clean_all(net, fib6_ifdown, &arg);
}
void rt6_disable_ip(struct net_device *dev, unsigned long event)
@@ -4166,20 +4125,25 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
int err;
err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
- NULL);
+ extack);
if (err < 0)
goto errout;
err = -EINVAL;
rtm = nlmsg_data(nlh);
- memset(cfg, 0, sizeof(*cfg));
- cfg->fc_table = rtm->rtm_table;
- cfg->fc_dst_len = rtm->rtm_dst_len;
- cfg->fc_src_len = rtm->rtm_src_len;
- cfg->fc_flags = RTF_UP;
- cfg->fc_protocol = rtm->rtm_protocol;
- cfg->fc_type = rtm->rtm_type;
+ *cfg = (struct fib6_config){
+ .fc_table = rtm->rtm_table,
+ .fc_dst_len = rtm->rtm_dst_len,
+ .fc_src_len = rtm->rtm_src_len,
+ .fc_flags = RTF_UP,
+ .fc_protocol = rtm->rtm_protocol,
+ .fc_type = rtm->rtm_type,
+
+ .fc_nlinfo.portid = NETLINK_CB(skb).portid,
+ .fc_nlinfo.nlh = nlh,
+ .fc_nlinfo.nl_net = sock_net(skb->sk),
+ };
if (rtm->rtm_type == RTN_UNREACHABLE ||
rtm->rtm_type == RTN_BLACKHOLE ||
@@ -4195,10 +4159,6 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
- cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
- cfg->fc_nlinfo.nlh = nlh;
- cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
-
if (tb[RTA_GATEWAY]) {
cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
cfg->fc_flags |= RTF_GATEWAY;
@@ -4317,11 +4277,6 @@ static int ip6_route_info_append(struct net *net,
if (!nh)
return -ENOMEM;
nh->fib6_info = rt;
- err = ip6_convert_metrics(net, rt, r_cfg);
- if (err) {
- kfree(nh);
- return err;
- }
memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
list_add_tail(&nh->next, rt6_nh_list);
@@ -4671,20 +4626,31 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
int iif, int type, u32 portid, u32 seq,
unsigned int flags)
{
- struct rtmsg *rtm;
+ struct rt6_info *rt6 = (struct rt6_info *)dst;
+ struct rt6key *rt6_dst, *rt6_src;
+ u32 *pmetrics, table, rt6_flags;
struct nlmsghdr *nlh;
+ struct rtmsg *rtm;
long expires = 0;
- u32 *pmetrics;
- u32 table;
nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
if (!nlh)
return -EMSGSIZE;
+ if (rt6) {
+ rt6_dst = &rt6->rt6i_dst;
+ rt6_src = &rt6->rt6i_src;
+ rt6_flags = rt6->rt6i_flags;
+ } else {
+ rt6_dst = &rt->fib6_dst;
+ rt6_src = &rt->fib6_src;
+ rt6_flags = rt->fib6_flags;
+ }
+
rtm = nlmsg_data(nlh);
rtm->rtm_family = AF_INET6;
- rtm->rtm_dst_len = rt->fib6_dst.plen;
- rtm->rtm_src_len = rt->fib6_src.plen;
+ rtm->rtm_dst_len = rt6_dst->plen;
+ rtm->rtm_src_len = rt6_src->plen;
rtm->rtm_tos = 0;
if (rt->fib6_table)
table = rt->fib6_table->tb6_id;
@@ -4699,7 +4665,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
rtm->rtm_protocol = rt->fib6_protocol;
- if (rt->fib6_flags & RTF_CACHE)
+ if (rt6_flags & RTF_CACHE)
rtm->rtm_flags |= RTM_F_CLONED;
if (dest) {
@@ -4707,7 +4673,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
goto nla_put_failure;
rtm->rtm_dst_len = 128;
} else if (rtm->rtm_dst_len)
- if (nla_put_in6_addr(skb, RTA_DST, &rt->fib6_dst.addr))
+ if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
if (src) {
@@ -4715,12 +4681,12 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
goto nla_put_failure;
rtm->rtm_src_len = 128;
} else if (rtm->rtm_src_len &&
- nla_put_in6_addr(skb, RTA_SRC, &rt->fib6_src.addr))
+ nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
goto nla_put_failure;
#endif
if (iif) {
#ifdef CONFIG_IPV6_MROUTE
- if (ipv6_addr_is_multicast(&rt->fib6_dst.addr)) {
+ if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
int err = ip6mr_get_route(net, skb, rtm, portid);
if (err == 0)
@@ -4755,7 +4721,14 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
/* For multipath routes, walk the siblings list and add
* each as a nexthop within RTA_MULTIPATH.
*/
- if (rt->fib6_nsiblings) {
+ if (rt6) {
+ if (rt6_flags & RTF_GATEWAY &&
+ nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
+ goto nla_put_failure;
+
+ if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
+ goto nla_put_failure;
+ } else if (rt->fib6_nsiblings) {
struct fib6_info *sibling, *next_sibling;
struct nlattr *mp;
@@ -4778,7 +4751,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
goto nla_put_failure;
}
- if (rt->fib6_flags & RTF_EXPIRES) {
+ if (rt6_flags & RTF_EXPIRES) {
expires = dst ? dst->expires : rt->expires;
expires -= jiffies;
}
@@ -4786,7 +4759,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
goto nla_put_failure;
- if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->fib6_flags)))
+ if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
goto nla_put_failure;
@@ -4798,28 +4771,52 @@ nla_put_failure:
return -EMSGSIZE;
}
+static bool fib6_info_uses_dev(const struct fib6_info *f6i,
+ const struct net_device *dev)
+{
+ if (f6i->fib6_nh.nh_dev == dev)
+ return true;
+
+ if (f6i->fib6_nsiblings) {
+ struct fib6_info *sibling, *next_sibling;
+
+ list_for_each_entry_safe(sibling, next_sibling,
+ &f6i->fib6_siblings, fib6_siblings) {
+ if (sibling->fib6_nh.nh_dev == dev)
+ return true;
+ }
+ }
+
+ return false;
+}
+
int rt6_dump_route(struct fib6_info *rt, void *p_arg)
{
struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
+ struct fib_dump_filter *filter = &arg->filter;
+ unsigned int flags = NLM_F_MULTI;
struct net *net = arg->net;
if (rt == net->ipv6.fib6_null_entry)
return 0;
- if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
- struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
-
- /* user wants prefix routes only */
- if (rtm->rtm_flags & RTM_F_PREFIX &&
- !(rt->fib6_flags & RTF_PREFIX_RT)) {
- /* success since this is not a prefix route */
+ if ((filter->flags & RTM_F_PREFIX) &&
+ !(rt->fib6_flags & RTF_PREFIX_RT)) {
+ /* success since this is not a prefix route */
+ return 1;
+ }
+ if (filter->filter_set) {
+ if ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
+ (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
+ (filter->protocol && rt->fib6_protocol != filter->protocol)) {
return 1;
}
+ flags |= NLM_F_DUMP_FILTERED;
}
return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
- arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
+ arg->cb->nlh->nlmsg_seq, flags);
}
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
@@ -4833,7 +4830,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct rt6_info *rt;
struct sk_buff *skb;
struct rtmsg *rtm;
- struct flowi6 fl6;
+ struct flowi6 fl6 = {};
bool fibmatch;
err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
@@ -4842,7 +4839,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
goto errout;
err = -EINVAL;
- memset(&fl6, 0, sizeof(fl6));
rtm = nlmsg_data(nlh);
fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
@@ -5067,7 +5063,10 @@ int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
return 0;
}
-struct ctl_table ipv6_route_table_template[] = {
+static int zero;
+static int one = 1;
+
+static struct ctl_table ipv6_route_table_template[] = {
{
.procname = "flush",
.data = &init_net.ipv6.sysctl.flush_delay,
@@ -5138,6 +5137,15 @@ struct ctl_table ipv6_route_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec_ms_jiffies,
},
+ {
+ .procname = "skip_notify_on_dev_down",
+ .data = &init_net.ipv6.sysctl.skip_notify_on_dev_down,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
{ }
};
@@ -5161,6 +5169,7 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
+ table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
/* Don't export sysctls to unprivileged users */
if (net->user_ns != &init_user_ns)
@@ -5225,6 +5234,7 @@ static int __net_init ip6_route_net_init(struct net *net)
net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
+ net->ipv6.sysctl.skip_notify_on_dev_down = 0;
net->ipv6.ip6_rt_gc_expire = 30*HZ;
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index a8854dd3e9c5..8181ee7e1e27 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -347,6 +347,7 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
struct ipv6hdr *hdr = ipv6_hdr(skb);
struct flowi6 fl6;
+ memset(&fl6, 0, sizeof(fl6));
fl6.daddr = hdr->daddr;
fl6.saddr = hdr->saddr;
fl6.flowlabel = ip6_flowinfo(hdr);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index e9400ffa7875..51c9f75f34b9 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -534,13 +534,13 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->parms.link, 0, iph->protocol, 0);
+ t->parms.link, iph->protocol);
err = 0;
goto out;
}
if (type == ICMP_REDIRECT) {
- ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
- iph->protocol, 0);
+ ipv4_redirect(skb, dev_net(skb->dev), t->parms.link,
+ iph->protocol);
err = 0;
goto out;
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 83f4c77c79d8..d2d97d07ef27 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -478,7 +478,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct net *net = dev_net(skb->dev);
sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
- inet6_iif(skb), 0, udptable, skb);
+ inet6_iif(skb), inet6_sdif(skb), udptable, skb);
if (!sk) {
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
ICMP6_MIB_INERRORS);
@@ -548,7 +548,7 @@ static __inline__ void udpv6_err(struct sk_buff *skb,
__udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
}
-static DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
void udpv6_encap_enable(void)
{
static_branch_enable(&udpv6_encap_needed_key);
@@ -752,6 +752,26 @@ static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
}
}
+/* wrapper for udp_queue_rcv_skb tacking care of csum conversion and
+ * return code conversion for ip layer consumption
+ */
+static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb,
+ struct udphdr *uh)
+{
+ int ret;
+
+ if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk))
+ skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
+ ip6_compute_pseudo);
+
+ ret = udpv6_queue_rcv_skb(sk, skb);
+
+ /* a return value > 0 means to resubmit the input */
+ if (ret > 0)
+ return ret;
+ return 0;
+}
+
int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
int proto)
{
@@ -803,13 +823,14 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (unlikely(sk->sk_rx_dst != dst))
udp6_sk_rx_dst_set(sk, dst);
- ret = udpv6_queue_rcv_skb(sk, skb);
- sock_put(sk);
+ if (!uh->check && !udp_sk(sk)->no_check6_rx) {
+ sock_put(sk);
+ goto report_csum_error;
+ }
- /* a return value > 0 means to resubmit the input */
- if (ret > 0)
- return ret;
- return 0;
+ ret = udp6_unicast_rcv_skb(sk, skb, uh);
+ sock_put(sk);
+ return ret;
}
/*
@@ -822,30 +843,13 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
/* Unicast */
sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
if (sk) {
- int ret;
-
- if (!uh->check && !udp_sk(sk)->no_check6_rx) {
- udp6_csum_zero_error(skb);
- goto csum_error;
- }
-
- if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk))
- skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
- ip6_compute_pseudo);
-
- ret = udpv6_queue_rcv_skb(sk, skb);
-
- /* a return value > 0 means to resubmit the input */
- if (ret > 0)
- return ret;
-
- return 0;
+ if (!uh->check && !udp_sk(sk)->no_check6_rx)
+ goto report_csum_error;
+ return udp6_unicast_rcv_skb(sk, skb, uh);
}
- if (!uh->check) {
- udp6_csum_zero_error(skb);
- goto csum_error;
- }
+ if (!uh->check)
+ goto report_csum_error;
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard;
@@ -866,6 +870,9 @@ short_packet:
ulen, skb->len,
daddr, ntohs(uh->dest));
goto discard;
+
+report_csum_error:
+ udp6_csum_zero_error(skb);
csum_error:
__UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
discard:
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 95dee9ca8d22..1b8e161ac527 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -119,7 +119,7 @@ static struct sk_buff *udp6_gro_receive(struct list_head *head,
{
struct udphdr *uh = udp_gro_udphdr(skb);
- if (unlikely(!uh))
+ if (unlikely(!uh) || !static_branch_unlikely(&udpv6_encap_needed_key))
goto flush;
/* Don't bother verifying checksum if we're going to flush anyway. */
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 841f4a07438e..9ef490dddcea 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -59,6 +59,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
if (xo && (xo->flags & XFRM_GRO)) {
skb_mac_header_rebuild(skb);
+ skb_reset_transport_header(skb);
return -1;
}
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index 9ad07a91708e..3c29da5defe6 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -51,7 +51,6 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
{
int ihl = skb->data - skb_transport_header(skb);
- struct xfrm_offload *xo = xfrm_offload(skb);
if (skb->transport_header != skb->network_header) {
memmove(skb_transport_header(skb),
@@ -60,8 +59,7 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
}
ipv6_hdr(skb)->payload_len = htons(skb->len + ihl -
sizeof(struct ipv6hdr));
- if (!xo || !(xo->flags & XFRM_GRO))
- skb_reset_transport_header(skb);
+ skb_reset_transport_header(skb);
return 0;
}
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 5959ce9620eb..6a74080005cf 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -170,9 +170,11 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (toobig && xfrm6_local_dontfrag(skb)) {
xfrm6_local_rxpmtu(skb, mtu);
+ kfree_skb(skb);
return -EMSGSIZE;
} else if (!skb->ignore_df && toobig && skb->sk) {
xfrm_local_error(skb, mtu);
+ kfree_skb(skb);
return -EMSGSIZE;
}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index ef3defaf43b9..d35bcf92969c 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -146,8 +146,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
- while (nh + offset + 1 < skb->data ||
- pskb_may_pull(skb, nh + offset + 1 - skb->data)) {
+ while (nh + offset + sizeof(*exthdr) < skb->data ||
+ pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) {
nh = skb_network_header(skb);
exthdr = (struct ipv6_opt_hdr *)(nh + offset);
OpenPOWER on IntegriCloud