summaryrefslogtreecommitdiffstats
path: root/net/ipv6/route.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6/route.c')
-rw-r--r--net/ipv6/route.c298
1 files changed, 142 insertions, 156 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index becb048d18d4..ceff71d24f8e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -99,10 +99,7 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
if (!(rt->dst.flags & DST_HOST))
return NULL;
- if (!rt->rt6i_peer)
- rt6_bind_peer(rt, 1);
-
- peer = rt->rt6i_peer;
+ peer = rt6_get_peer_create(rt);
if (peer) {
u32 *old_p = __DST_METRICS_PTR(old);
unsigned long prev, new;
@@ -123,21 +120,27 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
return p;
}
-static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
+static inline const void *choose_neigh_daddr(struct rt6_info *rt,
+ struct sk_buff *skb,
+ const void *daddr)
{
struct in6_addr *p = &rt->rt6i_gateway;
if (!ipv6_addr_any(p))
return (const void *) p;
+ else if (skb)
+ return &ipv6_hdr(skb)->daddr;
return daddr;
}
-static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
+static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
+ struct sk_buff *skb,
+ const void *daddr)
{
struct rt6_info *rt = (struct rt6_info *) dst;
struct neighbour *n;
- daddr = choose_neigh_daddr(rt, daddr);
+ daddr = choose_neigh_daddr(rt, skb, daddr);
n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
if (n)
return n;
@@ -152,7 +155,7 @@ static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
if (IS_ERR(n))
return PTR_ERR(n);
}
- dst_set_neighbour(&rt->dst, n);
+ rt->n = n;
return 0;
}
@@ -261,16 +264,19 @@ static struct rt6_info ip6_blk_hole_entry_template = {
#endif
/* allocate dst with ip6_dst_ops */
-static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
+static inline struct rt6_info *ip6_dst_alloc(struct net *net,
struct net_device *dev,
- int flags)
+ int flags,
+ struct fib6_table *table)
{
- struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
+ struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
+ 0, 0, flags);
- if (rt)
+ if (rt) {
memset(&rt->rt6i_table, 0,
sizeof(*rt) - sizeof(struct dst_entry));
-
+ rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
+ }
return rt;
}
@@ -278,7 +284,9 @@ static void ip6_dst_destroy(struct dst_entry *dst)
{
struct rt6_info *rt = (struct rt6_info *)dst;
struct inet6_dev *idev = rt->rt6i_idev;
- struct inet_peer *peer = rt->rt6i_peer;
+
+ if (rt->n)
+ neigh_release(rt->n);
if (!(rt->dst.flags & DST_HOST))
dst_destroy_metrics_generic(dst);
@@ -291,8 +299,8 @@ static void ip6_dst_destroy(struct dst_entry *dst)
if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
dst_release(dst->from);
- if (peer) {
- rt->rt6i_peer = NULL;
+ if (rt6_has_peer(rt)) {
+ struct inet_peer *peer = rt6_peer_ptr(rt);
inet_putpeer(peer);
}
}
@@ -306,13 +314,20 @@ static u32 rt6_peer_genid(void)
void rt6_bind_peer(struct rt6_info *rt, int create)
{
+ struct inet_peer_base *base;
struct inet_peer *peer;
- peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
- if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
- inet_putpeer(peer);
- else
- rt->rt6i_peer_genid = rt6_peer_genid();
+ base = inetpeer_base_ptr(rt->_rt6i_peer);
+ if (!base)
+ return;
+
+ peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
+ if (peer) {
+ if (!rt6_set_peer(rt, peer))
+ inet_putpeer(peer);
+ else
+ rt->rt6i_peer_genid = rt6_peer_genid();
+ }
}
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -323,12 +338,19 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
struct net_device *loopback_dev =
dev_net(dev)->loopback_dev;
- if (dev != loopback_dev && idev && idev->dev == dev) {
- struct inet6_dev *loopback_idev =
- in6_dev_get(loopback_dev);
- if (loopback_idev) {
- rt->rt6i_idev = loopback_idev;
- in6_dev_put(idev);
+ if (dev != loopback_dev) {
+ if (idev && idev->dev == dev) {
+ struct inet6_dev *loopback_idev =
+ in6_dev_get(loopback_dev);
+ if (loopback_idev) {
+ rt->rt6i_idev = loopback_idev;
+ in6_dev_put(idev);
+ }
+ }
+ if (rt->n && rt->n->dev == dev) {
+ rt->n->dev = loopback_dev;
+ dev_hold(loopback_dev);
+ dev_put(dev);
}
}
}
@@ -418,7 +440,7 @@ static void rt6_probe(struct rt6_info *rt)
* to no more than one per minute.
*/
rcu_read_lock();
- neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
+ neigh = rt ? rt->n : NULL;
if (!neigh || (neigh->nud_state & NUD_VALID))
goto out;
read_lock_bh(&neigh->lock);
@@ -465,7 +487,7 @@ static inline int rt6_check_neigh(struct rt6_info *rt)
int m;
rcu_read_lock();
- neigh = dst_get_neighbour_noref(&rt->dst);
+ neigh = rt->n;
if (rt->rt6i_flags & RTF_NONEXTHOP ||
!(rt->rt6i_flags & RTF_GATEWAY))
m = 1;
@@ -812,7 +834,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
if (rt) {
rt->rt6i_flags |= RTF_CACHE;
- dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
+ rt->n = neigh_clone(ort->n);
}
return rt;
}
@@ -846,7 +868,7 @@ restart:
dst_hold(&rt->dst);
read_unlock_bh(&table->tb6_lock);
- if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
+ if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
else if (!(rt->dst.flags & DST_HOST))
nrt = rt6_alloc_clone(rt, &fl6->daddr);
@@ -931,6 +953,8 @@ struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
{
int flags = 0;
+ fl6->flowi6_iif = net->loopback_dev->ifindex;
+
if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
flags |= RT6_LOOKUP_F_IFACE;
@@ -952,6 +976,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
if (rt) {
memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
+ rt6_init_peer(rt, net->ipv6.peers);
new = &rt->dst;
@@ -996,7 +1021,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
if (rt->rt6i_peer_genid != rt6_peer_genid()) {
- if (!rt->rt6i_peer)
+ if (!rt6_has_peer(rt))
rt6_bind_peer(rt, 0);
rt->rt6i_peer_genid = rt6_peer_genid();
}
@@ -1042,7 +1067,10 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
{
struct rt6_info *rt6 = (struct rt6_info*)dst;
+ dst_confirm(dst);
if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
+ struct net *net = dev_net(dst->dev);
+
rt6->rt6i_flags |= RTF_MODIFIED;
if (mtu < IPV6_MIN_MTU) {
u32 features = dst_metric(dst, RTAX_FEATURES);
@@ -1051,9 +1079,39 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
dst_metric_set(dst, RTAX_FEATURES, features);
}
dst_metric_set(dst, RTAX_MTU, mtu);
+ rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
}
}
+void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
+ int oif, u32 mark)
+{
+ const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_oif = oif;
+ fl6.flowi6_mark = mark;
+ fl6.flowi6_flags = FLOWI_FLAG_PRECOW_METRICS;
+ fl6.daddr = iph->daddr;
+ fl6.saddr = iph->saddr;
+ fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
+
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (!dst->error)
+ ip6_rt_update_pmtu(dst, ntohl(mtu));
+ dst_release(dst);
+}
+EXPORT_SYMBOL_GPL(ip6_update_pmtu);
+
+void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
+{
+ ip6_update_pmtu(skb, sock_net(sk), mtu,
+ sk->sk_bound_dev_if, sk->sk_mark);
+}
+EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
+
static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
struct net_device *dev = dst->dev;
@@ -1110,7 +1168,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
if (unlikely(!idev))
return ERR_PTR(-ENODEV);
- rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
+ rt = ip6_dst_alloc(net, dev, 0, NULL);
if (unlikely(!rt)) {
in6_dev_put(idev);
dst = ERR_PTR(-ENOMEM);
@@ -1120,7 +1178,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
if (neigh)
neigh_hold(neigh);
else {
- neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
+ neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
if (IS_ERR(neigh)) {
in6_dev_put(idev);
dst_free(&rt->dst);
@@ -1130,7 +1188,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
rt->dst.flags |= DST_HOST;
rt->dst.output = ip6_output;
- dst_set_neighbour(&rt->dst, neigh);
+ rt->n = neigh;
atomic_set(&rt->dst.__refcnt, 1);
rt->rt6i_dst.addr = fl6->daddr;
rt->rt6i_dst.plen = 128;
@@ -1292,7 +1350,7 @@ int ip6_route_add(struct fib6_config *cfg)
if (!table)
goto out;
- rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
+ rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
if (!rt) {
err = -ENOMEM;
@@ -1639,6 +1697,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
struct rt6_info *rt, *nrt = NULL;
struct netevent_redirect netevent;
struct net *net = dev_net(neigh->dev);
+ struct neighbour *old_neigh;
rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
@@ -1666,7 +1725,8 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
dst_confirm(&rt->dst);
/* Duplicate redirect: silently ignore. */
- if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
+ old_neigh = rt->n;
+ if (neigh == old_neigh)
goto out;
nrt = ip6_rt_copy(rt, dest);
@@ -1678,13 +1738,16 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
nrt->rt6i_flags &= ~RTF_GATEWAY;
nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
- dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
+ nrt->n = neigh_clone(neigh);
if (ip6_ins_rt(nrt))
goto out;
netevent.old = &rt->dst;
+ netevent.old_neigh = old_neigh;
netevent.new = &nrt->dst;
+ netevent.new_neigh = neigh;
+ netevent.daddr = dest;
call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
if (rt->rt6i_flags & RTF_CACHE) {
@@ -1697,116 +1760,6 @@ out:
}
/*
- * Handle ICMP "packet too big" messages
- * i.e. Path MTU discovery
- */
-
-static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
- struct net *net, u32 pmtu, int ifindex)
-{
- struct rt6_info *rt, *nrt;
- int allfrag = 0;
-again:
- rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
- if (!rt)
- return;
-
- if (rt6_check_expired(rt)) {
- ip6_del_rt(rt);
- goto again;
- }
-
- if (pmtu >= dst_mtu(&rt->dst))
- goto out;
-
- if (pmtu < IPV6_MIN_MTU) {
- /*
- * According to RFC2460, PMTU is set to the IPv6 Minimum Link
- * MTU (1280) and a fragment header should always be included
- * after a node receiving Too Big message reporting PMTU is
- * less than the IPv6 Minimum Link MTU.
- */
- pmtu = IPV6_MIN_MTU;
- allfrag = 1;
- }
-
- /* New mtu received -> path was valid.
- They are sent only in response to data packets,
- so that this nexthop apparently is reachable. --ANK
- */
- dst_confirm(&rt->dst);
-
- /* Host route. If it is static, it would be better
- not to override it, but add new one, so that
- when cache entry will expire old pmtu
- would return automatically.
- */
- if (rt->rt6i_flags & RTF_CACHE) {
- dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
- if (allfrag) {
- u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
- features |= RTAX_FEATURE_ALLFRAG;
- dst_metric_set(&rt->dst, RTAX_FEATURES, features);
- }
- rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
- rt->rt6i_flags |= RTF_MODIFIED;
- goto out;
- }
-
- /* Network route.
- Two cases are possible:
- 1. It is connected route. Action: COW
- 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
- */
- if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
- nrt = rt6_alloc_cow(rt, daddr, saddr);
- else
- nrt = rt6_alloc_clone(rt, daddr);
-
- if (nrt) {
- dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
- if (allfrag) {
- u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
- features |= RTAX_FEATURE_ALLFRAG;
- dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
- }
-
- /* According to RFC 1981, detecting PMTU increase shouldn't be
- * happened within 5 mins, the recommended timer is 10 mins.
- * Here this route expiration time is set to ip6_rt_mtu_expires
- * which is 10 mins. After 10 mins the decreased pmtu is expired
- * and detecting PMTU increase will be automatically happened.
- */
- rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
- nrt->rt6i_flags |= RTF_DYNAMIC;
- ip6_ins_rt(nrt);
- }
-out:
- dst_release(&rt->dst);
-}
-
-void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
- struct net_device *dev, u32 pmtu)
-{
- struct net *net = dev_net(dev);
-
- /*
- * RFC 1981 states that a node "MUST reduce the size of the packets it
- * is sending along the path" that caused the Packet Too Big message.
- * Since it's not possible in the general case to determine which
- * interface was used to send the original packet, we update the MTU
- * on the interface that will be used to send future packets. We also
- * update the MTU on the interface that received the Packet Too Big in
- * case the original packet was forced out that interface with
- * SO_BINDTODEVICE or similar. This is the next best thing to the
- * correct behaviour, which would be to update the MTU on all
- * interfaces.
- */
- rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
- rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
-}
-
-/*
* Misc support functions
*/
@@ -1814,8 +1767,8 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
const struct in6_addr *dest)
{
struct net *net = dev_net(ort->dst.dev);
- struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
- ort->dst.dev, 0);
+ struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
+ ort->rt6i_table);
if (rt) {
rt->dst.input = ort->dst.input;
@@ -2099,8 +2052,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
bool anycast)
{
struct net *net = dev_net(idev->dev);
- struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
- net->loopback_dev, 0);
+ struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
int err;
if (!rt) {
@@ -2500,7 +2452,7 @@ static int rt6_fill_node(struct net *net,
goto nla_put_failure;
rcu_read_lock();
- n = dst_get_neighbour_noref(&rt->dst);
+ n = rt->n;
if (n) {
if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
rcu_read_unlock();
@@ -2521,7 +2473,9 @@ static int rt6_fill_node(struct net *net,
else
expires = INT_MAX;
- peer = rt->rt6i_peer;
+ peer = NULL;
+ if (rt6_has_peer(rt))
+ peer = rt6_peer_ptr(rt);
ts = tsage = 0;
if (peer && peer->tcp_ts_stamp) {
ts = peer->tcp_ts;
@@ -2722,7 +2676,7 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
seq_puts(m, "00000000000000000000000000000000 00 ");
#endif
rcu_read_lock();
- n = dst_get_neighbour_noref(&rt->dst);
+ n = rt->n;
if (n) {
seq_printf(m, "%pi6", n->primary_key);
} else {
@@ -3007,6 +2961,31 @@ static struct pernet_operations ip6_route_net_ops = {
.exit = ip6_route_net_exit,
};
+static int __net_init ipv6_inetpeer_init(struct net *net)
+{
+ struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
+
+ if (!bp)
+ return -ENOMEM;
+ inet_peer_base_init(bp);
+ net->ipv6.peers = bp;
+ return 0;
+}
+
+static void __net_exit ipv6_inetpeer_exit(struct net *net)
+{
+ struct inet_peer_base *bp = net->ipv6.peers;
+
+ net->ipv6.peers = NULL;
+ inetpeer_invalidate_tree(bp);
+ kfree(bp);
+}
+
+static struct pernet_operations ipv6_inetpeer_ops = {
+ .init = ipv6_inetpeer_init,
+ .exit = ipv6_inetpeer_exit,
+};
+
static struct pernet_operations ip6_route_net_late_ops = {
.init = ip6_route_net_init_late,
.exit = ip6_route_net_exit_late,
@@ -3032,10 +3011,14 @@ int __init ip6_route_init(void)
if (ret)
goto out_kmem_cache;
- ret = register_pernet_subsys(&ip6_route_net_ops);
+ ret = register_pernet_subsys(&ipv6_inetpeer_ops);
if (ret)
goto out_dst_entries;
+ ret = register_pernet_subsys(&ip6_route_net_ops);
+ if (ret)
+ goto out_register_inetpeer;
+
ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
/* Registering of the loopback is done before this portion of code,
@@ -3088,6 +3071,8 @@ out_fib6_init:
fib6_gc_cleanup();
out_register_subsys:
unregister_pernet_subsys(&ip6_route_net_ops);
+out_register_inetpeer:
+ unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
@@ -3102,6 +3087,7 @@ void ip6_route_cleanup(void)
fib6_rules_cleanup();
xfrm6_fini();
fib6_gc_cleanup();
+ unregister_pernet_subsys(&ipv6_inetpeer_ops);
unregister_pernet_subsys(&ip6_route_net_ops);
dst_entries_destroy(&ip6_dst_blackhole_ops);
kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
OpenPOWER on IntegriCloud