summaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/addrconf.c21
-rw-r--r--net/ipv6/ah6.c11
-rw-r--r--net/ipv6/esp6.c11
-rw-r--r--net/ipv6/exthdrs.c4
-rw-r--r--net/ipv6/icmp.c23
-rw-r--r--net/ipv6/inet6_connection_sock.c103
-rw-r--r--net/ipv6/ip6_fib.c5
-rw-r--r--net/ipv6/ip6_input.c9
-rw-r--r--net/ipv6/ip6_output.c40
-rw-r--r--net/ipv6/ip6_tunnel.c96
-rw-r--r--net/ipv6/ip6mr.c5
-rw-r--r--net/ipv6/ipcomp6.c11
-rw-r--r--net/ipv6/mcast.c3
-rw-r--r--net/ipv6/ndisc.c129
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c131
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c51
-rw-r--r--net/ipv6/protocol.c8
-rw-r--r--net/ipv6/raw.c11
-rw-r--r--net/ipv6/route.c536
-rw-r--r--net/ipv6/sit.c25
-rw-r--r--net/ipv6/syncookies.c5
-rw-r--r--net/ipv6/tcp_ipv6.c174
-rw-r--r--net/ipv6/udp.c13
-rw-r--r--net/ipv6/xfrm6_policy.c26
24 files changed, 697 insertions, 754 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 8f6411c97189..79181819a24f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -63,6 +63,7 @@
#include <linux/delay.h>
#include <linux/notifier.h>
#include <linux/string.h>
+#include <linux/hash.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -579,15 +580,9 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
list_add_tail(&ifp->if_list, p);
}
-static u32 ipv6_addr_hash(const struct in6_addr *addr)
+static u32 inet6_addr_hash(const struct in6_addr *addr)
{
- /*
- * We perform the hash function over the last 64 bits of the address
- * This will include the IEEE address token on links that support it.
- */
- return jhash_2words((__force u32)addr->s6_addr32[2],
- (__force u32)addr->s6_addr32[3], 0)
- & (IN6_ADDR_HSIZE - 1);
+ return hash_32(ipv6_addr_hash(addr), IN6_ADDR_HSIZE_SHIFT);
}
/* On success it returns ifp with increased reference count */
@@ -662,7 +657,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
in6_ifa_hold(ifa);
/* Add to big hash table */
- hash = ipv6_addr_hash(addr);
+ hash = inet6_addr_hash(addr);
hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
spin_unlock(&addrconf_hash_lock);
@@ -1270,7 +1265,7 @@ int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
{
struct inet6_ifaddr *ifp;
struct hlist_node *node;
- unsigned int hash = ipv6_addr_hash(addr);
+ unsigned int hash = inet6_addr_hash(addr);
rcu_read_lock_bh();
hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) {
@@ -1293,7 +1288,7 @@ EXPORT_SYMBOL(ipv6_chk_addr);
static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
struct net_device *dev)
{
- unsigned int hash = ipv6_addr_hash(addr);
+ unsigned int hash = inet6_addr_hash(addr);
struct inet6_ifaddr *ifp;
struct hlist_node *node;
@@ -1336,7 +1331,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
struct net_device *dev, int strict)
{
struct inet6_ifaddr *ifp, *result = NULL;
- unsigned int hash = ipv6_addr_hash(addr);
+ unsigned int hash = inet6_addr_hash(addr);
struct hlist_node *node;
rcu_read_lock_bh();
@@ -3223,7 +3218,7 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
int ret = 0;
struct inet6_ifaddr *ifp = NULL;
struct hlist_node *n;
- unsigned int hash = ipv6_addr_hash(addr);
+ unsigned int hash = inet6_addr_hash(addr);
rcu_read_lock_bh();
hlist_for_each_entry_rcu_bh(ifp, n, &inet6_addr_lst[hash], addr_lst) {
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index f1a4a2c28ed3..7e6139508ee7 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -35,6 +35,7 @@
#include <linux/pfkeyv2.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
+#include <net/ip6_route.h>
#include <net/icmp.h>
#include <net/ipv6.h>
#include <net/protocol.h>
@@ -612,16 +613,18 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct xfrm_state *x;
if (type != ICMPV6_DEST_UNREACH &&
- type != ICMPV6_PKT_TOOBIG)
+ type != ICMPV6_PKT_TOOBIG &&
+ type != NDISC_REDIRECT)
return;
x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6);
if (!x)
return;
- NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%pI6\n",
- ntohl(ah->spi), &iph->daddr);
-
+ if (type == NDISC_REDIRECT)
+ ip6_redirect(skb, net, 0, 0);
+ else
+ ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index db1521fcda5b..6dc7fd353ef5 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -39,6 +39,7 @@
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <net/ip6_route.h>
#include <net/icmp.h>
#include <net/ipv6.h>
#include <net/protocol.h>
@@ -433,15 +434,19 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct xfrm_state *x;
if (type != ICMPV6_DEST_UNREACH &&
- type != ICMPV6_PKT_TOOBIG)
+ type != ICMPV6_PKT_TOOBIG &&
+ type != NDISC_REDIRECT)
return;
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
esph->spi, IPPROTO_ESP, AF_INET6);
if (!x)
return;
- pr_debug("pmtu discovery on SA ESP/%08x/%pI6\n",
- ntohl(esph->spi), &iph->daddr);
+
+ if (type == NDISC_REDIRECT)
+ ip6_redirect(skb, net, 0, 0);
+ else
+ ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
}
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 6447dc49429f..fa3d9c328092 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -791,14 +791,14 @@ static int ipv6_renew_option(void *ohdr,
if (ohdr) {
memcpy(*p, ohdr, ipv6_optlen((struct ipv6_opt_hdr *)ohdr));
*hdr = (struct ipv6_opt_hdr *)*p;
- *p += CMSG_ALIGN(ipv6_optlen(*(struct ipv6_opt_hdr **)hdr));
+ *p += CMSG_ALIGN(ipv6_optlen(*hdr));
}
} else {
if (newopt) {
if (copy_from_user(*p, newopt, newoptlen))
return -EFAULT;
*hdr = (struct ipv6_opt_hdr *)*p;
- if (ipv6_optlen(*(struct ipv6_opt_hdr **)hdr) > newoptlen)
+ if (ipv6_optlen(*hdr) > newoptlen)
return -EINVAL;
*p += CMSG_ALIGN(newoptlen);
}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 091a2971c7b7..24d69dbca4d6 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -188,14 +188,16 @@ static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
} else {
struct rt6_info *rt = (struct rt6_info *)dst;
int tmo = net->ipv6.sysctl.icmpv6_time;
+ struct inet_peer *peer;
/* Give more bandwidth to wider prefixes. */
if (rt->rt6i_dst.plen < 128)
tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
- if (!rt->rt6i_peer)
- rt6_bind_peer(rt, 1);
- res = inet_peer_xrlim_allow(rt->rt6i_peer, tmo);
+ peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
+ res = inet_peer_xrlim_allow(peer, tmo);
+ if (peer)
+ inet_putpeer(peer);
}
dst_release(dst);
return res;
@@ -596,13 +598,12 @@ out:
icmpv6_xmit_unlock(sk);
}
-static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
+void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
{
const struct inet6_protocol *ipprot;
int inner_offset;
- int hash;
- u8 nexthdr;
__be16 frag_off;
+ u8 nexthdr;
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
return;
@@ -629,10 +630,8 @@ static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
--ANK (980726)
*/
- hash = nexthdr & (MAX_INET_PROTOS - 1);
-
rcu_read_lock();
- ipprot = rcu_dereference(inet6_protos[hash]);
+ ipprot = rcu_dereference(inet6_protos[nexthdr]);
if (ipprot && ipprot->err_handler)
ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
rcu_read_unlock();
@@ -649,7 +648,6 @@ static int icmpv6_rcv(struct sk_buff *skb)
struct net_device *dev = skb->dev;
struct inet6_dev *idev = __in6_dev_get(dev);
const struct in6_addr *saddr, *daddr;
- const struct ipv6hdr *orig_hdr;
struct icmp6hdr *hdr;
u8 type;
@@ -661,7 +659,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
XFRM_STATE_ICMP))
goto drop_no_count;
- if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr)))
+ if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
goto drop_no_count;
nh = skb_network_offset(skb);
@@ -722,9 +720,6 @@ static int icmpv6_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto discard_it;
hdr = icmp6_hdr(skb);
- orig_hdr = (struct ipv6hdr *) (hdr + 1);
- rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
- ntohl(hdr->icmp6_mtu));
/*
* Drop through to notify
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index e6cee5292a0b..0251a6005be8 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -55,26 +55,26 @@ int inet6_csk_bind_conflict(const struct sock *sk,
EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
struct dst_entry *inet6_csk_route_req(struct sock *sk,
+ struct flowi6 *fl6,
const struct request_sock *req)
{
struct inet6_request_sock *treq = inet6_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_addr *final_p, final;
struct dst_entry *dst;
- struct flowi6 fl6;
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_TCP;
- fl6.daddr = treq->rmt_addr;
- final_p = fl6_update_dst(&fl6, np->opt, &final);
- fl6.saddr = treq->loc_addr;
- fl6.flowi6_oif = sk->sk_bound_dev_if;
- fl6.flowi6_mark = sk->sk_mark;
- fl6.fl6_dport = inet_rsk(req)->rmt_port;
- fl6.fl6_sport = inet_rsk(req)->loc_port;
- security_req_classify_flow(req, flowi6_to_flowi(&fl6));
-
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
+ memset(fl6, 0, sizeof(*fl6));
+ fl6->flowi6_proto = IPPROTO_TCP;
+ fl6->daddr = treq->rmt_addr;
+ final_p = fl6_update_dst(fl6, np->opt, &final);
+ fl6->saddr = treq->loc_addr;
+ fl6->flowi6_oif = treq->iif;
+ fl6->flowi6_mark = sk->sk_mark;
+ fl6->fl6_dport = inet_rsk(req)->rmt_port;
+ fl6->fl6_sport = inet_rsk(req)->loc_port;
+ security_req_classify_flow(req, flowi6_to_flowi(fl6));
+
+ dst = ip6_dst_lookup_flow(sk, fl6, final_p, false);
if (IS_ERR(dst))
return NULL;
@@ -171,7 +171,8 @@ EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
static inline
void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst,
- struct in6_addr *daddr, struct in6_addr *saddr)
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
__ip6_dst_store(sk, dst, daddr, saddr);
@@ -203,43 +204,52 @@ struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
return dst;
}
-int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
+static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
+ struct flowi6 *fl6)
{
- struct sock *sk = skb->sk;
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
- struct flowi6 fl6;
- struct dst_entry *dst;
struct in6_addr *final_p, final;
- int res;
+ struct dst_entry *dst;
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_proto = sk->sk_protocol;
- fl6.daddr = np->daddr;
- fl6.saddr = np->saddr;
- fl6.flowlabel = np->flow_label;
- IP6_ECN_flow_xmit(sk, fl6.flowlabel);
- fl6.flowi6_oif = sk->sk_bound_dev_if;
- fl6.flowi6_mark = sk->sk_mark;
- fl6.fl6_sport = inet->inet_sport;
- fl6.fl6_dport = inet->inet_dport;
- security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+ memset(fl6, 0, sizeof(*fl6));
+ fl6->flowi6_proto = sk->sk_protocol;
+ fl6->daddr = np->daddr;
+ fl6->saddr = np->saddr;
+ fl6->flowlabel = np->flow_label;
+ IP6_ECN_flow_xmit(sk, fl6->flowlabel);
+ fl6->flowi6_oif = sk->sk_bound_dev_if;
+ fl6->flowi6_mark = sk->sk_mark;
+ fl6->fl6_sport = inet->inet_sport;
+ fl6->fl6_dport = inet->inet_dport;
+ security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ final_p = fl6_update_dst(fl6, np->opt, &final);
dst = __inet6_csk_dst_check(sk, np->dst_cookie);
+ if (!dst) {
+ dst = ip6_dst_lookup_flow(sk, fl6, final_p, false);
- if (dst == NULL) {
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
+ if (!IS_ERR(dst))
+ __inet6_csk_dst_store(sk, dst, NULL, NULL);
+ }
+ return dst;
+}
- if (IS_ERR(dst)) {
- sk->sk_err_soft = -PTR_ERR(dst);
- sk->sk_route_caps = 0;
- kfree_skb(skb);
- return PTR_ERR(dst);
- }
+int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
+{
+ struct sock *sk = skb->sk;
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct flowi6 fl6;
+ struct dst_entry *dst;
+ int res;
- __inet6_csk_dst_store(sk, dst, NULL, NULL);
+ dst = inet6_csk_route_socket(sk, &fl6);
+ if (IS_ERR(dst)) {
+ sk->sk_err_soft = -PTR_ERR(dst);
+ sk->sk_route_caps = 0;
+ kfree_skb(skb);
+ return PTR_ERR(dst);
}
rcu_read_lock();
@@ -253,3 +263,16 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
return res;
}
EXPORT_SYMBOL_GPL(inet6_csk_xmit);
+
+struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu)
+{
+ struct flowi6 fl6;
+ struct dst_entry *dst = inet6_csk_route_socket(sk, &fl6);
+
+ if (IS_ERR(dst))
+ return NULL;
+ dst->ops->update_pmtu(dst, sk, NULL, mtu);
+
+ return inet6_csk_route_socket(sk, &fl6);
+}
+EXPORT_SYMBOL_GPL(inet6_csk_update_pmtu);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 608327661960..13690d650c3e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -197,6 +197,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
table->tb6_id = id;
table->tb6_root.leaf = net->ipv6.ip6_null_entry;
table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
+ inet_peer_base_init(&table->tb6_peers);
}
return table;
@@ -1633,6 +1634,7 @@ static int __net_init fib6_net_init(struct net *net)
net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
+ inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
@@ -1643,6 +1645,7 @@ static int __net_init fib6_net_init(struct net *net)
net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
+ inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
#endif
fib6_tables_init(net);
@@ -1666,8 +1669,10 @@ static void fib6_net_exit(struct net *net)
del_timer_sync(&net->ipv6.ip6_fib_timer);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ inetpeer_invalidate_tree(&net->ipv6.fib6_local_tbl->tb6_peers);
kfree(net->ipv6.fib6_local_tbl);
#endif
+ inetpeer_invalidate_tree(&net->ipv6.fib6_main_tbl->tb6_peers);
kfree(net->ipv6.fib6_main_tbl);
kfree(net->ipv6.fib_table_hash);
kfree(net->ipv6.rt6_stats);
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 21a15dfe4a9e..5ab923e51af3 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -168,13 +168,12 @@ drop:
static int ip6_input_finish(struct sk_buff *skb)
{
+ struct net *net = dev_net(skb_dst(skb)->dev);
const struct inet6_protocol *ipprot;
+ struct inet6_dev *idev;
unsigned int nhoff;
int nexthdr;
bool raw;
- u8 hash;
- struct inet6_dev *idev;
- struct net *net = dev_net(skb_dst(skb)->dev);
/*
* Parse extension headers
@@ -189,9 +188,7 @@ resubmit:
nexthdr = skb_network_header(skb)[nhoff];
raw = raw6_local_deliver(skb, nexthdr);
-
- hash = nexthdr & (MAX_INET_PROTOS - 1);
- if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) {
+ if ((ipprot = rcu_dereference(inet6_protos[nexthdr])) != NULL) {
int ret;
if (ipprot->flags & INET6_PROTO_FINAL) {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index decc21d19c53..5b2d63ed793e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -83,24 +83,12 @@ int ip6_local_out(struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(ip6_local_out);
-/* dev_loopback_xmit for use with netfilter. */
-static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
-{
- skb_reset_mac_header(newskb);
- __skb_pull(newskb, skb_network_offset(newskb));
- newskb->pkt_type = PACKET_LOOPBACK;
- newskb->ip_summed = CHECKSUM_UNNECESSARY;
- WARN_ON(!skb_dst(newskb));
-
- netif_rx_ni(newskb);
- return 0;
-}
-
static int ip6_finish_output2(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
struct neighbour *neigh;
+ struct rt6_info *rt;
skb->protocol = htons(ETH_P_IPV6);
skb->dev = dev;
@@ -121,7 +109,7 @@ static int ip6_finish_output2(struct sk_buff *skb)
if (newskb)
NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
newskb, NULL, newskb->dev,
- ip6_dev_loopback_xmit);
+ dev_loopback_xmit);
if (ipv6_hdr(skb)->hop_limit == 0) {
IP6_INC_STATS(dev_net(dev), idev,
@@ -136,9 +124,10 @@ static int ip6_finish_output2(struct sk_buff *skb)
}
rcu_read_lock();
- neigh = dst_get_neighbour_noref(dst);
+ rt = (struct rt6_info *) dst;
+ neigh = rt->n;
if (neigh) {
- int res = neigh_output(neigh, skb);
+ int res = dst_neigh_output(dst, neigh, skb);
rcu_read_unlock();
return res;
@@ -463,6 +452,7 @@ int ip6_forward(struct sk_buff *skb)
*/
if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
struct in6_addr *target = NULL;
+ struct inet_peer *peer;
struct rt6_info *rt;
/*
@@ -476,14 +466,15 @@ int ip6_forward(struct sk_buff *skb)
else
target = &hdr->daddr;
- if (!rt->rt6i_peer)
- rt6_bind_peer(rt, 1);
+ peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
/* Limit redirects both by destination (here)
and by source (inside ndisc_send_redirect)
*/
- if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
+ if (inet_peer_xrlim_allow(peer, 1*HZ))
ndisc_send_redirect(skb, target);
+ if (peer)
+ inet_putpeer(peer);
} else {
int addrtype = ipv6_addr_type(&hdr->saddr);
@@ -604,12 +595,13 @@ void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
if (rt && !(rt->dst.flags & DST_NOPEER)) {
struct inet_peer *peer;
+ struct net *net;
- if (!rt->rt6i_peer)
- rt6_bind_peer(rt, 1);
- peer = rt->rt6i_peer;
+ net = dev_net(rt->dst.dev);
+ peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
if (peer) {
fhdr->identification = htonl(inet_getid(peer, 0));
+ inet_putpeer(peer);
return;
}
}
@@ -960,6 +952,7 @@ static int ip6_dst_lookup_tail(struct sock *sk,
struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
struct neighbour *n;
+ struct rt6_info *rt;
#endif
int err;
@@ -988,7 +981,8 @@ static int ip6_dst_lookup_tail(struct sock *sk,
* dst entry of the nexthop router
*/
rcu_read_lock();
- n = dst_get_neighbour_noref(*dst);
+ rt = (struct rt6_info *) *dst;
+ n = rt->n;
if (n && !(n->nud_state & NUD_VALID)) {
struct inet6_ifaddr *ifp;
struct flowi6 fl_gw6;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index c9015fad8d65..9a1d5fe6aef8 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -40,6 +40,7 @@
#include <linux/rtnetlink.h>
#include <linux/netfilter_ipv6.h>
#include <linux/slab.h>
+#include <linux/hash.h>
#include <asm/uaccess.h>
#include <linux/atomic.h>
@@ -70,11 +71,15 @@ MODULE_ALIAS_NETDEV("ip6tnl0");
#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
#define IPV6_TCLASS_SHIFT 20
-#define HASH_SIZE 32
+#define HASH_SIZE_SHIFT 5
+#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
-#define HASH(addr) ((__force u32)((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \
- (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \
- (HASH_SIZE - 1))
+static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
+{
+ u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
+
+ return hash_32(hash, HASH_SIZE_SHIFT);
+}
static int ip6_tnl_dev_init(struct net_device *dev);
static void ip6_tnl_dev_setup(struct net_device *dev);
@@ -166,12 +171,11 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
static struct ip6_tnl *
ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
{
- unsigned int h0 = HASH(remote);
- unsigned int h1 = HASH(local);
+ unsigned int hash = HASH(remote, local);
struct ip6_tnl *t;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
- for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[h0 ^ h1]) {
+ for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
if (ipv6_addr_equal(local, &t->parms.laddr) &&
ipv6_addr_equal(remote, &t->parms.raddr) &&
(t->dev->flags & IFF_UP))
@@ -205,7 +209,7 @@ ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct ip6_tnl_parm *p)
if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
prio = 1;
- h = HASH(remote) ^ HASH(local);
+ h = HASH(remote, local);
}
return &ip6n->tnls[prio][h];
}
@@ -252,7 +256,7 @@ static void ip6_dev_free(struct net_device *dev)
}
/**
- * ip6_tnl_create() - create a new tunnel
+ * ip6_tnl_create - create a new tunnel
* @p: tunnel parameters
* @pt: pointer to new tunnel
*
@@ -550,6 +554,9 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
rel_type = ICMP_DEST_UNREACH;
rel_code = ICMP_FRAG_NEEDED;
break;
+ case NDISC_REDIRECT:
+ rel_type = ICMP_REDIRECT;
+ rel_code = ICMP_REDIR_HOST;
default:
return 0;
}
@@ -606,8 +613,10 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (rel_info > dst_mtu(skb_dst(skb2)))
goto out;
- skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info);
+ skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info);
}
+ if (rel_type == ICMP_REDIRECT)
+ skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2);
icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
@@ -684,24 +693,50 @@ static void ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
IP6_ECN_set_ce(ipv6_hdr(skb));
}
+static __u32 ip6_tnl_get_cap(struct ip6_tnl *t,
+ const struct in6_addr *laddr,
+ const struct in6_addr *raddr)
+{
+ struct ip6_tnl_parm *p = &t->parms;
+ int ltype = ipv6_addr_type(laddr);
+ int rtype = ipv6_addr_type(raddr);
+ __u32 flags = 0;
+
+ if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) {
+ flags = IP6_TNL_F_CAP_PER_PACKET;
+ } else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
+ rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
+ !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
+ (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
+ if (ltype&IPV6_ADDR_UNICAST)
+ flags |= IP6_TNL_F_CAP_XMIT;
+ if (rtype&IPV6_ADDR_UNICAST)
+ flags |= IP6_TNL_F_CAP_RCV;
+ }
+ return flags;
+}
+
/* called with rcu_read_lock() */
-static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
+static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
+ const struct in6_addr *laddr,
+ const struct in6_addr *raddr)
{
struct ip6_tnl_parm *p = &t->parms;
int ret = 0;
struct net *net = dev_net(t->dev);
- if (p->flags & IP6_TNL_F_CAP_RCV) {
+ if ((p->flags & IP6_TNL_F_CAP_RCV) ||
+ ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
+ (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) {
struct net_device *ldev = NULL;
if (p->link)
ldev = dev_get_by_index_rcu(net, p->link);
- if ((ipv6_addr_is_multicast(&p->laddr) ||
- likely(ipv6_chk_addr(net, &p->laddr, ldev, 0))) &&
- likely(!ipv6_chk_addr(net, &p->raddr, NULL, 0)))
+ if ((ipv6_addr_is_multicast(laddr) ||
+ likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
+ likely(!ipv6_chk_addr(net, raddr, NULL, 0)))
ret = 1;
-
}
return ret;
}
@@ -740,7 +775,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
goto discard;
}
- if (!ip6_tnl_rcv_ctl(t)) {
+ if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) {
t->dev->stats.rx_dropped++;
rcu_read_unlock();
goto discard;
@@ -921,7 +956,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
if (skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
if (skb->len > mtu) {
*pmtu = mtu;
err = -EMSGSIZE;
@@ -1114,25 +1149,6 @@ tx_err:
return NETDEV_TX_OK;
}
-static void ip6_tnl_set_cap(struct ip6_tnl *t)
-{
- struct ip6_tnl_parm *p = &t->parms;
- int ltype = ipv6_addr_type(&p->laddr);
- int rtype = ipv6_addr_type(&p->raddr);
-
- p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV);
-
- if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
- rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
- !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
- (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
- if (ltype&IPV6_ADDR_UNICAST)
- p->flags |= IP6_TNL_F_CAP_XMIT;
- if (rtype&IPV6_ADDR_UNICAST)
- p->flags |= IP6_TNL_F_CAP_RCV;
- }
-}
-
static void ip6_tnl_link_config(struct ip6_tnl *t)
{
struct net_device *dev = t->dev;
@@ -1153,7 +1169,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
- ip6_tnl_set_cap(t);
+ p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
+ p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV)
dev->flags |= IFF_POINTOPOINT;
@@ -1438,6 +1455,9 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
t->parms.proto = IPPROTO_IPV6;
dev_hold(dev);
+
+ ip6_tnl_link_config(t);
+
rcu_assign_pointer(ip6n->tnls_wc[0], t);
return 0;
}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 461e47c8e956..4532973f0dd4 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -2104,8 +2104,9 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
if (c->mf6c_parent >= MAXMIFS)
return -ENOENT;
- if (MIF_EXISTS(mrt, c->mf6c_parent))
- RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex);
+ if (MIF_EXISTS(mrt, c->mf6c_parent) &&
+ nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
+ return -EMSGSIZE;
mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 5cb75bfe45b1..7af5aee75d98 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -46,6 +46,7 @@
#include <linux/list.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
+#include <net/ip6_route.h>
#include <net/icmp.h>
#include <net/ipv6.h>
#include <net/protocol.h>
@@ -63,7 +64,9 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
(struct ip_comp_hdr *)(skb->data + offset);
struct xfrm_state *x;
- if (type != ICMPV6_DEST_UNREACH && type != ICMPV6_PKT_TOOBIG)
+ if (type != ICMPV6_DEST_UNREACH &&
+ type != ICMPV6_PKT_TOOBIG &&
+ type != NDISC_REDIRECT)
return;
spi = htonl(ntohs(ipcomph->cpi));
@@ -72,8 +75,10 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!x)
return;
- pr_debug("pmtu discovery on SA IPCOMP/%08x/%pI6\n",
- spi, &iph->daddr);
+ if (type == NDISC_REDIRECT)
+ ip6_redirect(skb, net, 0, 0);
+ else
+ ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
}
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 6d0f5dc8e3a6..92f8e48e4ba4 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -211,6 +211,9 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
struct ipv6_mc_socklist __rcu **lnk;
struct net *net = sock_net(sk);
+ if (!ipv6_addr_is_multicast(addr))
+ return -EINVAL;
+
spin_lock(&ipv6_sk_mc_lock);
for (lnk = &np->ipv6_mc_list;
(mc_lst = rcu_dereference_protected(*lnk,
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 54f62d3b8dd6..ff36194a71aa 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -143,40 +143,6 @@ struct neigh_table nd_tbl = {
.gc_thresh3 = 1024,
};
-/* ND options */
-struct ndisc_options {
- struct nd_opt_hdr *nd_opt_array[__ND_OPT_ARRAY_MAX];
-#ifdef CONFIG_IPV6_ROUTE_INFO
- struct nd_opt_hdr *nd_opts_ri;
- struct nd_opt_hdr *nd_opts_ri_end;
-#endif
- struct nd_opt_hdr *nd_useropts;
- struct nd_opt_hdr *nd_useropts_end;
-};
-
-#define nd_opts_src_lladdr nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
-#define nd_opts_tgt_lladdr nd_opt_array[ND_OPT_TARGET_LL_ADDR]
-#define nd_opts_pi nd_opt_array[ND_OPT_PREFIX_INFO]
-#define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END]
-#define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR]
-#define nd_opts_mtu nd_opt_array[ND_OPT_MTU]
-
-#define NDISC_OPT_SPACE(len) (((len)+2+7)&~7)
-
-/*
- * Return the padding between the option length and the start of the
- * link addr. Currently only IP-over-InfiniBand needs this, although
- * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may
- * also need a pad of 2.
- */
-static int ndisc_addr_option_pad(unsigned short type)
-{
- switch (type) {
- case ARPHRD_INFINIBAND: return 2;
- default: return 0;
- }
-}
-
static inline int ndisc_opt_addr_space(struct net_device *dev)
{
return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type));
@@ -233,8 +199,8 @@ static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
return cur <= end && ndisc_is_useropt(cur) ? cur : NULL;
}
-static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
- struct ndisc_options *ndopts)
+struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
+ struct ndisc_options *ndopts)
{
struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;
@@ -297,17 +263,6 @@ static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
return ndopts;
}
-static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
- struct net_device *dev)
-{
- u8 *lladdr = (u8 *)(p + 1);
- int lladdrlen = p->nd_opt_len << 3;
- int prepad = ndisc_addr_option_pad(dev->type);
- if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad))
- return NULL;
- return lladdr + prepad;
-}
-
int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
{
switch (dev->type) {
@@ -1379,16 +1334,6 @@ out:
static void ndisc_redirect_rcv(struct sk_buff *skb)
{
- struct inet6_dev *in6_dev;
- struct icmp6hdr *icmph;
- const struct in6_addr *dest;
- const struct in6_addr *target; /* new first hop to destination */
- struct neighbour *neigh;
- int on_link = 0;
- struct ndisc_options ndopts;
- int optlen;
- u8 *lladdr = NULL;
-
#ifdef CONFIG_IPV6_NDISC_NODETYPE
switch (skb->ndisc_nodetype) {
case NDISC_NODETYPE_HOST:
@@ -1405,65 +1350,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
return;
}
- optlen = skb->tail - skb->transport_header;
- optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
-
- if (optlen < 0) {
- ND_PRINTK(2, warn, "Redirect: packet too short\n");
- return;
- }
-
- icmph = icmp6_hdr(skb);
- target = (const struct in6_addr *) (icmph + 1);
- dest = target + 1;
-
- if (ipv6_addr_is_multicast(dest)) {
- ND_PRINTK(2, warn,
- "Redirect: destination address is multicast\n");
- return;
- }
-
- if (ipv6_addr_equal(dest, target)) {
- on_link = 1;
- } else if (ipv6_addr_type(target) !=
- (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
- ND_PRINTK(2, warn,
- "Redirect: target address is not link-local unicast\n");
- return;
- }
-
- in6_dev = __in6_dev_get(skb->dev);
- if (!in6_dev)
- return;
- if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
- return;
-
- /* RFC2461 8.1:
- * The IP source address of the Redirect MUST be the same as the current
- * first-hop router for the specified ICMP Destination Address.
- */
-
- if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
- ND_PRINTK(2, warn, "Redirect: invalid ND options\n");
- return;
- }
- if (ndopts.nd_opts_tgt_lladdr) {
- lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
- skb->dev);
- if (!lladdr) {
- ND_PRINTK(2, warn,
- "Redirect: invalid link-layer address length\n");
- return;
- }
- }
-
- neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
- if (neigh) {
- rt6_redirect(dest, &ipv6_hdr(skb)->daddr,
- &ipv6_hdr(skb)->saddr, neigh, lladdr,
- on_link);
- neigh_release(neigh);
- }
+ icmpv6_notify(skb, NDISC_REDIRECT, 0, 0);
}
void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
@@ -1472,6 +1359,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
struct net *net = dev_net(dev);
struct sock *sk = net->ipv6.ndisc_sk;
int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
+ struct inet_peer *peer;
struct sk_buff *buff;
struct icmp6hdr *icmph;
struct in6_addr saddr_buf;
@@ -1485,6 +1373,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
int rd_len;
int err;
u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
+ bool ret;
if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n",
@@ -1518,9 +1407,11 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
"Redirect: destination is not a neighbour\n");
goto release;
}
- if (!rt->rt6i_peer)
- rt6_bind_peer(rt, 1);
- if (!inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
+ peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
+ ret = inet_peer_xrlim_allow(peer, 1*HZ);
+ if (peer)
+ inet_putpeer(peer);
+ if (!ret)
goto release;
if (dev->addr_len) {
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 3224ef90a21a..4794f96cf2e0 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -143,11 +143,11 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
return NF_ACCEPT;
}
-static unsigned int ipv6_confirm(unsigned int hooknum,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int ipv6_helper(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
struct nf_conn *ct;
const struct nf_conn_help *help;
@@ -161,15 +161,15 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
/* This is where we call the helper: as the packet goes out. */
ct = nf_ct_get(skb, &ctinfo);
if (!ct || ctinfo == IP_CT_RELATED_REPLY)
- goto out;
+ return NF_ACCEPT;
help = nfct_help(ct);
if (!help)
- goto out;
+ return NF_ACCEPT;
/* rcu_read_lock()ed by nf_hook_slow */
helper = rcu_dereference(help->helper);
if (!helper)
- goto out;
+ return NF_ACCEPT;
protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum,
skb->len - extoff);
@@ -179,12 +179,19 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
}
ret = helper->help(skb, protoff, ct, ctinfo);
- if (ret != NF_ACCEPT) {
+ if (ret != NF_ACCEPT && (ret & NF_VERDICT_MASK) != NF_QUEUE) {
nf_log_packet(NFPROTO_IPV6, hooknum, skb, in, out, NULL,
"nf_ct_%s: dropping packet", helper->name);
- return ret;
}
-out:
+ return ret;
+}
+
+static unsigned int ipv6_confirm(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
/* We've seen it coming out the other side: confirm it */
return nf_conntrack_confirm(skb);
}
@@ -254,6 +261,13 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
.priority = NF_IP6_PRI_CONNTRACK,
},
{
+ .hook = ipv6_helper,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP6_PRI_CONNTRACK_HELPER,
+ },
+ {
.hook = ipv6_confirm,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
@@ -261,6 +275,13 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
.priority = NF_IP6_PRI_LAST,
},
{
+ .hook = ipv6_helper,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP6_PRI_CONNTRACK_HELPER,
+ },
+ {
.hook = ipv6_confirm,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
@@ -333,37 +354,75 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>");
-static int __init nf_conntrack_l3proto_ipv6_init(void)
+static int ipv6_net_init(struct net *net)
{
int ret = 0;
- need_conntrack();
- nf_defrag_ipv6_enable();
-
- ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp6);
+ ret = nf_conntrack_l4proto_register(net,
+ &nf_conntrack_l4proto_tcp6);
if (ret < 0) {
- pr_err("nf_conntrack_ipv6: can't register tcp.\n");
- return ret;
+ printk(KERN_ERR "nf_conntrack_l4proto_tcp6: protocol register failed\n");
+ goto out;
}
-
- ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp6);
+ ret = nf_conntrack_l4proto_register(net,
+ &nf_conntrack_l4proto_udp6);
if (ret < 0) {
- pr_err("nf_conntrack_ipv6: can't register udp.\n");
- goto cleanup_tcp;
+ printk(KERN_ERR "nf_conntrack_l4proto_udp6: protocol register failed\n");
+ goto cleanup_tcp6;
}
-
- ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmpv6);
+ ret = nf_conntrack_l4proto_register(net,
+ &nf_conntrack_l4proto_icmpv6);
if (ret < 0) {
- pr_err("nf_conntrack_ipv6: can't register icmpv6.\n");
- goto cleanup_udp;
+ printk(KERN_ERR "nf_conntrack_l4proto_icmp6: protocol register failed\n");
+ goto cleanup_udp6;
}
-
- ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv6);
+ ret = nf_conntrack_l3proto_register(net,
+ &nf_conntrack_l3proto_ipv6);
if (ret < 0) {
- pr_err("nf_conntrack_ipv6: can't register ipv6\n");
+ printk(KERN_ERR "nf_conntrack_l3proto_ipv6: protocol register failed\n");
goto cleanup_icmpv6;
}
+ return 0;
+ cleanup_icmpv6:
+ nf_conntrack_l4proto_unregister(net,
+ &nf_conntrack_l4proto_icmpv6);
+ cleanup_udp6:
+ nf_conntrack_l4proto_unregister(net,
+ &nf_conntrack_l4proto_udp6);
+ cleanup_tcp6:
+ nf_conntrack_l4proto_unregister(net,
+ &nf_conntrack_l4proto_tcp6);
+ out:
+ return ret;
+}
+static void ipv6_net_exit(struct net *net)
+{
+ nf_conntrack_l3proto_unregister(net,
+ &nf_conntrack_l3proto_ipv6);
+ nf_conntrack_l4proto_unregister(net,
+ &nf_conntrack_l4proto_icmpv6);
+ nf_conntrack_l4proto_unregister(net,
+ &nf_conntrack_l4proto_udp6);
+ nf_conntrack_l4proto_unregister(net,
+ &nf_conntrack_l4proto_tcp6);
+}
+
+static struct pernet_operations ipv6_net_ops = {
+ .init = ipv6_net_init,
+ .exit = ipv6_net_exit,
+};
+
+static int __init nf_conntrack_l3proto_ipv6_init(void)
+{
+ int ret = 0;
+
+ need_conntrack();
+ nf_defrag_ipv6_enable();
+
+ ret = register_pernet_subsys(&ipv6_net_ops);
+ if (ret < 0)
+ goto cleanup_pernet;
ret = nf_register_hooks(ipv6_conntrack_ops,
ARRAY_SIZE(ipv6_conntrack_ops));
if (ret < 0) {
@@ -374,13 +433,8 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
return ret;
cleanup_ipv6:
- nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
- cleanup_icmpv6:
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
- cleanup_udp:
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6);
- cleanup_tcp:
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
+ unregister_pernet_subsys(&ipv6_net_ops);
+ cleanup_pernet:
return ret;
}
@@ -388,10 +442,7 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void)
{
synchronize_net();
nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
- nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6);
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
+ unregister_pernet_subsys(&ipv6_net_ops);
}
module_init(nf_conntrack_l3proto_ipv6_init);
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 3e81904fbbcd..2d54b2061d68 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -29,6 +29,11 @@
static unsigned int nf_ct_icmpv6_timeout __read_mostly = 30*HZ;
+static inline struct nf_icmp_net *icmpv6_pernet(struct net *net)
+{
+ return &net->ct.nf_ct_proto.icmpv6;
+}
+
static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
struct nf_conntrack_tuple *tuple)
@@ -90,7 +95,7 @@ static int icmpv6_print_tuple(struct seq_file *s,
static unsigned int *icmpv6_get_timeouts(struct net *net)
{
- return &nf_ct_icmpv6_timeout;
+ return &icmpv6_pernet(net)->timeout;
}
/* Returns verdict for packet, or -1 for invalid. */
@@ -281,16 +286,18 @@ static int icmpv6_nlattr_tuple_size(void)
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_cttimeout.h>
-static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[], void *data)
+static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[],
+ struct net *net, void *data)
{
unsigned int *timeout = data;
+ struct nf_icmp_net *in = icmpv6_pernet(net);
if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) {
*timeout =
ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ;
} else {
/* Set default ICMPv6 timeout. */
- *timeout = nf_ct_icmpv6_timeout;
+ *timeout = in->timeout;
}
return 0;
}
@@ -315,11 +322,9 @@ icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = {
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
#ifdef CONFIG_SYSCTL
-static struct ctl_table_header *icmpv6_sysctl_header;
static struct ctl_table icmpv6_sysctl_table[] = {
{
.procname = "nf_conntrack_icmpv6_timeout",
- .data = &nf_ct_icmpv6_timeout,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
@@ -328,6 +333,36 @@ static struct ctl_table icmpv6_sysctl_table[] = {
};
#endif /* CONFIG_SYSCTL */
+static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn,
+ struct nf_icmp_net *in)
+{
+#ifdef CONFIG_SYSCTL
+ pn->ctl_table = kmemdup(icmpv6_sysctl_table,
+ sizeof(icmpv6_sysctl_table),
+ GFP_KERNEL);
+ if (!pn->ctl_table)
+ return -ENOMEM;
+
+ pn->ctl_table[0].data = &in->timeout;
+#endif
+ return 0;
+}
+
+static int icmpv6_init_net(struct net *net, u_int16_t proto)
+{
+ struct nf_icmp_net *in = icmpv6_pernet(net);
+ struct nf_proto_net *pn = &in->pn;
+
+ in->timeout = nf_ct_icmpv6_timeout;
+
+ return icmpv6_kmemdup_sysctl_table(pn, in);
+}
+
+static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
+{
+ return &net->ct.nf_ct_proto.icmpv6.pn;
+}
+
struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
{
.l3proto = PF_INET6,
@@ -355,8 +390,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
.nla_policy = icmpv6_timeout_nla_policy,
},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
-#ifdef CONFIG_SYSCTL
- .ctl_table_header = &icmpv6_sysctl_header,
- .ctl_table = icmpv6_sysctl_table,
-#endif
+ .init_net = icmpv6_init_net,
+ .get_net_proto = icmpv6_get_net_proto,
};
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index 9a7978fdc02a..053082dfc93e 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -29,9 +29,7 @@ const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol)
{
- int hash = protocol & (MAX_INET_PROTOS - 1);
-
- return !cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
+ return !cmpxchg((const struct inet6_protocol **)&inet6_protos[protocol],
NULL, prot) ? 0 : -1;
}
EXPORT_SYMBOL(inet6_add_protocol);
@@ -42,9 +40,9 @@ EXPORT_SYMBOL(inet6_add_protocol);
int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol)
{
- int ret, hash = protocol & (MAX_INET_PROTOS - 1);
+ int ret;
- ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
+ ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[protocol],
prot, NULL) == prot) ? 0 : -1;
synchronize_net();
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 93d69836fded..ef0579d5bca6 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -165,7 +165,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
saddr = &ipv6_hdr(skb)->saddr;
daddr = saddr + 1;
- hash = nexthdr & (MAX_INET_PROTOS - 1);
+ hash = nexthdr & (RAW_HTABLE_SIZE - 1);
read_lock(&raw_v6_hashinfo.lock);
sk = sk_head(&raw_v6_hashinfo.ht[hash]);
@@ -229,7 +229,7 @@ bool raw6_local_deliver(struct sk_buff *skb, int nexthdr)
{
struct sock *raw_sk;
- raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (MAX_INET_PROTOS - 1)]);
+ raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (RAW_HTABLE_SIZE - 1)]);
if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
raw_sk = NULL;
@@ -328,9 +328,12 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
return;
harderr = icmpv6_err_convert(type, code, &err);
- if (type == ICMPV6_PKT_TOOBIG)
+ if (type == ICMPV6_PKT_TOOBIG) {
+ ip6_sk_update_pmtu(skb, sk, info);
harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
-
+ }
+ if (type == NDISC_REDIRECT)
+ ip6_sk_redirect(skb, sk);
if (np->recverr) {
u8 *payload = skb->data;
if (!inet->hdrincl)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index becb048d18d4..84f6564dd372 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -78,7 +78,10 @@ static int ip6_dst_gc(struct dst_ops *ops);
static int ip6_pkt_discard(struct sk_buff *skb);
static int ip6_pkt_discard_out(struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
-static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
+static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu);
+static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb);
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
@@ -99,10 +102,7 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
if (!(rt->dst.flags & DST_HOST))
return NULL;
- if (!rt->rt6i_peer)
- rt6_bind_peer(rt, 1);
-
- peer = rt->rt6i_peer;
+ peer = rt6_get_peer_create(rt);
if (peer) {
u32 *old_p = __DST_METRICS_PTR(old);
unsigned long prev, new;
@@ -123,21 +123,27 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
return p;
}
-static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
+static inline const void *choose_neigh_daddr(struct rt6_info *rt,
+ struct sk_buff *skb,
+ const void *daddr)
{
struct in6_addr *p = &rt->rt6i_gateway;
if (!ipv6_addr_any(p))
return (const void *) p;
+ else if (skb)
+ return &ipv6_hdr(skb)->daddr;
return daddr;
}
-static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
+static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
+ struct sk_buff *skb,
+ const void *daddr)
{
struct rt6_info *rt = (struct rt6_info *) dst;
struct neighbour *n;
- daddr = choose_neigh_daddr(rt, daddr);
+ daddr = choose_neigh_daddr(rt, skb, daddr);
n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
if (n)
return n;
@@ -152,7 +158,7 @@ static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
if (IS_ERR(n))
return PTR_ERR(n);
}
- dst_set_neighbour(&rt->dst, n);
+ rt->n = n;
return 0;
}
@@ -171,6 +177,7 @@ static struct dst_ops ip6_dst_ops_template = {
.negative_advice = ip6_negative_advice,
.link_failure = ip6_link_failure,
.update_pmtu = ip6_rt_update_pmtu,
+ .redirect = rt6_do_redirect,
.local_out = __ip6_local_out,
.neigh_lookup = ip6_neigh_lookup,
};
@@ -182,7 +189,13 @@ static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
return mtu ? : dst->dev->mtu;
}
-static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu)
+{
+}
+
+static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb)
{
}
@@ -200,6 +213,7 @@ static struct dst_ops ip6_dst_blackhole_ops = {
.mtu = ip6_blackhole_mtu,
.default_advmss = ip6_default_advmss,
.update_pmtu = ip6_rt_blackhole_update_pmtu,
+ .redirect = ip6_rt_blackhole_redirect,
.cow_metrics = ip6_rt_blackhole_cow_metrics,
.neigh_lookup = ip6_neigh_lookup,
};
@@ -261,16 +275,20 @@ static struct rt6_info ip6_blk_hole_entry_template = {
#endif
/* allocate dst with ip6_dst_ops */
-static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
+static inline struct rt6_info *ip6_dst_alloc(struct net *net,
struct net_device *dev,
- int flags)
+ int flags,
+ struct fib6_table *table)
{
- struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
+ struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
+ 0, 0, flags);
- if (rt)
- memset(&rt->rt6i_table, 0,
- sizeof(*rt) - sizeof(struct dst_entry));
+ if (rt) {
+ struct dst_entry *dst = &rt->dst;
+ memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
+ rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
+ }
return rt;
}
@@ -278,7 +296,9 @@ static void ip6_dst_destroy(struct dst_entry *dst)
{
struct rt6_info *rt = (struct rt6_info *)dst;
struct inet6_dev *idev = rt->rt6i_idev;
- struct inet_peer *peer = rt->rt6i_peer;
+
+ if (rt->n)
+ neigh_release(rt->n);
if (!(rt->dst.flags & DST_HOST))
dst_destroy_metrics_generic(dst);
@@ -291,8 +311,8 @@ static void ip6_dst_destroy(struct dst_entry *dst)
if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
dst_release(dst->from);
- if (peer) {
- rt->rt6i_peer = NULL;
+ if (rt6_has_peer(rt)) {
+ struct inet_peer *peer = rt6_peer_ptr(rt);
inet_putpeer(peer);
}
}
@@ -306,13 +326,20 @@ static u32 rt6_peer_genid(void)
void rt6_bind_peer(struct rt6_info *rt, int create)
{
+ struct inet_peer_base *base;
struct inet_peer *peer;
- peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
- if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
- inet_putpeer(peer);
- else
- rt->rt6i_peer_genid = rt6_peer_genid();
+ base = inetpeer_base_ptr(rt->_rt6i_peer);
+ if (!base)
+ return;
+
+ peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
+ if (peer) {
+ if (!rt6_set_peer(rt, peer))
+ inet_putpeer(peer);
+ else
+ rt->rt6i_peer_genid = rt6_peer_genid();
+ }
}
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -323,12 +350,19 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
struct net_device *loopback_dev =
dev_net(dev)->loopback_dev;
- if (dev != loopback_dev && idev && idev->dev == dev) {
- struct inet6_dev *loopback_idev =
- in6_dev_get(loopback_dev);
- if (loopback_idev) {
- rt->rt6i_idev = loopback_idev;
- in6_dev_put(idev);
+ if (dev != loopback_dev) {
+ if (idev && idev->dev == dev) {
+ struct inet6_dev *loopback_idev =
+ in6_dev_get(loopback_dev);
+ if (loopback_idev) {
+ rt->rt6i_idev = loopback_idev;
+ in6_dev_put(idev);
+ }
+ }
+ if (rt->n && rt->n->dev == dev) {
+ rt->n->dev = loopback_dev;
+ dev_hold(loopback_dev);
+ dev_put(dev);
}
}
}
@@ -418,7 +452,7 @@ static void rt6_probe(struct rt6_info *rt)
* to no more than one per minute.
*/
rcu_read_lock();
- neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
+ neigh = rt ? rt->n : NULL;
if (!neigh || (neigh->nud_state & NUD_VALID))
goto out;
read_lock_bh(&neigh->lock);
@@ -465,7 +499,7 @@ static inline int rt6_check_neigh(struct rt6_info *rt)
int m;
rcu_read_lock();
- neigh = dst_get_neighbour_noref(&rt->dst);
+ neigh = rt->n;
if (rt->rt6i_flags & RTF_NONEXTHOP ||
!(rt->rt6i_flags & RTF_GATEWAY))
m = 1;
@@ -812,7 +846,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
if (rt) {
rt->rt6i_flags |= RTF_CACHE;
- dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
+ rt->n = neigh_clone(ort->n);
}
return rt;
}
@@ -846,7 +880,7 @@ restart:
dst_hold(&rt->dst);
read_unlock_bh(&table->tb6_lock);
- if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
+ if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
else if (!(rt->dst.flags & DST_HOST))
nrt = rt6_alloc_clone(rt, &fl6->daddr);
@@ -931,6 +965,8 @@ struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
{
int flags = 0;
+ fl6->flowi6_iif = net->loopback_dev->ifindex;
+
if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
flags |= RT6_LOOKUP_F_IFACE;
@@ -951,10 +987,11 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
if (rt) {
- memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
-
new = &rt->dst;
+ memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
+ rt6_init_peer(rt, net->ipv6.peers);
+
new->__use = 1;
new->input = dst_discard;
new->output = dst_discard;
@@ -996,7 +1033,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
if (rt->rt6i_peer_genid != rt6_peer_genid()) {
- if (!rt->rt6i_peer)
+ if (!rt6_has_peer(rt))
rt6_bind_peer(rt, 0);
rt->rt6i_peer_genid = rt6_peer_genid();
}
@@ -1038,11 +1075,15 @@ static void ip6_link_failure(struct sk_buff *skb)
}
}
-static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu)
{
struct rt6_info *rt6 = (struct rt6_info*)dst;
+ dst_confirm(dst);
if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
+ struct net *net = dev_net(dst->dev);
+
rt6->rt6i_flags |= RTF_MODIFIED;
if (mtu < IPV6_MIN_MTU) {
u32 features = dst_metric(dst, RTAX_FEATURES);
@@ -1051,9 +1092,66 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
dst_metric_set(dst, RTAX_FEATURES, features);
}
dst_metric_set(dst, RTAX_MTU, mtu);
+ rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
}
}
+void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
+ int oif, u32 mark)
+{
+ const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_oif = oif;
+ fl6.flowi6_mark = mark;
+ fl6.flowi6_flags = 0;
+ fl6.daddr = iph->daddr;
+ fl6.saddr = iph->saddr;
+ fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
+
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (!dst->error)
+ ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
+ dst_release(dst);
+}
+EXPORT_SYMBOL_GPL(ip6_update_pmtu);
+
+void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
+{
+ ip6_update_pmtu(skb, sock_net(sk), mtu,
+ sk->sk_bound_dev_if, sk->sk_mark);
+}
+EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
+
+void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
+{
+ const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_oif = oif;
+ fl6.flowi6_mark = mark;
+ fl6.flowi6_flags = 0;
+ fl6.daddr = iph->daddr;
+ fl6.saddr = iph->saddr;
+ fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
+
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (!dst->error)
+ rt6_do_redirect(dst, NULL, skb);
+ dst_release(dst);
+}
+EXPORT_SYMBOL_GPL(ip6_redirect);
+
+void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
+{
+ ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
+}
+EXPORT_SYMBOL_GPL(ip6_sk_redirect);
+
static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
struct net_device *dev = dst->dev;
@@ -1110,7 +1208,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
if (unlikely(!idev))
return ERR_PTR(-ENODEV);
- rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
+ rt = ip6_dst_alloc(net, dev, 0, NULL);
if (unlikely(!rt)) {
in6_dev_put(idev);
dst = ERR_PTR(-ENOMEM);
@@ -1120,7 +1218,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
if (neigh)
neigh_hold(neigh);
else {
- neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
+ neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
if (IS_ERR(neigh)) {
in6_dev_put(idev);
dst_free(&rt->dst);
@@ -1130,7 +1228,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
rt->dst.flags |= DST_HOST;
rt->dst.output = ip6_output;
- dst_set_neighbour(&rt->dst, neigh);
+ rt->n = neigh;
atomic_set(&rt->dst.__refcnt, 1);
rt->rt6i_dst.addr = fl6->daddr;
rt->rt6i_dst.plen = 128;
@@ -1292,7 +1390,7 @@ int ip6_route_add(struct fib6_config *cfg)
if (!table)
goto out;
- rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
+ rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
if (!rt) {
err = -ENOMEM;
@@ -1546,107 +1644,94 @@ static int ip6_route_del(struct fib6_config *cfg)
return err;
}
-/*
- * Handle redirects
- */
-struct ip6rd_flowi {
- struct flowi6 fl6;
- struct in6_addr gateway;
-};
-
-static struct rt6_info *__ip6_route_redirect(struct net *net,
- struct fib6_table *table,
- struct flowi6 *fl6,
- int flags)
+static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
- struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
- struct rt6_info *rt;
- struct fib6_node *fn;
+ struct net *net = dev_net(skb->dev);
+ struct netevent_redirect netevent;
+ struct rt6_info *rt, *nrt = NULL;
+ const struct in6_addr *target;
+ struct ndisc_options ndopts;
+ const struct in6_addr *dest;
+ struct neighbour *old_neigh;
+ struct inet6_dev *in6_dev;
+ struct neighbour *neigh;
+ struct icmp6hdr *icmph;
+ int optlen, on_link;
+ u8 *lladdr;
- /*
- * Get the "current" route for this destination and
- * check if the redirect has come from approriate router.
- *
- * RFC 2461 specifies that redirects should only be
- * accepted if they come from the nexthop to the target.
- * Due to the way the routes are chosen, this notion
- * is a bit fuzzy and one might need to check all possible
- * routes.
- */
+ optlen = skb->tail - skb->transport_header;
+ optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
- read_lock_bh(&table->tb6_lock);
- fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
-restart:
- for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
- /*
- * Current route is on-link; redirect is always invalid.
- *
- * Seems, previous statement is not true. It could
- * be node, which looks for us as on-link (f.e. proxy ndisc)
- * But then router serving it might decide, that we should
- * know truth 8)8) --ANK (980726).
- */
- if (rt6_check_expired(rt))
- continue;
- if (!(rt->rt6i_flags & RTF_GATEWAY))
- continue;
- if (fl6->flowi6_oif != rt->dst.dev->ifindex)
- continue;
- if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
- continue;
- break;
+ if (optlen < 0) {
+ net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
+ return;
}
- if (!rt)
- rt = net->ipv6.ip6_null_entry;
- BACKTRACK(net, &fl6->saddr);
-out:
- dst_hold(&rt->dst);
-
- read_unlock_bh(&table->tb6_lock);
-
- return rt;
-};
+ icmph = icmp6_hdr(skb);
+ target = (const struct in6_addr *) (icmph + 1);
+ dest = target + 1;
-static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
- const struct in6_addr *src,
- const struct in6_addr *gateway,
- struct net_device *dev)
-{
- int flags = RT6_LOOKUP_F_HAS_SADDR;
- struct net *net = dev_net(dev);
- struct ip6rd_flowi rdfl = {
- .fl6 = {
- .flowi6_oif = dev->ifindex,
- .daddr = *dest,
- .saddr = *src,
- },
- };
+ if (ipv6_addr_is_multicast(dest)) {
+ net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
+ return;
+ }
- rdfl.gateway = *gateway;
+ on_link = 0;
+ if (ipv6_addr_equal(dest, target)) {
+ on_link = 1;
+ } else if (ipv6_addr_type(target) !=
+ (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
+ net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
+ return;
+ }
- if (rt6_need_strict(dest))
- flags |= RT6_LOOKUP_F_IFACE;
+ in6_dev = __in6_dev_get(skb->dev);
+ if (!in6_dev)
+ return;
+ if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
+ return;
- return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
- flags, __ip6_route_redirect);
-}
+ /* RFC2461 8.1:
+ * The IP source address of the Redirect MUST be the same as the current
+ * first-hop router for the specified ICMP Destination Address.
+ */
-void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
- const struct in6_addr *saddr,
- struct neighbour *neigh, u8 *lladdr, int on_link)
-{
- struct rt6_info *rt, *nrt = NULL;
- struct netevent_redirect netevent;
- struct net *net = dev_net(neigh->dev);
+ if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
+ net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
+ return;
+ }
- rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
+ lladdr = NULL;
+ if (ndopts.nd_opts_tgt_lladdr) {
+ lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
+ skb->dev);
+ if (!lladdr) {
+ net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
+ return;
+ }
+ }
+ rt = (struct rt6_info *) dst;
if (rt == net->ipv6.ip6_null_entry) {
net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
- goto out;
+ return;
}
+ /* Redirect received -> path was valid.
+ * Look, redirects are sent only in response to data packets,
+ * so that this nexthop apparently is reachable. --ANK
+ */
+ dst_confirm(&rt->dst);
+
+ neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
+ if (!neigh)
+ return;
+
+ /* Duplicate redirect: silently ignore. */
+ old_neigh = rt->n;
+ if (neigh == old_neigh)
+ goto out;
+
/*
* We have finally decided to accept it.
*/
@@ -1658,17 +1743,6 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
NEIGH_UPDATE_F_ISROUTER))
);
- /*
- * Redirect received -> path was valid.
- * Look, redirects are sent only in response to data packets,
- * so that this nexthop apparently is reachable. --ANK
- */
- dst_confirm(&rt->dst);
-
- /* Duplicate redirect: silently ignore. */
- if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
- goto out;
-
nrt = ip6_rt_copy(rt, dest);
if (!nrt)
goto out;
@@ -1678,132 +1752,25 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
nrt->rt6i_flags &= ~RTF_GATEWAY;
nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
- dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
+ nrt->n = neigh_clone(neigh);
if (ip6_ins_rt(nrt))
goto out;
netevent.old = &rt->dst;
+ netevent.old_neigh = old_neigh;
netevent.new = &nrt->dst;
+ netevent.new_neigh = neigh;
+ netevent.daddr = dest;
call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
if (rt->rt6i_flags & RTF_CACHE) {
+ rt = (struct rt6_info *) dst_clone(&rt->dst);
ip6_del_rt(rt);
- return;
}
out:
- dst_release(&rt->dst);
-}
-
-/*
- * Handle ICMP "packet too big" messages
- * i.e. Path MTU discovery
- */
-
-static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
- struct net *net, u32 pmtu, int ifindex)
-{
- struct rt6_info *rt, *nrt;
- int allfrag = 0;
-again:
- rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
- if (!rt)
- return;
-
- if (rt6_check_expired(rt)) {
- ip6_del_rt(rt);
- goto again;
- }
-
- if (pmtu >= dst_mtu(&rt->dst))
- goto out;
-
- if (pmtu < IPV6_MIN_MTU) {
- /*
- * According to RFC2460, PMTU is set to the IPv6 Minimum Link
- * MTU (1280) and a fragment header should always be included
- * after a node receiving Too Big message reporting PMTU is
- * less than the IPv6 Minimum Link MTU.
- */
- pmtu = IPV6_MIN_MTU;
- allfrag = 1;
- }
-
- /* New mtu received -> path was valid.
- They are sent only in response to data packets,
- so that this nexthop apparently is reachable. --ANK
- */
- dst_confirm(&rt->dst);
-
- /* Host route. If it is static, it would be better
- not to override it, but add new one, so that
- when cache entry will expire old pmtu
- would return automatically.
- */
- if (rt->rt6i_flags & RTF_CACHE) {
- dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
- if (allfrag) {
- u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
- features |= RTAX_FEATURE_ALLFRAG;
- dst_metric_set(&rt->dst, RTAX_FEATURES, features);
- }
- rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
- rt->rt6i_flags |= RTF_MODIFIED;
- goto out;
- }
-
- /* Network route.
- Two cases are possible:
- 1. It is connected route. Action: COW
- 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
- */
- if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
- nrt = rt6_alloc_cow(rt, daddr, saddr);
- else
- nrt = rt6_alloc_clone(rt, daddr);
-
- if (nrt) {
- dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
- if (allfrag) {
- u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
- features |= RTAX_FEATURE_ALLFRAG;
- dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
- }
-
- /* According to RFC 1981, detecting PMTU increase shouldn't be
- * happened within 5 mins, the recommended timer is 10 mins.
- * Here this route expiration time is set to ip6_rt_mtu_expires
- * which is 10 mins. After 10 mins the decreased pmtu is expired
- * and detecting PMTU increase will be automatically happened.
- */
- rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
- nrt->rt6i_flags |= RTF_DYNAMIC;
- ip6_ins_rt(nrt);
- }
-out:
- dst_release(&rt->dst);
-}
-
-void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
- struct net_device *dev, u32 pmtu)
-{
- struct net *net = dev_net(dev);
-
- /*
- * RFC 1981 states that a node "MUST reduce the size of the packets it
- * is sending along the path" that caused the Packet Too Big message.
- * Since it's not possible in the general case to determine which
- * interface was used to send the original packet, we update the MTU
- * on the interface that will be used to send future packets. We also
- * update the MTU on the interface that received the Packet Too Big in
- * case the original packet was forced out that interface with
- * SO_BINDTODEVICE or similar. This is the next best thing to the
- * correct behaviour, which would be to update the MTU on all
- * interfaces.
- */
- rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
- rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
+ neigh_release(neigh);
}
/*
@@ -1814,8 +1781,8 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
const struct in6_addr *dest)
{
struct net *net = dev_net(ort->dst.dev);
- struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
- ort->dst.dev, 0);
+ struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
+ ort->rt6i_table);
if (rt) {
rt->dst.input = ort->dst.input;
@@ -2099,8 +2066,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
bool anycast)
{
struct net *net = dev_net(idev->dev);
- struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
- net->loopback_dev, 0);
+ struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
int err;
if (!rt) {
@@ -2396,13 +2362,11 @@ static int rt6_fill_node(struct net *net,
int iif, int type, u32 pid, u32 seq,
int prefix, int nowait, unsigned int flags)
{
- const struct inet_peer *peer;
struct rtmsg *rtm;
struct nlmsghdr *nlh;
long expires;
u32 table;
struct neighbour *n;
- u32 ts, tsage;
if (prefix) { /* user wants prefix routes only */
if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
@@ -2440,10 +2404,12 @@ static int rt6_fill_node(struct net *net,
rtm->rtm_protocol = rt->rt6i_protocol;
if (rt->rt6i_flags & RTF_DYNAMIC)
rtm->rtm_protocol = RTPROT_REDIRECT;
- else if (rt->rt6i_flags & RTF_ADDRCONF)
- rtm->rtm_protocol = RTPROT_KERNEL;
- else if (rt->rt6i_flags & RTF_DEFAULT)
- rtm->rtm_protocol = RTPROT_RA;
+ else if (rt->rt6i_flags & RTF_ADDRCONF) {
+ if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
+ rtm->rtm_protocol = RTPROT_RA;
+ else
+ rtm->rtm_protocol = RTPROT_KERNEL;
+ }
if (rt->rt6i_flags & RTF_CACHE)
rtm->rtm_flags |= RTM_F_CLONED;
@@ -2500,7 +2466,7 @@ static int rt6_fill_node(struct net *net,
goto nla_put_failure;
rcu_read_lock();
- n = dst_get_neighbour_noref(&rt->dst);
+ n = rt->n;
if (n) {
if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
rcu_read_unlock();
@@ -2521,15 +2487,7 @@ static int rt6_fill_node(struct net *net,
else
expires = INT_MAX;
- peer = rt->rt6i_peer;
- ts = tsage = 0;
- if (peer && peer->tcp_ts_stamp) {
- ts = peer->tcp_ts;
- tsage = get_seconds() - peer->tcp_ts_stamp;
- }
-
- if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
- expires, rt->dst.error) < 0)
+ if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
goto nla_put_failure;
return nlmsg_end(skb, nlh);
@@ -2722,7 +2680,7 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
seq_puts(m, "00000000000000000000000000000000 00 ");
#endif
rcu_read_lock();
- n = dst_get_neighbour_noref(&rt->dst);
+ n = rt->n;
if (n) {
seq_printf(m, "%pi6", n->primary_key);
} else {
@@ -3007,6 +2965,31 @@ static struct pernet_operations ip6_route_net_ops = {
.exit = ip6_route_net_exit,
};
+static int __net_init ipv6_inetpeer_init(struct net *net)
+{
+ struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
+
+ if (!bp)
+ return -ENOMEM;
+ inet_peer_base_init(bp);
+ net->ipv6.peers = bp;
+ return 0;
+}
+
+static void __net_exit ipv6_inetpeer_exit(struct net *net)
+{
+ struct inet_peer_base *bp = net->ipv6.peers;
+
+ net->ipv6.peers = NULL;
+ inetpeer_invalidate_tree(bp);
+ kfree(bp);
+}
+
+static struct pernet_operations ipv6_inetpeer_ops = {
+ .init = ipv6_inetpeer_init,
+ .exit = ipv6_inetpeer_exit,
+};
+
static struct pernet_operations ip6_route_net_late_ops = {
.init = ip6_route_net_init_late,
.exit = ip6_route_net_exit_late,
@@ -3032,10 +3015,14 @@ int __init ip6_route_init(void)
if (ret)
goto out_kmem_cache;
- ret = register_pernet_subsys(&ip6_route_net_ops);
+ ret = register_pernet_subsys(&ipv6_inetpeer_ops);
if (ret)
goto out_dst_entries;
+ ret = register_pernet_subsys(&ip6_route_net_ops);
+ if (ret)
+ goto out_register_inetpeer;
+
ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
/* Registering of the loopback is done before this portion of code,
@@ -3088,6 +3075,8 @@ out_fib6_init:
fib6_gc_cleanup();
out_register_subsys:
unregister_pernet_subsys(&ip6_route_net_ops);
+out_register_inetpeer:
+ unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
@@ -3102,6 +3091,7 @@ void ip6_route_cleanup(void)
fib6_rules_cleanup();
xfrm6_fini();
fib6_gc_cleanup();
+ unregister_pernet_subsys(&ipv6_inetpeer_ops);
unregister_pernet_subsys(&ip6_route_net_ops);
dst_entries_destroy(&ip6_dst_blackhole_ops);
kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 60415711563f..3bd1bfc01f85 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -527,9 +527,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
case ICMP_PORT_UNREACH:
/* Impossible event. */
return 0;
- case ICMP_FRAG_NEEDED:
- /* Soft state for pmtu is maintained by IP core. */
- return 0;
default:
/* All others are translated to HOST_UNREACH.
rfc2003 contains "deep thoughts" about NET_UNREACH,
@@ -542,6 +539,8 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
if (code != ICMP_EXC_TTL)
return 0;
break;
+ case ICMP_REDIRECT:
+ break;
}
err = -ENOENT;
@@ -551,7 +550,23 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
skb->dev,
iph->daddr,
iph->saddr);
- if (t == NULL || t->parms.iph.daddr == 0)
+ if (t == NULL)
+ goto out;
+
+ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
+ ipv4_update_pmtu(skb, dev_net(skb->dev), info,
+ t->dev->ifindex, 0, IPPROTO_IPV6, 0);
+ err = 0;
+ goto out;
+ }
+ if (type == ICMP_REDIRECT) {
+ ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
+ IPPROTO_IPV6, 0);
+ err = 0;
+ goto out;
+ }
+
+ if (t->parms.iph.daddr == 0)
goto out;
err = 0;
@@ -792,7 +807,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
}
if (tunnel->parms.iph.daddr && skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
if (skb->len > mtu) {
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 8e951d8d3b81..bb46061c813a 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -21,9 +21,6 @@
#include <net/ipv6.h>
#include <net/tcp.h>
-extern int sysctl_tcp_syncookies;
-extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
-
#define COOKIEBITS 24 /* Upper bits store count */
#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
@@ -180,7 +177,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
- tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
+ tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL);
if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
goto out;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 9df64a50b075..0302ec3fecfc 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -277,22 +277,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
rt = (struct rt6_info *) dst;
if (tcp_death_row.sysctl_tw_recycle &&
!tp->rx_opt.ts_recent_stamp &&
- ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) {
- struct inet_peer *peer = rt6_get_peer(rt);
- /*
- * VJ's idea. We save last timestamp seen from
- * the destination in peer table, when entering state
- * TIME-WAIT * and initialize rx_opt.ts_recent from it,
- * when trying new connection.
- */
- if (peer) {
- inet_peer_refcheck(peer);
- if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
- tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
- tp->rx_opt.ts_recent = peer->tcp_ts;
- }
- }
- }
+ ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr))
+ tcp_fetch_timewait_stamp(sk, dst);
icsk->icsk_ext_hdr_len = 0;
if (np->opt)
@@ -377,6 +363,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
np = inet6_sk(sk);
+ if (type == NDISC_REDIRECT) {
+ struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
+
+ if (dst)
+ dst->ops->redirect(dst, sk, skb);
+ }
+
if (type == ICMPV6_PKT_TOOBIG) {
struct dst_entry *dst;
@@ -385,41 +378,14 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
goto out;
- /* icmp should have updated the destination cache entry */
- dst = __sk_dst_check(sk, np->dst_cookie);
-
- if (dst == NULL) {
- struct inet_sock *inet = inet_sk(sk);
- struct flowi6 fl6;
-
- /* BUGGG_FUTURE: Again, it is not clear how
- to handle rthdr case. Ignore this complexity
- for now.
- */
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_TCP;
- fl6.daddr = np->daddr;
- fl6.saddr = np->saddr;
- fl6.flowi6_oif = sk->sk_bound_dev_if;
- fl6.flowi6_mark = sk->sk_mark;
- fl6.fl6_dport = inet->inet_dport;
- fl6.fl6_sport = inet->inet_sport;
- security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
-
- dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false);
- if (IS_ERR(dst)) {
- sk->sk_err_soft = -PTR_ERR(dst);
- goto out;
- }
-
- } else
- dst_hold(dst);
+ dst = inet6_csk_update_pmtu(sk, ntohl(info));
+ if (!dst)
+ goto out;
if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
tcp_sync_mss(sk, dst_mtu(dst));
tcp_simple_retransmit(sk);
- } /* else let the usual retransmit timer handle it */
- dst_release(dst);
+ }
goto out;
}
@@ -475,62 +441,43 @@ out:
}
-static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
+static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
+ struct flowi6 *fl6,
+ struct request_sock *req,
struct request_values *rvp,
u16 queue_mapping)
{
struct inet6_request_sock *treq = inet6_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
struct sk_buff * skb;
- struct ipv6_txoptions *opt = NULL;
- struct in6_addr * final_p, final;
- struct flowi6 fl6;
- struct dst_entry *dst;
- int err;
+ int err = -ENOMEM;
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_TCP;
- fl6.daddr = treq->rmt_addr;
- fl6.saddr = treq->loc_addr;
- fl6.flowlabel = 0;
- fl6.flowi6_oif = treq->iif;
- fl6.flowi6_mark = sk->sk_mark;
- fl6.fl6_dport = inet_rsk(req)->rmt_port;
- fl6.fl6_sport = inet_rsk(req)->loc_port;
- security_req_classify_flow(req, flowi6_to_flowi(&fl6));
-
- opt = np->opt;
- final_p = fl6_update_dst(&fl6, opt, &final);
-
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
- if (IS_ERR(dst)) {
- err = PTR_ERR(dst);
- dst = NULL;
+ /* First, grab a route. */
+ if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
goto done;
- }
+
skb = tcp_make_synack(sk, dst, req, rvp);
- err = -ENOMEM;
+
if (skb) {
__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
- fl6.daddr = treq->rmt_addr;
+ fl6->daddr = treq->rmt_addr;
skb_set_queue_mapping(skb, queue_mapping);
- err = ip6_xmit(sk, skb, &fl6, opt, np->tclass);
+ err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
err = net_xmit_eval(err);
}
done:
- if (opt && opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
- dst_release(dst);
return err;
}
static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
struct request_values *rvp)
{
+ struct flowi6 fl6;
+
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
- return tcp_v6_send_synack(sk, req, rvp, 0);
+ return tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0);
}
static void tcp_v6_reqsk_destructor(struct request_sock *req)
@@ -1057,6 +1004,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
__u32 isn = TCP_SKB_CB(skb)->when;
struct dst_entry *dst = NULL;
+ struct flowi6 fl6;
bool want_cookie = false;
if (skb->protocol == htons(ETH_P_IP))
@@ -1085,7 +1033,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
tmp_opt.user_mss = tp->rx_opt.user_mss;
- tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
+ tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
if (tmp_opt.cookie_plus > 0 &&
tmp_opt.saw_tstamp &&
@@ -1150,8 +1098,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
treq->iif = inet6_iif(skb);
if (!isn) {
- struct inet_peer *peer = NULL;
-
if (ipv6_opt_accepted(sk, skb) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
@@ -1176,14 +1122,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
*/
if (tmp_opt.saw_tstamp &&
tcp_death_row.sysctl_tw_recycle &&
- (dst = inet6_csk_route_req(sk, req)) != NULL &&
- (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL &&
- ipv6_addr_equal((struct in6_addr *)peer->daddr.addr.a6,
- &treq->rmt_addr)) {
- inet_peer_refcheck(peer);
- if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
- (s32)(peer->tcp_ts - req->ts_recent) >
- TCP_PAWS_WINDOW) {
+ (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) {
+ if (!tcp_peer_is_proven(req, dst, true)) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
goto drop_and_release;
}
@@ -1192,8 +1132,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
else if (!sysctl_tcp_syncookies &&
(sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
(sysctl_max_syn_backlog >> 2)) &&
- (!peer || !peer->tcp_ts_stamp) &&
- (!dst || !dst_metric(dst, RTAX_RTT))) {
+ !tcp_peer_is_proven(req, dst, false)) {
/* Without syncookies last quarter of
* backlog is filled with destinations,
* proven to be alive.
@@ -1215,7 +1154,7 @@ have_isn:
if (security_inet_conn_request(sk, skb, req))
goto drop_and_release;
- if (tcp_v6_send_synack(sk, req,
+ if (tcp_v6_send_synack(sk, dst, &fl6, req,
(struct request_values *)&tmp_ext,
skb_get_queue_mapping(skb)) ||
want_cookie)
@@ -1242,10 +1181,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
struct inet_sock *newinet;
struct tcp_sock *newtp;
struct sock *newsk;
- struct ipv6_txoptions *opt;
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *key;
#endif
+ struct flowi6 fl6;
if (skb->protocol == htons(ETH_P_IP)) {
/*
@@ -1302,13 +1241,12 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
}
treq = inet6_rsk(req);
- opt = np->opt;
if (sk_acceptq_is_full(sk))
goto out_overflow;
if (!dst) {
- dst = inet6_csk_route_req(sk, req);
+ dst = inet6_csk_route_req(sk, &fl6, req);
if (!dst)
goto out;
}
@@ -1371,11 +1309,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
but we make one more one thing there: reattach optmem
to newsk.
*/
- if (opt) {
- newnp->opt = ipv6_dup_options(newsk, opt);
- if (opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
- }
+ if (np->opt)
+ newnp->opt = ipv6_dup_options(newsk, np->opt);
inet_csk(newsk)->icsk_ext_hdr_len = 0;
if (newnp->opt)
@@ -1422,8 +1357,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
out_overflow:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
- if (opt && opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
dst_release(dst);
out:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
@@ -1734,42 +1667,10 @@ do_time_wait:
goto discard_it;
}
-static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it)
-{
- struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct inet_peer *peer;
-
- if (!rt ||
- !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) {
- peer = inet_getpeer_v6(&np->daddr, 1);
- *release_it = true;
- } else {
- if (!rt->rt6i_peer)
- rt6_bind_peer(rt, 1);
- peer = rt->rt6i_peer;
- *release_it = false;
- }
-
- return peer;
-}
-
-static void *tcp_v6_tw_get_peer(struct sock *sk)
-{
- const struct inet6_timewait_sock *tw6 = inet6_twsk(sk);
- const struct inet_timewait_sock *tw = inet_twsk(sk);
-
- if (tw->tw_family == AF_INET)
- return tcp_v4_tw_get_peer(sk);
-
- return inet_getpeer_v6(&tw6->tw_v6_daddr, 1);
-}
-
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp6_timewait_sock),
.twsk_unique = tcp_twsk_unique,
.twsk_destructor= tcp_twsk_destructor,
- .twsk_getpeer = tcp_v6_tw_get_peer,
};
static const struct inet_connection_sock_af_ops ipv6_specific = {
@@ -1778,7 +1679,6 @@ static const struct inet_connection_sock_af_ops ipv6_specific = {
.rebuild_header = inet6_sk_rebuild_header,
.conn_request = tcp_v6_conn_request,
.syn_recv_sock = tcp_v6_syn_recv_sock,
- .get_peer = tcp_v6_get_peer,
.net_header_len = sizeof(struct ipv6hdr),
.net_frag_header_len = sizeof(struct frag_hdr),
.setsockopt = ipv6_setsockopt,
@@ -1810,7 +1710,6 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
.rebuild_header = inet_sk_rebuild_header,
.conn_request = tcp_v6_conn_request,
.syn_recv_sock = tcp_v6_syn_recv_sock,
- .get_peer = tcp_v4_get_peer,
.net_header_len = sizeof(struct iphdr),
.setsockopt = ipv6_setsockopt,
.getsockopt = ipv6_getsockopt,
@@ -2049,6 +1948,7 @@ struct proto tcpv6_prot = {
.sendmsg = tcp_sendmsg,
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v6_do_rcv,
+ .release_cb = tcp_release_cb,
.hash = tcp_v6_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f05099fc5901..99d0077b56b8 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -48,6 +48,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <trace/events/skb.h>
#include "udp_impl.h"
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
@@ -385,15 +386,16 @@ try_again:
if (skb_csum_unnecessary(skb))
err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
- msg->msg_iov, copied );
+ msg->msg_iov, copied);
else {
err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
if (err == -EINVAL)
goto csum_copy_err;
}
- if (err)
+ if (unlikely(err)) {
+ trace_kfree_skb(skb, udpv6_recvmsg);
goto out_free;
-
+ }
if (!peeked) {
if (is_udp4)
UDP_INC_STATS_USER(sock_net(sk),
@@ -479,6 +481,11 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (sk == NULL)
return;
+ if (type == ICMPV6_PKT_TOOBIG)
+ ip6_sk_update_pmtu(skb, sk, info);
+ if (type == NDISC_REDIRECT)
+ ip6_sk_redirect(skb, sk);
+
np = inet6_sk(sk);
if (!icmpv6_err_convert(type, code, &err) && !np->recverr)
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 8625fba96db9..ef39812107b1 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -99,12 +99,11 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
if (!xdst->u.rt6.rt6i_idev)
return -ENODEV;
- xdst->u.rt6.rt6i_peer = rt->rt6i_peer;
- if (rt->rt6i_peer)
- atomic_inc(&rt->rt6i_peer->refcnt);
+ rt6_transfer_peer(&xdst->u.rt6, rt);
/* Sheit... I remember I did this right. Apparently,
* it was magically lost, so this code needs audit */
+ xdst->u.rt6.n = neigh_clone(rt->n);
xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST |
RTF_LOCAL);
xdst->u.rt6.rt6i_metric = rt->rt6i_metric;
@@ -208,12 +207,22 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops)
return dst_entries_get_fast(ops) > ops->gc_thresh * 2;
}
-static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu)
{
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
struct dst_entry *path = xdst->route;
- path->ops->update_pmtu(path, mtu);
+ path->ops->update_pmtu(path, sk, skb, mtu);
+}
+
+static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb)
+{
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+ struct dst_entry *path = xdst->route;
+
+ path->ops->redirect(path, sk, skb);
}
static void xfrm6_dst_destroy(struct dst_entry *dst)
@@ -223,8 +232,10 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
if (likely(xdst->u.rt6.rt6i_idev))
in6_dev_put(xdst->u.rt6.rt6i_idev);
dst_destroy_metrics_generic(dst);
- if (likely(xdst->u.rt6.rt6i_peer))
- inet_putpeer(xdst->u.rt6.rt6i_peer);
+ if (rt6_has_peer(&xdst->u.rt6)) {
+ struct inet_peer *peer = rt6_peer_ptr(&xdst->u.rt6);
+ inet_putpeer(peer);
+ }
xfrm_dst_destroy(xdst);
}
@@ -260,6 +271,7 @@ static struct dst_ops xfrm6_dst_ops = {
.protocol = cpu_to_be16(ETH_P_IPV6),
.gc = xfrm6_garbage_collect,
.update_pmtu = xfrm6_update_pmtu,
+ .redirect = xfrm6_redirect,
.cow_metrics = dst_cow_metrics_generic,
.destroy = xfrm6_dst_destroy,
.ifdown = xfrm6_dst_ifdown,
OpenPOWER on IntegriCloud