summaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-07-24 10:01:50 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-07-24 10:01:50 -0700
commit3c4cfadef6a1665d9cd02a543782d03d3e6740c6 (patch)
tree3df72faaacd494d5ac8c9668df4f529b1b5e4457 /net/ipv6
parente017507f37d5cb8b541df165a824958bc333bec3 (diff)
parent320f5ea0cedc08ef65d67e056bcb9d181386ef2c (diff)
downloadtalos-obmc-linux-3c4cfadef6a1665d9cd02a543782d03d3e6740c6.tar.gz
talos-obmc-linux-3c4cfadef6a1665d9cd02a543782d03d3e6740c6.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking changes from David S Miller: 1) Remove the ipv4 routing cache. Now lookups go directly into the FIB trie and use prebuilt routes cached there. No more garbage collection, no more rDOS attacks on the routing cache. Instead we now get predictable and consistent performance, no matter what the pattern of traffic we service. This has been almost 2 years in the making. Special thanks to Julian Anastasov, Eric Dumazet, Steffen Klassert, and others who have helped along the way. I'm sure that with a change of this magnitude there will be some kind of fallout, but such things ought the be simple to fix at this point. Luckily I'm not European so I'll be around all of August to fix things :-) The major stages of this work here are each fronted by a forced merge commit whose commit message contains a top-level description of the motivations and implementation issues. 2) Pre-demux of established ipv4 TCP sockets, saves a route demux on input. 3) TCP SYN/ACK performance tweaks from Eric Dumazet. 4) Add namespace support for netfilter L4 conntrack helpers, from Gao Feng. 5) Add config mechanism for Energy Efficient Ethernet to ethtool, from Yuval Mintz. 6) Remove quadratic behavior from /proc/net/unix, from Eric Dumazet. 7) Support for connection tracker helpers in userspace, from Pablo Neira Ayuso. 8) Allow userspace driven TX load balancing functions in TEAM driver, from Jiri Pirko. 9) Kill off NLMSG_PUT and RTA_PUT macros, more gross stuff with embedded gotos. 10) TCP Small Queues, essentially minimize the amount of TCP data queued up in the packet scheduler layer. Whereas the existing BQL (Byte Queue Limits) limits the pkt_sched --> netdevice queuing levels, this controls the TCP --> pkt_sched queueing levels. From Eric Dumazet. 11) Reduce the number of get_page/put_page ops done on SKB fragments, from Alexander Duyck. 12) Implement protection against blind resets in TCP (RFC 5961), from Eric Dumazet. 13) Support the client side of TCP Fast Open, basically the ability to send data in the SYN exchange, from Yuchung Cheng. Basically, the sender queues up data with a sendmsg() call using MSG_FASTOPEN, then they do the connect() which emits the queued up fastopen data. 14) Avoid all the problems we get into in TCP when timers or PMTU events hit a locked socket. The TCP Small Queues changes added a tcp_release_cb() that allows us to queue work up to the release_sock() caller, and that's what we use here too. From Eric Dumazet. 15) Zero copy on TX support for TUN driver, from Michael S. Tsirkin. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1870 commits) genetlink: define lockdep_genl_is_held() when CONFIG_LOCKDEP r8169: revert "add byte queue limit support". ipv4: Change rt->rt_iif encoding. net: Make skb->skb_iif always track skb->dev ipv4: Prepare for change of rt->rt_iif encoding. ipv4: Remove all RTCF_DIRECTSRC handliing. ipv4: Really ignore ICMP address requests/replies. decnet: Don't set RTCF_DIRECTSRC. net/ipv4/ip_vti.c: Fix __rcu warnings detected by sparse. ipv4: Remove redundant assignment rds: set correct msg_namelen openvswitch: potential NULL deref in sample() tcp: dont drop MTU reduction indications bnx2x: Add new 57840 device IDs tcp: avoid oops in tcp_metrics and reset tcpm_stamp niu: Change niu_rbr_fill() to use unlikely() to check niu_rbr_add_page() return value niu: Fix to check for dma mapping errors. net: Fix references to out-of-scope variables in put_cmsg_compat() net: ethernet: davinci_emac: add pm_runtime support net: ethernet: davinci_emac: Remove unnecessary #include ...
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/addrconf.c21
-rw-r--r--net/ipv6/ah6.c11
-rw-r--r--net/ipv6/esp6.c11
-rw-r--r--net/ipv6/exthdrs.c4
-rw-r--r--net/ipv6/icmp.c23
-rw-r--r--net/ipv6/inet6_connection_sock.c103
-rw-r--r--net/ipv6/ip6_fib.c5
-rw-r--r--net/ipv6/ip6_input.c9
-rw-r--r--net/ipv6/ip6_output.c40
-rw-r--r--net/ipv6/ip6_tunnel.c96
-rw-r--r--net/ipv6/ip6mr.c5
-rw-r--r--net/ipv6/ipcomp6.c11
-rw-r--r--net/ipv6/mcast.c3
-rw-r--r--net/ipv6/ndisc.c129
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c131
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c51
-rw-r--r--net/ipv6/protocol.c8
-rw-r--r--net/ipv6/raw.c11
-rw-r--r--net/ipv6/route.c538
-rw-r--r--net/ipv6/sit.c25
-rw-r--r--net/ipv6/syncookies.c5
-rw-r--r--net/ipv6/tcp_ipv6.c204
-rw-r--r--net/ipv6/udp.c13
-rw-r--r--net/ipv6/xfrm6_policy.c26
24 files changed, 717 insertions, 766 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 8f6411c97189..79181819a24f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -63,6 +63,7 @@
#include <linux/delay.h>
#include <linux/notifier.h>
#include <linux/string.h>
+#include <linux/hash.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -579,15 +580,9 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
list_add_tail(&ifp->if_list, p);
}
-static u32 ipv6_addr_hash(const struct in6_addr *addr)
+static u32 inet6_addr_hash(const struct in6_addr *addr)
{
- /*
- * We perform the hash function over the last 64 bits of the address
- * This will include the IEEE address token on links that support it.
- */
- return jhash_2words((__force u32)addr->s6_addr32[2],
- (__force u32)addr->s6_addr32[3], 0)
- & (IN6_ADDR_HSIZE - 1);
+ return hash_32(ipv6_addr_hash(addr), IN6_ADDR_HSIZE_SHIFT);
}
/* On success it returns ifp with increased reference count */
@@ -662,7 +657,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
in6_ifa_hold(ifa);
/* Add to big hash table */
- hash = ipv6_addr_hash(addr);
+ hash = inet6_addr_hash(addr);
hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
spin_unlock(&addrconf_hash_lock);
@@ -1270,7 +1265,7 @@ int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
{
struct inet6_ifaddr *ifp;
struct hlist_node *node;
- unsigned int hash = ipv6_addr_hash(addr);
+ unsigned int hash = inet6_addr_hash(addr);
rcu_read_lock_bh();
hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) {
@@ -1293,7 +1288,7 @@ EXPORT_SYMBOL(ipv6_chk_addr);
static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
struct net_device *dev)
{
- unsigned int hash = ipv6_addr_hash(addr);
+ unsigned int hash = inet6_addr_hash(addr);
struct inet6_ifaddr *ifp;
struct hlist_node *node;
@@ -1336,7 +1331,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
struct net_device *dev, int strict)
{
struct inet6_ifaddr *ifp, *result = NULL;
- unsigned int hash = ipv6_addr_hash(addr);
+ unsigned int hash = inet6_addr_hash(addr);
struct hlist_node *node;
rcu_read_lock_bh();
@@ -3223,7 +3218,7 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
int ret = 0;
struct inet6_ifaddr *ifp = NULL;
struct hlist_node *n;
- unsigned int hash = ipv6_addr_hash(addr);
+ unsigned int hash = inet6_addr_hash(addr);
rcu_read_lock_bh();
hlist_for_each_entry_rcu_bh(ifp, n, &inet6_addr_lst[hash], addr_lst) {
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index f1a4a2c28ed3..7e6139508ee7 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -35,6 +35,7 @@
#include <linux/pfkeyv2.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
+#include <net/ip6_route.h>
#include <net/icmp.h>
#include <net/ipv6.h>
#include <net/protocol.h>
@@ -612,16 +613,18 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct xfrm_state *x;
if (type != ICMPV6_DEST_UNREACH &&
- type != ICMPV6_PKT_TOOBIG)
+ type != ICMPV6_PKT_TOOBIG &&
+ type != NDISC_REDIRECT)
return;
x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6);
if (!x)
return;
- NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%pI6\n",
- ntohl(ah->spi), &iph->daddr);
-
+ if (type == NDISC_REDIRECT)
+ ip6_redirect(skb, net, 0, 0);
+ else
+ ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index db1521fcda5b..6dc7fd353ef5 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -39,6 +39,7 @@
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <net/ip6_route.h>
#include <net/icmp.h>
#include <net/ipv6.h>
#include <net/protocol.h>
@@ -433,15 +434,19 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct xfrm_state *x;
if (type != ICMPV6_DEST_UNREACH &&
- type != ICMPV6_PKT_TOOBIG)
+ type != ICMPV6_PKT_TOOBIG &&
+ type != NDISC_REDIRECT)
return;
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
esph->spi, IPPROTO_ESP, AF_INET6);
if (!x)
return;
- pr_debug("pmtu discovery on SA ESP/%08x/%pI6\n",
- ntohl(esph->spi), &iph->daddr);
+
+ if (type == NDISC_REDIRECT)
+ ip6_redirect(skb, net, 0, 0);
+ else
+ ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
}
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 6447dc49429f..fa3d9c328092 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -791,14 +791,14 @@ static int ipv6_renew_option(void *ohdr,
if (ohdr) {
memcpy(*p, ohdr, ipv6_optlen((struct ipv6_opt_hdr *)ohdr));
*hdr = (struct ipv6_opt_hdr *)*p;
- *p += CMSG_ALIGN(ipv6_optlen(*(struct ipv6_opt_hdr **)hdr));
+ *p += CMSG_ALIGN(ipv6_optlen(*hdr));
}
} else {
if (newopt) {
if (copy_from_user(*p, newopt, newoptlen))
return -EFAULT;
*hdr = (struct ipv6_opt_hdr *)*p;
- if (ipv6_optlen(*(struct ipv6_opt_hdr **)hdr) > newoptlen)
+ if (ipv6_optlen(*hdr) > newoptlen)
return -EINVAL;
*p += CMSG_ALIGN(newoptlen);
}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 091a2971c7b7..24d69dbca4d6 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -188,14 +188,16 @@ static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
} else {
struct rt6_info *rt = (struct rt6_info *)dst;
int tmo = net->ipv6.sysctl.icmpv6_time;
+ struct inet_peer *peer;
/* Give more bandwidth to wider prefixes. */
if (rt->rt6i_dst.plen < 128)
tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
- if (!rt->rt6i_peer)
- rt6_bind_peer(rt, 1);
- res = inet_peer_xrlim_allow(rt->rt6i_peer, tmo);
+ peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
+ res = inet_peer_xrlim_allow(peer, tmo);
+ if (peer)
+ inet_putpeer(peer);
}
dst_release(dst);
return res;
@@ -596,13 +598,12 @@ out:
icmpv6_xmit_unlock(sk);
}
-static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
+void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
{
const struct inet6_protocol *ipprot;
int inner_offset;
- int hash;
- u8 nexthdr;
__be16 frag_off;
+ u8 nexthdr;
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
return;
@@ -629,10 +630,8 @@ static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
--ANK (980726)
*/
- hash = nexthdr & (MAX_INET_PROTOS - 1);
-
rcu_read_lock();
- ipprot = rcu_dereference(inet6_protos[hash]);
+ ipprot = rcu_dereference(inet6_protos[nexthdr]);
if (ipprot && ipprot->err_handler)
ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
rcu_read_unlock();
@@ -649,7 +648,6 @@ static int icmpv6_rcv(struct sk_buff *skb)
struct net_device *dev = skb->dev;
struct inet6_dev *idev = __in6_dev_get(dev);
const struct in6_addr *saddr, *daddr;
- const struct ipv6hdr *orig_hdr;
struct icmp6hdr *hdr;
u8 type;
@@ -661,7 +659,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
XFRM_STATE_ICMP))
goto drop_no_count;
- if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr)))
+ if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
goto drop_no_count;
nh = skb_network_offset(skb);
@@ -722,9 +720,6 @@ static int icmpv6_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto discard_it;
hdr = icmp6_hdr(skb);
- orig_hdr = (struct ipv6hdr *) (hdr + 1);
- rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
- ntohl(hdr->icmp6_mtu));
/*
* Drop through to notify
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index e6cee5292a0b..0251a6005be8 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -55,26 +55,26 @@ int inet6_csk_bind_conflict(const struct sock *sk,
EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
struct dst_entry *inet6_csk_route_req(struct sock *sk,
+ struct flowi6 *fl6,
const struct request_sock *req)
{
struct inet6_request_sock *treq = inet6_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_addr *final_p, final;
struct dst_entry *dst;
- struct flowi6 fl6;
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_TCP;
- fl6.daddr = treq->rmt_addr;
- final_p = fl6_update_dst(&fl6, np->opt, &final);
- fl6.saddr = treq->loc_addr;
- fl6.flowi6_oif = sk->sk_bound_dev_if;
- fl6.flowi6_mark = sk->sk_mark;
- fl6.fl6_dport = inet_rsk(req)->rmt_port;
- fl6.fl6_sport = inet_rsk(req)->loc_port;
- security_req_classify_flow(req, flowi6_to_flowi(&fl6));
-
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
+ memset(fl6, 0, sizeof(*fl6));
+ fl6->flowi6_proto = IPPROTO_TCP;
+ fl6->daddr = treq->rmt_addr;
+ final_p = fl6_update_dst(fl6, np->opt, &final);
+ fl6->saddr = treq->loc_addr;
+ fl6->flowi6_oif = treq->iif;
+ fl6->flowi6_mark = sk->sk_mark;
+ fl6->fl6_dport = inet_rsk(req)->rmt_port;
+ fl6->fl6_sport = inet_rsk(req)->loc_port;
+ security_req_classify_flow(req, flowi6_to_flowi(fl6));
+
+ dst = ip6_dst_lookup_flow(sk, fl6, final_p, false);
if (IS_ERR(dst))
return NULL;
@@ -171,7 +171,8 @@ EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
static inline
void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst,
- struct in6_addr *daddr, struct in6_addr *saddr)
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
__ip6_dst_store(sk, dst, daddr, saddr);
@@ -203,43 +204,52 @@ struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
return dst;
}
-int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
+static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
+ struct flowi6 *fl6)
{
- struct sock *sk = skb->sk;
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
- struct flowi6 fl6;
- struct dst_entry *dst;
struct in6_addr *final_p, final;
- int res;
+ struct dst_entry *dst;
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_proto = sk->sk_protocol;
- fl6.daddr = np->daddr;
- fl6.saddr = np->saddr;
- fl6.flowlabel = np->flow_label;
- IP6_ECN_flow_xmit(sk, fl6.flowlabel);
- fl6.flowi6_oif = sk->sk_bound_dev_if;
- fl6.flowi6_mark = sk->sk_mark;
- fl6.fl6_sport = inet->inet_sport;
- fl6.fl6_dport = inet->inet_dport;
- security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+ memset(fl6, 0, sizeof(*fl6));
+ fl6->flowi6_proto = sk->sk_protocol;
+ fl6->daddr = np->daddr;
+ fl6->saddr = np->saddr;
+ fl6->flowlabel = np->flow_label;
+ IP6_ECN_flow_xmit(sk, fl6->flowlabel);
+ fl6->flowi6_oif = sk->sk_bound_dev_if;
+ fl6->flowi6_mark = sk->sk_mark;
+ fl6->fl6_sport = inet->inet_sport;
+ fl6->fl6_dport = inet->inet_dport;
+ security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ final_p = fl6_update_dst(fl6, np->opt, &final);
dst = __inet6_csk_dst_check(sk, np->dst_cookie);
+ if (!dst) {
+ dst = ip6_dst_lookup_flow(sk, fl6, final_p, false);
- if (dst == NULL) {
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
+ if (!IS_ERR(dst))
+ __inet6_csk_dst_store(sk, dst, NULL, NULL);
+ }
+ return dst;
+}
- if (IS_ERR(dst)) {
- sk->sk_err_soft = -PTR_ERR(dst);
- sk->sk_route_caps = 0;
- kfree_skb(skb);
- return PTR_ERR(dst);
- }
+int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
+{
+ struct sock *sk = skb->sk;
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct flowi6 fl6;
+ struct dst_entry *dst;
+ int res;
- __inet6_csk_dst_store(sk, dst, NULL, NULL);
+ dst = inet6_csk_route_socket(sk, &fl6);
+ if (IS_ERR(dst)) {
+ sk->sk_err_soft = -PTR_ERR(dst);
+ sk->sk_route_caps = 0;
+ kfree_skb(skb);
+ return PTR_ERR(dst);
}
rcu_read_lock();
@@ -253,3 +263,16 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
return res;
}
EXPORT_SYMBOL_GPL(inet6_csk_xmit);
+
+struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu)
+{
+ struct flowi6 fl6;
+ struct dst_entry *dst = inet6_csk_route_socket(sk, &fl6);
+
+ if (IS_ERR(dst))
+ return NULL;
+ dst->ops->update_pmtu(dst, sk, NULL, mtu);
+
+ return inet6_csk_route_socket(sk, &fl6);
+}
+EXPORT_SYMBOL_GPL(inet6_csk_update_pmtu);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 608327661960..13690d650c3e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -197,6 +197,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
table->tb6_id = id;
table->tb6_root.leaf = net->ipv6.ip6_null_entry;
table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
+ inet_peer_base_init(&table->tb6_peers);
}
return table;
@@ -1633,6 +1634,7 @@ static int __net_init fib6_net_init(struct net *net)
net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
+ inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
@@ -1643,6 +1645,7 @@ static int __net_init fib6_net_init(struct net *net)
net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
+ inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
#endif
fib6_tables_init(net);
@@ -1666,8 +1669,10 @@ static void fib6_net_exit(struct net *net)
del_timer_sync(&net->ipv6.ip6_fib_timer);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ inetpeer_invalidate_tree(&net->ipv6.fib6_local_tbl->tb6_peers);
kfree(net->ipv6.fib6_local_tbl);
#endif
+ inetpeer_invalidate_tree(&net->ipv6.fib6_main_tbl->tb6_peers);
kfree(net->ipv6.fib6_main_tbl);
kfree(net->ipv6.fib_table_hash);
kfree(net->ipv6.rt6_stats);
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 21a15dfe4a9e..5ab923e51af3 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -168,13 +168,12 @@ drop:
static int ip6_input_finish(struct sk_buff *skb)
{
+ struct net *net = dev_net(skb_dst(skb)->dev);
const struct inet6_protocol *ipprot;
+ struct inet6_dev *idev;
unsigned int nhoff;
int nexthdr;
bool raw;
- u8 hash;
- struct inet6_dev *idev;
- struct net *net = dev_net(skb_dst(skb)->dev);
/*
* Parse extension headers
@@ -189,9 +188,7 @@ resubmit:
nexthdr = skb_network_header(skb)[nhoff];
raw = raw6_local_deliver(skb, nexthdr);
-
- hash = nexthdr & (MAX_INET_PROTOS - 1);
- if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) {
+ if ((ipprot = rcu_dereference(inet6_protos[nexthdr])) != NULL) {
int ret;
if (ipprot->flags & INET6_PROTO_FINAL) {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index decc21d19c53..5b2d63ed793e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -83,24 +83,12 @@ int ip6_local_out(struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(ip6_local_out);
-/* dev_loopback_xmit for use with netfilter. */
-static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
-{
- skb_reset_mac_header(newskb);
- __skb_pull(newskb, skb_network_offset(newskb));
- newskb->pkt_type = PACKET_LOOPBACK;
- newskb->ip_summed = CHECKSUM_UNNECESSARY;
- WARN_ON(!skb_dst(newskb));
-
- netif_rx_ni(newskb);
- return 0;
-}
-
static int ip6_finish_output2(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
struct neighbour *neigh;
+ struct rt6_info *rt;
skb->protocol = htons(ETH_P_IPV6);
skb->dev = dev;
@@ -121,7 +109,7 @@ static int ip6_finish_output2(struct sk_buff *skb)
if (newskb)
NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
newskb, NULL, newskb->dev,
- ip6_dev_loopback_xmit);
+ dev_loopback_xmit);
if (ipv6_hdr(skb)->hop_limit == 0) {
IP6_INC_STATS(dev_net(dev), idev,
@@ -136,9 +124,10 @@ static int ip6_finish_output2(struct sk_buff *skb)
}
rcu_read_lock();
- neigh = dst_get_neighbour_noref(dst);
+ rt = (struct rt6_info *) dst;
+ neigh = rt->n;
if (neigh) {
- int res = neigh_output(neigh, skb);
+ int res = dst_neigh_output(dst, neigh, skb);
rcu_read_unlock();
return res;
@@ -463,6 +452,7 @@ int ip6_forward(struct sk_buff *skb)
*/
if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
struct in6_addr *target = NULL;
+ struct inet_peer *peer;
struct rt6_info *rt;
/*
@@ -476,14 +466,15 @@ int ip6_forward(struct sk_buff *skb)
else
target = &hdr->daddr;
- if (!rt->rt6i_peer)
- rt6_bind_peer(rt, 1);
+ peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
/* Limit redirects both by destination (here)
and by source (inside ndisc_send_redirect)
*/
- if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
+ if (inet_peer_xrlim_allow(peer, 1*HZ))
ndisc_send_redirect(skb, target);
+ if (peer)
+ inet_putpeer(peer);
} else {
int addrtype = ipv6_addr_type(&hdr->saddr);
@@ -604,12 +595,13 @@ void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
if (rt && !(rt->dst.flags & DST_NOPEER)) {
struct inet_peer *peer;
+ struct net *net;
- if (!rt->rt6i_peer)
- rt6_bind_peer(rt, 1);
- peer = rt->rt6i_peer;
+ net = dev_net(rt->dst.dev);
+ peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
if (peer) {
fhdr->identification = htonl(inet_getid(peer, 0));
+ inet_putpeer(peer);
return;
}
}
@@ -960,6 +952,7 @@ static int ip6_dst_lookup_tail(struct sock *sk,
struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
struct neighbour *n;
+ struct rt6_info *rt;
#endif
int err;
@@ -988,7 +981,8 @@ static int ip6_dst_lookup_tail(struct sock *sk,
* dst entry of the nexthop router
*/
rcu_read_lock();
- n = dst_get_neighbour_noref(*dst);
+ rt = (struct rt6_info *) *dst;
+ n = rt->n;
if (n && !(n->nud_state & NUD_VALID)) {
struct inet6_ifaddr *ifp;
struct flowi6 fl_gw6;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index c9015fad8d65..9a1d5fe6aef8 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -40,6 +40,7 @@
#include <linux/rtnetlink.h>
#include <linux/netfilter_ipv6.h>
#include <linux/slab.h>
+#include <linux/hash.h>
#include <asm/uaccess.h>
#include <linux/atomic.h>
@@ -70,11 +71,15 @@ MODULE_ALIAS_NETDEV("ip6tnl0");
#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
#define IPV6_TCLASS_SHIFT 20
-#define HASH_SIZE 32
+#define HASH_SIZE_SHIFT 5
+#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
-#define HASH(addr) ((__force u32)((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \
- (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \
- (HASH_SIZE - 1))
+static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
+{
+ u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
+
+ return hash_32(hash, HASH_SIZE_SHIFT);
+}
static int ip6_tnl_dev_init(struct net_device *dev);
static void ip6_tnl_dev_setup(struct net_device *dev);
@@ -166,12 +171,11 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
static struct ip6_tnl *
ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
{
- unsigned int h0 = HASH(remote);
- unsigned int h1 = HASH(local);
+ unsigned int hash = HASH(remote, local);
struct ip6_tnl *t;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
- for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[h0 ^ h1]) {
+ for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
if (ipv6_addr_equal(local, &t->parms.laddr) &&
ipv6_addr_equal(remote, &t->parms.raddr) &&
(t->dev->flags & IFF_UP))
@@ -205,7 +209,7 @@ ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct ip6_tnl_parm *p)
if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
prio = 1;
- h = HASH(remote) ^ HASH(local);
+ h = HASH(remote, local);
}
return &ip6n->tnls[prio][h];
}
@@ -252,7 +256,7 @@ static void ip6_dev_free(struct net_device *dev)
}
/**
- * ip6_tnl_create() - create a new tunnel
+ * ip6_tnl_create - create a new tunnel
* @p: tunnel parameters
* @pt: pointer to new tunnel
*
@@ -550,6 +554,9 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
rel_type = ICMP_DEST_UNREACH;
rel_code = ICMP_FRAG_NEEDED;
break;
+ case NDISC_REDIRECT:
+ rel_type = ICMP_REDIRECT;
+ rel_code = ICMP_REDIR_HOST;
default:
return 0;
}
@@ -606,8 +613,10 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (rel_info > dst_mtu(skb_dst(skb2)))
goto out;
- skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info);
+ skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info);
}
+ if (rel_type == ICMP_REDIRECT)
+ skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2);
icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
@@ -684,24 +693,50 @@ static void ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
IP6_ECN_set_ce(ipv6_hdr(skb));
}
+static __u32 ip6_tnl_get_cap(struct ip6_tnl *t,
+ const struct in6_addr *laddr,
+ const struct in6_addr *raddr)
+{
+ struct ip6_tnl_parm *p = &t->parms;
+ int ltype = ipv6_addr_type(laddr);
+ int rtype = ipv6_addr_type(raddr);
+ __u32 flags = 0;
+
+ if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) {
+ flags = IP6_TNL_F_CAP_PER_PACKET;
+ } else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
+ rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
+ !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
+ (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
+ if (ltype&IPV6_ADDR_UNICAST)
+ flags |= IP6_TNL_F_CAP_XMIT;
+ if (rtype&IPV6_ADDR_UNICAST)
+ flags |= IP6_TNL_F_CAP_RCV;
+ }
+ return flags;
+}
+
/* called with rcu_read_lock() */
-static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
+static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
+ const struct in6_addr *laddr,
+ const struct in6_addr *raddr)
{
struct ip6_tnl_parm *p = &t->parms;
int ret = 0;
struct net *net = dev_net(t->dev);
- if (p->flags & IP6_TNL_F_CAP_RCV) {
+ if ((p->flags & IP6_TNL_F_CAP_RCV) ||
+ ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
+ (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) {
struct net_device *ldev = NULL;
if (p->link)
ldev = dev_get_by_index_rcu(net, p->link);
- if ((ipv6_addr_is_multicast(&p->laddr) ||
- likely(ipv6_chk_addr(net, &p->laddr, ldev, 0))) &&
- likely(!ipv6_chk_addr(net, &p->raddr, NULL, 0)))
+ if ((ipv6_addr_is_multicast(laddr) ||
+ likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
+ likely(!ipv6_chk_addr(net, raddr, NULL, 0)))
ret = 1;
-
}
return ret;
}
@@ -740,7 +775,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
goto discard;
}
- if (!ip6_tnl_rcv_ctl(t)) {
+ if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) {
t->dev->stats.rx_dropped++;
rcu_read_unlock();
goto discard;
@@ -921,7 +956,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
if (skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
if (skb->len > mtu) {
*pmtu = mtu;
err = -EMSGSIZE;
@@ -1114,25 +1149,6 @@ tx_err:
return NETDEV_TX_OK;
}
-static void ip6_tnl_set_cap(struct ip6_tnl *t)
-{
- struct ip6_tnl_parm *p = &t->parms;
- int ltype = ipv6_addr_type(&p->laddr);
- int rtype = ipv6_addr_type(&p->raddr);
-
- p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV);
-
- if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
- rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
- !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
- (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
- if (ltype&IPV6_ADDR_UNICAST)
- p->flags |= IP6_TNL_F_CAP_XMIT;
- if (rtype&IPV6_ADDR_UNICAST)
- p->flags |= IP6_TNL_F_CAP_RCV;
- }
-}
-
static void ip6_tnl_link_config(struct ip6_tnl *t)
{
struct net_device *dev = t->dev;
@@ -1153,7 +1169,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
- ip6_tnl_set_cap(t);
+ p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
+ p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV)
dev->flags |= IFF_POINTOPOINT;
@@ -1438,6 +1455,9 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
t->parms.proto = IPPROTO_IPV6;
dev_hold(dev);
+
+ ip6_tnl_link_config(t);
+
rcu_assign_pointer(ip6n->tnls_wc[0], t);
return 0;
}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 461e47c8e956..4532973f0dd4 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -2104,8 +2104,9 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
if (c->mf6c_parent >= MAXMIFS)
return -ENOENT;
- if (MIF_EXISTS(mrt, c->mf6c_parent))
- RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex);
+ if (MIF_EXISTS(mrt, c->mf6c_parent) &&
+ nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
+ return -EMSGSIZE;
mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 5cb75bfe45b1..7af5aee75d98 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -46,6 +46,7 @@
#include <linux/list.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
+#include <net/ip6_route.h>
#include <net/icmp.h>
#include <net/ipv6.h>
#include <net/protocol.h>
@@ -63,7 +64,9 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
(struct ip_comp_hdr *)(skb->data + offset);
struct xfrm_state *x;
- if (type != ICMPV6_DEST_UNREACH && type != ICMPV6_PKT_TOOBIG)
+ if (type != ICMPV6_DEST_UNREACH &&
+ type != ICMPV6_PKT_TOOBIG &&
+ type != NDISC_REDIRECT)
return;
spi = htonl(ntohs(ipcomph->cpi));
@@ -72,8 +75,10 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!x)
return;
- pr_debug("pmtu discovery on SA IPCOMP/%08x/%pI6\n",
- spi, &iph->daddr);
+ if (type == NDISC_REDIRECT)
+ ip6_redirect(skb, net, 0, 0);
+ else
+ ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
}
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 6d0f5dc8e3a6..92f8e48e4ba4 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -211,6 +211,9 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
struct ipv6_mc_socklist __rcu **lnk;
struct net *net = sock_net(sk);
+ if (!ipv6_addr_is_multicast(addr))
+ return -EINVAL;
+
spin_lock(&ipv6_sk_mc_lock);
for (lnk = &np->ipv6_mc_list;
(mc_lst = rcu_dereference_protected(*lnk,
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 54f62d3b8dd6..ff36194a71aa 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -143,40 +143,6 @@ struct neigh_table nd_tbl = {
.gc_thresh3 = 1024,
};
-/* ND options */
-struct ndisc_options {
- struct nd_opt_hdr *nd_opt_array[__ND_OPT_ARRAY_MAX];
-#ifdef CONFIG_IPV6_ROUTE_INFO
- struct nd_opt_hdr *nd_opts_ri;
- struct nd_opt_hdr *nd_opts_ri_end;
-#endif
- struct nd_opt_hdr *nd_useropts;
- struct nd_opt_hdr *nd_useropts_end;
-};
-
-#define nd_opts_src_lladdr nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
-#define nd_opts_tgt_lladdr nd_opt_array[ND_OPT_TARGET_LL_ADDR]
-#define nd_opts_pi nd_opt_array[ND_OPT_PREFIX_INFO]
-#define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END]
-#define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR]
-#define nd_opts_mtu nd_opt_array[ND_OPT_MTU]
-
-#define NDISC_OPT_SPACE(len) (((len)+2+7)&~7)
-
-/*
- * Return the padding between the option length and the start of the
- * link addr. Currently only IP-over-InfiniBand needs this, although
- * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may
- * also need a pad of 2.
- */
-static int ndisc_addr_option_pad(unsigned short type)
-{
- switch (type) {
- case ARPHRD_INFINIBAND: return 2;
- default: return 0;
- }
-}
-
static inline int ndisc_opt_addr_space(struct net_device *dev)
{
return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type));
@@ -233,8 +199,8 @@ static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
return cur <= end && ndisc_is_useropt(cur) ? cur : NULL;
}
-static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
- struct ndisc_options *ndopts)
+struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
+ struct ndisc_options *ndopts)
{
struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;
@@ -297,17 +263,6 @@ static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
return ndopts;
}
-static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
- struct net_device *dev)
-{
- u8 *lladdr = (u8 *)(p + 1);
- int lladdrlen = p->nd_opt_len << 3;
- int prepad = ndisc_addr_option_pad(dev->type);
- if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad))
- return NULL;
- return lladdr + prepad;
-}
-
int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
{
switch (dev->type) {
@@ -1379,16 +1334,6 @@ out:
static void ndisc_redirect_rcv(struct sk_buff *skb)
{
- struct inet6_dev *in6_dev;
- struct icmp6hdr *icmph;
- const struct in6_addr *dest;
- const struct in6_addr *target; /* new first hop to destination */
- struct neighbour *neigh;
- int on_link = 0;
- struct ndisc_options ndopts;
- int optlen;
- u8 *lladdr = NULL;
-
#ifdef CONFIG_IPV6_NDISC_NODETYPE
switch (skb->ndisc_nodetype) {
case NDISC_NODETYPE_HOST:
@@ -1405,65 +1350,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
return;
}
- optlen = skb->tail - skb->transport_header;
- optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
-
- if (optlen < 0) {
- ND_PRINTK(2, warn, "Redirect: packet too short\n");
- return;
- }
-
- icmph = icmp6_hdr(skb);
- target = (const struct in6_addr *) (icmph + 1);
- dest = target + 1;
-
- if (ipv6_addr_is_multicast(dest)) {
- ND_PRINTK(2, warn,
- "Redirect: destination address is multicast\n");
- return;
- }
-
- if (ipv6_addr_equal(dest, target)) {
- on_link = 1;
- } else if (ipv6_addr_type(target) !=
- (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
- ND_PRINTK(2, warn,
- "Redirect: target address is not link-local unicast\n");
- return;
- }
-
- in6_dev = __in6_dev_get(skb->dev);
- if (!in6_dev)
- return;
- if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
- return;
-
- /* RFC2461 8.1:
- * The IP source address of the Redirect MUST be the same as the current
- * first-hop router for the specified ICMP Destination Address.
- */
-
- if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
- ND_PRINTK(2, warn, "Redirect: invalid ND options\n");
- return;
- }
- if (ndopts.nd_opts_tgt_lladdr) {
- lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
- skb->dev);
- if (!lladdr) {
- ND_PRINTK(2, warn,
- "Redirect: invalid link-layer address length\n");
- return;
- }
- }
-
- neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
- if (neigh) {
- rt6_redirect(dest, &ipv6_hdr(skb)->daddr,
- &ipv6_hdr(skb)->saddr, neigh, lladdr,
- on_link);
- neigh_release(neigh);
- }
+ icmpv6_notify(skb, NDISC_REDIRECT, 0, 0);
}
void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
@@ -1472,6 +1359,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
struct net *net = dev_net(dev);
struct sock *sk = net->ipv6.ndisc_sk;
int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
+ struct inet_peer *peer;
struct sk_buff *buff;
struct icmp6hdr *icmph;
struct in6_addr saddr_buf;
@@ -1485,6 +1373,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
int rd_len;
int err;
u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
+ bool ret;
if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n",
@@ -1518,9 +1407,11 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
"Redirect: destination is not a neighbour\n");
goto release;
}
- if (!rt->rt6i_peer)
- rt6_bind_peer(rt, 1);
- if (!inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
+ peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
+ ret = inet_peer_xrlim_allow(peer, 1*HZ);
+ if (peer)
+ inet_putpeer(peer);
+ if (!ret)
goto release;
if (dev->addr_len) {
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 3224ef90a21a..4794f96cf2e0 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -143,11 +143,11 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
return NF_ACCEPT;
}
-static unsigned int ipv6_confirm(unsigned int hooknum,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int ipv6_helper(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
struct nf_conn *ct;
const struct nf_conn_help *help;
@@ -161,15 +161,15 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
/* This is where we call the helper: as the packet goes out. */
ct = nf_ct_get(skb, &ctinfo);
if (!ct || ctinfo == IP_CT_RELATED_REPLY)
- goto out;
+ return NF_ACCEPT;
help = nfct_help(ct);
if (!help)
- goto out;
+ return NF_ACCEPT;
/* rcu_read_lock()ed by nf_hook_slow */
helper = rcu_dereference(help->helper);
if (!helper)
- goto out;
+ return NF_ACCEPT;
protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum,
skb->len - extoff);
@@ -179,12 +179,19 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
}
ret = helper->help(skb, protoff, ct, ctinfo);
- if (ret != NF_ACCEPT) {
+ if (ret != NF_ACCEPT && (ret & NF_VERDICT_MASK) != NF_QUEUE) {
nf_log_packet(NFPROTO_IPV6, hooknum, skb, in, out, NULL,
"nf_ct_%s: dropping packet", helper->name);
- return ret;
}
-out:
+ return ret;
+}
+
+static unsigned int ipv6_confirm(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
/* We've seen it coming out the other side: confirm it */
return nf_conntrack_confirm(skb);
}
@@ -254,6 +261,13 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
.priority = NF_IP6_PRI_CONNTRACK,
},
{
+ .hook = ipv6_helper,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP6_PRI_CONNTRACK_HELPER,
+ },
+ {
.hook = ipv6_confirm,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
@@ -261,6 +275,13 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
.priority = NF_IP6_PRI_LAST,
},
{
+ .hook = ipv6_helper,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP6_PRI_CONNTRACK_HELPER,
+ },
+ {
.hook = ipv6_confirm,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
@@ -333,37 +354,75 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>");
-static int __init nf_conntrack_l3proto_ipv6_init(void)
+static int ipv6_net_init(struct net *net)
{
int ret = 0;
- need_conntrack();
- nf_defrag_ipv6_enable();
-
- ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp6);
+ ret = nf_conntrack_l4proto_register(net,
+ &nf_conntrack_l4proto_tcp6);
if (ret < 0) {
- pr_err("nf_conntrack_ipv6: can't register tcp.\n");
- return ret;
+ printk(KERN_ERR "nf_conntrack_l4proto_tcp6: protocol register failed\n");
+ goto out;
}
-
- ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp6);
+ ret = nf_conntrack_l4proto_register(net,
+ &nf_conntrack_l4proto_udp6);
if (ret < 0) {
- pr_err("nf_conntrack_ipv6: can't register udp.\n");
- goto cleanup_tcp;
+ printk(KERN_ERR "nf_conntrack_l4proto_udp6: protocol register failed\n");
+ goto cleanup_tcp6;
}
-
- ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmpv6);
+ ret = nf_conntrack_l4proto_register(net,
+ &nf_conntrack_l4proto_icmpv6);
if (ret < 0) {
- pr_err("nf_conntrack_ipv6: can't register icmpv6.\n");
- goto cleanup_udp;
+ printk(KERN_ERR "nf_conntrack_l4proto_icmp6: protocol register failed\n");
+ goto cleanup_udp6;
}
-
- ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv6);
+ ret = nf_conntrack_l3proto_register(net,
+ &nf_conntrack_l3proto_ipv6);
if (ret < 0) {
- pr_err("nf_conntrack_ipv6: can't register ipv6\n");
+ printk(KERN_ERR "nf_conntrack_l3proto_ipv6: protocol register failed\n");
goto cleanup_icmpv6;
}
+ return 0;
+ cleanup_icmpv6:
+ nf_conntrack_l4proto_unregister(net,
+ &nf_conntrack_l4proto_icmpv6);
+ cleanup_udp6:
+ nf_conntrack_l4proto_unregister(net,
+ &nf_conntrack_l4proto_udp6);
+ cleanup_tcp6:
+ nf_conntrack_l4proto_unregister(net,
+ &nf_conntrack_l4proto_tcp6);
+ out:
+ return ret;
+}
+static void ipv6_net_exit(struct net *net)
+{
+ nf_conntrack_l3proto_unregister(net,
+ &nf_conntrack_l3proto_ipv6);
+ nf_conntrack_l4proto_unregister(net,
+ &nf_conntrack_l4proto_icmpv6);
+ nf_conntrack_l4proto_unregister(net,
+ &nf_conntrack_l4proto_udp6);
+ nf_conntrack_l4proto_unregister(net,
+ &nf_conntrack_l4proto_tcp6);
+}
+
+static struct pernet_operations ipv6_net_ops = {
+ .init = ipv6_net_init,
+ .exit = ipv6_net_exit,
+};
+
+static int __init nf_conntrack_l3proto_ipv6_init(void)
+{
+ int ret = 0;
+
+ need_conntrack();
+ nf_defrag_ipv6_enable();
+
+ ret = register_pernet_subsys(&ipv6_net_ops);
+ if (ret < 0)
+ goto cleanup_pernet;
ret = nf_register_hooks(ipv6_conntrack_ops,
ARRAY_SIZE(ipv6_conntrack_ops));
if (ret < 0) {
@@ -374,13 +433,8 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
return ret;
cleanup_ipv6:
- nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
- cleanup_icmpv6:
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
- cleanup_udp:
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6);
- cleanup_tcp:
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
+ unregister_pernet_subsys(&ipv6_net_ops);
+ cleanup_pernet:
return ret;
}
@@ -388,10 +442,7 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void)
{
synchronize_net();
nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
- nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6);
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
+ unregister_pernet_subsys(&ipv6_net_ops);
}
module_init(nf_conntrack_l3proto_ipv6_init);
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 3e81904fbbcd..2d54b2061d68 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -29,6 +29,11 @@
static unsigned int nf_ct_icmpv6_timeout __read_mostly = 30*HZ;
+static inline struct nf_icmp_net *icmpv6_pernet(struct net *net)
+{
+ return &net->ct.nf_ct_proto.icmpv6;
+}
+
static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
struct nf_conntrack_tuple *tuple)
@@ -90,7 +95,7 @@ static int icmpv6_print_tuple(struct seq_file *s,
static unsigned int *icmpv6_get_timeouts(struct net *net)
{
- return &nf_ct_icmpv6_timeout;
+ return &icmpv6_pernet(net)->timeout;
}
/* Returns verdict for packet, or -1 for invalid. */
@@ -281,16 +286,18 @@ static int icmpv6_nlattr_tuple_size(void)
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_cttimeout.h>
-static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[], void *data)
+static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[],
+ struct net *net, void *data)
{
unsigned int *timeout = data;
+ struct nf_icmp_net *in = icmpv6_pernet(net);
if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) {
*timeout =
ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ;
} else {
/* Set default ICMPv6 timeout. */
- *timeout = nf_ct_icmpv6_timeout;
+ *timeout = in->timeout;
}
return 0;
}
@@ -315,11 +322,9 @@ icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = {
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
#ifdef CONFIG_SYSCTL
-static struct ctl_table_header *icmpv6_sysctl_header;
static struct ctl_table icmpv6_sysctl_table[] = {
{
.procname = "nf_conntrack_icmpv6_timeout",
- .data = &nf_ct_icmpv6_timeout,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
@@ -328,6 +333,36 @@ static struct ctl_table icmpv6_sysctl_table[] = {
};
#endif /* CONFIG_SYSCTL */
+static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn,
+ struct nf_icmp_net *in)
+{
+#ifdef CONFIG_SYSCTL
+ pn->ctl_table = kmemdup(icmpv6_sysctl_table,
+ sizeof(icmpv6_sysctl_table),
+ GFP_KERNEL);
+ if (!pn->ctl_table)
+ return -ENOMEM;
+
+ pn->ctl_table[0].data = &in->timeout;
+#endif
+ return 0;
+}
+
+static int icmpv6_init_net(struct net *net, u_int16_t proto)
+{
+ struct nf_icmp_net *in = icmpv6_pernet(net);
+ struct nf_proto_net *pn = &in->pn;
+
+ in->timeout = nf_ct_icmpv6_timeout;
+
+ return icmpv6_kmemdup_sysctl_table(pn, in);
+}
+
+static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
+{
+ return &net->ct.nf_ct_proto.icmpv6.pn;
+}
+
struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
{
.l3proto = PF_INET6,
@@ -355,8 +390,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
.nla_policy = icmpv6_timeout_nla_policy,
},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
-#ifdef CONFIG_SYSCTL
- .ctl_table_header = &icmpv6_sysctl_header,
- .ctl_table = icmpv6_sysctl_table,
-#endif
+ .init_net = icmpv6_init_net,
+ .get_net_proto = icmpv6_get_net_proto,
};
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index 9a7978fdc02a..053082dfc93e 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -29,9 +29,7 @@ const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol)
{
- int hash = protocol & (MAX_INET_PROTOS - 1);
-
- return !cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
+ return !cmpxchg((const struct inet6_protocol **)&inet6_protos[protocol],
NULL, prot) ? 0 : -1;
}
EXPORT_SYMBOL(inet6_add_protocol);
@@ -42,9 +40,9 @@ EXPORT_SYMBOL(inet6_add_protocol);
int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol)
{
- int ret, hash = protocol & (MAX_INET_PROTOS - 1);
+ int ret;
- ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
+ ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[protocol],
prot, NULL) == prot) ? 0 : -1;
synchronize_net();
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 93d69836fded..ef0579d5bca6 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -165,7 +165,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
saddr = &ipv6_hdr(skb)->saddr;
daddr = saddr + 1;
- hash = nexthdr & (MAX_INET_PROTOS - 1);
+ hash = nexthdr & (RAW_HTABLE_SIZE - 1);
read_lock(&raw_v6_hashinfo.lock);
sk = sk_head(&raw_v6_hashinfo.ht[hash]);
@@ -229,7 +229,7 @@ bool raw6_local_deliver(struct sk_buff *skb, int nexthdr)
{
struct sock *raw_sk;
- raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (MAX_INET_PROTOS - 1)]);
+ raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (RAW_HTABLE_SIZE - 1)]);
if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
raw_sk = NULL;
@@ -328,9 +328,12 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
return;
harderr = icmpv6_err_convert(type, code, &err);
- if (type == ICMPV6_PKT_TOOBIG)
+ if (type == ICMPV6_PKT_TOOBIG) {
+ ip6_sk_update_pmtu(skb, sk, info);
harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
-
+ }
+ if (type == NDISC_REDIRECT)
+ ip6_sk_redirect(skb, sk);
if (np->recverr) {
u8 *payload = skb->data;
if (!inet->hdrincl)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index becb048d18d4..cf02cb97bbdd 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -78,7 +78,10 @@ static int ip6_dst_gc(struct dst_ops *ops);
static int ip6_pkt_discard(struct sk_buff *skb);
static int ip6_pkt_discard_out(struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
-static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
+static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu);
+static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb);
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
@@ -99,10 +102,7 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
if (!(rt->dst.flags & DST_HOST))
return NULL;
- if (!rt->rt6i_peer)
- rt6_bind_peer(rt, 1);
-
- peer = rt->rt6i_peer;
+ peer = rt6_get_peer_create(rt);
if (peer) {
u32 *old_p = __DST_METRICS_PTR(old);
unsigned long prev, new;
@@ -123,21 +123,27 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
return p;
}
-static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
+static inline const void *choose_neigh_daddr(struct rt6_info *rt,
+ struct sk_buff *skb,
+ const void *daddr)
{
struct in6_addr *p = &rt->rt6i_gateway;
if (!ipv6_addr_any(p))
return (const void *) p;
+ else if (skb)
+ return &ipv6_hdr(skb)->daddr;
return daddr;
}
-static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
+static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
+ struct sk_buff *skb,
+ const void *daddr)
{
struct rt6_info *rt = (struct rt6_info *) dst;
struct neighbour *n;
- daddr = choose_neigh_daddr(rt, daddr);
+ daddr = choose_neigh_daddr(rt, skb, daddr);
n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
if (n)
return n;
@@ -152,7 +158,7 @@ static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
if (IS_ERR(n))
return PTR_ERR(n);
}
- dst_set_neighbour(&rt->dst, n);
+ rt->n = n;
return 0;
}
@@ -171,6 +177,7 @@ static struct dst_ops ip6_dst_ops_template = {
.negative_advice = ip6_negative_advice,
.link_failure = ip6_link_failure,
.update_pmtu = ip6_rt_update_pmtu,
+ .redirect = rt6_do_redirect,
.local_out = __ip6_local_out,
.neigh_lookup = ip6_neigh_lookup,
};
@@ -182,7 +189,13 @@ static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
return mtu ? : dst->dev->mtu;
}
-static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu)
+{
+}
+
+static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb)
{
}
@@ -200,6 +213,7 @@ static struct dst_ops ip6_dst_blackhole_ops = {
.mtu = ip6_blackhole_mtu,
.default_advmss = ip6_default_advmss,
.update_pmtu = ip6_rt_blackhole_update_pmtu,
+ .redirect = ip6_rt_blackhole_redirect,
.cow_metrics = ip6_rt_blackhole_cow_metrics,
.neigh_lookup = ip6_neigh_lookup,
};
@@ -261,16 +275,20 @@ static struct rt6_info ip6_blk_hole_entry_template = {
#endif
/* allocate dst with ip6_dst_ops */
-static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
+static inline struct rt6_info *ip6_dst_alloc(struct net *net,
struct net_device *dev,
- int flags)
+ int flags,
+ struct fib6_table *table)
{
- struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
+ struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
+ 0, DST_OBSOLETE_NONE, flags);
- if (rt)
- memset(&rt->rt6i_table, 0,
- sizeof(*rt) - sizeof(struct dst_entry));
+ if (rt) {
+ struct dst_entry *dst = &rt->dst;
+ memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
+ rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
+ }
return rt;
}
@@ -278,7 +296,9 @@ static void ip6_dst_destroy(struct dst_entry *dst)
{
struct rt6_info *rt = (struct rt6_info *)dst;
struct inet6_dev *idev = rt->rt6i_idev;
- struct inet_peer *peer = rt->rt6i_peer;
+
+ if (rt->n)
+ neigh_release(rt->n);
if (!(rt->dst.flags & DST_HOST))
dst_destroy_metrics_generic(dst);
@@ -291,8 +311,8 @@ static void ip6_dst_destroy(struct dst_entry *dst)
if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
dst_release(dst->from);
- if (peer) {
- rt->rt6i_peer = NULL;
+ if (rt6_has_peer(rt)) {
+ struct inet_peer *peer = rt6_peer_ptr(rt);
inet_putpeer(peer);
}
}
@@ -306,13 +326,20 @@ static u32 rt6_peer_genid(void)
void rt6_bind_peer(struct rt6_info *rt, int create)
{
+ struct inet_peer_base *base;
struct inet_peer *peer;
- peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
- if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
- inet_putpeer(peer);
- else
- rt->rt6i_peer_genid = rt6_peer_genid();
+ base = inetpeer_base_ptr(rt->_rt6i_peer);
+ if (!base)
+ return;
+
+ peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
+ if (peer) {
+ if (!rt6_set_peer(rt, peer))
+ inet_putpeer(peer);
+ else
+ rt->rt6i_peer_genid = rt6_peer_genid();
+ }
}
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -323,12 +350,19 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
struct net_device *loopback_dev =
dev_net(dev)->loopback_dev;
- if (dev != loopback_dev && idev && idev->dev == dev) {
- struct inet6_dev *loopback_idev =
- in6_dev_get(loopback_dev);
- if (loopback_idev) {
- rt->rt6i_idev = loopback_idev;
- in6_dev_put(idev);
+ if (dev != loopback_dev) {
+ if (idev && idev->dev == dev) {
+ struct inet6_dev *loopback_idev =
+ in6_dev_get(loopback_dev);
+ if (loopback_idev) {
+ rt->rt6i_idev = loopback_idev;
+ in6_dev_put(idev);
+ }
+ }
+ if (rt->n && rt->n->dev == dev) {
+ rt->n->dev = loopback_dev;
+ dev_hold(loopback_dev);
+ dev_put(dev);
}
}
}
@@ -418,7 +452,7 @@ static void rt6_probe(struct rt6_info *rt)
* to no more than one per minute.
*/
rcu_read_lock();
- neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
+ neigh = rt ? rt->n : NULL;
if (!neigh || (neigh->nud_state & NUD_VALID))
goto out;
read_lock_bh(&neigh->lock);
@@ -465,7 +499,7 @@ static inline int rt6_check_neigh(struct rt6_info *rt)
int m;
rcu_read_lock();
- neigh = dst_get_neighbour_noref(&rt->dst);
+ neigh = rt->n;
if (rt->rt6i_flags & RTF_NONEXTHOP ||
!(rt->rt6i_flags & RTF_GATEWAY))
m = 1;
@@ -812,7 +846,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
if (rt) {
rt->rt6i_flags |= RTF_CACHE;
- dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
+ rt->n = neigh_clone(ort->n);
}
return rt;
}
@@ -846,7 +880,7 @@ restart:
dst_hold(&rt->dst);
read_unlock_bh(&table->tb6_lock);
- if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
+ if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
else if (!(rt->dst.flags & DST_HOST))
nrt = rt6_alloc_clone(rt, &fl6->daddr);
@@ -931,6 +965,8 @@ struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
{
int flags = 0;
+ fl6->flowi6_iif = net->loopback_dev->ifindex;
+
if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
flags |= RT6_LOOKUP_F_IFACE;
@@ -949,12 +985,13 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
struct dst_entry *new = NULL;
- rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
+ rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
if (rt) {
- memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
-
new = &rt->dst;
+ memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
+ rt6_init_peer(rt, net->ipv6.peers);
+
new->__use = 1;
new->input = dst_discard;
new->output = dst_discard;
@@ -996,7 +1033,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
if (rt->rt6i_peer_genid != rt6_peer_genid()) {
- if (!rt->rt6i_peer)
+ if (!rt6_has_peer(rt))
rt6_bind_peer(rt, 0);
rt->rt6i_peer_genid = rt6_peer_genid();
}
@@ -1038,11 +1075,15 @@ static void ip6_link_failure(struct sk_buff *skb)
}
}
-static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu)
{
struct rt6_info *rt6 = (struct rt6_info*)dst;
+ dst_confirm(dst);
if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
+ struct net *net = dev_net(dst->dev);
+
rt6->rt6i_flags |= RTF_MODIFIED;
if (mtu < IPV6_MIN_MTU) {
u32 features = dst_metric(dst, RTAX_FEATURES);
@@ -1051,9 +1092,66 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
dst_metric_set(dst, RTAX_FEATURES, features);
}
dst_metric_set(dst, RTAX_MTU, mtu);
+ rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
}
}
+void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
+ int oif, u32 mark)
+{
+ const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_oif = oif;
+ fl6.flowi6_mark = mark;
+ fl6.flowi6_flags = 0;
+ fl6.daddr = iph->daddr;
+ fl6.saddr = iph->saddr;
+ fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
+
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (!dst->error)
+ ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
+ dst_release(dst);
+}
+EXPORT_SYMBOL_GPL(ip6_update_pmtu);
+
+void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
+{
+ ip6_update_pmtu(skb, sock_net(sk), mtu,
+ sk->sk_bound_dev_if, sk->sk_mark);
+}
+EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
+
+void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
+{
+ const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_oif = oif;
+ fl6.flowi6_mark = mark;
+ fl6.flowi6_flags = 0;
+ fl6.daddr = iph->daddr;
+ fl6.saddr = iph->saddr;
+ fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
+
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (!dst->error)
+ rt6_do_redirect(dst, NULL, skb);
+ dst_release(dst);
+}
+EXPORT_SYMBOL_GPL(ip6_redirect);
+
+void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
+{
+ ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
+}
+EXPORT_SYMBOL_GPL(ip6_sk_redirect);
+
static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
struct net_device *dev = dst->dev;
@@ -1110,7 +1208,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
if (unlikely(!idev))
return ERR_PTR(-ENODEV);
- rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
+ rt = ip6_dst_alloc(net, dev, 0, NULL);
if (unlikely(!rt)) {
in6_dev_put(idev);
dst = ERR_PTR(-ENOMEM);
@@ -1120,7 +1218,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
if (neigh)
neigh_hold(neigh);
else {
- neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
+ neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
if (IS_ERR(neigh)) {
in6_dev_put(idev);
dst_free(&rt->dst);
@@ -1130,7 +1228,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
rt->dst.flags |= DST_HOST;
rt->dst.output = ip6_output;
- dst_set_neighbour(&rt->dst, neigh);
+ rt->n = neigh;
atomic_set(&rt->dst.__refcnt, 1);
rt->rt6i_dst.addr = fl6->daddr;
rt->rt6i_dst.plen = 128;
@@ -1292,7 +1390,7 @@ int ip6_route_add(struct fib6_config *cfg)
if (!table)
goto out;
- rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
+ rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
if (!rt) {
err = -ENOMEM;
@@ -1546,107 +1644,94 @@ static int ip6_route_del(struct fib6_config *cfg)
return err;
}
-/*
- * Handle redirects
- */
-struct ip6rd_flowi {
- struct flowi6 fl6;
- struct in6_addr gateway;
-};
-
-static struct rt6_info *__ip6_route_redirect(struct net *net,
- struct fib6_table *table,
- struct flowi6 *fl6,
- int flags)
+static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
- struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
- struct rt6_info *rt;
- struct fib6_node *fn;
+ struct net *net = dev_net(skb->dev);
+ struct netevent_redirect netevent;
+ struct rt6_info *rt, *nrt = NULL;
+ const struct in6_addr *target;
+ struct ndisc_options ndopts;
+ const struct in6_addr *dest;
+ struct neighbour *old_neigh;
+ struct inet6_dev *in6_dev;
+ struct neighbour *neigh;
+ struct icmp6hdr *icmph;
+ int optlen, on_link;
+ u8 *lladdr;
- /*
- * Get the "current" route for this destination and
- * check if the redirect has come from approriate router.
- *
- * RFC 2461 specifies that redirects should only be
- * accepted if they come from the nexthop to the target.
- * Due to the way the routes are chosen, this notion
- * is a bit fuzzy and one might need to check all possible
- * routes.
- */
+ optlen = skb->tail - skb->transport_header;
+ optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
- read_lock_bh(&table->tb6_lock);
- fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
-restart:
- for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
- /*
- * Current route is on-link; redirect is always invalid.
- *
- * Seems, previous statement is not true. It could
- * be node, which looks for us as on-link (f.e. proxy ndisc)
- * But then router serving it might decide, that we should
- * know truth 8)8) --ANK (980726).
- */
- if (rt6_check_expired(rt))
- continue;
- if (!(rt->rt6i_flags & RTF_GATEWAY))
- continue;
- if (fl6->flowi6_oif != rt->dst.dev->ifindex)
- continue;
- if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
- continue;
- break;
+ if (optlen < 0) {
+ net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
+ return;
}
- if (!rt)
- rt = net->ipv6.ip6_null_entry;
- BACKTRACK(net, &fl6->saddr);
-out:
- dst_hold(&rt->dst);
-
- read_unlock_bh(&table->tb6_lock);
-
- return rt;
-};
+ icmph = icmp6_hdr(skb);
+ target = (const struct in6_addr *) (icmph + 1);
+ dest = target + 1;
-static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
- const struct in6_addr *src,
- const struct in6_addr *gateway,
- struct net_device *dev)
-{
- int flags = RT6_LOOKUP_F_HAS_SADDR;
- struct net *net = dev_net(dev);
- struct ip6rd_flowi rdfl = {
- .fl6 = {
- .flowi6_oif = dev->ifindex,
- .daddr = *dest,
- .saddr = *src,
- },
- };
+ if (ipv6_addr_is_multicast(dest)) {
+ net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
+ return;
+ }
- rdfl.gateway = *gateway;
+ on_link = 0;
+ if (ipv6_addr_equal(dest, target)) {
+ on_link = 1;
+ } else if (ipv6_addr_type(target) !=
+ (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
+ net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
+ return;
+ }
- if (rt6_need_strict(dest))
- flags |= RT6_LOOKUP_F_IFACE;
+ in6_dev = __in6_dev_get(skb->dev);
+ if (!in6_dev)
+ return;
+ if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
+ return;
- return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
- flags, __ip6_route_redirect);
-}
+ /* RFC2461 8.1:
+ * The IP source address of the Redirect MUST be the same as the current
+ * first-hop router for the specified ICMP Destination Address.
+ */
-void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
- const struct in6_addr *saddr,
- struct neighbour *neigh, u8 *lladdr, int on_link)
-{
- struct rt6_info *rt, *nrt = NULL;
- struct netevent_redirect netevent;
- struct net *net = dev_net(neigh->dev);
+ if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
+ net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
+ return;
+ }
- rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
+ lladdr = NULL;
+ if (ndopts.nd_opts_tgt_lladdr) {
+ lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
+ skb->dev);
+ if (!lladdr) {
+ net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
+ return;
+ }
+ }
+ rt = (struct rt6_info *) dst;
if (rt == net->ipv6.ip6_null_entry) {
net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
- goto out;
+ return;
}
+ /* Redirect received -> path was valid.
+ * Look, redirects are sent only in response to data packets,
+ * so that this nexthop apparently is reachable. --ANK
+ */
+ dst_confirm(&rt->dst);
+
+ neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
+ if (!neigh)
+ return;
+
+ /* Duplicate redirect: silently ignore. */
+ old_neigh = rt->n;
+ if (neigh == old_neigh)
+ goto out;
+
/*
* We have finally decided to accept it.
*/
@@ -1658,17 +1743,6 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
NEIGH_UPDATE_F_ISROUTER))
);
- /*
- * Redirect received -> path was valid.
- * Look, redirects are sent only in response to data packets,
- * so that this nexthop apparently is reachable. --ANK
- */
- dst_confirm(&rt->dst);
-
- /* Duplicate redirect: silently ignore. */
- if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
- goto out;
-
nrt = ip6_rt_copy(rt, dest);
if (!nrt)
goto out;
@@ -1678,132 +1752,25 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
nrt->rt6i_flags &= ~RTF_GATEWAY;
nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
- dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
+ nrt->n = neigh_clone(neigh);
if (ip6_ins_rt(nrt))
goto out;
netevent.old = &rt->dst;
+ netevent.old_neigh = old_neigh;
netevent.new = &nrt->dst;
+ netevent.new_neigh = neigh;
+ netevent.daddr = dest;
call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
if (rt->rt6i_flags & RTF_CACHE) {
+ rt = (struct rt6_info *) dst_clone(&rt->dst);
ip6_del_rt(rt);
- return;
}
out:
- dst_release(&rt->dst);
-}
-
-/*
- * Handle ICMP "packet too big" messages
- * i.e. Path MTU discovery
- */
-
-static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
- struct net *net, u32 pmtu, int ifindex)
-{
- struct rt6_info *rt, *nrt;
- int allfrag = 0;
-again:
- rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
- if (!rt)
- return;
-
- if (rt6_check_expired(rt)) {
- ip6_del_rt(rt);
- goto again;
- }
-
- if (pmtu >= dst_mtu(&rt->dst))
- goto out;
-
- if (pmtu < IPV6_MIN_MTU) {
- /*
- * According to RFC2460, PMTU is set to the IPv6 Minimum Link
- * MTU (1280) and a fragment header should always be included
- * after a node receiving Too Big message reporting PMTU is
- * less than the IPv6 Minimum Link MTU.
- */
- pmtu = IPV6_MIN_MTU;
- allfrag = 1;
- }
-
- /* New mtu received -> path was valid.
- They are sent only in response to data packets,
- so that this nexthop apparently is reachable. --ANK
- */
- dst_confirm(&rt->dst);
-
- /* Host route. If it is static, it would be better
- not to override it, but add new one, so that
- when cache entry will expire old pmtu
- would return automatically.
- */
- if (rt->rt6i_flags & RTF_CACHE) {
- dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
- if (allfrag) {
- u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
- features |= RTAX_FEATURE_ALLFRAG;
- dst_metric_set(&rt->dst, RTAX_FEATURES, features);
- }
- rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
- rt->rt6i_flags |= RTF_MODIFIED;
- goto out;
- }
-
- /* Network route.
- Two cases are possible:
- 1. It is connected route. Action: COW
- 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
- */
- if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
- nrt = rt6_alloc_cow(rt, daddr, saddr);
- else
- nrt = rt6_alloc_clone(rt, daddr);
-
- if (nrt) {
- dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
- if (allfrag) {
- u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
- features |= RTAX_FEATURE_ALLFRAG;
- dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
- }
-
- /* According to RFC 1981, detecting PMTU increase shouldn't be
- * happened within 5 mins, the recommended timer is 10 mins.
- * Here this route expiration time is set to ip6_rt_mtu_expires
- * which is 10 mins. After 10 mins the decreased pmtu is expired
- * and detecting PMTU increase will be automatically happened.
- */
- rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
- nrt->rt6i_flags |= RTF_DYNAMIC;
- ip6_ins_rt(nrt);
- }
-out:
- dst_release(&rt->dst);
-}
-
-void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
- struct net_device *dev, u32 pmtu)
-{
- struct net *net = dev_net(dev);
-
- /*
- * RFC 1981 states that a node "MUST reduce the size of the packets it
- * is sending along the path" that caused the Packet Too Big message.
- * Since it's not possible in the general case to determine which
- * interface was used to send the original packet, we update the MTU
- * on the interface that will be used to send future packets. We also
- * update the MTU on the interface that received the Packet Too Big in
- * case the original packet was forced out that interface with
- * SO_BINDTODEVICE or similar. This is the next best thing to the
- * correct behaviour, which would be to update the MTU on all
- * interfaces.
- */
- rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
- rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
+ neigh_release(neigh);
}
/*
@@ -1814,8 +1781,8 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
const struct in6_addr *dest)
{
struct net *net = dev_net(ort->dst.dev);
- struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
- ort->dst.dev, 0);
+ struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
+ ort->rt6i_table);
if (rt) {
rt->dst.input = ort->dst.input;
@@ -2099,8 +2066,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
bool anycast)
{
struct net *net = dev_net(idev->dev);
- struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
- net->loopback_dev, 0);
+ struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
int err;
if (!rt) {
@@ -2396,13 +2362,11 @@ static int rt6_fill_node(struct net *net,
int iif, int type, u32 pid, u32 seq,
int prefix, int nowait, unsigned int flags)
{
- const struct inet_peer *peer;
struct rtmsg *rtm;
struct nlmsghdr *nlh;
long expires;
u32 table;
struct neighbour *n;
- u32 ts, tsage;
if (prefix) { /* user wants prefix routes only */
if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
@@ -2440,10 +2404,12 @@ static int rt6_fill_node(struct net *net,
rtm->rtm_protocol = rt->rt6i_protocol;
if (rt->rt6i_flags & RTF_DYNAMIC)
rtm->rtm_protocol = RTPROT_REDIRECT;
- else if (rt->rt6i_flags & RTF_ADDRCONF)
- rtm->rtm_protocol = RTPROT_KERNEL;
- else if (rt->rt6i_flags & RTF_DEFAULT)
- rtm->rtm_protocol = RTPROT_RA;
+ else if (rt->rt6i_flags & RTF_ADDRCONF) {
+ if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
+ rtm->rtm_protocol = RTPROT_RA;
+ else
+ rtm->rtm_protocol = RTPROT_KERNEL;
+ }
if (rt->rt6i_flags & RTF_CACHE)
rtm->rtm_flags |= RTM_F_CLONED;
@@ -2500,7 +2466,7 @@ static int rt6_fill_node(struct net *net,
goto nla_put_failure;
rcu_read_lock();
- n = dst_get_neighbour_noref(&rt->dst);
+ n = rt->n;
if (n) {
if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
rcu_read_unlock();
@@ -2521,15 +2487,7 @@ static int rt6_fill_node(struct net *net,
else
expires = INT_MAX;
- peer = rt->rt6i_peer;
- ts = tsage = 0;
- if (peer && peer->tcp_ts_stamp) {
- ts = peer->tcp_ts;
- tsage = get_seconds() - peer->tcp_ts_stamp;
- }
-
- if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
- expires, rt->dst.error) < 0)
+ if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
goto nla_put_failure;
return nlmsg_end(skb, nlh);
@@ -2722,7 +2680,7 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
seq_puts(m, "00000000000000000000000000000000 00 ");
#endif
rcu_read_lock();
- n = dst_get_neighbour_noref(&rt->dst);
+ n = rt->n;
if (n) {
seq_printf(m, "%pi6", n->primary_key);
} else {
@@ -3007,6 +2965,31 @@ static struct pernet_operations ip6_route_net_ops = {
.exit = ip6_route_net_exit,
};
+static int __net_init ipv6_inetpeer_init(struct net *net)
+{
+ struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
+
+ if (!bp)
+ return -ENOMEM;
+ inet_peer_base_init(bp);
+ net->ipv6.peers = bp;
+ return 0;
+}
+
+static void __net_exit ipv6_inetpeer_exit(struct net *net)
+{
+ struct inet_peer_base *bp = net->ipv6.peers;
+
+ net->ipv6.peers = NULL;
+ inetpeer_invalidate_tree(bp);
+ kfree(bp);
+}
+
+static struct pernet_operations ipv6_inetpeer_ops = {
+ .init = ipv6_inetpeer_init,
+ .exit = ipv6_inetpeer_exit,
+};
+
static struct pernet_operations ip6_route_net_late_ops = {
.init = ip6_route_net_init_late,
.exit = ip6_route_net_exit_late,
@@ -3032,10 +3015,14 @@ int __init ip6_route_init(void)
if (ret)
goto out_kmem_cache;
- ret = register_pernet_subsys(&ip6_route_net_ops);
+ ret = register_pernet_subsys(&ipv6_inetpeer_ops);
if (ret)
goto out_dst_entries;
+ ret = register_pernet_subsys(&ip6_route_net_ops);
+ if (ret)
+ goto out_register_inetpeer;
+
ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
/* Registering of the loopback is done before this portion of code,
@@ -3088,6 +3075,8 @@ out_fib6_init:
fib6_gc_cleanup();
out_register_subsys:
unregister_pernet_subsys(&ip6_route_net_ops);
+out_register_inetpeer:
+ unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
@@ -3102,6 +3091,7 @@ void ip6_route_cleanup(void)
fib6_rules_cleanup();
xfrm6_fini();
fib6_gc_cleanup();
+ unregister_pernet_subsys(&ipv6_inetpeer_ops);
unregister_pernet_subsys(&ip6_route_net_ops);
dst_entries_destroy(&ip6_dst_blackhole_ops);
kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 60415711563f..3bd1bfc01f85 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -527,9 +527,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
case ICMP_PORT_UNREACH:
/* Impossible event. */
return 0;
- case ICMP_FRAG_NEEDED:
- /* Soft state for pmtu is maintained by IP core. */
- return 0;
default:
/* All others are translated to HOST_UNREACH.
rfc2003 contains "deep thoughts" about NET_UNREACH,
@@ -542,6 +539,8 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
if (code != ICMP_EXC_TTL)
return 0;
break;
+ case ICMP_REDIRECT:
+ break;
}
err = -ENOENT;
@@ -551,7 +550,23 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
skb->dev,
iph->daddr,
iph->saddr);
- if (t == NULL || t->parms.iph.daddr == 0)
+ if (t == NULL)
+ goto out;
+
+ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
+ ipv4_update_pmtu(skb, dev_net(skb->dev), info,
+ t->dev->ifindex, 0, IPPROTO_IPV6, 0);
+ err = 0;
+ goto out;
+ }
+ if (type == ICMP_REDIRECT) {
+ ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
+ IPPROTO_IPV6, 0);
+ err = 0;
+ goto out;
+ }
+
+ if (t->parms.iph.daddr == 0)
goto out;
err = 0;
@@ -792,7 +807,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
}
if (tunnel->parms.iph.daddr && skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
if (skb->len > mtu) {
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 8e951d8d3b81..bb46061c813a 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -21,9 +21,6 @@
#include <net/ipv6.h>
#include <net/tcp.h>
-extern int sysctl_tcp_syncookies;
-extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
-
#define COOKIEBITS 24 /* Upper bits store count */
#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
@@ -180,7 +177,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
- tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
+ tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL);
if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
goto out;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 9df64a50b075..f49476e2d884 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -277,22 +277,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
rt = (struct rt6_info *) dst;
if (tcp_death_row.sysctl_tw_recycle &&
!tp->rx_opt.ts_recent_stamp &&
- ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) {
- struct inet_peer *peer = rt6_get_peer(rt);
- /*
- * VJ's idea. We save last timestamp seen from
- * the destination in peer table, when entering state
- * TIME-WAIT * and initialize rx_opt.ts_recent from it,
- * when trying new connection.
- */
- if (peer) {
- inet_peer_refcheck(peer);
- if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
- tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
- tp->rx_opt.ts_recent = peer->tcp_ts;
- }
- }
- }
+ ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr))
+ tcp_fetch_timewait_stamp(sk, dst);
icsk->icsk_ext_hdr_len = 0;
if (np->opt)
@@ -329,6 +315,23 @@ failure:
return err;
}
+static void tcp_v6_mtu_reduced(struct sock *sk)
+{
+ struct dst_entry *dst;
+
+ if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
+ return;
+
+ dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
+ if (!dst)
+ return;
+
+ if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
+ tcp_sync_mss(sk, dst_mtu(dst));
+ tcp_simple_retransmit(sk);
+ }
+}
+
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
@@ -356,7 +359,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
bh_lock_sock(sk);
- if (sock_owned_by_user(sk))
+ if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
if (sk->sk_state == TCP_CLOSE)
@@ -377,49 +380,19 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
np = inet6_sk(sk);
- if (type == ICMPV6_PKT_TOOBIG) {
- struct dst_entry *dst;
-
- if (sock_owned_by_user(sk))
- goto out;
- if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
- goto out;
-
- /* icmp should have updated the destination cache entry */
- dst = __sk_dst_check(sk, np->dst_cookie);
-
- if (dst == NULL) {
- struct inet_sock *inet = inet_sk(sk);
- struct flowi6 fl6;
-
- /* BUGGG_FUTURE: Again, it is not clear how
- to handle rthdr case. Ignore this complexity
- for now.
- */
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_TCP;
- fl6.daddr = np->daddr;
- fl6.saddr = np->saddr;
- fl6.flowi6_oif = sk->sk_bound_dev_if;
- fl6.flowi6_mark = sk->sk_mark;
- fl6.fl6_dport = inet->inet_dport;
- fl6.fl6_sport = inet->inet_sport;
- security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
-
- dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false);
- if (IS_ERR(dst)) {
- sk->sk_err_soft = -PTR_ERR(dst);
- goto out;
- }
+ if (type == NDISC_REDIRECT) {
+ struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
- } else
- dst_hold(dst);
+ if (dst)
+ dst->ops->redirect(dst, sk, skb);
+ }
- if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
- tcp_sync_mss(sk, dst_mtu(dst));
- tcp_simple_retransmit(sk);
- } /* else let the usual retransmit timer handle it */
- dst_release(dst);
+ if (type == ICMPV6_PKT_TOOBIG) {
+ tp->mtu_info = ntohl(info);
+ if (!sock_owned_by_user(sk))
+ tcp_v6_mtu_reduced(sk);
+ else
+ set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags);
goto out;
}
@@ -475,62 +448,43 @@ out:
}
-static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
+static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
+ struct flowi6 *fl6,
+ struct request_sock *req,
struct request_values *rvp,
u16 queue_mapping)
{
struct inet6_request_sock *treq = inet6_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
struct sk_buff * skb;
- struct ipv6_txoptions *opt = NULL;
- struct in6_addr * final_p, final;
- struct flowi6 fl6;
- struct dst_entry *dst;
- int err;
+ int err = -ENOMEM;
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_TCP;
- fl6.daddr = treq->rmt_addr;
- fl6.saddr = treq->loc_addr;
- fl6.flowlabel = 0;
- fl6.flowi6_oif = treq->iif;
- fl6.flowi6_mark = sk->sk_mark;
- fl6.fl6_dport = inet_rsk(req)->rmt_port;
- fl6.fl6_sport = inet_rsk(req)->loc_port;
- security_req_classify_flow(req, flowi6_to_flowi(&fl6));
-
- opt = np->opt;
- final_p = fl6_update_dst(&fl6, opt, &final);
-
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
- if (IS_ERR(dst)) {
- err = PTR_ERR(dst);
- dst = NULL;
+ /* First, grab a route. */
+ if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
goto done;
- }
+
skb = tcp_make_synack(sk, dst, req, rvp);
- err = -ENOMEM;
+
if (skb) {
__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
- fl6.daddr = treq->rmt_addr;
+ fl6->daddr = treq->rmt_addr;
skb_set_queue_mapping(skb, queue_mapping);
- err = ip6_xmit(sk, skb, &fl6, opt, np->tclass);
+ err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
err = net_xmit_eval(err);
}
done:
- if (opt && opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
- dst_release(dst);
return err;
}
static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
struct request_values *rvp)
{
+ struct flowi6 fl6;
+
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
- return tcp_v6_send_synack(sk, req, rvp, 0);
+ return tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0);
}
static void tcp_v6_reqsk_destructor(struct request_sock *req)
@@ -1057,6 +1011,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
__u32 isn = TCP_SKB_CB(skb)->when;
struct dst_entry *dst = NULL;
+ struct flowi6 fl6;
bool want_cookie = false;
if (skb->protocol == htons(ETH_P_IP))
@@ -1085,7 +1040,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
tmp_opt.user_mss = tp->rx_opt.user_mss;
- tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
+ tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
if (tmp_opt.cookie_plus > 0 &&
tmp_opt.saw_tstamp &&
@@ -1150,8 +1105,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
treq->iif = inet6_iif(skb);
if (!isn) {
- struct inet_peer *peer = NULL;
-
if (ipv6_opt_accepted(sk, skb) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
@@ -1176,14 +1129,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
*/
if (tmp_opt.saw_tstamp &&
tcp_death_row.sysctl_tw_recycle &&
- (dst = inet6_csk_route_req(sk, req)) != NULL &&
- (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL &&
- ipv6_addr_equal((struct in6_addr *)peer->daddr.addr.a6,
- &treq->rmt_addr)) {
- inet_peer_refcheck(peer);
- if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
- (s32)(peer->tcp_ts - req->ts_recent) >
- TCP_PAWS_WINDOW) {
+ (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) {
+ if (!tcp_peer_is_proven(req, dst, true)) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
goto drop_and_release;
}
@@ -1192,8 +1139,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
else if (!sysctl_tcp_syncookies &&
(sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
(sysctl_max_syn_backlog >> 2)) &&
- (!peer || !peer->tcp_ts_stamp) &&
- (!dst || !dst_metric(dst, RTAX_RTT))) {
+ !tcp_peer_is_proven(req, dst, false)) {
/* Without syncookies last quarter of
* backlog is filled with destinations,
* proven to be alive.
@@ -1215,7 +1161,7 @@ have_isn:
if (security_inet_conn_request(sk, skb, req))
goto drop_and_release;
- if (tcp_v6_send_synack(sk, req,
+ if (tcp_v6_send_synack(sk, dst, &fl6, req,
(struct request_values *)&tmp_ext,
skb_get_queue_mapping(skb)) ||
want_cookie)
@@ -1242,10 +1188,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
struct inet_sock *newinet;
struct tcp_sock *newtp;
struct sock *newsk;
- struct ipv6_txoptions *opt;
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *key;
#endif
+ struct flowi6 fl6;
if (skb->protocol == htons(ETH_P_IP)) {
/*
@@ -1302,13 +1248,12 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
}
treq = inet6_rsk(req);
- opt = np->opt;
if (sk_acceptq_is_full(sk))
goto out_overflow;
if (!dst) {
- dst = inet6_csk_route_req(sk, req);
+ dst = inet6_csk_route_req(sk, &fl6, req);
if (!dst)
goto out;
}
@@ -1371,11 +1316,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
but we make one more one thing there: reattach optmem
to newsk.
*/
- if (opt) {
- newnp->opt = ipv6_dup_options(newsk, opt);
- if (opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
- }
+ if (np->opt)
+ newnp->opt = ipv6_dup_options(newsk, np->opt);
inet_csk(newsk)->icsk_ext_hdr_len = 0;
if (newnp->opt)
@@ -1422,8 +1364,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
out_overflow:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
- if (opt && opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
dst_release(dst);
out:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
@@ -1734,42 +1674,10 @@ do_time_wait:
goto discard_it;
}
-static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it)
-{
- struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct inet_peer *peer;
-
- if (!rt ||
- !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) {
- peer = inet_getpeer_v6(&np->daddr, 1);
- *release_it = true;
- } else {
- if (!rt->rt6i_peer)
- rt6_bind_peer(rt, 1);
- peer = rt->rt6i_peer;
- *release_it = false;
- }
-
- return peer;
-}
-
-static void *tcp_v6_tw_get_peer(struct sock *sk)
-{
- const struct inet6_timewait_sock *tw6 = inet6_twsk(sk);
- const struct inet_timewait_sock *tw = inet_twsk(sk);
-
- if (tw->tw_family == AF_INET)
- return tcp_v4_tw_get_peer(sk);
-
- return inet_getpeer_v6(&tw6->tw_v6_daddr, 1);
-}
-
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp6_timewait_sock),
.twsk_unique = tcp_twsk_unique,
.twsk_destructor= tcp_twsk_destructor,
- .twsk_getpeer = tcp_v6_tw_get_peer,
};
static const struct inet_connection_sock_af_ops ipv6_specific = {
@@ -1778,7 +1686,6 @@ static const struct inet_connection_sock_af_ops ipv6_specific = {
.rebuild_header = inet6_sk_rebuild_header,
.conn_request = tcp_v6_conn_request,
.syn_recv_sock = tcp_v6_syn_recv_sock,
- .get_peer = tcp_v6_get_peer,
.net_header_len = sizeof(struct ipv6hdr),
.net_frag_header_len = sizeof(struct frag_hdr),
.setsockopt = ipv6_setsockopt,
@@ -1810,7 +1717,6 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
.rebuild_header = inet_sk_rebuild_header,
.conn_request = tcp_v6_conn_request,
.syn_recv_sock = tcp_v6_syn_recv_sock,
- .get_peer = tcp_v4_get_peer,
.net_header_len = sizeof(struct iphdr),
.setsockopt = ipv6_setsockopt,
.getsockopt = ipv6_getsockopt,
@@ -2049,6 +1955,8 @@ struct proto tcpv6_prot = {
.sendmsg = tcp_sendmsg,
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v6_do_rcv,
+ .release_cb = tcp_release_cb,
+ .mtu_reduced = tcp_v6_mtu_reduced,
.hash = tcp_v6_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f05099fc5901..99d0077b56b8 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -48,6 +48,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <trace/events/skb.h>
#include "udp_impl.h"
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
@@ -385,15 +386,16 @@ try_again:
if (skb_csum_unnecessary(skb))
err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
- msg->msg_iov, copied );
+ msg->msg_iov, copied);
else {
err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
if (err == -EINVAL)
goto csum_copy_err;
}
- if (err)
+ if (unlikely(err)) {
+ trace_kfree_skb(skb, udpv6_recvmsg);
goto out_free;
-
+ }
if (!peeked) {
if (is_udp4)
UDP_INC_STATS_USER(sock_net(sk),
@@ -479,6 +481,11 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (sk == NULL)
return;
+ if (type == ICMPV6_PKT_TOOBIG)
+ ip6_sk_update_pmtu(skb, sk, info);
+ if (type == NDISC_REDIRECT)
+ ip6_sk_redirect(skb, sk);
+
np = inet6_sk(sk);
if (!icmpv6_err_convert(type, code, &err) && !np->recverr)
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 8625fba96db9..ef39812107b1 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -99,12 +99,11 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
if (!xdst->u.rt6.rt6i_idev)
return -ENODEV;
- xdst->u.rt6.rt6i_peer = rt->rt6i_peer;
- if (rt->rt6i_peer)
- atomic_inc(&rt->rt6i_peer->refcnt);
+ rt6_transfer_peer(&xdst->u.rt6, rt);
/* Sheit... I remember I did this right. Apparently,
* it was magically lost, so this code needs audit */
+ xdst->u.rt6.n = neigh_clone(rt->n);
xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST |
RTF_LOCAL);
xdst->u.rt6.rt6i_metric = rt->rt6i_metric;
@@ -208,12 +207,22 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops)
return dst_entries_get_fast(ops) > ops->gc_thresh * 2;
}
-static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu)
{
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
struct dst_entry *path = xdst->route;
- path->ops->update_pmtu(path, mtu);
+ path->ops->update_pmtu(path, sk, skb, mtu);
+}
+
+static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb)
+{
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+ struct dst_entry *path = xdst->route;
+
+ path->ops->redirect(path, sk, skb);
}
static void xfrm6_dst_destroy(struct dst_entry *dst)
@@ -223,8 +232,10 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
if (likely(xdst->u.rt6.rt6i_idev))
in6_dev_put(xdst->u.rt6.rt6i_idev);
dst_destroy_metrics_generic(dst);
- if (likely(xdst->u.rt6.rt6i_peer))
- inet_putpeer(xdst->u.rt6.rt6i_peer);
+ if (rt6_has_peer(&xdst->u.rt6)) {
+ struct inet_peer *peer = rt6_peer_ptr(&xdst->u.rt6);
+ inet_putpeer(peer);
+ }
xfrm_dst_destroy(xdst);
}
@@ -260,6 +271,7 @@ static struct dst_ops xfrm6_dst_ops = {
.protocol = cpu_to_be16(ETH_P_IPV6),
.gc = xfrm6_garbage_collect,
.update_pmtu = xfrm6_update_pmtu,
+ .redirect = xfrm6_redirect,
.cow_metrics = dst_cow_metrics_generic,
.destroy = xfrm6_dst_destroy,
.ifdown = xfrm6_dst_ifdown,
OpenPOWER on IntegriCloud