From c457338d7acd3823e765b684a62294cfda9d2f55 Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Tue, 8 Nov 2011 04:41:42 +0000 Subject: ipv6: drop packets when source address is multicast RFC 4291 Section 2.7 says Multicast addresses must not be used as source addresses in IPv6 packets - drop them on input so we don't process the packet further. Signed-off-by: Brian Haley Reported-and-Tested-by: Kumar Sanghvi Signed-off-by: David S. Miller --- net/ipv6/ip6_input.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 027c7ff6f1e5..a46c64eb0a66 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -111,6 +111,14 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt ipv6_addr_loopback(&hdr->daddr)) goto err; + /* + * RFC4291 2.7 + * Multicast addresses must not be used as source addresses in IPv6 + * packets or appear in any Routing header. + */ + if (ipv6_addr_is_multicast(&hdr->saddr)) + goto err; + skb->transport_header = skb->network_header + sizeof(*hdr); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); -- cgit v1.2.1 From 069294e813ed5f27f82613b027609bcda5f1b914 Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Tue, 8 Nov 2011 12:12:44 +0000 Subject: ah: Correctly pass error codes in ahash output callback. The AH4/6 ahash output callbacks pass nexthdr to xfrm_output_resume instead of the error code. This appears to be a copy+paste error from the input case, where nexthdr is expected. This causes the driver to continuously add AH headers to the datagram until either an allocation fails and the packet is dropped or the ahash driver hits a synchronous fallback and the resulting monstrosity is transmitted. Correct this issue by simply passing the error code unadulterated. Signed-off-by: Nick Bowler Signed-off-by: David S. 
Miller --- net/ipv6/ah6.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 2195ae651923..ede4d9d6cc2b 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -324,8 +324,6 @@ static void ah6_output_done(struct crypto_async_request *base, int err) #endif } - err = ah->nexthdr; - kfree(AH_SKB_CB(skb)->tmp); xfrm_output_resume(skb, err); } -- cgit v1.2.1 From b7ea81a58adc123a4e980cb0eff9eb5c144b5dc7 Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Tue, 8 Nov 2011 12:12:45 +0000 Subject: ah: Read nexthdr value before overwriting it in ahash input callback. The AH4/6 ahash input callbacks read out the nexthdr field from the AH header *after* they overwrite that header. This is obviously not going to end well. Fix it up. Signed-off-by: Nick Bowler Signed-off-by: David S. Miller --- net/ipv6/ah6.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index ede4d9d6cc2b..7a33aaa00227 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -464,12 +464,12 @@ static void ah6_input_done(struct crypto_async_request *base, int err) if (err) goto out; + err = ah->nexthdr; + skb->network_header += ah_hlen; memcpy(skb_network_header(skb), work_iph, hdr_len); __skb_pull(skb, ah_hlen + hdr_len); skb_set_transport_header(skb, -hdr_len); - - err = ah->nexthdr; out: kfree(AH_SKB_CB(skb)->tmp); xfrm_input_resume(skb, err); -- cgit v1.2.1 From 4b90a603a1b21d63cf743cc833680cb195a729f6 Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Thu, 10 Nov 2011 09:01:27 +0000 Subject: ah: Don't return NET_XMIT_DROP on input. When the ahash driver returns -EBUSY, AH4/6 input functions return NET_XMIT_DROP, presumably copied from the output code path. But returning transmit codes on input doesn't make a lot of sense. Since NET_XMIT_DROP is a positive int, this gets interpreted as the next header type (i.e., success). As that can only end badly, remove the check. 
Signed-off-by: Nick Bowler Signed-off-by: David S. Miller --- net/ipv6/ah6.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 7a33aaa00227..4c0f894d0843 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -581,8 +581,6 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) if (err == -EINPROGRESS) goto out; - if (err == -EBUSY) - err = NET_XMIT_DROP; goto out_free; } -- cgit v1.2.1 From 731abb9cb27aef6013ce60808a04e04a545f3f4e Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Thu, 10 Nov 2011 15:10:23 +0000 Subject: ip6_tunnel: copy parms.name after register_netdevice Commit 1c5cae815d removed an explicit call to dev_alloc_name in ip6_tnl_create because register_netdevice will now create a valid name. This works for the net_device itself. However the tunnel keeps a copy of the name in the parms structure for the ip6_tnl associated with the tunnel. parms.name is set by copying the net_device name in ip6_tnl_dev_init_gen. That function is called from ip6_tnl_dev_init in ip6_tnl_create, but it is done before register_netdevice is called so the name is set to a bogus value in the parms.name structure. This shows up if you do a simple tunnel add, followed by a tunnel show: [root@localhost ~]# ip -6 tunnel add remote fec0::100 local fec0::200 [root@localhost ~]# ip -6 tunnel show ip6tnl0: ipv6/ipv6 remote :: local :: encaplimit 0 hoplimit 0 tclass 0x00 flowlabel 0x00000 (flowinfo 0x00000000) ip6tnl%d: ipv6/ipv6 remote fec0::100 local fec0::200 encaplimit 4 hoplimit 64 tclass 0x00 flowlabel 0x00000 (flowinfo 0x00000000) [root@localhost ~]# Fix this by moving the strcpy out of ip6_tnl_dev_init_gen, and calling it after register_netdevice has successfully returned. Cc: stable@vger.kernel.org Signed-off-by: Josh Boyer Signed-off-by: David S. 
Miller --- net/ipv6/ip6_tunnel.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index bdc15c9003d7..4e2e9ff67ef2 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -289,6 +289,8 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p) if ((err = register_netdevice(dev)) < 0) goto failed_free; + strcpy(t->parms.name, dev->name); + dev_hold(dev); ip6_tnl_link(ip6n, t); return t; @@ -1407,7 +1409,6 @@ ip6_tnl_dev_init_gen(struct net_device *dev) struct ip6_tnl *t = netdev_priv(dev); t->dev = dev; - strcpy(t->parms.name, dev->name); dev->tstats = alloc_percpu(struct pcpu_tstats); if (!dev->tstats) return -ENOMEM; @@ -1487,6 +1488,7 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n) static int __net_init ip6_tnl_init_net(struct net *net) { struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + struct ip6_tnl *t = NULL; int err; ip6n->tnls[0] = ip6n->tnls_wc; @@ -1507,6 +1509,10 @@ static int __net_init ip6_tnl_init_net(struct net *net) err = register_netdev(ip6n->fb_tnl_dev); if (err < 0) goto err_register; + + t = netdev_priv(ip6n->fb_tnl_dev); + + strcpy(t->parms.name, ip6n->fb_tnl_dev->name); return 0; err_register: -- cgit v1.2.1 From 4d65a2465f6f2694de67777a8aedb1272f473979 Mon Sep 17 00:00:00 2001 From: Li Wei Date: Wed, 23 Nov 2011 03:51:54 -0500 Subject: ipv6: fix a bug in ndisc_send_redirect Release the skb when the transmit rate limit does _not_ allow sending. Signed-off-by: Li Wei Signed-off-by: David S.
Miller --- net/ipv6/ndisc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 44e5b7f2a6c1..0cb78d7ddaf5 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1571,7 +1571,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, } if (!rt->rt6i_peer) rt6_bind_peer(rt, 1); - if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) + if (!inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) goto release; if (dev->addr_len) { -- cgit v1.2.1 From c16a98ed91597b40b22b540c6517103497ef8e74 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Nov 2011 15:49:31 -0500 Subject: ipv6: tcp: fix panic in SYN processing commit 72a3effaf633bc ([NET]: Size listen hash tables using backlog hint) added a bug allowing inet6_synq_hash() to return an out of bound array index, because of u16 overflow. Bug can happen if system admins set net.core.somaxconn & net.ipv4.tcp_max_syn_backlog sysctls to values greater than 65536 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/inet6_connection_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index fee46d5a2f12..1567fb120392 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -85,7 +85,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, * request_sock (formerly open request) hash tables. */ static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport, - const u32 rnd, const u16 synq_hsize) + const u32 rnd, const u32 synq_hsize) { u32 c; -- cgit v1.2.1 From 46a246c4dff9f248913e791b69f2336cd8d4ec41 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 23 Nov 2011 16:07:00 -0500 Subject: netfilter: Remove NOTRACK/RAW dependency on NETFILTER_ADVANCED. Distributions are using this in their default scripts, so don't hide them behind the advanced setting. 
Reported-by: Linus Torvalds Signed-off-by: David S. Miller --- net/ipv6/netfilter/Kconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 448464844a25..f792b34cbe9c 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -186,7 +186,6 @@ config IP6_NF_MANGLE config IP6_NF_RAW tristate 'raw table support (required for TRACE)' - depends on NETFILTER_ADVANCED help This option adds a `raw' table to ip6tables. This table is the very first in the netfilter framework and hooks in at the PREROUTING -- cgit v1.2.1 From 4d0fe50c75a547088e4304e5eb5f521514dfae46 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Nov 2011 17:29:23 -0500 Subject: ipv6: tcp: fix tcp_v6_conn_request() Since linux 2.6.26 (commit c6aefafb7ec6 : Add IPv6 support to TCP SYN cookies), we can drop a SYN packet reusing a TIME_WAIT socket. (As a matter of fact we fail to send the SYNACK answer) As the client resends its SYN packet after a one second timeout, we accept it, because first packet removed the TIME_WAIT socket before being dropped. This probably explains why nobody ever noticed or complained. Reported-by: Jesse Young Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/tcp_ipv6.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 36131d122a6f..2dea4bb7b54a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1255,6 +1255,13 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (!want_cookie || tmp_opt.tstamp_ok) TCP_ECN_create_request(req, tcp_hdr(skb)); + treq->iif = sk->sk_bound_dev_if; + + /* So that link locals have meaning */ + if (!sk->sk_bound_dev_if && + ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) + treq->iif = inet6_iif(skb); + if (!isn) { struct inet_peer *peer = NULL; @@ -1264,12 +1271,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) atomic_inc(&skb->users); treq->pktopts = skb; } - treq->iif = sk->sk_bound_dev_if; - - /* So that link locals have meaning */ - if (!sk->sk_bound_dev_if && - ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) - treq->iif = inet6_iif(skb); if (want_cookie) { isn = cookie_v6_init_sequence(sk, skb, &req->mss); -- cgit v1.2.1 From 6b600b26c0215bf9ed04062ecfacf0bc20e2588c Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 23 Nov 2011 02:12:13 +0000 Subject: route: Use the device mtu as the default for blackhole routes As it is, we return null as the default mtu of blackhole routes. This may lead to a propagation of a bogus pmtu if the default_mtu method of a blackhole route is invoked. So return dst->dev->mtu as the default mtu instead. Signed-off-by: Steffen Klassert Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8473016bba4a..d8fbd18c9467 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -157,7 +157,7 @@ static struct dst_ops ip6_dst_ops_template = { static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst) { - return 0; + return dst->dev->mtu; } static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) -- cgit v1.2.1 From ebb762f27fed083cb993a0816393aba4615f6544 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 23 Nov 2011 02:12:51 +0000 Subject: net: Rename the dst_opt default_mtu method to mtu We plan to invoke the dst_opt->default_mtu() method unconditionally from dst_mtu(). So rename the method to dst_opt->mtu() to match the name with the new meaning. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv6/route.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d8fbd18c9467..76645d7077ff 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -77,7 +77,7 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, const struct in6_addr *dest); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ip6_default_advmss(const struct dst_entry *dst); -static unsigned int ip6_default_mtu(const struct dst_entry *dst); +static unsigned int ip6_mtu(const struct dst_entry *dst); static struct dst_entry *ip6_negative_advice(struct dst_entry *); static void ip6_dst_destroy(struct dst_entry *); static void ip6_dst_ifdown(struct dst_entry *, @@ -144,7 +144,7 @@ static struct dst_ops ip6_dst_ops_template = { .gc_thresh = 1024, .check = ip6_dst_check, .default_advmss = ip6_default_advmss, - .default_mtu = ip6_default_mtu, + .mtu = ip6_mtu, .cow_metrics = ipv6_cow_metrics, .destroy = ip6_dst_destroy, .ifdown =
ip6_dst_ifdown, @@ -155,7 +155,7 @@ static struct dst_ops ip6_dst_ops_template = { .neigh_lookup = ip6_neigh_lookup, }; -static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst) +static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) { return dst->dev->mtu; } @@ -175,7 +175,7 @@ static struct dst_ops ip6_dst_blackhole_ops = { .protocol = cpu_to_be16(ETH_P_IPV6), .destroy = ip6_dst_destroy, .check = ip6_dst_check, - .default_mtu = ip6_blackhole_default_mtu, + .mtu = ip6_blackhole_mtu, .default_advmss = ip6_default_advmss, .update_pmtu = ip6_rt_blackhole_update_pmtu, .cow_metrics = ip6_rt_blackhole_cow_metrics, @@ -1041,7 +1041,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) return mtu; } -static unsigned int ip6_default_mtu(const struct dst_entry *dst) +static unsigned int ip6_mtu(const struct dst_entry *dst) { unsigned int mtu = IPV6_MIN_MTU; struct inet6_dev *idev; -- cgit v1.2.1 From 618f9bc74a039da76fa027ac2600c5b785b964c5 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 23 Nov 2011 02:13:31 +0000 Subject: net: Move mtu handling down to the protocol depended handlers We move all mtu handling from dst_mtu() down to the protocol layer. So each protocol can implement the mtu handling in a different manner. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv6/route.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 76645d7077ff..3399dd326287 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -157,7 +157,9 @@ static struct dst_ops ip6_dst_ops_template = { static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) { - return dst->dev->mtu; + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + return mtu ? 
: dst->dev->mtu; } static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) @@ -1043,8 +1045,13 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) static unsigned int ip6_mtu(const struct dst_entry *dst) { - unsigned int mtu = IPV6_MIN_MTU; struct inet6_dev *idev; + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + if (mtu) + return mtu; + + mtu = IPV6_MIN_MTU; rcu_read_lock(); idev = __in6_dev_get(dst->dev); -- cgit v1.2.1 From 2a38e6d5aed24bb7f0211e0819fac8c32c2b5791 Mon Sep 17 00:00:00 2001 From: Li Wei Date: Sun, 27 Nov 2011 21:33:34 +0000 Subject: ipv6: Set mcast_hops to IPV6_DEFAULT_MCASTHOPS when -1 was given. We need to set np->mcast_hops to its default value at this moment otherwise when we use it and found its value is -1, the logic to get default hop limit doesn't take multicast into account and will return the wrong hop limit (IPV6_DEFAULT_HOPLIMIT) which is for unicast. Signed-off-by: Li Wei Signed-off-by: David S. Miller --- net/ipv6/ipv6_sockglue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index c99e3ee9781f..26cb08c84b74 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -503,7 +503,7 @@ done: goto e_inval; if (val > 255 || val < -1) goto e_inval; - np->mcast_hops = val; + np->mcast_hops = (val == -1 ? IPV6_DEFAULT_MCASTHOPS : val); retv = 0; break; -- cgit v1.2.1 From 59c2cdae2791c0b2ee13d148edc6b771e7e7953f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 1 Dec 2011 14:12:55 -0500 Subject: Revert "udp: remove redundant variable" This reverts commit 81d54ec8479a2c695760da81f05b5a9fb2dbe40a. If we take the "try_again" goto, due to a checksum error, the 'len' has already been truncated. So we won't compute the same values as the original code did. Reported-by: paul bilke Signed-off-by: David S.
Miller --- net/ipv6/udp.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 846f4757eb8d..8c2541915183 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -340,7 +340,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; - unsigned int ulen; + unsigned int ulen, copied; int peeked; int err; int is_udplite = IS_UDPLITE(sk); @@ -363,9 +363,10 @@ try_again: goto out; ulen = skb->len - sizeof(struct udphdr); - if (len > ulen) - len = ulen; - else if (len < ulen) + copied = len; + if (copied > ulen) + copied = ulen; + else if (copied < ulen) msg->msg_flags |= MSG_TRUNC; is_udp4 = (skb->protocol == htons(ETH_P_IP)); @@ -376,14 +377,14 @@ try_again: * coverage checksum (UDP-Lite), do it before the copy. */ - if (len < ulen || UDP_SKB_CB(skb)->partial_cov) { + if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { if (udp_lib_checksum_complete(skb)) goto csum_copy_err; } if (skb_csum_unnecessary(skb)) err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), - msg->msg_iov,len); + msg->msg_iov, copied ); else { err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); if (err == -EINVAL) @@ -432,7 +433,7 @@ try_again: datagram_recv_ctl(sk, msg, skb); } - err = len; + err = copied; if (flags & MSG_TRUNC) err = ulen; -- cgit v1.2.1 From 4af04aba93f47699e7ac33e7cfd4da22550e6114 Mon Sep 17 00:00:00 2001 From: Li Wei Date: Tue, 6 Dec 2011 21:23:45 +0000 Subject: ipv6: Fix for adding multicast route for loopback device automatically. There is no obvious reason to add a default multicast route for loopback devices, otherwise there would be a route entry whose dst.error set to -ENETUNREACH that would blocking all multicast packets. 
==================== [ more detailed explanation ] The problem is that the resulting routing table depends on the sequence of interface's initialization and in some situations, that would block all multicast packets. Suppose there are two interfaces on my computer (lo and eth0), if we initialize 'lo' before 'eth0', the resulting routing table(for multicast) would be # ip -6 route show | grep ff00:: unreachable ff00::/8 dev lo metric 256 error -101 ff00::/8 dev eth0 metric 256 When sending multicast packets, routing subsystem will return the first route entry which has an error set to -101(ENETUNREACH). I know the kernel will set the default ipv6 address for 'lo' when it is up and won't set the default multicast route for it, but there is no reason to stop 'init' program from setting address for 'lo', and that is exactly what systemd did. I am sure there is something wrong with kernel or systemd, currently I preferred kernel caused this problem. ==================== Signed-off-by: Li Wei Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index cf88df82e2c2..36806def8cfd 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1805,7 +1805,8 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev) return ERR_PTR(-EACCES); /* Add default multicast route */ - addrconf_add_mroute(dev); + if (!(dev->flags & IFF_LOOPBACK)) + addrconf_add_mroute(dev); /* Add link local route */ addrconf_add_lroute(dev); -- cgit v1.2.1 From 72b36015ba43a3cca5303f5534d2c3e1899eae29 Mon Sep 17 00:00:00 2001 From: Ted Feng Date: Thu, 8 Dec 2011 00:46:21 +0000 Subject: ipip, sit: copy parms.name after register_netdevice Same fix as 731abb9cb2 for ipip and sit tunnel.
Commit 1c5cae815d removed an explicit call to dev_alloc_name in ipip_tunnel_locate and ipip6_tunnel_locate, because register_netdevice will now create a valid name, however the tunnel keeps a copy of the name in the private parms structure. Fix this by copying the name back after register_netdevice has successfully returned. This shows up if you do a simple tunnel add, followed by a tunnel show: $ sudo ip tunnel add mode ipip remote 10.2.20.211 $ ip tunnel tunl0: ip/ip remote any local any ttl inherit nopmtudisc tunl%d: ip/ip remote 10.2.20.211 local any ttl inherit $ sudo ip tunnel add mode sit remote 10.2.20.212 $ ip tunnel sit0: ipv6/ip remote any local any ttl 64 nopmtudisc 6rd-prefix 2002::/16 sit%d: ioctl 89f8 failed: No such device sit%d: ipv6/ip remote 10.2.20.212 local any ttl inherit Cc: stable@vger.kernel.org Signed-off-by: Ted Feng Signed-off-by: David S. Miller --- net/ipv6/sit.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index a7a18602a046..96f3623618e3 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -263,6 +263,8 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, if (register_netdevice(dev) < 0) goto failed_free; + strcpy(nt->parms.name, dev->name); + dev_hold(dev); ipip6_tunnel_link(sitn, nt); @@ -1144,7 +1146,6 @@ static int ipip6_tunnel_init(struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); @@ -1207,6 +1208,7 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea static int __net_init sit_init_net(struct net *net) { struct sit_net *sitn = net_generic(net, sit_net_id); + struct ip_tunnel *t; int err; sitn->tunnels[0] = sitn->tunnels_wc; @@ -1231,6 +1233,9 @@ static int __net_init sit_init_net(struct net *net) if ((err = 
register_netdev(sitn->fb_tunnel_dev))) goto err_reg_dev; + t = netdev_priv(sitn->fb_tunnel_dev); + + strcpy(t->parms.name, sitn->fb_tunnel_dev->name); return 0; err_reg_dev: -- cgit v1.2.1 From bb3c36863e8001fc21a88bebfdead4da4c23e848 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 13 Dec 2011 17:35:06 -0500 Subject: ipv6: Check dest prefix length on original route not copied one in rt6_alloc_cow(). After commit 8e2ec639173f325977818c45011ee176ef2b11f6 ("ipv6: don't use inetpeer to store metrics for routes.") the test in rt6_alloc_cow() for setting the ANYCAST flag is now wrong. 'rt' will always now have a plen of 128, because it is set explicitly to 128 by ip6_rt_copy. So to restore the semantics of the test, check the destination prefix length of 'ort'. Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 3399dd326287..b582a0a0f1c5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -728,7 +728,7 @@ static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort, int attempts = !in_softirq(); if (!(rt->rt6i_flags&RTF_GATEWAY)) { - if (rt->rt6i_dst.plen != 128 && + if (ort->rt6i_dst.plen != 128 && ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) rt->rt6i_flags |= RTF_ANYCAST; ipv6_addr_copy(&rt->rt6i_gateway, daddr); -- cgit v1.2.1 From e688a604807647c9450f9c12a7cb6d027150a895 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Dec 2011 04:15:53 +0000 Subject: net: introduce DST_NOPEER dst flag Chris Boot reported crashes occurring in ipv6_select_ident(). [ 461.457562] RIP: 0010:[] [] ipv6_select_ident+0x31/0xa7 [ 461.578229] Call Trace: [ 461.580742] [ 461.582870] [] ? udp6_ufo_fragment+0x124/0x1a2 [ 461.589054] [] ? ipv6_gso_segment+0xc0/0x155 [ 461.595140] [] ? skb_gso_segment+0x208/0x28b [ 461.601198] [] ? ipv6_confirm+0x146/0x15e [nf_conntrack_ipv6] [ 461.608786] [] ? nf_iterate+0x41/0x77 [ 461.614227] [] ? 
dev_hard_start_xmit+0x357/0x543 [ 461.620659] [] ? nf_hook_slow+0x73/0x111 [ 461.626440] [] ? br_parse_ip_options+0x19a/0x19a [bridge] [ 461.633581] [] ? dev_queue_xmit+0x3af/0x459 [ 461.639577] [] ? br_dev_queue_push_xmit+0x72/0x76 [bridge] [ 461.646887] [] ? br_nf_post_routing+0x17d/0x18f [bridge] [ 461.653997] [] ? nf_iterate+0x41/0x77 [ 461.659473] [] ? br_flood+0xfa/0xfa [bridge] [ 461.665485] [] ? nf_hook_slow+0x73/0x111 [ 461.671234] [] ? br_flood+0xfa/0xfa [bridge] [ 461.677299] [] ? nf_bridge_update_protocol+0x20/0x20 [bridge] [ 461.684891] [] ? nf_ct_zone+0xa/0x17 [nf_conntrack] [ 461.691520] [] ? br_flood+0xfa/0xfa [bridge] [ 461.697572] [] ? NF_HOOK.constprop.8+0x3c/0x56 [bridge] [ 461.704616] [] ? nf_bridge_push_encap_header+0x1c/0x26 [bridge] [ 461.712329] [] ? br_nf_forward_finish+0x8a/0x95 [bridge] [ 461.719490] [] ? nf_bridge_pull_encap_header+0x1c/0x27 [bridge] [ 461.727223] [] ? br_nf_forward_ip+0x1c0/0x1d4 [bridge] [ 461.734292] [] ? nf_iterate+0x41/0x77 [ 461.739758] [] ? __br_deliver+0xa0/0xa0 [bridge] [ 461.746203] [] ? nf_hook_slow+0x73/0x111 [ 461.751950] [] ? __br_deliver+0xa0/0xa0 [bridge] [ 461.758378] [] ? NF_HOOK.constprop.4+0x56/0x56 [bridge] This is caused by bridge netfilter special dst_entry (fake_rtable), a special shared entry, where attaching an inetpeer makes no sense. Problem is present since commit 87c48fa3b46 (ipv6: make fragment identifications less predictable) Introduce DST_NOPEER dst flag and make sure ipv6_select_ident() and __ip_select_ident() fallback to the 'no peer attached' handling. Reported-by: Chris Boot Tested-by: Chris Boot Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 84d0bd5cac93..ec562713db9b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -603,7 +603,7 @@ void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) static atomic_t ipv6_fragmentation_id; int old, new; - if (rt) { + if (rt && !(rt->dst.flags & DST_NOPEER)) { struct inet_peer *peer; if (!rt->rt6i_peer) -- cgit v1.2.1