From 7143dfac692cd25d48a24dbe8323bc17af95b4ec Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Fri, 28 Dec 2012 16:07:16 +0800 Subject: ah4/esp4: set transport header correctly for IPsec tunnel mode. IPsec tunnel does not set ECN field to CE in inner header when the ECN field in the outer header is CE, and the ECN field in the inner header is ECT(0) or ECT(1). The cause is ipip_hdr() does not return the correct address of inner header since skb->transport-header is not the inner header after esp_input_done2(), or ah_input(). Signed-off-by: Li RongQing Signed-off-by: Steffen Klassert --- net/ipv4/ah4.c | 11 +++++++++-- net/ipv4/esp4.c | 5 ++++- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index a0d8392491c3..a154d0a08c79 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -269,7 +269,11 @@ static void ah_input_done(struct crypto_async_request *base, int err) skb->network_header += ah_hlen; memcpy(skb_network_header(skb), work_iph, ihl); __skb_pull(skb, ah_hlen + ihl); - skb_set_transport_header(skb, -ihl); + + if (x->props.mode == XFRM_MODE_TUNNEL) + skb_reset_transport_header(skb); + else + skb_set_transport_header(skb, -ihl); out: kfree(AH_SKB_CB(skb)->tmp); xfrm_input_resume(skb, err); @@ -381,7 +385,10 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) skb->network_header += ah_hlen; memcpy(skb_network_header(skb), work_iph, ihl); __skb_pull(skb, ah_hlen + ihl); - skb_set_transport_header(skb, -ihl); + if (x->props.mode == XFRM_MODE_TUNNEL) + skb_reset_transport_header(skb); + else + skb_set_transport_header(skb, -ihl); err = nexthdr; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index b61e9deb7c7e..fd26ff4f3eac 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -346,7 +346,10 @@ static int esp_input_done2(struct sk_buff *skb, int err) pskb_trim(skb, skb->len - alen - padlen - 2); __skb_pull(skb, hlen); - skb_set_transport_header(skb, -ihl); + if (x->props.mode == XFRM_MODE_TUNNEL) + skb_reset_transport_header(skb); + else + skb_set_transport_header(skb, -ihl); err = nexthdr[1]; -- cgit v1.2.1 From 38d523e2948162776903349c89d65f7b9370dadb Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 16 Jan 2013 20:55:01 +0000 Subject: ipv4: Remove output route check in ipv4_mtu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The output route check was introduced with git commit 261663b0 (ipv4: Don't use the cached pmtu informations for input routes) during times when we cached the pmtu informations on the inetpeer. Now the pmtu informations are back in the routes, so this check is obsolete. It also had some unwanted side effects, as reported by Timo Teras and Lukas Tribus. Signed-off-by: Steffen Klassert Acked-by: Timo Teräs Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 844a9ef60dbd..6e4a89c5e27e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1120,7 +1120,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) if (!mtu || time_after_eq(jiffies, rt->dst.expires)) mtu = dst_metric_raw(dst, RTAX_MTU); - if (mtu && rt_is_output_route(rt)) + if (mtu) return mtu; mtu = dst->dev->mtu; -- cgit v1.2.1 From fa1e492aa3cbafba9f8fc6d05e5b08a3091daf4a Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 16 Jan 2013 20:58:10 +0000 Subject: ipv4: Don't update the pmtu on mtu locked routes Routes with locked mtu should not use learned pmtu informations, so do not update the pmtu on these routes. Reported-by: Julian Anastasov Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/route.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/ipv4') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6e4a89c5e27e..259cbeee9a8b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -912,6 +912,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) struct dst_entry *dst = &rt->dst; struct fib_result res; + if (dst_metric_locked(dst, RTAX_MTU)) + return; + if (dst->dev->mtu < mtu) return; -- cgit v1.2.1 From b74aa930ef49a3c0d8e4c1987f89decac768fb2c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 19 Jan 2013 16:10:37 +0000 Subject: tcp: fix incorrect LOCKDROPPEDICMPS counter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 563d34d057 (tcp: dont drop MTU reduction indications) added an error leading to incorrect accounting of LINUX_MIB_LOCKDROPPEDICMPS If socket is owned by the user, we want to increment this SNMP counter, unless the message is a (ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED) one. Reported-by: Maciej Żenczykowski Signed-off-by: Eric Dumazet Cc: Neal Cardwell Signed-off-by: Maciej Żenczykowski Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 54139fa514e6..70b09ef2463b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -369,11 +369,10 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) * We do take care of PMTU discovery (RFC1191) special case : * we can receive locally generated ICMP messages while socket is held. */ - if (sock_owned_by_user(sk) && - type != ICMP_DEST_UNREACH && - code != ICMP_FRAG_NEEDED) - NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); - + if (sock_owned_by_user(sk)) { + if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)) + NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); + } if (sk->sk_state == TCP_CLOSE) goto out; -- cgit v1.2.1 From 05ab86c55683410593720003442dde629782aaac Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 15 Jan 2013 13:38:53 +0100 Subject: xfrm4: Invalidate all ipv4 routes on IPsec pmtu events On IPsec pmtu events we can't access the transport headers of the original packet, so we can't find the socket that sent the packet. The only chance to notify the socket about the pmtu change is to force a relookup for all routes. This patch implenents this for the IPsec protocols. Signed-off-by: Steffen Klassert --- net/ipv4/ah4.c | 7 +++++-- net/ipv4/esp4.c | 7 +++++-- net/ipv4/ipcomp.c | 7 +++++-- 3 files changed, 15 insertions(+), 6 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index a154d0a08c79..a69b4e4a02b5 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -420,9 +420,12 @@ static void ah4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { + atomic_inc(&flow_cache_genid); + rt_genid_bump(net); + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0); - else + } else ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0); xfrm_state_put(x); } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index fd26ff4f3eac..3b4f0cd2e63e 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -502,9 +502,12 @@ static void esp4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { + atomic_inc(&flow_cache_genid); + rt_genid_bump(net); + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0); - else + } else ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0); xfrm_state_put(x); } diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index d3ab47e19a89..9a46daed2f3c 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -47,9 +47,12 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { + atomic_inc(&flow_cache_genid); + rt_genid_bump(net); + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0); - else + } else ipv4_redirect(skb, net, 0, 0, IPPROTO_COMP, 0); xfrm_state_put(x); } -- cgit v1.2.1 From 9cb3a50c5f63ed745702972f66eaee8767659acd Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 21 Jan 2013 01:59:11 +0000 Subject: ipv4: Invalidate the socket cached route on pmtu events if possible The route lookup in ipv4_sk_update_pmtu() might return a route different from the route we cached at the socket. This is because standart routes are per cpu, so each cpu has it's own struct rtable. This means that we do not invalidate the socket cached route if the NET_RX_SOFTIRQ is not served by the same cpu that the sending socket uses. As a result, the cached route reused until we disconnect. With this patch we invalidate the socket cached route if possible. If the socket is owened by the user, we can't update the cached route directly. A followup patch will implement socket release callback functions for datagram sockets to handle this case. Reported-by: Yurij M. Plotnikov Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/route.c | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 259cbeee9a8b..132737a7c83a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -965,7 +965,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, } EXPORT_SYMBOL_GPL(ipv4_update_pmtu); -void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) +static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) { const struct iphdr *iph = (const struct iphdr *) skb->data; struct flowi4 fl4; @@ -978,6 +978,46 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) ip_rt_put(rt); } } + +void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) +{ + const struct iphdr *iph = (const struct iphdr *) skb->data; + struct flowi4 fl4; + struct rtable *rt; + struct dst_entry *dst; + + bh_lock_sock(sk); + rt = (struct rtable *) __sk_dst_get(sk); + + if (sock_owned_by_user(sk) || !rt) { + __ipv4_sk_update_pmtu(skb, sk, mtu); + goto out; + } + + __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + + if (!__sk_dst_check(sk, 0)) { + rt = ip_route_output_flow(sock_net(sk), &fl4, sk); + if (IS_ERR(rt)) + goto out; + } + + __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu); + + dst = dst_check(&rt->dst, 0); + if (!dst) { + rt = ip_route_output_flow(sock_net(sk), &fl4, sk); + if (IS_ERR(rt)) + goto out; + + dst = &rt->dst; + } + + __sk_dst_set(sk, dst); + +out: + bh_unlock_sock(sk); +} EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); void ipv4_redirect(struct sk_buff *skb, struct net *net, -- cgit v1.2.1 From 8141ed9fcedb278f4a3a78680591bef1e55f75fb Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 21 Jan 2013 02:00:03 +0000 Subject: ipv4: Add a socket release callback for datagram sockets This implements a socket release callback function to check if the socket cached route got invalid during the time we owned the socket. The function is used from udp, raw and ping sockets. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/datagram.c | 25 +++++++++++++++++++++++++ net/ipv4/ping.c | 1 + net/ipv4/raw.c | 1 + net/ipv4/udp.c | 1 + 4 files changed, 28 insertions(+) (limited to 'net/ipv4') diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 424fafbc8cb0..b28e863fe0a7 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -85,3 +85,28 @@ out: return err; } EXPORT_SYMBOL(ip4_datagram_connect); + +void ip4_datagram_release_cb(struct sock *sk) +{ + const struct inet_sock *inet = inet_sk(sk); + const struct ip_options_rcu *inet_opt; + __be32 daddr = inet->inet_daddr; + struct flowi4 fl4; + struct rtable *rt; + + if (! __sk_dst_get(sk) || __sk_dst_check(sk, 0)) + return; + + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); + if (inet_opt && inet_opt->opt.srr) + daddr = inet_opt->opt.faddr; + rt = ip_route_output_ports(sock_net(sk), &fl4, sk, daddr, + inet->inet_saddr, inet->inet_dport, + inet->inet_sport, sk->sk_protocol, + RT_CONN_FLAGS(sk), sk->sk_bound_dev_if); + if (!IS_ERR(rt)) + __sk_dst_set(sk, &rt->dst); + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(ip4_datagram_release_cb); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 8f3d05424a3e..6f9c07268cf6 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -738,6 +738,7 @@ struct proto ping_prot = { .recvmsg = ping_recvmsg, .bind = ping_bind, .backlog_rcv = ping_queue_rcv_skb, + .release_cb = ip4_datagram_release_cb, .hash = ping_v4_hash, .unhash = ping_v4_unhash, .get_port = ping_v4_get_port, diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 73d1e4df4bf6..6f08991409c3 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -894,6 +894,7 @@ struct proto raw_prot = { .recvmsg = raw_recvmsg, .bind = raw_bind, .backlog_rcv = raw_rcv_skb, + .release_cb = ip4_datagram_release_cb, .hash = raw_hash_sk, .unhash = raw_unhash_sk, .obj_size = sizeof(struct raw_sock), diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 79c8dbe59b54..1f4d405eafba 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1952,6 +1952,7 @@ struct proto udp_prot = { .recvmsg = udp_recvmsg, .sendpage = udp_sendpage, .backlog_rcv = __udp_queue_rcv_skb, + .release_cb = ip4_datagram_release_cb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .rehash = udp_v4_rehash, -- cgit v1.2.1 From b44108dbdbaa07c609bb5755e8dd6c2035236251 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 22 Jan 2013 00:01:28 +0000 Subject: ipv4: Fix route refcount on pmtu discovery git commit 9cb3a50c (ipv4: Invalidate the socket cached route on pmtu events if possible) introduced a refcount problem. We don't get a refcount on the route if we get it from__sk_dst_get(), but we need one if we want to reuse this route because __sk_dst_set() releases the refcount of the old route. This patch adds proper refcount handling for that case. We introduce a 'new' flag to indicate that we are going to use a new route and we release the old route only if we replace it by a new one. Reported-by: Julian Anastasov Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/route.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 132737a7c83a..a0fcc47fee73 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -985,6 +985,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) struct flowi4 fl4; struct rtable *rt; struct dst_entry *dst; + bool new = false; bh_lock_sock(sk); rt = (struct rtable *) __sk_dst_get(sk); @@ -1000,20 +1001,26 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) rt = ip_route_output_flow(sock_net(sk), &fl4, sk); if (IS_ERR(rt)) goto out; + + new = true; } __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu); dst = dst_check(&rt->dst, 0); if (!dst) { + if (new) + dst_release(&rt->dst); + rt = ip_route_output_flow(sock_net(sk), &fl4, sk); if (IS_ERR(rt)) goto out; - dst = &rt->dst; + new = true; } - __sk_dst_set(sk, dst); + if (new) + __sk_dst_set(sk, &rt->dst); out: bh_unlock_sock(sk); -- cgit v1.2.1 From 5465740ace36f179de5bb0ccb5d46ddeb945e309 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Wed, 23 Jan 2013 11:45:42 +0000 Subject: IP_GRE: Fix kernel panic in IP_GRE with GRE csum. Due to IP_GRE GSO support, GRE can recieve non linear skb which results in panic in case of GRE_CSUM. Following patch fixes it by using correct csum API. Bug introduced in commit 6b78f16e4bdde3936b (gre: add GSO support) Signed-off-by: Pravin B Shelar Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 303012adf9e6..e81b1caf2ea2 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -963,8 +963,12 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev ptr--; } if (tunnel->parms.o_flags&GRE_CSUM) { + int offset = skb_transport_offset(skb); + *ptr = 0; - *(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr)); + *(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset, + skb->len - offset, + 0)); } } -- cgit v1.2.1