From 18943d292facbc70e6a36fc62399ae833f64671b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Nov 2010 11:15:54 +0000 Subject: inet: fix ip_mc_drop_socket() commit 8723e1b4ad9be4444 (inet: RCU changes in inetdev_by_index()) forgot one call site in ip_mc_drop_socket() We should not decrease idev refcount after inetdev_by_index() call, since refcount is not increased anymore. Reported-by: Markus Trippelsdorf Reported-by: Miles Lane Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index c8877c6c7216..3c53c2d89e3b 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2306,10 +2306,8 @@ void ip_mc_drop_socket(struct sock *sk) in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); (void) ip_mc_leave_src(sk, iml, in_dev); - if (in_dev != NULL) { + if (in_dev != NULL) ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); - in_dev_put(in_dev); - } /* decrease mem now to avoid the memleak warning */ atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); call_rcu(&iml->rcu, ip_mc_socklist_reclaim); -- cgit v1.2.1 From 8d987e5c75107ca7515fa19e857cfa24aab6ec8f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 9 Nov 2010 23:24:26 +0000 Subject: net: avoid limits overflow Robin Holt tried to boot a 16TB machine and found some limits were reached : sysctl_tcp_mem[2], sysctl_udp_mem[2] We can switch infrastructure to use long "instead" of "int", now atomic_long_t primitives are available for free. Signed-off-by: Eric Dumazet Reported-by: Robin Holt Reviewed-by: Robin Holt Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/ipv4/proc.c | 8 ++++---- net/ipv4/sysctl_net_ipv4.c | 5 ++--- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_input.c | 11 +++++++---- net/ipv4/udp.c | 4 ++-- 5 files changed, 17 insertions(+), 15 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 4ae1f203f7cb..1b48eb1ed453 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -59,13 +59,13 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) local_bh_enable(); socket_seq_show(seq); - seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", + seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n", sock_prot_inuse_get(net, &tcp_prot), orphans, tcp_death_row.tw_count, sockets, - atomic_read(&tcp_memory_allocated)); - seq_printf(seq, "UDP: inuse %d mem %d\n", + atomic_long_read(&tcp_memory_allocated)); + seq_printf(seq, "UDP: inuse %d mem %ld\n", sock_prot_inuse_get(net, &udp_prot), - atomic_read(&udp_memory_allocated)); + atomic_long_read(&udp_memory_allocated)); seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(net, &udplite_prot)); seq_printf(seq, "RAW: inuse %d\n", diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d96c1da4b17c..e91911d7aae2 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -398,7 +398,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_mem, .maxlen = sizeof(sysctl_tcp_mem), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_doulongvec_minmax }, { .procname = "tcp_wmem", @@ -602,8 +602,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_udp_mem, .maxlen = sizeof(sysctl_udp_mem), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero + .proc_handler = proc_doulongvec_minmax, }, { .procname = "udp_rmem_min", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1664a0590bb8..245603c4ad48 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -282,7 +282,7 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); -int sysctl_tcp_mem[3] __read_mostly; +long sysctl_tcp_mem[3] __read_mostly; int sysctl_tcp_wmem[3] __read_mostly; int sysctl_tcp_rmem[3] __read_mostly; @@ -290,7 +290,7 @@ EXPORT_SYMBOL(sysctl_tcp_mem); EXPORT_SYMBOL(sysctl_tcp_rmem); EXPORT_SYMBOL(sysctl_tcp_wmem); -atomic_t tcp_memory_allocated; /* Current allocated memory. */ +atomic_long_t tcp_memory_allocated; /* Current allocated memory. */ EXPORT_SYMBOL(tcp_memory_allocated); /* diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3357f69e353d..6d8ab1c4efc3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -259,8 +259,11 @@ static void tcp_fixup_sndbuf(struct sock *sk) int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); - if (sk->sk_sndbuf < 3 * sndmem) - sk->sk_sndbuf = min(3 * sndmem, sysctl_tcp_wmem[2]); + if (sk->sk_sndbuf < 3 * sndmem) { + sk->sk_sndbuf = 3 * sndmem; + if (sk->sk_sndbuf > sysctl_tcp_wmem[2]) + sk->sk_sndbuf = sysctl_tcp_wmem[2]; + } } /* 2. Tuning advertised window (window_clamp, rcv_ssthresh) @@ -396,7 +399,7 @@ static void tcp_clamp_window(struct sock *sk) if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && !tcp_memory_pressure && - atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { + atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), sysctl_tcp_rmem[2]); } @@ -4861,7 +4864,7 @@ static int tcp_should_expand_sndbuf(struct sock *sk) return 0; /* If we are under soft global TCP memory pressure, do not expand. */ - if (atomic_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0]) + if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0]) return 0; /* If we filled the congestion window, do not expand. */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 28cb2d733a3c..5e0a3a582a59 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -110,7 +110,7 @@ struct udp_table udp_table __read_mostly; EXPORT_SYMBOL(udp_table); -int sysctl_udp_mem[3] __read_mostly; +long sysctl_udp_mem[3] __read_mostly; EXPORT_SYMBOL(sysctl_udp_mem); int sysctl_udp_rmem_min __read_mostly; @@ -119,7 +119,7 @@ EXPORT_SYMBOL(sysctl_udp_rmem_min); int sysctl_udp_wmem_min __read_mostly; EXPORT_SYMBOL(sysctl_udp_wmem_min); -atomic_t udp_memory_allocated; +atomic_long_t udp_memory_allocated; EXPORT_SYMBOL(udp_memory_allocated); #define MAX_UDP_PORTS 65536 -- cgit v1.2.1 From 7a1abd08d52fdeddb3e9a5a33f2f15cc6a5674d2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 10 Nov 2010 21:35:37 -0800 Subject: tcp: Increase TCP_MAXSEG socket option minimum. As noted by Steve Chen, since commit f5fff5dc8a7a3f395b0525c02ba92c95d42b7390 ("tcp: advertise MSS requested by user") we can end up with a situation where tcp_select_initial_window() does a divide by a zero (or even negative) mss value. The problem is that sometimes we effectively subtract TCPOLEN_TSTAMP_ALIGNED and/or TCPOLEN_MD5SIG_ALIGNED from the mss. Fix this by increasing the minimum from 8 to 64. Reported-by: Steve Chen Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 245603c4ad48..081419969485 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2246,7 +2246,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, /* Values greater than interface MTU won't take effect. However * at the point when this call is done we typically don't yet * know which interface is going to be used */ - if (val < 8 || val > MAX_TCP_WINDOW) { + if (val < 64 || val > MAX_TCP_WINDOW) { err = -EINVAL; break; } -- cgit v1.2.1 From 8f49c2703b33519aaaccc63f571b465b9d2b3a2d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 12 Nov 2010 13:35:00 -0800 Subject: tcp: Don't change unlocked socket state in tcp_v4_err(). Alexey Kuznetsov noticed a regression introduced by commit f1ecd5d9e7366609d640ff4040304ea197fbc618 ("Revert Backoff [v3]: Revert RTO on ICMP destination unreachable") The RTO and timer modification code added to tcp_v4_err() doesn't check sock_owned_by_user(), which if true means we don't have exclusive access to the socket and therefore cannot modify it's critical state. Just skip this new code block if sock_owned_by_user() is true and eliminate the now superfluous sock_owned_by_user() code block contained within. Reported-by: Alexey Kuznetsov Signed-off-by: David S. Miller CC: Damian Lukowski Acked-by: Eric Dumazet --- net/ipv4/tcp_ipv4.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8f8527d41682..69ccbc1dde9c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -415,6 +415,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) !icsk->icsk_backoff) break; + if (sock_owned_by_user(sk)) + break; + icsk->icsk_backoff--; inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) << icsk->icsk_backoff; @@ -429,11 +432,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (remaining) { inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, remaining, TCP_RTO_MAX); - } else if (sock_owned_by_user(sk)) { - /* RTO revert clocked out retransmission, - * but socket is locked. Will defer. */ - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - HZ/20, TCP_RTO_MAX); } else { /* RTO revert clocked out retransmission. * Will retransmit now */ -- cgit v1.2.1