From 6b58e0a5f32dedb609438bb9c9c82aa6e23381f2 Mon Sep 17 00:00:00 2001 From: Fan Du Date: Fri, 6 Mar 2015 11:18:23 +0800 Subject: ipv4: Use binary search to choose tcp PMTU probe_size Current probe_size is chosen by doubling mss_cache, the probing process will end shortly with a sub-optimal mss size, and the link mtu will not be taken full advantage of, in return, this will make user to tweak tcp_base_mss with care. Use binary search to choose probe_size in a fine granularity manner, an optimal mss will be found to boost performance as its maxmium. In addition, introduce a sysctl_tcp_probe_threshold to control when probing will stop in respect to the width of search range. Test env: Docker instance with vxlan encapuslation(82599EB) iperf -c 10.0.0.24 -t 60 before this patch: 1.26 Gbits/sec After this patch: increase 26% 1.59 Gbits/sec Signed-off-by: Fan Du Acked-by: John Heffner Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5a2dfed4783b..35790d977a2b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2460,6 +2460,7 @@ static int __net_init tcp_sk_init(struct net *net) } net->ipv4.sysctl_tcp_ecn = 2; net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS; + net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; return 0; fail: -- cgit v1.2.3 From 05cbc0db03e82128f2e7e353d4194dd24a1627fe Mon Sep 17 00:00:00 2001 From: Fan Du Date: Fri, 6 Mar 2015 11:18:24 +0800 Subject: ipv4: Create probe timer for tcp PMTU as per RFC4821 As per RFC4821 7.3. Selecting Probe Size, a probe timer should be armed once probing has converged. Once this timer expired, probing again to take advantage of any path PMTU change. The recommended probing interval is 10 minutes per RFC1981. Probing interval could be sysctled by sysctl_tcp_probe_interval. Eric Dumazet suggested to implement pseudo timer based on 32bits jiffies tcp_time_stamp instead of using classic timer for such rare event. Signed-off-by: Fan Du Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 2 ++ include/net/netns/ipv4.h | 1 + include/net/tcp.h | 3 +++ net/ipv4/sysctl_net_ipv4.c | 7 +++++++ net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_output.c | 38 ++++++++++++++++++++++++++++++++++++-- net/ipv4/tcp_timer.c | 1 + 7 files changed, 51 insertions(+), 2 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 5976bdecf58b..b9a6b0a94cc6 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -126,6 +126,8 @@ struct inet_connection_sock { /* Information on the current probe. */ int probe_size; + + u32 probe_timestamp; } icsk_mtup; u32 icsk_ca_priv[16]; u32 icsk_user_timeout; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index e051d399fa17..8f3a1a1a5a94 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -88,6 +88,7 @@ struct netns_ipv4 { int sysctl_tcp_mtu_probing; int sysctl_tcp_base_mss; int sysctl_tcp_probe_threshold; + u32 sysctl_tcp_probe_interval; struct ping_group_range ping_group_range; diff --git a/include/net/tcp.h b/include/net/tcp.h index 1ad82e334e27..2e11e38205c2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -67,6 +67,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* The least MTU to use for probing */ #define TCP_BASE_MSS 1024 +/* probing interval, default to 10 minutes as per RFC4821 */ +#define TCP_PROBE_INTERVAL 600 + /* Specify interval when tcp mtu probing will stop */ #define TCP_PROBE_THRESHOLD 8 diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d3c09c12ee81..fdf899163d44 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -890,6 +890,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "tcp_probe_interval", + .data = &init_net.ipv4.sysctl_tcp_probe_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 35790d977a2b..f0c6fc32bfa8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2461,6 +2461,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_ecn = 2; net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS; net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; + net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; return 0; fail: diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ed024cbb097f..5a73ad5afaf7 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1354,6 +1354,8 @@ void tcp_mtup_init(struct sock *sk) icsk->icsk_af_ops->net_header_len; icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss); icsk->icsk_mtup.probe_size = 0; + if (icsk->icsk_mtup.enabled) + icsk->icsk_mtup.probe_timestamp = tcp_time_stamp; } EXPORT_SYMBOL(tcp_mtup_init); @@ -1828,6 +1830,31 @@ send_now: return false; } +static inline void tcp_mtu_check_reprobe(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); + u32 interval; + s32 delta; + + interval = net->ipv4.sysctl_tcp_probe_interval; + delta = tcp_time_stamp - icsk->icsk_mtup.probe_timestamp; + if (unlikely(delta >= interval * HZ)) { + int mss = tcp_current_mss(sk); + + /* Update current search range */ + icsk->icsk_mtup.probe_size = 0; + icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + + sizeof(struct tcphdr) + + icsk->icsk_af_ops->net_header_len; + icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); + + /* Update probe time stamp */ + icsk->icsk_mtup.probe_timestamp = tcp_time_stamp; + } +} + /* Create a new MTU probe if we are ready. * MTU probe is regularly attempting to increase the path MTU by * deliberately sending larger packets. This discovers routing @@ -1870,9 +1897,16 @@ static int tcp_mtu_probe(struct sock *sk) icsk->icsk_mtup.search_low) >> 1); size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache; interval = icsk->icsk_mtup.search_high - icsk->icsk_mtup.search_low; + /* When misfortune happens, we are reprobing actively, + * and then reprobe timer has expired. We stick with current + * probing process by not resetting search range to its orignal. + */ if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) || - interval < max(1, net->ipv4.sysctl_tcp_probe_threshold)) { - /* TODO: set timer for probe_converge_event */ + interval < net->ipv4.sysctl_tcp_probe_threshold) { + /* Check whether enough time has elaplased for + * another round of probing. + */ + tcp_mtu_check_reprobe(sk); return -1; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 0732b787904e..15505936511d 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -107,6 +107,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) if (net->ipv4.sysctl_tcp_mtu_probing) { if (!icsk->icsk_mtup.enabled) { icsk->icsk_mtup.enabled = 1; + icsk->icsk_mtup.probe_timestamp = tcp_time_stamp; tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } else { struct net *net = sock_net(sk); -- cgit v1.2.3 From d4f06873b636519cedbe8d2eeae77c713c6a121c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 12 Mar 2015 16:44:09 -0700 Subject: inet: get_openreq4() & get_openreq6() do not need listener ireq->ir_num contains local port, use it. Also, get_openreq4() dumping listen_sk->refcnt makes litle sense. inet_diag_fill_req() can also use ireq->ir_num Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_diag.c | 3 +-- net/ipv4/tcp_ipv4.c | 8 ++++---- net/ipv6/tcp_ipv6.c | 4 ++-- 3 files changed, 7 insertions(+), 8 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 29317ff4a007..c55a6fa3162d 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -718,7 +718,6 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, const struct nlmsghdr *unlh) { const struct inet_request_sock *ireq = inet_rsk(req); - struct inet_sock *inet = inet_sk(sk); struct inet_diag_msg *r; struct nlmsghdr *nlh; long tmo; @@ -744,7 +743,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, if (tmo < 0) tmo = 0; - r->id.idiag_sport = inet->inet_sport; + r->id.idiag_sport = htons(ireq->ir_num); r->id.idiag_dport = ireq->ir_rmt_port; memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f0c6fc32bfa8..70b0f701bbdb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2204,7 +2204,7 @@ void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) } EXPORT_SYMBOL(tcp_proc_unregister); -static void get_openreq4(const struct sock *sk, const struct request_sock *req, +static void get_openreq4(const struct request_sock *req, struct seq_file *f, int i, kuid_t uid) { const struct inet_request_sock *ireq = inet_rsk(req); @@ -2214,7 +2214,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req, " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK", i, ireq->ir_loc_addr, - ntohs(inet_sk(sk)->inet_sport), + ireq->ir_num, ireq->ir_rmt_addr, ntohs(ireq->ir_rmt_port), TCP_SYN_RECV, @@ -2225,7 +2225,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req, from_kuid_munged(seq_user_ns(f), uid), 0, /* non standard timer */ 0, /* open_requests have no inode */ - atomic_read(&sk->sk_refcnt), + 0, req); } @@ -2332,7 +2332,7 @@ static int tcp4_seq_show(struct seq_file *seq, void *v) get_tcp4_sock(v, seq, st->num); break; case TCP_SEQ_STATE_OPENREQ: - get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid); + get_openreq4(v, seq, st->num, st->uid); break; } out: diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5d46832c6f72..1ccfede7d55f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1689,7 +1689,7 @@ static void tcp_v6_destroy_sock(struct sock *sk) #ifdef CONFIG_PROC_FS /* Proc filesystem TCPv6 sock list dumping. */ static void get_openreq6(struct seq_file *seq, - const struct sock *sk, struct request_sock *req, int i, kuid_t uid) + struct request_sock *req, int i, kuid_t uid) { int ttd = req->expires - jiffies; const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; @@ -1827,7 +1827,7 @@ static int tcp6_seq_show(struct seq_file *seq, void *v) get_tcp6_sock(seq, v, st->num); break; case TCP_SEQ_STATE_OPENREQ: - get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid); + get_openreq6(seq, v, st->num, st->uid); break; } out: -- cgit v1.2.3 From 3f66b083a5b7f1a63540c24df3679c24f2e935a9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 12 Mar 2015 16:44:10 -0700 Subject: inet: introduce ireq_family Before inserting request socks into general hash table, fill their socket family. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_sock.h | 1 + net/dccp/ipv4.c | 1 + net/dccp/ipv6.c | 1 + net/ipv4/inet_diag.c | 2 +- net/ipv4/syncookies.c | 1 + net/ipv4/tcp_ipv4.c | 1 + net/ipv6/syncookies.c | 1 + net/ipv6/tcp_ipv6.c | 1 + 8 files changed, 8 insertions(+), 1 deletion(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 9d6470c16a27..b3053fdd871e 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -82,6 +82,7 @@ struct inet_request_sock { #define ireq_net req.__req_common.skc_net #define ireq_state req.__req_common.skc_state #define ireq_refcnt req.__req_common.skc_refcnt +#define ireq_family req.__req_common.skc_family kmemcheck_bitfield_begin(flags); u16 snd_wscale : 4, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index f695874b5ade..8f6f4004daac 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -642,6 +642,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq->ir_loc_addr = ip_hdr(skb)->daddr; ireq->ir_rmt_addr = ip_hdr(skb)->saddr; write_pnet(&ireq->ireq_net, sock_net(sk)); + ireq->ireq_family = AF_INET; /* * Step 3: Process LISTEN state diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 703a21acf434..5166b0043f95 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -404,6 +404,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; write_pnet(&ireq->ireq_net, sock_net(sk)); + ireq->ireq_family = AF_INET6; if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index c55a6fa3162d..43789c99031f 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -728,7 +728,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, return -EMSGSIZE; r = nlmsg_data(nlh); - r->idiag_family = sk->sk_family; + r->idiag_family = ireq->ireq_family; r->idiag_state = TCP_SYN_RECV; r->idiag_timer = 1; r->idiag_retrans = req->num_retrans; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 18e5a67fda81..0c432730c7b4 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -347,6 +347,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; treq->listener = NULL; write_pnet(&ireq->ireq_net, sock_net(sk)); + ireq->ireq_family = AF_INET; /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 70b0f701bbdb..1f514a0c5e60 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1228,6 +1228,7 @@ static void tcp_v4_init_req(struct request_sock *req, struct sock *sk, ireq->ir_rmt_addr = ip_hdr(skb)->saddr; ireq->no_srccheck = inet_sk(sk)->transparent; ireq->opt = tcp_v4_save_options(skb); + ireq->ireq_family = AF_INET; } static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl, diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 66bba6a84e47..58875ce8e178 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -197,6 +197,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq = tcp_rsk(req); treq->listener = NULL; write_pnet(&ireq->ireq_net, sock_net(sk)); + ireq->ireq_family = AF_INET6; if (security_inet_conn_request(sk, skb, req)) goto out_free; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1ccfede7d55f..c5fc6a5e4adc 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -749,6 +749,7 @@ static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, atomic_inc(&skb->users); ireq->pktopts = skb; } + ireq->ireq_family = AF_INET6; } static struct dst_entry *tcp_v6_route_req(struct sock *sk, struct flowi *fl, -- cgit v1.2.3 From f7e4eb03f9d9e2522bdd5107f37f9cf1af0bf0fa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 15 Mar 2015 21:12:13 -0700 Subject: inet: ip early demux should avoid request sockets When a request socket is created, we do not cache ip route dst entry, like for timewait sockets. Let's use sk_fullsock() helper. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1f514a0c5e60..80067d5858b4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1518,7 +1518,7 @@ void tcp_v4_early_demux(struct sk_buff *skb) if (sk) { skb->sk = sk; skb->destructor = sock_edemux; - if (sk->sk_state != TCP_TIME_WAIT) { + if (sk_fullsock(sk)) { struct dst_entry *dst = sk->sk_rx_dst; if (dst) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d89f028dc8c4..e4761b22307b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1583,7 +1583,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb) if (sk) { skb->sk = sk; skb->destructor = sock_edemux; - if (sk->sk_state != TCP_TIME_WAIT) { + if (sk_fullsock(sk)) { struct dst_entry *dst = sk->sk_rx_dst; if (dst) -- cgit v1.2.3 From d1e559d0b1b0d02f76a6bd5b768a99dc834ae926 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Mar 2015 14:05:35 -0700 Subject: inet: add IPv6 support to sk_ehashfn() Intent is to converge IPv4 & IPv6 inet_hash functions to factorize code. IPv4 sockets initialize sk_rcv_saddr and sk_v6_daddr in this patch, thanks to new sk_daddr_set() and sk_rcv_saddr_set() helpers. __inet6_hash can now use sk_ehashfn() instead of a private inet6_sk_ehashfn() and will simply use __inet_hash() in a following patch. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 19 +++++++++++++++++++ net/dccp/ipv4.c | 9 ++++----- net/dccp/ipv6.c | 10 ++-------- net/ipv4/inet_hashtables.c | 11 ++++++++++- net/ipv4/tcp_ipv4.c | 8 ++++---- net/ipv6/inet6_hashtables.c | 22 ++++------------------ net/ipv6/tcp_ipv6.c | 11 ++--------- 7 files changed, 45 insertions(+), 45 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index ef993ef571ea..06ad42182ec2 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -384,6 +384,25 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, } u32 sk_ehashfn(const struct sock *sk); +u32 inet6_ehashfn(const struct net *net, + const struct in6_addr *laddr, const u16 lport, + const struct in6_addr *faddr, const __be16 fport); + +static inline void sk_daddr_set(struct sock *sk, __be32 addr) +{ + sk->sk_daddr = addr; /* alias of inet_daddr */ +#if IS_ENABLED(CONFIG_IPV6) + ipv6_addr_set_v4mapped(addr, &sk->sk_v6_daddr); +#endif +} + +static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr) +{ + sk->sk_rcv_saddr = addr; /* alias of inet_rcv_saddr */ +#if IS_ENABLED(CONFIG_IPV6) + ipv6_addr_set_v4mapped(addr, &sk->sk_v6_rcv_saddr); +#endif +} int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk, u32 port_offset, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index bf897829f4f0..f3f8906f482e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -89,10 +89,9 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (inet->inet_saddr == 0) inet->inet_saddr = fl4->saddr; - inet->inet_rcv_saddr = inet->inet_saddr; - + sk_rcv_saddr_set(sk, inet->inet_saddr); inet->inet_dport = usin->sin_port; - inet->inet_daddr = daddr; + sk_daddr_set(sk, daddr); inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet_opt) @@ -408,8 +407,8 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, newinet = inet_sk(newsk); ireq = inet_rsk(req); - newinet->inet_daddr = ireq->ir_rmt_addr; - newinet->inet_rcv_saddr = ireq->ir_loc_addr; + sk_daddr_set(newsk, ireq->ir_rmt_addr); + sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); newinet->inet_saddr = ireq->ir_loc_addr; newinet->inet_opt = ireq->opt; ireq->opt = NULL; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index d7e7c7b0a3f1..9216d173dd5f 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -470,11 +470,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr); - - ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr); - - newsk->sk_v6_rcv_saddr = newnp->saddr; + newnp->saddr = newsk->sk_v6_rcv_saddr; inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped; newsk->sk_backlog_rcv = dccp_v4_do_rcv; @@ -917,9 +913,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_backlog_rcv = dccp_v6_do_rcv; goto failure; } - ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &sk->sk_v6_rcv_saddr); - + np->saddr = sk->sk_v6_rcv_saddr; return err; } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 3a86dfd7ae33..ab7f677a97db 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -36,9 +36,18 @@ static u32 inet_ehashfn(const struct net *net, const __be32 laddr, inet_ehash_secret + net_hash_mix(net)); } - +/* This function handles inet_sock, but also timewait and request sockets + * for IPv4/IPv6. + */ u32 sk_ehashfn(const struct sock *sk) { +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6 && + !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) + return inet6_ehashfn(sock_net(sk), + &sk->sk_v6_rcv_saddr, sk->sk_num, + &sk->sk_v6_daddr, sk->sk_dport); +#endif return inet_ehashfn(sock_net(sk), sk->sk_rcv_saddr, sk->sk_num, sk->sk_daddr, sk->sk_dport); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 80067d5858b4..ca207df4af1c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -189,7 +189,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (!inet->inet_saddr) inet->inet_saddr = fl4->saddr; - inet->inet_rcv_saddr = inet->inet_saddr; + sk_rcv_saddr_set(sk, inet->inet_saddr); if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { /* Reset inherited state */ @@ -204,7 +204,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) tcp_fetch_timewait_stamp(sk, &rt->dst); inet->inet_dport = usin->sin_port; - inet->inet_daddr = daddr; + sk_daddr_set(sk, daddr); inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet_opt) @@ -1319,8 +1319,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp = tcp_sk(newsk); newinet = inet_sk(newsk); ireq = inet_rsk(req); - newinet->inet_daddr = ireq->ir_rmt_addr; - newinet->inet_rcv_saddr = ireq->ir_loc_addr; + sk_daddr_set(newsk, ireq->ir_rmt_addr); + sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); newinet->inet_saddr = ireq->ir_loc_addr; inet_opt = ireq->opt; rcu_assign_pointer(newinet->inet_opt, inet_opt); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index df7df99d1d7e..ed5787b20192 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -23,11 +23,9 @@ #include #include -static u32 inet6_ehashfn(const struct net *net, - const struct in6_addr *laddr, - const u16 lport, - const struct in6_addr *faddr, - const __be16 fport) +u32 inet6_ehashfn(const struct net *net, + const struct in6_addr *laddr, const u16 lport, + const struct in6_addr *faddr, const __be16 fport) { static u32 inet6_ehash_secret __read_mostly; static u32 ipv6_hash_secret __read_mostly; @@ -44,18 +42,6 @@ static u32 inet6_ehashfn(const struct net *net, inet6_ehash_secret + net_hash_mix(net)); } -static int inet6_sk_ehashfn(const struct sock *sk) -{ - const struct inet_sock *inet = inet_sk(sk); - const struct in6_addr *laddr = &sk->sk_v6_rcv_saddr; - const struct in6_addr *faddr = &sk->sk_v6_daddr; - const __u16 lport = inet->inet_num; - const __be16 fport = inet->inet_dport; - struct net *net = sock_net(sk); - - return inet6_ehashfn(net, laddr, lport, faddr, fport); -} - int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; @@ -75,7 +61,7 @@ int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw) struct hlist_nulls_head *list; spinlock_t *lock; - sk->sk_hash = hash = inet6_sk_ehashfn(sk); + sk->sk_hash = hash = sk_ehashfn(sk); list = &inet_ehash_bucket(hashinfo, hash)->chain; lock = inet_ehash_lockp(hashinfo, hash); spin_lock(lock); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e4761b22307b..5546df074583 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -233,11 +233,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, tp->af_specific = &tcp_sock_ipv6_specific; #endif goto failure; - } else { - ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, - &sk->sk_v6_rcv_saddr); } + np->saddr = sk->sk_v6_rcv_saddr; return err; } @@ -1078,11 +1075,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr); - - ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr); - - newsk->sk_v6_rcv_saddr = newnp->saddr; + newnp->saddr = newsk->sk_v6_rcv_saddr; inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; newsk->sk_backlog_rcv = tcp_v4_do_rcv; -- cgit v1.2.3 From 08d2cc3b26554cae21f279b520ae5c2a3b2be421 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Mar 2015 14:05:38 -0700 Subject: inet: request sock should init IPv6/IPv4 addresses In order to be able to use sk_ehashfn() for request socks, we need to initialize their IPv6/IPv4 addresses. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/request_sock.h | 5 +++++ net/dccp/ipv4.c | 4 ++-- net/ipv4/inet_diag.c | 4 ++-- net/ipv4/syncookies.c | 4 ++-- net/ipv4/tcp_ipv4.c | 8 ++++---- 5 files changed, 15 insertions(+), 10 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 3fa4f824900a..e7ef86340514 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -91,6 +91,11 @@ static inline struct request_sock *inet_reqsk(struct sock *sk) return (struct request_sock *)sk; } +static inline struct sock *req_to_sk(struct request_sock *req) +{ + return (struct sock *)req; +} + static inline void reqsk_free(struct request_sock *req) { /* temporary debugging */ diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index f3f8906f482e..e7ad291cd96b 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -638,8 +638,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; ireq = inet_rsk(req); - ireq->ir_loc_addr = ip_hdr(skb)->daddr; - ireq->ir_rmt_addr = ip_hdr(skb)->saddr; + sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); + sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); ireq->ireq_family = AF_INET; ireq->ir_iif = sk->sk_bound_dev_if; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index e7ba59038c8d..74c39c9f3e11 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -742,14 +742,14 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, if (bc) { /* Note: entry.sport and entry.userlocks are already set */ - entry_fill_addrs(&entry, (struct sock *)req); + entry_fill_addrs(&entry, req_to_sk(req)); entry.dport = ntohs(ireq->ir_rmt_port); if (!inet_diag_bc_run(bc, &entry)) continue; } - err = inet_req_diag_fill((struct sock *)req, skb, + err = inet_req_diag_fill(req_to_sk(req), skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 34e755403715..ef01d8570358 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -337,8 +337,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) req->mss = mss; ireq->ir_num = ntohs(th->dest); ireq->ir_rmt_port = th->source; - ireq->ir_loc_addr = ip_hdr(skb)->daddr; - ireq->ir_rmt_addr = ip_hdr(skb)->saddr; + sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); + sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); ireq->ir_mark = inet_request_mark(sk, skb); ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ca207df4af1c..ddd0b1f25b96 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1219,14 +1219,14 @@ static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) #endif -static void tcp_v4_init_req(struct request_sock *req, struct sock *sk, +static void tcp_v4_init_req(struct request_sock *req, struct sock *sk_listener, struct sk_buff *skb) { struct inet_request_sock *ireq = inet_rsk(req); - ireq->ir_loc_addr = ip_hdr(skb)->daddr; - ireq->ir_rmt_addr = ip_hdr(skb)->saddr; - ireq->no_srccheck = inet_sk(sk)->transparent; + sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); + sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); + ireq->no_srccheck = inet_sk(sk_listener)->transparent; ireq->opt = tcp_v4_save_options(skb); ireq->ireq_family = AF_INET; } -- cgit v1.2.3 From 52452c542559ac980b48dbf22a30ee7fa0af507c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Mar 2015 19:04:19 -0700 Subject: inet: drop prev pointer handling in request sock When request sock are put in ehash table, the whole notion of having a previous request to update dl_next is pointless. Also, following patch will get rid of big purge timer, so we want to delete a request sock without holding listener lock. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet6_connection_sock.h | 1 - include/net/inet_connection_sock.h | 11 ++++------- include/net/request_sock.h | 15 +++++++++++---- include/net/tcp.h | 3 +-- net/dccp/dccp.h | 3 +-- net/dccp/ipv4.c | 14 ++++++-------- net/dccp/ipv6.c | 19 ++++++++----------- net/dccp/minisocks.c | 7 +++---- net/ipv4/inet_connection_sock.c | 22 ++++++++++++---------- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_ipv4.c | 17 ++++++++--------- net/ipv4/tcp_minisocks.c | 5 ++--- net/ipv6/inet6_connection_sock.c | 10 ++++------ net/ipv6/tcp_ipv6.c | 12 ++++++------ 14 files changed, 67 insertions(+), 74 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index 74af137304be..15bd40878d2a 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -29,7 +29,6 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, struct flowi6 *fl6, const struct request_sock *req); struct request_sock *inet6_csk_search_req(const struct sock *sk, - struct request_sock ***prevp, const __be16 rport, const struct in6_addr *raddr, const struct in6_addr *laddr, diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index b9a6b0a94cc6..423a46106e57 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -257,7 +257,6 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk, struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); struct request_sock *inet_csk_search_req(const struct sock *sk, - struct request_sock ***prevp, const __be16 rport, const __be32 raddr, const __be32 laddr); @@ -310,17 +309,15 @@ static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) } static inline void inet_csk_reqsk_queue_unlink(struct sock *sk, - struct request_sock *req, - struct request_sock **prev) + struct request_sock *req) { - reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev); + reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req); } static inline void inet_csk_reqsk_queue_drop(struct sock *sk, - struct request_sock *req, - struct request_sock **prev) + struct request_sock *req) { - inet_csk_reqsk_queue_unlink(sk, req, prev); + inet_csk_reqsk_queue_unlink(sk, req); inet_csk_reqsk_queue_removed(sk, req); reqsk_free(req); } diff --git a/include/net/request_sock.h b/include/net/request_sock.h index e7ef86340514..65223905d139 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -50,6 +50,7 @@ int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req); struct request_sock { struct sock_common __req_common; #define rsk_refcnt __req_common.skc_refcnt +#define rsk_hash __req_common.skc_hash struct request_sock *dl_next; struct sock *rsk_listener; @@ -216,11 +217,16 @@ static inline int reqsk_queue_empty(struct request_sock_queue *queue) } static inline void reqsk_queue_unlink(struct request_sock_queue *queue, - struct request_sock *req, - struct request_sock **prev_req) + struct request_sock *req) { + struct listen_sock *lopt = queue->listen_opt; + struct request_sock **prev; + write_lock(&queue->syn_wait_lock); - *prev_req = req->dl_next; + prev = &lopt->syn_table[req->rsk_hash]; + while (*prev != req) + prev = &(*prev)->dl_next; + *prev = req->dl_next; write_unlock(&queue->syn_wait_lock); } @@ -300,7 +306,6 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue, req->num_retrans = 0; req->num_timeout = 0; req->sk = NULL; - req->dl_next = lopt->syn_table[hash]; /* before letting lookups find us, make sure all req fields * are committed to memory and refcnt initialized. @@ -308,7 +313,9 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue, smp_wmb(); atomic_set(&req->rsk_refcnt, 1); + req->rsk_hash = hash; write_lock(&queue->syn_wait_lock); + req->dl_next = lopt->syn_table[hash]; lopt->syn_table[hash] = req; write_unlock(&queue->syn_wait_lock); } diff --git a/include/net/tcp.h b/include/net/tcp.h index 5b29835b81d8..082fd79132b7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -406,8 +406,7 @@ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, const struct tcphdr *th); struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, struct request_sock **prev, - bool fastopen); + struct request_sock *req, bool fastopen); int tcp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb); void tcp_enter_loss(struct sock *sk); diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 3b1d64d6e093..2396f50c5b04 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -280,8 +280,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst); struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct request_sock **prev); + struct request_sock *req); int dccp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index e7ad291cd96b..5bffbbaf1fac 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -288,11 +288,11 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) } switch (sk->sk_state) { - struct request_sock *req , **prev; + struct request_sock *req; case DCCP_LISTEN: if (sock_owned_by_user(sk)) goto out; - req = inet_csk_search_req(sk, &prev, dh->dccph_dport, + req = inet_csk_search_req(sk, dh->dccph_dport, iph->daddr, iph->saddr); if (!req) goto out; @@ -314,7 +314,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) * created socket, and POSIX does not want network * errors returned from accept(). */ - inet_csk_reqsk_queue_drop(sk, req, prev); + inet_csk_reqsk_queue_drop(sk, req); goto out; case DCCP_REQUESTING: @@ -448,13 +448,11 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) const struct dccp_hdr *dh = dccp_hdr(skb); const struct iphdr *iph = ip_hdr(skb); struct sock *nsk; - struct request_sock **prev; /* Find possible connection requests. */ - struct request_sock *req = inet_csk_search_req(sk, &prev, - dh->dccph_sport, + struct request_sock *req = inet_csk_search_req(sk, dh->dccph_sport, iph->saddr, iph->daddr); - if (req != NULL) - return dccp_check_req(sk, skb, req, prev); + if (req) + return dccp_check_req(sk, skb, req); nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo, iph->saddr, dh->dccph_sport, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index c655de5f67c9..ae2184039fe3 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -149,12 +149,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, /* Might be for an request_sock */ switch (sk->sk_state) { - struct request_sock *req, **prev; + struct request_sock *req; case DCCP_LISTEN: if (sock_owned_by_user(sk)) goto out; - req = inet6_csk_search_req(sk, &prev, dh->dccph_dport, + req = inet6_csk_search_req(sk, dh->dccph_dport, &hdr->daddr, &hdr->saddr, inet6_iif(skb)); if (req == NULL) @@ -172,7 +172,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - inet_csk_reqsk_queue_drop(sk, req, prev); + inet_csk_reqsk_queue_drop(sk, req); goto out; case DCCP_REQUESTING: @@ -317,16 +317,13 @@ static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) { const struct dccp_hdr *dh = dccp_hdr(skb); const struct ipv6hdr *iph = ipv6_hdr(skb); + struct request_sock *req; struct sock *nsk; - struct request_sock **prev; - /* Find possible connection requests. */ - struct request_sock *req = inet6_csk_search_req(sk, &prev, - dh->dccph_sport, - &iph->saddr, - &iph->daddr, - inet6_iif(skb)); + + req = inet6_csk_search_req(sk, dh->dccph_sport, &iph->saddr, + &iph->daddr, inet6_iif(skb)); if (req != NULL) - return dccp_check_req(sk, skb, req, prev); + return dccp_check_req(sk, skb, req); nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo, &iph->saddr, dh->dccph_sport, diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index b50dc436db1f..332f7d6d9942 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -152,8 +152,7 @@ EXPORT_SYMBOL_GPL(dccp_create_openreq_child); * as an request_sock. */ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct request_sock **prev) + struct request_sock *req) { struct sock *child = NULL; struct dccp_request_sock *dreq = dccp_rsk(req); @@ -200,7 +199,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, if (child == NULL) goto listen_overflow; - inet_csk_reqsk_queue_unlink(sk, req, prev); + inet_csk_reqsk_queue_unlink(sk, req); inet_csk_reqsk_queue_removed(sk, req); inet_csk_reqsk_queue_add(sk, req, child); out: @@ -212,7 +211,7 @@ drop: if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) req->rsk_ops->send_reset(sk, skb); - inet_csk_reqsk_queue_drop(sk, req, prev); + inet_csk_reqsk_queue_drop(sk, req); goto out; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f0f91858aecf..4f57a017928c 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -480,18 +480,17 @@ static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, #endif struct request_sock *inet_csk_search_req(const struct sock *sk, - struct request_sock ***prevp, const __be16 rport, const __be32 raddr, const __be32 laddr) { const struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - struct request_sock *req, **prev; + struct request_sock *req; - for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd, - lopt->nr_table_entries)]; - (req = *prev) != NULL; - prev = &req->dl_next) { + for (req = lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd, + lopt->nr_table_entries)]; + req != NULL; + req = req->dl_next) { const struct inet_request_sock *ireq = inet_rsk(req); if (ireq->ir_rmt_port == rport && @@ -499,7 +498,6 @@ struct request_sock *inet_csk_search_req(const struct sock *sk, ireq->ir_loc_addr == laddr && AF_INET_FAMILY(req->rsk_ops->family)) { WARN_ON(req->sk); - *prevp = prev; break; } } @@ -610,7 +608,10 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, i = lopt->clock_hand; do { - reqp=&lopt->syn_table[i]; + reqp = &lopt->syn_table[i]; + if (!*reqp) + goto next_bucket; + write_lock(&queue->syn_wait_lock); while ((req = *reqp) != NULL) { if (time_after_eq(now, req->expires)) { int expire = 0, resend = 0; @@ -635,14 +636,15 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, } /* Drop this request */ - inet_csk_reqsk_queue_unlink(parent, req, reqp); + *reqp = req->dl_next; reqsk_queue_removed(queue, req); reqsk_put(req); continue; } reqp = &req->dl_next; } - + write_unlock(&queue->syn_wait_lock); +next_bucket: i = (i + 1) & (lopt->nr_table_entries - 1); } while (--budget > 0); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1dfbaee3554e..95caea707f54 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5694,7 +5694,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && sk->sk_state != TCP_FIN_WAIT1); - if (tcp_check_req(sk, skb, req, NULL, true) == NULL) + if (tcp_check_req(sk, skb, req, true) == NULL) goto discard; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ddd0b1f25b96..19c3770f1e97 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -458,12 +458,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) } switch (sk->sk_state) { - struct request_sock *req, **prev; + struct request_sock *req; case TCP_LISTEN: if (sock_owned_by_user(sk)) goto out; - req = inet_csk_search_req(sk, &prev, th->dest, + req = inet_csk_search_req(sk, th->dest, iph->daddr, iph->saddr); if (!req) goto out; @@ -484,7 +484,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) * created socket, and POSIX does not want network * errors returned from accept(). */ - inet_csk_reqsk_queue_drop(sk, req, prev); + inet_csk_reqsk_queue_drop(sk, req); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); goto out; @@ -1392,15 +1392,14 @@ EXPORT_SYMBOL(tcp_v4_syn_recv_sock); static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) { - struct tcphdr *th = tcp_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); const struct iphdr *iph = ip_hdr(skb); + struct request_sock *req; struct sock *nsk; - struct request_sock **prev; - /* Find possible connection requests. */ - struct request_sock *req = inet_csk_search_req(sk, &prev, th->source, - iph->saddr, iph->daddr); + + req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr); if (req) - return tcp_check_req(sk, skb, req, prev, false); + return tcp_check_req(sk, skb, req, false); nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr, th->source, iph->daddr, th->dest, inet_iif(skb)); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index dd11ac7798c6..848bcab358e4 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -572,7 +572,6 @@ EXPORT_SYMBOL(tcp_create_openreq_child); struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct request_sock **prev, bool fastopen) { struct tcp_options_received tmp_opt; @@ -766,7 +765,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (child == NULL) goto listen_overflow; - inet_csk_reqsk_queue_unlink(sk, req, prev); + inet_csk_reqsk_queue_unlink(sk, req); inet_csk_reqsk_queue_removed(sk, req); inet_csk_reqsk_queue_add(sk, req, child); @@ -791,7 +790,7 @@ embryonic_reset: tcp_reset(sk); } if (!fastopen) { - inet_csk_reqsk_queue_drop(sk, req, prev); + inet_csk_reqsk_queue_drop(sk, req); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); } return NULL; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 29b32206e494..b7acb9ebc4f5 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -113,7 +113,6 @@ static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport, } struct request_sock *inet6_csk_search_req(const struct sock *sk, - struct request_sock ***prevp, const __be16 rport, const struct in6_addr *raddr, const struct in6_addr *laddr, @@ -121,13 +120,13 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk, { const struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - struct request_sock *req, **prev; + struct request_sock *req; - for (prev = &lopt->syn_table[inet6_synq_hash(raddr, rport, + for (req = lopt->syn_table[inet6_synq_hash(raddr, rport, lopt->hash_rnd, lopt->nr_table_entries)]; - (req = *prev) != NULL; - prev = &req->dl_next) { + req != NULL; + req = req->dl_next) { const struct inet_request_sock *ireq = inet_rsk(req); if (ireq->ir_rmt_port == rport && @@ -136,7 +135,6 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk, ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) && (!ireq->ir_iif || ireq->ir_iif == iif)) { WARN_ON(req->sk != NULL); - *prevp = prev; return req; } } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 720676d073d9..146f123b52c9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -403,13 +403,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, /* Might be for an request_sock */ switch (sk->sk_state) { - struct request_sock *req, **prev; + struct request_sock *req; case TCP_LISTEN: if (sock_owned_by_user(sk)) goto out; /* Note : We use inet6_iif() here, not tcp_v6_iif() */ - req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr, + req = inet6_csk_search_req(sk, th->dest, &hdr->daddr, &hdr->saddr, inet6_iif(skb)); if (!req) goto out; @@ -424,7 +424,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - inet_csk_reqsk_queue_drop(sk, req, prev); + inet_csk_reqsk_queue_drop(sk, req); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); goto out; @@ -980,16 +980,16 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) { - struct request_sock *req, **prev; const struct tcphdr *th = tcp_hdr(skb); + struct request_sock *req; struct sock *nsk; /* Find possible connection requests. */ - req = inet6_csk_search_req(sk, &prev, th->source, + req = inet6_csk_search_req(sk, th->source, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); if (req) - return tcp_check_req(sk, skb, req, prev, false); + return tcp_check_req(sk, skb, req, false); nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source, -- cgit v1.2.3 From fa76ce7328b289b6edd476e24eb52fd634261720 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Mar 2015 19:04:20 -0700 Subject: inet: get rid of central tcp/dccp listener timer One of the major issue for TCP is the SYNACK rtx handling, done by inet_csk_reqsk_queue_prune(), fired by the keepalive timer of a TCP_LISTEN socket. This function runs for awful long times, with socket lock held, meaning that other cpus needing this lock have to spin for hundred of ms. SYNACK are sent in huge bursts, likely to cause severe drops anyway. This model was OK 15 years ago when memory was very tight. We now can afford to have a timer per request sock. Timer invocations no longer need to lock the listener, and can be run from all cpus in parallel. With following patch increasing somaxconn width to 32 bits, I tested a listener with more than 4 million active request sockets, and a steady SYNFLOOD of ~200,000 SYN per second. Host was sending ~830,000 SYNACK per second. This is ~100 times more what we could achieve before this patch. Later, we will get rid of the listener hash and use ehash instead. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet6_connection_sock.h | 2 +- include/net/inet_connection_sock.h | 15 ++-- include/net/request_sock.h | 87 +++++++++++----------- net/core/request_sock.c | 13 ++-- net/core/sock.c | 2 +- net/dccp/ipv4.c | 10 ++- net/dccp/ipv6.c | 12 ++-- net/dccp/timer.c | 24 +------ net/ipv4/inet_connection_sock.c | 139 ++++++++++++++++++------------------ net/ipv4/inet_diag.c | 4 +- net/ipv4/syncookies.c | 1 - net/ipv4/tcp_fastopen.c | 2 +- net/ipv4/tcp_ipv4.c | 11 ++- net/ipv4/tcp_minisocks.c | 5 +- net/ipv4/tcp_timer.c | 12 +--- net/ipv6/inet6_connection_sock.c | 19 ++--- net/ipv6/syncookies.c | 1 - net/ipv6/tcp_ipv6.c | 12 ++-- 18 files changed, 173 insertions(+), 198 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index 15bd40878d2a..6d539e4e5ba7 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -28,7 +28,7 @@ int inet6_csk_bind_conflict(const struct sock *sk, struct dst_entry *inet6_csk_route_req(struct sock *sk, struct flowi6 *fl6, const struct request_sock *req); -struct request_sock *inet6_csk_search_req(const struct sock *sk, +struct request_sock *inet6_csk_search_req(struct sock *sk, const __be16 rport, const struct in6_addr *raddr, const struct in6_addr *laddr, diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 423a46106e57..7b5887cd1172 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -256,7 +256,7 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk, struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); -struct request_sock *inet_csk_search_req(const struct sock *sk, +struct request_sock *inet_csk_search_req(struct sock *sk, const __be16 rport, const __be32 raddr, const __be32 laddr); @@ -282,15 +282,13 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, static inline void inet_csk_reqsk_queue_removed(struct sock *sk, struct request_sock *req) { - if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0) - inet_csk_delete_keepalive_timer(sk); + reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); } static inline void inet_csk_reqsk_queue_added(struct sock *sk, const unsigned long timeout) { - if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0) - inet_csk_reset_keepalive_timer(sk, timeout); + reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue); } static inline int inet_csk_reqsk_queue_len(const struct sock *sk) @@ -319,14 +317,9 @@ static inline void inet_csk_reqsk_queue_drop(struct sock *sk, { inet_csk_reqsk_queue_unlink(sk, req); inet_csk_reqsk_queue_removed(sk, req); - reqsk_free(req); + reqsk_put(req); } -void inet_csk_reqsk_queue_prune(struct sock *parent, - const unsigned long interval, - const unsigned long timeout, - const unsigned long max_rto); - void inet_csk_destroy_sock(struct sock *sk); void inet_csk_prepare_forced_close(struct sock *sk); diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 65223905d139..6a91261d9b7b 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -62,7 +62,7 @@ struct request_sock { u32 window_clamp; /* window clamp at creation time */ u32 rcv_wnd; /* rcv_wnd offered first time */ u32 ts_recent; - unsigned long expires; + struct timer_list rsk_timer; const struct request_sock_ops *rsk_ops; struct sock *sk; u32 secid; @@ -110,9 +110,6 @@ static inline void reqsk_free(struct request_sock *req) static inline void reqsk_put(struct request_sock *req) { - /* temporary debugging, until req sock are put into ehash table */ - WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 1); - if (atomic_dec_and_test(&req->rsk_refcnt)) reqsk_free(req); } @@ -124,12 +121,16 @@ extern int sysctl_max_syn_backlog; * @max_qlen_log - log_2 of maximal queued SYNs/REQUESTs */ struct listen_sock { - u8 max_qlen_log; + int qlen_inc; /* protected by listener lock */ + int young_inc;/* protected by listener lock */ + + /* following fields can be updated by timer */ + atomic_t qlen_dec; /* qlen = qlen_inc - qlen_dec */ + atomic_t young_dec; + + u8 max_qlen_log ____cacheline_aligned_in_smp; u8 synflood_warned; /* 2 bytes hole, try to use */ - int qlen; - int qlen_young; - int clock_hand; u32 hash_rnd; u32 nr_table_entries; struct request_sock *syn_table[0]; @@ -182,9 +183,7 @@ struct fastopen_queue { struct request_sock_queue { struct request_sock *rskq_accept_head; struct request_sock *rskq_accept_tail; - rwlock_t syn_wait_lock; u8 rskq_defer_accept; - /* 3 bytes hole, try to pack */ struct listen_sock *listen_opt; struct fastopen_queue *fastopenq; /* This is non-NULL iff TFO has been * enabled on this listener. Check @@ -192,6 +191,9 @@ struct request_sock_queue { * to determine if TFO is enabled * right at this moment. */ + + /* temporary alignment, our goal is to get rid of this lock */ + rwlock_t syn_wait_lock ____cacheline_aligned_in_smp; }; int reqsk_queue_alloc(struct request_sock_queue *queue, @@ -223,11 +225,15 @@ static inline void reqsk_queue_unlink(struct request_sock_queue *queue, struct request_sock **prev; write_lock(&queue->syn_wait_lock); + prev = &lopt->syn_table[req->rsk_hash]; while (*prev != req) prev = &(*prev)->dl_next; *prev = req->dl_next; + write_unlock(&queue->syn_wait_lock); + if (del_timer(&req->rsk_timer)) + reqsk_put(req); } static inline void reqsk_queue_add(struct request_sock_queue *queue, @@ -260,64 +266,53 @@ static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue return req; } -static inline int reqsk_queue_removed(struct request_sock_queue *queue, - struct request_sock *req) +static inline void reqsk_queue_removed(struct request_sock_queue *queue, + const struct request_sock *req) { struct listen_sock *lopt = queue->listen_opt; if (req->num_timeout == 0) - --lopt->qlen_young; - - return --lopt->qlen; + atomic_inc(&lopt->young_dec); + atomic_inc(&lopt->qlen_dec); } -static inline int reqsk_queue_added(struct request_sock_queue *queue) +static inline void reqsk_queue_added(struct request_sock_queue *queue) { struct listen_sock *lopt = queue->listen_opt; - const int prev_qlen = lopt->qlen; - lopt->qlen_young++; - lopt->qlen++; - return prev_qlen; + lopt->young_inc++; + lopt->qlen_inc++; } -static inline int reqsk_queue_len(const struct request_sock_queue *queue) +static inline int listen_sock_qlen(const struct listen_sock *lopt) { - return queue->listen_opt != NULL ? queue->listen_opt->qlen : 0; + return lopt->qlen_inc - atomic_read(&lopt->qlen_dec); } -static inline int reqsk_queue_len_young(const struct request_sock_queue *queue) +static inline int listen_sock_young(const struct listen_sock *lopt) { - return queue->listen_opt->qlen_young; + return lopt->young_inc - atomic_read(&lopt->young_dec); } -static inline int reqsk_queue_is_full(const struct request_sock_queue *queue) +static inline int reqsk_queue_len(const struct request_sock_queue *queue) { - return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log; + const struct listen_sock *lopt = queue->listen_opt; + + return lopt ? listen_sock_qlen(lopt) : 0; } -static inline void reqsk_queue_hash_req(struct request_sock_queue *queue, - u32 hash, struct request_sock *req, - unsigned long timeout) +static inline int reqsk_queue_len_young(const struct request_sock_queue *queue) { - struct listen_sock *lopt = queue->listen_opt; - - req->expires = jiffies + timeout; - req->num_retrans = 0; - req->num_timeout = 0; - req->sk = NULL; - - /* before letting lookups find us, make sure all req fields - * are committed to memory and refcnt initialized. - */ - smp_wmb(); - atomic_set(&req->rsk_refcnt, 1); + return listen_sock_young(queue->listen_opt); +} - req->rsk_hash = hash; - write_lock(&queue->syn_wait_lock); - req->dl_next = lopt->syn_table[hash]; - lopt->syn_table[hash] = req; - write_unlock(&queue->syn_wait_lock); +static inline int reqsk_queue_is_full(const struct request_sock_queue *queue) +{ + return reqsk_queue_len(queue) >> queue->listen_opt->max_qlen_log; } +void reqsk_queue_hash_req(struct request_sock_queue *queue, + u32 hash, struct request_sock *req, + unsigned long timeout); + #endif /* _REQUEST_SOCK_H */ diff --git a/net/core/request_sock.c b/net/core/request_sock.c index cc39a2aa663a..cdc0ddd9ac9f 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -94,21 +94,26 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) /* make all the listen_opt local to us */ struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue); - if (lopt->qlen != 0) { + if (listen_sock_qlen(lopt) != 0) { unsigned int i; for (i = 0; i < lopt->nr_table_entries; i++) { struct request_sock *req; + write_lock_bh(&queue->syn_wait_lock); while ((req = lopt->syn_table[i]) != NULL) { lopt->syn_table[i] = req->dl_next; - lopt->qlen--; + atomic_inc(&lopt->qlen_dec); + if (del_timer(&req->rsk_timer)) + reqsk_put(req); reqsk_put(req); } + write_unlock_bh(&queue->syn_wait_lock); } } - WARN_ON(lopt->qlen != 0); + if (WARN_ON(listen_sock_qlen(lopt) != 0)) + pr_err("qlen %u\n", listen_sock_qlen(lopt)); kvfree(lopt); } @@ -187,7 +192,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, * * For more details see CoNext'11 "TCP Fast Open" paper. */ - req->expires = jiffies + 60*HZ; + req->rsk_timer.expires = jiffies + 60*HZ; if (fastopenq->rskq_rst_head == NULL) fastopenq->rskq_rst_head = req; else diff --git a/net/core/sock.c b/net/core/sock.c index d9f9e4825362..744a04ddb61c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2739,7 +2739,7 @@ static int req_prot_init(const struct proto *prot) rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name, rsk_prot->obj_size, 0, - SLAB_HWCACHE_ALIGN, NULL); + 0, NULL); if (!rsk_prot->slab) { pr_crit("%s: Can't create request sock SLAB cache!\n", diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 5bffbbaf1fac..25a9615b3b88 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -306,6 +306,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + reqsk_put(req); goto out; } /* @@ -315,6 +316,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) * errors returned from accept(). */ inet_csk_reqsk_queue_drop(sk, req); + reqsk_put(req); goto out; case DCCP_REQUESTING: @@ -451,9 +453,11 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) /* Find possible connection requests. */ struct request_sock *req = inet_csk_search_req(sk, dh->dccph_sport, iph->saddr, iph->daddr); - if (req) - return dccp_check_req(sk, skb, req); - + if (req) { + nsk = dccp_check_req(sk, skb, req); + reqsk_put(req); + return nsk; + } nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo, iph->saddr, dh->dccph_sport, iph->daddr, dh->dccph_dport, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index ae2184039fe3..69d8f13895ba 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -157,7 +157,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, req = inet6_csk_search_req(sk, dh->dccph_dport, &hdr->daddr, &hdr->saddr, inet6_iif(skb)); - if (req == NULL) + if (!req) goto out; /* @@ -169,10 +169,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + reqsk_put(req); goto out; } inet_csk_reqsk_queue_drop(sk, req); + reqsk_put(req); goto out; case DCCP_REQUESTING: @@ -322,9 +324,11 @@ static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) req = inet6_csk_search_req(sk, dh->dccph_sport, &iph->saddr, &iph->daddr, inet6_iif(skb)); - if (req != NULL) - return dccp_check_req(sk, skb, req); - + if (req) { + nsk = dccp_check_req(sk, skb, req); + reqsk_put(req); + return nsk; + } nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo, &iph->saddr, dh->dccph_sport, &iph->daddr, ntohs(dh->dccph_dport), diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 1cd46a345cb0..3ef7acef3ce8 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -161,33 +161,11 @@ out: sock_put(sk); } -/* - * Timer for listening sockets - */ -static void dccp_response_timer(struct sock *sk) -{ - inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT, - DCCP_RTO_MAX); -} - static void dccp_keepalive_timer(unsigned long data) { struct sock *sk = (struct sock *)data; - /* Only process if socket is not in use. */ - bh_lock_sock(sk); - if (sock_owned_by_user(sk)) { - /* Try again later. */ - inet_csk_reset_keepalive_timer(sk, HZ / 20); - goto out; - } - - if (sk->sk_state == DCCP_LISTEN) { - dccp_response_timer(sk); - goto out; - } -out: - bh_unlock_sock(sk); + pr_err("dccp should not use a keepalive timer !\n"); sock_put(sk); } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 4f57a017928c..126a37a156cf 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -23,6 +23,7 @@ #include #include #include +#include #ifdef INET_CSK_DEBUG const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; @@ -476,31 +477,37 @@ static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, #if IS_ENABLED(CONFIG_IPV6) #define AF_INET_FAMILY(fam) ((fam) == AF_INET) #else -#define AF_INET_FAMILY(fam) 1 +#define AF_INET_FAMILY(fam) true #endif -struct request_sock *inet_csk_search_req(const struct sock *sk, - const __be16 rport, const __be32 raddr, +/* Note: this is temporary : + * req sock will no longer be in listener hash table +*/ +struct request_sock *inet_csk_search_req(struct sock *sk, + const __be16 rport, + const __be32 raddr, const __be32 laddr) { - const struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; struct request_sock *req; + u32 hash = inet_synq_hash(raddr, rport, lopt->hash_rnd, + lopt->nr_table_entries); - for (req = lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd, - lopt->nr_table_entries)]; - req != NULL; - req = req->dl_next) { + write_lock(&icsk->icsk_accept_queue.syn_wait_lock); + for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) { const struct inet_request_sock *ireq = inet_rsk(req); if (ireq->ir_rmt_port == rport && ireq->ir_rmt_addr == raddr && ireq->ir_loc_addr == laddr && AF_INET_FAMILY(req->rsk_ops->family)) { + atomic_inc(&req->rsk_refcnt); WARN_ON(req->sk); break; } } + write_unlock(&icsk->icsk_accept_queue.syn_wait_lock); return req; } @@ -556,23 +563,23 @@ int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req) } EXPORT_SYMBOL(inet_rtx_syn_ack); -void inet_csk_reqsk_queue_prune(struct sock *parent, - const unsigned long interval, - const unsigned long timeout, - const unsigned long max_rto) +static void reqsk_timer_handler(unsigned long data) { - struct inet_connection_sock *icsk = inet_csk(parent); + struct request_sock *req = (struct request_sock *)data; + struct sock *sk_listener = req->rsk_listener; + struct inet_connection_sock *icsk = inet_csk(sk_listener); struct request_sock_queue *queue = &icsk->icsk_accept_queue; struct listen_sock *lopt = queue->listen_opt; - int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; - int thresh = max_retries; - unsigned long now = jiffies; - struct request_sock **reqp, *req; - int i, budget; + int expire = 0, resend = 0; + int max_retries, thresh; - if (lopt == NULL || lopt->qlen == 0) + if (sk_listener->sk_state != TCP_LISTEN || !lopt) { + reqsk_put(req); return; + } + max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; + thresh = max_retries; /* Normally all the openreqs are young and become mature * (i.e. converted to established socket) for first timeout. * If synack was not acknowledged for 1 second, it means @@ -590,71 +597,63 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, * embrions; and abort old ones without pity, if old * ones are about to clog our table. */ - if (lopt->qlen>>(lopt->max_qlen_log-1)) { - int young = (lopt->qlen_young<<1); + if (listen_sock_qlen(lopt) >> (lopt->max_qlen_log - 1)) { + int young = listen_sock_young(lopt) << 1; while (thresh > 2) { - if (lopt->qlen < young) + if (listen_sock_qlen(lopt) < young) break; thresh--; young <<= 1; } } - if (queue->rskq_defer_accept) max_retries = queue->rskq_defer_accept; + syn_ack_recalc(req, thresh, max_retries, queue->rskq_defer_accept, + &expire, &resend); + req->rsk_ops->syn_ack_timeout(sk_listener, req); + if (!expire && + (!resend || + !inet_rtx_syn_ack(sk_listener, req) || + inet_rsk(req)->acked)) { + unsigned long timeo; + + if (req->num_timeout++ == 0) + atomic_inc(&lopt->young_dec); + timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX); + mod_timer_pinned(&req->rsk_timer, jiffies + timeo); + return; + } + inet_csk_reqsk_queue_drop(sk_listener, req); + reqsk_put(req); +} - budget = 2 * (lopt->nr_table_entries / (timeout / interval)); - i = lopt->clock_hand; - - do { - reqp = &lopt->syn_table[i]; - if (!*reqp) - goto next_bucket; - write_lock(&queue->syn_wait_lock); - while ((req = *reqp) != NULL) { - if (time_after_eq(now, req->expires)) { - int expire = 0, resend = 0; - - syn_ack_recalc(req, thresh, max_retries, - queue->rskq_defer_accept, - &expire, &resend); - req->rsk_ops->syn_ack_timeout(parent, req); - if (!expire && - (!resend || - !inet_rtx_syn_ack(parent, req) || - inet_rsk(req)->acked)) { - unsigned long timeo; - - if (req->num_timeout++ == 0) - lopt->qlen_young--; - timeo = min(timeout << req->num_timeout, - max_rto); - req->expires = now + timeo; - reqp = &req->dl_next; - continue; - } +void reqsk_queue_hash_req(struct request_sock_queue *queue, + u32 hash, struct request_sock *req, + unsigned long timeout) +{ + struct listen_sock *lopt = queue->listen_opt; - /* Drop this request */ - *reqp = req->dl_next; - reqsk_queue_removed(queue, req); - reqsk_put(req); - continue; - } - reqp = &req->dl_next; - } - write_unlock(&queue->syn_wait_lock); -next_bucket: - i = (i + 1) & (lopt->nr_table_entries - 1); + req->num_retrans = 0; + req->num_timeout = 0; + req->sk = NULL; - } while (--budget > 0); + /* before letting lookups find us, make sure all req fields + * are committed to memory and refcnt initialized. + */ + smp_wmb(); + atomic_set(&req->rsk_refcnt, 2); + setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req); + req->rsk_hash = hash; - lopt->clock_hand = i; + write_lock(&queue->syn_wait_lock); + req->dl_next = lopt->syn_table[hash]; + lopt->syn_table[hash] = req; + write_unlock(&queue->syn_wait_lock); - if (lopt->qlen) - inet_csk_reset_keepalive_timer(parent, interval); + mod_timer_pinned(&req->rsk_timer, jiffies + timeout); } -EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); +EXPORT_SYMBOL(reqsk_queue_hash_req); /** * inet_csk_clone_lock - clone an inet socket, and lock its clone @@ -790,8 +789,6 @@ void inet_csk_listen_stop(struct sock *sk) struct request_sock *acc_req; struct request_sock *req; - inet_csk_delete_keepalive_timer(sk); - /* make all the listen_opt local to us */ acc_req = reqsk_queue_yank_acceptq(queue); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 74c39c9f3e11..34073bbe2700 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -285,7 +285,7 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) != offsetof(struct sock, sk_cookie)); - tmo = inet_reqsk(sk)->expires - jiffies; + tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies; r->idiag_expires = (tmo >= 0) ? jiffies_to_msecs(tmo) : 0; r->idiag_rqueue = 0; r->idiag_wqueue = 0; @@ -719,7 +719,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); lopt = icsk->icsk_accept_queue.listen_opt; - if (!lopt || !lopt->qlen) + if (!lopt || !listen_sock_qlen(lopt)) goto out; if (bc) { diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index ef01d8570358..805dc444741d 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -361,7 +361,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) goto out; } - req->expires = 0UL; req->num_retrans = 0; /* diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 82e375a0cbcf..2eb887ec0ce3 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -240,7 +240,7 @@ static bool tcp_fastopen_queue_check(struct sock *sk) struct request_sock *req1; spin_lock(&fastopenq->lock); req1 = fastopenq->rskq_rst_head; - if ((req1 == NULL) || time_after(req1->expires, jiffies)) { + if (!req1 || time_after(req1->rsk_timer.expires, jiffies)) { spin_unlock(&fastopenq->lock); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENLISTENOVERFLOW); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 19c3770f1e97..5554b8f33d41 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -475,6 +475,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (seq != tcp_rsk(req)->snt_isn) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + reqsk_put(req); goto out; } @@ -486,6 +487,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) */ inet_csk_reqsk_queue_drop(sk, req); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + reqsk_put(req); goto out; case TCP_SYN_SENT: @@ -1398,8 +1400,11 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) struct sock *nsk; req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr); - if (req) - return tcp_check_req(sk, skb, req, false); + if (req) { + nsk = tcp_check_req(sk, skb, req, false); + reqsk_put(req); + return nsk; + } nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr, th->source, iph->daddr, th->dest, inet_iif(skb)); @@ -2208,7 +2213,7 @@ static void get_openreq4(const struct request_sock *req, struct seq_file *f, int i, kuid_t uid) { const struct inet_request_sock *ireq = inet_rsk(req); - long delta = req->expires - jiffies; + long delta = req->rsk_timer.expires - jiffies; seq_printf(f, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK", diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 848bcab358e4..274e96fb369b 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -629,8 +629,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, &tcp_rsk(req)->last_oow_ack_time) && !inet_rtx_syn_ack(sk, req)) - req->expires = min(TCP_TIMEOUT_INIT << req->num_timeout, - TCP_RTO_MAX) + jiffies; + mod_timer_pending(&req->rsk_timer, jiffies + + min(TCP_TIMEOUT_INIT << req->num_timeout, + TCP_RTO_MAX)); return NULL; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 15505936511d..3daa6b5d766d 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -539,16 +539,6 @@ static void tcp_write_timer(unsigned long data) sock_put(sk); } -/* - * Timer for listening sockets - */ - -static void tcp_synack_timer(struct sock *sk) -{ - inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, - TCP_TIMEOUT_INIT, TCP_RTO_MAX); -} - void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEOUTS); @@ -583,7 +573,7 @@ static void tcp_keepalive_timer (unsigned long data) } if (sk->sk_state == TCP_LISTEN) { - tcp_synack_timer(sk); + pr_err("Hmm... keepalive on a LISTEN ???\n"); goto out; } diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index b7acb9ebc4f5..2f3bbe569e8f 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -112,21 +112,20 @@ static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport, return c & (synq_hsize - 1); } -struct request_sock *inet6_csk_search_req(const struct sock *sk, +struct request_sock *inet6_csk_search_req(struct sock *sk, const __be16 rport, const struct in6_addr *raddr, const struct in6_addr *laddr, const int iif) { - const struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; struct request_sock *req; + u32 hash = inet6_synq_hash(raddr, rport, lopt->hash_rnd, + lopt->nr_table_entries); - for (req = lopt->syn_table[inet6_synq_hash(raddr, rport, - lopt->hash_rnd, - lopt->nr_table_entries)]; - req != NULL; - req = req->dl_next) { + write_lock(&icsk->icsk_accept_queue.syn_wait_lock); + for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) { const struct inet_request_sock *ireq = inet_rsk(req); if (ireq->ir_rmt_port == rport && @@ -134,12 +133,14 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk, ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) && ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) && (!ireq->ir_iif || ireq->ir_iif == iif)) { + atomic_inc(&req->rsk_refcnt); WARN_ON(req->sk != NULL); - return req; + break; } } + write_unlock(&icsk->icsk_accept_queue.syn_wait_lock); - return NULL; + return req; } EXPORT_SYMBOL_GPL(inet6_csk_search_req); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index da5823e5e5a7..2819137fc87d 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -222,7 +222,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->ir_mark = inet_request_mark(sk, skb); - req->expires = 0UL; req->num_retrans = 0; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 146f123b52c9..6e3f90db038c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -421,11 +421,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (seq != tcp_rsk(req)->snt_isn) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + reqsk_put(req); goto out; } inet_csk_reqsk_queue_drop(sk, req); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + reqsk_put(req); goto out; case TCP_SYN_SENT: @@ -988,9 +990,11 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) req = inet6_csk_search_req(sk, th->source, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); - if (req) - return tcp_check_req(sk, skb, req, false); - + if (req) { + nsk = tcp_check_req(sk, skb, req, false); + reqsk_put(req); + return nsk; + } nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), @@ -1670,7 +1674,7 @@ static void tcp_v6_destroy_sock(struct sock *sk) static void get_openreq6(struct seq_file *seq, struct request_sock *req, int i, kuid_t uid) { - int ttd = req->expires - jiffies; + long ttd = req->rsk_timer.expires - jiffies; const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr; -- cgit v1.2.3 From b282705336e03fc7b9377a278939594870a40f96 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 22 Mar 2015 10:22:21 -0700 Subject: net: convert syn_wait_lock to a spinlock This is a low hanging fruit, as we'll get rid of syn_wait_lock eventually. We hold syn_wait_lock for such small sections, that it makes no sense to use a read/write lock. A spin lock is simply faster. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/request_sock.h | 11 +++-------- net/core/request_sock.c | 14 +++++++------- net/ipv4/inet_connection_sock.c | 8 ++++---- net/ipv4/inet_diag.c | 4 ++-- net/ipv4/tcp_ipv4.c | 12 ++++++------ net/ipv6/inet6_connection_sock.c | 4 ++-- 6 files changed, 24 insertions(+), 29 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 8603c350fad0..fe41f3ceb008 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -173,11 +173,6 @@ struct fastopen_queue { * %syn_wait_lock is necessary only to avoid proc interface having to grab the main * lock sock while browsing the listening hash (otherwise it's deadlock prone). * - * This lock is acquired in read mode only from listening_get_next() seq_file - * op and it's acquired in write mode _only_ from code that is actively - * changing rskq_accept_head. All readers that are holding the master sock lock - * don't need to grab this lock in read mode too as rskq_accept_head. writes - * are always protected from the main sock lock. */ struct request_sock_queue { struct request_sock *rskq_accept_head; @@ -192,7 +187,7 @@ struct request_sock_queue { */ /* temporary alignment, our goal is to get rid of this lock */ - rwlock_t syn_wait_lock ____cacheline_aligned_in_smp; + spinlock_t syn_wait_lock ____cacheline_aligned_in_smp; }; int reqsk_queue_alloc(struct request_sock_queue *queue, @@ -223,14 +218,14 @@ static inline void reqsk_queue_unlink(struct request_sock_queue *queue, struct listen_sock *lopt = queue->listen_opt; struct request_sock **prev; - write_lock(&queue->syn_wait_lock); + spin_lock(&queue->syn_wait_lock); prev = &lopt->syn_table[req->rsk_hash]; while (*prev != req) prev = &(*prev)->dl_next; *prev = req->dl_next; - write_unlock(&queue->syn_wait_lock); + spin_unlock(&queue->syn_wait_lock); if (del_timer(&req->rsk_timer)) reqsk_put(req); } diff --git a/net/core/request_sock.c b/net/core/request_sock.c index cdc0ddd9ac9f..87b22c0bc08c 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -58,14 +58,14 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, return -ENOMEM; get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); - rwlock_init(&queue->syn_wait_lock); + spin_lock_init(&queue->syn_wait_lock); queue->rskq_accept_head = NULL; lopt->nr_table_entries = nr_table_entries; lopt->max_qlen_log = ilog2(nr_table_entries); - write_lock_bh(&queue->syn_wait_lock); + spin_lock_bh(&queue->syn_wait_lock); queue->listen_opt = lopt; - write_unlock_bh(&queue->syn_wait_lock); + spin_unlock_bh(&queue->syn_wait_lock); return 0; } @@ -81,10 +81,10 @@ static inline struct listen_sock *reqsk_queue_yank_listen_sk( { struct listen_sock *lopt; - write_lock_bh(&queue->syn_wait_lock); + spin_lock_bh(&queue->syn_wait_lock); lopt = queue->listen_opt; queue->listen_opt = NULL; - write_unlock_bh(&queue->syn_wait_lock); + spin_unlock_bh(&queue->syn_wait_lock); return lopt; } @@ -100,7 +100,7 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) for (i = 0; i < lopt->nr_table_entries; i++) { struct request_sock *req; - write_lock_bh(&queue->syn_wait_lock); + spin_lock_bh(&queue->syn_wait_lock); while ((req = lopt->syn_table[i]) != NULL) { lopt->syn_table[i] = req->dl_next; atomic_inc(&lopt->qlen_dec); @@ -108,7 +108,7 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) reqsk_put(req); reqsk_put(req); } - write_unlock_bh(&queue->syn_wait_lock); + spin_unlock_bh(&queue->syn_wait_lock); } } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 711ab143d4cb..79c0c9439fdc 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -495,7 +495,7 @@ struct request_sock *inet_csk_search_req(struct sock *sk, u32 hash = inet_synq_hash(raddr, rport, lopt->hash_rnd, lopt->nr_table_entries); - write_lock(&icsk->icsk_accept_queue.syn_wait_lock); + spin_lock(&icsk->icsk_accept_queue.syn_wait_lock); for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) { const struct inet_request_sock *ireq = inet_rsk(req); @@ -508,7 +508,7 @@ struct request_sock *inet_csk_search_req(struct sock *sk, break; } } - write_unlock(&icsk->icsk_accept_queue.syn_wait_lock); + spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock); return req; } @@ -650,10 +650,10 @@ void reqsk_queue_hash_req(struct request_sock_queue *queue, setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req); req->rsk_hash = hash; - write_lock(&queue->syn_wait_lock); + spin_lock(&queue->syn_wait_lock); req->dl_next = lopt->syn_table[hash]; lopt->syn_table[hash] = req; - write_unlock(&queue->syn_wait_lock); + spin_unlock(&queue->syn_wait_lock); mod_timer_pinned(&req->rsk_timer, jiffies + timeout); } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index f984b2001d0a..76322c9867d5 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -728,7 +728,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, entry.family = sk->sk_family; - read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); lopt = icsk->icsk_accept_queue.listen_opt; if (!lopt || !listen_sock_qlen(lopt)) @@ -776,7 +776,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, } out: - read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); return err; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5554b8f33d41..8028ad5920a4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1909,13 +1909,13 @@ get_req: } sk = sk_nulls_next(st->syn_wait_sk); st->state = TCP_SEQ_STATE_LISTENING; - read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } else { icsk = inet_csk(sk); - read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); if (reqsk_queue_len(&icsk->icsk_accept_queue)) goto start_req; - read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); sk = sk_nulls_next(sk); } get_sk: @@ -1927,7 +1927,7 @@ get_sk: goto out; } icsk = inet_csk(sk); - read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); if (reqsk_queue_len(&icsk->icsk_accept_queue)) { start_req: st->uid = sock_i_uid(sk); @@ -1936,7 +1936,7 @@ start_req: st->sbucket = 0; goto get_req; } - read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } spin_unlock_bh(&ilb->lock); st->offset = 0; @@ -2155,7 +2155,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) case TCP_SEQ_STATE_OPENREQ: if (v) { struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk); - read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } case TCP_SEQ_STATE_LISTENING: if (v != SEQ_START_TOKEN) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 2f3bbe569e8f..6927f3fb5597 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -124,7 +124,7 @@ struct request_sock *inet6_csk_search_req(struct sock *sk, u32 hash = inet6_synq_hash(raddr, rport, lopt->hash_rnd, lopt->nr_table_entries); - write_lock(&icsk->icsk_accept_queue.syn_wait_lock); + spin_lock(&icsk->icsk_accept_queue.syn_wait_lock); for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) { const struct inet_request_sock *ireq = inet_rsk(req); @@ -138,7 +138,7 @@ struct request_sock *inet6_csk_search_req(struct sock *sk, break; } } - write_unlock(&icsk->icsk_accept_queue.syn_wait_lock); + spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock); return req; } -- cgit v1.2.3 From 26e3736090e1037ac929787df21c05497479b77f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 22 Mar 2015 10:22:22 -0700 Subject: ipv4: tcp: handle ICMP messages on TCP_NEW_SYN_RECV request sockets tcp_v4_err() can restrict lookups to ehash table, and not to listeners. Note this patch creates the infrastructure, but this means that ICMP messages for request sockets are ignored until complete conversion. New tcp_req_err() helper is exported so that we can use it in IPv6 in following patch. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/tcp_ipv4.c | 69 ++++++++++++++++++++++++++--------------------------- 2 files changed, 35 insertions(+), 35 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index 1876262afd59..fe60e00e1919 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -447,6 +447,7 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb); void tcp_v4_mtu_reduced(struct sock *sk); +void tcp_req_err(struct sock *sk, u32 seq); int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb); struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8028ad5920a4..a57615062b66 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -310,6 +310,34 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk) dst->ops->redirect(dst, sk, skb); } + +/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */ +void tcp_req_err(struct sock *sk, u32 seq) +{ + struct request_sock *req = inet_reqsk(sk); + struct net *net = sock_net(sk); + + /* ICMPs are not backlogged, hence we cannot get + * an established socket here. + */ + WARN_ON(req->sk); + + if (seq != tcp_rsk(req)->snt_isn) { + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + } else { + /* + * Still in SYN_RECV, just remove it silently. + * There is no good way to pass the error to the newly + * created socket, and POSIX does not want network + * errors returned from accept(). + */ + inet_csk_reqsk_queue_drop(req->rsk_listener, req); + NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS); + } + reqsk_put(req); +} +EXPORT_SYMBOL(tcp_req_err); + /* * This routine is called by the ICMP module when it gets some * sort of error condition. If err < 0 then the socket should @@ -343,8 +371,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) int err; struct net *net = dev_net(icmp_skb->dev); - sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest, - iph->saddr, th->source, inet_iif(icmp_skb)); + sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr, + th->dest, iph->saddr, ntohs(th->source), + inet_iif(icmp_skb)); if (!sk) { ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; @@ -353,6 +382,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) inet_twsk_put(inet_twsk(sk)); return; } + seq = ntohl(th->seq); + if (sk->sk_state == TCP_NEW_SYN_RECV) + return tcp_req_err(sk, seq); bh_lock_sock(sk); /* If too many ICMPs get dropped on busy @@ -374,7 +406,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) icsk = inet_csk(sk); tp = tcp_sk(sk); - seq = ntohl(th->seq); /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ fastopen = tp->fastopen_rsk; snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; @@ -458,38 +489,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) } switch (sk->sk_state) { - struct request_sock *req; - case TCP_LISTEN: - if (sock_owned_by_user(sk)) - goto out; - - req = inet_csk_search_req(sk, th->dest, - iph->daddr, iph->saddr); - if (!req) - goto out; - - /* ICMPs are not backlogged, hence we cannot get - an established socket here. - */ - WARN_ON(req->sk); - - if (seq != tcp_rsk(req)->snt_isn) { - NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); - reqsk_put(req); - goto out; - } - - /* - * Still in SYN_RECV, just remove it silently. - * There is no good way to pass the error to the newly - * created socket, and POSIX does not want network - * errors returned from accept(). - */ - inet_csk_reqsk_queue_drop(sk, req); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); - reqsk_put(req); - goto out; - case TCP_SYN_SENT: case TCP_SYN_RECV: /* Only in fast or simultaneous open. If a fast open socket is -- cgit v1.2.3 From c69736696cf3742b37d850289dc0d7ead177bb14 Mon Sep 17 00:00:00 2001 From: Fan Du Date: Mon, 23 Mar 2015 15:00:41 -0700 Subject: inet: fix double request socket freeing Eric Hugne reported following error : I'm hitting this warning on latest net-next when i try to SSH into a machine with eth0 added to a bridge (but i think the problem is older than that) Steps to reproduce: node2 ~ # brctl addif br0 eth0 [ 223.758785] device eth0 entered promiscuous mode node2 ~ # ip link set br0 up [ 244.503614] br0: port 1(eth0) entered forwarding state [ 244.505108] br0: port 1(eth0) entered forwarding state node2 ~ # [ 251.160159] ------------[ cut here ]------------ [ 251.160831] WARNING: CPU: 0 PID: 3 at include/net/request_sock.h:102 tcp_v4_err+0x6b1/0x720() [ 251.162077] Modules linked in: [ 251.162496] CPU: 0 PID: 3 Comm: ksoftirqd/0 Not tainted 4.0.0-rc3+ #18 [ 251.163334] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 251.164078] ffffffff81a8365c ffff880038a6ba18 ffffffff8162ace4 0000000000009898 [ 251.165084] 0000000000000000 ffff880038a6ba58 ffffffff8104da85 ffff88003fa437c0 [ 251.166195] ffff88003fa437c0 ffff88003fa74e00 ffff88003fa43bb8 ffff88003fad99a0 [ 251.167203] Call Trace: [ 251.167533] [] dump_stack+0x45/0x57 [ 251.168206] [] warn_slowpath_common+0x85/0xc0 [ 251.169239] [] warn_slowpath_null+0x15/0x20 [ 251.170271] [] tcp_v4_err+0x6b1/0x720 [ 251.171408] [] ? _raw_read_lock_irq+0x3/0x10 [ 251.172589] [] ? inet_del_offload+0x40/0x40 [ 251.173366] [] icmp_socket_deliver+0x65/0xb0 [ 251.174134] [] icmp_unreach+0xc2/0x280 [ 251.174820] [] icmp_rcv+0x2bd/0x3a0 [ 251.175473] [] ip_local_deliver_finish+0x82/0x1e0 [ 251.176282] [] ip_local_deliver+0x88/0x90 [ 251.177004] [] ip_rcv_finish+0xf0/0x310 [ 251.177693] [] ip_rcv+0x2dc/0x390 [ 251.178336] [] __netif_receive_skb_core+0x713/0xa20 [ 251.179170] [] __netif_receive_skb+0x1a/0x80 [ 251.179922] [] process_backlog+0x94/0x120 [ 251.180639] [] net_rx_action+0x1e2/0x310 [ 251.181356] [] __do_softirq+0xa7/0x290 [ 251.182046] [] run_ksoftirqd+0x19/0x30 [ 251.182726] [] smpboot_thread_fn+0x153/0x1d0 [ 251.183485] [] ? SyS_setgroups+0x130/0x130 [ 251.184228] [] kthread+0xee/0x110 [ 251.184871] [] ? kthread_create_on_node+0x1b0/0x1b0 [ 251.185690] [] ret_from_fork+0x58/0x90 [ 251.186385] [] ? kthread_create_on_node+0x1b0/0x1b0 [ 251.187216] ---[ end trace c947fc7b24e42ea1 ]--- [ 259.542268] br0: port 1(eth0) entered forwarding state Remove the double calls to reqsk_put() [edumazet] : I got confused because reqsk_timer_handler() _has_ to call reqsk_put(req) after calling inet_csk_reqsk_queue_drop(), as the timer handler holds a reference on req. Signed-off-by: Fan Du Signed-off-by: Eric Dumazet Reported-by: Erik Hugne Fixes: fa76ce7328b2 ("inet: get rid of central tcp/dccp listener timer") Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 2 +- net/ipv4/tcp_ipv4.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 6310b8b19598..2b4f21d34df6 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -208,6 +208,7 @@ void dccp_req_err(struct sock *sk, u64 seq) if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + reqsk_put(req); } else { /* * Still in RESPOND, just remove it silently. @@ -217,7 +218,6 @@ void dccp_req_err(struct sock *sk, u64 seq) */ inet_csk_reqsk_queue_drop(req->rsk_listener, req); } - reqsk_put(req); } EXPORT_SYMBOL(dccp_req_err); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a57615062b66..4e90217003e8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -324,6 +324,7 @@ void tcp_req_err(struct sock *sk, u32 seq) if (seq != tcp_rsk(req)->snt_isn) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + reqsk_put(req); } else { /* * Still in SYN_RECV, just remove it silently. @@ -331,10 +332,9 @@ void tcp_req_err(struct sock *sk, u32 seq) * created socket, and POSIX does not want network * errors returned from accept(). */ - inet_csk_reqsk_queue_drop(req->rsk_listener, req); NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS); + inet_csk_reqsk_queue_drop(req->rsk_listener, req); } - reqsk_put(req); } EXPORT_SYMBOL(tcp_req_err); -- cgit v1.2.3 From ff74e23f7edb3759d1290b10f80222e3bbb6304b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 Mar 2015 15:58:54 -0700 Subject: tcp: md5: input path is run under rcu protected sections It is guaranteed that both tcp_v4_rcv() and tcp_v6_rcv() run from rcu read locked sections : ip_local_deliver_finish() and ip6_input_finish() both use rcu_read_lock() Also align tcp_v6_inbound_md5_hash() on tcp_v4_inbound_md5_hash() by returning a boolean. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 17 +++-------------- net/ipv6/tcp_ipv6.c | 25 ++++++------------------- 2 files changed, 9 insertions(+), 33 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4e90217003e8..d339a0488f51 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1153,8 +1153,9 @@ clear_hash_noput: } EXPORT_SYMBOL(tcp_v4_md5_hash_skb); -static bool __tcp_v4_inbound_md5_hash(struct sock *sk, - const struct sk_buff *skb) +/* Called with rcu_read_lock() */ +static bool tcp_v4_inbound_md5_hash(struct sock *sk, + const struct sk_buff *skb) { /* * This gets called for each TCP segment that arrives @@ -1206,18 +1207,6 @@ static bool __tcp_v4_inbound_md5_hash(struct sock *sk, } return false; } - -static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) -{ - bool ret; - - rcu_read_lock(); - ret = __tcp_v4_inbound_md5_hash(sk, skb); - rcu_read_unlock(); - - return ret; -} - #endif static void tcp_v4_init_req(struct request_sock *req, struct sock *sk_listener, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4a4e6d30c448..078e7d0f4cd8 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -633,8 +633,7 @@ clear_hash_noput: return 1; } -static int __tcp_v6_inbound_md5_hash(struct sock *sk, - const struct sk_buff *skb) +static bool tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) { const __u8 *hash_location = NULL; struct tcp_md5sig_key *hash_expected; @@ -648,16 +647,16 @@ static int __tcp_v6_inbound_md5_hash(struct sock *sk, /* We've parsed the options - do we have a hash? */ if (!hash_expected && !hash_location) - return 0; + return false; if (hash_expected && !hash_location) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); - return 1; + return true; } if (!hash_expected && hash_location) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); - return 1; + return true; } /* check the signature */ @@ -670,22 +669,10 @@ static int __tcp_v6_inbound_md5_hash(struct sock *sk, genhash ? "failed" : "mismatch", &ip6h->saddr, ntohs(th->source), &ip6h->daddr, ntohs(th->dest)); - return 1; + return true; } - return 0; -} - -static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) -{ - int ret; - - rcu_read_lock(); - ret = __tcp_v6_inbound_md5_hash(sk, skb); - rcu_read_unlock(); - - return ret; + return false; } - #endif static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, -- cgit v1.2.3 From 39f8e58e53be32ab758d30536e0bd2e6ce766462 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 Mar 2015 15:58:55 -0700 Subject: tcp: md5: remove request sock argument of calc_md5_hash() Since request and established sockets now have same base, there is no need to pass two pointers to tcp_v4_md5_hash_skb() or tcp_v6_md5_hash_skb() Also add a const qualifier to their struct tcp_md5sig_key argument. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 29 +++++++++++++---------------- net/ipv4/tcp_ipv4.c | 17 +++++++---------- net/ipv4/tcp_output.c | 4 ++-- net/ipv6/tcp_ipv6.c | 15 ++++++--------- 4 files changed, 28 insertions(+), 37 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index fe60e00e1919..992be858c370 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1296,9 +1296,8 @@ struct tcp_md5sig_pool { }; /* - functions */ -int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, - const struct sock *sk, const struct request_sock *req, - const struct sk_buff *skb); +int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, + const struct sock *sk, const struct sk_buff *skb); int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, int family, const u8 *newkey, u8 newkeylen, gfp_t gfp); int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, @@ -1616,14 +1615,13 @@ struct tcp_sock_af_ops { #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *(*md5_lookup) (struct sock *sk, struct sock *addr_sk); - int (*calc_md5_hash) (char *location, - struct tcp_md5sig_key *md5, - const struct sock *sk, - const struct request_sock *req, - const struct sk_buff *skb); - int (*md5_parse) (struct sock *sk, - char __user *optval, - int optlen); + int (*calc_md5_hash)(char *location, + const struct tcp_md5sig_key *md5, + const struct sock *sk, + const struct sk_buff *skb); + int (*md5_parse)(struct sock *sk, + char __user *optval, + int optlen); #endif }; @@ -1632,11 +1630,10 @@ struct tcp_request_sock_ops { #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *(*md5_lookup) (struct sock *sk, struct request_sock *req); - int (*calc_md5_hash) (char *location, - struct tcp_md5sig_key *md5, - const struct sock *sk, - const struct request_sock *req, - const struct sk_buff *skb); + int (*calc_md5_hash) (char *location, + const struct tcp_md5sig_key *md5, + const struct sock *sk, + const struct sk_buff *skb); #endif void (*init_req)(struct request_sock *req, struct sock *sk, struct sk_buff *skb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d339a0488f51..79d5c641688c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -648,7 +648,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) if (!key) goto release_sk1; - genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb); + genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) goto release_sk1; } else { @@ -1102,8 +1102,8 @@ clear_hash_noput: return 1; } -int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, - const struct sock *sk, const struct request_sock *req, +int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, + const struct sock *sk, const struct sk_buff *skb) { struct tcp_md5sig_pool *hp; @@ -1111,12 +1111,9 @@ int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, const struct tcphdr *th = tcp_hdr(skb); __be32 saddr, daddr; - if (sk) { - saddr = inet_sk(sk)->inet_saddr; - daddr = inet_sk(sk)->inet_daddr; - } else if (req) { - saddr = inet_rsk(req)->ir_loc_addr; - daddr = inet_rsk(req)->ir_rmt_addr; + if (sk) { /* valid for establish/request sockets */ + saddr = sk->sk_rcv_saddr; + daddr = sk->sk_daddr; } else { const struct iphdr *iph = ip_hdr(skb); saddr = iph->saddr; @@ -1195,7 +1192,7 @@ static bool tcp_v4_inbound_md5_hash(struct sock *sk, */ genhash = tcp_v4_md5_hash_skb(newhash, hash_expected, - NULL, NULL, skb); + NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) { net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5b7fad4b314c..501cf9d401c3 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -986,7 +986,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (md5) { sk_nocaps_add(sk, NETIF_F_GSO_MASK); tp->af_specific->calc_md5_hash(opts.hash_location, - md5, sk, NULL, skb); + md5, sk, skb); } #endif @@ -2973,7 +2973,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, /* Okay, we have all we need - do the md5 hash if needed */ if (md5) tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location, - md5, NULL, req, skb); + md5, req_to_sk(req), skb); rcu_read_unlock(); #endif diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 078e7d0f4cd8..57d1c41404ec 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -582,9 +582,9 @@ clear_hash_noput: return 1; } -static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, +static int tcp_v6_md5_hash_skb(char *md5_hash, + const struct tcp_md5sig_key *key, const struct sock *sk, - const struct request_sock *req, const struct sk_buff *skb) { const struct in6_addr *saddr, *daddr; @@ -592,12 +592,9 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, struct hash_desc *desc; const struct tcphdr *th = tcp_hdr(skb); - if (sk) { - saddr = &inet6_sk(sk)->saddr; + if (sk) { /* valid for establish/request sockets */ + saddr = &sk->sk_v6_rcv_saddr; daddr = &sk->sk_v6_daddr; - } else if (req) { - saddr = &inet_rsk(req)->ir_v6_loc_addr; - daddr = &inet_rsk(req)->ir_v6_rmt_addr; } else { const struct ipv6hdr *ip6h = ipv6_hdr(skb); saddr = &ip6h->saddr; @@ -662,7 +659,7 @@ static bool tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) /* check the signature */ genhash = tcp_v6_md5_hash_skb(newhash, hash_expected, - NULL, NULL, skb); + NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) { net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n", @@ -880,7 +877,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) if (!key) goto release_sk1; - genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb); + genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) goto release_sk1; } else { -- cgit v1.2.3 From fd3a154a00fb991872680f19021f5edbb40b4dbe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 Mar 2015 15:58:56 -0700 Subject: tcp: md5: get rid of tcp_v[46]_reqsk_md5_lookup() With request socks convergence, we no longer need different lookup methods. A request socket can use generic lookup function. Add const qualifier to 2nd tcp_v[46]_md5_lookup() parameter. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 8 ++++---- net/ipv4/tcp_ipv4.c | 19 +++++-------------- net/ipv4/tcp_output.c | 2 +- net/ipv6/tcp_ipv6.c | 10 ++-------- 4 files changed, 12 insertions(+), 27 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index 992be858c370..42690daa924e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1303,7 +1303,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family); struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, - struct sock *addr_sk); + const struct sock *addr_sk); #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, @@ -1614,7 +1614,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, struct tcp_sock_af_ops { #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *(*md5_lookup) (struct sock *sk, - struct sock *addr_sk); + const struct sock *addr_sk); int (*calc_md5_hash)(char *location, const struct tcp_md5sig_key *md5, const struct sock *sk, @@ -1628,8 +1628,8 @@ struct tcp_sock_af_ops { struct tcp_request_sock_ops { u16 mss_clamp; #ifdef CONFIG_TCP_MD5SIG - struct tcp_md5sig_key *(*md5_lookup) (struct sock *sk, - struct request_sock *req); + struct tcp_md5sig_key *(*req_md5_lookup)(struct sock *sk, + const struct sock *addr_sk); int (*calc_md5_hash) (char *location, const struct tcp_md5sig_key *md5, const struct sock *sk, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 79d5c641688c..fc8995a702a6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -898,10 +898,10 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, const union tcp_md5_addr *addr, int family) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; unsigned int size = sizeof(struct in_addr); - struct tcp_md5sig_info *md5sig; + const struct tcp_md5sig_info *md5sig; /* caller either holds rcu_read_lock() or socket lock */ md5sig = rcu_dereference_check(tp->md5sig_info, @@ -924,24 +924,15 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, EXPORT_SYMBOL(tcp_md5_do_lookup); struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, - struct sock *addr_sk) + const struct sock *addr_sk) { union tcp_md5_addr *addr; - addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr; + addr = (union tcp_md5_addr *)&sk->sk_daddr; return tcp_md5_do_lookup(sk, addr, AF_INET); } EXPORT_SYMBOL(tcp_v4_md5_lookup); -static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, - struct request_sock *req) -{ - union tcp_md5_addr *addr; - - addr = (union tcp_md5_addr *)&inet_rsk(req)->ir_rmt_addr; - return tcp_md5_do_lookup(sk, addr, AF_INET); -} - /* This can be called on a newly created socket, from other files */ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, int family, const u8 *newkey, u8 newkeylen, gfp_t gfp) @@ -1247,7 +1238,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = { static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .mss_clamp = TCP_MSS_DEFAULT, #ifdef CONFIG_TCP_MD5SIG - .md5_lookup = tcp_v4_reqsk_md5_lookup, + .req_md5_lookup = tcp_v4_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, #endif .init_req = tcp_v4_init_req, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 501cf9d401c3..2e69b8d16e68 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2938,7 +2938,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, #ifdef CONFIG_TCP_MD5SIG rcu_read_lock(); - md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); + md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); #endif tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5, foc) + sizeof(*th); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 57d1c41404ec..a9568caf4675 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -486,17 +486,11 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, } static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk, - struct sock *addr_sk) + const struct sock *addr_sk) { return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr); } -static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk, - struct request_sock *req) -{ - return tcp_v6_md5_do_lookup(sk, &inet_rsk(req)->ir_v6_rmt_addr); -} - static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval, int optlen) { @@ -720,7 +714,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr), #ifdef CONFIG_TCP_MD5SIG - .md5_lookup = tcp_v6_reqsk_md5_lookup, + .req_md5_lookup = tcp_v6_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, #endif .init_req = tcp_v6_init_req, -- cgit v1.2.3 From 0144a81cccf7532bead90f0542f517bd028d3b3c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 Mar 2015 21:45:56 -0700 Subject: tcp: fix ipv4 mapped request socks ss should display ipv4 mapped request sockets like this : tcp SYN-RECV 0 0 ::ffff:192.168.0.1:8080 ::ffff:192.0.2.1:35261 and not like this : tcp SYN-RECV 0 0 192.168.0.1:8080 192.0.2.1:35261 We should init ireq->ireq_family based on listener sk_family, not the actual protocol carried by SYN packet. This means we can set ireq_family in inet_reqsk_alloc() Fixes: 3f66b083a5b7 ("inet: introduce ireq_family") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/syncookies.c | 1 - net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_ipv4.c | 1 - net/ipv6/syncookies.c | 1 - net/ipv6/tcp_ipv6.c | 1 - 5 files changed, 1 insertion(+), 5 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 805dc444741d..df849e5a10f1 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -347,7 +347,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; treq->tfo_listener = false; - ireq->ireq_family = AF_INET; ireq->ir_iif = sk->sk_bound_dev_if; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 95caea707f54..023196f7ec37 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5980,7 +5980,7 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, atomic64_set(&ireq->ir_cookie, 0); ireq->ireq_state = TCP_NEW_SYN_RECV; write_pnet(&ireq->ireq_net, sock_net(sk_listener)); - + ireq->ireq_family = sk_listener->sk_family; } return req; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fc8995a702a6..e073517b2cc7 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1206,7 +1206,6 @@ static void tcp_v4_init_req(struct request_sock *req, struct sock *sk_listener, sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); ireq->no_srccheck = inet_sk(sk_listener)->transparent; ireq->opt = tcp_v4_save_options(skb); - ireq->ireq_family = AF_INET; } static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl, diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 2819137fc87d..21bc2eb53c57 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -197,7 +197,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); treq = tcp_rsk(req); treq->tfo_listener = false; - ireq->ireq_family = AF_INET6; if (security_inet_conn_request(sk, skb, req)) goto out_free; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a9568caf4675..cbdf6912d2c6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -688,7 +688,6 @@ static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, atomic_inc(&skb->users); ireq->pktopts = skb; } - ireq->ireq_family = AF_INET6; } static struct dst_entry *tcp_v6_route_req(struct sock *sk, struct flowi *fl, -- cgit v1.2.3 From 41d25fe0927aabb1d4b671871a99a55bcd203257 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 25 Mar 2015 15:08:47 -0700 Subject: tcp: tcp_syn_flood_action() can be static After commit 1fb6f159fd21 ("tcp: add tcp_conn_request"), tcp_syn_flood_action() is no longer used from IPv6. We can make it static, by moving it above tcp_conn_request() Signed-off-by: Eric Dumazet Reviewed-by: Octavian Purdila Signed-off-by: David S. Miller --- include/net/tcp.h | 2 -- net/ipv4/tcp_input.c | 29 +++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 29 ----------------------------- 3 files changed, 29 insertions(+), 31 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index 42690daa924e..963303fb96ae 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -529,8 +529,6 @@ int tcp_write_wakeup(struct sock *); void tcp_send_fin(struct sock *sk); void tcp_send_active_reset(struct sock *sk, gfp_t priority); int tcp_send_synack(struct sock *); -bool tcp_syn_flood_action(struct sock *sk, const struct sk_buff *skb, - const char *proto); void tcp_push_one(struct sock *, unsigned int mss_now); void tcp_send_ack(struct sock *sk); void tcp_send_delayed_ack(struct sock *sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 023196f7ec37..18b80e8bc533 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5987,6 +5987,35 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, } EXPORT_SYMBOL(inet_reqsk_alloc); +/* + * Return true if a syncookie should be sent + */ +static bool tcp_syn_flood_action(struct sock *sk, + const struct sk_buff *skb, + const char *proto) +{ + const char *msg = "Dropping request"; + bool want_cookie = false; + struct listen_sock *lopt; + +#ifdef CONFIG_SYN_COOKIES + if (sysctl_tcp_syncookies) { + msg = "Sending cookies"; + want_cookie = true; + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); + } else +#endif + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); + + lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; + if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) { + lopt->synflood_warned = 1; + pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", + proto, ntohs(tcp_hdr(skb)->dest), msg); + } + return want_cookie; +} + int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e073517b2cc7..5aababa20a21 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -856,35 +856,6 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) kfree(inet_rsk(req)->opt); } -/* - * Return true if a syncookie should be sent - */ -bool tcp_syn_flood_action(struct sock *sk, - const struct sk_buff *skb, - const char *proto) -{ - const char *msg = "Dropping request"; - bool want_cookie = false; - struct listen_sock *lopt; - -#ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) { - msg = "Sending cookies"; - want_cookie = true; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); - } else -#endif - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); - - lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; - if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) { - lopt->synflood_warned = 1; - pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", - proto, ntohs(tcp_hdr(skb)->dest), msg); - } - return want_cookie; -} -EXPORT_SYMBOL(tcp_syn_flood_action); #ifdef CONFIG_TCP_MD5SIG /* -- cgit v1.2.3 From 51456b2914a34d16b1255b7c55d5cbf6a681d306 Mon Sep 17 00:00:00 2001 From: Ian Morris Date: Fri, 3 Apr 2015 09:17:26 +0100 Subject: ipv4: coding style: comparison for equality with NULL The ipv4 code uses a mixture of coding styles. In some instances check for NULL pointer is done as x == NULL and sometimes as !x. !x is preferred according to checkpatch and this patch makes the code consistent by adopting the latter form. No changes detected by objdiff. Signed-off-by: Ian Morris Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 6 +++--- net/ipv4/arp.c | 26 ++++++++++++------------- net/ipv4/cipso_ipv4.c | 36 +++++++++++++++++----------------- net/ipv4/devinet.c | 40 +++++++++++++++++++------------------- net/ipv4/esp4.c | 2 +- net/ipv4/fib_frontend.c | 26 ++++++++++++------------- net/ipv4/fib_rules.c | 4 ++-- net/ipv4/fib_semantics.c | 24 +++++++++++------------ net/ipv4/fib_trie.c | 12 ++++++------ net/ipv4/geneve.c | 2 +- net/ipv4/gre_offload.c | 2 +- net/ipv4/icmp.c | 6 +++--- net/ipv4/igmp.c | 12 ++++++------ net/ipv4/inet_fragment.c | 4 ++-- net/ipv4/ip_fragment.c | 11 ++++++----- net/ipv4/ip_gre.c | 4 ++-- net/ipv4/ip_input.c | 5 +++-- net/ipv4/ip_output.c | 23 ++++++++++++---------- net/ipv4/ip_sockglue.c | 4 ++-- net/ipv4/ip_tunnel.c | 8 ++++---- net/ipv4/ipcomp.c | 2 +- net/ipv4/ipconfig.c | 6 ++++-- net/ipv4/ipip.c | 2 +- net/ipv4/ipmr.c | 48 +++++++++++++++++++++++----------------------- net/ipv4/ping.c | 2 +- net/ipv4/raw.c | 2 +- net/ipv4/route.c | 20 +++++++++---------- net/ipv4/sysctl_net_ipv4.c | 6 +++--- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_fastopen.c | 6 +++--- net/ipv4/tcp_input.c | 16 ++++++++-------- net/ipv4/tcp_ipv4.c | 6 +++--- net/ipv4/tcp_metrics.c | 2 +- net/ipv4/tcp_minisocks.c | 2 +- net/ipv4/tcp_output.c | 21 ++++++++++---------- net/ipv4/udp.c | 6 +++--- net/ipv4/udp_diag.c | 2 +- net/ipv4/xfrm4_input.c | 2 +- net/ipv4/xfrm4_policy.c | 2 +- 39 files changed, 210 insertions(+), 202 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 64a9c0fdc4aa..7d3b00c01bc8 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -217,7 +217,7 @@ int inet_listen(struct socket *sock, int backlog) * shutdown() (rather than close()). */ if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 && - inet_csk(sk)->icsk_accept_queue.fastopenq == NULL) { + !inet_csk(sk)->icsk_accept_queue.fastopenq) { if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0) err = fastopen_init_queue(sk, backlog); else if ((sysctl_tcp_fastopen & @@ -314,11 +314,11 @@ lookup_protocol: answer_flags = answer->flags; rcu_read_unlock(); - WARN_ON(answer_prot->slab == NULL); + WARN_ON(!answer_prot->slab); err = -ENOBUFS; sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot); - if (sk == NULL) + if (!sk) goto out; err = 0; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 5f5c674e130a..ffe84226a2c8 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -228,7 +228,7 @@ static int arp_constructor(struct neighbour *neigh) rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); - if (in_dev == NULL) { + if (!in_dev) { rcu_read_unlock(); return -EINVAL; } @@ -475,7 +475,7 @@ static inline int arp_fwd_pvlan(struct in_device *in_dev, */ /* - * Create an arp packet. If (dest_hw == NULL), we create a broadcast + * Create an arp packet. If dest_hw is not set, we create a broadcast * message. */ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, @@ -495,7 +495,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, */ skb = alloc_skb(arp_hdr_len(dev) + hlen + tlen, GFP_ATOMIC); - if (skb == NULL) + if (!skb) return NULL; skb_reserve(skb, hlen); @@ -503,9 +503,9 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, arp = (struct arphdr *) skb_put(skb, arp_hdr_len(dev)); skb->dev = dev; skb->protocol = htons(ETH_P_ARP); - if (src_hw == NULL) + if (!src_hw) src_hw = dev->dev_addr; - if (dest_hw == NULL) + if (!dest_hw) dest_hw = dev->broadcast; /* @@ -614,7 +614,7 @@ void arp_send(int type, int ptype, __be32 dest_ip, skb = arp_create(type, ptype, dest_ip, dev, src_ip, dest_hw, src_hw, target_hw); - if (skb == NULL) + if (!skb) return; arp_xmit(skb); @@ -644,7 +644,7 @@ static int arp_process(struct sk_buff *skb) * is ARP'able. */ - if (in_dev == NULL) + if (!in_dev) goto out; arp = arp_hdr(skb); @@ -808,7 +808,7 @@ static int arp_process(struct sk_buff *skb) is_garp = arp->ar_op == htons(ARPOP_REQUEST) && tip == sip && inet_addr_type(net, sip) == RTN_UNICAST; - if (n == NULL && + if (!n && ((arp->ar_op == htons(ARPOP_REPLY) && inet_addr_type(net, sip) == RTN_UNICAST) || is_garp)) n = __neigh_lookup(&arp_tbl, &sip, dev, 1); @@ -900,7 +900,7 @@ out_of_mem: static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on) { - if (dev == NULL) { + if (!dev) { IPV4_DEVCONF_ALL(net, PROXY_ARP) = on; return 0; } @@ -926,7 +926,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r, return -ENODEV; } if (mask) { - if (pneigh_lookup(&arp_tbl, net, &ip, dev, 1) == NULL) + if (!pneigh_lookup(&arp_tbl, net, &ip, dev, 1)) return -ENOBUFS; return 0; } @@ -947,7 +947,7 @@ static int arp_req_set(struct net *net, struct arpreq *r, ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; if (r->arp_flags & ATF_PERM) r->arp_flags |= ATF_COM; - if (dev == NULL) { + if (!dev) { struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0); if (IS_ERR(rt)) @@ -1067,7 +1067,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r, return arp_req_delete_public(net, r, dev); ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; - if (dev == NULL) { + if (!dev) { struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -1116,7 +1116,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (r.arp_dev[0]) { err = -ENODEV; dev = __dev_get_by_name(net, r.arp_dev); - if (dev == NULL) + if (!dev) goto out; /* Mmmm... It is wrong... ARPHRD_NETROM==0 */ diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index e361ea6f3fc8..1b28e1183c1b 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -255,7 +255,7 @@ static int __init cipso_v4_cache_init(void) cipso_v4_cache = kcalloc(CIPSO_V4_CACHE_BUCKETS, sizeof(struct cipso_v4_map_cache_bkt), GFP_KERNEL); - if (cipso_v4_cache == NULL) + if (!cipso_v4_cache) return -ENOMEM; for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) { @@ -339,7 +339,7 @@ static int cipso_v4_cache_check(const unsigned char *key, secattr->cache = entry->lsm_data; secattr->flags |= NETLBL_SECATTR_CACHE; secattr->type = NETLBL_NLTYPE_CIPSOV4; - if (prev_entry == NULL) { + if (!prev_entry) { spin_unlock_bh(&cipso_v4_cache[bkt].lock); return 0; } @@ -393,10 +393,10 @@ int cipso_v4_cache_add(const unsigned char *cipso_ptr, cipso_ptr_len = cipso_ptr[1]; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); - if (entry == NULL) + if (!entry) return -ENOMEM; entry->key = kmemdup(cipso_ptr, cipso_ptr_len, GFP_ATOMIC); - if (entry->key == NULL) { + if (!entry->key) { ret_val = -ENOMEM; goto cache_add_failure; } @@ -547,7 +547,7 @@ doi_add_return: */ void cipso_v4_doi_free(struct cipso_v4_doi *doi_def) { - if (doi_def == NULL) + if (!doi_def) return; switch (doi_def->type) { @@ -598,7 +598,7 @@ int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info) spin_lock(&cipso_v4_doi_list_lock); doi_def = cipso_v4_doi_search(doi); - if (doi_def == NULL) { + if (!doi_def) { spin_unlock(&cipso_v4_doi_list_lock); ret_val = -ENOENT; goto doi_remove_return; @@ -644,7 +644,7 @@ struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) rcu_read_lock(); doi_def = cipso_v4_doi_search(doi); - if (doi_def == NULL) + if (!doi_def) goto doi_getdef_return; if (!atomic_inc_not_zero(&doi_def->refcount)) doi_def = NULL; @@ -664,7 +664,7 @@ doi_getdef_return: */ void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def) { - if (doi_def == NULL) + if (!doi_def) return; if (!atomic_dec_and_test(&doi_def->refcount)) @@ -1642,7 +1642,7 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) rcu_read_lock(); doi_def = cipso_v4_doi_search(get_unaligned_be32(&opt[2])); - if (doi_def == NULL) { + if (!doi_def) { err_offset = 2; goto validate_return_locked; } @@ -1736,7 +1736,7 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) * not the loopback device drop the packet. Further, * there is no legitimate reason for setting this from * userspace so reject it if skb is NULL. */ - if (skb == NULL || !(skb->dev->flags & IFF_LOOPBACK)) { + if (!skb || !(skb->dev->flags & IFF_LOOPBACK)) { err_offset = opt_iter; goto validate_return_locked; } @@ -1897,7 +1897,7 @@ int cipso_v4_sock_setattr(struct sock *sk, * defined yet but it is not a problem as the only users of these * "lite" PF_INET sockets are functions which do an accept() call * afterwards so we will label the socket as part of the accept(). */ - if (sk == NULL) + if (!sk) return 0; /* We allocate the maximum CIPSO option size here so we are probably @@ -1905,7 +1905,7 @@ int cipso_v4_sock_setattr(struct sock *sk, * on and after all we are only talking about 40 bytes. */ buf_len = CIPSO_V4_OPT_LEN_MAX; buf = kmalloc(buf_len, GFP_ATOMIC); - if (buf == NULL) { + if (!buf) { ret_val = -ENOMEM; goto socket_setattr_failure; } @@ -1921,7 +1921,7 @@ int cipso_v4_sock_setattr(struct sock *sk, * set the IPOPT_CIPSO option. */ opt_len = (buf_len + 3) & ~3; opt = kzalloc(sizeof(*opt) + opt_len, GFP_ATOMIC); - if (opt == NULL) { + if (!opt) { ret_val = -ENOMEM; goto socket_setattr_failure; } @@ -1981,7 +1981,7 @@ int cipso_v4_req_setattr(struct request_sock *req, * on and after all we are only talking about 40 bytes. */ buf_len = CIPSO_V4_OPT_LEN_MAX; buf = kmalloc(buf_len, GFP_ATOMIC); - if (buf == NULL) { + if (!buf) { ret_val = -ENOMEM; goto req_setattr_failure; } @@ -1997,7 +1997,7 @@ int cipso_v4_req_setattr(struct request_sock *req, * set the IPOPT_CIPSO option. */ opt_len = (buf_len + 3) & ~3; opt = kzalloc(sizeof(*opt) + opt_len, GFP_ATOMIC); - if (opt == NULL) { + if (!opt) { ret_val = -ENOMEM; goto req_setattr_failure; } @@ -2102,7 +2102,7 @@ void cipso_v4_sock_delattr(struct sock *sk) sk_inet = inet_sk(sk); opt = rcu_dereference_protected(sk_inet->inet_opt, 1); - if (opt == NULL || opt->opt.cipso == 0) + if (!opt || opt->opt.cipso == 0) return; hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt); @@ -2128,7 +2128,7 @@ void cipso_v4_req_delattr(struct request_sock *req) req_inet = inet_rsk(req); opt = req_inet->opt; - if (opt == NULL || opt->opt.cipso == 0) + if (!opt || opt->opt.cipso == 0) return; cipso_v4_delopt(&req_inet->opt); @@ -2157,7 +2157,7 @@ int cipso_v4_getattr(const unsigned char *cipso, doi = get_unaligned_be32(&cipso[2]); rcu_read_lock(); doi_def = cipso_v4_doi_search(doi); - if (doi_def == NULL) + if (!doi_def) goto getattr_return; /* XXX - This code assumes only one tag per CIPSO option which isn't * really a good assumption to make but since we only support the MAC diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index c6473f365ad1..0ee21689d37e 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -585,7 +585,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) ifm = nlmsg_data(nlh); in_dev = inetdev_by_index(net, ifm->ifa_index); - if (in_dev == NULL) { + if (!in_dev) { err = -ENODEV; goto errout; } @@ -755,21 +755,21 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, ifm = nlmsg_data(nlh); err = -EINVAL; - if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) + if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL]) goto errout; dev = __dev_get_by_index(net, ifm->ifa_index); err = -ENODEV; - if (dev == NULL) + if (!dev) goto errout; in_dev = __in_dev_get_rtnl(dev); err = -ENOBUFS; - if (in_dev == NULL) + if (!in_dev) goto errout; ifa = inet_alloc_ifa(); - if (ifa == NULL) + if (!ifa) /* * A potential indev allocation can be left alive, it stays * assigned to its device and is destroy with it. @@ -780,7 +780,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, neigh_parms_data_state_setall(in_dev->arp_parms); in_dev_hold(in_dev); - if (tb[IFA_ADDRESS] == NULL) + if (!tb[IFA_ADDRESS]) tb[IFA_ADDRESS] = tb[IFA_LOCAL]; INIT_HLIST_NODE(&ifa->hash); @@ -1340,7 +1340,7 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) if (named++ == 0) goto skip; dot = strchr(old, ':'); - if (dot == NULL) { + if (!dot) { sprintf(old, ":%d", named); dot = old; } @@ -1509,7 +1509,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, u32 preferred, valid; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; ifm = nlmsg_data(nlh); @@ -1628,7 +1628,7 @@ static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, net = dev_net(ifa->ifa_dev->dev); skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL); - if (skb == NULL) + if (!skb) goto errout; err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0); @@ -1665,7 +1665,7 @@ static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev) return -ENODATA; nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4); - if (nla == NULL) + if (!nla) return -EMSGSIZE; for (i = 0; i < IPV4_DEVCONF_MAX; i++) @@ -1754,7 +1754,7 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; ncm = nlmsg_data(nlh); @@ -1796,7 +1796,7 @@ void inet_netconf_notify_devconf(struct net *net, int type, int ifindex, int err = -ENOBUFS; skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC); - if (skb == NULL) + if (!skb) goto errout; err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, @@ -1853,10 +1853,10 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb, break; default: dev = __dev_get_by_index(net, ifindex); - if (dev == NULL) + if (!dev) goto errout; in_dev = __in_dev_get_rtnl(dev); - if (in_dev == NULL) + if (!in_dev) goto errout; devconf = &in_dev->cnf; break; @@ -1864,7 +1864,7 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb, err = -ENOBUFS; skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC); - if (skb == NULL) + if (!skb) goto errout; err = inet_netconf_fill_devconf(skb, ifindex, devconf, @@ -2215,7 +2215,7 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf) { struct devinet_sysctl_table *t = cnf->sysctl; - if (t == NULL) + if (!t) return; cnf->sysctl = NULL; @@ -2276,16 +2276,16 @@ static __net_init int devinet_init_net(struct net *net) if (!net_eq(net, &init_net)) { all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL); - if (all == NULL) + if (!all) goto err_alloc_all; dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL); - if (dflt == NULL) + if (!dflt) goto err_alloc_dflt; #ifdef CONFIG_SYSCTL tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL); - if (tbl == NULL) + if (!tbl) goto err_alloc_ctl; tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1]; @@ -2305,7 +2305,7 @@ static __net_init int devinet_init_net(struct net *net) err = -ENOMEM; forw_hdr = register_net_sysctl(net, "net/ipv4", tbl); - if (forw_hdr == NULL) + if (!forw_hdr) goto err_reg_ctl; net->ipv4.forw_hdr = forw_hdr; #endif diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 60173d4d3a0e..421a80b09b62 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -553,7 +553,7 @@ static int esp_init_authenc(struct xfrm_state *x) int err; err = -EINVAL; - if (x->ealg == NULL) + if (!x->ealg) goto error; err = -ENAMETOOLONG; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 718b0a16ea40..2166d2bf1562 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -53,11 +53,11 @@ static int __net_init fib4_rules_init(struct net *net) struct fib_table *local_table, *main_table; main_table = fib_trie_table(RT_TABLE_MAIN, NULL); - if (main_table == NULL) + if (!main_table) return -ENOMEM; local_table = fib_trie_table(RT_TABLE_LOCAL, main_table); - if (local_table == NULL) + if (!local_table) goto fail; hlist_add_head_rcu(&local_table->tb_hlist, @@ -486,7 +486,7 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) if (strcmp(ifa->ifa_label, devname) == 0) break; - if (ifa == NULL) + if (!ifa) return -ENODEV; cfg->fc_prefsrc = ifa->ifa_local; } @@ -514,7 +514,7 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, int len = 0; mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL); - if (mx == NULL) + if (!mx) return -ENOMEM; if (rt->rt_flags & RTF_MTU) @@ -676,7 +676,7 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) goto errout; tb = fib_get_table(net, cfg.fc_table); - if (tb == NULL) { + if (!tb) { err = -ESRCH; goto errout; } @@ -698,7 +698,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) goto errout; tb = fib_new_table(net, cfg.fc_table); - if (tb == NULL) { + if (!tb) { err = -ENOBUFS; goto errout; } @@ -779,7 +779,7 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad else tb = fib_new_table(net, RT_TABLE_LOCAL); - if (tb == NULL) + if (!tb) return; cfg.fc_table = tb->tb_id; @@ -806,7 +806,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) if (ifa->ifa_flags & IFA_F_SECONDARY) { prim = inet_ifa_byprefix(in_dev, prefix, mask); - if (prim == NULL) { + if (!prim) { pr_warn("%s: bug: prim == NULL\n", __func__); return; } @@ -860,7 +860,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) if (ifa->ifa_flags & IFA_F_SECONDARY) { prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); - if (prim == NULL) { + if (!prim) { pr_warn("%s: bug: prim == NULL\n", __func__); return; } @@ -1030,7 +1030,7 @@ static void nl_fib_input(struct sk_buff *skb) return; skb = netlink_skb_clone(skb, GFP_KERNEL); - if (skb == NULL) + if (!skb) return; nlh = nlmsg_hdr(skb); @@ -1051,7 +1051,7 @@ static int __net_init nl_fib_lookup_init(struct net *net) }; sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, &cfg); - if (sk == NULL) + if (!sk) return -EAFNOSUPPORT; net->ipv4.fibnl = sk; return 0; @@ -1089,7 +1089,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, case NETDEV_DOWN: fib_del_ifaddr(ifa, NULL); atomic_inc(&net->ipv4.dev_addr_genid); - if (ifa->ifa_dev->ifa_list == NULL) { + if (!ifa->ifa_dev->ifa_list) { /* Last address was deleted from this interface. * Disable IP. */ @@ -1157,7 +1157,7 @@ static int __net_init ip_fib_net_init(struct net *net) size = max_t(size_t, size, L1_CACHE_BYTES); net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL); - if (net->ipv4.fib_table_hash == NULL) + if (!net->ipv4.fib_table_hash) return -ENOMEM; err = fib4_rules_init(net); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 8162dd8e86d7..56151982f74e 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -153,7 +153,7 @@ static struct fib_table *fib_empty_table(struct net *net) u32 id; for (id = 1; id <= RT_TABLE_MAX; id++) - if (fib_get_table(net, id) == NULL) + if (!fib_get_table(net, id)) return fib_new_table(net, id); return NULL; } @@ -184,7 +184,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct fib_table *table; table = fib_empty_table(net); - if (table == NULL) { + if (!table) { err = -ENOBUFS; goto errout; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index eac5aec7772a..8d695b6659c7 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -390,7 +390,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int err = -ENOBUFS; skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL); - if (skb == NULL) + if (!skb) goto errout; err = fib_dump_info(skb, info->portid, seq, event, tb_id, @@ -503,7 +503,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) } #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (cfg->fc_mp == NULL) + if (!cfg->fc_mp) return 0; rtnh = cfg->fc_mp; @@ -646,7 +646,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, rcu_read_lock(); err = -ENODEV; in_dev = inetdev_by_index(net, nh->nh_oif); - if (in_dev == NULL) + if (!in_dev) goto out; err = -ENETDOWN; if (!(in_dev->dev->flags & IFF_UP)) @@ -803,7 +803,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) } fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); - if (fi == NULL) + if (!fi) goto failure; fib_info_cnt++; if (cfg->fc_mx) { @@ -921,7 +921,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) nh->nh_scope = RT_SCOPE_NOWHERE; nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif); err = -ENODEV; - if (nh->nh_dev == NULL) + if (!nh->nh_dev) goto failure; } else { change_nexthops(fi) { @@ -995,7 +995,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, struct rtmsg *rtm; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; rtm = nlmsg_data(nlh); @@ -1045,12 +1045,12 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, struct nlattr *mp; mp = nla_nest_start(skb, RTA_MULTIPATH); - if (mp == NULL) + if (!mp) goto nla_put_failure; for_nexthops(fi) { rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); - if (rtnh == NULL) + if (!rtnh) goto nla_put_failure; rtnh->rtnh_flags = nh->nh_flags & 0xFF; @@ -1093,7 +1093,7 @@ int fib_sync_down_addr(struct net *net, __be32 local) struct hlist_head *head = &fib_info_laddrhash[hash]; struct fib_info *fi; - if (fib_info_laddrhash == NULL || local == 0) + if (!fib_info_laddrhash || local == 0) return 0; hlist_for_each_entry(fi, head, fib_lhash) { @@ -1182,7 +1182,7 @@ void fib_select_default(struct fib_result *res) fib_alias_accessed(fa); - if (fi == NULL) { + if (!fi) { if (next_fi != res->fi) break; } else if (!fib_detect_death(fi, order, &last_resort, @@ -1195,7 +1195,7 @@ void fib_select_default(struct fib_result *res) order++; } - if (order <= 0 || fi == NULL) { + if (order <= 0 || !fi) { tb->tb_default = -1; goto out; } @@ -1251,7 +1251,7 @@ int fib_sync_up(struct net_device *dev) alive++; continue; } - if (nexthop_nh->nh_dev == NULL || + if (!nexthop_nh->nh_dev || !(nexthop_nh->nh_dev->flags & IFF_UP)) continue; if (nexthop_nh->nh_dev != dev || diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 2c7c299ee2b9..9e4a3e3423b4 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -391,9 +391,9 @@ static void put_child(struct key_vector *tn, unsigned long i, BUG_ON(i >= child_length(tn)); /* update emptyChildren, overflow into fullChildren */ - if (n == NULL && chi != NULL) + if (!n && chi != NULL) empty_child_inc(tn); - if (n != NULL && chi == NULL) + if (n != NULL && !chi) empty_child_dec(tn); /* update fullChildren */ @@ -528,7 +528,7 @@ static struct key_vector *inflate(struct trie *t, unsigned long j, k; /* An empty child */ - if (inode == NULL) + if (!inode) continue; /* A leaf or an internal node with skipped bits */ @@ -1154,7 +1154,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) } err = -ENOBUFS; new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); - if (new_fa == NULL) + if (!new_fa) goto out; fi_drop = fa->fa_info; @@ -1204,7 +1204,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) err = -ENOBUFS; new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); - if (new_fa == NULL) + if (!new_fa) goto out; new_fa->fa_info = fi; @@ -1975,7 +1975,7 @@ struct fib_table *fib_trie_table(u32 id, struct fib_table *alias) sz += sizeof(struct trie); tb = kzalloc(sz, GFP_KERNEL); - if (tb == NULL) + if (!tb) return NULL; tb->tb_id = id; diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 5a4828ba05ad..a7d8be3dd3de 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -196,7 +196,7 @@ static struct sk_buff **geneve_gro_receive(struct sk_buff **head, rcu_read_lock(); ptype = gro_find_receive_by_type(type); - if (ptype == NULL) { + if (!ptype) { flush = 1; goto out_unlock; } diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 51973ddc05a6..9358f11aae40 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -149,7 +149,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, rcu_read_lock(); ptype = gro_find_receive_by_type(type); - if (ptype == NULL) + if (!ptype) goto out_unlock; grehlen = GRE_HEADER_SECTION; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 5e564014a0b7..f5203fba6236 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -399,7 +399,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) return; sk = icmp_xmit_lock(net); - if (sk == NULL) + if (!sk) return; inet = inet_sk(sk); @@ -609,7 +609,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) skb_in->data, sizeof(_inner_type), &_inner_type); - if (itp == NULL) + if (!itp) goto out; /* @@ -627,7 +627,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) return; sk = icmp_xmit_lock(net); - if (sk == NULL) + if (!sk) goto out_free; /* diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index ad09213ac5b2..27d204b834f9 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -692,7 +692,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; skb = alloc_skb(IGMP_SIZE + hlen + tlen, GFP_ATOMIC); - if (skb == NULL) { + if (!skb) { ip_rt_put(rt); return -1; } @@ -981,7 +981,7 @@ int igmp_rcv(struct sk_buff *skb) int len = skb->len; bool dropped = true; - if (in_dev == NULL) + if (!in_dev) goto drop; if (!pskb_may_pull(skb, sizeof(struct igmphdr))) @@ -1888,7 +1888,7 @@ int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr) if (count >= sysctl_igmp_max_memberships) goto done; iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL); - if (iml == NULL) + if (!iml) goto done; memcpy(&iml->multi, imr, sizeof(*imr)); @@ -1909,7 +1909,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, struct ip_sf_socklist *psf = rtnl_dereference(iml->sflist); int err; - if (psf == NULL) { + if (!psf) { /* any-source empty exclude case */ return ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, iml->sfmode, 0, NULL, 0); @@ -2360,7 +2360,7 @@ void ip_mc_drop_socket(struct sock *sk) struct ip_mc_socklist *iml; struct net *net = sock_net(sk); - if (inet->mc_list == NULL) + if (!inet->mc_list) return; rtnl_lock(); @@ -2587,7 +2587,7 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) for_each_netdev_rcu(net, state->dev) { struct in_device *idev; idev = __in_dev_get_rcu(state->dev); - if (unlikely(idev == NULL)) + if (unlikely(!idev)) continue; im = rcu_dereference(idev->mc_list); if (likely(im != NULL)) { diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index e7920352646a..5e346a082e5f 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -385,7 +385,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, } q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC); - if (q == NULL) + if (!q) return NULL; q->net = nf; @@ -406,7 +406,7 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, struct inet_frag_queue *q; q = inet_frag_alloc(nf, f, arg); - if (q == NULL) + if (!q) return NULL; return inet_frag_intern(nf, q, f, arg); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 145a50c4d566..5a6cf8667a9d 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -372,7 +372,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) goto err; err = -ENOMEM; - if (pskb_pull(skb, ihl) == NULL) + if (!pskb_pull(skb, ihl)) goto err; err = pskb_trim_rcsum(skb, end - offset); @@ -537,7 +537,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, qp->q.fragments = head; } - WARN_ON(head == NULL); + WARN_ON(!head); WARN_ON(FRAG_CB(head)->offset != 0); /* Allocate a new buffer for the datagram. */ @@ -559,7 +559,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, struct sk_buff *clone; int i, plen = 0; - if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) + clone = alloc_skb(0, GFP_ATOMIC); + if (!clone) goto out_nomem; clone->next = head->next; head->next = clone; @@ -754,7 +755,7 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net) table = ip4_frags_ns_ctl_table; if (!net_eq(net, &init_net)) { table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL); - if (table == NULL) + if (!table) goto err_alloc; table[0].data = &net->ipv4.frags.high_thresh; @@ -770,7 +771,7 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net) } hdr = register_net_sysctl(net, "net/ipv4", table); - if (hdr == NULL) + if (!hdr) goto err_reg; net->ipv4.frags_hdr = hdr; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 1060ca0bc23a..5fd706473c73 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -182,7 +182,7 @@ static int ipgre_err(struct sk_buff *skb, u32 info, t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, iph->daddr, iph->saddr, tpi->key); - if (t == NULL) + if (!t) return PACKET_REJECT; if (t->parms.iph.daddr == 0 || @@ -423,7 +423,7 @@ static int ipgre_open(struct net_device *dev) return -EADDRNOTAVAIL; dev = rt->dst.dev; ip_rt_put(rt); - if (__in_dev_get_rtnl(dev) == NULL) + if (!__in_dev_get_rtnl(dev)) return -EADDRNOTAVAIL; t->mlink = dev->ifindex; ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 3d4da2c16b6a..00bed6fe3b66 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -314,7 +314,7 @@ static int ip_rcv_finish(struct sk_buff *skb) const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; - if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { + if (sysctl_ip_early_demux && !skb_dst(skb) && !skb->sk) { const struct net_protocol *ipprot; int protocol = iph->protocol; @@ -387,7 +387,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, IP_UPD_PO_STATS_BH(dev_net(dev), IPSTATS_MIB_IN, skb->len); - if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) { + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) { IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); goto out; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 8259e777b249..561d67b2ac74 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -182,7 +182,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) struct sk_buff *skb2; skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev)); - if (skb2 == NULL) { + if (!skb2) { kfree_skb(skb); return -ENOMEM; } @@ -381,7 +381,7 @@ int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl) /* Make sure we can route this packet. */ rt = (struct rtable *)__sk_dst_check(sk, 0); - if (rt == NULL) { + if (!rt) { __be32 daddr; /* Use correct destination address if we have options. */ @@ -790,12 +790,13 @@ static inline int ip_ufo_append_data(struct sock *sk, * device, so create one single skb packet containing complete * udp datagram */ - if ((skb = skb_peek_tail(queue)) == NULL) { + skb = skb_peek_tail(queue); + if (!skb) { skb = sock_alloc_send_skb(sk, hh_len + fragheaderlen + transhdrlen + 20, (flags & MSG_DONTWAIT), &err); - if (skb == NULL) + if (!skb) return err; /* reserve space for Hardware header */ @@ -961,10 +962,10 @@ alloc_new_skb: skb = sock_wmalloc(sk, alloclen + hh_len + 15, 1, sk->sk_allocation); - if (unlikely(skb == NULL)) + if (unlikely(!skb)) err = -ENOBUFS; } - if (skb == NULL) + if (!skb) goto error; /* @@ -1088,10 +1089,10 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, */ opt = ipc->opt; if (opt) { - if (cork->opt == NULL) { + if (!cork->opt) { cork->opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation); - if (unlikely(cork->opt == NULL)) + if (unlikely(!cork->opt)) return -ENOBUFS; } memcpy(cork->opt, &opt->opt, sizeof(struct ip_options) + opt->opt.optlen); @@ -1198,7 +1199,8 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, return -EMSGSIZE; } - if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) + skb = skb_peek_tail(&sk->sk_write_queue); + if (!skb) return -EINVAL; cork->length += size; @@ -1329,7 +1331,8 @@ struct sk_buff *__ip_make_skb(struct sock *sk, __be16 df = 0; __u8 ttl; - if ((skb = __skb_dequeue(queue)) == NULL) + skb = __skb_dequeue(queue); + if (!skb) goto out; tail_skb = &(skb_shinfo(skb)->frag_list); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index f6a0d54b308a..f64b1b24c64f 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -351,7 +351,7 @@ int ip_ra_control(struct sock *sk, unsigned char on, return 0; } } - if (new_ra == NULL) { + if (!new_ra) { spin_unlock_bh(&ip_ra_lock); return -ENOBUFS; } @@ -482,7 +482,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) err = -EAGAIN; skb = sock_dequeue_err_skb(sk); - if (skb == NULL) + if (!skb) goto out; copied = skb->len; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 4bb7252110a6..31eaa9ba1803 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -654,7 +654,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, if (dst == 0) { /* NBMA tunnel */ - if (skb_dst(skb) == NULL) { + if (!skb_dst(skb)) { dev->stats.tx_fifo_errors++; goto tx_error; } @@ -672,7 +672,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr); - if (neigh == NULL) + if (!neigh) goto tx_error; addr6 = (const struct in6_addr *)&neigh->primary_key; @@ -843,7 +843,7 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) case SIOCGETTUNNEL: if (dev == itn->fb_tunnel_dev) { t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); - if (t == NULL) + if (!t) t = netdev_priv(dev); } memcpy(p, &t->parms, sizeof(*p)); @@ -914,7 +914,7 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) if (dev == itn->fb_tunnel_dev) { err = -ENOENT; t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); - if (t == NULL) + if (!t) goto done; err = -EPERM; if (t == netdev_priv(itn->fb_tunnel_dev)) diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index c0855d50a3fa..d97f4f2787f5 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -63,7 +63,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) struct xfrm_state *t; t = xfrm_state_alloc(net); - if (t == NULL) + if (!t) goto out; t->id.proto = IPPROTO_IPIP; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index b26376ef87f6..8e7328c6a390 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -504,7 +504,8 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (!net_eq(dev_net(dev), &init_net)) goto drop; - if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) return NET_RX_DROP; if (!pskb_may_pull(skb, sizeof(struct arphdr))) @@ -958,7 +959,8 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str if (skb->pkt_type == PACKET_OTHERHOST) goto drop; - if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) return NET_RX_DROP; if (!pskb_may_pull(skb, diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 5c81f6e40842..ff96396ebec5 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -144,7 +144,7 @@ static int ipip_err(struct sk_buff *skb, u32 info) err = -ENOENT; t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, iph->daddr, iph->saddr, 0); - if (t == NULL) + if (!t) goto out; if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 3ef30cf57f4a..a170e4bc9006 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -189,7 +189,7 @@ static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp, } mrt = ipmr_get_table(rule->fr_net, rule->table); - if (mrt == NULL) + if (!mrt) return -EAGAIN; res->mrt = mrt; return 0; @@ -253,7 +253,7 @@ static int __net_init ipmr_rules_init(struct net *net) INIT_LIST_HEAD(&net->ipv4.mr_tables); mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); - if (mrt == NULL) { + if (!mrt) { err = -ENOMEM; goto err1; } @@ -320,7 +320,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) return mrt; mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); - if (mrt == NULL) + if (!mrt) return NULL; write_pnet(&mrt->net, net); mrt->id = id; @@ -422,7 +422,7 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) dev->flags |= IFF_MULTICAST; in_dev = __in_dev_get_rtnl(dev); - if (in_dev == NULL) + if (!in_dev) goto failure; ipv4_devconf_setall(in_dev); @@ -506,7 +506,7 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); - if (dev == NULL) + if (!dev) return NULL; dev_net_set(dev, net); @@ -762,7 +762,7 @@ static int vif_add(struct net *net, struct mr_table *mrt, case 0: if (vifc->vifc_flags == VIFF_USE_IFINDEX) { dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex); - if (dev && __in_dev_get_rtnl(dev) == NULL) { + if (dev && !__in_dev_get_rtnl(dev)) { dev_put(dev); return -EADDRNOTAVAIL; } @@ -1008,7 +1008,7 @@ static int ipmr_cache_report(struct mr_table *mrt, rcu_read_lock(); mroute_sk = rcu_dereference(mrt->mroute_sk); - if (mroute_sk == NULL) { + if (!mroute_sk) { rcu_read_unlock(); kfree_skb(skb); return -EINVAL; @@ -1161,7 +1161,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, return -EINVAL; c = ipmr_cache_alloc(); - if (c == NULL) + if (!c) return -ENOMEM; c->mfc_origin = mfc->mfcc_origin.s_addr; @@ -1283,7 +1283,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi return -EOPNOTSUPP; mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); - if (mrt == NULL) + if (!mrt) return -ENOENT; if (optname != MRT_INIT) { @@ -1446,7 +1446,7 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int return -EOPNOTSUPP; mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); - if (mrt == NULL) + if (!mrt) return -ENOENT; if (optname != MRT_VERSION && @@ -1492,7 +1492,7 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) struct mr_table *mrt; mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); - if (mrt == NULL) + if (!mrt) return -ENOENT; switch (cmd) { @@ -1566,7 +1566,7 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) struct mr_table *mrt; mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); - if (mrt == NULL) + if (!mrt) return -ENOENT; switch (cmd) { @@ -1701,7 +1701,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, struct flowi4 fl4; int encap = 0; - if (vif->dev == NULL) + if (!vif->dev) goto out_free; #ifdef CONFIG_IP_PIMSM @@ -1992,7 +1992,7 @@ int ip_mr_input(struct sk_buff *skb) /* already under rcu_read_lock() */ cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); - if (cache == NULL) { + if (!cache) { int vif = ipmr_find_vif(mrt, skb->dev); if (vif >= 0) @@ -2003,13 +2003,13 @@ int ip_mr_input(struct sk_buff *skb) /* * No usable cache entry */ - if (cache == NULL) { + if (!cache) { int vif; if (local) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); ip_local_deliver(skb); - if (skb2 == NULL) + if (!skb2) return -ENOBUFS; skb = skb2; } @@ -2068,7 +2068,7 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; read_unlock(&mrt_lock); - if (reg_dev == NULL) + if (!reg_dev) return 1; skb->mac_header = skb->network_header; @@ -2198,18 +2198,18 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, int err; mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); - if (mrt == NULL) + if (!mrt) return -ENOENT; rcu_read_lock(); cache = ipmr_cache_find(mrt, saddr, daddr); - if (cache == NULL && skb->dev) { + if (!cache && skb->dev) { int vif = ipmr_find_vif(mrt, skb->dev); if (vif >= 0) cache = ipmr_cache_find_any(mrt, daddr, vif); } - if (cache == NULL) { + if (!cache) { struct sk_buff *skb2; struct iphdr *iph; struct net_device *dev; @@ -2267,7 +2267,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, int err; nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; rtm = nlmsg_data(nlh); @@ -2332,7 +2332,7 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif), GFP_ATOMIC); - if (skb == NULL) + if (!skb) goto errout; err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); @@ -2447,7 +2447,7 @@ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) struct mr_table *mrt; mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); - if (mrt == NULL) + if (!mrt) return ERR_PTR(-ENOENT); iter->mrt = mrt; @@ -2566,7 +2566,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) struct mr_table *mrt; mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); - if (mrt == NULL) + if (!mrt) return ERR_PTR(-ENOENT); it->mrt = mrt; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 344e7cdfb8d4..2dcd2e60df64 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -516,7 +516,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info) ntohs(icmph->un.echo.sequence)); sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); - if (sk == NULL) { + if (!sk) { pr_debug("no socket, dropping\n"); return; /* No socket for error */ } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 56946f47d446..46a78204189d 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -363,7 +363,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb = sock_alloc_send_skb(sk, length + hlen + tlen + 15, flags & MSG_DONTWAIT, &err); - if (skb == NULL) + if (!skb) goto error; skb_reserve(skb, hlen); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 652b92ebd7ba..26a1cb348b3d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1056,7 +1056,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); rt = (struct rtable *)odst; - if (odst->obsolete && odst->ops->check(odst, 0) == NULL) { + if (odst->obsolete && !odst->ops->check(odst, 0)) { rt = ip_route_output_flow(sock_net(sk), &fl4, sk); if (IS_ERR(rt)) goto out; @@ -1450,7 +1450,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, /* Primary sanity checks. */ - if (in_dev == NULL) + if (!in_dev) return -EINVAL; if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || @@ -1553,7 +1553,7 @@ static int __mkroute_input(struct sk_buff *skb, /* get a working reference to the output device */ out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); - if (out_dev == NULL) { + if (!out_dev) { net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n"); return -EINVAL; } @@ -2054,7 +2054,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) ipv4_is_lbcast(fl4->daddr))) { /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ dev_out = __ip_dev_find(net, fl4->saddr, false); - if (dev_out == NULL) + if (!dev_out) goto out; /* Special hack: user can direct multicasts @@ -2087,7 +2087,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) if (fl4->flowi4_oif) { dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif); rth = ERR_PTR(-ENODEV); - if (dev_out == NULL) + if (!dev_out) goto out; /* RACE: Check return value of inet_select_addr instead. */ @@ -2299,7 +2299,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 metrics[RTAX_MAX]; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; r = nlmsg_data(nlh); @@ -2421,7 +2421,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) rtm = nlmsg_data(nlh); skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (skb == NULL) { + if (!skb) { err = -ENOBUFS; goto errout; } @@ -2452,7 +2452,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) struct net_device *dev; dev = __dev_get_by_index(net, iif); - if (dev == NULL) { + if (!dev) { err = -ENODEV; goto errout_free; } @@ -2651,7 +2651,7 @@ static __net_init int sysctl_route_net_init(struct net *net) tbl = ipv4_route_flush_table; if (!net_eq(net, &init_net)) { tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL); - if (tbl == NULL) + if (!tbl) goto err_dup; /* Don't export sysctls to unprivileged users */ @@ -2661,7 +2661,7 @@ static __net_init int sysctl_route_net_init(struct net *net) tbl[0].extra1 = net; net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl); - if (net->ipv4.route_hdr == NULL) + if (!net->ipv4.route_hdr) goto err_reg; return 0; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index fdf899163d44..c3852a7ff3c7 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -909,7 +909,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) int i; table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL); - if (table == NULL) + if (!table) goto err_alloc; /* Update the variables to point into the current struct net */ @@ -918,7 +918,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) } net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); - if (net->ipv4.ipv4_hdr == NULL) + if (!net->ipv4.ipv4_hdr) goto err_reg; net->ipv4.sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); @@ -956,7 +956,7 @@ static __init int sysctl_ipv4_init(void) struct ctl_table_header *hdr; hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table); - if (hdr == NULL) + if (!hdr) return -ENOMEM; if (register_pernet_subsys(&ipv4_sysctl_ops)) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index dbd51cefaf02..5bd809bfd0aa 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1047,7 +1047,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, tp->fastopen_req = kzalloc(sizeof(struct tcp_fastopen_request), sk->sk_allocation); - if (unlikely(tp->fastopen_req == NULL)) + if (unlikely(!tp->fastopen_req)) return -ENOBUFS; tp->fastopen_req->data = msg; tp->fastopen_req->size = size; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 2eb887ec0ce3..5da55e2b5cd2 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -141,7 +141,7 @@ static bool tcp_fastopen_create_child(struct sock *sk, req->sk = NULL; child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); - if (child == NULL) + if (!child) return false; spin_lock(&queue->fastopenq->lock); @@ -214,7 +214,7 @@ static bool tcp_fastopen_create_child(struct sock *sk, sk->sk_data_ready(sk); bh_unlock_sock(child); sock_put(child); - WARN_ON(req->sk == NULL); + WARN_ON(!req->sk); return true; } @@ -233,7 +233,7 @@ static bool tcp_fastopen_queue_check(struct sock *sk) * temporarily vs a server not supporting Fast Open at all. */ fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq; - if (fastopenq == NULL || fastopenq->max_qlen == 0) + if (!fastopenq || fastopenq->max_qlen == 0) return false; if (fastopenq->qlen >= fastopenq->max_qlen) { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 18b80e8bc533..1fd283684303 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -866,7 +866,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric, /* This must be called before lost_out is incremented */ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) { - if ((tp->retransmit_skb_hint == NULL) || + if (!tp->retransmit_skb_hint || before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) tp->retransmit_skb_hint = skb; @@ -1614,7 +1614,7 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb, struct tcp_sacktag_state *state, u32 skip_to_seq) { - if (next_dup == NULL) + if (!next_dup) return skb; if (before(next_dup->start_seq, skip_to_seq)) { @@ -1783,7 +1783,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, if (tcp_highest_sack_seq(tp) == cache->end_seq) { /* ...but better entrypoint exists! */ skb = tcp_highest_sack(sk); - if (skb == NULL) + if (!skb) break; state.fack_count = tp->fackets_out; cache++; @@ -1798,7 +1798,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, if (!before(start_seq, tcp_highest_sack_seq(tp))) { skb = tcp_highest_sack(sk); - if (skb == NULL) + if (!skb) break; state.fack_count = tp->fackets_out; } @@ -3698,7 +3698,7 @@ void tcp_parse_options(const struct sk_buff *skb, */ if (opsize < TCPOLEN_EXP_FASTOPEN_BASE || get_unaligned_be16(ptr) != TCPOPT_FASTOPEN_MAGIC || - foc == NULL || !th->syn || (opsize & 1)) + !foc || !th->syn || (opsize & 1)) break; foc->len = opsize - TCPOLEN_EXP_FASTOPEN_BASE; if (foc->len >= TCP_FASTOPEN_COOKIE_MIN && @@ -4669,7 +4669,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk) struct sk_buff *head; u32 start, end; - if (skb == NULL) + if (!skb) return; start = TCP_SKB_CB(skb)->seq; @@ -5124,7 +5124,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, { struct tcp_sock *tp = tcp_sk(sk); - if (unlikely(sk->sk_rx_dst == NULL)) + if (unlikely(!sk->sk_rx_dst)) inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); /* * Header prediction. @@ -5694,7 +5694,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && sk->sk_state != TCP_FIN_WAIT1); - if (tcp_check_req(sk, skb, req, true) == NULL) + if (!tcp_check_req(sk, skb, req, true)) goto discard; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 69f9cf684744..9ff311cf00f3 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -122,7 +122,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) and use initial timestamp retrieved from peer table. */ if (tcptw->tw_ts_recent_stamp && - (twp == NULL || (sysctl_tcp_tw_reuse && + (!twp || (sysctl_tcp_tw_reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; if (tp->write_seq == 0) @@ -494,7 +494,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) /* Only in fast or simultaneous open. If a fast open socket is * is already accepted it is treated as a connected one below. */ - if (fastopen && fastopen->sk == NULL) + if (fastopen && !fastopen->sk) break; if (!sock_owned_by_user(sk)) { @@ -1390,7 +1390,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) sk_mark_napi_id(sk, skb); if (dst) { if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || - dst->ops->check(dst, 0) == NULL) { + !dst->ops->check(dst, 0)) { dst_release(dst); sk->sk_rx_dst = NULL; } diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 71ec14c87579..78ecc4a01712 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -505,7 +505,7 @@ void tcp_init_metrics(struct sock *sk) struct tcp_metrics_block *tm; u32 val, crtt = 0; /* cached RTT scaled by 8 */ - if (dst == NULL) + if (!dst) goto reset; dst_confirm(dst); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 274e96fb369b..f0db1599a09c 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -763,7 +763,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * socket is created, wait for troubles. */ child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); - if (child == NULL) + if (!child) goto listen_overflow; inet_csk_reqsk_queue_unlink(sk, req); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2e69b8d16e68..bdc80734cd2c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -565,7 +565,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, opts->mss = tcp_advertise_mss(sk); remaining -= TCPOLEN_MSS_ALIGNED; - if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { + if (likely(sysctl_tcp_timestamps && !*md5)) { opts->options |= OPTION_TS; opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset; opts->tsecr = tp->rx_opt.ts_recent; @@ -1148,7 +1148,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, /* Get a new skb... force flag on. */ buff = sk_stream_alloc_skb(sk, nsize, gfp); - if (buff == NULL) + if (!buff) return -ENOMEM; /* We'll just try again later. */ sk->sk_wmem_queued += buff->truesize; @@ -1707,7 +1707,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, return tcp_fragment(sk, skb, len, mss_now, gfp); buff = sk_stream_alloc_skb(sk, 0, gfp); - if (unlikely(buff == NULL)) + if (unlikely(!buff)) return -ENOMEM; sk->sk_wmem_queued += buff->truesize; @@ -1925,7 +1925,8 @@ static int tcp_mtu_probe(struct sock *sk) } /* We're allowed to probe. Build it now. */ - if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL) + nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC); + if (!nskb) return -1; sk->sk_wmem_queued += nskb->truesize; sk_mem_charge(sk, nskb->truesize); @@ -2733,7 +2734,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (skb == tcp_send_head(sk)) break; /* we could do better than to assign each time */ - if (hole == NULL) + if (!hole) tp->retransmit_skb_hint = skb; /* Assume this retransmit will generate @@ -2765,7 +2766,7 @@ begin_fwd: goto begin_fwd; } else if (!(sacked & TCPCB_LOST)) { - if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED))) + if (!hole && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED))) hole = skb; continue; @@ -2868,14 +2869,14 @@ int tcp_send_synack(struct sock *sk) struct sk_buff *skb; skb = tcp_write_queue_head(sk); - if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { + if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { pr_debug("%s: wrong queue state\n", __func__); return -EFAULT; } if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) { if (skb_cloned(skb)) { struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); - if (nskb == NULL) + if (!nskb) return -ENOMEM; tcp_unlink_write_queue(skb, sk); __skb_header_release(nskb); @@ -3300,7 +3301,7 @@ void tcp_send_ack(struct sock *sk) * sock. */ buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); - if (buff == NULL) { + if (!buff) { inet_csk_schedule_ack(sk); inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, @@ -3344,7 +3345,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) /* We don't queue it, tcp_transmit_skb() sets ownership. */ skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); - if (skb == NULL) + if (!skb) return -1; /* Reserve space for headers and set control bits. */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 294af16633af..9f525a2a68df 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -633,7 +633,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex, udptable); - if (sk == NULL) { + if (!sk) { ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; /* No socket for error */ } @@ -1011,7 +1011,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (connected) rt = (struct rtable *)sk_dst_check(sk, 0); - if (rt == NULL) { + if (!rt) { struct net *net = sock_net(sk); fl4 = &fl4_stack; @@ -1619,7 +1619,7 @@ static void flush_stack(struct sock **stack, unsigned int count, for (i = 0; i < count; i++) { sk = stack[i]; - if (likely(skb1 == NULL)) + if (likely(!skb1)) skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC); if (!skb1) { diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 2dbfc1f1f7b3..b763c39ae1d7 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -58,7 +58,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, goto out_nosk; err = -ENOENT; - if (sk == NULL) + if (!sk) goto out_nosk; err = sock_diag_check_cookie(sk, req->id.idiag_cookie); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index aac6197b7a71..cac7468db0a1 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -24,7 +24,7 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb) static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) { - if (skb_dst(skb) == NULL) { + if (!skb_dst(skb)) { const struct iphdr *iph = ip_hdr(skb); if (ip_route_input_noref(skb, iph->daddr, iph->saddr, diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index c224c856247b..bff69746e05f 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -298,7 +298,7 @@ static void __net_exit xfrm4_net_exit(struct net *net) { struct ctl_table *table; - if (net->ipv4.xfrm4_hdr == NULL) + if (!net->ipv4.xfrm4_hdr) return; table = net->ipv4.xfrm4_hdr->ctl_table_arg; -- cgit v1.2.3 From 00db41243e8d5032c2e0f5bf6063bb19324bfdb3 Mon Sep 17 00:00:00 2001 From: Ian Morris Date: Fri, 3 Apr 2015 09:17:27 +0100 Subject: ipv4: coding style: comparison for inequality with NULL The ipv4 code uses a mixture of coding styles. In some instances check for non-NULL pointer is done as x != NULL and sometimes as x. x is preferred according to checkpatch and this patch makes the code consistent by adopting the latter form. No changes detected by objdiff. Signed-off-by: Ian Morris Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 2 +- net/ipv4/arp.c | 2 +- net/ipv4/cipso_ipv4.c | 6 +++--- net/ipv4/devinet.c | 2 +- net/ipv4/fib_trie.c | 4 ++-- net/ipv4/geneve.c | 2 +- net/ipv4/gre_offload.c | 2 +- net/ipv4/igmp.c | 8 ++++---- net/ipv4/inet_connection_sock.c | 8 ++++---- net/ipv4/inet_hashtables.c | 2 +- net/ipv4/inet_timewait_sock.c | 2 +- net/ipv4/ip_fragment.c | 3 ++- net/ipv4/ip_input.c | 2 +- net/ipv4/ip_options.c | 2 +- net/ipv4/ip_output.c | 6 +++--- net/ipv4/ip_sockglue.c | 2 +- net/ipv4/ip_tunnel.c | 2 +- net/ipv4/ip_vti.c | 2 +- net/ipv4/ipmr.c | 2 +- net/ipv4/ping.c | 2 +- net/ipv4/raw.c | 4 ++-- net/ipv4/route.c | 2 +- net/ipv4/tcp.c | 12 ++++++------ net/ipv4/tcp_diag.c | 2 +- net/ipv4/tcp_input.c | 12 ++++++------ net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv4/tcp_minisocks.c | 6 +++--- net/ipv4/tcp_output.c | 14 +++++++------- net/ipv4/udp.c | 4 ++-- net/ipv4/udp_offload.c | 4 ++-- 30 files changed, 64 insertions(+), 63 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 7d3b00c01bc8..8b47a4d79d04 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1269,7 +1269,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, if (udpfrag) { iph->id = htons(id); iph->frag_off = htons(offset >> 3); - if (skb->next != NULL) + if (skb->next) iph->frag_off |= htons(IP_MF); offset += skb->len - nhoff - ihl; } else { diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index ffe84226a2c8..c6e67aa46c32 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -569,7 +569,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, break; #endif default: - if (target_hw != NULL) + if (target_hw) memcpy(arp_ptr, target_hw, dev->addr_len); else memset(arp_ptr, 0, dev->addr_len); diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 1b28e1183c1b..bdb2a07ec363 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -502,7 +502,7 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def, atomic_set(&doi_def->refcount, 1); spin_lock(&cipso_v4_doi_list_lock); - if (cipso_v4_doi_search(doi_def->doi) != NULL) { + if (cipso_v4_doi_search(doi_def->doi)) { spin_unlock(&cipso_v4_doi_list_lock); ret_val = -EEXIST; goto doi_add_return; @@ -513,7 +513,7 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def, doi_add_return: audit_buf = netlbl_audit_start(AUDIT_MAC_CIPSOV4_ADD, audit_info); - if (audit_buf != NULL) { + if (audit_buf) { const char *type_str; switch (doi_type) { case CIPSO_V4_MAP_TRANS: @@ -617,7 +617,7 @@ int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info) doi_remove_return: audit_buf = netlbl_audit_start(AUDIT_MAC_CIPSOV4_DEL, audit_info); - if (audit_buf != NULL) { + if (audit_buf) { audit_log_format(audit_buf, " cipso_doi=%u res=%u", doi, ret_val == 0 ? 1 : 0); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 0ee21689d37e..419d23c53ec7 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1290,7 +1290,7 @@ __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 addr = 0; struct net_device *dev; - if (in_dev != NULL) + if (in_dev) return confirm_addr_indev(in_dev, dst, local, scope); rcu_read_lock(); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 9e4a3e3423b4..e13fcc602da2 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -391,9 +391,9 @@ static void put_child(struct key_vector *tn, unsigned long i, BUG_ON(i >= child_length(tn)); /* update emptyChildren, overflow into fullChildren */ - if (!n && chi != NULL) + if (!n && chi) empty_child_inc(tn); - if (n != NULL && !chi) + if (n && !chi) empty_child_dec(tn); /* update fullChildren */ diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index a7d8be3dd3de..e64f8e9785d1 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -230,7 +230,7 @@ static int geneve_gro_complete(struct sk_buff *skb, int nhoff, rcu_read_lock(); ptype = gro_find_complete_by_type(type); - if (ptype != NULL) + if (ptype) err = ptype->callbacks.gro_complete(skb, nhoff + gh_len); rcu_read_unlock(); diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 9358f11aae40..5aa46d4b44ef 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -243,7 +243,7 @@ static int gre_gro_complete(struct sk_buff *skb, int nhoff) rcu_read_lock(); ptype = gro_find_complete_by_type(type); - if (ptype != NULL) + if (ptype) err = ptype->callbacks.gro_complete(skb, nhoff + grehlen); rcu_read_unlock(); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 27d204b834f9..a3a697f5ffba 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2370,7 +2370,7 @@ void ip_mc_drop_socket(struct sock *sk) inet->mc_list = iml->next_rcu; in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); (void) ip_mc_leave_src(sk, iml, in_dev); - if (in_dev != NULL) + if (in_dev) ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); /* decrease mem now to avoid the memleak warning */ atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); @@ -2590,10 +2590,10 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) if (unlikely(!idev)) continue; im = rcu_dereference(idev->mc_list); - if (likely(im != NULL)) { + if (likely(im)) { spin_lock_bh(&im->lock); psf = im->sources; - if (likely(psf != NULL)) { + if (likely(psf)) { state->im = im; state->idev = idev; break; @@ -2663,7 +2663,7 @@ static void igmp_mcf_seq_stop(struct seq_file *seq, void *v) __releases(rcu) { struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq); - if (likely(state->im != NULL)) { + if (likely(state->im)) { spin_unlock_bh(&state->im->lock); state->im = NULL; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 79c0c9439fdc..5c3dd6267ed3 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -673,7 +673,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, { struct sock *newsk = sk_clone_lock(sk, priority); - if (newsk != NULL) { + if (newsk) { struct inet_connection_sock *newicsk = inet_csk(newsk); newsk->sk_state = TCP_SYN_RECV; @@ -843,7 +843,7 @@ void inet_csk_listen_stop(struct sock *sk) sk_acceptq_removed(sk); reqsk_put(req); } - if (queue->fastopenq != NULL) { + if (queue->fastopenq) { /* Free all the reqs queued in rskq_rst_head. */ spin_lock_bh(&queue->fastopenq->lock); acc_req = queue->fastopenq->rskq_rst_head; @@ -875,7 +875,7 @@ int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname, { const struct inet_connection_sock *icsk = inet_csk(sk); - if (icsk->icsk_af_ops->compat_getsockopt != NULL) + if (icsk->icsk_af_ops->compat_getsockopt) return icsk->icsk_af_ops->compat_getsockopt(sk, level, optname, optval, optlen); return icsk->icsk_af_ops->getsockopt(sk, level, optname, @@ -888,7 +888,7 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, { const struct inet_connection_sock *icsk = inet_csk(sk); - if (icsk->icsk_af_ops->compat_setsockopt != NULL) + if (icsk->icsk_af_ops->compat_setsockopt) return icsk->icsk_af_ops->compat_setsockopt(sk, level, optname, optval, optlen); return icsk->icsk_af_ops->setsockopt(sk, level, optname, diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 0fb841b9d834..d4630bf2d9aa 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -64,7 +64,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, { struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); - if (tb != NULL) { + if (tb) { write_pnet(&tb->ib_net, net); tb->port = snum; tb->fastreuse = 0; diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index f38e387448fb..118f0f195820 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -173,7 +173,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat struct inet_timewait_sock *tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, GFP_ATOMIC); - if (tw != NULL) { + if (tw) { const struct inet_sock *inet = inet_sk(sk); kmemcheck_annotate_bitfield(tw, flags); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 5a6cf8667a9d..cc1da6d9cb35 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -639,7 +639,8 @@ int ip_defrag(struct sk_buff *skb, u32 user) IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); /* Lookup (or create) queue header */ - if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) { + qp = ip_find(net, ip_hdr(skb), user); + if (qp) { int ret; spin_lock(&qp->q.lock); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 00bed6fe3b66..2e0410ed8f16 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -203,7 +203,7 @@ static int ip_local_deliver_finish(struct sk_buff *skb) raw = raw_local_deliver(skb, protocol); ipprot = rcu_dereference(inet_protos[protocol]); - if (ipprot != NULL) { + if (ipprot) { int ret; if (!ipprot->no_policy) { diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 5b3d91be2db0..bd246792360b 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -264,7 +264,7 @@ int ip_options_compile(struct net *net, unsigned char *iph; int optlen, l; - if (skb != NULL) { + if (skb) { rt = skb_rtable(skb); optptr = (unsigned char *)&(ip_hdr(skb)[1]); } else diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 561d67b2ac74..26f6f7956168 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -257,7 +257,7 @@ static int ip_finish_output(struct sk_buff *skb) { #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) /* Policy lookup after SNAT yielded a new policy */ - if (skb_dst(skb)->xfrm != NULL) { + if (skb_dst(skb)->xfrm) { IPCB(skb)->flags |= IPSKB_REROUTED; return dst_output(skb); } @@ -376,7 +376,7 @@ int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl) inet_opt = rcu_dereference(inet->inet_opt); fl4 = &fl->u.ip4; rt = skb_rtable(skb); - if (rt != NULL) + if (rt) goto packet_routed; /* Make sure we can route this packet. */ @@ -587,7 +587,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) ip_options_fragment(frag); offset += skb->len - hlen; iph->frag_off = htons(offset>>3); - if (frag->next != NULL) + if (frag->next) iph->frag_off |= htons(IP_MF); /* Ready, complete checksum */ ip_send_check(iph); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index f64b1b24c64f..7cfb0893f263 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -387,7 +387,7 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, skb_network_header(skb); serr->port = port; - if (skb_pull(skb, payload - skb->data) != NULL) { + if (skb_pull(skb, payload - skb->data)) { skb_reset_transport_header(skb); if (sock_queue_err_skb(sk, skb) == 0) return; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 31eaa9ba1803..6d364ab8e14e 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -876,7 +876,7 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) break; } if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { - if (t != NULL) { + if (t) { if (t->dev != dev) { err = -EEXIST; break; diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index c4f93c0d1104..9f7269f3c54a 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -60,7 +60,7 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi, tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, iph->saddr, iph->daddr, 0); - if (tunnel != NULL) { + if (tunnel) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a170e4bc9006..c204b728bbc1 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -316,7 +316,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) unsigned int i; mrt = ipmr_get_table(net, id); - if (mrt != NULL) + if (mrt) return mrt; mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 2dcd2e60df64..a93f260cf24c 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -971,7 +971,7 @@ bool ping_rcv(struct sk_buff *skb) skb_push(skb, skb->data - (u8 *)icmph); sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); - if (sk != NULL) { + if (sk) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); pr_debug("rcv on socket %p\n", sk); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 46a78204189d..6d0fa8fb8af0 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -293,7 +293,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) read_lock(&raw_v4_hashinfo.lock); raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]); - if (raw_sk != NULL) { + if (raw_sk) { iph = (const struct iphdr *)skb->data; net = dev_net(skb->dev); @@ -872,7 +872,7 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); - if (skb != NULL) + if (skb) amount = skb->len; spin_unlock_bh(&sk->sk_receive_queue.lock); return put_user(amount, (int __user *)arg); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 26a1cb348b3d..a78540f28276 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1591,7 +1591,7 @@ static int __mkroute_input(struct sk_buff *skb, fnhe = find_exception(&FIB_RES_NH(*res), daddr); if (do_cache) { - if (fnhe != NULL) + if (fnhe) rth = rcu_dereference(fnhe->fnhe_rth_input); else rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5bd809bfd0aa..094a6822c71d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -496,7 +496,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) /* Connected or passive Fast Open socket? */ if (sk->sk_state != TCP_SYN_SENT && - (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk != NULL)) { + (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk)) { int target = sock_rcvlowat(sk, 0, INT_MAX); if (tp->urg_seq == tp->copied_seq && @@ -1028,7 +1028,7 @@ static inline int select_size(const struct sock *sk, bool sg) void tcp_free_fastopen_req(struct tcp_sock *tp) { - if (tp->fastopen_req != NULL) { + if (tp->fastopen_req) { kfree(tp->fastopen_req); tp->fastopen_req = NULL; } @@ -1042,7 +1042,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE)) return -EOPNOTSUPP; - if (tp->fastopen_req != NULL) + if (tp->fastopen_req) return -EALREADY; /* Another Fast Open is in progress */ tp->fastopen_req = kzalloc(sizeof(struct tcp_fastopen_request), @@ -2138,7 +2138,7 @@ adjudge_to_death: * aborted (e.g., closed with unread data) before 3WHS * finishes. */ - if (req != NULL) + if (req) reqsk_fastopen_remove(sk, req, false); inet_csk_destroy_sock(sk); } @@ -2776,7 +2776,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, break; case TCP_FASTOPEN: - if (icsk->icsk_accept_queue.fastopenq != NULL) + if (icsk->icsk_accept_queue.fastopenq) val = icsk->icsk_accept_queue.fastopenq->max_qlen; else val = 0; @@ -2960,7 +2960,7 @@ void tcp_done(struct sock *sk) tcp_set_state(sk, TCP_CLOSE); tcp_clear_xmit_timers(sk); - if (req != NULL) + if (req) reqsk_fastopen_remove(sk, req, false); sk->sk_shutdown = SHUTDOWN_MASK; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 86dc119a3815..79b34a0f4a4a 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -29,7 +29,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, r->idiag_rqueue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); r->idiag_wqueue = tp->write_seq - tp->snd_una; } - if (info != NULL) + if (info) tcp_get_info(sk, info); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1fd283684303..df7e7fa12733 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1256,7 +1256,7 @@ static u8 tcp_sacktag_one(struct sock *sk, fack_count += pcount; /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ - if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) && + if (!tcp_is_fack(tp) && tp->lost_skb_hint && before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq)) tp->lost_cnt_hint += pcount; @@ -1535,7 +1535,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, if (!before(TCP_SKB_CB(skb)->seq, end_seq)) break; - if ((next_dup != NULL) && + if (next_dup && before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) { in_sack = tcp_match_skb_to_sack(sk, skb, next_dup->start_seq, @@ -1551,7 +1551,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, if (in_sack <= 0) { tmp = tcp_shift_skb_data(sk, skb, state, start_seq, end_seq, dup_sack); - if (tmp != NULL) { + if (tmp) { if (tmp != skb) { skb = tmp; continue; @@ -5321,7 +5321,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) tcp_set_state(sk, TCP_ESTABLISHED); - if (skb != NULL) { + if (skb) { icsk->icsk_af_ops->sk_rx_dst_set(sk, skb); security_inet_conn_established(sk, skb); } @@ -5690,7 +5690,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } req = tp->fastopen_rsk; - if (req != NULL) { + if (req) { WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && sk->sk_state != TCP_FIN_WAIT1); @@ -5780,7 +5780,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * ACK we have received, this would have acknowledged * our SYNACK so stop the SYNACK timer. */ - if (req != NULL) { + if (req) { /* Return RST if ack_seq is invalid. * Note that RFC793 only says to generate a * DUPACK for it but for TCP Fast Open it seems diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9ff311cf00f3..560f9571f7c4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1305,7 +1305,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, /* Copy over the MD5 key from the original socket */ key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr, AF_INET); - if (key != NULL) { + if (key) { /* * We're using one, so create a matching key * on the newsk structure. If we fail to get @@ -1797,7 +1797,7 @@ void tcp_v4_destroy_sock(struct sock *sk) if (inet_csk(sk)->icsk_bind_hash) inet_put_port(sk); - BUG_ON(tp->fastopen_rsk != NULL); + BUG_ON(tp->fastopen_rsk); /* If socket is aborted during connect operation */ tcp_free_fastopen_req(tp); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f0db1599a09c..d7003911c894 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -294,7 +294,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) tw = inet_twsk_alloc(sk, state); - if (tw != NULL) { + if (tw) { struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); struct inet_sock *inet = inet_sk(sk); @@ -332,7 +332,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) struct tcp_md5sig_key *key; tcptw->tw_md5_key = NULL; key = tp->af_specific->md5_lookup(sk, sk); - if (key != NULL) { + if (key) { tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC); if (tcptw->tw_md5_key && !tcp_alloc_md5sig_pool()) BUG(); @@ -454,7 +454,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, { struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC); - if (newsk != NULL) { + if (newsk) { const struct inet_request_sock *ireq = inet_rsk(req); struct tcp_request_sock *treq = tcp_rsk(req); struct inet_connection_sock *newicsk = inet_csk(newsk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bdc80734cd2c..7404e5238e00 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -641,7 +641,7 @@ static unsigned int tcp_synack_options(struct sock *sk, if (unlikely(!ireq->tstamp_ok)) remaining -= TCPOLEN_SACKPERM_ALIGNED; } - if (foc != NULL && foc->len >= 0) { + if (foc && foc->len >= 0) { u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len; need = (need + 3) & ~3U; /* Align to 32 bits */ if (remaining >= need) { @@ -2224,7 +2224,7 @@ void tcp_send_loss_probe(struct sock *sk) int mss = tcp_current_mss(sk); int err = -1; - if (tcp_send_head(sk) != NULL) { + if (tcp_send_head(sk)) { err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC); goto rearm_timer; } @@ -2758,7 +2758,7 @@ begin_fwd: if (!tcp_can_forward_retransmit(sk)) break; /* Backtrack if necessary to non-L'ed skb */ - if (hole != NULL) { + if (hole) { skb = hole; hole = NULL; } @@ -2811,7 +2811,7 @@ void tcp_send_fin(struct sock *sk) */ mss_now = tcp_current_mss(sk); - if (tcp_send_head(sk) != NULL) { + if (tcp_send_head(sk)) { TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN; TCP_SKB_CB(skb)->end_seq++; tp->write_seq++; @@ -3015,7 +3015,7 @@ static void tcp_connect_init(struct sock *sk) (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); #ifdef CONFIG_TCP_MD5SIG - if (tp->af_specific->md5_lookup(sk, sk) != NULL) + if (tp->af_specific->md5_lookup(sk, sk)) tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED; #endif @@ -3376,8 +3376,8 @@ int tcp_write_wakeup(struct sock *sk) if (sk->sk_state == TCP_CLOSE) return -1; - if ((skb = tcp_send_head(sk)) != NULL && - before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) { + skb = tcp_send_head(sk); + if (skb && before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) { int err; unsigned int mss = tcp_current_mss(sk); unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9f525a2a68df..2162fc6ce1c1 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1522,7 +1522,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) /* if we're overly short, let UDP handle it */ encap_rcv = ACCESS_ONCE(up->encap_rcv); - if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) { + if (skb->len > sizeof(struct udphdr) && encap_rcv) { int ret; /* Verify checksum before giving to encap */ @@ -1802,7 +1802,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, saddr, daddr, udptable, proto); sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); - if (sk != NULL) { + if (sk) { int ret; if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 4915d8284a86..f9386160cbee 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -285,7 +285,7 @@ void udp_del_offload(struct udp_offload *uo) pr_warn("udp_del_offload: didn't find offload for port %d\n", ntohs(uo->port)); unlock: spin_unlock(&udp_offload_lock); - if (uo_priv != NULL) + if (uo_priv) call_rcu(&uo_priv->rcu, udp_offload_free_routine); } EXPORT_SYMBOL(udp_del_offload); @@ -394,7 +394,7 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff) break; } - if (uo_priv != NULL) { + if (uo_priv) { NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; err = uo_priv->offload->callbacks.gro_complete(skb, nhoff + sizeof(struct udphdr), -- cgit v1.2.3 From b52e69217b5a02469f8431934f59c0d7103dd32f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Apr 2015 14:36:42 -0700 Subject: tcp: md5: fix a typo in tcp_v4_md5_lookup() Lookup key for tcp_md5_do_lookup() has to be taken from addr_sk, not sk (which can be the listener) Fixes: fd3a154a00fb ("tcp: md5: get rid of tcp_v[46]_reqsk_md5_lookup()") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 560f9571f7c4..37578d52897e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -897,9 +897,9 @@ EXPORT_SYMBOL(tcp_md5_do_lookup); struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, const struct sock *addr_sk) { - union tcp_md5_addr *addr; + const union tcp_md5_addr *addr; - addr = (union tcp_md5_addr *)&sk->sk_daddr; + addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr; return tcp_md5_do_lookup(sk, addr, AF_INET); } EXPORT_SYMBOL(tcp_v4_md5_lookup); -- cgit v1.2.3 From 789f558cfb3680aeb52de137418637f6b04b7d22 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 12 Apr 2015 18:51:09 -0700 Subject: tcp/dccp: get rid of central timewait timer Using a timer wheel for timewait sockets was nice ~15 years ago when memory was expensive and machines had a single processor. This does not scale, code is ugly and source of huge latencies (Typically 30 ms have been seen, cpus spinning on death_lock spinlock.) We can afford to use an extra 64 bytes per timewait sock and spread timewait load to all cpus to have better behavior. Tested: On following test, /proc/sys/net/ipv4/tcp_tw_recycle is set to 1 on the target (lpaa24) Before patch : lpaa23:~# ./super_netperf 200 -H lpaa24 -t TCP_CC -l 60 -- -p0,0 419594 lpaa23:~# ./super_netperf 200 -H lpaa24 -t TCP_CC -l 60 -- -p0,0 437171 While test is running, we can observe 25 or even 33 ms latencies. lpaa24:~# ping -c 1000 -i 0.02 -qn lpaa23 ... 1000 packets transmitted, 1000 received, 0% packet loss, time 20601ms rtt min/avg/max/mdev = 0.020/0.217/25.771/1.535 ms, pipe 2 lpaa24:~# ping -c 1000 -i 0.02 -qn lpaa23 ... 1000 packets transmitted, 1000 received, 0% packet loss, time 20702ms rtt min/avg/max/mdev = 0.019/0.183/33.761/1.441 ms, pipe 2 After patch : About 90% increase of throughput : lpaa23:~# ./super_netperf 200 -H lpaa24 -t TCP_CC -l 60 -- -p0,0 810442 lpaa23:~# ./super_netperf 200 -H lpaa24 -t TCP_CC -l 60 -- -p0,0 800992 And latencies are kept to minimal values during this load, even if network utilization is 90% higher : lpaa24:~# ping -c 1000 -i 0.02 -qn lpaa23 ... 1000 packets transmitted, 1000 received, 0% packet loss, time 19991ms rtt min/avg/max/mdev = 0.023/0.064/0.360/0.042 ms Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 107 ++-------------- net/dccp/minisocks.c | 19 +-- net/ipv4/inet_diag.c | 4 +- net/ipv4/inet_hashtables.c | 4 +- net/ipv4/inet_timewait_sock.c | 270 ++++++--------------------------------- net/ipv4/proc.c | 2 +- net/ipv4/tcp_ipv4.c | 4 +- net/ipv4/tcp_minisocks.c | 35 ++--- net/ipv6/inet6_hashtables.c | 2 +- net/ipv6/tcp_ipv6.c | 4 +- net/netfilter/xt_TPROXY.c | 4 +- 11 files changed, 69 insertions(+), 386 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index b7ce1003c429..360c4802288d 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -31,67 +31,14 @@ struct inet_hashinfo; -#define INET_TWDR_RECYCLE_SLOTS_LOG 5 -#define INET_TWDR_RECYCLE_SLOTS (1 << INET_TWDR_RECYCLE_SLOTS_LOG) - -/* - * If time > 4sec, it is "slow" path, no recycling is required, - * so that we select tick to get range about 4 seconds. - */ -#if HZ <= 16 || HZ > 4096 -# error Unsupported: HZ <= 16 or HZ > 4096 -#elif HZ <= 32 -# define INET_TWDR_RECYCLE_TICK (5 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) -#elif HZ <= 64 -# define INET_TWDR_RECYCLE_TICK (6 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) -#elif HZ <= 128 -# define INET_TWDR_RECYCLE_TICK (7 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) -#elif HZ <= 256 -# define INET_TWDR_RECYCLE_TICK (8 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) -#elif HZ <= 512 -# define INET_TWDR_RECYCLE_TICK (9 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) -#elif HZ <= 1024 -# define INET_TWDR_RECYCLE_TICK (10 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) -#elif HZ <= 2048 -# define INET_TWDR_RECYCLE_TICK (11 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) -#else -# define INET_TWDR_RECYCLE_TICK (12 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) -#endif - -static inline u32 inet_tw_time_stamp(void) -{ - return jiffies; -} - -/* TIME_WAIT reaping mechanism. */ -#define INET_TWDR_TWKILL_SLOTS 8 /* Please keep this a power of 2. */ - -#define INET_TWDR_TWKILL_QUOTA 100 - struct inet_timewait_death_row { - /* Short-time timewait calendar */ - int twcal_hand; - unsigned long twcal_jiffie; - struct timer_list twcal_timer; - struct hlist_head twcal_row[INET_TWDR_RECYCLE_SLOTS]; - - spinlock_t death_lock; - int tw_count; - int period; - u32 thread_slots; - struct work_struct twkill_work; - struct timer_list tw_timer; - int slot; - struct hlist_head cells[INET_TWDR_TWKILL_SLOTS]; - struct inet_hashinfo *hashinfo; + atomic_t tw_count; + + struct inet_hashinfo *hashinfo ____cacheline_aligned_in_smp; int sysctl_tw_recycle; int sysctl_max_tw_buckets; }; -void inet_twdr_hangman(unsigned long data); -void inet_twdr_twkill_work(struct work_struct *work); -void inet_twdr_twcal_tick(unsigned long data); - struct inet_bind_bucket; /* @@ -133,52 +80,18 @@ struct inet_timewait_sock { __be16 tw_sport; kmemcheck_bitfield_begin(flags); /* And these are ours. */ - unsigned int tw_pad0 : 1, /* 1 bit hole */ + unsigned int tw_kill : 1, tw_transparent : 1, tw_flowlabel : 20, tw_pad : 2, /* 2 bits hole */ tw_tos : 8; kmemcheck_bitfield_end(flags); - u32 tw_ttd; + struct timer_list tw_timer; struct inet_bind_bucket *tw_tb; - struct hlist_node tw_death_node; + struct inet_timewait_death_row *tw_dr; }; #define tw_tclass tw_tos -static inline int inet_twsk_dead_hashed(const struct inet_timewait_sock *tw) -{ - return !hlist_unhashed(&tw->tw_death_node); -} - -static inline void inet_twsk_dead_node_init(struct inet_timewait_sock *tw) -{ - tw->tw_death_node.pprev = NULL; -} - -static inline void __inet_twsk_del_dead_node(struct inet_timewait_sock *tw) -{ - __hlist_del(&tw->tw_death_node); - inet_twsk_dead_node_init(tw); -} - -static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw) -{ - if (inet_twsk_dead_hashed(tw)) { - __inet_twsk_del_dead_node(tw); - return 1; - } - return 0; -} - -#define inet_twsk_for_each(tw, node, head) \ - hlist_nulls_for_each_entry(tw, node, head, tw_node) - -#define inet_twsk_for_each_inmate(tw, jail) \ - hlist_for_each_entry(tw, jail, tw_death_node) - -#define inet_twsk_for_each_inmate_safe(tw, safe, jail) \ - hlist_for_each_entry_safe(tw, safe, jail, tw_death_node) - static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) { return (struct inet_timewait_sock *)sk; @@ -193,16 +106,14 @@ int inet_twsk_bind_unhash(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo); struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, + struct inet_timewait_death_row *dr, const int state); void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, struct inet_hashinfo *hashinfo); -void inet_twsk_schedule(struct inet_timewait_sock *tw, - struct inet_timewait_death_row *twdr, - const int timeo, const int timewait_len); -void inet_twsk_deschedule(struct inet_timewait_sock *tw, - struct inet_timewait_death_row *twdr); +void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo); +void inet_twsk_deschedule(struct inet_timewait_sock *tw); void inet_twsk_purge(struct inet_hashinfo *hashinfo, struct inet_timewait_death_row *twdr, int family); diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 332f7d6d9942..5f566663e47f 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -27,28 +27,16 @@ struct inet_timewait_death_row dccp_death_row = { .sysctl_max_tw_buckets = NR_FILE * 2, - .period = DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS, - .death_lock = __SPIN_LOCK_UNLOCKED(dccp_death_row.death_lock), .hashinfo = &dccp_hashinfo, - .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0, - (unsigned long)&dccp_death_row), - .twkill_work = __WORK_INITIALIZER(dccp_death_row.twkill_work, - inet_twdr_twkill_work), -/* Short-time timewait calendar */ - - .twcal_hand = -1, - .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0, - (unsigned long)&dccp_death_row), }; EXPORT_SYMBOL_GPL(dccp_death_row); void dccp_time_wait(struct sock *sk, int state, int timeo) { - struct inet_timewait_sock *tw = NULL; + struct inet_timewait_sock *tw; - if (dccp_death_row.tw_count < dccp_death_row.sysctl_max_tw_buckets) - tw = inet_twsk_alloc(sk, state); + tw = inet_twsk_alloc(sk, &dccp_death_row, state); if (tw != NULL) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -71,8 +59,7 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) if (state == DCCP_TIME_WAIT) timeo = DCCP_TIMEWAIT_LEN; - inet_twsk_schedule(tw, &dccp_death_row, timeo, - DCCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, timeo); inet_twsk_put(tw); } else { /* Sorry, if we're out of memory, just CLOSE this diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 76322c9867d5..70e8b3c308ec 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -248,7 +248,7 @@ static int inet_twsk_diag_fill(struct sock *sk, struct inet_timewait_sock *tw = inet_twsk(sk); struct inet_diag_msg *r; struct nlmsghdr *nlh; - s32 tmo; + long tmo; nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), nlmsg_flags); @@ -258,7 +258,7 @@ static int inet_twsk_diag_fill(struct sock *sk, r = nlmsg_data(nlh); BUG_ON(tw->tw_state != TCP_TIME_WAIT); - tmo = tw->tw_ttd - inet_tw_time_stamp(); + tmo = tw->tw_timer.expires - jiffies; if (tmo < 0) tmo = 0; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index d4630bf2d9aa..c6fb80bd5826 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -388,7 +388,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, *twp = tw; } else if (tw) { /* Silly. Should hash-dance instead... */ - inet_twsk_deschedule(tw, death_row); + inet_twsk_deschedule(tw); inet_twsk_put(tw); } @@ -565,7 +565,7 @@ ok: spin_unlock(&head->lock); if (tw) { - inet_twsk_deschedule(tw, death_row); + inet_twsk_deschedule(tw); while (twrefcnt) { twrefcnt--; inet_twsk_put(tw); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 118f0f195820..00ec8d5d7e7e 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -67,9 +67,9 @@ int inet_twsk_bind_unhash(struct inet_timewait_sock *tw, } /* Must be called with locally disabled BHs. */ -static void __inet_twsk_kill(struct inet_timewait_sock *tw, - struct inet_hashinfo *hashinfo) +static void inet_twsk_kill(struct inet_timewait_sock *tw) { + struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo; struct inet_bind_hashbucket *bhead; int refcnt; /* Unlink from established hashes. */ @@ -89,6 +89,8 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, BUG_ON(refcnt >= atomic_read(&tw->tw_refcnt)); atomic_sub(refcnt, &tw->tw_refcnt); + atomic_dec(&tw->tw_dr->tw_count); + inet_twsk_put(tw); } void inet_twsk_free(struct inet_timewait_sock *tw) @@ -168,16 +170,34 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, } EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); -struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state) +void tw_timer_handler(unsigned long data) { - struct inet_timewait_sock *tw = - kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, - GFP_ATOMIC); + struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data; + + if (tw->tw_kill) + NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED); + else + NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED); + inet_twsk_kill(tw); +} + +struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, + struct inet_timewait_death_row *dr, + const int state) +{ + struct inet_timewait_sock *tw; + + if (atomic_read(&dr->tw_count) >= dr->sysctl_max_tw_buckets) + return NULL; + + tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, + GFP_ATOMIC); if (tw) { const struct inet_sock *inet = inet_sk(sk); kmemcheck_annotate_bitfield(tw, flags); + tw->tw_dr = dr; /* Give us an identity. */ tw->tw_daddr = inet->inet_daddr; tw->tw_rcv_saddr = inet->inet_rcv_saddr; @@ -196,13 +216,14 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_prot = sk->sk_prot_creator; atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie)); twsk_net_set(tw, sock_net(sk)); + setup_timer(&tw->tw_timer, tw_timer_handler, (unsigned long)tw); /* * Because we use RCU lookups, we should not set tw_refcnt * to a non null value before everything is setup for this * timewait socket. */ atomic_set(&tw->tw_refcnt, 0); - inet_twsk_dead_node_init(tw); + __module_get(tw->tw_prot->owner); } @@ -210,139 +231,20 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat } EXPORT_SYMBOL_GPL(inet_twsk_alloc); -/* Returns non-zero if quota exceeded. */ -static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr, - const int slot) -{ - struct inet_timewait_sock *tw; - unsigned int killed; - int ret; - - /* NOTE: compare this to previous version where lock - * was released after detaching chain. It was racy, - * because tw buckets are scheduled in not serialized context - * in 2.3 (with netfilter), and with softnet it is common, because - * soft irqs are not sequenced. - */ - killed = 0; - ret = 0; -rescan: - inet_twsk_for_each_inmate(tw, &twdr->cells[slot]) { - __inet_twsk_del_dead_node(tw); - spin_unlock(&twdr->death_lock); - __inet_twsk_kill(tw, twdr->hashinfo); -#ifdef CONFIG_NET_NS - NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED); -#endif - inet_twsk_put(tw); - killed++; - spin_lock(&twdr->death_lock); - if (killed > INET_TWDR_TWKILL_QUOTA) { - ret = 1; - break; - } - - /* While we dropped twdr->death_lock, another cpu may have - * killed off the next TW bucket in the list, therefore - * do a fresh re-read of the hlist head node with the - * lock reacquired. We still use the hlist traversal - * macro in order to get the prefetches. - */ - goto rescan; - } - - twdr->tw_count -= killed; -#ifndef CONFIG_NET_NS - NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITED, killed); -#endif - return ret; -} - -void inet_twdr_hangman(unsigned long data) -{ - struct inet_timewait_death_row *twdr; - unsigned int need_timer; - - twdr = (struct inet_timewait_death_row *)data; - spin_lock(&twdr->death_lock); - - if (twdr->tw_count == 0) - goto out; - - need_timer = 0; - if (inet_twdr_do_twkill_work(twdr, twdr->slot)) { - twdr->thread_slots |= (1 << twdr->slot); - schedule_work(&twdr->twkill_work); - need_timer = 1; - } else { - /* We purged the entire slot, anything left? */ - if (twdr->tw_count) - need_timer = 1; - twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1)); - } - if (need_timer) - mod_timer(&twdr->tw_timer, jiffies + twdr->period); -out: - spin_unlock(&twdr->death_lock); -} -EXPORT_SYMBOL_GPL(inet_twdr_hangman); - -void inet_twdr_twkill_work(struct work_struct *work) -{ - struct inet_timewait_death_row *twdr = - container_of(work, struct inet_timewait_death_row, twkill_work); - int i; - - BUILD_BUG_ON((INET_TWDR_TWKILL_SLOTS - 1) > - (sizeof(twdr->thread_slots) * 8)); - - while (twdr->thread_slots) { - spin_lock_bh(&twdr->death_lock); - for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) { - if (!(twdr->thread_slots & (1 << i))) - continue; - - while (inet_twdr_do_twkill_work(twdr, i) != 0) { - if (need_resched()) { - spin_unlock_bh(&twdr->death_lock); - schedule(); - spin_lock_bh(&twdr->death_lock); - } - } - - twdr->thread_slots &= ~(1 << i); - } - spin_unlock_bh(&twdr->death_lock); - } -} -EXPORT_SYMBOL_GPL(inet_twdr_twkill_work); - /* These are always called from BH context. See callers in * tcp_input.c to verify this. */ /* This is for handling early-kills of TIME_WAIT sockets. */ -void inet_twsk_deschedule(struct inet_timewait_sock *tw, - struct inet_timewait_death_row *twdr) +void inet_twsk_deschedule(struct inet_timewait_sock *tw) { - spin_lock(&twdr->death_lock); - if (inet_twsk_del_dead_node(tw)) { - inet_twsk_put(tw); - if (--twdr->tw_count == 0) - del_timer(&twdr->tw_timer); - } - spin_unlock(&twdr->death_lock); - __inet_twsk_kill(tw, twdr->hashinfo); + if (del_timer_sync(&tw->tw_timer)) + inet_twsk_kill(tw); } EXPORT_SYMBOL(inet_twsk_deschedule); -void inet_twsk_schedule(struct inet_timewait_sock *tw, - struct inet_timewait_death_row *twdr, - const int timeo, const int timewait_len) +void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo) { - struct hlist_head *list; - int slot; - /* timeout := RTO * 3.5 * * 3.5 = 1+2+0.5 to wait for two retransmits. @@ -367,115 +269,15 @@ void inet_twsk_schedule(struct inet_timewait_sock *tw, * is greater than TS tick!) and detect old duplicates with help * of PAWS. */ - slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK; - spin_lock(&twdr->death_lock); - - /* Unlink it, if it was scheduled */ - if (inet_twsk_del_dead_node(tw)) - twdr->tw_count--; - else + tw->tw_kill = timeo <= 4*HZ; + if (!mod_timer_pinned(&tw->tw_timer, jiffies + timeo)) { atomic_inc(&tw->tw_refcnt); - - if (slot >= INET_TWDR_RECYCLE_SLOTS) { - /* Schedule to slow timer */ - if (timeo >= timewait_len) { - slot = INET_TWDR_TWKILL_SLOTS - 1; - } else { - slot = DIV_ROUND_UP(timeo, twdr->period); - if (slot >= INET_TWDR_TWKILL_SLOTS) - slot = INET_TWDR_TWKILL_SLOTS - 1; - } - tw->tw_ttd = inet_tw_time_stamp() + timeo; - slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1); - list = &twdr->cells[slot]; - } else { - tw->tw_ttd = inet_tw_time_stamp() + (slot << INET_TWDR_RECYCLE_TICK); - - if (twdr->twcal_hand < 0) { - twdr->twcal_hand = 0; - twdr->twcal_jiffie = jiffies; - twdr->twcal_timer.expires = twdr->twcal_jiffie + - (slot << INET_TWDR_RECYCLE_TICK); - add_timer(&twdr->twcal_timer); - } else { - if (time_after(twdr->twcal_timer.expires, - jiffies + (slot << INET_TWDR_RECYCLE_TICK))) - mod_timer(&twdr->twcal_timer, - jiffies + (slot << INET_TWDR_RECYCLE_TICK)); - slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1); - } - list = &twdr->twcal_row[slot]; + atomic_inc(&tw->tw_dr->tw_count); } - - hlist_add_head(&tw->tw_death_node, list); - - if (twdr->tw_count++ == 0) - mod_timer(&twdr->tw_timer, jiffies + twdr->period); - spin_unlock(&twdr->death_lock); } EXPORT_SYMBOL_GPL(inet_twsk_schedule); -void inet_twdr_twcal_tick(unsigned long data) -{ - struct inet_timewait_death_row *twdr; - int n, slot; - unsigned long j; - unsigned long now = jiffies; - int killed = 0; - int adv = 0; - - twdr = (struct inet_timewait_death_row *)data; - - spin_lock(&twdr->death_lock); - if (twdr->twcal_hand < 0) - goto out; - - slot = twdr->twcal_hand; - j = twdr->twcal_jiffie; - - for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) { - if (time_before_eq(j, now)) { - struct hlist_node *safe; - struct inet_timewait_sock *tw; - - inet_twsk_for_each_inmate_safe(tw, safe, - &twdr->twcal_row[slot]) { - __inet_twsk_del_dead_node(tw); - __inet_twsk_kill(tw, twdr->hashinfo); -#ifdef CONFIG_NET_NS - NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED); -#endif - inet_twsk_put(tw); - killed++; - } - } else { - if (!adv) { - adv = 1; - twdr->twcal_jiffie = j; - twdr->twcal_hand = slot; - } - - if (!hlist_empty(&twdr->twcal_row[slot])) { - mod_timer(&twdr->twcal_timer, j); - goto out; - } - } - j += 1 << INET_TWDR_RECYCLE_TICK; - slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1); - } - twdr->twcal_hand = -1; - -out: - if ((twdr->tw_count -= killed) == 0) - del_timer(&twdr->tw_timer); -#ifndef CONFIG_NET_NS - NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITKILLED, killed); -#endif - spin_unlock(&twdr->death_lock); -} -EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); - void inet_twsk_purge(struct inet_hashinfo *hashinfo, struct inet_timewait_death_row *twdr, int family) { @@ -509,7 +311,7 @@ restart: rcu_read_unlock(); local_bh_disable(); - inet_twsk_deschedule(tw, twdr); + inet_twsk_deschedule(tw); local_bh_enable(); inet_twsk_put(tw); goto restart_rcu; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index d8953ef0770c..e1f3b911dd1e 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -63,7 +63,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n", sock_prot_inuse_get(net, &tcp_prot), orphans, - tcp_death_row.tw_count, sockets, + atomic_read(&tcp_death_row.tw_count), sockets, proto_memory_allocated(&tcp_prot)); seq_printf(seq, "UDP: inuse %d mem %ld\n", sock_prot_inuse_get(net, &udp_prot), diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 37578d52897e..3571f2be4470 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1685,7 +1685,7 @@ do_time_wait: iph->daddr, th->dest, inet_iif(skb)); if (sk2) { - inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); + inet_twsk_deschedule(inet_twsk(sk)); inet_twsk_put(inet_twsk(sk)); sk = sk2; goto process; @@ -2242,9 +2242,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) static void get_timewait4_sock(const struct inet_timewait_sock *tw, struct seq_file *f, int i) { + long delta = tw->tw_timer.expires - jiffies; __be32 dest, src; __u16 destp, srcp; - s32 delta = tw->tw_ttd - inet_tw_time_stamp(); dest = tw->tw_daddr; src = tw->tw_rcv_saddr; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 2088fdcca141..63d6311b5365 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -34,18 +34,7 @@ int sysctl_tcp_abort_on_overflow __read_mostly; struct inet_timewait_death_row tcp_death_row = { .sysctl_max_tw_buckets = NR_FILE * 2, - .period = TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS, - .death_lock = __SPIN_LOCK_UNLOCKED(tcp_death_row.death_lock), .hashinfo = &tcp_hashinfo, - .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0, - (unsigned long)&tcp_death_row), - .twkill_work = __WORK_INITIALIZER(tcp_death_row.twkill_work, - inet_twdr_twkill_work), -/* Short-time timewait calendar */ - - .twcal_hand = -1, - .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0, - (unsigned long)&tcp_death_row), }; EXPORT_SYMBOL_GPL(tcp_death_row); @@ -158,7 +147,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, if (!th->fin || TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) { kill_with_rst: - inet_twsk_deschedule(tw, &tcp_death_row); + inet_twsk_deschedule(tw); inet_twsk_put(tw); return TCP_TW_RST; } @@ -174,11 +163,9 @@ kill_with_rst: if (tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp && tcp_tw_remember_stamp(tw)) - inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout, - TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, tw->tw_timeout); else - inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN, - TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN); return TCP_TW_ACK; } @@ -211,13 +198,12 @@ kill_with_rst: */ if (sysctl_tcp_rfc1337 == 0) { kill: - inet_twsk_deschedule(tw, &tcp_death_row); + inet_twsk_deschedule(tw); inet_twsk_put(tw); return TCP_TW_SUCCESS; } } - inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN, - TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN); if (tmp_opt.saw_tstamp) { tcptw->tw_ts_recent = tmp_opt.rcv_tsval; @@ -267,8 +253,7 @@ kill: * Do not reschedule in the last case. */ if (paws_reject || th->ack) - inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN, - TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN); return tcp_timewait_check_oow_rate_limit( tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT); @@ -283,16 +268,15 @@ EXPORT_SYMBOL(tcp_timewait_state_process); */ void tcp_time_wait(struct sock *sk, int state, int timeo) { - struct inet_timewait_sock *tw = NULL; const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); + struct inet_timewait_sock *tw; bool recycle_ok = false; if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) recycle_ok = tcp_remember_stamp(sk); - if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) - tw = inet_twsk_alloc(sk, state); + tw = inet_twsk_alloc(sk, &tcp_death_row, state); if (tw) { struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); @@ -355,8 +339,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) timeo = TCP_TIMEWAIT_LEN; } - inet_twsk_schedule(tw, &tcp_death_row, timeo, - TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, timeo); inet_twsk_put(tw); } else { /* Sorry, if we're out of memory, just CLOSE this diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 033f17816ef4..871641bc1ed4 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -246,7 +246,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, *twp = tw; } else if (tw) { /* Silly. Should hash-dance instead... */ - inet_twsk_deschedule(tw, death_row); + inet_twsk_deschedule(tw); inet_twsk_put(tw); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f73a97f6e68e..ad51df85aa00 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1486,7 +1486,7 @@ do_time_wait: ntohs(th->dest), tcp_v6_iif(skb)); if (sk2) { struct inet_timewait_sock *tw = inet_twsk(sk); - inet_twsk_deschedule(tw, &tcp_death_row); + inet_twsk_deschedule(tw); inet_twsk_put(tw); sk = sk2; tcp_v6_restore_cb(skb); @@ -1728,9 +1728,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) static void get_timewait6_sock(struct seq_file *seq, struct inet_timewait_sock *tw, int i) { + long delta = tw->tw_timer.expires - jiffies; const struct in6_addr *dest, *src; __u16 destp, srcp; - s32 delta = tw->tw_ttd - inet_tw_time_stamp(); dest = &tw->tw_v6_daddr; src = &tw->tw_v6_rcv_saddr; diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index c205b26a2bee..cca96cec1b68 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -272,7 +272,7 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport, hp->source, lport ? lport : hp->dest, skb->dev, NFT_LOOKUP_LISTENER); if (sk2) { - inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); + inet_twsk_deschedule(inet_twsk(sk)); inet_twsk_put(inet_twsk(sk)); sk = sk2; } @@ -437,7 +437,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, tgi->lport ? tgi->lport : hp->dest, skb->dev, NFT_LOOKUP_LISTENER); if (sk2) { - inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); + inet_twsk_deschedule(inet_twsk(sk)); inet_twsk_put(inet_twsk(sk)); sk = sk2; } -- cgit v1.2.3