diff options
| author | Arnaldo Carvalho de Melo <acme@ghostprotocols.net> | 2005-08-09 20:11:08 -0700 | 
|---|---|---|
| committer | David S. Miller <davem@sunset.davemloft.net> | 2005-08-29 15:49:14 -0700 | 
| commit | 3f421baa4720b708022f8bcc52a61e5cd6f10bf8 (patch) | |
| tree | e4201b1e2356cea8b7bd8d68dfba06e84002a77d | |
| parent | 463c84b97f24010a67cd871746d6a7e4c925a5f9 (diff) | |
| download | blackbird-op-linux-3f421baa4720b708022f8bcc52a61e5cd6f10bf8.tar.gz blackbird-op-linux-3f421baa4720b708022f8bcc52a61e5cd6f10bf8.zip | |
[NET]: Just move the inet_connection_sock function from tcp sources
Completing the previous changeset, this also generalises tcp_v4_synq_add,
renaming it to inet_csk_reqsk_queue_hash_add, already being used in the
DCCP tree, which I plan to merge RSN.
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
| -rw-r--r-- | include/linux/ipv6.h | 7 | ||||
| -rw-r--r-- | include/net/inet_connection_sock.h | 152 | ||||
| -rw-r--r-- | include/net/tcp.h | 160 | ||||
| -rw-r--r-- | net/ipv4/Makefile | 2 | ||||
| -rw-r--r-- | net/ipv4/inet_connection_sock.c | 401 | ||||
| -rw-r--r-- | net/ipv4/tcp.c | 93 | ||||
| -rw-r--r-- | net/ipv4/tcp_input.c | 10 | ||||
| -rw-r--r-- | net/ipv4/tcp_ipv4.c | 210 | ||||
| -rw-r--r-- | net/ipv4/tcp_output.c | 19 | ||||
| -rw-r--r-- | net/ipv4/tcp_timer.c | 65 | 
10 files changed, 588 insertions, 531 deletions
| diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 88591913c94f..777339b68464 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -333,15 +333,10 @@ static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk)  	return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL;  } -static inline int inet_twsk_ipv6only(const struct sock *sk) -{ -	return inet_twsk(sk)->tw_ipv6only; -} -  static inline int inet_v6_ipv6only(const struct sock *sk)  {  	return likely(sk->sk_state != TCP_TIME_WAIT) ? -		ipv6_only_sock(sk) : inet_twsk_ipv6only(sk); +		ipv6_only_sock(sk) : inet_twsk(sk)->tw_ipv6only;  }  #else  #define __ipv6_only_sock(sk)	0 diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index ef609396e41b..97e002001c1a 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -16,9 +16,15 @@  #define _INET_CONNECTION_SOCK_H  #include <linux/ip.h> +#include <linux/string.h>  #include <linux/timer.h>  #include <net/request_sock.h> +#define INET_CSK_DEBUG 1 + +/* Cancel timers, when they are not required. 
*/ +#undef INET_CSK_CLEAR_TIMERS +  struct inet_bind_bucket;  struct inet_hashinfo; @@ -61,17 +67,107 @@ struct inet_connection_sock {  	} icsk_ack;  }; +#define ICSK_TIME_RETRANS	1	/* Retransmit timer */ +#define ICSK_TIME_DACK		2	/* Delayed ack timer */ +#define ICSK_TIME_PROBE0	3	/* Zero window probe timer */ +#define ICSK_TIME_KEEPOPEN	4	/* Keepalive timer */ +  static inline struct inet_connection_sock *inet_csk(const struct sock *sk)  {  	return (struct inet_connection_sock *)sk;  } +enum inet_csk_ack_state_t { +	ICSK_ACK_SCHED	= 1, +	ICSK_ACK_TIMER  = 2, +	ICSK_ACK_PUSHED = 4 +}; +  extern void inet_csk_init_xmit_timers(struct sock *sk,  				      void (*retransmit_handler)(unsigned long),  				      void (*delack_handler)(unsigned long),  				      void (*keepalive_handler)(unsigned long));  extern void inet_csk_clear_xmit_timers(struct sock *sk); +static inline void inet_csk_schedule_ack(struct sock *sk) +{ +	inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED; +} + +static inline int inet_csk_ack_scheduled(const struct sock *sk) +{ +	return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED; +} + +static inline void inet_csk_delack_init(struct sock *sk) +{ +	memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack)); +} + +extern void inet_csk_delete_keepalive_timer(struct sock *sk); +extern void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout); + +#ifdef INET_CSK_DEBUG +extern const char inet_csk_timer_bug_msg[]; +#endif + +static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) +{ +	struct inet_connection_sock *icsk = inet_csk(sk); +	 +	if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { +		icsk->icsk_pending = 0; +#ifdef INET_CSK_CLEAR_TIMERS +		sk_stop_timer(sk, &icsk->icsk_retransmit_timer); +#endif +	} else if (what == ICSK_TIME_DACK) { +		icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0; +#ifdef INET_CSK_CLEAR_TIMERS +		sk_stop_timer(sk, &icsk->icsk_delack_timer); +#endif +	} +#ifdef 
INET_CSK_DEBUG +	else { +		pr_debug(inet_csk_timer_bug_msg); +	} +#endif +} + +/* + *	Reset the retransmission timer + */ +static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, +					     unsigned long when, +					     const unsigned long max_when) +{ +	struct inet_connection_sock *icsk = inet_csk(sk); + +	if (when > max_when) { +#ifdef INET_CSK_DEBUG +		pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n", +			 sk, what, when, current_text_addr()); +#endif +		when = max_when; +	} + +	if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { +		icsk->icsk_pending = what; +		icsk->icsk_timeout = jiffies + when; +		sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); +	} else if (what == ICSK_TIME_DACK) { +		icsk->icsk_ack.pending |= ICSK_ACK_TIMER; +		icsk->icsk_ack.timeout = jiffies + when; +		sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); +	} +#ifdef INET_CSK_DEBUG +	else { +		pr_debug(inet_csk_timer_bug_msg); +	} +#endif +} + +extern struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); +  extern struct request_sock *inet_csk_search_req(const struct sock *sk,  						struct request_sock ***prevp,  						const __u16 rport, @@ -83,4 +179,60 @@ extern int inet_csk_get_port(struct inet_hashinfo *hashinfo,  extern struct dst_entry* inet_csk_route_req(struct sock *sk,  					    const struct request_sock *req); +static inline void inet_csk_reqsk_queue_add(struct sock *sk, +					    struct request_sock *req, +					    struct sock *child) +{ +	reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child); +} + +extern void inet_csk_reqsk_queue_hash_add(struct sock *sk, +					  struct request_sock *req, +					  const unsigned timeout); + +static inline void inet_csk_reqsk_queue_removed(struct sock *sk, +						struct request_sock *req) +{ +	if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0) +		inet_csk_delete_keepalive_timer(sk); +} + +static inline void 
inet_csk_reqsk_queue_added(struct sock *sk, +					      const unsigned long timeout) +{ +	if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0) +		inet_csk_reset_keepalive_timer(sk, timeout); +} + +static inline int inet_csk_reqsk_queue_len(const struct sock *sk) +{ +	return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue); +} + +static inline int inet_csk_reqsk_queue_young(const struct sock *sk) +{ +	return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue); +} + +static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) +{ +	return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue); +} + +static inline void inet_csk_reqsk_queue_unlink(struct sock *sk, +					       struct request_sock *req, +					       struct request_sock **prev) +{ +	reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev); +} + +static inline void inet_csk_reqsk_queue_drop(struct sock *sk, +					     struct request_sock *req, +					     struct request_sock **prev) +{ +	inet_csk_reqsk_queue_unlink(sk, req, prev); +	inet_csk_reqsk_queue_removed(sk, req); +	reqsk_free(req); +} +  #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index a943c79c88b0..dd9a5a288f88 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -19,18 +19,16 @@  #define _TCP_H  #define TCP_DEBUG 1 -#define INET_CSK_DEBUG 1  #define FASTRETRANS_DEBUG 1 -/* Cancel timers, when they are not required. 
*/ -#undef INET_CSK_CLEAR_TIMERS -  #include <linux/config.h>  #include <linux/list.h>  #include <linux/tcp.h>  #include <linux/slab.h>  #include <linux/cache.h>  #include <linux/percpu.h> + +#include <net/inet_connection_sock.h>  #include <net/inet_hashtables.h>  #include <net/checksum.h>  #include <net/request_sock.h> @@ -206,11 +204,6 @@ extern void tcp_tw_deschedule(struct inet_timewait_sock *tw);  #define TCPOLEN_SACK_BASE_ALIGNED	4  #define TCPOLEN_SACK_PERBLOCK		8 -#define ICSK_TIME_RETRANS	1	/* Retransmit timer */ -#define ICSK_TIME_DACK		2	/* Delayed ack timer */ -#define ICSK_TIME_PROBE0	3	/* Zero window probe timer */ -#define ICSK_TIME_KEEPOPEN	4	/* Keepalive timer */ -  /* Flags in tp->nonagle */  #define TCP_NAGLE_OFF		1	/* Nagle's algo is disabled */  #define TCP_NAGLE_CORK		2	/* Socket is corked	    */ @@ -257,12 +250,6 @@ extern atomic_t tcp_memory_allocated;  extern atomic_t tcp_sockets_allocated;  extern int tcp_memory_pressure; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -#define AF_INET_FAMILY(fam) ((fam) == AF_INET) -#else -#define AF_INET_FAMILY(fam) 1 -#endif -  /*   *	Pointers to address related TCP functions   *	(i.e. 
things that depend on the address family) @@ -373,22 +360,6 @@ extern int			tcp_rcv_established(struct sock *sk,  extern void			tcp_rcv_space_adjust(struct sock *sk); -enum inet_csk_ack_state_t { -	ICSK_ACK_SCHED	= 1, -	ICSK_ACK_TIMER  = 2, -	ICSK_ACK_PUSHED = 4 -}; - -static inline void inet_csk_schedule_ack(struct sock *sk) -{ -	inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED; -} - -static inline int inet_csk_ack_scheduled(const struct sock *sk) -{ -	return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED; -} -  static inline void tcp_dec_quickack_mode(struct sock *sk,  					 const unsigned int pkts)  { @@ -406,11 +377,6 @@ static inline void tcp_dec_quickack_mode(struct sock *sk,  extern void tcp_enter_quickack_mode(struct sock *sk); -static inline void inet_csk_delack_init(struct sock *sk) -{ -	memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack)); -} -  static inline void tcp_clear_options(struct tcp_options_received *rx_opt)  {   	rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0; @@ -442,7 +408,6 @@ extern void			tcp_update_metrics(struct sock *sk);  extern void			tcp_close(struct sock *sk,   					  long timeout); -extern struct sock *		inet_csk_accept(struct sock *sk, int flags, int *err);  extern unsigned int		tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait);  extern int			tcp_getsockopt(struct sock *sk, int level,  @@ -541,15 +506,9 @@ static inline void tcp_clear_xmit_timers(struct sock *sk)  	inet_csk_clear_xmit_timers(sk);  } -extern void inet_csk_delete_keepalive_timer(struct sock *sk); -extern void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout);  extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);  extern unsigned int tcp_current_mss(struct sock *sk, int large); -#ifdef INET_CSK_DEBUG -extern const char inet_csk_timer_bug_msg[]; -#endif -  /* tcp_diag.c */  extern void tcp_get_info(struct sock *, struct tcp_info *); @@ -559,60 +518,6 @@ 
typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,  extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,  			 sk_read_actor_t recv_actor); -static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) -{ -	struct inet_connection_sock *icsk = inet_csk(sk); -	 -	if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { -		icsk->icsk_pending = 0; -#ifdef INET_CSK_CLEAR_TIMERS -		sk_stop_timer(sk, &icsk->icsk_retransmit_timer); -#endif -	} else if (what == ICSK_TIME_DACK) { -		icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0; -#ifdef INET_CSK_CLEAR_TIMERS -		sk_stop_timer(sk, &icsk->icsk_delack_timer); -#endif -	} -#ifdef INET_CSK_DEBUG -	else { -		pr_debug(inet_csk_timer_bug_msg); -	} -#endif -} - -/* - *	Reset the retransmission timer - */ -static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, -					     unsigned long when) -{ -	struct inet_connection_sock *icsk = inet_csk(sk); - -	if (when > TCP_RTO_MAX) { -#ifdef INET_CSK_DEBUG -		pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n", -			 sk, what, when, current_text_addr()); -#endif -		when = TCP_RTO_MAX; -	} - -	if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { -		icsk->icsk_pending = what; -		icsk->icsk_timeout = jiffies + when; -		sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); -	} else if (what == ICSK_TIME_DACK) { -		icsk->icsk_ack.pending |= ICSK_ACK_TIMER; -		icsk->icsk_ack.timeout = jiffies + when; -		sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); -	} -#ifdef INET_CSK_DEBUG -	else { -		pr_debug(inet_csk_timer_bug_msg); -	} -#endif -} -  /* Initialize RCV_MSS value.   * RCV_MSS is an our guess about MSS used by the peer.   * We haven't any direct information about the MSS. 
@@ -765,7 +670,8 @@ static inline void tcp_packets_out_inc(struct sock *sk,  	tp->packets_out += tcp_skb_pcount(skb);  	if (!orig) -		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); +		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, +					  inet_csk(sk)->icsk_rto, TCP_RTO_MAX);  }  static inline void tcp_packets_out_dec(struct tcp_sock *tp,  @@ -934,7 +840,8 @@ static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *t  {  	const struct inet_connection_sock *icsk = inet_csk(sk);  	if (!tp->packets_out && !icsk->icsk_pending) -		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, icsk->icsk_rto); +		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, +					  icsk->icsk_rto, TCP_RTO_MAX);  }  static __inline__ void tcp_push_pending_frames(struct sock *sk, @@ -1017,7 +924,8 @@ static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb)  			wake_up_interruptible(sk->sk_sleep);  			if (!inet_csk_ack_scheduled(sk))  				inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, -						          (3 * TCP_RTO_MIN) / 4); +						          (3 * TCP_RTO_MIN) / 4, +							  TCP_RTO_MAX);  		}  		return 1;  	} @@ -1181,58 +1089,6 @@ static inline int tcp_full_space(const struct sock *sk)  	return tcp_win_from_space(sk->sk_rcvbuf);   } -static inline void inet_csk_reqsk_queue_add(struct sock *sk, -					    struct request_sock *req, -					    struct sock *child) -{ -	reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child); -} - -static inline void inet_csk_reqsk_queue_removed(struct sock *sk, -						struct request_sock *req) -{ -	if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0) -		inet_csk_delete_keepalive_timer(sk); -} - -static inline void inet_csk_reqsk_queue_added(struct sock *sk, -					      const unsigned long timeout) -{ -	if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0) -		inet_csk_reset_keepalive_timer(sk, timeout); -} - -static inline int inet_csk_reqsk_queue_len(const struct sock *sk) -{ 
-	return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue); -} - -static inline int inet_csk_reqsk_queue_young(const struct sock *sk) -{ -	return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue); -} - -static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) -{ -	return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue); -} - -static inline void inet_csk_reqsk_queue_unlink(struct sock *sk, -					       struct request_sock *req, -					       struct request_sock **prev) -{ -	reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev); -} - -static inline void inet_csk_reqsk_queue_drop(struct sock *sk, -					     struct request_sock *req, -					     struct request_sock **prev) -{ -	inet_csk_reqsk_queue_unlink(sk, req, prev); -	inet_csk_reqsk_queue_removed(sk, req); -	reqsk_free(req); -} -  static __inline__ void tcp_openreq_init(struct request_sock *req,  					struct tcp_options_received *rx_opt,  					struct sk_buff *skb) diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 6650d18e400f..ea0e1d87dc7e 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -5,7 +5,7 @@  obj-y     := route.o inetpeer.o protocol.o \  	     ip_input.o ip_fragment.o ip_forward.o ip_options.o \  	     ip_output.o ip_sockglue.o inet_hashtables.o \ -	     inet_timewait_sock.o \ +	     inet_timewait_sock.o inet_connection_sock.o \  	     tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \  	     tcp_minisocks.o tcp_cong.o \  	     datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c new file mode 100644 index 000000000000..2712400a8bb8 --- /dev/null +++ b/net/ipv4/inet_connection_sock.c @@ -0,0 +1,401 @@ +/* + * INET		An implementation of the TCP/IP protocol suite for the LINUX + *		operating system.  INET is implemented using the  BSD Socket + *		interface as the means of communication with the user level. + * + *		Support for INET connection oriented protocols. 
+ * + * Authors:	See the TCP sources + * + *		This program is free software; you can redistribute it and/or + *		modify it under the terms of the GNU General Public License + *		as published by the Free Software Foundation; either version + *		2 of the License, or(at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/jhash.h> + +#include <net/inet_connection_sock.h> +#include <net/inet_hashtables.h> +#include <net/inet_timewait_sock.h> +#include <net/ip.h> +#include <net/route.h> +#include <net/tcp_states.h> + +#ifdef INET_CSK_DEBUG +const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; +EXPORT_SYMBOL(inet_csk_timer_bug_msg); +#endif + +/* + * This array holds the first and last local port number. + * For high-usage systems, use sysctl to change this to + * 32768-61000 + */ +int sysctl_local_port_range[2] = { 1024, 4999 }; + +static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) +{ +	const u32 sk_rcv_saddr = inet_rcv_saddr(sk); +	struct sock *sk2; +	struct hlist_node *node; +	int reuse = sk->sk_reuse; + +	sk_for_each_bound(sk2, node, &tb->owners) { +		if (sk != sk2 && +		    !inet_v6_ipv6only(sk2) && +		    (!sk->sk_bound_dev_if || +		     !sk2->sk_bound_dev_if || +		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { +			if (!reuse || !sk2->sk_reuse || +			    sk2->sk_state == TCP_LISTEN) { +				const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); +				if (!sk2_rcv_saddr || !sk_rcv_saddr || +				    sk2_rcv_saddr == sk_rcv_saddr) +					break; +			} +		} +	} +	return node != NULL; +} + +/* Obtain a reference to a local port for the given sock, + * if snum is zero it means select any available local port. 
+ */ +int inet_csk_get_port(struct inet_hashinfo *hashinfo, +		      struct sock *sk, unsigned short snum) +{ +	struct inet_bind_hashbucket *head; +	struct hlist_node *node; +	struct inet_bind_bucket *tb; +	int ret; + +	local_bh_disable(); +	if (!snum) { +		int low = sysctl_local_port_range[0]; +		int high = sysctl_local_port_range[1]; +		int remaining = (high - low) + 1; +		int rover; + +		spin_lock(&hashinfo->portalloc_lock); +		if (hashinfo->port_rover < low) +			rover = low; +		else +			rover = hashinfo->port_rover; +		do { +			rover++; +			if (rover > high) +				rover = low; +			head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; +			spin_lock(&head->lock); +			inet_bind_bucket_for_each(tb, node, &head->chain) +				if (tb->port == rover) +					goto next; +			break; +		next: +			spin_unlock(&head->lock); +		} while (--remaining > 0); +		hashinfo->port_rover = rover; +		spin_unlock(&hashinfo->portalloc_lock); + +		/* Exhausted local port range during search?  It is not +		 * possible for us to be holding one of the bind hash +		 * locks if this test triggers, because if 'remaining' +		 * drops to zero, we broke out of the do/while loop at +		 * the top level, not from the 'break;' statement. +		 */ +		ret = 1; +		if (remaining <= 0) +			goto fail; + +		/* OK, here is the one we will use.  HEAD is +		 * non-NULL and we hold it's mutex. 
+		 */ +		snum = rover; +	} else { +		head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; +		spin_lock(&head->lock); +		inet_bind_bucket_for_each(tb, node, &head->chain) +			if (tb->port == snum) +				goto tb_found; +	} +	tb = NULL; +	goto tb_not_found; +tb_found: +	if (!hlist_empty(&tb->owners)) { +		if (sk->sk_reuse > 1) +			goto success; +		if (tb->fastreuse > 0 && +		    sk->sk_reuse && sk->sk_state != TCP_LISTEN) { +			goto success; +		} else { +			ret = 1; +			if (inet_csk_bind_conflict(sk, tb)) +				goto fail_unlock; +		} +	} +tb_not_found: +	ret = 1; +	if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL) +		goto fail_unlock; +	if (hlist_empty(&tb->owners)) { +		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) +			tb->fastreuse = 1; +		else +			tb->fastreuse = 0; +	} else if (tb->fastreuse && +		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) +		tb->fastreuse = 0; +success: +	if (!inet_csk(sk)->icsk_bind_hash) +		inet_bind_hash(sk, tb, snum); +	BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb); + 	ret = 0; + +fail_unlock: +	spin_unlock(&head->lock); +fail: +	local_bh_enable(); +	return ret; +} + +EXPORT_SYMBOL_GPL(inet_csk_get_port); + +/* + * Wait for an incoming connection, avoid race conditions. This must be called + * with the socket locked. + */ +static int inet_csk_wait_for_connect(struct sock *sk, long timeo) +{ +	struct inet_connection_sock *icsk = inet_csk(sk); +	DEFINE_WAIT(wait); +	int err; + +	/* +	 * True wake-one mechanism for incoming connections: only +	 * one process gets woken up, not the 'whole herd'. +	 * Since we do not 'race & poll' for established sockets +	 * anymore, the common case will execute the loop only once. 
+	 * +	 * Subtle issue: "add_wait_queue_exclusive()" will be added +	 * after any current non-exclusive waiters, and we know that +	 * it will always _stay_ after any new non-exclusive waiters +	 * because all non-exclusive waiters are added at the +	 * beginning of the wait-queue. As such, it's ok to "drop" +	 * our exclusiveness temporarily when we get woken up without +	 * having to remove and re-insert us on the wait queue. +	 */ +	for (;;) { +		prepare_to_wait_exclusive(sk->sk_sleep, &wait, +					  TASK_INTERRUPTIBLE); +		release_sock(sk); +		if (reqsk_queue_empty(&icsk->icsk_accept_queue)) +			timeo = schedule_timeout(timeo); +		lock_sock(sk); +		err = 0; +		if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) +			break; +		err = -EINVAL; +		if (sk->sk_state != TCP_LISTEN) +			break; +		err = sock_intr_errno(timeo); +		if (signal_pending(current)) +			break; +		err = -EAGAIN; +		if (!timeo) +			break; +	} +	finish_wait(sk->sk_sleep, &wait); +	return err; +} + +/* + * This will accept the next outstanding connection. + */ +struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) +{ +	struct inet_connection_sock *icsk = inet_csk(sk); +	struct sock *newsk; +	int error; + +	lock_sock(sk); + +	/* We need to make sure that this socket is listening, +	 * and that it has something pending. 
+	 */ +	error = -EINVAL; +	if (sk->sk_state != TCP_LISTEN) +		goto out_err; + +	/* Find already established connection */ +	if (reqsk_queue_empty(&icsk->icsk_accept_queue)) { +		long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + +		/* If this is a non blocking socket don't sleep */ +		error = -EAGAIN; +		if (!timeo) +			goto out_err; + +		error = inet_csk_wait_for_connect(sk, timeo); +		if (error) +			goto out_err; +	} + +	newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); +	BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); +out: +	release_sock(sk); +	return newsk; +out_err: +	newsk = NULL; +	*err = error; +	goto out; +} + +EXPORT_SYMBOL(inet_csk_accept); + +/* + * Using different timers for retransmit, delayed acks and probes + * We may wish use just one timer maintaining a list of expire jiffies  + * to optimize. + */ +void inet_csk_init_xmit_timers(struct sock *sk, +			       void (*retransmit_handler)(unsigned long), +			       void (*delack_handler)(unsigned long), +			       void (*keepalive_handler)(unsigned long)) +{ +	struct inet_connection_sock *icsk = inet_csk(sk); + +	init_timer(&icsk->icsk_retransmit_timer); +	init_timer(&icsk->icsk_delack_timer); +	init_timer(&sk->sk_timer); + +	icsk->icsk_retransmit_timer.function = retransmit_handler; +	icsk->icsk_delack_timer.function     = delack_handler; +	sk->sk_timer.function		     = keepalive_handler; + +	icsk->icsk_retransmit_timer.data =  +		icsk->icsk_delack_timer.data = +			sk->sk_timer.data  = (unsigned long)sk; + +	icsk->icsk_pending = icsk->icsk_ack.pending = 0; +} + +EXPORT_SYMBOL(inet_csk_init_xmit_timers); + +void inet_csk_clear_xmit_timers(struct sock *sk) +{ +	struct inet_connection_sock *icsk = inet_csk(sk); + +	icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0; + +	sk_stop_timer(sk, &icsk->icsk_retransmit_timer); +	sk_stop_timer(sk, &icsk->icsk_delack_timer); +	sk_stop_timer(sk, &sk->sk_timer); +} + +EXPORT_SYMBOL(inet_csk_clear_xmit_timers); + +void 
inet_csk_delete_keepalive_timer(struct sock *sk) +{ +	sk_stop_timer(sk, &sk->sk_timer); +} + +EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); + +void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) +{ +	sk_reset_timer(sk, &sk->sk_timer, jiffies + len); +} + +EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); + +struct dst_entry* inet_csk_route_req(struct sock *sk, +				     const struct request_sock *req) +{ +	struct rtable *rt; +	const struct inet_request_sock *ireq = inet_rsk(req); +	struct ip_options *opt = inet_rsk(req)->opt; +	struct flowi fl = { .oif = sk->sk_bound_dev_if, +			    .nl_u = { .ip4_u = +				      { .daddr = ((opt && opt->srr) ? +						  opt->faddr : +						  ireq->rmt_addr), +					.saddr = ireq->loc_addr, +					.tos = RT_CONN_FLAGS(sk) } }, +			    .proto = sk->sk_protocol, +			    .uli_u = { .ports = +				       { .sport = inet_sk(sk)->sport, +					 .dport = ireq->rmt_port } } }; + +	if (ip_route_output_flow(&rt, &fl, sk, 0)) { +		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); +		return NULL; +	} +	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) { +		ip_rt_put(rt); +		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); +		return NULL; +	} +	return &rt->u.dst; +} + +EXPORT_SYMBOL_GPL(inet_csk_route_req); + +static inline u32 inet_synq_hash(const u32 raddr, const u16 rport, +				 const u32 rnd, const u16 synq_hsize) +{ +	return jhash_2words(raddr, (u32)rport, rnd) & (synq_hsize - 1); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#define AF_INET_FAMILY(fam) ((fam) == AF_INET) +#else +#define AF_INET_FAMILY(fam) 1 +#endif + +struct request_sock *inet_csk_search_req(const struct sock *sk, +					 struct request_sock ***prevp, +					 const __u16 rport, const __u32 raddr, +					 const __u32 laddr) +{ +	const struct inet_connection_sock *icsk = inet_csk(sk); +	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; +	struct request_sock *req, **prev; + +	for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, 
lopt->hash_rnd, +						    lopt->nr_table_entries)]; +	     (req = *prev) != NULL; +	     prev = &req->dl_next) { +		const struct inet_request_sock *ireq = inet_rsk(req); + +		if (ireq->rmt_port == rport && +		    ireq->rmt_addr == raddr && +		    ireq->loc_addr == laddr && +		    AF_INET_FAMILY(req->rsk_ops->family)) { +			BUG_TRAP(!req->sk); +			*prevp = prev; +			break; +		} +	} + +	return req; +} + +EXPORT_SYMBOL_GPL(inet_csk_search_req); + +void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, +				   const unsigned timeout) +{ +	struct inet_connection_sock *icsk = inet_csk(sk); +	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; +	const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, +				     lopt->hash_rnd, lopt->nr_table_entries); + +	reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); +	inet_csk_reqsk_queue_added(sk, timeout); +} + +EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8177b86570db..581016a6a93f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1805,98 +1805,6 @@ int tcp_disconnect(struct sock *sk, int flags)  }  /* - *	Wait for an incoming connection, avoid race - *	conditions. This must be called with the socket locked. - */ -static int wait_for_connect(struct sock *sk, long timeo) -{ -	struct inet_connection_sock *icsk = inet_csk(sk); -	DEFINE_WAIT(wait); -	int err; - -	/* -	 * True wake-one mechanism for incoming connections: only -	 * one process gets woken up, not the 'whole herd'. -	 * Since we do not 'race & poll' for established sockets -	 * anymore, the common case will execute the loop only once. -	 * -	 * Subtle issue: "add_wait_queue_exclusive()" will be added -	 * after any current non-exclusive waiters, and we know that -	 * it will always _stay_ after any new non-exclusive waiters -	 * because all non-exclusive waiters are added at the -	 * beginning of the wait-queue. 
As such, it's ok to "drop" -	 * our exclusiveness temporarily when we get woken up without -	 * having to remove and re-insert us on the wait queue. -	 */ -	for (;;) { -		prepare_to_wait_exclusive(sk->sk_sleep, &wait, -					  TASK_INTERRUPTIBLE); -		release_sock(sk); -		if (reqsk_queue_empty(&icsk->icsk_accept_queue)) -			timeo = schedule_timeout(timeo); -		lock_sock(sk); -		err = 0; -		if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) -			break; -		err = -EINVAL; -		if (sk->sk_state != TCP_LISTEN) -			break; -		err = sock_intr_errno(timeo); -		if (signal_pending(current)) -			break; -		err = -EAGAIN; -		if (!timeo) -			break; -	} -	finish_wait(sk->sk_sleep, &wait); -	return err; -} - -/* - *	This will accept the next outstanding connection. - */ - -struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) -{ -	struct inet_connection_sock *icsk = inet_csk(sk); -	struct sock *newsk; -	int error; - -	lock_sock(sk); - -	/* We need to make sure that this socket is listening, -	 * and that it has something pending. -	 */ -	error = -EINVAL; -	if (sk->sk_state != TCP_LISTEN) -		goto out_err; - -	/* Find already established connection */ -	if (reqsk_queue_empty(&icsk->icsk_accept_queue)) { -		long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); - -		/* If this is a non blocking socket don't sleep */ -		error = -EAGAIN; -		if (!timeo) -			goto out_err; - -		error = wait_for_connect(sk, timeo); -		if (error) -			goto out_err; -	} - -	newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); -	BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); -out: -	release_sock(sk); -	return newsk; -out_err: -	newsk = NULL; -	*err = error; -	goto out; -} - -/*   *	Socket option code for TCP.   
*/  int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, @@ -2344,7 +2252,6 @@ void __init tcp_init(void)  	tcp_register_congestion_control(&tcp_reno);  } -EXPORT_SYMBOL(inet_csk_accept);  EXPORT_SYMBOL(tcp_close);  EXPORT_SYMBOL(tcp_destroy_sock);  EXPORT_SYMBOL(tcp_disconnect); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8a8c5c2d90cb..b35badf53aa5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1278,7 +1278,7 @@ static int tcp_check_sack_reneging(struct sock *sk)  		inet_csk(sk)->icsk_retransmits++;  		tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue));  		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, -					  inet_csk(sk)->icsk_rto); +					  inet_csk(sk)->icsk_rto, TCP_RTO_MAX);  		return 1;  	}  	return 0; @@ -1961,7 +1961,7 @@ static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp)  	if (!tp->packets_out) {  		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);  	} else { -		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); +		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX);  	}  } @@ -2147,7 +2147,8 @@ static void tcp_ack_probe(struct sock *sk)  		 */  	} else {  		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, -					  min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX)); +					  min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX), +					  TCP_RTO_MAX);  	}  } @@ -3968,7 +3969,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,  			inet_csk(sk)->icsk_ack.ato	 = TCP_ATO_MIN;  			tcp_incr_quickack(sk);  			tcp_enter_quickack_mode(sk); -			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX); +			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, +						  TCP_DELACK_MAX, TCP_RTO_MAX);  discard:  			__kfree_skb(skb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2cd41265d17f..2f605b9e6b67 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -97,138 
+97,6 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {  	.port_rover	= 1024 - 1,  }; -/* - * This array holds the first and last local port number. - * For high-usage systems, use sysctl to change this to - * 32768-61000 - */ -int sysctl_local_port_range[2] = { 1024, 4999 }; - -static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) -{ -	const u32 sk_rcv_saddr = inet_rcv_saddr(sk); -	struct sock *sk2; -	struct hlist_node *node; -	int reuse = sk->sk_reuse; - -	sk_for_each_bound(sk2, node, &tb->owners) { -		if (sk != sk2 && -		    !inet_v6_ipv6only(sk2) && -		    (!sk->sk_bound_dev_if || -		     !sk2->sk_bound_dev_if || -		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { -			if (!reuse || !sk2->sk_reuse || -			    sk2->sk_state == TCP_LISTEN) { -				const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); -				if (!sk2_rcv_saddr || !sk_rcv_saddr || -				    sk2_rcv_saddr == sk_rcv_saddr) -					break; -			} -		} -	} -	return node != NULL; -} - -/* Obtain a reference to a local port for the given sock, - * if snum is zero it means select any available local port. 
- */ -int inet_csk_get_port(struct inet_hashinfo *hashinfo, -		      struct sock *sk, unsigned short snum) -{ -	struct inet_bind_hashbucket *head; -	struct hlist_node *node; -	struct inet_bind_bucket *tb; -	int ret; - -	local_bh_disable(); -	if (!snum) { -		int low = sysctl_local_port_range[0]; -		int high = sysctl_local_port_range[1]; -		int remaining = (high - low) + 1; -		int rover; - -		spin_lock(&hashinfo->portalloc_lock); -		if (hashinfo->port_rover < low) -			rover = low; -		else -			rover = hashinfo->port_rover; -		do { -			rover++; -			if (rover > high) -				rover = low; -			head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; -			spin_lock(&head->lock); -			inet_bind_bucket_for_each(tb, node, &head->chain) -				if (tb->port == rover) -					goto next; -			break; -		next: -			spin_unlock(&head->lock); -		} while (--remaining > 0); -		hashinfo->port_rover = rover; -		spin_unlock(&hashinfo->portalloc_lock); - -		/* Exhausted local port range during search?  It is not -		 * possible for us to be holding one of the bind hash -		 * locks if this test triggers, because if 'remaining' -		 * drops to zero, we broke out of the do/while loop at -		 * the top level, not from the 'break;' statement. -		 */ -		ret = 1; -		if (unlikely(remaining <= 0)) -			goto fail; - -		/* OK, here is the one we will use.  HEAD is -		 * non-NULL and we hold it's mutex. 
-		 */ -		snum = rover; -	} else { -		head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; -		spin_lock(&head->lock); -		inet_bind_bucket_for_each(tb, node, &head->chain) -			if (tb->port == snum) -				goto tb_found; -	} -	tb = NULL; -	goto tb_not_found; -tb_found: -	if (!hlist_empty(&tb->owners)) { -		if (sk->sk_reuse > 1) -			goto success; -		if (tb->fastreuse > 0 && -		    sk->sk_reuse && sk->sk_state != TCP_LISTEN) { -			goto success; -		} else { -			ret = 1; -			if (inet_csk_bind_conflict(sk, tb)) -				goto fail_unlock; -		} -	} -tb_not_found: -	ret = 1; -	if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL) -		goto fail_unlock; -	if (hlist_empty(&tb->owners)) { -		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) -			tb->fastreuse = 1; -		else -			tb->fastreuse = 0; -	} else if (tb->fastreuse && -		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) -		tb->fastreuse = 0; -success: -	if (!inet_csk(sk)->icsk_bind_hash) -		inet_bind_hash(sk, tb, snum); -	BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb); - 	ret = 0; - -fail_unlock: -	spin_unlock(&head->lock); -fail: -	local_bh_enable(); -	return ret; -} -  static int tcp_v4_get_port(struct sock *sk, unsigned short snum)  {  	return inet_csk_get_port(&tcp_hashinfo, sk, snum); @@ -568,52 +436,6 @@ static inline int inet_iif(const struct sk_buff *skb)  	return ((struct rtable *)skb->dst)->rt_iif;  } -static inline u32 inet_synq_hash(const u32 raddr, const u16 rport, -				 const u32 rnd, const u16 synq_hsize) -{ -	return jhash_2words(raddr, (u32)rport, rnd) & (synq_hsize - 1); -} - -struct request_sock *inet_csk_search_req(const struct sock *sk, -					 struct request_sock ***prevp, -					 const __u16 rport, const __u32 raddr, -					 const __u32 laddr) -{ -	const struct inet_connection_sock *icsk = inet_csk(sk); -	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; -	struct request_sock *req, **prev; - -	for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, 
lopt->hash_rnd, -						    lopt->nr_table_entries)]; -	     (req = *prev) != NULL; -	     prev = &req->dl_next) { -		const struct inet_request_sock *ireq = inet_rsk(req); - -		if (ireq->rmt_port == rport && -		    ireq->rmt_addr == raddr && -		    ireq->loc_addr == laddr && -		    AF_INET_FAMILY(req->rsk_ops->family)) { -			BUG_TRAP(!req->sk); -			*prevp = prev; -			break; -		} -	} - -	return req; -} - -static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req) -{ -	struct inet_connection_sock *icsk = inet_csk(sk); -	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; -	const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, -				     lopt->hash_rnd, lopt->nr_table_entries); - -	reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT); -	inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT); -} - -  /*   * This routine does path mtu discovery as defined in RFC1191.   */ @@ -963,36 +785,6 @@ static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)  			req->ts_recent);  } -struct dst_entry* inet_csk_route_req(struct sock *sk, -				     const struct request_sock *req) -{ -	struct rtable *rt; -	const struct inet_request_sock *ireq = inet_rsk(req); -	struct ip_options *opt = inet_rsk(req)->opt; -	struct flowi fl = { .oif = sk->sk_bound_dev_if, -			    .nl_u = { .ip4_u = -				      { .daddr = ((opt && opt->srr) ? 
-						  opt->faddr : -						  ireq->rmt_addr), -					.saddr = ireq->loc_addr, -					.tos = RT_CONN_FLAGS(sk) } }, -			    .proto = sk->sk_protocol, -			    .uli_u = { .ports = -				       { .sport = inet_sk(sk)->sport, -					 .dport = ireq->rmt_port } } }; - -	if (ip_route_output_flow(&rt, &fl, sk, 0)) { -		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); -		return NULL; -	} -	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) { -		ip_rt_put(rt); -		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); -		return NULL; -	} -	return &rt->u.dst; -} -  /*   *	Send a SYN-ACK after having received an ACK.   *	This still operates on a request_sock only, not on a big @@ -1222,7 +1014,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)  	if (want_cookie) {  	   	reqsk_free(req);  	} else { -		tcp_v4_synq_add(sk, req); +		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);  	}  	return 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6f0a7e30ceac..f458eacb5ef2 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1493,7 +1493,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)  					if (skb ==  					    skb_peek(&sk->sk_write_queue))  						inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, -									  inet_csk(sk)->icsk_rto); +									  inet_csk(sk)->icsk_rto, +									  TCP_RTO_MAX);  				}  				packet_cnt -= tcp_skb_pcount(skb); @@ -1546,7 +1547,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk)  			break;  		if (skb == skb_peek(&sk->sk_write_queue)) -			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); +			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, +						  inet_csk(sk)->icsk_rto, +						  TCP_RTO_MAX);  		NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS);  	} @@ -1826,7 +1829,8 @@ int tcp_connect(struct sock *sk)  	TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);  	/* Timer for repeating the SYN until an answer. 
*/ -	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); +	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, +				  inet_csk(sk)->icsk_rto, TCP_RTO_MAX);  	return 0;  } @@ -1901,7 +1905,8 @@ void tcp_send_ack(struct sock *sk)  		if (buff == NULL) {  			inet_csk_schedule_ack(sk);  			inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; -			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX); +			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, +						  TCP_DELACK_MAX, TCP_RTO_MAX);  			return;  		} @@ -2033,7 +2038,8 @@ void tcp_send_probe0(struct sock *sk)  			icsk->icsk_backoff++;  		tp->probes_out++;  		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,  -					  min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX)); +					  min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX), +					  TCP_RTO_MAX);  	} else {  		/* If packet was not sent due to local congestion,  		 * do not backoff and do not remember probes_out. @@ -2045,7 +2051,8 @@ void tcp_send_probe0(struct sock *sk)  			tp->probes_out=1;  		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,   					  min(icsk->icsk_rto << icsk->icsk_backoff, -					      TCP_RESOURCE_PROBE_INTERVAL)); +					      TCP_RESOURCE_PROBE_INTERVAL), +					  TCP_RTO_MAX);  	}  } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 0b71380ee42f..c03930c48f42 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -36,55 +36,14 @@ static void tcp_write_timer(unsigned long);  static void tcp_delack_timer(unsigned long);  static void tcp_keepalive_timer (unsigned long data); -#ifdef INET_CSK_DEBUG -const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; -EXPORT_SYMBOL(inet_csk_timer_bug_msg); -#endif - -/* - * Using different timers for retransmit, delayed acks and probes - * We may wish use just one timer maintaining a list of expire jiffies  - * to optimize. 
- */ -void inet_csk_init_xmit_timers(struct sock *sk, -			       void (*retransmit_handler)(unsigned long), -			       void (*delack_handler)(unsigned long), -			       void (*keepalive_handler)(unsigned long)) -{ -	struct inet_connection_sock *icsk = inet_csk(sk); - -	init_timer(&icsk->icsk_retransmit_timer); -	init_timer(&icsk->icsk_delack_timer); -	init_timer(&sk->sk_timer); - -	icsk->icsk_retransmit_timer.function = retransmit_handler; -	icsk->icsk_delack_timer.function     = delack_handler; -	sk->sk_timer.function		     = keepalive_handler; - -	icsk->icsk_retransmit_timer.data =  -		icsk->icsk_delack_timer.data = -			sk->sk_timer.data  = (unsigned long)sk; - -	icsk->icsk_pending = icsk->icsk_ack.pending = 0; -} - -void inet_csk_clear_xmit_timers(struct sock *sk) -{ -	struct inet_connection_sock *icsk = inet_csk(sk); - -	icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0; - -	sk_stop_timer(sk, &icsk->icsk_retransmit_timer); -	sk_stop_timer(sk, &icsk->icsk_delack_timer); -	sk_stop_timer(sk, &sk->sk_timer); -} -  void tcp_init_xmit_timers(struct sock *sk)  {  	inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,  				  &tcp_keepalive_timer);  } +EXPORT_SYMBOL(tcp_init_xmit_timers); +  static void tcp_write_err(struct sock *sk)  {  	sk->sk_err = sk->sk_err_soft ? 
: ETIMEDOUT; @@ -392,7 +351,8 @@ static void tcp_retransmit_timer(struct sock *sk)  		if (!icsk->icsk_retransmits)  			icsk->icsk_retransmits = 1;  		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, -					  min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL)); +					  min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL), +					  TCP_RTO_MAX);  		goto out;  	} @@ -416,7 +376,7 @@ static void tcp_retransmit_timer(struct sock *sk)  out_reset_timer:  	icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); -	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto); +	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);  	if (icsk->icsk_retransmits > sysctl_tcp_retries1)  		__sk_dst_reset(sk); @@ -553,16 +513,6 @@ static void tcp_synack_timer(struct sock *sk)  		inet_csk_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL);  } -void inet_csk_delete_keepalive_timer(struct sock *sk) -{ -	sk_stop_timer(sk, &sk->sk_timer); -} - -void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) -{ -	sk_reset_timer(sk, &sk->sk_timer, jiffies + len); -} -  void tcp_set_keepalive(struct sock *sk, int val)  {  	if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) @@ -653,8 +603,3 @@ out:  	bh_unlock_sock(sk);  	sock_put(sk);  } - -EXPORT_SYMBOL(inet_csk_clear_xmit_timers); -EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); -EXPORT_SYMBOL(tcp_init_xmit_timers); -EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); | 

