From 1946e672c173559155a3e210fe95dbf8b7b8ddf7 Mon Sep 17 00:00:00 2001
From: Haishuang Yan
Date: Wed, 28 Dec 2016 17:52:32 +0800
Subject: ipv4: Namespaceify tcp_tw_recycle and tcp_max_tw_buckets knob

Applications in different namespaces might require fast recycling of
TIME-WAIT sockets independently of the host.

Signed-off-by: Haishuang Yan
Signed-off-by: David S. Miller
---
 net/ipv4/tcp_minisocks.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

(limited to 'net/ipv4/tcp_minisocks.c')

diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 28ce5ee831f5..06fde26a82b7 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -29,12 +29,6 @@
 
 int sysctl_tcp_abort_on_overflow __read_mostly;
 
-struct inet_timewait_death_row tcp_death_row = {
-        .sysctl_max_tw_buckets = NR_FILE * 2,
-        .hashinfo = &tcp_hashinfo,
-};
-EXPORT_SYMBOL_GPL(tcp_death_row);
-
 static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
 {
         if (seq == s_win)
@@ -100,6 +94,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
         struct tcp_options_received tmp_opt;
         struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
         bool paws_reject = false;
+        struct inet_timewait_death_row *tcp_death_row = &sock_net((struct sock*)tw)->ipv4.tcp_death_row;
 
         tmp_opt.saw_tstamp = 0;
         if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
@@ -153,7 +148,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
                         tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
                 }
 
-                if (tcp_death_row.sysctl_tw_recycle &&
+                if (tcp_death_row->sysctl_tw_recycle &&
                     tcptw->tw_ts_recent_stamp &&
                     tcp_tw_remember_stamp(tw))
                         inet_twsk_reschedule(tw, tw->tw_timeout);
@@ -264,11 +259,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
         const struct tcp_sock *tp = tcp_sk(sk);
         struct inet_timewait_sock *tw;
         bool recycle_ok = false;
+        struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
 
-        if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
+        if (tcp_death_row->sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
                 recycle_ok = tcp_remember_stamp(sk);
 
-        tw = inet_twsk_alloc(sk, &tcp_death_row, state);
+        tw = inet_twsk_alloc(sk, tcp_death_row, state);
 
         if (tw) {
                 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
-- cgit v1.2.3
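The pattern this patch applies is worth spelling out: a knob that used to live in one global (tcp_death_row) becomes a field of struct net, and every lookup goes through the socket's owning namespace via sock_net(). Below is a minimal standalone userspace C sketch of that pattern; the struct layouts, field names, and values are illustrative stand-ins, not the kernel's actual definitions.

#include <stdio.h>

/* Stand-ins for inet_timewait_death_row, struct net, and struct sock;
 * the real layouts differ, this only models the per-namespace lookup.
 */
struct death_row_cfg {
        int sysctl_tw_recycle;
        int sysctl_max_tw_buckets;
};

struct netns {
        struct death_row_cfg tcp_death_row;
};

struct sock {
        struct netns *net;
};

/* Analogue of sock_net(sk): every socket knows its owning namespace. */
static struct netns *sock_net(const struct sock *sk)
{
        return sk->net;
}

int main(void)
{
        struct netns host = {
                .tcp_death_row = { .sysctl_tw_recycle = 0,
                                   .sysctl_max_tw_buckets = 262144 },
        };
        struct netns container = {
                .tcp_death_row = { .sysctl_tw_recycle = 1,
                                   .sysctl_max_tw_buckets = 4096 },
        };
        struct sock a = { .net = &host };
        struct sock b = { .net = &container };

        /* Each lookup dereferences the per-namespace copy, not a global */
        printf("host:      tw_recycle=%d\n",
               sock_net(&a)->tcp_death_row.sysctl_tw_recycle);
        printf("container: tw_recycle=%d\n",
               sock_net(&b)->tcp_death_row.sysctl_tw_recycle);
        return 0;
}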
From bec41a11dd3dc8c54f766b4f494140ca92ba7c10 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng
Date: Thu, 12 Jan 2017 22:11:39 -0800
Subject: tcp: remove early retransmit

This patch removes support for RFC 5827 early retransmit (i.e., fast
recovery on a small inflight with fewer than three dupacks), because it
is subsumed by the new RACK loss detection. More specifically, when RACK
receives DUPACKs, it arms a reordering timer to start fast recovery
after a quarter of the (min)RTT, so it covers early retransmit, except
that RACK does not limit itself to a specific inflight or dupack count.

Signed-off-by: Yuchung Cheng
Signed-off-by: Neal Cardwell
Acked-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 Documentation/networking/ip-sysctl.txt | 19 +++--------
 include/linux/tcp.h                    |  3 +-
 include/net/tcp.h                      | 19 -----------
 net/ipv4/inet_diag.c                   |  1 -
 net/ipv4/tcp.c                         |  3 --
 net/ipv4/tcp_input.c                   | 60 ++--------------------------------
 net/ipv4/tcp_ipv4.c                    |  1 -
 net/ipv4/tcp_metrics.c                 |  1 -
 net/ipv4/tcp_minisocks.c               |  1 -
 net/ipv4/tcp_output.c                  | 11 +++----
 net/ipv4/tcp_timer.c                   |  3 --
 net/ipv6/tcp_ipv6.c                    |  1 -
 12 files changed, 12 insertions(+), 111 deletions(-)

(limited to 'net/ipv4/tcp_minisocks.c')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 7dd65c9cf707..7de2cf79e16f 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -246,21 +246,12 @@ tcp_dsack - BOOLEAN
         Allows TCP to send "duplicate" SACKs.
 
 tcp_early_retrans - INTEGER
-        Enable Early Retransmit (ER), per RFC 5827. ER lowers the threshold
-        for triggering fast retransmit when the amount of outstanding data is
-        small and when no previously unsent data can be transmitted (such
-        that limited transmit could be used). Also controls the use of
-        Tail loss probe (TLP) that converts RTOs occurring due to tail
-        losses into fast recovery (draft-dukkipati-tcpm-tcp-loss-probe-01).
+        Tail loss probe (TLP) converts RTOs occurring due to tail
+        losses into fast recovery (draft-ietf-tcpm-rack). Note that
+        TLP requires RACK to function properly (see tcp_recovery below)
         Possible values:
-                0 disables ER
-                1 enables ER
-                2 enables ER but delays fast recovery and fast retransmit
-                  by a fourth of RTT. This mitigates connection falsely
-                  recovers when network has a small degree of reordering
-                  (less than 3 packets).
-                3 enables delayed ER and TLP.
-                4 enables TLP only.
+                0 disables TLP
+                3 or 4 enables TLP
         Default: 3
 
 tcp_ecn - INTEGER
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 8e5f4c15d0e5..4733368f953a 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -224,8 +224,7 @@ struct tcp_sock {
                 repair      : 1,
                 frto        : 1;/* F-RTO (RFC5682) activated in CA_Loss */
         u8      repair_queue;
-        u8      do_early_retrans:1,/* Enable RFC5827 early-retransmit */
-                syn_data:1,     /* SYN includes data */
+        u8      syn_data:1,     /* SYN includes data */
                 syn_fastopen:1, /* SYN includes Fast Open option */
                 syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
                 syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 423438dd6fe9..c55d65f74f7f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -565,7 +565,6 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
                              const struct sk_buff *next_skb);
 
 /* tcp_input.c */
-void tcp_resume_early_retransmit(struct sock *sk);
 void tcp_rearm_rto(struct sock *sk);
 void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
 void tcp_reset(struct sock *sk);
@@ -1037,24 +1036,6 @@ static inline void tcp_enable_fack(struct tcp_sock *tp)
         tp->rx_opt.sack_ok |= TCP_FACK_ENABLED;
 }
 
-/* TCP early-retransmit (ER) is similar to but more conservative than
- * the thin-dupack feature. Enable ER only if thin-dupack is disabled.
- */
-static inline void tcp_enable_early_retrans(struct tcp_sock *tp)
-{
-        struct net *net = sock_net((struct sock *)tp);
-
-        tp->do_early_retrans = sysctl_tcp_early_retrans &&
-                sysctl_tcp_early_retrans < 4 && !sysctl_tcp_thin_dupack &&
-                !(sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) &&
-                net->ipv4.sysctl_tcp_reordering == 3;
-}
-
-static inline void tcp_disable_early_retrans(struct tcp_sock *tp)
-{
-        tp->do_early_retrans = 0;
-}
-
 static inline unsigned int tcp_left_out(const struct tcp_sock *tp)
 {
         return tp->sacked_out + tp->lost_out;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index d216e40623d3..3828b3a805cd 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -215,7 +215,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
         }
 
         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
-            icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
                 r->idiag_timer = 1;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c8d46c140b4a..d9023e8ed53e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -406,7 +406,6 @@ void tcp_init_sock(struct sock *sk)
         tp->mss_cache = TCP_MSS_DEFAULT;
 
         tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
-        tcp_enable_early_retrans(tp);
         tcp_assign_congestion_control(sk);
 
         tp->tsoffset = 0;
@@ -2477,8 +2476,6 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
                         err = -EINVAL;
                 else {
                         tp->thin_dupack = val;
-                        if (tp->thin_dupack)
-                                tcp_disable_early_retrans(tp);
                 }
                 break;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a041a92348ee..79c819077a59 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -904,8 +904,6 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
                 tcp_disable_fack(tp);
         }
 
-        if (metric > 0)
-                tcp_disable_early_retrans(tp);
         tp->rack.reord = 1;
 }
 
@@ -2054,30 +2052,6 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
         return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
 }
 
-static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
-{
-        struct tcp_sock *tp = tcp_sk(sk);
-        unsigned long delay;
-
-        /* Delay early retransmit and entering fast recovery for
-         * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples
-         * available, or RTO is scheduled to fire first.
-         */
-        if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 ||
-            (flag & FLAG_ECE) || !tp->srtt_us)
-                return false;
-
-        delay = max(usecs_to_jiffies(tp->srtt_us >> 5),
-                    msecs_to_jiffies(2));
-
-        if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
-                return false;
-
-        inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay,
-                                  TCP_RTO_MAX);
-        return true;
-}
-
 /* Linux NewReno/SACK/FACK/ECN state machine.
  * --------------------------------------
  *
@@ -2221,16 +2195,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
             tcp_is_sack(tp) && !tcp_send_head(sk))
                 return true;
 
-        /* Trick#6: TCP early retransmit, per RFC5827.  To avoid spurious
-         * retransmissions due to small network reorderings, we implement
-         * Mitigation A.3 in the RFC and delay the retransmission for a short
-         * interval if appropriate.
-         */
-        if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out &&
-            (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) &&
-            !tcp_may_send_now(sk))
-                return !tcp_pause_early_retransmit(sk, flag);
-
         return false;
 }
 
@@ -3050,8 +3014,7 @@ void tcp_rearm_rto(struct sock *sk)
         } else {
                 u32 rto = inet_csk(sk)->icsk_rto;
                 /* Offset the time elapsed after installing regular RTO */
-                if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
-                    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+                if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
                     icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
                         struct sk_buff *skb = tcp_write_queue_head(sk);
                         const u32 rto_time_stamp =
@@ -3068,24 +3031,6 @@ void tcp_rearm_rto(struct sock *sk)
         }
 }
 
-/* This function is called when the delayed ER timer fires. TCP enters
- * fast recovery and performs fast-retransmit.
- */
-void tcp_resume_early_retransmit(struct sock *sk)
-{
-        struct tcp_sock *tp = tcp_sk(sk);
-
-        tcp_rearm_rto(sk);
-
-        /* Stop if ER is disabled after the delayed ER timer is scheduled */
-        if (!tp->do_early_retrans)
-                return;
-
-        tcp_enter_recovery(sk, false);
-        tcp_update_scoreboard(sk, 1);
-        tcp_xmit_retransmit_queue(sk);
-}
-
 /* If we get here, the whole TSO packet has not been acked. */
 static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
 {
@@ -3651,8 +3596,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
         skb_mstamp_get(&sack_state.ack_time);
 
-        if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
-            icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
+        if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
                 tcp_rearm_rto(sk);
 
         if (after(ack, prior_snd_una)) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ebf3e0c4967a..63214136cf1c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2229,7 +2229,6 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
         int state;
 
         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
-            icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
                 timer_active = 1;
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index ba8f02d0f283..b9ed0d50aead 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -522,7 +522,6 @@ void tcp_init_metrics(struct sock *sk)
                 val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
                 if (val && tp->reordering != val) {
                         tcp_disable_fack(tp);
-                        tcp_disable_early_retrans(tp);
                         tp->reordering = val;
                 }
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 06fde26a82b7..bdb443471c39 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -468,7 +468,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
                 newtp->sacked_out = 0;
                 newtp->fackets_out = 0;
                 newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
-                tcp_enable_early_retrans(newtp);
                 newtp->tlp_high_seq = 0;
                 newtp->lsndtime = treq->snt_synack.stamp_jiffies;
                 newsk->sk_txhash = treq->txhash;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6327e4d368a4..9a1a1494b9dd 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -76,10 +76,8 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
         tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
 
         tp->packets_out += tcp_skb_pcount(skb);
-        if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
-            icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+        if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
                 tcp_rearm_rto(sk);
-        }
 
         NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
                       tcp_skb_pcount(skb));
@@ -2289,8 +2287,6 @@ bool tcp_schedule_loss_probe(struct sock *sk)
         u32 timeout, tlp_time_stamp, rto_time_stamp;
         u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
 
-        if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
-                return false;
         /* No consecutive loss probes. */
         if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
                 tcp_rearm_rto(sk);
@@ -2309,8 +2305,9 @@ bool tcp_schedule_loss_probe(struct sock *sk)
         /* Schedule a loss probe in 2*RTT for SACK capable connections
          * in Open state, that are either limited by cwnd or application.
          */
-        if (sysctl_tcp_early_retrans < 3 || !tp->packets_out ||
-            !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
+        if ((sysctl_tcp_early_retrans != 3 && sysctl_tcp_early_retrans != 4) ||
+            !tp->packets_out || !tcp_is_sack(tp) ||
+            icsk->icsk_ca_state != TCP_CA_Open)
                 return false;
 
         if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 953c02a8566e..40d893556e67 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -566,9 +566,6 @@ void tcp_write_timer_handler(struct sock *sk)
         case ICSK_TIME_REO_TIMEOUT:
                 tcp_rack_reo_timeout(sk);
                 break;
-        case ICSK_TIME_EARLY_RETRANS:
-                tcp_resume_early_retransmit(sk);
-                break;
         case ICSK_TIME_LOSS_PROBE:
                 tcp_send_loss_probe(sk);
                 break;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f52c3742b404..fc14e04028bf 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1745,7 +1745,6 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
         srcp = ntohs(inet->inet_sport);
 
         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
-            icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
                 timer_active = 1;
-- cgit v1.2.3
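The quarter-RTT delay mentioned in the commit message is visible in the removed tcp_pause_early_retransmit(): tp->srtt_us stores 8x the smoothed RTT in microseconds, so srtt_us >> 5 is RTT/4, floored at 2 ms. Below is a standalone userspace C sketch of just that computation; the function name is illustrative, and RACK's actual reordering timer has its own kernel plumbing.

#include <stdint.h>
#include <stdio.h>

/* Quarter-RTT reordering delay, floored at 2 ms, as in the removed code:
 * srtt_us stores 8 * srtt in microseconds, so >> 5 yields srtt / 4.
 */
static uint64_t reorder_delay_us(uint64_t srtt_us)
{
        uint64_t quarter_rtt = srtt_us >> 5;
        uint64_t floor_us = 2000;

        return quarter_rtt > floor_us ? quarter_rtt : floor_us;
}

int main(void)
{
        /* A 40 ms smoothed RTT is stored as 8 * 40000 us -> 10 ms delay */
        printf("40 ms RTT -> %llu us delay\n",
               (unsigned long long)reorder_delay_us(8 * 40000));
        /* A 1 ms RTT would give 250 us, so it hits the 2 ms floor */
        printf("1 ms RTT  -> %llu us delay\n",
               (unsigned long long)reorder_delay_us(8 * 1000));
        return 0;
}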
From 3541f9e8bdebce02458882b66b638d7302c1f616 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Thu, 2 Feb 2017 08:04:56 -0800
Subject: tcp: add tcp_mss_clamp() helper

Small cleanup factorizing code doing the TCP_MAXSEG clamping.

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 include/linux/tcp.h      |  9 +++++++++
 net/ipv4/tcp_ipv4.c      |  5 +----
 net/ipv4/tcp_minisocks.c |  7 ++-----
 net/ipv4/tcp_output.c    | 14 ++++----------
 net/ipv6/tcp_ipv6.c      |  5 +----
 5 files changed, 17 insertions(+), 23 deletions(-)

(limited to 'net/ipv4/tcp_minisocks.c')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index f88f4649ba6f..cfc2d9506ce8 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -445,4 +445,13 @@ static inline void tcp_saved_syn_free(struct tcp_sock *tp)
 
 struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk);
 
+static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss)
+{
+        /* We use READ_ONCE() here because socket might not be locked.
+         * This happens for listeners.
+         */
+        u16 user_mss = READ_ONCE(tp->rx_opt.user_mss);
+
+        return (user_mss && user_mss < mss) ? user_mss : mss;
+}
 #endif  /* _LINUX_TCP_H */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8c9e9aa17d66..8c124d4ef4b7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1324,10 +1324,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
         tcp_ca_openreq_child(newsk, dst);
 
         tcp_sync_mss(newsk, dst_mtu(dst));
-        newtp->advmss = dst_metric_advmss(dst);
-        if (tcp_sk(sk)->rx_opt.user_mss &&
-            tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
-                newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
+        newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
 
         tcp_initialize_rcv_mss(newsk);
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index bdb443471c39..dff7d2aaf861 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -360,15 +360,12 @@ void tcp_openreq_init_rwin(struct request_sock *req,
 {
         struct inet_request_sock *ireq = inet_rsk(req);
         const struct tcp_sock *tp = tcp_sk(sk_listener);
-        u16 user_mss = READ_ONCE(tp->rx_opt.user_mss);
         int full_space = tcp_full_space(sk_listener);
-        int mss = dst_metric_advmss(dst);
         u32 window_clamp;
         __u8 rcv_wscale;
+        int mss;
 
-        if (user_mss && user_mss < mss)
-                mss = user_mss;
-
+        mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
         window_clamp = READ_ONCE(tp->window_clamp);
         /* Set this up on the first call only */
         req->rsk_window_clamp = window_clamp ? : dst_metric(dst, RTAX_WINDOW);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6d5bab8a3ea6..956bea9a5394 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3062,7 +3062,6 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
         struct sk_buff *skb;
         int tcp_header_size;
         struct tcphdr *th;
-        u16 user_mss;
         int mss;
 
         skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
@@ -3092,10 +3091,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
         }
         skb_dst_set(skb, dst);
 
-        mss = dst_metric_advmss(dst);
-        user_mss = READ_ONCE(tp->rx_opt.user_mss);
-        if (user_mss && user_mss < mss)
-                mss = user_mss;
+        mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
 
         memset(&opts, 0, sizeof(opts));
 #ifdef CONFIG_SYN_COOKIES
@@ -3201,9 +3197,7 @@ static void tcp_connect_init(struct sock *sk)
 
         if (!tp->window_clamp)
                 tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
-        tp->advmss = dst_metric_advmss(dst);
-        if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)
-                tp->advmss = tp->rx_opt.user_mss;
+        tp->advmss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
 
         tcp_initialize_rcv_mss(sk);
 
@@ -3280,8 +3274,8 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
          * user-MSS. Reserve maximum option space for middleboxes that add
          * private TCP options. The cost is reduced data space in SYN :(
          */
-        if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp)
-                tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
+        tp->rx_opt.mss_clamp = tcp_mss_clamp(tp, tp->rx_opt.mss_clamp);
+
         space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
                 MAX_TCP_OPTION_SPACE;
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 64834ec5ab73..6b9fc63fd4d2 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1147,10 +1147,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
         tcp_ca_openreq_child(newsk, dst);
 
         tcp_sync_mss(newsk, dst_mtu(dst));
-        newtp->advmss = dst_metric_advmss(dst);
-        if (tcp_sk(sk)->rx_opt.user_mss &&
-            tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
-                newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
+        newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
 
         tcp_initialize_rcv_mss(newsk);
 
-- cgit v1.2.3
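The helper's semantics are easy to check in isolation: a user MSS set via TCP_MAXSEG only ever lowers the advertised MSS, and 0 means "no user clamp". A standalone userspace C sketch of the same clamp rule follows, without the READ_ONCE(), which only matters for lockless reads on listeners.

#include <assert.h>
#include <stdint.h>

/* Same clamp rule as tcp_mss_clamp(): a nonzero user_mss only lowers mss. */
static uint16_t mss_clamp(uint16_t user_mss, uint16_t mss)
{
        return (user_mss && user_mss < mss) ? user_mss : mss;
}

int main(void)
{
        assert(mss_clamp(0, 1460) == 1460);    /* TCP_MAXSEG unset: keep dst value */
        assert(mss_clamp(536, 1460) == 536);   /* user clamp lowers the MSS */
        assert(mss_clamp(9000, 1460) == 1460); /* a larger user_mss never raises it */
        return 0;
}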
From eee2faabc63d863a129000b698a2bca54dff643d Mon Sep 17 00:00:00 2001
From: Alexey Kodanev
Date: Wed, 22 Feb 2017 13:23:56 +0300
Subject: tcp: account for ts offset only if tsecr not zero

We can get a SYN with a zero tsecr; don't apply the offset in this case.

Fixes: ee684b6f2830 ("tcp: send packets with a socket timestamp")
Signed-off-by: Alexey Kodanev
Acked-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/ipv4/tcp_minisocks.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net/ipv4/tcp_minisocks.c')

diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index dff7d2aaf861..7e16243cdb58 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -101,7 +101,8 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
                 tcp_parse_options(skb, &tmp_opt, 0, NULL);
 
                 if (tmp_opt.saw_tstamp) {
-                        tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset;
+                        if (tmp_opt.rcv_tsecr)
+                                tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset;
                         tmp_opt.ts_recent = tcptw->tw_ts_recent;
                         tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
                         paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
-- cgit v1.2.3
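The reasoning behind the fix: a SYN may legitimately carry tsecr == 0 because the peer has nothing to echo yet, and unconditionally subtracting tw_ts_offset would turn that 0 into a bogus nonzero echoed value. A standalone userspace C sketch of the fixed behavior follows; the function name and sample offset are illustrative.

#include <stdint.h>
#include <stdio.h>

/* Undo the per-connection timestamp offset only for a real echo:
 * tsecr == 0 on a SYN means "nothing echoed yet", and subtracting the
 * offset from it would fabricate a nonzero echoed value.
 */
static uint32_t undo_ts_offset(uint32_t rcv_tsecr, uint32_t ts_offset)
{
        if (rcv_tsecr)
                rcv_tsecr -= ts_offset;
        return rcv_tsecr;
}

int main(void)
{
        uint32_t offset = 0x12345678;

        /* Real echo: recover the original value, 1000 */
        printf("echo:    %u\n", undo_ts_offset(1000u + offset, offset));
        /* SYN with zero tsecr: must stay 0 */
        printf("no echo: %u\n", undo_ts_offset(0, offset));
        return 0;
}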