diff options
author | Yuchung Cheng <ycheng@google.com> | 2016-02-02 10:33:05 -0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-02-07 14:09:50 -0500 |
commit | 31ba0c10723e9eba378f96de1d1a9426129949e1 (patch) | |
tree | 98b0ed4aeb112803ccbb80ef18754acf58faa87d /net/ipv4/tcp_input.c | |
parent | e662ca40de846e0a2be6326a7c4668326ddb194c (diff) | |
download | talos-obmc-linux-31ba0c10723e9eba378f96de1d1a9426129949e1.tar.gz talos-obmc-linux-31ba0c10723e9eba378f96de1d1a9426129949e1.zip |
tcp: move cwnd reduction after recovery state processing
Currently the cwnd is reduced and increased in various different
places. The reduction happens in various places in the recovery
state processing (tcp_fastretrans_alert) while the increase
happens afterward.
A better sequence is to identify lost packets and update
the congestion control state (icsk_ca_state) first. Then, based
on the new state, raise or lower the cwnd in one central place. This
makes cwnd changes easier to reason about.
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 60 |
1 files changed, 28 insertions, 32 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 84a4ab9c05d1..dc810df53e90 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2471,14 +2471,12 @@ static void tcp_init_cwnd_reduction(struct sock *sk) tcp_ecn_queue_cwr(tp); } -static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked, - int fast_rexmit, int flag) +static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, + int flag) { struct tcp_sock *tp = tcp_sk(sk); int sndcnt = 0; int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); - int newly_acked_sacked = prior_unsacked - - (tp->packets_out - tp->sacked_out); if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd)) return; @@ -2496,7 +2494,8 @@ static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked, } else { sndcnt = min(delta, newly_acked_sacked); } - sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0)); + /* Force a fast retransmit upon entering fast recovery */ + sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1)); tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; } @@ -2541,7 +2540,7 @@ static void tcp_try_keep_open(struct sock *sk) } } -static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked) +static void tcp_try_to_open(struct sock *sk, int flag) { struct tcp_sock *tp = tcp_sk(sk); @@ -2555,8 +2554,6 @@ static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked) if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { tcp_try_keep_open(sk); - } else { - tcp_cwnd_reduction(sk, prior_unsacked, 0, flag); } } @@ -2720,8 +2717,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack, } /* Undo during fast recovery after partial ACK. 
*/ -static bool tcp_try_undo_partial(struct sock *sk, const int acked, - const int prior_unsacked, int flag) +static bool tcp_try_undo_partial(struct sock *sk, const int acked) { struct tcp_sock *tp = tcp_sk(sk); @@ -2736,10 +2732,8 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked, * can undo. Otherwise we clock out new packets but do not * mark more packets lost or retransmit more. */ - if (tp->retrans_out) { - tcp_cwnd_reduction(sk, prior_unsacked, 0, flag); + if (tp->retrans_out) return true; - } if (!tcp_any_retrans_done(sk)) tp->retrans_stamp = 0; @@ -2758,21 +2752,21 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked, * taking into account both packets sitting in receiver's buffer and * packets lost by network. * - * Besides that it does CWND reduction, when packet loss is detected - * and changes state of machine. + * Besides that it updates the congestion state when packet loss or ECN + * is detected. But it does not reduce the cwnd, it is done by the + * congestion control later. * * It does _not_ decide what to send, it is made in function * tcp_xmit_retransmit_queue(). */ static void tcp_fastretrans_alert(struct sock *sk, const int acked, - const int prior_unsacked, - bool is_dupack, int flag, int *rexmit) + bool is_dupack, int *ack_flag, int *rexmit) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); + int fast_rexmit = 0, flag = *ack_flag; bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && (tcp_fackets_out(tp) > tp->reordering)); - int fast_rexmit = 0; if (WARN_ON(!tp->packets_out && tp->sacked_out)) tp->sacked_out = 0; @@ -2819,8 +2813,10 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, /* Use RACK to detect loss */ if (sysctl_tcp_recovery & TCP_RACK_LOST_RETRANS && - tcp_rack_mark_lost(sk)) + tcp_rack_mark_lost(sk)) { flag |= FLAG_LOST_RETRANS; + *ack_flag |= FLAG_LOST_RETRANS; + } /* E. Process state. 
*/ switch (icsk->icsk_ca_state) { @@ -2829,7 +2825,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, if (tcp_is_reno(tp) && is_dupack) tcp_add_reno_sack(sk); } else { - if (tcp_try_undo_partial(sk, acked, prior_unsacked, flag)) + if (tcp_try_undo_partial(sk, acked)) return; /* Partial ACK arrived. Force fast retransmit. */ do_lost = tcp_is_reno(tp) || @@ -2858,7 +2854,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, tcp_try_undo_dsack(sk); if (!tcp_time_to_recover(sk, flag)) { - tcp_try_to_open(sk, flag, prior_unsacked); + tcp_try_to_open(sk, flag); return; } @@ -2880,7 +2876,6 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, if (do_lost) tcp_update_scoreboard(sk, fast_rexmit); - tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit, flag); *rexmit = REXMIT_LOST; } @@ -3306,9 +3301,6 @@ static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag) /* Decide wheather to run the increase function of congestion control. */ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) { - if (tcp_in_cwnd_reduction(sk)) - return false; - /* If reordering is high then always grow cwnd whenever data is * delivered regardless of its ordering. Otherwise stay conservative * and only grow cwnd on in-order delivery (RFC5681). 
A stretched ACK w/ @@ -3551,6 +3543,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) int prior_packets = tp->packets_out; const int prior_unsacked = tp->packets_out - tp->sacked_out; int acked = 0; /* Number of packets newly acked */ + int acked_sacked; /* Number of packets newly acked or sacked */ int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */ sack_state.first_sackt.v64 = 0; @@ -3647,15 +3640,20 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (tcp_ack_is_dubious(sk, flag)) { is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); - tcp_fastretrans_alert(sk, acked, prior_unsacked, - is_dupack, flag, &rexmit); + tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); } if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag); + acked_sacked = prior_unsacked - (tp->packets_out - tp->sacked_out); /* Advance cwnd if state allows */ - if (tcp_may_raise_cwnd(sk, flag)) + if (tcp_in_cwnd_reduction(sk)) { + /* Reduce cwnd if state mandates */ + tcp_cwnd_reduction(sk, acked_sacked, flag); + } else if (tcp_may_raise_cwnd(sk, flag)) { + /* Advance cwnd if state allows */ tcp_cong_avoid(sk, ack, acked); + } if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { struct dst_entry *dst = __sk_dst_get(sk); @@ -3672,8 +3670,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) no_queue: /* If data was DSACKed, see if we can undo a cwnd reduction. */ if (flag & FLAG_DSACKING_ACK) - tcp_fastretrans_alert(sk, acked, prior_unsacked, - is_dupack, flag, &rexmit); + tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. 
@@ -3696,8 +3693,7 @@ old_ack: if (TCP_SKB_CB(skb)->sacked) { flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, &sack_state); - tcp_fastretrans_alert(sk, acked, prior_unsacked, - is_dupack, flag, &rexmit); + tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); tcp_xmit_recovery(sk, rexmit); } |