diff options
author | Eric Dumazet <edumazet@google.com> | 2018-10-15 09:37:52 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-10-15 22:56:41 -0700 |
commit | 5f6188a8003d080e3753b8f14f4a5a2325ae1ff6 (patch) | |
tree | 17f35c12eeef92a69902b197a73c1123eaac9e63 | |
parent | 1a3aea2534f4f3083f29b2b047aa83a9d6c777a4 (diff) | |
download | talos-op-linux-5f6188a8003d080e3753b8f14f4a5a2325ae1ff6.tar.gz talos-op-linux-5f6188a8003d080e3753b8f14f4a5a2325ae1ff6.zip |
tcp: do not change tcp_wstamp_ns in tcp_mstamp_refresh
In EDT design, I made the mistake of using tcp_wstamp_ns
to store the last tcp_clock_ns() sample and to store the
pacing virtual timer.
This causes major regressions at high speed flows.
Introduce tcp_clock_cache to store last tcp_clock_ns().
This is needed because some arches have slow high-resolution
kernel time service.
tcp_wstamp_ns is only updated when a packet is sent.
Note that we can remove tcp_mstamp in the future since
tcp_mstamp is essentially tcp_clock_cache/1000, so the
apparent socket size increase is temporary.
Fixes: 9799ccb0e984 ("tcp: add tcp_wstamp_ns socket field")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/tcp.h | 1 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 9 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 2 |
3 files changed, 8 insertions, 4 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 848f5b25e178..8ed77bb4ed86 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -249,6 +249,7 @@ struct tcp_sock { u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ u64 tcp_wstamp_ns; /* departure time for next sent data packet */ + u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ /* RTT measurement */ u64 tcp_mstamp; /* most recent packet received/sent */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 059b67af28b1..f14df66a0c85 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -52,9 +52,8 @@ void tcp_mstamp_refresh(struct tcp_sock *tp) { u64 val = tcp_clock_ns(); - /* departure time for next data packet */ - if (val > tp->tcp_wstamp_ns) - tp->tcp_wstamp_ns = val; + if (val > tp->tcp_clock_cache) + tp->tcp_clock_cache = val; val = div_u64(val, NSEC_PER_USEC); if (val > tp->tcp_mstamp) @@ -1050,6 +1049,10 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, if (unlikely(!skb)) return -ENOBUFS; } + + /* TODO: might take care of jitter here */ + tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache); + skb->skb_mstamp_ns = tp->tcp_wstamp_ns; inet = inet_sk(sk); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 61023d50cd60..676020663ce8 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -360,7 +360,7 @@ static void tcp_probe_timer(struct sock *sk) */ start_ts = tcp_skb_timestamp(skb); if (!start_ts) - skb->skb_mstamp_ns = tp->tcp_wstamp_ns; + skb->skb_mstamp_ns = tp->tcp_clock_cache; else if (icsk->icsk_user_timeout && (s32)(tcp_time_stamp(tp) - start_ts) > icsk->icsk_user_timeout) goto abort; |