diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv4/proc.c | 1 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 9 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 63 |
3 files changed, 60 insertions, 13 deletions
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 4a0335854b89..8ecd7ad959b4 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -279,6 +279,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES), SNMP_MIB_ITEM("BusyPollRxPackets", LINUX_MIB_BUSYPOLLRXPACKETS), + SNMP_MIB_ITEM("TCPAutoCorking", LINUX_MIB_TCPAUTOCORKING), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 3d69ec8dac57..38c8ec90ff68 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -733,6 +733,15 @@ static struct ctl_table ipv4_table[] = { .extra2 = &gso_max_segs, }, { + .procname = "tcp_autocorking", + .data = &sysctl_tcp_autocorking, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, + { .procname = "udp_mem", .data = &sysctl_udp_mem, .maxlen = sizeof(sysctl_udp_mem), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c4638e6f0238..0ca87547becb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -285,6 +285,8 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; int sysctl_tcp_min_tso_segs __read_mostly = 2; +int sysctl_tcp_autocorking __read_mostly = 1; + struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); @@ -619,19 +621,52 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags) tp->snd_up = tp->write_seq; } -static inline void tcp_push(struct sock *sk, int flags, int mss_now, - int nonagle) +/* If a not yet filled skb is pushed, do not send it if + * we have packets in Qdisc or NIC queues : + * Because TX completion will happen shortly, it gives a chance + * to coalesce future sendmsg() payload into this skb, without + * need for a timer, and with no latency trade off. + * As packets containing data payload have a bigger truesize + * than pure acks (dataless) packets, the last check prevents + * autocorking if we only have an ACK in Qdisc/NIC queues. + */ +static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, + int size_goal) { - if (tcp_send_head(sk)) { - struct tcp_sock *tp = tcp_sk(sk); + return skb->len < size_goal && + sysctl_tcp_autocorking && + atomic_read(&sk->sk_wmem_alloc) > skb->truesize; +} + +static void tcp_push(struct sock *sk, int flags, int mss_now, + int nonagle, int size_goal) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; - if (!(flags & MSG_MORE) || forced_push(tp)) - tcp_mark_push(tp, tcp_write_queue_tail(sk)); + if (!tcp_send_head(sk)) + return; + + skb = tcp_write_queue_tail(sk); + if (!(flags & MSG_MORE) || forced_push(tp)) + tcp_mark_push(tp, skb); + + tcp_mark_urg(tp, flags); + + if (tcp_should_autocork(sk, skb, size_goal)) { - tcp_mark_urg(tp, flags); - __tcp_push_pending_frames(sk, mss_now, - (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle); + /* avoid atomic op if TSQ_THROTTLED bit is already set */ + if (!test_bit(TSQ_THROTTLED, &tp->tsq_flags)) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING); + set_bit(TSQ_THROTTLED, &tp->tsq_flags); + } + return; } + + if (flags & MSG_MORE) + nonagle = TCP_NAGLE_CORK; + + __tcp_push_pending_frames(sk, mss_now, nonagle); } static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, @@ -934,7 +969,8 @@ new_segment: wait_for_sndbuf: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); wait_for_memory: - tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); + tcp_push(sk, flags & ~MSG_MORE, mss_now, + TCP_NAGLE_PUSH, size_goal); if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) goto do_error; @@ -944,7 +980,7 @@ wait_for_memory: out: if (copied && !(flags & MSG_SENDPAGE_NOTLAST)) - tcp_push(sk, flags, mss_now, tp->nonagle); + tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); return copied; do_error: @@ -1225,7 +1261,8 @@ wait_for_sndbuf: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); wait_for_memory: if (copied) - tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); + tcp_push(sk, flags & ~MSG_MORE, mss_now, + TCP_NAGLE_PUSH, size_goal); if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) goto do_error; @@ -1236,7 +1273,7 @@ wait_for_memory: out: if (copied) - tcp_push(sk, flags, mss_now, tp->nonagle); + tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); release_sock(sk); return copied + copied_syn; |