diff options
author | Eric Dumazet <edumazet@google.com> | 2015-10-02 11:43:35 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-10-03 04:32:43 -0700 |
commit | ca6fb06518836ef9b65dc0aac02ff97704d52a05 (patch) | |
tree | 3fbe433aac9dcf1cae49e6715ced18bd3505d811 /net/ipv4/tcp_input.c | |
parent | 1b33bc3e9e903f7293f7dfe80a875b2a5d0305aa (diff) | |
download | talos-obmc-linux-ca6fb06518836ef9b65dc0aac02ff97704d52a05.tar.gz talos-obmc-linux-ca6fb06518836ef9b65dc0aac02ff97704d52a05.zip |
tcp: attach SYNACK messages to request sockets instead of listener
If a listen backlog is very big (to avoid syncookies), then
the listener sk->sk_wmem_alloc is the main source of false
sharing, as we need to touch it twice per SYNACK re-transmit
and TX completion.
(One SYN packet takes listener lock once, but up to 6 SYNACK
are generated)
By attaching the skb to the request socket, we remove this
source of contention.
Tested:
listen(fd, 10485760); // single listener (no SO_REUSEPORT)
16 RX/TX queue NIC
Sustain a SYNFLOOD attack of ~320,000 SYN per second,
Sending ~1,400,000 SYNACK per second.
Perf profiles now show listener spinlock being next bottleneck.
20.29% [kernel] [k] queued_spin_lock_slowpath
10.06% [kernel] [k] __inet_lookup_established
5.12% [kernel] [k] reqsk_timer_handler
3.22% [kernel] [k] get_next_timer_interrupt
3.00% [kernel] [k] tcp_make_synack
2.77% [kernel] [k] ipt_do_table
2.70% [kernel] [k] run_timer_softirq
2.50% [kernel] [k] ip_finish_output
2.04% [kernel] [k] cascade
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 23 |
1 files changed, 12 insertions, 11 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a56912772354..27108757c310 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6120,8 +6120,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, struct request_sock *req; bool want_cookie = false; struct flowi fl; - int err; - /* TW buckets are converted to open requests without * limitations, they conserve resources and peer is @@ -6230,21 +6228,24 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_rsk(req)->snt_isn = isn; tcp_rsk(req)->txhash = net_tx_rndhash(); tcp_openreq_init_rwin(req, sk, dst); - if (!want_cookie) + if (!want_cookie) { fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst); - err = af_ops->send_synack(fastopen_sk ?: sk, dst, &fl, req, - skb_get_queue_mapping(skb), &foc); + tcp_reqsk_record_syn(sk, req, skb); + } if (fastopen_sk) { + af_ops->send_synack(fastopen_sk, dst, &fl, req, + skb_get_queue_mapping(skb), &foc, false); sock_put(fastopen_sk); } else { - if (err || want_cookie) - goto drop_and_free; - tcp_rsk(req)->tfo_listener = false; - inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + if (!want_cookie) + inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + af_ops->send_synack(sk, dst, &fl, req, + skb_get_queue_mapping(skb), &foc, !want_cookie); + if (want_cookie) + goto drop_and_free; } - tcp_reqsk_record_syn(sk, req, skb); - + reqsk_put(req); return 0; drop_and_release: |