From b604dd9883f783a94020d772e4fe03160f455372 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 27 Sep 2018 15:13:08 +0100 Subject: rxrpc: Fix RTT gathering Fix RTT information gathering in AF_RXRPC by the following means: (1) Enable Rx timestamping on the transport socket with SO_TIMESTAMPNS. (2) If the sk_buff doesn't have a timestamp set when rxrpc_data_ready() collects it, set it at that point. (3) Allow ACKs to be requested on the last packet of a client call, but not a service call. We need to be careful lest we undo: bf7d620abf22c321208a4da4f435e7af52551a21 Author: David Howells Date: Thu Oct 6 08:11:51 2016 +0100 rxrpc: Don't request an ACK on the last DATA packet of a call's Tx phase but that only really applies to service calls that we're handling, since the client side gets to send the final ACK (or not). (4) When about to transmit an ACK or DATA packet, record the Tx timestamp before only; don't update the timestamp afterwards. (5) Switch the ordering between recording the serial and recording the timestamp to always set the serial number first. The serial number shouldn't be seen referenced by an ACK packet until we've transmitted the packet bearing it - so in the Rx path, we don't need the timestamp until we've checked the serial number. Fixes: cf1a6474f807 ("rxrpc: Add per-peer RTT tracker") Signed-off-by: David Howells --- net/rxrpc/local_object.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net/rxrpc/local_object.c') diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 777c3ed4cfc0..81de7d889ffa 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -173,6 +173,15 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) _debug("setsockopt failed"); goto error; } + + /* We want receive timestamps. */ + opt = 1; + ret = kernel_setsockopt(local->socket, SOL_SOCKET, SO_TIMESTAMPNS, + (char *)&opt, sizeof(opt)); + if (ret < 0) { + _debug("setsockopt failed"); + goto error; + } break; default: -- cgit v1.2.3 From 37a675e768d7606fe8a53e0c459c9b53e121ac20 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 27 Sep 2018 15:13:09 +0100 Subject: rxrpc: Fix transport sockopts to get IPv4 errors on an IPv6 socket It seems that enabling IPV6_RECVERR on an IPv6 socket doesn't also turn on IP_RECVERR, so neither local errors nor ICMP-transported remote errors from IPv4 peer addresses are returned to the AF_RXRPC protocol. Make the sockopt setting code in rxrpc_open_socket() fall through from the AF_INET6 case to the AF_INET case to turn on all the AF_INET options too in the AF_INET6 case. Fixes: f2aeed3a591f ("rxrpc: Fix error reception on AF_INET6 sockets") Signed-off-by: David Howells --- net/rxrpc/local_object.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'net/rxrpc/local_object.c') diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 81de7d889ffa..94d234e9c685 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -135,10 +135,10 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) } switch (local->srx.transport.family) { - case AF_INET: - /* we want to receive ICMP errors */ + case AF_INET6: + /* we want to receive ICMPv6 errors */ opt = 1; - ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR, + ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_RECVERR, (char *) &opt, sizeof(opt)); if (ret < 0) { _debug("setsockopt failed"); @@ -146,19 +146,22 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) } /* we want to set the don't fragment bit */ - opt = IP_PMTUDISC_DO; - ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER, + opt = IPV6_PMTUDISC_DO; + ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_MTU_DISCOVER, (char *) &opt, sizeof(opt)); if (ret < 0) { _debug("setsockopt failed"); goto error; } - break; - case AF_INET6: + /* Fall through and set IPv4 options too otherwise we don't get + * errors from IPv4 packets sent through the IPv6 socket. + */ + + case AF_INET: /* we want to receive ICMP errors */ opt = 1; - ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_RECVERR, + ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR, (char *) &opt, sizeof(opt)); if (ret < 0) { _debug("setsockopt failed"); @@ -166,8 +169,8 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) } /* we want to set the don't fragment bit */ - opt = IPV6_PMTUDISC_DO; - ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_MTU_DISCOVER, + opt = IP_PMTUDISC_DO; + ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER, (char *) &opt, sizeof(opt)); if (ret < 0) { _debug("setsockopt failed"); -- cgit v1.2.3 From 2cfa2271604bb26e75b828d38f357ed084464795 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 5 Oct 2018 14:05:35 +0100 Subject: rxrpc: Fix the data_ready handler Fix the rxrpc_data_ready() function to pick up all packets and to not miss any. There are two problems: (1) The sk_data_ready pointer on the UDP socket is set *after* it is bound. This means that it's open for business before we're ready to dequeue packets and there's a tiny window exists in which a packet can sneak onto the receive queue, but we never know about it. Fix this by setting the pointers on the socket prior to binding it. (2) skb_recv_udp() will return an error (such as ENETUNREACH) if there was an error on the transmission side, even though we set the sk_error_report hook. Because rxrpc_data_ready() returns immediately in such a case, it never actually removes its packet from the receive queue. Fix this by abstracting out the UDP dequeuing and checksumming into a separate function that keeps hammering on skb_recv_udp() until it returns -EAGAIN, passing the packets extracted to the remainder of the function. and two potential problems: (3) It might be possible in some circumstances or in the future for packets to be being added to the UDP receive queue whilst rxrpc is running consuming them, so the data_ready() handler might get called less often than once per packet. Allow for this by fully draining the queue on each call as (2). (4) If a packet fails the checksum check, the code currently returns after discarding the packet without checking for more. Allow for this by fully draining the queue on each call as (2). Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") Signed-off-by: David Howells Acked-by: Paolo Abeni --- net/rxrpc/input.c | 68 +++++++++++++++++++++++++++--------------------- net/rxrpc/local_object.c | 11 ++++---- 2 files changed, 44 insertions(+), 35 deletions(-) (limited to 'net/rxrpc/local_object.c') diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index c5af9955665b..c3114fa66c92 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1121,7 +1121,7 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) * shut down and the local endpoint from going away, thus sk_user_data will not * be cleared until this function returns. */ -void rxrpc_data_ready(struct sock *udp_sk) +void rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) { struct rxrpc_connection *conn; struct rxrpc_channel *chan; @@ -1130,39 +1130,11 @@ void rxrpc_data_ready(struct sock *udp_sk) struct rxrpc_local *local = udp_sk->sk_user_data; struct rxrpc_peer *peer = NULL; struct rxrpc_sock *rx = NULL; - struct sk_buff *skb; unsigned int channel; - int ret, skew = 0; + int skew = 0; _enter("%p", udp_sk); - ASSERT(!irqs_disabled()); - - skb = skb_recv_udp(udp_sk, 0, 1, &ret); - if (!skb) { - if (ret == -EAGAIN) - return; - _debug("UDP socket error %d", ret); - return; - } - - if (skb->tstamp == 0) - skb->tstamp = ktime_get_real(); - - rxrpc_new_skb(skb, rxrpc_skb_rx_received); - - _net("recv skb %p", skb); - - /* we'll probably need to checksum it (didn't call sock_recvmsg) */ - if (skb_checksum_complete(skb)) { - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); - __UDP_INC_STATS(sock_net(udp_sk), UDP_MIB_INERRORS, 0); - _leave(" [CSUM failed]"); - return; - } - - __UDP_INC_STATS(sock_net(udp_sk), UDP_MIB_INDATAGRAMS, 0); - /* The UDP protocol already released all skb resources; * we are free to add our own data there. */ @@ -1181,6 +1153,8 @@ void rxrpc_data_ready(struct sock *udp_sk) } } + if (skb->tstamp == 0) + skb->tstamp = ktime_get_real(); trace_rxrpc_rx_packet(sp); switch (sp->hdr.type) { @@ -1398,3 +1372,37 @@ reject_packet: rxrpc_reject_packet(local, skb); _leave(" [badmsg]"); } + +void rxrpc_data_ready(struct sock *udp_sk) +{ + struct sk_buff *skb; + int ret; + + for (;;) { + skb = skb_recv_udp(udp_sk, 0, 1, &ret); + if (!skb) { + if (ret == -EAGAIN) + return; + + /* If there was a transmission failure, we get an error + * here that we need to ignore. + */ + _debug("UDP socket error %d", ret); + continue; + } + + rxrpc_new_skb(skb, rxrpc_skb_rx_received); + + /* we'll probably need to checksum it (didn't call sock_recvmsg) */ + if (skb_checksum_complete(skb)) { + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + __UDP_INC_STATS(sock_net(udp_sk), UDP_MIB_INERRORS, 0); + _debug("csum failed"); + continue; + } + + __UDP_INC_STATS(sock_net(udp_sk), UDP_MIB_INDATAGRAMS, 0); + + rxrpc_input_packet(udp_sk, skb); + } +} diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 94d234e9c685..30862f44c9f1 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -122,6 +122,12 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) return ret; } + /* set the socket up */ + sock = local->socket->sk; + sock->sk_user_data = local; + sock->sk_data_ready = rxrpc_data_ready; + sock->sk_error_report = rxrpc_error_report; + /* if a local address was supplied then bind it */ if (local->srx.transport_len > sizeof(sa_family_t)) { _debug("bind"); @@ -191,11 +197,6 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) BUG(); } - /* set the socket up */ - sock = local->socket->sk; - sock->sk_user_data = local; - sock->sk_data_ready = rxrpc_data_ready; - sock->sk_error_report = rxrpc_error_report; _leave(" = 0"); return 0; -- cgit v1.2.3 From 5271953cad31b97dea80f848c16e96ad66401199 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 4 Oct 2018 11:10:51 +0100 Subject: rxrpc: Use the UDP encap_rcv hook Use the UDP encap_rcv hook to cut the bit out of the rxrpc packet reception in which a packet is placed onto the UDP receive queue and then immediately removed again by rxrpc. Going via the queue in this manner seems like it should be unnecessary. This does, however, require the invention of a value to place in encap_type as that's one of the conditions to switch packets out to the encap_rcv hook. Possibly the value doesn't actually matter for anything other than sockopts on the UDP socket, which aren't accessible outside of rxrpc anyway. This seems to cut a bit of time out of the time elapsed between each sk_buff being timestamped and turning up in rxrpc (the final number in the following trace excerpts). I measured this by making the rxrpc_rx_packet trace point print the time elapsed between the skb being timestamped and the current time (in ns), e.g.: ... 424.278721: rxrpc_rx_packet: ... ACK 25026 So doing a 512MiB DIO read from my test server, with an unmodified kernel: N min max sum mean stddev 27605 2626 7581 7.83992e+07 2840.04 181.029 and with the patch applied: N min max sum mean stddev 27547 1895 12165 6.77461e+07 2459.29 255.02 Signed-off-by: David Howells --- include/uapi/linux/udp.h | 1 + net/rxrpc/ar-internal.h | 2 +- net/rxrpc/input.c | 50 ++++++++++++------------------------------------ net/rxrpc/local_object.c | 27 +++++++++++++++++++++----- 4 files changed, 36 insertions(+), 44 deletions(-) (limited to 'net/rxrpc/local_object.c') diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h index 09d00f8c442b..09502de447f5 100644 --- a/include/uapi/linux/udp.h +++ b/include/uapi/linux/udp.h @@ -40,5 +40,6 @@ struct udphdr { #define UDP_ENCAP_L2TPINUDP 3 /* rfc2661 */ #define UDP_ENCAP_GTP0 4 /* GSM TS 09.60 */ #define UDP_ENCAP_GTP1U 5 /* 3GPP TS 29.060 */ +#define UDP_ENCAP_RXRPC 6 #endif /* _UAPI_LINUX_UDP_H */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 63c43b3a2096..ab60c0313fd4 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -966,7 +966,7 @@ void rxrpc_unpublish_service_conn(struct rxrpc_connection *); /* * input.c */ -void rxrpc_data_ready(struct sock *); +int rxrpc_input_packet(struct sock *, struct sk_buff *); /* * insecure.c diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index c3114fa66c92..1866aeef2284 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1121,7 +1121,7 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) * shut down and the local endpoint from going away, thus sk_user_data will not * be cleared until this function returns. */ -void rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) +int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) { struct rxrpc_connection *conn; struct rxrpc_channel *chan; @@ -1135,6 +1135,13 @@ void rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) _enter("%p", udp_sk); + if (skb->tstamp == 0) + skb->tstamp = ktime_get_real(); + + rxrpc_new_skb(skb, rxrpc_skb_rx_received); + + skb_pull(skb, sizeof(struct udphdr)); + /* The UDP protocol already released all skb resources; * we are free to add our own data there. */ @@ -1148,8 +1155,8 @@ void rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) static int lose; if ((lose++ & 7) == 7) { trace_rxrpc_rx_lose(sp); - rxrpc_lose_skb(skb, rxrpc_skb_rx_lost); - return; + rxrpc_free_skb(skb, rxrpc_skb_rx_lost); + return 0; } } @@ -1332,7 +1339,7 @@ discard: rxrpc_free_skb(skb, rxrpc_skb_rx_freed); out: trace_rxrpc_rx_done(0, 0); - return; + return 0; out_unlock: rcu_read_unlock(); @@ -1371,38 +1378,5 @@ reject_packet: trace_rxrpc_rx_done(skb->mark, skb->priority); rxrpc_reject_packet(local, skb); _leave(" [badmsg]"); -} - -void rxrpc_data_ready(struct sock *udp_sk) -{ - struct sk_buff *skb; - int ret; - - for (;;) { - skb = skb_recv_udp(udp_sk, 0, 1, &ret); - if (!skb) { - if (ret == -EAGAIN) - return; - - /* If there was a transmission failure, we get an error - * here that we need to ignore. - */ - _debug("UDP socket error %d", ret); - continue; - } - - rxrpc_new_skb(skb, rxrpc_skb_rx_received); - - /* we'll probably need to checksum it (didn't call sock_recvmsg) */ - if (skb_checksum_complete(skb)) { - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); - __UDP_INC_STATS(sock_net(udp_sk), UDP_MIB_INERRORS, 0); - _debug("csum failed"); - continue; - } - - __UDP_INC_STATS(sock_net(udp_sk), UDP_MIB_INDATAGRAMS, 0); - - rxrpc_input_packet(udp_sk, skb); - } + return 0; } diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 30862f44c9f1..cad0691c2bb4 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "ar-internal.h" @@ -108,7 +109,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, */ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) { - struct sock *sock; + struct sock *usk; int ret, opt; _enter("%p{%d,%d}", @@ -123,10 +124,26 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) } /* set the socket up */ - sock = local->socket->sk; - sock->sk_user_data = local; - sock->sk_data_ready = rxrpc_data_ready; - sock->sk_error_report = rxrpc_error_report; + usk = local->socket->sk; + inet_sk(usk)->mc_loop = 0; + + /* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */ + inet_inc_convert_csum(usk); + + rcu_assign_sk_user_data(usk, local); + + udp_sk(usk)->encap_type = UDP_ENCAP_RXRPC; + udp_sk(usk)->encap_rcv = rxrpc_input_packet; + udp_sk(usk)->encap_destroy = NULL; + udp_sk(usk)->gro_receive = NULL; + udp_sk(usk)->gro_complete = NULL; + + udp_encap_enable(); +#if IS_ENABLED(CONFIG_IPV6) + if (local->srx.transport.family == AF_INET6) + udpv6_encap_enable(); +#endif + usk->sk_error_report = rxrpc_error_report; /* if a local address was supplied then bind it */ if (local->srx.transport_len > sizeof(sa_family_t)) { -- cgit v1.2.3