From b1ed4c4fa9a5ccf325184fd90edc50978ef6e33a Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 27 Jun 2016 15:33:56 -0700 Subject: tcp: add an ability to dump and restore window parameters We found that sometimes a restored tcp socket doesn't work. The cause of this bug is incorrect window parameters: in this case tcp_acceptable_seq() returns tcp_wnd_end(tp) instead of tp->snd_nxt. The other side drops packets with this seq, because seq is less than tp->rcv_nxt ( tcp_sequence() ). Data from a send queue is sent only if there is enough space in a window, so when we restore unacked data, we need to expand a window to fit this data. This was in the first version of this patch: "tcp: extend window to fit all restored unacked data in a send queue" Then Alexey recommended that I restore the window parameters instead of adjusting them according to the data in the send queue. This sounds reasonable. rcv_wnd has to be restored, because it was reported to the other side and the offered window is never shrunk. One of the reasons why we need to restore snd_wnd was described above. Cc: Pavel Emelyanov Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Signed-off-by: Andrey Vagin Signed-off-by: David S. 
Miller --- net/ipv4/tcp.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5c7ed147449c..108ef2a6665c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2277,6 +2277,38 @@ static inline bool tcp_can_repair_sock(const struct sock *sk) ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED)); } +static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int len) +{ + struct tcp_repair_window opt; + + if (!tp->repair) + return -EPERM; + + if (len != sizeof(opt)) + return -EINVAL; + + if (copy_from_user(&opt, optbuf, sizeof(opt))) + return -EFAULT; + + if (opt.max_window < opt.snd_wnd) + return -EINVAL; + + if (after(opt.snd_wl1, tp->rcv_nxt + opt.rcv_wnd)) + return -EINVAL; + + if (after(opt.rcv_wup, tp->rcv_nxt)) + return -EINVAL; + + tp->snd_wl1 = opt.snd_wl1; + tp->snd_wnd = opt.snd_wnd; + tp->max_window = opt.max_window; + + tp->rcv_wnd = opt.rcv_wnd; + tp->rcv_wup = opt.rcv_wup; + + return 0; +} + static int tcp_repair_options_est(struct tcp_sock *tp, struct tcp_repair_opt __user *optbuf, unsigned int len) { @@ -2604,6 +2636,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level, else tp->tsoffset = val - tcp_time_stamp; break; + case TCP_REPAIR_WINDOW: + err = tcp_repair_set_window(tp, optval, optlen); + break; case TCP_NOTSENT_LOWAT: tp->notsent_lowat = val; sk->sk_write_space(sk); @@ -2860,6 +2895,28 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return -EINVAL; break; + case TCP_REPAIR_WINDOW: { + struct tcp_repair_window opt; + + if (get_user(len, optlen)) + return -EFAULT; + + if (len != sizeof(opt)) + return -EINVAL; + + if (!tp->repair) + return -EPERM; + + opt.snd_wl1 = tp->snd_wl1; + opt.snd_wnd = tp->snd_wnd; + opt.max_window = tp->max_window; + opt.rcv_wnd = tp->rcv_wnd; + opt.rcv_wup = tp->rcv_wup; + + if (copy_to_user(optval, &opt, len)) + return -EFAULT; + return 0; + } case 
TCP_QUEUE_SEQ: if (tp->repair_queue == TCP_SEND_QUEUE) val = tp->write_seq; -- cgit v1.2.3 From 19689e38eca5d7b32755182d4e62efd7a5376c45 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Jun 2016 18:51:53 +0200 Subject: tcp: md5: use kmalloc() backed scratch areas Some arches have virtually mapped kernel stacks, or will soon have. tcp_md5_hash_header() uses an automatic variable to copy tcp header before mangling th->check and calling crypto function, which might be problematic on such arches. David says that using percpu storage is also problematic on non SMP builds. Just use kmalloc() to allocate scratch areas. Signed-off-by: Eric Dumazet Reported-by: Andy Lutomirski Signed-off-by: David S. Miller --- include/net/tcp.h | 3 +-- net/ipv4/tcp.c | 10 ++++++++++ net/ipv4/tcp_ipv4.c | 31 ++++++++++++++----------------- net/ipv6/tcp_ipv6.c | 29 ++++++++++++++++------------- 4 files changed, 41 insertions(+), 32 deletions(-) (limited to 'net/ipv4/tcp.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index d825858fe4f1..c00e7d51bb18 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1384,7 +1384,7 @@ union tcp_md5sum_block { /* - pool: digest algorithm, hash description and scratch buffer */ struct tcp_md5sig_pool { struct ahash_request *md5_req; - union tcp_md5sum_block md5_blk; + void *scratch; }; /* - functions */ @@ -1420,7 +1420,6 @@ static inline void tcp_put_md5sig_pool(void) local_bh_enable(); } -int tcp_md5_hash_header(struct tcp_md5sig_pool *, const struct tcphdr *); int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *, unsigned int header_len); int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 108ef2a6665c..032a96d78c99 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3026,8 +3026,18 @@ static void __tcp_alloc_md5sig_pool(void) return; for_each_possible_cpu(cpu) { + void *scratch = per_cpu(tcp_md5sig_pool, cpu).scratch; struct ahash_request *req; + if (!scratch) { + 
scratch = kmalloc_node(sizeof(union tcp_md5sum_block) + + sizeof(struct tcphdr), + GFP_KERNEL, + cpu_to_node(cpu)); + if (!scratch) + return; + per_cpu(tcp_md5sig_pool, cpu).scratch = scratch; + } if (per_cpu(tcp_md5sig_pool, cpu).md5_req) continue; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3708de2a6683..32b048e524d6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1018,27 +1018,28 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, GFP_KERNEL); } -static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, - __be32 daddr, __be32 saddr, int nbytes) +static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp, + __be32 daddr, __be32 saddr, + const struct tcphdr *th, int nbytes) { struct tcp4_pseudohdr *bp; struct scatterlist sg; + struct tcphdr *_th; - bp = &hp->md5_blk.ip4; - - /* - * 1. the TCP pseudo-header (in the order: source IP address, - * destination IP address, zero-padded protocol number, and - * segment length) - */ + bp = hp->scratch; bp->saddr = saddr; bp->daddr = daddr; bp->pad = 0; bp->protocol = IPPROTO_TCP; bp->len = cpu_to_be16(nbytes); - sg_init_one(&sg, bp, sizeof(*bp)); - ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(*bp)); + _th = (struct tcphdr *)(bp + 1); + memcpy(_th, th, sizeof(*th)); + _th->check = 0; + + sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); + ahash_request_set_crypt(hp->md5_req, &sg, NULL, + sizeof(*bp) + sizeof(*th)); return crypto_ahash_update(hp->md5_req); } @@ -1055,9 +1056,7 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, if (crypto_ahash_init(req)) goto clear_hash; - if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) - goto clear_hash; - if (tcp_md5_hash_header(hp, th)) + if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) goto clear_hash; if (tcp_md5_hash_key(hp, key)) goto clear_hash; @@ -1101,9 +1100,7 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, 
if (crypto_ahash_init(req)) goto clear_hash; - if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len)) - goto clear_hash; - if (tcp_md5_hash_header(hp, th)) + if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len)) goto clear_hash; if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) goto clear_hash; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2255d2bf5f6b..37cf91323319 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -526,26 +526,33 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval, AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); } -static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, - const struct in6_addr *daddr, - const struct in6_addr *saddr, int nbytes) +static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp, + const struct in6_addr *daddr, + const struct in6_addr *saddr, + const struct tcphdr *th, int nbytes) { struct tcp6_pseudohdr *bp; struct scatterlist sg; + struct tcphdr *_th; - bp = &hp->md5_blk.ip6; + bp = hp->scratch; /* 1. 
TCP pseudo-header (RFC2460) */ bp->saddr = *saddr; bp->daddr = *daddr; bp->protocol = cpu_to_be32(IPPROTO_TCP); bp->len = cpu_to_be32(nbytes); - sg_init_one(&sg, bp, sizeof(*bp)); - ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(*bp)); + _th = (struct tcphdr *)(bp + 1); + memcpy(_th, th, sizeof(*th)); + _th->check = 0; + + sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); + ahash_request_set_crypt(hp->md5_req, &sg, NULL, + sizeof(*bp) + sizeof(*th)); return crypto_ahash_update(hp->md5_req); } -static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, +static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, const struct in6_addr *daddr, struct in6_addr *saddr, const struct tcphdr *th) { @@ -559,9 +566,7 @@ static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, if (crypto_ahash_init(req)) goto clear_hash; - if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) - goto clear_hash; - if (tcp_md5_hash_header(hp, th)) + if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) goto clear_hash; if (tcp_md5_hash_key(hp, key)) goto clear_hash; @@ -606,9 +611,7 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, if (crypto_ahash_init(req)) goto clear_hash; - if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len)) - goto clear_hash; - if (tcp_md5_hash_header(hp, th)) + if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len)) goto clear_hash; if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) goto clear_hash; -- cgit v1.2.3