diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/esp4.c | 200 | ||||
-rw-r--r-- | net/ipv4/fib_trie.c | 3 | ||||
-rw-r--r-- | net/ipv4/ip_vti.c | 14 | ||||
-rw-r--r-- | net/ipv4/netfilter/arp_tables.c | 6 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_tables.c | 6 | ||||
-rw-r--r-- | net/ipv4/route.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 11 | ||||
-rw-r--r-- | net/ipv4/tcp_cong.c | 5 | ||||
-rw-r--r-- | net/ipv4/tcp_fastopen.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 19 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 7 | ||||
-rw-r--r-- | net/ipv4/udp.c | 24 |
12 files changed, 200 insertions, 103 deletions
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 421a80b09b62..477937465a20 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -49,7 +49,7 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen) len = ALIGN(len, crypto_tfm_ctx_alignment()); } - len += sizeof(struct aead_givcrypt_request) + crypto_aead_reqsize(aead); + len += sizeof(struct aead_request) + crypto_aead_reqsize(aead); len = ALIGN(len, __alignof__(struct scatterlist)); len += sizeof(struct scatterlist) * nfrags; @@ -68,17 +68,6 @@ static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen) crypto_aead_alignmask(aead) + 1) : tmp + seqhilen; } -static inline struct aead_givcrypt_request *esp_tmp_givreq( - struct crypto_aead *aead, u8 *iv) -{ - struct aead_givcrypt_request *req; - - req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead), - crypto_tfm_ctx_alignment()); - aead_givcrypt_set_tfm(req, aead); - return req; -} - static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv) { struct aead_request *req; @@ -97,14 +86,6 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead, __alignof__(struct scatterlist)); } -static inline struct scatterlist *esp_givreq_sg( - struct crypto_aead *aead, struct aead_givcrypt_request *req) -{ - return (void *)ALIGN((unsigned long)(req + 1) + - crypto_aead_reqsize(aead), - __alignof__(struct scatterlist)); -} - static void esp_output_done(struct crypto_async_request *base, int err) { struct sk_buff *skb = base->data; @@ -113,14 +94,37 @@ static void esp_output_done(struct crypto_async_request *base, int err) xfrm_output_resume(skb, err); } +/* Move ESP header back into place. */ +static void esp_restore_header(struct sk_buff *skb, unsigned int offset) +{ + struct ip_esp_hdr *esph = (void *)(skb->data + offset); + void *tmp = ESP_SKB_CB(skb)->tmp; + __be32 *seqhi = esp_tmp_seqhi(tmp); + + esph->seq_no = esph->spi; + esph->spi = *seqhi; +} + +static void esp_output_restore_header(struct sk_buff *skb) +{ + esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32)); +} + +static void esp_output_done_esn(struct crypto_async_request *base, int err) +{ + struct sk_buff *skb = base->data; + + esp_output_restore_header(skb); + esp_output_done(base, err); +} + static int esp_output(struct xfrm_state *x, struct sk_buff *skb) { int err; struct ip_esp_hdr *esph; struct crypto_aead *aead; - struct aead_givcrypt_request *req; + struct aead_request *req; struct scatterlist *sg; - struct scatterlist *asg; struct sk_buff *trailer; void *tmp; u8 *iv; @@ -129,17 +133,19 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) int clen; int alen; int plen; + int ivlen; int tfclen; int nfrags; int assoclen; - int sglists; int seqhilen; __be32 *seqhi; + __be64 seqno; /* skb is pure payload to encrypt */ aead = x->data; alen = crypto_aead_authsize(aead); + ivlen = crypto_aead_ivsize(aead); tfclen = 0; if (x->tfcpad) { @@ -160,16 +166,14 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) nfrags = err; assoclen = sizeof(*esph); - sglists = 1; seqhilen = 0; if (x->props.flags & XFRM_STATE_ESN) { - sglists += 2; seqhilen += sizeof(__be32); assoclen += seqhilen; } - tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); + tmp = esp_alloc_tmp(aead, nfrags, seqhilen); if (!tmp) { err = -ENOMEM; goto error; @@ -177,9 +181,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) seqhi = esp_tmp_seqhi(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); - req = esp_tmp_givreq(aead, iv); - asg = esp_givreq_sg(aead, req); - sg = asg + sglists; + req = esp_tmp_req(aead, iv); + sg = esp_req_sg(aead, req); /* Fill padding... */ tail = skb_tail_pointer(trailer); @@ -235,36 +238,53 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) *skb_mac_header(skb) = IPPROTO_UDP; } - esph->spi = x->id.spi; esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); + aead_request_set_callback(req, 0, esp_output_done, skb); + + /* For ESN we move the header forward by 4 bytes to + * accomodate the high bits. We will move it back after + * encryption. + */ + if ((x->props.flags & XFRM_STATE_ESN)) { + esph = (void *)(skb_transport_header(skb) - sizeof(__be32)); + *seqhi = esph->spi; + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); + aead_request_set_callback(req, 0, esp_output_done_esn, skb); + } + + esph->spi = x->id.spi; + sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, - esph->enc_data + crypto_aead_ivsize(aead) - skb->data, - clen + alen); + (unsigned char *)esph - skb->data, + assoclen + ivlen + clen + alen); - if ((x->props.flags & XFRM_STATE_ESN)) { - sg_init_table(asg, 3); - sg_set_buf(asg, &esph->spi, sizeof(__be32)); - *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi); - sg_set_buf(asg + 1, seqhi, seqhilen); - sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32)); - } else - sg_init_one(asg, esph, sizeof(*esph)); - - aead_givcrypt_set_callback(req, 0, esp_output_done, skb); - aead_givcrypt_set_crypt(req, sg, sg, clen, iv); - aead_givcrypt_set_assoc(req, asg, assoclen); - aead_givcrypt_set_giv(req, esph->enc_data, - XFRM_SKB_CB(skb)->seq.output.low); + aead_request_set_crypt(req, sg, sg, ivlen + clen, iv); + aead_request_set_ad(req, assoclen); + + seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low + + ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32)); + + memset(iv, 0, ivlen); + memcpy(iv + ivlen - min(ivlen, 8), (u8 *)&seqno + 8 - min(ivlen, 8), + min(ivlen, 8)); ESP_SKB_CB(skb)->tmp = tmp; - err = crypto_aead_givencrypt(req); - if (err == -EINPROGRESS) + err = crypto_aead_encrypt(req); + + switch (err) { + case -EINPROGRESS: goto error; - if (err == -EBUSY) + case -EBUSY: err = NET_XMIT_DROP; + break; + + case 0: + if ((x->props.flags & XFRM_STATE_ESN)) + esp_output_restore_header(skb); + } kfree(tmp); @@ -363,6 +383,20 @@ static void esp_input_done(struct crypto_async_request *base, int err) xfrm_input_resume(skb, esp_input_done2(skb, err)); } +static void esp_input_restore_header(struct sk_buff *skb) +{ + esp_restore_header(skb, 0); + __skb_pull(skb, 4); +} + +static void esp_input_done_esn(struct crypto_async_request *base, int err) +{ + struct sk_buff *skb = base->data; + + esp_input_restore_header(skb); + esp_input_done(base, err); +} + /* * Note: detecting truncated vs. non-truncated authentication data is very * expensive, so we only support truncated data, which is the recommended @@ -374,19 +408,18 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) struct crypto_aead *aead = x->data; struct aead_request *req; struct sk_buff *trailer; - int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead); + int ivlen = crypto_aead_ivsize(aead); + int elen = skb->len - sizeof(*esph) - ivlen; int nfrags; int assoclen; - int sglists; int seqhilen; __be32 *seqhi; void *tmp; u8 *iv; struct scatterlist *sg; - struct scatterlist *asg; int err = -EINVAL; - if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead))) + if (!pskb_may_pull(skb, sizeof(*esph) + ivlen)) goto out; if (elen <= 0) @@ -399,17 +432,15 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) nfrags = err; assoclen = sizeof(*esph); - sglists = 1; seqhilen = 0; if (x->props.flags & XFRM_STATE_ESN) { - sglists += 2; seqhilen += sizeof(__be32); assoclen += seqhilen; } err = -ENOMEM; - tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); + tmp = esp_alloc_tmp(aead, nfrags, seqhilen); if (!tmp) goto out; @@ -417,36 +448,39 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) seqhi = esp_tmp_seqhi(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_req(aead, iv); - asg = esp_req_sg(aead, req); - sg = asg + sglists; + sg = esp_req_sg(aead, req); skb->ip_summed = CHECKSUM_NONE; esph = (struct ip_esp_hdr *)skb->data; - /* Get ivec. This can be wrong, check against another impls. */ - iv = esph->enc_data; - - sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen); + aead_request_set_callback(req, 0, esp_input_done, skb); + /* For ESN we move the header forward by 4 bytes to + * accomodate the high bits. We will move it back after + * decryption. + */ if ((x->props.flags & XFRM_STATE_ESN)) { - sg_init_table(asg, 3); - sg_set_buf(asg, &esph->spi, sizeof(__be32)); - *seqhi = XFRM_SKB_CB(skb)->seq.input.hi; - sg_set_buf(asg + 1, seqhi, seqhilen); - sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32)); - } else - sg_init_one(asg, esph, sizeof(*esph)); + esph = (void *)skb_push(skb, 4); + *seqhi = esph->spi; + esph->spi = esph->seq_no; + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.input.hi); + aead_request_set_callback(req, 0, esp_input_done_esn, skb); + } - aead_request_set_callback(req, 0, esp_input_done, skb); - aead_request_set_crypt(req, sg, sg, elen, iv); - aead_request_set_assoc(req, asg, assoclen); + sg_init_table(sg, nfrags); + skb_to_sgvec(skb, sg, 0, skb->len); + + aead_request_set_crypt(req, sg, sg, elen + ivlen, iv); + aead_request_set_ad(req, assoclen); err = crypto_aead_decrypt(req); if (err == -EINPROGRESS) goto out; + if ((x->props.flags & XFRM_STATE_ESN)) + esp_input_restore_header(skb); + err = esp_input_done2(skb, err); out: @@ -518,10 +552,16 @@ static void esp_destroy(struct xfrm_state *x) static int esp_init_aead(struct xfrm_state *x) { + char aead_name[CRYPTO_MAX_ALG_NAME]; struct crypto_aead *aead; int err; - aead = crypto_alloc_aead(x->aead->alg_name, 0, 0); + err = -ENAMETOOLONG; + if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", + x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME) + goto error; + + aead = crypto_alloc_aead(aead_name, 0, 0); err = PTR_ERR(aead); if (IS_ERR(aead)) goto error; @@ -560,15 +600,19 @@ static int esp_init_authenc(struct xfrm_state *x) if ((x->props.flags & XFRM_STATE_ESN)) { if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, - "authencesn(%s,%s)", + "%s%sauthencesn(%s,%s)%s", + x->geniv ?: "", x->geniv ? "(" : "", x->aalg ? x->aalg->alg_name : "digest_null", - x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) + x->ealg->alg_name, + x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME) goto error; } else { if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, - "authenc(%s,%s)", + "%s%sauthenc(%s,%s)%s", + x->geniv ?: "", x->geniv ? "(" : "", x->aalg ? x->aalg->alg_name : "digest_null", - x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) + x->ealg->alg_name, + x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME) goto error; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index e13fcc602da2..09b62e17dd8c 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1164,6 +1164,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) state = fa->fa_state; new_fa->fa_state = state & ~FA_S_ACCESSED; new_fa->fa_slen = fa->fa_slen; + new_fa->tb_id = tb->tb_id; err = netdev_switch_fib_ipv4_add(key, plen, fi, new_fa->fa_tos, @@ -1764,7 +1765,7 @@ void fib_table_flush_external(struct fib_table *tb) /* record local slen */ slen = fa->fa_slen; - if (!fi || !(fi->fib_flags & RTNH_F_EXTERNAL)) + if (!fi || !(fi->fib_flags & RTNH_F_OFFLOAD)) continue; netdev_switch_fib_ipv4_del(n->key, diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 9f7269f3c54a..0c152087ca15 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -65,7 +65,6 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi, goto drop; XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel; - skb->mark = be32_to_cpu(tunnel->parms.i_key); return xfrm_input(skb, nexthdr, spi, encap_type); } @@ -91,6 +90,8 @@ static int vti_rcv_cb(struct sk_buff *skb, int err) struct pcpu_sw_netstats *tstats; struct xfrm_state *x; struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4; + u32 orig_mark = skb->mark; + int ret; if (!tunnel) return 1; @@ -107,7 +108,11 @@ static int vti_rcv_cb(struct sk_buff *skb, int err) x = xfrm_input_state(skb); family = x->inner_mode->afinfo->family; - if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family)) + skb->mark = be32_to_cpu(tunnel->parms.i_key); + ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family); + skb->mark = orig_mark; + + if (!ret) return -EPERM; skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(skb->dev))); @@ -216,8 +221,6 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) memset(&fl, 0, sizeof(fl)); - skb->mark = be32_to_cpu(tunnel->parms.o_key); - switch (skb->protocol) { case htons(ETH_P_IP): xfrm_decode_session(skb, &fl, AF_INET); @@ -233,6 +236,9 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } + /* override mark with tunnel output key */ + fl.flowi_mark = be32_to_cpu(tunnel->parms.o_key); + return vti_xmit(skb, dev, &fl); } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 13bfe84bf3ca..a61200754f4b 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1075,6 +1075,9 @@ static int do_replace(struct net *net, const void __user *user, /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); @@ -1499,6 +1502,9 @@ static int compat_do_replace(struct net *net, void __user *user, return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index c69db7fa25ee..2d0e265fef6e 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1262,6 +1262,9 @@ do_replace(struct net *net, const void __user *user, unsigned int len) /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); @@ -1809,6 +1812,9 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index bff62fc87b8e..f45f2a12f37b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -902,6 +902,10 @@ static int ip_error(struct sk_buff *skb) bool send; int code; + /* IP on this device is disabled. */ + if (!in_dev) + goto out; + net = dev_net(rt->dst.dev); if (!IN_DEV_FORWARD(in_dev)) { switch (rt->dst.error) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 46efa03d2b11..f1377f2a0472 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -402,6 +402,7 @@ void tcp_init_sock(struct sock *sk) tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_clamp = ~0; tp->mss_cache = TCP_MSS_DEFAULT; + u64_stats_init(&tp->syncp); tp->reordering = sysctl_tcp_reordering; tcp_enable_early_retrans(tp); @@ -2598,6 +2599,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; + unsigned int start; u32 rate; memset(info, 0, sizeof(*info)); @@ -2665,10 +2667,11 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) rate = READ_ONCE(sk->sk_max_pacing_rate); info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL; - spin_lock_bh(&sk->sk_lock.slock); - info->tcpi_bytes_acked = tp->bytes_acked; - info->tcpi_bytes_received = tp->bytes_received; - spin_unlock_bh(&sk->sk_lock.slock); + do { + start = u64_stats_fetch_begin_irq(&tp->syncp); + info->tcpi_bytes_acked = tp->bytes_acked; + info->tcpi_bytes_received = tp->bytes_received; + } while (u64_stats_fetch_retry_irq(&tp->syncp, start)); } EXPORT_SYMBOL_GPL(tcp_get_info); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 7a5ae50c80c8..84be008c945c 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -187,6 +187,7 @@ static void tcp_reinit_congestion_control(struct sock *sk, tcp_cleanup_congestion_control(sk); icsk->icsk_ca_ops = ca; + icsk->icsk_ca_setsockopt = 1; if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init) icsk->icsk_ca_ops->init(sk); @@ -335,8 +336,10 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) rcu_read_lock(); ca = __tcp_ca_find_autoload(name); /* No change asking for existing value */ - if (ca == icsk->icsk_ca_ops) + if (ca == icsk->icsk_ca_ops) { + icsk->icsk_ca_setsockopt = 1; goto out; + } if (!ca) err = -ENOENT; else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 3c673d5e6cff..46b087a27503 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -206,6 +206,10 @@ static bool tcp_fastopen_create_child(struct sock *sk, skb_set_owner_r(skb2, child); __skb_queue_tail(&child->sk_receive_queue, skb2); tp->syn_data_acked = 1; + + /* u64_stats_update_begin(&tp->syncp) not needed here, + * as we certainly are not changing upper 32bit value (0) + */ tp->bytes_received = end_seq - TCP_SKB_CB(skb)->seq - 1; } else { end_seq = TCP_SKB_CB(skb)->seq + 1; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bc790ea9960f..c9ab964189a0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2698,16 +2698,21 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) struct tcp_sock *tp = tcp_sk(sk); bool recovered = !before(tp->snd_una, tp->high_seq); + if ((flag & FLAG_SND_UNA_ADVANCED) && + tcp_try_undo_loss(sk, false)) + return; + if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */ /* Step 3.b. A timeout is spurious if not all data are * lost, i.e., never-retransmitted data are (s)acked. */ - if (tcp_try_undo_loss(sk, flag & FLAG_ORIG_SACK_ACKED)) + if ((flag & FLAG_ORIG_SACK_ACKED) && + tcp_try_undo_loss(sk, true)) return; - if (after(tp->snd_nxt, tp->high_seq) && - (flag & FLAG_DATA_SACKED || is_dupack)) { - tp->frto = 0; /* Loss was real: 2nd part of step 3.a */ + if (after(tp->snd_nxt, tp->high_seq)) { + if (flag & FLAG_DATA_SACKED || is_dupack) + tp->frto = 0; /* Step 3.a. loss was real */ } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) { tp->high_seq = tp->snd_nxt; __tcp_push_pending_frames(sk, tcp_current_mss(sk), @@ -2732,8 +2737,6 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) else if (flag & FLAG_SND_UNA_ADVANCED) tcp_reset_reno_sack(tp); } - if (tcp_try_undo_loss(sk, false)) - return; tcp_xmit_retransmit_queue(sk); } @@ -3283,7 +3286,9 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) { u32 delta = ack - tp->snd_una; + u64_stats_update_begin(&tp->syncp); tp->bytes_acked += delta; + u64_stats_update_end(&tp->syncp); tp->snd_una = ack; } @@ -3292,7 +3297,9 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq) { u32 delta = seq - tp->rcv_nxt; + u64_stats_update_begin(&tp->syncp); tp->bytes_received += delta; + u64_stats_update_end(&tp->syncp); tp->rcv_nxt = seq; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index e5d7649136fc..17e7339ee5ca 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -300,7 +300,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tw->tw_v6_daddr = sk->sk_v6_daddr; tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; tw->tw_tclass = np->tclass; - tw->tw_flowlabel = np->flow_label >> 12; + tw->tw_flowlabel = be32_to_cpu(np->flow_label & IPV6_FLOWLABEL_MASK); tw->tw_ipv6only = sk->sk_ipv6only; } #endif @@ -420,7 +420,10 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) rcu_read_unlock(); } - if (!ca_got_dst && !try_module_get(icsk->icsk_ca_ops->owner)) + /* If no valid choice made yet, assign current system default ca. */ + if (!ca_got_dst && + (!icsk->icsk_ca_setsockopt || + !try_module_get(icsk->icsk_ca_ops->owner))) tcp_assign_congestion_control(sk); tcp_set_ca_state(sk, TCP_CA_Open); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d10b7e0112eb..83aa604f9273 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -90,6 +90,7 @@ #include <linux/socket.h> #include <linux/sockios.h> #include <linux/igmp.h> +#include <linux/inetdevice.h> #include <linux/in.h> #include <linux/errno.h> #include <linux/timer.h> @@ -1345,10 +1346,8 @@ csum_copy_err: } unlock_sock_fast(sk, slow); - if (noblock) - return -EAGAIN; - - /* starting over for a new packet */ + /* starting over for a new packet, but check if we need to yield */ + cond_resched(); msg->msg_flags &= ~MSG_TRUNC; goto try_again; } @@ -1962,6 +1961,7 @@ void udp_v4_early_demux(struct sk_buff *skb) struct sock *sk; struct dst_entry *dst; int dif = skb->dev->ifindex; + int ours; /* validate the packet */ if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) @@ -1971,14 +1971,24 @@ void udp_v4_early_demux(struct sk_buff *skb) uh = udp_hdr(skb); if (skb->pkt_type == PACKET_BROADCAST || - skb->pkt_type == PACKET_MULTICAST) + skb->pkt_type == PACKET_MULTICAST) { + struct in_device *in_dev = __in_dev_get_rcu(skb->dev); + + if (!in_dev) + return; + + ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, + iph->protocol); + if (!ours) + return; sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, uh->source, iph->saddr, dif); - else if (skb->pkt_type == PACKET_HOST) + } else if (skb->pkt_type == PACKET_HOST) { sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr, uh->source, iph->saddr, dif); - else + } else { return; + } if (!sk) return; |