Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/af_inet.c                |  5
-rw-r--r-- | net/ipv4/netfilter/ipt_SYNPROXY.c |  4
-rw-r--r-- | net/ipv4/nexthop.c                |  2
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c        |  9
-rw-r--r-- | net/ipv4/tcp.c                    | 29
-rw-r--r-- | net/ipv4/tcp_bbr.c                |  6
-rw-r--r-- | net/ipv4/tcp_diag.c               | 52
-rw-r--r-- | net/ipv4/tcp_input.c              | 81
-rw-r--r-- | net/ipv4/tcp_ipv4.c               | 16
-rw-r--r-- | net/ipv4/tcp_timer.c              |  2
-rw-r--r-- | net/ipv4/tcp_output.c             |  2
11 files changed, 177 insertions, 31 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index ed2301ef872e..70f92aaca411 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1845,13 +1845,8 @@ static __net_init int inet_init_net(struct net *net)
 	return 0;
 }
 
-static __net_exit void inet_exit_net(struct net *net)
-{
-}
-
 static __net_initdata struct pernet_operations af_inet_ops = {
 	.init = inet_init_net,
-	.exit = inet_exit_net,
 };
 
 static int __init init_inet_pernet_ops(void)
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 0e70f3f65f6f..748dc3ce58d3 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -36,8 +36,8 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 		opts.options |= XT_SYNPROXY_OPT_ECN;
 
 	opts.options &= info->options;
-	opts.mss_encode = opts.mss;
-	opts.mss = info->mss;
+	opts.mss_encode = opts.mss_option;
+	opts.mss_option = info->mss;
 	if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
 		synproxy_init_timestamp_cookie(info, &opts);
 	else
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 5fe5a3981d43..fc34fd1668d6 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1151,7 +1151,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh,
 		.fc_encap_type = cfg->nh_encap_type,
 	};
 	u32 tb_id = l3mdev_fib_table(cfg->dev);
-	int err = -EINVAL;
+	int err;
 
 	err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack);
 	if (err) {
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 0b980e841927..59ded25acd04 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -820,6 +820,15 @@ static struct ctl_table ipv4_net_table[] = {
 		.extra2 = &tcp_min_snd_mss_max,
 	},
 	{
+		.procname = "tcp_mtu_probe_floor",
+		.data = &init_net.ipv4.sysctl_tcp_mtu_probe_floor,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_minmax,
+		.extra1 = &tcp_min_snd_mss_min,
+		.extra2 = &tcp_min_snd_mss_max,
+	},
+	{
 		.procname = "tcp_probe_threshold",
 		.data = &init_net.ipv4.sysctl_tcp_probe_threshold,
 		.maxlen = sizeof(int),
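The sysctl_net_ipv4.c hunk above registers the new tcp_mtu_probe_floor knob with the same tcp_min_snd_mss_min/tcp_min_snd_mss_max bounds used by tcp_min_snd_mss, so proc_dointvec_minmax rejects out-of-range writes with EINVAL. For readers who want to poke at it, a minimal user-space sketch that reads the knob; the path follows from .procname above, everything else is illustrative:

#include <stdio.h>

int main(void)
{
	const char *path = "/proc/sys/net/ipv4/tcp_mtu_probe_floor";
	FILE *f = fopen(path, "r");
	int val;

	if (!f) {
		/* kernel without this patch, or insufficient permissions */
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%d", &val) == 1)
		printf("%s = %d\n", path, val);
	fclose(f);
	return 0;
}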
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 61082065b26a..94df48bcecc2 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1182,7 +1182,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 	struct sockcm_cookie sockc;
 	int flags, err, copied = 0;
 	int mss_now = 0, size_goal, copied_syn = 0;
-	bool process_backlog = false;
+	int process_backlog = 0;
 	bool zc = false;
 	long timeo;
 
@@ -1274,9 +1274,10 @@ new_segment:
 			if (!sk_stream_memory_free(sk))
 				goto wait_for_sndbuf;
 
-			if (process_backlog && sk_flush_backlog(sk)) {
-				process_backlog = false;
-				goto restart;
+			if (unlikely(process_backlog >= 16)) {
+				process_backlog = 0;
+				if (sk_flush_backlog(sk))
+					goto restart;
 			}
 			first_skb = tcp_rtx_and_write_queues_empty(sk);
 			skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
@@ -1284,7 +1285,7 @@ new_segment:
 			if (!skb)
 				goto wait_for_memory;
 
-			process_backlog = true;
+			process_backlog++;
 			skb->ip_summed = CHECKSUM_PARTIAL;
 
 			skb_entail(sk, skb);
@@ -1789,19 +1790,21 @@ static int tcp_zerocopy_receive(struct sock *sk,
 			break;
 		frags = skb_shinfo(skb)->frags;
 		while (offset) {
-			if (frags->size > offset)
+			if (skb_frag_size(frags) > offset)
 				goto out;
-			offset -= frags->size;
+			offset -= skb_frag_size(frags);
 			frags++;
 		}
 	}
-	if (frags->size != PAGE_SIZE || frags->page_offset) {
+	if (skb_frag_size(frags) != PAGE_SIZE || skb_frag_off(frags)) {
 		int remaining = zc->recv_skip_hint;
+		int size = skb_frag_size(frags);
 
-		while (remaining && (frags->size != PAGE_SIZE ||
-				     frags->page_offset)) {
-			remaining -= frags->size;
+		while (remaining && (size != PAGE_SIZE ||
+				     skb_frag_off(frags))) {
+			remaining -= size;
 			frags++;
+			size = skb_frag_size(frags);
 		}
 		zc->recv_skip_hint -= remaining;
 		break;
@@ -3794,8 +3797,8 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
 		return 1;
 
 	for (i = 0; i < shi->nr_frags; ++i) {
-		const struct skb_frag_struct *f = &shi->frags[i];
-		unsigned int offset = f->page_offset;
+		const skb_frag_t *f = &shi->frags[i];
+		unsigned int offset = skb_frag_off(f);
 		struct page *page = skb_frag_page(f) +
 			(offset >> PAGE_SHIFT);
 
 		sg_set_page(&sg, page, skb_frag_size(f),
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 56be7d27f208..95b59540eee1 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -346,7 +346,7 @@ static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
 
 /* Calculate bdp based on min RTT and the estimated bottleneck bandwidth:
  *
- * bdp = bw * min_rtt * gain
+ * bdp = ceil(bw * min_rtt * gain)
  *
  * The key factor, gain, controls the amount of queue. While a small gain
  * builds a smaller queue, it becomes more vulnerable to noise in RTT
@@ -370,7 +370,9 @@ static u32 bbr_bdp(struct sock *sk, u32 bw, int gain)
 
 	w = (u64)bw * bbr->min_rtt_us;
 
-	/* Apply a gain to the given value, then remove the BW_SCALE shift. */
+	/* Apply a gain to the given value, remove the BW_SCALE shift, and
+	 * round the value up to avoid a negative feedback loop.
+	 */
 	bdp = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;
 
 	return bdp;
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index a3a386236d93..babc156deabb 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -81,13 +81,42 @@ static int tcp_diag_put_md5sig(struct sk_buff *skb,
 }
 #endif
 
+static int tcp_diag_put_ulp(struct sk_buff *skb, struct sock *sk,
+			    const struct tcp_ulp_ops *ulp_ops)
+{
+	struct nlattr *nest;
+	int err;
+
+	nest = nla_nest_start_noflag(skb, INET_DIAG_ULP_INFO);
+	if (!nest)
+		return -EMSGSIZE;
+
+	err = nla_put_string(skb, INET_ULP_INFO_NAME, ulp_ops->name);
+	if (err)
+		goto nla_failure;
+
+	if (ulp_ops->get_info)
+		err = ulp_ops->get_info(sk, skb);
+	if (err)
+		goto nla_failure;
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_failure:
+	nla_nest_cancel(skb, nest);
+	return err;
+}
+
 static int tcp_diag_get_aux(struct sock *sk, bool net_admin,
 			    struct sk_buff *skb)
 {
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	int err = 0;
+
 #ifdef CONFIG_TCP_MD5SIG
 	if (net_admin) {
 		struct tcp_md5sig_info *md5sig;
-		int err = 0;
 
 		rcu_read_lock();
 		md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
@@ -99,11 +128,21 @@ static int tcp_diag_get_aux(struct sock *sk, bool net_admin,
 	}
 #endif
 
+	if (net_admin) {
+		const struct tcp_ulp_ops *ulp_ops;
+
+		ulp_ops = icsk->icsk_ulp_ops;
+		if (ulp_ops)
+			err = tcp_diag_put_ulp(skb, sk, ulp_ops);
+		if (err)
+			return err;
+	}
 	return 0;
 }
 
 static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin)
 {
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	size_t size = 0;
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -124,6 +163,17 @@ static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin)
 	}
 #endif
 
+	if (net_admin) {
+		const struct tcp_ulp_ops *ulp_ops;
+
+		ulp_ops = icsk->icsk_ulp_ops;
+		if (ulp_ops) {
+			size += nla_total_size(0) +
+				nla_total_size(TCP_ULP_NAME_MAX);
+			if (ulp_ops->get_info_size)
+				size += ulp_ops->get_info_size(sk);
+		}
+	}
 	return size;
 }
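The tcp_bbr.c hunk changes only comments: it documents that the existing + BW_UNIT - 1 term makes the BDP division round up. A standalone sketch of that fixed-point arithmetic follows; BW_SCALE and BBR_SCALE mirror the constants defined in tcp_bbr.c (they are not part of this diff), and the traffic numbers are invented:

#include <stdio.h>
#include <stdint.h>

#define BW_SCALE	24
#define BW_UNIT		(1ULL << BW_SCALE)
#define BBR_SCALE	8
#define BBR_UNIT	(1 << BBR_SCALE)

/* Round-up version, as in bbr_bdp() above. */
static uint32_t bdp_ceil(uint64_t bw, uint32_t min_rtt_us, int gain)
{
	uint64_t w = bw * min_rtt_us;

	return (uint32_t)((((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT);
}

/* Truncating version, for comparison only. */
static uint32_t bdp_floor(uint64_t bw, uint32_t min_rtt_us, int gain)
{
	uint64_t w = bw * min_rtt_us;

	return (uint32_t)(((w * gain) >> BBR_SCALE) / BW_UNIT);
}

int main(void)
{
	uint64_t bw = BW_UNIT / 100;	/* 0.01 pkt/us, scaled by BW_UNIT */
	uint32_t min_rtt_us = 10099;	/* deliberately not a round number */

	/* Prints "floor 100, ceil 101": the fractional packet is kept. */
	printf("floor %u, ceil %u\n",
	       bdp_floor(bw, min_rtt_us, BBR_UNIT),
	       bdp_ceil(bw, min_rtt_us, BBR_UNIT));
	return 0;
}

Truncating instead would let a flow whose true BDP is fractionally above N packets cap itself at N, send slightly less, measure slightly less bandwidth, and ratchet downward; that is the negative feedback loop the new comment warns about.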
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c21e8a22fb3b..706cbb3b2986 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3782,6 +3782,49 @@ static void smc_parse_options(const struct tcphdr *th,
 #endif
 }
 
+/* Try to parse the MSS option from the TCP header. Return 0 on failure, clamped
+ * value on success.
+ */
+static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
+{
+	const unsigned char *ptr = (const unsigned char *)(th + 1);
+	int length = (th->doff * 4) - sizeof(struct tcphdr);
+	u16 mss = 0;
+
+	while (length > 0) {
+		int opcode = *ptr++;
+		int opsize;
+
+		switch (opcode) {
+		case TCPOPT_EOL:
+			return mss;
+		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
+			length--;
+			continue;
+		default:
+			if (length < 2)
+				return mss;
+			opsize = *ptr++;
+			if (opsize < 2) /* "silly options" */
+				return mss;
+			if (opsize > length)
+				return mss;	/* fail on partial options */
+			if (opcode == TCPOPT_MSS && opsize == TCPOLEN_MSS) {
+				u16 in_mss = get_unaligned_be16(ptr);
+
+				if (in_mss) {
+					if (user_mss && user_mss < in_mss)
+						in_mss = user_mss;
+					mss = in_mss;
+				}
+			}
+			ptr += opsize - 2;
+			length -= opsize;
+		}
+	}
+	return mss;
+}
+
 /* Look for tcp options. Normally only called on SYN and SYNACK packets.
  * But, this can also be called on packets in the established flow when
  * the fast version below fails.
@@ -6422,9 +6465,7 @@ EXPORT_SYMBOL(inet_reqsk_alloc);
 /*
  * Return true if a syncookie should be sent
  */
-static bool tcp_syn_flood_action(const struct sock *sk,
-				 const struct sk_buff *skb,
-				 const char *proto)
+static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
 {
 	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
 	const char *msg = "Dropping request";
@@ -6444,7 +6485,7 @@ static bool tcp_syn_flood_action(const struct sock *sk,
 	    net->ipv4.sysctl_tcp_syncookies != 2 &&
 	    xchg(&queue->synflood_warned, 1) == 0)
 		net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
-				     proto, ntohs(tcp_hdr(skb)->dest), msg);
+				     proto, sk->sk_num, msg);
 
 	return want_cookie;
 }
@@ -6466,6 +6507,36 @@ static void tcp_reqsk_record_syn(const struct sock *sk,
 	}
 }
 
+/* If a SYN cookie is required and supported, returns a clamped MSS value to be
+ * used for SYN cookie generation.
+ */
+u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
+			  const struct tcp_request_sock_ops *af_ops,
+			  struct sock *sk, struct tcphdr *th)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	u16 mss;
+
+	if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 &&
+	    !inet_csk_reqsk_queue_is_full(sk))
+		return 0;
+
+	if (!tcp_syn_flood_action(sk, rsk_ops->slab_name))
+		return 0;
+
+	if (sk_acceptq_is_full(sk)) {
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
+		return 0;
+	}
+
+	mss = tcp_parse_mss_option(th, tp->rx_opt.user_mss);
+	if (!mss)
+		mss = af_ops->mss_clamp;
+
+	return mss;
+}
+EXPORT_SYMBOL_GPL(tcp_get_syncookie_mss);
+
 int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		     const struct tcp_request_sock_ops *af_ops,
 		     struct sock *sk, struct sk_buff *skb)
@@ -6487,7 +6558,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	 */
 	if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
 	     inet_csk_reqsk_queue_is_full(sk)) && !isn) {
-		want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name);
+		want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
 		if (!want_cookie)
 			goto drop;
 	}
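tcp_parse_mss_option() above is a minimal TCP option walker: EOL ends the options, NOP advances one byte, and everything else is kind/length/data with the length byte covering the whole option. The sketch below replays the same walk in user space over a hand-built option block; parse_mss() is an illustrative stand-in, not kernel API:

#include <stdio.h>
#include <stdint.h>

#define TCPOPT_EOL	0
#define TCPOPT_NOP	1
#define TCPOPT_MSS	2
#define TCPOLEN_MSS	4

static uint16_t parse_mss(const uint8_t *ptr, int length, uint16_t user_mss)
{
	uint16_t mss = 0;

	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			return mss;
		case TCPOPT_NOP:	/* single-byte padding */
			length--;
			continue;
		default:
			if (length < 2)
				return mss;
			opsize = *ptr++;
			if (opsize < 2 || opsize > length)
				return mss;	/* malformed or truncated */
			if (opcode == TCPOPT_MSS && opsize == TCPOLEN_MSS) {
				/* option payload is big-endian */
				uint16_t in_mss = (uint16_t)(ptr[0] << 8 | ptr[1]);

				if (in_mss) {
					if (user_mss && user_mss < in_mss)
						in_mss = user_mss;
					mss = in_mss;
				}
			}
			ptr += opsize - 2;
			length -= opsize;
		}
	}
	return mss;
}

int main(void)
{
	/* NOP, NOP, MSS(1460), EOL, as it might follow a struct tcphdr */
	const uint8_t opts[] = { 1, 1, 2, 4, 0x05, 0xb4, 0 };

	printf("mss = %u\n", parse_mss(opts, sizeof(opts), 0));	/* 1460 */
	printf("clamped = %u\n", parse_mss(opts, sizeof(opts), 536));	/* 536 */
	return 0;
}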
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d57641cb3477..fd394ad179a0 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1515,6 +1515,21 @@ static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
 	return sk;
 }
 
+u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
+			 struct tcphdr *th, u32 *cookie)
+{
+	u16 mss = 0;
+#ifdef CONFIG_SYN_COOKIES
+	mss = tcp_get_syncookie_mss(&tcp_request_sock_ops,
+				    &tcp_request_sock_ipv4_ops, sk, th);
+	if (mss) {
+		*cookie = __cookie_v4_init_sequence(iph, th, &mss);
+		tcp_synq_overflow(sk);
+	}
+#endif
+	return mss;
+}
+
 /* The socket must have it's spinlock held when we get
  * here, unless it is a TCP_LISTEN socket.
  *
@@ -2637,6 +2652,7 @@ static int __net_init tcp_sk_init(struct net *net)
 	net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
 	net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
 	net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
+	net->ipv4.sysctl_tcp_mtu_probe_floor = TCP_MIN_SND_MSS;
 
 	net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
 	net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8a645f304e6c..42abc9bd687a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1403,7 +1403,7 @@ static int __pskb_trim_head(struct sk_buff *skb, int len)
 		} else {
 			shinfo->frags[k] = shinfo->frags[i];
 			if (eat) {
-				shinfo->frags[k].page_offset += eat;
+				skb_frag_off_add(&shinfo->frags[k], eat);
 				skb_frag_size_sub(&shinfo->frags[k], eat);
 				eat = 0;
 			}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index c801cd37cc2a..dbd9d2d0ee63 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -154,7 +154,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
 	} else {
 		mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
 		mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
-		mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len);
+		mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor);
 		mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
 		icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
 	}
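The tcp_timer.c hunk replaces the old '68 - tcp_header_len' floor with the new per-netns sysctl when MTU probing backs off after repeated retransmission timeouts. A simplified sketch of the clamp chain follows; mtu_to_mss()/mss_to_mtu() assume a fixed 40-byte IPv4+TCP header, while the kernel's tcp_mtu_to_mss() also accounts for options, and the sample values (including 48, the TCP_MIN_SND_MSS default the new sysctl starts at) are assumptions:

#include <stdio.h>

#define HDR_OVERHEAD 40	/* fixed IPv4 + TCP header, no options */

static int min_i(int a, int b) { return a < b ? a : b; }
static int max_i(int a, int b) { return a > b ? a : b; }
static int mtu_to_mss(int mtu) { return mtu - HDR_OVERHEAD; }
static int mss_to_mtu(int mss) { return mss + HDR_OVERHEAD; }

int main(void)
{
	int search_low = 1000;	/* current lower bound of the probe window */
	int base_mss = 1024;	/* sysctl_tcp_base_mss default */
	int probe_floor = 48;	/* sysctl_tcp_mtu_probe_floor default */
	int min_snd_mss = 48;	/* sysctl_tcp_min_snd_mss default */
	int mss;

	/* Halve the MSS implied by the current lower bound ... */
	mss = mtu_to_mss(search_low) >> 1;
	mss = min_i(base_mss, mss);
	/* ... but never drop below the configured floors. */
	mss = max_i(mss, probe_floor);
	mss = max_i(mss, min_snd_mss);
	printf("new search_low = %d\n", mss_to_mtu(mss));	/* 520 */
	return 0;
}

Raising tcp_mtu_probe_floor above the default keeps a lossy path from collapsing the probe window toward a near-useless 48-byte MSS.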