diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/Kconfig | 1 | ||||
-rw-r--r-- | net/ipv4/Makefile | 1 | ||||
-rw-r--r-- | net/ipv4/fib_trie.c | 11 | ||||
-rw-r--r-- | net/ipv4/inet_diag.c | 21 | ||||
-rw-r--r-- | net/ipv4/ip_fragment.c | 1 | ||||
-rw-r--r-- | net/ipv4/ip_gre.c | 8 | ||||
-rw-r--r-- | net/ipv4/ip_input.c | 5 | ||||
-rw-r--r-- | net/ipv4/ip_tunnel.c | 20 | ||||
-rw-r--r-- | net/ipv4/ipconfig.c | 4 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_defrag_ipv4.c | 2 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 1 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 16 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 12 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 25 | ||||
-rw-r--r-- | net/ipv4/tcp_memcontrol.c | 200 | ||||
-rw-r--r-- | net/ipv4/udp.c | 32 |
16 files changed, 106 insertions, 254 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index c22920525e5d..775824720b6b 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -353,6 +353,7 @@ config INET_ESP select CRYPTO_CBC select CRYPTO_SHA1 select CRYPTO_DES + select CRYPTO_ECHAINIV ---help--- Support for IPsec ESP. diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index c29809f765dc..62c049b647e9 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -56,7 +56,6 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o -obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 744e5936c10d..d07fc076bea0 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -289,10 +289,8 @@ static void __node_free_rcu(struct rcu_head *head) if (!n->tn_bits) kmem_cache_free(trie_leaf_kmem, n); - else if (n->tn_bits <= TNODE_KMALLOC_MAX) - kfree(n); else - vfree(n); + kvfree(n); } #define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu) @@ -1396,9 +1394,10 @@ found: struct fib_info *fi = fa->fa_info; int nhsel, err; - if ((index >= (1ul << fa->fa_slen)) && - ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen != KEYLENGTH))) - continue; + if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) { + if (index >= (1ul << fa->fa_slen)) + continue; + } if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) continue; if (fi->fib_dead) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 8bb8e7ad8548..6029157a19ed 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -361,13 +361,20 @@ struct sock *inet_diag_find_one_icsk(struct net *net, req->id.idiag_dport, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_if); #if IS_ENABLED(CONFIG_IPV6) - else if (req->sdiag_family == AF_INET6) - sk = inet6_lookup(net, hashinfo, - (struct in6_addr *)req->id.idiag_dst, - req->id.idiag_dport, - (struct in6_addr *)req->id.idiag_src, - req->id.idiag_sport, - req->id.idiag_if); + else if (req->sdiag_family == AF_INET6) { + if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) && + ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src)) + sk = inet_lookup(net, hashinfo, req->id.idiag_dst[3], + req->id.idiag_dport, req->id.idiag_src[3], + req->id.idiag_sport, req->id.idiag_if); + else + sk = inet6_lookup(net, hashinfo, + (struct in6_addr *)req->id.idiag_dst, + req->id.idiag_dport, + (struct in6_addr *)req->id.idiag_src, + req->id.idiag_sport, + req->id.idiag_if); + } #endif else return ERR_PTR(-EINVAL); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 3f00810b7288..187c6fcc3027 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -661,6 +661,7 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) struct ipq *qp; IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); + skb_orphan(skb); /* Lookup (or create) queue header */ qp = ip_find(net, ip_hdr(skb), user, vif); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 7c51c4e1661f..56fdf4e0dce4 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1240,6 +1240,14 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, err = ipgre_newlink(net, dev, tb, NULL); if (err < 0) goto out; + + /* openvswitch users expect packet sizes to be unrestricted, + * so set the largest MTU we can. + */ + err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false); + if (err) + goto out; + return dev; out: free_netdev(dev); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index b1209b63381f..d77eb0c3b684 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -316,7 +316,10 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; - if (sysctl_ip_early_demux && !skb_dst(skb) && !skb->sk) { + if (sysctl_ip_early_demux && + !skb_dst(skb) && + !skb->sk && + !ip_is_fragment(iph)) { const struct net_protocol *ipprot; int protocol = iph->protocol; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index c7bd72e9b544..89e8861e05fc 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -943,17 +943,31 @@ done: } EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); -int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) +int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) { struct ip_tunnel *tunnel = netdev_priv(dev); int t_hlen = tunnel->hlen + sizeof(struct iphdr); + int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen; - if (new_mtu < 68 || - new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen) + if (new_mtu < 68) return -EINVAL; + + if (new_mtu > max_mtu) { + if (strict) + return -EINVAL; + + new_mtu = max_mtu; + } + dev->mtu = new_mtu; return 0; } +EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu); + +int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) +{ + return __ip_tunnel_change_mtu(dev, new_mtu, true); +} EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu); static void ip_tunnel_dev_free(struct net_device *dev) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 67f7c9de0b16..2ed9dd2b5f2f 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -143,7 +143,11 @@ static char dhcp_client_identifier[253] __initdata; /* Persistent data: */ +#ifdef IPCONFIG_DYNAMIC static int ic_proto_used; /* Protocol used, if any */ +#else +#define ic_proto_used 0 +#endif static __be32 ic_nameservers[CONF_NAMESERVERS_MAX]; /* DNS Server IP addresses */ static u8 ic_domain[64]; /* DNS (not NIS) domain name */ diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 6fb869f646bf..a04dee536b8e 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -27,8 +27,6 @@ static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb, { int err; - skb_orphan(skb); - local_bh_disable(); err = ip_defrag(net, skb, user); local_bh_enable(); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 46ce410703b1..4d367b4139a3 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -24,7 +24,6 @@ #include <net/cipso_ipv4.h> #include <net/inet_frag.h> #include <net/ping.h> -#include <net/tcp_memcontrol.h> static int zero; static int one = 1; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index fd17eec93525..0c36ef4a3f86 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -279,6 +279,7 @@ #include <asm/uaccess.h> #include <asm/ioctls.h> +#include <asm/unaligned.h> #include <net/busy_poll.h> int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; @@ -939,7 +940,7 @@ new_segment: i = skb_shinfo(skb)->nr_frags; can_coalesce = skb_can_coalesce(skb, i, page, offset); - if (!can_coalesce && i >= MAX_SKB_FRAGS) { + if (!can_coalesce && i >= sysctl_max_skb_frags) { tcp_mark_push(tp, skb); goto new_segment; } @@ -1212,7 +1213,7 @@ new_segment: if (!skb_can_coalesce(skb, i, pfrag->page, pfrag->offset)) { - if (i == MAX_SKB_FRAGS || !sg) { + if (i == sysctl_max_skb_frags || !sg) { tcp_mark_push(tp, skb); goto new_segment; } @@ -2638,6 +2639,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; unsigned int start; + u64 rate64; u32 rate; memset(info, 0, sizeof(*info)); @@ -2703,15 +2705,17 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_total_retrans = tp->total_retrans; rate = READ_ONCE(sk->sk_pacing_rate); - info->tcpi_pacing_rate = rate != ~0U ? rate : ~0ULL; + rate64 = rate != ~0U ? rate : ~0ULL; + put_unaligned(rate64, &info->tcpi_pacing_rate); rate = READ_ONCE(sk->sk_max_pacing_rate); - info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL; + rate64 = rate != ~0U ? rate : ~0ULL; + put_unaligned(rate64, &info->tcpi_max_pacing_rate); do { start = u64_stats_fetch_begin_irq(&tp->syncp); - info->tcpi_bytes_acked = tp->bytes_acked; - info->tcpi_bytes_received = tp->bytes_received; + put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked); + put_unaligned(tp->bytes_received, &info->tcpi_bytes_received); } while (u64_stats_fetch_retry_irq(&tp->syncp, start)); info->tcpi_segs_out = tp->segs_out; info->tcpi_segs_in = tp->segs_in; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0003d409fec5..1c2a73406261 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2164,8 +2164,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int cnt, oldcnt; - int err; + int cnt, oldcnt, lost; unsigned int mss; /* Use SACK to deduce losses of new sequences sent during recovery */ const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq; @@ -2205,9 +2204,10 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) break; mss = tcp_skb_mss(skb); - err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, - mss, GFP_ATOMIC); - if (err < 0) + /* If needed, chop off the prefix to mark as lost. */ + lost = (packets - oldcnt) * mss; + if (lost < skb->len && + tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0) break; cnt = packets; } @@ -2366,8 +2366,6 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) tp->snd_ssthresh = tp->prior_ssthresh; tcp_ecn_withdraw_cwr(tp); } - } else { - tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh); } tp->snd_cwnd_stamp = tcp_time_stamp; tp->undo_marker = 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c7d1fb50f381..7f6ff037adaf 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -73,7 +73,6 @@ #include <net/timewait_sock.h> #include <net/xfrm.h> #include <net/secure_seq.h> -#include <net/tcp_memcontrol.h> #include <net/busy_poll.h> #include <linux/inet.h> @@ -312,7 +311,7 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk) /* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */ -void tcp_req_err(struct sock *sk, u32 seq) +void tcp_req_err(struct sock *sk, u32 seq, bool abort) { struct request_sock *req = inet_reqsk(sk); struct net *net = sock_net(sk); @@ -324,7 +323,7 @@ void tcp_req_err(struct sock *sk, u32 seq) if (seq != tcp_rsk(req)->snt_isn) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); - } else { + } else if (abort) { /* * Still in SYN_RECV, just remove it silently. * There is no good way to pass the error to the newly @@ -384,7 +383,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) } seq = ntohl(th->seq); if (sk->sk_state == TCP_NEW_SYN_RECV) - return tcp_req_err(sk, seq); + return tcp_req_err(sk, seq, + type == ICMP_PARAMETERPROB || + type == ICMP_TIME_EXCEEDED || + (type == ICMP_DEST_UNREACH && + (code == ICMP_NET_UNREACH || + code == ICMP_HOST_UNREACH))); bh_lock_sock(sk); /* If too many ICMPs get dropped on busy @@ -708,7 +712,8 @@ release_sk1: outside socket context is ugly, certainly. What can I do? */ -static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, +static void tcp_v4_send_ack(struct net *net, + struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, int reply_flags, u8 tos) @@ -723,7 +728,6 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, ]; } rep; struct ip_reply_arg arg; - struct net *net = dev_net(skb_dst(skb)->dev); memset(&rep.th, 0, sizeof(struct tcphdr)); memset(&arg, 0, sizeof(arg)); @@ -785,7 +789,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, + tcp_v4_send_ack(sock_net(sk), skb, + tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_time_stamp + tcptw->tw_ts_offset, tcptw->tw_ts_recent, @@ -804,8 +809,10 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV * sk->sk_state == TCP_SYN_RECV -> for Fast Open. */ - tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? - tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, + u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : + tcp_sk(sk)->snd_nxt; + + tcp_v4_send_ack(sock_net(sk), skb, seq, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd, tcp_time_stamp, req->ts_recent, diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c deleted file mode 100644 index 18bc7f745e9c..000000000000 --- a/net/ipv4/tcp_memcontrol.c +++ /dev/null @@ -1,200 +0,0 @@ -#include <net/tcp.h> -#include <net/tcp_memcontrol.h> -#include <net/sock.h> -#include <net/ip.h> -#include <linux/nsproxy.h> -#include <linux/memcontrol.h> -#include <linux/module.h> - -int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) -{ - struct mem_cgroup *parent = parent_mem_cgroup(memcg); - struct page_counter *counter_parent = NULL; - /* - * The root cgroup does not use page_counters, but rather, - * rely on the data already collected by the network - * subsystem - */ - if (memcg == root_mem_cgroup) - return 0; - - memcg->tcp_mem.memory_pressure = 0; - - if (parent) - counter_parent = &parent->tcp_mem.memory_allocated; - - page_counter_init(&memcg->tcp_mem.memory_allocated, counter_parent); - - return 0; -} - -void tcp_destroy_cgroup(struct mem_cgroup *memcg) -{ - if (memcg == root_mem_cgroup) - return; - - if (memcg->tcp_mem.active) - static_branch_dec(&memcg_sockets_enabled_key); -} - -static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) -{ - int ret; - - if (memcg == root_mem_cgroup) - return -EINVAL; - - ret = page_counter_limit(&memcg->tcp_mem.memory_allocated, nr_pages); - if (ret) - return ret; - - if (!memcg->tcp_mem.active) { - /* - * The active flag needs to be written after the static_key - * update. This is what guarantees that the socket activation - * function is the last one to run. See sock_update_memcg() for - * details, and note that we don't mark any socket as belonging - * to this memcg until that flag is up. - * - * We need to do this, because static_keys will span multiple - * sites, but we can't control their order. If we mark a socket - * as accounted, but the accounting functions are not patched in - * yet, we'll lose accounting. - * - * We never race with the readers in sock_update_memcg(), - * because when this value change, the code to process it is not - * patched in yet. - */ - static_branch_inc(&memcg_sockets_enabled_key); - memcg->tcp_mem.active = true; - } - - return 0; -} - -enum { - RES_USAGE, - RES_LIMIT, - RES_MAX_USAGE, - RES_FAILCNT, -}; - -static DEFINE_MUTEX(tcp_limit_mutex); - -static ssize_t tcp_cgroup_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off) -{ - struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); - unsigned long nr_pages; - int ret = 0; - - buf = strstrip(buf); - - switch (of_cft(of)->private) { - case RES_LIMIT: - /* see memcontrol.c */ - ret = page_counter_memparse(buf, "-1", &nr_pages); - if (ret) - break; - mutex_lock(&tcp_limit_mutex); - ret = tcp_update_limit(memcg, nr_pages); - mutex_unlock(&tcp_limit_mutex); - break; - default: - ret = -EINVAL; - break; - } - return ret ?: nbytes; -} - -static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft) -{ - struct mem_cgroup *memcg = mem_cgroup_from_css(css); - u64 val; - - switch (cft->private) { - case RES_LIMIT: - if (memcg == root_mem_cgroup) - val = PAGE_COUNTER_MAX; - else - val = memcg->tcp_mem.memory_allocated.limit; - val *= PAGE_SIZE; - break; - case RES_USAGE: - if (memcg == root_mem_cgroup) - val = atomic_long_read(&tcp_memory_allocated); - else - val = page_counter_read(&memcg->tcp_mem.memory_allocated); - val *= PAGE_SIZE; - break; - case RES_FAILCNT: - if (memcg == root_mem_cgroup) - return 0; - val = memcg->tcp_mem.memory_allocated.failcnt; - break; - case RES_MAX_USAGE: - if (memcg == root_mem_cgroup) - return 0; - val = memcg->tcp_mem.memory_allocated.watermark; - val *= PAGE_SIZE; - break; - default: - BUG(); - } - return val; -} - -static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off) -{ - struct mem_cgroup *memcg; - - memcg = mem_cgroup_from_css(of_css(of)); - if (memcg == root_mem_cgroup) - return nbytes; - - switch (of_cft(of)->private) { - case RES_MAX_USAGE: - page_counter_reset_watermark(&memcg->tcp_mem.memory_allocated); - break; - case RES_FAILCNT: - memcg->tcp_mem.memory_allocated.failcnt = 0; - break; - } - - return nbytes; -} - -static struct cftype tcp_files[] = { - { - .name = "kmem.tcp.limit_in_bytes", - .write = tcp_cgroup_write, - .read_u64 = tcp_cgroup_read, - .private = RES_LIMIT, - }, - { - .name = "kmem.tcp.usage_in_bytes", - .read_u64 = tcp_cgroup_read, - .private = RES_USAGE, - }, - { - .name = "kmem.tcp.failcnt", - .private = RES_FAILCNT, - .write = tcp_cgroup_reset, - .read_u64 = tcp_cgroup_read, - }, - { - .name = "kmem.tcp.max_usage_in_bytes", - .private = RES_MAX_USAGE, - .write = tcp_cgroup_reset, - .read_u64 = tcp_cgroup_read, - }, - { } /* terminate */ -}; - -static int __init tcp_memcontrol_init(void) -{ - WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, tcp_files)); - return 0; -} -__initcall(tcp_memcontrol_init); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index dc45b538e237..be0b21852b13 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -499,6 +499,7 @@ static struct sock *udp4_lib_lookup2(struct net *net, struct sock *sk, *result; struct hlist_nulls_node *node; int score, badness, matches = 0, reuseport = 0; + bool select_ok = true; u32 hash = 0; begin: @@ -512,14 +513,18 @@ begin: badness = score; reuseport = sk->sk_reuseport; if (reuseport) { - struct sock *sk2; hash = udp_ehashfn(net, daddr, hnum, saddr, sport); - sk2 = reuseport_select_sock(sk, hash, skb, - sizeof(struct udphdr)); - if (sk2) { - result = sk2; - goto found; + if (select_ok) { + struct sock *sk2; + + sk2 = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); + if (sk2) { + result = sk2; + select_ok = false; + goto found; + } } matches = 1; } @@ -563,6 +568,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; int score, badness, matches = 0, reuseport = 0; + bool select_ok = true; u32 hash = 0; rcu_read_lock(); @@ -601,14 +607,18 @@ begin: badness = score; reuseport = sk->sk_reuseport; if (reuseport) { - struct sock *sk2; hash = udp_ehashfn(net, daddr, hnum, saddr, sport); - sk2 = reuseport_select_sock(sk, hash, skb, + if (select_ok) { + struct sock *sk2; + + sk2 = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); - if (sk2) { - result = sk2; - goto found; + if (sk2) { + result = sk2; + select_ok = false; + goto found; + } } matches = 1; } |