diff options
Diffstat (limited to 'net/ipv4')
36 files changed, 170 insertions, 97 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1830e6f0e9cc..f75069883f2b 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -90,7 +90,7 @@ #include <linux/random.h> #include <linux/slab.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/inet.h> #include <linux/igmp.h> diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 89a8cac4726a..51b27ae09fbd 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1263,7 +1263,7 @@ void __init arp_init(void) /* * ax25 -> ASCII conversion */ -static char *ax2asc2(ax25_address *a, char *buf) +static void ax2asc2(ax25_address *a, char *buf) { char c, *s; int n; @@ -1285,10 +1285,10 @@ static char *ax2asc2(ax25_address *a, char *buf) *s++ = n + '0'; *s++ = '\0'; - if (*buf == '\0' || *buf == '-') - return "*"; - - return buf; + if (*buf == '\0' || *buf == '-') { + buf[0] = '*'; + buf[1] = '\0'; + } } #endif /* CONFIG_AX25 */ @@ -1322,7 +1322,7 @@ static void arp_format_neigh_entry(struct seq_file *seq, } #endif sprintf(tbuf, "%pI4", n->primary_key); - seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n", + seq_printf(seq, "%-16s 0x%-10x0x%-10x%-17s * %s\n", tbuf, hatype, arp_state_to_flags(n), hbuffer, dev->name); read_unlock(&n->lock); } diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 72d6f056d863..ae206163c273 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1587,6 +1587,10 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) goto validate_return_locked; } + if (opt_iter + 1 == opt_len) { + err_offset = opt_iter; + goto validate_return_locked; + } tag_len = tag[1]; if (tag_len > (opt_len - opt_iter)) { err_offset = opt_iter + 1; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 062a67ca9a21..4cd2ee8857d2 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -26,7 +26,7 @@ */ -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/bitops.h> #include <linux/capability.h> #include <linux/module.h> diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index dbad5a1c161a..7db2ad2e82d3 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -14,7 +14,7 @@ */ #include <linux/module.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/bitops.h> #include <linux/capability.h> #include <linux/types.h> @@ -46,6 +46,7 @@ #include <net/rtnetlink.h> #include <net/xfrm.h> #include <net/l3mdev.h> +#include <net/lwtunnel.h> #include <trace/events/fib.h> #ifndef CONFIG_IP_MULTIPLE_TABLES @@ -85,7 +86,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id) if (tb) return tb; - if (id == RT_TABLE_LOCAL) + if (id == RT_TABLE_LOCAL && !net->ipv4.fib_has_custom_rules) alias = fib_new_table(net, RT_TABLE_MAIN); tb = fib_trie_table(id, alias); @@ -677,6 +678,10 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, cfg->fc_mx_len = nla_len(attr); break; case RTA_MULTIPATH: + err = lwtunnel_valid_encap_type_attr(nla_data(attr), + nla_len(attr)); + if (err < 0) + goto errout; cfg->fc_mp = nla_data(attr); cfg->fc_mp_len = nla_len(attr); break; @@ -691,6 +696,9 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, break; case RTA_ENCAP_TYPE: cfg->fc_encap_type = nla_get_u16(attr); + err = lwtunnel_valid_encap_type(cfg->fc_encap_type); + if (err < 0) + goto errout; break; } } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c1bc1e92de0e..9a375b908d01 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -13,7 +13,7 @@ * 2 of the License, or (at your option) any later version. */ -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> @@ -1279,8 +1279,9 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid)) goto nla_put_failure; #endif - if (fi->fib_nh->nh_lwtstate) - lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate); + if (fi->fib_nh->nh_lwtstate && + lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate) < 0) + goto nla_put_failure; } #ifdef CONFIG_IP_ROUTE_MULTIPATH if (fi->fib_nhs > 1) { @@ -1316,8 +1317,10 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) goto nla_put_failure; #endif - if (nh->nh_lwtstate) - lwtunnel_fill_encap(skb, nh->nh_lwtstate); + if (nh->nh_lwtstate && + lwtunnel_fill_encap(skb, nh->nh_lwtstate) < 0) + goto nla_put_failure; + /* length of rtnetlink header + attributes */ rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; } endfor_nexthops(fi); @@ -1618,8 +1621,13 @@ void fib_select_multipath(struct fib_result *res, int hash) void fib_select_path(struct net *net, struct fib_result *res, struct flowi4 *fl4, int mp_hash) { + bool oif_check; + + oif_check = (fl4->flowi4_oif == 0 || + fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF); + #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (res->fi->fib_nhs > 1 && fl4->flowi4_oif == 0) { + if (res->fi->fib_nhs > 1 && oif_check) { if (mp_hash < 0) mp_hash = get_hash_from_flowi4(fl4) >> 1; @@ -1629,7 +1637,7 @@ void fib_select_path(struct net *net, struct fib_result *res, #endif if (!res->prefixlen && res->table->tb_num_default > 1 && - res->type == RTN_UNICAST && !fl4->flowi4_oif) + res->type == RTN_UNICAST && oif_check) fib_select_default(fl4, res); if (!fl4->saddr) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 1b0e7d1f5217..2919d1a10cfd 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -50,7 +50,7 @@ #define VERSION "0.409" -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index f79d7a8ab1c6..0777ea949223 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -91,7 +91,7 @@ #include <linux/errno.h> #include <linux/timer.h> #include <linux/init.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <net/checksum.h> #include <net/xfrm.h> #include <net/inet_common.h> diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 15db786d50ed..44fd86de2823 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -72,7 +72,7 @@ #include <linux/module.h> #include <linux/slab.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/jiffies.h> @@ -219,9 +219,14 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay) static void igmp_gq_start_timer(struct in_device *in_dev) { int tv = prandom_u32() % in_dev->mr_maxdelay; + unsigned long exp = jiffies + tv + 2; + + if (in_dev->mr_gq_running && + time_after_eq(exp, (in_dev->mr_gq_timer).expires)) + return; in_dev->mr_gq_running = 1; - if (!mod_timer(&in_dev->mr_gq_timer, jiffies+tv+2)) + if (!mod_timer(&in_dev->mr_gq_timer, exp)) in_dev_hold(in_dev); } @@ -1167,6 +1172,7 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im) psf->sf_crcount = im->crcount; } in_dev_put(pmc->interface); + kfree(pmc); } spin_unlock_bh(&im->lock); } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d5d3ead0a6c3..19ea045c50ed 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -45,11 +45,12 @@ void inet_get_local_port_range(struct net *net, int *low, int *high) EXPORT_SYMBOL(inet_get_local_port_range); int inet_csk_bind_conflict(const struct sock *sk, - const struct inet_bind_bucket *tb, bool relax) + const struct inet_bind_bucket *tb, bool relax, + bool reuseport_ok) { struct sock *sk2; - int reuse = sk->sk_reuse; - int reuseport = sk->sk_reuseport; + bool reuse = sk->sk_reuse; + bool reuseport = !!sk->sk_reuseport && reuseport_ok; kuid_t uid = sock_i_uid((struct sock *)sk); /* @@ -105,6 +106,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) struct inet_bind_bucket *tb; kuid_t uid = sock_i_uid(sk); u32 remaining, offset; + bool reuseport_ok = !!snum; if (port) { have_port: @@ -165,7 +167,8 @@ other_parity_scan: smallest_size = tb->num_owners; smallest_port = port; } - if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) + if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false, + reuseport_ok)) goto tb_found; goto next_port; } @@ -206,13 +209,14 @@ tb_found: sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && smallest_size == -1) goto success; - if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) { + if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true, + reuseport_ok)) { if ((reuse || (tb->fastreuseport > 0 && sk->sk_reuseport && !rcu_access_pointer(sk->sk_reuseport_cb) && uid_eq(tb->fastuid, uid))) && - smallest_size != -1 && --attempts >= 0) { + !snum && smallest_size != -1 && --attempts >= 0) { spin_unlock_bh(&head->lock); goto again; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 78fd62048335..c9c1cb635d9a 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -17,7 +17,7 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/slab.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/in.h> diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 4d158ff1def1..93157f2f4758 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -15,7 +15,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/types.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/unaligned.h> #include <linux/skbuff.h> #include <linux/ip.h> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 9ffc2625cddd..b67719f45953 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -42,7 +42,7 @@ * Hirokazu Takahashi: sendfile() on UDP works now. */ -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> @@ -826,11 +826,11 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk struct msghdr *msg = from; if (skb->ip_summed == CHECKSUM_PARTIAL) { - if (copy_from_iter(to, len, &msg->msg_iter) != len) + if (!copy_from_iter_full(to, len, &msg->msg_iter)) return -EFAULT; } else { __wsum csum = 0; - if (csum_and_copy_from_iter(to, len, &csum, &msg->msg_iter) != len) + if (!csum_and_copy_from_iter_full(to, len, &csum, &msg->msg_iter)) return -EFAULT; skb->csum = csum_block_add(skb->csum, csum, odd); } @@ -958,7 +958,7 @@ static int __ip_append_data(struct sock *sk, csummode = CHECKSUM_PARTIAL; cork->length += length; - if (((length > mtu) || (skb && skb_is_gso(skb))) && + if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) { @@ -1629,6 +1629,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; sk->sk_sndbuf = sysctl_wmem_default; + sk->sk_mark = fl4.flowi4_mark; err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0, &ipc, &rt, MSG_DONTWAIT); if (unlikely(err)) { diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 8b13881ed064..900011709e3b 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -44,7 +44,7 @@ #include <net/ip_fib.h> #include <linux/errqueue.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> /* * SOL_IP control messages. @@ -148,7 +148,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) const struct iphdr *iph = ip_hdr(skb); __be16 *ports = (__be16 *)skb_transport_header(skb); - if (skb_transport_offset(skb) + 4 > skb->len) + if (skb_transport_offset(skb) + 4 > (int)skb->len) return; /* All current transport protocols have the port numbers in the @@ -1225,14 +1225,27 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) * which has interface index (iif) as the first member of the * underlying inet{6}_skb_parm struct. This code then overlays * PKTINFO_SKB_CB and in_pktinfo also has iif as the first - * element so the iif is picked up from the prior IPCB + * element so the iif is picked up from the prior IPCB. If iif + * is the loopback interface, then return the sending interface + * (e.g., process binds socket to eth0 for Tx which is + * redirected to loopback in the rtable/dst). */ + if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX) + pktinfo->ipi_ifindex = inet_iif(skb); + pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); } else { pktinfo->ipi_ifindex = 0; pktinfo->ipi_spec_dst.s_addr = 0; } - skb_dst_drop(skb); + /* We need to keep the dst for __ip_options_echo() + * We could restrict the test to opt.ts_needtime || opt.srr, + * but the following is good enough as IP options are not often used. + */ + if (unlikely(IPCB(skb)->opt.optlen)) + skb_dst_force(skb); + else + skb_dst_drop(skb); } int ip_setsockopt(struct sock *sk, int level, diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index fed3d29f9eb3..0fd1976ab63b 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -313,6 +313,7 @@ static const struct lwtunnel_encap_ops ip_tun_lwt_ops = { .fill_encap = ip_tun_fill_encap_info, .get_encap_size = ip_tun_encap_nlsize, .cmp_encap = ip_tun_cmp_encap, + .owner = THIS_MODULE, }; static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = { @@ -403,6 +404,7 @@ static const struct lwtunnel_encap_ops ip6_tun_lwt_ops = { .fill_encap = ip6_tun_fill_encap_info, .get_encap_size = ip6_tun_encap_nlsize, .cmp_encap = ip_tun_cmp_encap, + .owner = THIS_MODULE, }; void __init ip_tunnel_core_init(void) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 071a785c65eb..fd9f34bbd740 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -61,7 +61,7 @@ #include <net/ipconfig.h> #include <net/route.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <net/checksum.h> #include <asm/processor.h> diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 79489f017854..00d4229b6954 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -96,7 +96,7 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/slab.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/in.h> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 665505d86b12..efc1e76d4977 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -26,7 +26,7 @@ * */ -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/types.h> #include <linux/capability.h> #include <linux/errno.h> diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 1258a9ab62ef..a467e1236c43 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -24,7 +24,7 @@ #include <linux/err.h> #include <net/compat.h> #include <net/sock.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_arp/arp_tables.h> diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 308b456723f0..91656a1d8fbd 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -20,7 +20,7 @@ #include <linux/icmp.h> #include <net/ip.h> #include <net/compat.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/mutex.h> #include <linux/proc_fs.h> #include <linux/err.h> diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 21db00d0362b..0a783cd73faf 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -144,6 +144,11 @@ clusterip_config_find_get(struct net *net, __be32 clusterip, int entry) rcu_read_lock_bh(); c = __clusterip_config_find(net, clusterip); if (c) { +#ifdef CONFIG_PROC_FS + if (!c->pde) + c = NULL; + else +#endif if (unlikely(!atomic_inc_not_zero(&c->refcount))) c = NULL; else if (entry) @@ -166,14 +171,15 @@ clusterip_config_init_nodelist(struct clusterip_config *c, static struct clusterip_config * clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, - struct net_device *dev) + struct net_device *dev) { + struct net *net = dev_net(dev); struct clusterip_config *c; - struct clusterip_net *cn = net_generic(dev_net(dev), clusterip_net_id); + struct clusterip_net *cn = net_generic(net, clusterip_net_id); c = kzalloc(sizeof(*c), GFP_ATOMIC); if (!c) - return NULL; + return ERR_PTR(-ENOMEM); c->dev = dev; c->clusterip = ip; @@ -185,6 +191,17 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, atomic_set(&c->refcount, 1); atomic_set(&c->entries, 1); + spin_lock_bh(&cn->lock); + if (__clusterip_config_find(net, ip)) { + spin_unlock_bh(&cn->lock); + kfree(c); + + return ERR_PTR(-EBUSY); + } + + list_add_rcu(&c->list, &cn->configs); + spin_unlock_bh(&cn->lock); + #ifdef CONFIG_PROC_FS { char buffer[16]; @@ -195,16 +212,16 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, cn->procdir, &clusterip_proc_fops, c); if (!c->pde) { + spin_lock_bh(&cn->lock); + list_del_rcu(&c->list); + spin_unlock_bh(&cn->lock); kfree(c); - return NULL; + + return ERR_PTR(-ENOMEM); } } #endif - spin_lock_bh(&cn->lock); - list_add_rcu(&c->list, &cn->configs); - spin_unlock_bh(&cn->lock); - return c; } @@ -410,9 +427,9 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) config = clusterip_config_init(cipinfo, e->ip.dst.s_addr, dev); - if (!config) { + if (IS_ERR(config)) { dev_put(dev); - return -ENOMEM; + return PTR_ERR(config); } dev_mc_add(config->dev, config->clustermac); } diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index f273098e48fd..37fb9552e858 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -63,10 +63,10 @@ static bool rpfilter_lookup_reverse(struct net *net, struct flowi4 *fl4, return dev_match || flags & XT_RPFILTER_LOOSE; } -static bool rpfilter_is_local(const struct sk_buff *skb) +static bool +rpfilter_is_loopback(const struct sk_buff *skb, const struct net_device *in) { - const struct rtable *rt = skb_rtable(skb); - return rt && (rt->rt_flags & RTCF_LOCAL); + return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK; } static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) @@ -79,7 +79,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) info = par->matchinfo; invert = info->flags & XT_RPFILTER_INVERT; - if (rpfilter_is_local(skb)) + if (rpfilter_is_loopback(skb, xt_in(par))) return true ^ invert; iph = ip_hdr(skb); diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index fd8220213afc..146d86105183 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -126,6 +126,8 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook) /* ip_route_me_harder expects skb->dst to be set */ skb_dst_set_noref(nskb, skb_dst(oldskb)); + nskb->mark = IP4_REPLY_MARK(net, oldskb->mark); + skb_reserve(nskb, LL_MAX_HEADER); niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, ip4_dst_hoplimit(skb_dst(nskb))); diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index 965b1a161369..2981291910dd 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -26,13 +26,6 @@ static __be32 get_saddr(__be32 addr) return addr; } -static bool fib4_is_local(const struct sk_buff *skb) -{ - const struct rtable *rt = skb_rtable(skb); - - return rt && (rt->rt_flags & RTCF_LOCAL); -} - #define DSCP_BITS 0xfc void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, @@ -95,8 +88,10 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, else oif = NULL; - if (nft_hook(pkt) == NF_INET_PRE_ROUTING && fib4_is_local(pkt->skb)) { - nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX); + if (nft_hook(pkt) == NF_INET_PRE_ROUTING && + nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { + nft_fib_store_result(dest, priv->result, pkt, + nft_in(pkt)->ifindex); return; } @@ -131,7 +126,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, switch (res.type) { case RTN_UNICAST: break; - case RTN_LOCAL: /* should not appear here, see fib4_is_local() above */ + case RTN_LOCAL: /* Should not see RTN_LOCAL here */ return; default: break; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 5b2635e69a92..68d77b1f1495 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -609,15 +609,15 @@ int ping_getfrag(void *from, char *to, fraglen -= sizeof(struct icmphdr); if (fraglen < 0) BUG(); - if (csum_and_copy_from_iter(to + sizeof(struct icmphdr), + if (!csum_and_copy_from_iter_full(to + sizeof(struct icmphdr), fraglen, &pfh->wcheck, - &pfh->msg->msg_iter) != fraglen) + &pfh->msg->msg_iter)) return -EFAULT; } else if (offset < sizeof(struct icmphdr)) { BUG(); } else { - if (csum_and_copy_from_iter(to, fraglen, &pfh->wcheck, - &pfh->msg->msg_iter) != fraglen) + if (!csum_and_copy_from_iter_full(to, fraglen, &pfh->wcheck, + &pfh->msg->msg_iter)) return -EFAULT; } @@ -642,6 +642,8 @@ static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, { struct sk_buff *skb = skb_peek(&sk->sk_write_queue); + if (!skb) + return 0; pfh->wcheck = csum_partial((char *)&pfh->icmph, sizeof(struct icmphdr), pfh->wcheck); pfh->icmph.checksum = csum_fold(pfh->wcheck); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 2300fae11b22..4e49e5cb001c 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -41,7 +41,7 @@ #include <linux/atomic.h> #include <asm/byteorder.h> #include <asm/current.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/ioctls.h> #include <linux/stddef.h> #include <linux/slab.h> diff --git a/net/ipv4/route.c b/net/ipv4/route.c index fa5c037227cb..709ffe67d1de 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -65,7 +65,7 @@ #define pr_fmt(fmt) "IPv4: " fmt #include <linux/module.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> @@ -798,6 +798,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf struct rtable *rt; struct flowi4 fl4; const struct iphdr *iph = (const struct iphdr *) skb->data; + struct net *net = dev_net(skb->dev); int oif = skb->dev->ifindex; u8 tos = RT_TOS(iph->tos); u8 prot = iph->protocol; @@ -805,7 +806,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf rt = (struct rtable *) dst; - __build_flow_key(sock_net(sk), &fl4, sk, iph, oif, tos, prot, mark, 0); + __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0); __ip_do_redirect(rt, skb, &fl4, true); } @@ -1913,7 +1914,8 @@ local_input: } } - rth = rt_dst_alloc(net->loopback_dev, flags | RTCF_LOCAL, res.type, + rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev, + flags | RTCF_LOCAL, res.type, IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache); if (!rth) goto e_nobufs; @@ -2470,7 +2472,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, r->rtm_dst_len = 32; r->rtm_src_len = 0; r->rtm_tos = fl4->flowi4_tos; - r->rtm_table = table_id; + r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT; if (nla_put_u32(skb, RTA_TABLE, table_id)) goto nla_put_failure; r->rtm_type = rt->rt_type; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 80bc36b25de2..b2fa498b15d1 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -433,13 +433,6 @@ static struct ctl_table ipv4_table[] = { .extra2 = &tcp_adv_win_scale_max, }, { - .procname = "tcp_tw_reuse", - .data = &sysctl_tcp_tw_reuse, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tcp_frto", .data = &sysctl_tcp_frto, .maxlen = sizeof(int), @@ -958,7 +951,14 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_tcp_notsent_lowat, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_douintvec, + }, + { + .procname = "tcp_tw_reuse", + .data = &init_net.ipv4.sysctl_tcp_tw_reuse, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec }, #ifdef CONFIG_IP_ROUTE_MULTIPATH { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1ef3165114ba..0efb4c7f6704 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -277,7 +277,7 @@ #include <net/ip.h> #include <net/sock.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/ioctls.h> #include <net/busy_poll.h> @@ -770,6 +770,12 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, ret = -EAGAIN; break; } + /* if __tcp_splice_read() got nothing while we have + * an skb in receive queue, we do not want to loop. + * This might happen with URG data. + */ + if (!skb_queue_empty(&sk->sk_receive_queue)) + break; sk_wait_data(sk, &timeo, NULL); if (signal_pending(current)) { ret = sock_intr_errno(timeo); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 4e777a3243f9..dd2560c83a85 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -113,7 +113,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req, struct tcp_fastopen_cookie tmp; if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) { - struct in6_addr *buf = (struct in6_addr *) tmp.val; + struct in6_addr *buf = &tmp.addr; int i; for (i = 0; i < 4; i++) @@ -205,6 +205,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, * scaled. So correct it appropriately. */ tp->snd_wnd = ntohs(tcp_hdr(skb)->window); + tp->max_window = tp->snd_wnd; /* Activate the retrans timer so that SYNACK can be retransmitted. * The request socket is not added to the ehash diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6c790754ae3e..41dcbd568cbe 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5078,7 +5078,7 @@ static void tcp_check_space(struct sock *sk) if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) { sock_reset_flag(sk, SOCK_QUEUE_SHRUNK); /* pairs with tcp_poll() */ - smp_mb__after_atomic(); + smp_mb(); if (sk->sk_socket && test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { tcp_new_space(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 30d81f533ada..fe9da4fb96bf 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -84,7 +84,6 @@ #include <crypto/hash.h> #include <linux/scatterlist.h> -int sysctl_tcp_tw_reuse __read_mostly; int sysctl_tcp_low_latency __read_mostly; #ifdef CONFIG_TCP_MD5SIG @@ -120,7 +119,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) and use initial timestamp retrieved from peer table. */ if (tcptw->tw_ts_recent_stamp && - (!twp || (sysctl_tcp_tw_reuse && + (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; if (tp->write_seq == 0) @@ -2456,6 +2455,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_orphan_retries = 0; net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; + net->ipv4.sysctl_tcp_tw_reuse = 0; return 0; fail: diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index d46f4d5b1c62..ba8f02d0f283 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -606,7 +606,6 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, return ret; } -EXPORT_SYMBOL_GPL(tcp_peer_is_proven); void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b45101f3d2bd..8ce50dc3ab8c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -769,6 +769,7 @@ static void tcp_tasklet_func(unsigned long data) list_del(&tp->tsq_node); sk = (struct sock *)tp; + smp_mb__before_atomic(); clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags); if (!sk->sk_lock.owned && @@ -1037,7 +1038,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_shinfo(skb)->gso_size = tcp_skb_mss(skb); /* Our usage of tstamp should remain private */ - skb->tstamp.tv64 = 0; + skb->tstamp = 0; /* Cleanup our debris for IP stacks */ memset(skb->cb, 0, max(sizeof(struct inet_skb_parm), @@ -2517,9 +2518,11 @@ u32 __tcp_select_window(struct sock *sk) int full_space = min_t(int, tp->window_clamp, allowed_space); int window; - if (mss > full_space) + if (unlikely(mss > full_space)) { mss = full_space; - + if (mss <= 0) + return 0; + } if (free_space < (full_space >> 1)) { icsk->icsk_ack.quick = 0; @@ -3202,7 +3205,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, #endif /* Do not fool tcpdump (if any), clean our debris */ - skb->tstamp.tv64 = 0; + skb->tstamp = 0; return skb; } EXPORT_SYMBOL(tcp_make_synack); diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index f6c50af24a64..3d063eb37848 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -117,7 +117,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, (fwmark > 0 && skb->mark == fwmark)) && (full || tp->snd_cwnd != tcp_probe.lastcwnd)) { - spin_lock(&tcp_probe.lock); + spin_lock_bh(&tcp_probe.lock); /* If log fills, just silently drop */ if (tcp_probe_avail() > 1) { struct tcp_log *p = tcp_probe.log + tcp_probe.head; @@ -157,7 +157,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1); } tcp_probe.lastcwnd = tp->snd_cwnd; - spin_unlock(&tcp_probe.lock); + spin_unlock_bh(&tcp_probe.lock); wake_up(&tcp_probe.wait); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9ca279b130d5..8aab7d78d25b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -79,7 +79,7 @@ #define pr_fmt(fmt) "UDP: " fmt -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/ioctls.h> #include <linux/bootmem.h> #include <linux/highmem.h> @@ -1501,7 +1501,7 @@ try_again: return err; csum_copy_err: - if (!__sk_queue_drop_skb(sk, skb, flags)) { + if (!__sk_queue_drop_skb(sk, skb, flags, udp_skb_destructor)) { UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } |