Diffstat (limited to 'net')
63 files changed, 529 insertions, 410 deletions
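Note: several of the tunnel transmit fixes below (ip_gre, ip_vti, ip6_gre, ip6_tunnel, ip6_vti, ip6mr, sit) replace per-driver pskb_may_pull() checks with a shared pskb_inet_may_pull() call. The helper itself is defined in include/net/ip_tunnels.h and therefore does not appear in this 'net'-limited diffstat; the sketch below shows the assumed shape of that helper for reference only, and details may differ from the exact tree.

/* Sketch of the header-validation helper assumed by the tunnel xmit
 * changes below (actual definition lives in include/net/ip_tunnels.h,
 * outside this diffstat). It ensures the inner IPv4/IPv6 header is in
 * the linear part of the skb before the xmit path dereferences it.
 */
static inline bool pskb_inet_may_pull(struct sk_buff *skb)
{
	int nhlen;

	switch (skb->protocol) {
#if IS_ENABLED(CONFIG_IPV6)
	case htons(ETH_P_IPV6):
		nhlen = sizeof(struct ipv6hdr);
		break;
#endif
	case htons(ETH_P_IP):
		nhlen = sizeof(struct iphdr);
		break;
	default:
		nhlen = 0;
	}

	return pskb_network_may_pull(skb, nhlen);
}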
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index c603d33d5410..5d01edf8d819 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -653,15 +653,22 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; } - dev = dev_get_by_name(&init_net, devname); + rtnl_lock(); + dev = __dev_get_by_name(&init_net, devname); if (!dev) { + rtnl_unlock(); res = -ENODEV; break; } ax25->ax25_dev = ax25_dev_ax25dev(dev); + if (!ax25->ax25_dev) { + rtnl_unlock(); + res = -ENODEV; + break; + } ax25_fillin_cb(ax25, ax25->ax25_dev); - dev_put(dev); + rtnl_unlock(); break; default: diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index 9a3a301e1e2f..d92195cd7834 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -116,6 +116,7 @@ void ax25_dev_device_down(struct net_device *dev) if ((s = ax25_dev_list) == ax25_dev) { ax25_dev_list = s->next; spin_unlock_bh(&ax25_dev_lock); + dev->ax25_ptr = NULL; dev_put(dev); kfree(ax25_dev); return; @@ -125,6 +126,7 @@ void ax25_dev_device_down(struct net_device *dev) if (s->next == ax25_dev) { s->next = ax25_dev->next; spin_unlock_bh(&ax25_dev_lock); + dev->ax25_ptr = NULL; dev_put(dev); kfree(ax25_dev); return; diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index d70f363c52ae..6d5859714f52 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -147,7 +147,7 @@ static ssize_t batadv_socket_read(struct file *file, char __user *buf, if (!buf || count < sizeof(struct batadv_icmp_packet)) return -EINVAL; - if (!access_ok(VERIFY_WRITE, buf, count)) + if (!access_ok(buf, count)) return -EFAULT; error = wait_event_interruptible(socket_client->queue_wait, diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c index 02e55b78132f..75f602e1ce94 100644 --- a/net/batman-adv/log.c +++ b/net/batman-adv/log.c @@ -136,7 +136,7 @@ static ssize_t batadv_log_read(struct file *file, char __user *buf, if (count == 0) return 0; - if (!access_ok(VERIFY_WRITE, buf, count)) + if (!access_ok(buf, count)) return -EFAULT; error = wait_event_interruptible(debug_log->queue_wait, diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c index 7acfc83087d5..7ee4fea93637 100644 --- a/net/bpfilter/bpfilter_kern.c +++ b/net/bpfilter/bpfilter_kern.c @@ -13,39 +13,24 @@ extern char bpfilter_umh_start; extern char bpfilter_umh_end; -static struct umh_info info; -/* since ip_getsockopt() can run in parallel, serialize access to umh */ -static DEFINE_MUTEX(bpfilter_lock); - -static void shutdown_umh(struct umh_info *info) +static void shutdown_umh(void) { struct task_struct *tsk; - if (!info->pid) + if (bpfilter_ops.stop) return; - tsk = get_pid_task(find_vpid(info->pid), PIDTYPE_PID); + + tsk = get_pid_task(find_vpid(bpfilter_ops.info.pid), PIDTYPE_PID); if (tsk) { force_sig(SIGKILL, tsk); put_task_struct(tsk); } - fput(info->pipe_to_umh); - fput(info->pipe_from_umh); - info->pid = 0; } static void __stop_umh(void) { - if (IS_ENABLED(CONFIG_INET)) { - bpfilter_process_sockopt = NULL; - shutdown_umh(&info); - } -} - -static void stop_umh(void) -{ - mutex_lock(&bpfilter_lock); - __stop_umh(); - mutex_unlock(&bpfilter_lock); + if (IS_ENABLED(CONFIG_INET)) + shutdown_umh(); } static int __bpfilter_process_sockopt(struct sock *sk, int optname, @@ -63,10 +48,10 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname, req.cmd = optname; req.addr = (long __force __user)optval; req.len = optlen; - mutex_lock(&bpfilter_lock); - if (!info.pid) + if (!bpfilter_ops.info.pid) goto out; - n = 
__kernel_write(info.pipe_to_umh, &req, sizeof(req), &pos); + n = __kernel_write(bpfilter_ops.info.pipe_to_umh, &req, sizeof(req), + &pos); if (n != sizeof(req)) { pr_err("write fail %zd\n", n); __stop_umh(); @@ -74,7 +59,8 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname, goto out; } pos = 0; - n = kernel_read(info.pipe_from_umh, &reply, sizeof(reply), &pos); + n = kernel_read(bpfilter_ops.info.pipe_from_umh, &reply, sizeof(reply), + &pos); if (n != sizeof(reply)) { pr_err("read fail %zd\n", n); __stop_umh(); @@ -83,37 +69,59 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname, } ret = reply.status; out: - mutex_unlock(&bpfilter_lock); return ret; } -static int __init load_umh(void) +static int start_umh(void) { int err; /* fork usermode process */ - info.cmdline = "bpfilter_umh"; err = fork_usermode_blob(&bpfilter_umh_start, &bpfilter_umh_end - &bpfilter_umh_start, - &info); + &bpfilter_ops.info); if (err) return err; - pr_info("Loaded bpfilter_umh pid %d\n", info.pid); + bpfilter_ops.stop = false; + pr_info("Loaded bpfilter_umh pid %d\n", bpfilter_ops.info.pid); /* health check that usermode process started correctly */ if (__bpfilter_process_sockopt(NULL, 0, NULL, 0, 0) != 0) { - stop_umh(); + shutdown_umh(); return -EFAULT; } - if (IS_ENABLED(CONFIG_INET)) - bpfilter_process_sockopt = &__bpfilter_process_sockopt; return 0; } +static int __init load_umh(void) +{ + int err; + + mutex_lock(&bpfilter_ops.lock); + if (!bpfilter_ops.stop) { + err = -EFAULT; + goto out; + } + err = start_umh(); + if (!err && IS_ENABLED(CONFIG_INET)) { + bpfilter_ops.sockopt = &__bpfilter_process_sockopt; + bpfilter_ops.start = &start_umh; + } +out: + mutex_unlock(&bpfilter_ops.lock); + return err; +} + static void __exit fini_umh(void) { - stop_umh(); + mutex_lock(&bpfilter_ops.lock); + if (IS_ENABLED(CONFIG_INET)) { + shutdown_umh(); + bpfilter_ops.start = NULL; + bpfilter_ops.sockopt = NULL; + } + mutex_unlock(&bpfilter_ops.lock); } module_init(load_umh); module_exit(fini_umh); diff --git a/net/bpfilter/bpfilter_umh_blob.S b/net/bpfilter/bpfilter_umh_blob.S index 40311d10d2f2..7f1c521dcc2f 100644 --- a/net/bpfilter/bpfilter_umh_blob.S +++ b/net/bpfilter/bpfilter_umh_blob.S @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ - .section .init.rodata, "a" + .section .bpfilter_umh, "a" .global bpfilter_umh_start bpfilter_umh_start: .incbin "net/bpfilter/bpfilter_umh" diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 5372e2042adf..2cb8da465b98 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -65,6 +65,7 @@ EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit); int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { + skb->tstamp = 0; return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, net, sk, skb, NULL, skb->dev, br_dev_queue_push_xmit); diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index d21a23698410..c93c35bb73dd 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -265,7 +265,7 @@ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); int ret; - if (neigh->hh.hh_len) { + if ((neigh->nud_state & NUD_CONNECTED) && neigh->hh.hh_len) { neigh_hh_bridge(&neigh->hh, skb); skb->dev = nf_bridge->physindev; ret = br_handle_frame_finish(net, sk, skb); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index d240b3e7919f..eabf8bf28a3f 100644 --- 
a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -107,6 +107,7 @@ struct br_tunnel_info { /* private vlan flags */ enum { BR_VLFLAG_PER_PORT_STATS = BIT(0), + BR_VLFLAG_ADDED_BY_SWITCHDEV = BIT(1), }; /** diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 4a2f31157ef5..96abf8feb9dc 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -80,16 +80,18 @@ static bool __vlan_add_flags(struct net_bridge_vlan *v, u16 flags) } static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br, - u16 vid, u16 flags, struct netlink_ext_ack *extack) + struct net_bridge_vlan *v, u16 flags, + struct netlink_ext_ack *extack) { int err; /* Try switchdev op first. In case it is not supported, fallback to * 8021q add. */ - err = br_switchdev_port_vlan_add(dev, vid, flags, extack); + err = br_switchdev_port_vlan_add(dev, v->vid, flags, extack); if (err == -EOPNOTSUPP) - return vlan_vid_add(dev, br->vlan_proto, vid); + return vlan_vid_add(dev, br->vlan_proto, v->vid); + v->priv_flags |= BR_VLFLAG_ADDED_BY_SWITCHDEV; return err; } @@ -121,19 +123,17 @@ static void __vlan_del_list(struct net_bridge_vlan *v) } static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br, - u16 vid) + const struct net_bridge_vlan *v) { int err; /* Try switchdev op first. In case it is not supported, fallback to * 8021q del. */ - err = br_switchdev_port_vlan_del(dev, vid); - if (err == -EOPNOTSUPP) { - vlan_vid_del(dev, br->vlan_proto, vid); - return 0; - } - return err; + err = br_switchdev_port_vlan_del(dev, v->vid); + if (!(v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)) + vlan_vid_del(dev, br->vlan_proto, v->vid); + return err == -EOPNOTSUPP ? 0 : err; } /* Returns a master vlan, if it didn't exist it gets created. In all cases a @@ -242,7 +242,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags, * This ensures tagged traffic enters the bridge when * promiscuous mode is disabled by br_manage_promisc(). */ - err = __vlan_vid_add(dev, br, v->vid, flags, extack); + err = __vlan_vid_add(dev, br, v, flags, extack); if (err) goto out; @@ -305,7 +305,7 @@ out_fdb_insert: out_filt: if (p) { - __vlan_vid_del(dev, br, v->vid); + __vlan_vid_del(dev, br, v); if (masterv) { if (v->stats && masterv->stats != v->stats) free_percpu(v->stats); @@ -338,7 +338,7 @@ static int __vlan_del(struct net_bridge_vlan *v) __vlan_delete_pvid(vg, v->vid); if (p) { - err = __vlan_vid_del(p->dev, p->br, v->vid); + err = __vlan_vid_del(p->dev, p->br, v); if (err) goto out; } else { diff --git a/net/can/gw.c b/net/can/gw.c index faa3da88a127..53859346dc9a 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -416,13 +416,29 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx]) (*gwj->mod.modfunc[modidx++])(cf, &gwj->mod); - /* check for checksum updates when the CAN frame has been modified */ + /* Has the CAN frame been modified? 
*/ if (modidx) { - if (gwj->mod.csumfunc.crc8) + /* get available space for the processed CAN frame type */ + int max_len = nskb->len - offsetof(struct can_frame, data); + + /* dlc may have changed, make sure it fits to the CAN frame */ + if (cf->can_dlc > max_len) + goto out_delete; + + /* check for checksum updates in classic CAN length only */ + if (gwj->mod.csumfunc.crc8) { + if (cf->can_dlc > 8) + goto out_delete; + (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8); + } + + if (gwj->mod.csumfunc.xor) { + if (cf->can_dlc > 8) + goto out_delete; - if (gwj->mod.csumfunc.xor) (*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor); + } } /* clear the skb timestamp if not configured the other way */ @@ -434,6 +450,14 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) gwj->dropped_frames++; else gwj->handled_frames++; + + return; + + out_delete: + /* delete frame due to misconfiguration */ + gwj->deleted_frames++; + kfree_skb(nskb); + return; } static inline int cgw_register_filter(struct net *net, struct cgw_job *gwj) diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 87afb9ec4c68..9cab80207ced 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -255,6 +255,7 @@ enum { Opt_nocephx_sign_messages, Opt_tcp_nodelay, Opt_notcp_nodelay, + Opt_abort_on_full, }; static match_table_t opt_tokens = { @@ -280,6 +281,7 @@ static match_table_t opt_tokens = { {Opt_nocephx_sign_messages, "nocephx_sign_messages"}, {Opt_tcp_nodelay, "tcp_nodelay"}, {Opt_notcp_nodelay, "notcp_nodelay"}, + {Opt_abort_on_full, "abort_on_full"}, {-1, NULL} }; @@ -535,6 +537,10 @@ ceph_parse_options(char *options, const char *dev_name, opt->flags &= ~CEPH_OPT_TCP_NODELAY; break; + case Opt_abort_on_full: + opt->flags |= CEPH_OPT_ABORT_ON_FULL; + break; + default: BUG_ON(token); } @@ -549,7 +555,8 @@ out: } EXPORT_SYMBOL(ceph_parse_options); -int ceph_print_client_options(struct seq_file *m, struct ceph_client *client) +int ceph_print_client_options(struct seq_file *m, struct ceph_client *client, + bool show_all) { struct ceph_options *opt = client->options; size_t pos = m->count; @@ -574,6 +581,8 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client) seq_puts(m, "nocephx_sign_messages,"); if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0) seq_puts(m, "notcp_nodelay,"); + if (show_all && (opt->flags & CEPH_OPT_ABORT_ON_FULL)) + seq_puts(m, "abort_on_full,"); if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) seq_printf(m, "mount_timeout=%d,", diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 02952605d121..46f65709a6ff 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -375,7 +375,7 @@ static int client_options_show(struct seq_file *s, void *p) struct ceph_client *client = s->private; int ret; - ret = ceph_print_client_options(s, client); + ret = ceph_print_client_options(s, client, true); if (ret) return ret; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 2f126eff275d..d5718284db57 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -544,7 +544,7 @@ static int ceph_tcp_recvpage(struct socket *sock, struct page *page, * shortly. 
*/ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, - size_t kvlen, size_t len, int more) + size_t kvlen, size_t len, bool more) { struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; int r; @@ -560,24 +560,15 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, return r; } -static int __ceph_tcp_sendpage(struct socket *sock, struct page *page, - int offset, size_t size, bool more) -{ - int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR); - int ret; - - ret = kernel_sendpage(sock, page, offset, size, flags); - if (ret == -EAGAIN) - ret = 0; - - return ret; -} - +/* + * @more: either or both of MSG_MORE and MSG_SENDPAGE_NOTLAST + */ static int ceph_tcp_sendpage(struct socket *sock, struct page *page, - int offset, size_t size, bool more) + int offset, size_t size, int more) { - struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; - struct bio_vec bvec; + ssize_t (*sendpage)(struct socket *sock, struct page *page, + int offset, size_t size, int flags); + int flags = MSG_DONTWAIT | MSG_NOSIGNAL | more; int ret; /* @@ -589,19 +580,11 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page, * triggers one of hardened usercopy checks. */ if (page_count(page) >= 1 && !PageSlab(page)) - return __ceph_tcp_sendpage(sock, page, offset, size, more); - - bvec.bv_page = page; - bvec.bv_offset = offset; - bvec.bv_len = size; - - if (more) - msg.msg_flags |= MSG_MORE; + sendpage = sock->ops->sendpage; else - msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */ + sendpage = sock_no_sendpage; - iov_iter_bvec(&msg.msg_iter, WRITE, &bvec, 1, size); - ret = sock_sendmsg(sock, &msg); + ret = sendpage(sock, page, offset, size, flags); if (ret == -EAGAIN) ret = 0; @@ -1572,6 +1555,7 @@ static int write_partial_message_data(struct ceph_connection *con) struct ceph_msg *msg = con->out_msg; struct ceph_msg_data_cursor *cursor = &msg->cursor; bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); + int more = MSG_MORE | MSG_SENDPAGE_NOTLAST; u32 crc; dout("%s %p msg %p\n", __func__, con, msg); @@ -1592,7 +1576,6 @@ static int write_partial_message_data(struct ceph_connection *con) struct page *page; size_t page_offset; size_t length; - bool last_piece; int ret; if (!cursor->resid) { @@ -1600,10 +1583,11 @@ static int write_partial_message_data(struct ceph_connection *con) continue; } - page = ceph_msg_data_next(cursor, &page_offset, &length, - &last_piece); - ret = ceph_tcp_sendpage(con->sock, page, page_offset, - length, !last_piece); + page = ceph_msg_data_next(cursor, &page_offset, &length, NULL); + if (length == cursor->total_resid) + more = MSG_MORE; + ret = ceph_tcp_sendpage(con->sock, page, page_offset, length, + more); if (ret <= 0) { if (do_datacrc) msg->footer.data_crc = cpu_to_le32(crc); @@ -1633,13 +1617,16 @@ static int write_partial_message_data(struct ceph_connection *con) */ static int write_partial_skip(struct ceph_connection *con) { + int more = MSG_MORE | MSG_SENDPAGE_NOTLAST; int ret; dout("%s %p %d left\n", __func__, con, con->out_skip); while (con->out_skip > 0) { size_t size = min(con->out_skip, (int) PAGE_SIZE); - ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, true); + if (size == con->out_skip) + more = MSG_MORE; + ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, more); if (ret <= 0) goto out; con->out_skip -= ret; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index d23a9f81f3d7..fa9530dd876e 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c 
@@ -2315,7 +2315,7 @@ again: (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || pool_full(osdc, req->r_t.base_oloc.pool))) { dout("req %p full/pool_full\n", req); - if (osdc->abort_on_full) { + if (ceph_test_opt(osdc->client, ABORT_ON_FULL)) { err = -ENOSPC; } else { pr_warn_ratelimited("FULL or reached pool quota\n"); @@ -2545,7 +2545,7 @@ static void ceph_osdc_abort_on_full(struct ceph_osd_client *osdc) { bool victims = false; - if (osdc->abort_on_full && + if (ceph_test_opt(osdc->client, ABORT_ON_FULL) && (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || have_pool_full(osdc))) for_each_request(osdc, abort_on_full_fn, &victims); } diff --git a/net/compat.c b/net/compat.c index f7084780a8f8..959d1c51826d 100644 --- a/net/compat.c +++ b/net/compat.c @@ -358,7 +358,7 @@ static int do_set_sock_timeout(struct socket *sock, int level, if (optlen < sizeof(*up)) return -EINVAL; - if (!access_ok(VERIFY_READ, up, sizeof(*up)) || + if (!access_ok(up, sizeof(*up)) || __get_user(ktime.tv_sec, &up->tv_sec) || __get_user(ktime.tv_usec, &up->tv_usec)) return -EFAULT; @@ -438,7 +438,7 @@ static int do_get_sock_timeout(struct socket *sock, int level, int optname, if (!err) { if (put_user(sizeof(*up), optlen) || - !access_ok(VERIFY_WRITE, up, sizeof(*up)) || + !access_ok(up, sizeof(*up)) || __put_user(ktime.tv_sec, &up->tv_sec) || __put_user(ktime.tv_usec, &up->tv_usec)) err = -EFAULT; @@ -467,12 +467,14 @@ int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) ctv = (struct compat_timeval __user *) userstamp; err = -ENOENT; sock_enable_timestamp(sk, SOCK_TIMESTAMP); - tv = ktime_to_timeval(sk->sk_stamp); + tv = ktime_to_timeval(sock_read_timestamp(sk)); + if (tv.tv_sec == -1) return err; if (tv.tv_sec == 0) { - sk->sk_stamp = ktime_get_real(); - tv = ktime_to_timeval(sk->sk_stamp); + ktime_t kt = ktime_get_real(); + sock_write_timestamp(sk, kt); + tv = ktime_to_timeval(kt); } err = 0; if (put_user(tv.tv_sec, &ctv->tv_sec) || @@ -494,12 +496,13 @@ int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *usersta ctv = (struct compat_timespec __user *) userstamp; err = -ENOENT; sock_enable_timestamp(sk, SOCK_TIMESTAMP); - ts = ktime_to_timespec(sk->sk_stamp); + ts = ktime_to_timespec(sock_read_timestamp(sk)); if (ts.tv_sec == -1) return err; if (ts.tv_sec == 0) { - sk->sk_stamp = ktime_get_real(); - ts = ktime_to_timespec(sk->sk_stamp); + ktime_t kt = ktime_get_real(); + sock_write_timestamp(sk, kt); + ts = ktime_to_timespec(kt); } err = 0; if (put_user(ts.tv_sec, &ctv->tv_sec) || @@ -587,8 +590,8 @@ int compat_mc_setsockopt(struct sock *sock, int level, int optname, compat_alloc_user_space(sizeof(struct group_req)); u32 interface; - if (!access_ok(VERIFY_READ, gr32, sizeof(*gr32)) || - !access_ok(VERIFY_WRITE, kgr, sizeof(struct group_req)) || + if (!access_ok(gr32, sizeof(*gr32)) || + !access_ok(kgr, sizeof(struct group_req)) || __get_user(interface, &gr32->gr_interface) || __put_user(interface, &kgr->gr_interface) || copy_in_user(&kgr->gr_group, &gr32->gr_group, @@ -608,8 +611,8 @@ int compat_mc_setsockopt(struct sock *sock, int level, int optname, sizeof(struct group_source_req)); u32 interface; - if (!access_ok(VERIFY_READ, gsr32, sizeof(*gsr32)) || - !access_ok(VERIFY_WRITE, kgsr, + if (!access_ok(gsr32, sizeof(*gsr32)) || + !access_ok(kgsr, sizeof(struct group_source_req)) || __get_user(interface, &gsr32->gsr_interface) || __put_user(interface, &kgsr->gsr_interface) || @@ -628,7 +631,7 @@ int compat_mc_setsockopt(struct sock *sock, int level, int optname, struct 
group_filter __user *kgf; u32 interface, fmode, numsrc; - if (!access_ok(VERIFY_READ, gf32, __COMPAT_GF0_SIZE) || + if (!access_ok(gf32, __COMPAT_GF0_SIZE) || __get_user(interface, &gf32->gf_interface) || __get_user(fmode, &gf32->gf_fmode) || __get_user(numsrc, &gf32->gf_numsrc)) @@ -638,7 +641,7 @@ int compat_mc_setsockopt(struct sock *sock, int level, int optname, if (koptlen < GROUP_FILTER_SIZE(numsrc)) return -EINVAL; kgf = compat_alloc_user_space(koptlen); - if (!access_ok(VERIFY_WRITE, kgf, koptlen) || + if (!access_ok(kgf, koptlen) || __put_user(interface, &kgf->gf_interface) || __put_user(fmode, &kgf->gf_fmode) || __put_user(numsrc, &kgf->gf_numsrc) || @@ -672,7 +675,7 @@ int compat_mc_getsockopt(struct sock *sock, int level, int optname, return getsockopt(sock, level, optname, optval, optlen); koptlen = compat_alloc_user_space(sizeof(*koptlen)); - if (!access_ok(VERIFY_READ, optlen, sizeof(*optlen)) || + if (!access_ok(optlen, sizeof(*optlen)) || __get_user(ulen, optlen)) return -EFAULT; @@ -682,14 +685,14 @@ int compat_mc_getsockopt(struct sock *sock, int level, int optname, if (klen < GROUP_FILTER_SIZE(0)) return -EINVAL; - if (!access_ok(VERIFY_WRITE, koptlen, sizeof(*koptlen)) || + if (!access_ok(koptlen, sizeof(*koptlen)) || __put_user(klen, koptlen)) return -EFAULT; /* have to allow space for previous compat_alloc_user_space, too */ kgf = compat_alloc_user_space(klen+sizeof(*optlen)); - if (!access_ok(VERIFY_READ, gf32, __COMPAT_GF0_SIZE) || + if (!access_ok(gf32, __COMPAT_GF0_SIZE) || __get_user(interface, &gf32->gf_interface) || __get_user(fmode, &gf32->gf_fmode) || __get_user(numsrc, &gf32->gf_numsrc) || @@ -703,18 +706,18 @@ int compat_mc_getsockopt(struct sock *sock, int level, int optname, if (err) return err; - if (!access_ok(VERIFY_READ, koptlen, sizeof(*koptlen)) || + if (!access_ok(koptlen, sizeof(*koptlen)) || __get_user(klen, koptlen)) return -EFAULT; ulen = klen - (sizeof(*kgf)-sizeof(*gf32)); - if (!access_ok(VERIFY_WRITE, optlen, sizeof(*optlen)) || + if (!access_ok(optlen, sizeof(*optlen)) || __put_user(ulen, optlen)) return -EFAULT; - if (!access_ok(VERIFY_READ, kgf, klen) || - !access_ok(VERIFY_WRITE, gf32, ulen) || + if (!access_ok(kgf, klen) || + !access_ok(gf32, ulen) || __get_user(interface, &kgf->gf_interface) || __get_user(fmode, &kgf->gf_fmode) || __get_user(numsrc, &kgf->gf_numsrc) || diff --git a/net/core/dev.c b/net/core/dev.c index 1b5a4410be0e..82f20022259d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1821,7 +1821,7 @@ EXPORT_SYMBOL_GPL(net_dec_egress_queue); #endif static DEFINE_STATIC_KEY_FALSE(netstamp_needed_key); -#ifdef HAVE_JUMP_LABEL +#ifdef CONFIG_JUMP_LABEL static atomic_t netstamp_needed_deferred; static atomic_t netstamp_wanted; static void netstamp_clear(struct work_struct *work) @@ -1840,7 +1840,7 @@ static DECLARE_WORK(netstamp_work, netstamp_clear); void net_enable_timestamp(void) { -#ifdef HAVE_JUMP_LABEL +#ifdef CONFIG_JUMP_LABEL int wanted; while (1) { @@ -1860,7 +1860,7 @@ EXPORT_SYMBOL(net_enable_timestamp); void net_disable_timestamp(void) { -#ifdef HAVE_JUMP_LABEL +#ifdef CONFIG_JUMP_LABEL int wanted; while (1) { diff --git a/net/core/ethtool.c b/net/core/ethtool.c index d05402868575..158264f7cfaf 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -793,8 +793,13 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, if (rc >= 0) info.n_priv_flags = rc; } - if (ops->get_regs_len) - info.regdump_len = ops->get_regs_len(dev); + if (ops->get_regs_len) { + int ret = ops->get_regs_len(dev); 
+ + if (ret > 0) + info.regdump_len = ret; + } + if (ops->get_eeprom_len) info.eedump_len = ops->get_eeprom_len(dev); @@ -1337,6 +1342,9 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) return -EFAULT; reglen = ops->get_regs_len(dev); + if (reglen <= 0) + return reglen; + if (regs.len > reglen) regs.len = reglen; diff --git a/net/core/filter.c b/net/core/filter.c index 447dd1bad31f..2b3b436ef545 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4203,7 +4203,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, /* Only some options are supported */ switch (optname) { case TCP_BPF_IW: - if (val <= 0 || tp->data_segs_out > 0) + if (val <= 0 || tp->data_segs_out > tp->syn_data) ret = -EINVAL; else tp->snd_cwnd = val; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 763a7b08df67..3e27a779f288 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -18,6 +18,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/slab.h> +#include <linux/kmemleak.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/module.h> @@ -443,12 +444,14 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) ret = kmalloc(sizeof(*ret), GFP_ATOMIC); if (!ret) return NULL; - if (size <= PAGE_SIZE) + if (size <= PAGE_SIZE) { buckets = kzalloc(size, GFP_ATOMIC); - else + } else { buckets = (struct neighbour __rcu **) __get_free_pages(GFP_ATOMIC | __GFP_ZERO, get_order(size)); + kmemleak_alloc(buckets, size, 0, GFP_ATOMIC); + } if (!buckets) { kfree(ret); return NULL; @@ -468,10 +471,12 @@ static void neigh_hash_free_rcu(struct rcu_head *head) size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *); struct neighbour __rcu **buckets = nht->hash_buckets; - if (size <= PAGE_SIZE) + if (size <= PAGE_SIZE) { kfree(buckets); - else + } else { + kmemleak_free(buckets); free_pages((unsigned long)buckets, get_order(size)); + } kfree(nht); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 48f61885fd6f..5ea1bed08ede 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4104,6 +4104,11 @@ static int rtnl_fdb_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (err < 0) return err; + if (!addr) { + NL_SET_ERR_MSG(extack, "Missing lookup address for fdb get request"); + return -EINVAL; + } + if (brport_idx) { dev = __dev_get_by_index(net, brport_idx); if (!dev) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 37317ffec146..26d848484912 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5270,7 +5270,6 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len, unsigned long chunk; struct sk_buff *skb; struct page *page; - gfp_t gfp_head; int i; *errcode = -EMSGSIZE; @@ -5280,12 +5279,8 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len, if (npages > MAX_SKB_FRAGS) return NULL; - gfp_head = gfp_mask; - if (gfp_head & __GFP_DIRECT_RECLAIM) - gfp_head |= __GFP_RETRY_MAYFAIL; - *errcode = -ENOBUFS; - skb = alloc_skb(header_len, gfp_head); + skb = alloc_skb(header_len, gfp_mask); if (!skb) return NULL; diff --git a/net/core/sock.c b/net/core/sock.c index f00902c532cc..6aa2e7e0b4fb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2751,6 +2751,9 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; sk->sk_stamp = SK_DEFAULT_STAMP; +#if BITS_PER_LONG==32 + seqlock_init(&sk->sk_stamp_seq); +#endif atomic_set(&sk->sk_zckey, 0); #ifdef CONFIG_NET_RX_BUSY_POLL @@ -2850,12 +2853,13 @@ int 
sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) struct timeval tv; sock_enable_timestamp(sk, SOCK_TIMESTAMP); - tv = ktime_to_timeval(sk->sk_stamp); + tv = ktime_to_timeval(sock_read_timestamp(sk)); if (tv.tv_sec == -1) return -ENOENT; if (tv.tv_sec == 0) { - sk->sk_stamp = ktime_get_real(); - tv = ktime_to_timeval(sk->sk_stamp); + ktime_t kt = ktime_get_real(); + sock_write_timestamp(sk, kt); + tv = ktime_to_timeval(kt); } return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0; } @@ -2866,11 +2870,12 @@ int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp) struct timespec ts; sock_enable_timestamp(sk, SOCK_TIMESTAMP); - ts = ktime_to_timespec(sk->sk_stamp); + ts = ktime_to_timespec(sock_read_timestamp(sk)); if (ts.tv_sec == -1) return -ENOENT; if (ts.tv_sec == 0) { - sk->sk_stamp = ktime_get_real(); + ktime_t kt = ktime_get_real(); + sock_write_timestamp(sk, kt); ts = ktime_to_timespec(sk->sk_stamp); } return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0; diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c index 5e04ed25bc0e..1e976bb93d99 100644 --- a/net/ipv4/bpfilter/sockopt.c +++ b/net/ipv4/bpfilter/sockopt.c @@ -1,28 +1,54 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/init.h> +#include <linux/module.h> #include <linux/uaccess.h> #include <linux/bpfilter.h> #include <uapi/linux/bpf.h> #include <linux/wait.h> #include <linux/kmod.h> +#include <linux/fs.h> +#include <linux/file.h> -int (*bpfilter_process_sockopt)(struct sock *sk, int optname, - char __user *optval, - unsigned int optlen, bool is_set); -EXPORT_SYMBOL_GPL(bpfilter_process_sockopt); +struct bpfilter_umh_ops bpfilter_ops; +EXPORT_SYMBOL_GPL(bpfilter_ops); + +static void bpfilter_umh_cleanup(struct umh_info *info) +{ + mutex_lock(&bpfilter_ops.lock); + bpfilter_ops.stop = true; + fput(info->pipe_to_umh); + fput(info->pipe_from_umh); + info->pid = 0; + mutex_unlock(&bpfilter_ops.lock); +} static int bpfilter_mbox_request(struct sock *sk, int optname, char __user *optval, unsigned int optlen, bool is_set) { - if (!bpfilter_process_sockopt) { - int err = request_module("bpfilter"); + int err; + mutex_lock(&bpfilter_ops.lock); + if (!bpfilter_ops.sockopt) { + mutex_unlock(&bpfilter_ops.lock); + err = request_module("bpfilter"); + mutex_lock(&bpfilter_ops.lock); if (err) - return err; - if (!bpfilter_process_sockopt) - return -ECHILD; + goto out; + if (!bpfilter_ops.sockopt) { + err = -ECHILD; + goto out; + } + } + if (bpfilter_ops.stop) { + err = bpfilter_ops.start(); + if (err) + goto out; } - return bpfilter_process_sockopt(sk, optname, optval, optlen, is_set); + err = bpfilter_ops.sockopt(sk, optname, optval, optlen, is_set); +out: + mutex_unlock(&bpfilter_ops.lock); + return err; } int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval, @@ -41,3 +67,15 @@ int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, return bpfilter_mbox_request(sk, optname, optval, len, false); } + +static int __init bpfilter_sockopt_init(void) +{ + mutex_init(&bpfilter_ops.lock); + bpfilter_ops.stop = true; + bpfilter_ops.info.cmdline = "bpfilter_umh"; + bpfilter_ops.info.cleanup = &bpfilter_umh_cleanup; + + return 0; +} + +module_init(bpfilter_sockopt_init); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 04ba321ae5ce..e258a00b4a3d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1826,7 +1826,7 @@ put_tgt_net: if (fillargs.netnsid >= 0) put_net(tgt_net); - return err < 0 ? 
err : skb->len; + return skb->len ? : err; } static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index f8eb78d042a4..cfec3af54c8d 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -198,11 +198,15 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) static struct fib_table *fib_empty_table(struct net *net) { - u32 id; + u32 id = 1; - for (id = 1; id <= RT_TABLE_MAX; id++) + while (1) { if (!fib_get_table(net, id)) return fib_new_table(net, id); + + if (id++ == RT_TABLE_MAX) + break; + } return NULL; } diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 0c9f171fb085..632863541082 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -1065,7 +1065,8 @@ static int gue_err(struct sk_buff *skb, u32 info) * recursion. Besides, this kind of encapsulation can't even be * configured currently. Discard this. */ - if (guehdr->proto_ctype == IPPROTO_UDP) + if (guehdr->proto_ctype == IPPROTO_UDP || + guehdr->proto_ctype == IPPROTO_UDPLITE) return -EOPNOTSUPP; skb_set_transport_header(skb, -(int)sizeof(struct icmphdr)); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index c7a7bd58a23c..d1d09f3e5f9e 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -676,6 +676,9 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tnl_params; + if (!pskb_inet_may_pull(skb)) + goto free_skb; + if (tunnel->collect_md) { gre_fb_xmit(skb, dev, skb->protocol); return NETDEV_TX_OK; @@ -719,6 +722,9 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb, struct ip_tunnel *tunnel = netdev_priv(dev); bool truncate = false; + if (!pskb_inet_may_pull(skb)) + goto free_skb; + if (tunnel->collect_md) { erspan_fb_xmit(skb, dev, skb->protocol); return NETDEV_TX_OK; @@ -762,6 +768,9 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, { struct ip_tunnel *tunnel = netdev_priv(dev); + if (!pskb_inet_may_pull(skb)) + goto free_skb; + if (tunnel->collect_md) { gre_fb_xmit(skb, dev, htons(ETH_P_TEB)); return NETDEV_TX_OK; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index fffcc130900e..82f341e84fae 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -148,19 +148,17 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) { + __be16 _ports[2], *ports; struct sockaddr_in sin; - __be16 *ports; - int end; - - end = skb_transport_offset(skb) + 4; - if (end > 0 && !pskb_may_pull(skb, end)) - return; /* All current transport protocols have the port numbers in the * first four bytes of the transport header and this function is * written with this assumption in mind. 
*/ - ports = (__be16 *)skb_transport_header(skb); + ports = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_ports), &_ports); + if (!ports) + return; sin.sin_family = AF_INET; sin.sin_addr.s_addr = ip_hdr(skb)->daddr; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 284a22154b4e..c4f5602308ed 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -627,7 +627,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, u8 protocol) { struct ip_tunnel *tunnel = netdev_priv(dev); - unsigned int inner_nhdr_len = 0; const struct iphdr *inner_iph; struct flowi4 fl4; u8 tos, ttl; @@ -637,14 +636,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, __be32 dst; bool connected; - /* ensure we can access the inner net header, for several users below */ - if (skb->protocol == htons(ETH_P_IP)) - inner_nhdr_len = sizeof(struct iphdr); - else if (skb->protocol == htons(ETH_P_IPV6)) - inner_nhdr_len = sizeof(struct ipv6hdr); - if (unlikely(!pskb_may_pull(skb, inner_nhdr_len))) - goto tx_error; - inner_iph = (const struct iphdr *)skb_inner_network_header(skb); connected = (tunnel->parms.iph.daddr != 0); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index de31b302d69c..d7b43e700023 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -241,6 +241,9 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); struct flowi fl; + if (!pskb_inet_may_pull(skb)) + goto tx_err; + memset(&fl, 0, sizeof(fl)); switch (skb->protocol) { @@ -253,15 +256,18 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); break; default: - dev->stats.tx_errors++; - dev_kfree_skb(skb); - return NETDEV_TX_OK; + goto tx_err; } /* override mark with tunnel output key */ fl.flowi_mark = be32_to_cpu(tunnel->parms.o_key); return vti_xmit(skb, dev, &fl); + +tx_err: + dev->stats.tx_errors++; + kfree_skb(skb); + return NETDEV_TX_OK; } static int vti4_err(struct sk_buff *skb, u32 info) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index f87dbc78b6bc..71a29e9c0620 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -226,7 +226,7 @@ static int tcp_write_timeout(struct sock *sk) if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { if (icsk->icsk_retransmits) { dst_negative_advice(sk); - } else if (!tp->syn_data && !tp->syn_fastopen) { + } else { sk_rethink_txhash(sk); } retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 521e471f1cf9..93d5ad2b1a69 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4736,8 +4736,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC; idev = ipv6_find_idev(dev); - if (IS_ERR(idev)) - return PTR_ERR(idev); + if (!idev) + return -ENOBUFS; if (!ipv6_allow_optimistic_dad(net, idev)) cfg.ifa_flags &= ~IFA_F_OPTIMISTIC; @@ -5154,7 +5154,7 @@ put_tgt_net: if (fillargs.netnsid >= 0) put_net(tgt_net); - return err < 0 ? err : skb->len; + return skb->len ? 
: err; } static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index f0cd291034f0..d99753b5e39b 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -310,6 +310,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, /* Check if the address belongs to the host. */ if (addr_type == IPV6_ADDR_MAPPED) { + struct net_device *dev = NULL; int chk_addr_ret; /* Binding to v4-mapped address on a v6-only socket @@ -320,9 +321,20 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, goto out; } + rcu_read_lock(); + if (sk->sk_bound_dev_if) { + dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); + if (!dev) { + err = -ENODEV; + goto out_unlock; + } + } + /* Reproduce AF_INET checks to make the bindings consistent */ v4addr = addr->sin6_addr.s6_addr32[3]; - chk_addr_ret = inet_addr_type(net, v4addr); + chk_addr_ret = inet_addr_type_dev_table(net, dev, v4addr); + rcu_read_unlock(); + if (!inet_can_nonlocal_bind(net, inet) && v4addr != htonl(INADDR_ANY) && chk_addr_ret != RTN_LOCAL && @@ -350,6 +362,9 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, err = -EINVAL; goto out_unlock; } + } + + if (sk->sk_bound_dev_if) { dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index bde08aa549f3..ee4a4e54d016 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -341,6 +341,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info) skb_reset_network_header(skb); iph = ipv6_hdr(skb); iph->daddr = fl6->daddr; + ip6_flow_hdr(iph, 0, 0); serr = SKB_EXT_ERR(skb); serr->ee.ee_errno = err; @@ -700,17 +701,15 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, } if (np->rxopt.bits.rxorigdstaddr) { struct sockaddr_in6 sin6; - __be16 *ports; - int end; + __be16 _ports[2], *ports; - end = skb_transport_offset(skb) + 4; - if (end <= 0 || pskb_may_pull(skb, end)) { + ports = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_ports), &_ports); + if (ports) { /* All current transport protocols have the port numbers in the * first four bytes of the transport header and this function is * written with this assumption in mind. */ - ports = (__be16 *)skb_transport_header(skb); - sin6.sin6_family = AF_INET6; sin6.sin6_addr = ipv6_hdr(skb)->daddr; sin6.sin6_port = ports[1]; diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c index bd675c61deb1..7da7bf3b7fe3 100644 --- a/net/ipv6/fou6.c +++ b/net/ipv6/fou6.c @@ -131,6 +131,14 @@ static int gue6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (validate_gue_flags(guehdr, optlen)) return -EINVAL; + /* Handling exceptions for direct UDP encapsulation in GUE would lead to + * recursion. Besides, this kind of encapsulation can't even be + * configured currently. Discard this. 
+ */ + if (guehdr->proto_ctype == IPPROTO_UDP || + guehdr->proto_ctype == IPPROTO_UDPLITE) + return -EOPNOTSUPP; + skb_set_transport_header(skb, -(int)sizeof(struct icmp6hdr)); ret = gue6_err_proto_handler(guehdr->proto_ctype, skb, opt, type, code, offset, info); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 5d7aa2c2770c..bbcdfd299692 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -423,10 +423,10 @@ static int icmp6_iif(const struct sk_buff *skb) static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct in6_addr *force_saddr) { - struct net *net = dev_net(skb->dev); struct inet6_dev *idev = NULL; struct ipv6hdr *hdr = ipv6_hdr(skb); struct sock *sk; + struct net *net; struct ipv6_pinfo *np; const struct in6_addr *saddr = NULL; struct dst_entry *dst; @@ -437,12 +437,16 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, int iif = 0; int addr_type = 0; int len; - u32 mark = IP6_REPLY_MARK(net, skb->mark); + u32 mark; if ((u8 *)hdr < skb->head || (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb)) return; + if (!skb->dev) + return; + net = dev_net(skb->dev); + mark = IP6_REPLY_MARK(net, skb->mark); /* * Make sure we respect the rules * i.e. RFC 1885 2.4(e) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index ae3786132c23..6613d8dbb0e5 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -627,7 +627,11 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) return -ENOENT; } - res = fib6_dump_table(tb, skb, cb); + if (!cb->args[0]) { + res = fib6_dump_table(tb, skb, cb); + if (!res) + cb->args[0] = 1; + } goto out; } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 229e55c99021..09d0826742f8 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -881,6 +881,9 @@ static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb, struct net_device_stats *stats = &t->dev->stats; int ret; + if (!pskb_inet_may_pull(skb)) + goto tx_err; + if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr)) goto tx_err; @@ -923,6 +926,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, int nhoff; int thoff; + if (!pskb_inet_may_pull(skb)) + goto tx_err; + if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr)) goto tx_err; @@ -995,8 +1001,6 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, goto tx_err; } } else { - struct ipv6hdr *ipv6h = ipv6_hdr(skb); - switch (skb->protocol) { case htons(ETH_P_IP): memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); @@ -1004,7 +1008,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, &dsfield, &encap_limit); break; case htons(ETH_P_IPV6): - if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr)) + if (ipv6_addr_equal(&t->parms.raddr, &ipv6_hdr(skb)->saddr)) goto tx_err; if (prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, &dsfield, &encap_limit)) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 99179b9c8384..0c6403cf8b52 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1243,10 +1243,6 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) u8 tproto; int err; - /* ensure we can access the full inner ip header */ - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - return -1; - iph = ip_hdr(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); @@ -1321,9 +1317,6 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) u8 tproto; int err; - if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) - return -1; - ipv6h = ipv6_hdr(skb); tproto = 
READ_ONCE(t->parms.proto); if ((tproto != IPPROTO_IPV6 && tproto != 0) || @@ -1405,6 +1398,9 @@ ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev) struct net_device_stats *stats = &t->dev->stats; int ret; + if (!pskb_inet_may_pull(skb)) + goto tx_err; + switch (skb->protocol) { case htons(ETH_P_IP): ret = ip4ip6_tnl_xmit(skb, dev); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 706fe42e4928..8b6eefff2f7e 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -522,18 +522,18 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct net_device_stats *stats = &t->dev->stats; - struct ipv6hdr *ipv6h; struct flowi fl; int ret; + if (!pskb_inet_may_pull(skb)) + goto tx_err; + memset(&fl, 0, sizeof(fl)); switch (skb->protocol) { case htons(ETH_P_IPV6): - ipv6h = ipv6_hdr(skb); - if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) || - vti6_addr_conflict(t, ipv6h)) + vti6_addr_conflict(t, ipv6_hdr(skb))) goto tx_err; xfrm_decode_session(skb, &fl, AF_INET6); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 8276f1224f16..30337b38274b 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -51,6 +51,7 @@ #include <linux/export.h> #include <net/ip6_checksum.h> #include <linux/netconf.h> +#include <net/ip_tunnels.h> #include <linux/nospec.h> @@ -599,13 +600,12 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, .flowi6_mark = skb->mark, }; - int err; - err = ip6mr_fib_lookup(net, &fl6, &mrt); - if (err < 0) { - kfree_skb(skb); - return err; - } + if (!pskb_inet_may_pull(skb)) + goto tx_err; + + if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) + goto tx_err; read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; @@ -614,6 +614,11 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, read_unlock(&mrt_lock); kfree_skb(skb); return NETDEV_TX_OK; + +tx_err: + dev->stats.tx_errors++; + kfree_skb(skb); + return NETDEV_TX_OK; } static int reg_vif_get_iflink(const struct net_device *dev) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index a5bb59ee50ac..36a3d8dc61f5 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -210,7 +210,7 @@ found: if (next && next->ip_defrag_offset < end) goto discard_fq; - /* Note : skb->ip_defrag_offset and skb->dev share the same location */ + /* Note : skb->ip_defrag_offset and skb->sk share the same location */ dev = skb->dev; if (dev) fq->iif = dev->ifindex; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 194bc162866d..40b225f87d5e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -210,7 +210,9 @@ struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw, n = __ipv6_neigh_lookup(dev, daddr); if (n) return n; - return neigh_create(&nd_tbl, daddr, dev); + + n = neigh_create(&nd_tbl, daddr, dev); + return IS_ERR(n) ? NULL : n; } static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst, @@ -5054,12 +5056,16 @@ int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, { struct net *net; int delay; + int ret; if (!write) return -EINVAL; net = (struct net *)ctl->extra1; delay = net->ipv6.sysctl.flush_delay; - proc_dointvec(ctl, write, buffer, lenp, ppos); + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + if (ret) + return ret; + fib6_run_gc(delay <= 0 ? 
0 : (unsigned long)delay, net, delay > 0); return 0; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 51c9f75f34b9..1e03305c0549 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1021,6 +1021,9 @@ tx_error: static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { + if (!pskb_inet_may_pull(skb)) + goto tx_err; + switch (skb->protocol) { case htons(ETH_P_IP): sit_tunnel_xmit__(skb, dev, IPPROTO_IPIP); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9cbf363172bd..7c3505006f8e 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1390,10 +1390,7 @@ do_udp_sendmsg: ipc6.opt = opt; fl6.flowi6_proto = sk->sk_protocol; - if (!ipv6_addr_any(daddr)) - fl6.daddr = *daddr; - else - fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ + fl6.daddr = *daddr; if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) fl6.saddr = np->saddr; fl6.fl6_sport = inet->inet_sport; @@ -1421,6 +1418,9 @@ do_udp_sendmsg: } } + if (ipv6_addr_any(&fl6.daddr)) + fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ + final_p = fl6_update_dst(&fl6, opt, &final); if (final_p) connected = false; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index dc240cb47ddf..93aaec3a54ec 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -33,7 +33,7 @@ EXPORT_SYMBOL_GPL(nf_ipv6_ops); DEFINE_PER_CPU(bool, nf_skb_duplicated); EXPORT_SYMBOL_GPL(nf_skb_duplicated); -#ifdef HAVE_JUMP_LABEL +#ifdef CONFIG_JUMP_LABEL struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; EXPORT_SYMBOL(nf_hooks_needed); #endif @@ -347,7 +347,7 @@ static int __nf_register_net_hook(struct net *net, int pf, if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) net_inc_ingress_queue(); #endif -#ifdef HAVE_JUMP_LABEL +#ifdef CONFIG_JUMP_LABEL static_key_slow_inc(&nf_hooks_needed[pf][reg->hooknum]); #endif BUG_ON(p == new_hooks); @@ -405,7 +405,7 @@ static void __nf_unregister_net_hook(struct net *net, int pf, if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) net_dec_ingress_queue(); #endif -#ifdef HAVE_JUMP_LABEL +#ifdef CONFIG_JUMP_LABEL static_key_slow_dec(&nf_hooks_needed[pf][reg->hooknum]); #endif } else { diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 9cd180bda092..7554c56b2e63 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -33,12 +33,6 @@ #define CONNCOUNT_SLOTS 256U -#ifdef CONFIG_LOCKDEP -#define CONNCOUNT_LOCK_SLOTS 8U -#else -#define CONNCOUNT_LOCK_SLOTS 256U -#endif - #define CONNCOUNT_GC_MAX_NODES 8 #define MAX_KEYLEN 5 @@ -49,8 +43,6 @@ struct nf_conncount_tuple { struct nf_conntrack_zone zone; int cpu; u32 jiffies32; - bool dead; - struct rcu_head rcu_head; }; struct nf_conncount_rb { @@ -60,7 +52,7 @@ struct nf_conncount_rb { struct rcu_head rcu_head; }; -static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_in_smp; +static spinlock_t nf_conncount_locks[CONNCOUNT_SLOTS] __cacheline_aligned_in_smp; struct nf_conncount_data { unsigned int keylen; @@ -89,79 +81,25 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen) return memcmp(a, b, klen * sizeof(u32)); } -enum nf_conncount_list_add -nf_conncount_add(struct nf_conncount_list *list, - const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_zone *zone) -{ - struct nf_conncount_tuple *conn; - - if (WARN_ON_ONCE(list->count > INT_MAX)) - return NF_CONNCOUNT_ERR; - - conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC); - if (conn == NULL) - return NF_CONNCOUNT_ERR; - - 
conn->tuple = *tuple; - conn->zone = *zone; - conn->cpu = raw_smp_processor_id(); - conn->jiffies32 = (u32)jiffies; - conn->dead = false; - spin_lock_bh(&list->list_lock); - if (list->dead == true) { - kmem_cache_free(conncount_conn_cachep, conn); - spin_unlock_bh(&list->list_lock); - return NF_CONNCOUNT_SKIP; - } - list_add_tail(&conn->node, &list->head); - list->count++; - spin_unlock_bh(&list->list_lock); - return NF_CONNCOUNT_ADDED; -} -EXPORT_SYMBOL_GPL(nf_conncount_add); - -static void __conn_free(struct rcu_head *h) -{ - struct nf_conncount_tuple *conn; - - conn = container_of(h, struct nf_conncount_tuple, rcu_head); - kmem_cache_free(conncount_conn_cachep, conn); -} - -static bool conn_free(struct nf_conncount_list *list, +static void conn_free(struct nf_conncount_list *list, struct nf_conncount_tuple *conn) { - bool free_entry = false; - - spin_lock_bh(&list->list_lock); - - if (conn->dead) { - spin_unlock_bh(&list->list_lock); - return free_entry; - } + lockdep_assert_held(&list->list_lock); list->count--; - conn->dead = true; - list_del_rcu(&conn->node); - if (list->count == 0) { - list->dead = true; - free_entry = true; - } + list_del(&conn->node); - spin_unlock_bh(&list->list_lock); - call_rcu(&conn->rcu_head, __conn_free); - return free_entry; + kmem_cache_free(conncount_conn_cachep, conn); } static const struct nf_conntrack_tuple_hash * find_or_evict(struct net *net, struct nf_conncount_list *list, - struct nf_conncount_tuple *conn, bool *free_entry) + struct nf_conncount_tuple *conn) { const struct nf_conntrack_tuple_hash *found; unsigned long a, b; int cpu = raw_smp_processor_id(); - __s32 age; + u32 age; found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple); if (found) @@ -176,52 +114,45 @@ find_or_evict(struct net *net, struct nf_conncount_list *list, */ age = a - b; if (conn->cpu == cpu || age >= 2) { - *free_entry = conn_free(list, conn); + conn_free(list, conn); return ERR_PTR(-ENOENT); } return ERR_PTR(-EAGAIN); } -void nf_conncount_lookup(struct net *net, - struct nf_conncount_list *list, - const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_zone *zone, - bool *addit) +static int __nf_conncount_add(struct net *net, + struct nf_conncount_list *list, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone) { const struct nf_conntrack_tuple_hash *found; struct nf_conncount_tuple *conn, *conn_n; struct nf_conn *found_ct; unsigned int collect = 0; - bool free_entry = false; - - /* best effort only */ - *addit = tuple ? true : false; /* check the saved connections */ list_for_each_entry_safe(conn, conn_n, &list->head, node) { if (collect > CONNCOUNT_GC_MAX_NODES) break; - found = find_or_evict(net, list, conn, &free_entry); + found = find_or_evict(net, list, conn); if (IS_ERR(found)) { /* Not found, but might be about to be confirmed */ if (PTR_ERR(found) == -EAGAIN) { - if (!tuple) - continue; - if (nf_ct_tuple_equal(&conn->tuple, tuple) && nf_ct_zone_id(&conn->zone, conn->zone.dir) == nf_ct_zone_id(zone, zone->dir)) - *addit = false; - } else if (PTR_ERR(found) == -ENOENT) + return 0; /* already exists */ + } else { collect++; + } continue; } found_ct = nf_ct_tuplehash_to_ctrack(found); - if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple) && + if (nf_ct_tuple_equal(&conn->tuple, tuple) && nf_ct_zone_equal(found_ct, zone, zone->dir)) { /* * We should not see tuples twice unless someone hooks @@ -229,7 +160,8 @@ void nf_conncount_lookup(struct net *net, * * Attempt to avoid a re-add in this case. 
*/ - *addit = false; + nf_ct_put(found_ct); + return 0; } else if (already_closed(found_ct)) { /* * we do not care about connections which are @@ -243,19 +175,48 @@ void nf_conncount_lookup(struct net *net, nf_ct_put(found_ct); } + + if (WARN_ON_ONCE(list->count > INT_MAX)) + return -EOVERFLOW; + + conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC); + if (conn == NULL) + return -ENOMEM; + + conn->tuple = *tuple; + conn->zone = *zone; + conn->cpu = raw_smp_processor_id(); + conn->jiffies32 = (u32)jiffies; + list_add_tail(&conn->node, &list->head); + list->count++; + return 0; } -EXPORT_SYMBOL_GPL(nf_conncount_lookup); + +int nf_conncount_add(struct net *net, + struct nf_conncount_list *list, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone) +{ + int ret; + + /* check the saved connections */ + spin_lock_bh(&list->list_lock); + ret = __nf_conncount_add(net, list, tuple, zone); + spin_unlock_bh(&list->list_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(nf_conncount_add); void nf_conncount_list_init(struct nf_conncount_list *list) { spin_lock_init(&list->list_lock); INIT_LIST_HEAD(&list->head); list->count = 0; - list->dead = false; } EXPORT_SYMBOL_GPL(nf_conncount_list_init); -/* Return true if the list is empty */ +/* Return true if the list is empty. Must be called with BH disabled. */ bool nf_conncount_gc_list(struct net *net, struct nf_conncount_list *list) { @@ -263,17 +224,17 @@ bool nf_conncount_gc_list(struct net *net, struct nf_conncount_tuple *conn, *conn_n; struct nf_conn *found_ct; unsigned int collected = 0; - bool free_entry = false; bool ret = false; + /* don't bother if other cpu is already doing GC */ + if (!spin_trylock(&list->list_lock)) + return false; + list_for_each_entry_safe(conn, conn_n, &list->head, node) { - found = find_or_evict(net, list, conn, &free_entry); + found = find_or_evict(net, list, conn); if (IS_ERR(found)) { - if (PTR_ERR(found) == -ENOENT) { - if (free_entry) - return true; + if (PTR_ERR(found) == -ENOENT) collected++; - } continue; } @@ -284,23 +245,19 @@ bool nf_conncount_gc_list(struct net *net, * closed already -> ditch it */ nf_ct_put(found_ct); - if (conn_free(list, conn)) - return true; + conn_free(list, conn); collected++; continue; } nf_ct_put(found_ct); if (collected > CONNCOUNT_GC_MAX_NODES) - return false; + break; } - spin_lock_bh(&list->list_lock); - if (!list->count) { - list->dead = true; + if (!list->count) ret = true; - } - spin_unlock_bh(&list->list_lock); + spin_unlock(&list->list_lock); return ret; } @@ -314,6 +271,7 @@ static void __tree_nodes_free(struct rcu_head *h) kmem_cache_free(conncount_rb_cachep, rbconn); } +/* caller must hold tree nf_conncount_locks[] lock */ static void tree_nodes_free(struct rb_root *root, struct nf_conncount_rb *gc_nodes[], unsigned int gc_count) @@ -323,8 +281,10 @@ static void tree_nodes_free(struct rb_root *root, while (gc_count) { rbconn = gc_nodes[--gc_count]; spin_lock(&rbconn->list.list_lock); - rb_erase(&rbconn->node, root); - call_rcu(&rbconn->rcu_head, __tree_nodes_free); + if (!rbconn->list.count) { + rb_erase(&rbconn->node, root); + call_rcu(&rbconn->rcu_head, __tree_nodes_free); + } spin_unlock(&rbconn->list.list_lock); } } @@ -341,20 +301,19 @@ insert_tree(struct net *net, struct rb_root *root, unsigned int hash, const u32 *key, - u8 keylen, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_zone *zone) { - enum nf_conncount_list_add ret; struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES]; struct rb_node **rbnode, *parent; 
struct nf_conncount_rb *rbconn; struct nf_conncount_tuple *conn; unsigned int count = 0, gc_count = 0; - bool node_found = false; - - spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); + u8 keylen = data->keylen; + bool do_gc = true; + spin_lock_bh(&nf_conncount_locks[hash]); +restart: parent = NULL; rbnode = &(root->rb_node); while (*rbnode) { @@ -368,45 +327,32 @@ insert_tree(struct net *net, } else if (diff > 0) { rbnode = &((*rbnode)->rb_right); } else { - /* unlikely: other cpu added node already */ - node_found = true; - ret = nf_conncount_add(&rbconn->list, tuple, zone); - if (ret == NF_CONNCOUNT_ERR) { + int ret; + + ret = nf_conncount_add(net, &rbconn->list, tuple, zone); + if (ret) count = 0; /* hotdrop */ - } else if (ret == NF_CONNCOUNT_ADDED) { + else count = rbconn->list.count; - } else { - /* NF_CONNCOUNT_SKIP, rbconn is already - * reclaimed by gc, insert a new tree node - */ - node_found = false; - } - break; + tree_nodes_free(root, gc_nodes, gc_count); + goto out_unlock; } if (gc_count >= ARRAY_SIZE(gc_nodes)) continue; - if (nf_conncount_gc_list(net, &rbconn->list)) + if (do_gc && nf_conncount_gc_list(net, &rbconn->list)) gc_nodes[gc_count++] = rbconn; } if (gc_count) { tree_nodes_free(root, gc_nodes, gc_count); - /* tree_node_free before new allocation permits - * allocator to re-use newly free'd object. - * - * This is a rare event; in most cases we will find - * existing node to re-use. (or gc_count is 0). - */ - - if (gc_count >= ARRAY_SIZE(gc_nodes)) - schedule_gc_worker(data, hash); + schedule_gc_worker(data, hash); + gc_count = 0; + do_gc = false; + goto restart; } - if (node_found) - goto out_unlock; - /* expected case: match, insert new node */ rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC); if (rbconn == NULL) @@ -430,7 +376,7 @@ insert_tree(struct net *net, rb_link_node_rcu(&rbconn->node, parent, rbnode); rb_insert_color(&rbconn->node, root); out_unlock: - spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); + spin_unlock_bh(&nf_conncount_locks[hash]); return count; } @@ -441,7 +387,6 @@ count_tree(struct net *net, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_zone *zone) { - enum nf_conncount_list_add ret; struct rb_root *root; struct rb_node *parent; struct nf_conncount_rb *rbconn; @@ -454,7 +399,6 @@ count_tree(struct net *net, parent = rcu_dereference_raw(root->rb_node); while (parent) { int diff; - bool addit; rbconn = rb_entry(parent, struct nf_conncount_rb, node); @@ -464,31 +408,36 @@ count_tree(struct net *net, } else if (diff > 0) { parent = rcu_dereference_raw(parent->rb_right); } else { - /* same source network -> be counted! */ - nf_conncount_lookup(net, &rbconn->list, tuple, zone, - &addit); + int ret; - if (!addit) + if (!tuple) { + nf_conncount_gc_list(net, &rbconn->list); return rbconn->list.count; + } - ret = nf_conncount_add(&rbconn->list, tuple, zone); - if (ret == NF_CONNCOUNT_ERR) { - return 0; /* hotdrop */ - } else if (ret == NF_CONNCOUNT_ADDED) { - return rbconn->list.count; - } else { - /* NF_CONNCOUNT_SKIP, rbconn is already - * reclaimed by gc, insert a new tree node - */ + spin_lock_bh(&rbconn->list.list_lock); + /* Node might be about to be free'd. + * We need to defer to insert_tree() in this case. + */ + if (rbconn->list.count == 0) { + spin_unlock_bh(&rbconn->list.list_lock); break; } + + /* same source network -> be counted! 
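
count_tree() now walks the rb-tree under RCU only and, when it finds a matching node, takes just that node's list lock; if the per-node list has already been emptied (count == 0) the node may be about to be erased by GC, so the lookup breaks out and retries through insert_tree() under the per-slot tree lock. A condensed sketch of that lockless-lookup-with-locked-fallback pattern, using hypothetical demo_* stand-ins for the nf_conncount types and helpers:

#include <linux/rbtree.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>

/* Hypothetical stand-ins for nf_conncount_rb / nf_conncount_list. */
struct demo_counter {
	spinlock_t lock;
	unsigned int count;
};

struct demo_node {
	struct rb_node rbnode;
	struct demo_counter list;
};

/* Hypothetical helpers: key comparison and the locked insert path. */
int demo_cmp(const struct demo_node *n, const void *key);
unsigned int demo_insert_locked(struct rb_root *root, const void *key);

/* Lockless (RCU) lookup; falls back to the locked insert path when the
 * key is absent or the matching node is empty and about to be erased.
 */
static unsigned int demo_count(struct rb_root *root, const void *key)
{
	struct rb_node *parent = rcu_dereference_raw(root->rb_node);

	while (parent) {
		struct demo_node *n = rb_entry(parent, struct demo_node, rbnode);
		int diff = demo_cmp(n, key);

		if (diff < 0) {
			parent = rcu_dereference_raw(parent->rb_left);
		} else if (diff > 0) {
			parent = rcu_dereference_raw(parent->rb_right);
		} else {
			unsigned int count;

			spin_lock_bh(&n->list.lock);
			if (!n->list.count) {
				/* Empty node, may be freed soon:
				 * retry through the insert path.
				 */
				spin_unlock_bh(&n->list.lock);
				break;
			}
			count = n->list.count;
			spin_unlock_bh(&n->list.lock);
			return count;
		}
	}

	return demo_insert_locked(root, key);
}
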
*/ + ret = __nf_conncount_add(net, &rbconn->list, tuple, zone); + spin_unlock_bh(&rbconn->list.list_lock); + if (ret) + return 0; /* hotdrop */ + else + return rbconn->list.count; } } if (!tuple) return 0; - return insert_tree(net, data, root, hash, key, keylen, tuple, zone); + return insert_tree(net, data, root, hash, key, tuple, zone); } static void tree_gc_worker(struct work_struct *work) @@ -499,27 +448,47 @@ static void tree_gc_worker(struct work_struct *work) struct rb_node *node; unsigned int tree, next_tree, gc_count = 0; - tree = data->gc_tree % CONNCOUNT_LOCK_SLOTS; + tree = data->gc_tree % CONNCOUNT_SLOTS; root = &data->root[tree]; + local_bh_disable(); rcu_read_lock(); for (node = rb_first(root); node != NULL; node = rb_next(node)) { rbconn = rb_entry(node, struct nf_conncount_rb, node); if (nf_conncount_gc_list(data->net, &rbconn->list)) - gc_nodes[gc_count++] = rbconn; + gc_count++; } rcu_read_unlock(); + local_bh_enable(); + + cond_resched(); spin_lock_bh(&nf_conncount_locks[tree]); + if (gc_count < ARRAY_SIZE(gc_nodes)) + goto next; /* do not bother */ - if (gc_count) { - tree_nodes_free(root, gc_nodes, gc_count); + gc_count = 0; + node = rb_first(root); + while (node != NULL) { + rbconn = rb_entry(node, struct nf_conncount_rb, node); + node = rb_next(node); + + if (rbconn->list.count > 0) + continue; + + gc_nodes[gc_count++] = rbconn; + if (gc_count >= ARRAY_SIZE(gc_nodes)) { + tree_nodes_free(root, gc_nodes, gc_count); + gc_count = 0; + } } + tree_nodes_free(root, gc_nodes, gc_count); +next: clear_bit(tree, data->pending_trees); next_tree = (tree + 1) % CONNCOUNT_SLOTS; - next_tree = find_next_bit(data->pending_trees, next_tree, CONNCOUNT_SLOTS); + next_tree = find_next_bit(data->pending_trees, CONNCOUNT_SLOTS, next_tree); if (next_tree < CONNCOUNT_SLOTS) { data->gc_tree = next_tree; @@ -621,10 +590,7 @@ static int __init nf_conncount_modinit(void) { int i; - BUILD_BUG_ON(CONNCOUNT_LOCK_SLOTS > CONNCOUNT_SLOTS); - BUILD_BUG_ON((CONNCOUNT_SLOTS % CONNCOUNT_LOCK_SLOTS) != 0); - - for (i = 0; i < CONNCOUNT_LOCK_SLOTS; ++i) + for (i = 0; i < CONNCOUNT_SLOTS; ++i) spin_lock_init(&nf_conncount_locks[i]); conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple", diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index fec814dace5a..2b0a93300dd7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5727,6 +5727,8 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, goto nla_put_failure; nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK); + if (!nest) + goto nla_put_failure; if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) || nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority))) goto nla_put_failure; diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index b90d96ba4a12..af1497ab9464 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c @@ -30,7 +30,6 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv, enum ip_conntrack_info ctinfo; const struct nf_conn *ct; unsigned int count; - bool addit; tuple_ptr = &tuple; @@ -44,19 +43,12 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv, return; } - nf_conncount_lookup(nft_net(pkt), &priv->list, tuple_ptr, zone, - &addit); - count = priv->list.count; - - if (!addit) - goto out; - - if (nf_conncount_add(&priv->list, tuple_ptr, zone) == NF_CONNCOUNT_ERR) { + if (nf_conncount_add(nft_net(pkt), &priv->list, tuple_ptr, zone)) { 
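
Besides restructuring the worker, the tree_gc_worker() hunk fixes a swapped argument pair: find_next_bit() takes the bitmap first, then the bitmap size in bits, and finally the offset to start searching from, and it returns the size when no further set bit exists. A small illustration of scanning for the next pending slot with that signature (the slot count and bitmap names are made up):

#include <linux/bitops.h>
#include <linux/types.h>

#define DEMO_SLOTS 256				/* hypothetical slot count */

static DECLARE_BITMAP(demo_pending, DEMO_SLOTS);

/* Return the next pending slot at or after 'start', or DEMO_SLOTS if
 * nothing at or after 'start' is pending.
 */
static unsigned int demo_next_pending(unsigned int start)
{
	/* find_next_bit(addr, size, offset) - the offset comes last */
	return find_next_bit(demo_pending, DEMO_SLOTS, start);
}
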
regs->verdict.code = NF_DROP; return; } - count++; -out: + + count = priv->list.count; if ((count > priv->limit) ^ priv->invert) { regs->verdict.code = NFT_BREAK; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 03f37c4e64fe..1d3144d19903 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -153,7 +153,7 @@ static struct sock *nr_find_listener(ax25_address *addr) sk_for_each(s, &nr_list) if (!ax25cmp(&nr_sk(s)->source_addr, addr) && s->sk_state == TCP_LISTEN) { - bh_lock_sock(s); + sock_hold(s); goto found; } s = NULL; @@ -174,7 +174,7 @@ static struct sock *nr_find_socket(unsigned char index, unsigned char id) struct nr_sock *nr = nr_sk(s); if (nr->my_index == index && nr->my_id == id) { - bh_lock_sock(s); + sock_hold(s); goto found; } } @@ -198,7 +198,7 @@ static struct sock *nr_find_peer(unsigned char index, unsigned char id, if (nr->your_index == index && nr->your_id == id && !ax25cmp(&nr->dest_addr, dest)) { - bh_lock_sock(s); + sock_hold(s); goto found; } } @@ -224,7 +224,7 @@ static unsigned short nr_find_next_circuit(void) if (i != 0 && j != 0) { if ((sk=nr_find_socket(i, j)) == NULL) break; - bh_unlock_sock(sk); + sock_put(sk); } id++; @@ -920,6 +920,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) } if (sk != NULL) { + bh_lock_sock(sk); skb_reset_transport_header(skb); if (frametype == NR_CONNACK && skb->len == 22) @@ -929,6 +930,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) ret = nr_process_rx_frame(sk, skb); bh_unlock_sock(sk); + sock_put(sk); return ret; } @@ -960,10 +962,12 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) (make = nr_make_new(sk)) == NULL) { nr_transmit_refusal(skb, 0); if (sk) - bh_unlock_sock(sk); + sock_put(sk); return 0; } + bh_lock_sock(sk); + window = skb->data[20]; skb->sk = make; @@ -1016,6 +1020,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) sk->sk_data_ready(sk); bh_unlock_sock(sk); + sock_put(sk); nr_insert_socket(make); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 57e07768c9d1..f54cf17ef7a8 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -276,10 +276,12 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key) nexthdr = ipv6_find_hdr(skb, &payload_ofs, -1, &frag_off, &flags); if (flags & IP6_FH_F_FRAG) { - if (frag_off) + if (frag_off) { key->ip.frag = OVS_FRAG_TYPE_LATER; - else - key->ip.frag = OVS_FRAG_TYPE_FIRST; + key->ip.proto = nexthdr; + return 0; + } + key->ip.frag = OVS_FRAG_TYPE_FIRST; } else { key->ip.frag = OVS_FRAG_TYPE_NONE; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index eedacdebcd4c..d0945253f43b 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2628,7 +2628,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) addr = saddr->sll_halen ? saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); if (addr && dev && saddr->sll_halen < dev->addr_len) - goto out; + goto out_put; } err = -ENXIO; @@ -2828,7 +2828,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) addr = saddr->sll_halen ? 
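
The af_netrom changes separate pinning a socket from locking it: the lookup helpers now only take a reference with sock_hold() while they hold the socket-list lock, and nr_rx_frame() later wraps the actual frame processing in bh_lock_sock()/bh_unlock_sock() before dropping the reference with sock_put(). A sketch of that hold-then-lock pattern, with a hypothetical lookup standing in for nr_find_listener()/nr_find_socket():

#include <linux/skbuff.h>
#include <net/sock.h>

/* Hypothetical lookup: returns a socket whose refcount was raised with
 * sock_hold() under the list lock, or NULL if no match was found.
 */
struct sock *demo_find_held_sock(void);

/* Hypothetical per-socket frame handler. */
int demo_process(struct sock *sk, struct sk_buff *skb);

static int demo_rx(struct sk_buff *skb)
{
	struct sock *sk = demo_find_held_sock();
	int ret;

	if (!sk)
		return 0;

	bh_lock_sock(sk);		/* serialize with the socket's owner */
	ret = demo_process(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);			/* drop the reference the lookup took */
	return ret;
}
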
saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); if (addr && dev && saddr->sll_halen < dev->addr_len) - goto out; + goto out_unlock; } err = -ENXIO; diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 2dcb555e6350..4e0c36acf866 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -522,7 +522,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) i = 1; else - i = ceil(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE); + i = DIV_ROUND_UP(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE); work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos); if (work_alloc == 0) { @@ -879,7 +879,7 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) * Instead of knowing how to return a partial rdma read/write we insist that there * be enough work requests to send the entire message. */ - i = ceil(op->op_count, max_sge); + i = DIV_ROUND_UP(op->op_count, max_sge); work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos); if (work_alloc != i) { diff --git a/net/rds/message.c b/net/rds/message.c index f139420ba1f6..50f13f1d4ae0 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -341,7 +341,7 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in { struct rds_message *rm; unsigned int i; - int num_sgs = ceil(total_len, PAGE_SIZE); + int num_sgs = DIV_ROUND_UP(total_len, PAGE_SIZE); int extra_bytes = num_sgs * sizeof(struct scatterlist); int ret; @@ -351,7 +351,7 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in set_bit(RDS_MSG_PAGEVEC, &rm->m_flags); rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len); - rm->data.op_nents = ceil(total_len, PAGE_SIZE); + rm->data.op_nents = DIV_ROUND_UP(total_len, PAGE_SIZE); rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs, &ret); if (!rm->data.op_sg) { rds_message_put(rm); diff --git a/net/rds/rds.h b/net/rds/rds.h index 02ec4a3b2799..4ffe100ff5e6 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -48,10 +48,6 @@ void rdsdebug(char *fmt, ...) } #endif -/* XXX is there one of these somewhere? 
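
The RDS hunks replace the subsystem's private ceil() macro with the kernel's standard DIV_ROUND_UP(), which rounds the quotient up: DIV_ROUND_UP(n, d) expands to ((n) + (d) - 1) / (d). Note that DIV_ROUND_UP(0, d) is 0, which is why rds_ib_xmit() keeps its explicit "zero-length header needs one work request" special case. A trivial example of the helper sizing fragments (the fragment size here is a stand-in, not RDS_FRAG_SIZE):

#include <linux/kernel.h>	/* DIV_ROUND_UP() */

#define DEMO_FRAG_SIZE 4096	/* hypothetical fragment size */

static unsigned int demo_frags_needed(unsigned int len)
{
	/* 0 -> 0, 1..4096 -> 1, 4097..8192 -> 2, ... */
	return DIV_ROUND_UP(len, DEMO_FRAG_SIZE);
}
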
*/ -#define ceil(x, y) \ - ({ unsigned long __x = (x), __y = (y); (__x + __y - 1) / __y; }) - #define RDS_FRAG_SHIFT 12 #define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT)) diff --git a/net/rds/send.c b/net/rds/send.c index 3d822bad7de9..fd8b687d5c05 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1107,7 +1107,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) size_t total_payload_len = payload_len, rdma_payload_len = 0; bool zcopy = ((msg->msg_flags & MSG_ZEROCOPY) && sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY)); - int num_sgs = ceil(payload_len, PAGE_SIZE); + int num_sgs = DIV_ROUND_UP(payload_len, PAGE_SIZE); int namelen; struct rds_iov_vector_arr vct; int ind; diff --git a/net/rds/tcp.c b/net/rds/tcp.c index b9bbcf3d6c63..c16f0a362c32 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -623,7 +623,7 @@ static void __net_exit rds_tcp_exit_net(struct net *net) if (rtn->rds_tcp_sysctl) unregister_net_sysctl_table(rtn->rds_tcp_sysctl); - if (net != &init_net && rtn->ctl_table) + if (net != &init_net) kfree(rtn->ctl_table); } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index c4da4a78d369..c4e56602e0c6 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -146,6 +146,9 @@ static int smc_release(struct socket *sock) sock_set_flag(sk, SOCK_DEAD); sk->sk_shutdown |= SHUTDOWN_MASK; } + + sk->sk_prot->unhash(sk); + if (smc->clcsock) { if (smc->use_fallback && sk->sk_state == SMC_LISTEN) { /* wake up clcsock accept */ @@ -170,7 +173,6 @@ static int smc_release(struct socket *sock) smc_conn_free(&smc->conn); release_sock(sk); - sk->sk_prot->unhash(sk); sock_put(sk); /* final sock_put */ out: return rc; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index c7ae1ed5324f..a6a060925e5d 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -542,7 +542,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) /* Don't enable netstamp, sunrpc doesn't need that much accuracy */ } - svsk->sk_sk->sk_stamp = skb->tstamp; + sock_write_timestamp(svsk->sk_sk, skb->tstamp); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */ len = skb->len; diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 8c3936403fea..0bea8ff8b0d3 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -89,7 +89,7 @@ proc_dodebug(struct ctl_table *table, int write, left = *lenp; if (write) { - if (!access_ok(VERIFY_READ, buffer, left)) + if (!access_ok(buffer, left)) return -EFAULT; p = buffer; while (left && __get_user(c, p) >= 0 && isspace(c)) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index fb2c0d8f359f..d27f30a9a01d 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -319,7 +319,6 @@ static int tipc_enable_bearer(struct net *net, const char *name, res = tipc_disc_create(net, b, &b->bcast_addr, &skb); if (res) { bearer_disable(net, b); - kfree(b); errstr = "failed to create discoverer"; goto rejected; } diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 21f6ccc89401..77e4b2418f30 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -904,6 +904,10 @@ static int tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, u32 sock) hdr = genlmsg_put(args, 0, 0, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET); + if (!hdr) { + kfree_skb(args); + return -EMSGSIZE; + } nest = nla_nest_start(args, TIPC_NLA_SOCK); if (!nest) { |
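
The tipc_nl_compat_publ_dump() fix adds the missing check on genlmsg_put(): it returns NULL when the buffer has no room for the netlink and Generic Netlink headers, and only on success may the caller go on to nest attributes. A condensed sketch of that message-construction pattern with a hypothetical command and attribute set (error handling here uses genlmsg_cancel(), since this sketch does not own the skb the way the tipc code owns its freshly allocated one):

#include <net/genetlink.h>

/* Hypothetical command and attributes for illustration only. */
enum {
	DEMO_ATTR_UNSPEC,
	DEMO_ATTR_NEST,
	DEMO_ATTR_VALUE,
};
#define DEMO_CMD_DUMP	1

static int demo_fill_msg(struct sk_buff *skb, const struct genl_family *family)
{
	struct nlattr *nest;
	void *hdr;

	hdr = genlmsg_put(skb, 0, 0, family, NLM_F_MULTI, DEMO_CMD_DUMP);
	if (!hdr)
		return -EMSGSIZE;	/* no room for the genetlink header */

	nest = nla_nest_start(skb, DEMO_ATTR_NEST);
	if (!nest)
		goto cancel;

	if (nla_put_u32(skb, DEMO_ATTR_VALUE, 42))
		goto cancel;

	nla_nest_end(skb, nest);
	genlmsg_end(skb, hdr);
	return 0;

cancel:
	genlmsg_cancel(skb, hdr);
	return -EMSGSIZE;
}
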