diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c | 16 | ||||
-rw-r--r-- | net/core/filter.c | 41 | ||||
-rw-r--r-- | net/core/net_namespace.c | 3 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 12 | ||||
-rw-r--r-- | net/core/skbuff.c | 40 | ||||
-rw-r--r-- | net/core/sock.c | 2 |
6 files changed, 79 insertions, 35 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index af4a1b0adc10..2c1c67fad64d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2713,7 +2713,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device if (unlikely(!skb)) goto out_null; - if (netif_needs_gso(dev, skb, features)) { + if (netif_needs_gso(skb, features)) { struct sk_buff *segs; segs = skb_gso_segment(skb, features); @@ -3079,7 +3079,7 @@ static struct rps_dev_flow * set_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow *rflow, u16 next_cpu) { - if (next_cpu != RPS_NO_CPU) { + if (next_cpu < nr_cpu_ids) { #ifdef CONFIG_RFS_ACCEL struct netdev_rx_queue *rxqueue; struct rps_dev_flow_table *flow_table; @@ -3184,7 +3184,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, * If the desired CPU (where last recvmsg was done) is * different from current CPU (one in the rx-queue flow * table entry), switch if one of the following holds: - * - Current CPU is unset (equal to RPS_NO_CPU). + * - Current CPU is unset (>= nr_cpu_ids). * - Current CPU is offline. * - The current CPU's queue tail has advanced beyond the * last packet that was enqueued using this table entry. @@ -3192,14 +3192,14 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, * have been dequeued, thus preserving in order delivery. */ if (unlikely(tcpu != next_cpu) && - (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || + (tcpu >= nr_cpu_ids || !cpu_online(tcpu) || ((int)(per_cpu(softnet_data, tcpu).input_queue_head - rflow->last_qtail)) >= 0)) { tcpu = next_cpu; rflow = set_rps_cpu(dev, skb, rflow, next_cpu); } - if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { + if (tcpu < nr_cpu_ids && cpu_online(tcpu)) { *rflowp = rflow; cpu = tcpu; goto done; @@ -3240,14 +3240,14 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, struct rps_dev_flow_table *flow_table; struct rps_dev_flow *rflow; bool expire = true; - int cpu; + unsigned int cpu; rcu_read_lock(); flow_table = rcu_dereference(rxqueue->rps_flow_table); if (flow_table && flow_id <= flow_table->mask) { rflow = &flow_table->flows[flow_id]; cpu = ACCESS_ONCE(rflow->cpu); - if (rflow->filter == filter_id && cpu != RPS_NO_CPU && + if (rflow->filter == filter_id && cpu < nr_cpu_ids && ((int)(per_cpu(softnet_data, cpu).input_queue_head - rflow->last_qtail) < (int)(10 * flow_table->mask))) @@ -5209,7 +5209,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper)) return -EBUSY; - if (__netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper)) + if (__netdev_find_adj(dev, upper_dev, &dev->adj_list.upper)) return -EEXIST; if (master && netdev_master_upper_dev_get(dev)) diff --git a/net/core/filter.c b/net/core/filter.c index b669e75d2b36..bf831a85c315 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1175,12 +1175,27 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) return 0; } +/** + * bpf_skb_clone_not_writable - is the header of a clone not writable + * @skb: buffer to check + * @len: length up to which to write, can be negative + * + * Returns true if modifying the header part of the cloned buffer + * does require the data to be copied. I.e. this version works with + * negative lengths needed for eBPF case! + */ +static bool bpf_skb_clone_unwritable(const struct sk_buff *skb, int len) +{ + return skb_header_cloned(skb) || + (int) skb_headroom(skb) + len > skb->hdr_len; +} + #define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1) static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) { struct sk_buff *skb = (struct sk_buff *) (long) r1; - unsigned int offset = (unsigned int) r2; + int offset = (int) r2; void *from = (void *) (long) r3; unsigned int len = (unsigned int) r4; char buf[16]; @@ -1194,10 +1209,12 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) * * so check for invalid 'offset' and too large 'len' */ - if (unlikely(offset > 0xffff || len > sizeof(buf))) + if (unlikely((u32) offset > 0xffff || len > sizeof(buf))) return -EFAULT; - if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len)) + offset -= skb->data - skb_mac_header(skb); + if (unlikely(skb_cloned(skb) && + bpf_skb_clone_unwritable(skb, offset + len))) return -EFAULT; ptr = skb_header_pointer(skb, offset, len, buf); @@ -1232,15 +1249,18 @@ const struct bpf_func_proto bpf_skb_store_bytes_proto = { #define BPF_HEADER_FIELD_SIZE(flags) ((flags) & 0x0f) #define BPF_IS_PSEUDO_HEADER(flags) ((flags) & 0x10) -static u64 bpf_l3_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags) +static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) { struct sk_buff *skb = (struct sk_buff *) (long) r1; + int offset = (int) r2; __sum16 sum, *ptr; - if (unlikely(offset > 0xffff)) + if (unlikely((u32) offset > 0xffff)) return -EFAULT; - if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum))) + offset -= skb->data - skb_mac_header(skb); + if (unlikely(skb_cloned(skb) && + bpf_skb_clone_unwritable(skb, offset + sizeof(sum)))) return -EFAULT; ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); @@ -1276,16 +1296,19 @@ const struct bpf_func_proto bpf_l3_csum_replace_proto = { .arg5_type = ARG_ANYTHING, }; -static u64 bpf_l4_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags) +static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) { struct sk_buff *skb = (struct sk_buff *) (long) r1; u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags); + int offset = (int) r2; __sum16 sum, *ptr; - if (unlikely(offset > 0xffff)) + if (unlikely((u32) offset > 0xffff)) return -EFAULT; - if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum))) + offset -= skb->data - skb_mac_header(skb); + if (unlikely(skb_cloned(skb) && + bpf_skb_clone_unwritable(skb, offset + sizeof(sum)))) return -EFAULT; ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index a3abb719221f..572af0011997 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -16,7 +16,6 @@ #include <linux/export.h> #include <linux/user_namespace.h> #include <linux/net_namespace.h> -#include <linux/rtnetlink.h> #include <net/sock.h> #include <net/netlink.h> #include <net/net_namespace.h> @@ -602,7 +601,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh) } err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, - RTM_GETNSID, net, peer, -1); + RTM_NEWNSID, net, peer, -1); if (err < 0) goto err_out; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 358d52a38533..666e0928ba40 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2854,7 +2854,7 @@ static int brport_nla_put_flag(struct sk_buff *skb, u32 flags, u32 mask, int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u16 mode, - u32 flags, u32 mask) + u32 flags, u32 mask, int nlflags) { struct nlmsghdr *nlh; struct ifinfomsg *ifm; @@ -2863,7 +2863,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; struct net_device *br_dev = netdev_master_upper_dev_get(dev); - nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI); + nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), nlflags); if (nlh == NULL) return -EMSGSIZE; @@ -2969,7 +2969,8 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && br_dev->netdev_ops->ndo_bridge_getlink( - skb, portid, seq, dev, filter_mask) < 0) + skb, portid, seq, dev, filter_mask, + NLM_F_MULTI) < 0) break; idx++; } @@ -2977,7 +2978,8 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) if (ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && ops->ndo_bridge_getlink(skb, portid, seq, dev, - filter_mask) < 0) + filter_mask, + NLM_F_MULTI) < 0) break; idx++; } @@ -3018,7 +3020,7 @@ static int rtnl_bridge_notify(struct net_device *dev) goto errout; } - err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); + err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0, 0); if (err < 0) goto errout; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3b6e5830256e..3cfff2a3d651 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -280,13 +280,14 @@ nodata: EXPORT_SYMBOL(__alloc_skb); /** - * build_skb - build a network buffer + * __build_skb - build a network buffer * @data: data buffer provided by caller - * @frag_size: size of fragment, or 0 if head was kmalloced + * @frag_size: size of data, or 0 if head was kmalloced * * Allocate a new &sk_buff. Caller provides space holding head and * skb_shared_info. @data must have been allocated by kmalloc() only if - * @frag_size is 0, otherwise data should come from the page allocator. + * @frag_size is 0, otherwise data should come from the page allocator + * or vmalloc() * The return is the new skb buffer. * On a failure the return is %NULL, and @data is not freed. * Notes : @@ -297,7 +298,7 @@ EXPORT_SYMBOL(__alloc_skb); * before giving packet to stack. * RX rings only contains data buffers, not full skbs. */ -struct sk_buff *build_skb(void *data, unsigned int frag_size) +struct sk_buff *__build_skb(void *data, unsigned int frag_size) { struct skb_shared_info *shinfo; struct sk_buff *skb; @@ -311,7 +312,6 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) memset(skb, 0, offsetof(struct sk_buff, tail)); skb->truesize = SKB_TRUESIZE(size); - skb->head_frag = frag_size != 0; atomic_set(&skb->users, 1); skb->head = data; skb->data = data; @@ -328,6 +328,23 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) return skb; } + +/* build_skb() is wrapper over __build_skb(), that specifically + * takes care of skb->head and skb->pfmemalloc + * This means that if @frag_size is not zero, then @data must be backed + * by a page fragment, not kmalloc() or vmalloc() + */ +struct sk_buff *build_skb(void *data, unsigned int frag_size) +{ + struct sk_buff *skb = __build_skb(data, frag_size); + + if (skb && frag_size) { + skb->head_frag = 1; + if (virt_to_head_page(data)->pfmemalloc) + skb->pfmemalloc = 1; + } + return skb; +} EXPORT_SYMBOL(build_skb); struct netdev_alloc_cache { @@ -348,7 +365,8 @@ static struct page *__page_frag_refill(struct netdev_alloc_cache *nc, gfp_t gfp = gfp_mask; if (order) { - gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY; + gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY | + __GFP_NOMEMALLOC; page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); nc->frag.size = PAGE_SIZE << (page ? order : 0); } @@ -4124,19 +4142,21 @@ EXPORT_SYMBOL(skb_try_coalesce); */ void skb_scrub_packet(struct sk_buff *skb, bool xnet) { - if (xnet) - skb_orphan(skb); skb->tstamp.tv64 = 0; skb->pkt_type = PACKET_HOST; skb->skb_iif = 0; skb->ignore_df = 0; skb_dst_drop(skb); - skb->mark = 0; skb_sender_cpu_clear(skb); - skb_init_secmark(skb); secpath_reset(skb); nf_reset(skb); nf_reset_trace(skb); + + if (!xnet) + return; + + skb_orphan(skb); + skb->mark = 0; } EXPORT_SYMBOL_GPL(skb_scrub_packet); diff --git a/net/core/sock.c b/net/core/sock.c index e891bcf325ca..292f42228bfb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1474,8 +1474,8 @@ void sk_release_kernel(struct sock *sk) return; sock_hold(sk); - sock_net_set(sk, get_net(&init_net)); sock_release(sk->sk_socket); + sock_net_set(sk, get_net(&init_net)); sock_put(sk); } EXPORT_SYMBOL(sk_release_kernel); |