summaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c16
-rw-r--r--net/core/filter.c41
-rw-r--r--net/core/net_namespace.c3
-rw-r--r--net/core/rtnetlink.c12
-rw-r--r--net/core/skbuff.c40
-rw-r--r--net/core/sock.c2
6 files changed, 79 insertions, 35 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index af4a1b0adc10..2c1c67fad64d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2713,7 +2713,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
if (unlikely(!skb))
goto out_null;
- if (netif_needs_gso(dev, skb, features)) {
+ if (netif_needs_gso(skb, features)) {
struct sk_buff *segs;
segs = skb_gso_segment(skb, features);
@@ -3079,7 +3079,7 @@ static struct rps_dev_flow *
set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
struct rps_dev_flow *rflow, u16 next_cpu)
{
- if (next_cpu != RPS_NO_CPU) {
+ if (next_cpu < nr_cpu_ids) {
#ifdef CONFIG_RFS_ACCEL
struct netdev_rx_queue *rxqueue;
struct rps_dev_flow_table *flow_table;
@@ -3184,7 +3184,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
* If the desired CPU (where last recvmsg was done) is
* different from current CPU (one in the rx-queue flow
* table entry), switch if one of the following holds:
- * - Current CPU is unset (equal to RPS_NO_CPU).
+ * - Current CPU is unset (>= nr_cpu_ids).
* - Current CPU is offline.
* - The current CPU's queue tail has advanced beyond the
* last packet that was enqueued using this table entry.
@@ -3192,14 +3192,14 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
* have been dequeued, thus preserving in order delivery.
*/
if (unlikely(tcpu != next_cpu) &&
- (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
+ (tcpu >= nr_cpu_ids || !cpu_online(tcpu) ||
((int)(per_cpu(softnet_data, tcpu).input_queue_head -
rflow->last_qtail)) >= 0)) {
tcpu = next_cpu;
rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
}
- if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
+ if (tcpu < nr_cpu_ids && cpu_online(tcpu)) {
*rflowp = rflow;
cpu = tcpu;
goto done;
@@ -3240,14 +3240,14 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
struct rps_dev_flow_table *flow_table;
struct rps_dev_flow *rflow;
bool expire = true;
- int cpu;
+ unsigned int cpu;
rcu_read_lock();
flow_table = rcu_dereference(rxqueue->rps_flow_table);
if (flow_table && flow_id <= flow_table->mask) {
rflow = &flow_table->flows[flow_id];
cpu = ACCESS_ONCE(rflow->cpu);
- if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
+ if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
((int)(per_cpu(softnet_data, cpu).input_queue_head -
rflow->last_qtail) <
(int)(10 * flow_table->mask)))
@@ -5209,7 +5209,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper))
return -EBUSY;
- if (__netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper))
+ if (__netdev_find_adj(dev, upper_dev, &dev->adj_list.upper))
return -EEXIST;
if (master && netdev_master_upper_dev_get(dev))
diff --git a/net/core/filter.c b/net/core/filter.c
index b669e75d2b36..bf831a85c315 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1175,12 +1175,27 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
return 0;
}
+/**
+ * bpf_skb_clone_not_writable - is the header of a clone not writable
+ * @skb: buffer to check
+ * @len: length up to which to write, can be negative
+ *
+ * Returns true if modifying the header part of the cloned buffer
+ * does require the data to be copied. I.e. this version works with
+ * negative lengths needed for eBPF case!
+ */
+static bool bpf_skb_clone_unwritable(const struct sk_buff *skb, int len)
+{
+ return skb_header_cloned(skb) ||
+ (int) skb_headroom(skb) + len > skb->hdr_len;
+}
+
#define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1)
static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
- unsigned int offset = (unsigned int) r2;
+ int offset = (int) r2;
void *from = (void *) (long) r3;
unsigned int len = (unsigned int) r4;
char buf[16];
@@ -1194,10 +1209,12 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
*
* so check for invalid 'offset' and too large 'len'
*/
- if (unlikely(offset > 0xffff || len > sizeof(buf)))
+ if (unlikely((u32) offset > 0xffff || len > sizeof(buf)))
return -EFAULT;
- if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len))
+ offset -= skb->data - skb_mac_header(skb);
+ if (unlikely(skb_cloned(skb) &&
+ bpf_skb_clone_unwritable(skb, offset + len)))
return -EFAULT;
ptr = skb_header_pointer(skb, offset, len, buf);
@@ -1232,15 +1249,18 @@ const struct bpf_func_proto bpf_skb_store_bytes_proto = {
#define BPF_HEADER_FIELD_SIZE(flags) ((flags) & 0x0f)
#define BPF_IS_PSEUDO_HEADER(flags) ((flags) & 0x10)
-static u64 bpf_l3_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags)
+static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
+ int offset = (int) r2;
__sum16 sum, *ptr;
- if (unlikely(offset > 0xffff))
+ if (unlikely((u32) offset > 0xffff))
return -EFAULT;
- if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum)))
+ offset -= skb->data - skb_mac_header(skb);
+ if (unlikely(skb_cloned(skb) &&
+ bpf_skb_clone_unwritable(skb, offset + sizeof(sum))))
return -EFAULT;
ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1276,16 +1296,19 @@ const struct bpf_func_proto bpf_l3_csum_replace_proto = {
.arg5_type = ARG_ANYTHING,
};
-static u64 bpf_l4_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags)
+static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags);
+ int offset = (int) r2;
__sum16 sum, *ptr;
- if (unlikely(offset > 0xffff))
+ if (unlikely((u32) offset > 0xffff))
return -EFAULT;
- if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum)))
+ offset -= skb->data - skb_mac_header(skb);
+ if (unlikely(skb_cloned(skb) &&
+ bpf_skb_clone_unwritable(skb, offset + sizeof(sum))))
return -EFAULT;
ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index a3abb719221f..572af0011997 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -16,7 +16,6 @@
#include <linux/export.h>
#include <linux/user_namespace.h>
#include <linux/net_namespace.h>
-#include <linux/rtnetlink.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/net_namespace.h>
@@ -602,7 +601,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
}
err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
- RTM_GETNSID, net, peer, -1);
+ RTM_NEWNSID, net, peer, -1);
if (err < 0)
goto err_out;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 358d52a38533..666e0928ba40 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2854,7 +2854,7 @@ static int brport_nla_put_flag(struct sk_buff *skb, u32 flags, u32 mask,
int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
struct net_device *dev, u16 mode,
- u32 flags, u32 mask)
+ u32 flags, u32 mask, int nlflags)
{
struct nlmsghdr *nlh;
struct ifinfomsg *ifm;
@@ -2863,7 +2863,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
struct net_device *br_dev = netdev_master_upper_dev_get(dev);
- nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI);
+ nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), nlflags);
if (nlh == NULL)
return -EMSGSIZE;
@@ -2969,7 +2969,8 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) {
if (idx >= cb->args[0] &&
br_dev->netdev_ops->ndo_bridge_getlink(
- skb, portid, seq, dev, filter_mask) < 0)
+ skb, portid, seq, dev, filter_mask,
+ NLM_F_MULTI) < 0)
break;
idx++;
}
@@ -2977,7 +2978,8 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
if (ops->ndo_bridge_getlink) {
if (idx >= cb->args[0] &&
ops->ndo_bridge_getlink(skb, portid, seq, dev,
- filter_mask) < 0)
+ filter_mask,
+ NLM_F_MULTI) < 0)
break;
idx++;
}
@@ -3018,7 +3020,7 @@ static int rtnl_bridge_notify(struct net_device *dev)
goto errout;
}
- err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0);
+ err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0, 0);
if (err < 0)
goto errout;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3b6e5830256e..3cfff2a3d651 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -280,13 +280,14 @@ nodata:
EXPORT_SYMBOL(__alloc_skb);
/**
- * build_skb - build a network buffer
+ * __build_skb - build a network buffer
* @data: data buffer provided by caller
- * @frag_size: size of fragment, or 0 if head was kmalloced
+ * @frag_size: size of data, or 0 if head was kmalloced
*
* Allocate a new &sk_buff. Caller provides space holding head and
* skb_shared_info. @data must have been allocated by kmalloc() only if
- * @frag_size is 0, otherwise data should come from the page allocator.
+ * @frag_size is 0, otherwise data should come from the page allocator
+ * or vmalloc()
* The return is the new skb buffer.
* On a failure the return is %NULL, and @data is not freed.
* Notes :
@@ -297,7 +298,7 @@ EXPORT_SYMBOL(__alloc_skb);
* before giving packet to stack.
* RX rings only contains data buffers, not full skbs.
*/
-struct sk_buff *build_skb(void *data, unsigned int frag_size)
+struct sk_buff *__build_skb(void *data, unsigned int frag_size)
{
struct skb_shared_info *shinfo;
struct sk_buff *skb;
@@ -311,7 +312,6 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
memset(skb, 0, offsetof(struct sk_buff, tail));
skb->truesize = SKB_TRUESIZE(size);
- skb->head_frag = frag_size != 0;
atomic_set(&skb->users, 1);
skb->head = data;
skb->data = data;
@@ -328,6 +328,23 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
return skb;
}
+
+/* build_skb() is wrapper over __build_skb(), that specifically
+ * takes care of skb->head and skb->pfmemalloc
+ * This means that if @frag_size is not zero, then @data must be backed
+ * by a page fragment, not kmalloc() or vmalloc()
+ */
+struct sk_buff *build_skb(void *data, unsigned int frag_size)
+{
+ struct sk_buff *skb = __build_skb(data, frag_size);
+
+ if (skb && frag_size) {
+ skb->head_frag = 1;
+ if (virt_to_head_page(data)->pfmemalloc)
+ skb->pfmemalloc = 1;
+ }
+ return skb;
+}
EXPORT_SYMBOL(build_skb);
struct netdev_alloc_cache {
@@ -348,7 +365,8 @@ static struct page *__page_frag_refill(struct netdev_alloc_cache *nc,
gfp_t gfp = gfp_mask;
if (order) {
- gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
+ gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY |
+ __GFP_NOMEMALLOC;
page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
nc->frag.size = PAGE_SIZE << (page ? order : 0);
}
@@ -4124,19 +4142,21 @@ EXPORT_SYMBOL(skb_try_coalesce);
*/
void skb_scrub_packet(struct sk_buff *skb, bool xnet)
{
- if (xnet)
- skb_orphan(skb);
skb->tstamp.tv64 = 0;
skb->pkt_type = PACKET_HOST;
skb->skb_iif = 0;
skb->ignore_df = 0;
skb_dst_drop(skb);
- skb->mark = 0;
skb_sender_cpu_clear(skb);
- skb_init_secmark(skb);
secpath_reset(skb);
nf_reset(skb);
nf_reset_trace(skb);
+
+ if (!xnet)
+ return;
+
+ skb_orphan(skb);
+ skb->mark = 0;
}
EXPORT_SYMBOL_GPL(skb_scrub_packet);
diff --git a/net/core/sock.c b/net/core/sock.c
index e891bcf325ca..292f42228bfb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1474,8 +1474,8 @@ void sk_release_kernel(struct sock *sk)
return;
sock_hold(sk);
- sock_net_set(sk, get_net(&init_net));
sock_release(sk->sk_socket);
+ sock_net_set(sk, get_net(&init_net));
sock_put(sk);
}
EXPORT_SYMBOL(sk_release_kernel);
OpenPOWER on IntegriCloud