diff options
author | Jesper Dangaard Brouer <brouer@redhat.com> | 2019-04-12 17:07:43 +0200 |
---|---|---|
committer | Alexei Starovoitov <ast@kernel.org> | 2019-04-17 19:09:25 -0700 |
commit | 8f0504a97e1ba6b70e1c8b5a88255c280f263287 (patch) | |
tree | 762bb6478b65c42ba2e0e79f52d8421cc0dcb707 /kernel/bpf | |
parent | ba0509b6881efd0c8b26c36490cba87d8fb324c0 (diff) | |
download | blackbird-op-linux-8f0504a97e1ba6b70e1c8b5a88255c280f263287.tar.gz blackbird-op-linux-8f0504a97e1ba6b70e1c8b5a88255c280f263287.zip |
bpf: cpumap do bulk allocation of SKBs
As cpumap now batch-consumes xdp_frames from the ptr_ring, it knows how many
SKBs it needs to allocate. Thus, let's bulk-allocate these SKBs via the
kmem_cache_alloc_bulk() API, and use the previously introduced function
build_skb_around().
Notice that the flag __GFP_ZERO asks the slab/slub allocator to clear the
memory for us. This does clear a larger area than needed, but my
micro-benchmarks on Intel CPUs show that this is slightly faster, because a
cacheline-aligned area is cleared for the SKBs. (For the SLUB allocator, there
is potential for a future optimization, because the SKBs will, with high
probability, originate from the same page. If we can find/identify contiguous
memory areas, then the Intel CPU memset (rep stos) will give a real
performance gain.)
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'kernel/bpf')
-rw-r--r-- | kernel/bpf/cpumap.c | 22 |
1 files changed, 15 insertions, 7 deletions
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 430103e182a0..732d6ced3987 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -160,12 +160,12 @@ static void cpu_map_kthread_stop(struct work_struct *work) } static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, - struct xdp_frame *xdpf) + struct xdp_frame *xdpf, + struct sk_buff *skb) { unsigned int hard_start_headroom; unsigned int frame_size; void *pkt_data_start; - struct sk_buff *skb; /* Part of headroom was reserved to xdpf */ hard_start_headroom = sizeof(struct xdp_frame) + xdpf->headroom; @@ -191,8 +191,8 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); pkt_data_start = xdpf->data - hard_start_headroom; - skb = build_skb(pkt_data_start, frame_size); - if (!skb) + skb = build_skb_around(skb, pkt_data_start, frame_size); + if (unlikely(!skb)) return NULL; skb_reserve(skb, hard_start_headroom); @@ -256,7 +256,9 @@ static int cpu_map_kthread_run(void *data) while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) { unsigned int drops = 0, sched = 0; void *frames[CPUMAP_BATCH]; - int i, n; + void *skbs[CPUMAP_BATCH]; + gfp_t gfp = __GFP_ZERO | GFP_ATOMIC; + int i, n, m; /* Release CPU reschedule checks */ if (__ptr_ring_empty(rcpu->queue)) { @@ -278,14 +280,20 @@ static int cpu_map_kthread_run(void *data) * consume side valid as no-resize allowed of queue. */ n = ptr_ring_consume_batched(rcpu->queue, frames, CPUMAP_BATCH); + m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, n, skbs); + if (unlikely(m == 0)) { + for (i = 0; i < n; i++) + skbs[i] = NULL; /* effect: xdp_return_frame */ + drops = n; + } local_bh_disable(); for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; - struct sk_buff *skb; + struct sk_buff *skb = skbs[i]; int ret; - skb = cpu_map_build_skb(rcpu, xdpf); + skb = cpu_map_build_skb(rcpu, xdpf, skb); if (!skb) { xdp_return_frame(xdpf); continue; |