diff options
Diffstat (limited to 'kernel/bpf/syscall.c')
-rw-r--r-- | kernel/bpf/syscall.c | 1120 |
1 files changed, 842 insertions, 278 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 5d141f16f6fa..a91ad518c050 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -23,13 +23,16 @@ #include <linux/timekeeping.h> #include <linux/ctype.h> #include <linux/nospec.h> +#include <linux/audit.h> +#include <uapi/linux/btf.h> -#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \ - (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ - (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \ - (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) +#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ + (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \ + (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) +#define IS_FD_PROG_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY) #define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) -#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map)) +#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map) || \ + IS_FD_HASH(map)) #define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY) @@ -42,7 +45,7 @@ static DEFINE_SPINLOCK(map_idr_lock); int sysctl_unprivileged_bpf_disabled __read_mostly; static const struct bpf_map_ops * const bpf_map_types[] = { -#define BPF_PROG_TYPE(_id, _ops) +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) #define BPF_MAP_TYPE(_id, _ops) \ [_id] = &_ops, #include <linux/bpf_types.h> @@ -126,7 +129,153 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) return map; } -void *bpf_map_area_alloc(size_t size, int numa_node) +static u32 bpf_map_value_size(struct bpf_map *map) +{ + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY || + map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) + return round_up(map->value_size, 8) * num_possible_cpus(); + else if (IS_FD_MAP(map)) + return sizeof(u32); + else + return map->value_size; +} + +static void maybe_wait_bpf_programs(struct bpf_map *map) +{ + /* Wait for any running BPF programs to complete so that + * userspace, when we return to it, knows that all programs + * that could be running use the new map value. + */ + if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS || + map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) + synchronize_rcu(); +} + +static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key, + void *value, __u64 flags) +{ + int err; + + /* Need to create a kthread, thus must support schedule */ + if (bpf_map_is_dev_bound(map)) { + return bpf_map_offload_update_elem(map, key, value, flags); + } else if (map->map_type == BPF_MAP_TYPE_CPUMAP || + map->map_type == BPF_MAP_TYPE_SOCKHASH || + map->map_type == BPF_MAP_TYPE_SOCKMAP || + map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { + return map->ops->map_update_elem(map, key, value, flags); + } else if (IS_FD_PROG_ARRAY(map)) { + return bpf_fd_array_map_update_elem(map, f.file, key, value, + flags); + } + + /* must increment bpf_prog_active to avoid kprobe+bpf triggering from + * inside bpf map update or delete otherwise deadlocks are possible + */ + preempt_disable(); + __this_cpu_inc(bpf_prog_active); + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { + err = bpf_percpu_hash_update(map, key, value, flags); + } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { + err = bpf_percpu_array_update(map, key, value, flags); + } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { + err = bpf_percpu_cgroup_storage_update(map, key, value, + flags); + } else if (IS_FD_ARRAY(map)) { + rcu_read_lock(); + err = bpf_fd_array_map_update_elem(map, f.file, key, value, + flags); + rcu_read_unlock(); + } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { + rcu_read_lock(); + err = bpf_fd_htab_map_update_elem(map, f.file, key, value, + flags); + rcu_read_unlock(); + } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { + /* rcu_read_lock() is not needed */ + err = bpf_fd_reuseport_array_update_elem(map, key, value, + flags); + } else if (map->map_type == BPF_MAP_TYPE_QUEUE || + map->map_type == BPF_MAP_TYPE_STACK) { + err = map->ops->map_push_elem(map, value, flags); + } else { + rcu_read_lock(); + err = map->ops->map_update_elem(map, key, value, flags); + rcu_read_unlock(); + } + __this_cpu_dec(bpf_prog_active); + preempt_enable(); + maybe_wait_bpf_programs(map); + + return err; +} + +static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value, + __u64 flags) +{ + void *ptr; + int err; + + if (bpf_map_is_dev_bound(map)) + return bpf_map_offload_lookup_elem(map, key, value); + + preempt_disable(); + this_cpu_inc(bpf_prog_active); + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { + err = bpf_percpu_hash_copy(map, key, value); + } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { + err = bpf_percpu_array_copy(map, key, value); + } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { + err = bpf_percpu_cgroup_storage_copy(map, key, value); + } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { + err = bpf_stackmap_copy(map, key, value); + } else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) { + err = bpf_fd_array_map_lookup_elem(map, key, value); + } else if (IS_FD_HASH(map)) { + err = bpf_fd_htab_map_lookup_elem(map, key, value); + } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { + err = bpf_fd_reuseport_array_lookup_elem(map, key, value); + } else if (map->map_type == BPF_MAP_TYPE_QUEUE || + map->map_type == BPF_MAP_TYPE_STACK) { + err = map->ops->map_peek_elem(map, value); + } else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { + /* struct_ops map requires directly updating "value" */ + err = bpf_struct_ops_map_sys_lookup_elem(map, key, value); + } else { + rcu_read_lock(); + if (map->ops->map_lookup_elem_sys_only) + ptr = map->ops->map_lookup_elem_sys_only(map, key); + else + ptr = map->ops->map_lookup_elem(map, key); + if (IS_ERR(ptr)) { + err = PTR_ERR(ptr); + } else if (!ptr) { + err = -ENOENT; + } else { + err = 0; + if (flags & BPF_F_LOCK) + /* lock 'ptr' and copy everything but lock */ + copy_map_value_locked(map, value, ptr, true); + else + copy_map_value(map, value, ptr); + /* mask lock, since value wasn't zero inited */ + check_and_init_map_lock(map, value); + } + rcu_read_unlock(); + } + + this_cpu_dec(bpf_prog_active); + preempt_enable(); + maybe_wait_bpf_programs(map); + + return err; +} + +static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable) { /* We really just want to fail instead of triggering OOM killer * under memory pressure, therefore we set __GFP_NORETRY to kmalloc, @@ -141,18 +290,36 @@ void *bpf_map_area_alloc(size_t size, int numa_node) const gfp_t flags = __GFP_NOWARN | __GFP_ZERO; void *area; - if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { + if (size >= SIZE_MAX) + return NULL; + + /* kmalloc()'ed memory can't be mmap()'ed */ + if (!mmapable && size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { area = kmalloc_node(size, GFP_USER | __GFP_NORETRY | flags, numa_node); if (area != NULL) return area; } - + if (mmapable) { + BUG_ON(!PAGE_ALIGNED(size)); + return vmalloc_user_node_flags(size, numa_node, GFP_KERNEL | + __GFP_RETRY_MAYFAIL | flags); + } return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | __GFP_RETRY_MAYFAIL | flags, __builtin_return_address(0)); } +void *bpf_map_area_alloc(u64 size, int numa_node) +{ + return __bpf_map_area_alloc(size, numa_node, false); +} + +void *bpf_map_area_mmapable_alloc(u64 size, int numa_node) +{ + return __bpf_map_area_alloc(size, numa_node, true); +} + void bpf_map_area_free(void *area) { kvfree(area); @@ -197,7 +364,7 @@ static void bpf_uncharge_memlock(struct user_struct *user, u32 pages) atomic_long_sub(pages, &user->locked_vm); } -int bpf_map_charge_init(struct bpf_map_memory *mem, size_t size) +int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size) { u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT; struct user_struct *user; @@ -310,7 +477,7 @@ static void bpf_map_free_deferred(struct work_struct *work) static void bpf_map_put_uref(struct bpf_map *map) { - if (atomic_dec_and_test(&map->usercnt)) { + if (atomic64_dec_and_test(&map->usercnt)) { if (map->ops->map_release_uref) map->ops->map_release_uref(map); } @@ -321,7 +488,7 @@ static void bpf_map_put_uref(struct bpf_map *map) */ static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock) { - if (atomic_dec_and_test(&map->refcnt)) { + if (atomic64_dec_and_test(&map->refcnt)) { /* bpf_map_free_id() must be called first */ bpf_map_free_id(map, do_idr_lock); btf_put(map->btf); @@ -370,13 +537,12 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) { const struct bpf_map *map = filp->private_data; const struct bpf_array *array; - u32 owner_prog_type = 0; - u32 owner_jited = 0; + u32 type = 0, jited = 0; if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) { array = container_of(map, struct bpf_array, map); - owner_prog_type = array->owner_prog_type; - owner_jited = array->owner_jited; + type = array->aux->type; + jited = array->aux->jited; } seq_printf(m, @@ -396,12 +562,9 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) map->memory.pages * 1ULL << PAGE_SHIFT, map->id, READ_ONCE(map->frozen)); - - if (owner_prog_type) { - seq_printf(m, "owner_prog_type:\t%u\n", - owner_prog_type); - seq_printf(m, "owner_jited:\t%u\n", - owner_jited); + if (type) { + seq_printf(m, "owner_prog_type:\t%u\n", type); + seq_printf(m, "owner_jited:\t%u\n", jited); } } #endif @@ -424,6 +587,74 @@ static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf, return -EINVAL; } +/* called for any extra memory-mapped regions (except initial) */ +static void bpf_map_mmap_open(struct vm_area_struct *vma) +{ + struct bpf_map *map = vma->vm_file->private_data; + + bpf_map_inc_with_uref(map); + + if (vma->vm_flags & VM_WRITE) { + mutex_lock(&map->freeze_mutex); + map->writecnt++; + mutex_unlock(&map->freeze_mutex); + } +} + +/* called for all unmapped memory region (including initial) */ +static void bpf_map_mmap_close(struct vm_area_struct *vma) +{ + struct bpf_map *map = vma->vm_file->private_data; + + if (vma->vm_flags & VM_WRITE) { + mutex_lock(&map->freeze_mutex); + map->writecnt--; + mutex_unlock(&map->freeze_mutex); + } + + bpf_map_put_with_uref(map); +} + +static const struct vm_operations_struct bpf_map_default_vmops = { + .open = bpf_map_mmap_open, + .close = bpf_map_mmap_close, +}; + +static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct bpf_map *map = filp->private_data; + int err; + + if (!map->ops->map_mmap || map_value_has_spin_lock(map)) + return -ENOTSUPP; + + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + mutex_lock(&map->freeze_mutex); + + if ((vma->vm_flags & VM_WRITE) && map->frozen) { + err = -EPERM; + goto out; + } + + /* set default open/close callbacks */ + vma->vm_ops = &bpf_map_default_vmops; + vma->vm_private_data = map; + + err = map->ops->map_mmap(map, vma); + if (err) + goto out; + + bpf_map_inc_with_uref(map); + + if (vma->vm_flags & VM_WRITE) + map->writecnt++; +out: + mutex_unlock(&map->freeze_mutex); + return err; +} + const struct file_operations bpf_map_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = bpf_map_show_fdinfo, @@ -431,6 +662,7 @@ const struct file_operations bpf_map_fops = { .release = bpf_map_release, .read = bpf_dummy_read, .write = bpf_dummy_write, + .mmap = bpf_map_mmap, }; int bpf_map_new_fd(struct bpf_map *map, int flags) @@ -542,7 +774,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, return ret; } -#define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id +#define BPF_MAP_CREATE_LAST_FIELD btf_vmlinux_value_type_id /* called via syscall */ static int map_create(union bpf_attr *attr) { @@ -556,6 +788,14 @@ static int map_create(union bpf_attr *attr) if (err) return -EINVAL; + if (attr->btf_vmlinux_value_type_id) { + if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS || + attr->btf_key_type_id || attr->btf_value_type_id) + return -EINVAL; + } else if (attr->btf_key_type_id && !attr->btf_value_type_id) { + return -EINVAL; + } + f_flags = bpf_get_file_flag(attr->map_flags); if (f_flags < 0) return f_flags; @@ -574,35 +814,39 @@ static int map_create(union bpf_attr *attr) if (err) goto free_map; - atomic_set(&map->refcnt, 1); - atomic_set(&map->usercnt, 1); - - if (attr->btf_key_type_id || attr->btf_value_type_id) { + atomic64_set(&map->refcnt, 1); + atomic64_set(&map->usercnt, 1); + mutex_init(&map->freeze_mutex); + + map->spin_lock_off = -EINVAL; + if (attr->btf_key_type_id || attr->btf_value_type_id || + /* Even the map's value is a kernel's struct, + * the bpf_prog.o must have BTF to begin with + * to figure out the corresponding kernel's + * counter part. Thus, attr->btf_fd has + * to be valid also. + */ + attr->btf_vmlinux_value_type_id) { struct btf *btf; - if (!attr->btf_value_type_id) { - err = -EINVAL; - goto free_map; - } - btf = btf_get_by_fd(attr->btf_fd); if (IS_ERR(btf)) { err = PTR_ERR(btf); goto free_map; } + map->btf = btf; - err = map_check_btf(map, btf, attr->btf_key_type_id, - attr->btf_value_type_id); - if (err) { - btf_put(btf); - goto free_map; + if (attr->btf_value_type_id) { + err = map_check_btf(map, btf, attr->btf_key_type_id, + attr->btf_value_type_id); + if (err) + goto free_map; } - map->btf = btf; map->btf_key_type_id = attr->btf_key_type_id; map->btf_value_type_id = attr->btf_value_type_id; - } else { - map->spin_lock_off = -EINVAL; + map->btf_vmlinux_value_type_id = + attr->btf_vmlinux_value_type_id; } err = security_bpf_map_alloc(map); @@ -652,21 +896,19 @@ struct bpf_map *__bpf_map_get(struct fd f) return f.file->private_data; } -/* prog's and map's refcnt limit */ -#define BPF_MAX_REFCNT 32768 - -struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref) +void bpf_map_inc(struct bpf_map *map) { - if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) { - atomic_dec(&map->refcnt); - return ERR_PTR(-EBUSY); - } - if (uref) - atomic_inc(&map->usercnt); - return map; + atomic64_inc(&map->refcnt); } EXPORT_SYMBOL_GPL(bpf_map_inc); +void bpf_map_inc_with_uref(struct bpf_map *map) +{ + atomic64_inc(&map->refcnt); + atomic64_inc(&map->usercnt); +} +EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref); + struct bpf_map *bpf_map_get_with_uref(u32 ufd) { struct fd f = fdget(ufd); @@ -676,34 +918,36 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd) if (IS_ERR(map)) return map; - map = bpf_map_inc(map, true); + bpf_map_inc_with_uref(map); fdput(f); return map; } /* map_idr_lock should have been held */ -static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map, - bool uref) +static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref) { int refold; - refold = atomic_fetch_add_unless(&map->refcnt, 1, 0); - - if (refold >= BPF_MAX_REFCNT) { - __bpf_map_put(map, false); - return ERR_PTR(-EBUSY); - } - + refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0); if (!refold) return ERR_PTR(-ENOENT); - if (uref) - atomic_inc(&map->usercnt); + atomic64_inc(&map->usercnt); return map; } +struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map) +{ + spin_lock_bh(&map_idr_lock); + map = __bpf_map_inc_not_zero(map, false); + spin_unlock_bh(&map_idr_lock); + + return map; +} +EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero); + int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) { return -ENOTSUPP; @@ -729,7 +973,7 @@ static int map_lookup_elem(union bpf_attr *attr) void __user *uvalue = u64_to_user_ptr(attr->value); int ufd = attr->map_fd; struct bpf_map *map; - void *key, *value, *ptr; + void *key, *value; u32 value_size; struct fd f; int err; @@ -761,72 +1005,14 @@ static int map_lookup_elem(union bpf_attr *attr) goto err_put; } - if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || - map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || - map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY || - map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) - value_size = round_up(map->value_size, 8) * num_possible_cpus(); - else if (IS_FD_MAP(map)) - value_size = sizeof(u32); - else - value_size = map->value_size; + value_size = bpf_map_value_size(map); err = -ENOMEM; value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); if (!value) goto free_key; - if (bpf_map_is_dev_bound(map)) { - err = bpf_map_offload_lookup_elem(map, key, value); - goto done; - } - - preempt_disable(); - this_cpu_inc(bpf_prog_active); - if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || - map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { - err = bpf_percpu_hash_copy(map, key, value); - } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { - err = bpf_percpu_array_copy(map, key, value); - } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { - err = bpf_percpu_cgroup_storage_copy(map, key, value); - } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { - err = bpf_stackmap_copy(map, key, value); - } else if (IS_FD_ARRAY(map)) { - err = bpf_fd_array_map_lookup_elem(map, key, value); - } else if (IS_FD_HASH(map)) { - err = bpf_fd_htab_map_lookup_elem(map, key, value); - } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { - err = bpf_fd_reuseport_array_lookup_elem(map, key, value); - } else if (map->map_type == BPF_MAP_TYPE_QUEUE || - map->map_type == BPF_MAP_TYPE_STACK) { - err = map->ops->map_peek_elem(map, value); - } else { - rcu_read_lock(); - if (map->ops->map_lookup_elem_sys_only) - ptr = map->ops->map_lookup_elem_sys_only(map, key); - else - ptr = map->ops->map_lookup_elem(map, key); - if (IS_ERR(ptr)) { - err = PTR_ERR(ptr); - } else if (!ptr) { - err = -ENOENT; - } else { - err = 0; - if (attr->flags & BPF_F_LOCK) - /* lock 'ptr' and copy everything but lock */ - copy_map_value_locked(map, value, ptr, true); - else - copy_map_value(map, value, ptr); - /* mask lock, since value wasn't zero inited */ - check_and_init_map_lock(map, value); - } - rcu_read_unlock(); - } - this_cpu_dec(bpf_prog_active); - preempt_enable(); - -done: + err = bpf_map_copy_value(map, key, value, attr->flags); if (err) goto free_value; @@ -845,16 +1031,6 @@ err_put: return err; } -static void maybe_wait_bpf_programs(struct bpf_map *map) -{ - /* Wait for any running BPF programs to complete so that - * userspace, when we return to it, knows that all programs - * that could be running use the new map value. - */ - if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS || - map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) - synchronize_rcu(); -} #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags @@ -910,56 +1086,8 @@ static int map_update_elem(union bpf_attr *attr) if (copy_from_user(value, uvalue, value_size) != 0) goto free_value; - /* Need to create a kthread, thus must support schedule */ - if (bpf_map_is_dev_bound(map)) { - err = bpf_map_offload_update_elem(map, key, value, attr->flags); - goto out; - } else if (map->map_type == BPF_MAP_TYPE_CPUMAP || - map->map_type == BPF_MAP_TYPE_SOCKHASH || - map->map_type == BPF_MAP_TYPE_SOCKMAP) { - err = map->ops->map_update_elem(map, key, value, attr->flags); - goto out; - } + err = bpf_map_update_value(map, f, key, value, attr->flags); - /* must increment bpf_prog_active to avoid kprobe+bpf triggering from - * inside bpf map update or delete otherwise deadlocks are possible - */ - preempt_disable(); - __this_cpu_inc(bpf_prog_active); - if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || - map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { - err = bpf_percpu_hash_update(map, key, value, attr->flags); - } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { - err = bpf_percpu_array_update(map, key, value, attr->flags); - } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { - err = bpf_percpu_cgroup_storage_update(map, key, value, - attr->flags); - } else if (IS_FD_ARRAY(map)) { - rcu_read_lock(); - err = bpf_fd_array_map_update_elem(map, f.file, key, value, - attr->flags); - rcu_read_unlock(); - } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { - rcu_read_lock(); - err = bpf_fd_htab_map_update_elem(map, f.file, key, value, - attr->flags); - rcu_read_unlock(); - } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { - /* rcu_read_lock() is not needed */ - err = bpf_fd_reuseport_array_update_elem(map, key, value, - attr->flags); - } else if (map->map_type == BPF_MAP_TYPE_QUEUE || - map->map_type == BPF_MAP_TYPE_STACK) { - err = map->ops->map_push_elem(map, value, attr->flags); - } else { - rcu_read_lock(); - err = map->ops->map_update_elem(map, key, value, attr->flags); - rcu_read_unlock(); - } - __this_cpu_dec(bpf_prog_active); - preempt_enable(); - maybe_wait_bpf_programs(map); -out: free_value: kfree(value); free_key: @@ -1001,6 +1129,11 @@ static int map_delete_elem(union bpf_attr *attr) if (bpf_map_is_dev_bound(map)) { err = bpf_map_offload_delete_elem(map, key); goto out; + } else if (IS_FD_PROG_ARRAY(map) || + map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { + /* These maps require sleepable context */ + err = map->ops->map_delete_elem(map, key); + goto out; } preempt_disable(); @@ -1085,6 +1218,220 @@ err_put: return err; } +int generic_map_delete_batch(struct bpf_map *map, + const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + void __user *keys = u64_to_user_ptr(attr->batch.keys); + u32 cp, max_count; + int err = 0; + void *key; + + if (attr->batch.elem_flags & ~BPF_F_LOCK) + return -EINVAL; + + if ((attr->batch.elem_flags & BPF_F_LOCK) && + !map_value_has_spin_lock(map)) { + return -EINVAL; + } + + max_count = attr->batch.count; + if (!max_count) + return 0; + + key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN); + if (!key) + return -ENOMEM; + + for (cp = 0; cp < max_count; cp++) { + err = -EFAULT; + if (copy_from_user(key, keys + cp * map->key_size, + map->key_size)) + break; + + if (bpf_map_is_dev_bound(map)) { + err = bpf_map_offload_delete_elem(map, key); + break; + } + + preempt_disable(); + __this_cpu_inc(bpf_prog_active); + rcu_read_lock(); + err = map->ops->map_delete_elem(map, key); + rcu_read_unlock(); + __this_cpu_dec(bpf_prog_active); + preempt_enable(); + maybe_wait_bpf_programs(map); + if (err) + break; + } + if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) + err = -EFAULT; + + kfree(key); + return err; +} + +int generic_map_update_batch(struct bpf_map *map, + const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + void __user *values = u64_to_user_ptr(attr->batch.values); + void __user *keys = u64_to_user_ptr(attr->batch.keys); + u32 value_size, cp, max_count; + int ufd = attr->map_fd; + void *key, *value; + struct fd f; + int err = 0; + + f = fdget(ufd); + if (attr->batch.elem_flags & ~BPF_F_LOCK) + return -EINVAL; + + if ((attr->batch.elem_flags & BPF_F_LOCK) && + !map_value_has_spin_lock(map)) { + return -EINVAL; + } + + value_size = bpf_map_value_size(map); + + max_count = attr->batch.count; + if (!max_count) + return 0; + + key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN); + if (!key) + return -ENOMEM; + + value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); + if (!value) { + kfree(key); + return -ENOMEM; + } + + for (cp = 0; cp < max_count; cp++) { + err = -EFAULT; + if (copy_from_user(key, keys + cp * map->key_size, + map->key_size) || + copy_from_user(value, values + cp * value_size, value_size)) + break; + + err = bpf_map_update_value(map, f, key, value, + attr->batch.elem_flags); + + if (err) + break; + } + + if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) + err = -EFAULT; + + kfree(value); + kfree(key); + return err; +} + +#define MAP_LOOKUP_RETRIES 3 + +int generic_map_lookup_batch(struct bpf_map *map, + const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch); + void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch); + void __user *values = u64_to_user_ptr(attr->batch.values); + void __user *keys = u64_to_user_ptr(attr->batch.keys); + void *buf, *buf_prevkey, *prev_key, *key, *value; + int err, retry = MAP_LOOKUP_RETRIES; + u32 value_size, cp, max_count; + + if (attr->batch.elem_flags & ~BPF_F_LOCK) + return -EINVAL; + + if ((attr->batch.elem_flags & BPF_F_LOCK) && + !map_value_has_spin_lock(map)) + return -EINVAL; + + value_size = bpf_map_value_size(map); + + max_count = attr->batch.count; + if (!max_count) + return 0; + + if (put_user(0, &uattr->batch.count)) + return -EFAULT; + + buf_prevkey = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN); + if (!buf_prevkey) + return -ENOMEM; + + buf = kmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN); + if (!buf) { + kvfree(buf_prevkey); + return -ENOMEM; + } + + err = -EFAULT; + prev_key = NULL; + if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size)) + goto free_buf; + key = buf; + value = key + map->key_size; + if (ubatch) + prev_key = buf_prevkey; + + for (cp = 0; cp < max_count;) { + rcu_read_lock(); + err = map->ops->map_get_next_key(map, prev_key, key); + rcu_read_unlock(); + if (err) + break; + err = bpf_map_copy_value(map, key, value, + attr->batch.elem_flags); + + if (err == -ENOENT) { + if (retry) { + retry--; + continue; + } + err = -EINTR; + break; + } + + if (err) + goto free_buf; + + if (copy_to_user(keys + cp * map->key_size, key, + map->key_size)) { + err = -EFAULT; + goto free_buf; + } + if (copy_to_user(values + cp * value_size, value, value_size)) { + err = -EFAULT; + goto free_buf; + } + + if (!prev_key) + prev_key = buf_prevkey; + + swap(prev_key, key); + retry = MAP_LOOKUP_RETRIES; + cp++; + } + + if (err == -EFAULT) + goto free_buf; + + if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) || + (cp && copy_to_user(uobatch, prev_key, map->key_size)))) + err = -EFAULT; + +free_buf: + kfree(buf_prevkey); + kfree(buf); + return err; +} + #define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value static int map_lookup_and_delete_elem(union bpf_attr *attr) @@ -1162,6 +1509,13 @@ static int map_freeze(const union bpf_attr *attr) map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); + + mutex_lock(&map->freeze_mutex); + + if (map->writecnt) { + err = -EBUSY; + goto err_put; + } if (READ_ONCE(map->frozen)) { err = -EBUSY; goto err_put; @@ -1173,12 +1527,13 @@ static int map_freeze(const union bpf_attr *attr) WRITE_ONCE(map->frozen, true); err_put: + mutex_unlock(&map->freeze_mutex); fdput(f); return err; } static const struct bpf_prog_ops * const bpf_prog_types[] = { -#define BPF_PROG_TYPE(_id, _name) \ +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ [_id] = & _name ## _prog_ops, #define BPF_MAP_TYPE(_id, _ops) #include <linux/bpf_types.h> @@ -1205,23 +1560,34 @@ static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) return 0; } -/* drop refcnt on maps used by eBPF program and free auxilary data */ -static void free_used_maps(struct bpf_prog_aux *aux) -{ - enum bpf_cgroup_storage_type stype; - int i; +enum bpf_audit { + BPF_AUDIT_LOAD, + BPF_AUDIT_UNLOAD, + BPF_AUDIT_MAX, +}; - for_each_cgroup_storage_type(stype) { - if (!aux->cgroup_storage[stype]) - continue; - bpf_cgroup_storage_release(aux->prog, - aux->cgroup_storage[stype]); - } +static const char * const bpf_audit_str[BPF_AUDIT_MAX] = { + [BPF_AUDIT_LOAD] = "LOAD", + [BPF_AUDIT_UNLOAD] = "UNLOAD", +}; - for (i = 0; i < aux->used_map_cnt; i++) - bpf_map_put(aux->used_maps[i]); +static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op) +{ + struct audit_context *ctx = NULL; + struct audit_buffer *ab; - kfree(aux->used_maps); + if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX)) + return; + if (audit_enabled == AUDIT_OFF) + return; + if (op == BPF_AUDIT_LOAD) + ctx = audit_context(); + ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF); + if (unlikely(!ab)) + return; + audit_log_format(ab, "prog-id=%u op=%s", + prog->aux->id, bpf_audit_str[op]); + audit_log_end(ab); } int __bpf_prog_charge(struct user_struct *user, u32 pages) @@ -1316,24 +1682,33 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu) { struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); - free_used_maps(aux); + kvfree(aux->func_info); + kfree(aux->func_info_aux); bpf_prog_uncharge_memlock(aux->prog); security_bpf_prog_free(aux); bpf_prog_free(aux->prog); } +static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred) +{ + bpf_prog_kallsyms_del_all(prog); + btf_put(prog->aux->btf); + bpf_prog_free_linfo(prog); + + if (deferred) + call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); + else + __bpf_prog_put_rcu(&prog->aux->rcu); +} + static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) { - if (atomic_dec_and_test(&prog->aux->refcnt)) { + if (atomic64_dec_and_test(&prog->aux->refcnt)) { perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); + bpf_audit_prog(prog, BPF_AUDIT_UNLOAD); /* bpf_prog_free_id() must be called first */ bpf_prog_free_id(prog, do_idr_lock); - bpf_prog_kallsyms_del_all(prog); - btf_put(prog->aux->btf); - kvfree(prog->aux->func_info); - bpf_prog_free_linfo(prog); - - call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); + __bpf_prog_put_noref(prog, true); } } @@ -1435,13 +1810,9 @@ static struct bpf_prog *____bpf_prog_get(struct fd f) return f.file->private_data; } -struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) +void bpf_prog_add(struct bpf_prog *prog, int i) { - if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) { - atomic_sub(i, &prog->aux->refcnt); - return ERR_PTR(-EBUSY); - } - return prog; + atomic64_add(i, &prog->aux->refcnt); } EXPORT_SYMBOL_GPL(bpf_prog_add); @@ -1452,13 +1823,13 @@ void bpf_prog_sub(struct bpf_prog *prog, int i) * path holds a reference to the program, thus atomic_sub() can * be safely used in such cases! */ - WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0); + WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0); } EXPORT_SYMBOL_GPL(bpf_prog_sub); -struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog) +void bpf_prog_inc(struct bpf_prog *prog) { - return bpf_prog_add(prog, 1); + atomic64_inc(&prog->aux->refcnt); } EXPORT_SYMBOL_GPL(bpf_prog_inc); @@ -1467,12 +1838,7 @@ struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) { int refold; - refold = atomic_fetch_add_unless(&prog->aux->refcnt, 1, 0); - - if (refold >= BPF_MAX_REFCNT) { - __bpf_prog_put(prog, false); - return ERR_PTR(-EBUSY); - } + refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0); if (!refold) return ERR_PTR(-ENOENT); @@ -1510,7 +1876,7 @@ static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type, goto out; } - prog = bpf_prog_inc(prog); + bpf_prog_inc(prog); out: fdput(f); return prog; @@ -1555,9 +1921,28 @@ static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr) } static int -bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type, - enum bpf_attach_type expected_attach_type) +bpf_prog_load_check_attach(enum bpf_prog_type prog_type, + enum bpf_attach_type expected_attach_type, + u32 btf_id, u32 prog_fd) { + if (btf_id) { + if (btf_id > BTF_MAX_TYPE) + return -EINVAL; + + switch (prog_type) { + case BPF_PROG_TYPE_TRACING: + case BPF_PROG_TYPE_STRUCT_OPS: + case BPF_PROG_TYPE_EXT: + break; + default: + return -EINVAL; + } + } + + if (prog_fd && prog_type != BPF_PROG_TYPE_TRACING && + prog_type != BPF_PROG_TYPE_EXT) + return -EINVAL; + switch (prog_type) { case BPF_PROG_TYPE_CGROUP_SOCK: switch (expected_attach_type) { @@ -1598,13 +1983,17 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type, default: return -EINVAL; } + case BPF_PROG_TYPE_EXT: + if (expected_attach_type) + return -EINVAL; + /* fallthrough */ default: return 0; } } /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD line_info_cnt +#define BPF_PROG_LOAD_LAST_FIELD attach_prog_fd static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) { @@ -1619,6 +2008,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | BPF_F_ANY_ALIGNMENT | + BPF_F_TEST_STATE_FREQ | BPF_F_TEST_RND_HI32)) return -EINVAL; @@ -1645,7 +2035,9 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) return -EPERM; bpf_prog_load_fixup_attach_type(attr); - if (bpf_prog_load_check_attach_type(type, attr->expected_attach_type)) + if (bpf_prog_load_check_attach(type, attr->expected_attach_type, + attr->attach_btf_id, + attr->attach_prog_fd)) return -EINVAL; /* plain bpf_prog allocation */ @@ -1654,6 +2046,17 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) return -ENOMEM; prog->expected_attach_type = attr->expected_attach_type; + prog->aux->attach_btf_id = attr->attach_btf_id; + if (attr->attach_prog_fd) { + struct bpf_prog *tgt_prog; + + tgt_prog = bpf_prog_get(attr->attach_prog_fd); + if (IS_ERR(tgt_prog)) { + err = PTR_ERR(tgt_prog); + goto free_prog_nouncharge; + } + prog->aux->linked_prog = tgt_prog; + } prog->aux->offload_requested = !!attr->prog_ifindex; @@ -1675,7 +2078,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) prog->orig_prog = NULL; prog->jited = 0; - atomic_set(&prog->aux->refcnt, 1); + atomic64_set(&prog->aux->refcnt, 1); prog->gpl_compatible = is_gpl ? 1 : 0; if (bpf_prog_is_dev_bound(prog->aux)) { @@ -1707,28 +2110,36 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) if (err) goto free_used_maps; - err = bpf_prog_new_fd(prog); - if (err < 0) { - /* failed to allocate fd. - * bpf_prog_put() is needed because the above - * bpf_prog_alloc_id() has published the prog - * to the userspace and the userspace may - * have refcnt-ed it through BPF_PROG_GET_FD_BY_ID. - */ - bpf_prog_put(prog); - return err; - } - + /* Upon success of bpf_prog_alloc_id(), the BPF prog is + * effectively publicly exposed. However, retrieving via + * bpf_prog_get_fd_by_id() will take another reference, + * therefore it cannot be gone underneath us. + * + * Only for the time /after/ successful bpf_prog_new_fd() + * and before returning to userspace, we might just hold + * one reference and any parallel close on that fd could + * rip everything out. Hence, below notifications must + * happen before bpf_prog_new_fd(). + * + * Also, any failure handling from this point onwards must + * be using bpf_prog_put() given the program is exposed. + */ bpf_prog_kallsyms_add(prog); perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0); + bpf_audit_prog(prog, BPF_AUDIT_LOAD); + + err = bpf_prog_new_fd(prog); + if (err < 0) + bpf_prog_put(prog); return err; free_used_maps: - bpf_prog_free_linfo(prog); - kvfree(prog->aux->func_info); - btf_put(prog->aux->btf); - bpf_prog_kallsyms_del_subprogs(prog); - free_used_maps(prog->aux); + /* In case we have subprogs, we need to wait for a grace + * period before we can tear down JIT memory since symbols + * are already exposed under kallsyms. + */ + __bpf_prog_put_noref(prog, prog->aux->func_cnt); + return err; free_prog: bpf_prog_uncharge_memlock(prog); free_prog_sec: @@ -1758,6 +2169,50 @@ static int bpf_obj_get(const union bpf_attr *attr) attr->file_flags); } +static int bpf_tracing_prog_release(struct inode *inode, struct file *filp) +{ + struct bpf_prog *prog = filp->private_data; + + WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog)); + bpf_prog_put(prog); + return 0; +} + +static const struct file_operations bpf_tracing_prog_fops = { + .release = bpf_tracing_prog_release, + .read = bpf_dummy_read, + .write = bpf_dummy_write, +}; + +static int bpf_tracing_prog_attach(struct bpf_prog *prog) +{ + int tr_fd, err; + + if (prog->expected_attach_type != BPF_TRACE_FENTRY && + prog->expected_attach_type != BPF_TRACE_FEXIT && + prog->type != BPF_PROG_TYPE_EXT) { + err = -EINVAL; + goto out_put_prog; + } + + err = bpf_trampoline_link_prog(prog); + if (err) + goto out_put_prog; + + tr_fd = anon_inode_getfd("bpf-tracing-prog", &bpf_tracing_prog_fops, + prog, O_CLOEXEC); + if (tr_fd < 0) { + WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog)); + err = tr_fd; + goto out_put_prog; + } + return tr_fd; + +out_put_prog: + bpf_prog_put(prog); + return err; +} + struct bpf_raw_tracepoint { struct bpf_raw_event_map *btp; struct bpf_prog *prog; @@ -1789,17 +2244,54 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) struct bpf_raw_tracepoint *raw_tp; struct bpf_raw_event_map *btp; struct bpf_prog *prog; - char tp_name[128]; + const char *tp_name; + char buf[128]; int tp_fd, err; - if (strncpy_from_user(tp_name, u64_to_user_ptr(attr->raw_tracepoint.name), - sizeof(tp_name) - 1) < 0) - return -EFAULT; - tp_name[sizeof(tp_name) - 1] = 0; + if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN)) + return -EINVAL; + + prog = bpf_prog_get(attr->raw_tracepoint.prog_fd); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT && + prog->type != BPF_PROG_TYPE_TRACING && + prog->type != BPF_PROG_TYPE_EXT && + prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) { + err = -EINVAL; + goto out_put_prog; + } + + if (prog->type == BPF_PROG_TYPE_TRACING || + prog->type == BPF_PROG_TYPE_EXT) { + if (attr->raw_tracepoint.name) { + /* The attach point for this category of programs + * should be specified via btf_id during program load. + */ + err = -EINVAL; + goto out_put_prog; + } + if (prog->expected_attach_type == BPF_TRACE_RAW_TP) + tp_name = prog->aux->attach_func_name; + else + return bpf_tracing_prog_attach(prog); + } else { + if (strncpy_from_user(buf, + u64_to_user_ptr(attr->raw_tracepoint.name), + sizeof(buf) - 1) < 0) { + err = -EFAULT; + goto out_put_prog; + } + buf[sizeof(buf) - 1] = 0; + tp_name = buf; + } btp = bpf_get_raw_tracepoint(tp_name); - if (!btp) - return -ENOENT; + if (!btp) { + err = -ENOENT; + goto out_put_prog; + } raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER); if (!raw_tp) { @@ -1807,38 +2299,27 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) goto out_put_btp; } raw_tp->btp = btp; - - prog = bpf_prog_get(attr->raw_tracepoint.prog_fd); - if (IS_ERR(prog)) { - err = PTR_ERR(prog); - goto out_free_tp; - } - if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT && - prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) { - err = -EINVAL; - goto out_put_prog; - } + raw_tp->prog = prog; err = bpf_probe_register(raw_tp->btp, prog); if (err) - goto out_put_prog; + goto out_free_tp; - raw_tp->prog = prog; tp_fd = anon_inode_getfd("bpf-raw-tracepoint", &bpf_raw_tp_fops, raw_tp, O_CLOEXEC); if (tp_fd < 0) { bpf_probe_unregister(raw_tp->btp, prog); err = tp_fd; - goto out_put_prog; + goto out_free_tp; } return tp_fd; -out_put_prog: - bpf_prog_put(prog); out_free_tp: kfree(raw_tp); out_put_btp: bpf_put_raw_tracepoint(btp); +out_put_prog: + bpf_prog_put(prog); return err; } @@ -1859,10 +2340,10 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, } } -#define BPF_PROG_ATTACH_LAST_FIELD attach_flags +#define BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd #define BPF_F_ATTACH_MASK \ - (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI) + (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE) static int bpf_prog_attach(const union bpf_attr *attr) { @@ -2124,17 +2605,12 @@ static int bpf_obj_get_next_id(const union bpf_attr *attr, #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id -static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) +struct bpf_prog *bpf_prog_by_id(u32 id) { struct bpf_prog *prog; - u32 id = attr->prog_id; - int fd; - if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID)) - return -EINVAL; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; + if (!id) + return ERR_PTR(-ENOENT); spin_lock_bh(&prog_idr_lock); prog = idr_find(&prog_idr, id); @@ -2143,7 +2619,22 @@ static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) else prog = ERR_PTR(-ENOENT); spin_unlock_bh(&prog_idr_lock); + return prog; +} + +static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) +{ + struct bpf_prog *prog; + u32 id = attr->prog_id; + int fd; + if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID)) + return -EINVAL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + prog = bpf_prog_by_id(id); if (IS_ERR(prog)) return PTR_ERR(prog); @@ -2177,7 +2668,7 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr) spin_lock_bh(&map_idr_lock); map = idr_find(&map_idr, id); if (map) - map = bpf_map_inc_not_zero(map, true); + map = __bpf_map_inc_not_zero(map, true); else map = ERR_PTR(-ENOENT); spin_unlock_bh(&map_idr_lock); @@ -2593,6 +3084,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map, info.btf_key_type_id = map->btf_key_type_id; info.btf_value_type_id = map->btf_value_type_id; } + info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; if (bpf_map_is_dev_bound(map)) { err = bpf_map_offload_info_fill(&info, map); @@ -2805,6 +3297,61 @@ out: return err; } +#define BPF_MAP_BATCH_LAST_FIELD batch.flags + +#define BPF_DO_BATCH(fn) \ + do { \ + if (!fn) { \ + err = -ENOTSUPP; \ + goto err_put; \ + } \ + err = fn(map, attr, uattr); \ + } while (0) + +static int bpf_map_do_batch(const union bpf_attr *attr, + union bpf_attr __user *uattr, + int cmd) +{ + struct bpf_map *map; + int err, ufd; + struct fd f; + + if (CHECK_ATTR(BPF_MAP_BATCH)) + return -EINVAL; + + ufd = attr->batch.map_fd; + f = fdget(ufd); + map = __bpf_map_get(f); + if (IS_ERR(map)) + return PTR_ERR(map); + + if ((cmd == BPF_MAP_LOOKUP_BATCH || + cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) && + !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { + err = -EPERM; + goto err_put; + } + + if (cmd != BPF_MAP_LOOKUP_BATCH && + !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { + err = -EPERM; + goto err_put; + } + + if (cmd == BPF_MAP_LOOKUP_BATCH) + BPF_DO_BATCH(map->ops->map_lookup_batch); + else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) + BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch); + else if (cmd == BPF_MAP_UPDATE_BATCH) + BPF_DO_BATCH(map->ops->map_update_batch); + else + BPF_DO_BATCH(map->ops->map_delete_batch); + +err_put: + fdput(f); + return err; +} + SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) { union bpf_attr attr = {}; @@ -2874,6 +3421,10 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz err = bpf_obj_get_next_id(&attr, uattr, &map_idr, &map_idr_lock); break; + case BPF_BTF_GET_NEXT_ID: + err = bpf_obj_get_next_id(&attr, uattr, + &btf_idr, &btf_idr_lock); + break; case BPF_PROG_GET_FD_BY_ID: err = bpf_prog_get_fd_by_id(&attr); break; @@ -2898,6 +3449,19 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_MAP_LOOKUP_AND_DELETE_ELEM: err = map_lookup_and_delete_elem(&attr); break; + case BPF_MAP_LOOKUP_BATCH: + err = bpf_map_do_batch(&attr, uattr, BPF_MAP_LOOKUP_BATCH); + break; + case BPF_MAP_LOOKUP_AND_DELETE_BATCH: + err = bpf_map_do_batch(&attr, uattr, + BPF_MAP_LOOKUP_AND_DELETE_BATCH); + break; + case BPF_MAP_UPDATE_BATCH: + err = bpf_map_do_batch(&attr, uattr, BPF_MAP_UPDATE_BATCH); + break; + case BPF_MAP_DELETE_BATCH: + err = bpf_map_do_batch(&attr, uattr, BPF_MAP_DELETE_BATCH); + break; default: err = -EINVAL; break; |