path: root/kernel/bpf/syscall.c
Diffstat (limited to 'kernel/bpf/syscall.c')
-rw-r--r--  kernel/bpf/syscall.c  1120
1 file changed, 842 insertions(+), 278 deletions(-)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5d141f16f6fa..a91ad518c050 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -23,13 +23,16 @@
#include <linux/timekeeping.h>
#include <linux/ctype.h>
#include <linux/nospec.h>
+#include <linux/audit.h>
+#include <uapi/linux/btf.h>
-#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
- (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
- (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
- (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
+#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
+ (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
+ (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
+#define IS_FD_PROG_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY)
#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
-#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map))
+#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map) || \
+ IS_FD_HASH(map))
#define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY)
@@ -42,7 +45,7 @@ static DEFINE_SPINLOCK(map_idr_lock);
int sysctl_unprivileged_bpf_disabled __read_mostly;
static const struct bpf_map_ops * const bpf_map_types[] = {
-#define BPF_PROG_TYPE(_id, _ops)
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
#define BPF_MAP_TYPE(_id, _ops) \
[_id] = &_ops,
#include <linux/bpf_types.h>
@@ -126,7 +129,153 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
return map;
}
-void *bpf_map_area_alloc(size_t size, int numa_node)
+static u32 bpf_map_value_size(struct bpf_map *map)
+{
+ if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
+ map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
+ map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
+ return round_up(map->value_size, 8) * num_possible_cpus();
+ else if (IS_FD_MAP(map))
+ return sizeof(u32);
+ else
+ return map->value_size;
+}
+
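
/*
 * Annotation (not part of this patch): a hypothetical userspace sketch of the
 * buffer sizing that bpf_map_value_size() above implies.  For per-CPU maps the
 * kernel copies round_up(value_size, 8) bytes for every *possible* CPU, so the
 * lookup buffer must be sized accordingly (the possible-CPU count can be read
 * from /sys/devices/system/cpu/possible).  Helper name is made up.
 */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Hypothetical helper: look up one element of a per-CPU map and return a
 * freshly allocated buffer with one 8-byte-aligned slot per possible CPU.
 */
static void *percpu_lookup(int map_fd, const void *key,
			   uint32_t value_size, int n_possible_cpus)
{
	size_t stride = (value_size + 7) & ~(size_t)7; /* round_up(value_size, 8) */
	void *values = calloc(n_possible_cpus, stride);
	union bpf_attr attr;

	if (!values)
		return NULL;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key    = (uint64_t)(unsigned long)key;
	attr.value  = (uint64_t)(unsigned long)values;

	if (syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr))) {
		free(values);
		return NULL;
	}
	return values;	/* per-CPU slot i starts at values + i * stride */
}
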
+static void maybe_wait_bpf_programs(struct bpf_map *map)
+{
+ /* Wait for any running BPF programs to complete so that
+ * userspace, when we return to it, knows that all programs
+ * that could be running use the new map value.
+ */
+ if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
+ map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
+ synchronize_rcu();
+}
+
+static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
+ void *value, __u64 flags)
+{
+ int err;
+
+ /* Need to create a kthread, thus must support schedule */
+ if (bpf_map_is_dev_bound(map)) {
+ return bpf_map_offload_update_elem(map, key, value, flags);
+ } else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
+ map->map_type == BPF_MAP_TYPE_SOCKHASH ||
+ map->map_type == BPF_MAP_TYPE_SOCKMAP ||
+ map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
+ return map->ops->map_update_elem(map, key, value, flags);
+ } else if (IS_FD_PROG_ARRAY(map)) {
+ return bpf_fd_array_map_update_elem(map, f.file, key, value,
+ flags);
+ }
+
+ /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
+ * inside bpf map update or delete otherwise deadlocks are possible
+ */
+ preempt_disable();
+ __this_cpu_inc(bpf_prog_active);
+ if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+ err = bpf_percpu_hash_update(map, key, value, flags);
+ } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
+ err = bpf_percpu_array_update(map, key, value, flags);
+ } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
+ err = bpf_percpu_cgroup_storage_update(map, key, value,
+ flags);
+ } else if (IS_FD_ARRAY(map)) {
+ rcu_read_lock();
+ err = bpf_fd_array_map_update_elem(map, f.file, key, value,
+ flags);
+ rcu_read_unlock();
+ } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+ rcu_read_lock();
+ err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
+ flags);
+ rcu_read_unlock();
+ } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
+ /* rcu_read_lock() is not needed */
+ err = bpf_fd_reuseport_array_update_elem(map, key, value,
+ flags);
+ } else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
+ map->map_type == BPF_MAP_TYPE_STACK) {
+ err = map->ops->map_push_elem(map, value, flags);
+ } else {
+ rcu_read_lock();
+ err = map->ops->map_update_elem(map, key, value, flags);
+ rcu_read_unlock();
+ }
+ __this_cpu_dec(bpf_prog_active);
+ preempt_enable();
+ maybe_wait_bpf_programs(map);
+
+ return err;
+}
+
+static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
+ __u64 flags)
+{
+ void *ptr;
+ int err;
+
+ if (bpf_map_is_dev_bound(map))
+ return bpf_map_offload_lookup_elem(map, key, value);
+
+ preempt_disable();
+ this_cpu_inc(bpf_prog_active);
+ if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+ err = bpf_percpu_hash_copy(map, key, value);
+ } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
+ err = bpf_percpu_array_copy(map, key, value);
+ } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
+ err = bpf_percpu_cgroup_storage_copy(map, key, value);
+ } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
+ err = bpf_stackmap_copy(map, key, value);
+ } else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
+ err = bpf_fd_array_map_lookup_elem(map, key, value);
+ } else if (IS_FD_HASH(map)) {
+ err = bpf_fd_htab_map_lookup_elem(map, key, value);
+ } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
+ err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
+ } else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
+ map->map_type == BPF_MAP_TYPE_STACK) {
+ err = map->ops->map_peek_elem(map, value);
+ } else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
+ /* struct_ops map requires directly updating "value" */
+ err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
+ } else {
+ rcu_read_lock();
+ if (map->ops->map_lookup_elem_sys_only)
+ ptr = map->ops->map_lookup_elem_sys_only(map, key);
+ else
+ ptr = map->ops->map_lookup_elem(map, key);
+ if (IS_ERR(ptr)) {
+ err = PTR_ERR(ptr);
+ } else if (!ptr) {
+ err = -ENOENT;
+ } else {
+ err = 0;
+ if (flags & BPF_F_LOCK)
+ /* lock 'ptr' and copy everything but lock */
+ copy_map_value_locked(map, value, ptr, true);
+ else
+ copy_map_value(map, value, ptr);
+ /* mask lock, since value wasn't zero inited */
+ check_and_init_map_lock(map, value);
+ }
+ rcu_read_unlock();
+ }
+
+ this_cpu_dec(bpf_prog_active);
+ preempt_enable();
+ maybe_wait_bpf_programs(map);
+
+ return err;
+}
+
+static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
{
/* We really just want to fail instead of triggering OOM killer
* under memory pressure, therefore we set __GFP_NORETRY to kmalloc,
@@ -141,18 +290,36 @@ void *bpf_map_area_alloc(size_t size, int numa_node)
const gfp_t flags = __GFP_NOWARN | __GFP_ZERO;
void *area;
- if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
+ if (size >= SIZE_MAX)
+ return NULL;
+
+ /* kmalloc()'ed memory can't be mmap()'ed */
+ if (!mmapable && size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
area = kmalloc_node(size, GFP_USER | __GFP_NORETRY | flags,
numa_node);
if (area != NULL)
return area;
}
-
+ if (mmapable) {
+ BUG_ON(!PAGE_ALIGNED(size));
+ return vmalloc_user_node_flags(size, numa_node, GFP_KERNEL |
+ __GFP_RETRY_MAYFAIL | flags);
+ }
return __vmalloc_node_flags_caller(size, numa_node,
GFP_KERNEL | __GFP_RETRY_MAYFAIL |
flags, __builtin_return_address(0));
}
+void *bpf_map_area_alloc(u64 size, int numa_node)
+{
+ return __bpf_map_area_alloc(size, numa_node, false);
+}
+
+void *bpf_map_area_mmapable_alloc(u64 size, int numa_node)
+{
+ return __bpf_map_area_alloc(size, numa_node, true);
+}
+
void bpf_map_area_free(void *area)
{
kvfree(area);
@@ -197,7 +364,7 @@ static void bpf_uncharge_memlock(struct user_struct *user, u32 pages)
atomic_long_sub(pages, &user->locked_vm);
}
-int bpf_map_charge_init(struct bpf_map_memory *mem, size_t size)
+int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size)
{
u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT;
struct user_struct *user;
@@ -310,7 +477,7 @@ static void bpf_map_free_deferred(struct work_struct *work)
static void bpf_map_put_uref(struct bpf_map *map)
{
- if (atomic_dec_and_test(&map->usercnt)) {
+ if (atomic64_dec_and_test(&map->usercnt)) {
if (map->ops->map_release_uref)
map->ops->map_release_uref(map);
}
@@ -321,7 +488,7 @@ static void bpf_map_put_uref(struct bpf_map *map)
*/
static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
{
- if (atomic_dec_and_test(&map->refcnt)) {
+ if (atomic64_dec_and_test(&map->refcnt)) {
/* bpf_map_free_id() must be called first */
bpf_map_free_id(map, do_idr_lock);
btf_put(map->btf);
@@ -370,13 +537,12 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
const struct bpf_map *map = filp->private_data;
const struct bpf_array *array;
- u32 owner_prog_type = 0;
- u32 owner_jited = 0;
+ u32 type = 0, jited = 0;
if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
array = container_of(map, struct bpf_array, map);
- owner_prog_type = array->owner_prog_type;
- owner_jited = array->owner_jited;
+ type = array->aux->type;
+ jited = array->aux->jited;
}
seq_printf(m,
@@ -396,12 +562,9 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
map->memory.pages * 1ULL << PAGE_SHIFT,
map->id,
READ_ONCE(map->frozen));
-
- if (owner_prog_type) {
- seq_printf(m, "owner_prog_type:\t%u\n",
- owner_prog_type);
- seq_printf(m, "owner_jited:\t%u\n",
- owner_jited);
+ if (type) {
+ seq_printf(m, "owner_prog_type:\t%u\n", type);
+ seq_printf(m, "owner_jited:\t%u\n", jited);
}
}
#endif
@@ -424,6 +587,74 @@ static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf,
return -EINVAL;
}
+/* called for any extra memory-mapped regions (except initial) */
+static void bpf_map_mmap_open(struct vm_area_struct *vma)
+{
+ struct bpf_map *map = vma->vm_file->private_data;
+
+ bpf_map_inc_with_uref(map);
+
+ if (vma->vm_flags & VM_WRITE) {
+ mutex_lock(&map->freeze_mutex);
+ map->writecnt++;
+ mutex_unlock(&map->freeze_mutex);
+ }
+}
+
+/* called for all unmapped memory region (including initial) */
+static void bpf_map_mmap_close(struct vm_area_struct *vma)
+{
+ struct bpf_map *map = vma->vm_file->private_data;
+
+ if (vma->vm_flags & VM_WRITE) {
+ mutex_lock(&map->freeze_mutex);
+ map->writecnt--;
+ mutex_unlock(&map->freeze_mutex);
+ }
+
+ bpf_map_put_with_uref(map);
+}
+
+static const struct vm_operations_struct bpf_map_default_vmops = {
+ .open = bpf_map_mmap_open,
+ .close = bpf_map_mmap_close,
+};
+
+static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct bpf_map *map = filp->private_data;
+ int err;
+
+ if (!map->ops->map_mmap || map_value_has_spin_lock(map))
+ return -ENOTSUPP;
+
+ if (!(vma->vm_flags & VM_SHARED))
+ return -EINVAL;
+
+ mutex_lock(&map->freeze_mutex);
+
+ if ((vma->vm_flags & VM_WRITE) && map->frozen) {
+ err = -EPERM;
+ goto out;
+ }
+
+ /* set default open/close callbacks */
+ vma->vm_ops = &bpf_map_default_vmops;
+ vma->vm_private_data = map;
+
+ err = map->ops->map_mmap(map, vma);
+ if (err)
+ goto out;
+
+ bpf_map_inc_with_uref(map);
+
+ if (vma->vm_flags & VM_WRITE)
+ map->writecnt++;
+out:
+ mutex_unlock(&map->freeze_mutex);
+ return err;
+}
+
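
/*
 * Annotation (not part of this patch): a hypothetical userspace sketch of the
 * new .mmap hook, assuming a uapi header that defines BPF_F_MMAPABLE.  The
 * array is created with that flag and mapped MAP_SHARED, which is what
 * bpf_map_mmap() above insists on; a writable mapping of a frozen map fails
 * with EPERM.  Helper names are made up.
 */
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Hypothetical helper: create an mmap()-able array map. */
static int create_mmapable_array(uint32_t value_size, uint32_t max_entries)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type    = BPF_MAP_TYPE_ARRAY;
	attr.key_size    = sizeof(uint32_t);
	attr.value_size  = value_size;
	attr.max_entries = max_entries;
	attr.map_flags   = BPF_F_MMAPABLE;

	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}

/* Hypothetical helper: map the array contents; must be MAP_SHARED. */
static void *mmap_array(int map_fd, size_t len)
{
	return mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0);
}
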
const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
.show_fdinfo = bpf_map_show_fdinfo,
@@ -431,6 +662,7 @@ const struct file_operations bpf_map_fops = {
.release = bpf_map_release,
.read = bpf_dummy_read,
.write = bpf_dummy_write,
+ .mmap = bpf_map_mmap,
};
int bpf_map_new_fd(struct bpf_map *map, int flags)
@@ -542,7 +774,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
return ret;
}
-#define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id
+#define BPF_MAP_CREATE_LAST_FIELD btf_vmlinux_value_type_id
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
@@ -556,6 +788,14 @@ static int map_create(union bpf_attr *attr)
if (err)
return -EINVAL;
+ if (attr->btf_vmlinux_value_type_id) {
+ if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS ||
+ attr->btf_key_type_id || attr->btf_value_type_id)
+ return -EINVAL;
+ } else if (attr->btf_key_type_id && !attr->btf_value_type_id) {
+ return -EINVAL;
+ }
+
f_flags = bpf_get_file_flag(attr->map_flags);
if (f_flags < 0)
return f_flags;
@@ -574,35 +814,39 @@ static int map_create(union bpf_attr *attr)
if (err)
goto free_map;
- atomic_set(&map->refcnt, 1);
- atomic_set(&map->usercnt, 1);
-
- if (attr->btf_key_type_id || attr->btf_value_type_id) {
+ atomic64_set(&map->refcnt, 1);
+ atomic64_set(&map->usercnt, 1);
+ mutex_init(&map->freeze_mutex);
+
+ map->spin_lock_off = -EINVAL;
+ if (attr->btf_key_type_id || attr->btf_value_type_id ||
+ /* Even the map's value is a kernel's struct,
+ * the bpf_prog.o must have BTF to begin with
+ * to figure out the corresponding kernel's
+ * counter part. Thus, attr->btf_fd has
+ * to be valid also.
+ */
+ attr->btf_vmlinux_value_type_id) {
struct btf *btf;
- if (!attr->btf_value_type_id) {
- err = -EINVAL;
- goto free_map;
- }
-
btf = btf_get_by_fd(attr->btf_fd);
if (IS_ERR(btf)) {
err = PTR_ERR(btf);
goto free_map;
}
+ map->btf = btf;
- err = map_check_btf(map, btf, attr->btf_key_type_id,
- attr->btf_value_type_id);
- if (err) {
- btf_put(btf);
- goto free_map;
+ if (attr->btf_value_type_id) {
+ err = map_check_btf(map, btf, attr->btf_key_type_id,
+ attr->btf_value_type_id);
+ if (err)
+ goto free_map;
}
- map->btf = btf;
map->btf_key_type_id = attr->btf_key_type_id;
map->btf_value_type_id = attr->btf_value_type_id;
- } else {
- map->spin_lock_off = -EINVAL;
+ map->btf_vmlinux_value_type_id =
+ attr->btf_vmlinux_value_type_id;
}
err = security_bpf_map_alloc(map);
@@ -652,21 +896,19 @@ struct bpf_map *__bpf_map_get(struct fd f)
return f.file->private_data;
}
-/* prog's and map's refcnt limit */
-#define BPF_MAX_REFCNT 32768
-
-struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
+void bpf_map_inc(struct bpf_map *map)
{
- if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
- atomic_dec(&map->refcnt);
- return ERR_PTR(-EBUSY);
- }
- if (uref)
- atomic_inc(&map->usercnt);
- return map;
+ atomic64_inc(&map->refcnt);
}
EXPORT_SYMBOL_GPL(bpf_map_inc);
+void bpf_map_inc_with_uref(struct bpf_map *map)
+{
+ atomic64_inc(&map->refcnt);
+ atomic64_inc(&map->usercnt);
+}
+EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref);
+
struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
struct fd f = fdget(ufd);
@@ -676,34 +918,36 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd)
if (IS_ERR(map))
return map;
- map = bpf_map_inc(map, true);
+ bpf_map_inc_with_uref(map);
fdput(f);
return map;
}
/* map_idr_lock should have been held */
-static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map,
- bool uref)
+static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
{
int refold;
- refold = atomic_fetch_add_unless(&map->refcnt, 1, 0);
-
- if (refold >= BPF_MAX_REFCNT) {
- __bpf_map_put(map, false);
- return ERR_PTR(-EBUSY);
- }
-
+ refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0);
if (!refold)
return ERR_PTR(-ENOENT);
-
if (uref)
- atomic_inc(&map->usercnt);
+ atomic64_inc(&map->usercnt);
return map;
}
+struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map)
+{
+ spin_lock_bh(&map_idr_lock);
+ map = __bpf_map_inc_not_zero(map, false);
+ spin_unlock_bh(&map_idr_lock);
+
+ return map;
+}
+EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero);
+
int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
return -ENOTSUPP;
@@ -729,7 +973,7 @@ static int map_lookup_elem(union bpf_attr *attr)
void __user *uvalue = u64_to_user_ptr(attr->value);
int ufd = attr->map_fd;
struct bpf_map *map;
- void *key, *value, *ptr;
+ void *key, *value;
u32 value_size;
struct fd f;
int err;
@@ -761,72 +1005,14 @@ static int map_lookup_elem(union bpf_attr *attr)
goto err_put;
}
- if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
- map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
- map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
- map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
- value_size = round_up(map->value_size, 8) * num_possible_cpus();
- else if (IS_FD_MAP(map))
- value_size = sizeof(u32);
- else
- value_size = map->value_size;
+ value_size = bpf_map_value_size(map);
err = -ENOMEM;
value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
if (!value)
goto free_key;
- if (bpf_map_is_dev_bound(map)) {
- err = bpf_map_offload_lookup_elem(map, key, value);
- goto done;
- }
-
- preempt_disable();
- this_cpu_inc(bpf_prog_active);
- if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
- map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
- err = bpf_percpu_hash_copy(map, key, value);
- } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
- err = bpf_percpu_array_copy(map, key, value);
- } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
- err = bpf_percpu_cgroup_storage_copy(map, key, value);
- } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
- err = bpf_stackmap_copy(map, key, value);
- } else if (IS_FD_ARRAY(map)) {
- err = bpf_fd_array_map_lookup_elem(map, key, value);
- } else if (IS_FD_HASH(map)) {
- err = bpf_fd_htab_map_lookup_elem(map, key, value);
- } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
- err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
- } else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
- map->map_type == BPF_MAP_TYPE_STACK) {
- err = map->ops->map_peek_elem(map, value);
- } else {
- rcu_read_lock();
- if (map->ops->map_lookup_elem_sys_only)
- ptr = map->ops->map_lookup_elem_sys_only(map, key);
- else
- ptr = map->ops->map_lookup_elem(map, key);
- if (IS_ERR(ptr)) {
- err = PTR_ERR(ptr);
- } else if (!ptr) {
- err = -ENOENT;
- } else {
- err = 0;
- if (attr->flags & BPF_F_LOCK)
- /* lock 'ptr' and copy everything but lock */
- copy_map_value_locked(map, value, ptr, true);
- else
- copy_map_value(map, value, ptr);
- /* mask lock, since value wasn't zero inited */
- check_and_init_map_lock(map, value);
- }
- rcu_read_unlock();
- }
- this_cpu_dec(bpf_prog_active);
- preempt_enable();
-
-done:
+ err = bpf_map_copy_value(map, key, value, attr->flags);
if (err)
goto free_value;
@@ -845,16 +1031,6 @@ err_put:
return err;
}
-static void maybe_wait_bpf_programs(struct bpf_map *map)
-{
- /* Wait for any running BPF programs to complete so that
- * userspace, when we return to it, knows that all programs
- * that could be running use the new map value.
- */
- if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
- map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
- synchronize_rcu();
-}
#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
@@ -910,56 +1086,8 @@ static int map_update_elem(union bpf_attr *attr)
if (copy_from_user(value, uvalue, value_size) != 0)
goto free_value;
- /* Need to create a kthread, thus must support schedule */
- if (bpf_map_is_dev_bound(map)) {
- err = bpf_map_offload_update_elem(map, key, value, attr->flags);
- goto out;
- } else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
- map->map_type == BPF_MAP_TYPE_SOCKHASH ||
- map->map_type == BPF_MAP_TYPE_SOCKMAP) {
- err = map->ops->map_update_elem(map, key, value, attr->flags);
- goto out;
- }
+ err = bpf_map_update_value(map, f, key, value, attr->flags);
- /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
- * inside bpf map update or delete otherwise deadlocks are possible
- */
- preempt_disable();
- __this_cpu_inc(bpf_prog_active);
- if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
- map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
- err = bpf_percpu_hash_update(map, key, value, attr->flags);
- } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
- err = bpf_percpu_array_update(map, key, value, attr->flags);
- } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
- err = bpf_percpu_cgroup_storage_update(map, key, value,
- attr->flags);
- } else if (IS_FD_ARRAY(map)) {
- rcu_read_lock();
- err = bpf_fd_array_map_update_elem(map, f.file, key, value,
- attr->flags);
- rcu_read_unlock();
- } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
- rcu_read_lock();
- err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
- attr->flags);
- rcu_read_unlock();
- } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
- /* rcu_read_lock() is not needed */
- err = bpf_fd_reuseport_array_update_elem(map, key, value,
- attr->flags);
- } else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
- map->map_type == BPF_MAP_TYPE_STACK) {
- err = map->ops->map_push_elem(map, value, attr->flags);
- } else {
- rcu_read_lock();
- err = map->ops->map_update_elem(map, key, value, attr->flags);
- rcu_read_unlock();
- }
- __this_cpu_dec(bpf_prog_active);
- preempt_enable();
- maybe_wait_bpf_programs(map);
-out:
free_value:
kfree(value);
free_key:
@@ -1001,6 +1129,11 @@ static int map_delete_elem(union bpf_attr *attr)
if (bpf_map_is_dev_bound(map)) {
err = bpf_map_offload_delete_elem(map, key);
goto out;
+ } else if (IS_FD_PROG_ARRAY(map) ||
+ map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
+ /* These maps require sleepable context */
+ err = map->ops->map_delete_elem(map, key);
+ goto out;
}
preempt_disable();
@@ -1085,6 +1218,220 @@ err_put:
return err;
}
+int generic_map_delete_batch(struct bpf_map *map,
+ const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ void __user *keys = u64_to_user_ptr(attr->batch.keys);
+ u32 cp, max_count;
+ int err = 0;
+ void *key;
+
+ if (attr->batch.elem_flags & ~BPF_F_LOCK)
+ return -EINVAL;
+
+ if ((attr->batch.elem_flags & BPF_F_LOCK) &&
+ !map_value_has_spin_lock(map)) {
+ return -EINVAL;
+ }
+
+ max_count = attr->batch.count;
+ if (!max_count)
+ return 0;
+
+ key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
+ if (!key)
+ return -ENOMEM;
+
+ for (cp = 0; cp < max_count; cp++) {
+ err = -EFAULT;
+ if (copy_from_user(key, keys + cp * map->key_size,
+ map->key_size))
+ break;
+
+ if (bpf_map_is_dev_bound(map)) {
+ err = bpf_map_offload_delete_elem(map, key);
+ break;
+ }
+
+ preempt_disable();
+ __this_cpu_inc(bpf_prog_active);
+ rcu_read_lock();
+ err = map->ops->map_delete_elem(map, key);
+ rcu_read_unlock();
+ __this_cpu_dec(bpf_prog_active);
+ preempt_enable();
+ maybe_wait_bpf_programs(map);
+ if (err)
+ break;
+ }
+ if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
+ err = -EFAULT;
+
+ kfree(key);
+ return err;
+}
+
+int generic_map_update_batch(struct bpf_map *map,
+ const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ void __user *values = u64_to_user_ptr(attr->batch.values);
+ void __user *keys = u64_to_user_ptr(attr->batch.keys);
+ u32 value_size, cp, max_count;
+ int ufd = attr->map_fd;
+ void *key, *value;
+ struct fd f;
+ int err = 0;
+
+ f = fdget(ufd);
+ if (attr->batch.elem_flags & ~BPF_F_LOCK)
+ return -EINVAL;
+
+ if ((attr->batch.elem_flags & BPF_F_LOCK) &&
+ !map_value_has_spin_lock(map)) {
+ return -EINVAL;
+ }
+
+ value_size = bpf_map_value_size(map);
+
+ max_count = attr->batch.count;
+ if (!max_count)
+ return 0;
+
+ key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
+ if (!key)
+ return -ENOMEM;
+
+ value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+ if (!value) {
+ kfree(key);
+ return -ENOMEM;
+ }
+
+ for (cp = 0; cp < max_count; cp++) {
+ err = -EFAULT;
+ if (copy_from_user(key, keys + cp * map->key_size,
+ map->key_size) ||
+ copy_from_user(value, values + cp * value_size, value_size))
+ break;
+
+ err = bpf_map_update_value(map, f, key, value,
+ attr->batch.elem_flags);
+
+ if (err)
+ break;
+ }
+
+ if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
+ err = -EFAULT;
+
+ kfree(value);
+ kfree(key);
+ return err;
+}
+
+#define MAP_LOOKUP_RETRIES 3
+
+int generic_map_lookup_batch(struct bpf_map *map,
+ const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch);
+ void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
+ void __user *values = u64_to_user_ptr(attr->batch.values);
+ void __user *keys = u64_to_user_ptr(attr->batch.keys);
+ void *buf, *buf_prevkey, *prev_key, *key, *value;
+ int err, retry = MAP_LOOKUP_RETRIES;
+ u32 value_size, cp, max_count;
+
+ if (attr->batch.elem_flags & ~BPF_F_LOCK)
+ return -EINVAL;
+
+ if ((attr->batch.elem_flags & BPF_F_LOCK) &&
+ !map_value_has_spin_lock(map))
+ return -EINVAL;
+
+ value_size = bpf_map_value_size(map);
+
+ max_count = attr->batch.count;
+ if (!max_count)
+ return 0;
+
+ if (put_user(0, &uattr->batch.count))
+ return -EFAULT;
+
+ buf_prevkey = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
+ if (!buf_prevkey)
+ return -ENOMEM;
+
+ buf = kmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
+ if (!buf) {
+ kvfree(buf_prevkey);
+ return -ENOMEM;
+ }
+
+ err = -EFAULT;
+ prev_key = NULL;
+ if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size))
+ goto free_buf;
+ key = buf;
+ value = key + map->key_size;
+ if (ubatch)
+ prev_key = buf_prevkey;
+
+ for (cp = 0; cp < max_count;) {
+ rcu_read_lock();
+ err = map->ops->map_get_next_key(map, prev_key, key);
+ rcu_read_unlock();
+ if (err)
+ break;
+ err = bpf_map_copy_value(map, key, value,
+ attr->batch.elem_flags);
+
+ if (err == -ENOENT) {
+ if (retry) {
+ retry--;
+ continue;
+ }
+ err = -EINTR;
+ break;
+ }
+
+ if (err)
+ goto free_buf;
+
+ if (copy_to_user(keys + cp * map->key_size, key,
+ map->key_size)) {
+ err = -EFAULT;
+ goto free_buf;
+ }
+ if (copy_to_user(values + cp * value_size, value, value_size)) {
+ err = -EFAULT;
+ goto free_buf;
+ }
+
+ if (!prev_key)
+ prev_key = buf_prevkey;
+
+ swap(prev_key, key);
+ retry = MAP_LOOKUP_RETRIES;
+ cp++;
+ }
+
+ if (err == -EFAULT)
+ goto free_buf;
+
+ if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) ||
+ (cp && copy_to_user(uobatch, prev_key, map->key_size))))
+ err = -EFAULT;
+
+free_buf:
+ kfree(buf_prevkey);
+ kfree(buf);
+ return err;
+}
+
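
/*
 * Annotation (not part of this patch): a hypothetical userspace sketch of how
 * the generic batch lookup above is driven, assuming a uapi with the new
 * batch fields.  in_batch == NULL starts from the first key, out_batch
 * returns the cursor for the next call, batch.count comes back as the number
 * of elements actually copied, and -ENOENT signals the whole map was read.
 * Helper name is made up.
 */
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Hypothetical helper: one BPF_MAP_LOOKUP_BATCH call.  keys/values must hold
 * *count elements of key_size/value_size bytes each.
 */
static int lookup_batch(int map_fd, const void *in_batch, void *out_batch,
			void *keys, void *values, uint32_t *count)
{
	union bpf_attr attr;
	int err;

	memset(&attr, 0, sizeof(attr));
	attr.batch.map_fd    = map_fd;
	attr.batch.in_batch  = (uint64_t)(unsigned long)in_batch;
	attr.batch.out_batch = (uint64_t)(unsigned long)out_batch;
	attr.batch.keys      = (uint64_t)(unsigned long)keys;
	attr.batch.values    = (uint64_t)(unsigned long)values;
	attr.batch.count     = *count;

	err = syscall(__NR_bpf, BPF_MAP_LOOKUP_BATCH, &attr, sizeof(attr));
	*count = attr.batch.count;	/* elements actually returned */
	return err;
}

/* The UPDATE and DELETE batch commands use the same attr.batch layout,
 * just without the in_batch/out_batch cursor.
 */
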
#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
static int map_lookup_and_delete_elem(union bpf_attr *attr)
@@ -1162,6 +1509,13 @@ static int map_freeze(const union bpf_attr *attr)
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
+
+ mutex_lock(&map->freeze_mutex);
+
+ if (map->writecnt) {
+ err = -EBUSY;
+ goto err_put;
+ }
if (READ_ONCE(map->frozen)) {
err = -EBUSY;
goto err_put;
@@ -1173,12 +1527,13 @@ static int map_freeze(const union bpf_attr *attr)
WRITE_ONCE(map->frozen, true);
err_put:
+ mutex_unlock(&map->freeze_mutex);
fdput(f);
return err;
}
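
/*
 * Annotation (not part of this patch): for context on the writecnt check
 * added above, a hypothetical userspace sketch of BPF_MAP_FREEZE.  Freezing
 * now fails with EBUSY while a writable mmap() of the map exists, and a
 * later writable mmap() of a frozen map fails with EPERM.  Helper name is
 * made up.
 */
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Hypothetical helper: freeze a map so userspace can no longer modify it. */
static int freeze_map(int map_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;

	return syscall(__NR_bpf, BPF_MAP_FREEZE, &attr, sizeof(attr));
}
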
static const struct bpf_prog_ops * const bpf_prog_types[] = {
-#define BPF_PROG_TYPE(_id, _name) \
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
[_id] = & _name ## _prog_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
@@ -1205,23 +1560,34 @@ static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
return 0;
}
-/* drop refcnt on maps used by eBPF program and free auxilary data */
-static void free_used_maps(struct bpf_prog_aux *aux)
-{
- enum bpf_cgroup_storage_type stype;
- int i;
+enum bpf_audit {
+ BPF_AUDIT_LOAD,
+ BPF_AUDIT_UNLOAD,
+ BPF_AUDIT_MAX,
+};
- for_each_cgroup_storage_type(stype) {
- if (!aux->cgroup_storage[stype])
- continue;
- bpf_cgroup_storage_release(aux->prog,
- aux->cgroup_storage[stype]);
- }
+static const char * const bpf_audit_str[BPF_AUDIT_MAX] = {
+ [BPF_AUDIT_LOAD] = "LOAD",
+ [BPF_AUDIT_UNLOAD] = "UNLOAD",
+};
- for (i = 0; i < aux->used_map_cnt; i++)
- bpf_map_put(aux->used_maps[i]);
+static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op)
+{
+ struct audit_context *ctx = NULL;
+ struct audit_buffer *ab;
- kfree(aux->used_maps);
+ if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX))
+ return;
+ if (audit_enabled == AUDIT_OFF)
+ return;
+ if (op == BPF_AUDIT_LOAD)
+ ctx = audit_context();
+ ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF);
+ if (unlikely(!ab))
+ return;
+ audit_log_format(ab, "prog-id=%u op=%s",
+ prog->aux->id, bpf_audit_str[op]);
+ audit_log_end(ab);
}
int __bpf_prog_charge(struct user_struct *user, u32 pages)
@@ -1316,24 +1682,33 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
- free_used_maps(aux);
+ kvfree(aux->func_info);
+ kfree(aux->func_info_aux);
bpf_prog_uncharge_memlock(aux->prog);
security_bpf_prog_free(aux);
bpf_prog_free(aux->prog);
}
+static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
+{
+ bpf_prog_kallsyms_del_all(prog);
+ btf_put(prog->aux->btf);
+ bpf_prog_free_linfo(prog);
+
+ if (deferred)
+ call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
+ else
+ __bpf_prog_put_rcu(&prog->aux->rcu);
+}
+
static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
- if (atomic_dec_and_test(&prog->aux->refcnt)) {
+ if (atomic64_dec_and_test(&prog->aux->refcnt)) {
perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
+ bpf_audit_prog(prog, BPF_AUDIT_UNLOAD);
/* bpf_prog_free_id() must be called first */
bpf_prog_free_id(prog, do_idr_lock);
- bpf_prog_kallsyms_del_all(prog);
- btf_put(prog->aux->btf);
- kvfree(prog->aux->func_info);
- bpf_prog_free_linfo(prog);
-
- call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
+ __bpf_prog_put_noref(prog, true);
}
}
@@ -1435,13 +1810,9 @@ static struct bpf_prog *____bpf_prog_get(struct fd f)
return f.file->private_data;
}
-struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
+void bpf_prog_add(struct bpf_prog *prog, int i)
{
- if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
- atomic_sub(i, &prog->aux->refcnt);
- return ERR_PTR(-EBUSY);
- }
- return prog;
+ atomic64_add(i, &prog->aux->refcnt);
}
EXPORT_SYMBOL_GPL(bpf_prog_add);
@@ -1452,13 +1823,13 @@ void bpf_prog_sub(struct bpf_prog *prog, int i)
* path holds a reference to the program, thus atomic_sub() can
* be safely used in such cases!
*/
- WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
+ WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0);
}
EXPORT_SYMBOL_GPL(bpf_prog_sub);
-struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
+void bpf_prog_inc(struct bpf_prog *prog)
{
- return bpf_prog_add(prog, 1);
+ atomic64_inc(&prog->aux->refcnt);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);
@@ -1467,12 +1838,7 @@ struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
{
int refold;
- refold = atomic_fetch_add_unless(&prog->aux->refcnt, 1, 0);
-
- if (refold >= BPF_MAX_REFCNT) {
- __bpf_prog_put(prog, false);
- return ERR_PTR(-EBUSY);
- }
+ refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0);
if (!refold)
return ERR_PTR(-ENOENT);
@@ -1510,7 +1876,7 @@ static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type,
goto out;
}
- prog = bpf_prog_inc(prog);
+ bpf_prog_inc(prog);
out:
fdput(f);
return prog;
@@ -1555,9 +1921,28 @@ static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
}
static int
-bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
- enum bpf_attach_type expected_attach_type)
+bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
+ enum bpf_attach_type expected_attach_type,
+ u32 btf_id, u32 prog_fd)
{
+ if (btf_id) {
+ if (btf_id > BTF_MAX_TYPE)
+ return -EINVAL;
+
+ switch (prog_type) {
+ case BPF_PROG_TYPE_TRACING:
+ case BPF_PROG_TYPE_STRUCT_OPS:
+ case BPF_PROG_TYPE_EXT:
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ if (prog_fd && prog_type != BPF_PROG_TYPE_TRACING &&
+ prog_type != BPF_PROG_TYPE_EXT)
+ return -EINVAL;
+
switch (prog_type) {
case BPF_PROG_TYPE_CGROUP_SOCK:
switch (expected_attach_type) {
@@ -1598,13 +1983,17 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
default:
return -EINVAL;
}
+ case BPF_PROG_TYPE_EXT:
+ if (expected_attach_type)
+ return -EINVAL;
+ /* fallthrough */
default:
return 0;
}
}
/* last field in 'union bpf_attr' used by this command */
-#define BPF_PROG_LOAD_LAST_FIELD line_info_cnt
+#define BPF_PROG_LOAD_LAST_FIELD attach_prog_fd
static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
{
@@ -1619,6 +2008,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT |
BPF_F_ANY_ALIGNMENT |
+ BPF_F_TEST_STATE_FREQ |
BPF_F_TEST_RND_HI32))
return -EINVAL;
@@ -1645,7 +2035,9 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
return -EPERM;
bpf_prog_load_fixup_attach_type(attr);
- if (bpf_prog_load_check_attach_type(type, attr->expected_attach_type))
+ if (bpf_prog_load_check_attach(type, attr->expected_attach_type,
+ attr->attach_btf_id,
+ attr->attach_prog_fd))
return -EINVAL;
/* plain bpf_prog allocation */
@@ -1654,6 +2046,17 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
return -ENOMEM;
prog->expected_attach_type = attr->expected_attach_type;
+ prog->aux->attach_btf_id = attr->attach_btf_id;
+ if (attr->attach_prog_fd) {
+ struct bpf_prog *tgt_prog;
+
+ tgt_prog = bpf_prog_get(attr->attach_prog_fd);
+ if (IS_ERR(tgt_prog)) {
+ err = PTR_ERR(tgt_prog);
+ goto free_prog_nouncharge;
+ }
+ prog->aux->linked_prog = tgt_prog;
+ }
prog->aux->offload_requested = !!attr->prog_ifindex;
@@ -1675,7 +2078,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
prog->orig_prog = NULL;
prog->jited = 0;
- atomic_set(&prog->aux->refcnt, 1);
+ atomic64_set(&prog->aux->refcnt, 1);
prog->gpl_compatible = is_gpl ? 1 : 0;
if (bpf_prog_is_dev_bound(prog->aux)) {
@@ -1707,28 +2110,36 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
if (err)
goto free_used_maps;
- err = bpf_prog_new_fd(prog);
- if (err < 0) {
- /* failed to allocate fd.
- * bpf_prog_put() is needed because the above
- * bpf_prog_alloc_id() has published the prog
- * to the userspace and the userspace may
- * have refcnt-ed it through BPF_PROG_GET_FD_BY_ID.
- */
- bpf_prog_put(prog);
- return err;
- }
-
+ /* Upon success of bpf_prog_alloc_id(), the BPF prog is
+ * effectively publicly exposed. However, retrieving via
+ * bpf_prog_get_fd_by_id() will take another reference,
+ * therefore it cannot be gone underneath us.
+ *
+ * Only for the time /after/ successful bpf_prog_new_fd()
+ * and before returning to userspace, we might just hold
+ * one reference and any parallel close on that fd could
+ * rip everything out. Hence, below notifications must
+ * happen before bpf_prog_new_fd().
+ *
+ * Also, any failure handling from this point onwards must
+ * be using bpf_prog_put() given the program is exposed.
+ */
bpf_prog_kallsyms_add(prog);
perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0);
+ bpf_audit_prog(prog, BPF_AUDIT_LOAD);
+
+ err = bpf_prog_new_fd(prog);
+ if (err < 0)
+ bpf_prog_put(prog);
return err;
free_used_maps:
- bpf_prog_free_linfo(prog);
- kvfree(prog->aux->func_info);
- btf_put(prog->aux->btf);
- bpf_prog_kallsyms_del_subprogs(prog);
- free_used_maps(prog->aux);
+ /* In case we have subprogs, we need to wait for a grace
+ * period before we can tear down JIT memory since symbols
+ * are already exposed under kallsyms.
+ */
+ __bpf_prog_put_noref(prog, prog->aux->func_cnt);
+ return err;
free_prog:
bpf_prog_uncharge_memlock(prog);
free_prog_sec:
@@ -1758,6 +2169,50 @@ static int bpf_obj_get(const union bpf_attr *attr)
attr->file_flags);
}
+static int bpf_tracing_prog_release(struct inode *inode, struct file *filp)
+{
+ struct bpf_prog *prog = filp->private_data;
+
+ WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
+ bpf_prog_put(prog);
+ return 0;
+}
+
+static const struct file_operations bpf_tracing_prog_fops = {
+ .release = bpf_tracing_prog_release,
+ .read = bpf_dummy_read,
+ .write = bpf_dummy_write,
+};
+
+static int bpf_tracing_prog_attach(struct bpf_prog *prog)
+{
+ int tr_fd, err;
+
+ if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
+ prog->expected_attach_type != BPF_TRACE_FEXIT &&
+ prog->type != BPF_PROG_TYPE_EXT) {
+ err = -EINVAL;
+ goto out_put_prog;
+ }
+
+ err = bpf_trampoline_link_prog(prog);
+ if (err)
+ goto out_put_prog;
+
+ tr_fd = anon_inode_getfd("bpf-tracing-prog", &bpf_tracing_prog_fops,
+ prog, O_CLOEXEC);
+ if (tr_fd < 0) {
+ WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
+ err = tr_fd;
+ goto out_put_prog;
+ }
+ return tr_fd;
+
+out_put_prog:
+ bpf_prog_put(prog);
+ return err;
+}
+
struct bpf_raw_tracepoint {
struct bpf_raw_event_map *btp;
struct bpf_prog *prog;
@@ -1789,17 +2244,54 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
struct bpf_raw_tracepoint *raw_tp;
struct bpf_raw_event_map *btp;
struct bpf_prog *prog;
- char tp_name[128];
+ const char *tp_name;
+ char buf[128];
int tp_fd, err;
- if (strncpy_from_user(tp_name, u64_to_user_ptr(attr->raw_tracepoint.name),
- sizeof(tp_name) - 1) < 0)
- return -EFAULT;
- tp_name[sizeof(tp_name) - 1] = 0;
+ if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN))
+ return -EINVAL;
+
+ prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT &&
+ prog->type != BPF_PROG_TYPE_TRACING &&
+ prog->type != BPF_PROG_TYPE_EXT &&
+ prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) {
+ err = -EINVAL;
+ goto out_put_prog;
+ }
+
+ if (prog->type == BPF_PROG_TYPE_TRACING ||
+ prog->type == BPF_PROG_TYPE_EXT) {
+ if (attr->raw_tracepoint.name) {
+ /* The attach point for this category of programs
+ * should be specified via btf_id during program load.
+ */
+ err = -EINVAL;
+ goto out_put_prog;
+ }
+ if (prog->expected_attach_type == BPF_TRACE_RAW_TP)
+ tp_name = prog->aux->attach_func_name;
+ else
+ return bpf_tracing_prog_attach(prog);
+ } else {
+ if (strncpy_from_user(buf,
+ u64_to_user_ptr(attr->raw_tracepoint.name),
+ sizeof(buf) - 1) < 0) {
+ err = -EFAULT;
+ goto out_put_prog;
+ }
+ buf[sizeof(buf) - 1] = 0;
+ tp_name = buf;
+ }
btp = bpf_get_raw_tracepoint(tp_name);
- if (!btp)
- return -ENOENT;
+ if (!btp) {
+ err = -ENOENT;
+ goto out_put_prog;
+ }
raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER);
if (!raw_tp) {
@@ -1807,38 +2299,27 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
goto out_put_btp;
}
raw_tp->btp = btp;
-
- prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
- if (IS_ERR(prog)) {
- err = PTR_ERR(prog);
- goto out_free_tp;
- }
- if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT &&
- prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) {
- err = -EINVAL;
- goto out_put_prog;
- }
+ raw_tp->prog = prog;
err = bpf_probe_register(raw_tp->btp, prog);
if (err)
- goto out_put_prog;
+ goto out_free_tp;
- raw_tp->prog = prog;
tp_fd = anon_inode_getfd("bpf-raw-tracepoint", &bpf_raw_tp_fops, raw_tp,
O_CLOEXEC);
if (tp_fd < 0) {
bpf_probe_unregister(raw_tp->btp, prog);
err = tp_fd;
- goto out_put_prog;
+ goto out_free_tp;
}
return tp_fd;
-out_put_prog:
- bpf_prog_put(prog);
out_free_tp:
kfree(raw_tp);
out_put_btp:
bpf_put_raw_tracepoint(btp);
+out_put_prog:
+ bpf_prog_put(prog);
return err;
}
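
/*
 * Annotation (not part of this patch): a hypothetical userspace sketch of the
 * attach path above.  For BPF_PROG_TYPE_RAW_TRACEPOINT the tracepoint name is
 * passed here, while for TRACING/EXT programs it must stay zero because the
 * target was already fixed by attach_btf_id at load time.  Helper name is
 * made up.
 */
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Hypothetical helper: attach a loaded program via BPF_RAW_TRACEPOINT_OPEN.
 * tp_name may be NULL for BPF_PROG_TYPE_TRACING/EXT programs.
 */
static int raw_tp_open(int prog_fd, const char *tp_name)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.raw_tracepoint.prog_fd = prog_fd;
	attr.raw_tracepoint.name    = (uint64_t)(unsigned long)tp_name;

	return syscall(__NR_bpf, BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
}
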
@@ -1859,10 +2340,10 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
}
}
-#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
+#define BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd
#define BPF_F_ATTACH_MASK \
- (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)
+ (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE)
static int bpf_prog_attach(const union bpf_attr *attr)
{
@@ -2124,17 +2605,12 @@ static int bpf_obj_get_next_id(const union bpf_attr *attr,
#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id
-static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
+struct bpf_prog *bpf_prog_by_id(u32 id)
{
struct bpf_prog *prog;
- u32 id = attr->prog_id;
- int fd;
- if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
- return -EINVAL;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
+ if (!id)
+ return ERR_PTR(-ENOENT);
spin_lock_bh(&prog_idr_lock);
prog = idr_find(&prog_idr, id);
@@ -2143,7 +2619,22 @@ static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
else
prog = ERR_PTR(-ENOENT);
spin_unlock_bh(&prog_idr_lock);
+ return prog;
+}
+
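
/*
 * Annotation (not part of this patch): bpf_prog_by_id() above backs
 * BPF_PROG_GET_FD_BY_ID.  A hypothetical userspace sketch, which requires
 * CAP_SYS_ADMIN; helper name is made up.
 */
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Hypothetical helper: obtain a new fd for an already loaded program. */
static int prog_fd_by_id(uint32_t id)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.prog_id = id;

	return syscall(__NR_bpf, BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
}
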
+static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
+{
+ struct bpf_prog *prog;
+ u32 id = attr->prog_id;
+ int fd;
+ if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
+ return -EINVAL;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ prog = bpf_prog_by_id(id);
if (IS_ERR(prog))
return PTR_ERR(prog);
@@ -2177,7 +2668,7 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
spin_lock_bh(&map_idr_lock);
map = idr_find(&map_idr, id);
if (map)
- map = bpf_map_inc_not_zero(map, true);
+ map = __bpf_map_inc_not_zero(map, true);
else
map = ERR_PTR(-ENOENT);
spin_unlock_bh(&map_idr_lock);
@@ -2593,6 +3084,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
info.btf_key_type_id = map->btf_key_type_id;
info.btf_value_type_id = map->btf_value_type_id;
}
+ info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
if (bpf_map_is_dev_bound(map)) {
err = bpf_map_offload_info_fill(&info, map);
@@ -2805,6 +3297,61 @@ out:
return err;
}
+#define BPF_MAP_BATCH_LAST_FIELD batch.flags
+
+#define BPF_DO_BATCH(fn) \
+ do { \
+ if (!fn) { \
+ err = -ENOTSUPP; \
+ goto err_put; \
+ } \
+ err = fn(map, attr, uattr); \
+ } while (0)
+
+static int bpf_map_do_batch(const union bpf_attr *attr,
+ union bpf_attr __user *uattr,
+ int cmd)
+{
+ struct bpf_map *map;
+ int err, ufd;
+ struct fd f;
+
+ if (CHECK_ATTR(BPF_MAP_BATCH))
+ return -EINVAL;
+
+ ufd = attr->batch.map_fd;
+ f = fdget(ufd);
+ map = __bpf_map_get(f);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ if ((cmd == BPF_MAP_LOOKUP_BATCH ||
+ cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) &&
+ !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
+ err = -EPERM;
+ goto err_put;
+ }
+
+ if (cmd != BPF_MAP_LOOKUP_BATCH &&
+ !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
+ err = -EPERM;
+ goto err_put;
+ }
+
+ if (cmd == BPF_MAP_LOOKUP_BATCH)
+ BPF_DO_BATCH(map->ops->map_lookup_batch);
+ else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH)
+ BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch);
+ else if (cmd == BPF_MAP_UPDATE_BATCH)
+ BPF_DO_BATCH(map->ops->map_update_batch);
+ else
+ BPF_DO_BATCH(map->ops->map_delete_batch);
+
+err_put:
+ fdput(f);
+ return err;
+}
+
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
union bpf_attr attr = {};
@@ -2874,6 +3421,10 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
err = bpf_obj_get_next_id(&attr, uattr,
&map_idr, &map_idr_lock);
break;
+ case BPF_BTF_GET_NEXT_ID:
+ err = bpf_obj_get_next_id(&attr, uattr,
+ &btf_idr, &btf_idr_lock);
+ break;
case BPF_PROG_GET_FD_BY_ID:
err = bpf_prog_get_fd_by_id(&attr);
break;
@@ -2898,6 +3449,19 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
err = map_lookup_and_delete_elem(&attr);
break;
+ case BPF_MAP_LOOKUP_BATCH:
+ err = bpf_map_do_batch(&attr, uattr, BPF_MAP_LOOKUP_BATCH);
+ break;
+ case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
+ err = bpf_map_do_batch(&attr, uattr,
+ BPF_MAP_LOOKUP_AND_DELETE_BATCH);
+ break;
+ case BPF_MAP_UPDATE_BATCH:
+ err = bpf_map_do_batch(&attr, uattr, BPF_MAP_UPDATE_BATCH);
+ break;
+ case BPF_MAP_DELETE_BATCH:
+ err = bpf_map_do_batch(&attr, uattr, BPF_MAP_DELETE_BATCH);
+ break;
default:
err = -EINVAL;
break;