diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/arraymap.c | 55 | ||||
-rw-r--r-- | kernel/bpf/cgroup.c | 37 | ||||
-rw-r--r-- | kernel/bpf/core.c | 49 | ||||
-rw-r--r-- | kernel/bpf/hashtab.c | 21 | ||||
-rw-r--r-- | kernel/bpf/map_in_map.c | 5 | ||||
-rw-r--r-- | kernel/bpf/map_in_map.h | 1 | ||||
-rw-r--r-- | kernel/bpf/syscall.c | 510 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 189 | ||||
-rw-r--r-- | kernel/compat.c | 131 | ||||
-rw-r--r-- | kernel/events/core.c | 47 | ||||
-rw-r--r-- | kernel/exit.c | 307 | ||||
-rw-r--r-- | kernel/kexec_file.c | 14 | ||||
-rw-r--r-- | kernel/sys.c | 16 | ||||
-rw-r--r-- | kernel/time/alarmtimer.c | 4 | ||||
-rw-r--r-- | kernel/time/hrtimer.c | 30 | ||||
-rw-r--r-- | kernel/time/posix-cpu-timers.c | 8 | ||||
-rw-r--r-- | kernel/time/posix-stubs.c | 96 | ||||
-rw-r--r-- | kernel/time/posix-timers.c | 127 | ||||
-rw-r--r-- | kernel/time/time.c | 58 | ||||
-rw-r--r-- | kernel/trace/bpf_trace.c | 66 |
20 files changed, 1217 insertions, 554 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 172dc8ee0e3b..d771a3872500 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -335,6 +335,26 @@ static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key) } /* only called from syscall */ +int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value) +{ + void **elem, *ptr; + int ret = 0; + + if (!map->ops->map_fd_sys_lookup_elem) + return -ENOTSUPP; + + rcu_read_lock(); + elem = array_map_lookup_elem(map, key); + if (elem && (ptr = READ_ONCE(*elem))) + *value = map->ops->map_fd_sys_lookup_elem(ptr); + else + ret = -ENOENT; + rcu_read_unlock(); + + return ret; +} + +/* only called from syscall */ int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags) { @@ -400,6 +420,11 @@ static void prog_fd_array_put_ptr(void *ptr) bpf_prog_put(ptr); } +static u32 prog_fd_array_sys_lookup_elem(void *ptr) +{ + return ((struct bpf_prog *)ptr)->aux->id; +} + /* decrement refcnt of all bpf_progs that are stored in this map */ void bpf_fd_array_map_clear(struct bpf_map *map) { @@ -418,6 +443,7 @@ const struct bpf_map_ops prog_array_map_ops = { .map_delete_elem = fd_array_map_delete_elem, .map_fd_get_ptr = prog_fd_array_get_ptr, .map_fd_put_ptr = prog_fd_array_put_ptr, + .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, }; static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file, @@ -452,38 +478,24 @@ static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee) static void *perf_event_fd_array_get_ptr(struct bpf_map *map, struct file *map_file, int fd) { - const struct perf_event_attr *attr; struct bpf_event_entry *ee; struct perf_event *event; struct file *perf_file; + u64 value; perf_file = perf_event_get(fd); if (IS_ERR(perf_file)) return perf_file; + ee = ERR_PTR(-EOPNOTSUPP); event = perf_file->private_data; - ee = ERR_PTR(-EINVAL); - - attr = perf_event_attrs(event); - if (IS_ERR(attr) || attr->inherit) + if (perf_event_read_local(event, &value) == -EOPNOTSUPP) goto err_out; - switch (attr->type) { - case PERF_TYPE_SOFTWARE: - if (attr->config != PERF_COUNT_SW_BPF_OUTPUT) - goto err_out; - /* fall-through */ - case PERF_TYPE_RAW: - case PERF_TYPE_HARDWARE: - ee = bpf_event_entry_gen(perf_file, map_file); - if (ee) - return ee; - ee = ERR_PTR(-ENOMEM); - /* fall-through */ - default: - break; - } - + ee = bpf_event_entry_gen(perf_file, map_file); + if (ee) + return ee; + ee = ERR_PTR(-ENOMEM); err_out: fput(perf_file); return ee; @@ -599,4 +611,5 @@ const struct bpf_map_ops array_of_maps_map_ops = { .map_delete_elem = fd_array_map_delete_elem, .map_fd_get_ptr = bpf_map_fd_get_ptr, .map_fd_put_ptr = bpf_map_fd_put_ptr, + .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, }; diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index ea6033cba947..546113430049 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -236,3 +236,40 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, return ret; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); + +/** + * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock + * @sk: socket to get cgroup from + * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains + * sk with connection information (IP addresses, etc.) May not contain + * cgroup info if it is a req sock. + * @type: The type of program to be exectuted + * + * socket passed is expected to be of type INET or INET6. + * + * The program type passed in via @type must be suitable for sock_ops + * filtering. No further check is performed to assert that. + * + * This function will return %-EPERM if any if an attached program was found + * and if it returned != 1 during execution. In all other cases, 0 is returned. + */ +int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, + struct bpf_sock_ops_kern *sock_ops, + enum bpf_attach_type type) +{ + struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); + struct bpf_prog *prog; + int ret = 0; + + + rcu_read_lock(); + + prog = rcu_dereference(cgrp->bpf.effective[type]); + if (prog) + ret = BPF_PROG_RUN(prog, sock_ops) == 1 ? 0 : -EPERM; + + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index dedf367f59bb..ad5f55922a13 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -763,10 +763,10 @@ EXPORT_SYMBOL_GPL(__bpf_call_base); * * Decode and execute eBPF instructions. */ -static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) +static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, + u64 *stack) { - u64 stack[MAX_BPF_STACK / sizeof(u64)]; - u64 regs[MAX_BPF_REG], tmp; + u64 tmp; static const void *jumptable[256] = { [0 ... 255] = &&default_label, /* Now overwrite non-defaults ... */ @@ -824,7 +824,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG, /* Call instruction */ [BPF_JMP | BPF_CALL] = &&JMP_CALL, - [BPF_JMP | BPF_CALL | BPF_X] = &&JMP_TAIL_CALL, + [BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL, /* Jumps */ [BPF_JMP | BPF_JA] = &&JMP_JA, [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X, @@ -874,9 +874,6 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) #define CONT ({ insn++; goto select_insn; }) #define CONT_JMP ({ insn++; goto select_insn; }) - FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; - ARG1 = (u64) (unsigned long) ctx; - select_insn: goto *jumptable[insn->code]; @@ -1219,7 +1216,39 @@ load_byte: WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code); return 0; } -STACK_FRAME_NON_STANDARD(__bpf_prog_run); /* jump table */ +STACK_FRAME_NON_STANDARD(___bpf_prog_run); /* jump table */ + +#define PROG_NAME(stack_size) __bpf_prog_run##stack_size +#define DEFINE_BPF_PROG_RUN(stack_size) \ +static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \ +{ \ + u64 stack[stack_size / sizeof(u64)]; \ + u64 regs[MAX_BPF_REG]; \ +\ + FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \ + ARG1 = (u64) (unsigned long) ctx; \ + return ___bpf_prog_run(regs, insn, stack); \ +} + +#define EVAL1(FN, X) FN(X) +#define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y) +#define EVAL3(FN, X, Y...) FN(X) EVAL2(FN, Y) +#define EVAL4(FN, X, Y...) FN(X) EVAL3(FN, Y) +#define EVAL5(FN, X, Y...) FN(X) EVAL4(FN, Y) +#define EVAL6(FN, X, Y...) FN(X) EVAL5(FN, Y) + +EVAL6(DEFINE_BPF_PROG_RUN, 32, 64, 96, 128, 160, 192); +EVAL6(DEFINE_BPF_PROG_RUN, 224, 256, 288, 320, 352, 384); +EVAL4(DEFINE_BPF_PROG_RUN, 416, 448, 480, 512); + +#define PROG_NAME_LIST(stack_size) PROG_NAME(stack_size), + +static unsigned int (*interpreters[])(const void *ctx, + const struct bpf_insn *insn) = { +EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192) +EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384) +EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) +}; bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp) @@ -1268,7 +1297,9 @@ static int bpf_check_tail_call(const struct bpf_prog *fp) */ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) { - fp->bpf_func = (void *) __bpf_prog_run; + u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1); + + fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1]; /* eBPF JITs can rewrite the program in case constant * blinding is active. However, in case of error during diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 004334ea13ba..4fb463172aa8 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1244,6 +1244,26 @@ static void fd_htab_map_free(struct bpf_map *map) } /* only called from syscall */ +int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value) +{ + void **ptr; + int ret = 0; + + if (!map->ops->map_fd_sys_lookup_elem) + return -ENOTSUPP; + + rcu_read_lock(); + ptr = htab_map_lookup_elem(map, key); + if (ptr) + *value = map->ops->map_fd_sys_lookup_elem(READ_ONCE(*ptr)); + else + ret = -ENOENT; + rcu_read_unlock(); + + return ret; +} + +/* only called from syscall */ int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags) { @@ -1305,4 +1325,5 @@ const struct bpf_map_ops htab_of_maps_map_ops = { .map_delete_elem = htab_map_delete_elem, .map_fd_get_ptr = bpf_map_fd_get_ptr, .map_fd_put_ptr = bpf_map_fd_put_ptr, + .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, }; diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 59bcdf821ae4..1da574612bea 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -95,3 +95,8 @@ void bpf_map_fd_put_ptr(void *ptr) */ bpf_map_put(ptr); } + +u32 bpf_map_fd_sys_lookup_elem(void *ptr) +{ + return ((struct bpf_map *)ptr)->id; +} diff --git a/kernel/bpf/map_in_map.h b/kernel/bpf/map_in_map.h index 177fadb689dc..6183db9ec08c 100644 --- a/kernel/bpf/map_in_map.h +++ b/kernel/bpf/map_in_map.h @@ -19,5 +19,6 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0, void *bpf_map_fd_get_ptr(struct bpf_map *map, struct file *map_file, int ufd); void bpf_map_fd_put_ptr(void *ptr); +u32 bpf_map_fd_sys_lookup_elem(void *ptr); #endif diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 265a0d854e33..045646da97cc 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -22,8 +22,20 @@ #include <linux/filter.h> #include <linux/version.h> #include <linux/kernel.h> +#include <linux/idr.h> + +#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \ + (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ + (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \ + (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) +#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) +#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map)) DEFINE_PER_CPU(int, bpf_prog_active); +static DEFINE_IDR(prog_idr); +static DEFINE_SPINLOCK(prog_idr_lock); +static DEFINE_IDR(map_idr); +static DEFINE_SPINLOCK(map_idr_lock); int sysctl_unprivileged_bpf_disabled __read_mostly; @@ -114,6 +126,37 @@ static void bpf_map_uncharge_memlock(struct bpf_map *map) free_uid(user); } +static int bpf_map_alloc_id(struct bpf_map *map) +{ + int id; + + spin_lock_bh(&map_idr_lock); + id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC); + if (id > 0) + map->id = id; + spin_unlock_bh(&map_idr_lock); + + if (WARN_ON_ONCE(!id)) + return -ENOSPC; + + return id > 0 ? 0 : id; +} + +static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock) +{ + if (do_idr_lock) + spin_lock_bh(&map_idr_lock); + else + __acquire(&map_idr_lock); + + idr_remove(&map_idr, map->id); + + if (do_idr_lock) + spin_unlock_bh(&map_idr_lock); + else + __release(&map_idr_lock); +} + /* called from workqueue */ static void bpf_map_free_deferred(struct work_struct *work) { @@ -135,14 +178,21 @@ static void bpf_map_put_uref(struct bpf_map *map) /* decrement map refcnt and schedule it for freeing via workqueue * (unrelying map implementation ops->map_free() might sleep) */ -void bpf_map_put(struct bpf_map *map) +static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock) { if (atomic_dec_and_test(&map->refcnt)) { + /* bpf_map_free_id() must be called first */ + bpf_map_free_id(map, do_idr_lock); INIT_WORK(&map->work, bpf_map_free_deferred); schedule_work(&map->work); } } +void bpf_map_put(struct bpf_map *map) +{ + __bpf_map_put(map, true); +} + void bpf_map_put_with_uref(struct bpf_map *map) { bpf_map_put_uref(map); @@ -166,10 +216,12 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) const struct bpf_map *map = filp->private_data; const struct bpf_array *array; u32 owner_prog_type = 0; + u32 owner_jited = 0; if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) { array = container_of(map, struct bpf_array, map); owner_prog_type = array->owner_prog_type; + owner_jited = array->owner_jited; } seq_printf(m, @@ -186,9 +238,12 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) map->map_flags, map->pages * 1ULL << PAGE_SHIFT); - if (owner_prog_type) + if (owner_prog_type) { seq_printf(m, "owner_prog_type:\t%u\n", owner_prog_type); + seq_printf(m, "owner_jited:\t%u\n", + owner_jited); + } } #endif @@ -236,11 +291,22 @@ static int map_create(union bpf_attr *attr) if (err) goto free_map_nouncharge; - err = bpf_map_new_fd(map); - if (err < 0) - /* failed to allocate fd */ + err = bpf_map_alloc_id(map); + if (err) goto free_map; + err = bpf_map_new_fd(map); + if (err < 0) { + /* failed to allocate fd. + * bpf_map_put() is needed because the above + * bpf_map_alloc_id() has published the map + * to the userspace and the userspace may + * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID. + */ + bpf_map_put(map); + return err; + } + trace_bpf_map_create(map, err); return err; @@ -295,6 +361,28 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd) return map; } +/* map_idr_lock should have been held */ +static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map, + bool uref) +{ + int refold; + + refold = __atomic_add_unless(&map->refcnt, 1, 0); + + if (refold >= BPF_MAX_REFCNT) { + __bpf_map_put(map, false); + return ERR_PTR(-EBUSY); + } + + if (!refold) + return ERR_PTR(-ENOENT); + + if (uref) + atomic_inc(&map->usercnt); + + return map; +} + int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) { return -ENOTSUPP; @@ -322,19 +410,18 @@ static int map_lookup_elem(union bpf_attr *attr) if (IS_ERR(map)) return PTR_ERR(map); - err = -ENOMEM; - key = kmalloc(map->key_size, GFP_USER); - if (!key) + key = memdup_user(ukey, map->key_size); + if (IS_ERR(key)) { + err = PTR_ERR(key); goto err_put; - - err = -EFAULT; - if (copy_from_user(key, ukey, map->key_size) != 0) - goto free_key; + } if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) value_size = round_up(map->value_size, 8) * num_possible_cpus(); + else if (IS_FD_MAP(map)) + value_size = sizeof(u32); else value_size = map->value_size; @@ -350,9 +437,10 @@ static int map_lookup_elem(union bpf_attr *attr) err = bpf_percpu_array_copy(map, key, value); } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { err = bpf_stackmap_copy(map, key, value); - } else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS || - map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { - err = -ENOTSUPP; + } else if (IS_FD_ARRAY(map)) { + err = bpf_fd_array_map_lookup_elem(map, key, value); + } else if (IS_FD_HASH(map)) { + err = bpf_fd_htab_map_lookup_elem(map, key, value); } else { rcu_read_lock(); ptr = map->ops->map_lookup_elem(map, key); @@ -402,14 +490,11 @@ static int map_update_elem(union bpf_attr *attr) if (IS_ERR(map)) return PTR_ERR(map); - err = -ENOMEM; - key = kmalloc(map->key_size, GFP_USER); - if (!key) + key = memdup_user(ukey, map->key_size); + if (IS_ERR(key)) { + err = PTR_ERR(key); goto err_put; - - err = -EFAULT; - if (copy_from_user(key, ukey, map->key_size) != 0) - goto free_key; + } if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || @@ -488,14 +573,11 @@ static int map_delete_elem(union bpf_attr *attr) if (IS_ERR(map)) return PTR_ERR(map); - err = -ENOMEM; - key = kmalloc(map->key_size, GFP_USER); - if (!key) + key = memdup_user(ukey, map->key_size); + if (IS_ERR(key)) { + err = PTR_ERR(key); goto err_put; - - err = -EFAULT; - if (copy_from_user(key, ukey, map->key_size) != 0) - goto free_key; + } preempt_disable(); __this_cpu_inc(bpf_prog_active); @@ -507,7 +589,6 @@ static int map_delete_elem(union bpf_attr *attr) if (!err) trace_bpf_map_delete_elem(map, ufd, key); -free_key: kfree(key); err_put: fdput(f); @@ -536,14 +617,11 @@ static int map_get_next_key(union bpf_attr *attr) return PTR_ERR(map); if (ukey) { - err = -ENOMEM; - key = kmalloc(map->key_size, GFP_USER); - if (!key) + key = memdup_user(ukey, map->key_size); + if (IS_ERR(key)) { + err = PTR_ERR(key); goto err_put; - - err = -EFAULT; - if (copy_from_user(key, ukey, map->key_size) != 0) - goto free_key; + } } else { key = NULL; } @@ -650,6 +728,42 @@ static void bpf_prog_uncharge_memlock(struct bpf_prog *prog) free_uid(user); } +static int bpf_prog_alloc_id(struct bpf_prog *prog) +{ + int id; + + spin_lock_bh(&prog_idr_lock); + id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC); + if (id > 0) + prog->aux->id = id; + spin_unlock_bh(&prog_idr_lock); + + /* id is in [1, INT_MAX) */ + if (WARN_ON_ONCE(!id)) + return -ENOSPC; + + return id > 0 ? 0 : id; +} + +static void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) +{ + /* cBPF to eBPF migrations are currently not in the idr store. */ + if (!prog->aux->id) + return; + + if (do_idr_lock) + spin_lock_bh(&prog_idr_lock); + else + __acquire(&prog_idr_lock); + + idr_remove(&prog_idr, prog->aux->id); + + if (do_idr_lock) + spin_unlock_bh(&prog_idr_lock); + else + __release(&prog_idr_lock); +} + static void __bpf_prog_put_rcu(struct rcu_head *rcu) { struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); @@ -659,14 +773,21 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu) bpf_prog_free(aux->prog); } -void bpf_prog_put(struct bpf_prog *prog) +static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) { if (atomic_dec_and_test(&prog->aux->refcnt)) { trace_bpf_prog_put_rcu(prog); + /* bpf_prog_free_id() must be called first */ + bpf_prog_free_id(prog, do_idr_lock); bpf_prog_kallsyms_del(prog); call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); } } + +void bpf_prog_put(struct bpf_prog *prog) +{ + __bpf_prog_put(prog, true); +} EXPORT_SYMBOL_GPL(bpf_prog_put); static int bpf_prog_release(struct inode *inode, struct file *filp) @@ -748,6 +869,24 @@ struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog) } EXPORT_SYMBOL_GPL(bpf_prog_inc); +/* prog_idr_lock should have been held */ +static struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) +{ + int refold; + + refold = __atomic_add_unless(&prog->aux->refcnt, 1, 0); + + if (refold >= BPF_MAX_REFCNT) { + __bpf_prog_put(prog, false); + return ERR_PTR(-EBUSY); + } + + if (!refold) + return ERR_PTR(-ENOENT); + + return prog; +} + static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type) { struct fd f = fdget(ufd); @@ -815,7 +954,9 @@ static int bpf_prog_load(union bpf_attr *attr) attr->kern_version != LINUX_VERSION_CODE) return -EINVAL; - if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN)) + if (type != BPF_PROG_TYPE_SOCKET_FILTER && + type != BPF_PROG_TYPE_CGROUP_SKB && + !capable(CAP_SYS_ADMIN)) return -EPERM; /* plain bpf_prog allocation */ @@ -855,11 +996,22 @@ static int bpf_prog_load(union bpf_attr *attr) if (err < 0) goto free_used_maps; - err = bpf_prog_new_fd(prog); - if (err < 0) - /* failed to allocate fd */ + err = bpf_prog_alloc_id(prog); + if (err) goto free_used_maps; + err = bpf_prog_new_fd(prog); + if (err < 0) { + /* failed to allocate fd. + * bpf_prog_put() is needed because the above + * bpf_prog_alloc_id() has published the prog + * to the userspace and the userspace may + * have refcnt-ed it through BPF_PROG_GET_FD_BY_ID. + */ + bpf_prog_put(prog); + return err; + } + bpf_prog_kallsyms_add(prog); trace_bpf_prog_load(prog, err); return err; @@ -919,6 +1071,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_CGROUP_INET_SOCK_CREATE: ptype = BPF_PROG_TYPE_CGROUP_SOCK; break; + case BPF_CGROUP_SOCK_OPS: + ptype = BPF_PROG_TYPE_SOCK_OPS; + break; default: return -EINVAL; } @@ -959,6 +1114,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) case BPF_CGROUP_INET_INGRESS: case BPF_CGROUP_INET_EGRESS: case BPF_CGROUP_INET_SOCK_CREATE: + case BPF_CGROUP_SOCK_OPS: cgrp = cgroup_get_from_fd(attr->target_fd); if (IS_ERR(cgrp)) return PTR_ERR(cgrp); @@ -973,6 +1129,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) return ret; } + #endif /* CONFIG_CGROUP_BPF */ #define BPF_PROG_TEST_RUN_LAST_FIELD test.duration @@ -997,6 +1154,237 @@ static int bpf_prog_test_run(const union bpf_attr *attr, return ret; } +#define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id + +static int bpf_obj_get_next_id(const union bpf_attr *attr, + union bpf_attr __user *uattr, + struct idr *idr, + spinlock_t *lock) +{ + u32 next_id = attr->start_id; + int err = 0; + + if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX) + return -EINVAL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + next_id++; + spin_lock_bh(lock); + if (!idr_get_next(idr, &next_id)) + err = -ENOENT; + spin_unlock_bh(lock); + + if (!err) + err = put_user(next_id, &uattr->next_id); + + return err; +} + +#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id + +static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) +{ + struct bpf_prog *prog; + u32 id = attr->prog_id; + int fd; + + if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID)) + return -EINVAL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + spin_lock_bh(&prog_idr_lock); + prog = idr_find(&prog_idr, id); + if (prog) + prog = bpf_prog_inc_not_zero(prog); + else + prog = ERR_PTR(-ENOENT); + spin_unlock_bh(&prog_idr_lock); + + if (IS_ERR(prog)) + return PTR_ERR(prog); + + fd = bpf_prog_new_fd(prog); + if (fd < 0) + bpf_prog_put(prog); + + return fd; +} + +#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD map_id + +static int bpf_map_get_fd_by_id(const union bpf_attr *attr) +{ + struct bpf_map *map; + u32 id = attr->map_id; + int fd; + + if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID)) + return -EINVAL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + spin_lock_bh(&map_idr_lock); + map = idr_find(&map_idr, id); + if (map) + map = bpf_map_inc_not_zero(map, true); + else + map = ERR_PTR(-ENOENT); + spin_unlock_bh(&map_idr_lock); + + if (IS_ERR(map)) + return PTR_ERR(map); + + fd = bpf_map_new_fd(map); + if (fd < 0) + bpf_map_put(map); + + return fd; +} + +static int check_uarg_tail_zero(void __user *uaddr, + size_t expected_size, + size_t actual_size) +{ + unsigned char __user *addr; + unsigned char __user *end; + unsigned char val; + int err; + + if (actual_size <= expected_size) + return 0; + + addr = uaddr + expected_size; + end = uaddr + actual_size; + + for (; addr < end; addr++) { + err = get_user(val, addr); + if (err) + return err; + if (val) + return -E2BIG; + } + + return 0; +} + +static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, + const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); + struct bpf_prog_info info = {}; + u32 info_len = attr->info.info_len; + char __user *uinsns; + u32 ulen; + int err; + + err = check_uarg_tail_zero(uinfo, sizeof(info), info_len); + if (err) + return err; + info_len = min_t(u32, sizeof(info), info_len); + + if (copy_from_user(&info, uinfo, info_len)) + return err; + + info.type = prog->type; + info.id = prog->aux->id; + + memcpy(info.tag, prog->tag, sizeof(prog->tag)); + + if (!capable(CAP_SYS_ADMIN)) { + info.jited_prog_len = 0; + info.xlated_prog_len = 0; + goto done; + } + + ulen = info.jited_prog_len; + info.jited_prog_len = prog->jited_len; + if (info.jited_prog_len && ulen) { + uinsns = u64_to_user_ptr(info.jited_prog_insns); + ulen = min_t(u32, info.jited_prog_len, ulen); + if (copy_to_user(uinsns, prog->bpf_func, ulen)) + return -EFAULT; + } + + ulen = info.xlated_prog_len; + info.xlated_prog_len = bpf_prog_size(prog->len); + if (info.xlated_prog_len && ulen) { + uinsns = u64_to_user_ptr(info.xlated_prog_insns); + ulen = min_t(u32, info.xlated_prog_len, ulen); + if (copy_to_user(uinsns, prog->insnsi, ulen)) + return -EFAULT; + } + +done: + if (copy_to_user(uinfo, &info, info_len) || + put_user(info_len, &uattr->info.info_len)) + return -EFAULT; + + return 0; +} + +static int bpf_map_get_info_by_fd(struct bpf_map *map, + const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info); + struct bpf_map_info info = {}; + u32 info_len = attr->info.info_len; + int err; + + err = check_uarg_tail_zero(uinfo, sizeof(info), info_len); + if (err) + return err; + info_len = min_t(u32, sizeof(info), info_len); + + info.type = map->map_type; + info.id = map->id; + info.key_size = map->key_size; + info.value_size = map->value_size; + info.max_entries = map->max_entries; + info.map_flags = map->map_flags; + + if (copy_to_user(uinfo, &info, info_len) || + put_user(info_len, &uattr->info.info_len)) + return -EFAULT; + + return 0; +} + +#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info + +static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + int ufd = attr->info.bpf_fd; + struct fd f; + int err; + + if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD)) + return -EINVAL; + + f = fdget(ufd); + if (!f.file) + return -EBADFD; + + if (f.file->f_op == &bpf_prog_fops) + err = bpf_prog_get_info_by_fd(f.file->private_data, attr, + uattr); + else if (f.file->f_op == &bpf_map_fops) + err = bpf_map_get_info_by_fd(f.file->private_data, attr, + uattr); + else + err = -EINVAL; + + fdput(f); + return err; +} + SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) { union bpf_attr attr = {}; @@ -1016,23 +1404,10 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz * user-space does not rely on any kernel feature * extensions we dont know about yet. */ - if (size > sizeof(attr)) { - unsigned char __user *addr; - unsigned char __user *end; - unsigned char val; - - addr = (void __user *)uattr + sizeof(attr); - end = (void __user *)uattr + size; - - for (; addr < end; addr++) { - err = get_user(val, addr); - if (err) - return err; - if (val) - return -E2BIG; - } - size = sizeof(attr); - } + err = check_uarg_tail_zero(uattr, sizeof(attr), size); + if (err) + return err; + size = min_t(u32, size, sizeof(attr)); /* copy attributes from user space, may be less than sizeof(bpf_attr) */ if (copy_from_user(&attr, uattr, size) != 0) @@ -1074,6 +1449,23 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_PROG_TEST_RUN: err = bpf_prog_test_run(&attr, uattr); break; + case BPF_PROG_GET_NEXT_ID: + err = bpf_obj_get_next_id(&attr, uattr, + &prog_idr, &prog_idr_lock); + break; + case BPF_MAP_GET_NEXT_ID: + err = bpf_obj_get_next_id(&attr, uattr, + &map_idr, &map_idr_lock); + break; + case BPF_PROG_GET_FD_BY_ID: + err = bpf_prog_get_fd_by_id(&attr); + break; + case BPF_MAP_GET_FD_BY_ID: + err = bpf_map_get_fd_by_id(&attr); + break; + case BPF_OBJ_GET_INFO_BY_FD: + err = bpf_obj_get_info_by_fd(&attr, uattr); + break; default: err = -EINVAL; break; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a8a725697bed..6a86723c5b64 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -546,20 +546,6 @@ static int check_reg_arg(struct bpf_reg_state *regs, u32 regno, return 0; } -static int bpf_size_to_bytes(int bpf_size) -{ - if (bpf_size == BPF_W) - return 4; - else if (bpf_size == BPF_H) - return 2; - else if (bpf_size == BPF_B) - return 1; - else if (bpf_size == BPF_DW) - return 8; - else - return -EINVAL; -} - static bool is_spillable_regtype(enum bpf_reg_type type) { switch (type) { @@ -758,15 +744,29 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, } /* check access to 'struct bpf_context' fields */ -static int check_ctx_access(struct bpf_verifier_env *env, int off, int size, +static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size, enum bpf_access_type t, enum bpf_reg_type *reg_type) { + struct bpf_insn_access_aux info = { + .reg_type = *reg_type, + }; + /* for analyzer ctx accesses are already validated and converted */ if (env->analyzer_ops) return 0; if (env->prog->aux->ops->is_valid_access && - env->prog->aux->ops->is_valid_access(off, size, t, reg_type)) { + env->prog->aux->ops->is_valid_access(off, size, t, &info)) { + /* A non zero info.ctx_field_size indicates that this field is a + * candidate for later verifier transformation to load the whole + * field and then apply a mask when accessed with a narrower + * access than actual ctx access size. A zero info.ctx_field_size + * will only allow for whole field access and rejects any other + * type of narrower access. + */ + env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; + *reg_type = info.reg_type; + /* remember the offset of last byte accessed in ctx */ if (env->prog->aux->max_ctx_offset < off + size) env->prog->aux->max_ctx_offset = off + size; @@ -868,7 +868,7 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, * if t==write && value_regno==-1, some unknown value is stored into memory * if t==read && value_regno==-1, don't care what we read from memory */ -static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, +static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, int off, int bpf_size, enum bpf_access_type t, int value_regno) { @@ -911,7 +911,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, verbose("R%d leaks addr into ctx\n", value_regno); return -EACCES; } - err = check_ctx_access(env, off, size, t, ®_type); + err = check_ctx_access(env, insn_idx, off, size, t, ®_type); if (!err && t == BPF_READ && value_regno >= 0) { mark_reg_unknown_value_and_range(state->regs, value_regno); @@ -926,6 +926,10 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, verbose("invalid stack off=%d size=%d\n", off, size); return -EACCES; } + + if (env->prog->aux->stack_depth < -off) + env->prog->aux->stack_depth = -off; + if (t == BPF_WRITE) { if (!env->allow_ptr_leaks && state->stack_slot_type[MAX_BPF_STACK + off] == STACK_SPILL && @@ -968,7 +972,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, return err; } -static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn) +static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn) { struct bpf_reg_state *regs = env->cur_state.regs; int err; @@ -995,13 +999,13 @@ static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn) } /* check whether atomic_add can read the memory */ - err = check_mem_access(env, insn->dst_reg, insn->off, + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_READ, -1); if (err) return err; /* check whether atomic_add can write into the same memory */ - return check_mem_access(env, insn->dst_reg, insn->off, + return check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1); } @@ -1037,6 +1041,9 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, return -EACCES; } + if (env->prog->aux->stack_depth < -off) + env->prog->aux->stack_depth = -off; + if (meta && meta->raw_mode) { meta->access_size = access_size; meta->regno = regno; @@ -1344,8 +1351,8 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) if (reg->type != PTR_TO_PACKET && reg->type != PTR_TO_PACKET_END) continue; - reg->type = UNKNOWN_VALUE; - reg->imm = 0; + __mark_reg_unknown_value(state->spilled_regs, + i / BPF_REG_SIZE); } } @@ -1414,7 +1421,7 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) * is inferred from register state. */ for (i = 0; i < meta.access_size; i++) { - err = check_mem_access(env, meta.regno, i, BPF_B, BPF_WRITE, -1); + err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, BPF_WRITE, -1); if (err) return err; } @@ -1650,6 +1657,65 @@ static int evaluate_reg_alu(struct bpf_verifier_env *env, struct bpf_insn *insn) return 0; } +static int evaluate_reg_imm_alu_unknown(struct bpf_verifier_env *env, + struct bpf_insn *insn) +{ + struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *dst_reg = ®s[insn->dst_reg]; + struct bpf_reg_state *src_reg = ®s[insn->src_reg]; + u8 opcode = BPF_OP(insn->code); + s64 imm_log2 = __ilog2_u64((long long)dst_reg->imm); + + /* BPF_X code with src_reg->type UNKNOWN_VALUE here. */ + if (src_reg->imm > 0 && dst_reg->imm) { + switch (opcode) { + case BPF_ADD: + /* dreg += sreg + * where both have zero upper bits. Adding them + * can only result making one more bit non-zero + * in the larger value. + * Ex. 0xffff (imm=48) + 1 (imm=63) = 0x10000 (imm=47) + * 0xffff (imm=48) + 0xffff = 0x1fffe (imm=47) + */ + dst_reg->imm = min(src_reg->imm, 63 - imm_log2); + dst_reg->imm--; + break; + case BPF_AND: + /* dreg &= sreg + * AND can not extend zero bits only shrink + * Ex. 0x00..00ffffff + * & 0x0f..ffffffff + * ---------------- + * 0x00..00ffffff + */ + dst_reg->imm = max(src_reg->imm, 63 - imm_log2); + break; + case BPF_OR: + /* dreg |= sreg + * OR can only extend zero bits + * Ex. 0x00..00ffffff + * | 0x0f..ffffffff + * ---------------- + * 0x0f..00ffffff + */ + dst_reg->imm = min(src_reg->imm, 63 - imm_log2); + break; + case BPF_SUB: + case BPF_MUL: + case BPF_RSH: + case BPF_LSH: + /* These may be flushed out later */ + default: + mark_reg_unknown_value(regs, insn->dst_reg); + } + } else { + mark_reg_unknown_value(regs, insn->dst_reg); + } + + dst_reg->type = UNKNOWN_VALUE; + return 0; +} + static int evaluate_reg_imm_alu(struct bpf_verifier_env *env, struct bpf_insn *insn) { @@ -1659,6 +1725,9 @@ static int evaluate_reg_imm_alu(struct bpf_verifier_env *env, u8 opcode = BPF_OP(insn->code); u64 dst_imm = dst_reg->imm; + if (BPF_SRC(insn->code) == BPF_X && src_reg->type == UNKNOWN_VALUE) + return evaluate_reg_imm_alu_unknown(env, insn); + /* dst_reg->type == CONST_IMM here. Simulate execution of insns * containing ALU ops. Don't care about overflow or negative * values, just add/sub/... them; registers are in u64. @@ -1950,6 +2019,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) */ regs[insn->dst_reg].type = CONST_IMM; regs[insn->dst_reg].imm = insn->imm; + regs[insn->dst_reg].id = 0; regs[insn->dst_reg].max_value = insn->imm; regs[insn->dst_reg].min_value = insn->imm; regs[insn->dst_reg].min_align = calc_align(insn->imm); @@ -2407,6 +2477,7 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) regs[insn->dst_reg].type = CONST_IMM; regs[insn->dst_reg].imm = imm; + regs[insn->dst_reg].id = 0; return 0; } @@ -2826,6 +2897,8 @@ static bool states_equal(struct bpf_verifier_env *env, return false; if (i % BPF_REG_SIZE) continue; + if (old->stack_slot_type[i] != STACK_SPILL) + continue; if (memcmp(&old->spilled_regs[i / BPF_REG_SIZE], &cur->spilled_regs[i / BPF_REG_SIZE], sizeof(old->spilled_regs[0]))) @@ -2987,18 +3060,12 @@ static int do_check(struct bpf_verifier_env *env) /* check that memory (src_reg + off) is readable, * the state of dst_reg will be updated by this func */ - err = check_mem_access(env, insn->src_reg, insn->off, + err = check_mem_access(env, insn_idx, insn->src_reg, insn->off, BPF_SIZE(insn->code), BPF_READ, insn->dst_reg); if (err) return err; - if (BPF_SIZE(insn->code) != BPF_W && - BPF_SIZE(insn->code) != BPF_DW) { - insn_idx++; - continue; - } - prev_src_type = &env->insn_aux_data[insn_idx].ptr_type; if (*prev_src_type == NOT_INIT) { @@ -3026,7 +3093,7 @@ static int do_check(struct bpf_verifier_env *env) enum bpf_reg_type *prev_dst_type, dst_reg_type; if (BPF_MODE(insn->code) == BPF_XADD) { - err = check_xadd(env, insn); + err = check_xadd(env, insn_idx, insn); if (err) return err; insn_idx++; @@ -3045,7 +3112,7 @@ static int do_check(struct bpf_verifier_env *env) dst_reg_type = regs[insn->dst_reg].type; /* check that memory (dst_reg + off) is writeable */ - err = check_mem_access(env, insn->dst_reg, insn->off, + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg); if (err) @@ -3074,7 +3141,7 @@ static int do_check(struct bpf_verifier_env *env) return err; /* check that memory (dst_reg + off) is writeable */ - err = check_mem_access(env, insn->dst_reg, insn->off, + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1); if (err) @@ -3172,7 +3239,8 @@ process_bpf_exit: insn_idx++; } - verbose("processed %d insns\n", insn_processed); + verbose("processed %d insns, stack depth %d\n", + insn_processed, env->prog->aux->stack_depth); return 0; } @@ -3372,11 +3440,13 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of static int convert_ctx_accesses(struct bpf_verifier_env *env) { const struct bpf_verifier_ops *ops = env->prog->aux->ops; + int i, cnt, size, ctx_field_size, delta = 0; const int insn_cnt = env->prog->len; struct bpf_insn insn_buf[16], *insn; struct bpf_prog *new_prog; enum bpf_access_type type; - int i, cnt, delta = 0; + bool is_narrower_load; + u32 target_size; if (ops->gen_prologue) { cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, @@ -3416,12 +3486,52 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX) continue; - cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog); - if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { + ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size; + size = BPF_LDST_BYTES(insn); + + /* If the read access is a narrower load of the field, + * convert to a 4/8-byte load, to minimum program type specific + * convert_ctx_access changes. If conversion is successful, + * we will apply proper mask to the result. + */ + is_narrower_load = size < ctx_field_size; + if (is_narrower_load) { + u32 off = insn->off; + u8 size_code; + + if (type == BPF_WRITE) { + verbose("bpf verifier narrow ctx access misconfigured\n"); + return -EINVAL; + } + + size_code = BPF_H; + if (ctx_field_size == 4) + size_code = BPF_W; + else if (ctx_field_size == 8) + size_code = BPF_DW; + + insn->off = off & ~(ctx_field_size - 1); + insn->code = BPF_LDX | BPF_MEM | size_code; + } + + target_size = 0; + cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog, + &target_size); + if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) || + (ctx_field_size && !target_size)) { verbose("bpf verifier is misconfigured\n"); return -EINVAL; } + if (is_narrower_load && size < target_size) { + if (ctx_field_size <= 4) + insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg, + (1 << size * 8) - 1); + else + insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg, + (1 << size * 8) - 1); + } + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); if (!new_prog) return -ENOMEM; @@ -3467,6 +3577,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) * the program array. */ prog->cb_access = 1; + env->prog->aux->stack_depth = MAX_BPF_STACK; /* mark bpf_tail_call as different opcode to avoid * conditional branch in the interpeter for every normal @@ -3474,7 +3585,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) * that doesn't support bpf_tail_call yet */ insn->imm = 0; - insn->code |= BPF_X; + insn->code = BPF_JMP | BPF_TAIL_CALL; continue; } diff --git a/kernel/compat.c b/kernel/compat.c index ebd8bdc3fd68..0621c8e1ab72 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -120,6 +120,50 @@ static int __compat_put_timespec(const struct timespec *ts, struct compat_timesp __put_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0; } +static int __compat_get_timespec64(struct timespec64 *ts64, + const struct compat_timespec __user *cts) +{ + struct compat_timespec ts; + int ret; + + ret = copy_from_user(&ts, cts, sizeof(ts)); + if (ret) + return -EFAULT; + + ts64->tv_sec = ts.tv_sec; + ts64->tv_nsec = ts.tv_nsec; + + return 0; +} + +static int __compat_put_timespec64(const struct timespec64 *ts64, + struct compat_timespec __user *cts) +{ + struct compat_timespec ts = { + .tv_sec = ts64->tv_sec, + .tv_nsec = ts64->tv_nsec + }; + return copy_to_user(cts, &ts, sizeof(ts)) ? -EFAULT : 0; +} + +int compat_get_timespec64(struct timespec64 *ts, const void __user *uts) +{ + if (COMPAT_USE_64BIT_TIME) + return copy_from_user(ts, uts, sizeof(*ts)) ? -EFAULT : 0; + else + return __compat_get_timespec64(ts, uts); +} +EXPORT_SYMBOL_GPL(compat_get_timespec64); + +int compat_put_timespec64(const struct timespec64 *ts, void __user *uts) +{ + if (COMPAT_USE_64BIT_TIME) + return copy_to_user(uts, ts, sizeof(*ts)) ? -EFAULT : 0; + else + return __compat_put_timespec64(ts, uts); +} +EXPORT_SYMBOL_GPL(compat_put_timespec64); + int compat_get_timeval(struct timeval *tv, const void __user *utv) { if (COMPAT_USE_64BIT_TIME) @@ -396,72 +440,6 @@ int put_compat_rusage(const struct rusage *r, struct compat_rusage __user *ru) return 0; } -COMPAT_SYSCALL_DEFINE4(wait4, - compat_pid_t, pid, - compat_uint_t __user *, stat_addr, - int, options, - struct compat_rusage __user *, ru) -{ - if (!ru) { - return sys_wait4(pid, stat_addr, options, NULL); - } else { - struct rusage r; - int ret; - unsigned int status; - mm_segment_t old_fs = get_fs(); - - set_fs (KERNEL_DS); - ret = sys_wait4(pid, - (stat_addr ? - (unsigned int __user *) &status : NULL), - options, (struct rusage __user *) &r); - set_fs (old_fs); - - if (ret > 0) { - if (put_compat_rusage(&r, ru)) - return -EFAULT; - if (stat_addr && put_user(status, stat_addr)) - return -EFAULT; - } - return ret; - } -} - -COMPAT_SYSCALL_DEFINE5(waitid, - int, which, compat_pid_t, pid, - struct compat_siginfo __user *, uinfo, int, options, - struct compat_rusage __user *, uru) -{ - siginfo_t info; - struct rusage ru; - long ret; - mm_segment_t old_fs = get_fs(); - - memset(&info, 0, sizeof(info)); - - set_fs(KERNEL_DS); - ret = sys_waitid(which, pid, (siginfo_t __user *)&info, options, - uru ? (struct rusage __user *)&ru : NULL); - set_fs(old_fs); - - if ((ret < 0) || (info.si_signo == 0)) - return ret; - - if (uru) { - /* sys_waitid() overwrites everything in ru */ - if (COMPAT_USE_64BIT_TIME) - ret = copy_to_user(uru, &ru, sizeof(ru)); - else - ret = put_compat_rusage(&ru, uru); - if (ret) - return -EFAULT; - } - - BUG_ON(info.si_code & __SI_MASK); - info.si_code |= __SI_CHLD; - return copy_siginfo_to_user32(uinfo, &info); -} - static int compat_get_user_cpu_mask(compat_ulong_t __user *user_mask_ptr, unsigned len, struct cpumask *new_mask) { @@ -542,6 +520,27 @@ int put_compat_itimerspec(struct compat_itimerspec __user *dst, return 0; } +int get_compat_itimerspec64(struct itimerspec64 *its, + const struct compat_itimerspec __user *uits) +{ + + if (__compat_get_timespec64(&its->it_interval, &uits->it_interval) || + __compat_get_timespec64(&its->it_value, &uits->it_value)) + return -EFAULT; + return 0; +} +EXPORT_SYMBOL_GPL(get_compat_itimerspec64); + +int put_compat_itimerspec64(const struct itimerspec64 *its, + struct compat_itimerspec __user *uits) +{ + if (__compat_put_timespec64(&its->it_interval, &uits->it_interval) || + __compat_put_timespec64(&its->it_value, &uits->it_value)) + return -EFAULT; + return 0; +} +EXPORT_SYMBOL_GPL(put_compat_itimerspec64); + /* * We currently only need the following fields from the sigevent * structure: sigev_value, sigev_signo, sig_notify and (sometimes diff --git a/kernel/events/core.c b/kernel/events/core.c index 4d2c32f98482..1538df9b2b65 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3632,10 +3632,10 @@ static inline u64 perf_event_count(struct perf_event *event) * will not be local and we cannot read them atomically * - must not have a pmu::count method */ -u64 perf_event_read_local(struct perf_event *event) +int perf_event_read_local(struct perf_event *event, u64 *value) { unsigned long flags; - u64 val; + int ret = 0; /* * Disabling interrupts avoids all counter scheduling (context @@ -3643,25 +3643,37 @@ u64 perf_event_read_local(struct perf_event *event) */ local_irq_save(flags); - /* If this is a per-task event, it must be for current */ - WARN_ON_ONCE((event->attach_state & PERF_ATTACH_TASK) && - event->hw.target != current); - - /* If this is a per-CPU event, it must be for this CPU */ - WARN_ON_ONCE(!(event->attach_state & PERF_ATTACH_TASK) && - event->cpu != smp_processor_id()); - /* * It must not be an event with inherit set, we cannot read * all child counters from atomic context. */ - WARN_ON_ONCE(event->attr.inherit); + if (event->attr.inherit) { + ret = -EOPNOTSUPP; + goto out; + } /* * It must not have a pmu::count method, those are not * NMI safe. */ - WARN_ON_ONCE(event->pmu->count); + if (event->pmu->count) { + ret = -EOPNOTSUPP; + goto out; + } + + /* If this is a per-task event, it must be for current */ + if ((event->attach_state & PERF_ATTACH_TASK) && + event->hw.target != current) { + ret = -EINVAL; + goto out; + } + + /* If this is a per-CPU event, it must be for this CPU */ + if (!(event->attach_state & PERF_ATTACH_TASK) && + event->cpu != smp_processor_id()) { + ret = -EINVAL; + goto out; + } /* * If the event is currently on this CPU, its either a per-task event, @@ -3671,10 +3683,11 @@ u64 perf_event_read_local(struct perf_event *event) if (event->oncpu == smp_processor_id()) event->pmu->read(event); - val = local64_read(&event->count); + *value = local64_read(&event->count); +out: local_irq_restore(flags); - return val; + return ret; } static int perf_event_read(struct perf_event *event, bool group) @@ -8049,12 +8062,8 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) bool is_kprobe, is_tracepoint; struct bpf_prog *prog; - if (event->attr.type == PERF_TYPE_HARDWARE || - event->attr.type == PERF_TYPE_SOFTWARE) - return perf_event_set_bpf_handler(event, prog_fd); - if (event->attr.type != PERF_TYPE_TRACEPOINT) - return -EINVAL; + return perf_event_set_bpf_handler(event, prog_fd); if (event->tp_event->prog) return -EEXIST; diff --git a/kernel/exit.c b/kernel/exit.c index c63226283aef..b0cc86a2d00b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -62,6 +62,7 @@ #include <linux/kcov.h> #include <linux/random.h> #include <linux/rcuwait.h> +#include <linux/compat.h> #include <linux/uaccess.h> #include <asm/unistd.h> @@ -982,14 +983,21 @@ SYSCALL_DEFINE1(exit_group, int, error_code) return 0; } +struct waitid_info { + pid_t pid; + uid_t uid; + int status; + int cause; +}; + struct wait_opts { enum pid_type wo_type; int wo_flags; struct pid *wo_pid; - struct siginfo __user *wo_info; - int __user *wo_stat; - struct rusage __user *wo_rusage; + struct waitid_info *wo_info; + int wo_stat; + struct rusage *wo_rusage; wait_queue_entry_t child_wait; int notask_error; @@ -1036,34 +1044,6 @@ eligible_child(struct wait_opts *wo, bool ptrace, struct task_struct *p) return 1; } -static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p, - pid_t pid, uid_t uid, int why, int status) -{ - struct siginfo __user *infop; - int retval = wo->wo_rusage - ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; - - put_task_struct(p); - infop = wo->wo_info; - if (infop) { - if (!retval) - retval = put_user(SIGCHLD, &infop->si_signo); - if (!retval) - retval = put_user(0, &infop->si_errno); - if (!retval) - retval = put_user((short)why, &infop->si_code); - if (!retval) - retval = put_user(pid, &infop->si_pid); - if (!retval) - retval = put_user(uid, &infop->si_uid); - if (!retval) - retval = put_user(status, &infop->si_status); - } - if (!retval) - retval = pid; - return retval; -} - /* * Handle sys_wait4 work for one task in state EXIT_ZOMBIE. We hold * read_lock(&tasklist_lock) on entry. If we return zero, we still hold @@ -1072,30 +1052,23 @@ static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p, */ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) { - int state, retval, status; + int state, status; pid_t pid = task_pid_vnr(p); uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p)); - struct siginfo __user *infop; + struct waitid_info *infop; if (!likely(wo->wo_flags & WEXITED)) return 0; if (unlikely(wo->wo_flags & WNOWAIT)) { - int exit_code = p->exit_code; - int why; - + status = p->exit_code; get_task_struct(p); read_unlock(&tasklist_lock); sched_annotate_sleep(); - - if ((exit_code & 0x7f) == 0) { - why = CLD_EXITED; - status = exit_code >> 8; - } else { - why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED; - status = exit_code & 0x7f; - } - return wait_noreap_copyout(wo, p, pid, uid, why, status); + if (wo->wo_rusage) + getrusage(p, RUSAGE_BOTH, wo->wo_rusage); + put_task_struct(p); + goto out_info; } /* * Move the task's state to DEAD/TRACE, only one thread can do this. @@ -1168,38 +1141,11 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) spin_unlock_irq(¤t->sighand->siglock); } - retval = wo->wo_rusage - ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; + if (wo->wo_rusage) + getrusage(p, RUSAGE_BOTH, wo->wo_rusage); status = (p->signal->flags & SIGNAL_GROUP_EXIT) ? p->signal->group_exit_code : p->exit_code; - if (!retval && wo->wo_stat) - retval = put_user(status, wo->wo_stat); - - infop = wo->wo_info; - if (!retval && infop) - retval = put_user(SIGCHLD, &infop->si_signo); - if (!retval && infop) - retval = put_user(0, &infop->si_errno); - if (!retval && infop) { - int why; - - if ((status & 0x7f) == 0) { - why = CLD_EXITED; - status >>= 8; - } else { - why = (status & 0x80) ? CLD_DUMPED : CLD_KILLED; - status &= 0x7f; - } - retval = put_user((short)why, &infop->si_code); - if (!retval) - retval = put_user(status, &infop->si_status); - } - if (!retval && infop) - retval = put_user(pid, &infop->si_pid); - if (!retval && infop) - retval = put_user(uid, &infop->si_uid); - if (!retval) - retval = pid; + wo->wo_stat = status; if (state == EXIT_TRACE) { write_lock_irq(&tasklist_lock); @@ -1216,7 +1162,21 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) if (state == EXIT_DEAD) release_task(p); - return retval; +out_info: + infop = wo->wo_info; + if (infop) { + if ((status & 0x7f) == 0) { + infop->cause = CLD_EXITED; + infop->status = status >> 8; + } else { + infop->cause = (status & 0x80) ? CLD_DUMPED : CLD_KILLED; + infop->status = status & 0x7f; + } + infop->pid = pid; + infop->uid = uid; + } + + return pid; } static int *task_stopped_code(struct task_struct *p, bool ptrace) @@ -1252,8 +1212,8 @@ static int *task_stopped_code(struct task_struct *p, bool ptrace) static int wait_task_stopped(struct wait_opts *wo, int ptrace, struct task_struct *p) { - struct siginfo __user *infop; - int retval, exit_code, *p_code, why; + struct waitid_info *infop; + int exit_code, *p_code, why; uid_t uid = 0; /* unneeded, required by compiler */ pid_t pid; @@ -1298,34 +1258,21 @@ unlock_sig: why = ptrace ? CLD_TRAPPED : CLD_STOPPED; read_unlock(&tasklist_lock); sched_annotate_sleep(); + if (wo->wo_rusage) + getrusage(p, RUSAGE_BOTH, wo->wo_rusage); + put_task_struct(p); - if (unlikely(wo->wo_flags & WNOWAIT)) - return wait_noreap_copyout(wo, p, pid, uid, why, exit_code); - - retval = wo->wo_rusage - ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; - if (!retval && wo->wo_stat) - retval = put_user((exit_code << 8) | 0x7f, wo->wo_stat); + if (likely(!(wo->wo_flags & WNOWAIT))) + wo->wo_stat = (exit_code << 8) | 0x7f; infop = wo->wo_info; - if (!retval && infop) - retval = put_user(SIGCHLD, &infop->si_signo); - if (!retval && infop) - retval = put_user(0, &infop->si_errno); - if (!retval && infop) - retval = put_user((short)why, &infop->si_code); - if (!retval && infop) - retval = put_user(exit_code, &infop->si_status); - if (!retval && infop) - retval = put_user(pid, &infop->si_pid); - if (!retval && infop) - retval = put_user(uid, &infop->si_uid); - if (!retval) - retval = pid; - put_task_struct(p); - - BUG_ON(!retval); - return retval; + if (infop) { + infop->cause = why; + infop->status = exit_code; + infop->pid = pid; + infop->uid = uid; + } + return pid; } /* @@ -1336,7 +1283,7 @@ unlock_sig: */ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) { - int retval; + struct waitid_info *infop; pid_t pid; uid_t uid; @@ -1361,22 +1308,20 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) get_task_struct(p); read_unlock(&tasklist_lock); sched_annotate_sleep(); + if (wo->wo_rusage) + getrusage(p, RUSAGE_BOTH, wo->wo_rusage); + put_task_struct(p); - if (!wo->wo_info) { - retval = wo->wo_rusage - ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; - put_task_struct(p); - if (!retval && wo->wo_stat) - retval = put_user(0xffff, wo->wo_stat); - if (!retval) - retval = pid; + infop = wo->wo_info; + if (!infop) { + wo->wo_stat = 0xffff; } else { - retval = wait_noreap_copyout(wo, p, pid, uid, - CLD_CONTINUED, SIGCONT); - BUG_ON(retval == 0); + infop->cause = CLD_CONTINUED; + infop->pid = pid; + infop->uid = uid; + infop->status = SIGCONT; } - - return retval; + return pid; } /* @@ -1604,8 +1549,8 @@ end: return retval; } -SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, - infop, int, options, struct rusage __user *, ru) +static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop, + int options, struct rusage *ru) { struct wait_opts wo; struct pid *pid = NULL; @@ -1643,38 +1588,46 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, wo.wo_pid = pid; wo.wo_flags = options; wo.wo_info = infop; - wo.wo_stat = NULL; wo.wo_rusage = ru; ret = do_wait(&wo); - if (ret > 0) { + if (ret > 0) ret = 0; - } else if (infop) { - /* - * For a WNOHANG return, clear out all the fields - * we would set so the user can easily tell the - * difference. - */ - if (!ret) - ret = put_user(0, &infop->si_signo); - if (!ret) - ret = put_user(0, &infop->si_errno); - if (!ret) - ret = put_user(0, &infop->si_code); - if (!ret) - ret = put_user(0, &infop->si_pid); - if (!ret) - ret = put_user(0, &infop->si_uid); - if (!ret) - ret = put_user(0, &infop->si_status); - } put_pid(pid); return ret; } -SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, - int, options, struct rusage __user *, ru) +SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, + infop, int, options, struct rusage __user *, ru) +{ + struct rusage r; + struct waitid_info info = {.status = 0}; + long err = kernel_waitid(which, upid, &info, options, ru ? &r : NULL); + + if (!err) { + if (ru && copy_to_user(ru, &r, sizeof(struct rusage))) + return -EFAULT; + } + if (!infop) + return err; + + user_access_begin(); + unsafe_put_user(err ? 0 : SIGCHLD, &infop->si_signo, Efault); + unsafe_put_user(0, &infop->si_errno, Efault); + unsafe_put_user((short)info.cause, &infop->si_code, Efault); + unsafe_put_user(info.pid, &infop->si_pid, Efault); + unsafe_put_user(info.uid, &infop->si_uid, Efault); + unsafe_put_user(info.status, &infop->si_status, Efault); + user_access_end(); + return err; +Efault: + user_access_end(); + return -EFAULT; +} + +long kernel_wait4(pid_t upid, int __user *stat_addr, int options, + struct rusage *ru) { struct wait_opts wo; struct pid *pid = NULL; @@ -1702,14 +1655,29 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, wo.wo_pid = pid; wo.wo_flags = options | WEXITED; wo.wo_info = NULL; - wo.wo_stat = stat_addr; + wo.wo_stat = 0; wo.wo_rusage = ru; ret = do_wait(&wo); put_pid(pid); + if (ret > 0 && stat_addr && put_user(wo.wo_stat, stat_addr)) + ret = -EFAULT; return ret; } +SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, + int, options, struct rusage __user *, ru) +{ + struct rusage r; + long err = kernel_wait4(upid, stat_addr, options, ru ? &r : NULL); + + if (err > 0) { + if (ru && copy_to_user(ru, &r, sizeof(struct rusage))) + return -EFAULT; + } + return err; +} + #ifdef __ARCH_WANT_SYS_WAITPID /* @@ -1722,3 +1690,56 @@ SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options) } #endif + +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE4(wait4, + compat_pid_t, pid, + compat_uint_t __user *, stat_addr, + int, options, + struct compat_rusage __user *, ru) +{ + struct rusage r; + long err = kernel_wait4(pid, stat_addr, options, ru ? &r : NULL); + if (err > 0) { + if (ru && put_compat_rusage(&r, ru)) + return -EFAULT; + } + return err; +} + +COMPAT_SYSCALL_DEFINE5(waitid, + int, which, compat_pid_t, pid, + struct compat_siginfo __user *, infop, int, options, + struct compat_rusage __user *, uru) +{ + struct rusage ru; + struct waitid_info info = {.status = 0}; + long err = kernel_waitid(which, pid, &info, options, uru ? &ru : NULL); + + if (!err && uru) { + /* kernel_waitid() overwrites everything in ru */ + if (COMPAT_USE_64BIT_TIME) + err = copy_to_user(uru, &ru, sizeof(ru)); + else + err = put_compat_rusage(&ru, uru); + if (err) + return -EFAULT; + } + + if (!infop) + return err; + + user_access_begin(); + unsafe_put_user(err ? 0 : SIGCHLD, &infop->si_signo, Efault); + unsafe_put_user(0, &infop->si_errno, Efault); + unsafe_put_user((short)info.cause, &infop->si_code, Efault); + unsafe_put_user(info.pid, &infop->si_pid, Efault); + unsafe_put_user(info.uid, &infop->si_uid, Efault); + unsafe_put_user(info.status, &infop->si_status, Efault); + user_access_end(); + return err; +Efault: + user_access_end(); + return -EFAULT; +} +#endif diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index b118735fea9d..766e7e4d3ad9 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -162,16 +162,10 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd, } if (cmdline_len) { - image->cmdline_buf = kzalloc(cmdline_len, GFP_KERNEL); - if (!image->cmdline_buf) { - ret = -ENOMEM; - goto out; - } - - ret = copy_from_user(image->cmdline_buf, cmdline_ptr, - cmdline_len); - if (ret) { - ret = -EFAULT; + image->cmdline_buf = memdup_user(cmdline_ptr, cmdline_len); + if (IS_ERR(image->cmdline_buf)) { + ret = PTR_ERR(image->cmdline_buf); + image->cmdline_buf = NULL; goto out; } diff --git a/kernel/sys.c b/kernel/sys.c index 8a94b4eabcaa..dab1a0658a92 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1552,7 +1552,7 @@ static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r) r->ru_oublock += task_io_get_oublock(t); } -static void k_getrusage(struct task_struct *p, int who, struct rusage *r) +void getrusage(struct task_struct *p, int who, struct rusage *r) { struct task_struct *t; unsigned long flags; @@ -1626,20 +1626,16 @@ out: r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */ } -int getrusage(struct task_struct *p, int who, struct rusage __user *ru) +SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru) { struct rusage r; - k_getrusage(p, who, &r); - return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0; -} - -SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru) -{ if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN && who != RUSAGE_THREAD) return -EINVAL; - return getrusage(current, who, ru); + + getrusage(current, who, &r); + return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0; } #ifdef CONFIG_COMPAT @@ -1651,7 +1647,7 @@ COMPAT_SYSCALL_DEFINE2(getrusage, int, who, struct compat_rusage __user *, ru) who != RUSAGE_THREAD) return -EINVAL; - k_getrusage(current, who, &r); + getrusage(current, who, &r); return put_compat_rusage(&r, ru); } #endif diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index c991cf212c6d..0b8ff7d257ea 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -712,14 +712,14 @@ static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp, alarmtimer_freezerset(absexp, type); restart = ¤t->restart_block; if (restart->nanosleep.type != TT_NONE) { - struct timespec rmt; + struct timespec64 rmt; ktime_t rem; rem = ktime_sub(absexp, alarm_bases[type].gettime()); if (rem <= 0) return 0; - rmt = ktime_to_timespec(rem); + rmt = ktime_to_timespec64(rem); return nanosleep_copyout(restart, &rmt); } diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 81da124f1115..88f75f92ef36 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1440,17 +1440,17 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) } EXPORT_SYMBOL_GPL(hrtimer_init_sleeper); -int nanosleep_copyout(struct restart_block *restart, struct timespec *ts) +int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts) { switch(restart->nanosleep.type) { #ifdef CONFIG_COMPAT case TT_COMPAT: - if (compat_put_timespec(ts, restart->nanosleep.compat_rmtp)) + if (compat_put_timespec64(ts, restart->nanosleep.compat_rmtp)) return -EFAULT; break; #endif case TT_NATIVE: - if (copy_to_user(restart->nanosleep.rmtp, ts, sizeof(struct timespec))) + if (put_timespec64(ts, restart->nanosleep.rmtp)) return -EFAULT; break; default: @@ -1485,11 +1485,11 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod restart = ¤t->restart_block; if (restart->nanosleep.type != TT_NONE) { ktime_t rem = hrtimer_expires_remaining(&t->timer); - struct timespec rmt; + struct timespec64 rmt; if (rem <= 0) return 0; - rmt = ktime_to_timespec(rem); + rmt = ktime_to_timespec64(rem); return nanosleep_copyout(restart, &rmt); } @@ -1546,19 +1546,17 @@ out: SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, struct timespec __user *, rmtp) { - struct timespec64 tu64; - struct timespec tu; + struct timespec64 tu; - if (copy_from_user(&tu, rqtp, sizeof(tu))) + if (get_timespec64(&tu, rqtp)) return -EFAULT; - tu64 = timespec_to_timespec64(tu); - if (!timespec64_valid(&tu64)) + if (!timespec64_valid(&tu)) return -EINVAL; current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE; current->restart_block.nanosleep.rmtp = rmtp; - return hrtimer_nanosleep(&tu64, HRTIMER_MODE_REL, CLOCK_MONOTONIC); + return hrtimer_nanosleep(&tu, HRTIMER_MODE_REL, CLOCK_MONOTONIC); } #ifdef CONFIG_COMPAT @@ -1566,19 +1564,17 @@ SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, COMPAT_SYSCALL_DEFINE2(nanosleep, struct compat_timespec __user *, rqtp, struct compat_timespec __user *, rmtp) { - struct timespec64 tu64; - struct timespec tu; + struct timespec64 tu; - if (compat_get_timespec(&tu, rqtp)) + if (compat_get_timespec64(&tu, rqtp)) return -EFAULT; - tu64 = timespec_to_timespec64(tu); - if (!timespec64_valid(&tu64)) + if (!timespec64_valid(&tu)) return -EINVAL; current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE; current->restart_block.nanosleep.compat_rmtp = rmtp; - return hrtimer_nanosleep(&tu64, HRTIMER_MODE_REL, CLOCK_MONOTONIC); + return hrtimer_nanosleep(&tu, HRTIMER_MODE_REL, CLOCK_MONOTONIC); } #endif diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 60cb24ac9ebc..a3bd5dbe0dc4 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -1318,12 +1318,8 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, */ restart = ¤t->restart_block; restart->nanosleep.expires = expires; - if (restart->nanosleep.type != TT_NONE) { - struct timespec ts; - - ts = timespec64_to_timespec(it.it_value); - error = nanosleep_copyout(restart, &ts); - } + if (restart->nanosleep.type != TT_NONE) + error = nanosleep_copyout(restart, &it.it_value); } return error; diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c index 38f3b20efa29..06f34feb635e 100644 --- a/kernel/time/posix-stubs.c +++ b/kernel/time/posix-stubs.c @@ -41,12 +41,6 @@ SYS_NI(setitimer); #ifdef __ARCH_WANT_SYS_ALARM SYS_NI(alarm); #endif -COMPAT_SYS_NI(timer_create); -COMPAT_SYS_NI(clock_adjtime); -COMPAT_SYS_NI(timer_settime); -COMPAT_SYS_NI(timer_gettime); -COMPAT_SYS_NI(getitimer); -COMPAT_SYS_NI(setitimer); /* * We preserve minimal support for CLOCK_REALTIME and CLOCK_MONOTONIC @@ -57,40 +51,52 @@ COMPAT_SYS_NI(setitimer); SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, const struct timespec __user *, tp) { - struct timespec64 new_tp64; - struct timespec new_tp; + struct timespec64 new_tp; if (which_clock != CLOCK_REALTIME) return -EINVAL; - if (copy_from_user(&new_tp, tp, sizeof (*tp))) + if (get_timespec64(&new_tp, tp)) return -EFAULT; - new_tp64 = timespec_to_timespec64(new_tp); - return do_sys_settimeofday64(&new_tp64, NULL); + return do_sys_settimeofday64(&new_tp, NULL); } -SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, - struct timespec __user *,tp) +int do_clock_gettime(clockid_t which_clock, struct timespec64 *tp) { - struct timespec64 kernel_tp64; - struct timespec kernel_tp; - switch (which_clock) { - case CLOCK_REALTIME: ktime_get_real_ts64(&kernel_tp64); break; - case CLOCK_MONOTONIC: ktime_get_ts64(&kernel_tp64); break; - case CLOCK_BOOTTIME: get_monotonic_boottime64(&kernel_tp64); break; - default: return -EINVAL; + case CLOCK_REALTIME: + ktime_get_real_ts64(tp); + break; + case CLOCK_MONOTONIC: + ktime_get_ts64(tp); + break; + case CLOCK_BOOTTIME: + get_monotonic_boottime64(tp); + break; + default: + return -EINVAL; } - kernel_tp = timespec64_to_timespec(kernel_tp64); - if (copy_to_user(tp, &kernel_tp, sizeof (kernel_tp))) + return 0; +} +SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, + struct timespec __user *, tp) +{ + int ret; + struct timespec64 kernel_tp; + + ret = do_clock_gettime(which_clock, &kernel_tp); + if (ret) + return ret; + + if (put_timespec64(&kernel_tp, tp)) return -EFAULT; return 0; } SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, struct timespec __user *, tp) { - struct timespec rtn_tp = { + struct timespec64 rtn_tp = { .tv_sec = 0, .tv_nsec = hrtimer_resolution, }; @@ -99,7 +105,7 @@ SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, struct timespec __us case CLOCK_REALTIME: case CLOCK_MONOTONIC: case CLOCK_BOOTTIME: - if (copy_to_user(tp, &rtn_tp, sizeof(rtn_tp))) + if (put_timespec64(&rtn_tp, tp)) return -EFAULT; return 0; default: @@ -138,44 +144,45 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, } #ifdef CONFIG_COMPAT +COMPAT_SYS_NI(timer_create); +COMPAT_SYS_NI(clock_adjtime); +COMPAT_SYS_NI(timer_settime); +COMPAT_SYS_NI(timer_gettime); +COMPAT_SYS_NI(getitimer); +COMPAT_SYS_NI(setitimer); + COMPAT_SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, struct compat_timespec __user *, tp) { - struct timespec64 new_tp64; - struct timespec new_tp; + struct timespec64 new_tp; if (which_clock != CLOCK_REALTIME) return -EINVAL; - if (compat_get_timespec(&new_tp, tp)) + if (compat_get_timespec64(&new_tp, tp)) return -EFAULT; - new_tp64 = timespec_to_timespec64(new_tp); - return do_sys_settimeofday64(&new_tp64, NULL); + return do_sys_settimeofday64(&new_tp, NULL); } -COMPAT_SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, - struct compat_timespec __user *,tp) +COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock, + struct compat_timespec __user *, tp) { - struct timespec64 kernel_tp64; - struct timespec kernel_tp; + int ret; + struct timespec64 kernel_tp; - switch (which_clock) { - case CLOCK_REALTIME: ktime_get_real_ts64(&kernel_tp64); break; - case CLOCK_MONOTONIC: ktime_get_ts64(&kernel_tp64); break; - case CLOCK_BOOTTIME: get_monotonic_boottime64(&kernel_tp64); break; - default: return -EINVAL; - } + ret = do_clock_gettime(which_clock, &kernel_tp); + if (ret) + return ret; - kernel_tp = timespec64_to_timespec(kernel_tp64); - if (compat_put_timespec(&kernel_tp, tp)) + if (compat_put_timespec64(&kernel_tp, tp)) return -EFAULT; return 0; } -COMPAT_SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, +COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock, struct compat_timespec __user *, tp) { - struct timespec rtn_tp = { + struct timespec64 rtn_tp = { .tv_sec = 0, .tv_nsec = hrtimer_resolution, }; @@ -184,13 +191,14 @@ COMPAT_SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, case CLOCK_REALTIME: case CLOCK_MONOTONIC: case CLOCK_BOOTTIME: - if (compat_put_timespec(&rtn_tp, tp)) + if (compat_put_timespec64(&rtn_tp, tp)) return -EFAULT; return 0; default: return -EINVAL; } } + COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags, struct compat_timespec __user *, rqtp, struct compat_timespec __user *, rmtp) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 82d67be7d9d1..13d6881f908b 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -739,13 +739,11 @@ static int do_timer_gettime(timer_t timer_id, struct itimerspec64 *setting) SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, struct itimerspec __user *, setting) { - struct itimerspec64 cur_setting64; + struct itimerspec64 cur_setting; - int ret = do_timer_gettime(timer_id, &cur_setting64); + int ret = do_timer_gettime(timer_id, &cur_setting); if (!ret) { - struct itimerspec cur_setting; - cur_setting = itimerspec64_to_itimerspec(&cur_setting64); - if (copy_to_user(setting, &cur_setting, sizeof (cur_setting))) + if (put_itimerspec64(&cur_setting, setting)) ret = -EFAULT; } return ret; @@ -755,13 +753,11 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, COMPAT_SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, struct compat_itimerspec __user *, setting) { - struct itimerspec64 cur_setting64; + struct itimerspec64 cur_setting; - int ret = do_timer_gettime(timer_id, &cur_setting64); + int ret = do_timer_gettime(timer_id, &cur_setting); if (!ret) { - struct itimerspec cur_setting; - cur_setting = itimerspec64_to_itimerspec(&cur_setting64); - if (put_compat_itimerspec(setting, &cur_setting)) + if (put_compat_itimerspec64(&cur_setting, setting)) ret = -EFAULT; } return ret; @@ -907,23 +903,19 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, const struct itimerspec __user *, new_setting, struct itimerspec __user *, old_setting) { - struct itimerspec64 new_spec64, old_spec64; - struct itimerspec64 *rtn = old_setting ? &old_spec64 : NULL; - struct itimerspec new_spec; + struct itimerspec64 new_spec, old_spec; + struct itimerspec64 *rtn = old_setting ? &old_spec : NULL; int error = 0; if (!new_setting) return -EINVAL; - if (copy_from_user(&new_spec, new_setting, sizeof (new_spec))) + if (get_itimerspec64(&new_spec, new_setting)) return -EFAULT; - new_spec64 = itimerspec_to_itimerspec64(&new_spec); - error = do_timer_settime(timer_id, flags, &new_spec64, rtn); + error = do_timer_settime(timer_id, flags, &new_spec, rtn); if (!error && old_setting) { - struct itimerspec old_spec; - old_spec = itimerspec64_to_itimerspec(&old_spec64); - if (copy_to_user(old_setting, &old_spec, sizeof (old_spec))) + if (put_itimerspec64(&old_spec, old_setting)) error = -EFAULT; } return error; @@ -934,22 +926,18 @@ COMPAT_SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, struct compat_itimerspec __user *, new, struct compat_itimerspec __user *, old) { - struct itimerspec64 new_spec64, old_spec64; - struct itimerspec64 *rtn = old ? &old_spec64 : NULL; - struct itimerspec new_spec; + struct itimerspec64 new_spec, old_spec; + struct itimerspec64 *rtn = old ? &old_spec : NULL; int error = 0; if (!new) return -EINVAL; - if (get_compat_itimerspec(&new_spec, new)) + if (get_compat_itimerspec64(&new_spec, new)) return -EFAULT; - new_spec64 = itimerspec_to_itimerspec64(&new_spec); - error = do_timer_settime(timer_id, flags, &new_spec64, rtn); + error = do_timer_settime(timer_id, flags, &new_spec, rtn); if (!error && old) { - struct itimerspec old_spec; - old_spec = itimerspec64_to_itimerspec(&old_spec64); - if (put_compat_itimerspec(old, &old_spec)) + if (put_compat_itimerspec64(&old_spec, old)) error = -EFAULT; } return error; @@ -1049,34 +1037,30 @@ SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, const struct timespec __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); - struct timespec64 new_tp64; - struct timespec new_tp; + struct timespec64 new_tp; if (!kc || !kc->clock_set) return -EINVAL; - if (copy_from_user(&new_tp, tp, sizeof (*tp))) + if (get_timespec64(&new_tp, tp)) return -EFAULT; - new_tp64 = timespec_to_timespec64(new_tp); - return kc->clock_set(which_clock, &new_tp64); + return kc->clock_set(which_clock, &new_tp); } SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, struct timespec __user *,tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); - struct timespec64 kernel_tp64; - struct timespec kernel_tp; + struct timespec64 kernel_tp; int error; if (!kc) return -EINVAL; - error = kc->clock_get(which_clock, &kernel_tp64); - kernel_tp = timespec64_to_timespec(kernel_tp64); + error = kc->clock_get(which_clock, &kernel_tp); - if (!error && copy_to_user(tp, &kernel_tp, sizeof (kernel_tp))) + if (!error && put_timespec64(&kernel_tp, tp)) error = -EFAULT; return error; @@ -1109,17 +1093,15 @@ SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, struct timespec __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); - struct timespec64 rtn_tp64; - struct timespec rtn_tp; + struct timespec64 rtn_tp; int error; if (!kc) return -EINVAL; - error = kc->clock_getres(which_clock, &rtn_tp64); - rtn_tp = timespec64_to_timespec(rtn_tp64); + error = kc->clock_getres(which_clock, &rtn_tp); - if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp))) + if (!error && tp && put_timespec64(&rtn_tp, tp)) error = -EFAULT; return error; @@ -1131,38 +1113,33 @@ COMPAT_SYSCALL_DEFINE2(clock_settime, clockid_t, which_clock, struct compat_timespec __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); - struct timespec64 new_tp64; - struct timespec new_tp; + struct timespec64 ts; if (!kc || !kc->clock_set) return -EINVAL; - if (compat_get_timespec(&new_tp, tp)) + if (compat_get_timespec64(&ts, tp)) return -EFAULT; - new_tp64 = timespec_to_timespec64(new_tp); - - return kc->clock_set(which_clock, &new_tp64); + return kc->clock_set(which_clock, &ts); } COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock, struct compat_timespec __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); - struct timespec64 kernel_tp64; - struct timespec kernel_tp; - int error; + struct timespec64 ts; + int err; if (!kc) return -EINVAL; - error = kc->clock_get(which_clock, &kernel_tp64); - kernel_tp = timespec64_to_timespec(kernel_tp64); + err = kc->clock_get(which_clock, &ts); - if (!error && compat_put_timespec(&kernel_tp, tp)) - error = -EFAULT; + if (!err && compat_put_timespec64(&ts, tp)) + err = -EFAULT; - return error; + return err; } COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock, @@ -1193,21 +1170,19 @@ COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock, struct compat_timespec __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); - struct timespec64 rtn_tp64; - struct timespec rtn_tp; - int error; + struct timespec64 ts; + int err; if (!kc) return -EINVAL; - error = kc->clock_getres(which_clock, &rtn_tp64); - rtn_tp = timespec64_to_timespec(rtn_tp64); - - if (!error && tp && compat_put_timespec(&rtn_tp, tp)) - error = -EFAULT; + err = kc->clock_getres(which_clock, &ts); + if (!err && tp && compat_put_timespec64(&ts, tp)) + return -EFAULT; - return error; + return err; } + #endif /* @@ -1226,26 +1201,24 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, struct timespec __user *, rmtp) { const struct k_clock *kc = clockid_to_kclock(which_clock); - struct timespec64 t64; - struct timespec t; + struct timespec64 t; if (!kc) return -EINVAL; if (!kc->nsleep) return -ENANOSLEEP_NOTSUP; - if (copy_from_user(&t, rqtp, sizeof (struct timespec))) + if (get_timespec64(&t, rqtp)) return -EFAULT; - t64 = timespec_to_timespec64(t); - if (!timespec64_valid(&t64)) + if (!timespec64_valid(&t)) return -EINVAL; if (flags & TIMER_ABSTIME) rmtp = NULL; current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE; current->restart_block.nanosleep.rmtp = rmtp; - return kc->nsleep(which_clock, flags, &t64); + return kc->nsleep(which_clock, flags, &t); } #ifdef CONFIG_COMPAT @@ -1254,26 +1227,24 @@ COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags, struct compat_timespec __user *, rmtp) { const struct k_clock *kc = clockid_to_kclock(which_clock); - struct timespec64 t64; - struct timespec t; + struct timespec64 t; if (!kc) return -EINVAL; if (!kc->nsleep) return -ENANOSLEEP_NOTSUP; - if (compat_get_timespec(&t, rqtp)) + if (compat_get_timespec64(&t, rqtp)) return -EFAULT; - t64 = timespec_to_timespec64(t); - if (!timespec64_valid(&t64)) + if (!timespec64_valid(&t)) return -EINVAL; if (flags & TIMER_ABSTIME) rmtp = NULL; current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE; current->restart_block.nanosleep.compat_rmtp = rmtp; - return kc->nsleep(which_clock, flags, &t64); + return kc->nsleep(which_clock, flags, &t); } #endif diff --git a/kernel/time/time.c b/kernel/time/time.c index 7c89e437c4d7..44a8c1402133 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -890,3 +890,61 @@ struct timespec64 timespec64_add_safe(const struct timespec64 lhs, return res; } + +int get_timespec64(struct timespec64 *ts, + const struct timespec __user *uts) +{ + struct timespec kts; + int ret; + + ret = copy_from_user(&kts, uts, sizeof(kts)); + if (ret) + return -EFAULT; + + ts->tv_sec = kts.tv_sec; + ts->tv_nsec = kts.tv_nsec; + + return 0; +} +EXPORT_SYMBOL_GPL(get_timespec64); + +int put_timespec64(const struct timespec64 *ts, + struct timespec __user *uts) +{ + struct timespec kts = { + .tv_sec = ts->tv_sec, + .tv_nsec = ts->tv_nsec + }; + return copy_to_user(uts, &kts, sizeof(kts)) ? -EFAULT : 0; +} +EXPORT_SYMBOL_GPL(put_timespec64); + +int get_itimerspec64(struct itimerspec64 *it, + const struct itimerspec __user *uit) +{ + int ret; + + ret = get_timespec64(&it->it_interval, &uit->it_interval); + if (ret) + return ret; + + ret = get_timespec64(&it->it_value, &uit->it_value); + + return ret; +} +EXPORT_SYMBOL_GPL(get_itimerspec64); + +int put_itimerspec64(const struct itimerspec64 *it, + struct itimerspec __user *uit) +{ + int ret; + + ret = put_timespec64(&it->it_interval, &uit->it_interval); + if (ret) + return ret; + + ret = put_timespec64(&it->it_value, &uit->it_value); + + return ret; +} +EXPORT_SYMBOL_GPL(put_itimerspec64); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 460a031c77e5..37385193a608 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -122,8 +122,8 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void) } /* - * limited trace_printk() - * only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed + * Only limited trace_printk() conversion specifiers allowed: + * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s */ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, u64, arg2, u64, arg3) @@ -198,7 +198,8 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, i++; } - if (fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x') + if (fmt[i] != 'i' && fmt[i] != 'd' && + fmt[i] != 'u' && fmt[i] != 'x') return -EINVAL; fmt_cnt++; } @@ -234,7 +235,8 @@ BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags) unsigned int cpu = smp_processor_id(); u64 index = flags & BPF_F_INDEX_MASK; struct bpf_event_entry *ee; - struct perf_event *event; + u64 value = 0; + int err; if (unlikely(flags & ~(BPF_F_INDEX_MASK))) return -EINVAL; @@ -247,21 +249,14 @@ BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags) if (!ee) return -ENOENT; - event = ee->event; - if (unlikely(event->attr.type != PERF_TYPE_HARDWARE && - event->attr.type != PERF_TYPE_RAW)) - return -EINVAL; - - /* make sure event is local and doesn't have pmu::count */ - if (unlikely(event->oncpu != cpu || event->pmu->count)) - return -EINVAL; - + err = perf_event_read_local(ee->event, &value); /* - * we don't know if the function is run successfully by the - * return value. It can be judged in other places, such as - * eBPF programs. + * this api is ugly since we miss [-22..-2] range of valid + * counter values, but that's uapi */ - return perf_event_read_local(event); + if (err) + return err; + return value; } static const struct bpf_func_proto bpf_perf_event_read_proto = { @@ -272,14 +267,16 @@ static const struct bpf_func_proto bpf_perf_event_read_proto = { .arg2_type = ARG_ANYTHING, }; +static DEFINE_PER_CPU(struct perf_sample_data, bpf_sd); + static __always_inline u64 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, u64 flags, struct perf_raw_record *raw) { struct bpf_array *array = container_of(map, struct bpf_array, map); + struct perf_sample_data *sd = this_cpu_ptr(&bpf_sd); unsigned int cpu = smp_processor_id(); u64 index = flags & BPF_F_INDEX_MASK; - struct perf_sample_data sample_data; struct bpf_event_entry *ee; struct perf_event *event; @@ -300,9 +297,9 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, if (unlikely(event->oncpu != cpu)) return -EOPNOTSUPP; - perf_sample_data_init(&sample_data, 0, 0); - sample_data.raw = raw; - perf_event_output(event, &sample_data, regs); + perf_sample_data_init(sd, 0, 0); + sd->raw = raw; + perf_event_output(event, sd, regs); return 0; } @@ -483,7 +480,7 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func /* bpf+kprobe programs can access fields of 'struct pt_regs' */ static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type) + struct bpf_insn_access_aux *info) { if (off < 0 || off >= sizeof(struct pt_regs)) return false; @@ -566,7 +563,7 @@ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id) } static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type) + struct bpf_insn_access_aux *info) { if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE) return false; @@ -585,40 +582,47 @@ const struct bpf_verifier_ops tracepoint_prog_ops = { }; static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type) + struct bpf_insn_access_aux *info) { + const int size_sp = FIELD_SIZEOF(struct bpf_perf_event_data, + sample_period); + if (off < 0 || off >= sizeof(struct bpf_perf_event_data)) return false; if (type != BPF_READ) return false; if (off % size != 0) return false; - if (off == offsetof(struct bpf_perf_event_data, sample_period)) { - if (size != sizeof(u64)) + + switch (off) { + case bpf_ctx_range(struct bpf_perf_event_data, sample_period): + bpf_ctx_record_field_size(info, size_sp); + if (!bpf_ctx_narrow_access_ok(off, size, size_sp)) return false; - } else { + break; + default: if (size != sizeof(long)) return false; } + return true; } static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, - struct bpf_prog *prog) + struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; switch (si->off) { case offsetof(struct bpf_perf_event_data, sample_period): - BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != sizeof(u64)); - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern, data), si->dst_reg, si->src_reg, offsetof(struct bpf_perf_event_data_kern, data)); *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg, - offsetof(struct perf_sample_data, period)); + bpf_target_off(struct perf_sample_data, period, 8, + target_size)); break; default: *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern, |