Diffstat (limited to 'kernel/bpf')
 kernel/bpf/arraymap.c | 33
 kernel/bpf/core.c     |  2
 kernel/bpf/cpumap.c   |  2
 kernel/bpf/lpm_trie.c | 14
 kernel/bpf/sockmap.c  |  3
 kernel/bpf/syscall.c  |  2
 kernel/bpf/verifier.c | 42
 7 files changed, 59 insertions(+), 39 deletions(-)
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index b1f66480135b..14750e7c5ee4 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -26,8 +26,10 @@ static void bpf_array_free_percpu(struct bpf_array *array)
 {
 	int i;
 
-	for (i = 0; i < array->map.max_entries; i++)
+	for (i = 0; i < array->map.max_entries; i++) {
 		free_percpu(array->pptrs[i]);
+		cond_resched();
+	}
 }
 
 static int bpf_array_alloc_percpu(struct bpf_array *array)
@@ -43,6 +45,7 @@ static int bpf_array_alloc_percpu(struct bpf_array *array)
 			return -ENOMEM;
 		}
 		array->pptrs[i] = ptr;
+		cond_resched();
 	}
 
 	return 0;
@@ -73,11 +76,11 @@ static int array_map_alloc_check(union bpf_attr *attr)
 static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 {
 	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
-	int numa_node = bpf_map_attr_numa_node(attr);
+	int ret, numa_node = bpf_map_attr_numa_node(attr);
 	u32 elem_size, index_mask, max_entries;
 	bool unpriv = !capable(CAP_SYS_ADMIN);
+	u64 cost, array_size, mask64;
 	struct bpf_array *array;
-	u64 array_size, mask64;
 
 	elem_size = round_up(attr->value_size, 8);
 
@@ -109,8 +112,19 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 		array_size += (u64) max_entries * elem_size;
 
 	/* make sure there is no u32 overflow later in round_up() */
-	if (array_size >= U32_MAX - PAGE_SIZE)
+	cost = array_size;
+	if (cost >= U32_MAX - PAGE_SIZE)
 		return ERR_PTR(-ENOMEM);
+	if (percpu) {
+		cost += (u64)attr->max_entries * elem_size * num_possible_cpus();
+		if (cost >= U32_MAX - PAGE_SIZE)
+			return ERR_PTR(-ENOMEM);
+	}
+	cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+
+	ret = bpf_map_precharge_memlock(cost);
+	if (ret < 0)
+		return ERR_PTR(ret);
 
 	/* allocate all map elements and zero-initialize them */
 	array = bpf_map_area_alloc(array_size, numa_node);
@@ -121,20 +135,13 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 
 	/* copy mandatory map attributes */
 	bpf_map_init_from_attr(&array->map, attr);
+	array->map.pages = cost;
 	array->elem_size = elem_size;
 
-	if (!percpu)
-		goto out;
-
-	array_size += (u64) attr->max_entries * elem_size * num_possible_cpus();
-
-	if (array_size >= U32_MAX - PAGE_SIZE ||
-	    bpf_array_alloc_percpu(array)) {
+	if (percpu && bpf_array_alloc_percpu(array)) {
 		bpf_map_area_free(array);
 		return ERR_PTR(-ENOMEM);
 	}
-out:
-	array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;
 
 	return &array->map;
 }
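The arraymap change moves the RLIMIT_MEMLOCK accounting ahead of the allocation, charging the per-CPU value storage of BPF_MAP_TYPE_PERCPU_ARRAY up front so an oversized request fails before any pages are committed, and the new cond_resched() calls keep the long percpu alloc/free loops preemptible. A minimal userspace sketch of the path this affects; the map parameters are arbitrary, and sysconf() only approximates the kernel's num_possible_cpus():

#include <linux/bpf.h>
#include <sys/resource.h>
#include <sys/syscall.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct rlimit r = { RLIM_INFINITY, RLIM_INFINITY };
	union bpf_attr attr;
	int fd;

	/* With the precharge in place, an undersized RLIMIT_MEMLOCK now
	 * fails BPF_MAP_CREATE up front instead of after the (possibly
	 * huge) array has already been allocated.
	 */
	if (setrlimit(RLIMIT_MEMLOCK, &r))
		perror("setrlimit");

	memset(&attr, 0, sizeof(attr));
	attr.map_type    = BPF_MAP_TYPE_PERCPU_ARRAY;
	attr.key_size    = 4;
	attr.value_size  = 8;
	attr.max_entries = 1 << 20;	/* arbitrary, large on purpose */

	fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
	if (fd < 0)
		perror("BPF_MAP_CREATE");
	else
		printf("created map, fd %d, ~%lld bytes of percpu values\n",
		       fd, (long long)attr.max_entries * 8 *
		       sysconf(_SC_NPROCESSORS_CONF));
	return 0;
}

Previously the percpu portion of the cost was only folded into map.pages after the allocation had succeeded, so the full charge could be pinned briefly without ever being checked against the rlimit.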
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 29ca9208dcfa..d315b393abdd 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1590,7 +1590,7 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
 	 * so always copy 'cnt' prog_ids to the user.
 	 * In a rare race the user will see zero prog_ids
 	 */
-	ids = kcalloc(cnt, sizeof(u32), GFP_USER);
+	ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN);
 	if (!ids)
 		return -ENOMEM;
 	rcu_read_lock();
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index fbfdada6caee..a4bb0b34375a 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -334,7 +334,7 @@ static int cpu_map_kthread_run(void *data)
 static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
 						       int map_id)
 {
-	gfp_t gfp = GFP_ATOMIC|__GFP_NOWARN;
+	gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
 	struct bpf_cpu_map_entry *rcpu;
 	int numa, err;
 
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 7b469d10d0e9..b4b5b81e7251 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -555,7 +555,10 @@ static void trie_free(struct bpf_map *map)
 	struct lpm_trie_node __rcu **slot;
 	struct lpm_trie_node *node;
 
-	raw_spin_lock(&trie->lock);
+	/* Wait for outstanding programs to complete
+	 * update/lookup/delete/get_next_key and free the trie.
+	 */
+	synchronize_rcu();
 
 	/* Always start at the root and walk down to a node that has no
 	 * children. Then free that node, nullify its reference in the parent
@@ -566,10 +569,9 @@ static void trie_free(struct bpf_map *map)
 		slot = &trie->root;
 
 		for (;;) {
-			node = rcu_dereference_protected(*slot,
-					lockdep_is_held(&trie->lock));
+			node = rcu_dereference_protected(*slot, 1);
 			if (!node)
-				goto unlock;
+				goto out;
 
 			if (rcu_access_pointer(node->child[0])) {
 				slot = &node->child[0];
@@ -587,8 +589,8 @@ static void trie_free(struct bpf_map *map)
 		}
 	}
 
-unlock:
-	raw_spin_unlock(&trie->lock);
+out:
+	kfree(trie);
 }
 
 static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
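In trie_free() the spinlock bought nothing once the map can no longer be reached by any program, but a lookup that started earlier may still be traversing the nodes, and the trie struct itself was never freed. A single synchronize_rcu() lets those readers drain, after which rcu_dereference_protected(ptr, 1) and plain kfree() are honest. A minimal kernel-style sketch of the same teardown pattern, with illustrative names; it recurses for brevity where the patch deliberately walks iteratively to keep stack depth bounded:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct tnode {
	struct tnode __rcu *child[2];
};

static void subtree_free(struct tnode __rcu **slot)
{
	/* After synchronize_rcu() no readers remain, so passing '1' as
	 * the protection condition is correct: we are the sole accessor.
	 */
	struct tnode *node = rcu_dereference_protected(*slot, 1);

	if (!node)
		return;
	subtree_free(&node->child[0]);
	subtree_free(&node->child[1]);
	kfree(node);
}

static void tree_free(struct tnode __rcu **root)
{
	synchronize_rcu();	/* wait out in-flight RCU readers */
	subtree_free(root);
}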
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 48c33417d13c..a927e89dad6e 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -521,8 +521,8 @@ static struct smap_psock *smap_init_psock(struct sock *sock,
 static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 {
 	struct bpf_stab *stab;
-	int err = -EINVAL;
 	u64 cost;
+	int err;
 
 	if (!capable(CAP_NET_ADMIN))
 		return ERR_PTR(-EPERM);
@@ -547,6 +547,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 
 	/* make sure page count doesn't overflow */
 	cost = (u64) stab->map.max_entries * sizeof(struct sock *);
+	err = -EINVAL;
 	if (cost >= U32_MAX - PAGE_SIZE)
 		goto free_stab;
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e24aa3241387..43f95d190eea 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1845,7 +1845,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	union bpf_attr attr = {};
 	int err;
 
-	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
+	if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
 	err = check_uarg_tail_zero(uattr, sizeof(attr), size);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5fb69a85d967..c6eff108aa99 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1356,6 +1356,13 @@ static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
 	return reg->type == PTR_TO_CTX;
 }
 
+static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
+{
+	const struct bpf_reg_state *reg = cur_regs(env) + regno;
+
+	return type_is_pkt_pointer(reg->type);
+}
+
 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
 				   const struct bpf_reg_state *reg,
 				   int off, int size, bool strict)
@@ -1416,10 +1423,10 @@ static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
 }
 
 static int check_ptr_alignment(struct bpf_verifier_env *env,
-			       const struct bpf_reg_state *reg,
-			       int off, int size)
+			       const struct bpf_reg_state *reg, int off,
+			       int size, bool strict_alignment_once)
 {
-	bool strict = env->strict_alignment;
+	bool strict = env->strict_alignment || strict_alignment_once;
 	const char *pointer_desc = "";
 
 	switch (reg->type) {
@@ -1576,9 +1583,9 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
  * if t==write && value_regno==-1, some unknown value is stored into memory
  * if t==read && value_regno==-1, don't care what we read from memory
  */
-static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, int off,
-			    int bpf_size, enum bpf_access_type t,
-			    int value_regno)
+static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
+			    int off, int bpf_size, enum bpf_access_type t,
+			    int value_regno, bool strict_alignment_once)
 {
 	struct bpf_reg_state *regs = cur_regs(env);
 	struct bpf_reg_state *reg = regs + regno;
@@ -1590,7 +1597,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 		return size;
 
 	/* alignment checks will add in reg->off themselves */
-	err = check_ptr_alignment(env, reg, off, size);
+	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
 	if (err)
 		return err;
 
@@ -1735,21 +1742,23 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
 		return -EACCES;
 	}
 
-	if (is_ctx_reg(env, insn->dst_reg)) {
-		verbose(env, "BPF_XADD stores into R%d context is not allowed\n",
-			insn->dst_reg);
+	if (is_ctx_reg(env, insn->dst_reg) ||
+	    is_pkt_reg(env, insn->dst_reg)) {
+		verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
+			insn->dst_reg, is_ctx_reg(env, insn->dst_reg) ?
+			"context" : "packet");
 		return -EACCES;
 	}
 
 	/* check whether atomic_add can read the memory */
 	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
-			       BPF_SIZE(insn->code), BPF_READ, -1);
+			       BPF_SIZE(insn->code), BPF_READ, -1, true);
 	if (err)
 		return err;
 
 	/* check whether atomic_add can write into the same memory */
 	return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
-				BPF_SIZE(insn->code), BPF_WRITE, -1);
+				BPF_SIZE(insn->code), BPF_WRITE, -1, true);
 }
 
 /* when register 'regno' is passed into function that will read 'access_size'
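The verifier change forbids BPF_XADD on packet pointers and threads a strict_alignment_once flag through check_mem_access(), so XADD operands are always checked with strict alignment even on architectures with efficient unaligned access. A sketch of the kind of instruction sequence this now rejects, using the insn macros from tools/include/linux/filter.h; the prologue that actually makes r2 a bounds-checked packet pointer depends on the program type and is elided here:

struct bpf_insn insns[] = {
	/* ... prologue: r2 = packet start, bounds-checked ... */
	BPF_MOV64_IMM(BPF_REG_0, 1),
	/* lock *(u32 *)(r2 + 0) += r0 -- dst_reg is a packet pointer,
	 * which the verifier now fails with
	 * "BPF_XADD stores into R2 packet is not allowed"
	 */
	BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 0),
	BPF_MOV64_IMM(BPF_REG_0, 0),
	BPF_EXIT_INSN(),
};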
@@ -2388,7 +2397,8 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 	 * is inferred from register state.
 	 */
 	for (i = 0; i < meta.access_size; i++) {
-		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, BPF_WRITE, -1);
+		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
+				       BPF_WRITE, -1, false);
 		if (err)
 			return err;
 	}
@@ -4632,7 +4642,7 @@ static int do_check(struct bpf_verifier_env *env)
 			 */
			err = check_mem_access(env, insn_idx, insn->src_reg, insn->off,
 					       BPF_SIZE(insn->code), BPF_READ,
-					       insn->dst_reg);
+					       insn->dst_reg, false);
 			if (err)
 				return err;
 
@@ -4684,7 +4694,7 @@ static int do_check(struct bpf_verifier_env *env)
 			/* check that memory (dst_reg + off) is writeable */
 			err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
 					       BPF_SIZE(insn->code), BPF_WRITE,
-					       insn->src_reg);
+					       insn->src_reg, false);
 			if (err)
 				return err;
 
@@ -4719,7 +4729,7 @@ static int do_check(struct bpf_verifier_env *env)
 			/* check that memory (dst_reg + off) is writeable */
 			err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
 					       BPF_SIZE(insn->code), BPF_WRITE,
-					       -1);
+					       -1, false);
 			if (err)
 				return err;
 
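All regular LDX/STX/ST accesses keep their old behaviour by passing strict_alignment_once=false, so only the two XADD checks get the unconditional strict treatment. A hypothetical loader fragment, continuing the insns[] sketch above with error handling trimmed, showing where the new verifier message would surface:

char log[4096];
union bpf_attr attr;
int fd;

memset(&attr, 0, sizeof(attr));
attr.prog_type = BPF_PROG_TYPE_XDP;
attr.insns     = (__u64)(unsigned long)insns;
attr.insn_cnt  = sizeof(insns) / sizeof(insns[0]);
attr.license   = (__u64)(unsigned long)"GPL";
attr.log_buf   = (__u64)(unsigned long)log;
attr.log_size  = sizeof(log);
attr.log_level = 1;

fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
if (fd < 0)
	/* expect "BPF_XADD stores into R2 packet is not allowed" here */
	fprintf(stderr, "verifier log:\n%s\n", log);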

