| author | David S. Miller <davem@davemloft.net> | 2019-06-20 00:06:27 -0400 | 
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2019-06-20 00:06:27 -0400 | 
| commit | dca73a65a68329ee386d3ff473152bac66eaab39 (patch) | |
| tree | 97c41afb932bdd6cbe67e7ffc38bfe5952c97798 | |
| parent | 497ad9f5b2dc86b733761b9afa44ecfa2f17be65 (diff) | |
| parent | 94079b64255fe40b9b53fd2e4081f68b9b14f54a (diff) | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:
====================
pull-request: bpf-next 2019-06-19
The following pull-request contains BPF updates for your *net-next* tree.
The main changes are:
1) new SO_DETACH_REUSEPORT_BPF setsockopt, from Martin.
2) BTF based map definition, from Andrii.
3) support bpf_map_lookup_elem for xskmap, from Jonathan.
4) bounded loops and scalar precision logic in the verifier, from Alexei.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
101 files changed, 4048 insertions, 860 deletions
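Two illustrative sketches for items 1) and 3) of the changelog follow. They are not part of the patch itself: identifiers taken from the diff (SO_DETACH_REUSEPORT_BPF, struct bpf_xdp_sock, BPF_MAP_TYPE_XSKMAP) are real, while file descriptors, map names and sizes, and header choices are assumptions made for illustration.

First, a minimal user-space sketch of detaching a previously attached reuseport BPF program with the new SO_DETACH_REUSEPORT_BPF socket option. The dummy int optval only satisfies setsockopt()'s generic length check; the error codes in the comment follow reuseport_detach_prog() as added in this diff.

```c
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

#ifndef SO_DETACH_REUSEPORT_BPF
#define SO_DETACH_REUSEPORT_BPF 68	/* asm-generic value added by this series */
#endif

/* Detach the SO_REUSEPORT BPF selection program from 'fd'.
 * Per reuseport_detach_prog(): -EINVAL if SO_REUSEPORT was never enabled
 * on the socket, -ENOENT if no program is currently attached.
 */
static int detach_reuseport_bpf(int fd)
{
	int dummy = 0;

	if (setsockopt(fd, SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
		       &dummy, sizeof(dummy))) {
		fprintf(stderr, "SO_DETACH_REUSEPORT_BPF: %s\n", strerror(errno));
		return -1;
	}
	return 0;
}
```

Second, a sketch of item 3): with bpf_map_lookup_elem() now permitted on BPF_MAP_TYPE_XSKMAP, an XDP program can inspect the AF_XDP socket bound to a queue before redirecting. The verifier types the returned pointer as struct bpf_xdp_sock, which currently exposes only queue_id. The map/section macro style follows the samples and selftests of that era; the map name, sizes, and section names are illustrative only.

```c
#include <linux/bpf.h>
#include "bpf_helpers.h"	/* SEC() and helper stubs, as in selftests/bpf */

struct bpf_map_def SEC("maps") xsks_map = {
	.type = BPF_MAP_TYPE_XSKMAP,
	.key_size = sizeof(int),
	.value_size = sizeof(int),
	.max_entries = 64,
};

SEC("xdp_sock")
int xdp_sock_prog(struct xdp_md *ctx)
{
	int index = ctx->rx_queue_index;
	struct bpf_xdp_sock *xsk;

	/* New in this series: a lookup on an xskmap returns a (possibly NULL)
	 * pointer to struct bpf_xdp_sock; only queue_id is readable from it.
	 */
	xsk = bpf_map_lookup_elem(&xsks_map, &index);
	if (!xsk || xsk->queue_id != index)
		return XDP_PASS;

	return bpf_redirect_map(&xsks_map, index, 0);
}

char _license[] SEC("license") = "GPL";
```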
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 976e89b116e5..de6c4df61082 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -122,6 +122,8 @@  #define SO_RCVTIMEO_NEW         66  #define SO_SNDTIMEO_NEW         67 +#define SO_DETACH_REUSEPORT_BPF 68 +  #if !defined(__KERNEL__)  #if __BITS_PER_LONG == 64 diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index d41765cfbc6e..d0a9ed2ca2d6 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -133,6 +133,8 @@  #define SO_RCVTIMEO_NEW         66  #define SO_SNDTIMEO_NEW         67 +#define SO_DETACH_REUSEPORT_BPF 68 +  #if !defined(__KERNEL__)  #if __BITS_PER_LONG == 64 diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 66c5dd245ac7..10173c32195e 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -114,6 +114,8 @@  #define SO_RCVTIMEO_NEW         0x4040  #define SO_SNDTIMEO_NEW         0x4041 +#define SO_DETACH_REUSEPORT_BPF 0x4042 +  #if !defined(__KERNEL__)  #if __BITS_PER_LONG == 64 diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 9265a9eece15..8029b681fc7c 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -115,6 +115,8 @@  #define SO_RCVTIMEO_NEW          0x0044  #define SO_SNDTIMEO_NEW          0x0045 +#define SO_DETACH_REUSEPORT_BPF  0x0047 +  #if !defined(__KERNEL__) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9f7c453db70c..a62e7889b0b6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -277,6 +277,7 @@ enum bpf_reg_type {  	PTR_TO_TCP_SOCK,	 /* reg points to struct tcp_sock */  	PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */  	PTR_TO_TP_BUFFER,	 /* reg points to a writable raw tp's buffer */ +	PTR_TO_XDP_SOCK,	 /* reg points to struct xdp_sock */  };  /* The information passed from prog-specific *_is_valid_access @@ -1098,6 +1099,15 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,  				    struct bpf_insn *insn_buf,  				    struct bpf_prog *prog,  				    u32 *target_size); + +bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type, +				  struct bpf_insn_access_aux *info); + +u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, +				    const struct bpf_insn *si, +				    struct bpf_insn *insn_buf, +				    struct bpf_prog *prog, +				    u32 *target_size);  #else  static inline bool bpf_tcp_sock_is_valid_access(int off, int size,  						enum bpf_access_type type, @@ -1114,6 +1124,21 @@ static inline u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,  {  	return 0;  } +static inline bool bpf_xdp_sock_is_valid_access(int off, int size, +						enum bpf_access_type type, +						struct bpf_insn_access_aux *info) +{ +	return false; +} + +static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, +						  const struct bpf_insn *si, +						  struct bpf_insn *insn_buf, +						  struct bpf_prog *prog, +						  u32 *target_size) +{ +	return 0; +}  #endif /* CONFIG_INET */  #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 026ba8b81e88..5fe99f322b1c 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -136,6 +136,8 @@ struct bpf_reg_state {  	 */  	s32 subreg_def;  	enum bpf_reg_liveness 
live; +	/* if (!precise && SCALAR_VALUE) min/max/tnum don't affect safety */ +	bool precise;  };  enum bpf_stack_slot_type { @@ -187,14 +189,77 @@ struct bpf_func_state {  	struct bpf_stack_state *stack;  }; +struct bpf_idx_pair { +	u32 prev_idx; +	u32 idx; +}; +  #define MAX_CALL_FRAMES 8  struct bpf_verifier_state {  	/* call stack tracking */  	struct bpf_func_state *frame[MAX_CALL_FRAMES]; +	struct bpf_verifier_state *parent; +	/* +	 * 'branches' field is the number of branches left to explore: +	 * 0 - all possible paths from this state reached bpf_exit or +	 * were safely pruned +	 * 1 - at least one path is being explored. +	 * This state hasn't reached bpf_exit +	 * 2 - at least two paths are being explored. +	 * This state is an immediate parent of two children. +	 * One is fallthrough branch with branches==1 and another +	 * state is pushed into stack (to be explored later) also with +	 * branches==1. The parent of this state has branches==1. +	 * The verifier state tree connected via 'parent' pointer looks like: +	 * 1 +	 * 1 +	 * 2 -> 1 (first 'if' pushed into stack) +	 * 1 +	 * 2 -> 1 (second 'if' pushed into stack) +	 * 1 +	 * 1 +	 * 1 bpf_exit. +	 * +	 * Once do_check() reaches bpf_exit, it calls update_branch_counts() +	 * and the verifier state tree will look: +	 * 1 +	 * 1 +	 * 2 -> 1 (first 'if' pushed into stack) +	 * 1 +	 * 1 -> 1 (second 'if' pushed into stack) +	 * 0 +	 * 0 +	 * 0 bpf_exit. +	 * After pop_stack() the do_check() will resume at second 'if'. +	 * +	 * If is_state_visited() sees a state with branches > 0 it means +	 * there is a loop. If such state is exactly equal to the current state +	 * it's an infinite loop. Note states_equal() checks for states +	 * equvalency, so two states being 'states_equal' does not mean +	 * infinite loop. The exact comparison is provided by +	 * states_maybe_looping() function. It's a stronger pre-check and +	 * much faster than states_equal(). +	 * +	 * This algorithm may not find all possible infinite loops or +	 * loop iteration count may be too high. +	 * In such cases BPF_COMPLEXITY_LIMIT_INSNS limit kicks in. +	 */ +	u32 branches;  	u32 insn_idx;  	u32 curframe;  	u32 active_spin_lock;  	bool speculative; + +	/* first and last insn idx of this verifier state */ +	u32 first_insn_idx; +	u32 last_insn_idx; +	/* jmp history recorded from first to last. +	 * backtracking is using it to go from last to first. +	 * For most states jmp_history_cnt is [0-3]. +	 * For loops can go up to ~40. 
+	 */ +	struct bpf_idx_pair *jmp_history; +	u32 jmp_history_cnt;  };  #define bpf_get_spilled_reg(slot, frame)				\ @@ -309,7 +374,9 @@ struct bpf_verifier_env {  	} cfg;  	u32 subprog_cnt;  	/* number of instructions analyzed by the verifier */ -	u32 insn_processed; +	u32 prev_insn_processed, insn_processed; +	/* number of jmps, calls, exits analyzed so far */ +	u32 prev_jmps_processed, jmps_processed;  	/* total verification time */  	u64 verification_time;  	/* maximum number of verifier states kept in 'branching' instructions */ diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h index 8a5f70c7cdf2..d9112de85261 100644 --- a/include/net/sock_reuseport.h +++ b/include/net/sock_reuseport.h @@ -35,6 +35,8 @@ extern struct sock *reuseport_select_sock(struct sock *sk,  					  struct sk_buff *skb,  					  int hdr_len);  extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog); +extern int reuseport_detach_prog(struct sock *sk); +  int reuseport_get_id(struct sock_reuseport *reuse);  #endif  /* _SOCK_REUSEPORT_H */ diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index d074b6d60f8a..ae0f368a62bb 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -58,11 +58,11 @@ struct xdp_sock {  	struct xdp_umem *umem;  	struct list_head flush_node;  	u16 queue_id; -	struct xsk_queue *tx ____cacheline_aligned_in_smp; -	struct list_head list;  	bool zc;  	/* Protects multiple processes in the control path */  	struct mutex mutex; +	struct xsk_queue *tx ____cacheline_aligned_in_smp; +	struct list_head list;  	/* Mutual exclusion of NAPI TX thread and sendmsg error paths  	 * in the SKB destructor callback.  	 */ diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 8c1391c89171..77f7c1638eb1 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -117,6 +117,8 @@  #define SO_RCVTIMEO_NEW         66  #define SO_SNDTIMEO_NEW         67 +#define SO_DETACH_REUSEPORT_BPF 68 +  #if !defined(__KERNEL__)  #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0e879721f75a..b077507efa3f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3085,6 +3085,10 @@ struct bpf_sock_tuple {  	};  }; +struct bpf_xdp_sock { +	__u32 queue_id; +}; +  #define XDP_PACKET_HEADROOM 256  /* User return codes for XDP prog type. @@ -3245,6 +3249,7 @@ struct bpf_sock_addr {  	__u32 msg_src_ip6[4];	/* Allows 1,2,4-byte read an 4-byte write.  				 * Stored in network byte order.  				 
*/ +	__bpf_md_ptr(struct bpf_sock *, sk);  };  /* User bpf_sock_ops struct to access socket values and specify request ops @@ -3296,6 +3301,7 @@ struct bpf_sock_ops {  	__u32 sk_txhash;  	__u64 bytes_received;  	__u64 bytes_acked; +	__bpf_md_ptr(struct bpf_sock *, sk);  };  /* Definitions for bpf_sock_ops_cb_flags */ diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 4c2fa3ac56f6..29d781061cd5 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -1,5 +1,6 @@  # SPDX-License-Identifier: GPL-2.0  obj-y := core.o +CFLAGS_core.o += $(call cc-disable-warning, override-init)  obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o  obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index b84c44505e06..40e86a7e0ef0 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -80,8 +80,8 @@ static u64 dev_map_bitmap_size(const union bpf_attr *attr)  static struct bpf_map *dev_map_alloc(union bpf_attr *attr)  {  	struct bpf_dtab *dtab; -	int err = -EINVAL;  	u64 cost; +	int err;  	if (!capable(CAP_NET_ADMIN))  		return ERR_PTR(-EPERM); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1e9d10b32984..0e079b2298f8 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -326,7 +326,8 @@ static bool type_is_sk_pointer(enum bpf_reg_type type)  {  	return type == PTR_TO_SOCKET ||  		type == PTR_TO_SOCK_COMMON || -		type == PTR_TO_TCP_SOCK; +		type == PTR_TO_TCP_SOCK || +		type == PTR_TO_XDP_SOCK;  }  static bool reg_type_may_be_null(enum bpf_reg_type type) @@ -398,6 +399,7 @@ static const char * const reg_type_str[] = {  	[PTR_TO_TCP_SOCK]	= "tcp_sock",  	[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",  	[PTR_TO_TP_BUFFER]	= "tp_buffer", +	[PTR_TO_XDP_SOCK]	= "xdp_sock",  };  static char slot_type_char[] = { @@ -445,12 +447,12 @@ static void print_verifier_state(struct bpf_verifier_env *env,  		verbose(env, " R%d", i);  		print_liveness(env, reg->live);  		verbose(env, "=%s", reg_type_str[t]); +		if (t == SCALAR_VALUE && reg->precise) +			verbose(env, "P");  		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&  		    tnum_is_const(reg->var_off)) {  			/* reg->off should be 0 for SCALAR_VALUE */  			verbose(env, "%lld", reg->var_off.value + reg->off); -			if (t == PTR_TO_STACK) -				verbose(env, ",call_%d", func(env, reg)->callsite);  		} else {  			verbose(env, "(id=%d", reg->id);  			if (reg_type_may_be_refcounted_or_null(t)) @@ -512,11 +514,17 @@ static void print_verifier_state(struct bpf_verifier_env *env,  			continue;  		verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);  		print_liveness(env, state->stack[i].spilled_ptr.live); -		if (state->stack[i].slot_type[0] == STACK_SPILL) -			verbose(env, "=%s", -				reg_type_str[state->stack[i].spilled_ptr.type]); -		else +		if (state->stack[i].slot_type[0] == STACK_SPILL) { +			reg = &state->stack[i].spilled_ptr; +			t = reg->type; +			verbose(env, "=%s", reg_type_str[t]); +			if (t == SCALAR_VALUE && reg->precise) +				verbose(env, "P"); +			if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) +				verbose(env, "%lld", reg->var_off.value + reg->off); +		} else {  			verbose(env, "=%s", types_buf); +		}  	}  	if (state->acquired_refs && state->refs[0].id) {  		verbose(env, " refs=%d", state->refs[0].id); @@ -665,6 +673,13 @@ static void free_func_state(struct bpf_func_state *state)  	kfree(state);  } +static void clear_jmp_history(struct bpf_verifier_state *state) +{ +	
kfree(state->jmp_history); +	state->jmp_history = NULL; +	state->jmp_history_cnt = 0; +} +  static void free_verifier_state(struct bpf_verifier_state *state,  				bool free_self)  { @@ -674,6 +689,7 @@ static void free_verifier_state(struct bpf_verifier_state *state,  		free_func_state(state->frame[i]);  		state->frame[i] = NULL;  	} +	clear_jmp_history(state);  	if (free_self)  		kfree(state);  } @@ -701,8 +717,18 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,  			       const struct bpf_verifier_state *src)  {  	struct bpf_func_state *dst; +	u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt;  	int i, err; +	if (dst_state->jmp_history_cnt < src->jmp_history_cnt) { +		kfree(dst_state->jmp_history); +		dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER); +		if (!dst_state->jmp_history) +			return -ENOMEM; +	} +	memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz); +	dst_state->jmp_history_cnt = src->jmp_history_cnt; +  	/* if dst has more stack frames then src frame, free them */  	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {  		free_func_state(dst_state->frame[i]); @@ -711,6 +737,10 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,  	dst_state->speculative = src->speculative;  	dst_state->curframe = src->curframe;  	dst_state->active_spin_lock = src->active_spin_lock; +	dst_state->branches = src->branches; +	dst_state->parent = src->parent; +	dst_state->first_insn_idx = src->first_insn_idx; +	dst_state->last_insn_idx = src->last_insn_idx;  	for (i = 0; i <= src->curframe; i++) {  		dst = dst_state->frame[i];  		if (!dst) { @@ -726,6 +756,23 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,  	return 0;  } +static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st) +{ +	while (st) { +		u32 br = --st->branches; + +		/* WARN_ON(br > 1) technically makes sense here, +		 * but see comment in push_stack(), hence: +		 */ +		WARN_ONCE((int)br < 0, +			  "BUG update_branch_counts:branches_to_explore=%d\n", +			  br); +		if (br) +			break; +		st = st->parent; +	} +} +  static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,  		     int *insn_idx)  { @@ -779,6 +826,18 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,  			env->stack_size);  		goto err;  	} +	if (elem->st.parent) { +		++elem->st.parent->branches; +		/* WARN_ON(branches > 2) technically makes sense here, +		 * but +		 * 1. speculative states will bump 'branches' for non-branch +		 * instructions +		 * 2. is_state_visited() heuristics may decide not to create +		 * a new state for a sequence of branches and all such current +		 * and cloned states will be pointing to a single parent state +		 * which might have large 'branches' count. +		 */ +	}  	return &elem->st;  err:  	free_verifier_state(env->cur_state, true); @@ -926,6 +985,9 @@ static void __mark_reg_unbounded(struct bpf_reg_state *reg)  	reg->smax_value = S64_MAX;  	reg->umin_value = 0;  	reg->umax_value = U64_MAX; + +	/* constant backtracking is enabled for root only for now */ +	reg->precise = capable(CAP_SYS_ADMIN) ? false : true;  }  /* Mark a register as having a completely unknown (scalar) value. 
*/ @@ -1337,6 +1399,389 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,  	return 0;  } +/* for any branch, call, exit record the history of jmps in the given state */ +static int push_jmp_history(struct bpf_verifier_env *env, +			    struct bpf_verifier_state *cur) +{ +	u32 cnt = cur->jmp_history_cnt; +	struct bpf_idx_pair *p; + +	cnt++; +	p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER); +	if (!p) +		return -ENOMEM; +	p[cnt - 1].idx = env->insn_idx; +	p[cnt - 1].prev_idx = env->prev_insn_idx; +	cur->jmp_history = p; +	cur->jmp_history_cnt = cnt; +	return 0; +} + +/* Backtrack one insn at a time. If idx is not at the top of recorded + * history then previous instruction came from straight line execution. + */ +static int get_prev_insn_idx(struct bpf_verifier_state *st, int i, +			     u32 *history) +{ +	u32 cnt = *history; + +	if (cnt && st->jmp_history[cnt - 1].idx == i) { +		i = st->jmp_history[cnt - 1].prev_idx; +		(*history)--; +	} else { +		i--; +	} +	return i; +} + +/* For given verifier state backtrack_insn() is called from the last insn to + * the first insn. Its purpose is to compute a bitmask of registers and + * stack slots that needs precision in the parent verifier state. + */ +static int backtrack_insn(struct bpf_verifier_env *env, int idx, +			  u32 *reg_mask, u64 *stack_mask) +{ +	const struct bpf_insn_cbs cbs = { +		.cb_print	= verbose, +		.private_data	= env, +	}; +	struct bpf_insn *insn = env->prog->insnsi + idx; +	u8 class = BPF_CLASS(insn->code); +	u8 opcode = BPF_OP(insn->code); +	u8 mode = BPF_MODE(insn->code); +	u32 dreg = 1u << insn->dst_reg; +	u32 sreg = 1u << insn->src_reg; +	u32 spi; + +	if (insn->code == 0) +		return 0; +	if (env->log.level & BPF_LOG_LEVEL) { +		verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask); +		verbose(env, "%d: ", idx); +		print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); +	} + +	if (class == BPF_ALU || class == BPF_ALU64) { +		if (!(*reg_mask & dreg)) +			return 0; +		if (opcode == BPF_MOV) { +			if (BPF_SRC(insn->code) == BPF_X) { +				/* dreg = sreg +				 * dreg needs precision after this insn +				 * sreg needs precision before this insn +				 */ +				*reg_mask &= ~dreg; +				*reg_mask |= sreg; +			} else { +				/* dreg = K +				 * dreg needs precision after this insn. +				 * Corresponding register is already marked +				 * as precise=true in this verifier state. +				 * No further markings in parent are necessary +				 */ +				*reg_mask &= ~dreg; +			} +		} else { +			if (BPF_SRC(insn->code) == BPF_X) { +				/* dreg += sreg +				 * both dreg and sreg need precision +				 * before this insn +				 */ +				*reg_mask |= sreg; +			} /* else dreg += K +			   * dreg still needs precision before this insn +			   */ +		} +	} else if (class == BPF_LDX) { +		if (!(*reg_mask & dreg)) +			return 0; +		*reg_mask &= ~dreg; + +		/* scalars can only be spilled into stack w/o losing precision. +		 * Load from any other memory can be zero extended. +		 * The desire to keep that precision is already indicated +		 * by 'precise' mark in corresponding register of this state. +		 * No further tracking necessary. +		 */ +		if (insn->src_reg != BPF_REG_FP) +			return 0; +		if (BPF_SIZE(insn->code) != BPF_DW) +			return 0; + +		/* dreg = *(u64 *)[fp - off] was a fill from the stack. 
+		 * that [fp - off] slot contains scalar that needs to be +		 * tracked with precision +		 */ +		spi = (-insn->off - 1) / BPF_REG_SIZE; +		if (spi >= 64) { +			verbose(env, "BUG spi %d\n", spi); +			WARN_ONCE(1, "verifier backtracking bug"); +			return -EFAULT; +		} +		*stack_mask |= 1ull << spi; +	} else if (class == BPF_STX) { +		if (*reg_mask & dreg) +			/* stx shouldn't be using _scalar_ dst_reg +			 * to access memory. It means backtracking +			 * encountered a case of pointer subtraction. +			 */ +			return -ENOTSUPP; +		/* scalars can only be spilled into stack */ +		if (insn->dst_reg != BPF_REG_FP) +			return 0; +		if (BPF_SIZE(insn->code) != BPF_DW) +			return 0; +		spi = (-insn->off - 1) / BPF_REG_SIZE; +		if (spi >= 64) { +			verbose(env, "BUG spi %d\n", spi); +			WARN_ONCE(1, "verifier backtracking bug"); +			return -EFAULT; +		} +		if (!(*stack_mask & (1ull << spi))) +			return 0; +		*stack_mask &= ~(1ull << spi); +		*reg_mask |= sreg; +	} else if (class == BPF_JMP || class == BPF_JMP32) { +		if (opcode == BPF_CALL) { +			if (insn->src_reg == BPF_PSEUDO_CALL) +				return -ENOTSUPP; +			/* regular helper call sets R0 */ +			*reg_mask &= ~1; +			if (*reg_mask & 0x3f) { +				/* if backtracing was looking for registers R1-R5 +				 * they should have been found already. +				 */ +				verbose(env, "BUG regs %x\n", *reg_mask); +				WARN_ONCE(1, "verifier backtracking bug"); +				return -EFAULT; +			} +		} else if (opcode == BPF_EXIT) { +			return -ENOTSUPP; +		} +	} else if (class == BPF_LD) { +		if (!(*reg_mask & dreg)) +			return 0; +		*reg_mask &= ~dreg; +		/* It's ld_imm64 or ld_abs or ld_ind. +		 * For ld_imm64 no further tracking of precision +		 * into parent is necessary +		 */ +		if (mode == BPF_IND || mode == BPF_ABS) +			/* to be analyzed */ +			return -ENOTSUPP; +	} else if (class == BPF_ST) { +		if (*reg_mask & dreg) +			/* likely pointer subtraction */ +			return -ENOTSUPP; +	} +	return 0; +} + +/* the scalar precision tracking algorithm: + * . at the start all registers have precise=false. + * . scalar ranges are tracked as normal through alu and jmp insns. + * . once precise value of the scalar register is used in: + *   .  ptr + scalar alu + *   . if (scalar cond K|scalar) + *   .  helper_call(.., scalar, ...) where ARG_CONST is expected + *   backtrack through the verifier states and mark all registers and + *   stack slots with spilled constants that these scalar regisers + *   should be precise. + * . during state pruning two registers (or spilled stack slots) + *   are equivalent if both are not precise. + * + * Note the verifier cannot simply walk register parentage chain, + * since many different registers and stack slots could have been + * used to compute single precise scalar. + * + * The approach of starting with precise=true for all registers and then + * backtrack to mark a register as not precise when the verifier detects + * that program doesn't care about specific value (e.g., when helper + * takes register as ARG_ANYTHING parameter) is not safe. + * + * It's ok to walk single parentage chain of the verifier states. + * It's possible that this backtracking will go all the way till 1st insn. + * All other branches will be explored for needing precision later. + * + * The backtracking needs to deal with cases like: + *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0) + * r9 -= r8 + * r5 = r9 + * if r5 > 0x79f goto pc+7 + *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff)) + * r5 += 1 + * ... 
+ * call bpf_perf_event_output#25 + *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO + * + * and this case: + * r6 = 1 + * call foo // uses callee's r6 inside to compute r0 + * r0 += r6 + * if r0 == 0 goto + * + * to track above reg_mask/stack_mask needs to be independent for each frame. + * + * Also if parent's curframe > frame where backtracking started, + * the verifier need to mark registers in both frames, otherwise callees + * may incorrectly prune callers. This is similar to + * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences") + * + * For now backtracking falls back into conservative marking. + */ +static void mark_all_scalars_precise(struct bpf_verifier_env *env, +				     struct bpf_verifier_state *st) +{ +	struct bpf_func_state *func; +	struct bpf_reg_state *reg; +	int i, j; + +	/* big hammer: mark all scalars precise in this path. +	 * pop_stack may still get !precise scalars. +	 */ +	for (; st; st = st->parent) +		for (i = 0; i <= st->curframe; i++) { +			func = st->frame[i]; +			for (j = 0; j < BPF_REG_FP; j++) { +				reg = &func->regs[j]; +				if (reg->type != SCALAR_VALUE) +					continue; +				reg->precise = true; +			} +			for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) { +				if (func->stack[j].slot_type[0] != STACK_SPILL) +					continue; +				reg = &func->stack[j].spilled_ptr; +				if (reg->type != SCALAR_VALUE) +					continue; +				reg->precise = true; +			} +		} +} + +static int mark_chain_precision(struct bpf_verifier_env *env, int regno) +{ +	struct bpf_verifier_state *st = env->cur_state; +	int first_idx = st->first_insn_idx; +	int last_idx = env->insn_idx; +	struct bpf_func_state *func; +	struct bpf_reg_state *reg; +	u32 reg_mask = 1u << regno; +	u64 stack_mask = 0; +	bool skip_first = true; +	int i, err; + +	if (!env->allow_ptr_leaks) +		/* backtracking is root only for now */ +		return 0; + +	func = st->frame[st->curframe]; +	reg = &func->regs[regno]; +	if (reg->type != SCALAR_VALUE) { +		WARN_ONCE(1, "backtracing misuse"); +		return -EFAULT; +	} +	if (reg->precise) +		return 0; +	func->regs[regno].precise = true; + +	for (;;) { +		DECLARE_BITMAP(mask, 64); +		bool new_marks = false; +		u32 history = st->jmp_history_cnt; + +		if (env->log.level & BPF_LOG_LEVEL) +			verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx); +		for (i = last_idx;;) { +			if (skip_first) { +				err = 0; +				skip_first = false; +			} else { +				err = backtrack_insn(env, i, ®_mask, &stack_mask); +			} +			if (err == -ENOTSUPP) { +				mark_all_scalars_precise(env, st); +				return 0; +			} else if (err) { +				return err; +			} +			if (!reg_mask && !stack_mask) +				/* Found assignment(s) into tracked register in this state. +				 * Since this state is already marked, just return. +				 * Nothing to be tracked further in the parent state. +				 */ +				return 0; +			if (i == first_idx) +				break; +			i = get_prev_insn_idx(st, i, &history); +			if (i >= env->prog->len) { +				/* This can happen if backtracking reached insn 0 +				 * and there are still reg_mask or stack_mask +				 * to backtrack. +				 * It means the backtracking missed the spot where +				 * particular register was initialized with a constant. 
+				 */ +				verbose(env, "BUG backtracking idx %d\n", i); +				WARN_ONCE(1, "verifier backtracking bug"); +				return -EFAULT; +			} +		} +		st = st->parent; +		if (!st) +			break; + +		func = st->frame[st->curframe]; +		bitmap_from_u64(mask, reg_mask); +		for_each_set_bit(i, mask, 32) { +			reg = &func->regs[i]; +			if (reg->type != SCALAR_VALUE) +				continue; +			if (!reg->precise) +				new_marks = true; +			reg->precise = true; +		} + +		bitmap_from_u64(mask, stack_mask); +		for_each_set_bit(i, mask, 64) { +			if (i >= func->allocated_stack / BPF_REG_SIZE) { +				/* This can happen if backtracking +				 * is propagating stack precision where +				 * caller has larger stack frame +				 * than callee, but backtrack_insn() should +				 * have returned -ENOTSUPP. +				 */ +				verbose(env, "BUG spi %d stack_size %d\n", +					i, func->allocated_stack); +				WARN_ONCE(1, "verifier backtracking bug"); +				return -EFAULT; +			} + +			if (func->stack[i].slot_type[0] != STACK_SPILL) +				continue; +			reg = &func->stack[i].spilled_ptr; +			if (reg->type != SCALAR_VALUE) +				continue; +			if (!reg->precise) +				new_marks = true; +			reg->precise = true; +		} +		if (env->log.level & BPF_LOG_LEVEL) { +			print_verifier_state(env, func); +			verbose(env, "parent %s regs=%x stack=%llx marks\n", +				new_marks ? "didn't have" : "already had", +				reg_mask, stack_mask); +		} + +		if (!new_marks) +			break; + +		last_idx = st->last_insn_idx; +		first_idx = st->first_insn_idx; +	} +	return 0; +} + +  static bool is_spillable_regtype(enum bpf_reg_type type)  {  	switch (type) { @@ -1355,6 +1800,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)  	case PTR_TO_SOCK_COMMON_OR_NULL:  	case PTR_TO_TCP_SOCK:  	case PTR_TO_TCP_SOCK_OR_NULL: +	case PTR_TO_XDP_SOCK:  		return true;  	default:  		return false; @@ -1367,6 +1813,23 @@ static bool register_is_null(struct bpf_reg_state *reg)  	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);  } +static bool register_is_const(struct bpf_reg_state *reg) +{ +	return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off); +} + +static void save_register_state(struct bpf_func_state *state, +				int spi, struct bpf_reg_state *reg) +{ +	int i; + +	state->stack[spi].spilled_ptr = *reg; +	state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; + +	for (i = 0; i < BPF_REG_SIZE; i++) +		state->stack[spi].slot_type[i] = STACK_SPILL; +} +  /* check_stack_read/write functions track spill/fill of registers,   * stack boundary and alignment are checked in check_mem_access()   */ @@ -1376,7 +1839,8 @@ static int check_stack_write(struct bpf_verifier_env *env,  {  	struct bpf_func_state *cur; /* state of the current function */  	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err; -	enum bpf_reg_type type; +	u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg; +	struct bpf_reg_state *reg = NULL;  	err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),  				 state->acquired_refs, true); @@ -1393,27 +1857,48 @@ static int check_stack_write(struct bpf_verifier_env *env,  	}  	cur = env->cur_state->frame[env->cur_state->curframe]; -	if (value_regno >= 0 && -	    is_spillable_regtype((type = cur->regs[value_regno].type))) { - +	if (value_regno >= 0) +		reg = &cur->regs[value_regno]; + +	if (reg && size == BPF_REG_SIZE && register_is_const(reg) && +	    !register_is_null(reg) && env->allow_ptr_leaks) { +		if (dst_reg != BPF_REG_FP) { +			/* The backtracking logic can only recognize explicit +			 * stack slot address like [fp - 8]. 
Other spill of +			 * scalar via different register has to be conervative. +			 * Backtrack from here and mark all registers as precise +			 * that contributed into 'reg' being a constant. +			 */ +			err = mark_chain_precision(env, value_regno); +			if (err) +				return err; +		} +		save_register_state(state, spi, reg); +	} else if (reg && is_spillable_regtype(reg->type)) {  		/* register containing pointer is being spilled into stack */  		if (size != BPF_REG_SIZE) { +			verbose_linfo(env, insn_idx, "; ");  			verbose(env, "invalid size of register spill\n");  			return -EACCES;  		} -		if (state != cur && type == PTR_TO_STACK) { +		if (state != cur && reg->type == PTR_TO_STACK) {  			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");  			return -EINVAL;  		} -		/* save register state */ -		state->stack[spi].spilled_ptr = cur->regs[value_regno]; -		state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; +		if (!env->allow_ptr_leaks) { +			bool sanitize = false; -		for (i = 0; i < BPF_REG_SIZE; i++) { -			if (state->stack[spi].slot_type[i] == STACK_MISC && -			    !env->allow_ptr_leaks) { +			if (state->stack[spi].slot_type[0] == STACK_SPILL && +			    register_is_const(&state->stack[spi].spilled_ptr)) +				sanitize = true; +			for (i = 0; i < BPF_REG_SIZE; i++) +				if (state->stack[spi].slot_type[i] == STACK_MISC) { +					sanitize = true; +					break; +				} +			if (sanitize) {  				int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;  				int soff = (-spi - 1) * BPF_REG_SIZE; @@ -1436,8 +1921,8 @@ static int check_stack_write(struct bpf_verifier_env *env,  				}  				*poff = soff;  			} -			state->stack[spi].slot_type[i] = STACK_SPILL;  		} +		save_register_state(state, spi, reg);  	} else {  		u8 type = STACK_MISC; @@ -1460,9 +1945,13 @@ static int check_stack_write(struct bpf_verifier_env *env,  			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;  		/* when we zero initialize stack slots mark them as such */ -		if (value_regno >= 0 && -		    register_is_null(&cur->regs[value_regno])) +		if (reg && register_is_null(reg)) { +			/* backtracking doesn't work for STACK_ZERO yet. */ +			err = mark_chain_precision(env, value_regno); +			if (err) +				return err;  			type = STACK_ZERO; +		}  		/* Mark slots affected by this stack write. 
*/  		for (i = 0; i < size; i++) @@ -1479,6 +1968,7 @@ static int check_stack_read(struct bpf_verifier_env *env,  	struct bpf_verifier_state *vstate = env->cur_state;  	struct bpf_func_state *state = vstate->frame[vstate->curframe];  	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE; +	struct bpf_reg_state *reg;  	u8 *stype;  	if (reg_state->allocated_stack <= slot) { @@ -1487,11 +1977,21 @@ static int check_stack_read(struct bpf_verifier_env *env,  		return -EACCES;  	}  	stype = reg_state->stack[spi].slot_type; +	reg = ®_state->stack[spi].spilled_ptr;  	if (stype[0] == STACK_SPILL) {  		if (size != BPF_REG_SIZE) { -			verbose(env, "invalid size of register spill\n"); -			return -EACCES; +			if (reg->type != SCALAR_VALUE) { +				verbose_linfo(env, env->insn_idx, "; "); +				verbose(env, "invalid size of register fill\n"); +				return -EACCES; +			} +			if (value_regno >= 0) { +				mark_reg_unknown(env, state->regs, value_regno); +				state->regs[value_regno].live |= REG_LIVE_WRITTEN; +			} +			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); +			return 0;  		}  		for (i = 1; i < BPF_REG_SIZE; i++) {  			if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) { @@ -1502,17 +2002,14 @@ static int check_stack_read(struct bpf_verifier_env *env,  		if (value_regno >= 0) {  			/* restore register state from stack */ -			state->regs[value_regno] = reg_state->stack[spi].spilled_ptr; +			state->regs[value_regno] = *reg;  			/* mark reg as written since spilled pointer state likely  			 * has its liveness marks cleared by is_state_visited()  			 * which resets stack/reg liveness for state transitions  			 */  			state->regs[value_regno].live |= REG_LIVE_WRITTEN;  		} -		mark_reg_read(env, ®_state->stack[spi].spilled_ptr, -			      reg_state->stack[spi].spilled_ptr.parent, -			      REG_LIVE_READ64); -		return 0; +		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);  	} else {  		int zeros = 0; @@ -1527,23 +2024,32 @@ static int check_stack_read(struct bpf_verifier_env *env,  				off, i, size);  			return -EACCES;  		} -		mark_reg_read(env, ®_state->stack[spi].spilled_ptr, -			      reg_state->stack[spi].spilled_ptr.parent, -			      REG_LIVE_READ64); +		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);  		if (value_regno >= 0) {  			if (zeros == size) {  				/* any size read into register is zero extended,  				 * so the whole register == const_zero  				 */  				__mark_reg_const_zero(&state->regs[value_regno]); +				/* backtracking doesn't support STACK_ZERO yet, +				 * so mark it precise here, so that later +				 * backtracking can stop here. +				 * Backtracking may not need this if this register +				 * doesn't participate in pointer adjustment. +				 * Forward propagation of precise flag is not +				 * necessary either. This mark is only to stop +				 * backtracking. Any register that contributed +				 * to const 0 was marked precise before spill. 
+				 */ +				state->regs[value_regno].precise = true;  			} else {  				/* have read misc data from the stack */  				mark_reg_unknown(env, state->regs, value_regno);  			}  			state->regs[value_regno].live |= REG_LIVE_WRITTEN;  		} -		return 0;  	} +	return 0;  }  static int check_stack_access(struct bpf_verifier_env *env, @@ -1835,6 +2341,9 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,  	case PTR_TO_TCP_SOCK:  		valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);  		break; +	case PTR_TO_XDP_SOCK: +		valid = bpf_xdp_sock_is_valid_access(off, size, t, &info); +		break;  	default:  		valid = false;  	} @@ -1999,6 +2508,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,  	case PTR_TO_TCP_SOCK:  		pointer_desc = "tcp_sock ";  		break; +	case PTR_TO_XDP_SOCK: +		pointer_desc = "xdp_sock "; +		break;  	default:  		break;  	} @@ -2398,7 +2910,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,  {  	struct bpf_reg_state *reg = reg_state(env, regno);  	struct bpf_func_state *state = func(env, reg); -	int err, min_off, max_off, i, slot, spi; +	int err, min_off, max_off, i, j, slot, spi;  	if (reg->type != PTR_TO_STACK) {  		/* Allow zero-byte read from NULL, regardless of pointer type */ @@ -2486,6 +2998,14 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,  			*stype = STACK_MISC;  			goto mark;  		} +		if (state->stack[spi].slot_type[0] == STACK_SPILL && +		    state->stack[spi].spilled_ptr.type == SCALAR_VALUE) { +			__mark_reg_unknown(&state->stack[spi].spilled_ptr); +			for (j = 0; j < BPF_REG_SIZE; j++) +				state->stack[spi].slot_type[j] = STACK_MISC; +			goto mark; +		} +  err:  		if (tnum_is_const(reg->var_off)) {  			verbose(env, "invalid indirect read from stack off %d+%d size %d\n", @@ -2837,6 +3357,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,  		err = check_helper_mem_access(env, regno - 1,  					      reg->umax_value,  					      zero_size_allowed, meta); +		if (!err) +			err = mark_chain_precision(env, regno);  	} else if (arg_type_is_int_ptr(arg_type)) {  		int size = int_ptr_type_to_size(arg_type); @@ -2897,10 +3419,14 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,  	 * appear.  	 
*/  	case BPF_MAP_TYPE_CPUMAP: -	case BPF_MAP_TYPE_XSKMAP:  		if (func_id != BPF_FUNC_redirect_map)  			goto error;  		break; +	case BPF_MAP_TYPE_XSKMAP: +		if (func_id != BPF_FUNC_redirect_map && +		    func_id != BPF_FUNC_map_lookup_elem) +			goto error; +		break;  	case BPF_MAP_TYPE_ARRAY_OF_MAPS:  	case BPF_MAP_TYPE_HASH_OF_MAPS:  		if (func_id != BPF_FUNC_map_lookup_elem) @@ -3791,6 +4317,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,  	case PTR_TO_SOCK_COMMON_OR_NULL:  	case PTR_TO_TCP_SOCK:  	case PTR_TO_TCP_SOCK_OR_NULL: +	case PTR_TO_XDP_SOCK:  		verbose(env, "R%d pointer arithmetic on %s prohibited\n",  			dst, reg_type_str[ptr_reg->type]);  		return -EACCES; @@ -4268,6 +4795,7 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,  	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;  	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};  	u8 opcode = BPF_OP(insn->code); +	int err;  	dst_reg = ®s[insn->dst_reg];  	src_reg = NULL; @@ -4294,11 +4822,17 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,  				 * This is legal, but we have to reverse our  				 * src/dest handling in computing the range  				 */ +				err = mark_chain_precision(env, insn->dst_reg); +				if (err) +					return err;  				return adjust_ptr_min_max_vals(env, insn,  							       src_reg, dst_reg);  			}  		} else if (ptr_reg) {  			/* pointer += scalar */ +			err = mark_chain_precision(env, insn->src_reg); +			if (err) +				return err;  			return adjust_ptr_min_max_vals(env, insn,  						       dst_reg, src_reg);  		} @@ -5030,6 +5564,9 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,  			if (reg->map_ptr->inner_map_meta) {  				reg->type = CONST_PTR_TO_MAP;  				reg->map_ptr = reg->map_ptr->inner_map_meta; +			} else if (reg->map_ptr->map_type == +				   BPF_MAP_TYPE_XSKMAP) { +				reg->type = PTR_TO_XDP_SOCK;  			} else {  				reg->type = PTR_TO_MAP_VALUE;  			} @@ -5201,9 +5738,10 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,  	struct bpf_verifier_state *this_branch = env->cur_state;  	struct bpf_verifier_state *other_branch;  	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs; -	struct bpf_reg_state *dst_reg, *other_branch_regs; +	struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;  	u8 opcode = BPF_OP(insn->code);  	bool is_jmp32; +	int pred = -1;  	int err;  	/* Only conditional jumps are expected to reach here. 
*/ @@ -5228,6 +5766,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,  				insn->src_reg);  			return -EACCES;  		} +		src_reg = ®s[insn->src_reg];  	} else {  		if (insn->src_reg != BPF_REG_0) {  			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); @@ -5243,20 +5782,29 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,  	dst_reg = ®s[insn->dst_reg];  	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; -	if (BPF_SRC(insn->code) == BPF_K) { -		int pred = is_branch_taken(dst_reg, insn->imm, opcode, -					   is_jmp32); - -		if (pred == 1) { -			 /* only follow the goto, ignore fall-through */ -			*insn_idx += insn->off; -			return 0; -		} else if (pred == 0) { -			/* only follow fall-through branch, since -			 * that's where the program will go -			 */ -			return 0; -		} +	if (BPF_SRC(insn->code) == BPF_K) +		pred = is_branch_taken(dst_reg, insn->imm, +				       opcode, is_jmp32); +	else if (src_reg->type == SCALAR_VALUE && +		 tnum_is_const(src_reg->var_off)) +		pred = is_branch_taken(dst_reg, src_reg->var_off.value, +				       opcode, is_jmp32); +	if (pred >= 0) { +		err = mark_chain_precision(env, insn->dst_reg); +		if (BPF_SRC(insn->code) == BPF_X && !err) +			err = mark_chain_precision(env, insn->src_reg); +		if (err) +			return err; +	} +	if (pred == 1) { +		/* only follow the goto, ignore fall-through */ +		*insn_idx += insn->off; +		return 0; +	} else if (pred == 0) { +		/* only follow fall-through branch, since +		 * that's where the program will go +		 */ +		return 0;  	}  	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, @@ -5616,7 +6164,8 @@ static void init_explored_state(struct bpf_verifier_env *env, int idx)   * w - next instruction   * e - edge   */ -static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) +static int push_insn(int t, int w, int e, struct bpf_verifier_env *env, +		     bool loop_ok)  {  	int *insn_stack = env->cfg.insn_stack;  	int *insn_state = env->cfg.insn_state; @@ -5646,6 +6195,8 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)  		insn_stack[env->cfg.cur_stack++] = w;  		return 1;  	} else if ((insn_state[w] & 0xF0) == DISCOVERED) { +		if (loop_ok && env->allow_ptr_leaks) +			return 0;  		verbose_linfo(env, t, "%d: ", t);  		verbose_linfo(env, w, "%d: ", w);  		verbose(env, "back-edge from insn %d to %d\n", t, w); @@ -5697,7 +6248,7 @@ peek_stack:  		if (opcode == BPF_EXIT) {  			goto mark_explored;  		} else if (opcode == BPF_CALL) { -			ret = push_insn(t, t + 1, FALLTHROUGH, env); +			ret = push_insn(t, t + 1, FALLTHROUGH, env, false);  			if (ret == 1)  				goto peek_stack;  			else if (ret < 0) @@ -5706,7 +6257,8 @@ peek_stack:  				init_explored_state(env, t + 1);  			if (insns[t].src_reg == BPF_PSEUDO_CALL) {  				init_explored_state(env, t); -				ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env); +				ret = push_insn(t, t + insns[t].imm + 1, BRANCH, +						env, false);  				if (ret == 1)  					goto peek_stack;  				else if (ret < 0) @@ -5719,11 +6271,16 @@ peek_stack:  			}  			/* unconditional jump with single edge */  			ret = push_insn(t, t + insns[t].off + 1, -					FALLTHROUGH, env); +					FALLTHROUGH, env, true);  			if (ret == 1)  				goto peek_stack;  			else if (ret < 0)  				goto err_free; +			/* unconditional jmp is not a good pruning point, +			 * but it's marked, since backtracking needs +			 * to record jmp history in is_state_visited(). 
+			 */ +			init_explored_state(env, t + insns[t].off + 1);  			/* tell verifier to check for equivalent states  			 * after every call and jump  			 */ @@ -5732,13 +6289,13 @@ peek_stack:  		} else {  			/* conditional jump with two edges */  			init_explored_state(env, t); -			ret = push_insn(t, t + 1, FALLTHROUGH, env); +			ret = push_insn(t, t + 1, FALLTHROUGH, env, true);  			if (ret == 1)  				goto peek_stack;  			else if (ret < 0)  				goto err_free; -			ret = push_insn(t, t + insns[t].off + 1, BRANCH, env); +			ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true);  			if (ret == 1)  				goto peek_stack;  			else if (ret < 0) @@ -5748,7 +6305,7 @@ peek_stack:  		/* all other non-branch instructions with single  		 * fall-through edge  		 */ -		ret = push_insn(t, t + 1, FALLTHROUGH, env); +		ret = push_insn(t, t + 1, FALLTHROUGH, env, false);  		if (ret == 1)  			goto peek_stack;  		else if (ret < 0) @@ -6181,6 +6738,8 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn,  	sl = *explored_state(env, insn);  	while (sl) { +		if (sl->state.branches) +			goto next;  		if (sl->state.insn_idx != insn ||  		    sl->state.curframe != cur->curframe)  			goto next; @@ -6222,6 +6781,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,  	switch (rold->type) {  	case SCALAR_VALUE:  		if (rcur->type == SCALAR_VALUE) { +			if (!rold->precise && !rcur->precise) +				return true;  			/* new val must satisfy old val knowledge */  			return range_within(rold, rcur) &&  			       tnum_in(rold->var_off, rcur->var_off); @@ -6294,6 +6855,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,  	case PTR_TO_SOCK_COMMON_OR_NULL:  	case PTR_TO_TCP_SOCK:  	case PTR_TO_TCP_SOCK_OR_NULL: +	case PTR_TO_XDP_SOCK:  		/* Only valid matches are exact, which memcmp() above  		 * would have accepted  		 */ @@ -6544,19 +7106,52 @@ static int propagate_liveness(struct bpf_verifier_env *env,  	return 0;  } +static bool states_maybe_looping(struct bpf_verifier_state *old, +				 struct bpf_verifier_state *cur) +{ +	struct bpf_func_state *fold, *fcur; +	int i, fr = cur->curframe; + +	if (old->curframe != fr) +		return false; + +	fold = old->frame[fr]; +	fcur = cur->frame[fr]; +	for (i = 0; i < MAX_BPF_REG; i++) +		if (memcmp(&fold->regs[i], &fcur->regs[i], +			   offsetof(struct bpf_reg_state, parent))) +			return false; +	return true; +} + +  static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)  {  	struct bpf_verifier_state_list *new_sl;  	struct bpf_verifier_state_list *sl, **pprev;  	struct bpf_verifier_state *cur = env->cur_state, *new;  	int i, j, err, states_cnt = 0; +	bool add_new_state = false; +	cur->last_insn_idx = env->prev_insn_idx;  	if (!env->insn_aux_data[insn_idx].prune_point)  		/* this 'insn_idx' instruction wasn't marked, so we will not  		 * be doing state search here  		 */  		return 0; +	/* bpf progs typically have pruning point every 4 instructions +	 * http://vger.kernel.org/bpfconf2019.html#session-1 +	 * Do not add new state for future pruning if the verifier hasn't seen +	 * at least 2 jumps and at least 8 instructions. +	 * This heuristics helps decrease 'total_states' and 'peak_states' metric. +	 * In tests that amounts to up to 50% reduction into total verifier +	 * memory consumption and 20% verifier time speedup. 
+	 */ +	if (env->jmps_processed - env->prev_jmps_processed >= 2 && +	    env->insn_processed - env->prev_insn_processed >= 8) +		add_new_state = true; +  	pprev = explored_state(env, insn_idx);  	sl = *pprev; @@ -6566,6 +7161,30 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)  		states_cnt++;  		if (sl->state.insn_idx != insn_idx)  			goto next; +		if (sl->state.branches) { +			if (states_maybe_looping(&sl->state, cur) && +			    states_equal(env, &sl->state, cur)) { +				verbose_linfo(env, insn_idx, "; "); +				verbose(env, "infinite loop detected at insn %d\n", insn_idx); +				return -EINVAL; +			} +			/* if the verifier is processing a loop, avoid adding new state +			 * too often, since different loop iterations have distinct +			 * states and may not help future pruning. +			 * This threshold shouldn't be too low to make sure that +			 * a loop with large bound will be rejected quickly. +			 * The most abusive loop will be: +			 * r1 += 1 +			 * if r1 < 1000000 goto pc-2 +			 * 1M insn_procssed limit / 100 == 10k peak states. +			 * This threshold shouldn't be too high either, since states +			 * at the end of the loop are likely to be useful in pruning. +			 */ +			if (env->jmps_processed - env->prev_jmps_processed < 20 && +			    env->insn_processed - env->prev_insn_processed < 100) +				add_new_state = false; +			goto miss; +		}  		if (states_equal(env, &sl->state, cur)) {  			sl->hit_cnt++;  			/* reached equivalent register/stack state, @@ -6583,7 +7202,15 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)  				return err;  			return 1;  		} -		sl->miss_cnt++; +miss: +		/* when new state is not going to be added do not increase miss count. +		 * Otherwise several loop iterations will remove the state +		 * recorded earlier. The goal of these heuristics is to have +		 * states from some iterations of the loop (some in the beginning +		 * and some at the end) to help pruning. +		 */ +		if (add_new_state) +			sl->miss_cnt++;  		/* heuristic to determine whether this state is beneficial  		 * to keep checking from state equivalence point of view.  		 * Higher numbers increase max_states_per_insn and verification time, @@ -6595,6 +7222,11 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)  			 */  			*pprev = sl->next;  			if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) { +				u32 br = sl->state.branches; + +				WARN_ONCE(br, +					  "BUG live_done but branches_to_explore %d\n", +					  br);  				free_verifier_state(&sl->state, false);  				kfree(sl);  				env->peak_states--; @@ -6618,20 +7250,27 @@ next:  		env->max_states_per_insn = states_cnt;  	if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) -		return 0; +		return push_jmp_history(env, cur); + +	if (!add_new_state) +		return push_jmp_history(env, cur); -	/* there were no equivalent states, remember current one. -	 * technically the current state is not proven to be safe yet, +	/* There were no equivalent states, remember the current one. +	 * Technically the current state is not proven to be safe yet,  	 * but it will either reach outer most bpf_exit (which means it's safe) -	 * or it will be rejected. Since there are no loops, we won't be +	 * or it will be rejected. When there are no loops the verifier won't be  	 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx) -	 * again on the way to bpf_exit +	 * again on the way to bpf_exit. 
+	 * When looping the sl->state.branches will be > 0 and this state +	 * will not be considered for equivalence until branches == 0.  	 */  	new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);  	if (!new_sl)  		return -ENOMEM;  	env->total_states++;  	env->peak_states++; +	env->prev_jmps_processed = env->jmps_processed; +	env->prev_insn_processed = env->insn_processed;  	/* add new state to the head of linked list */  	new = &new_sl->state; @@ -6642,6 +7281,12 @@ next:  		return err;  	}  	new->insn_idx = insn_idx; +	WARN_ONCE(new->branches != 1, +		  "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx); + +	cur->parent = new; +	cur->first_insn_idx = insn_idx; +	clear_jmp_history(cur);  	new_sl->next = *explored_state(env, insn_idx);  	*explored_state(env, insn_idx) = new_sl;  	/* connect new state to parentage chain. Current frame needs all @@ -6651,17 +7296,18 @@ next:  	 * the state of the call instruction (with WRITTEN set), and r0 comes  	 * from callee with its full parentage chain, anyway.  	 */ -	for (j = 0; j <= cur->curframe; j++) -		for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) -			cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];  	/* clear write marks in current state: the writes we did are not writes  	 * our child did, so they don't screen off its reads from us.  	 * (There are no read marks in current state, because reads always mark  	 * their parent and current state never has children yet.  Only  	 * explored_states can get read marks.)  	 */ -	for (i = 0; i < BPF_REG_FP; i++) -		cur->frame[cur->curframe]->regs[i].live = REG_LIVE_NONE; +	for (j = 0; j <= cur->curframe; j++) { +		for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) +			cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i]; +		for (i = 0; i < BPF_REG_FP; i++) +			cur->frame[j]->regs[i].live = REG_LIVE_NONE; +	}  	/* all stack frames are accessible from callee, clear them all */  	for (j = 0; j <= cur->curframe; j++) { @@ -6688,6 +7334,7 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)  	case PTR_TO_SOCK_COMMON_OR_NULL:  	case PTR_TO_TCP_SOCK:  	case PTR_TO_TCP_SOCK_OR_NULL: +	case PTR_TO_XDP_SOCK:  		return false;  	default:  		return true; @@ -6719,6 +7366,7 @@ static int do_check(struct bpf_verifier_env *env)  	struct bpf_reg_state *regs;  	int insn_cnt = env->prog->len;  	bool do_print_state = false; +	int prev_insn_idx = -1;  	env->prev_linfo = NULL; @@ -6727,6 +7375,7 @@ static int do_check(struct bpf_verifier_env *env)  		return -ENOMEM;  	state->curframe = 0;  	state->speculative = false; +	state->branches = 1;  	state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);  	if (!state->frame[0]) {  		kfree(state); @@ -6743,6 +7392,7 @@ static int do_check(struct bpf_verifier_env *env)  		u8 class;  		int err; +		env->prev_insn_idx = prev_insn_idx;  		if (env->insn_idx >= insn_cnt) {  			verbose(env, "invalid insn idx %d insn_cnt %d\n",  				env->insn_idx, insn_cnt); @@ -6815,6 +7465,7 @@ static int do_check(struct bpf_verifier_env *env)  		regs = cur_regs(env);  		env->insn_aux_data[env->insn_idx].seen = true; +		prev_insn_idx = env->insn_idx;  		if (class == BPF_ALU || class == BPF_ALU64) {  			err = check_alu_op(env, insn); @@ -6933,6 +7584,7 @@ static int do_check(struct bpf_verifier_env *env)  		} else if (class == BPF_JMP || class == BPF_JMP32) {  			u8 opcode = BPF_OP(insn->code); +			env->jmps_processed++;  			if (opcode == BPF_CALL) {  				if (BPF_SRC(insn->code) != BPF_K ||  				    
insn->off != 0 || @@ -6987,7 +7639,6 @@ static int do_check(struct bpf_verifier_env *env)  				if (state->curframe) {  					/* exit from nested function */ -					env->prev_insn_idx = env->insn_idx;  					err = prepare_func_exit(env, &env->insn_idx);  					if (err)  						return err; @@ -7018,7 +7669,8 @@ static int do_check(struct bpf_verifier_env *env)  				if (err)  					return err;  process_bpf_exit: -				err = pop_stack(env, &env->prev_insn_idx, +				update_branch_counts(env, env->cur_state); +				err = pop_stack(env, &prev_insn_idx,  						&env->insn_idx);  				if (err < 0) {  					if (err != -ENOENT) @@ -7821,6 +8473,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)  		case PTR_TO_TCP_SOCK:  			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;  			break; +		case PTR_TO_XDP_SOCK: +			convert_ctx_access = bpf_xdp_sock_convert_ctx_access; +			break;  		default:  			continue;  		} diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c index 22066c28ba61..ef7338cebd18 100644 --- a/kernel/bpf/xskmap.c +++ b/kernel/bpf/xskmap.c @@ -17,8 +17,8 @@ struct xsk_map {  static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)  { -	int cpu, err = -EINVAL;  	struct xsk_map *m; +	int cpu, err;  	u64 cost;  	if (!capable(CAP_NET_ADMIN)) @@ -152,6 +152,12 @@ void __xsk_map_flush(struct bpf_map *map)  static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)  { +	WARN_ON_ONCE(!rcu_read_lock_held()); +	return __xsk_map_lookup_elem(map, *(u32 *)key); +} + +static void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key) +{  	return ERR_PTR(-EOPNOTSUPP);  } @@ -218,6 +224,7 @@ const struct bpf_map_ops xsk_map_ops = {  	.map_free = xsk_map_free,  	.map_get_next_key = xsk_map_get_next_key,  	.map_lookup_elem = xsk_map_lookup_elem, +	.map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only,  	.map_update_elem = xsk_map_update_elem,  	.map_delete_elem = xsk_map_delete_elem,  	.map_check_btf = map_check_no_btf, diff --git a/net/core/filter.c b/net/core/filter.c index 949adc3d9abb..2014d76e0d2a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5695,6 +5695,46 @@ BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)  	return INET_ECN_set_ce(skb);  } +bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type, +				  struct bpf_insn_access_aux *info) +{ +	if (off < 0 || off >= offsetofend(struct bpf_xdp_sock, queue_id)) +		return false; + +	if (off % size != 0) +		return false; + +	switch (off) { +	default: +		return size == sizeof(__u32); +	} +} + +u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, +				    const struct bpf_insn *si, +				    struct bpf_insn *insn_buf, +				    struct bpf_prog *prog, u32 *target_size) +{ +	struct bpf_insn *insn = insn_buf; + +#define BPF_XDP_SOCK_GET(FIELD)						\ +	do {								\ +		BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_sock, FIELD) >	\ +			     FIELD_SIZEOF(struct bpf_xdp_sock, FIELD));	\ +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_sock, FIELD),\ +				      si->dst_reg, si->src_reg,		\ +				      offsetof(struct xdp_sock, FIELD)); \ +	} while (0) + +	switch (si->off) { +	case offsetof(struct bpf_xdp_sock, queue_id): +		BPF_XDP_SOCK_GET(queue_id); +		break; +	} + +	return insn - insn_buf; +} +  static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = {  	.func           = bpf_skb_ecn_set_ce,  	.gpl_only       = false, @@ -5897,6 +5937,10 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)  	case BPF_FUNC_skc_lookup_tcp:  		return 
&bpf_sock_addr_skc_lookup_tcp_proto;  #endif /* CONFIG_INET */ +	case BPF_FUNC_sk_storage_get: +		return &bpf_sk_storage_get_proto; +	case BPF_FUNC_sk_storage_delete: +		return &bpf_sk_storage_delete_proto;  	default:  		return bpf_base_func_proto(func_id);  	} @@ -5934,6 +5978,10 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)  		return &bpf_sk_storage_get_proto;  	case BPF_FUNC_sk_storage_delete:  		return &bpf_sk_storage_delete_proto; +#ifdef CONFIG_SOCK_CGROUP_DATA +	case BPF_FUNC_skb_cgroup_id: +		return &bpf_skb_cgroup_id_proto; +#endif  #ifdef CONFIG_INET  	case BPF_FUNC_tcp_sock:  		return &bpf_tcp_sock_proto; @@ -6114,6 +6162,14 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)  		return &bpf_get_local_storage_proto;  	case BPF_FUNC_perf_event_output:  		return &bpf_sockopt_event_output_proto; +	case BPF_FUNC_sk_storage_get: +		return &bpf_sk_storage_get_proto; +	case BPF_FUNC_sk_storage_delete: +		return &bpf_sk_storage_delete_proto; +#ifdef CONFIG_INET +	case BPF_FUNC_tcp_sock: +		return &bpf_tcp_sock_proto; +#endif /* CONFIG_INET */  	default:  		return bpf_base_func_proto(func_id);  	} @@ -6801,6 +6857,13 @@ static bool sock_addr_is_valid_access(int off, int size,  		if (size != size_default)  			return false;  		break; +	case offsetof(struct bpf_sock_addr, sk): +		if (type != BPF_READ) +			return false; +		if (size != sizeof(__u64)) +			return false; +		info->reg_type = PTR_TO_SOCKET; +		break;  	default:  		if (type == BPF_READ) {  			if (size != size_default) @@ -6844,6 +6907,11 @@ static bool sock_ops_is_valid_access(int off, int size,  			if (size != sizeof(__u64))  				return false;  			break; +		case offsetof(struct bpf_sock_ops, sk): +			if (size != sizeof(__u64)) +				return false; +			info->reg_type = PTR_TO_SOCKET_OR_NULL; +			break;  		default:  			if (size != size_default)  				return false; @@ -7751,6 +7819,11 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,  			struct bpf_sock_addr_kern, struct in6_addr, t_ctx,  			s6_addr32[0], BPF_SIZE(si->code), off, tmp_reg);  		break; +	case offsetof(struct bpf_sock_addr, sk): +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_addr_kern, sk), +				      si->dst_reg, si->src_reg, +				      offsetof(struct bpf_sock_addr_kern, sk)); +		break;  	}  	return insn - insn_buf; @@ -8010,6 +8083,19 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,  		SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,  					  struct sock, type);  		break; +	case offsetof(struct bpf_sock_ops, sk): +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( +						struct bpf_sock_ops_kern, +						is_fullsock), +				      si->dst_reg, si->src_reg, +				      offsetof(struct bpf_sock_ops_kern, +					       is_fullsock)); +		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( +						struct bpf_sock_ops_kern, sk), +				      si->dst_reg, si->src_reg, +				      offsetof(struct bpf_sock_ops_kern, sk)); +		break;  	}  	return insn - insn_buf;  } diff --git a/net/core/sock.c b/net/core/sock.c index af09a23e4822..ef471f643c95 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1039,6 +1039,10 @@ set_rcvbuf:  		}  		break; +	case SO_DETACH_REUSEPORT_BPF: +		ret = reuseport_detach_prog(sk); +		break; +  	case SO_DETACH_FILTER:  		ret = sk_detach_filter(sk);  		break; diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index dc4aefdf2a08..9408f9264d05 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c 
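For reference, a minimal user-space sketch (not part of this patch) of detaching a reuseport BPF program with the new socket option handled in the net/core/sock.c hunk above. The constant's numeric value is architecture specific (68 in the asm-generic layout, different on parisc/sparc per the header hunks), so it is defined here only as a fallback in case the installed libc headers do not yet carry it; detach_reuseport_prog() is a hypothetical helper name used for illustration.

#include <stdio.h>
#include <sys/socket.h>

#ifndef SO_DETACH_REUSEPORT_BPF
#define SO_DETACH_REUSEPORT_BPF 68	/* asm-generic value; assumption: libc headers predate this option */
#endif

/* sk_fd: a SO_REUSEPORT socket that previously had a program attached
 * with SO_ATTACH_REUSEPORT_(C|E)BPF.  The option value itself is not
 * used by the detach handler, but sock_setsockopt() still expects an
 * int-sized argument, so pass a dummy int.
 */
static int detach_reuseport_prog(int sk_fd)
{
	int dummy = 0;

	if (setsockopt(sk_fd, SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
		       &dummy, sizeof(dummy)) == -1) {
		/* errno is ENOENT when no program was attached,
		 * EINVAL when the socket is not a reuseport socket.
		 */
		perror("setsockopt(SO_DETACH_REUSEPORT_BPF)");
		return -1;
	}
	return 0;
}

The kernel-side behaviour this relies on is exactly the reuseport_detach_prog() hunk that follows.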
@@ -332,3 +332,27 @@ int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)  	return 0;  }  EXPORT_SYMBOL(reuseport_attach_prog); + +int reuseport_detach_prog(struct sock *sk) +{ +	struct sock_reuseport *reuse; +	struct bpf_prog *old_prog; + +	if (!rcu_access_pointer(sk->sk_reuseport_cb)) +		return sk->sk_reuseport ? -ENOENT : -EINVAL; + +	old_prog = NULL; +	spin_lock_bh(&reuseport_lock); +	reuse = rcu_dereference_protected(sk->sk_reuseport_cb, +					  lockdep_is_held(&reuseport_lock)); +	rcu_swap_protected(reuse->prog, old_prog, +			   lockdep_is_held(&reuseport_lock)); +	spin_unlock_bh(&reuseport_lock); + +	if (!old_prog) +		return -ENOENT; + +	sk_reuseport_prog_free(old_prog); +	return 0; +} +EXPORT_SYMBOL(reuseport_detach_prog); diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 253e5a2856be..0917f8cf4fab 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -170,21 +170,12 @@ always += ibumad_kern.o  always += hbm_out_kern.o  KBUILD_HOSTCFLAGS += -I$(objtree)/usr/include -KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/ +KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/bpf/  KBUILD_HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/  KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include  KBUILD_HOSTCFLAGS += -I$(srctree)/tools/perf  HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable -HOSTCFLAGS_trace_helpers.o += -I$(srctree)/tools/lib/bpf/ - -HOSTCFLAGS_trace_output_user.o += -I$(srctree)/tools/lib/bpf/ -HOSTCFLAGS_offwaketime_user.o += -I$(srctree)/tools/lib/bpf/ -HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/ -HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/ -HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/ -HOSTCFLAGS_task_fd_query_user.o += -I$(srctree)/tools/lib/bpf/ -HOSTCFLAGS_xdp_sample_pkts_user.o += -I$(srctree)/tools/lib/bpf/  KBUILD_HOSTLDLIBS		+= $(LIBBPF) -lelf  HOSTLDLIBS_tracex4		+= -lrt @@ -206,6 +197,17 @@ HOSTCC = $(CROSS_COMPILE)gcc  CLANG_ARCH_ARGS = -target $(ARCH)  endif +# Don't evaluate probes and warnings if we need to run make recursively +ifneq ($(src),) +HDR_PROBE := $(shell echo "\#include <linux/types.h>\n struct list_head { int a; }; int main() { return 0; }" | \ +	$(HOSTCC) $(KBUILD_HOSTCFLAGS) -x c - -o /dev/null 2>/dev/null && \ +	echo okay) + +ifeq ($(HDR_PROBE),) +$(warning WARNING: Detected possible issues with include path.) +$(warning WARNING: Please install kernel headers locally (make headers_install).) 
+endif +  BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)  BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)  BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm') @@ -223,6 +225,7 @@ ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),)  	DWARF2BTF = y  endif  endif +endif  # Trick to allow make to be run from this directory  all: diff --git a/samples/bpf/fds_example.c b/samples/bpf/fds_example.c index e51eb060244e..2d4b717726b6 100644 --- a/samples/bpf/fds_example.c +++ b/samples/bpf/fds_example.c @@ -14,7 +14,7 @@  #include <bpf/bpf.h> -#include "bpf/libbpf.h" +#include "libbpf.h"  #include "bpf_insn.h"  #include "sock_example.h" diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c index 480b7ad6a1f2..b905b32ff185 100644 --- a/samples/bpf/hbm.c +++ b/samples/bpf/hbm.c @@ -50,8 +50,8 @@  #include "cgroup_helpers.h"  #include "hbm.h"  #include "bpf_util.h" -#include "bpf/bpf.h" -#include "bpf/libbpf.h" +#include "bpf.h" +#include "libbpf.h"  bool outFlag = true;  int minRate = 1000;		/* cgroup rate limit in Mbps */ @@ -411,7 +411,7 @@ static void Usage(void)  	       "    -l         also limit flows using loopback\n"  	       "    -n <#>     to create cgroup \"/hbm#\" and attach prog\n"  	       "               Default is /hbm1\n" -	       "    --no_cn    disable CN notifcations\n" +	       "    --no_cn    disable CN notifications\n"  	       "    -r <rate>  Rate in Mbps\n"  	       "    -s         Update HBM stats\n"  	       "    -t <time>  Exit after specified seconds (default is 0)\n" diff --git a/samples/bpf/ibumad_user.c b/samples/bpf/ibumad_user.c index 097d76143363..cb5a8f994849 100644 --- a/samples/bpf/ibumad_user.c +++ b/samples/bpf/ibumad_user.c @@ -25,7 +25,7 @@  #include "bpf_load.h"  #include "bpf_util.h" -#include "bpf/libbpf.h" +#include "libbpf.h"  static void dump_counts(int fd)  { diff --git a/samples/bpf/sockex1_user.c b/samples/bpf/sockex1_user.c index 7f90796ae15a..a219442afbee 100644 --- a/samples/bpf/sockex1_user.c +++ b/samples/bpf/sockex1_user.c @@ -3,7 +3,7 @@  #include <assert.h>  #include <linux/bpf.h>  #include <bpf/bpf.h> -#include "bpf/libbpf.h" +#include "libbpf.h"  #include "sock_example.h"  #include <unistd.h>  #include <arpa/inet.h> diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c index bc257333ad92..6de383ddd08b 100644 --- a/samples/bpf/sockex2_user.c +++ b/samples/bpf/sockex2_user.c @@ -3,7 +3,7 @@  #include <assert.h>  #include <linux/bpf.h>  #include <bpf/bpf.h> -#include "bpf/libbpf.h" +#include "libbpf.h"  #include "sock_example.h"  #include <unistd.h>  #include <arpa/inet.h> diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index 5b39421adb44..a8e5fa02e8a8 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -15,8 +15,8 @@  #include <net/if.h>  #include "bpf_util.h" -#include "bpf/bpf.h" -#include "bpf/libbpf.h" +#include "bpf.h" +#include "libbpf.h"  static int ifindex;  static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c index 07e1b9269e49..586ff751aba9 100644 --- a/samples/bpf/xdp_adjust_tail_user.c +++ b/samples/bpf/xdp_adjust_tail_user.c @@ -18,8 +18,8 @@  #include <netinet/ether.h>  #include <unistd.h>  #include <time.h> -#include "bpf/bpf.h" -#include "bpf/libbpf.h" +#include "bpf.h" +#include "libbpf.h"  #define STATS_INTERVAL_S 2U diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c index 
f88e1d7093d6..5b46ee12c696 100644 --- a/samples/bpf/xdp_fwd_user.c +++ b/samples/bpf/xdp_fwd_user.c @@ -24,7 +24,7 @@  #include <fcntl.h>  #include <libgen.h> -#include "bpf/libbpf.h" +#include "libbpf.h"  #include <bpf/bpf.h> diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 586b294d72d3..f5dc7e1f8bc6 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -25,7 +25,7 @@ static const char *__doc__ =  #define MAX_PROG 6  #include <bpf/bpf.h> -#include "bpf/libbpf.h" +#include "libbpf.h"  #include "bpf_util.h" diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c index be317f5f058f..15bb6f67f9c3 100644 --- a/samples/bpf/xdp_redirect_map_user.c +++ b/samples/bpf/xdp_redirect_map_user.c @@ -16,7 +16,7 @@  #include "bpf_util.h"  #include <bpf/bpf.h> -#include "bpf/libbpf.h" +#include "libbpf.h"  static int ifindex_in;  static int ifindex_out; diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c index 09747bee6668..ce71be187205 100644 --- a/samples/bpf/xdp_redirect_user.c +++ b/samples/bpf/xdp_redirect_user.c @@ -16,7 +16,7 @@  #include "bpf_util.h"  #include <bpf/bpf.h> -#include "bpf/libbpf.h" +#include "libbpf.h"  static int ifindex_in;  static int ifindex_out; diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index 1f66419631c3..1469b66ebad1 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -21,7 +21,7 @@  #include <sys/ioctl.h>  #include <sys/syscall.h>  #include "bpf_util.h" -#include "bpf/libbpf.h" +#include "libbpf.h"  #include <sys/resource.h>  #include <libgen.h> diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c index 1210f3b170f0..c7e4e45d824a 100644 --- a/samples/bpf/xdp_rxq_info_user.c +++ b/samples/bpf/xdp_rxq_info_user.c @@ -22,8 +22,8 @@ static const char *__doc__ = " XDP RX-queue info extract example\n\n"  #include <arpa/inet.h>  #include <linux/if_link.h> -#include "bpf/bpf.h" -#include "bpf/libbpf.h" +#include "bpf.h" +#include "libbpf.h"  #include "bpf_util.h"  static int ifindex = -1; diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index e746a00d122e..394896430712 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -14,7 +14,7 @@  #include <netinet/ether.h>  #include <unistd.h>  #include <time.h> -#include "bpf/libbpf.h" +#include "libbpf.h"  #include <bpf/bpf.h>  #include "bpf_util.h"  #include "xdp_tx_iptunnel_common.h" diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index d08ee1ab7bb4..0f5eb0d7f2df 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -27,8 +27,8 @@  #include <time.h>  #include <unistd.h> -#include "bpf/libbpf.h" -#include "bpf/xsk.h" +#include "libbpf.h" +#include "xsk.h"  #include <bpf/bpf.h>  #ifndef SOL_XDP diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index f7261fad45c1..5215e0870bcb 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -21,6 +21,7 @@  #include <sys/vfs.h>  #include <bpf.h> +#include <libbpf.h> /* libbpf_num_possible_cpus */  #include "main.h" @@ -439,57 +440,13 @@ unsigned int get_page_size(void)  unsigned int get_possible_cpus(void)  { -	static unsigned int result; -	char buf[128]; -	long int n; -	char *ptr; -	int fd; - -	if (result) -		return result; - -	fd = open("/sys/devices/system/cpu/possible", O_RDONLY); -	if (fd < 0) { 
-		p_err("can't open sysfs possible cpus"); -		exit(-1); -	} - -	n = read(fd, buf, sizeof(buf)); -	if (n < 2) { -		p_err("can't read sysfs possible cpus"); -		exit(-1); -	} -	close(fd); +	int cpus = libbpf_num_possible_cpus(); -	if (n == sizeof(buf)) { -		p_err("read sysfs possible cpus overflow"); +	if (cpus < 0) { +		p_err("Can't get # of possible cpus: %s", strerror(-cpus));  		exit(-1);  	} - -	ptr = buf; -	n = 0; -	while (*ptr && *ptr != '\n') { -		unsigned int a, b; - -		if (sscanf(ptr, "%u-%u", &a, &b) == 2) { -			n += b - a + 1; - -			ptr = strchr(ptr, '-') + 1; -		} else if (sscanf(ptr, "%u", &a) == 1) { -			n++; -		} else { -			assert(0); -		} - -		while (isdigit(*ptr)) -			ptr++; -		if (*ptr == ',') -			ptr++; -	} - -	result = n; - -	return result; +	return cpus;  }  static char * diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h new file mode 100644 index 000000000000..77f7c1638eb1 --- /dev/null +++ b/tools/include/uapi/asm-generic/socket.h @@ -0,0 +1,147 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __ASM_GENERIC_SOCKET_H +#define __ASM_GENERIC_SOCKET_H + +#include <linux/posix_types.h> +#include <asm/sockios.h> + +/* For setsockopt(2) */ +#define SOL_SOCKET	1 + +#define SO_DEBUG	1 +#define SO_REUSEADDR	2 +#define SO_TYPE		3 +#define SO_ERROR	4 +#define SO_DONTROUTE	5 +#define SO_BROADCAST	6 +#define SO_SNDBUF	7 +#define SO_RCVBUF	8 +#define SO_SNDBUFFORCE	32 +#define SO_RCVBUFFORCE	33 +#define SO_KEEPALIVE	9 +#define SO_OOBINLINE	10 +#define SO_NO_CHECK	11 +#define SO_PRIORITY	12 +#define SO_LINGER	13 +#define SO_BSDCOMPAT	14 +#define SO_REUSEPORT	15 +#ifndef SO_PASSCRED /* powerpc only differs in these */ +#define SO_PASSCRED	16 +#define SO_PEERCRED	17 +#define SO_RCVLOWAT	18 +#define SO_SNDLOWAT	19 +#define SO_RCVTIMEO_OLD	20 +#define SO_SNDTIMEO_OLD	21 +#endif + +/* Security levels - as per NRL IPv6 - don't actually do anything */ +#define SO_SECURITY_AUTHENTICATION		22 +#define SO_SECURITY_ENCRYPTION_TRANSPORT	23 +#define SO_SECURITY_ENCRYPTION_NETWORK		24 + +#define SO_BINDTODEVICE	25 + +/* Socket filtering */ +#define SO_ATTACH_FILTER	26 +#define SO_DETACH_FILTER	27 +#define SO_GET_FILTER		SO_ATTACH_FILTER + +#define SO_PEERNAME		28 + +#define SO_ACCEPTCONN		30 + +#define SO_PEERSEC		31 +#define SO_PASSSEC		34 + +#define SO_MARK			36 + +#define SO_PROTOCOL		38 +#define SO_DOMAIN		39 + +#define SO_RXQ_OVFL             40 + +#define SO_WIFI_STATUS		41 +#define SCM_WIFI_STATUS	SO_WIFI_STATUS +#define SO_PEEK_OFF		42 + +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS		43 + +#define SO_LOCK_FILTER		44 + +#define SO_SELECT_ERR_QUEUE	45 + +#define SO_BUSY_POLL		46 + +#define SO_MAX_PACING_RATE	47 + +#define SO_BPF_EXTENSIONS	48 + +#define SO_INCOMING_CPU		49 + +#define SO_ATTACH_BPF		50 +#define SO_DETACH_BPF		SO_DETACH_FILTER + +#define SO_ATTACH_REUSEPORT_CBPF	51 +#define SO_ATTACH_REUSEPORT_EBPF	52 + +#define SO_CNX_ADVICE		53 + +#define SCM_TIMESTAMPING_OPT_STATS	54 + +#define SO_MEMINFO		55 + +#define SO_INCOMING_NAPI_ID	56 + +#define SO_COOKIE		57 + +#define SCM_TIMESTAMPING_PKTINFO	58 + +#define SO_PEERGROUPS		59 + +#define SO_ZEROCOPY		60 + +#define SO_TXTIME		61 +#define SCM_TXTIME		SO_TXTIME + +#define SO_BINDTOIFINDEX	62 + +#define SO_TIMESTAMP_OLD        29 +#define SO_TIMESTAMPNS_OLD      35 +#define SO_TIMESTAMPING_OLD     37 + +#define SO_TIMESTAMP_NEW        63 +#define SO_TIMESTAMPNS_NEW      64 +#define SO_TIMESTAMPING_NEW     65 + +#define 
SO_RCVTIMEO_NEW         66 +#define SO_SNDTIMEO_NEW         67 + +#define SO_DETACH_REUSEPORT_BPF 68 + +#if !defined(__KERNEL__) + +#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) +/* on 64-bit and x32, avoid the ?: operator */ +#define SO_TIMESTAMP		SO_TIMESTAMP_OLD +#define SO_TIMESTAMPNS		SO_TIMESTAMPNS_OLD +#define SO_TIMESTAMPING		SO_TIMESTAMPING_OLD + +#define SO_RCVTIMEO		SO_RCVTIMEO_OLD +#define SO_SNDTIMEO		SO_SNDTIMEO_OLD +#else +#define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW) +#define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW) +#define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW) + +#define SO_RCVTIMEO (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_RCVTIMEO_OLD : SO_RCVTIMEO_NEW) +#define SO_SNDTIMEO (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_SNDTIMEO_OLD : SO_SNDTIMEO_NEW) +#endif + +#define SCM_TIMESTAMP           SO_TIMESTAMP +#define SCM_TIMESTAMPNS         SO_TIMESTAMPNS +#define SCM_TIMESTAMPING        SO_TIMESTAMPING + +#endif + +#endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0e879721f75a..b077507efa3f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3085,6 +3085,10 @@ struct bpf_sock_tuple {  	};  }; +struct bpf_xdp_sock { +	__u32 queue_id; +}; +  #define XDP_PACKET_HEADROOM 256  /* User return codes for XDP prog type. @@ -3245,6 +3249,7 @@ struct bpf_sock_addr {  	__u32 msg_src_ip6[4];	/* Allows 1,2,4-byte read an 4-byte write.  				 * Stored in network byte order.  				 */ +	__bpf_md_ptr(struct bpf_sock *, sk);  };  /* User bpf_sock_ops struct to access socket values and specify request ops @@ -3296,6 +3301,7 @@ struct bpf_sock_ops {  	__u32 sk_txhash;  	__u64 bytes_received;  	__u64 bytes_acked; +	__bpf_md_ptr(struct bpf_sock *, sk);  };  /* Definitions for bpf_sock_ops_cb_flags */ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 0d4b4fe10a84..c7d7993c44bb 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -26,10 +26,11 @@  #include <memory.h>  #include <unistd.h>  #include <asm/unistd.h> +#include <errno.h>  #include <linux/bpf.h>  #include "bpf.h"  #include "libbpf.h" -#include <errno.h> +#include "libbpf_internal.h"  /*   * When building perf, unistd.h is overridden. __NR_bpf is @@ -53,10 +54,6 @@  # endif  #endif -#ifndef min -#define min(x, y) ((x) < (y) ? (x) : (y)) -#endif -  static inline __u64 ptr_to_u64(const void *ptr)  {  	return (__u64) (unsigned long) ptr; diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c index 6978314ea7f6..8c67561c93b0 100644 --- a/tools/lib/bpf/bpf_prog_linfo.c +++ b/tools/lib/bpf/bpf_prog_linfo.c @@ -6,10 +6,7 @@  #include <linux/err.h>  #include <linux/bpf.h>  #include "libbpf.h" - -#ifndef min -#define min(x, y) ((x) < (y) ? (x) : (y)) -#endif +#include "libbpf_internal.h"  struct bpf_prog_linfo {  	void *raw_linfo; diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index b2478e98c367..467224feb43b 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -16,9 +16,6 @@  #include "libbpf_internal.h"  #include "hashmap.h" -#define max(a, b) ((a) > (b) ? (a) : (b)) -#define min(a, b) ((a) < (b) ? 
(a) : (b)) -  #define BTF_MAX_NR_TYPES 0x7fffffff  #define BTF_MAX_STR_OFFSET 0x7fffffff diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index ba4ffa831aa4..88a52ae56fc6 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -17,6 +17,7 @@ extern "C" {  #define BTF_ELF_SEC ".BTF"  #define BTF_EXT_ELF_SEC ".BTF.ext" +#define MAPS_ELF_SEC ".maps"  struct btf;  struct btf_ext; diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 4b22db77e2cc..7065bb5b2752 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -18,9 +18,6 @@  #include "libbpf.h"  #include "libbpf_internal.h" -#define min(x, y) ((x) < (y) ? (x) : (y)) -#define max(x, y) ((x) < (y) ? (y) : (x)) -  static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t";  static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index c8fbc050fd78..4259c9f0cfe7 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -207,7 +207,8 @@ static const char * const libbpf_type_to_btf_name[] = {  struct bpf_map {  	int fd;  	char *name; -	size_t offset; +	int sec_idx; +	size_t sec_offset;  	int map_ifindex;  	int inner_map_fd;  	struct bpf_map_def def; @@ -234,6 +235,7 @@ struct bpf_object {  	size_t nr_programs;  	struct bpf_map *maps;  	size_t nr_maps; +	size_t maps_cap;  	struct bpf_secdata sections;  	bool loaded; @@ -260,6 +262,7 @@ struct bpf_object {  		} *reloc;  		int nr_reloc;  		int maps_shndx; +		int btf_maps_shndx;  		int text_shndx;  		int data_shndx;  		int rodata_shndx; @@ -512,6 +515,7 @@ static struct bpf_object *bpf_object__new(const char *path,  	obj->efile.obj_buf = obj_buf;  	obj->efile.obj_buf_sz = obj_buf_sz;  	obj->efile.maps_shndx = -1; +	obj->efile.btf_maps_shndx = -1;  	obj->efile.data_shndx = -1;  	obj->efile.rodata_shndx = -1;  	obj->efile.bss_shndx = -1; @@ -646,7 +650,9 @@ static int compare_bpf_map(const void *_a, const void *_b)  	const struct bpf_map *a = _a;  	const struct bpf_map *b = _b; -	return a->offset - b->offset; +	if (a->sec_idx != b->sec_idx) +		return a->sec_idx - b->sec_idx; +	return a->sec_offset - b->sec_offset;  }  static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) @@ -763,24 +769,55 @@ int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,  	return -ENOENT;  } -static bool bpf_object__has_maps(const struct bpf_object *obj) +static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)  { -	return obj->efile.maps_shndx >= 0 || -	       obj->efile.data_shndx >= 0 || -	       obj->efile.rodata_shndx >= 0 || -	       obj->efile.bss_shndx >= 0; +	struct bpf_map *new_maps; +	size_t new_cap; +	int i; + +	if (obj->nr_maps < obj->maps_cap) +		return &obj->maps[obj->nr_maps++]; + +	new_cap = max(4ul, obj->maps_cap * 3 / 2); +	new_maps = realloc(obj->maps, new_cap * sizeof(*obj->maps)); +	if (!new_maps) { +		pr_warning("alloc maps for object failed\n"); +		return ERR_PTR(-ENOMEM); +	} + +	obj->maps_cap = new_cap; +	obj->maps = new_maps; + +	/* zero out new maps */ +	memset(obj->maps + obj->nr_maps, 0, +	       (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps)); +	/* +	 * fill all fd with -1 so won't close incorrect fd (fd=0 is stdin) +	 * when failure (zclose won't close negative fd)). 
+	 */ +	for (i = obj->nr_maps; i < obj->maps_cap; i++) { +		obj->maps[i].fd = -1; +		obj->maps[i].inner_map_fd = -1; +	} + +	return &obj->maps[obj->nr_maps++];  }  static int -bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map, -			      enum libbpf_map_type type, Elf_Data *data, -			      void **data_buff) +bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, +			      int sec_idx, Elf_Data *data, void **data_buff)  { -	struct bpf_map_def *def = &map->def;  	char map_name[BPF_OBJ_NAME_LEN]; +	struct bpf_map_def *def; +	struct bpf_map *map; + +	map = bpf_object__add_map(obj); +	if (IS_ERR(map)) +		return PTR_ERR(map);  	map->libbpf_type = type; -	map->offset = ~(typeof(map->offset))0; +	map->sec_idx = sec_idx; +	map->sec_offset = 0;  	snprintf(map_name, sizeof(map_name), "%.8s%.7s", obj->name,  		 libbpf_type_to_btf_name[type]);  	map->name = strdup(map_name); @@ -788,7 +825,10 @@ bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map,  		pr_warning("failed to alloc map name\n");  		return -ENOMEM;  	} +	pr_debug("map '%s' (global data): at sec_idx %d, offset %zu.\n", +		 map_name, map->sec_idx, map->sec_offset); +	def = &map->def;  	def->type = BPF_MAP_TYPE_ARRAY;  	def->key_size = sizeof(int);  	def->value_size = data->d_size; @@ -808,29 +848,61 @@ bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map,  	return 0;  } -static int bpf_object__init_maps(struct bpf_object *obj, int flags) +static int bpf_object__init_global_data_maps(struct bpf_object *obj) +{ +	int err; + +	if (!obj->caps.global_data) +		return 0; +	/* +	 * Populate obj->maps with libbpf internal maps. +	 */ +	if (obj->efile.data_shndx >= 0) { +		err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA, +						    obj->efile.data_shndx, +						    obj->efile.data, +						    &obj->sections.data); +		if (err) +			return err; +	} +	if (obj->efile.rodata_shndx >= 0) { +		err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA, +						    obj->efile.rodata_shndx, +						    obj->efile.rodata, +						    &obj->sections.rodata); +		if (err) +			return err; +	} +	if (obj->efile.bss_shndx >= 0) { +		err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS, +						    obj->efile.bss_shndx, +						    obj->efile.bss, NULL); +		if (err) +			return err; +	} +	return 0; +} + +static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)  { -	int i, map_idx, map_def_sz = 0, nr_syms, nr_maps = 0, nr_maps_glob = 0; -	bool strict = !(flags & MAPS_RELAX_COMPAT);  	Elf_Data *symbols = obj->efile.symbols; +	int i, map_def_sz = 0, nr_maps = 0, nr_syms;  	Elf_Data *data = NULL; -	int ret = 0; +	Elf_Scn *scn; + +	if (obj->efile.maps_shndx < 0) +		return 0;  	if (!symbols)  		return -EINVAL; -	nr_syms = symbols->d_size / sizeof(GElf_Sym); -	if (obj->efile.maps_shndx >= 0) { -		Elf_Scn *scn = elf_getscn(obj->efile.elf, -					  obj->efile.maps_shndx); - -		if (scn) -			data = elf_getdata(scn, NULL); -		if (!scn || !data) { -			pr_warning("failed to get Elf_Data from map section %d\n", -				   obj->efile.maps_shndx); -			return -EINVAL; -		} +	scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx); +	if (scn) +		data = elf_getdata(scn, NULL); +	if (!scn || !data) { +		pr_warning("failed to get Elf_Data from map section %d\n", +			   obj->efile.maps_shndx); +		return -EINVAL;  	}  	/* @@ -840,16 +912,8 @@ static int bpf_object__init_maps(struct bpf_object *obj, int flags)  	 *  	 * TODO: Detect array of map and report error.  	 
*/ -	if (obj->caps.global_data) { -		if (obj->efile.data_shndx >= 0) -			nr_maps_glob++; -		if (obj->efile.rodata_shndx >= 0) -			nr_maps_glob++; -		if (obj->efile.bss_shndx >= 0) -			nr_maps_glob++; -	} - -	for (i = 0; data && i < nr_syms; i++) { +	nr_syms = symbols->d_size / sizeof(GElf_Sym); +	for (i = 0; i < nr_syms; i++) {  		GElf_Sym sym;  		if (!gelf_getsym(symbols, i, &sym)) @@ -858,79 +922,59 @@ static int bpf_object__init_maps(struct bpf_object *obj, int flags)  			continue;  		nr_maps++;  	} - -	if (!nr_maps && !nr_maps_glob) -		return 0; -  	/* Assume equally sized map definitions */ -	if (data) { -		pr_debug("maps in %s: %d maps in %zd bytes\n", obj->path, -			 nr_maps, data->d_size); - -		map_def_sz = data->d_size / nr_maps; -		if (!data->d_size || (data->d_size % nr_maps) != 0) { -			pr_warning("unable to determine map definition size " -				   "section %s, %d maps in %zd bytes\n", -				   obj->path, nr_maps, data->d_size); -			return -EINVAL; -		} -	} - -	nr_maps += nr_maps_glob; -	obj->maps = calloc(nr_maps, sizeof(obj->maps[0])); -	if (!obj->maps) { -		pr_warning("alloc maps for object failed\n"); -		return -ENOMEM; -	} -	obj->nr_maps = nr_maps; - -	for (i = 0; i < nr_maps; i++) { -		/* -		 * fill all fd with -1 so won't close incorrect -		 * fd (fd=0 is stdin) when failure (zclose won't close -		 * negative fd)). -		 */ -		obj->maps[i].fd = -1; -		obj->maps[i].inner_map_fd = -1; +	pr_debug("maps in %s: %d maps in %zd bytes\n", +		 obj->path, nr_maps, data->d_size); + +	map_def_sz = data->d_size / nr_maps; +	if (!data->d_size || (data->d_size % nr_maps) != 0) { +		pr_warning("unable to determine map definition size " +			   "section %s, %d maps in %zd bytes\n", +			   obj->path, nr_maps, data->d_size); +		return -EINVAL;  	} -	/* -	 * Fill obj->maps using data in "maps" section. -	 */ -	for (i = 0, map_idx = 0; data && i < nr_syms; i++) { +	/* Fill obj->maps using data in "maps" section.  
*/ +	for (i = 0; i < nr_syms; i++) {  		GElf_Sym sym;  		const char *map_name;  		struct bpf_map_def *def; +		struct bpf_map *map;  		if (!gelf_getsym(symbols, i, &sym))  			continue;  		if (sym.st_shndx != obj->efile.maps_shndx)  			continue; -		map_name = elf_strptr(obj->efile.elf, -				      obj->efile.strtabidx, +		map = bpf_object__add_map(obj); +		if (IS_ERR(map)) +			return PTR_ERR(map); + +		map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,  				      sym.st_name);  		if (!map_name) {  			pr_warning("failed to get map #%d name sym string for obj %s\n", -				   map_idx, obj->path); +				   i, obj->path);  			return -LIBBPF_ERRNO__FORMAT;  		} -		obj->maps[map_idx].libbpf_type = LIBBPF_MAP_UNSPEC; -		obj->maps[map_idx].offset = sym.st_value; +		map->libbpf_type = LIBBPF_MAP_UNSPEC; +		map->sec_idx = sym.st_shndx; +		map->sec_offset = sym.st_value; +		pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n", +			 map_name, map->sec_idx, map->sec_offset);  		if (sym.st_value + map_def_sz > data->d_size) {  			pr_warning("corrupted maps section in %s: last map \"%s\" too small\n",  				   obj->path, map_name);  			return -EINVAL;  		} -		obj->maps[map_idx].name = strdup(map_name); -		if (!obj->maps[map_idx].name) { +		map->name = strdup(map_name); +		if (!map->name) {  			pr_warning("failed to alloc map name\n");  			return -ENOMEM;  		} -		pr_debug("map %d is \"%s\"\n", map_idx, -			 obj->maps[map_idx].name); +		pr_debug("map %d is \"%s\"\n", i, map->name);  		def = (struct bpf_map_def *)(data->d_buf + sym.st_value);  		/*  		 * If the definition of the map in the object file fits in @@ -939,7 +983,7 @@ static int bpf_object__init_maps(struct bpf_object *obj, int flags)  		 * calloc above.  		 */  		if (map_def_sz <= sizeof(struct bpf_map_def)) { -			memcpy(&obj->maps[map_idx].def, def, map_def_sz); +			memcpy(&map->def, def, map_def_sz);  		} else {  			/*  			 * Here the map structure being read is bigger than what @@ -959,37 +1003,340 @@ static int bpf_object__init_maps(struct bpf_object *obj, int flags)  						return -EINVAL;  				}  			} -			memcpy(&obj->maps[map_idx].def, def, -			       sizeof(struct bpf_map_def)); +			memcpy(&map->def, def, sizeof(struct bpf_map_def));  		} -		map_idx++;  	} +	return 0; +} -	if (!obj->caps.global_data) -		goto finalize; +static const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, +						     __u32 id) +{ +	const struct btf_type *t = btf__type_by_id(btf, id); -	/* -	 * Populate rest of obj->maps with libbpf internal maps. 
-	 */ -	if (obj->efile.data_shndx >= 0) -		ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++], -						    LIBBPF_MAP_DATA, -						    obj->efile.data, -						    &obj->sections.data); -	if (!ret && obj->efile.rodata_shndx >= 0) -		ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++], -						    LIBBPF_MAP_RODATA, -						    obj->efile.rodata, -						    &obj->sections.rodata); -	if (!ret && obj->efile.bss_shndx >= 0) -		ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++], -						    LIBBPF_MAP_BSS, -						    obj->efile.bss, NULL); -finalize: -	if (!ret) +	while (true) { +		switch (BTF_INFO_KIND(t->info)) { +		case BTF_KIND_VOLATILE: +		case BTF_KIND_CONST: +		case BTF_KIND_RESTRICT: +		case BTF_KIND_TYPEDEF: +			t = btf__type_by_id(btf, t->type); +			break; +		default: +			return t; +		} +	} +} + +static bool get_map_field_int(const char *map_name, +			      const struct btf *btf, +			      const struct btf_type *def, +			      const struct btf_member *m, +			      const void *data, __u32 *res) { +	const struct btf_type *t = skip_mods_and_typedefs(btf, m->type); +	const char *name = btf__name_by_offset(btf, m->name_off); +	__u32 int_info = *(const __u32 *)(const void *)(t + 1); + +	if (BTF_INFO_KIND(t->info) != BTF_KIND_INT) { +		pr_warning("map '%s': attr '%s': expected INT, got %u.\n", +			   map_name, name, BTF_INFO_KIND(t->info)); +		return false; +	} +	if (t->size != 4 || BTF_INT_BITS(int_info) != 32 || +	    BTF_INT_OFFSET(int_info)) { +		pr_warning("map '%s': attr '%s': expected 32-bit non-bitfield integer, " +			   "got %u-byte (%d-bit) one with bit offset %d.\n", +			   map_name, name, t->size, BTF_INT_BITS(int_info), +			   BTF_INT_OFFSET(int_info)); +		return false; +	} +	if (BTF_INFO_KFLAG(def->info) && BTF_MEMBER_BITFIELD_SIZE(m->offset)) { +		pr_warning("map '%s': attr '%s': bitfield is not supported.\n", +			   map_name, name); +		return false; +	} +	if (m->offset % 32) { +		pr_warning("map '%s': attr '%s': unaligned fields are not supported.\n", +			   map_name, name); +		return false; +	} + +	*res = *(const __u32 *)(data + m->offset / 8); +	return true; +} + +static int bpf_object__init_user_btf_map(struct bpf_object *obj, +					 const struct btf_type *sec, +					 int var_idx, int sec_idx, +					 const Elf_Data *data, bool strict) +{ +	const struct btf_type *var, *def, *t; +	const struct btf_var_secinfo *vi; +	const struct btf_var *var_extra; +	const struct btf_member *m; +	const void *def_data; +	const char *map_name; +	struct bpf_map *map; +	int vlen, i; + +	vi = (const struct btf_var_secinfo *)(const void *)(sec + 1) + var_idx; +	var = btf__type_by_id(obj->btf, vi->type); +	var_extra = (const void *)(var + 1); +	map_name = btf__name_by_offset(obj->btf, var->name_off); +	vlen = BTF_INFO_VLEN(var->info); + +	if (map_name == NULL || map_name[0] == '\0') { +		pr_warning("map #%d: empty name.\n", var_idx); +		return -EINVAL; +	} +	if ((__u64)vi->offset + vi->size > data->d_size) { +		pr_warning("map '%s' BTF data is corrupted.\n", map_name); +		return -EINVAL; +	} +	if (BTF_INFO_KIND(var->info) != BTF_KIND_VAR) { +		pr_warning("map '%s': unexpected var kind %u.\n", +			   map_name, BTF_INFO_KIND(var->info)); +		return -EINVAL; +	} +	if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED && +	    var_extra->linkage != BTF_VAR_STATIC) { +		pr_warning("map '%s': unsupported var linkage %u.\n", +			   map_name, var_extra->linkage); +		return -EOPNOTSUPP; +	} + +	def = skip_mods_and_typedefs(obj->btf, var->type); +	if (BTF_INFO_KIND(def->info) != 
BTF_KIND_STRUCT) { +		pr_warning("map '%s': unexpected def kind %u.\n", +			   map_name, BTF_INFO_KIND(var->info)); +		return -EINVAL; +	} +	if (def->size > vi->size) { +		pr_warning("map '%s': invalid def size.\n", map_name); +		return -EINVAL; +	} + +	map = bpf_object__add_map(obj); +	if (IS_ERR(map)) +		return PTR_ERR(map); +	map->name = strdup(map_name); +	if (!map->name) { +		pr_warning("map '%s': failed to alloc map name.\n", map_name); +		return -ENOMEM; +	} +	map->libbpf_type = LIBBPF_MAP_UNSPEC; +	map->def.type = BPF_MAP_TYPE_UNSPEC; +	map->sec_idx = sec_idx; +	map->sec_offset = vi->offset; +	pr_debug("map '%s': at sec_idx %d, offset %zu.\n", +		 map_name, map->sec_idx, map->sec_offset); + +	def_data = data->d_buf + vi->offset; +	vlen = BTF_INFO_VLEN(def->info); +	m = (const void *)(def + 1); +	for (i = 0; i < vlen; i++, m++) { +		const char *name = btf__name_by_offset(obj->btf, m->name_off); + +		if (!name) { +			pr_warning("map '%s': invalid field #%d.\n", +				   map_name, i); +			return -EINVAL; +		} +		if (strcmp(name, "type") == 0) { +			if (!get_map_field_int(map_name, obj->btf, def, m, +					       def_data, &map->def.type)) +				return -EINVAL; +			pr_debug("map '%s': found type = %u.\n", +				 map_name, map->def.type); +		} else if (strcmp(name, "max_entries") == 0) { +			if (!get_map_field_int(map_name, obj->btf, def, m, +					       def_data, &map->def.max_entries)) +				return -EINVAL; +			pr_debug("map '%s': found max_entries = %u.\n", +				 map_name, map->def.max_entries); +		} else if (strcmp(name, "map_flags") == 0) { +			if (!get_map_field_int(map_name, obj->btf, def, m, +					       def_data, &map->def.map_flags)) +				return -EINVAL; +			pr_debug("map '%s': found map_flags = %u.\n", +				 map_name, map->def.map_flags); +		} else if (strcmp(name, "key_size") == 0) { +			__u32 sz; + +			if (!get_map_field_int(map_name, obj->btf, def, m, +					       def_data, &sz)) +				return -EINVAL; +			pr_debug("map '%s': found key_size = %u.\n", +				 map_name, sz); +			if (map->def.key_size && map->def.key_size != sz) { +				pr_warning("map '%s': conflictling key size %u != %u.\n", +					   map_name, map->def.key_size, sz); +				return -EINVAL; +			} +			map->def.key_size = sz; +		} else if (strcmp(name, "key") == 0) { +			__s64 sz; + +			t = btf__type_by_id(obj->btf, m->type); +			if (!t) { +				pr_warning("map '%s': key type [%d] not found.\n", +					   map_name, m->type); +				return -EINVAL; +			} +			if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) { +				pr_warning("map '%s': key spec is not PTR: %u.\n", +					   map_name, BTF_INFO_KIND(t->info)); +				return -EINVAL; +			} +			sz = btf__resolve_size(obj->btf, t->type); +			if (sz < 0) { +				pr_warning("map '%s': can't determine key size for type [%u]: %lld.\n", +					   map_name, t->type, sz); +				return sz; +			} +			pr_debug("map '%s': found key [%u], sz = %lld.\n", +				 map_name, t->type, sz); +			if (map->def.key_size && map->def.key_size != sz) { +				pr_warning("map '%s': conflictling key size %u != %lld.\n", +					   map_name, map->def.key_size, sz); +				return -EINVAL; +			} +			map->def.key_size = sz; +			map->btf_key_type_id = t->type; +		} else if (strcmp(name, "value_size") == 0) { +			__u32 sz; + +			if (!get_map_field_int(map_name, obj->btf, def, m, +					       def_data, &sz)) +				return -EINVAL; +			pr_debug("map '%s': found value_size = %u.\n", +				 map_name, sz); +			if (map->def.value_size && map->def.value_size != sz) { +				pr_warning("map '%s': conflictling value size %u != %u.\n", +					   
map_name, map->def.value_size, sz); +				return -EINVAL; +			} +			map->def.value_size = sz; +		} else if (strcmp(name, "value") == 0) { +			__s64 sz; + +			t = btf__type_by_id(obj->btf, m->type); +			if (!t) { +				pr_warning("map '%s': value type [%d] not found.\n", +					   map_name, m->type); +				return -EINVAL; +			} +			if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) { +				pr_warning("map '%s': value spec is not PTR: %u.\n", +					   map_name, BTF_INFO_KIND(t->info)); +				return -EINVAL; +			} +			sz = btf__resolve_size(obj->btf, t->type); +			if (sz < 0) { +				pr_warning("map '%s': can't determine value size for type [%u]: %lld.\n", +					   map_name, t->type, sz); +				return sz; +			} +			pr_debug("map '%s': found value [%u], sz = %lld.\n", +				 map_name, t->type, sz); +			if (map->def.value_size && map->def.value_size != sz) { +				pr_warning("map '%s': conflictling value size %u != %lld.\n", +					   map_name, map->def.value_size, sz); +				return -EINVAL; +			} +			map->def.value_size = sz; +			map->btf_value_type_id = t->type; +		} else { +			if (strict) { +				pr_warning("map '%s': unknown field '%s'.\n", +					   map_name, name); +				return -ENOTSUP; +			} +			pr_debug("map '%s': ignoring unknown field '%s'.\n", +				 map_name, name); +		} +	} + +	if (map->def.type == BPF_MAP_TYPE_UNSPEC) { +		pr_warning("map '%s': map type isn't specified.\n", map_name); +		return -EINVAL; +	} + +	return 0; +} + +static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict) +{ +	const struct btf_type *sec = NULL; +	int nr_types, i, vlen, err; +	const struct btf_type *t; +	const char *name; +	Elf_Data *data; +	Elf_Scn *scn; + +	if (obj->efile.btf_maps_shndx < 0) +		return 0; + +	scn = elf_getscn(obj->efile.elf, obj->efile.btf_maps_shndx); +	if (scn) +		data = elf_getdata(scn, NULL); +	if (!scn || !data) { +		pr_warning("failed to get Elf_Data from map section %d (%s)\n", +			   obj->efile.maps_shndx, MAPS_ELF_SEC); +		return -EINVAL; +	} + +	nr_types = btf__get_nr_types(obj->btf); +	for (i = 1; i <= nr_types; i++) { +		t = btf__type_by_id(obj->btf, i); +		if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC) +			continue; +		name = btf__name_by_offset(obj->btf, t->name_off); +		if (strcmp(name, MAPS_ELF_SEC) == 0) { +			sec = t; +			break; +		} +	} + +	if (!sec) { +		pr_warning("DATASEC '%s' not found.\n", MAPS_ELF_SEC); +		return -ENOENT; +	} + +	vlen = BTF_INFO_VLEN(sec->info); +	for (i = 0; i < vlen; i++) { +		err = bpf_object__init_user_btf_map(obj, sec, i, +						    obj->efile.btf_maps_shndx, +						    data, strict); +		if (err) +			return err; +	} + +	return 0; +} + +static int bpf_object__init_maps(struct bpf_object *obj, int flags) +{ +	bool strict = !(flags & MAPS_RELAX_COMPAT); +	int err; + +	err = bpf_object__init_user_maps(obj, strict); +	if (err) +		return err; + +	err = bpf_object__init_user_btf_maps(obj, strict); +	if (err) +		return err; + +	err = bpf_object__init_global_data_maps(obj); +	if (err) +		return err; + +	if (obj->nr_maps) {  		qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]),  		      compare_bpf_map); -	return ret; +	} +	return 0;  }  static bool section_have_execinstr(struct bpf_object *obj, int idx) @@ -1078,6 +1425,86 @@ static void bpf_object__sanitize_btf_ext(struct bpf_object *obj)  	}  } +static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj) +{ +	return obj->efile.btf_maps_shndx >= 0; +} + +static int bpf_object__init_btf(struct bpf_object *obj, +				Elf_Data *btf_data, +				Elf_Data *btf_ext_data) +{ +	bool btf_required 
= bpf_object__is_btf_mandatory(obj); +	int err = 0; + +	if (btf_data) { +		obj->btf = btf__new(btf_data->d_buf, btf_data->d_size); +		if (IS_ERR(obj->btf)) { +			pr_warning("Error loading ELF section %s: %d.\n", +				   BTF_ELF_SEC, err); +			goto out; +		} +		err = btf__finalize_data(obj, obj->btf); +		if (err) { +			pr_warning("Error finalizing %s: %d.\n", +				   BTF_ELF_SEC, err); +			goto out; +		} +	} +	if (btf_ext_data) { +		if (!obj->btf) { +			pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n", +				 BTF_EXT_ELF_SEC, BTF_ELF_SEC); +			goto out; +		} +		obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, +					    btf_ext_data->d_size); +		if (IS_ERR(obj->btf_ext)) { +			pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", +				   BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext)); +			obj->btf_ext = NULL; +			goto out; +		} +	} +out: +	if (err || IS_ERR(obj->btf)) { +		if (btf_required) +			err = err ? : PTR_ERR(obj->btf); +		else +			err = 0; +		if (!IS_ERR_OR_NULL(obj->btf)) +			btf__free(obj->btf); +		obj->btf = NULL; +	} +	if (btf_required && !obj->btf) { +		pr_warning("BTF is required, but is missing or corrupted.\n"); +		return err == 0 ? -ENOENT : err; +	} +	return 0; +} + +static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) +{ +	int err = 0; + +	if (!obj->btf) +		return 0; + +	bpf_object__sanitize_btf(obj); +	bpf_object__sanitize_btf_ext(obj); + +	err = btf__load(obj->btf); +	if (err) { +		pr_warning("Error loading %s into kernel: %d.\n", +			   BTF_ELF_SEC, err); +		btf__free(obj->btf); +		obj->btf = NULL; +		if (bpf_object__is_btf_mandatory(obj)) +			return err; +	} +	return 0; +} +  static int bpf_object__elf_collect(struct bpf_object *obj, int flags)  {  	Elf *elf = obj->efile.elf; @@ -1102,24 +1529,21 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)  		if (gelf_getshdr(scn, &sh) != &sh) {  			pr_warning("failed to get section(%d) header from %s\n",  				   idx, obj->path); -			err = -LIBBPF_ERRNO__FORMAT; -			goto out; +			return -LIBBPF_ERRNO__FORMAT;  		}  		name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);  		if (!name) {  			pr_warning("failed to get section(%d) name from %s\n",  				   idx, obj->path); -			err = -LIBBPF_ERRNO__FORMAT; -			goto out; +			return -LIBBPF_ERRNO__FORMAT;  		}  		data = elf_getdata(scn, 0);  		if (!data) {  			pr_warning("failed to get section(%d) data from %s(%s)\n",  				   idx, name, obj->path); -			err = -LIBBPF_ERRNO__FORMAT; -			goto out; +			return -LIBBPF_ERRNO__FORMAT;  		}  		pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",  			 idx, name, (unsigned long)data->d_size, @@ -1130,12 +1554,18 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)  			err = bpf_object__init_license(obj,  						       data->d_buf,  						       data->d_size); +			if (err) +				return err;  		} else if (strcmp(name, "version") == 0) {  			err = bpf_object__init_kversion(obj,  							data->d_buf,  							data->d_size); +			if (err) +				return err;  		} else if (strcmp(name, "maps") == 0) {  			obj->efile.maps_shndx = idx; +		} else if (strcmp(name, MAPS_ELF_SEC) == 0) { +			obj->efile.btf_maps_shndx = idx;  		} else if (strcmp(name, BTF_ELF_SEC) == 0) {  			btf_data = data;  		} else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { @@ -1144,11 +1574,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)  			if (obj->efile.symbols) {  				pr_warning("bpf: multiple SYMTAB in %s\n",  					   obj->path); -				err = 
-LIBBPF_ERRNO__FORMAT; -			} else { -				obj->efile.symbols = data; -				obj->efile.strtabidx = sh.sh_link; +				return -LIBBPF_ERRNO__FORMAT;  			} +			obj->efile.symbols = data; +			obj->efile.strtabidx = sh.sh_link;  		} else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {  			if (sh.sh_flags & SHF_EXECINSTR) {  				if (strcmp(name, ".text") == 0) @@ -1162,6 +1591,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)  					pr_warning("failed to alloc program %s (%s): %s",  						   name, obj->path, cp); +					return err;  				}  			} else if (strcmp(name, ".data") == 0) {  				obj->efile.data = data; @@ -1173,8 +1603,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)  				pr_debug("skip section(%d) %s\n", idx, name);  			}  		} else if (sh.sh_type == SHT_REL) { +			int nr_reloc = obj->efile.nr_reloc;  			void *reloc = obj->efile.reloc; -			int nr_reloc = obj->efile.nr_reloc + 1;  			int sec = sh.sh_info; /* points to other section */  			/* Only do relo for section with exec instructions */ @@ -1184,79 +1614,37 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)  				continue;  			} -			reloc = reallocarray(reloc, nr_reloc, +			reloc = reallocarray(reloc, nr_reloc + 1,  					     sizeof(*obj->efile.reloc));  			if (!reloc) {  				pr_warning("realloc failed\n"); -				err = -ENOMEM; -			} else { -				int n = nr_reloc - 1; +				return -ENOMEM; +			} -				obj->efile.reloc = reloc; -				obj->efile.nr_reloc = nr_reloc; +			obj->efile.reloc = reloc; +			obj->efile.nr_reloc++; -				obj->efile.reloc[n].shdr = sh; -				obj->efile.reloc[n].data = data; -			} +			obj->efile.reloc[nr_reloc].shdr = sh; +			obj->efile.reloc[nr_reloc].data = data;  		} else if (sh.sh_type == SHT_NOBITS && strcmp(name, ".bss") == 0) {  			obj->efile.bss = data;  			obj->efile.bss_shndx = idx;  		} else {  			pr_debug("skip section(%d) %s\n", idx, name);  		} -		if (err) -			goto out;  	}  	if (!obj->efile.strtabidx || obj->efile.strtabidx >= idx) {  		pr_warning("Corrupted ELF file: index of strtab invalid\n");  		return -LIBBPF_ERRNO__FORMAT;  	} -	if (btf_data) { -		obj->btf = btf__new(btf_data->d_buf, btf_data->d_size); -		if (IS_ERR(obj->btf)) { -			pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", -				   BTF_ELF_SEC, PTR_ERR(obj->btf)); -			obj->btf = NULL; -		} else { -			err = btf__finalize_data(obj, obj->btf); -			if (!err) { -				bpf_object__sanitize_btf(obj); -				err = btf__load(obj->btf); -			} -			if (err) { -				pr_warning("Error finalizing and loading %s into kernel: %d. Ignored and continue.\n", -					   BTF_ELF_SEC, err); -				btf__free(obj->btf); -				obj->btf = NULL; -				err = 0; -			} -		} -	} -	if (btf_ext_data) { -		if (!obj->btf) { -			pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n", -				 BTF_EXT_ELF_SEC, BTF_ELF_SEC); -		} else { -			obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, -						    btf_ext_data->d_size); -			if (IS_ERR(obj->btf_ext)) { -				pr_warning("Error loading ELF section %s: %ld. 
Ignored and continue.\n", -					   BTF_EXT_ELF_SEC, -					   PTR_ERR(obj->btf_ext)); -				obj->btf_ext = NULL; -			} else { -				bpf_object__sanitize_btf_ext(obj); -			} -		} -	} -	if (bpf_object__has_maps(obj)) { +	err = bpf_object__init_btf(obj, btf_data, btf_ext_data); +	if (!err)  		err = bpf_object__init_maps(obj, flags); -		if (err) -			goto out; -	} -	err = bpf_object__init_prog_names(obj); -out: +	if (!err) +		err = bpf_object__sanitize_and_load_btf(obj); +	if (!err) +		err = bpf_object__init_prog_names(obj);  	return err;  } @@ -1275,7 +1663,8 @@ bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)  }  struct bpf_program * -bpf_object__find_program_by_title(struct bpf_object *obj, const char *title) +bpf_object__find_program_by_title(const struct bpf_object *obj, +				  const char *title)  {  	struct bpf_program *pos; @@ -1297,7 +1686,8 @@ static bool bpf_object__shndx_is_data(const struct bpf_object *obj,  static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,  				      int shndx)  { -	return shndx == obj->efile.maps_shndx; +	return shndx == obj->efile.maps_shndx || +	       shndx == obj->efile.btf_maps_shndx;  }  static bool bpf_object__relo_in_known_section(const struct bpf_object *obj, @@ -1341,14 +1731,14 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,  	prog->nr_reloc = nrels;  	for (i = 0; i < nrels; i++) { -		GElf_Sym sym; -		GElf_Rel rel; -		unsigned int insn_idx; -		unsigned int shdr_idx;  		struct bpf_insn *insns = prog->insns;  		enum libbpf_map_type type; +		unsigned int insn_idx; +		unsigned int shdr_idx;  		const char *name;  		size_t map_idx; +		GElf_Sym sym; +		GElf_Rel rel;  		if (!gelf_getrel(data, i, &rel)) {  			pr_warning("relocation: failed to get %d reloc\n", i); @@ -1416,9 +1806,13 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,  				if (maps[map_idx].libbpf_type != type)  					continue;  				if (type != LIBBPF_MAP_UNSPEC || -				    maps[map_idx].offset == sym.st_value) { -					pr_debug("relocation: find map %zd (%s) for insn %u\n", -						 map_idx, maps[map_idx].name, insn_idx); +				    (maps[map_idx].sec_idx == sym.st_shndx && +				     maps[map_idx].sec_offset == sym.st_value)) { +					pr_debug("relocation: found map %zd (%s, sec_idx %d, offset %zu) for insn %u\n", +						 map_idx, maps[map_idx].name, +						 maps[map_idx].sec_idx, +						 maps[map_idx].sec_offset, +						 insn_idx);  					break;  				}  			} @@ -1438,14 +1832,18 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,  	return 0;  } -static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf) +static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)  {  	struct bpf_map_def *def = &map->def;  	__u32 key_type_id = 0, value_type_id = 0;  	int ret; +	/* if it's BTF-defined map, we don't need to search for type IDs */ +	if (map->sec_idx == obj->efile.btf_maps_shndx) +		return 0; +  	if (!bpf_map__is_internal(map)) { -		ret = btf__get_map_kv_tids(btf, map->name, def->key_size, +		ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,  					   def->value_size, &key_type_id,  					   &value_type_id);  	} else { @@ -1453,7 +1851,7 @@ static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)  		 * LLVM annotates global data differently in BTF, that is,  		 * only as '.data', '.bss' or '.rodata'.  		 
*/ -		ret = btf__find_by_name(btf, +		ret = btf__find_by_name(obj->btf,  				libbpf_type_to_btf_name[map->libbpf_type]);  	}  	if (ret < 0) @@ -1740,25 +2138,26 @@ bpf_object__create_maps(struct bpf_object *obj)  		create_attr.key_size = def->key_size;  		create_attr.value_size = def->value_size;  		create_attr.max_entries = def->max_entries; -		create_attr.btf_fd = -1; +		create_attr.btf_fd = 0;  		create_attr.btf_key_type_id = 0;  		create_attr.btf_value_type_id = 0;  		if (bpf_map_type__is_map_in_map(def->type) &&  		    map->inner_map_fd >= 0)  			create_attr.inner_map_fd = map->inner_map_fd; -		if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) { +		if (obj->btf && !bpf_map_find_btf_info(obj, map)) {  			create_attr.btf_fd = btf__fd(obj->btf);  			create_attr.btf_key_type_id = map->btf_key_type_id;  			create_attr.btf_value_type_id = map->btf_value_type_id;  		}  		*pfd = bpf_create_map_xattr(&create_attr); -		if (*pfd < 0 && create_attr.btf_fd >= 0) { +		if (*pfd < 0 && (create_attr.btf_key_type_id || +				 create_attr.btf_value_type_id)) {  			cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));  			pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",  				   map->name, cp, errno); -			create_attr.btf_fd = -1; +			create_attr.btf_fd = 0;  			create_attr.btf_key_type_id = 0;  			create_attr.btf_value_type_id = 0;  			map->btf_key_type_id = 0; @@ -2049,7 +2448,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,  	load_attr.license = license;  	load_attr.kern_version = kern_version;  	load_attr.prog_ifindex = prog->prog_ifindex; -	load_attr.prog_btf_fd = prog->btf_fd; +	load_attr.prog_btf_fd = prog->btf_fd >= 0 ? prog->btf_fd : 0;  	load_attr.func_info = prog->func_info;  	load_attr.func_info_rec_size = prog->func_info_rec_size;  	load_attr.func_info_cnt = prog->func_info_cnt; @@ -2195,8 +2594,8 @@ out:  	return err;  } -static bool bpf_program__is_function_storage(struct bpf_program *prog, -					     struct bpf_object *obj) +static bool bpf_program__is_function_storage(const struct bpf_program *prog, +					     const struct bpf_object *obj)  {  	return prog->idx == obj->efile.text_shndx && obj->has_pseudo_calls;  } @@ -2902,17 +3301,17 @@ bpf_object__next(struct bpf_object *prev)  	return next;  } -const char *bpf_object__name(struct bpf_object *obj) +const char *bpf_object__name(const struct bpf_object *obj)  {  	return obj ? obj->path : ERR_PTR(-EINVAL);  } -unsigned int bpf_object__kversion(struct bpf_object *obj) +unsigned int bpf_object__kversion(const struct bpf_object *obj)  {  	return obj ? obj->kern_version : 0;  } -struct btf *bpf_object__btf(struct bpf_object *obj) +struct btf *bpf_object__btf(const struct bpf_object *obj)  {  	return obj ? obj->btf : NULL;  } @@ -2933,13 +3332,14 @@ int bpf_object__set_priv(struct bpf_object *obj, void *priv,  	return 0;  } -void *bpf_object__priv(struct bpf_object *obj) +void *bpf_object__priv(const struct bpf_object *obj)  {  	return obj ? 
obj->priv : ERR_PTR(-EINVAL);  }  static struct bpf_program * -__bpf_program__iter(struct bpf_program *p, struct bpf_object *obj, bool forward) +__bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj, +		    bool forward)  {  	size_t nr_programs = obj->nr_programs;  	ssize_t idx; @@ -2964,7 +3364,7 @@ __bpf_program__iter(struct bpf_program *p, struct bpf_object *obj, bool forward)  }  struct bpf_program * -bpf_program__next(struct bpf_program *prev, struct bpf_object *obj) +bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)  {  	struct bpf_program *prog = prev; @@ -2976,7 +3376,7 @@ bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)  }  struct bpf_program * -bpf_program__prev(struct bpf_program *next, struct bpf_object *obj) +bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)  {  	struct bpf_program *prog = next; @@ -2998,7 +3398,7 @@ int bpf_program__set_priv(struct bpf_program *prog, void *priv,  	return 0;  } -void *bpf_program__priv(struct bpf_program *prog) +void *bpf_program__priv(const struct bpf_program *prog)  {  	return prog ? prog->priv : ERR_PTR(-EINVAL);  } @@ -3008,7 +3408,7 @@ void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)  	prog->prog_ifindex = ifindex;  } -const char *bpf_program__title(struct bpf_program *prog, bool needs_copy) +const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)  {  	const char *title; @@ -3024,7 +3424,7 @@ const char *bpf_program__title(struct bpf_program *prog, bool needs_copy)  	return title;  } -int bpf_program__fd(struct bpf_program *prog) +int bpf_program__fd(const struct bpf_program *prog)  {  	return bpf_program__nth_fd(prog, 0);  } @@ -3057,7 +3457,7 @@ int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,  	return 0;  } -int bpf_program__nth_fd(struct bpf_program *prog, int n) +int bpf_program__nth_fd(const struct bpf_program *prog, int n)  {  	int fd; @@ -3085,25 +3485,25 @@ void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)  	prog->type = type;  } -static bool bpf_program__is_type(struct bpf_program *prog, +static bool bpf_program__is_type(const struct bpf_program *prog,  				 enum bpf_prog_type type)  {  	return prog ? (prog->type == type) : false;  } -#define BPF_PROG_TYPE_FNS(NAME, TYPE)			\ -int bpf_program__set_##NAME(struct bpf_program *prog)	\ -{							\ -	if (!prog)					\ -		return -EINVAL;				\ -	bpf_program__set_type(prog, TYPE);		\ -	return 0;					\ -}							\ -							\ -bool bpf_program__is_##NAME(struct bpf_program *prog)	\ -{							\ -	return bpf_program__is_type(prog, TYPE);	\ -}							\ +#define BPF_PROG_TYPE_FNS(NAME, TYPE)				\ +int bpf_program__set_##NAME(struct bpf_program *prog)		\ +{								\ +	if (!prog)						\ +		return -EINVAL;					\ +	bpf_program__set_type(prog, TYPE);			\ +	return 0;						\ +}								\ +								\ +bool bpf_program__is_##NAME(const struct bpf_program *prog)	\ +{								\ +	return bpf_program__is_type(prog, TYPE);		\ +}								\  BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);  BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE); @@ -3302,17 +3702,17 @@ bpf_program__identify_section(struct bpf_program *prog,  					expected_attach_type);  } -int bpf_map__fd(struct bpf_map *map) +int bpf_map__fd(const struct bpf_map *map)  {  	return map ? map->fd : -EINVAL;  } -const struct bpf_map_def *bpf_map__def(struct bpf_map *map) +const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)  {  	return map ? 
&map->def : ERR_PTR(-EINVAL);  } -const char *bpf_map__name(struct bpf_map *map) +const char *bpf_map__name(const struct bpf_map *map)  {  	return map ? map->name : NULL;  } @@ -3343,17 +3743,17 @@ int bpf_map__set_priv(struct bpf_map *map, void *priv,  	return 0;  } -void *bpf_map__priv(struct bpf_map *map) +void *bpf_map__priv(const struct bpf_map *map)  {  	return map ? map->priv : ERR_PTR(-EINVAL);  } -bool bpf_map__is_offload_neutral(struct bpf_map *map) +bool bpf_map__is_offload_neutral(const struct bpf_map *map)  {  	return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;  } -bool bpf_map__is_internal(struct bpf_map *map) +bool bpf_map__is_internal(const struct bpf_map *map)  {  	return map->libbpf_type != LIBBPF_MAP_UNSPEC;  } @@ -3378,7 +3778,7 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)  }  static struct bpf_map * -__bpf_map__iter(struct bpf_map *m, struct bpf_object *obj, int i) +__bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)  {  	ssize_t idx;  	struct bpf_map *s, *e; @@ -3402,7 +3802,7 @@ __bpf_map__iter(struct bpf_map *m, struct bpf_object *obj, int i)  }  struct bpf_map * -bpf_map__next(struct bpf_map *prev, struct bpf_object *obj) +bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj)  {  	if (prev == NULL)  		return obj->maps; @@ -3411,7 +3811,7 @@ bpf_map__next(struct bpf_map *prev, struct bpf_object *obj)  }  struct bpf_map * -bpf_map__prev(struct bpf_map *next, struct bpf_object *obj) +bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj)  {  	if (next == NULL) {  		if (!obj->nr_maps) @@ -3423,7 +3823,7 @@ bpf_map__prev(struct bpf_map *next, struct bpf_object *obj)  }  struct bpf_map * -bpf_object__find_map_by_name(struct bpf_object *obj, const char *name) +bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)  {  	struct bpf_map *pos; @@ -3435,7 +3835,7 @@ bpf_object__find_map_by_name(struct bpf_object *obj, const char *name)  }  int -bpf_object__find_map_fd_by_name(struct bpf_object *obj, const char *name) +bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)  {  	return bpf_map__fd(bpf_object__find_map_by_name(obj, name));  } @@ -3443,13 +3843,7 @@ bpf_object__find_map_fd_by_name(struct bpf_object *obj, const char *name)  struct bpf_map *  bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)  { -	int i; - -	for (i = 0; i < obj->nr_maps; i++) { -		if (obj->maps[i].offset == offset) -			return &obj->maps[i]; -	} -	return ERR_PTR(-ENOENT); +	return ERR_PTR(-ENOTSUP);  }  long libbpf_get_error(const void *ptr) @@ -3835,3 +4229,60 @@ void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)  					     desc->array_offset, addr);  	}  } + +int libbpf_num_possible_cpus(void) +{ +	static const char *fcpu = "/sys/devices/system/cpu/possible"; +	int len = 0, n = 0, il = 0, ir = 0; +	unsigned int start = 0, end = 0; +	static int cpus; +	char buf[128]; +	int error = 0; +	int fd = -1; + +	if (cpus > 0) +		return cpus; + +	fd = open(fcpu, O_RDONLY); +	if (fd < 0) { +		error = errno; +		pr_warning("Failed to open file %s: %s\n", +			   fcpu, strerror(error)); +		return -error; +	} +	len = read(fd, buf, sizeof(buf)); +	close(fd); +	if (len <= 0) { +		error = len ? 
errno : EINVAL; +		pr_warning("Failed to read # of possible cpus from %s: %s\n", +			   fcpu, strerror(error)); +		return -error; +	} +	if (len == sizeof(buf)) { +		pr_warning("File %s size overflow\n", fcpu); +		return -EOVERFLOW; +	} +	buf[len] = '\0'; + +	for (ir = 0, cpus = 0; ir <= len; ir++) { +		/* Each sub string separated by ',' has format \d+-\d+ or \d+ */ +		if (buf[ir] == ',' || buf[ir] == '\0') { +			buf[ir] = '\0'; +			n = sscanf(&buf[il], "%u-%u", &start, &end); +			if (n <= 0) { +				pr_warning("Failed to get # CPUs from %s\n", +					   &buf[il]); +				return -EINVAL; +			} else if (n == 1) { +				end = start; +			} +			cpus += end - start + 1; +			il = ir + 1; +		} +	} +	if (cpus <= 0) { +		pr_warning("Invalid #CPUs %d from %s\n", cpus, fcpu); +		return -EINVAL; +	} +	return cpus; +} diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 1af0d48178c8..d639f47e3110 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -98,15 +98,16 @@ struct bpf_object_load_attr {  LIBBPF_API int bpf_object__load(struct bpf_object *obj);  LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr);  LIBBPF_API int bpf_object__unload(struct bpf_object *obj); -LIBBPF_API const char *bpf_object__name(struct bpf_object *obj); -LIBBPF_API unsigned int bpf_object__kversion(struct bpf_object *obj); +LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj); +LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj);  struct btf; -LIBBPF_API struct btf *bpf_object__btf(struct bpf_object *obj); +LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj);  LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj);  LIBBPF_API struct bpf_program * -bpf_object__find_program_by_title(struct bpf_object *obj, const char *title); +bpf_object__find_program_by_title(const struct bpf_object *obj, +				  const char *title);  LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev);  #define bpf_object__for_each_safe(pos, tmp)			\ @@ -118,7 +119,7 @@ LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev);  typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *);  LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv,  				    bpf_object_clear_priv_t clear_priv); -LIBBPF_API void *bpf_object__priv(struct bpf_object *prog); +LIBBPF_API void *bpf_object__priv(const struct bpf_object *prog);  LIBBPF_API int  libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, @@ -129,7 +130,7 @@ LIBBPF_API int libbpf_attach_type_by_name(const char *name,  /* Accessors of bpf_program */  struct bpf_program;  LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog, -						 struct bpf_object *obj); +						 const struct bpf_object *obj);  #define bpf_object__for_each_program(pos, obj)		\  	for ((pos) = bpf_program__next(NULL, (obj));	\ @@ -137,24 +138,23 @@ LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog,  	     (pos) = bpf_program__next((pos), (obj)))  LIBBPF_API struct bpf_program *bpf_program__prev(struct bpf_program *prog, -						 struct bpf_object *obj); +						 const struct bpf_object *obj); -typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, -					 void *); +typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *);  LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv,  				     bpf_program_clear_priv_t clear_priv); -LIBBPF_API void *bpf_program__priv(struct bpf_program 
*prog); +LIBBPF_API void *bpf_program__priv(const struct bpf_program *prog);  LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,  					 __u32 ifindex); -LIBBPF_API const char *bpf_program__title(struct bpf_program *prog, +LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog,  					  bool needs_copy);  LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license,  				 __u32 kern_version); -LIBBPF_API int bpf_program__fd(struct bpf_program *prog); +LIBBPF_API int bpf_program__fd(const struct bpf_program *prog);  LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog,  					 const char *path,  					 int instance); @@ -227,7 +227,7 @@ typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n,  LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,  				     bpf_program_prep_t prep); -LIBBPF_API int bpf_program__nth_fd(struct bpf_program *prog, int n); +LIBBPF_API int bpf_program__nth_fd(const struct bpf_program *prog, int n);  /*   * Adjust type of BPF program. Default is kprobe. @@ -246,14 +246,14 @@ LIBBPF_API void  bpf_program__set_expected_attach_type(struct bpf_program *prog,  				      enum bpf_attach_type type); -LIBBPF_API bool bpf_program__is_socket_filter(struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_tracepoint(struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_raw_tracepoint(struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_kprobe(struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_sched_cls(struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_sched_act(struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_xdp(struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_perf_event(struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_socket_filter(const struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_tracepoint(const struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_raw_tracepoint(const struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_kprobe(const struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog);  /*   * No need for __attribute__((packed)), all members of 'bpf_map_def' @@ -275,10 +275,10 @@ struct bpf_map_def {   */  struct bpf_map;  LIBBPF_API struct bpf_map * -bpf_object__find_map_by_name(struct bpf_object *obj, const char *name); +bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name);  LIBBPF_API int -bpf_object__find_map_fd_by_name(struct bpf_object *obj, const char *name); +bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name);  /*   * Get bpf_map through the offset of corresponding struct bpf_map_def @@ -288,7 +288,7 @@ LIBBPF_API struct bpf_map *  bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset);  LIBBPF_API struct bpf_map * -bpf_map__next(struct bpf_map *map, struct bpf_object *obj); +bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj);  #define bpf_object__for_each_map(pos, obj)		\  	for ((pos) = bpf_map__next(NULL, (obj));	\  	     (pos) != NULL;				\ @@ -296,22 +296,22 @@ bpf_map__next(struct bpf_map *map, struct bpf_object *obj);  #define bpf_map__for_each bpf_object__for_each_map  LIBBPF_API struct bpf_map * -bpf_map__prev(struct 
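The getters touched in this hunk only gain const qualifiers on their parameters; behaviour is unchanged. A minimal caller-side sketch of what the const-correct API allows, assuming a packaged libbpf header path and purely illustrative function and variable names:

	#include <stdio.h>
	#include <bpf/libbpf.h>

	/* Read-only inspection helper: with the const-qualified accessors it
	 * can take a const bpf_object without casts. */
	static void dump_object(const struct bpf_object *obj)
	{
		const struct bpf_map *map = NULL;
		struct bpf_program *prog;

		printf("object: %s\n", bpf_object__name(obj));
		bpf_object__for_each_program(prog, obj)
			printf("  prog: %s\n", bpf_program__title(prog, false));
		while ((map = bpf_map__next(map, obj)) != NULL)
			printf("  map: %s (fd %d)\n",
			       bpf_map__name(map), bpf_map__fd(map));
	}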
bpf_map *map, struct bpf_object *obj); +bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj); -LIBBPF_API int bpf_map__fd(struct bpf_map *map); -LIBBPF_API const struct bpf_map_def *bpf_map__def(struct bpf_map *map); -LIBBPF_API const char *bpf_map__name(struct bpf_map *map); +LIBBPF_API int bpf_map__fd(const struct bpf_map *map); +LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map); +LIBBPF_API const char *bpf_map__name(const struct bpf_map *map);  LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map);  LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map);  typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);  LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,  				 bpf_map_clear_priv_t clear_priv); -LIBBPF_API void *bpf_map__priv(struct bpf_map *map); +LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);  LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);  LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries); -LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map); -LIBBPF_API bool bpf_map__is_internal(struct bpf_map *map); +LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map); +LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);  LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);  LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);  LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path); @@ -454,6 +454,22 @@ bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear);  LIBBPF_API void  bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear); +/* + * A helper function to get the number of possible CPUs before looking up + * per-CPU maps. Negative errno is returned on failure. + * + * Example usage: + * + *     int ncpus = libbpf_num_possible_cpus(); + *     if (ncpus < 0) { + *          // error handling + *     } + *     long values[ncpus]; + *     bpf_map_lookup_elem(per_cpu_map_fd, key, values); + * + */ +LIBBPF_API int libbpf_num_possible_cpus(void); +  #ifdef __cplusplus  } /* extern "C" */  #endif diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 46dcda89df21..2c6d835620d2 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -172,4 +172,5 @@ LIBBPF_0.0.4 {  		btf_dump__new;  		btf__parse_elf;  		bpf_object__load_xattr; +		libbpf_num_possible_cpus;  } LIBBPF_0.0.3; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 61d90eb82ee6..2ac29bd36226 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -23,6 +23,13 @@  #define BTF_PARAM_ENC(name, type) (name), (type)  #define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size) +#ifndef min +# define min(x, y) ((x) < (y) ? (x) : (y)) +#endif +#ifndef max +# define max(x, y) ((x) < (y) ? (y) : (x)) +#endif +  extern void libbpf_print(enum libbpf_print_level level,  			 const char *format, ...)  	
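libbpf_num_possible_cpus() is the piece of this series that lets the selftests drop their private /sys/devices/system/cpu/possible parser (see the bpf_util.h hunk further down). A hedged userspace sketch of the intended pattern for per-CPU maps; map_fd, key and the __u64 element type are assumptions for illustration, not part of the patch:

	#include <errno.h>
	#include <stdlib.h>
	#include <bpf/bpf.h>
	#include <bpf/libbpf.h>

	/* Sum one __u64 per-CPU value across all possible CPUs. */
	static int sum_percpu_value(int map_fd, __u32 key, __u64 *sum)
	{
		int i, err, ncpus = libbpf_num_possible_cpus();
		__u64 *vals;

		if (ncpus < 0)
			return ncpus;	/* negative errno, as documented above */
		vals = calloc(ncpus, sizeof(*vals));
		if (!vals)
			return -ENOMEM;
		/* Per-CPU lookups fill one value slot per possible CPU. */
		err = bpf_map_lookup_elem(map_fd, &key, vals);
		if (!err) {
			*sum = 0;
			for (i = 0; i < ncpus; i++)
				*sum += vals[i];
		}
		free(vals);
		return err;
	}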
__attribute__((format(printf, 2, 3))); diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 38667b62f1fe..7ef6293b4fd7 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -60,10 +60,8 @@ struct xsk_socket {  	struct xsk_umem *umem;  	struct xsk_socket_config config;  	int fd; -	int xsks_map;  	int ifindex;  	int prog_fd; -	int qidconf_map_fd;  	int xsks_map_fd;  	__u32 queue_id;  	char ifname[IFNAMSIZ]; @@ -265,15 +263,11 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)  	/* This is the C-program:  	 * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)  	 * { -	 *     int *qidconf, index = ctx->rx_queue_index; +	 *     int index = ctx->rx_queue_index;  	 *  	 *     // A set entry here means that the correspnding queue_id  	 *     // has an active AF_XDP socket bound to it. -	 *     qidconf = bpf_map_lookup_elem(&qidconf_map, &index); -	 *     if (!qidconf) -	 *         return XDP_ABORTED; -	 * -	 *     if (*qidconf) +	 *     if (bpf_map_lookup_elem(&xsks_map, &index))  	 *         return bpf_redirect_map(&xsks_map, index, 0);  	 *  	 *     return XDP_PASS; @@ -286,15 +280,10 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)  		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_1, -4),  		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), -		BPF_LD_MAP_FD(BPF_REG_1, xsk->qidconf_map_fd), +		BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),  		BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),  		BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), -		BPF_MOV32_IMM(BPF_REG_0, 0), -		/* if r1 == 0 goto +8 */ -		BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 8),  		BPF_MOV32_IMM(BPF_REG_0, 2), -		/* r1 = *(u32 *)(r1 + 0) */ -		BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0),  		/* if r1 == 0 goto +5 */  		BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),  		/* r2 = *(u32 *)(r10 - 4) */ @@ -366,18 +355,11 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk)  	if (max_queues < 0)  		return max_queues; -	fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "qidconf_map", +	fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map",  				 sizeof(int), sizeof(int), max_queues, 0);  	if (fd < 0)  		return fd; -	xsk->qidconf_map_fd = fd; -	fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map", -				 sizeof(int), sizeof(int), max_queues, 0); -	if (fd < 0) { -		close(xsk->qidconf_map_fd); -		return fd; -	}  	xsk->xsks_map_fd = fd;  	return 0; @@ -385,10 +367,8 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk)  static void xsk_delete_bpf_maps(struct xsk_socket *xsk)  { -	close(xsk->qidconf_map_fd); +	bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);  	close(xsk->xsks_map_fd); -	xsk->qidconf_map_fd = -1; -	xsk->xsks_map_fd = -1;  }  static int xsk_lookup_bpf_maps(struct xsk_socket *xsk) @@ -417,10 +397,9 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)  	if (err)  		goto out_map_ids; -	for (i = 0; i < prog_info.nr_map_ids; i++) { -		if (xsk->qidconf_map_fd != -1 && xsk->xsks_map_fd != -1) -			break; +	xsk->xsks_map_fd = -1; +	for (i = 0; i < prog_info.nr_map_ids; i++) {  		fd = bpf_map_get_fd_by_id(map_ids[i]);  		if (fd < 0)  			continue; @@ -431,11 +410,6 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)  			continue;  		} -		if (!strcmp(map_info.name, "qidconf_map")) { -			xsk->qidconf_map_fd = fd; -			continue; -		} -  		if (!strcmp(map_info.name, "xsks_map")) {  			xsk->xsks_map_fd = fd;  			continue; @@ -445,40 +419,18 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)  	}  	err = 0; -	if (xsk->qidconf_map_fd < 0 || xsk->xsks_map_fd < 0) { +	if (xsk->xsks_map_fd == -1)  		
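The BPF_LD_MAP_FD/BPF_EMIT_CALL sequence above is the hand-assembled form of the simplified program spelled out in the updated comment: with qidconf_map removed, a single XSKMAP lookup both answers whether an AF_XDP socket is bound to the queue and feeds the redirect. Roughly the equivalent restricted C, as a sketch only; in the library the map is created from userspace, so the SEC("maps") declaration here exists purely to make the fragment self-contained:

	#include <linux/bpf.h>
	#include "bpf_helpers.h"

	struct bpf_map_def SEC("maps") xsks_map = {
		.type = BPF_MAP_TYPE_XSKMAP,
		.key_size = sizeof(int),
		.value_size = sizeof(int),
		.max_entries = 64,	/* arbitrary for the sketch */
	};

	SEC("xdp_sock")
	int xdp_sock_prog(struct xdp_md *ctx)
	{
		int index = ctx->rx_queue_index;

		/* A set entry means the corresponding queue_id has an active
		 * AF_XDP socket bound to it. */
		if (bpf_map_lookup_elem(&xsks_map, &index))
			return bpf_redirect_map(&xsks_map, index, 0);

		return XDP_PASS;
	}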
err = -ENOENT; -		xsk_delete_bpf_maps(xsk); -	}  out_map_ids:  	free(map_ids);  	return err;  } -static void xsk_clear_bpf_maps(struct xsk_socket *xsk) -{ -	int qid = false; - -	bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0); -	bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id); -} -  static int xsk_set_bpf_maps(struct xsk_socket *xsk)  { -	int qid = true, fd = xsk->fd, err; - -	err = bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0); -	if (err) -		goto out; - -	err = bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id, &fd, 0); -	if (err) -		goto out; - -	return 0; -out: -	xsk_clear_bpf_maps(xsk); -	return err; +	return bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id, +				   &xsk->fd, 0);  }  static int xsk_setup_xdp_prog(struct xsk_socket *xsk) @@ -497,26 +449,27 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)  			return err;  		err = xsk_load_xdp_prog(xsk); -		if (err) -			goto out_maps; +		if (err) { +			xsk_delete_bpf_maps(xsk); +			return err; +		}  	} else {  		xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);  		err = xsk_lookup_bpf_maps(xsk); -		if (err) -			goto out_load; +		if (err) { +			close(xsk->prog_fd); +			return err; +		}  	}  	err = xsk_set_bpf_maps(xsk); -	if (err) -		goto out_load; +	if (err) { +		xsk_delete_bpf_maps(xsk); +		close(xsk->prog_fd); +		return err; +	}  	return 0; - -out_load: -	close(xsk->prog_fd); -out_maps: -	xsk_delete_bpf_maps(xsk); -	return err;  }  int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, @@ -643,9 +596,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,  		goto out_mmap_tx;  	} -	xsk->qidconf_map_fd = -1; -	xsk->xsks_map_fd = -1; - +	xsk->prog_fd = -1;  	if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {  		err = xsk_setup_xdp_prog(xsk);  		if (err) @@ -708,8 +659,10 @@ void xsk_socket__delete(struct xsk_socket *xsk)  	if (!xsk)  		return; -	xsk_clear_bpf_maps(xsk); -	xsk_delete_bpf_maps(xsk); +	if (xsk->prog_fd != -1) { +		xsk_delete_bpf_maps(xsk); +		close(xsk->prog_fd); +	}  	optlen = sizeof(off);  	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index d8df5c9b5b2f..fb5ce43e28b3 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -280,4 +280,5 @@ $(OUTPUT)/verifier/tests.h: $(VERIFIER_TESTS_DIR) $(VERIFIER_TEST_FILES)  		 ) > $(VERIFIER_TESTS_H))  EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(ALU32_BUILD_DIR) \ -	$(VERIFIER_TESTS_H) $(PROG_TESTS_H) $(MAP_TESTS_H) +	$(VERIFIER_TESTS_H) $(PROG_TESTS_H) $(MAP_TESTS_H) \ +	feature diff --git a/tools/testing/selftests/bpf/bpf_endian.h b/tools/testing/selftests/bpf/bpf_endian.h index b25595ea4a78..05f036df8a4c 100644 --- a/tools/testing/selftests/bpf/bpf_endian.h +++ b/tools/testing/selftests/bpf/bpf_endian.h @@ -2,6 +2,7 @@  #ifndef __BPF_ENDIAN__  #define __BPF_ENDIAN__ +#include <linux/stddef.h>  #include <linux/swab.h>  /* LLVM's BPF target selects the endianness of the CPU diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index e6d243b7cd74..1a5b1accf091 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -31,7 +31,7 @@ static int (*bpf_map_pop_elem)(void *map, void *value) =  	(void *) BPF_FUNC_map_pop_elem;  static int (*bpf_map_peek_elem)(void *map, void *value) =  	(void *) BPF_FUNC_map_peek_elem; -static int 
(*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) = +static int (*bpf_probe_read)(void *dst, int size, const void *unsafe_ptr) =  	(void *) BPF_FUNC_probe_read;  static unsigned long long (*bpf_ktime_get_ns)(void) =  	(void *) BPF_FUNC_ktime_get_ns; @@ -62,7 +62,7 @@ static int (*bpf_perf_event_output)(void *ctx, void *map,  	(void *) BPF_FUNC_perf_event_output;  static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =  	(void *) BPF_FUNC_get_stackid; -static int (*bpf_probe_write_user)(void *dst, void *src, int size) = +static int (*bpf_probe_write_user)(void *dst, const void *src, int size) =  	(void *) BPF_FUNC_probe_write_user;  static int (*bpf_current_task_under_cgroup)(void *map, int index) =  	(void *) BPF_FUNC_current_task_under_cgroup; diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index a29206ebbd13..ec219f84e041 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h @@ -6,44 +6,17 @@  #include <stdlib.h>  #include <string.h>  #include <errno.h> +#include <libbpf.h> /* libbpf_num_possible_cpus */  static inline unsigned int bpf_num_possible_cpus(void)  { -	static const char *fcpu = "/sys/devices/system/cpu/possible"; -	unsigned int start, end, possible_cpus = 0; -	char buff[128]; -	FILE *fp; -	int len, n, i, j = 0; +	int possible_cpus = libbpf_num_possible_cpus(); -	fp = fopen(fcpu, "r"); -	if (!fp) { -		printf("Failed to open %s: '%s'!\n", fcpu, strerror(errno)); +	if (possible_cpus < 0) { +		printf("Failed to get # of possible cpus: '%s'!\n", +		       strerror(-possible_cpus));  		exit(1);  	} - -	if (!fgets(buff, sizeof(buff), fp)) { -		printf("Failed to read %s!\n", fcpu); -		exit(1); -	} - -	len = strlen(buff); -	for (i = 0; i <= len; i++) { -		if (buff[i] == ',' || buff[i] == '\0') { -			buff[i] = '\0'; -			n = sscanf(&buff[j], "%u-%u", &start, &end); -			if (n <= 0) { -				printf("Failed to retrieve # possible CPUs!\n"); -				exit(1); -			} else if (n == 1) { -				end = start; -			} -			possible_cpus += end - start + 1; -			j = i + 1; -		} -	} - -	fclose(fp); -  	return possible_cpus;  } diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 0d89f0396be4..e95c33e333a4 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -47,7 +47,7 @@ int enable_all_controllers(char *cgroup_path)  	char buf[PATH_MAX];  	char *c, *c2;  	int fd, cfd; -	size_t len; +	ssize_t len;  	snprintf(path, sizeof(path), "%s/cgroup.controllers", cgroup_path);  	fd = open(path, O_RDONLY); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index c0091137074b..e1b55261526f 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -5,7 +5,7 @@ static int libbpf_debug_print(enum libbpf_print_level level,  			      const char *format, va_list args)  {  	if (level != LIBBPF_DEBUG) -		return 0; +		return vfprintf(stderr, format, args);  	if (!strstr(format, "verifier log"))  		return 0; @@ -32,24 +32,69 @@ static int check_load(const char *file, enum bpf_prog_type type)  void test_bpf_verif_scale(void)  { -	const char *scale[] = { -		"./test_verif_scale1.o", "./test_verif_scale2.o", "./test_verif_scale3.o" +	const char *sched_cls[] = { +		"./test_verif_scale1.o", "./test_verif_scale2.o", "./test_verif_scale3.o",  	}; -	const char *pyperf[] = 
{ -		"./pyperf50.o",	"./pyperf100.o", "./pyperf180.o" +	const char *raw_tp[] = { +		/* full unroll by llvm */ +		"./pyperf50.o",	"./pyperf100.o", "./pyperf180.o", + +		/* partial unroll. llvm will unroll loop ~150 times. +		 * C loop count -> 600. +		 * Asm loop count -> 4. +		 * 16k insns in loop body. +		 * Total of 5 such loops. Total program size ~82k insns. +		 */ +		"./pyperf600.o", + +		/* no unroll at all. +		 * C loop count -> 600. +		 * ASM loop count -> 600. +		 * ~110 insns in loop body. +		 * Total of 5 such loops. Total program size ~1500 insns. +		 */ +		"./pyperf600_nounroll.o", + +		"./loop1.o", "./loop2.o", + +		/* partial unroll. 19k insn in a loop. +		 * Total program size 20.8k insn. +		 * ~350k processed_insns +		 */ +		"./strobemeta.o", + +		/* no unroll, tiny loops */ +		"./strobemeta_nounroll1.o", +		"./strobemeta_nounroll2.o", +	}; +	const char *cg_sysctl[] = { +		"./test_sysctl_loop1.o", "./test_sysctl_loop2.o",  	};  	int err, i;  	if (verifier_stats)  		libbpf_set_print(libbpf_debug_print); -	for (i = 0; i < ARRAY_SIZE(scale); i++) { -		err = check_load(scale[i], BPF_PROG_TYPE_SCHED_CLS); -		printf("test_scale:%s:%s\n", scale[i], err ? "FAIL" : "OK"); +	err = check_load("./loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT); +	printf("test_scale:loop3:%s\n", err ? (error_cnt--, "OK") : "FAIL"); + +	for (i = 0; i < ARRAY_SIZE(sched_cls); i++) { +		err = check_load(sched_cls[i], BPF_PROG_TYPE_SCHED_CLS); +		printf("test_scale:%s:%s\n", sched_cls[i], err ? "FAIL" : "OK");  	} -	for (i = 0; i < ARRAY_SIZE(pyperf); i++) { -		err = check_load(pyperf[i], BPF_PROG_TYPE_RAW_TRACEPOINT); -		printf("test_scale:%s:%s\n", pyperf[i], err ? "FAIL" : "OK"); +	for (i = 0; i < ARRAY_SIZE(raw_tp); i++) { +		err = check_load(raw_tp[i], BPF_PROG_TYPE_RAW_TRACEPOINT); +		printf("test_scale:%s:%s\n", raw_tp[i], err ? "FAIL" : "OK");  	} + +	for (i = 0; i < ARRAY_SIZE(cg_sysctl); i++) { +		err = check_load(cg_sysctl[i], BPF_PROG_TYPE_CGROUP_SYSCTL); +		printf("test_scale:%s:%s\n", cg_sysctl[i], err ? "FAIL" : "OK"); +	} +	err = check_load("./test_xdp_loop.o", BPF_PROG_TYPE_XDP); +	printf("test_scale:test_xdp_loop:%s\n", err ? "FAIL" : "OK"); + +	err = check_load("./test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL); +	printf("test_scale:test_seg6_loop:%s\n", err ? 
"FAIL" : "OK");  } diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c index 81ad9a0b29d0..849f42e548b5 100644 --- a/tools/testing/selftests/bpf/progs/bpf_flow.c +++ b/tools/testing/selftests/bpf/progs/bpf_flow.c @@ -57,17 +57,25 @@ struct frag_hdr {  	__be32 identification;  }; -struct bpf_map_def SEC("maps") jmp_table = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 key_size; +	__u32 value_size; +} jmp_table SEC(".maps") = {  	.type = BPF_MAP_TYPE_PROG_ARRAY, +	.max_entries = 8,  	.key_size = sizeof(__u32),  	.value_size = sizeof(__u32), -	.max_entries = 8  }; -struct bpf_map_def SEC("maps") last_dissection = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	struct bpf_flow_keys *value; +} last_dissection SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(struct bpf_flow_keys),  	.max_entries = 1,  }; diff --git a/tools/testing/selftests/bpf/progs/loop1.c b/tools/testing/selftests/bpf/progs/loop1.c new file mode 100644 index 000000000000..dea395af9ea9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/loop1.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include <linux/sched.h> +#include <linux/ptrace.h> +#include <stdint.h> +#include <stddef.h> +#include <stdbool.h> +#include <linux/bpf.h> +#include "bpf_helpers.h" + +char _license[] SEC("license") = "GPL"; + +SEC("raw_tracepoint/kfree_skb") +int nested_loops(volatile struct pt_regs* ctx) +{ +	int i, j, sum = 0, m; + +	for (j = 0; j < 300; j++) +		for (i = 0; i < j; i++) { +			if (j & 1) +				m = ctx->rax; +			else +				m = j; +			sum += i * m; +		} + +	return sum; +} diff --git a/tools/testing/selftests/bpf/progs/loop2.c b/tools/testing/selftests/bpf/progs/loop2.c new file mode 100644 index 000000000000..0637bd8e8bcf --- /dev/null +++ b/tools/testing/selftests/bpf/progs/loop2.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include <linux/sched.h> +#include <linux/ptrace.h> +#include <stdint.h> +#include <stddef.h> +#include <stdbool.h> +#include <linux/bpf.h> +#include "bpf_helpers.h" + +char _license[] SEC("license") = "GPL"; + +SEC("raw_tracepoint/consume_skb") +int while_true(volatile struct pt_regs* ctx) +{ +	int i = 0; + +	while (true) { +		if (ctx->rax & 1) +			i += 3; +		else +			i += 7; +		if (i > 40) +			break; +	} + +	return i; +} diff --git a/tools/testing/selftests/bpf/progs/loop3.c b/tools/testing/selftests/bpf/progs/loop3.c new file mode 100644 index 000000000000..30a0f6cba080 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/loop3.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include <linux/sched.h> +#include <linux/ptrace.h> +#include <stdint.h> +#include <stddef.h> +#include <stdbool.h> +#include <linux/bpf.h> +#include "bpf_helpers.h" + +char _license[] SEC("license") = "GPL"; + +SEC("raw_tracepoint/consume_skb") +int while_true(volatile struct pt_regs* ctx) +{ +	__u64 i = 0, sum = 0; +	do { +		i++; +		sum += ctx->rax; +	} while (i < 0x100000000ULL); +	return sum; +} diff --git a/tools/testing/selftests/bpf/progs/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c index 9f741e69cebe..a25c82a5b7c8 100644 --- a/tools/testing/selftests/bpf/progs/netcnt_prog.c +++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c @@ -10,24 +10,22 @@  #define REFRESH_TIME_NS	100000000  #define NS_PER_SEC	1000000000 -struct bpf_map_def SEC("maps") percpu_netcnt = { +struct { +	
__u32 type; +	struct bpf_cgroup_storage_key *key; +	struct percpu_net_cnt *value; +} percpu_netcnt SEC(".maps") = {  	.type = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, -	.key_size = sizeof(struct bpf_cgroup_storage_key), -	.value_size = sizeof(struct percpu_net_cnt),  }; -BPF_ANNOTATE_KV_PAIR(percpu_netcnt, struct bpf_cgroup_storage_key, -		     struct percpu_net_cnt); - -struct bpf_map_def SEC("maps") netcnt = { +struct { +	__u32 type; +	struct bpf_cgroup_storage_key *key; +	struct net_cnt *value; +} netcnt SEC(".maps") = {  	.type = BPF_MAP_TYPE_CGROUP_STORAGE, -	.key_size = sizeof(struct bpf_cgroup_storage_key), -	.value_size = sizeof(struct net_cnt),  }; -BPF_ANNOTATE_KV_PAIR(netcnt, struct bpf_cgroup_storage_key, -		     struct net_cnt); -  SEC("cgroup/skb")  int bpf_nextcnt(struct __sk_buff *skb)  { diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h index 0cc5e4ee90bd..6b0781391be5 100644 --- a/tools/testing/selftests/bpf/progs/pyperf.h +++ b/tools/testing/selftests/bpf/progs/pyperf.h @@ -220,7 +220,11 @@ static inline __attribute__((__always_inline__)) int __on_event(struct pt_regs *  		int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);  		if (symbol_counter == NULL)  			return 0; -#pragma unroll +#ifdef NO_UNROLL +#pragma clang loop unroll(disable) +#else +#pragma clang loop unroll(full) +#endif  		/* Unwind python stack */  		for (int i = 0; i < STACK_MAX_LEN; ++i) {  			if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) { diff --git a/tools/testing/selftests/bpf/progs/pyperf600.c b/tools/testing/selftests/bpf/progs/pyperf600.c new file mode 100644 index 000000000000..cb49b89e37cd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/pyperf600.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#define STACK_MAX_LEN 600 +/* clang will not unroll the loop 600 times. + * Instead it will unroll it to the amount it deemed + * appropriate, but the loop will still execute 600 times. + * Total program size is around 90k insns + */ +#include "pyperf.h" diff --git a/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c b/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c new file mode 100644 index 000000000000..6beff7502f4d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#define STACK_MAX_LEN 600 +#define NO_UNROLL +/* clang will not unroll at all. 
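bpf_flow.c and netcnt_prog.c above show the conversion pattern used throughout these selftests: the legacy struct bpf_map_def SEC("maps") definitions (and the BPF_ANNOTATE_KV_PAIR() shims) are replaced by anonymous structs in a ".maps" section whose key/value pointer members carry the type information as BTF, so key_size and value_size no longer need to be spelled out. A minimal sketch of the new style with made-up names (struct my_value and my_map are not part of the patch):

	#include <linux/bpf.h>
	#include "bpf_helpers.h"

	struct my_value {
		__u64 packets;
		__u64 bytes;
	};

	struct {
		__u32 type;
		__u32 max_entries;
		__u32 *key;		/* key type conveyed via BTF */
		struct my_value *value;	/* value type conveyed via BTF */
	} my_map SEC(".maps") = {
		.type = BPF_MAP_TYPE_HASH,
		.max_entries = 1024,
	};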
+ * Total program size is around 2k insns + */ +#include "pyperf.h" diff --git a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c index 9ff8ac4b0bf6..6aabb681fb9a 100644 --- a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c +++ b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c @@ -7,25 +7,36 @@  #include "bpf_helpers.h"  #include "bpf_endian.h" -struct bpf_map_def SEC("maps") socket_cookies = { -	.type = BPF_MAP_TYPE_HASH, -	.key_size = sizeof(__u64), -	.value_size = sizeof(__u32), -	.max_entries = 1 << 8, +struct socket_cookie { +	__u64 cookie_key; +	__u32 cookie_value; +}; + +struct { +	__u32 type; +	__u32 map_flags; +	int *key; +	struct socket_cookie *value; +} socket_cookies SEC(".maps") = { +	.type = BPF_MAP_TYPE_SK_STORAGE, +	.map_flags = BPF_F_NO_PREALLOC,  };  SEC("cgroup/connect6")  int set_cookie(struct bpf_sock_addr *ctx)  { -	__u32 cookie_value = 0xFF; -	__u64 cookie_key; +	struct socket_cookie *p;  	if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6)  		return 1; -	cookie_key = bpf_get_socket_cookie(ctx); -	if (bpf_map_update_elem(&socket_cookies, &cookie_key, &cookie_value, 0)) -		return 0; +	p = bpf_sk_storage_get(&socket_cookies, ctx->sk, 0, +			       BPF_SK_STORAGE_GET_F_CREATE); +	if (!p) +		return 1; + +	p->cookie_value = 0xFF; +	p->cookie_key = bpf_get_socket_cookie(ctx);  	return 1;  } @@ -33,9 +44,8 @@ int set_cookie(struct bpf_sock_addr *ctx)  SEC("sockops")  int update_cookie(struct bpf_sock_ops *ctx)  { -	__u32 new_cookie_value; -	__u32 *cookie_value; -	__u64 cookie_key; +	struct bpf_sock *sk; +	struct socket_cookie *p;  	if (ctx->family != AF_INET6)  		return 1; @@ -43,14 +53,17 @@ int update_cookie(struct bpf_sock_ops *ctx)  	if (ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB)  		return 1; -	cookie_key = bpf_get_socket_cookie(ctx); +	if (!ctx->sk) +		return 1; + +	p = bpf_sk_storage_get(&socket_cookies, ctx->sk, 0, 0); +	if (!p) +		return 1; -	cookie_value = bpf_map_lookup_elem(&socket_cookies, &cookie_key); -	if (!cookie_value) +	if (p->cookie_key != bpf_get_socket_cookie(ctx))  		return 1; -	new_cookie_value = (ctx->local_port << 8) | *cookie_value; -	bpf_map_update_elem(&socket_cookies, &cookie_key, &new_cookie_value, 0); +	p->cookie_value = (ctx->local_port << 8) | p->cookie_value;  	return 1;  } diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c index ed3e4a551c57..9390e0244259 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c @@ -1,6 +1,5 @@  #include <linux/bpf.h>  #include "bpf_helpers.h" -#include "bpf_util.h"  #include "bpf_endian.h"  int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c index 65fbfdb6cd3a..e80484d98a1a 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c @@ -1,6 +1,6 @@  #include <linux/bpf.h> +  #include "bpf_helpers.h" -#include "bpf_util.h"  #include "bpf_endian.h"  int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c index bdc22be46f2e..d85c874ef25e 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c @@ -1,6 +1,5 @@  #include 
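socket_cookie_prog.c above switches from a hash map keyed by the 64-bit socket cookie to BPF_MAP_TYPE_SK_STORAGE, so both the connect6 and sockops programs update one struct socket_cookie hanging off the socket itself. From userspace, sk storage entries are looked up with a socket file descriptor as the key; a small hedged sketch, where map_fd and client_fd are assumed to come from the surrounding test harness:

	#include <bpf/bpf.h>
	#include <linux/types.h>

	struct socket_cookie {
		__u64 cookie_key;
		__u32 cookie_value;
	};

	/* client_fd: connected socket; map_fd: fd of the socket_cookies map. */
	static int read_socket_cookie(int map_fd, int client_fd,
				      struct socket_cookie *out)
	{
		return bpf_map_lookup_elem(map_fd, &client_fd, out);
	}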
<linux/bpf.h>  #include "bpf_helpers.h" -#include "bpf_util.h"  #include "bpf_endian.h"  int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/strobemeta.c b/tools/testing/selftests/bpf/progs/strobemeta.c new file mode 100644 index 000000000000..d3df3d86f092 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/strobemeta.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +// Copyright (c) 2019 Facebook + +#define STROBE_MAX_INTS 2 +#define STROBE_MAX_STRS 25 +#define STROBE_MAX_MAPS 100 +#define STROBE_MAX_MAP_ENTRIES 20 +/* full unroll by llvm #undef NO_UNROLL */ +#include "strobemeta.h" + diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h new file mode 100644 index 000000000000..1ff73f60a3e4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -0,0 +1,528 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <stdint.h> +#include <stddef.h> +#include <stdbool.h> +#include <linux/bpf.h> +#include <linux/ptrace.h> +#include <linux/sched.h> +#include <linux/types.h> +#include "bpf_helpers.h" + +typedef uint32_t pid_t; +struct task_struct {}; + +#define TASK_COMM_LEN 16 +#define PERF_MAX_STACK_DEPTH 127 + +#define STROBE_TYPE_INVALID 0 +#define STROBE_TYPE_INT 1 +#define STROBE_TYPE_STR 2 +#define STROBE_TYPE_MAP 3 + +#define STACK_TABLE_EPOCH_SHIFT 20 +#define STROBE_MAX_STR_LEN 1 +#define STROBE_MAX_CFGS 32 +#define STROBE_MAX_PAYLOAD						\ +	(STROBE_MAX_STRS * STROBE_MAX_STR_LEN +				\ +	STROBE_MAX_MAPS * (1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN) + +struct strobe_value_header { +	/* +	 * meaning depends on type: +	 * 1. int: 0, if value not set, 1 otherwise +	 * 2. str: 1 always, whether value is set or not is determined by ptr +	 * 3. map: 1 always, pointer points to additional struct with number +	 *    of entries (up to STROBE_MAX_MAP_ENTRIES) +	 */ +	uint16_t len; +	/* +	 * _reserved might be used for some future fields/flags, but we always +	 * want to keep strobe_value_header to be 8 bytes, so BPF can read 16 +	 * bytes in one go and get both header and value +	 */ +	uint8_t _reserved[6]; +}; + +/* + * strobe_value_generic is used from BPF probe only, but needs to be a union + * of strobe_value_int/strobe_value_str/strobe_value_map + */ +struct strobe_value_generic { +	struct strobe_value_header header; +	union { +		int64_t val; +		void *ptr; +	}; +}; + +struct strobe_value_int { +	struct strobe_value_header header; +	int64_t value; +}; + +struct strobe_value_str { +	struct strobe_value_header header; +	const char* value; +}; + +struct strobe_value_map { +	struct strobe_value_header header; +	const struct strobe_map_raw* value; +}; + +struct strobe_map_entry { +	const char* key; +	const char* val; +}; + +/* + * Map of C-string key/value pairs with fixed maximum capacity. Each map has + * corresponding int64 ID, which application can use (or ignore) in whatever + * way appropriate. Map is "write-only", there is no way to get data out of + * map. Map is intended to be used to provide metadata for profilers and is + * not to be used for internal in-app communication. All methods are + * thread-safe. 
+ */ +struct strobe_map_raw { +	/* +	 * general purpose unique ID that's up to application to decide +	 * whether and how to use; for request metadata use case id is unique +	 * request ID that's used to match metadata with stack traces on +	 * Strobelight backend side +	 */ +	int64_t id; +	/* number of used entries in map */ +	int64_t cnt; +	/* +	 * having volatile doesn't change anything on BPF side, but clang +	 * emits warnings for passing `volatile const char *` into +	 * bpf_probe_read_str that expects just `const char *` +	 */ +	const char* tag; +	/* +	 * key/value entries, each consisting of 2 pointers to key and value +	 * C strings +	 */ +	struct strobe_map_entry entries[STROBE_MAX_MAP_ENTRIES]; +}; + +/* Following values define supported values of TLS mode */ +#define TLS_NOT_SET -1 +#define TLS_LOCAL_EXEC 0 +#define TLS_IMM_EXEC 1 +#define TLS_GENERAL_DYN 2 + +/* + * structure that universally represents TLS location (both for static + * executables and shared libraries) + */ +struct strobe_value_loc { +	/* +	 * tls_mode defines what TLS mode was used for particular metavariable: +	 * - -1 (TLS_NOT_SET) - no metavariable; +	 * - 0 (TLS_LOCAL_EXEC) - Local Executable mode; +	 * - 1 (TLS_IMM_EXEC) - Immediate Executable mode; +	 * - 2 (TLS_GENERAL_DYN) - General Dynamic mode; +	 * Local Dynamic mode is not yet supported, because never seen in +	 * practice.  Mode defines how offset field is interpreted. See +	 * calc_location() in below for details. +	 */ +	int64_t tls_mode; +	/* +	 * TLS_LOCAL_EXEC: offset from thread pointer (fs:0 for x86-64, +	 * tpidr_el0 for aarch64). +	 * TLS_IMM_EXEC: absolute address of GOT entry containing offset +	 * from thread pointer; +	 * TLS_GENERAL_DYN: absolute addres of double GOT entry +	 * containing tls_index_t struct; +	 */ +	int64_t offset; +}; + +struct strobemeta_cfg { +	int64_t req_meta_idx; +	struct strobe_value_loc int_locs[STROBE_MAX_INTS]; +	struct strobe_value_loc str_locs[STROBE_MAX_STRS]; +	struct strobe_value_loc map_locs[STROBE_MAX_MAPS]; +}; + +struct strobe_map_descr { +	uint64_t id; +	int16_t tag_len; +	/* +	 * cnt <0 - map value isn't set; +	 * 0 - map has id set, but no key/value entries +	 */ +	int16_t cnt; +	/* +	 * both key_lens[i] and val_lens[i] should be >0 for present key/value +	 * entry +	 */ +	uint16_t key_lens[STROBE_MAX_MAP_ENTRIES]; +	uint16_t val_lens[STROBE_MAX_MAP_ENTRIES]; +}; + +struct strobemeta_payload { +	/* req_id has valid request ID, if req_meta_valid == 1 */ +	int64_t req_id; +	uint8_t req_meta_valid; +	/* +	 * mask has Nth bit set to 1, if Nth metavar was present and +	 * successfully read +	 */ +	uint64_t int_vals_set_mask; +	int64_t int_vals[STROBE_MAX_INTS]; +	/* len is >0 for present values */ +	uint16_t str_lens[STROBE_MAX_STRS]; +	/* if map_descrs[i].cnt == -1, metavar is not present/set */ +	struct strobe_map_descr map_descrs[STROBE_MAX_MAPS]; +	/* +	 * payload has compactly packed values of str and map variables in the +	 * form: strval1\0strval2\0map1key1\0map1val1\0map2key1\0map2val1\0 +	 * (and so on); str_lens[i], key_lens[i] and val_lens[i] determines +	 * value length +	 */ +	char payload[STROBE_MAX_PAYLOAD]; +}; + +struct strobelight_bpf_sample { +	uint64_t ktime; +	char comm[TASK_COMM_LEN]; +	pid_t pid; +	int user_stack_id; +	int kernel_stack_id; +	int has_meta; +	struct strobemeta_payload metadata; +	/* +	 * makes it possible to pass (<real payload size> + 1) as data size to +	 * perf_submit() to avoid perf_submit's paranoia about passing zero as +	 * size, as it deduces that <real 
payload size> might be +	 * **theoretically** zero +	 */ +	char dummy_safeguard; +}; + +struct bpf_map_def SEC("maps") samples = { +	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, +	.key_size = sizeof(int), +	.value_size = sizeof(int), +	.max_entries = 32, +}; + +struct bpf_map_def SEC("maps") stacks_0 = { +	.type = BPF_MAP_TYPE_STACK_TRACE, +	.key_size = sizeof(uint32_t), +	.value_size = sizeof(uint64_t) * PERF_MAX_STACK_DEPTH, +	.max_entries = 16, +}; + +struct bpf_map_def SEC("maps") stacks_1 = { +	.type = BPF_MAP_TYPE_STACK_TRACE, +	.key_size = sizeof(uint32_t), +	.value_size = sizeof(uint64_t) * PERF_MAX_STACK_DEPTH, +	.max_entries = 16, +}; + +struct bpf_map_def SEC("maps") sample_heap = { +	.type = BPF_MAP_TYPE_PERCPU_ARRAY, +	.key_size = sizeof(uint32_t), +	.value_size = sizeof(struct strobelight_bpf_sample), +	.max_entries = 1, +}; + +struct bpf_map_def SEC("maps") strobemeta_cfgs = { +	.type = BPF_MAP_TYPE_PERCPU_ARRAY, +	.key_size = sizeof(pid_t), +	.value_size = sizeof(struct strobemeta_cfg), +	.max_entries = STROBE_MAX_CFGS, +}; + +/* Type for the dtv.  */ +/* https://github.com/lattera/glibc/blob/master/nptl/sysdeps/x86_64/tls.h#L34 */ +typedef union dtv { +	size_t counter; +	struct { +		void* val; +		bool is_static; +	} pointer; +} dtv_t; + +/* Partial definition for tcbhead_t */ +/* https://github.com/bminor/glibc/blob/master/sysdeps/x86_64/nptl/tls.h#L42 */ +struct tcbhead { +	void* tcb; +	dtv_t* dtv; +}; + +/* + * TLS module/offset information for shared library case. + * For x86-64, this is mapped onto two entries in GOT. + * For aarch64, this is pointed to by second GOT entry. + */ +struct tls_index { +	uint64_t module; +	uint64_t offset; +}; + +static inline __attribute__((always_inline)) +void *calc_location(struct strobe_value_loc *loc, void *tls_base) +{ +	/* +	 * tls_mode value is: +	 * - -1 (TLS_NOT_SET), if no metavar is present; +	 * - 0 (TLS_LOCAL_EXEC), if metavar uses Local Executable mode of TLS +	 * (offset from fs:0 for x86-64 or tpidr_el0 for aarch64); +	 * - 1 (TLS_IMM_EXEC), if metavar uses Immediate Executable mode of TLS; +	 * - 2 (TLS_GENERAL_DYN), if metavar uses General Dynamic mode of TLS; +	 * This schema allows to use something like: +	 * (tls_mode + 1) * (tls_base + offset) +	 * to get NULL for "no metavar" location, or correct pointer for local +	 * executable mode without doing extra ifs. +	 */ +	if (loc->tls_mode <= TLS_LOCAL_EXEC) { +		/* static executable is simple, we just have offset from +		 * tls_base */ +		void *addr = tls_base + loc->offset; +		/* multiply by (tls_mode + 1) to get NULL, if we have no +		 * metavar in this slot */ +		return (void *)((loc->tls_mode + 1) * (int64_t)addr); +	} +	/* +	 * Other modes are more complicated, we need to jump through few hoops. +	 * +	 * For immediate executable mode (currently supported only for aarch64): +	 *  - loc->offset is pointing to a GOT entry containing fixed offset +	 *  relative to tls_base; +	 * +	 * For general dynamic mode: +	 *  - loc->offset is pointing to a beginning of double GOT entries; +	 *  - (for aarch64 only) second entry points to tls_index_t struct; +	 *  - (for x86-64 only) two GOT entries are already tls_index_t; +	 *  - tls_index_t->module is used to find start of TLS section in +	 *  which variable resides; +	 *  - tls_index_t->offset provides offset within that TLS section, +	 *  pointing to value of variable. 
+	 */ +	struct tls_index tls_index; +	dtv_t *dtv; +	void *tls_ptr; + +	bpf_probe_read(&tls_index, sizeof(struct tls_index), +		       (void *)loc->offset); +	/* valid module index is always positive */ +	if (tls_index.module > 0) { +		/* dtv = ((struct tcbhead *)tls_base)->dtv[tls_index.module] */ +		bpf_probe_read(&dtv, sizeof(dtv), +			       &((struct tcbhead *)tls_base)->dtv); +		dtv += tls_index.module; +	} else { +		dtv = NULL; +	} +	bpf_probe_read(&tls_ptr, sizeof(void *), dtv); +	/* if pointer has (void *)-1 value, then TLS wasn't initialized yet */ +	return tls_ptr && tls_ptr != (void *)-1 +		? tls_ptr + tls_index.offset +		: NULL; +} + +static inline __attribute__((always_inline)) +void read_int_var(struct strobemeta_cfg *cfg, size_t idx, void *tls_base, +		  struct strobe_value_generic *value, +		  struct strobemeta_payload *data) +{ +	void *location = calc_location(&cfg->int_locs[idx], tls_base); +	if (!location) +		return; + +	bpf_probe_read(value, sizeof(struct strobe_value_generic), location); +	data->int_vals[idx] = value->val; +	if (value->header.len) +		data->int_vals_set_mask |= (1 << idx); +} + +static inline __attribute__((always_inline)) +uint64_t read_str_var(struct strobemeta_cfg* cfg, size_t idx, void *tls_base, +		      struct strobe_value_generic *value, +		      struct strobemeta_payload *data, void *payload) +{ +	void *location; +	uint32_t len; + +	data->str_lens[idx] = 0; +	location = calc_location(&cfg->str_locs[idx], tls_base); +	if (!location) +		return 0; + +	bpf_probe_read(value, sizeof(struct strobe_value_generic), location); +	len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, value->ptr); +	/* +	 * if bpf_probe_read_str returns error (<0), due to casting to +	 * unsinged int, it will become big number, so next check is +	 * sufficient to check for errors AND prove to BPF verifier, that +	 * bpf_probe_read_str won't return anything bigger than +	 * STROBE_MAX_STR_LEN +	 */ +	if (len > STROBE_MAX_STR_LEN) +		return 0; + +	data->str_lens[idx] = len; +	return len; +} + +static inline __attribute__((always_inline)) +void *read_map_var(struct strobemeta_cfg *cfg, size_t idx, void *tls_base, +		   struct strobe_value_generic *value, +		   struct strobemeta_payload* data, void *payload) +{ +	struct strobe_map_descr* descr = &data->map_descrs[idx]; +	struct strobe_map_raw map; +	void *location; +	uint32_t len; +	int i; + +	descr->tag_len = 0; /* presume no tag is set */ +	descr->cnt = -1; /* presume no value is set */ + +	location = calc_location(&cfg->map_locs[idx], tls_base); +	if (!location) +		return payload; + +	bpf_probe_read(value, sizeof(struct strobe_value_generic), location); +	if (bpf_probe_read(&map, sizeof(struct strobe_map_raw), value->ptr)) +		return payload; + +	descr->id = map.id; +	descr->cnt = map.cnt; +	if (cfg->req_meta_idx == idx) { +		data->req_id = map.id; +		data->req_meta_valid = 1; +	} + +	len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, map.tag); +	if (len <= STROBE_MAX_STR_LEN) { +		descr->tag_len = len; +		payload += len; +	} + +#ifdef NO_UNROLL +#pragma clang loop unroll(disable) +#else +#pragma unroll +#endif +	for (int i = 0; i < STROBE_MAX_MAP_ENTRIES && i < map.cnt; ++i) { +		descr->key_lens[i] = 0; +		len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, +					 map.entries[i].key); +		if (len <= STROBE_MAX_STR_LEN) { +			descr->key_lens[i] = len; +			payload += len; +		} +		descr->val_lens[i] = 0; +		len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, +					 map.entries[i].val); +		if (len <= STROBE_MAX_STR_LEN) { +	
		descr->val_lens[i] = len; +			payload += len; +		} +	} + +	return payload; +} + +/* + * read_strobe_meta returns NULL, if no metadata was read; otherwise returns + * pointer to *right after* payload ends + */ +static inline __attribute__((always_inline)) +void *read_strobe_meta(struct task_struct* task, +		       struct strobemeta_payload* data) { +	pid_t pid = bpf_get_current_pid_tgid() >> 32; +	struct strobe_value_generic value = {0}; +	struct strobemeta_cfg *cfg; +	void *tls_base, *payload; + +	cfg = bpf_map_lookup_elem(&strobemeta_cfgs, &pid); +	if (!cfg) +		return NULL; + +	data->int_vals_set_mask = 0; +	data->req_meta_valid = 0; +	payload = data->payload; +	/* +	 * we don't have struct task_struct definition, it should be: +	 * tls_base = (void *)task->thread.fsbase; +	 */ +	tls_base = (void *)task; + +#ifdef NO_UNROLL +#pragma clang loop unroll(disable) +#else +#pragma unroll +#endif +	for (int i = 0; i < STROBE_MAX_INTS; ++i) { +		read_int_var(cfg, i, tls_base, &value, data); +	} +#ifdef NO_UNROLL +#pragma clang loop unroll(disable) +#else +#pragma unroll +#endif +	for (int i = 0; i < STROBE_MAX_STRS; ++i) { +		payload += read_str_var(cfg, i, tls_base, &value, data, payload); +	} +#ifdef NO_UNROLL +#pragma clang loop unroll(disable) +#else +#pragma unroll +#endif +	for (int i = 0; i < STROBE_MAX_MAPS; ++i) { +		payload = read_map_var(cfg, i, tls_base, &value, data, payload); +	} +	/* +	 * return pointer right after end of payload, so it's possible to +	 * calculate exact amount of useful data that needs to be sent +	 */ +	return payload; +} + +SEC("raw_tracepoint/kfree_skb") +int on_event(struct pt_regs *ctx) { +	pid_t pid =  bpf_get_current_pid_tgid() >> 32; +	struct strobelight_bpf_sample* sample; +	struct task_struct *task; +	uint32_t zero = 0; +	uint64_t ktime_ns; +	void *sample_end; + +	sample = bpf_map_lookup_elem(&sample_heap, &zero); +	if (!sample) +		return 0; /* this will never happen */ + +	sample->pid = pid; +	bpf_get_current_comm(&sample->comm, TASK_COMM_LEN); +	ktime_ns = bpf_ktime_get_ns(); +	sample->ktime = ktime_ns; + +	task = (struct task_struct *)bpf_get_current_task(); +	sample_end = read_strobe_meta(task, &sample->metadata); +	sample->has_meta = sample_end != NULL; +	sample_end = sample_end ? 
: &sample->metadata; + +	if ((ktime_ns >> STACK_TABLE_EPOCH_SHIFT) & 1) { +		sample->kernel_stack_id = bpf_get_stackid(ctx, &stacks_1, 0); +		sample->user_stack_id = bpf_get_stackid(ctx, &stacks_1, BPF_F_USER_STACK); +	} else { +		sample->kernel_stack_id = bpf_get_stackid(ctx, &stacks_0, 0); +		sample->user_stack_id = bpf_get_stackid(ctx, &stacks_0, BPF_F_USER_STACK); +	} + +	uint64_t sample_size = sample_end - (void *)sample; +	/* should always be true */ +	if (sample_size < sizeof(struct strobelight_bpf_sample)) +		bpf_perf_event_output(ctx, &samples, 0, sample, 1 + sample_size); +	return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/strobemeta_nounroll1.c b/tools/testing/selftests/bpf/progs/strobemeta_nounroll1.c new file mode 100644 index 000000000000..f0a1669e11d6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/strobemeta_nounroll1.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +// Copyright (c) 2019 Facebook + +#define STROBE_MAX_INTS 2 +#define STROBE_MAX_STRS 25 +#define STROBE_MAX_MAPS 13 +#define STROBE_MAX_MAP_ENTRIES 20 +#define NO_UNROLL +#include "strobemeta.h" diff --git a/tools/testing/selftests/bpf/progs/strobemeta_nounroll2.c b/tools/testing/selftests/bpf/progs/strobemeta_nounroll2.c new file mode 100644 index 000000000000..4291a7d642e7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/strobemeta_nounroll2.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +// Copyright (c) 2019 Facebook + +#define STROBE_MAX_INTS 2 +#define STROBE_MAX_STRS 25 +#define STROBE_MAX_MAPS 30 +#define STROBE_MAX_MAP_ENTRIES 20 +#define NO_UNROLL +#include "strobemeta.h" diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c new file mode 100644 index 000000000000..28c16bb583b6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Facebook */ +#include <linux/bpf.h> +#include "bpf_helpers.h" + +int _version SEC("version") = 1; + +struct ipv_counts { +	unsigned int v4; +	unsigned int v6; +}; + +/* just to validate we can handle maps in multiple sections */ +struct bpf_map_def SEC("maps") btf_map_legacy = { +	.type = BPF_MAP_TYPE_ARRAY, +	.key_size = sizeof(int), +	.value_size = sizeof(long long), +	.max_entries = 4, +}; + +BPF_ANNOTATE_KV_PAIR(btf_map_legacy, int, struct ipv_counts); + +struct { +	int *key; +	struct ipv_counts *value; +	unsigned int type; +	unsigned int max_entries; +} btf_map SEC(".maps") = { +	.type = BPF_MAP_TYPE_ARRAY, +	.max_entries = 4, +}; + +struct dummy_tracepoint_args { +	unsigned long long pad; +	struct sock *sock; +}; + +__attribute__((noinline)) +static int test_long_fname_2(struct dummy_tracepoint_args *arg) +{ +	struct ipv_counts *counts; +	int key = 0; + +	if (!arg->sock) +		return 0; + +	counts = bpf_map_lookup_elem(&btf_map, &key); +	if (!counts) +		return 0; + +	counts->v6++; + +	/* just verify we can reference both maps */ +	counts = bpf_map_lookup_elem(&btf_map_legacy, &key); +	if (!counts) +		return 0; + +	return 0; +} + +__attribute__((noinline)) +static int test_long_fname_1(struct dummy_tracepoint_args *arg) +{ +	return test_long_fname_2(arg); +} + +SEC("dummy_tracepoint") +int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +{ +	return test_long_fname_1(arg); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c 
b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c index f6d9f238e00a..aaa6ec250e15 100644 --- a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c +++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c @@ -15,17 +15,25 @@ struct stack_trace_t {  	struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];  }; -struct bpf_map_def SEC("maps") perfmap = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 key_size; +	__u32 value_size; +} perfmap SEC(".maps") = {  	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, +	.max_entries = 2,  	.key_size = sizeof(int),  	.value_size = sizeof(__u32), -	.max_entries = 2,  }; -struct bpf_map_def SEC("maps") stackdata_map = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	struct stack_trace_t *value; +} stackdata_map SEC(".maps") = {  	.type = BPF_MAP_TYPE_PERCPU_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(struct stack_trace_t),  	.max_entries = 1,  }; @@ -47,10 +55,13 @@ struct bpf_map_def SEC("maps") stackdata_map = {   * issue and avoid complicated C programming massaging.   * This is an acceptable workaround since there is one entry here.   */ -struct bpf_map_def SEC("maps") rawdata_map = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	__u64 (*value)[2 * MAX_STACK_RAWTP]; +} rawdata_map SEC(".maps") = {  	.type = BPF_MAP_TYPE_PERCPU_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = MAX_STACK_RAWTP * sizeof(__u64) * 2,  	.max_entries = 1,  }; diff --git a/tools/testing/selftests/bpf/progs/test_global_data.c b/tools/testing/selftests/bpf/progs/test_global_data.c index 5ab14e941980..866cc7ddbe43 100644 --- a/tools/testing/selftests/bpf/progs/test_global_data.c +++ b/tools/testing/selftests/bpf/progs/test_global_data.c @@ -7,17 +7,23 @@  #include "bpf_helpers.h" -struct bpf_map_def SEC("maps") result_number = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	__u64 *value; +} result_number SEC(".maps") = {  	.type		= BPF_MAP_TYPE_ARRAY, -	.key_size	= sizeof(__u32), -	.value_size	= sizeof(__u64),  	.max_entries	= 11,  }; -struct bpf_map_def SEC("maps") result_string = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	const char (*value)[32]; +} result_string SEC(".maps") = {  	.type		= BPF_MAP_TYPE_ARRAY, -	.key_size	= sizeof(__u32), -	.value_size	= 32,  	.max_entries	= 5,  }; @@ -27,10 +33,13 @@ struct foo {  	__u64 c;  }; -struct bpf_map_def SEC("maps") result_struct = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	struct foo *value; +} result_struct SEC(".maps") = {  	.type		= BPF_MAP_TYPE_ARRAY, -	.key_size	= sizeof(__u32), -	.value_size	= sizeof(struct foo),  	.max_entries	= 5,  }; diff --git a/tools/testing/selftests/bpf/progs/test_l4lb.c b/tools/testing/selftests/bpf/progs/test_l4lb.c index 1e10c9590991..848cbb90f581 100644 --- a/tools/testing/selftests/bpf/progs/test_l4lb.c +++ b/tools/testing/selftests/bpf/progs/test_l4lb.c @@ -169,38 +169,53 @@ struct eth_hdr {  	unsigned short eth_proto;  }; -struct bpf_map_def SEC("maps") vip_map = { +struct { +	__u32 type; +	__u32 max_entries; +	struct vip *key; +	struct vip_meta *value; +} vip_map SEC(".maps") = {  	.type = BPF_MAP_TYPE_HASH, -	.key_size = sizeof(struct vip), -	.value_size = sizeof(struct vip_meta),  	.max_entries = MAX_VIPS,  }; -struct bpf_map_def SEC("maps") ch_rings = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	__u32 *value; +} ch_rings SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(__u32),  	.max_entries = 
CH_RINGS_SIZE,  }; -struct bpf_map_def SEC("maps") reals = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	struct real_definition *value; +} reals SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(struct real_definition),  	.max_entries = MAX_REALS,  }; -struct bpf_map_def SEC("maps") stats = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	struct vip_stats *value; +} stats SEC(".maps") = {  	.type = BPF_MAP_TYPE_PERCPU_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(struct vip_stats),  	.max_entries = MAX_VIPS,  }; -struct bpf_map_def SEC("maps") ctl_array = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	struct ctl_value *value; +} ctl_array SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(struct ctl_value),  	.max_entries = CTL_MAP_SIZE,  }; diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c index ba44a14e6dc4..c63ecf3ca573 100644 --- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c @@ -165,38 +165,53 @@ struct eth_hdr {  	unsigned short eth_proto;  }; -struct bpf_map_def SEC("maps") vip_map = { +struct { +	__u32 type; +	__u32 max_entries; +	struct vip *key; +	struct vip_meta *value; +} vip_map SEC(".maps") = {  	.type = BPF_MAP_TYPE_HASH, -	.key_size = sizeof(struct vip), -	.value_size = sizeof(struct vip_meta),  	.max_entries = MAX_VIPS,  }; -struct bpf_map_def SEC("maps") ch_rings = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	__u32 *value; +} ch_rings SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(__u32),  	.max_entries = CH_RINGS_SIZE,  }; -struct bpf_map_def SEC("maps") reals = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	struct real_definition *value; +} reals SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(struct real_definition),  	.max_entries = MAX_REALS,  }; -struct bpf_map_def SEC("maps") stats = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	struct vip_stats *value; +} stats SEC(".maps") = {  	.type = BPF_MAP_TYPE_PERCPU_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(struct vip_stats),  	.max_entries = MAX_VIPS,  }; -struct bpf_map_def SEC("maps") ctl_array = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	struct ctl_value *value; +} ctl_array SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(struct ctl_value),  	.max_entries = CTL_MAP_SIZE,  }; diff --git a/tools/testing/selftests/bpf/progs/test_map_lock.c b/tools/testing/selftests/bpf/progs/test_map_lock.c index af8cc68ed2f9..40d9c2853393 100644 --- a/tools/testing/selftests/bpf/progs/test_map_lock.c +++ b/tools/testing/selftests/bpf/progs/test_map_lock.c @@ -11,29 +11,31 @@ struct hmap_elem {  	int var[VAR_NUM];  }; -struct bpf_map_def SEC("maps") hash_map = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	struct hmap_elem *value; +} hash_map SEC(".maps") = {  	.type = BPF_MAP_TYPE_HASH, -	.key_size = sizeof(int), -	.value_size = sizeof(struct hmap_elem),  	.max_entries = 1,  }; -BPF_ANNOTATE_KV_PAIR(hash_map, int, struct hmap_elem); -  struct array_elem {  	struct bpf_spin_lock lock;  	int var[VAR_NUM];  }; -struct bpf_map_def SEC("maps") array_map = { +struct { +	__u32 type; +	__u32 
max_entries; +	int *key; +	struct array_elem *value; +} array_map SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(int), -	.value_size = sizeof(struct array_elem),  	.max_entries = 1,  }; -BPF_ANNOTATE_KV_PAIR(array_map, int, struct array_elem); -  SEC("map_lock_demo")  int bpf_map_lock_test(struct __sk_buff *skb)  { diff --git a/tools/testing/selftests/bpf/progs/test_seg6_loop.c b/tools/testing/selftests/bpf/progs/test_seg6_loop.c new file mode 100644 index 000000000000..463964d79f73 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_seg6_loop.c @@ -0,0 +1,261 @@ +#include <stddef.h> +#include <inttypes.h> +#include <errno.h> +#include <linux/seg6_local.h> +#include <linux/bpf.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +/* Packet parsing state machine helpers. */ +#define cursor_advance(_cursor, _len) \ +	({ void *_tmp = _cursor; _cursor += _len; _tmp; }) + +#define SR6_FLAG_ALERT (1 << 4) + +#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \ +				0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32)) +#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \ +				0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32)) +#define BPF_PACKET_HEADER __attribute__((packed)) + +struct ip6_t { +	unsigned int ver:4; +	unsigned int priority:8; +	unsigned int flow_label:20; +	unsigned short payload_len; +	unsigned char next_header; +	unsigned char hop_limit; +	unsigned long long src_hi; +	unsigned long long src_lo; +	unsigned long long dst_hi; +	unsigned long long dst_lo; +} BPF_PACKET_HEADER; + +struct ip6_addr_t { +	unsigned long long hi; +	unsigned long long lo; +} BPF_PACKET_HEADER; + +struct ip6_srh_t { +	unsigned char nexthdr; +	unsigned char hdrlen; +	unsigned char type; +	unsigned char segments_left; +	unsigned char first_segment; +	unsigned char flags; +	unsigned short tag; + +	struct ip6_addr_t segments[0]; +} BPF_PACKET_HEADER; + +struct sr6_tlv_t { +	unsigned char type; +	unsigned char len; +	unsigned char value[0]; +} BPF_PACKET_HEADER; + +static __attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb) +{ +	void *cursor, *data_end; +	struct ip6_srh_t *srh; +	struct ip6_t *ip; +	uint8_t *ipver; + +	data_end = (void *)(long)skb->data_end; +	cursor = (void *)(long)skb->data; +	ipver = (uint8_t *)cursor; + +	if ((void *)ipver + sizeof(*ipver) > data_end) +		return NULL; + +	if ((*ipver >> 4) != 6) +		return NULL; + +	ip = cursor_advance(cursor, sizeof(*ip)); +	if ((void *)ip + sizeof(*ip) > data_end) +		return NULL; + +	if (ip->next_header != 43) +		return NULL; + +	srh = cursor_advance(cursor, sizeof(*srh)); +	if ((void *)srh + sizeof(*srh) > data_end) +		return NULL; + +	if (srh->type != 4) +		return NULL; + +	return srh; +} + +static __attribute__((always_inline)) +int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad, +		   uint32_t old_pad, uint32_t pad_off) +{ +	int err; + +	if (new_pad != old_pad) { +		err = bpf_lwt_seg6_adjust_srh(skb, pad_off, +					  (int) new_pad - (int) old_pad); +		if (err) +			return err; +	} + +	if (new_pad > 0) { +		char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +					0, 0, 0}; +		struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf; + +		pad_tlv->type = SR6_TLV_PADDING; +		pad_tlv->len = new_pad - 2; + +		err = bpf_lwt_seg6_store_bytes(skb, pad_off, +					       (void *)pad_tlv_buf, new_pad); +		if (err) +			return err; +	} + +	return 0; +} + +static __attribute__((always_inline)) +int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t 
*srh, +			  uint32_t *tlv_off, uint32_t *pad_size, +			  uint32_t *pad_off) +{ +	uint32_t srh_off, cur_off; +	int offset_valid = 0; +	int err; + +	srh_off = (char *)srh - (char *)(long)skb->data; +	// cur_off = end of segments, start of possible TLVs +	cur_off = srh_off + sizeof(*srh) + +		sizeof(struct ip6_addr_t) * (srh->first_segment + 1); + +	*pad_off = 0; + +	// we can only go as far as ~10 TLVs due to the BPF max stack size +	#pragma clang loop unroll(disable) +	for (int i = 0; i < 100; i++) { +		struct sr6_tlv_t tlv; + +		if (cur_off == *tlv_off) +			offset_valid = 1; + +		if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3)) +			break; + +		err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv)); +		if (err) +			return err; + +		if (tlv.type == SR6_TLV_PADDING) { +			*pad_size = tlv.len + sizeof(tlv); +			*pad_off = cur_off; + +			if (*tlv_off == srh_off) { +				*tlv_off = cur_off; +				offset_valid = 1; +			} +			break; + +		} else if (tlv.type == SR6_TLV_HMAC) { +			break; +		} + +		cur_off += sizeof(tlv) + tlv.len; +	} // we reached the padding or HMAC TLVs, or the end of the SRH + +	if (*pad_off == 0) +		*pad_off = cur_off; + +	if (*tlv_off == -1) +		*tlv_off = cur_off; +	else if (!offset_valid) +		return -EINVAL; + +	return 0; +} + +static __attribute__((always_inline)) +int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off, +	    struct sr6_tlv_t *itlv, uint8_t tlv_size) +{ +	uint32_t srh_off = (char *)srh - (char *)(long)skb->data; +	uint8_t len_remaining, new_pad; +	uint32_t pad_off = 0; +	uint32_t pad_size = 0; +	uint32_t partial_srh_len; +	int err; + +	if (tlv_off != -1) +		tlv_off += srh_off; + +	if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC) +		return -EINVAL; + +	err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off); +	if (err) +		return err; + +	err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len); +	if (err) +		return err; + +	err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size); +	if (err) +		return err; + +	// the following can't be moved inside update_tlv_pad because the +	// bpf verifier has some issues with it +	pad_off += sizeof(*itlv) + itlv->len; +	partial_srh_len = pad_off - srh_off; +	len_remaining = partial_srh_len % 8; +	new_pad = 8 - len_remaining; + +	if (new_pad == 1) // cannot pad for 1 byte only +		new_pad = 9; +	else if (new_pad == 8) +		new_pad = 0; + +	return update_tlv_pad(skb, new_pad, pad_size, pad_off); +} + +// Add an Egress TLV fc00::4, add the flag A, +// and apply End.X action to fc42::1 +SEC("lwt_seg6local") +int __add_egr_x(struct __sk_buff *skb) +{ +	unsigned long long hi = 0xfc42000000000000; +	unsigned long long lo = 0x1; +	struct ip6_srh_t *srh = get_srh(skb); +	uint8_t new_flags = SR6_FLAG_ALERT; +	struct ip6_addr_t addr; +	int err, offset; + +	if (srh == NULL) +		return BPF_DROP; + +	uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +			   0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4}; + +	err = add_tlv(skb, srh, (srh->hdrlen+1) << 3, +		      (struct sr6_tlv_t *)&tlv, 20); +	if (err) +		return BPF_DROP; + +	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags); +	err = bpf_lwt_seg6_store_bytes(skb, offset, +				       (void *)&new_flags, sizeof(new_flags)); +	if (err) +		return BPF_DROP; + +	addr.lo = htonll(lo); +	addr.hi = htonll(hi); +	err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X, +				  (void *)&addr, sizeof(addr)); +	if (err) +		return BPF_DROP; +	return BPF_REDIRECT; +} +char __license[] 
SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c index 5b54ec637ada..435a9527733e 100644 --- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c +++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c @@ -21,38 +21,55 @@ int _version SEC("version") = 1;  #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)  #endif -struct bpf_map_def SEC("maps") outer_map = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 key_size; +	__u32 value_size; +} outer_map SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY_OF_MAPS, +	.max_entries = 1,  	.key_size = sizeof(__u32),  	.value_size = sizeof(__u32), -	.max_entries = 1,  }; -struct bpf_map_def SEC("maps") result_map = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	__u32 *value; +} result_map SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(__u32),  	.max_entries = NR_RESULTS,  }; -struct bpf_map_def SEC("maps") tmp_index_ovr_map = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	int *value; +} tmp_index_ovr_map SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(int),  	.max_entries = 1,  }; -struct bpf_map_def SEC("maps") linum_map = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	__u32 *value; +} linum_map SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(__u32),  	.max_entries = 1,  }; -struct bpf_map_def SEC("maps") data_check_map = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	struct data_check *value; +} data_check_map SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(struct data_check),  	.max_entries = 1,  }; diff --git a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c index 45a1a1a2c345..6ac68be5d68b 100644 --- a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c +++ b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c @@ -4,24 +4,26 @@  #include <linux/version.h>  #include "bpf_helpers.h" -struct bpf_map_def SEC("maps") info_map = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	__u64 *value; +} info_map SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(__u64),  	.max_entries = 1,  }; -BPF_ANNOTATE_KV_PAIR(info_map, __u32, __u64); - -struct bpf_map_def SEC("maps") status_map = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	__u64 *value; +} status_map SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(__u64),  	.max_entries = 1,  }; -BPF_ANNOTATE_KV_PAIR(status_map, __u32, __u64); -  SEC("send_signal_demo")  int bpf_send_signal_test(void *ctx)  { diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c index 1c39e4ccb7f1..c3d383d650cb 100644 --- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c @@ -27,31 +27,43 @@ enum bpf_linum_array_idx {  	__NR_BPF_LINUM_ARRAY_IDX,  }; -struct bpf_map_def SEC("maps") addr_map = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	struct sockaddr_in6 *value; +} addr_map SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	
.value_size = sizeof(struct sockaddr_in6),  	.max_entries = __NR_BPF_ADDR_ARRAY_IDX,  }; -struct bpf_map_def SEC("maps") sock_result_map = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	struct bpf_sock *value; +} sock_result_map SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(struct bpf_sock),  	.max_entries = __NR_BPF_RESULT_ARRAY_IDX,  }; -struct bpf_map_def SEC("maps") tcp_sock_result_map = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	struct bpf_tcp_sock *value; +} tcp_sock_result_map SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(struct bpf_tcp_sock),  	.max_entries = __NR_BPF_RESULT_ARRAY_IDX,  }; -struct bpf_map_def SEC("maps") linum_map = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	__u32 *value; +} linum_map SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(__u32),  	.max_entries = __NR_BPF_LINUM_ARRAY_IDX,  }; @@ -60,26 +72,26 @@ struct bpf_spinlock_cnt {  	__u32 cnt;  }; -struct bpf_map_def SEC("maps") sk_pkt_out_cnt = { +struct { +	__u32 type; +	__u32 map_flags; +	int *key; +	struct bpf_spinlock_cnt *value; +} sk_pkt_out_cnt SEC(".maps") = {  	.type = BPF_MAP_TYPE_SK_STORAGE, -	.key_size = sizeof(int), -	.value_size = sizeof(struct bpf_spinlock_cnt), -	.max_entries = 0,  	.map_flags = BPF_F_NO_PREALLOC,  }; -BPF_ANNOTATE_KV_PAIR(sk_pkt_out_cnt, int, struct bpf_spinlock_cnt); - -struct bpf_map_def SEC("maps") sk_pkt_out_cnt10 = { +struct { +	__u32 type; +	__u32 map_flags; +	int *key; +	struct bpf_spinlock_cnt *value; +} sk_pkt_out_cnt10 SEC(".maps") = {  	.type = BPF_MAP_TYPE_SK_STORAGE, -	.key_size = sizeof(int), -	.value_size = sizeof(struct bpf_spinlock_cnt), -	.max_entries = 0,  	.map_flags = BPF_F_NO_PREALLOC,  }; -BPF_ANNOTATE_KV_PAIR(sk_pkt_out_cnt10, int, struct bpf_spinlock_cnt); -  static bool is_loopback6(__u32 *a6)  {  	return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1); diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c index 40f904312090..0a77ae36d981 100644 --- a/tools/testing/selftests/bpf/progs/test_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c @@ -10,30 +10,29 @@ struct hmap_elem {  	int test_padding;  }; -struct bpf_map_def SEC("maps") hmap = { +struct { +	__u32 type; +	__u32 max_entries; +	int *key; +	struct hmap_elem *value; +} hmap SEC(".maps") = {  	.type = BPF_MAP_TYPE_HASH, -	.key_size = sizeof(int), -	.value_size = sizeof(struct hmap_elem),  	.max_entries = 1,  }; -BPF_ANNOTATE_KV_PAIR(hmap, int, struct hmap_elem); - -  struct cls_elem {  	struct bpf_spin_lock lock;  	volatile int cnt;  }; -struct bpf_map_def SEC("maps") cls_map = { +struct { +	__u32 type; +	struct bpf_cgroup_storage_key *key; +	struct cls_elem *value; +} cls_map SEC(".maps") = {  	.type = BPF_MAP_TYPE_CGROUP_STORAGE, -	.key_size = sizeof(struct bpf_cgroup_storage_key), -	.value_size = sizeof(struct cls_elem),  }; -BPF_ANNOTATE_KV_PAIR(cls_map, struct bpf_cgroup_storage_key, -		     struct cls_elem); -  struct bpf_vqueue {  	struct bpf_spin_lock lock;  	/* 4 byte hole */ @@ -42,14 +41,16 @@ struct bpf_vqueue {  	unsigned int rate;  }; -struct bpf_map_def SEC("maps") vqueue = { +struct { +	__u32 type; +	__u32 max_entries; +	int *key; +	struct bpf_vqueue *value; +} vqueue SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(int), -	.value_size = sizeof(struct bpf_vqueue),  	
.max_entries = 1,  }; -BPF_ANNOTATE_KV_PAIR(vqueue, int, struct bpf_vqueue);  #define CREDIT_PER_NS(delta, rate) (((delta) * rate) >> 20)  SEC("spin_lock_demo") diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c index d86c281e957f..fcf2280bb60c 100644 --- a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c @@ -8,34 +8,50 @@  #define PERF_MAX_STACK_DEPTH         127  #endif -struct bpf_map_def SEC("maps") control_map = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	__u32 *value; +} control_map SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(__u32),  	.max_entries = 1,  }; -struct bpf_map_def SEC("maps") stackid_hmap = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	__u32 *value; +} stackid_hmap SEC(".maps") = {  	.type = BPF_MAP_TYPE_HASH, -	.key_size = sizeof(__u32), -	.value_size = sizeof(__u32),  	.max_entries = 16384,  }; -struct bpf_map_def SEC("maps") stackmap = { +typedef struct bpf_stack_build_id stack_trace_t[PERF_MAX_STACK_DEPTH]; + +struct { +	__u32 type; +	__u32 max_entries; +	__u32 map_flags; +	__u32 key_size; +	__u32 value_size; +} stackmap SEC(".maps") = {  	.type = BPF_MAP_TYPE_STACK_TRACE, -	.key_size = sizeof(__u32), -	.value_size = sizeof(struct bpf_stack_build_id) -		* PERF_MAX_STACK_DEPTH,  	.max_entries = 128,  	.map_flags = BPF_F_STACK_BUILD_ID, +	.key_size = sizeof(__u32), +	.value_size = sizeof(stack_trace_t),  }; -struct bpf_map_def SEC("maps") stack_amap = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	/* there seems to be a bug in kernel not handling typedef properly */ +	struct bpf_stack_build_id (*value)[PERF_MAX_STACK_DEPTH]; +} stack_amap SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(struct bpf_stack_build_id) -		* PERF_MAX_STACK_DEPTH,  	.max_entries = 128,  }; diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c index af111af7ca1a..7ad09adbf648 100644 --- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c @@ -8,31 +8,47 @@  #define PERF_MAX_STACK_DEPTH         127  #endif -struct bpf_map_def SEC("maps") control_map = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	__u32 *value; +} control_map SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(__u32),  	.max_entries = 1,  }; -struct bpf_map_def SEC("maps") stackid_hmap = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	__u32 *value; +} stackid_hmap SEC(".maps") = {  	.type = BPF_MAP_TYPE_HASH, -	.key_size = sizeof(__u32), -	.value_size = sizeof(__u32),  	.max_entries = 16384,  }; -struct bpf_map_def SEC("maps") stackmap = { +typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH]; + +struct { +	__u32 type; +	__u32 max_entries; +	__u32 key_size; +	__u32 value_size; +} stackmap SEC(".maps") = {  	.type = BPF_MAP_TYPE_STACK_TRACE, -	.key_size = sizeof(__u32), -	.value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,  	.max_entries = 16384, +	.key_size = sizeof(__u32), +	.value_size = sizeof(stack_trace_t),  }; -struct bpf_map_def SEC("maps") stack_amap = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	__u64 (*value)[PERF_MAX_STACK_DEPTH]; +} stack_amap SEC(".maps") = {  	.type = 
BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,  	.max_entries = 16384,  }; diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c new file mode 100644 index 000000000000..608a06871572 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <stdint.h> +#include <string.h> + +#include <linux/stddef.h> +#include <linux/bpf.h> + +#include "bpf_helpers.h" + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +/* tcp_mem sysctl has only 3 ints, but this test is doing TCP_MEM_LOOPS */ +#define TCP_MEM_LOOPS 28  /* because 30 doesn't fit into 512 bytes of stack */ +#define MAX_ULONG_STR_LEN 7 +#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN) + +static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx) +{ +	volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string"; +	unsigned char i; +	char name[64]; +	int ret; + +	memset(name, 0, sizeof(name)); +	ret = bpf_sysctl_get_name(ctx, name, sizeof(name), 0); +	if (ret < 0 || ret != sizeof(tcp_mem_name) - 1) +		return 0; + +#pragma clang loop unroll(disable) +	for (i = 0; i < sizeof(tcp_mem_name); ++i) +		if (name[i] != tcp_mem_name[i]) +			return 0; + +	return 1; +} + +SEC("cgroup/sysctl") +int sysctl_tcp_mem(struct bpf_sysctl *ctx) +{ +	unsigned long tcp_mem[TCP_MEM_LOOPS] = {}; +	char value[MAX_VALUE_STR_LEN]; +	unsigned char i, off = 0; +	int ret; + +	if (ctx->write) +		return 0; + +	if (!is_tcp_mem(ctx)) +		return 0; + +	ret = bpf_sysctl_get_current_value(ctx, value, MAX_VALUE_STR_LEN); +	if (ret < 0 || ret >= MAX_VALUE_STR_LEN) +		return 0; + +#pragma clang loop unroll(disable) +	for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) { +		ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0, +				  tcp_mem + i); +		if (ret <= 0 || ret > MAX_ULONG_STR_LEN) +			return 0; +		off += ret & MAX_ULONG_STR_LEN; +	} + +	return tcp_mem[0] < tcp_mem[1] && tcp_mem[1] < tcp_mem[2]; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c new file mode 100644 index 000000000000..cb201cbe11e7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <stdint.h> +#include <string.h> + +#include <linux/stddef.h> +#include <linux/bpf.h> + +#include "bpf_helpers.h" + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +/* tcp_mem sysctl has only 3 ints, but this test is doing TCP_MEM_LOOPS */ +#define TCP_MEM_LOOPS 20  /* because 30 doesn't fit into 512 bytes of stack */ +#define MAX_ULONG_STR_LEN 7 +#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN) + +static __attribute__((noinline)) int is_tcp_mem(struct bpf_sysctl *ctx) +{ +	volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop"; +	unsigned char i; +	char name[64]; +	int ret; + +	memset(name, 0, sizeof(name)); +	ret = bpf_sysctl_get_name(ctx, name, sizeof(name), 0); +	if (ret < 0 || ret != sizeof(tcp_mem_name) - 1) +		return 0; + +#pragma clang loop unroll(disable) +	for (i = 0; i < sizeof(tcp_mem_name); ++i) +		if (name[i] != tcp_mem_name[i]) +			return 0; + +	return 1; +} + + 
+SEC("cgroup/sysctl") +int sysctl_tcp_mem(struct bpf_sysctl *ctx) +{ +	unsigned long tcp_mem[TCP_MEM_LOOPS] = {}; +	char value[MAX_VALUE_STR_LEN]; +	unsigned char i, off = 0; +	int ret; + +	if (ctx->write) +		return 0; + +	if (!is_tcp_mem(ctx)) +		return 0; + +	ret = bpf_sysctl_get_current_value(ctx, value, MAX_VALUE_STR_LEN); +	if (ret < 0 || ret >= MAX_VALUE_STR_LEN) +		return 0; + +#pragma clang loop unroll(disable) +	for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) { +		ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0, +				  tcp_mem + i); +		if (ret <= 0 || ret > MAX_ULONG_STR_LEN) +			return 0; +		off += ret & MAX_ULONG_STR_LEN; +	} + +	return tcp_mem[0] < tcp_mem[1] && tcp_mem[1] < tcp_mem[2]; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c index a295cad805d7..5cbbff416998 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c @@ -8,7 +8,6 @@  #include <linux/bpf.h>  #include "bpf_helpers.h" -#include "bpf_util.h"  /* Max supported length of a string with unsigned long in base 10 (pow2 - 1). */  #define MAX_ULONG_STR_LEN 0xF @@ -16,6 +15,10 @@  /* Max supported length of sysctl value string (pow2). */  #define MAX_VALUE_STR_LEN 0x40 +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif +  static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)  {  	char tcp_mem_name[] = "net/ipv4/tcp_mem"; diff --git a/tools/testing/selftests/bpf/progs/test_tcp_estats.c b/tools/testing/selftests/bpf/progs/test_tcp_estats.c index bee3bbecc0c4..df98f7e32832 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_estats.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_estats.c @@ -148,10 +148,13 @@ struct tcp_estats_basic_event {  	struct tcp_estats_conn_id conn_id;  }; -struct bpf_map_def SEC("maps") ev_record_map = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	struct tcp_estats_basic_event *value; +} ev_record_map SEC(".maps") = {  	.type = BPF_MAP_TYPE_HASH, -	.key_size = sizeof(__u32), -	.value_size = sizeof(struct tcp_estats_basic_event),  	.max_entries = 1024,  }; diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c index c7c3240e0dd4..38e10c9fd996 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c @@ -14,17 +14,23 @@  #include "bpf_endian.h"  #include "test_tcpbpf.h" -struct bpf_map_def SEC("maps") global_map = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	struct tcpbpf_globals *value; +} global_map SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(struct tcpbpf_globals),  	.max_entries = 4,  }; -struct bpf_map_def SEC("maps") sockopt_results = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	int *value; +} sockopt_results SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(int),  	.max_entries = 2,  }; diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c index ec6db6e64c41..d073d37d4e27 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c @@ -14,18 +14,26 @@  #include "bpf_endian.h"  #include "test_tcpnotify.h" -struct bpf_map_def SEC("maps") 
global_map = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	struct tcpnotify_globals *value; +} global_map SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(struct tcpnotify_globals),  	.max_entries = 4,  }; -struct bpf_map_def SEC("maps") perf_event_map = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 key_size; +	__u32 value_size; +} perf_event_map SEC(".maps") = {  	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, +	.max_entries = 2,  	.key_size = sizeof(int),  	.value_size = sizeof(__u32), -	.max_entries = 2,  };  int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c index 5e7df8bb5b5d..ec3d2c1c8cf9 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp.c +++ b/tools/testing/selftests/bpf/progs/test_xdp.c @@ -22,17 +22,23 @@  int _version SEC("version") = 1; -struct bpf_map_def SEC("maps") rxcnt = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	__u64 *value; +} rxcnt SEC(".maps") = {  	.type = BPF_MAP_TYPE_PERCPU_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(__u64),  	.max_entries = 256,  }; -struct bpf_map_def SEC("maps") vip2tnl = { +struct { +	__u32 type; +	__u32 max_entries; +	struct vip *key; +	struct iptnl_info *value; +} vip2tnl SEC(".maps") = {  	.type = BPF_MAP_TYPE_HASH, -	.key_size = sizeof(struct vip), -	.value_size = sizeof(struct iptnl_info),  	.max_entries = MAX_IPTNL_ENTRIES,  }; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_loop.c b/tools/testing/selftests/bpf/progs/test_xdp_loop.c new file mode 100644 index 000000000000..7fa4677df22e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_loop.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include <stddef.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/in.h> +#include <linux/udp.h> +#include <linux/tcp.h> +#include <linux/pkt_cls.h> +#include <sys/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" +#include "test_iptunnel_common.h" + +int _version SEC("version") = 1; + +struct bpf_map_def SEC("maps") rxcnt = { +	.type = BPF_MAP_TYPE_PERCPU_ARRAY, +	.key_size = sizeof(__u32), +	.value_size = sizeof(__u64), +	.max_entries = 256, +}; + +struct bpf_map_def SEC("maps") vip2tnl = { +	.type = BPF_MAP_TYPE_HASH, +	.key_size = sizeof(struct vip), +	.value_size = sizeof(struct iptnl_info), +	.max_entries = MAX_IPTNL_ENTRIES, +}; + +static __always_inline void count_tx(__u32 protocol) +{ +	__u64 *rxcnt_count; + +	rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol); +	if (rxcnt_count) +		*rxcnt_count += 1; +} + +static __always_inline int get_dport(void *trans_data, void *data_end, +				     __u8 protocol) +{ +	struct tcphdr *th; +	struct udphdr *uh; + +	switch (protocol) { +	case IPPROTO_TCP: +		th = (struct tcphdr *)trans_data; +		if (th + 1 > data_end) +			return -1; +		return th->dest; +	case IPPROTO_UDP: +		uh = (struct udphdr *)trans_data; +		if (uh + 1 > data_end) +			return -1; +		return uh->dest; +	default: +		return 0; +	} +} + +static __always_inline void set_ethhdr(struct ethhdr *new_eth, +				       const struct ethhdr *old_eth, +				       const struct iptnl_info *tnl, +				       __be16 h_proto) +{ +	memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source)); +	memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest)); +	new_eth->h_proto 
= h_proto; +} + +static __always_inline int handle_ipv4(struct xdp_md *xdp) +{ +	void *data_end = (void *)(long)xdp->data_end; +	void *data = (void *)(long)xdp->data; +	struct iptnl_info *tnl; +	struct ethhdr *new_eth; +	struct ethhdr *old_eth; +	struct iphdr *iph = data + sizeof(struct ethhdr); +	__u16 *next_iph; +	__u16 payload_len; +	struct vip vip = {}; +	int dport; +	__u32 csum = 0; +	int i; + +	if (iph + 1 > data_end) +		return XDP_DROP; + +	dport = get_dport(iph + 1, data_end, iph->protocol); +	if (dport == -1) +		return XDP_DROP; + +	vip.protocol = iph->protocol; +	vip.family = AF_INET; +	vip.daddr.v4 = iph->daddr; +	vip.dport = dport; +	payload_len = bpf_ntohs(iph->tot_len); + +	tnl = bpf_map_lookup_elem(&vip2tnl, &vip); +	/* It only does v4-in-v4 */ +	if (!tnl || tnl->family != AF_INET) +		return XDP_PASS; + +	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr))) +		return XDP_DROP; + +	data = (void *)(long)xdp->data; +	data_end = (void *)(long)xdp->data_end; + +	new_eth = data; +	iph = data + sizeof(*new_eth); +	old_eth = data + sizeof(*iph); + +	if (new_eth + 1 > data_end || +	    old_eth + 1 > data_end || +	    iph + 1 > data_end) +		return XDP_DROP; + +	set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IP)); + +	iph->version = 4; +	iph->ihl = sizeof(*iph) >> 2; +	iph->frag_off =	0; +	iph->protocol = IPPROTO_IPIP; +	iph->check = 0; +	iph->tos = 0; +	iph->tot_len = bpf_htons(payload_len + sizeof(*iph)); +	iph->daddr = tnl->daddr.v4; +	iph->saddr = tnl->saddr.v4; +	iph->ttl = 8; + +	next_iph = (__u16 *)iph; +#pragma clang loop unroll(disable) +	for (i = 0; i < sizeof(*iph) >> 1; i++) +		csum += *next_iph++; + +	iph->check = ~((csum & 0xffff) + (csum >> 16)); + +	count_tx(vip.protocol); + +	return XDP_TX; +} + +static __always_inline int handle_ipv6(struct xdp_md *xdp) +{ +	void *data_end = (void *)(long)xdp->data_end; +	void *data = (void *)(long)xdp->data; +	struct iptnl_info *tnl; +	struct ethhdr *new_eth; +	struct ethhdr *old_eth; +	struct ipv6hdr *ip6h = data + sizeof(struct ethhdr); +	__u16 payload_len; +	struct vip vip = {}; +	int dport; + +	if (ip6h + 1 > data_end) +		return XDP_DROP; + +	dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr); +	if (dport == -1) +		return XDP_DROP; + +	vip.protocol = ip6h->nexthdr; +	vip.family = AF_INET6; +	memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr)); +	vip.dport = dport; +	payload_len = ip6h->payload_len; + +	tnl = bpf_map_lookup_elem(&vip2tnl, &vip); +	/* It only does v6-in-v6 */ +	if (!tnl || tnl->family != AF_INET6) +		return XDP_PASS; + +	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr))) +		return XDP_DROP; + +	data = (void *)(long)xdp->data; +	data_end = (void *)(long)xdp->data_end; + +	new_eth = data; +	ip6h = data + sizeof(*new_eth); +	old_eth = data + sizeof(*ip6h); + +	if (new_eth + 1 > data_end || old_eth + 1 > data_end || +	    ip6h + 1 > data_end) +		return XDP_DROP; + +	set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IPV6)); + +	ip6h->version = 6; +	ip6h->priority = 0; +	memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl)); +	ip6h->payload_len = bpf_htons(bpf_ntohs(payload_len) + sizeof(*ip6h)); +	ip6h->nexthdr = IPPROTO_IPV6; +	ip6h->hop_limit = 8; +	memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6)); +	memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6)); + +	count_tx(vip.protocol); + +	return XDP_TX; +} + +SEC("xdp_tx_iptunnel") +int _xdp_tx_iptunnel(struct xdp_md *xdp) +{ +	void *data_end = (void *)(long)xdp->data_end; +	void *data = (void 
*)(long)xdp->data; +	struct ethhdr *eth = data; +	__u16 h_proto; + +	if (eth + 1 > data_end) +		return XDP_DROP; + +	h_proto = eth->h_proto; + +	if (h_proto == bpf_htons(ETH_P_IP)) +		return handle_ipv4(xdp); +	else if (h_proto == bpf_htons(ETH_P_IPV6)) + +		return handle_ipv6(xdp); +	else +		return XDP_DROP; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c index 4fe6aaad22a4..d2eddb5553d1 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c @@ -163,52 +163,66 @@ struct lb_stats {  	__u64 v1;  }; -struct bpf_map_def __attribute__ ((section("maps"), used)) vip_map = { +struct { +	__u32 type; +	__u32 max_entries; +	struct vip_definition *key; +	struct vip_meta *value; +} vip_map SEC(".maps") = {  	.type = BPF_MAP_TYPE_HASH, -	.key_size = sizeof(struct vip_definition), -	.value_size = sizeof(struct vip_meta),  	.max_entries = 512, -	.map_flags = 0,  }; -struct bpf_map_def __attribute__ ((section("maps"), used)) lru_cache = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 map_flags; +	struct flow_key *key; +	struct real_pos_lru *value; +} lru_cache SEC(".maps") = {  	.type = BPF_MAP_TYPE_LRU_HASH, -	.key_size = sizeof(struct flow_key), -	.value_size = sizeof(struct real_pos_lru),  	.max_entries = 300,  	.map_flags = 1U << 1,  }; -struct bpf_map_def __attribute__ ((section("maps"), used)) ch_rings = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	__u32 *value; +} ch_rings SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(__u32),  	.max_entries = 12 * 655, -	.map_flags = 0,  }; -struct bpf_map_def __attribute__ ((section("maps"), used)) reals = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	struct real_definition *value; +} reals SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(struct real_definition),  	.max_entries = 40, -	.map_flags = 0,  }; -struct bpf_map_def __attribute__ ((section("maps"), used)) stats = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	struct lb_stats *value; +} stats SEC(".maps") = {  	.type = BPF_MAP_TYPE_PERCPU_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(struct lb_stats),  	.max_entries = 515, -	.map_flags = 0,  }; -struct bpf_map_def __attribute__ ((section("maps"), used)) ctl_array = { +struct { +	__u32 type; +	__u32 max_entries; +	__u32 *key; +	struct ctl_value *value; +} ctl_array SEC(".maps") = {  	.type = BPF_MAP_TYPE_ARRAY, -	.key_size = sizeof(__u32), -	.value_size = sizeof(struct ctl_value),  	.max_entries = 16, -	.map_flags = 0,  };  struct eth_hdr { diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 289daf54dec4..8351cb5f4a20 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -4016,13 +4016,9 @@ struct btf_file_test {  };  static struct btf_file_test file_tests[] = { -{ -	.file = "test_btf_haskv.o", -}, -{ -	.file = "test_btf_nokv.o", -	.btf_kv_notfound = true, -}, +	{ .file = "test_btf_haskv.o", }, +	{ .file = "test_btf_newkv.o", }, +	{ .file = "test_btf_nokv.o", .btf_kv_notfound = true, },  };  static int do_test_file(unsigned int test_num) diff --git a/tools/testing/selftests/bpf/test_select_reuseport.c b/tools/testing/selftests/bpf/test_select_reuseport.c index 75646d9b34aa..7566c13eb51a 100644 --- 
a/tools/testing/selftests/bpf/test_select_reuseport.c +++ b/tools/testing/selftests/bpf/test_select_reuseport.c @@ -523,6 +523,58 @@ static void test_pass_on_err(int type, sa_family_t family)  	printf("OK\n");  } +static void test_detach_bpf(int type, sa_family_t family) +{ +#ifdef SO_DETACH_REUSEPORT_BPF +	__u32 nr_run_before = 0, nr_run_after = 0, tmp, i; +	struct epoll_event ev; +	int cli_fd, err, nev; +	struct cmd cmd = {}; +	int optvalue = 0; + +	printf("%s: ", __func__); +	err = setsockopt(sk_fds[0], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF, +			 &optvalue, sizeof(optvalue)); +	CHECK(err == -1, "setsockopt(SO_DETACH_REUSEPORT_BPF)", +	      "err:%d errno:%d\n", err, errno); + +	err = setsockopt(sk_fds[1], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF, +			 &optvalue, sizeof(optvalue)); +	CHECK(err == 0 || errno != ENOENT, "setsockopt(SO_DETACH_REUSEPORT_BPF)", +	      "err:%d errno:%d\n", err, errno); + +	for (i = 0; i < NR_RESULTS; i++) { +		err = bpf_map_lookup_elem(result_map, &i, &tmp); +		CHECK(err == -1, "lookup_elem(result_map)", +		      "i:%u err:%d errno:%d\n", i, err, errno); +		nr_run_before += tmp; +	} + +	cli_fd = send_data(type, family, &cmd, sizeof(cmd), PASS); +	nev = epoll_wait(epfd, &ev, 1, 5); +	CHECK(nev <= 0, "nev <= 0", +	      "nev:%d expected:1 type:%d family:%d data:(0, 0)\n", +	      nev,  type, family); + +	for (i = 0; i < NR_RESULTS; i++) { +		err = bpf_map_lookup_elem(result_map, &i, &tmp); +		CHECK(err == -1, "lookup_elem(result_map)", +		      "i:%u err:%d errno:%d\n", i, err, errno); +		nr_run_after += tmp; +	} + +	CHECK(nr_run_before != nr_run_after, +	      "nr_run_before != nr_run_after", +	      "nr_run_before:%u nr_run_after:%u\n", +	      nr_run_before, nr_run_after); + +	printf("OK\n"); +	close(cli_fd); +#else +	printf("%s: SKIP\n", __func__); +#endif +} +  static void prepare_sk_fds(int type, sa_family_t family, bool inany)  {  	const int first = REUSEPORT_ARRAY_SIZE - 1; @@ -664,6 +716,8 @@ static void test_all(void)  			test_pass(type, family);  			test_syncookie(type, family);  			test_pass_on_err(type, family); +			/* Must be the last test */ +			test_detach_bpf(type, family);  			cleanup_per_test();  			printf("\n"); diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c index cac8ee57a013..15653b0e26eb 100644 --- a/tools/testing/selftests/bpf/test_socket_cookie.c +++ b/tools/testing/selftests/bpf/test_socket_cookie.c @@ -18,6 +18,11 @@  #define CG_PATH			"/foo"  #define SOCKET_COOKIE_PROG	"./socket_cookie_prog.o" +struct socket_cookie { +	__u64 cookie_key; +	__u32 cookie_value; +}; +  static int start_server(void)  {  	struct sockaddr_in6 addr; @@ -89,8 +94,7 @@ static int validate_map(struct bpf_map *map, int client_fd)  	__u32 cookie_expected_value;  	struct sockaddr_in6 addr;  	socklen_t len = sizeof(addr); -	__u32 cookie_value; -	__u64 cookie_key; +	struct socket_cookie val;  	int err = 0;  	int map_fd; @@ -101,17 +105,7 @@ static int validate_map(struct bpf_map *map, int client_fd)  	map_fd = bpf_map__fd(map); -	err = bpf_map_get_next_key(map_fd, NULL, &cookie_key); -	if (err) { -		log_err("Can't get cookie key from map"); -		goto out; -	} - -	err = bpf_map_lookup_elem(map_fd, &cookie_key, &cookie_value); -	if (err) { -		log_err("Can't get cookie value from map"); -		goto out; -	} +	err = bpf_map_lookup_elem(map_fd, &client_fd, &val);  	err = getsockname(client_fd, (struct sockaddr *)&addr, &len);  	if (err) { @@ -120,8 +114,8 @@ static int validate_map(struct bpf_map *map, int client_fd)  	
}  	cookie_expected_value = (ntohs(addr.sin6_port) << 8) | 0xFF; -	if (cookie_value != cookie_expected_value) { -		log_err("Unexpected value in map: %x != %x", cookie_value, +	if (val.cookie_value != cookie_expected_value) { +		log_err("Unexpected value in map: %x != %x", val.cookie_value,  			cookie_expected_value);  		goto err;  	} diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 6cb307201958..c5514daf8865 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -234,10 +234,10 @@ static void bpf_fill_scale1(struct bpf_test *self)  		insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6,  					-8 * (k % 64 + 1));  	} -	/* every jump adds 1 step to insn_processed, so to stay exactly -	 * within 1m limit add MAX_TEST_INSNS - MAX_JMP_SEQ - 1 MOVs and 1 EXIT +	/* is_state_visited() doesn't allocate state for pruning for every jump. +	 * Hence multiply jmps by 4 to accommodate that heuristic  	 */ -	while (i < MAX_TEST_INSNS - MAX_JMP_SEQ - 1) +	while (i < MAX_TEST_INSNS - MAX_JMP_SEQ * 4)  		insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 42);  	insn[i] = BPF_EXIT_INSN();  	self->prog_len = i + 1; @@ -266,10 +266,7 @@ static void bpf_fill_scale2(struct bpf_test *self)  		insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6,  					-8 * (k % (64 - 4 * FUNC_NEST) + 1));  	} -	/* every jump adds 1 step to insn_processed, so to stay exactly -	 * within 1m limit add MAX_TEST_INSNS - MAX_JMP_SEQ - 1 MOVs and 1 EXIT -	 */ -	while (i < MAX_TEST_INSNS - MAX_JMP_SEQ - 1) +	while (i < MAX_TEST_INSNS - MAX_JMP_SEQ * 4)  		insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 42);  	insn[i] = BPF_EXIT_INSN();  	self->prog_len = i + 1; diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 9093a8f64dc6..2d752c4f8d9d 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -215,9 +215,11 @@  	BPF_MOV64_IMM(BPF_REG_0, 3),  	BPF_JMP_IMM(BPF_JA, 0, 0, -6),  	}, -	.prog_type = BPF_PROG_TYPE_TRACEPOINT, -	.errstr = "back-edge from insn", -	.result = REJECT, +	.prog_type = BPF_PROG_TYPE_SOCKET_FILTER, +	.errstr_unpriv = "back-edge from insn", +	.result_unpriv = REJECT, +	.result = ACCEPT, +	.retval = 1,  },  {  	"calls: conditional call 4", @@ -250,22 +252,24 @@  	BPF_MOV64_IMM(BPF_REG_0, 3),  	BPF_EXIT_INSN(),  	}, -	.prog_type = BPF_PROG_TYPE_TRACEPOINT, -	.errstr = "back-edge from insn", -	.result = REJECT, +	.prog_type = BPF_PROG_TYPE_SCHED_CLS, +	.result = ACCEPT, +	.retval = 1,  },  {  	"calls: conditional call 6",  	.insns = { +	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), +	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),  	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), -	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -2), +	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -3),  	BPF_EXIT_INSN(),  	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,  		    offsetof(struct __sk_buff, mark)),  	BPF_EXIT_INSN(),  	}, -	.prog_type = BPF_PROG_TYPE_TRACEPOINT, -	.errstr = "back-edge from insn", +	.prog_type = BPF_PROG_TYPE_SCHED_CLS, +	.errstr = "infinite loop detected",  	.result = REJECT,  },  { diff --git a/tools/testing/selftests/bpf/verifier/cfg.c b/tools/testing/selftests/bpf/verifier/cfg.c index 349c0862fb4c..4eb76ed739ce 100644 --- a/tools/testing/selftests/bpf/verifier/cfg.c +++ b/tools/testing/selftests/bpf/verifier/cfg.c @@ -41,7 +41,8 @@  	BPF_JMP_IMM(BPF_JA, 0, 0, -1),  	BPF_EXIT_INSN(),  	}, -	.errstr = "back-edge", +	.errstr = "unreachable insn 
1", +	.errstr_unpriv = "back-edge",  	.result = REJECT,  },  { @@ -53,18 +54,20 @@  	BPF_JMP_IMM(BPF_JA, 0, 0, -4),  	BPF_EXIT_INSN(),  	}, -	.errstr = "back-edge", +	.errstr = "unreachable insn 4", +	.errstr_unpriv = "back-edge",  	.result = REJECT,  },  {  	"conditional loop",  	.insns = { -	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), +	BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),  	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),  	BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),  	BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3),  	BPF_EXIT_INSN(),  	}, -	.errstr = "back-edge", +	.errstr = "infinite loop detected", +	.errstr_unpriv = "back-edge",  	.result = REJECT,  }, diff --git a/tools/testing/selftests/bpf/verifier/direct_packet_access.c b/tools/testing/selftests/bpf/verifier/direct_packet_access.c index d5c596fdc4b9..2c5fbe7bcd27 100644 --- a/tools/testing/selftests/bpf/verifier/direct_packet_access.c +++ b/tools/testing/selftests/bpf/verifier/direct_packet_access.c @@ -511,7 +511,8 @@  		    offsetof(struct __sk_buff, data)),  	BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,  		    offsetof(struct __sk_buff, data_end)), -	BPF_MOV64_IMM(BPF_REG_0, 0xffffffff), +	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, +		    offsetof(struct __sk_buff, mark)),  	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),  	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),  	BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff), diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c index 1f39d845c64f..67ab12410050 100644 --- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c +++ b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c @@ -29,9 +29,9 @@  {  	"helper access to variable memory: stack, bitwise AND, zero included",  	.insns = { +	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),  	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),  	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), -	BPF_MOV64_IMM(BPF_REG_2, 16),  	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),  	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),  	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), @@ -46,9 +46,9 @@  {  	"helper access to variable memory: stack, bitwise AND + JMP, wrong max",  	.insns = { +	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),  	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),  	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), -	BPF_MOV64_IMM(BPF_REG_2, 16),  	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),  	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),  	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 65), @@ -122,9 +122,9 @@  {  	"helper access to variable memory: stack, JMP, bounds + offset",  	.insns = { +	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),  	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),  	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), -	BPF_MOV64_IMM(BPF_REG_2, 16),  	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),  	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),  	BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 5), @@ -143,9 +143,9 @@  {  	"helper access to variable memory: stack, JMP, wrong max",  	.insns = { +	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),  	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),  	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), -	BPF_MOV64_IMM(BPF_REG_2, 16),  	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),  	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),  	BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 65, 4), @@ -163,9 +163,9 @@  {  	"helper access to variable memory: stack, JMP, no max check",  	.insns = { +	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),  	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),  	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), -	
BPF_MOV64_IMM(BPF_REG_2, 16),  	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),  	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),  	BPF_MOV64_IMM(BPF_REG_4, 0), @@ -183,9 +183,9 @@  {  	"helper access to variable memory: stack, JMP, no min check",  	.insns = { +	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),  	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),  	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), -	BPF_MOV64_IMM(BPF_REG_2, 16),  	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),  	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),  	BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3), @@ -201,9 +201,9 @@  {  	"helper access to variable memory: stack, JMP (signed), no min check",  	.insns = { +	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),  	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),  	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), -	BPF_MOV64_IMM(BPF_REG_2, 16),  	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),  	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),  	BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 3), @@ -244,6 +244,7 @@  {  	"helper access to variable memory: map, JMP, wrong max",  	.insns = { +	BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),  	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),  	BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), @@ -251,7 +252,7 @@  	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),  	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),  	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), -	BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)), +	BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),  	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),  	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),  	BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) + 1, 4), @@ -262,7 +263,7 @@  	BPF_MOV64_IMM(BPF_REG_0, 0),  	BPF_EXIT_INSN(),  	}, -	.fixup_map_hash_48b = { 3 }, +	.fixup_map_hash_48b = { 4 },  	.errstr = "invalid access to map value, value_size=48 off=0 size=49",  	.result = REJECT,  	.prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -296,6 +297,7 @@  {  	"helper access to variable memory: map adjusted, JMP, wrong max",  	.insns = { +	BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),  	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),  	BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), @@ -304,7 +306,7 @@  	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),  	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),  	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20), -	BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)), +	BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),  	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),  	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),  	BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) - 19, 4), @@ -315,7 +317,7 @@  	BPF_MOV64_IMM(BPF_REG_0, 0),  	BPF_EXIT_INSN(),  	}, -	.fixup_map_hash_48b = { 3 }, +	.fixup_map_hash_48b = { 4 },  	.errstr = "R1 min value is outside of the array range",  	.result = REJECT,  	.prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -337,8 +339,8 @@  {  	"helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)",  	.insns = { +	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),  	BPF_MOV64_IMM(BPF_REG_1, 0), -	BPF_MOV64_IMM(BPF_REG_2, 1),  	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),  	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),  	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), @@ -562,6 +564,7 @@  {  	"helper access to variable memory: 8 bytes leak",  	.insns = { +	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),  	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),  	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),  	BPF_MOV64_IMM(BPF_REG_0, 0), @@ -572,7 +575,6 @@  	BPF_STX_MEM(BPF_DW, BPF_REG_10, 
BPF_REG_0, -24),  	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),  	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), -	BPF_MOV64_IMM(BPF_REG_2, 1),  	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),  	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),  	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63), diff --git a/tools/testing/selftests/bpf/verifier/loops1.c b/tools/testing/selftests/bpf/verifier/loops1.c new file mode 100644 index 000000000000..5e980a5ab69d --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/loops1.c @@ -0,0 +1,161 @@ +{ +	"bounded loop, count to 4", +	.insns = { +	BPF_MOV64_IMM(BPF_REG_0, 0), +	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), +	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2), +	BPF_EXIT_INSN(), +	}, +	.result = ACCEPT, +	.prog_type = BPF_PROG_TYPE_TRACEPOINT, +	.retval = 4, +}, +{ +	"bounded loop, count to 20", +	.insns = { +	BPF_MOV64_IMM(BPF_REG_0, 0), +	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3), +	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 20, -2), +	BPF_EXIT_INSN(), +	}, +	.result = ACCEPT, +	.prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ +	"bounded loop, count from positive unknown to 4", +	.insns = { +	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), +	BPF_JMP_IMM(BPF_JSLT, BPF_REG_0, 0, 2), +	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), +	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2), +	BPF_EXIT_INSN(), +	}, +	.result = ACCEPT, +	.prog_type = BPF_PROG_TYPE_TRACEPOINT, +	.retval = 4, +}, +{ +	"bounded loop, count from totally unknown to 4", +	.insns = { +	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), +	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), +	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2), +	BPF_EXIT_INSN(), +	}, +	.result = ACCEPT, +	.prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ +	"bounded loop, count to 4 with equality", +	.insns = { +		BPF_MOV64_IMM(BPF_REG_0, 0), +		BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), +		BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, -2), +		BPF_EXIT_INSN(), +	}, +	.result = ACCEPT, +	.prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ +	"bounded loop, start in the middle", +	.insns = { +		BPF_MOV64_IMM(BPF_REG_0, 0), +		BPF_JMP_A(1), +		BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), +		BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2), +		BPF_EXIT_INSN(), +	}, +	.result = REJECT, +	.errstr = "back-edge", +	.prog_type = BPF_PROG_TYPE_TRACEPOINT, +	.retval = 4, +}, +{ +	"bounded loop containing a forward jump", +	.insns = { +		BPF_MOV64_IMM(BPF_REG_0, 0), +		BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), +		BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_0, 0), +		BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -3), +		BPF_EXIT_INSN(), +	}, +	.result = ACCEPT, +	.prog_type = BPF_PROG_TYPE_TRACEPOINT, +	.retval = 4, +}, +{ +	"bounded loop that jumps out rather than in", +	.insns = { +	BPF_MOV64_IMM(BPF_REG_6, 0), +	BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), +	BPF_JMP_IMM(BPF_JGT, BPF_REG_6, 10000, 2), +	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), +	BPF_JMP_A(-4), +	BPF_EXIT_INSN(), +	}, +	.result = ACCEPT, +	.prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ +	"infinite loop after a conditional jump", +	.insns = { +	BPF_MOV64_IMM(BPF_REG_0, 5), +	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, 2), +	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), +	BPF_JMP_A(-2), +	BPF_EXIT_INSN(), +	}, +	.result = REJECT, +	.errstr = "program is too large", +	.prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ +	"bounded recursion", +	.insns = { +	BPF_MOV64_IMM(BPF_REG_1, 0), +	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), +	BPF_EXIT_INSN(), +	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), +	BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), +	BPF_JMP_IMM(BPF_JLT, 
BPF_REG_1, 4, 1), +	BPF_EXIT_INSN(), +	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -5), +	BPF_EXIT_INSN(), +	}, +	.result = REJECT, +	.errstr = "back-edge", +	.prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ +	"infinite loop in two jumps", +	.insns = { +	BPF_MOV64_IMM(BPF_REG_0, 0), +	BPF_JMP_A(0), +	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2), +	BPF_EXIT_INSN(), +	}, +	.result = REJECT, +	.errstr = "loop detected", +	.prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ +	"infinite loop: three-jump trick", +	.insns = { +	BPF_MOV64_IMM(BPF_REG_0, 0), +	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), +	BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), +	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 2, 1), +	BPF_EXIT_INSN(), +	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), +	BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), +	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 2, 1), +	BPF_EXIT_INSN(), +	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), +	BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), +	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 2, -11), +	BPF_EXIT_INSN(), +	}, +	.result = REJECT, +	.errstr = "loop detected", +	.prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, diff --git a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c index bbdba990fefb..da7a4b37cb98 100644 --- a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c +++ b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c @@ -29,21 +29,6 @@  	.prog_type = BPF_PROG_TYPE_SOCK_OPS,  },  { -	"prevent map lookup in xskmap", -	.insns = { -	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), -	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), -	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), -	BPF_LD_MAP_FD(BPF_REG_1, 0), -	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), -	BPF_EXIT_INSN(), -	}, -	.fixup_map_xskmap = { 3 }, -	.result = REJECT, -	.errstr = "cannot pass map_type 17 into func bpf_map_lookup_elem", -	.prog_type = BPF_PROG_TYPE_XDP, -}, -{  	"prevent map lookup in stack trace",  	.insns = {  	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c index b31cd2cf50d0..9ed192e14f5f 100644 --- a/tools/testing/selftests/bpf/verifier/sock.c +++ b/tools/testing/selftests/bpf/verifier/sock.c @@ -498,3 +498,21 @@  	.result = REJECT,  	.errstr = "cannot pass map_type 24 into func bpf_map_lookup_elem",  }, +{ +	"bpf_map_lookup_elem(xskmap, &key); xs->queue_id", +	.insns = { +	BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0), +	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), +	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), +	BPF_LD_MAP_FD(BPF_REG_1, 0), +	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), +	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), +	BPF_EXIT_INSN(), +	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_xdp_sock, queue_id)), +	BPF_MOV64_IMM(BPF_REG_0, 0), +	BPF_EXIT_INSN(), +	}, +	.fixup_map_xskmap = { 3 }, +	.prog_type = BPF_PROG_TYPE_XDP, +	.result = ACCEPT, +},  | 
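For reference, below is a minimal standalone sketch of the BTF-defined map style that the selftests in this patch are converted to: a SEC(".maps") struct whose key/value pointer members carry the types via BTF, replacing the old bpf_map_def key_size/value_size fields. This sketch is not part of the commit; it assumes the selftests' "bpf_helpers.h" (SEC() macro, bpf_map_lookup_elem()) is available, and the map name, program name, and "socket" section are hypothetical choices for illustration only.

/* Illustrative sketch only -- not part of this patch. */
#include <linux/bpf.h>
#include "bpf_helpers.h"

/* BTF-defined map: the key/value types are conveyed through the
 * pointer members' types instead of key_size/value_size. */
struct {
	__u32 type;
	__u32 max_entries;
	__u32 *key;
	__u64 *value;
} counters SEC(".maps") = {
	.type = BPF_MAP_TYPE_ARRAY,
	.max_entries = 1,
};

SEC("socket")
int count_pkts(struct __sk_buff *skb)
{
	__u32 key = 0;
	__u64 *val;

	/* Look up slot 0 of the array map and bump the counter. */
	val = bpf_map_lookup_elem(&counters, &key);
	if (val)
		(*val)++;
	return 0;
}

char _license[] SEC("license") = "GPL";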