From dbcfe5f76dd5266b8f308b5a8f9ef52f74b2d6e7 Mon Sep 17 00:00:00 2001 From: Gianluca Borello Date: Mon, 9 Jan 2017 10:19:46 -0800 Subject: bpf: split check_mem_access logic for map values Move the logic to check memory accesses to a PTR_TO_MAP_VALUE_ADJ from check_mem_access() to a separate helper check_map_access_adj(). This makes it possible to use those checks in other parts of the verifier as well, where boundaries on PTR_TO_MAP_VALUE_ADJ might need to be checked, for example when checking helper function arguments. The same thing is already happening for other types such as PTR_TO_PACKET and its check_packet_access() helper. The code has been copied verbatim, with the only difference of removing the "off += reg->max_value" statement and moving the sum into the call statement to check_map_access(), as that was only needed due to the earlier common check_map_access() call. Signed-off-by: Gianluca Borello Acked-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 88 ++++++++++++++++++++++++++++----------------------- 1 file changed, 49 insertions(+), 39 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 83ed2f8f6f22..8333fbcfbfe7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -635,6 +635,51 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off, return 0; } +/* check read/write into an adjusted map element */ +static int check_map_access_adj(struct bpf_verifier_env *env, u32 regno, + int off, int size) +{ + struct bpf_verifier_state *state = &env->cur_state; + struct bpf_reg_state *reg = &state->regs[regno]; + int err; + + /* We adjusted the register to this map value, so we + * need to change off and size to min_value and max_value + * respectively to make sure our theoretical access will be + * safe. 
+ */ + if (log_level) + print_verifier_state(state); + env->varlen_map_value_access = true; + /* The minimum value is only important with signed + * comparisons where we can't assume the floor of a + * value is 0. If we are using signed variables for our + * index'es we need to make sure that whatever we use + * will have a set floor within our range. + */ + if (reg->min_value < 0) { + verbose("R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", + regno); + return -EACCES; + } + err = check_map_access(env, regno, reg->min_value + off, size); + if (err) { + verbose("R%d min value is outside of the array range\n", + regno); + return err; + } + + /* If we haven't set a max value then we need to bail + * since we can't be sure we won't do bad things. + */ + if (reg->max_value == BPF_REGISTER_MAX_RANGE) { + verbose("R%d unbounded memory access, make sure to bounds check any array access into a map\n", + regno); + return -EACCES; + } + return check_map_access(env, regno, reg->max_value + off, size); +} + #define MAX_PACKET_OFF 0xffff static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, @@ -775,45 +820,10 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, return -EACCES; } - /* If we adjusted the register to this map value at all then we - * need to change off and size to min_value and max_value - * respectively to make sure our theoretical access will be - * safe. - */ - if (reg->type == PTR_TO_MAP_VALUE_ADJ) { - if (log_level) - print_verifier_state(state); - env->varlen_map_value_access = true; - /* The minimum value is only important with signed - * comparisons where we can't assume the floor of a - * value is 0. If we are using signed variables for our - * index'es we need to make sure that whatever we use - * will have a set floor within our range. 
- */ - if (reg->min_value < 0) { - verbose("R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", - regno); - return -EACCES; - } - err = check_map_access(env, regno, reg->min_value + off, - size); - if (err) { - verbose("R%d min value is outside of the array range\n", - regno); - return err; - } - - /* If we haven't set a max value then we need to bail - * since we can't be sure we won't do bad things. - */ - if (reg->max_value == BPF_REGISTER_MAX_RANGE) { - verbose("R%d unbounded memory access, make sure to bounds check any array access into a map\n", - regno); - return -EACCES; - } - off += reg->max_value; - } - err = check_map_access(env, regno, off, size); + if (reg->type == PTR_TO_MAP_VALUE_ADJ) + err = check_map_access_adj(env, regno, off, size); + else + err = check_map_access(env, regno, off, size); if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown_value(state->regs, value_regno); -- cgit v1.2.3 From 5722569bb9c3bd922c4f10b5b2912fe88c255312 Mon Sep 17 00:00:00 2001 From: Gianluca Borello Date: Mon, 9 Jan 2017 10:19:47 -0800 Subject: bpf: allow helpers access to map element values Enable helpers to directly access a map element value by passing a register type PTR_TO_MAP_VALUE (or PTR_TO_MAP_VALUE_ADJ) to helper arguments ARG_PTR_TO_STACK or ARG_PTR_TO_RAW_STACK. This enables several use cases. For example, a typical tracing program might want to capture pathnames passed to sys_open() with: struct trace_data { char pathname[PATHLEN]; }; SEC("kprobe/sys_open") void bpf_sys_open(struct pt_regs *ctx) { struct trace_data data; bpf_probe_read(data.pathname, sizeof(data.pathname), ctx->di); /* consume data.pathname, for example via * bpf_trace_printk() or bpf_perf_event_output() */ } Such a program could easily hit the stack limit in case PATHLEN needs to be large or more local variables need to exist, both of which are quite common scenarios. 
By allowing direct helper access to map element values, one could do: struct bpf_map_def SEC("maps") scratch_map = { .type = BPF_MAP_TYPE_PERCPU_ARRAY, .key_size = sizeof(u32), .value_size = sizeof(struct trace_data), .max_entries = 1, }; SEC("kprobe/sys_open") int bpf_sys_open(struct pt_regs *ctx) { int id = 0; struct trace_data *p = bpf_map_lookup_elem(&scratch_map, &id); if (!p) return 0; bpf_probe_read(p->pathname, sizeof(p->pathname), ctx->di); /* consume p->pathname, for example via * bpf_trace_printk() or bpf_perf_event_output() */ } And wouldn't risk exhausting the stack. Code changes are loosely modeled after commit 6841de8b0d03 ("bpf: allow helpers access the packet directly"). Unlike with PTR_TO_PACKET, these changes just work with ARG_PTR_TO_STACK and ARG_PTR_TO_RAW_STACK (not ARG_PTR_TO_MAP_KEY, ARG_PTR_TO_MAP_VALUE, ...): adding those would be trivial, but since there is not currently a use case for that, it's reasonable to limit the set of changes. Also, add new tests to make sure accesses to map element values from helpers never go out of bounds, even when adjusted. Signed-off-by: Gianluca Borello Acked-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- kernel/bpf/verifier.c | 9 +- tools/testing/selftests/bpf/test_verifier.c | 491 ++++++++++++++++++++++++++++ 2 files changed, 498 insertions(+), 2 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8333fbcfbfe7..b7014606745b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -627,7 +627,7 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off, { struct bpf_map *map = env->cur_state.regs[regno].map_ptr; - if (off < 0 || off + size > map->value_size) { + if (off < 0 || size <= 0 || off + size > map->value_size) { verbose("invalid access to map value, value_size=%d off=%d size=%d\n", map->value_size, off, size); return -EACCES; @@ -1025,7 +1025,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, */ if (type == CONST_IMM && reg->imm == 0) /* final test in check_stack_boundary() */; - else if (type != PTR_TO_PACKET && type != expected_type) + else if (type != PTR_TO_PACKET && type != PTR_TO_MAP_VALUE && + type != PTR_TO_MAP_VALUE_ADJ && type != expected_type) goto err_type; meta->raw_mode = arg_type == ARG_PTR_TO_RAW_STACK; } else { @@ -1088,6 +1089,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, } if (regs[regno - 1].type == PTR_TO_PACKET) err = check_packet_access(env, regno - 1, 0, reg->imm); + else if (regs[regno - 1].type == PTR_TO_MAP_VALUE) + err = check_map_access(env, regno - 1, 0, reg->imm); + else if (regs[regno - 1].type == PTR_TO_MAP_VALUE_ADJ) + err = check_map_access_adj(env, regno - 1, 0, reg->imm); else err = check_stack_boundary(env, regno - 1, reg->imm, zero_size_allowed, meta); diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 853d7e43434a..b7732e557bf9 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -2905,6 +2905,497 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = 
"invalid bpf_context access", }, + { + "helper access to map: full range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to map: partial range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to map: empty range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr = "invalid access to map value, value_size=48 off=0 size=0", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to map: out-of-bound range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + 
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val) + 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr = "invalid access to map value, value_size=48 off=0 size=56", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to map: negative range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr = "invalid access to map value, value_size=48 off=0 size=-8", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to adjusted map (via const imm): full range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, + offsetof(struct test_val, foo)), + BPF_MOV64_IMM(BPF_REG_2, + sizeof(struct test_val) - + offsetof(struct test_val, foo)), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to adjusted map (via const imm): partial range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + 
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, + offsetof(struct test_val, foo)), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to adjusted map (via const imm): empty range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, + offsetof(struct test_val, foo)), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr = "R1 min value is outside of the array range", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to adjusted map (via const imm): out-of-bound range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, + offsetof(struct test_val, foo)), + BPF_MOV64_IMM(BPF_REG_2, + sizeof(struct test_val) - + offsetof(struct test_val, foo) + 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr = "invalid access to map value, value_size=48 off=4 size=52", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to adjusted map (via const imm): negative range (> adjustment)", + .insns = { + 
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, + offsetof(struct test_val, foo)), + BPF_MOV64_IMM(BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr = "invalid access to map value, value_size=48 off=4 size=-8", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to adjusted map (via const imm): negative range (< adjustment)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, + offsetof(struct test_val, foo)), + BPF_MOV64_IMM(BPF_REG_2, -1), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr = "R1 min value is outside of the array range", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to adjusted map (via const reg): full range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, + offsetof(struct test_val, foo)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, + sizeof(struct test_val) - + offsetof(struct test_val, foo)), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + 
.fixup_map2 = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to adjusted map (via const reg): partial range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, + offsetof(struct test_val, foo)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to adjusted map (via const reg): empty range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr = "R1 min value is outside of the array range", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to adjusted map (via const reg): out-of-bound range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, + offsetof(struct test_val, foo)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, + sizeof(struct test_val) - 
+ offsetof(struct test_val, foo) + 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr = "invalid access to map value, value_size=48 off=4 size=52", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to adjusted map (via const reg): negative range (> adjustment)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, + offsetof(struct test_val, foo)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr = "invalid access to map value, value_size=48 off=4 size=-8", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to adjusted map (via const reg): negative range (< adjustment)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, + offsetof(struct test_val, foo)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, -1), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr = "R1 min value is outside of the array range", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to adjusted map (via variable): full range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, 
BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_3, + offsetof(struct test_val, foo), 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, + sizeof(struct test_val) - + offsetof(struct test_val, foo)), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to adjusted map (via variable): partial range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_3, + offsetof(struct test_val, foo), 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to adjusted map (via variable): empty range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_3, + offsetof(struct test_val, foo), 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + 
.fixup_map2 = { 3 }, + .errstr = "R1 min value is outside of the array range", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to adjusted map (via variable): no max check", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr = "R1 min value is negative, either use unsigned index or do a if (index >=0) check", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to adjusted map (via variable): wrong max check", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_3, + offsetof(struct test_val, foo), 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, + sizeof(struct test_val) - + offsetof(struct test_val, foo) + 1), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr = "invalid access to map value, value_size=48 off=4 size=45", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) -- cgit v1.2.3 From f0318d01b694485af9678a4e120328ae3555be6d Mon Sep 17 00:00:00 2001 From: Gianluca Borello Date: Mon, 9 Jan 2017 10:19:48 -0800 Subject: bpf: 
allow adjusted map element values to spill commit 484611357c19 ("bpf: allow access into map value arrays") introduces the ability to do pointer math inside a map element value via the PTR_TO_MAP_VALUE_ADJ register type. The current support doesn't handle the case where a PTR_TO_MAP_VALUE_ADJ is spilled into the stack, limiting several use cases, especially when generating bpf code from a compiler. Handle this case by explicitly enabling the register type PTR_TO_MAP_VALUE_ADJ to be spilled. Also, make sure that min_value and max_value are reset just for BPF_LDX operations that don't result in a restore of a spilled register from stack. Signed-off-by: Gianluca Borello Acked-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 21 +++++++++---- tools/testing/selftests/bpf/test_verifier.c | 46 +++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 5 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b7014606745b..59ed07b9b4ea 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -481,6 +481,13 @@ static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno) regs[regno].max_value = BPF_REGISTER_MAX_RANGE; } +static void mark_reg_unknown_value_and_range(struct bpf_reg_state *regs, + u32 regno) +{ + mark_reg_unknown_value(regs, regno); + reset_reg_range_values(regs, regno); +} + enum reg_arg_type { SRC_OP, /* register is used as source operand */ DST_OP, /* register is used as destination operand */ @@ -532,6 +539,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type) switch (type) { case PTR_TO_MAP_VALUE: case PTR_TO_MAP_VALUE_OR_NULL: + case PTR_TO_MAP_VALUE_ADJ: case PTR_TO_STACK: case PTR_TO_CTX: case PTR_TO_PACKET: @@ -616,7 +624,8 @@ static int check_stack_read(struct bpf_verifier_state *state, int off, int size, } if (value_regno >= 0) /* have read misc data from the stack */ - mark_reg_unknown_value(state->regs, 
value_regno); + mark_reg_unknown_value_and_range(state->regs, + value_regno); return 0; } } @@ -825,7 +834,8 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, else err = check_map_access(env, regno, off, size); if (!err && t == BPF_READ && value_regno >= 0) - mark_reg_unknown_value(state->regs, value_regno); + mark_reg_unknown_value_and_range(state->regs, + value_regno); } else if (reg->type == PTR_TO_CTX) { enum bpf_reg_type reg_type = UNKNOWN_VALUE; @@ -837,7 +847,8 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, } err = check_ctx_access(env, off, size, t, &reg_type); if (!err && t == BPF_READ && value_regno >= 0) { - mark_reg_unknown_value(state->regs, value_regno); + mark_reg_unknown_value_and_range(state->regs, + value_regno); /* note that reg.[id|off|range] == 0 */ state->regs[value_regno].type = reg_type; } @@ -870,7 +881,8 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, } err = check_packet_access(env, regno, off, size); if (!err && t == BPF_READ && value_regno >= 0) - mark_reg_unknown_value(state->regs, value_regno); + mark_reg_unknown_value_and_range(state->regs, + value_regno); } else { verbose("R%d invalid mem access '%s'\n", regno, reg_type_str[reg->type]); @@ -2744,7 +2756,6 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; - reset_reg_range_values(regs, insn->dst_reg); if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) { insn_idx++; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index b7732e557bf9..e7b075819c08 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -3396,6 +3396,52 @@ static struct bpf_test tests[] = { .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, + { + "map element value is preserved across register spilling", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + 
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -184), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 leaks addr", + .result = ACCEPT, + .result_unpriv = REJECT, + }, + { + "map element value (adjusted) is preserved across register spilling", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, + offsetof(struct test_val, foo)), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -184), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .result = ACCEPT, + .result_unpriv = REJECT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) -- cgit v1.2.3 From 06c1c049721a995dee2829ad13b24aaf5d7c5cce Mon Sep 17 00:00:00 2001 From: Gianluca Borello Date: Mon, 9 Jan 2017 10:19:49 -0800 Subject: bpf: allow helpers access to variable memory Currently, helpers that read and write from/to the stack can do so using a pair of arguments of type ARG_PTR_TO_STACK and ARG_CONST_STACK_SIZE. ARG_CONST_STACK_SIZE accepts a constant register of type CONST_IMM, so that the verifier can safely check the memory access. 
However, requiring the argument to be a constant can be limiting in some circumstances. Since the current logic keeps track of the minimum and maximum value of a register throughout the simulated execution, ARG_CONST_STACK_SIZE can be changed to also accept an UNKNOWN_VALUE register in case its boundaries have been set and the range doesn't cause invalid memory accesses. One common situation when this is useful: int len; char buf[BUFSIZE]; /* BUFSIZE is 128 */ if (some_condition) len = 42; else len = 84; some_helper(..., buf, len & (BUFSIZE - 1)); The compiler can often decide to assign the constant values 42 or 84 into a variable on the stack, instead of keeping it in a register. When the variable is then read back from stack into the register in order to be passed to the helper, the verifier will not be able to recognize the register as constant (the verifier is not currently tracking all constant writes into memory), and the program won't be valid. However, by allowing the helper to accept an UNKNOWN_VALUE register, this program will work because the bitwise AND operation will set the range of possible values for the UNKNOWN_VALUE register to [0, BUFSIZE), so the verifier can guarantee the helper call will be safe (assuming the argument is of type ARG_CONST_STACK_SIZE_OR_ZERO, otherwise one more check against 0 would be needed). Custom ranges can be set not only with ALU operations, but also by explicitly comparing the UNKNOWN_VALUE register with constants. Another very common example happens when intercepting system call arguments and accessing user-provided data of variable size using bpf_probe_read(). One can load at runtime the user-provided length in an UNKNOWN_VALUE register, and then read that exact amount of data up to a compile-time determined limit in order to fit into the proper local storage allocated on the stack, without having to guess a suboptimal access size at compile time. 
Also, in case the helpers accepting the UNKNOWN_VALUE register operate in raw mode, disable the raw mode so that the program is required to initialize all memory, since there is no guarantee the helper will fill it completely, leaving possibilities for data leak (just relevant when the memory used by the helper is the stack, not when using a pointer to map element value or packet). In other words, ARG_PTR_TO_RAW_STACK will be treated as ARG_PTR_TO_STACK. Signed-off-by: Gianluca Borello Acked-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 74 ++++- tools/testing/selftests/bpf/test_verifier.c | 410 ++++++++++++++++++++++++++++ 2 files changed, 474 insertions(+), 10 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 59ed07b9b4ea..3d4f7bf32aaf 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -980,6 +980,25 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, return 0; } +static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, + int access_size, bool zero_size_allowed, + struct bpf_call_arg_meta *meta) +{ + struct bpf_reg_state *regs = env->cur_state.regs; + + switch (regs[regno].type) { + case PTR_TO_PACKET: + return check_packet_access(env, regno, 0, access_size); + case PTR_TO_MAP_VALUE: + return check_map_access(env, regno, 0, access_size); + case PTR_TO_MAP_VALUE_ADJ: + return check_map_access_adj(env, regno, 0, access_size); + default: /* const_imm|ptr_to_stack or invalid ptr */ + return check_stack_boundary(env, regno, access_size, + zero_size_allowed, meta); + } +} + static int check_func_arg(struct bpf_verifier_env *env, u32 regno, enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta) @@ -1018,7 +1037,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, } else if (arg_type == ARG_CONST_STACK_SIZE || arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) { expected_type = 
CONST_IMM; - if (type != expected_type) + /* One exception. Allow UNKNOWN_VALUE registers when the + * boundaries are known and don't cause unsafe memory accesses + */ + if (type != UNKNOWN_VALUE && type != expected_type) goto err_type; } else if (arg_type == ARG_CONST_MAP_PTR) { expected_type = CONST_PTR_TO_MAP; @@ -1099,15 +1121,47 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, verbose("ARG_CONST_STACK_SIZE cannot be first argument\n"); return -EACCES; } - if (regs[regno - 1].type == PTR_TO_PACKET) - err = check_packet_access(env, regno - 1, 0, reg->imm); - else if (regs[regno - 1].type == PTR_TO_MAP_VALUE) - err = check_map_access(env, regno - 1, 0, reg->imm); - else if (regs[regno - 1].type == PTR_TO_MAP_VALUE_ADJ) - err = check_map_access_adj(env, regno - 1, 0, reg->imm); - else - err = check_stack_boundary(env, regno - 1, reg->imm, - zero_size_allowed, meta); + + /* If the register is UNKNOWN_VALUE, the access check happens + * using its boundaries. Otherwise, just use its imm + */ + if (type == UNKNOWN_VALUE) { + /* For unprivileged variable accesses, disable raw + * mode so that the program is required to + * initialize all the memory that the helper could + * just partially fill up. 
+ */ + meta = NULL; + + if (reg->min_value < 0) { + verbose("R%d min value is negative, either use unsigned or 'var &= const'\n", + regno); + return -EACCES; + } + + if (reg->min_value == 0) { + err = check_helper_mem_access(env, regno - 1, 0, + zero_size_allowed, + meta); + if (err) + return err; + } + + if (reg->max_value == BPF_REGISTER_MAX_RANGE) { + verbose("R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n", + regno); + return -EACCES; + } + err = check_helper_mem_access(env, regno - 1, + reg->max_value, + zero_size_allowed, meta); + if (err) + return err; + } else { + /* register is CONST_IMM */ + err = check_helper_mem_access(env, regno - 1, reg->imm, + zero_size_allowed, meta); + } } return err; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index e7b075819c08..9bb45346dc72 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -3442,6 +3442,416 @@ static struct bpf_test tests[] = { .result = ACCEPT, .result_unpriv = REJECT, }, + { + "helper access to variable memory: stack, bitwise AND + JMP, correct bounds", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_MOV64_IMM(BPF_REG_2, 16), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_3, 0), + 
BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to variable memory: stack, bitwise AND, zero included", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + BPF_MOV64_IMM(BPF_REG_2, 16), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .errstr = "invalid stack type R1 off=-64 access_size=0", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to variable memory: stack, bitwise AND + JMP, wrong max", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + BPF_MOV64_IMM(BPF_REG_2, 16), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 65), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid stack type R1 off=-64 access_size=65", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to variable memory: stack, JMP, correct bounds", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, 
-8), + BPF_MOV64_IMM(BPF_REG_2, 16), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), + BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 4), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to variable memory: stack, JMP (signed), correct bounds", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_MOV64_IMM(BPF_REG_2, 16), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 4), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to variable memory: stack, JMP, bounds + offset", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + BPF_MOV64_IMM(BPF_REG_2, 16), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), + BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 5), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_3, 
0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid stack type R1 off=-64 access_size=65", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to variable memory: stack, JMP, wrong max", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + BPF_MOV64_IMM(BPF_REG_2, 16), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), + BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 65, 4), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid stack type R1 off=-64 access_size=65", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to variable memory: stack, JMP, no max check", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + BPF_MOV64_IMM(BPF_REG_2, 16), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R2 unbounded memory access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to variable memory: stack, JMP, no min check", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + BPF_MOV64_IMM(BPF_REG_2, 16), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), + BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid stack type R1 
off=-64 access_size=0", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to variable memory: stack, JMP (signed), no min check", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + BPF_MOV64_IMM(BPF_REG_2, 16), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 3), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R2 min value is negative", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to variable memory: map, JMP, correct bounds", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, + sizeof(struct test_val), 4), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to variable memory: map, JMP, wrong max", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), + 
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, + sizeof(struct test_val) + 1, 4), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr = "invalid access to map value, value_size=48 off=0 size=49", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to variable memory: map adjusted, JMP, correct bounds", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20), + BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, + sizeof(struct test_val) - 20, 4), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to variable memory: map adjusted, JMP, wrong max", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20), + BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), + BPF_JMP_IMM(BPF_JSGT, 
BPF_REG_2, + sizeof(struct test_val) - 19, 4), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr = "R1 min value is outside of the array range", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to variable memory: size > 0 not allowed on NULL", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_EMIT_CALL(BPF_FUNC_csum_diff), + BPF_EXIT_INSN(), + }, + .errstr = "R1 type=imm expected=fp", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to variable memory: size = 0 not allowed on != NULL", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_EMIT_CALL(BPF_FUNC_csum_diff), + BPF_EXIT_INSN(), + }, + .errstr = "invalid stack type R1 off=-8 access_size=0", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to variable memory: 8 bytes leak", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_MOV64_IMM(BPF_REG_2, 0), + 
BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_EXIT_INSN(), + }, + .errstr = "invalid indirect read from stack off -64+32 size 64", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to variable memory: 8 bytes no leak (init memory)", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 32), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 32), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) -- cgit v1.2.3 From 39f19ebbf57b403695f7b5f9cf322fe1ddb5d7fb Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 9 Jan 2017 10:19:50 -0800 Subject: bpf: rename ARG_PTR_TO_STACK since ARG_PTR_TO_STACK is no longer just pointer to stack rename it to ARG_PTR_TO_MEM and adjust comment. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 12 ++++++------ kernel/bpf/helpers.c | 4 ++-- kernel/bpf/verifier.c | 28 ++++++++++++++-------------- kernel/trace/bpf_trace.c | 20 ++++++++++---------- net/core/filter.c | 40 ++++++++++++++++++++-------------------- 5 files changed, 52 insertions(+), 52 deletions(-) (limited to 'kernel/bpf') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f74ae68086dc..94ea8d2383e6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -69,14 +69,14 @@ enum bpf_arg_type { /* the following constraints used to prototype bpf_memcmp() and other * functions that access data on eBPF program stack */ - ARG_PTR_TO_STACK, /* any pointer to eBPF program stack */ - ARG_PTR_TO_RAW_STACK, /* any pointer to eBPF program stack, area does not - * need to be initialized, helper function must fill - * all bytes or clear them in error case. + ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */ + ARG_PTR_TO_UNINIT_MEM, /* pointer to memory does not need to be initialized, + * helper function must fill all bytes or clear + * them in error case. 
*/ - ARG_CONST_STACK_SIZE, /* number of bytes accessed from stack */ - ARG_CONST_STACK_SIZE_OR_ZERO, /* number of bytes accessed from stack or 0 */ + ARG_CONST_SIZE, /* number of bytes accessed from memory */ + ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */ ARG_PTR_TO_CTX, /* pointer to context */ ARG_ANYTHING, /* any (initialized) argument is ok */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 045cbe673356..3d24e238221e 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -176,6 +176,6 @@ const struct bpf_func_proto bpf_get_current_comm_proto = { .func = bpf_get_current_comm, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_RAW_STACK, - .arg2_type = ARG_CONST_STACK_SIZE, + .arg1_type = ARG_PTR_TO_UNINIT_MEM, + .arg2_type = ARG_CONST_SIZE, }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3d4f7bf32aaf..2efdc9128e3c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1034,8 +1034,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, expected_type = PTR_TO_STACK; if (type != PTR_TO_PACKET && type != expected_type) goto err_type; - } else if (arg_type == ARG_CONST_STACK_SIZE || - arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) { + } else if (arg_type == ARG_CONST_SIZE || + arg_type == ARG_CONST_SIZE_OR_ZERO) { expected_type = CONST_IMM; /* One exception. Allow UNKNOWN_VALUE registers when the * boundaries are known and don't cause unsafe memory accesses @@ -1050,8 +1050,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, expected_type = PTR_TO_CTX; if (type != expected_type) goto err_type; - } else if (arg_type == ARG_PTR_TO_STACK || - arg_type == ARG_PTR_TO_RAW_STACK) { + } else if (arg_type == ARG_PTR_TO_MEM || + arg_type == ARG_PTR_TO_UNINIT_MEM) { expected_type = PTR_TO_STACK; /* One exception here. In case function allows for NULL to be * passed in as argument, it's a CONST_IMM type. 
Final test @@ -1062,7 +1062,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, else if (type != PTR_TO_PACKET && type != PTR_TO_MAP_VALUE && type != PTR_TO_MAP_VALUE_ADJ && type != expected_type) goto err_type; - meta->raw_mode = arg_type == ARG_PTR_TO_RAW_STACK; + meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM; } else { verbose("unsupported arg_type %d\n", arg_type); return -EFAULT; @@ -1108,9 +1108,9 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, err = check_stack_boundary(env, regno, meta->map_ptr->value_size, false, NULL); - } else if (arg_type == ARG_CONST_STACK_SIZE || - arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) { - bool zero_size_allowed = (arg_type == ARG_CONST_STACK_SIZE_OR_ZERO); + } else if (arg_type == ARG_CONST_SIZE || + arg_type == ARG_CONST_SIZE_OR_ZERO) { + bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); /* bpf_xxx(..., buf, len) call will access 'len' bytes * from stack pointer 'buf'. Check it @@ -1118,7 +1118,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, */ if (regno == 0) { /* kernel subsystem misconfigured verifier */ - verbose("ARG_CONST_STACK_SIZE cannot be first argument\n"); + verbose("ARG_CONST_SIZE cannot be first argument\n"); return -EACCES; } @@ -1235,15 +1235,15 @@ static int check_raw_mode(const struct bpf_func_proto *fn) { int count = 0; - if (fn->arg1_type == ARG_PTR_TO_RAW_STACK) + if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM) count++; - if (fn->arg2_type == ARG_PTR_TO_RAW_STACK) + if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM) count++; - if (fn->arg3_type == ARG_PTR_TO_RAW_STACK) + if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM) count++; - if (fn->arg4_type == ARG_PTR_TO_RAW_STACK) + if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM) count++; - if (fn->arg5_type == ARG_PTR_TO_RAW_STACK) + if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM) count++; return count > 1 ? 
-EINVAL : 0; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index fa77311dadb2..f883c43c96f3 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -76,8 +76,8 @@ static const struct bpf_func_proto bpf_probe_read_proto = { .func = bpf_probe_read, .gpl_only = true, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_RAW_STACK, - .arg2_type = ARG_CONST_STACK_SIZE, + .arg1_type = ARG_PTR_TO_UNINIT_MEM, + .arg2_type = ARG_CONST_SIZE, .arg3_type = ARG_ANYTHING, }; @@ -109,8 +109,8 @@ static const struct bpf_func_proto bpf_probe_write_user_proto = { .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_ANYTHING, - .arg2_type = ARG_PTR_TO_STACK, - .arg3_type = ARG_CONST_STACK_SIZE, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, }; static const struct bpf_func_proto *bpf_get_probe_write_proto(void) @@ -213,8 +213,8 @@ static const struct bpf_func_proto bpf_trace_printk_proto = { .func = bpf_trace_printk, .gpl_only = true, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_STACK, - .arg2_type = ARG_CONST_STACK_SIZE, + .arg1_type = ARG_PTR_TO_MEM, + .arg2_type = ARG_CONST_SIZE, }; const struct bpf_func_proto *bpf_get_trace_printk_proto(void) @@ -329,8 +329,8 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = { .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_STACK, - .arg5_type = ARG_CONST_STACK_SIZE, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE, }; static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs); @@ -492,8 +492,8 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_tp = { .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_STACK, - .arg5_type = ARG_CONST_STACK_SIZE, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE, }; BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map, diff --git a/net/core/filter.c 
b/net/core/filter.c index 1969b3f118c1..f4d16a905754 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1416,8 +1416,8 @@ static const struct bpf_func_proto bpf_skb_store_bytes_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_PTR_TO_STACK, - .arg4_type = ARG_CONST_STACK_SIZE, + .arg3_type = ARG_PTR_TO_MEM, + .arg4_type = ARG_CONST_SIZE, .arg5_type = ARG_ANYTHING, }; @@ -1447,8 +1447,8 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_PTR_TO_RAW_STACK, - .arg4_type = ARG_CONST_STACK_SIZE, + .arg3_type = ARG_PTR_TO_UNINIT_MEM, + .arg4_type = ARG_CONST_SIZE, }; BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len) @@ -1601,10 +1601,10 @@ static const struct bpf_func_proto bpf_csum_diff_proto = { .gpl_only = false, .pkt_access = true, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_STACK, - .arg2_type = ARG_CONST_STACK_SIZE_OR_ZERO, - .arg3_type = ARG_PTR_TO_STACK, - .arg4_type = ARG_CONST_STACK_SIZE_OR_ZERO, + .arg1_type = ARG_PTR_TO_MEM, + .arg2_type = ARG_CONST_SIZE_OR_ZERO, + .arg3_type = ARG_PTR_TO_MEM, + .arg4_type = ARG_CONST_SIZE_OR_ZERO, .arg5_type = ARG_ANYTHING, }; @@ -2306,8 +2306,8 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = { .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_STACK, - .arg5_type = ARG_CONST_STACK_SIZE, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE, }; static unsigned short bpf_tunnel_key_af(u64 flags) @@ -2377,8 +2377,8 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_RAW_STACK, - .arg3_type = ARG_CONST_STACK_SIZE, + .arg2_type = ARG_PTR_TO_UNINIT_MEM, + .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, }; @@ -2412,8 
+2412,8 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_RAW_STACK, - .arg3_type = ARG_CONST_STACK_SIZE, + .arg2_type = ARG_PTR_TO_UNINIT_MEM, + .arg3_type = ARG_CONST_SIZE, }; static struct metadata_dst __percpu *md_dst; @@ -2483,8 +2483,8 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_STACK, - .arg3_type = ARG_CONST_STACK_SIZE, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, }; @@ -2509,8 +2509,8 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_STACK, - .arg3_type = ARG_CONST_STACK_SIZE, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, }; static const struct bpf_func_proto * @@ -2593,8 +2593,8 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = { .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_STACK, - .arg5_type = ARG_CONST_STACK_SIZE, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE, }; static const struct bpf_func_proto * -- cgit v1.2.3 From a5ef01aaac245d37edf113d65b0c146e96d841d1 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 10 Jan 2017 15:02:07 +0100 Subject: bpf: Remove unused but set variable in __bpf_lru_list_shrink_inactive() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the unused but set variable 'first_node' in __bpf_lru_list_shrink_inactive() to fix the following GCC warning when building with 'W=1': kernel/bpf/bpf_lru_list.c:216:41: warning: variable ‘first_node’ set but not used [-Wunused-but-set-variable] Cc: Martin KaFai Lau Signed-off-by: Tobias Klauser Acked-by: Martin KaFai Lau 
Signed-off-by: David S. Miller --- kernel/bpf/bpf_lru_list.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c index 89b7ef41c86b..d78501ee0609 100644 --- a/kernel/bpf/bpf_lru_list.c +++ b/kernel/bpf/bpf_lru_list.c @@ -213,11 +213,10 @@ __bpf_lru_list_shrink_inactive(struct bpf_lru *lru, enum bpf_lru_list_type tgt_free_type) { struct list_head *inactive = &l->lists[BPF_LRU_LIST_T_INACTIVE]; - struct bpf_lru_node *node, *tmp_node, *first_node; + struct bpf_lru_node *node, *tmp_node; unsigned int nshrinked = 0; unsigned int i = 0; - first_node = list_first_entry(inactive, struct bpf_lru_node, list); list_for_each_entry_safe_reverse(node, tmp_node, inactive, list) { if (bpf_lru_node_is_ref(node)) { __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE); -- cgit v1.2.3 From 3bf003335ba356aac5a43e28640159d4ae8a2a60 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 10 Jan 2017 15:02:16 +0100 Subject: bpf: Make unnecessarily global functions static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the functions __local_list_pop_free(), __local_list_pop_pending(), bpf_common_lru_populate() and bpf_percpu_lru_populate() static as they are not used outside of bpf_lru_list.c. This fixes the following GCC warnings when building with 'W=1': kernel/bpf/bpf_lru_list.c:363:22: warning: no previous prototype for ‘__local_list_pop_free’ [-Wmissing-prototypes] kernel/bpf/bpf_lru_list.c:376:22: warning: no previous prototype for ‘__local_list_pop_pending’ [-Wmissing-prototypes] kernel/bpf/bpf_lru_list.c:560:6: warning: no previous prototype for ‘bpf_common_lru_populate’ [-Wmissing-prototypes] kernel/bpf/bpf_lru_list.c:577:6: warning: no previous prototype for ‘bpf_percpu_lru_populate’ [-Wmissing-prototypes] Cc: Martin KaFai Lau Signed-off-by: Tobias Klauser Acked-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Signed-off-by: David S.
Miller --- kernel/bpf/bpf_lru_list.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c index d78501ee0609..f62d1d56f41d 100644 --- a/kernel/bpf/bpf_lru_list.c +++ b/kernel/bpf/bpf_lru_list.c @@ -360,7 +360,8 @@ static void __local_list_add_pending(struct bpf_lru *lru, list_add(&node->list, local_pending_list(loc_l)); } -struct bpf_lru_node *__local_list_pop_free(struct bpf_lru_locallist *loc_l) +static struct bpf_lru_node * +__local_list_pop_free(struct bpf_lru_locallist *loc_l) { struct bpf_lru_node *node; @@ -373,8 +374,8 @@ struct bpf_lru_node *__local_list_pop_free(struct bpf_lru_locallist *loc_l) return node; } -struct bpf_lru_node *__local_list_pop_pending(struct bpf_lru *lru, - struct bpf_lru_locallist *loc_l) +static struct bpf_lru_node * +__local_list_pop_pending(struct bpf_lru *lru, struct bpf_lru_locallist *loc_l) { struct bpf_lru_node *node; bool force = false; @@ -557,8 +558,9 @@ void bpf_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node) bpf_common_lru_push_free(lru, node); } -void bpf_common_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset, - u32 elem_size, u32 nr_elems) +static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf, + u32 node_offset, u32 elem_size, + u32 nr_elems) { struct bpf_lru_list *l = &lru->common_lru.lru_list; u32 i; @@ -574,8 +576,9 @@ void bpf_common_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset, } } -void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset, - u32 elem_size, u32 nr_elems) +static void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf, + u32 node_offset, u32 elem_size, + u32 nr_elems) { u32 i, pcpu_entries; int cpu; -- cgit v1.2.3 From 6b8cc1d11ef75c5b9c530b3d0d148f3c2dd25f93 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 12 Jan 2017 11:51:32 +0100 Subject: bpf: pass original insn directly to convert_ctx_access 
Currently, when calling convert_ctx_access() callback for the various program types, we pass in insn->dst_reg, insn->src_reg, insn->off from the original instruction. This information is needed to rewrite the instruction that is based on the user ctx structure into a kernel representation for the ctx. As we'd like to allow access size beyond just BPF_W, we'd need also insn->code for that in order to decode the original access size. Given that, let's just pass insn directly to the convert_ctx_access() callback and work on that to not clutter the callback with even more arguments we need to pass when everything is already contained in insn. So let's go through that once, no functional change. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 7 ++- kernel/bpf/verifier.c | 3 +- kernel/trace/bpf_trace.c | 15 ++--- net/core/filter.c | 139 +++++++++++++++++++++++++---------------------- 4 files changed, 87 insertions(+), 77 deletions(-) (limited to 'kernel/bpf') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 94ea8d2383e6..f8c3560b01db 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -161,9 +161,10 @@ struct bpf_verifier_ops { enum bpf_reg_type *reg_type); int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, const struct bpf_prog *prog); - u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg, - int src_reg, int ctx_off, - struct bpf_insn *insn, struct bpf_prog *prog); + u32 (*convert_ctx_access)(enum bpf_access_type type, + const struct bpf_insn *src, + struct bpf_insn *dst, + struct bpf_prog *prog); }; struct bpf_prog_type_list { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2efdc9128e3c..df7e47244e75 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3177,8 +3177,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) if (env->insn_aux_data[i].ptr_type != PTR_TO_CTX) continue; - cnt = ops->convert_ctx_access(type,
insn->dst_reg, insn->src_reg, - insn->off, insn_buf, env->prog); + cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog); if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { verbose("bpf verifier is misconfigured\n"); return -EINVAL; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f883c43c96f3..1860e7f1e5a8 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -572,28 +572,29 @@ static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type return true; } -static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, int dst_reg, - int src_reg, int ctx_off, +static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog) { struct bpf_insn *insn = insn_buf; - switch (ctx_off) { + switch (si->off) { case offsetof(struct bpf_perf_event_data, sample_period): BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != sizeof(u64)); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern, - data), dst_reg, src_reg, + data), si->dst_reg, si->src_reg, offsetof(struct bpf_perf_event_data_kern, data)); - *insn++ = BPF_LDX_MEM(BPF_DW, dst_reg, dst_reg, + *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg, offsetof(struct perf_sample_data, period)); break; default: *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern, - regs), dst_reg, src_reg, + regs), si->dst_reg, si->src_reg, offsetof(struct bpf_perf_event_data_kern, regs)); - *insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), dst_reg, dst_reg, ctx_off); + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg, + si->off); break; } diff --git a/net/core/filter.c b/net/core/filter.c index f4d16a905754..8cfbdefbfb1c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2972,32 +2972,33 @@ void bpf_warn_invalid_xdp_action(u32 act) } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); -static u32 sk_filter_convert_ctx_access(enum bpf_access_type 
type, int dst_reg, - int src_reg, int ctx_off, +static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog) { struct bpf_insn *insn = insn_buf; + int off; - switch (ctx_off) { + switch (si->off) { case offsetof(struct __sk_buff, len): BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); - *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct sk_buff, len)); break; case offsetof(struct __sk_buff, protocol): BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); - *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, offsetof(struct sk_buff, protocol)); break; case offsetof(struct __sk_buff, vlan_proto): BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2); - *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, offsetof(struct sk_buff, vlan_proto)); break; @@ -3005,17 +3006,17 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg, BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, + *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct sk_buff, priority)); else - *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct sk_buff, priority)); break; case offsetof(struct __sk_buff, ingress_ifindex): BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, skb_iif) != 4); - *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct sk_buff, skb_iif)); break; @@ -3023,17 +3024,17 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg, BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), - 
dst_reg, src_reg, + si->dst_reg, si->src_reg, offsetof(struct sk_buff, dev)); - *insn++ = BPF_JMP_IMM(BPF_JEQ, dst_reg, 0, 1); - *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, dst_reg, + *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, offsetof(struct net_device, ifindex)); break; case offsetof(struct __sk_buff, hash): BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); - *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct sk_buff, hash)); break; @@ -3041,63 +3042,74 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg, BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, + *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct sk_buff, mark)); else - *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct sk_buff, mark)); break; case offsetof(struct __sk_buff, pkt_type): - return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn); + return convert_skb_access(SKF_AD_PKTTYPE, si->dst_reg, + si->src_reg, insn); case offsetof(struct __sk_buff, queue_mapping): - return convert_skb_access(SKF_AD_QUEUE, dst_reg, src_reg, insn); + return convert_skb_access(SKF_AD_QUEUE, si->dst_reg, + si->src_reg, insn); case offsetof(struct __sk_buff, vlan_present): return convert_skb_access(SKF_AD_VLAN_TAG_PRESENT, - dst_reg, src_reg, insn); + si->dst_reg, si->src_reg, insn); case offsetof(struct __sk_buff, vlan_tci): return convert_skb_access(SKF_AD_VLAN_TAG, - dst_reg, src_reg, insn); + si->dst_reg, si->src_reg, insn); case offsetof(struct __sk_buff, cb[0]) ... 
offsetof(struct __sk_buff, cb[4]): BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20); prog->cb_access = 1; - ctx_off -= offsetof(struct __sk_buff, cb[0]); - ctx_off += offsetof(struct sk_buff, cb); - ctx_off += offsetof(struct qdisc_skb_cb, data); + off = si->off; + off -= offsetof(struct __sk_buff, cb[0]); + off += offsetof(struct sk_buff, cb); + off += offsetof(struct qdisc_skb_cb, data); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, ctx_off); + *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, + si->src_reg, off); else - *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, + si->src_reg, off); break; case offsetof(struct __sk_buff, tc_classid): - ctx_off -= offsetof(struct __sk_buff, tc_classid); - ctx_off += offsetof(struct sk_buff, cb); - ctx_off += offsetof(struct qdisc_skb_cb, tc_classid); + BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, tc_classid) != 2); + + off = si->off; + off -= offsetof(struct __sk_buff, tc_classid); + off += offsetof(struct sk_buff, cb); + off += offsetof(struct qdisc_skb_cb, tc_classid); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off); + *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, + si->src_reg, off); else - *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, ctx_off); + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, + si->src_reg, off); break; case offsetof(struct __sk_buff, data): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data), - dst_reg, src_reg, + si->dst_reg, si->src_reg, offsetof(struct sk_buff, data)); break; case offsetof(struct __sk_buff, data_end): - ctx_off -= offsetof(struct __sk_buff, data_end); - ctx_off += offsetof(struct sk_buff, cb); - ctx_off += offsetof(struct bpf_skb_data_end, data_end); - *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), dst_reg, src_reg, - ctx_off); + off = si->off; + off -= offsetof(struct __sk_buff, data_end); + off += offsetof(struct sk_buff, cb); + off += offsetof(struct bpf_skb_data_end, 
data_end); + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, + si->src_reg, off); break; case offsetof(struct __sk_buff, tc_index): @@ -3105,110 +3117,107 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg, BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, + *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg, offsetof(struct sk_buff, tc_index)); else - *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, offsetof(struct sk_buff, tc_index)); - break; #else if (type == BPF_WRITE) - *insn++ = BPF_MOV64_REG(dst_reg, dst_reg); + *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg); else - *insn++ = BPF_MOV64_IMM(dst_reg, 0); - break; + *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); #endif + break; } return insn - insn_buf; } static u32 sock_filter_convert_ctx_access(enum bpf_access_type type, - int dst_reg, int src_reg, - int ctx_off, + const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog) { struct bpf_insn *insn = insn_buf; - switch (ctx_off) { + switch (si->off) { case offsetof(struct bpf_sock, bound_dev_if): BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, + *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct sock, sk_bound_dev_if)); else - *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct sock, sk_bound_dev_if)); break; case offsetof(struct bpf_sock, family): BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2); - *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, offsetof(struct sock, sk_family)); break; case offsetof(struct bpf_sock, type): - *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 
offsetof(struct sock, __sk_flags_offset)); - *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, SK_FL_TYPE_MASK); - *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, SK_FL_TYPE_SHIFT); + *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK); + *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT); break; case offsetof(struct bpf_sock, protocol): - *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct sock, __sk_flags_offset)); - *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, SK_FL_PROTO_MASK); - *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, SK_FL_PROTO_SHIFT); + *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK); + *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT); break; } return insn - insn_buf; } -static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, int dst_reg, - int src_reg, int ctx_off, +static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog) { struct bpf_insn *insn = insn_buf; - switch (ctx_off) { + switch (si->off) { case offsetof(struct __sk_buff, ifindex): BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), - dst_reg, src_reg, + si->dst_reg, si->src_reg, offsetof(struct sk_buff, dev)); - *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, dst_reg, + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, offsetof(struct net_device, ifindex)); break; default: - return sk_filter_convert_ctx_access(type, dst_reg, src_reg, - ctx_off, insn_buf, prog); + return sk_filter_convert_ctx_access(type, si, insn_buf, prog); } return insn - insn_buf; } -static u32 xdp_convert_ctx_access(enum bpf_access_type type, int dst_reg, - int src_reg, int ctx_off, +static u32 xdp_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog) { struct bpf_insn *insn = insn_buf; 
- switch (ctx_off) { + switch (si->off) { case offsetof(struct xdp_md, data): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data), - dst_reg, src_reg, + si->dst_reg, si->src_reg, offsetof(struct xdp_buff, data)); break; case offsetof(struct xdp_md, data_end): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end), - dst_reg, src_reg, + si->dst_reg, si->src_reg, offsetof(struct xdp_buff, data_end)); break; } -- cgit v1.2.3 From 62c7989b24dbd348c2507ee6458ebf5637d6ddb5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 12 Jan 2017 11:51:33 +0100 Subject: bpf: allow b/h/w/dw access for bpf's cb in ctx When structs are used to store temporary state in cb[] buffer that is used with programs and among tail calls, then the generated code will not always access the buffer in bpf_w chunks. We can ease programming of it and let this act more natural by allowing for aligned b/h/w/dw sized access for cb[] ctx member. Various test cases are attached as well for the selftest suite. Potentially, this can also be reused for other program types to pass data around. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- kernel/bpf/verifier.c | 8 +- net/core/filter.c | 41 ++- tools/testing/selftests/bpf/test_verifier.c | 442 +++++++++++++++++++++++++++- 3 files changed, 478 insertions(+), 13 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index df7e47244e75..d60e12c67266 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3165,10 +3165,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) insn = env->prog->insnsi + delta; for (i = 0; i < insn_cnt; i++, insn++) { - if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) || + if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || + insn->code == (BPF_LDX | BPF_MEM | BPF_H) || + insn->code == (BPF_LDX | BPF_MEM | BPF_W) || insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) type = BPF_READ; - else if (insn->code == (BPF_STX | BPF_MEM | BPF_W) || + else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) || + insn->code == (BPF_STX | BPF_MEM | BPF_H) || + insn->code == (BPF_STX | BPF_MEM | BPF_W) || insn->code == (BPF_STX | BPF_MEM | BPF_DW)) type = BPF_WRITE; else diff --git a/net/core/filter.c b/net/core/filter.c index 8cfbdefbfb1c..90383860e224 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2776,11 +2776,33 @@ static bool __is_valid_access(int off, int size) { if (off < 0 || off >= sizeof(struct __sk_buff)) return false; + /* The verifier guarantees that size > 0. */ if (off % size != 0) return false; - if (size != sizeof(__u32)) - return false; + + switch (off) { + case offsetof(struct __sk_buff, cb[0]) ... 
+ offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1: + if (size == sizeof(__u16) && + off > offsetof(struct __sk_buff, cb[4]) + sizeof(__u16)) + return false; + if (size == sizeof(__u32) && + off > offsetof(struct __sk_buff, cb[4])) + return false; + if (size == sizeof(__u64) && + off > offsetof(struct __sk_buff, cb[2])) + return false; + if (size != sizeof(__u8) && + size != sizeof(__u16) && + size != sizeof(__u32) && + size != sizeof(__u64)) + return false; + break; + default: + if (size != sizeof(__u32)) + return false; + } return true; } @@ -2799,7 +2821,7 @@ static bool sk_filter_is_valid_access(int off, int size, if (type == BPF_WRITE) { switch (off) { case offsetof(struct __sk_buff, cb[0]) ... - offsetof(struct __sk_buff, cb[4]): + offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1: break; default: return false; @@ -2823,7 +2845,7 @@ static bool lwt_is_valid_access(int off, int size, case offsetof(struct __sk_buff, mark): case offsetof(struct __sk_buff, priority): case offsetof(struct __sk_buff, cb[0]) ... - offsetof(struct __sk_buff, cb[4]): + offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1: break; default: return false; @@ -2915,7 +2937,7 @@ static bool tc_cls_act_is_valid_access(int off, int size, case offsetof(struct __sk_buff, tc_index): case offsetof(struct __sk_buff, priority): case offsetof(struct __sk_buff, cb[0]) ... - offsetof(struct __sk_buff, cb[4]): + offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1: case offsetof(struct __sk_buff, tc_classid): break; default: @@ -3066,8 +3088,11 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, si->dst_reg, si->src_reg, insn); case offsetof(struct __sk_buff, cb[0]) ... 
- offsetof(struct __sk_buff, cb[4]): + offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1: BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20); + BUILD_BUG_ON((offsetof(struct sk_buff, cb) + + offsetof(struct qdisc_skb_cb, data)) % + sizeof(__u64)); prog->cb_access = 1; off = si->off; @@ -3075,10 +3100,10 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, off += offsetof(struct sk_buff, cb); off += offsetof(struct qdisc_skb_cb, data); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, + *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg, si->src_reg, off); else - *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, + *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg, si->src_reg, off); break; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 9bb45346dc72..1aa73241c999 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -859,15 +859,451 @@ static struct bpf_test tests[] = { .result = REJECT, }, { - "check non-u32 access to cb", + "check cb access: byte", .insns = { - BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_1, + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0]) + 1), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0]) + 2), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0]) + 3), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[1])), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[1]) + 1), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[1]) + 2), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[1]) + 3), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[2])), + BPF_STX_MEM(BPF_B, BPF_REG_1, 
BPF_REG_0, + offsetof(struct __sk_buff, cb[2]) + 1), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[2]) + 2), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[2]) + 3), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[3])), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[3]) + 1), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[3]) + 2), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[3]) + 3), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[4])), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[4]) + 1), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[4]) + 2), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[4]) + 3), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0]) + 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0]) + 2), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0]) + 3), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[1])), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[1]) + 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[1]) + 2), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[1]) + 3), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[2])), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[2]) + 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[2]) + 2), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[2]) + 3), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[3])), + 
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[3]) + 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[3]) + 2), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[3]) + 3), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[4])), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[4]) + 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[4]) + 2), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[4]) + 3), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "check cb access: byte, oob 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[4]) + 4), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check cb access: byte, oob 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0]) - 1), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check cb access: byte, oob 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[4]) + 4), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check cb access: byte, oob 4", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0]) - 1), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check cb access: byte, wrong type", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "check cb access: 
half", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0]) + 2), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[1])), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[1]) + 2), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[2])), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[2]) + 2), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[3])), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[3]) + 2), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[4])), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[4]) + 2), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0]) + 2), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[1])), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[1]) + 2), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[2])), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[2]) + 2), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[3])), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[3]) + 2), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[4])), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[4]) + 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "check cb access: half, unaligned", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0]) + 1), + BPF_EXIT_INSN(), + }, + .errstr = "misaligned access", + .result = 
REJECT, + }, + { + "check cb access: half, oob 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[4]) + 4), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check cb access: half, oob 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0]) - 2), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check cb access: half, oob 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[4]) + 4), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check cb access: half, oob 4", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0]) - 2), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check cb access: half, wrong type", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "check cb access: word", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[1])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[2])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[3])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[4])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[1])), + 
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[3])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[4])), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "check cb access: word, unaligned 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0]) + 2), + BPF_EXIT_INSN(), + }, + .errstr = "misaligned access", + .result = REJECT, + }, + { + "check cb access: word, unaligned 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[4]) + 1), + BPF_EXIT_INSN(), + }, + .errstr = "misaligned access", + .result = REJECT, + }, + { + "check cb access: word, unaligned 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[4]) + 2), + BPF_EXIT_INSN(), + }, + .errstr = "misaligned access", + .result = REJECT, + }, + { + "check cb access: word, unaligned 4", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[4]) + 3), + BPF_EXIT_INSN(), + }, + .errstr = "misaligned access", + .result = REJECT, + }, + { + "check cb access: double", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[2])), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[2])), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "check cb access: double, unaligned 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[1])), + BPF_EXIT_INSN(), + }, + .errstr = "misaligned access", + 
.result = REJECT, + }, + { + "check cb access: double, unaligned 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[3])), + BPF_EXIT_INSN(), + }, + .errstr = "misaligned access", + .result = REJECT, + }, + { + "check cb access: double, oob 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[4])), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check cb access: double, oob 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[4]) + 8), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check cb access: double, oob 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0]) - 8), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check cb access: double, oob 4", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[4])), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check cb access: double, oob 5", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[4]) + 8), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check cb access: double, oob 6", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0]) - 8), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check cb access: double, wrong type", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, offsetof(struct __sk_buff, 
cb[0])), BPF_EXIT_INSN(), }, .errstr = "invalid bpf_context access", - .errstr_unpriv = "R1 leaks addr", .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, }, { "check out of range skb->cb access", -- cgit v1.2.3 From b95a5c4db09bc7c253636cb84dc9b12c577fd5a0 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Sat, 21 Jan 2017 17:26:11 +0100 Subject: bpf: add a longest prefix match trie map implementation This trie implements a longest prefix match algorithm that can be used to match IP addresses to a stored set of ranges. Internally, data is stored in an unbalanced trie of nodes that has a maximum height of n, where n is the prefixlen the trie was created with. Tries may be created with prefix lengths that are multiples of 8, in the range from 8 to 2048. The key used for lookup and update operations is a struct bpf_lpm_trie_key, and the value is a uint64_t. The code carries more information about the internal implementation. Signed-off-by: Daniel Mack Reviewed-by: David Herrmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 7 + kernel/bpf/Makefile | 2 +- kernel/bpf/lpm_trie.c | 503 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 511 insertions(+), 1 deletion(-) create mode 100644 kernel/bpf/lpm_trie.c (limited to 'kernel/bpf') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 54a5894bb4ea..bd3068485410 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -63,6 +63,12 @@ struct bpf_insn { __s32 imm; /* signed immediate constant */ }; +/* Key of a BPF_MAP_TYPE_LPM_TRIE entry */ +struct bpf_lpm_trie_key { + __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ + __u8 data[0]; /* Arbitrary size */ +}; + /* BPF syscall commands, see bpf(2) man-page for details.
*/ enum bpf_cmd { BPF_MAP_CREATE, @@ -89,6 +95,7 @@ enum bpf_map_type { BPF_MAP_TYPE_CGROUP_ARRAY, BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH, + BPF_MAP_TYPE_LPM_TRIE, }; enum bpf_prog_type { diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 1276474ac3cd..e1ce4f4fd7fd 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -1,7 +1,7 @@ obj-y := core.o obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o -obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o +obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o ifeq ($(CONFIG_PERF_EVENTS),y) obj-$(CONFIG_BPF_SYSCALL) += stackmap.o endif diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c new file mode 100644 index 000000000000..ba19241d1979 --- /dev/null +++ b/kernel/bpf/lpm_trie.c @@ -0,0 +1,503 @@ +/* + * Longest prefix match list implementation + * + * Copyright (c) 2016,2017 Daniel Mack + * Copyright (c) 2016 David Herrmann + * + * This file is subject to the terms and conditions of version 2 of the GNU + * General Public License. See the file COPYING in the main directory of the + * Linux distribution for more details. + */ + +#include +#include +#include +#include +#include +#include + +/* Intermediate node */ +#define LPM_TREE_NODE_FLAG_IM BIT(0) + +struct lpm_trie_node; + +struct lpm_trie_node { + struct rcu_head rcu; + struct lpm_trie_node __rcu *child[2]; + u32 prefixlen; + u32 flags; + u8 data[0]; +}; + +struct lpm_trie { + struct bpf_map map; + struct lpm_trie_node __rcu *root; + size_t n_entries; + size_t max_prefixlen; + size_t data_size; + raw_spinlock_t lock; +}; + +/* This trie implements a longest prefix match algorithm that can be used to + * match IP addresses to a stored set of ranges. + * + * Data stored in @data of struct bpf_lpm_trie_key and struct lpm_trie_node is + * interpreted as big endian, so data[0] stores the most significant byte.
+ * + * Match ranges are internally stored in instances of struct lpm_trie_node + * which each contain their prefix length as well as two pointers that may + * lead to more nodes containing more specific matches. Each node also stores + * a value that is defined by and returned to userspace via the update_elem + * and lookup functions. + * + * For instance, let's start with a trie that was created with a prefix length + * of 32, so it can be used for IPv4 addresses, and one single element that + * matches 192.168.0.0/16. The data array would hence contain + * [0xc0, 0xa8, 0x00, 0x00] in big-endian notation. This documentation will + * stick to IP-address notation for readability though. + * + * As the trie is empty initially, the new node (1) will be placed as root + * node, denoted as (R) in the example below. As there are no other nodes, both + * child pointers are %NULL. + * + * +----------------+ + * | (1) (R) | + * | 192.168.0.0/16 | + * | value: 1 | + * | [0] [1] | + * +----------------+ + * + * Next, let's add a new node (2) matching 192.168.0.0/24. As there is already + * a node with the same data and a smaller prefix (ie, a less specific one), + * node (2) will become a child of (1). The child index depends on the next bit + * that is outside of what (1) matches, and that bit is 0, so (2) will be + * child[0] of (1): + * + * +----------------+ + * | (1) (R) | + * | 192.168.0.0/16 | + * | value: 1 | + * | [0] [1] | + * +----------------+ + * | + * +----------------+ + * | (2) | + * | 192.168.0.0/24 | + * | value: 2 | + * | [0] [1] | + * +----------------+ + * + * The child[1] slot of (1) could be filled with another node which has bit #17 + * (the next bit after the ones that (1) matches on) set to 1.
For instance, + * 192.168.128.0/24: + * + * +----------------+ + * | (1) (R) | + * | 192.168.0.0/16 | + * | value: 1 | + * | [0] [1] | + * +----------------+ + * | | + * +----------------+ +------------------+ + * | (2) | | (3) | + * | 192.168.0.0/24 | | 192.168.128.0/24 | + * | value: 2 | | value: 3 | + * | [0] [1] | | [0] [1] | + * +----------------+ +------------------+ + * + * Let's add another node (5) to the game for 192.168.1.0/24. In order to place + * it, node (1) is looked at first, and because of the semantics laid out + * above (bit #17 is 0), it would normally be attached to (1) as child[0]. + * However, that slot is already allocated, so a new node (4) is needed in between. + * That node does not have a value attached to it and it will never be + * returned to users as the result of a lookup. It is only there to differentiate + * the traversal further. It will get a prefix as wide as necessary to + * distinguish its two children: + * + * +----------------+ + * | (1) (R) | + * | 192.168.0.0/16 | + * | value: 1 | + * | [0] [1] | + * +----------------+ + * | | + * +----------------+ +------------------+ + * | (4) (I) | | (3) | + * | 192.168.0.0/23 | | 192.168.128.0/24 | + * | value: --- | | value: 3 | + * | [0] [1] | | [0] [1] | + * +----------------+ +------------------+ + * | | + * +----------------+ +----------------+ + * | (2) | | (5) | + * | 192.168.0.0/24 | | 192.168.1.0/24 | + * | value: 2 | | value: 5 | + * | [0] [1] | | [0] [1] | + * +----------------+ +----------------+ + * + * 192.168.1.1/32 would be a child of (5) etc. + * + * An intermediate node will be turned into a 'real' node on demand. In the + * example above, (4) would be re-used if 192.168.0.0/23 is added to the trie. + * + * A fully populated trie would have a height of 32 nodes, as the trie was + * created with a prefix length of 32. + * + * The lookup starts at the root node.
If the current node matches and if there + * is a child that can be used to become more specific, the trie is traversed + * downwards. The last node in the traversal that is a non-intermediate one is + * returned. + */ + +static inline int extract_bit(const u8 *data, size_t index) +{ + return !!(data[index / 8] & (1 << (7 - (index % 8)))); +} + +/** + * longest_prefix_match() - determine the longest prefix + * @trie: The trie to get internal sizes from + * @node: The node to operate on + * @key: The key to compare to @node + * + * Determine the longest prefix of @node that matches the bits in @key. + */ +static size_t longest_prefix_match(const struct lpm_trie *trie, + const struct lpm_trie_node *node, + const struct bpf_lpm_trie_key *key) +{ + size_t prefixlen = 0; + size_t i; + + for (i = 0; i < trie->data_size; i++) { + size_t b; + + b = 8 - fls(node->data[i] ^ key->data[i]); + prefixlen += b; + + if (prefixlen >= node->prefixlen || prefixlen >= key->prefixlen) + return min(node->prefixlen, key->prefixlen); + + if (b < 8) + break; + } + + return prefixlen; +} + +/* Called from syscall or from eBPF program */ +static void *trie_lookup_elem(struct bpf_map *map, void *_key) +{ + struct lpm_trie *trie = container_of(map, struct lpm_trie, map); + struct lpm_trie_node *node, *found = NULL; + struct bpf_lpm_trie_key *key = _key; + + /* Start walking the trie from the root node ... */ + + for (node = rcu_dereference(trie->root); node;) { + unsigned int next_bit; + size_t matchlen; + + /* Determine the longest prefix of @node that matches @key. + * If it's the maximum possible prefix for this trie, we have + * an exact match and can return it directly. + */ + matchlen = longest_prefix_match(trie, node, key); + if (matchlen == trie->max_prefixlen) { + found = node; + break; + } + + /* If the number of bits that match is smaller than the prefix + * length of @node, bail out and return the node we have seen + * last in the traversal (ie, the parent). 
+ */ + if (matchlen < node->prefixlen) + break; + + /* Consider this node as return candidate unless it is an + * artificially added intermediate one. + */ + if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) + found = node; + + /* If the node match is fully satisfied, let's see if we can + * become more specific. Determine the next bit in the key and + * traverse down. + */ + next_bit = extract_bit(key->data, node->prefixlen); + node = rcu_dereference(node->child[next_bit]); + } + + if (!found) + return NULL; + + return found->data + trie->data_size; +} + +static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie, + const void *value) +{ + struct lpm_trie_node *node; + size_t size = sizeof(struct lpm_trie_node) + trie->data_size; + + if (value) + size += trie->map.value_size; + + node = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN); + if (!node) + return NULL; + + node->flags = 0; + + if (value) + memcpy(node->data + trie->data_size, value, + trie->map.value_size); + + return node; +} + +/* Called from syscall or from eBPF program */ +static int trie_update_elem(struct bpf_map *map, + void *_key, void *value, u64 flags) +{ + struct lpm_trie *trie = container_of(map, struct lpm_trie, map); + struct lpm_trie_node *node, *im_node, *new_node = NULL; + struct lpm_trie_node __rcu **slot; + struct bpf_lpm_trie_key *key = _key; + unsigned long irq_flags; + unsigned int next_bit; + size_t matchlen = 0; + int ret = 0; + + if (unlikely(flags > BPF_EXIST)) + return -EINVAL; + + if (key->prefixlen > trie->max_prefixlen) + return -EINVAL; + + raw_spin_lock_irqsave(&trie->lock, irq_flags); + + /* Allocate and fill a new node */ + + if (trie->n_entries == trie->map.max_entries) { + ret = -ENOSPC; + goto out; + } + + new_node = lpm_trie_node_alloc(trie, value); + if (!new_node) { + ret = -ENOMEM; + goto out; + } + + trie->n_entries++; + + new_node->prefixlen = key->prefixlen; + RCU_INIT_POINTER(new_node->child[0], NULL); + RCU_INIT_POINTER(new_node->child[1], NULL); + 
memcpy(new_node->data, key->data, trie->data_size); + + /* Now find a slot to attach the new node. To do that, walk the tree + * from the root and match as many bits as possible for each node until + * we either find an empty slot or a slot that needs to be replaced by + * an intermediate node. + */ + slot = &trie->root; + + while ((node = rcu_dereference_protected(*slot, + lockdep_is_held(&trie->lock)))) { + matchlen = longest_prefix_match(trie, node, key); + + if (node->prefixlen != matchlen || + node->prefixlen == key->prefixlen || + node->prefixlen == trie->max_prefixlen) + break; + + next_bit = extract_bit(key->data, node->prefixlen); + slot = &node->child[next_bit]; + } + + /* If the slot is empty (a free child pointer or an empty root), + * simply assign the @new_node to that slot and be done. + */ + if (!node) { + rcu_assign_pointer(*slot, new_node); + goto out; + } + + /* If the slot we picked already exists, replace it with @new_node + * which already has the correct data array set. + */ + if (node->prefixlen == matchlen) { + new_node->child[0] = node->child[0]; + new_node->child[1] = node->child[1]; + + if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) + trie->n_entries--; + + rcu_assign_pointer(*slot, new_node); + kfree_rcu(node, rcu); + + goto out; + } + + /* If the new node matches the prefix completely, it must be inserted + * as an ancestor. Simply insert it between @node and *@slot. 
+ */ + if (matchlen == key->prefixlen) { + next_bit = extract_bit(node->data, matchlen); + rcu_assign_pointer(new_node->child[next_bit], node); + rcu_assign_pointer(*slot, new_node); + goto out; + } + + im_node = lpm_trie_node_alloc(trie, NULL); + if (!im_node) { + ret = -ENOMEM; + goto out; + } + + im_node->prefixlen = matchlen; + im_node->flags |= LPM_TREE_NODE_FLAG_IM; + memcpy(im_node->data, node->data, trie->data_size); + + /* Now determine which child to install in which slot */ + if (extract_bit(key->data, matchlen)) { + rcu_assign_pointer(im_node->child[0], node); + rcu_assign_pointer(im_node->child[1], new_node); + } else { + rcu_assign_pointer(im_node->child[0], new_node); + rcu_assign_pointer(im_node->child[1], node); + } + + /* Finally, assign the intermediate node to the determined spot */ + rcu_assign_pointer(*slot, im_node); + +out: + if (ret) { + if (new_node) + trie->n_entries--; + + kfree(new_node); + kfree(im_node); + } + + raw_spin_unlock_irqrestore(&trie->lock, irq_flags); + + return ret; +} + +static int trie_delete_elem(struct bpf_map *map, void *key) +{ + /* TODO */ + return -ENOSYS; +} + +static struct bpf_map *trie_alloc(union bpf_attr *attr) +{ + size_t cost, cost_per_node; + struct lpm_trie *trie; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + + /* check sanity of attributes */ + if (attr->max_entries == 0 || + attr->map_flags != BPF_F_NO_PREALLOC || + attr->key_size < sizeof(struct bpf_lpm_trie_key) + 1 || + attr->key_size > sizeof(struct bpf_lpm_trie_key) + 256 || + attr->value_size == 0) + return ERR_PTR(-EINVAL); + + trie = kzalloc(sizeof(*trie), GFP_USER | __GFP_NOWARN); + if (!trie) + return ERR_PTR(-ENOMEM); + + /* copy mandatory map attributes */ + trie->map.map_type = attr->map_type; + trie->map.key_size = attr->key_size; + trie->map.value_size = attr->value_size; + trie->map.max_entries = attr->max_entries; + trie->data_size = attr->key_size - + offsetof(struct bpf_lpm_trie_key, data); + 
trie->max_prefixlen = trie->data_size * 8; + + cost_per_node = sizeof(struct lpm_trie_node) + + attr->value_size + trie->data_size; + cost = sizeof(*trie) + attr->max_entries * cost_per_node; + trie->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; + + ret = bpf_map_precharge_memlock(trie->map.pages); + if (ret) { + kfree(trie); + return ERR_PTR(ret); + } + + raw_spin_lock_init(&trie->lock); + + return &trie->map; +} + +static void trie_free(struct bpf_map *map) +{ + struct lpm_trie *trie = container_of(map, struct lpm_trie, map); + struct lpm_trie_node __rcu **slot; + struct lpm_trie_node *node; + + raw_spin_lock(&trie->lock); + + /* Always start at the root and walk down to a node that has no + * children. Then free that node, nullify its reference in the parent + * and start over. + */ + + for (;;) { + slot = &trie->root; + + for (;;) { + node = rcu_dereference_protected(*slot, + lockdep_is_held(&trie->lock)); + if (!node) + goto unlock; + + if (rcu_access_pointer(node->child[0])) { + slot = &node->child[0]; + continue; + } + + if (rcu_access_pointer(node->child[1])) { + slot = &node->child[1]; + continue; + } + + kfree(node); + RCU_INIT_POINTER(*slot, NULL); + break; + } + } + +unlock: + raw_spin_unlock(&trie->lock); +} + +static const struct bpf_map_ops trie_ops = { + .map_alloc = trie_alloc, + .map_free = trie_free, + .map_lookup_elem = trie_lookup_elem, + .map_update_elem = trie_update_elem, + .map_delete_elem = trie_delete_elem, +}; + +static struct bpf_map_type_list trie_type __read_mostly = { + .ops = &trie_ops, + .type = BPF_MAP_TYPE_LPM_TRIE, +}; + +static int __init register_trie_map(void) +{ + bpf_register_map_type(&trie_type); + return 0; +} +late_initcall(register_trie_map); -- cgit v1.2.3 From d140199af510ad4749dc5e38b7922135258ba5fd Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 24 Jan 2017 01:26:46 +0100 Subject: bpf, lpm: fix kfree of im_node in trie_update_elem We need to initialize im_node to NULL, otherwise in case of error 
path it gets passed to kfree() as uninitialized pointer. Fixes: b95a5c4db09b ("bpf: add a longest prefix match trie map implementation") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/lpm_trie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index ba19241d1979..144e9763102f 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -262,7 +262,7 @@ static int trie_update_elem(struct bpf_map *map, void *_key, void *value, u64 flags) { struct lpm_trie *trie = container_of(map, struct lpm_trie, map); - struct lpm_trie_node *node, *im_node, *new_node = NULL; + struct lpm_trie_node *node, *im_node = NULL, *new_node = NULL; struct lpm_trie_node __rcu **slot; struct bpf_lpm_trie_key *key = _key; unsigned long irq_flags; -- cgit v1.2.3 From 3fadc80115837b86f989d17c4aa92bb5cb7bc1b6 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 24 Jan 2017 01:06:30 +0100 Subject: bpf: enable verifier to better track const alu ops William reported couple of issues in relation to direct packet access. Typical scheme is to check for data + [off] <= data_end, where [off] can be either immediate or coming from a tracked register that contains an immediate, depending on the branch, we can then access the data. However, in case of calculating [off] for either the mentioned test itself or for access after the test in a more "complex" way, then the verifier will stop tracking the CONST_IMM marked register and will mark it as UNKNOWN_VALUE one. Adding that UNKNOWN_VALUE typed register to a pkt() marked register, the verifier then bails out in check_packet_ptr_add() as it finds the registers imm value below 48. In the first below example, that is due to evaluate_reg_imm_alu() not handling right shifts and thus marking the register as UNKNOWN_VALUE via helper __mark_reg_unknown_value() that resets imm to 0. 
In the second case the same happens at the time when r4 is set to r4 &= r5, where it transitions to UNKNOWN_VALUE from evaluate_reg_imm_alu(). Later on r4 we shift right by 3 inside evaluate_reg_alu(), where the register's imm turns into 3. That is, for registers with type UNKNOWN_VALUE, imm of 0 means that we don't know what value the register has, and for imm > 0 it means that the value has [imm] upper zero bits. F.e. when shifting an UNKNOWN_VALUE register by 3 to the right, no matter what value it had, we know that the 3 upper most bits must be zero now. This is to make sure that ALU operations with unknown registers don't overflow. Meaning, once we know that we have more than 48 upper zero bits, or, in other words cannot go beyond 0xffff offset with ALU ops, such an addition will track the target register as a new pkt() register with a new id, but 0 offset and 0 range, so for that a new data/data_end test will be required. Is the source register a CONST_IMM one that is to be added to the pkt() register, or the source instruction is an add instruction with immediate value, then it will get added if it stays within max 0xffff bounds. >From there, pkt() type, can be accessed should reg->off + imm be within the access range of pkt(). [...] from 28 to 30: R0=imm1,min_value=1,max_value=1 R1=pkt(id=0,off=0,r=22) R2=pkt_end R3=imm144,min_value=144,max_value=144 R4=imm0,min_value=0,max_value=0 R5=inv48,min_value=2054,max_value=2054 R10=fp 30: (bf) r5 = r3 31: (07) r5 += 23 32: (77) r5 >>= 3 33: (bf) r6 = r1 34: (0f) r6 += r5 cannot add integer value with 0 upper zero bits to ptr_to_packet [...] 
from 52 to 80: R0=imm1,min_value=1,max_value=1 R1=pkt(id=0,off=0,r=34) R2=pkt_end R3=inv R4=imm272 R5=inv56,min_value=17,max_value=17 R6=pkt(id=0,off=26,r=34) R10=fp 80: (07) r4 += 71 81: (18) r5 = 0xfffffff8 83: (5f) r4 &= r5 84: (77) r4 >>= 3 85: (0f) r1 += r4 cannot add integer value with 3 upper zero bits to ptr_to_packet Thus to get above use-cases working, evaluate_reg_imm_alu() has been extended for further ALU ops. This is fine, because we only operate strictly within realm of CONST_IMM types, so here we don't care about overflows as they will happen in the simulated but also real execution and interaction with pkt() in check_packet_ptr_add() will check actual imm value once added to pkt(), but it's irrelevant before. With regards to 06c1c049721a ("bpf: allow helpers access to variable memory") that works on UNKNOWN_VALUE registers, the verifier becomes now a bit smarter as it can better resolve ALU ops, so we need to adapt two test cases there, as min/max bound tracking only becomes necessary when registers were spilled to stack. So while mask was set before to track upper bound for UNKNOWN_VALUE case, it's now resolved directly as CONST_IMM, and such constructs are only necessary when f.e. registers are spilled. For commit 6b17387307ba ("bpf: recognize 64bit immediate loads as consts") that initially enabled dw load tracking only for nfp jit/ analyzer, I did a couple of tests on large, complex programs and we don't increase complexity badly (my tests were in ~3% range on avg). I've added a couple of tests similar to affected code above, and it works fine with verifier now. Reported-by: William Tu Signed-off-by: Daniel Borkmann Cc: Gianluca Borello Cc: William Tu Acked-by: Alexei Starovoitov Signed-off-by: David S.
Miller --- kernel/bpf/verifier.c | 64 +++++++++++++++------- tools/testing/selftests/bpf/test_verifier.c | 82 +++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+), 19 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8f69df7e8167..fb3513b35c0b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1566,22 +1566,54 @@ static int evaluate_reg_imm_alu(struct bpf_verifier_env *env, struct bpf_reg_state *dst_reg = &regs[insn->dst_reg]; struct bpf_reg_state *src_reg = &regs[insn->src_reg]; u8 opcode = BPF_OP(insn->code); + u64 dst_imm = dst_reg->imm; - /* dst_reg->type == CONST_IMM here, simulate execution of 'add'/'or' - * insn. Don't care about overflow or negative values, just add them + /* dst_reg->type == CONST_IMM here. Simulate execution of insns + * containing ALU ops. Don't care about overflow or negative + * values, just add/sub/... them; registers are in u64. */ - if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_K) - dst_reg->imm += insn->imm; - else if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_X && - src_reg->type == CONST_IMM) - dst_reg->imm += src_reg->imm; - else if (opcode == BPF_OR && BPF_SRC(insn->code) == BPF_K) - dst_reg->imm |= insn->imm; - else if (opcode == BPF_OR && BPF_SRC(insn->code) == BPF_X && - src_reg->type == CONST_IMM) - dst_reg->imm |= src_reg->imm; - else + if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_K) { + dst_imm += insn->imm; + } else if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_X && + src_reg->type == CONST_IMM) { + dst_imm += src_reg->imm; + } else if (opcode == BPF_SUB && BPF_SRC(insn->code) == BPF_K) { + dst_imm -= insn->imm; + } else if (opcode == BPF_SUB && BPF_SRC(insn->code) == BPF_X && + src_reg->type == CONST_IMM) { + dst_imm -= src_reg->imm; + } else if (opcode == BPF_MUL && BPF_SRC(insn->code) == BPF_K) { + dst_imm *= insn->imm; + } else if (opcode == BPF_MUL && BPF_SRC(insn->code) == BPF_X && + src_reg->type == CONST_IMM) { +
dst_imm *= src_reg->imm; + } else if (opcode == BPF_OR && BPF_SRC(insn->code) == BPF_K) { + dst_imm |= insn->imm; + } else if (opcode == BPF_OR && BPF_SRC(insn->code) == BPF_X && + src_reg->type == CONST_IMM) { + dst_imm |= src_reg->imm; + } else if (opcode == BPF_AND && BPF_SRC(insn->code) == BPF_K) { + dst_imm &= insn->imm; + } else if (opcode == BPF_AND && BPF_SRC(insn->code) == BPF_X && + src_reg->type == CONST_IMM) { + dst_imm &= src_reg->imm; + } else if (opcode == BPF_RSH && BPF_SRC(insn->code) == BPF_K) { + dst_imm >>= insn->imm; + } else if (opcode == BPF_RSH && BPF_SRC(insn->code) == BPF_X && + src_reg->type == CONST_IMM) { + dst_imm >>= src_reg->imm; + } else if (opcode == BPF_LSH && BPF_SRC(insn->code) == BPF_K) { + dst_imm <<= insn->imm; + } else if (opcode == BPF_LSH && BPF_SRC(insn->code) == BPF_X && + src_reg->type == CONST_IMM) { + dst_imm <<= src_reg->imm; + } else { mark_reg_unknown_value(regs, insn->dst_reg); + goto out; + } + + dst_reg->imm = dst_imm; +out: return 0; } @@ -2225,14 +2257,8 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) return err; if (insn->src_reg == 0) { - /* generic move 64-bit immediate into a register, - * only analyzer needs to collect the ld_imm value. 
- */ u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; - if (!env->analyzer_ops) - return 0; - regs[insn->dst_reg].type = CONST_IMM; regs[insn->dst_reg].imm = imm; return 0; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 1aa73241c999..0d0912c7f03c 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -2325,6 +2325,84 @@ static struct bpf_test tests[] = { .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "direct packet access: test11 (shift, good access)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8), + BPF_MOV64_IMM(BPF_REG_3, 144), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_5, 3), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "direct packet access: test12 (and, good access)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8), + BPF_MOV64_IMM(BPF_REG_3, 144), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23), + BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 15), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 
0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "direct packet access: test13 (branches, good access)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 13), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_MOV64_IMM(BPF_REG_4, 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_4, 2), + BPF_MOV64_IMM(BPF_REG_3, 14), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_3, 24), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23), + BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 15), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, { "helper access to packet: test1, valid packet_ptr range", .insns = { @@ -4208,6 +4286,8 @@ static struct bpf_test tests[] = { .insns = { BPF_MOV64_IMM(BPF_REG_1, 0), BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), BPF_MOV64_IMM(BPF_REG_3, 0), BPF_MOV64_IMM(BPF_REG_4, 0), @@ -4251,6 +4331,8 @@ static struct bpf_test tests[] = { BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), BPF_MOV64_IMM(BPF_REG_3, 0), -- cgit v1.2.3 From a67edbf4fb6deadcfe57a04a134abed4a5ba3bb5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann 
Date: Wed, 25 Jan 2017 02:28:18 +0100 Subject: bpf: add initial bpf tracepoints This work adds a number of tracepoints to paths that are either considered slow-path or exception-like states, where monitoring or inspecting them would be desirable. For bpf(2) syscall, tracepoints have been placed for main commands when they succeed. In XDP case, tracepoint is for exceptions, that is, f.e. on abnormal BPF program exit such as unknown or XDP_ABORTED return code, or when error occurs during XDP_TX action and the packet could not be forwarded. Both have been split into separate event headers, and can be further extended. Worst case, if they unexpectedly should get into our way in future, they can also be removed [1]. Of course, these tracepoints (like any other) can be analyzed by eBPF itself, etc. Example output: # ./perf record -a -e bpf:* sleep 10 # ./perf script sock_example 6197 [005] 283.980322: bpf:bpf_map_create: map type=ARRAY ufd=4 key=4 val=8 max=256 flags=0 sock_example 6197 [005] 283.980721: bpf:bpf_prog_load: prog=a5ea8fa30ea6849c type=SOCKET_FILTER ufd=5 sock_example 6197 [005] 283.988423: bpf:bpf_prog_get_type: prog=a5ea8fa30ea6849c type=SOCKET_FILTER sock_example 6197 [005] 283.988443: bpf:bpf_map_lookup_elem: map type=ARRAY ufd=4 key=[06 00 00 00] val=[00 00 00 00 00 00 00 00] [...] sock_example 6197 [005] 288.990868: bpf:bpf_map_lookup_elem: map type=ARRAY ufd=4 key=[01 00 00 00] val=[14 00 00 00 00 00 00 00] swapper 0 [005] 289.338243: bpf:bpf_prog_put_rcu: prog=a5ea8fa30ea6849c type=SOCKET_FILTER [1] https://lwn.net/Articles/705270/ Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 3 + drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 12 +- .../net/ethernet/netronome/nfp/nfp_net_common.c | 15 +- drivers/net/ethernet/qlogic/qede/qede_fp.c | 4 + drivers/net/virtio_net.c | 12 +- include/linux/bpf_trace.h | 7 + include/trace/events/bpf.h | 347 +++++++++++++++++++++ include/trace/events/xdp.h | 53 ++++ kernel/bpf/core.c | 9 + kernel/bpf/inode.c | 17 +- kernel/bpf/syscall.c | 19 +- 11 files changed, 483 insertions(+), 15 deletions(-) create mode 100644 include/linux/bpf_trace.h create mode 100644 include/trace/events/bpf.h create mode 100644 include/trace/events/xdp.h (limited to 'kernel/bpf') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index e362f99334d0..f15ddba3659a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -33,6 +33,7 @@ #include #include +#include #include #include #include @@ -926,10 +927,12 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud length, cq->ring, &doorbell_pending))) goto consumed; + trace_xdp_exception(dev, xdp_prog, act); goto xdp_drop_no_cnt; /* Drop on xmit failure */ default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: + trace_xdp_exception(dev, xdp_prog, act); case XDP_DROP: ring->xdp_drop++; xdp_drop_no_cnt: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index ba50583ea3ed..3d2e1a1886a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include "en.h" #include "en_tc.h" @@ -640,7 +641,7 @@ static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_sq *sq) mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); } -static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, +static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, 
struct mlx5e_dma_info *di, const struct xdp_buff *xdp) { @@ -662,7 +663,7 @@ static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, MLX5E_SW2HW_MTU(rq->netdev->mtu) < dma_len)) { rq->stats.xdp_drop++; mlx5e_page_release(rq, di, true); - return; + return false; } if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_XDP_TX_WQEBBS))) { @@ -673,7 +674,7 @@ static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, } rq->stats.xdp_tx_full++; mlx5e_page_release(rq, di, true); - return; + return false; } dma_len -= MLX5E_XDP_MIN_INLINE; @@ -703,6 +704,7 @@ static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, sq->db.xdp.doorbell = true; rq->stats.xdp_tx++; + return true; } /* returns true if packet was consumed by xdp */ @@ -728,11 +730,13 @@ static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq, *len = xdp.data_end - xdp.data; return false; case XDP_TX: - mlx5e_xmit_xdp_frame(rq, di, &xdp); + if (unlikely(!mlx5e_xmit_xdp_frame(rq, di, &xdp))) + trace_xdp_exception(rq->netdev, prog, act); return true; default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: + trace_xdp_exception(rq->netdev, prog, act); case XDP_DROP: rq->stats.xdp_drop++; mlx5e_page_release(rq, di, true); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 67afd95ffb93..6ac43abf561b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -42,6 +42,7 @@ */ #include +#include #include #include #include @@ -1459,7 +1460,7 @@ nfp_net_rx_drop(struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring, dev_kfree_skb_any(skb); } -static void +static bool nfp_net_tx_xdp_buf(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring, struct nfp_net_tx_ring *tx_ring, struct nfp_net_rx_buf *rxbuf, unsigned int pkt_off, @@ -1473,13 +1474,13 @@ nfp_net_tx_xdp_buf(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring, if (unlikely(nfp_net_tx_full(tx_ring, 1))) { 
nfp_net_rx_drop(rx_ring->r_vec, rx_ring, rxbuf, NULL); - return; + return false; } new_frag = nfp_net_napi_alloc_one(nn, DMA_BIDIRECTIONAL, &new_dma_addr); if (unlikely(!new_frag)) { nfp_net_rx_drop(rx_ring->r_vec, rx_ring, rxbuf, NULL); - return; + return false; } nfp_net_rx_give_one(rx_ring, new_frag, new_dma_addr); @@ -1509,6 +1510,7 @@ nfp_net_tx_xdp_buf(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring, tx_ring->wr_p++; tx_ring->wr_ptr_add++; + return true; } static int nfp_net_run_xdp(struct bpf_prog *prog, void *data, unsigned int len) @@ -1613,12 +1615,15 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) case XDP_PASS: break; case XDP_TX: - nfp_net_tx_xdp_buf(nn, rx_ring, tx_ring, rxbuf, - pkt_off, pkt_len); + if (unlikely(!nfp_net_tx_xdp_buf(nn, rx_ring, + tx_ring, rxbuf, + pkt_off, pkt_len))) + trace_xdp_exception(nn->netdev, xdp_prog, act); continue; default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: + trace_xdp_exception(nn->netdev, xdp_prog, act); case XDP_DROP: nfp_net_rx_give_one(rx_ring, rxbuf->frag, rxbuf->dma_addr); diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index 1a6ca4884fad..445d4d2492c3 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -1016,6 +1017,7 @@ static bool qede_rx_xdp(struct qede_dev *edev, /* We need the replacement buffer before transmit. 
*/ if (qede_alloc_rx_buffer(rxq, true)) { qede_recycle_rx_bd_ring(rxq, 1); + trace_xdp_exception(edev->ndev, prog, act); return false; } @@ -1026,6 +1028,7 @@ static bool qede_rx_xdp(struct qede_dev *edev, dma_unmap_page(rxq->dev, bd->mapping, PAGE_SIZE, DMA_BIDIRECTIONAL); __free_page(bd->data); + trace_xdp_exception(edev->ndev, prog, act); } /* Regardless, we've consumed an Rx BD */ @@ -1035,6 +1038,7 @@ static bool qede_rx_xdp(struct qede_dev *edev, default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: + trace_xdp_exception(edev->ndev, prog, act); case XDP_DROP: qede_recycle_rx_bd_ring(rxq, cqe->bd_num); } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 37db91d1a0a3..f9bf94887ff1 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -330,7 +331,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, return skb; } -static void virtnet_xdp_xmit(struct virtnet_info *vi, +static bool virtnet_xdp_xmit(struct virtnet_info *vi, struct receive_queue *rq, struct send_queue *sq, struct xdp_buff *xdp, @@ -382,10 +383,12 @@ static void virtnet_xdp_xmit(struct virtnet_info *vi, put_page(page); } else /* small buffer */ kfree_skb(data); - return; // On error abort to avoid unnecessary kick + /* On error abort to avoid unnecessary kick */ + return false; } virtqueue_kick(sq->vq); + return true; } static u32 do_xdp_prog(struct virtnet_info *vi, @@ -421,11 +424,14 @@ static u32 do_xdp_prog(struct virtnet_info *vi, vi->xdp_queue_pairs + smp_processor_id(); xdp.data = buf; - virtnet_xdp_xmit(vi, rq, &vi->sq[qp], &xdp, data); + if (unlikely(!virtnet_xdp_xmit(vi, rq, &vi->sq[qp], &xdp, + data))) + trace_xdp_exception(vi->dev, xdp_prog, act); return XDP_TX; default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: + trace_xdp_exception(vi->dev, xdp_prog, act); case XDP_DROP: return XDP_DROP; } diff --git a/include/linux/bpf_trace.h 
b/include/linux/bpf_trace.h new file mode 100644 index 000000000000..b22efbdd2eb4 --- /dev/null +++ b/include/linux/bpf_trace.h @@ -0,0 +1,7 @@ +#ifndef __LINUX_BPF_TRACE_H__ +#define __LINUX_BPF_TRACE_H__ + +#include +#include + +#endif /* __LINUX_BPF_TRACE_H__ */ diff --git a/include/trace/events/bpf.h b/include/trace/events/bpf.h new file mode 100644 index 000000000000..c3a53fd47ff1 --- /dev/null +++ b/include/trace/events/bpf.h @@ -0,0 +1,347 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM bpf + +#if !defined(_TRACE_BPF_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BPF_H + +#include +#include +#include +#include + +#define __PROG_TYPE_MAP(FN) \ + FN(SOCKET_FILTER) \ + FN(KPROBE) \ + FN(SCHED_CLS) \ + FN(SCHED_ACT) \ + FN(TRACEPOINT) \ + FN(XDP) \ + FN(PERF_EVENT) \ + FN(CGROUP_SKB) \ + FN(CGROUP_SOCK) \ + FN(LWT_IN) \ + FN(LWT_OUT) \ + FN(LWT_XMIT) + +#define __MAP_TYPE_MAP(FN) \ + FN(HASH) \ + FN(ARRAY) \ + FN(PROG_ARRAY) \ + FN(PERF_EVENT_ARRAY) \ + FN(PERCPU_HASH) \ + FN(PERCPU_ARRAY) \ + FN(STACK_TRACE) \ + FN(CGROUP_ARRAY) \ + FN(LRU_HASH) \ + FN(LRU_PERCPU_HASH) \ + FN(LPM_TRIE) + +#define __PROG_TYPE_TP_FN(x) \ + TRACE_DEFINE_ENUM(BPF_PROG_TYPE_##x); +#define __PROG_TYPE_SYM_FN(x) \ + { BPF_PROG_TYPE_##x, #x }, +#define __PROG_TYPE_SYM_TAB \ + __PROG_TYPE_MAP(__PROG_TYPE_SYM_FN) { -1, 0 } +__PROG_TYPE_MAP(__PROG_TYPE_TP_FN) + +#define __MAP_TYPE_TP_FN(x) \ + TRACE_DEFINE_ENUM(BPF_MAP_TYPE_##x); +#define __MAP_TYPE_SYM_FN(x) \ + { BPF_MAP_TYPE_##x, #x }, +#define __MAP_TYPE_SYM_TAB \ + __MAP_TYPE_MAP(__MAP_TYPE_SYM_FN) { -1, 0 } +__MAP_TYPE_MAP(__MAP_TYPE_TP_FN) + +DECLARE_EVENT_CLASS(bpf_prog_event, + + TP_PROTO(const struct bpf_prog *prg), + + TP_ARGS(prg), + + TP_STRUCT__entry( + __array(u8, prog_tag, 8) + __field(u32, type) + ), + + TP_fast_assign( + BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag)); + memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag)); + __entry->type = prg->type; + ), + + TP_printk("prog=%s type=%s", + 
__print_hex_str(__entry->prog_tag, 8), + __print_symbolic(__entry->type, __PROG_TYPE_SYM_TAB)) +); + +DEFINE_EVENT(bpf_prog_event, bpf_prog_get_type, + + TP_PROTO(const struct bpf_prog *prg), + + TP_ARGS(prg) +); + +DEFINE_EVENT(bpf_prog_event, bpf_prog_put_rcu, + + TP_PROTO(const struct bpf_prog *prg), + + TP_ARGS(prg) +); + +TRACE_EVENT(bpf_prog_load, + + TP_PROTO(const struct bpf_prog *prg, int ufd), + + TP_ARGS(prg, ufd), + + TP_STRUCT__entry( + __array(u8, prog_tag, 8) + __field(u32, type) + __field(int, ufd) + ), + + TP_fast_assign( + BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag)); + memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag)); + __entry->type = prg->type; + __entry->ufd = ufd; + ), + + TP_printk("prog=%s type=%s ufd=%d", + __print_hex_str(__entry->prog_tag, 8), + __print_symbolic(__entry->type, __PROG_TYPE_SYM_TAB), + __entry->ufd) +); + +TRACE_EVENT(bpf_map_create, + + TP_PROTO(const struct bpf_map *map, int ufd), + + TP_ARGS(map, ufd), + + TP_STRUCT__entry( + __field(u32, type) + __field(u32, size_key) + __field(u32, size_value) + __field(u32, max_entries) + __field(u32, flags) + __field(int, ufd) + ), + + TP_fast_assign( + __entry->type = map->map_type; + __entry->size_key = map->key_size; + __entry->size_value = map->value_size; + __entry->max_entries = map->max_entries; + __entry->flags = map->map_flags; + __entry->ufd = ufd; + ), + + TP_printk("map type=%s ufd=%d key=%u val=%u max=%u flags=%x", + __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), + __entry->ufd, __entry->size_key, __entry->size_value, + __entry->max_entries, __entry->flags) +); + +DECLARE_EVENT_CLASS(bpf_obj_prog, + + TP_PROTO(const struct bpf_prog *prg, int ufd, + const struct filename *pname), + + TP_ARGS(prg, ufd, pname), + + TP_STRUCT__entry( + __array(u8, prog_tag, 8) + __field(int, ufd) + __string(path, pname->name) + ), + + TP_fast_assign( + BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag)); + memcpy(__entry->prog_tag, prg->tag, 
sizeof(prg->tag)); + __assign_str(path, pname->name); + __entry->ufd = ufd; + ), + + TP_printk("prog=%s path=%s ufd=%d", + __print_hex_str(__entry->prog_tag, 8), + __get_str(path), __entry->ufd) +); + +DEFINE_EVENT(bpf_obj_prog, bpf_obj_pin_prog, + + TP_PROTO(const struct bpf_prog *prg, int ufd, + const struct filename *pname), + + TP_ARGS(prg, ufd, pname) +); + +DEFINE_EVENT(bpf_obj_prog, bpf_obj_get_prog, + + TP_PROTO(const struct bpf_prog *prg, int ufd, + const struct filename *pname), + + TP_ARGS(prg, ufd, pname) +); + +DECLARE_EVENT_CLASS(bpf_obj_map, + + TP_PROTO(const struct bpf_map *map, int ufd, + const struct filename *pname), + + TP_ARGS(map, ufd, pname), + + TP_STRUCT__entry( + __field(u32, type) + __field(int, ufd) + __string(path, pname->name) + ), + + TP_fast_assign( + __assign_str(path, pname->name); + __entry->type = map->map_type; + __entry->ufd = ufd; + ), + + TP_printk("map type=%s ufd=%d path=%s", + __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), + __entry->ufd, __get_str(path)) +); + +DEFINE_EVENT(bpf_obj_map, bpf_obj_pin_map, + + TP_PROTO(const struct bpf_map *map, int ufd, + const struct filename *pname), + + TP_ARGS(map, ufd, pname) +); + +DEFINE_EVENT(bpf_obj_map, bpf_obj_get_map, + + TP_PROTO(const struct bpf_map *map, int ufd, + const struct filename *pname), + + TP_ARGS(map, ufd, pname) +); + +DECLARE_EVENT_CLASS(bpf_map_keyval, + + TP_PROTO(const struct bpf_map *map, int ufd, + const void *key, const void *val), + + TP_ARGS(map, ufd, key, val), + + TP_STRUCT__entry( + __field(u32, type) + __field(u32, key_len) + __dynamic_array(u8, key, map->key_size) + __field(bool, key_trunc) + __field(u32, val_len) + __dynamic_array(u8, val, map->value_size) + __field(bool, val_trunc) + __field(int, ufd) + ), + + TP_fast_assign( + memcpy(__get_dynamic_array(key), key, map->key_size); + memcpy(__get_dynamic_array(val), val, map->value_size); + __entry->type = map->map_type; + __entry->key_len = min(map->key_size, 16U); + __entry->key_trunc = 
map->key_size != __entry->key_len; + __entry->val_len = min(map->value_size, 16U); + __entry->val_trunc = map->value_size != __entry->val_len; + __entry->ufd = ufd; + ), + + TP_printk("map type=%s ufd=%d key=[%s%s] val=[%s%s]", + __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), + __entry->ufd, + __print_hex(__get_dynamic_array(key), __entry->key_len), + __entry->key_trunc ? " ..." : "", + __print_hex(__get_dynamic_array(val), __entry->val_len), + __entry->val_trunc ? " ..." : "") +); + +DEFINE_EVENT(bpf_map_keyval, bpf_map_lookup_elem, + + TP_PROTO(const struct bpf_map *map, int ufd, + const void *key, const void *val), + + TP_ARGS(map, ufd, key, val) +); + +DEFINE_EVENT(bpf_map_keyval, bpf_map_update_elem, + + TP_PROTO(const struct bpf_map *map, int ufd, + const void *key, const void *val), + + TP_ARGS(map, ufd, key, val) +); + +TRACE_EVENT(bpf_map_delete_elem, + + TP_PROTO(const struct bpf_map *map, int ufd, + const void *key), + + TP_ARGS(map, ufd, key), + + TP_STRUCT__entry( + __field(u32, type) + __field(u32, key_len) + __dynamic_array(u8, key, map->key_size) + __field(bool, key_trunc) + __field(int, ufd) + ), + + TP_fast_assign( + memcpy(__get_dynamic_array(key), key, map->key_size); + __entry->type = map->map_type; + __entry->key_len = min(map->key_size, 16U); + __entry->key_trunc = map->key_size != __entry->key_len; + __entry->ufd = ufd; + ), + + TP_printk("map type=%s ufd=%d key=[%s%s]", + __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), + __entry->ufd, + __print_hex(__get_dynamic_array(key), __entry->key_len), + __entry->key_trunc ? " ..." 
: "") +); + +TRACE_EVENT(bpf_map_next_key, + + TP_PROTO(const struct bpf_map *map, int ufd, + const void *key, const void *key_next), + + TP_ARGS(map, ufd, key, key_next), + + TP_STRUCT__entry( + __field(u32, type) + __field(u32, key_len) + __dynamic_array(u8, key, map->key_size) + __dynamic_array(u8, nxt, map->key_size) + __field(bool, key_trunc) + __field(int, ufd) + ), + + TP_fast_assign( + memcpy(__get_dynamic_array(key), key, map->key_size); + memcpy(__get_dynamic_array(nxt), key_next, map->key_size); + __entry->type = map->map_type; + __entry->key_len = min(map->key_size, 16U); + __entry->key_trunc = map->key_size != __entry->key_len; + __entry->ufd = ufd; + ), + + TP_printk("map type=%s ufd=%d key=[%s%s] next=[%s%s]", + __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), + __entry->ufd, + __print_hex(__get_dynamic_array(key), __entry->key_len), + __entry->key_trunc ? " ..." : "", + __print_hex(__get_dynamic_array(nxt), __entry->key_len), + __entry->key_trunc ? " ..." : "") +); + +#endif /* _TRACE_BPF_H */ + +#include diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h new file mode 100644 index 000000000000..1b61357d3f57 --- /dev/null +++ b/include/trace/events/xdp.h @@ -0,0 +1,53 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM xdp + +#if !defined(_TRACE_XDP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_XDP_H + +#include +#include +#include + +#define __XDP_ACT_MAP(FN) \ + FN(ABORTED) \ + FN(DROP) \ + FN(PASS) \ + FN(TX) + +#define __XDP_ACT_TP_FN(x) \ + TRACE_DEFINE_ENUM(XDP_##x); +#define __XDP_ACT_SYM_FN(x) \ + { XDP_##x, #x }, +#define __XDP_ACT_SYM_TAB \ + __XDP_ACT_MAP(__XDP_ACT_SYM_FN) { -1, 0 } +__XDP_ACT_MAP(__XDP_ACT_TP_FN) + +TRACE_EVENT(xdp_exception, + + TP_PROTO(const struct net_device *dev, + const struct bpf_prog *xdp, u32 act), + + TP_ARGS(dev, xdp, act), + + TP_STRUCT__entry( + __string(name, dev->name) + __array(u8, prog_tag, 8) + __field(u32, act) + ), + + TP_fast_assign( + BUILD_BUG_ON(sizeof(__entry->prog_tag) 
!= sizeof(xdp->tag)); + memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag)); + __assign_str(name, dev->name); + __entry->act = act; + ), + + TP_printk("prog=%s device=%s action=%s", + __print_hex_str(__entry->prog_tag, 8), + __get_str(name), + __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB)) +); + +#endif /* _TRACE_XDP_H */ + +#include diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 503d4211988a..fddd76b1b627 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1173,3 +1173,12 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to, { return -EFAULT; } + +/* All definitions of tracepoints related to BPF. */ +#define CREATE_TRACE_POINTS +#include + +EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_get_type); +EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_put_rcu); diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 0b030c9126d3..fddcae801724 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -21,6 +21,7 @@ #include #include #include +#include enum bpf_type { BPF_TYPE_UNSPEC = 0, @@ -281,6 +282,13 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname) ret = bpf_obj_do_pin(pname, raw, type); if (ret != 0) bpf_any_put(raw, type); + if ((trace_bpf_obj_pin_prog_enabled() || + trace_bpf_obj_pin_map_enabled()) && !ret) { + if (type == BPF_TYPE_PROG) + trace_bpf_obj_pin_prog(raw, ufd, pname); + if (type == BPF_TYPE_MAP) + trace_bpf_obj_pin_map(raw, ufd, pname); + } out: putname(pname); return ret; @@ -342,8 +350,15 @@ int bpf_obj_get_user(const char __user *pathname) else goto out; - if (ret < 0) + if (ret < 0) { bpf_any_put(raw, type); + } else if (trace_bpf_obj_get_prog_enabled() || + trace_bpf_obj_get_map_enabled()) { + if (type == BPF_TYPE_PROG) + trace_bpf_obj_get_prog(raw, ret, pname); + if (type == BPF_TYPE_MAP) + trace_bpf_obj_get_map(raw, ret, pname); + } out: putname(pname); return ret; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 1d6b29e4e2c3..05ad086ab71d 
100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -10,6 +10,7 @@ * General Public License for more details. */ #include +#include #include #include #include @@ -215,6 +216,7 @@ static int map_create(union bpf_attr *attr) /* failed to allocate fd */ goto free_map; + trace_bpf_map_create(map, err); return err; free_map: @@ -339,6 +341,7 @@ static int map_lookup_elem(union bpf_attr *attr) if (copy_to_user(uvalue, value, value_size) != 0) goto free_value; + trace_bpf_map_lookup_elem(map, ufd, key, value); err = 0; free_value: @@ -421,6 +424,8 @@ static int map_update_elem(union bpf_attr *attr) __this_cpu_dec(bpf_prog_active); preempt_enable(); + if (!err) + trace_bpf_map_update_elem(map, ufd, key, value); free_value: kfree(value); free_key: @@ -466,6 +471,8 @@ static int map_delete_elem(union bpf_attr *attr) __this_cpu_dec(bpf_prog_active); preempt_enable(); + if (!err) + trace_bpf_map_delete_elem(map, ufd, key); free_key: kfree(key); err_put: @@ -518,6 +525,7 @@ static int map_get_next_key(union bpf_attr *attr) if (copy_to_user(unext_key, next_key, map->key_size) != 0) goto free_next_key; + trace_bpf_map_next_key(map, ufd, key, next_key); err = 0; free_next_key: @@ -671,8 +679,10 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu) void bpf_prog_put(struct bpf_prog *prog) { - if (atomic_dec_and_test(&prog->aux->refcnt)) + if (atomic_dec_and_test(&prog->aux->refcnt)) { + trace_bpf_prog_put_rcu(prog); call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); + } } EXPORT_SYMBOL_GPL(bpf_prog_put); @@ -781,7 +791,11 @@ struct bpf_prog *bpf_prog_get(u32 ufd) struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type) { - return __bpf_prog_get(ufd, &type); + struct bpf_prog *prog = __bpf_prog_get(ufd, &type); + + if (!IS_ERR(prog)) + trace_bpf_prog_get_type(prog); + return prog; } EXPORT_SYMBOL_GPL(bpf_prog_get_type); @@ -863,6 +877,7 @@ static int bpf_prog_load(union bpf_attr *attr) /* failed to allocate fd */ goto free_used_maps; + trace_bpf_prog_load(prog, 
err); return err; free_used_maps: -- cgit v1.2.3 From 63dfef75ed75364901d7caa52c6420cec3e73519 Mon Sep 17 00:00:00 2001 From: William Tu Date: Sat, 4 Feb 2017 08:37:29 -0800 Subject: bpf: enable verifier to add 0 to packet ptr The patch fixes the case when adding a zero value to the packet pointer. The zero value could come from src_reg equals type BPF_K or CONST_IMM. The patch fixes both, otherwise the verifier reports the following error: [...] R0=imm0,min_value=0,max_value=0 R1=pkt(id=0,off=0,r=4) R2=pkt_end R3=fp-12 R4=imm4,min_value=4,max_value=4 R5=pkt(id=0,off=4,r=4) 269: (bf) r2 = r0 // r2 becomes imm0 270: (77) r2 >>= 3 271: (bf) r4 = r1 // r4 becomes pkt ptr 272: (0f) r4 += r2 // r4 += 0 addition of negative constant to packet pointer is not allowed Signed-off-by: William Tu Signed-off-by: Mihai Budiu Cc: Daniel Borkmann Cc: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 2 +- tools/testing/selftests/bpf/test_verifier.c | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index fb3513b35c0b..1a754e5d2695 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1397,7 +1397,7 @@ static int check_packet_ptr_add(struct bpf_verifier_env *env, imm = insn->imm; add_imm: - if (imm <= 0) { + if (imm < 0) { verbose("addition of negative constant to packet pointer is not allowed\n"); return -EACCES; } diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index df194e1d56c2..71f6407cde60 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -2403,6 +2403,29 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "direct packet access: test14 (pkt_ptr += 0, CONST_IMM, good access)", + .insns = { + BPF_LDX_MEM(BPF_W, 
BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 7), + BPF_MOV64_IMM(BPF_REG_5, 12), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_5, 4), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, { "helper access to packet: test1, valid packet_ptr range", .insns = { -- cgit v1.2.3 From c502faf94153bd0fedc5389a936f728a659cc6ab Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 8 Feb 2017 01:19:43 +0100 Subject: bpf, lpm: fix overflows in trie_alloc checks Cap the maximum (total) value size and bail out if larger than KMALLOC_MAX_SIZE as otherwise it doesn't make any sense to proceed further, since we're guaranteed to fail to allocate elements anyway in lpm_trie_node_alloc(); likelihood of failure is still high for large values, though, similarly as with htab case in non-prealloc. Next, make sure that cost vars are really u64 instead of size_t, so that we don't overflow on 32 bit and charge only tiny map.pages against memlock while allowing huge max_entries; cap also the max cost like we do with other map types. Fixes: b95a5c4db09b ("bpf: add a longest prefix match trie map implementation") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- kernel/bpf/lpm_trie.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 144e9763102f..e0f6a0bd279b 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -394,10 +394,21 @@ static int trie_delete_elem(struct bpf_map *map, void *key) return -ENOSYS; } +#define LPM_DATA_SIZE_MAX 256 +#define LPM_DATA_SIZE_MIN 1 + +#define LPM_VAL_SIZE_MAX (KMALLOC_MAX_SIZE - LPM_DATA_SIZE_MAX - \ + sizeof(struct lpm_trie_node)) +#define LPM_VAL_SIZE_MIN 1 + +#define LPM_KEY_SIZE(X) (sizeof(struct bpf_lpm_trie_key) + (X)) +#define LPM_KEY_SIZE_MAX LPM_KEY_SIZE(LPM_DATA_SIZE_MAX) +#define LPM_KEY_SIZE_MIN LPM_KEY_SIZE(LPM_DATA_SIZE_MIN) + static struct bpf_map *trie_alloc(union bpf_attr *attr) { - size_t cost, cost_per_node; struct lpm_trie *trie; + u64 cost = sizeof(*trie), cost_per_node; int ret; if (!capable(CAP_SYS_ADMIN)) @@ -406,9 +417,10 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr) /* check sanity of attributes */ if (attr->max_entries == 0 || attr->map_flags != BPF_F_NO_PREALLOC || - attr->key_size < sizeof(struct bpf_lpm_trie_key) + 1 || - attr->key_size > sizeof(struct bpf_lpm_trie_key) + 256 || - attr->value_size == 0) + attr->key_size < LPM_KEY_SIZE_MIN || + attr->key_size > LPM_KEY_SIZE_MAX || + attr->value_size < LPM_VAL_SIZE_MIN || + attr->value_size > LPM_VAL_SIZE_MAX) return ERR_PTR(-EINVAL); trie = kzalloc(sizeof(*trie), GFP_USER | __GFP_NOWARN); @@ -426,18 +438,24 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr) cost_per_node = sizeof(struct lpm_trie_node) + attr->value_size + trie->data_size; - cost = sizeof(*trie) + attr->max_entries * cost_per_node; + cost += (u64) attr->max_entries * cost_per_node; + if (cost >= U32_MAX - PAGE_SIZE) { + ret = -E2BIG; + goto out_err; + } + trie->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; ret = bpf_map_precharge_memlock(trie->map.pages); - if 
(ret) { - kfree(trie); - return ERR_PTR(ret); - } + if (ret) + goto out_err; raw_spin_lock_init(&trie->lock); return &trie->map; +out_err: + kfree(trie); + return ERR_PTR(ret); } static void trie_free(struct bpf_map *map) -- cgit v1.2.3 From 7f677633379b4abb3281cdbe7e7006f049305c03 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 10 Feb 2017 20:28:24 -0800 Subject: bpf: introduce BPF_F_ALLOW_OVERRIDE flag If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command to the given cgroup the descendent cgroup will be able to override effective bpf program that was inherited from this cgroup. By default it's not passed, therefore override is disallowed. Examples: 1. prog X attached to /A with default prog Y fails to attach to /A/B and /A/B/C Everything under /A runs prog X 2. prog X attached to /A with allow_override. prog Y fails to attach to /A/B with default (non-override) prog M attached to /A/B with allow_override. Everything under /A/B runs prog M only. 3. prog X attached to /A with allow_override. prog Y fails to attach to /A with default. The user has to detach first to switch the mode. In the future this behavior may be extended with a chain of non-overridable programs. Also fix the bug where detach from cgroup where nothing is attached was not throwing error. Return ENOENT in such case. Add several testcases and adjust libbpf. Fixes: 3007098494be ("cgroup: add support for eBPF programs") Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Tejun Heo Acked-by: Daniel Mack Signed-off-by: David S. 
Miller --- include/linux/bpf-cgroup.h | 13 ++++---- include/uapi/linux/bpf.h | 7 +++++ kernel/bpf/cgroup.c | 59 +++++++++++++++++++++++++++------- kernel/bpf/syscall.c | 20 ++++++++---- kernel/cgroup.c | 9 +++--- samples/bpf/test_cgrp2_attach.c | 2 +- samples/bpf/test_cgrp2_attach2.c | 68 +++++++++++++++++++++++++++++++++++++--- samples/bpf/test_cgrp2_sock.c | 2 +- samples/bpf/test_cgrp2_sock2.c | 2 +- tools/lib/bpf/bpf.c | 4 ++- tools/lib/bpf/bpf.h | 3 +- 11 files changed, 151 insertions(+), 38 deletions(-) (limited to 'kernel/bpf') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 92bc89ae7e20..c970a25d2a49 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -21,20 +21,19 @@ struct cgroup_bpf { */ struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE]; struct bpf_prog __rcu *effective[MAX_BPF_ATTACH_TYPE]; + bool disallow_override[MAX_BPF_ATTACH_TYPE]; }; void cgroup_bpf_put(struct cgroup *cgrp); void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent); -void __cgroup_bpf_update(struct cgroup *cgrp, - struct cgroup *parent, - struct bpf_prog *prog, - enum bpf_attach_type type); +int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent, + struct bpf_prog *prog, enum bpf_attach_type type, + bool overridable); /* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */ -void cgroup_bpf_update(struct cgroup *cgrp, - struct bpf_prog *prog, - enum bpf_attach_type type); +int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, bool overridable); int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0eb0e87dbe9f..d2b0ac799d03 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -116,6 +116,12 @@ enum bpf_attach_type { #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE +/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command + * to the given target_fd 
cgroup the descendent cgroup will be able to + * override effective bpf program that was inherited from this cgroup + */ +#define BPF_F_ALLOW_OVERRIDE (1U << 0) + #define BPF_PSEUDO_MAP_FD 1 /* flags for BPF_MAP_UPDATE_ELEM command */ @@ -171,6 +177,7 @@ union bpf_attr { __u32 target_fd; /* container object to attach to */ __u32 attach_bpf_fd; /* eBPF program to attach */ __u32 attach_type; + __u32 attach_flags; }; } __attribute__((aligned(8))); diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index a515f7b007c6..da0f53690295 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -52,6 +52,7 @@ void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent) e = rcu_dereference_protected(parent->bpf.effective[type], lockdep_is_held(&cgroup_mutex)); rcu_assign_pointer(cgrp->bpf.effective[type], e); + cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type]; } } @@ -82,30 +83,63 @@ void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent) * * Must be called with cgroup_mutex held. 
*/ -void __cgroup_bpf_update(struct cgroup *cgrp, - struct cgroup *parent, - struct bpf_prog *prog, - enum bpf_attach_type type) +int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent, + struct bpf_prog *prog, enum bpf_attach_type type, + bool new_overridable) { - struct bpf_prog *old_prog, *effective; + struct bpf_prog *old_prog, *effective = NULL; struct cgroup_subsys_state *pos; + bool overridable = true; - old_prog = xchg(cgrp->bpf.prog + type, prog); + if (parent) { + overridable = !parent->bpf.disallow_override[type]; + effective = rcu_dereference_protected(parent->bpf.effective[type], + lockdep_is_held(&cgroup_mutex)); + } + + if (prog && effective && !overridable) + /* if parent has non-overridable prog attached, disallow + * attaching new programs to descendent cgroup + */ + return -EPERM; + + if (prog && effective && overridable != new_overridable) + /* if parent has overridable prog attached, only + * allow overridable programs in descendent cgroup + */ + return -EPERM; - effective = (!prog && parent) ? 
- rcu_dereference_protected(parent->bpf.effective[type], - lockdep_is_held(&cgroup_mutex)) : - prog; + old_prog = cgrp->bpf.prog[type]; + + if (prog) { + overridable = new_overridable; + effective = prog; + if (old_prog && + cgrp->bpf.disallow_override[type] == new_overridable) + /* disallow attaching non-overridable on top + * of existing overridable in this cgroup + * and vice versa + */ + return -EPERM; + } + + if (!prog && !old_prog) + /* report error when trying to detach and nothing is attached */ + return -ENOENT; + + cgrp->bpf.prog[type] = prog; css_for_each_descendant_pre(pos, &cgrp->self) { struct cgroup *desc = container_of(pos, struct cgroup, self); /* skip the subtree if the descendant has its own program */ - if (desc->bpf.prog[type] && desc != cgrp) + if (desc->bpf.prog[type] && desc != cgrp) { pos = css_rightmost_descendant(pos); - else + } else { rcu_assign_pointer(desc->bpf.effective[type], effective); + desc->bpf.disallow_override[type] = !overridable; + } } if (prog) @@ -115,6 +149,7 @@ void __cgroup_bpf_update(struct cgroup *cgrp, bpf_prog_put(old_prog); static_branch_dec(&cgroup_bpf_enabled_key); } + return 0; } /** diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 19b6129eab23..bbb016adbaeb 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -920,13 +920,14 @@ static int bpf_obj_get(const union bpf_attr *attr) #ifdef CONFIG_CGROUP_BPF -#define BPF_PROG_ATTACH_LAST_FIELD attach_type +#define BPF_PROG_ATTACH_LAST_FIELD attach_flags static int bpf_prog_attach(const union bpf_attr *attr) { + enum bpf_prog_type ptype; struct bpf_prog *prog; struct cgroup *cgrp; - enum bpf_prog_type ptype; + int ret; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -934,6 +935,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) if (CHECK_ATTR(BPF_PROG_ATTACH)) return -EINVAL; + if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE) + return -EINVAL; + switch (attr->attach_type) { case BPF_CGROUP_INET_INGRESS: case BPF_CGROUP_INET_EGRESS: @@ 
-956,10 +960,13 @@ static int bpf_prog_attach(const union bpf_attr *attr) return PTR_ERR(cgrp); } - cgroup_bpf_update(cgrp, prog, attr->attach_type); + ret = cgroup_bpf_update(cgrp, prog, attr->attach_type, + attr->attach_flags & BPF_F_ALLOW_OVERRIDE); + if (ret) + bpf_prog_put(prog); cgroup_put(cgrp); - return 0; + return ret; } #define BPF_PROG_DETACH_LAST_FIELD attach_type @@ -967,6 +974,7 @@ static int bpf_prog_attach(const union bpf_attr *attr) static int bpf_prog_detach(const union bpf_attr *attr) { struct cgroup *cgrp; + int ret; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -982,7 +990,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) if (IS_ERR(cgrp)) return PTR_ERR(cgrp); - cgroup_bpf_update(cgrp, NULL, attr->attach_type); + ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false); cgroup_put(cgrp); break; @@ -990,7 +998,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) return -EINVAL; } - return 0; + return ret; } #endif /* CONFIG_CGROUP_BPF */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 688dd02af985..53bbca7c4859 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -6498,15 +6498,16 @@ static __init int cgroup_namespaces_init(void) subsys_initcall(cgroup_namespaces_init); #ifdef CONFIG_CGROUP_BPF -void cgroup_bpf_update(struct cgroup *cgrp, - struct bpf_prog *prog, - enum bpf_attach_type type) +int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, bool overridable) { struct cgroup *parent = cgroup_parent(cgrp); + int ret; mutex_lock(&cgroup_mutex); - __cgroup_bpf_update(cgrp, parent, prog, type); + ret = __cgroup_bpf_update(cgrp, parent, prog, type, overridable); mutex_unlock(&cgroup_mutex); + return ret; } #endif /* CONFIG_CGROUP_BPF */ diff --git a/samples/bpf/test_cgrp2_attach.c b/samples/bpf/test_cgrp2_attach.c index 504058631ffc..4bfcaf93fcf3 100644 --- a/samples/bpf/test_cgrp2_attach.c +++ b/samples/bpf/test_cgrp2_attach.c @@ -104,7 +104,7 @@ static int attach_filter(int 
cg_fd, int type, int verdict) return EXIT_FAILURE; } - ret = bpf_prog_attach(prog_fd, cg_fd, type); + ret = bpf_prog_attach(prog_fd, cg_fd, type, 0); if (ret < 0) { printf("Failed to attach prog to cgroup: '%s'\n", strerror(errno)); diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c index 6e69be37f87f..3049b1f26267 100644 --- a/samples/bpf/test_cgrp2_attach2.c +++ b/samples/bpf/test_cgrp2_attach2.c @@ -79,11 +79,12 @@ int main(int argc, char **argv) if (join_cgroup(FOO)) goto err; - if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS)) { + if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 1)) { log_err("Attaching prog to /foo"); goto err; } + printf("Attached DROP prog. This ping in cgroup /foo should fail...\n"); assert(system(PING_CMD) != 0); /* Create cgroup /foo/bar, get fd, and join it */ @@ -94,24 +95,27 @@ int main(int argc, char **argv) if (join_cgroup(BAR)) goto err; + printf("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n"); assert(system(PING_CMD) != 0); - if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS)) { + if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) { log_err("Attaching prog to /foo/bar"); goto err; } + printf("Attached PASS prog. 
This ping in cgroup /foo/bar should pass...\n"); assert(system(PING_CMD) == 0); - if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) { log_err("Detaching program from /foo/bar"); goto err; } + printf("Detached PASS from /foo/bar while DROP is attached to /foo.\n" + "This ping in cgroup /foo/bar should fail...\n"); assert(system(PING_CMD) != 0); - if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS)) { + if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) { log_err("Attaching prog to /foo/bar"); goto err; } @@ -121,8 +125,60 @@ int main(int argc, char **argv) goto err; } + printf("Attached PASS from /foo/bar and detached DROP from /foo.\n" + "This ping in cgroup /foo/bar should pass...\n"); assert(system(PING_CMD) == 0); + if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) { + log_err("Attaching prog to /foo/bar"); + goto err; + } + + if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) { + errno = 0; + log_err("Unexpected success attaching prog to /foo/bar"); + goto err; + } + + if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) { + log_err("Detaching program from /foo/bar"); + goto err; + } + + if (!bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) { + errno = 0; + log_err("Unexpected success in double detach from /foo"); + goto err; + } + + if (bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) { + log_err("Attaching non-overridable prog to /foo"); + goto err; + } + + if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) { + errno = 0; + log_err("Unexpected success attaching non-overridable prog to /foo/bar"); + goto err; + } + + if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) { + errno = 0; + log_err("Unexpected success attaching overridable prog to /foo/bar"); + goto err; + } + + if (!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 1)) { + errno = 0; + log_err("Unexpected success attaching overridable prog to /foo"); + goto err; + } + + if (bpf_prog_attach(drop_prog, foo, 
BPF_CGROUP_INET_EGRESS, 0)) { + log_err("Attaching different non-overridable prog to /foo"); + goto err; + } + goto out; err: @@ -132,5 +188,9 @@ out: close(foo); close(bar); cleanup_cgroup_environment(); + if (!rc) + printf("PASS\n"); + else + printf("FAIL\n"); return rc; } diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c index 0791b949cbe4..c3cfb23e23b5 100644 --- a/samples/bpf/test_cgrp2_sock.c +++ b/samples/bpf/test_cgrp2_sock.c @@ -75,7 +75,7 @@ int main(int argc, char **argv) return EXIT_FAILURE; } - ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE); + ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE, 0); if (ret < 0) { printf("Failed to attach prog to cgroup: '%s'\n", strerror(errno)); diff --git a/samples/bpf/test_cgrp2_sock2.c b/samples/bpf/test_cgrp2_sock2.c index 455ef0d06e93..db036077b644 100644 --- a/samples/bpf/test_cgrp2_sock2.c +++ b/samples/bpf/test_cgrp2_sock2.c @@ -55,7 +55,7 @@ int main(int argc, char **argv) } ret = bpf_prog_attach(prog_fd[filter_id], cg_fd, - BPF_CGROUP_INET_SOCK_CREATE); + BPF_CGROUP_INET_SOCK_CREATE, 0); if (ret < 0) { printf("Failed to attach prog to cgroup: '%s'\n", strerror(errno)); diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 3ddb58a36d3c..ae752fa4eaa7 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -168,7 +168,8 @@ int bpf_obj_get(const char *pathname) return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); } -int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type) +int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, + unsigned int flags) { union bpf_attr attr; @@ -176,6 +177,7 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type) attr.target_fd = target_fd; attr.attach_bpf_fd = prog_fd; attr.attach_type = type; + attr.attach_flags = flags; return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 
a2f9853dd882..4ac6c4b84100 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -41,7 +41,8 @@ int bpf_map_delete_elem(int fd, void *key); int bpf_map_get_next_key(int fd, void *key, void *next_key); int bpf_obj_pin(int fd, const char *pathname); int bpf_obj_get(const char *pathname); -int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type); +int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type, + unsigned int flags); int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); -- cgit v1.2.3 From 7e57fbb2a341b5d44d30e71a6d782c5e6dbc429c Mon Sep 17 00:00:00 2001 From: Alexander Alemayhu Date: Tue, 14 Feb 2017 00:02:35 +0100 Subject: bpf: reduce compiler warnings by adding fallthrough comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following warnings: kernel/bpf/verifier.c: In function ‘may_access_direct_pkt_data’: kernel/bpf/verifier.c:702:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if (t == BPF_WRITE) ^ kernel/bpf/verifier.c:704:2: note: here case BPF_PROG_TYPE_SCHED_CLS: ^~~~ kernel/bpf/verifier.c: In function ‘reg_set_min_max_inv’: kernel/bpf/verifier.c:2057:23: warning: this statement may fall through [-Wimplicit-fallthrough=] true_reg->min_value = 0; ~~~~~~~~~~~~~~~~~~~~^~~ kernel/bpf/verifier.c:2058:2: note: here case BPF_JSGT: ^~~~ kernel/bpf/verifier.c:2068:23: warning: this statement may fall through [-Wimplicit-fallthrough=] true_reg->min_value = 0; ~~~~~~~~~~~~~~~~~~~~^~~ kernel/bpf/verifier.c:2069:2: note: here case BPF_JSGE: ^~~~ kernel/bpf/verifier.c: In function ‘reg_set_min_max’: kernel/bpf/verifier.c:2009:24: warning: this statement may fall through [-Wimplicit-fallthrough=] false_reg->min_value = 0; ~~~~~~~~~~~~~~~~~~~~~^~~ kernel/bpf/verifier.c:2010:2: note: here case BPF_JSGT: ^~~~ kernel/bpf/verifier.c:2019:24: warning: this statement may fall through [-Wimplicit-fallthrough=] false_reg->min_value 
= 0; ~~~~~~~~~~~~~~~~~~~~~^~~ kernel/bpf/verifier.c:2020:2: note: here case BPF_JSGE: ^~~~ Reported-by: David Binderman Signed-off-by: Alexander Alemayhu Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel/bpf') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1a754e5d2695..d2bded2b250c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -701,6 +701,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, /* dst_input() and dst_output() can't write for now */ if (t == BPF_WRITE) return false; + /* fallthrough */ case BPF_PROG_TYPE_SCHED_CLS: case BPF_PROG_TYPE_SCHED_ACT: case BPF_PROG_TYPE_XDP: @@ -2007,6 +2008,7 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, case BPF_JGT: /* Unsigned comparison, the minimum value is 0. */ false_reg->min_value = 0; + /* fallthrough */ case BPF_JSGT: /* If this is false then we know the maximum val is val, * otherwise we know the min val is val+1. @@ -2017,6 +2019,7 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, case BPF_JGE: /* Unsigned comparison, the minimum value is 0. */ false_reg->min_value = 0; + /* fallthrough */ case BPF_JSGE: /* If this is false then we know the maximum value is val - 1, * otherwise we know the mimimum value is val. @@ -2055,6 +2058,7 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, case BPF_JGT: /* Unsigned comparison, the minimum value is 0. */ true_reg->min_value = 0; + /* fallthrough */ case BPF_JSGT: /* * If this is false, then the val is <= the register, if it is @@ -2066,6 +2070,7 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, case BPF_JGE: /* Unsigned comparison, the minimum value is 0. */ true_reg->min_value = 0; + /* fallthrough */ case BPF_JSGE: /* If this is false then constant < register, if it is true then * the register < constant. 
-- cgit v1.2.3 From c78f8bdfa11fcceb9723c61212e4bd8f76c87f9e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 16 Feb 2017 22:24:48 +0100 Subject: bpf: mark all registered map/prog types as __ro_after_init All map types and prog types are registered to the BPF core through bpf_register_map_type() and bpf_register_prog_type() during init and remain unchanged thereafter. As by design we don't (and never will) have any pluggable code that can register to that at any later point in time, lets mark all the existing bpf_{map,prog}_type_list objects in the tree as __ro_after_init, so they can be moved to read-only section from then onwards. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/arraymap.c | 10 +++++----- kernel/bpf/hashtab.c | 8 ++++---- kernel/bpf/lpm_trie.c | 2 +- kernel/bpf/stackmap.c | 2 +- kernel/trace/bpf_trace.c | 6 +++--- net/core/filter.c | 18 +++++++++--------- 6 files changed, 23 insertions(+), 23 deletions(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 3d55d95dcf49..6b6f41f0b211 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -269,7 +269,7 @@ static const struct bpf_map_ops array_ops = { .map_delete_elem = array_map_delete_elem, }; -static struct bpf_map_type_list array_type __read_mostly = { +static struct bpf_map_type_list array_type __ro_after_init = { .ops = &array_ops, .type = BPF_MAP_TYPE_ARRAY, }; @@ -283,7 +283,7 @@ static const struct bpf_map_ops percpu_array_ops = { .map_delete_elem = array_map_delete_elem, }; -static struct bpf_map_type_list percpu_array_type __read_mostly = { +static struct bpf_map_type_list percpu_array_type __ro_after_init = { .ops = &percpu_array_ops, .type = BPF_MAP_TYPE_PERCPU_ARRAY, }; @@ -409,7 +409,7 @@ static const struct bpf_map_ops prog_array_ops = { .map_fd_put_ptr = prog_fd_array_put_ptr, }; -static struct bpf_map_type_list prog_array_type __read_mostly = { +static struct 
bpf_map_type_list prog_array_type __ro_after_init = { .ops = &prog_array_ops, .type = BPF_MAP_TYPE_PROG_ARRAY, }; @@ -522,7 +522,7 @@ static const struct bpf_map_ops perf_event_array_ops = { .map_release = perf_event_fd_array_release, }; -static struct bpf_map_type_list perf_event_array_type __read_mostly = { +static struct bpf_map_type_list perf_event_array_type __ro_after_init = { .ops = &perf_event_array_ops, .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, }; @@ -564,7 +564,7 @@ static const struct bpf_map_ops cgroup_array_ops = { .map_fd_put_ptr = cgroup_fd_array_put_ptr, }; -static struct bpf_map_type_list cgroup_array_type __read_mostly = { +static struct bpf_map_type_list cgroup_array_type __ro_after_init = { .ops = &cgroup_array_ops, .type = BPF_MAP_TYPE_CGROUP_ARRAY, }; diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index a753bbe7df0a..3ea87fb19a94 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1023,7 +1023,7 @@ static const struct bpf_map_ops htab_ops = { .map_delete_elem = htab_map_delete_elem, }; -static struct bpf_map_type_list htab_type __read_mostly = { +static struct bpf_map_type_list htab_type __ro_after_init = { .ops = &htab_ops, .type = BPF_MAP_TYPE_HASH, }; @@ -1037,7 +1037,7 @@ static const struct bpf_map_ops htab_lru_ops = { .map_delete_elem = htab_lru_map_delete_elem, }; -static struct bpf_map_type_list htab_lru_type __read_mostly = { +static struct bpf_map_type_list htab_lru_type __ro_after_init = { .ops = &htab_lru_ops, .type = BPF_MAP_TYPE_LRU_HASH, }; @@ -1124,7 +1124,7 @@ static const struct bpf_map_ops htab_percpu_ops = { .map_delete_elem = htab_map_delete_elem, }; -static struct bpf_map_type_list htab_percpu_type __read_mostly = { +static struct bpf_map_type_list htab_percpu_type __ro_after_init = { .ops = &htab_percpu_ops, .type = BPF_MAP_TYPE_PERCPU_HASH, }; @@ -1138,7 +1138,7 @@ static const struct bpf_map_ops htab_lru_percpu_ops = { .map_delete_elem = htab_lru_map_delete_elem, }; -static struct bpf_map_type_list 
htab_lru_percpu_type __read_mostly = { +static struct bpf_map_type_list htab_lru_percpu_type __ro_after_init = { .ops = &htab_lru_percpu_ops, .type = BPF_MAP_TYPE_LRU_PERCPU_HASH, }; diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index e0f6a0bd279b..8bfe0afaee10 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -508,7 +508,7 @@ static const struct bpf_map_ops trie_ops = { .map_delete_elem = trie_delete_elem, }; -static struct bpf_map_type_list trie_type __read_mostly = { +static struct bpf_map_type_list trie_type __ro_after_init = { .ops = &trie_ops, .type = BPF_MAP_TYPE_LPM_TRIE, }; diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index be8519148c25..22aa45cd0324 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -273,7 +273,7 @@ static const struct bpf_map_ops stack_map_ops = { .map_delete_elem = stack_map_delete_elem, }; -static struct bpf_map_type_list stack_map_type __read_mostly = { +static struct bpf_map_type_list stack_map_type __ro_after_init = { .ops = &stack_map_ops, .type = BPF_MAP_TYPE_STACK_TRACE, }; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 424daa4586d1..cee9802cf3e0 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -506,7 +506,7 @@ static const struct bpf_verifier_ops kprobe_prog_ops = { .is_valid_access = kprobe_prog_is_valid_access, }; -static struct bpf_prog_type_list kprobe_tl = { +static struct bpf_prog_type_list kprobe_tl __ro_after_init = { .ops = &kprobe_prog_ops, .type = BPF_PROG_TYPE_KPROBE, }; @@ -589,7 +589,7 @@ static const struct bpf_verifier_ops tracepoint_prog_ops = { .is_valid_access = tp_prog_is_valid_access, }; -static struct bpf_prog_type_list tracepoint_tl = { +static struct bpf_prog_type_list tracepoint_tl __ro_after_init = { .ops = &tracepoint_prog_ops, .type = BPF_PROG_TYPE_TRACEPOINT, }; @@ -648,7 +648,7 @@ static const struct bpf_verifier_ops perf_event_prog_ops = { .convert_ctx_access = pe_prog_convert_ctx_access, }; 
-static struct bpf_prog_type_list perf_event_tl = { +static struct bpf_prog_type_list perf_event_tl __ro_after_init = { .ops = &perf_event_prog_ops, .type = BPF_PROG_TYPE_PERF_EVENT, }; diff --git a/net/core/filter.c b/net/core/filter.c index 0b753cbb2536..e466e0040137 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3296,47 +3296,47 @@ static const struct bpf_verifier_ops cg_sock_ops = { .convert_ctx_access = sock_filter_convert_ctx_access, }; -static struct bpf_prog_type_list sk_filter_type __read_mostly = { +static struct bpf_prog_type_list sk_filter_type __ro_after_init = { .ops = &sk_filter_ops, .type = BPF_PROG_TYPE_SOCKET_FILTER, }; -static struct bpf_prog_type_list sched_cls_type __read_mostly = { +static struct bpf_prog_type_list sched_cls_type __ro_after_init = { .ops = &tc_cls_act_ops, .type = BPF_PROG_TYPE_SCHED_CLS, }; -static struct bpf_prog_type_list sched_act_type __read_mostly = { +static struct bpf_prog_type_list sched_act_type __ro_after_init = { .ops = &tc_cls_act_ops, .type = BPF_PROG_TYPE_SCHED_ACT, }; -static struct bpf_prog_type_list xdp_type __read_mostly = { +static struct bpf_prog_type_list xdp_type __ro_after_init = { .ops = &xdp_ops, .type = BPF_PROG_TYPE_XDP, }; -static struct bpf_prog_type_list cg_skb_type __read_mostly = { +static struct bpf_prog_type_list cg_skb_type __ro_after_init = { .ops = &cg_skb_ops, .type = BPF_PROG_TYPE_CGROUP_SKB, }; -static struct bpf_prog_type_list lwt_in_type __read_mostly = { +static struct bpf_prog_type_list lwt_in_type __ro_after_init = { .ops = &lwt_inout_ops, .type = BPF_PROG_TYPE_LWT_IN, }; -static struct bpf_prog_type_list lwt_out_type __read_mostly = { +static struct bpf_prog_type_list lwt_out_type __ro_after_init = { .ops = &lwt_inout_ops, .type = BPF_PROG_TYPE_LWT_OUT, }; -static struct bpf_prog_type_list lwt_xmit_type __read_mostly = { +static struct bpf_prog_type_list lwt_xmit_type __ro_after_init = { .ops = &lwt_xmit_ops, .type = BPF_PROG_TYPE_LWT_XMIT, }; -static struct 
bpf_prog_type_list cg_sock_type __read_mostly = { +static struct bpf_prog_type_list cg_sock_type __ro_after_init = { .ops = &cg_sock_ops, .type = BPF_PROG_TYPE_CGROUP_SOCK }; -- cgit v1.2.3 From 9383191da4e40360a5d880fbe6bb03911c61621b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 16 Feb 2017 22:24:49 +0100 Subject: bpf: remove stubs for cBPF from arch code Remove the dummy bpf_jit_compile() stubs for eBPF JITs and make that a single __weak function in the core that can be overridden similarly to the eBPF one. Also remove stale pr_err() mentions of bpf_jit_compile. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- arch/arm64/net/bpf_jit_comp.c | 5 ----- arch/powerpc/net/bpf_jit_comp64.c | 2 -- arch/s390/net/bpf_jit_comp.c | 8 -------- arch/x86/net/bpf_jit_comp.c | 8 ++------ include/linux/filter.h | 6 +----- kernel/bpf/core.c | 12 +++++++++++- 6 files changed, 14 insertions(+), 27 deletions(-) (limited to 'kernel/bpf') diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index b2fc97a2c56c..c444408d5a8c 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -813,11 +813,6 @@ static inline void bpf_flush_icache(void *start, void *end) flush_icache_range((unsigned long)start, (unsigned long)end); } -void bpf_jit_compile(struct bpf_prog *prog) -{ - /* Nothing to do here. We support Internal BPF. 
*/ -} - struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { struct bpf_prog *tmp, *orig_prog = prog; diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 73a5cf18fd84..f9ebd02260da 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -961,8 +961,6 @@ common_load: return 0; } -void bpf_jit_compile(struct bpf_prog *fp) { } - struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) { u32 proglen; diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 167b31b186c1..6454efd22e63 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1262,14 +1262,6 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp) return 0; } -/* - * Classic BPF function stub. BPF programs will be converted into - * eBPF and then bpf_int_jit_compile() will be called. - */ -void bpf_jit_compile(struct bpf_prog *fp) -{ -} - /* * Compile eBPF program "fp" */ diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index bb660e53cbd6..26123d0ae13a 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1067,13 +1067,13 @@ common_load: ilen = prog - temp; if (ilen > BPF_MAX_INSN_SIZE) { - pr_err("bpf_jit_compile fatal insn size error\n"); + pr_err("bpf_jit: fatal insn size error\n"); return -EFAULT; } if (image) { if (unlikely(proglen + ilen > oldproglen)) { - pr_err("bpf_jit_compile fatal error\n"); + pr_err("bpf_jit: fatal error\n"); return -EFAULT; } memcpy(image + proglen, temp, ilen); @@ -1085,10 +1085,6 @@ common_load: return proglen; } -void bpf_jit_compile(struct bpf_prog *prog) -{ -} - struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { struct bpf_binary_header *header = NULL; diff --git a/include/linux/filter.h b/include/linux/filter.h index e4eb2546339a..c7a70e0cc3a0 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -607,6 +607,7 @@ void sk_filter_uncharge(struct sock *sk, struct 
sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); +void bpf_jit_compile(struct bpf_prog *prog); bool bpf_helper_changes_pkt_data(void *func); struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, @@ -625,7 +626,6 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, bpf_jit_fill_hole_t bpf_fill_ill_insns); void bpf_jit_binary_free(struct bpf_binary_header *hdr); -void bpf_jit_compile(struct bpf_prog *fp); void bpf_jit_free(struct bpf_prog *fp); struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *fp); @@ -669,10 +669,6 @@ static inline bool bpf_jit_blinding_enabled(void) return true; } #else -static inline void bpf_jit_compile(struct bpf_prog *fp) -{ -} - static inline void bpf_jit_free(struct bpf_prog *fp) { bpf_prog_unlock_free(fp); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index fddd76b1b627..2831ba1e71c1 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1154,12 +1154,22 @@ const struct bpf_func_proto bpf_tail_call_proto = { .arg3_type = ARG_ANYTHING, }; -/* For classic BPF JITs that don't implement bpf_int_jit_compile(). */ +/* Stub for JITs that only support cBPF. eBPF programs are interpreted. + * It is encouraged to implement bpf_int_jit_compile() instead, so that + * eBPF and implicitly also cBPF can get JITed! + */ struct bpf_prog * __weak bpf_int_jit_compile(struct bpf_prog *prog) { return prog; } +/* Stub for JITs that support eBPF. All cBPF code gets transformed into + * eBPF by the kernel and is later compiled by bpf_int_jit_compile(). 
+ */ +void __weak bpf_jit_compile(struct bpf_prog *prog) +{ +} + bool __weak bpf_helper_changes_pkt_data(void *func) { return false; -- cgit v1.2.3 From 74451e66d516c55e309e8d89a4a1e7596e46aacd Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 16 Feb 2017 22:24:50 +0100 Subject: bpf: make jited programs visible in traces Long standing issue with JITed programs is that stack traces from function tracing check whether a given address is kernel code through {__,}kernel_text_address(), which checks for code in core kernel, modules and dynamically allocated ftrace trampolines. But what is still missing is BPF JITed programs (interpreted programs are not an issue as __bpf_prog_run() will be attributed to them), thus when a stack trace is triggered, the code walking the stack won't see any of the JITed ones. The same for address correlation done from user space via reading /proc/kallsyms. This is read by tools like perf, but the latter is also useful for permanent live tracing with eBPF itself in combination with stack maps when other eBPF types are part of the callchain. See offwaketime example on dumping stack from a map. This work tries to tackle that issue by making the addresses and symbols known to the kernel. The lookup from *kernel_text_address() is implemented through a latched RB tree that can be read under RCU in fast-path that is also shared for symbol/size/offset lookup for a specific given address in kallsyms. The slow-path iteration through all symbols in the seq file is done via an RCU list, which holds a tiny fraction of all exported ksyms, usually below 0.1 percent. Function symbols are exported as bpf_prog_<tag>, in order to aid debugging and attribution. This facility is currently enabled for root-only when bpf_jit_kallsyms is set to 1, and disabled if hardening is active in any mode. The rationale behind this is that still a lot of systems ship with world read permissions on kallsyms thus addresses should not get suddenly exposed for them.
If that situation gets much better in future, we always have the option to change the default on this. Likewise, unprivileged programs are not allowed to add entries there either, but that is less of a concern as most such programs types relevant in this context are for root-only anyway. If enabled, call graphs and stack traces will then show a correct attribution; one example is illustrated below, where the trace is now visible in tooling such as perf script --kallsyms=/proc/kallsyms and friends. Before: 7fff8166889d bpf_clone_redirect+0x80007f0020ed (/lib/modules/4.9.0-rc8+/build/vmlinux) f5d80 __sendmsg_nocancel+0xffff006451f1a007 (/usr/lib64/libc-2.18.so) After: 7fff816688b7 bpf_clone_redirect+0x80007f002107 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fffa0575728 bpf_prog_33c45a467c9e061a+0x8000600020fb (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fffa07ef1fc cls_bpf_classify+0x8000600020dc (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff81678b68 tc_classify+0x80007f002078 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8164d40b __netif_receive_skb_core+0x80007f0025fb (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8164d718 __netif_receive_skb+0x80007f002018 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8164e565 process_backlog+0x80007f002095 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8164dc71 net_rx_action+0x80007f002231 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff81767461 __softirqentry_text_start+0x80007f0020d1 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff817658ac do_softirq_own_stack+0x80007f00201c (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff810a2c20 do_softirq+0x80007f002050 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff810a2cb5 __local_bh_enable_ip+0x80007f002085 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8168d452 ip_finish_output2+0x80007f002152 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8168ea3d ip_finish_output+0x80007f00217d (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8168f2af ip_output+0x80007f00203f (/lib/modules/4.9.0-rc8+/build/vmlinux) [...] 
7fff81005854 do_syscall_64+0x80007f002054 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff817649eb return_from_SYSCALL_64+0x80007f002000 (/lib/modules/4.9.0-rc8+/build/vmlinux) f5d80 __sendmsg_nocancel+0xffff01c484812007 (/usr/lib64/libc-2.18.so) Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller --- Documentation/sysctl/net.txt | 12 ++ arch/arm64/net/bpf_jit_comp.c | 15 --- arch/powerpc/net/bpf_jit_comp64.c | 1 + arch/s390/net/bpf_jit_comp.c | 18 --- arch/x86/net/bpf_jit_comp.c | 15 --- include/linux/bpf.h | 4 + include/linux/filter.h | 112 ++++++++++++++++++- kernel/bpf/core.c | 223 ++++++++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 2 + kernel/extable.c | 9 +- kernel/kallsyms.c | 61 +++++++++-- net/Kconfig | 3 +- net/core/sysctl_net_core.c | 7 ++ 13 files changed, 419 insertions(+), 63 deletions(-) (limited to 'kernel/bpf') diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index b80fbd4e5575..2ebabc93014a 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -54,6 +54,18 @@ Values : 1 - enable JIT hardening for unprivileged users only 2 - enable JIT hardening for all users +bpf_jit_kallsyms +---------------- + +When Berkeley Packet Filter Just in Time compiler is enabled, then compiled +images are unknown addresses to the kernel, meaning they neither show up in +traces nor in /proc/kallsyms. This enables export of these addresses, which +can be used for debugging/tracing. If bpf_jit_harden is enabled, this feature +is disabled. 
+Values : + 0 - disable JIT kallsyms export (default value) + 1 - enable JIT kallsyms export for privileged users only + dev_weight -------------- diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index c444408d5a8c..05d12104d270 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -910,18 +910,3 @@ out: tmp : orig_prog); return prog; } - -void bpf_jit_free(struct bpf_prog *prog) -{ - unsigned long addr = (unsigned long)prog->bpf_func & PAGE_MASK; - struct bpf_binary_header *header = (void *)addr; - - if (!prog->jited) - goto free_filter; - - set_memory_rw(addr, header->pages); - bpf_jit_binary_free(header); - -free_filter: - bpf_prog_unlock_free(prog); -} diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index f9ebd02260da..c34166ef76fc 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -1064,6 +1064,7 @@ out: return fp; } +/* Overriding bpf_jit_free() as we don't set images read-only. 
*/ void bpf_jit_free(struct bpf_prog *fp) { unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 6454efd22e63..f1d0e62ec1dd 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1339,21 +1339,3 @@ out: tmp : orig_fp); return fp; } - -/* - * Free eBPF program - */ -void bpf_jit_free(struct bpf_prog *fp) -{ - unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; - struct bpf_binary_header *header = (void *)addr; - - if (!fp->jited) - goto free_filter; - - set_memory_rw(addr, header->pages); - bpf_jit_binary_free(header); - -free_filter: - bpf_prog_unlock_free(fp); -} diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 26123d0ae13a..18a62e208826 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1180,18 +1180,3 @@ out: tmp : orig_prog); return prog; } - -void bpf_jit_free(struct bpf_prog *fp) -{ - unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; - struct bpf_binary_header *header = (void *)addr; - - if (!fp->jited) - goto free_filter; - - set_memory_rw(addr, header->pages); - bpf_jit_binary_free(header); - -free_filter: - bpf_prog_unlock_free(fp); -} diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 57d60dc5b600..909fc033173a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -8,10 +8,12 @@ #define _LINUX_BPF_H 1 #include + #include #include #include #include +#include struct perf_event; struct bpf_map; @@ -177,6 +179,8 @@ struct bpf_prog_aux { atomic_t refcnt; u32 used_map_cnt; u32 max_ctx_offset; + struct latch_tree_node ksym_tnode; + struct list_head ksym_lnode; const struct bpf_verifier_ops *ops; struct bpf_map **used_maps; struct bpf_prog *prog; diff --git a/include/linux/filter.h b/include/linux/filter.h index c7a70e0cc3a0..0c1cc9143cb2 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -54,6 +54,12 @@ struct bpf_prog_aux; #define 
BPF_REG_AX MAX_BPF_REG #define MAX_BPF_JIT_REG (MAX_BPF_REG + 1) +/* As per nm, we expose JITed images as text (code) section for + * kallsyms. That way, tools like perf can find it to match + * addresses. + */ +#define BPF_SYM_ELF_TYPE 't' + /* BPF program can access up to 512 bytes of stack space. */ #define MAX_BPF_STACK 512 @@ -555,6 +561,11 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) { set_memory_rw((unsigned long)fp, fp->pages); } + +static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) +{ + set_memory_rw((unsigned long)hdr, hdr->pages); +} #else static inline void bpf_prog_lock_ro(struct bpf_prog *fp) { @@ -563,8 +574,21 @@ static inline void bpf_prog_lock_ro(struct bpf_prog *fp) static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) { } + +static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) +{ +} #endif /* CONFIG_DEBUG_SET_MODULE_RONX */ +static inline struct bpf_binary_header * +bpf_jit_binary_hdr(const struct bpf_prog *fp) +{ + unsigned long real_start = (unsigned long)fp->bpf_func; + unsigned long addr = real_start & PAGE_MASK; + + return (void *)addr; +} + int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); static inline int sk_filter(struct sock *sk, struct sk_buff *skb) { @@ -617,6 +641,7 @@ void bpf_warn_invalid_xdp_action(u32 act); #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; extern int bpf_jit_harden; +extern int bpf_jit_kallsyms; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); @@ -651,6 +676,11 @@ static inline bool bpf_jit_is_ebpf(void) # endif } +static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) +{ + return fp->jited && bpf_jit_is_ebpf(); +} + static inline bool bpf_jit_blinding_enabled(void) { /* These are the prerequisites, should someone ever have the @@ -668,11 +698,91 @@ static inline bool bpf_jit_blinding_enabled(void) return true; } -#else + +static inline bool bpf_jit_kallsyms_enabled(void) +{ + /* There 
are a couple of corner cases where kallsyms should + * not be enabled f.e. on hardening. + */ + if (bpf_jit_harden) + return false; + if (!bpf_jit_kallsyms) + return false; + if (bpf_jit_kallsyms == 1) + return true; + + return false; +} + +const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char *sym); +bool is_bpf_text_address(unsigned long addr); +int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, + char *sym); + +static inline const char * +bpf_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char **modname, char *sym) +{ + const char *ret = __bpf_address_lookup(addr, size, off, sym); + + if (ret && modname) + *modname = NULL; + return ret; +} + +void bpf_prog_kallsyms_add(struct bpf_prog *fp); +void bpf_prog_kallsyms_del(struct bpf_prog *fp); + +#else /* CONFIG_BPF_JIT */ + +static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) +{ + return false; +} + static inline void bpf_jit_free(struct bpf_prog *fp) { bpf_prog_unlock_free(fp); } + +static inline bool bpf_jit_kallsyms_enabled(void) +{ + return false; +} + +static inline const char * +__bpf_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char *sym) +{ + return NULL; +} + +static inline bool is_bpf_text_address(unsigned long addr) +{ + return false; +} + +static inline int bpf_get_kallsym(unsigned int symnum, unsigned long *value, + char *type, char *sym) +{ + return -ERANGE; +} + +static inline const char * +bpf_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char **modname, char *sym) +{ + return NULL; +} + +static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp) +{ +} + +static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp) +{ +} #endif /* CONFIG_BPF_JIT */ #define BPF_ANC BIT(15) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 2831ba1e71c1..f45827e205d3 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -28,6 
+28,9 @@ #include #include #include +#include +#include +#include #include @@ -95,6 +98,8 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) fp->aux = aux; fp->aux->prog = fp; + INIT_LIST_HEAD_RCU(&fp->aux->ksym_lnode); + return fp; } EXPORT_SYMBOL_GPL(bpf_prog_alloc); @@ -290,6 +295,206 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, } #ifdef CONFIG_BPF_JIT +static __always_inline void +bpf_get_prog_addr_region(const struct bpf_prog *prog, + unsigned long *symbol_start, + unsigned long *symbol_end) +{ + const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog); + unsigned long addr = (unsigned long)hdr; + + WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog)); + + *symbol_start = addr; + *symbol_end = addr + hdr->pages * PAGE_SIZE; +} + +static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) +{ + BUILD_BUG_ON(sizeof("bpf_prog_") + + sizeof(prog->tag) * 2 + 1 > KSYM_NAME_LEN); + + sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_"); + sym = bin2hex(sym, prog->tag, sizeof(prog->tag)); + *sym = 0; +} + +static __always_inline unsigned long +bpf_get_prog_addr_start(struct latch_tree_node *n) +{ + unsigned long symbol_start, symbol_end; + const struct bpf_prog_aux *aux; + + aux = container_of(n, struct bpf_prog_aux, ksym_tnode); + bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end); + + return symbol_start; +} + +static __always_inline bool bpf_tree_less(struct latch_tree_node *a, + struct latch_tree_node *b) +{ + return bpf_get_prog_addr_start(a) < bpf_get_prog_addr_start(b); +} + +static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n) +{ + unsigned long val = (unsigned long)key; + unsigned long symbol_start, symbol_end; + const struct bpf_prog_aux *aux; + + aux = container_of(n, struct bpf_prog_aux, ksym_tnode); + bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end); + + if (val < symbol_start) + return -1; + if (val >= symbol_end) + return 1; + + return 0; +} + 
+static const struct latch_tree_ops bpf_tree_ops = { + .less = bpf_tree_less, + .comp = bpf_tree_comp, +}; + +static DEFINE_SPINLOCK(bpf_lock); +static LIST_HEAD(bpf_kallsyms); +static struct latch_tree_root bpf_tree __cacheline_aligned; + +int bpf_jit_kallsyms __read_mostly; + +static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux) +{ + WARN_ON_ONCE(!list_empty(&aux->ksym_lnode)); + list_add_tail_rcu(&aux->ksym_lnode, &bpf_kallsyms); + latch_tree_insert(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops); +} + +static void bpf_prog_ksym_node_del(struct bpf_prog_aux *aux) +{ + if (list_empty(&aux->ksym_lnode)) + return; + + latch_tree_erase(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops); + list_del_rcu(&aux->ksym_lnode); +} + +static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp) +{ + return fp->jited && !bpf_prog_was_classic(fp); +} + +static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) +{ + return list_empty(&fp->aux->ksym_lnode) || + fp->aux->ksym_lnode.prev == LIST_POISON2; +} + +void bpf_prog_kallsyms_add(struct bpf_prog *fp) +{ + unsigned long flags; + + if (!bpf_prog_kallsyms_candidate(fp) || + !capable(CAP_SYS_ADMIN)) + return; + + spin_lock_irqsave(&bpf_lock, flags); + bpf_prog_ksym_node_add(fp->aux); + spin_unlock_irqrestore(&bpf_lock, flags); +} + +void bpf_prog_kallsyms_del(struct bpf_prog *fp) +{ + unsigned long flags; + + if (!bpf_prog_kallsyms_candidate(fp)) + return; + + spin_lock_irqsave(&bpf_lock, flags); + bpf_prog_ksym_node_del(fp->aux); + spin_unlock_irqrestore(&bpf_lock, flags); +} + +static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr) +{ + struct latch_tree_node *n; + + if (!bpf_jit_kallsyms_enabled()) + return NULL; + + n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops); + return n ? 
+ container_of(n, struct bpf_prog_aux, ksym_tnode)->prog : + NULL; +} + +const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char *sym) +{ + unsigned long symbol_start, symbol_end; + struct bpf_prog *prog; + char *ret = NULL; + + rcu_read_lock(); + prog = bpf_prog_kallsyms_find(addr); + if (prog) { + bpf_get_prog_addr_region(prog, &symbol_start, &symbol_end); + bpf_get_prog_name(prog, sym); + + ret = sym; + if (size) + *size = symbol_end - symbol_start; + if (off) + *off = addr - symbol_start; + } + rcu_read_unlock(); + + return ret; +} + +bool is_bpf_text_address(unsigned long addr) +{ + bool ret; + + rcu_read_lock(); + ret = bpf_prog_kallsyms_find(addr) != NULL; + rcu_read_unlock(); + + return ret; +} + +int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, + char *sym) +{ + unsigned long symbol_start, symbol_end; + struct bpf_prog_aux *aux; + unsigned int it = 0; + int ret = -ERANGE; + + if (!bpf_jit_kallsyms_enabled()) + return ret; + + rcu_read_lock(); + list_for_each_entry_rcu(aux, &bpf_kallsyms, ksym_lnode) { + if (it++ != symnum) + continue; + + bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end); + bpf_get_prog_name(aux->prog, sym); + + *value = symbol_start; + *type = BPF_SYM_ELF_TYPE; + + ret = 0; + break; + } + rcu_read_unlock(); + + return ret; +} + struct bpf_binary_header * bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, unsigned int alignment, @@ -326,6 +531,24 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr) module_memfree(hdr); } +/* This symbol is only overridden by archs that have different + * requirements than the usual eBPF JITs, f.e. when they only + * implement cBPF JIT, do not set images read-only, etc. 
+ */ +void __weak bpf_jit_free(struct bpf_prog *fp) +{ + if (fp->jited) { + struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp); + + bpf_jit_binary_unlock_ro(hdr); + bpf_jit_binary_free(hdr); + + WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp)); + } + + bpf_prog_unlock_free(fp); +} + int bpf_jit_harden __read_mostly; static int bpf_jit_blind_insn(const struct bpf_insn *from, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index f74ca17af64a..461eb1e66a0f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -707,6 +707,7 @@ void bpf_prog_put(struct bpf_prog *prog) { if (atomic_dec_and_test(&prog->aux->refcnt)) { trace_bpf_prog_put_rcu(prog); + bpf_prog_kallsyms_del(prog); call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); } } @@ -903,6 +904,7 @@ static int bpf_prog_load(union bpf_attr *attr) /* failed to allocate fd */ goto free_used_maps; + bpf_prog_kallsyms_add(prog); trace_bpf_prog_load(prog, err); return err; diff --git a/kernel/extable.c b/kernel/extable.c index e3beec4a2339..bd82117ad424 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -104,6 +105,8 @@ int __kernel_text_address(unsigned long addr) return 1; if (is_ftrace_trampoline(addr)) return 1; + if (is_bpf_text_address(addr)) + return 1; /* * There might be init symbols in saved stacktraces. 
* Give those symbols a chance to be printed in @@ -123,7 +126,11 @@ int kernel_text_address(unsigned long addr) return 1; if (is_module_text_address(addr)) return 1; - return is_ftrace_trampoline(addr); + if (is_ftrace_trampoline(addr)) + return 1; + if (is_bpf_text_address(addr)) + return 1; + return 0; } /* diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index fafd1a3ef0da..6a3b249a2ae1 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -300,10 +301,11 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, unsigned long *offset) { char namebuf[KSYM_NAME_LEN]; + if (is_ksym_addr(addr)) return !!get_symbol_pos(addr, symbolsize, offset); - - return !!module_address_lookup(addr, symbolsize, offset, NULL, namebuf); + return !!module_address_lookup(addr, symbolsize, offset, NULL, namebuf) || + !!__bpf_address_lookup(addr, symbolsize, offset, namebuf); } /* @@ -318,6 +320,8 @@ const char *kallsyms_lookup(unsigned long addr, unsigned long *offset, char **modname, char *namebuf) { + const char *ret; + namebuf[KSYM_NAME_LEN - 1] = 0; namebuf[0] = 0; @@ -333,9 +337,13 @@ const char *kallsyms_lookup(unsigned long addr, return namebuf; } - /* See if it's in a module. */ - return module_address_lookup(addr, symbolsize, offset, modname, - namebuf); + /* See if it's in a module or a BPF JITed image. */ + ret = module_address_lookup(addr, symbolsize, offset, + modname, namebuf); + if (!ret) + ret = bpf_address_lookup(addr, symbolsize, + offset, modname, namebuf); + return ret; } int lookup_symbol_name(unsigned long addr, char *symname) @@ -471,6 +479,7 @@ EXPORT_SYMBOL(__print_symbol); /* To avoid using get_symbol_offset for every symbol, we carry prefix along. */ struct kallsym_iter { loff_t pos; + loff_t pos_mod_end; unsigned long value; unsigned int nameoff; /* If iterating in core kernel symbols. 
*/ char type; @@ -481,13 +490,27 @@ struct kallsym_iter { static int get_ksymbol_mod(struct kallsym_iter *iter) { - if (module_get_kallsym(iter->pos - kallsyms_num_syms, &iter->value, - &iter->type, iter->name, iter->module_name, - &iter->exported) < 0) + int ret = module_get_kallsym(iter->pos - kallsyms_num_syms, + &iter->value, &iter->type, + iter->name, iter->module_name, + &iter->exported); + if (ret < 0) { + iter->pos_mod_end = iter->pos; return 0; + } + return 1; } +static int get_ksymbol_bpf(struct kallsym_iter *iter) +{ + iter->module_name[0] = '\0'; + iter->exported = 0; + return bpf_get_kallsym(iter->pos - iter->pos_mod_end, + &iter->value, &iter->type, + iter->name) < 0 ? 0 : 1; +} + /* Returns space to next name. */ static unsigned long get_ksymbol_core(struct kallsym_iter *iter) { @@ -508,16 +531,30 @@ static void reset_iter(struct kallsym_iter *iter, loff_t new_pos) iter->name[0] = '\0'; iter->nameoff = get_symbol_offset(new_pos); iter->pos = new_pos; + if (new_pos == 0) + iter->pos_mod_end = 0; +} + +static int update_iter_mod(struct kallsym_iter *iter, loff_t pos) +{ + iter->pos = pos; + + if (iter->pos_mod_end > 0 && + iter->pos_mod_end < iter->pos) + return get_ksymbol_bpf(iter); + + if (!get_ksymbol_mod(iter)) + return get_ksymbol_bpf(iter); + + return 1; } /* Returns false if pos at or past end of file. */ static int update_iter(struct kallsym_iter *iter, loff_t pos) { /* Module symbols can be accessed randomly. */ - if (pos >= kallsyms_num_syms) { - iter->pos = pos; - return get_ksymbol_mod(iter); - } + if (pos >= kallsyms_num_syms) + return update_iter_mod(iter, pos); /* If we're not on the desired position, reset to new position. 
*/ if (pos != iter->pos) diff --git a/net/Kconfig b/net/Kconfig index f19c0c3b9589..102f781a0131 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -297,7 +297,8 @@ config BPF_JIT Note, admin should enable this feature changing: /proc/sys/net/core/bpf_jit_enable - /proc/sys/net/core/bpf_jit_harden (optional) + /proc/sys/net/core/bpf_jit_harden (optional) + /proc/sys/net/core/bpf_jit_kallsyms (optional) config NET_FLOW_LIMIT bool diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index eaa72eb0399c..4ead336e14ea 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -334,6 +334,13 @@ static struct ctl_table net_core_table[] = { .mode = 0600, .proc_handler = proc_dointvec, }, + { + .procname = "bpf_jit_kallsyms", + .data = &bpf_jit_kallsyms, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec, + }, # endif #endif { -- cgit v1.2.3 From bc1750f366902449f36f15f4a692a495fe6bcdfe Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 23 Feb 2017 00:20:53 +0000 Subject: bpf: fix spelling mistake: "proccessed" -> "processed" trivial fix to spelling mistake in verbose log message Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d2bded2b250c..3fc6e39b223e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2776,7 +2776,7 @@ static int do_check(struct bpf_verifier_env *env) class = BPF_CLASS(insn->code); if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) { - verbose("BPF program is too large. Proccessed %d insn\n", + verbose("BPF program is too large. 
Processed %d insn\n", insn_processed); return -E2BIG; } -- cgit v1.2.3 From eba38a968258b5ad9d70722ab8c584e1753f4b16 Mon Sep 17 00:00:00 2001 From: Gary Lin Date: Wed, 1 Mar 2017 16:25:51 +0800 Subject: bpf: update the comment about the length of analysis Commit 07016151a446 ("bpf, verifier: further improve search pruning") increased the limit of processed instructions from 32k to 64k, but the comment still mentioned the 32k limit. This commit updates the comment to reflect the change. Cc: Alexei Starovoitov Cc: Daniel Borkmann Signed-off-by: Gary Lin Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/bpf') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3fc6e39b223e..796b68d00119 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -33,7 +33,7 @@ * - out of bounds or malformed jumps * The second pass is all possible path descent from the 1st insn. * Since it's analyzing all pathes through the program, the length of the - * analysis is limited to 32k insn, which may be hit even if total number of + * analysis is limited to 64k insn, which may be hit even if total number of * insn is less then 4K, but there are too many branches that change stack/regs. * Number of 'branches to be analyzed' is limited to 1k * -- cgit v1.2.3 From 3f07c0144132e4f59d88055ac8ff3e691a5fa2b8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:30 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to <linux/sched/signal.h> We are going to split <linux/sched/signal.h> out of <linux/sched.h>, which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder <linux/sched/signal.h> file that just maps to <linux/sched.h> to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it.
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/kernel/osf_sys.c | 2 +- arch/alpha/kernel/signal.c | 2 +- arch/alpha/kernel/traps.c | 2 +- arch/alpha/mm/fault.c | 2 +- arch/arc/kernel/traps.c | 2 +- arch/arc/mm/fault.c | 2 +- arch/arm/kernel/ptrace.c | 2 +- arch/arm/kernel/traps.c | 2 +- arch/arm/mm/alignment.c | 2 +- arch/arm/mm/fault.c | 2 +- arch/arm/mm/init.c | 1 + arch/arm/mm/mmap.c | 2 +- arch/arm/vfp/vfpmodule.c | 2 +- arch/arm64/kernel/fpsimd.c | 2 +- arch/arm64/kernel/ptrace.c | 2 +- arch/arm64/kernel/traps.c | 2 +- arch/arm64/mm/fault.c | 2 +- arch/arm64/mm/mmap.c | 2 +- arch/avr32/kernel/traps.c | 2 +- arch/blackfin/kernel/trace.c | 2 +- arch/blackfin/kernel/traps.c | 1 + arch/cris/mm/fault.c | 1 + arch/frv/kernel/traps.c | 2 +- arch/h8300/kernel/ptrace_s.c | 2 +- arch/hexagon/kernel/traps.c | 2 +- arch/hexagon/mm/vm_fault.c | 1 + arch/ia64/kernel/asm-offsets.c | 2 +- arch/ia64/kernel/brl_emu.c | 2 +- arch/ia64/kernel/mca.c | 2 +- arch/ia64/kernel/traps.c | 2 +- arch/ia64/kernel/unaligned.c | 2 +- arch/ia64/mm/fault.c | 2 +- arch/ia64/mm/init.c | 1 + arch/mips/kernel/branch.c | 2 +- arch/mips/kernel/signal_o32.c | 1 + arch/mips/mm/mmap.c | 2 +- arch/mips/sgi-ip22/ip22-berr.c | 2 +- arch/mips/sgi-ip22/ip22-reset.c | 2 +- arch/mn10300/kernel/fpu.c | 2 ++ arch/openrisc/mm/fault.c | 2 +- arch/parisc/kernel/sys_parisc.c | 1 + arch/parisc/kernel/unaligned.c | 2 +- arch/parisc/math-emu/driver.c | 3 ++- arch/powerpc/kvm/book3s_64_vio.c | 1 + arch/powerpc/mm/mmap.c | 2 +- arch/powerpc/mm/mmu_context_iommu.c | 2 +- arch/powerpc/platforms/cell/spufs/fault.c | 2 +- arch/powerpc/xmon/xmon.c | 2 +- arch/s390/kernel/nmi.c | 3 +++ arch/s390/mm/mmap.c | 1 + arch/score/kernel/traps.c | 2 +- arch/sh/kernel/cpu/sh2a/fpu.c | 2 +- arch/sh/kernel/hw_breakpoint.c | 1 + arch/sh/kernel/traps.c | 2 ++ arch/sh/math-emu/math.c | 2 +- arch/sh/mm/asids-debugfs.c | 2 ++ 
arch/sh/mm/fault.c | 1 + arch/sparc/kernel/sys_sparc_32.c | 2 +- arch/sparc/kernel/sys_sparc_64.c | 2 +- arch/sparc/kernel/unaligned_32.c | 2 +- arch/tile/mm/mmap.c | 2 +- arch/um/drivers/line.c | 3 ++- arch/um/kernel/reboot.c | 2 +- arch/um/kernel/skas/mmu.c | 3 ++- arch/um/kernel/tlb.c | 3 ++- arch/um/kernel/trap.c | 2 +- arch/unicore32/kernel/fpu-ucf64.c | 2 +- arch/unicore32/kernel/traps.c | 1 + arch/unicore32/mm/fault.c | 2 +- arch/x86/entry/vsyscall/vsyscall_64.c | 1 + arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 2 +- arch/x86/kvm/mmu.c | 1 + arch/x86/mm/mmap.c | 2 +- arch/xtensa/kernel/traps.c | 2 +- drivers/android/binder.c | 2 +- drivers/block/drbd/drbd_int.h | 2 +- drivers/char/snsc_event.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 2 +- drivers/gpu/drm/drm_lock.c | 2 ++ drivers/gpu/drm/ttm/ttm_lock.c | 2 +- drivers/infiniband/core/umem.c | 2 +- drivers/infiniband/hw/hfi1/user_pages.c | 2 +- drivers/infiniband/hw/qib/qib_user_pages.c | 1 + drivers/infiniband/hw/usnic/usnic_uiom.c | 2 +- drivers/isdn/i4l/isdn_tty.c | 1 + drivers/isdn/mISDN/l1oip_core.c | 2 ++ drivers/md/md.c | 1 + drivers/md/raid1.c | 3 +++ drivers/md/raid5.c | 2 ++ drivers/misc/genwqe/card_dev.c | 2 +- drivers/misc/mic/cosm/cosm_scif_server.c | 2 ++ drivers/misc/mic/cosm_client/cosm_scif_client.c | 2 ++ drivers/misc/mic/scif/scif_rma.c | 1 + drivers/net/slip/slip.c | 2 +- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 2 +- drivers/parisc/power.c | 2 +- drivers/ps3/ps3-sys-manager.c | 1 + drivers/s390/char/fs3270.c | 1 + drivers/s390/char/keyboard.c | 2 +- drivers/scsi/bnx2fc/bnx2fc.h | 2 +- drivers/scsi/bnx2i/bnx2i.h | 2 +- drivers/scsi/libiscsi.c | 1 + drivers/staging/android/lowmemorykiller.c | 2 +- drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c | 2 +- drivers/staging/rtl8188eu/include/osdep_service.h | 2 +- drivers/staging/rtl8712/osdep_service.h | 2 +- drivers/staging/rtl8712/rtl8712_cmd.c | 1 + drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c | 1 + 
drivers/staging/vc04_services/interface/vchiq_arm/vchiq_util.h | 2 +- drivers/target/iscsi/iscsi_target.c | 1 + drivers/target/iscsi/iscsi_target_erl0.c | 2 ++ drivers/target/iscsi/iscsi_target_login.c | 1 + drivers/target/iscsi/iscsi_target_nego.c | 1 + drivers/tty/pty.c | 2 +- drivers/tty/sysrq.c | 2 +- drivers/tty/tty_io.c | 2 +- drivers/tty/vt/keyboard.c | 2 +- drivers/tty/vt/vt.c | 2 +- drivers/tty/vt/vt_ioctl.c | 2 +- drivers/usb/atm/usbatm.c | 2 +- drivers/usb/core/devio.c | 1 + drivers/usb/gadget/function/f_mass_storage.c | 1 + drivers/vfio/vfio_iommu_spapr_tce.c | 1 + drivers/vfio/vfio_iommu_type1.c | 2 +- drivers/w1/w1_family.c | 2 +- drivers/w1/w1_int.c | 1 + fs/attr.c | 1 + fs/autofs4/waitq.c | 1 + fs/cifs/connect.c | 1 + fs/coda/upcall.c | 2 +- fs/coredump.c | 2 +- fs/exec.c | 1 + fs/file.c | 2 +- fs/fs_struct.c | 2 +- fs/jffs2/background.c | 2 +- fs/lockd/svc.c | 2 +- fs/ncpfs/inode.c | 1 + fs/ncpfs/sock.c | 1 + fs/nfs/callback.c | 1 + fs/nfsd/nfssvc.c | 2 +- fs/proc/fd.c | 2 +- fs/select.c | 4 ++-- include/linux/oom.h | 2 +- include/linux/ptrace.h | 1 + include/linux/sched/signal.h | 6 ++++++ include/linux/signalfd.h | 2 +- include/linux/taskstats_kern.h | 2 +- ipc/mqueue.c | 1 + kernel/bpf/syscall.c | 1 + kernel/cpu.c | 2 +- kernel/debug/gdbstub.c | 1 + kernel/debug/kdb/kdb_bt.c | 2 +- kernel/hung_task.c | 2 ++ kernel/rcu/update.c | 2 +- kernel/sched/sched.h | 1 + kernel/time/itimer.c | 1 + kernel/time/posix-cpu-timers.c | 2 +- kernel/time/tick-sched.c | 2 +- kernel/tracepoint.c | 2 +- kernel/tsacct.c | 2 +- kernel/user_namespace.c | 1 + lib/is_single_threaded.c | 3 +-- mm/filemap.c | 1 + mm/kmemleak.c | 2 +- mm/memory-failure.c | 2 +- mm/vmacache.c | 2 +- net/9p/client.c | 2 +- net/atm/common.c | 2 +- net/ax25/af_ax25.c | 2 +- net/caif/caif_socket.c | 2 +- net/core/stream.c | 1 + net/decnet/af_decnet.c | 2 +- net/irda/af_irda.c | 1 + net/netrom/af_netrom.c | 2 +- net/rose/af_rose.c | 2 +- net/sctp/socket.c | 1 + net/sunrpc/svc.c | 2 +- 
net/unix/af_unix.c | 2 +- net/x25/af_x25.c | 2 +- security/selinux/hooks.c | 2 +- 180 files changed, 204 insertions(+), 121 deletions(-) create mode 100644 include/linux/sched/signal.h (limited to 'kernel/bpf') diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 9d27a7d333dc..568ca29f2ad9 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -11,7 +11,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index 17308f925306..b8221f112eee 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -6,7 +6,7 @@ * 1997-11-02 Modified for POSIX.1b signals by Richard Henderson */ -#include +#include #include #include #include diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index af2994206b4b..6448ab00043d 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 47948b4dd157..c25e8827e7cd 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -4,7 +4,7 @@ * Copyright (C) 1995 Linus Torvalds */ -#include +#include #include #include #include diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c index c927aa84e652..ff83e78d0cfb 100644 --- a/arch/arc/kernel/traps.c +++ b/arch/arc/kernel/traps.c @@ -13,7 +13,7 @@ * Rahul Trivedi: Codito Technologies 2004 */ -#include +#include #include #include #include diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index e94e5aa33985..162c97528872 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index ae738a6319f6..46f7bab81c40 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -10,7 +10,7 @@ * published by the Free 
Software Foundation. */ #include -#include +#include #include #include #include diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 9688ec0c6ef4..dc5f1a22b3c9 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 7d5f4c736a16..d7fe2de9cc9e 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index c2b5b9892fd1..520c7778d330 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index bf4d3bc41a7a..2a9040dcf47e 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 66353caa35b9..d448f9cd7715 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 569d5a650a4a..a71a48e71fff 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index b883f1f75216..06da8ea16bbe 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index a22161ccf447..64fc32ea3422 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -22,7 +22,7 @@ #include #include #include 
-#include +#include #include #include #include diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 7d47c2cdfd93..5b8779a849a2 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 81283851c9af..30dd60ab8a65 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 01c171723bb3..1e0a2650c88b 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c index eb4a3fcfbaff..50b541325025 100644 --- a/arch/avr32/kernel/traps.c +++ b/arch/avr32/kernel/traps.c @@ -14,7 +14,7 @@ #include #include /* print_modules */ #include -#include +#include #include #include diff --git a/arch/blackfin/kernel/trace.c b/arch/blackfin/kernel/trace.c index 719dd796c12c..01e546ad2f7a 100644 --- a/arch/blackfin/kernel/trace.c +++ b/arch/blackfin/kernel/trace.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c index 1ed85ddadc0d..32676d7721b1 100644 --- a/arch/blackfin/kernel/traps.c +++ b/arch/blackfin/kernel/traps.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c index 94183d3639ef..1fca464f1b9e 100644 --- a/arch/cris/mm/fault.c +++ b/arch/cris/mm/fault.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/arch/frv/kernel/traps.c b/arch/frv/kernel/traps.c index 31221fb4348e..43c134d6081c 100644 --- a/arch/frv/kernel/traps.c +++ b/arch/frv/kernel/traps.c @@ -9,7 
+9,7 @@ * 2 of the License, or (at your option) any later version. */ -#include +#include #include #include #include diff --git a/arch/h8300/kernel/ptrace_s.c b/arch/h8300/kernel/ptrace_s.c index ef5a9c13e76d..c0af930052c0 100644 --- a/arch/h8300/kernel/ptrace_s.c +++ b/arch/h8300/kernel/ptrace_s.c @@ -10,7 +10,7 @@ */ #include -#include +#include #include #include diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index 110dab152f82..4496bcf605ef 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -19,7 +19,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c index 489875fd2be4..3eec33c5cfd7 100644 --- a/arch/hexagon/mm/vm_fault.c +++ b/arch/hexagon/mm/vm_fault.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c index 60ef83e6db71..8786c8b4f187 100644 --- a/arch/ia64/kernel/asm-offsets.c +++ b/arch/ia64/kernel/asm-offsets.c @@ -6,7 +6,7 @@ #define ASM_OFFSETS_C 1 -#include +#include #include #include #include diff --git a/arch/ia64/kernel/brl_emu.c b/arch/ia64/kernel/brl_emu.c index 8682df6263d6..987b11be0021 100644 --- a/arch/ia64/kernel/brl_emu.c +++ b/arch/ia64/kernel/brl_emu.c @@ -8,7 +8,7 @@ */ #include -#include +#include #include #include diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 9509cc73b9c6..5ac51069e453 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -72,7 +72,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 8981ce98afb3..48ba46b025e1 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include /* For unblank_screen() */ #include diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c 
index 99348d7f2255..a13680ca1e61 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -15,7 +15,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 7f2feb21753c..15f09cfff335 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -4,7 +4,7 @@ * Copyright (C) 1998-2002 Hewlett-Packard Co * David Mosberger-Tang */ -#include +#include #include #include #include diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 06cdaef54b2e..8f3efa682ee8 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index ae037a304ee4..b11facd11c9d 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -7,7 +7,7 @@ * Copyright (C) 2001 MIPS Technologies, Inc. */ #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/signal_o32.c b/arch/mips/kernel/signal_o32.c index 5e169fc5ca5c..2b3572fb5f1b 100644 --- a/arch/mips/kernel/signal_o32.c +++ b/arch/mips/kernel/signal_o32.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index d6d92c02308d..374d71e61ef6 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ EXPORT_SYMBOL(shm_align_mask); diff --git a/arch/mips/sgi-ip22/ip22-berr.c b/arch/mips/sgi-ip22/ip22-berr.c index 3f6ccd53c15d..ff8e1935c873 100644 --- a/arch/mips/sgi-ip22/ip22-berr.c +++ b/arch/mips/sgi-ip22/ip22-berr.c @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include diff --git a/arch/mips/sgi-ip22/ip22-reset.c b/arch/mips/sgi-ip22/ip22-reset.c index a36f6b87548a..03a39ac5ead9 100644 --- a/arch/mips/sgi-ip22/ip22-reset.c +++ 
b/arch/mips/sgi-ip22/ip22-reset.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mn10300/kernel/fpu.c b/arch/mn10300/kernel/fpu.c index 2578b7ae7dd5..50ce7b447fed 100644 --- a/arch/mn10300/kernel/fpu.c +++ b/arch/mn10300/kernel/fpu.c @@ -9,6 +9,8 @@ * 2 of the Licence, or (at your option) any later version. */ #include +#include + #include #include #include diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index 53592a639744..e310ab499385 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index bf3294171230..ce07cd3f2507 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 0a21067ac0a3..a08ab481e556 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/parisc/math-emu/driver.c b/arch/parisc/math-emu/driver.c index 09ef4136c693..2fb59d2e2b29 100644 --- a/arch/parisc/math-emu/driver.c +++ b/arch/parisc/math-emu/driver.c @@ -27,7 +27,8 @@ * Copyright (C) 2001 Hewlett-Packard */ -#include +#include + #include "float.h" #include "math-emu.h" diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index ab9d14c0e460..3e26cd4979f9 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c index 2f1e44362198..8013861aeaa7 100644 --- a/arch/powerpc/mm/mmap.c +++ b/arch/powerpc/mm/mmap.c @@ -25,7 +25,7 @@ 
#include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index 7de7124ac91b..497130c5c742 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -10,7 +10,7 @@ * */ -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index e29e4d5afa2d..870c0a82d560 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -19,7 +19,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include +#include #include #include diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 26fa03fc9f3c..16321ad9e70c 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 80c093e0c6f1..9bf8327154ee 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -13,6 +13,9 @@ #include #include #include +#include +#include + #include #include #include diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 7ae1282d5be9..5ea09403bb87 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c index 569ac02f68df..fa624f30f783 100644 --- a/arch/score/kernel/traps.c +++ b/arch/score/kernel/traps.c @@ -24,7 +24,7 @@ */ #include -#include +#include #include #include diff --git a/arch/sh/kernel/cpu/sh2a/fpu.c b/arch/sh/kernel/cpu/sh2a/fpu.c index 98bbaa447c93..352f894bece1 100644 --- a/arch/sh/kernel/cpu/sh2a/fpu.c +++ b/arch/sh/kernel/cpu/sh2a/fpu.c @@ -9,7 +9,7 @@ * * FIXME! 
These routines can be optimized in big endian case. */ -#include +#include #include #include #include diff --git a/arch/sh/kernel/hw_breakpoint.c b/arch/sh/kernel/hw_breakpoint.c index 2197fc584186..afe965712a69 100644 --- a/arch/sh/kernel/hw_breakpoint.c +++ b/arch/sh/kernel/hw_breakpoint.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index 9513fa7840aa..3036dee854d1 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -8,6 +8,8 @@ #include #include #include +#include + #include #include /* print_modules */ #include diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c index 5078cb809750..c86f4360c6ce 100644 --- a/arch/sh/math-emu/math.c +++ b/arch/sh/math-emu/math.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sh/mm/asids-debugfs.c b/arch/sh/mm/asids-debugfs.c index bf95fdaedd0c..110bd35165bf 100644 --- a/arch/sh/mm/asids-debugfs.c +++ b/arch/sh/mm/asids-debugfs.c @@ -20,6 +20,8 @@ #include #include #include +#include + #include #include diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 9bf876780cef..6fd1bf7481c7 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -13,6 +13,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index fb7b185ee941..ae49639a484e 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 884c70331345..54d3999d8119 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c index 
d20d4e3fd129..8367dce5f41b 100644 --- a/arch/sparc/kernel/unaligned_32.c +++ b/arch/sparc/kernel/unaligned_32.c @@ -8,7 +8,7 @@ #include -#include +#include #include #include #include diff --git a/arch/tile/mm/mmap.c b/arch/tile/mm/mmap.c index ef61c597898b..377e312dc27e 100644 --- a/arch/tile/mm/mmap.c +++ b/arch/tile/mm/mmap.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 62087028a9ce..366e57f5e8d6 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -5,8 +5,9 @@ #include #include -#include +#include #include + #include "chan.h" #include #include diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index b60a9f8cda75..79218106a033 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -3,7 +3,7 @@ * Licensed under the GPL */ -#include +#include #include #include #include diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 3943e9d7d13d..7a1f2a936fd1 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -5,8 +5,9 @@ */ #include -#include +#include #include + #include #include #include diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 3777b82759bd..37508b190106 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -5,7 +5,8 @@ #include #include -#include +#include + #include #include #include diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index ad8f206ab5e8..9711ae4aaa6a 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -4,7 +4,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/unicore32/kernel/fpu-ucf64.c b/arch/unicore32/kernel/fpu-ucf64.c index a53343a90ca2..12c8c9527b8e 100644 --- a/arch/unicore32/kernel/fpu-ucf64.c +++ b/arch/unicore32/kernel/fpu-ucf64.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/unicore32/kernel/traps.c 
b/arch/unicore32/kernel/traps.c index c54e32410ead..7f5e06f9a202 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -14,6 +14,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c index b656d216a8a8..bbefcc46a45e 100644 --- a/arch/unicore32/mm/fault.c +++ b/arch/unicore32/mm/fault.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 636c4b341f36..df91fb393a01 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -27,6 +27,7 @@ #include #include +#include #include #include diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 8af04afdfcb9..d48af18e7baf 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 1cda35277278..ac7810513d0e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index d2dc0438d654..5eabf34008f1 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include struct va_alignment __read_mostly va_align = { diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 282bf721a4d6..84abd66e680d 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -24,7 +24,7 @@ */ #include -#include +#include #include #include #include diff --git a/drivers/android/binder.c b/drivers/android/binder.c index e33e7fbe870b..aae4d8d4be36 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -31,7 
+31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4cb8f21ff4ef..724d1c50fc52 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/char/snsc_event.c b/drivers/char/snsc_event.c index 59bcefd6ec7c..e452673dff66 100644 --- a/drivers/char/snsc_event.c +++ b/drivers/char/snsc_event.c @@ -16,7 +16,7 @@ */ #include -#include +#include #include #include #include diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 6a3470f84998..d1ce83d73a87 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c index 32d43f86a8f2..96bb6badb818 100644 --- a/drivers/gpu/drm/drm_lock.c +++ b/drivers/gpu/drm/drm_lock.c @@ -34,6 +34,8 @@ */ #include +#include + #include #include "drm_legacy.h" #include "drm_internal.h" diff --git a/drivers/gpu/drm/ttm/ttm_lock.c b/drivers/gpu/drm/ttm/ttm_lock.c index f154fb1929bd..913f4318cdc0 100644 --- a/drivers/gpu/drm/ttm/ttm_lock.c +++ b/drivers/gpu/drm/ttm/ttm_lock.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #define TTM_WRITE_LOCK_PENDING (1 << 0) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index d525b1a2986a..27f155d2df8d 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c index 20f4ddcac3b0..68295a12b771 100644 --- a/drivers/infiniband/hw/hfi1/user_pages.c +++ 
b/drivers/infiniband/hw/hfi1/user_pages.c @@ -46,7 +46,7 @@ */ #include -#include +#include #include #include diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c index 75f08624ac05..ce83ba9a12ef 100644 --- a/drivers/infiniband/hw/qib/qib_user_pages.c +++ b/drivers/infiniband/hw/qib/qib_user_pages.c @@ -32,6 +32,7 @@ */ #include +#include #include #include "qib.h" diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index ba868be5af2e..c49db7c33979 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/isdn/i4l/isdn_tty.c b/drivers/isdn/i4l/isdn_tty.c index 63eaa0a9f8a1..1b169559a240 100644 --- a/drivers/isdn/i4l/isdn_tty.c +++ b/drivers/isdn/i4l/isdn_tty.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "isdn_common.h" #include "isdn_tty.h" #ifdef CONFIG_ISDN_AUDIO diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c index 67c21876c35f..6ceca7db62ad 100644 --- a/drivers/isdn/mISDN/l1oip_core.c +++ b/drivers/isdn/mISDN/l1oip_core.c @@ -234,6 +234,8 @@ #include #include #include +#include + #include #include "core.h" #include "l1oip.h" diff --git a/drivers/md/md.c b/drivers/md/md.c index 985374f20e2e..548d1b8014f8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -44,6 +44,7 @@ */ +#include #include #include #include diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 7453d94eeed7..fbc2d7851b49 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -37,7 +37,10 @@ #include #include #include +#include + #include + #include "md.h" #include "raid1.h" #include "bitmap.h" diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 2ce23b01dbb2..4fb09b3fcb41 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -55,6 +55,8 @@ #include #include #include +#include + #include 
#include "md.h" diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c index cb290b8ca0c8..dd4617764f14 100644 --- a/drivers/misc/genwqe/card_dev.c +++ b/drivers/misc/genwqe/card_dev.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/misc/mic/cosm/cosm_scif_server.c b/drivers/misc/mic/cosm/cosm_scif_server.c index 5696df4326b5..85f7d09cc65f 100644 --- a/drivers/misc/mic/cosm/cosm_scif_server.c +++ b/drivers/misc/mic/cosm/cosm_scif_server.c @@ -19,6 +19,8 @@ * */ #include +#include + #include "cosm_main.h" /* diff --git a/drivers/misc/mic/cosm_client/cosm_scif_client.c b/drivers/misc/mic/cosm_client/cosm_scif_client.c index 03e98bf1ac15..aa530fcceaa9 100644 --- a/drivers/misc/mic/cosm_client/cosm_scif_client.c +++ b/drivers/misc/mic/cosm_client/cosm_scif_client.c @@ -22,6 +22,8 @@ #include #include #include +#include + #include "../cosm/cosm_main.h" #define COSM_SCIF_MAX_RETRIES 10 diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c index d0e9c60f944e..329727e00e97 100644 --- a/drivers/misc/mic/scif/scif_rma.c +++ b/drivers/misc/mic/scif/scif_rma.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "scif_main.h" #include "scif_map.h" diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 08db4d687533..1da31dc47f86 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -66,7 +66,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index c5744b45ec8f..65689469c5a1 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/parisc/power.c b/drivers/parisc/power.c index 
ef31b77404ef..e2a3112f1c98 100644 --- a/drivers/parisc/power.c +++ b/drivers/parisc/power.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/ps3/ps3-sys-manager.c b/drivers/ps3/ps3-sys-manager.c index f2ab435954f6..73e496a72113 100644 --- a/drivers/ps3/ps3-sys-manager.c +++ b/drivers/ps3/ps3-sys-manager.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c index 85eca1cef063..c4518168fd02 100644 --- a/drivers/s390/char/fs3270.c +++ b/drivers/s390/char/fs3270.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/s390/char/keyboard.c b/drivers/s390/char/keyboard.c index 82c913318b73..ba0e4f93503d 100644 --- a/drivers/s390/char/keyboard.c +++ b/drivers/s390/char/keyboard.c @@ -7,7 +7,7 @@ */ #include -#include +#include #include #include diff --git a/drivers/scsi/bnx2fc/bnx2fc.h b/drivers/scsi/bnx2fc/bnx2fc.h index fdd4eb4e41b2..4fc8ed5fe067 100644 --- a/drivers/scsi/bnx2fc/bnx2fc.h +++ b/drivers/scsi/bnx2fc/bnx2fc.h @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/scsi/bnx2i/bnx2i.h b/drivers/scsi/bnx2i/bnx2i.h index ed7f3228e234..89ef1a1678d1 100644 --- a/drivers/scsi/bnx2i/bnx2i.h +++ b/drivers/scsi/bnx2i/bnx2i.h @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 834d1212b6d5..07c08ce68d70 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index ec3b66561412..054660049395 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -37,7 +37,7 @@ #include #include #include 
-#include +#include #include #include #include diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c index cf902154f0aa..bcf9f3dd0310 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include "../../../include/linux/libcfs/libcfs.h" diff --git a/drivers/staging/rtl8188eu/include/osdep_service.h b/drivers/staging/rtl8188eu/include/osdep_service.h index ee3f5ee06529..9e390648d93e 100644 --- a/drivers/staging/rtl8188eu/include/osdep_service.h +++ b/drivers/staging/rtl8188eu/include/osdep_service.h @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/staging/rtl8712/osdep_service.h b/drivers/staging/rtl8712/osdep_service.h index b8a170978434..5d33020554cd 100644 --- a/drivers/staging/rtl8712/osdep_service.h +++ b/drivers/staging/rtl8712/osdep_service.h @@ -33,7 +33,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/staging/rtl8712/rtl8712_cmd.c b/drivers/staging/rtl8712/rtl8712_cmd.c index f19b6b27aa71..5346c657485d 100644 --- a/drivers/staging/rtl8712/rtl8712_cmd.c +++ b/drivers/staging/rtl8712/rtl8712_cmd.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index cb0b7ca36b1e..8a0d214f6e9b 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -34,6 +34,7 @@ #include #include +#include #include #include #include diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_util.h b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_util.h index 4055d4bf9f74..e63964f5a18a 100644 --- 
a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_util.h +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_util.h @@ -47,7 +47,7 @@ #include #include #include -#include +#include #include #include #include /* for time_t */ diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index da2c73a255de..fa1d578d56bd 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c index b54e72c7ab0f..a4d5e6749932 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.c +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -17,6 +17,8 @@ * GNU General Public License for more details. ******************************************************************************/ +#include + #include #include #include diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index eab274d17b5c..b03cc03423c1 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include /* TCP_NODELAY */ #include /* ipv6_addr_v4mapped() */ diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 46388c9e08da..29eb09e0cd15 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index a23fa5ed1d67..66b59a15780d 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 71136742e606..65db0aeb3d80 100644 --- a/drivers/tty/sysrq.c +++ 
b/drivers/tty/sysrq.c @@ -14,7 +14,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include +#include #include #include #include diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index a1fd3f7d487a..985d33f0f315 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -69,7 +69,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index 397e1509fe51..6b3a2c00974d 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 9d3ce505e7ab..5c4933bb4b53 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -72,7 +72,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index a56edf2d58eb..0cbfe1ff6f6c 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c index 5a59da0dc98a..3e80aa3b917a 100644 --- a/drivers/usb/atm/usbatm.c +++ b/drivers/usb/atm/usbatm.c @@ -74,7 +74,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index ca425e8099ea..cfc3cff6e8d5 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -36,6 +36,7 @@ #include #include +#include #include #include #include diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 8f3659b65f53..4c8aacc232c0 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -207,6 +207,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c 
b/drivers/vfio/vfio_iommu_spapr_tce.c index 185d50ee1b12..cf3de91fbfe7 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 7e94c7fc90ae..c26fa1f3ed86 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/w1/w1_family.c b/drivers/w1/w1_family.c index df1c9bb90eb5..2096f460498f 100644 --- a/drivers/w1/w1_family.c +++ b/drivers/w1/w1_family.c @@ -14,7 +14,7 @@ #include #include -#include /* schedule_timeout() */ +#include #include #include diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c index 4ce1b66d5092..2cae7b29bb5f 100644 --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/fs/attr.c b/fs/attr.c index c902b3d53508..135304146120 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 79fbd85db4ba..24a58bf9ca72 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "autofs_i.h" diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 777ad9f4fc3c..9bf25be05636 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index f6c6c8adbc01..e82357c89979 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c @@ -15,7 +15,7 @@ */ #include -#include +#include #include #include #include diff --git a/fs/coredump.c b/fs/coredump.c index 23a539b29225..c74ab43b8383 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -17,6 +17,7 @@ #include 
#include #include +#include #include #include #include @@ -34,7 +35,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/exec.c b/fs/exec.c index 9c80d011594e..aa228b98ad5b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/file.c b/fs/file.c index 69d6990e3021..ad6f094f2eff 100644 --- a/fs/file.c +++ b/fs/file.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 7dca743b2ce1..543ed50f0387 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c index e5c1783ab64a..453a6a1fff34 100644 --- a/fs/jffs2/background.c +++ b/fs/jffs2/background.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include "nodelist.h" diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 7e4ea3b9f472..e7c8b9c76e48 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include #include diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 7eb89c23c847..d5606099712a 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c index 97b111d79489..bdea177aa405 100644 --- a/fs/ncpfs/sock.c +++ b/fs/ncpfs/sock.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 484bebc20bca..bb79972dc638 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index efd66da99201..786a4a2cb2d7 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -6,7 +6,7 @@ * Copyright 
(C) 1995, 1996, 1997 Olaf Kirch */ -#include +#include #include #include #include diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 00ce1531b2f5..c330495c3115 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/fs/select.c b/fs/select.c index 305c0daf5d67..e2112270d75a 100644 --- a/fs/select.c +++ b/fs/select.c @@ -15,7 +15,8 @@ */ #include -#include +#include +#include #include #include #include @@ -26,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/oom.h b/include/linux/oom.h index b4e36e92bc87..8a266e2be5a6 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -2,7 +2,7 @@ #define __INCLUDE_LINUX_OOM_H -#include +#include #include #include #include diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index e0e539321ab9..422bc2e4cb6a 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -3,6 +3,7 @@ #include /* For unlikely. */ #include /* For struct task_struct. */ +#include /* For send_sig(), same_thread_group(), etc. */ #include /* for IS_ERR_VALUE */ #include /* For BUG_ON. */ #include /* For task_active_pid_ns. 
*/ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h new file mode 100644 index 000000000000..da69cd05cd68 --- /dev/null +++ b/include/linux/sched/signal.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_SIGNAL_H +#define _LINUX_SCHED_SIGNAL_H + +#include + +#endif /* _LINUX_SCHED_SIGNAL_H */ diff --git a/include/linux/signalfd.h b/include/linux/signalfd.h index eadbe227c256..4985048640a7 100644 --- a/include/linux/signalfd.h +++ b/include/linux/signalfd.h @@ -8,7 +8,7 @@ #define _LINUX_SIGNALFD_H #include - +#include #ifdef CONFIG_SIGNALFD diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h index 58de6edf751f..e2a5daf8d14f 100644 --- a/include/linux/taskstats_kern.h +++ b/include/linux/taskstats_kern.h @@ -8,7 +8,7 @@ #define _LINUX_TASKSTATS_KERN_H #include -#include +#include #include #ifdef CONFIG_TASKSTATS diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 40e448de8a7e..4f7241fbeff3 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include "util.h" diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 461eb1e66a0f..7af0dcc5d755 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/cpu.c b/kernel/cpu.c index 0a5f630f5c54..0aae3183b029 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index 19d9a578c753..7510dc687c0d 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -29,6 +29,7 @@ */ #include +#include #include #include #include diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c index fe15fff5df53..9b976a42376d 100644 --- a/kernel/debug/kdb/kdb_bt.c +++ b/kernel/debug/kdb/kdb_bt.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include 
"kdb_private.h" diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 40c07e4fa116..129247e56902 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -16,6 +16,8 @@ #include #include #include +#include + #include /* diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index a0e90e0afc75..da128deb10ec 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index e2307a6c29f1..641249471952 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c index a95f13c31464..f6b961c5e58c 100644 --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index b4377a5e4269..a2475a9f57d8 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -2,7 +2,7 @@ * Implement CPU time clocks for the POSIX clock interface. 
*/ -#include +#include #include #include #include diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 4fee1c3abd0b..0f411a4690a0 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 1f9a31f934a4..9f6984d52fec 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include extern struct tracepoint * const __start___tracepoints_ptrs[]; diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 571a2d3821d8..d9a03b80b75d 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -17,7 +17,7 @@ */ #include -#include +#include #include #include #include diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 86b7854fec8e..2f735cbe05e8 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c index 391fd23976a2..9745cfffcb69 100644 --- a/lib/is_single_threaded.c +++ b/lib/is_single_threaded.c @@ -9,8 +9,7 @@ * as published by the Free Software Foundation; either version * 2 of the Licence, or (at your option) any later version. */ - -#include +#include /* * Returns true if the task does not share ->mm with another thread/process. 
diff --git a/mm/filemap.c b/mm/filemap.c index 1944c631e3e6..1694623a6289 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/kmemleak.c b/mm/kmemleak.c index da3436953022..2df6d3687b2a 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -73,7 +73,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 3d0f2fd4bf73..b74984db386e 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/mm/vmacache.c b/mm/vmacache.c index 7c233f8e20ee..4355d34c68a6 100644 --- a/mm/vmacache.c +++ b/mm/vmacache.c @@ -1,7 +1,7 @@ /* * Copyright (C) 2014 Davidlohr Bueso. */ -#include +#include #include #include diff --git a/net/9p/client.c b/net/9p/client.c index 3fc94a49ccd5..25cfd8a4bc36 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/atm/common.c b/net/atm/common.c index a3ca922d307b..9613381f5db0 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -13,7 +13,7 @@ #include /* error codes */ #include #include -#include +#include #include /* struct timeval */ #include #include diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 90fcf5fc2e0a..a8e42cedf1db 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 92cbbd2afddb..adcad344c843 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/core/stream.c b/net/core/stream.c index f575bcf64af2..20231dbb1da0 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -13,6 
+13,7 @@ */ #include +#include #include #include #include diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index a90ed67027b0..e6e79eda9763 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -106,7 +106,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat #include #include #include -#include +#include #include #include #include diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index ab254041dab7..81adc29a448d 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index ed212ffc1d9d..4bbf4526b885 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 9ad301c46b88..b8a1df2c9785 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 465a9c8464f9..6f0a9be50f50 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index b94efd93d3e4..a08aeb56b8e4 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -11,7 +11,7 @@ */ #include -#include +#include #include #include #include diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e2d18b9f910f..ee37b390260a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -85,7 +85,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 079c883aa96e..fd28a49dbe8f 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include #include diff --git 
a/security/selinux/hooks.c b/security/selinux/hooks.c index 9a8f12f8d5b7..b12f873f92ba 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.3