author    Alexei Starovoitov <ast@kernel.org>  2018-05-14 19:11:46 -0700
committer Alexei Starovoitov <ast@kernel.org>  2018-05-14 19:11:46 -0700
commit    fb40c9ddd66b9c9bb811bbee125b3cb3ba1faee7
tree      0febd11729744b0eb50eae2ba198d80f7a6d05ac
parent    53ea24c20cea32b1dc70673402b496c4a5291d2d
parent    a82d8cd398716b41f0fbe3882e3fe3f5ccf9f9cf
Merge branch 'bpf-jit-cleanups'
Daniel Borkmann says:
====================
This series follows up mostly with some minor cleanups on top
of 'Move ld_abs/ld_ind to native BPF' as well as implements better
32/64 bit immediate load into register and saves tail call init on
cBPF for the arm64 JIT. Last but not least we add a couple of test
cases. For details please see individual patches. Thanks!
v1 -> v2:
- Minor fix in i64_i16_blocks() to remove 24 shift.
- Added last two patches.
- Added Acks from prior round.
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
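
The arm64 immediate optimization in this series picks between a MOVZ-based and
a MOVN-based decomposition by counting how many 16-bit chunks of the value
differ from all-zeros versus all-ones, and then only emits MOVKs for chunks
that still differ. Below is a minimal, stand-alone user space sketch of that
decision: i64_i16_blocks() mirrors the helper added in the arm64 patch, while
insns_needed() and the sample values are only illustrative, and the real JIT
additionally takes a separate fast path for values that fit into 32 bits.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Count 16-bit chunks that differ from the MOVZ (0x0000) or MOVN (0xffff) fill. */
static int i64_i16_blocks(uint64_t val, bool inverse)
{
        return (((val >>  0) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
               (((val >> 16) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
               (((val >> 32) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
               (((val >> 48) & 0xffff) != (inverse ? 0xffff : 0x0000));
}

/* Instructions for the generic 64-bit path: one MOVZ/MOVN plus one MOVK per
 * remaining chunk; 0 and ~0 still need the single MOVZ/MOVN.
 */
static int insns_needed(uint64_t val)
{
        bool inverse = i64_i16_blocks(val, true) < i64_i16_blocks(val, false);
        int n = i64_i16_blocks(val, inverse);

        return n ? n : 1;
}

int main(void)
{
        uint64_t samples[] = { ~0x0ULL, 0xffffffffffff0000ULL,
                               0xdead00000000beefULL, 0x123456789abcdef0ULL };

        for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
                printf("%#018llx -> %d insn(s)\n",
                       (unsigned long long)samples[i], insns_needed(samples[i]));
        return 0;
}

The previous emitter always started with a MOVZ at shift 0 and then added a
MOVK per non-zero chunk, so an all-ones value cost four instructions; with the
MOVN path it now costs one.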
-rw-r--r--  arch/arm/net/bpf_jit_32.c                   |  13
-rw-r--r--  arch/arm64/net/bpf_jit_comp.c               | 115
-rw-r--r--  arch/mips/net/ebpf_jit.c                    |  26
-rw-r--r--  arch/sparc/net/bpf_jit_comp_64.c            |   1
-rw-r--r--  arch/x86/include/asm/nospec-branch.h        |  29
-rw-r--r--  tools/testing/selftests/bpf/bpf_rand.h      |  80
-rw-r--r--  tools/testing/selftests/bpf/test_verifier.c |  62
7 files changed, 228 insertions(+), 98 deletions(-)
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 82689b999257..d3ea6454e775 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -234,18 +234,11 @@ static void jit_fill_hole(void *area, unsigned int size)
 #define SCRATCH_SIZE 80
 
 /* total stack size used in JITed code */
-#define _STACK_SIZE \
-        (ctx->prog->aux->stack_depth + \
-         + SCRATCH_SIZE + \
-         + 4 /* extra for skb_copy_bits buffer */)
-
-#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
+#define _STACK_SIZE (ctx->prog->aux->stack_depth + SCRATCH_SIZE)
+#define STACK_SIZE  ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
 
 /* Get the offset of eBPF REGISTERs stored on scratch space. */
-#define STACK_VAR(off) (STACK_SIZE-off-4)
-
-/* Offset of skb_copy_bits buffer */
-#define SKB_BUFFER STACK_VAR(SCRATCH_SIZE)
+#define STACK_VAR(off) (STACK_SIZE - off)
 
 #if __LINUX_ARM_ARCH__ < 7
 
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 0b40c8fb0706..a6fdaea07c63 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -21,7 +21,6 @@
 #include <linux/bpf.h>
 #include <linux/filter.h>
 #include <linux/printk.h>
-#include <linux/skbuff.h>
 #include <linux/slab.h>
 
 #include <asm/byteorder.h>
@@ -80,23 +79,66 @@ static inline void emit(const u32 insn, struct jit_ctx *ctx)
         ctx->idx++;
 }
 
+static inline void emit_a64_mov_i(const int is64, const int reg,
+                                  const s32 val, struct jit_ctx *ctx)
+{
+        u16 hi = val >> 16;
+        u16 lo = val & 0xffff;
+
+        if (hi & 0x8000) {
+                if (hi == 0xffff) {
+                        emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
+                } else {
+                        emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
+                        if (lo != 0xffff)
+                                emit(A64_MOVK(is64, reg, lo, 0), ctx);
+                }
+        } else {
+                emit(A64_MOVZ(is64, reg, lo, 0), ctx);
+                if (hi)
+                        emit(A64_MOVK(is64, reg, hi, 16), ctx);
+        }
+}
+
+static int i64_i16_blocks(const u64 val, bool inverse)
+{
+        return (((val >>  0) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
+               (((val >> 16) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
+               (((val >> 32) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
+               (((val >> 48) & 0xffff) != (inverse ? 0xffff : 0x0000));
+}
+
 static inline void emit_a64_mov_i64(const int reg, const u64 val,
                                     struct jit_ctx *ctx)
 {
-        u64 tmp = val;
-        int shift = 0;
-
-        emit(A64_MOVZ(1, reg, tmp & 0xffff, shift), ctx);
-        tmp >>= 16;
-        shift += 16;
-        while (tmp) {
-                if (tmp & 0xffff)
-                        emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
-                tmp >>= 16;
-                shift += 16;
+        u64 nrm_tmp = val, rev_tmp = ~val;
+        bool inverse;
+        int shift;
+
+        if (!(nrm_tmp >> 32))
+                return emit_a64_mov_i(0, reg, (u32)val, ctx);
+
+        inverse = i64_i16_blocks(nrm_tmp, true) < i64_i16_blocks(nrm_tmp, false);
+        shift = max(round_down((inverse ? (fls64(rev_tmp) - 1) :
+                                          (fls64(nrm_tmp) - 1)), 16), 0);
+        if (inverse)
+                emit(A64_MOVN(1, reg, (rev_tmp >> shift) & 0xffff, shift), ctx);
+        else
+                emit(A64_MOVZ(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
+        shift -= 16;
+        while (shift >= 0) {
+                if (((nrm_tmp >> shift) & 0xffff) != (inverse ? 0xffff : 0x0000))
+                        emit(A64_MOVK(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
+                shift -= 16;
         }
 }
 
+/*
+ * This is an unoptimized 64 immediate emission used for BPF to BPF call
+ * addresses. It will always do a full 64 bit decomposition as otherwise
+ * more complexity in the last extra pass is required since we previously
+ * reserved 4 instructions for the address.
+ */
 static inline void emit_addr_mov_i64(const int reg, const u64 val,
                                      struct jit_ctx *ctx)
 {
@@ -111,26 +153,6 @@ static inline void emit_addr_mov_i64(const int reg, const u64 val,
         }
 }
 
-static inline void emit_a64_mov_i(const int is64, const int reg,
-                                  const s32 val, struct jit_ctx *ctx)
-{
-        u16 hi = val >> 16;
-        u16 lo = val & 0xffff;
-
-        if (hi & 0x8000) {
-                if (hi == 0xffff) {
-                        emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
-                } else {
-                        emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
-                        emit(A64_MOVK(is64, reg, lo, 0), ctx);
-                }
-        } else {
-                emit(A64_MOVZ(is64, reg, lo, 0), ctx);
-                if (hi)
-                        emit(A64_MOVK(is64, reg, hi, 16), ctx);
-        }
-}
-
 static inline int bpf2a64_offset(int bpf_to, int bpf_from,
                                  const struct jit_ctx *ctx)
 {
@@ -163,7 +185,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
 /* Tail call offset to jump into */
 #define PROLOGUE_OFFSET 7
 
-static int build_prologue(struct jit_ctx *ctx)
+static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
 {
         const struct bpf_prog *prog = ctx->prog;
         const u8 r6 = bpf2a64[BPF_REG_6];
@@ -188,7 +210,7 @@ static int build_prologue(struct jit_ctx *ctx)
          * | ... | BPF prog stack
          * |     |
          * +-----+ <= (BPF_FP - prog->aux->stack_depth)
-         * |RSVD | JIT scratchpad
+         * |RSVD | padding
          * current A64_SP => +-----+ <= (BPF_FP - ctx->stack_size)
          * |     |
          * | ... | Function call stack
@@ -210,19 +232,19 @@ static int build_prologue(struct jit_ctx *ctx)
         /* Set up BPF prog stack base register */
         emit(A64_MOV(1, fp, A64_SP), ctx);
 
-        /* Initialize tail_call_cnt */
-        emit(A64_MOVZ(1, tcc, 0, 0), ctx);
+        if (!ebpf_from_cbpf) {
+                /* Initialize tail_call_cnt */
+                emit(A64_MOVZ(1, tcc, 0, 0), ctx);
 
-        cur_offset = ctx->idx - idx0;
-        if (cur_offset != PROLOGUE_OFFSET) {
-                pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
-                            cur_offset, PROLOGUE_OFFSET);
-                return -1;
+                cur_offset = ctx->idx - idx0;
+                if (cur_offset != PROLOGUE_OFFSET) {
+                        pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
+                                    cur_offset, PROLOGUE_OFFSET);
+                        return -1;
+                }
         }
 
-        /* 4 byte extra for skb_copy_bits buffer */
-        ctx->stack_size = prog->aux->stack_depth + 4;
-        ctx->stack_size = STACK_ALIGN(ctx->stack_size);
+        ctx->stack_size = STACK_ALIGN(prog->aux->stack_depth);
 
         /* Set up function call stack */
         emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
@@ -786,6 +808,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
         struct bpf_prog *tmp, *orig_prog = prog;
         struct bpf_binary_header *header;
         struct arm64_jit_data *jit_data;
+        bool was_classic = bpf_prog_was_classic(prog);
         bool tmp_blinded = false;
         bool extra_pass = false;
         struct jit_ctx ctx;
@@ -840,7 +863,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
                 goto out_off;
         }
 
-        if (build_prologue(&ctx)) {
+        if (build_prologue(&ctx, was_classic)) {
                 prog = orig_prog;
                 goto out_off;
         }
@@ -863,7 +886,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 skip_init_ctx:
         ctx.idx = 0;
 
-        build_prologue(&ctx);
+        build_prologue(&ctx, was_classic);
 
         if (build_body(&ctx)) {
                 bpf_jit_binary_free(header);
diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
index 7ba7df9c28fc..aeb7b1b0f202 100644
--- a/arch/mips/net/ebpf_jit.c
+++ b/arch/mips/net/ebpf_jit.c
@@ -95,7 +95,6 @@ enum reg_val_type {
  * struct jit_ctx - JIT context
  * @skf:         The sk_filter
  * @stack_size:  eBPF stack size
- * @tmp_offset:  eBPF $sp offset to 8-byte temporary memory
  * @idx:         Instruction index
  * @flags:       JIT flags
  * @offsets:     Instruction offsets
@@ -105,7 +104,6 @@ enum reg_val_type {
 struct jit_ctx {
         const struct bpf_prog *skf;
         int stack_size;
-        int tmp_offset;
         u32 idx;
         u32 flags;
         u32 *offsets;
@@ -293,7 +291,6 @@ static int gen_int_prologue(struct jit_ctx *ctx)
 
         locals_size = (ctx->flags & EBPF_SEEN_FP) ? MAX_BPF_STACK : 0;
         stack_adjust += locals_size;
-        ctx->tmp_offset = locals_size;
 
         ctx->stack_size = stack_adjust;
 
@@ -399,7 +396,6 @@ static void gen_imm_to_reg(const struct bpf_insn *insn, int reg,
                 emit_instr(ctx, lui, reg, upper >> 16);
                 emit_instr(ctx, addiu, reg, reg, lower);
         }
-
 }
 
 static int gen_imm_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
@@ -547,28 +543,6 @@ static int gen_imm_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
         return 0;
 }
 
-static void * __must_check
-ool_skb_header_pointer(const struct sk_buff *skb, int offset,
-                       int len, void *buffer)
-{
-        return skb_header_pointer(skb, offset, len, buffer);
-}
-
-static int size_to_len(const struct bpf_insn *insn)
-{
-        switch (BPF_SIZE(insn->code)) {
-        case BPF_B:
-                return 1;
-        case BPF_H:
-                return 2;
-        case BPF_W:
-                return 4;
-        case BPF_DW:
-                return 8;
-        }
-        return 0;
-}
-
 static void emit_const_to_reg(struct jit_ctx *ctx, int dst, u64 value)
 {
         if (value >= 0xffffffffffff8000ull || value < 0x8000ull) {
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 9f5918e0693a..222785af550b 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -894,7 +894,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
         const int i = insn - ctx->prog->insnsi;
         const s16 off = insn->off;
         const s32 imm = insn->imm;
-        u32 *func;
 
         if (insn->src_reg == BPF_REG_FP)
                 ctx->saw_frame_pointer = true;
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 2cd344d1a6e5..2f700a1db851 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -301,9 +301,9 @@ do {                                                    \
  *    jmp *%edx for x86_32
  */
 #ifdef CONFIG_RETPOLINE
-#ifdef CONFIG_X86_64
-# define RETPOLINE_RAX_BPF_JIT_SIZE 17
-# define RETPOLINE_RAX_BPF_JIT()                                \
+# ifdef CONFIG_X86_64
+#  define RETPOLINE_RAX_BPF_JIT_SIZE 17
+#  define RETPOLINE_RAX_BPF_JIT()                               \
 do {                                                            \
         EMIT1_off32(0xE8, 7);    /* callq do_rop */             \
         /* spec_trap: */                                        \
@@ -314,8 +314,8 @@ do {                                                    \
         EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */    \
         EMIT1(0xC3);             /* retq */                     \
 } while (0)
-#else
-# define RETPOLINE_EDX_BPF_JIT()                                \
+# else /* !CONFIG_X86_64 */
+#  define RETPOLINE_EDX_BPF_JIT()                               \
 do {                                                            \
         EMIT1_off32(0xE8, 7);    /* call do_rop */              \
         /* spec_trap: */                                        \
@@ -326,17 +326,16 @@ do {                                                  \
         EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */          \
         EMIT1(0xC3);             /* ret */                      \
 } while (0)
-#endif
+# endif
 #else /* !CONFIG_RETPOLINE */
-
-#ifdef CONFIG_X86_64
-# define RETPOLINE_RAX_BPF_JIT_SIZE 2
-# define RETPOLINE_RAX_BPF_JIT()                                \
-        EMIT2(0xFF, 0xE0);       /* jmp *%rax */
-#else
-# define RETPOLINE_EDX_BPF_JIT()                                \
-        EMIT2(0xFF, 0xE2)        /* jmp *%edx */
-#endif
+# ifdef CONFIG_X86_64
+#  define RETPOLINE_RAX_BPF_JIT_SIZE 2
+#  define RETPOLINE_RAX_BPF_JIT()                               \
+        EMIT2(0xFF, 0xE0);       /* jmp *%rax */
+# else /* !CONFIG_X86_64 */
+#  define RETPOLINE_EDX_BPF_JIT()                               \
+        EMIT2(0xFF, 0xE2)        /* jmp *%edx */
+# endif
 #endif
 
 #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
diff --git a/tools/testing/selftests/bpf/bpf_rand.h b/tools/testing/selftests/bpf/bpf_rand.h
new file mode 100644
index 000000000000..59bf3e1a9371
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_rand.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_RAND__
+#define __BPF_RAND__
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <time.h>
+
+static inline uint64_t bpf_rand_mask(uint64_t mask)
+{
+        return (((uint64_t)(uint32_t)rand()) |
+                ((uint64_t)(uint32_t)rand() << 32)) & mask;
+}
+
+#define bpf_rand_ux(x, m)                               \
+static inline uint64_t bpf_rand_u##x(int shift)         \
+{                                                       \
+        return bpf_rand_mask((m)) << shift;             \
+}
+
+bpf_rand_ux( 8,               0xffULL)
+bpf_rand_ux(16,             0xffffULL)
+bpf_rand_ux(24,           0xffffffULL)
+bpf_rand_ux(32,         0xffffffffULL)
+bpf_rand_ux(40,       0xffffffffffULL)
+bpf_rand_ux(48,     0xffffffffffffULL)
+bpf_rand_ux(56,   0xffffffffffffffULL)
+bpf_rand_ux(64, 0xffffffffffffffffULL)
+
+static inline void bpf_semi_rand_init(void)
+{
+        srand(time(NULL));
+}
+
+static inline uint64_t bpf_semi_rand_get(void)
+{
+        switch (rand() % 39) {
+        case 0:  return 0x000000ff00000000ULL | bpf_rand_u8(0);
+        case 1:  return 0xffffffff00000000ULL | bpf_rand_u16(0);
+        case 2:  return 0x00000000ffff0000ULL | bpf_rand_u16(0);
+        case 3:  return 0x8000000000000000ULL | bpf_rand_u32(0);
+        case 4:  return 0x00000000f0000000ULL | bpf_rand_u32(0);
+        case 5:  return 0x0000000100000000ULL | bpf_rand_u24(0);
+        case 6:  return 0x800ff00000000000ULL | bpf_rand_u32(0);
+        case 7:  return 0x7fffffff00000000ULL | bpf_rand_u32(0);
+        case 8:  return 0xffffffffffffff00ULL ^ bpf_rand_u32(24);
+        case 9:  return 0xffffffffffffff00ULL | bpf_rand_u8(0);
+        case 10: return 0x0000000010000000ULL | bpf_rand_u32(0);
+        case 11: return 0xf000000000000000ULL | bpf_rand_u8(0);
+        case 12: return 0x0000f00000000000ULL | bpf_rand_u8(8);
+        case 13: return 0x000000000f000000ULL | bpf_rand_u8(16);
+        case 14: return 0x0000000000000f00ULL | bpf_rand_u8(32);
+        case 15: return 0x00fff00000000f00ULL | bpf_rand_u8(48);
+        case 16: return 0x00007fffffffffffULL ^ bpf_rand_u32(1);
+        case 17: return 0xffff800000000000ULL | bpf_rand_u8(4);
+        case 18: return 0xffff800000000000ULL | bpf_rand_u8(20);
+        case 19: return (0xffffffc000000000ULL + 0x80000ULL) | bpf_rand_u32(0);
+        case 20: return (0xffffffc000000000ULL - 0x04000000ULL) | bpf_rand_u32(0);
+        case 21: return 0x0000000000000000ULL | bpf_rand_u8(55) | bpf_rand_u32(20);
+        case 22: return 0xffffffffffffffffULL ^ bpf_rand_u8(3) ^ bpf_rand_u32(40);
+        case 23: return 0x0000000000000000ULL | bpf_rand_u8(bpf_rand_u8(0) % 64);
+        case 24: return 0x0000000000000000ULL | bpf_rand_u16(bpf_rand_u8(0) % 64);
+        case 25: return 0xffffffffffffffffULL ^ bpf_rand_u8(bpf_rand_u8(0) % 64);
+        case 26: return 0xffffffffffffffffULL ^ bpf_rand_u40(bpf_rand_u8(0) % 64);
+        case 27: return 0x0000800000000000ULL;
+        case 28: return 0x8000000000000000ULL;
+        case 29: return 0x0000000000000000ULL;
+        case 30: return 0xffffffffffffffffULL;
+        case 31: return bpf_rand_u16(bpf_rand_u8(0) % 64);
+        case 32: return bpf_rand_u24(bpf_rand_u8(0) % 64);
+        case 33: return bpf_rand_u32(bpf_rand_u8(0) % 64);
+        case 34: return bpf_rand_u40(bpf_rand_u8(0) % 64);
+        case 35: return bpf_rand_u48(bpf_rand_u8(0) % 64);
+        case 36: return bpf_rand_u56(bpf_rand_u8(0) % 64);
+        case 37: return bpf_rand_u64(bpf_rand_u8(0) % 64);
+        default: return bpf_rand_u64(0);
+        }
+}
+
+#endif /* __BPF_RAND__ */
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 275b4570b5b8..a877af00605d 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -41,6 +41,7 @@
 # endif
 #endif
 #include "bpf_rlimit.h"
+#include "bpf_rand.h"
 #include "../../../include/linux/filter.h"
 
 #ifndef ARRAY_SIZE
@@ -152,6 +153,30 @@ static void bpf_fill_jump_around_ld_abs(struct bpf_test *self)
         insn[i] = BPF_EXIT_INSN();
 }
 
+static void bpf_fill_rand_ld_dw(struct bpf_test *self)
+{
+        struct bpf_insn *insn = self->insns;
+        uint64_t res = 0;
+        int i = 0;
+
+        insn[i++] = BPF_MOV32_IMM(BPF_REG_0, 0);
+        while (i < self->retval) {
+                uint64_t val = bpf_semi_rand_get();
+                struct bpf_insn tmp[2] = { BPF_LD_IMM64(BPF_REG_1, val) };
+
+                res ^= val;
+                insn[i++] = tmp[0];
+                insn[i++] = tmp[1];
+                insn[i++] = BPF_ALU64_REG(BPF_XOR, BPF_REG_0, BPF_REG_1);
+        }
+        insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0);
+        insn[i++] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32);
+        insn[i++] = BPF_ALU64_REG(BPF_XOR, BPF_REG_0, BPF_REG_1);
+        insn[i] = BPF_EXIT_INSN();
+        res ^= (res >> 32);
+        self->retval = (uint32_t)res;
+}
+
 static struct bpf_test tests[] = {
         {
                 "add+sub+mul",
@@ -11974,6 +11999,42 @@ static struct bpf_test tests[] = {
                 .result = ACCEPT,
                 .retval = 10,
         },
+        {
+                "ld_dw: xor semi-random 64 bit imms, test 1",
+                .insns = { },
+                .data = { },
+                .fill_helper = bpf_fill_rand_ld_dw,
+                .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+                .result = ACCEPT,
+                .retval = 4090,
+        },
+        {
+                "ld_dw: xor semi-random 64 bit imms, test 2",
+                .insns = { },
+                .data = { },
+                .fill_helper = bpf_fill_rand_ld_dw,
+                .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+                .result = ACCEPT,
+                .retval = 2047,
+        },
+        {
+                "ld_dw: xor semi-random 64 bit imms, test 3",
+                .insns = { },
+                .data = { },
+                .fill_helper = bpf_fill_rand_ld_dw,
+                .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+                .result = ACCEPT,
+                .retval = 511,
+        },
+        {
+                "ld_dw: xor semi-random 64 bit imms, test 4",
+                .insns = { },
+                .data = { },
+                .fill_helper = bpf_fill_rand_ld_dw,
+                .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+                .result = ACCEPT,
+                .retval = 5,
+        },
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
@@ -12346,5 +12407,6 @@ int main(int argc, char **argv)
                 return EXIT_FAILURE;
         }
 
+        bpf_semi_rand_init();
         return do_test(unpriv, from, to);
 }
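
On the selftest side, bpf_fill_rand_ld_dw() builds a program that XORs a chain
of semi-random 64-bit immediates into R0 and finally folds the upper 32 bits
into the lower half, while the fill helper computes the same checksum on the
host and stores it as the expected retval, so any mis-emitted BPF_LD_IMM64
shows up as a return value mismatch. A small host-side model of that checksum
follows; ld_dw_checksum() and the fixed sample values are illustrative
stand-ins, whereas the real helper draws its values from bpf_semi_rand_get().

#include <stdint.h>
#include <stdio.h>

/* Host-side model of the check done by bpf_fill_rand_ld_dw(): XOR all 64-bit
 * immediates together, then fold the upper 32 bits into the lower ones. The
 * JITed program computes the same value in R0.
 */
static uint32_t ld_dw_checksum(const uint64_t *vals, int n)
{
        uint64_t res = 0;

        for (int i = 0; i < n; i++)
                res ^= vals[i];
        res ^= res >> 32;
        return (uint32_t)res;
}

int main(void)
{
        uint64_t vals[] = { 0xffffffff00000000ULL, 0x00000000f0000000ULL,
                            0x8000000000000000ULL, 0x00007fffffffffffULL };

        printf("expected retval: %u\n",
               (unsigned int)ld_dw_checksum(vals, sizeof(vals) / sizeof(vals[0])));
        return 0;
}

Before the fill helper runs, .retval doubles as the requested program size,
which is why the four new test cases pass 4090, 2047, 511 and 5 and then have
.retval rewritten to the folded checksum.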