diff options
70 files changed, 1453 insertions, 945 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index e6661b205f72..54410a1d4065 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -454,6 +454,7 @@ tcp_recovery - INTEGER RACK: 0x1 enables the RACK loss detection for fast detection of lost retransmissions and tail drops. + RACK: 0x2 makes RACK's reordering window static (min_rtt/4). Default: 0x1 diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c index b5d590e378ac..e3654782a3e2 100644 --- a/drivers/isdn/mISDN/l1oip_core.c +++ b/drivers/isdn/mISDN/l1oip_core.c @@ -440,14 +440,8 @@ l1oip_socket_recv(struct l1oip *hc, u8 remotecodec, u8 channel, u16 timebase, #ifdef REORDER_DEBUG if (hc->chan[channel].disorder_flag) { - struct sk_buff *skb; - int cnt; - skb = hc->chan[channel].disorder_skb; - hc->chan[channel].disorder_skb = nskb; - nskb = skb; - cnt = hc->chan[channel].disorder_cnt; - hc->chan[channel].disorder_cnt = rx_counter; - rx_counter = cnt; + swap(hc->chan[channel].disorder_skb, nskb); + swap(hc->chan[channel].disorder_cnt, rx_counter); } hc->chan[channel].disorder_flag ^= 1; if (nskb) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4e3d569bf32e..96416f5d97f3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7775,7 +7775,7 @@ static const struct net_device_ops bnxt_netdev_ops = { #endif .ndo_udp_tunnel_add = bnxt_udp_tunnel_add, .ndo_udp_tunnel_del = bnxt_udp_tunnel_del, - .ndo_xdp = bnxt_xdp, + .ndo_bpf = bnxt_xdp, .ndo_bridge_getlink = bnxt_bridge_getlink, .ndo_bridge_setlink = bnxt_bridge_setlink, .ndo_get_phys_port_name = bnxt_get_phys_port_name diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 06ce63c00821..261e5847557a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -208,7 +208,7 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) return 0; } -int bnxt_xdp(struct net_device *dev, struct netdev_xdp *xdp) +int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp) { struct bnxt *bp = netdev_priv(dev); int rc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h index 12a5ad66b564..414b748038ca 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h @@ -16,6 +16,6 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts); bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, struct page *page, u8 **data_ptr, unsigned int *len, u8 *event); -int bnxt_xdp(struct net_device *dev, struct netdev_xdp *xdp); +int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp); #endif diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c index 89b7820d59ce..32ae63b6f20e 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_core.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c @@ -467,7 +467,6 @@ liquidio_push_packet(u32 octeon_id __attribute__((unused)), if (netdev) { struct lio *lio = GET_LIO(netdev); struct octeon_device *oct = lio->oct_dev; - int packet_was_received; /* Do not proceed if the interface is not in RUNNING state. */ if (!ifstate_check(lio, LIO_IFSTATE_RUNNING)) { @@ -570,18 +569,10 @@ liquidio_push_packet(u32 octeon_id __attribute__((unused)), __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vtag); } - packet_was_received = (napi_gro_receive(napi, skb) != GRO_DROP); - - if (packet_was_received) { - droq->stats.rx_bytes_received += len; - droq->stats.rx_pkts_received++; - } else { - droq->stats.rx_dropped++; - netif_info(lio, rx_err, lio->netdev, - "droq:%d error rx_dropped:%llu\n", - droq->q_no, droq->stats.rx_dropped); - } + napi_gro_receive(napi, skb); + droq->stats.rx_bytes_received += len; + droq->stats.rx_pkts_received++; } else { recv_buffer_free(skb); } diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 71989e180289..a063c36c4c58 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1741,7 +1741,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) return 0; } -static int nicvf_xdp(struct net_device *netdev, struct netdev_xdp *xdp) +static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp) { struct nicvf *nic = netdev_priv(netdev); @@ -1774,7 +1774,7 @@ static const struct net_device_ops nicvf_netdev_ops = { .ndo_tx_timeout = nicvf_tx_timeout, .ndo_fix_features = nicvf_fix_features, .ndo_set_features = nicvf_set_features, - .ndo_xdp = nicvf_xdp, + .ndo_bpf = nicvf_xdp, }; static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 969f6b12952e..ebc55b6a6349 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -1721,6 +1721,7 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv, /* Iterate through the SGT entries and add data buffers to the skb */ sgt = vaddr + fd_off; + skb = NULL; for (i = 0; i < DPAA_SGT_MAX_ENTRIES; i++) { /* Extension bit is not supported */ WARN_ON(qm_sg_entry_is_ext(&sgt[i])); @@ -1738,7 +1739,7 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv, count_ptr = this_cpu_ptr(dpaa_bp->percpu_count); dma_unmap_single(dpaa_bp->dev, sg_addr, dpaa_bp->size, DMA_FROM_DEVICE); - if (i == 0) { + if (!skb) { sz = dpaa_bp->size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); skb = build_skb(sg_vaddr, sz); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index dfecaeda0654..05b94d87a6c3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -11648,12 +11648,12 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, } /** - * i40e_xdp - implements ndo_xdp for i40e + * i40e_xdp - implements ndo_bpf for i40e * @dev: netdevice * @xdp: XDP command **/ static int i40e_xdp(struct net_device *dev, - struct netdev_xdp *xdp) + struct netdev_bpf *xdp) { struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_vsi *vsi = np->vsi; @@ -11705,7 +11705,7 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_features_check = i40e_features_check, .ndo_bridge_getlink = i40e_ndo_bridge_getlink, .ndo_bridge_setlink = i40e_ndo_bridge_setlink, - .ndo_xdp = i40e_xdp, + .ndo_bpf = i40e_xdp, }; /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 507977994a03..e5dcb25be398 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -10004,7 +10004,7 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) return 0; } -static int ixgbe_xdp(struct net_device *dev, struct netdev_xdp *xdp) +static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp) { struct ixgbe_adapter *adapter = netdev_priv(dev); @@ -10113,7 +10113,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_udp_tunnel_add = ixgbe_add_udp_tunnel_port, .ndo_udp_tunnel_del = ixgbe_del_udp_tunnel_port, .ndo_features_check = ixgbe_features_check, - .ndo_xdp = ixgbe_xdp, + .ndo_bpf = ixgbe_xdp, .ndo_xdp_xmit = ixgbe_xdp_xmit, .ndo_xdp_flush = ixgbe_xdp_flush, }; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index d611df2f274d..736a6ccaf05e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2916,7 +2916,7 @@ static u32 mlx4_xdp_query(struct net_device *dev) return prog_id; } -static int mlx4_xdp(struct net_device *dev, struct netdev_xdp *xdp) +static int mlx4_xdp(struct net_device *dev, struct netdev_bpf *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: @@ -2958,7 +2958,7 @@ static const struct net_device_ops mlx4_netdev_ops = { .ndo_udp_tunnel_del = mlx4_en_del_vxlan_port, .ndo_features_check = mlx4_en_features_check, .ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate, - .ndo_xdp = mlx4_xdp, + .ndo_bpf = mlx4_xdp, }; static const struct net_device_ops mlx4_netdev_ops_master = { @@ -2995,7 +2995,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = { .ndo_udp_tunnel_del = mlx4_en_del_vxlan_port, .ndo_features_check = mlx4_en_features_check, .ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate, - .ndo_xdp = mlx4_xdp, + .ndo_bpf = mlx4_xdp, }; struct mlx4_en_bond { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d1c3dc946486..f877f2f5f2a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3830,7 +3830,7 @@ static u32 mlx5e_xdp_query(struct net_device *dev) return prog_id; } -static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp) +static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: @@ -3882,7 +3882,7 @@ static const struct net_device_ops mlx5e_netdev_ops = { .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif .ndo_tx_timeout = mlx5e_tx_timeout, - .ndo_xdp = mlx5e_xdp, + .ndo_bpf = mlx5e_xdp, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = mlx5e_netpoll, #endif diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 2609a2487100..995e95410b11 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -77,17 +77,6 @@ nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return meta->l.prev != &nfp_prog->insns; } -static void nfp_prog_free(struct nfp_prog *nfp_prog) -{ - struct nfp_insn_meta *meta, *tmp; - - list_for_each_entry_safe(meta, tmp, &nfp_prog->insns, l) { - list_del(&meta->l); - kfree(meta); - } - kfree(nfp_prog); -} - static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn) { if (nfp_prog->__prog_alloc_len == nfp_prog->prog_len) { @@ -202,47 +191,6 @@ emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer) } static void -__emit_br_byte(struct nfp_prog *nfp_prog, u8 areg, u8 breg, bool imm8, - u8 byte, bool equal, u16 addr, u8 defer, bool src_lmextn) -{ - u16 addr_lo, addr_hi; - u64 insn; - - addr_lo = addr & (OP_BB_ADDR_LO >> __bf_shf(OP_BB_ADDR_LO)); - addr_hi = addr != addr_lo; - - insn = OP_BBYTE_BASE | - FIELD_PREP(OP_BB_A_SRC, areg) | - FIELD_PREP(OP_BB_BYTE, byte) | - FIELD_PREP(OP_BB_B_SRC, breg) | - FIELD_PREP(OP_BB_I8, imm8) | - FIELD_PREP(OP_BB_EQ, equal) | - FIELD_PREP(OP_BB_DEFBR, defer) | - FIELD_PREP(OP_BB_ADDR_LO, addr_lo) | - FIELD_PREP(OP_BB_ADDR_HI, addr_hi) | - FIELD_PREP(OP_BB_SRC_LMEXTN, src_lmextn); - - nfp_prog_push(nfp_prog, insn); -} - -static void -emit_br_byte_neq(struct nfp_prog *nfp_prog, - swreg src, u8 imm, u8 byte, u16 addr, u8 defer) -{ - struct nfp_insn_re_regs reg; - int err; - - err = swreg_to_restricted(reg_none(), src, reg_imm(imm), ®, true); - if (err) { - nfp_prog->error = err; - return; - } - - __emit_br_byte(nfp_prog, reg.areg, reg.breg, reg.i8, byte, false, addr, - defer, reg.src_lmextn); -} - -static void __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi, enum immed_width width, bool invert, enum immed_shift shift, bool wr_both, @@ -1479,19 +1427,18 @@ static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, swreg dst = reg_both(meta->insn.dst_reg * 2); switch (meta->insn.off) { - case offsetof(struct sk_buff, len): - if (size != FIELD_SIZEOF(struct sk_buff, len)) + case offsetof(struct __sk_buff, len): + if (size != FIELD_SIZEOF(struct __sk_buff, len)) return -EOPNOTSUPP; wrp_mov(nfp_prog, dst, plen_reg(nfp_prog)); break; - case offsetof(struct sk_buff, data): - if (size != sizeof(void *)) + case offsetof(struct __sk_buff, data): + if (size != FIELD_SIZEOF(struct __sk_buff, data)) return -EOPNOTSUPP; wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); break; - case offsetof(struct sk_buff, cb) + - offsetof(struct bpf_skb_data_end, data_end): - if (size != sizeof(void *)) + case offsetof(struct __sk_buff, data_end): + if (size != FIELD_SIZEOF(struct __sk_buff, data_end)) return -EOPNOTSUPP; emit_alu(nfp_prog, dst, plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog)); @@ -1510,14 +1457,15 @@ static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, { swreg dst = reg_both(meta->insn.dst_reg * 2); - if (size != sizeof(void *)) - return -EINVAL; - switch (meta->insn.off) { - case offsetof(struct xdp_buff, data): + case offsetof(struct xdp_md, data): + if (size != FIELD_SIZEOF(struct xdp_md, data)) + return -EOPNOTSUPP; wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); break; - case offsetof(struct xdp_buff, data_end): + case offsetof(struct xdp_md, data_end): + if (size != FIELD_SIZEOF(struct xdp_md, data_end)) + return -EOPNOTSUPP; emit_alu(nfp_prog, dst, plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog)); break; @@ -1547,7 +1495,7 @@ mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int size) { if (meta->ptr.type == PTR_TO_CTX) { - if (nfp_prog->act == NN_ACT_XDP) + if (nfp_prog->type == BPF_PROG_TYPE_XDP) return mem_ldx_xdp(nfp_prog, meta, size); else return mem_ldx_skb(nfp_prog, meta, size); @@ -2022,34 +1970,6 @@ static void nfp_intro(struct nfp_prog *nfp_prog) plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog)); } -static void nfp_outro_tc_legacy(struct nfp_prog *nfp_prog) -{ - const u8 act2code[] = { - [NN_ACT_TC_DROP] = 0x22, - [NN_ACT_TC_REDIR] = 0x24 - }; - /* Target for aborts */ - nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); - wrp_immed(nfp_prog, reg_both(0), 0); - - /* Target for normal exits */ - nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); - /* Legacy TC mode: - * 0 0x11 -> pass, count as stat0 - * -1 drop 0x22 -> drop, count as stat1 - * redir 0x24 -> redir, count as stat1 - * ife mark 0x21 -> pass, count as stat1 - * ife + tx 0x24 -> redir, count as stat1 - */ - emit_br_byte_neq(nfp_prog, reg_b(0), 0xff, 0, nfp_prog->tgt_done, 2); - wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); - emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); - - emit_br(nfp_prog, BR_UNC, nfp_prog->tgt_done, 1); - emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(act2code[nfp_prog->act]), - SHF_SC_L_SHF, 16); -} - static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) { /* TC direct-action mode: @@ -2142,17 +2062,15 @@ static void nfp_outro_xdp(struct nfp_prog *nfp_prog) static void nfp_outro(struct nfp_prog *nfp_prog) { - switch (nfp_prog->act) { - case NN_ACT_DIRECT: + switch (nfp_prog->type) { + case BPF_PROG_TYPE_SCHED_CLS: nfp_outro_tc_da(nfp_prog); break; - case NN_ACT_TC_DROP: - case NN_ACT_TC_REDIR: - nfp_outro_tc_legacy(nfp_prog); - break; - case NN_ACT_XDP: + case BPF_PROG_TYPE_XDP: nfp_outro_xdp(nfp_prog); break; + default: + WARN_ON(1); } } @@ -2198,28 +2116,6 @@ static int nfp_translate(struct nfp_prog *nfp_prog) return nfp_fixup_branches(nfp_prog); } -static int -nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, - unsigned int cnt) -{ - unsigned int i; - - for (i = 0; i < cnt; i++) { - struct nfp_insn_meta *meta; - - meta = kzalloc(sizeof(*meta), GFP_KERNEL); - if (!meta) - return -ENOMEM; - - meta->insn = prog[i]; - meta->n = i; - - list_add_tail(&meta->l, &nfp_prog->insns); - } - - return 0; -} - /* --- Optimizations --- */ static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog) { @@ -2347,66 +2243,20 @@ static int nfp_bpf_ustore_calc(struct nfp_prog *nfp_prog, __le64 *ustore) return 0; } -/** - * nfp_bpf_jit() - translate BPF code into NFP assembly - * @filter: kernel BPF filter struct - * @prog_mem: memory to store assembler instructions - * @act: action attached to this eBPF program - * @prog_start: offset of the first instruction when loaded - * @prog_done: where to jump on exit - * @prog_sz: size of @prog_mem in instructions - * @res: achieved parameters of translation results - */ -int -nfp_bpf_jit(struct bpf_prog *filter, void *prog_mem, - enum nfp_bpf_action_type act, - unsigned int prog_start, unsigned int prog_done, - unsigned int prog_sz, struct nfp_bpf_result *res) +int nfp_bpf_jit(struct nfp_prog *nfp_prog) { - struct nfp_prog *nfp_prog; int ret; - nfp_prog = kzalloc(sizeof(*nfp_prog), GFP_KERNEL); - if (!nfp_prog) - return -ENOMEM; - - INIT_LIST_HEAD(&nfp_prog->insns); - nfp_prog->act = act; - nfp_prog->start_off = prog_start; - nfp_prog->tgt_done = prog_done; - - ret = nfp_prog_prepare(nfp_prog, filter->insnsi, filter->len); - if (ret) - goto out; - - ret = nfp_prog_verify(nfp_prog, filter); - if (ret) - goto out; - ret = nfp_bpf_optimize(nfp_prog); if (ret) - goto out; - - nfp_prog->num_regs = MAX_BPF_REG; - nfp_prog->regs_per_thread = 32; - - nfp_prog->prog = prog_mem; - nfp_prog->__prog_alloc_len = prog_sz; + return ret; ret = nfp_translate(nfp_prog); if (ret) { pr_err("Translation failed with error %d (translated: %u)\n", ret, nfp_prog->n_translated); - ret = -EINVAL; - goto out; + return -EINVAL; } - ret = nfp_bpf_ustore_calc(nfp_prog, (__force __le64 *)prog_mem); - - res->n_instr = nfp_prog->prog_len; - res->dense_mode = false; -out: - nfp_prog_free(nfp_prog); - - return ret; + return nfp_bpf_ustore_calc(nfp_prog, (__force __le64 *)nfp_prog->prog); } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 8e3e89cace8d..e379b78e86ef 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -54,28 +54,25 @@ static int nfp_bpf_xdp_offload(struct nfp_app *app, struct nfp_net *nn, struct bpf_prog *prog) { - struct tc_cls_bpf_offload cmd = { - .prog = prog, - }; + bool running, xdp_running; int ret; if (!nfp_net_ebpf_capable(nn)) return -EINVAL; - if (nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) { - if (!nn->dp.bpf_offload_xdp) - return prog ? -EBUSY : 0; - cmd.command = prog ? TC_CLSBPF_REPLACE : TC_CLSBPF_DESTROY; - } else { - if (!prog) - return 0; - cmd.command = TC_CLSBPF_ADD; - } + running = nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF; + xdp_running = running && nn->dp.bpf_offload_xdp; + + if (!prog && !xdp_running) + return 0; + if (prog && running && !xdp_running) + return -EBUSY; - ret = nfp_net_bpf_offload(nn, &cmd); + ret = nfp_net_bpf_offload(nn, prog, running); /* Stop offload if replace not possible */ - if (ret && cmd.command == TC_CLSBPF_REPLACE) + if (ret && prog) nfp_bpf_xdp_offload(app, nn, NULL); + nn->dp.bpf_offload_xdp = prog && !ret; return ret; } @@ -85,34 +82,10 @@ static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn) return nfp_net_ebpf_capable(nn) ? "BPF" : ""; } -static int -nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id) -{ - struct nfp_net_bpf_priv *priv; - int ret; - - priv = kmalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - nn->app_priv = priv; - spin_lock_init(&priv->rx_filter_lock); - priv->nn = nn; - timer_setup(&priv->rx_filter_stats_timer, - nfp_net_filter_stats_timer, 0); - - ret = nfp_app_nic_vnic_alloc(app, nn, id); - if (ret) - kfree(priv); - - return ret; -} - static void nfp_bpf_vnic_free(struct nfp_app *app, struct nfp_net *nn) { if (nn->dp.bpf_offload_xdp) nfp_bpf_xdp_offload(app, nn, NULL); - kfree(nn->app_priv); } static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, @@ -121,19 +94,29 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, struct tc_cls_bpf_offload *cls_bpf = type_data; struct nfp_net *nn = cb_priv; - if (!tc_can_offload(nn->dp.netdev)) + if (type != TC_SETUP_CLSBPF || + !tc_can_offload(nn->dp.netdev) || + !nfp_net_ebpf_capable(nn) || + cls_bpf->common.protocol != htons(ETH_P_ALL) || + cls_bpf->common.chain_index) + return -EOPNOTSUPP; + if (nn->dp.bpf_offload_xdp) + return -EBUSY; + + /* Only support TC direct action */ + if (!cls_bpf->exts_integrated || + tcf_exts_has_actions(cls_bpf->exts)) { + nn_err(nn, "only direct action with no legacy actions supported\n"); return -EOPNOTSUPP; + } - switch (type) { - case TC_SETUP_CLSBPF: - if (!nfp_net_ebpf_capable(nn) || - cls_bpf->common.protocol != htons(ETH_P_ALL) || - cls_bpf->common.chain_index) - return -EOPNOTSUPP; - if (nn->dp.bpf_offload_xdp) - return -EBUSY; - - return nfp_net_bpf_offload(nn, cls_bpf); + switch (cls_bpf->command) { + case TC_CLSBPF_REPLACE: + return nfp_net_bpf_offload(nn, cls_bpf->prog, true); + case TC_CLSBPF_ADD: + return nfp_net_bpf_offload(nn, cls_bpf->prog, false); + case TC_CLSBPF_DESTROY: + return nfp_net_bpf_offload(nn, NULL, true); default: return -EOPNOTSUPP; } @@ -184,10 +167,14 @@ const struct nfp_app_type app_bpf = { .extra_cap = nfp_bpf_extra_cap, - .vnic_alloc = nfp_bpf_vnic_alloc, + .vnic_alloc = nfp_app_nic_vnic_alloc, .vnic_free = nfp_bpf_vnic_free, .setup_tc = nfp_bpf_setup_tc, .tc_busy = nfp_bpf_tc_busy, .xdp_offload = nfp_bpf_xdp_offload, + + .bpf_verifier_prep = nfp_bpf_verifier_prep, + .bpf_translate = nfp_bpf_translate, + .bpf_destroy = nfp_bpf_destroy, }; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index bc604030ff6c..082a15f6dfb5 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -41,7 +41,6 @@ #include <linux/types.h> #include "../nfp_asm.h" -#include "../nfp_net.h" /* For branch fixup logic use up-most byte of branch instruction as scratch * area. Remember to clear this before sending instructions to HW! @@ -65,13 +64,6 @@ enum pkt_vec { PKT_VEC_PKT_PTR = 2, }; -enum nfp_bpf_action_type { - NN_ACT_TC_DROP, - NN_ACT_TC_REDIR, - NN_ACT_DIRECT, - NN_ACT_XDP, -}; - #define pv_len(np) reg_lm(1, PKT_VEC_PKT_LEN) #define pv_ctm_ptr(np) reg_lm(1, PKT_VEC_PKT_PTR) @@ -147,9 +139,8 @@ static inline u8 mbpf_mode(const struct nfp_insn_meta *meta) * @prog: machine code * @prog_len: number of valid instructions in @prog array * @__prog_alloc_len: alloc size of @prog array - * @act: BPF program/action type (TC DA, TC with action, XDP etc.) - * @num_regs: number of registers used by this program - * @regs_per_thread: number of basic registers allocated per thread + * @verifier_meta: temporary storage for verifier's insn meta + * @type: BPF program type * @start_off: address of the first instruction in the memory * @tgt_out: jump target for normal exit * @tgt_abort: jump target for abort (e.g. access outside of packet buffer) @@ -164,10 +155,9 @@ struct nfp_prog { unsigned int prog_len; unsigned int __prog_alloc_len; - enum nfp_bpf_action_type act; + struct nfp_insn_meta *verifier_meta; - unsigned int num_regs; - unsigned int regs_per_thread; + enum bpf_prog_type type; unsigned int start_off; unsigned int tgt_out; @@ -182,38 +172,21 @@ struct nfp_prog { struct list_head insns; }; -struct nfp_bpf_result { - unsigned int n_instr; - bool dense_mode; -}; - -int -nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act, - unsigned int prog_start, unsigned int prog_done, - unsigned int prog_sz, struct nfp_bpf_result *res); +int nfp_bpf_jit(struct nfp_prog *prog); -int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog); +extern const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops; +struct netdev_bpf; +struct nfp_app; struct nfp_net; -struct tc_cls_bpf_offload; - -/** - * struct nfp_net_bpf_priv - per-vNIC BPF private data - * @rx_filter: Filter offload statistics - dropped packets/bytes - * @rx_filter_prev: Filter offload statistics - values from previous update - * @rx_filter_change: Jiffies when statistics last changed - * @rx_filter_stats_timer: Timer for polling filter offload statistics - * @rx_filter_lock: Lock protecting timer state changes (teardown) - */ -struct nfp_net_bpf_priv { - struct nfp_stat_pair rx_filter, rx_filter_prev; - unsigned long rx_filter_change; - struct timer_list rx_filter_stats_timer; - struct nfp_net *nn; - spinlock_t rx_filter_lock; -}; -int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf); -void nfp_net_filter_stats_timer(struct timer_list *t); +int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, + bool old_prog); +int nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *bpf); +int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog); +int nfp_bpf_destroy(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog); #endif diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 6d576f631392..b6cee71f49d3 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -51,109 +51,114 @@ #include "../nfp_net_ctrl.h" #include "../nfp_net.h" -void nfp_net_filter_stats_timer(struct timer_list *t) +static int +nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, + unsigned int cnt) { - struct nfp_net_bpf_priv *priv = from_timer(priv, t, - rx_filter_stats_timer); - struct nfp_net *nn = priv->nn; - struct nfp_stat_pair latest; - - spin_lock_bh(&priv->rx_filter_lock); + unsigned int i; - if (nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) - mod_timer(&priv->rx_filter_stats_timer, - jiffies + NFP_NET_STAT_POLL_IVL); + for (i = 0; i < cnt; i++) { + struct nfp_insn_meta *meta; - spin_unlock_bh(&priv->rx_filter_lock); + meta = kzalloc(sizeof(*meta), GFP_KERNEL); + if (!meta) + return -ENOMEM; - latest.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES); - latest.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES); + meta->insn = prog[i]; + meta->n = i; - if (latest.pkts != priv->rx_filter.pkts) - priv->rx_filter_change = jiffies; + list_add_tail(&meta->l, &nfp_prog->insns); + } - priv->rx_filter = latest; + return 0; } -static void nfp_net_bpf_stats_reset(struct nfp_net *nn) +static void nfp_prog_free(struct nfp_prog *nfp_prog) { - struct nfp_net_bpf_priv *priv = nn->app_priv; + struct nfp_insn_meta *meta, *tmp; - priv->rx_filter.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES); - priv->rx_filter.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES); - priv->rx_filter_prev = priv->rx_filter; - priv->rx_filter_change = jiffies; + list_for_each_entry_safe(meta, tmp, &nfp_prog->insns, l) { + list_del(&meta->l); + kfree(meta); + } + kfree(nfp_prog); } -static int -nfp_net_bpf_stats_update(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) +int nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *bpf) { - struct nfp_net_bpf_priv *priv = nn->app_priv; - u64 bytes, pkts; + struct bpf_prog *prog = bpf->verifier.prog; + struct nfp_prog *nfp_prog; + int ret; - pkts = priv->rx_filter.pkts - priv->rx_filter_prev.pkts; - bytes = priv->rx_filter.bytes - priv->rx_filter_prev.bytes; - bytes -= pkts * ETH_HLEN; + nfp_prog = kzalloc(sizeof(*nfp_prog), GFP_KERNEL); + if (!nfp_prog) + return -ENOMEM; + prog->aux->offload->dev_priv = nfp_prog; - priv->rx_filter_prev = priv->rx_filter; + INIT_LIST_HEAD(&nfp_prog->insns); + nfp_prog->type = prog->type; + + ret = nfp_prog_prepare(nfp_prog, prog->insnsi, prog->len); + if (ret) + goto err_free; - tcf_exts_stats_update(cls_bpf->exts, - bytes, pkts, priv->rx_filter_change); + nfp_prog->verifier_meta = nfp_prog_first_meta(nfp_prog); + bpf->verifier.ops = &nfp_bpf_analyzer_ops; return 0; -} -static int -nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) -{ - const struct tc_action *a; - LIST_HEAD(actions); +err_free: + nfp_prog_free(nfp_prog); - if (!cls_bpf->exts) - return NN_ACT_XDP; + return ret; +} - /* TC direct action */ - if (cls_bpf->exts_integrated) { - if (!tcf_exts_has_actions(cls_bpf->exts)) - return NN_ACT_DIRECT; +int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog) +{ + struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; + unsigned int stack_size; + unsigned int max_instr; + stack_size = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; + if (prog->aux->stack_depth > stack_size) { + nn_info(nn, "stack too large: program %dB > FW stack %dB\n", + prog->aux->stack_depth, stack_size); return -EOPNOTSUPP; } - /* TC legacy mode */ - if (!tcf_exts_has_one_action(cls_bpf->exts)) - return -EOPNOTSUPP; + nfp_prog->stack_depth = prog->aux->stack_depth; + nfp_prog->start_off = nn_readw(nn, NFP_NET_CFG_BPF_START); + nfp_prog->tgt_done = nn_readw(nn, NFP_NET_CFG_BPF_DONE); - tcf_exts_to_list(cls_bpf->exts, &actions); - list_for_each_entry(a, &actions, list) { - if (is_tcf_gact_shot(a)) - return NN_ACT_TC_DROP; + max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN); + nfp_prog->__prog_alloc_len = max_instr * sizeof(u64); - if (is_tcf_mirred_egress_redirect(a) && - tcf_mirred_ifindex(a) == nn->dp.netdev->ifindex) - return NN_ACT_TC_REDIR; - } + nfp_prog->prog = kmalloc(nfp_prog->__prog_alloc_len, GFP_KERNEL); + if (!nfp_prog->prog) + return -ENOMEM; - return -EOPNOTSUPP; + return nfp_bpf_jit(nfp_prog); } -static int -nfp_net_bpf_offload_prepare(struct nfp_net *nn, - struct tc_cls_bpf_offload *cls_bpf, - struct nfp_bpf_result *res, - void **code, dma_addr_t *dma_addr, u16 max_instr) +int nfp_bpf_destroy(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog) { - unsigned int code_sz = max_instr * sizeof(u64); - enum nfp_bpf_action_type act; - unsigned int stack_size; - u16 start_off, done_off; - unsigned int max_mtu; - int ret; + struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; - ret = nfp_net_bpf_get_act(nn, cls_bpf); - if (ret < 0) - return ret; - act = ret; + kfree(nfp_prog->prog); + nfp_prog_free(nfp_prog); + + return 0; +} + +static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog) +{ + struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; + unsigned int max_mtu; + dma_addr_t dma_addr; + int err; max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; if (max_mtu < nn->dp.netdev->mtu) { @@ -161,141 +166,80 @@ nfp_net_bpf_offload_prepare(struct nfp_net *nn, return -EOPNOTSUPP; } - start_off = nn_readw(nn, NFP_NET_CFG_BPF_START); - done_off = nn_readw(nn, NFP_NET_CFG_BPF_DONE); - - stack_size = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; - if (cls_bpf->prog->aux->stack_depth > stack_size) { - nn_info(nn, "stack too large: program %dB > FW stack %dB\n", - cls_bpf->prog->aux->stack_depth, stack_size); - return -EOPNOTSUPP; - } - - *code = dma_zalloc_coherent(nn->dp.dev, code_sz, dma_addr, GFP_KERNEL); - if (!*code) + dma_addr = dma_map_single(nn->dp.dev, nfp_prog->prog, + nfp_prog->prog_len * sizeof(u64), + DMA_TO_DEVICE); + if (dma_mapping_error(nn->dp.dev, dma_addr)) return -ENOMEM; - ret = nfp_bpf_jit(cls_bpf->prog, *code, act, start_off, done_off, - max_instr, res); - if (ret) - goto out; + nn_writew(nn, NFP_NET_CFG_BPF_SIZE, nfp_prog->prog_len); + nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, dma_addr); - return 0; + /* Load up the JITed code */ + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_BPF); + if (err) + nn_err(nn, "FW command error while loading BPF: %d\n", err); -out: - dma_free_coherent(nn->dp.dev, code_sz, *code, *dma_addr); - return ret; + dma_unmap_single(nn->dp.dev, dma_addr, nfp_prog->prog_len * sizeof(u64), + DMA_TO_DEVICE); + + return err; } -static void -nfp_net_bpf_load_and_start(struct nfp_net *nn, u32 tc_flags, - void *code, dma_addr_t dma_addr, - unsigned int code_sz, unsigned int n_instr, - bool dense_mode) +static void nfp_net_bpf_start(struct nfp_net *nn) { - struct nfp_net_bpf_priv *priv = nn->app_priv; - u64 bpf_addr = dma_addr; int err; - nn->dp.bpf_offload_skip_sw = !!(tc_flags & TCA_CLS_FLAGS_SKIP_SW); - - if (dense_mode) - bpf_addr |= NFP_NET_CFG_BPF_CFG_8CTX; - - nn_writew(nn, NFP_NET_CFG_BPF_SIZE, n_instr); - nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, bpf_addr); - - /* Load up the JITed code */ - err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_BPF); - if (err) - nn_err(nn, "FW command error while loading BPF: %d\n", err); - /* Enable passing packets through BPF function */ nn->dp.ctrl |= NFP_NET_CFG_CTRL_BPF; nn_writel(nn, NFP_NET_CFG_CTRL, nn->dp.ctrl); err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); if (err) nn_err(nn, "FW command error while enabling BPF: %d\n", err); - - dma_free_coherent(nn->dp.dev, code_sz, code, dma_addr); - - nfp_net_bpf_stats_reset(nn); - mod_timer(&priv->rx_filter_stats_timer, - jiffies + NFP_NET_STAT_POLL_IVL); } static int nfp_net_bpf_stop(struct nfp_net *nn) { - struct nfp_net_bpf_priv *priv = nn->app_priv; - if (!(nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF)) return 0; - spin_lock_bh(&priv->rx_filter_lock); nn->dp.ctrl &= ~NFP_NET_CFG_CTRL_BPF; - spin_unlock_bh(&priv->rx_filter_lock); nn_writel(nn, NFP_NET_CFG_CTRL, nn->dp.ctrl); - del_timer_sync(&priv->rx_filter_stats_timer); - nn->dp.bpf_offload_skip_sw = 0; - return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); } -int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) +int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, + bool old_prog) { - struct nfp_bpf_result res; - dma_addr_t dma_addr; - u16 max_instr; - void *code; int err; - max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN); - - switch (cls_bpf->command) { - case TC_CLSBPF_REPLACE: - /* There is nothing stopping us from implementing seamless - * replace but the simple method of loading I adopted in - * the firmware does not handle atomic replace (i.e. we have to - * stop the BPF offload and re-enable it). Leaking-in a few - * frames which didn't have BPF applied in the hardware should - * be fine if software fallback is available, though. - */ - if (nn->dp.bpf_offload_skip_sw) - return -EBUSY; - - err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code, - &dma_addr, max_instr); - if (err) - return err; + if (prog && !prog->aux->offload) + return -EINVAL; - nfp_net_bpf_stop(nn); - nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code, - dma_addr, max_instr * sizeof(u64), - res.n_instr, res.dense_mode); - return 0; + if (prog && old_prog) { + u8 cap; - case TC_CLSBPF_ADD: - if (nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) + cap = nn_readb(nn, NFP_NET_CFG_BPF_CAP); + if (!(cap & NFP_NET_BPF_CAP_RELO)) { + nn_err(nn, "FW does not support live reload\n"); return -EBUSY; + } + } - err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code, - &dma_addr, max_instr); - if (err) - return err; + /* Something else is loaded, different program type? */ + if (!old_prog && nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) + return -EBUSY; - nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code, - dma_addr, max_instr * sizeof(u64), - res.n_instr, res.dense_mode); - return 0; - - case TC_CLSBPF_DESTROY: + if (old_prog && !prog) return nfp_net_bpf_stop(nn); - case TC_CLSBPF_STATS: - return nfp_net_bpf_stats_update(nn, cls_bpf); + err = nfp_net_bpf_load(nn, prog); + if (err) + return err; - default: - return -EOPNOTSUPP; - } + if (!old_prog) + nfp_net_bpf_start(nn); + + return 0; } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index a8c7615546a9..8d43491ddd6b 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -40,12 +40,6 @@ #include "main.h" -/* Analyzer/verifier definitions */ -struct nfp_bpf_analyzer_priv { - struct nfp_prog *prog; - struct nfp_insn_meta *meta; -}; - static struct nfp_insn_meta * nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int insn_idx, unsigned int n_insns) @@ -81,7 +75,7 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, const struct bpf_reg_state *reg0 = cur_regs(env) + BPF_REG_0; u64 imm; - if (nfp_prog->act == NN_ACT_XDP) + if (nfp_prog->type == BPF_PROG_TYPE_XDP) return 0; if (!(reg0->type == SCALAR_VALUE && tnum_is_const(reg0->var_off))) { @@ -94,13 +88,8 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, } imm = reg0->var_off.value; - if (nfp_prog->act != NN_ACT_DIRECT && imm != 0 && (imm & ~0U) != ~0U) { - pr_info("unsupported exit state: %d, imm: %llx\n", - reg0->type, imm); - return -EINVAL; - } - - if (nfp_prog->act == NN_ACT_DIRECT && imm <= TC_ACT_REDIRECT && + if (nfp_prog->type == BPF_PROG_TYPE_SCHED_CLS && + imm <= TC_ACT_REDIRECT && imm != TC_ACT_SHOT && imm != TC_ACT_STOLEN && imm != TC_ACT_QUEUED) { pr_info("unsupported exit state: %d, imm: %llx\n", @@ -176,11 +165,11 @@ nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, static int nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) { - struct nfp_bpf_analyzer_priv *priv = env->analyzer_priv; - struct nfp_insn_meta *meta = priv->meta; + struct nfp_prog *nfp_prog = env->prog->aux->offload->dev_priv; + struct nfp_insn_meta *meta = nfp_prog->verifier_meta; - meta = nfp_bpf_goto_meta(priv->prog, meta, insn_idx, env->prog->len); - priv->meta = meta; + meta = nfp_bpf_goto_meta(nfp_prog, meta, insn_idx, env->prog->len); + nfp_prog->verifier_meta = meta; if (meta->insn.src_reg >= MAX_BPF_REG || meta->insn.dst_reg >= MAX_BPF_REG) { @@ -189,39 +178,18 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) } if (meta->insn.code == (BPF_JMP | BPF_EXIT)) - return nfp_bpf_check_exit(priv->prog, env); + return nfp_bpf_check_exit(nfp_prog, env); if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM)) - return nfp_bpf_check_ptr(priv->prog, meta, env, + return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.src_reg); if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM)) - return nfp_bpf_check_ptr(priv->prog, meta, env, + return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.dst_reg); return 0; } -static const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops = { +const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops = { .insn_hook = nfp_verify_insn, }; - -int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog) -{ - struct nfp_bpf_analyzer_priv *priv; - int ret; - - nfp_prog->stack_depth = prog->aux->stack_depth; - - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - priv->prog = nfp_prog; - priv->meta = nfp_prog_first_meta(nfp_prog); - - ret = bpf_analyzer(prog, &nfp_bpf_analyzer_ops, priv); - - kfree(priv); - - return ret; -} diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c index e46e7c60d491..e0283bb24f06 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.c +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -142,8 +142,8 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app, { u8 nfp_pcie = nfp_cppcore_pcie_unit(app->pf->cpp); struct nfp_flower_priv *priv = app->priv; - struct nfp_reprs *reprs, *old_reprs; enum nfp_port_type port_type; + struct nfp_reprs *reprs; const u8 queue = 0; int i, err; @@ -194,11 +194,7 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app, reprs->reprs[i]->name); } - old_reprs = nfp_app_reprs_set(app, repr_type, reprs); - if (IS_ERR(old_reprs)) { - err = PTR_ERR(old_reprs); - goto err_reprs_clean; - } + nfp_app_reprs_set(app, repr_type, reprs); return 0; err_reprs_clean: @@ -222,8 +218,8 @@ static int nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv) { struct nfp_eth_table *eth_tbl = app->pf->eth_tbl; - struct nfp_reprs *reprs, *old_reprs; struct sk_buff *ctrl_skb; + struct nfp_reprs *reprs; unsigned int i; int err; @@ -280,11 +276,7 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv) phys_port, reprs->reprs[phys_port]->name); } - old_reprs = nfp_app_reprs_set(app, NFP_REPR_TYPE_PHYS_PORT, reprs); - if (IS_ERR(old_reprs)) { - err = PTR_ERR(old_reprs); - goto err_reprs_clean; - } + nfp_app_reprs_set(app, NFP_REPR_TYPE_PHYS_PORT, reprs); /* The MAC_REPR control message should be sent after the MAC * representors are registered using nfp_app_reprs_set(). This is diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c index 3644d74fe304..955a9f44d244 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c @@ -106,14 +106,8 @@ nfp_app_reprs_set(struct nfp_app *app, enum nfp_repr_type type, old = rcu_dereference_protected(app->reprs[type], lockdep_is_held(&app->pf->lock)); - if (reprs && old) { - old = ERR_PTR(-EBUSY); - goto exit_unlock; - } - rcu_assign_pointer(app->reprs[type], reprs); -exit_unlock: return old; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h index 857bb33020ba..54b67c9b8d5b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h @@ -42,6 +42,7 @@ struct bpf_prog; struct net_device; +struct netdev_bpf; struct pci_dev; struct sk_buff; struct sk_buff; @@ -83,6 +84,9 @@ extern const struct nfp_app_type app_flower; * @setup_tc: setup TC ndo * @tc_busy: TC HW offload busy (rules loaded) * @xdp_offload: offload an XDP program + * @bpf_verifier_prep: verifier prep for dev-specific BPF programs + * @bpf_translate: translate call for dev-specific BPF programs + * @bpf_destroy: destroy for dev-specific BPF programs * @eswitch_mode_get: get SR-IOV eswitch mode * @sriov_enable: app-specific sriov initialisation * @sriov_disable: app-specific sriov clean-up @@ -118,6 +122,12 @@ struct nfp_app_type { bool (*tc_busy)(struct nfp_app *app, struct nfp_net *nn); int (*xdp_offload)(struct nfp_app *app, struct nfp_net *nn, struct bpf_prog *prog); + int (*bpf_verifier_prep)(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *bpf); + int (*bpf_translate)(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog); + int (*bpf_destroy)(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog); int (*sriov_enable)(struct nfp_app *app, int num_vfs); void (*sriov_disable)(struct nfp_app *app); @@ -271,6 +281,33 @@ static inline int nfp_app_xdp_offload(struct nfp_app *app, struct nfp_net *nn, return app->type->xdp_offload(app, nn, prog); } +static inline int +nfp_app_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *bpf) +{ + if (!app || !app->type->bpf_verifier_prep) + return -EOPNOTSUPP; + return app->type->bpf_verifier_prep(app, nn, bpf); +} + +static inline int +nfp_app_bpf_translate(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog) +{ + if (!app || !app->type->bpf_translate) + return -EOPNOTSUPP; + return app->type->bpf_translate(app, nn, prog); +} + +static inline int +nfp_app_bpf_destroy(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog) +{ + if (!app || !app->type->bpf_destroy) + return -EOPNOTSUPP; + return app->type->bpf_destroy(app, nn, prog); +} + static inline bool nfp_app_ctrl_tx(struct nfp_app *app, struct sk_buff *skb) { trace_devlink_hwmsg(priv_to_devlink(app->pf), false, 0, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index f8fa63b66739..35eaccbece36 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -346,6 +346,32 @@ exit_release_fw: return err < 0 ? err : 1; } +static void +nfp_nsp_init_ports(struct pci_dev *pdev, struct nfp_pf *pf, + struct nfp_nsp *nsp) +{ + bool needs_reinit = false; + int i; + + pf->eth_tbl = __nfp_eth_read_ports(pf->cpp, nsp); + if (!pf->eth_tbl) + return; + + if (!nfp_nsp_has_mac_reinit(nsp)) + return; + + for (i = 0; i < pf->eth_tbl->count; i++) + needs_reinit |= pf->eth_tbl->ports[i].override_changed; + if (!needs_reinit) + return; + + kfree(pf->eth_tbl); + if (nfp_nsp_mac_reinit(nsp)) + dev_warn(&pdev->dev, "MAC reinit failed\n"); + + pf->eth_tbl = __nfp_eth_read_ports(pf->cpp, nsp); +} + static int nfp_nsp_init(struct pci_dev *pdev, struct nfp_pf *pf) { struct nfp_nsp *nsp; @@ -366,7 +392,7 @@ static int nfp_nsp_init(struct pci_dev *pdev, struct nfp_pf *pf) if (err < 0) goto exit_close_nsp; - pf->eth_tbl = __nfp_eth_read_ports(pf->cpp, nsp); + nfp_nsp_init_ports(pdev, pf, nsp); pf->nspi = __nfp_nsp_identify(nsp); if (pf->nspi) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 3d411f0d15b6..7f9857c276b1 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -476,7 +476,6 @@ struct nfp_stat_pair { * @dev: Backpointer to struct device * @netdev: Backpointer to net_device structure * @is_vf: Is the driver attached to a VF? - * @bpf_offload_skip_sw: Offloaded BPF program will not be rerun by cls_bpf * @bpf_offload_xdp: Offloaded BPF program is XDP * @chained_metadata_format: Firemware will use new metadata format * @rx_dma_dir: Mapping direction for RX buffers @@ -502,7 +501,6 @@ struct nfp_net_dp { struct net_device *netdev; u8 is_vf:1; - u8 bpf_offload_skip_sw:1; u8 bpf_offload_xdp:1; u8 chained_metadata_format:1; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 185a3dd35a3f..232044b1b7aa 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -3378,7 +3378,7 @@ nfp_net_xdp_setup(struct nfp_net *nn, struct bpf_prog *prog, u32 flags, return 0; } -static int nfp_net_xdp(struct net_device *netdev, struct netdev_xdp *xdp) +static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp) { struct nfp_net *nn = netdev_priv(netdev); @@ -3393,6 +3393,14 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_xdp *xdp) xdp->prog_attached = XDP_ATTACHED_HW; xdp->prog_id = nn->xdp_prog ? nn->xdp_prog->aux->id : 0; return 0; + case BPF_OFFLOAD_VERIFIER_PREP: + return nfp_app_bpf_verifier_prep(nn->app, nn, xdp); + case BPF_OFFLOAD_TRANSLATE: + return nfp_app_bpf_translate(nn->app, nn, + xdp->offload.prog); + case BPF_OFFLOAD_DESTROY: + return nfp_app_bpf_destroy(nn->app, nn, + xdp->offload.prog); default: return -EINVAL; } @@ -3441,7 +3449,7 @@ const struct net_device_ops nfp_net_netdev_ops = { .ndo_get_phys_port_name = nfp_port_get_phys_port_name, .ndo_udp_tunnel_add = nfp_net_add_vxlan_port, .ndo_udp_tunnel_del = nfp_net_del_vxlan_port, - .ndo_xdp = nfp_net_xdp, + .ndo_bpf = nfp_net_xdp, }; /** diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index c67b90c8d8b7..60c8d733a37d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -244,6 +244,30 @@ nfp_app_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) nfp_get_drvinfo(app, app->pdev, "*", drvinfo); } +static void +nfp_net_set_fec_link_mode(struct nfp_eth_table_port *eth_port, + struct ethtool_link_ksettings *c) +{ + unsigned int modes; + + ethtool_link_ksettings_add_link_mode(c, supported, FEC_NONE); + if (!nfp_eth_can_support_fec(eth_port)) { + ethtool_link_ksettings_add_link_mode(c, advertising, FEC_NONE); + return; + } + + modes = nfp_eth_supported_fec_modes(eth_port); + if (modes & NFP_FEC_BASER) { + ethtool_link_ksettings_add_link_mode(c, supported, FEC_BASER); + ethtool_link_ksettings_add_link_mode(c, advertising, FEC_BASER); + } + + if (modes & NFP_FEC_REED_SOLOMON) { + ethtool_link_ksettings_add_link_mode(c, supported, FEC_RS); + ethtool_link_ksettings_add_link_mode(c, advertising, FEC_RS); + } +} + /** * nfp_net_get_link_ksettings - Get Link Speed settings * @netdev: network interface device structure @@ -278,9 +302,11 @@ nfp_net_get_link_ksettings(struct net_device *netdev, port = nfp_port_from_netdev(netdev); eth_port = nfp_port_get_eth_port(port); - if (eth_port) + if (eth_port) { cmd->base.autoneg = eth_port->aneg != NFP_ANEG_DISABLED ? AUTONEG_ENABLE : AUTONEG_DISABLE; + nfp_net_set_fec_link_mode(eth_port, cmd); + } if (!netif_carrier_ok(netdev)) return 0; @@ -328,7 +354,7 @@ nfp_net_set_link_ksettings(struct net_device *netdev, return -EOPNOTSUPP; if (netif_running(netdev)) { - netdev_warn(netdev, "Changing settings not allowed on an active interface. It may cause the port to be disabled until reboot.\n"); + netdev_warn(netdev, "Changing settings not allowed on an active interface. It may cause the port to be disabled until driver reload.\n"); return -EBUSY; } @@ -686,6 +712,91 @@ static int nfp_port_get_sset_count(struct net_device *netdev, int sset) } } +static int nfp_port_fec_ethtool_to_nsp(u32 fec) +{ + switch (fec) { + case ETHTOOL_FEC_AUTO: + return NFP_FEC_AUTO_BIT; + case ETHTOOL_FEC_OFF: + return NFP_FEC_DISABLED_BIT; + case ETHTOOL_FEC_RS: + return NFP_FEC_REED_SOLOMON_BIT; + case ETHTOOL_FEC_BASER: + return NFP_FEC_BASER_BIT; + default: + /* NSP only supports a single mode at a time */ + return -EOPNOTSUPP; + } +} + +static u32 nfp_port_fec_nsp_to_ethtool(u32 fec) +{ + u32 result = 0; + + if (fec & NFP_FEC_AUTO) + result |= ETHTOOL_FEC_AUTO; + if (fec & NFP_FEC_BASER) + result |= ETHTOOL_FEC_BASER; + if (fec & NFP_FEC_REED_SOLOMON) + result |= ETHTOOL_FEC_RS; + if (fec & NFP_FEC_DISABLED) + result |= ETHTOOL_FEC_OFF; + + return result ?: ETHTOOL_FEC_NONE; +} + +static int +nfp_port_get_fecparam(struct net_device *netdev, + struct ethtool_fecparam *param) +{ + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + + param->active_fec = ETHTOOL_FEC_NONE_BIT; + param->fec = ETHTOOL_FEC_NONE_BIT; + + port = nfp_port_from_netdev(netdev); + eth_port = nfp_port_get_eth_port(port); + if (!eth_port) + return -EOPNOTSUPP; + + if (!nfp_eth_can_support_fec(eth_port)) + return 0; + + param->fec = nfp_port_fec_nsp_to_ethtool(eth_port->fec_modes_supported); + param->active_fec = nfp_port_fec_nsp_to_ethtool(eth_port->fec); + + return 0; +} + +static int +nfp_port_set_fecparam(struct net_device *netdev, + struct ethtool_fecparam *param) +{ + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + int err, fec; + + port = nfp_port_from_netdev(netdev); + eth_port = nfp_port_get_eth_port(port); + if (!eth_port) + return -EOPNOTSUPP; + + if (!nfp_eth_can_support_fec(eth_port)) + return -EOPNOTSUPP; + + fec = nfp_port_fec_ethtool_to_nsp(param->fec); + if (fec < 0) + return fec; + + err = nfp_eth_set_fec(port->app->cpp, eth_port->index, fec); + if (!err) + /* Only refresh if we did something */ + nfp_net_refresh_port_table(port); + + return err < 0 ? err : 0; +} + /* RX network flow classification (RSS, filters, etc) */ static u32 ethtool_flow_to_nfp_flag(u32 flow_type) @@ -1144,6 +1255,8 @@ static const struct ethtool_ops nfp_net_ethtool_ops = { .set_channels = nfp_net_set_channels, .get_link_ksettings = nfp_net_get_link_ksettings, .set_link_ksettings = nfp_net_set_link_ksettings, + .get_fecparam = nfp_port_get_fecparam, + .set_fecparam = nfp_port_set_fecparam, }; const struct ethtool_ops nfp_port_ethtool_ops = { @@ -1155,6 +1268,10 @@ const struct ethtool_ops nfp_port_ethtool_ops = { .set_dump = nfp_app_set_dump, .get_dump_flag = nfp_app_get_dump_flag, .get_dump_data = nfp_app_get_dump_data, + .get_link_ksettings = nfp_net_get_link_ksettings, + .set_link_ksettings = nfp_net_set_link_ksettings, + .get_fecparam = nfp_port_get_fecparam, + .set_fecparam = nfp_port_set_fecparam, }; void nfp_net_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c index ff373acd28f3..c505014121c4 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c @@ -597,7 +597,7 @@ nfp_net_eth_port_update(struct nfp_cpp *cpp, struct nfp_port *port, return -EIO; } if (eth_port->override_changed) { - nfp_warn(cpp, "Port #%d config changed, unregistering. Reboot required before port will be operational again.\n", port->eth_id); + nfp_warn(cpp, "Port #%d config changed, unregistering. Driver reload required before port will be operational again.\n", port->eth_id); port->type = NFP_PORT_INVALID; } @@ -611,6 +611,7 @@ int nfp_net_refresh_port_table_sync(struct nfp_pf *pf) struct nfp_eth_table *eth_table; struct nfp_net *nn, *next; struct nfp_port *port; + int err; lockdep_assert_held(&pf->lock); @@ -640,6 +641,11 @@ int nfp_net_refresh_port_table_sync(struct nfp_pf *pf) kfree(eth_table); + /* Resync repr state. This may cause reprs to be removed. */ + err = nfp_reprs_resync_phys_ports(pf->app); + if (err) + return err; + /* Shoot off the ports which became invalid */ list_for_each_entry_safe(nn, next, &pf->vnics, vnic_list) { if (!nn->port || nn->port->type != NFP_PORT_INVALID) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c index d540a9dc77b3..1bce8c131bb9 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -390,3 +390,50 @@ struct nfp_reprs *nfp_reprs_alloc(unsigned int num_reprs) return reprs; } + +int nfp_reprs_resync_phys_ports(struct nfp_app *app) +{ + struct nfp_reprs *reprs, *old_reprs; + struct nfp_repr *repr; + int i; + + old_reprs = + rcu_dereference_protected(app->reprs[NFP_REPR_TYPE_PHYS_PORT], + lockdep_is_held(&app->pf->lock)); + if (!old_reprs) + return 0; + + reprs = nfp_reprs_alloc(old_reprs->num_reprs); + if (!reprs) + return -ENOMEM; + + for (i = 0; i < old_reprs->num_reprs; i++) { + if (!old_reprs->reprs[i]) + continue; + + repr = netdev_priv(old_reprs->reprs[i]); + if (repr->port->type == NFP_PORT_INVALID) + continue; + + reprs->reprs[i] = old_reprs->reprs[i]; + } + + old_reprs = nfp_app_reprs_set(app, NFP_REPR_TYPE_PHYS_PORT, reprs); + synchronize_rcu(); + + /* Now we free up removed representors */ + for (i = 0; i < old_reprs->num_reprs; i++) { + if (!old_reprs->reprs[i]) + continue; + + repr = netdev_priv(old_reprs->reprs[i]); + if (repr->port->type != NFP_PORT_INVALID) + continue; + + nfp_app_repr_stop(app, repr); + nfp_repr_clean(repr); + } + + kfree(old_reprs); + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h index 32179cad062a..5d4d897bc9c6 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h @@ -124,5 +124,6 @@ void nfp_reprs_clean_and_free_by_type(struct nfp_app *app, enum nfp_repr_type type); struct nfp_reprs *nfp_reprs_alloc(unsigned int num_reprs); +int nfp_reprs_resync_phys_ports(struct nfp_app *app); #endif /* NFP_NET_REPR_H */ diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c index 37364555c42b..14a6d1ba51a9 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c @@ -477,6 +477,11 @@ int nfp_nsp_device_soft_reset(struct nfp_nsp *state) return nfp_nsp_command(state, SPCODE_SOFT_RESET, 0, 0, 0); } +int nfp_nsp_mac_reinit(struct nfp_nsp *state) +{ + return nfp_nsp_command(state, SPCODE_MAC_INIT, 0, 0, 0); +} + int nfp_nsp_load_fw(struct nfp_nsp *state, const struct firmware *fw) { return nfp_nsp_command_buf(state, SPCODE_FW_LOAD, fw->size, fw->data, diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h index e2f028027c6f..650ca1a5bd21 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h @@ -48,6 +48,12 @@ u16 nfp_nsp_get_abi_ver_minor(struct nfp_nsp *state); int nfp_nsp_wait(struct nfp_nsp *state); int nfp_nsp_device_soft_reset(struct nfp_nsp *state); int nfp_nsp_load_fw(struct nfp_nsp *state, const struct firmware *fw); +int nfp_nsp_mac_reinit(struct nfp_nsp *state); + +static inline bool nfp_nsp_has_mac_reinit(struct nfp_nsp *state) +{ + return nfp_nsp_get_abi_ver_minor(state) > 20; +} enum nfp_eth_interface { NFP_INTERFACE_NONE = 0, @@ -73,6 +79,18 @@ enum nfp_eth_aneg { NFP_ANEG_DISABLED, }; +enum nfp_eth_fec { + NFP_FEC_AUTO_BIT = 0, + NFP_FEC_BASER_BIT, + NFP_FEC_REED_SOLOMON_BIT, + NFP_FEC_DISABLED_BIT, +}; + +#define NFP_FEC_AUTO BIT(NFP_FEC_AUTO_BIT) +#define NFP_FEC_BASER BIT(NFP_FEC_BASER_BIT) +#define NFP_FEC_REED_SOLOMON BIT(NFP_FEC_REED_SOLOMON_BIT) +#define NFP_FEC_DISABLED BIT(NFP_FEC_DISABLED_BIT) + /** * struct nfp_eth_table - ETH table information * @count: number of table entries @@ -87,6 +105,7 @@ enum nfp_eth_aneg { * @speed: interface speed (in Mbps) * @interface: interface (module) plugged in * @media: media type of the @interface + * @fec: forward error correction mode * @aneg: auto negotiation mode * @mac_addr: interface MAC address * @label_port: port id @@ -99,6 +118,7 @@ enum nfp_eth_aneg { * @port_type: one of %PORT_* defines for ethtool * @port_lanes: total number of lanes on the port (sum of lanes of all subports) * @is_split: is interface part of a split port + * @fec_modes_supported: bitmap of FEC modes supported */ struct nfp_eth_table { unsigned int count; @@ -114,6 +134,7 @@ struct nfp_eth_table { unsigned int interface; enum nfp_eth_media media; + enum nfp_eth_fec fec; enum nfp_eth_aneg aneg; u8 mac_addr[ETH_ALEN]; @@ -133,6 +154,8 @@ struct nfp_eth_table { unsigned int port_lanes; bool is_split; + + unsigned int fec_modes_supported; } ports[0]; }; @@ -143,6 +166,19 @@ __nfp_eth_read_ports(struct nfp_cpp *cpp, struct nfp_nsp *nsp); int nfp_eth_set_mod_enable(struct nfp_cpp *cpp, unsigned int idx, bool enable); int nfp_eth_set_configured(struct nfp_cpp *cpp, unsigned int idx, bool configed); +int +nfp_eth_set_fec(struct nfp_cpp *cpp, unsigned int idx, enum nfp_eth_fec mode); + +static inline bool nfp_eth_can_support_fec(struct nfp_eth_table_port *eth_port) +{ + return !!eth_port->fec_modes_supported; +} + +static inline unsigned int +nfp_eth_supported_fec_modes(struct nfp_eth_table_port *eth_port) +{ + return eth_port->fec_modes_supported; +} struct nfp_nsp *nfp_eth_config_start(struct nfp_cpp *cpp, unsigned int idx); int nfp_eth_config_commit_end(struct nfp_nsp *nsp); diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c index f6f7c085f8e0..7ca589660e4d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c @@ -55,6 +55,8 @@ #define NSP_ETH_PORT_INDEX GENMASK_ULL(15, 8) #define NSP_ETH_PORT_LABEL GENMASK_ULL(53, 48) #define NSP_ETH_PORT_PHYLABEL GENMASK_ULL(59, 54) +#define NSP_ETH_PORT_FEC_SUPP_BASER BIT_ULL(60) +#define NSP_ETH_PORT_FEC_SUPP_RS BIT_ULL(61) #define NSP_ETH_PORT_LANES_MASK cpu_to_le64(NSP_ETH_PORT_LANES) @@ -67,6 +69,7 @@ #define NSP_ETH_STATE_MEDIA GENMASK_ULL(21, 20) #define NSP_ETH_STATE_OVRD_CHNG BIT_ULL(22) #define NSP_ETH_STATE_ANEG GENMASK_ULL(25, 23) +#define NSP_ETH_STATE_FEC GENMASK_ULL(27, 26) #define NSP_ETH_CTRL_CONFIGURED BIT_ULL(0) #define NSP_ETH_CTRL_ENABLED BIT_ULL(1) @@ -75,6 +78,7 @@ #define NSP_ETH_CTRL_SET_RATE BIT_ULL(4) #define NSP_ETH_CTRL_SET_LANES BIT_ULL(5) #define NSP_ETH_CTRL_SET_ANEG BIT_ULL(6) +#define NSP_ETH_CTRL_SET_FEC BIT_ULL(7) enum nfp_eth_raw { NSP_ETH_RAW_PORT = 0, @@ -152,6 +156,7 @@ nfp_eth_port_translate(struct nfp_nsp *nsp, const union eth_table_entry *src, unsigned int index, struct nfp_eth_table_port *dst) { unsigned int rate; + unsigned int fec; u64 port, state; port = le64_to_cpu(src->port); @@ -183,6 +188,18 @@ nfp_eth_port_translate(struct nfp_nsp *nsp, const union eth_table_entry *src, dst->override_changed = FIELD_GET(NSP_ETH_STATE_OVRD_CHNG, state); dst->aneg = FIELD_GET(NSP_ETH_STATE_ANEG, state); + + if (nfp_nsp_get_abi_ver_minor(nsp) < 22) + return; + + fec = FIELD_GET(NSP_ETH_PORT_FEC_SUPP_BASER, port); + dst->fec_modes_supported |= fec << NFP_FEC_BASER_BIT; + fec = FIELD_GET(NSP_ETH_PORT_FEC_SUPP_RS, port); + dst->fec_modes_supported |= fec << NFP_FEC_REED_SOLOMON_BIT; + if (dst->fec_modes_supported) + dst->fec_modes_supported |= NFP_FEC_AUTO | NFP_FEC_DISABLED; + + dst->fec = 1 << FIELD_GET(NSP_ETH_STATE_FEC, state); } static void @@ -469,10 +486,10 @@ int nfp_eth_set_configured(struct nfp_cpp *cpp, unsigned int idx, bool configed) return nfp_eth_config_commit_end(nsp); } -/* Force inline, FIELD_* macroes require masks to be compilation-time known */ -static __always_inline int +static int nfp_eth_set_bit_config(struct nfp_nsp *nsp, unsigned int raw_idx, - const u64 mask, unsigned int val, const u64 ctrl_bit) + const u64 mask, const unsigned int shift, + unsigned int val, const u64 ctrl_bit) { union eth_table_entry *entries = nfp_nsp_config_entries(nsp); unsigned int idx = nfp_nsp_config_idx(nsp); @@ -489,11 +506,11 @@ nfp_eth_set_bit_config(struct nfp_nsp *nsp, unsigned int raw_idx, /* Check if we are already in requested state */ reg = le64_to_cpu(entries[idx].raw[raw_idx]); - if (val == FIELD_GET(mask, reg)) + if (val == (reg & mask) >> shift) return 0; reg &= ~mask; - reg |= FIELD_PREP(mask, val); + reg |= (val << shift) & mask; entries[idx].raw[raw_idx] = cpu_to_le64(reg); entries[idx].control |= cpu_to_le64(ctrl_bit); @@ -503,6 +520,13 @@ nfp_eth_set_bit_config(struct nfp_nsp *nsp, unsigned int raw_idx, return 0; } +#define NFP_ETH_SET_BIT_CONFIG(nsp, raw_idx, mask, val, ctrl_bit) \ + ({ \ + __BF_FIELD_CHECK(mask, 0ULL, val, "NFP_ETH_SET_BIT_CONFIG: "); \ + nfp_eth_set_bit_config(nsp, raw_idx, mask, __bf_shf(mask), \ + val, ctrl_bit); \ + }) + /** * __nfp_eth_set_aneg() - set PHY autonegotiation control bit * @nsp: NFP NSP handle returned from nfp_eth_config_start() @@ -515,12 +539,59 @@ nfp_eth_set_bit_config(struct nfp_nsp *nsp, unsigned int raw_idx, */ int __nfp_eth_set_aneg(struct nfp_nsp *nsp, enum nfp_eth_aneg mode) { - return nfp_eth_set_bit_config(nsp, NSP_ETH_RAW_STATE, + return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_STATE, NSP_ETH_STATE_ANEG, mode, NSP_ETH_CTRL_SET_ANEG); } /** + * __nfp_eth_set_fec() - set PHY forward error correction control bit + * @nsp: NFP NSP handle returned from nfp_eth_config_start() + * @mode: Desired fec mode + * + * Set the PHY module forward error correction mode. + * Will write to hwinfo overrides in the flash (persistent config). + * + * Return: 0 or -ERRNO. + */ +static int __nfp_eth_set_fec(struct nfp_nsp *nsp, enum nfp_eth_fec mode) +{ + return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_STATE, + NSP_ETH_STATE_FEC, mode, + NSP_ETH_CTRL_SET_FEC); +} + +/** + * nfp_eth_set_fec() - set PHY forward error correction control mode + * @cpp: NFP CPP handle + * @idx: NFP chip-wide port index + * @mode: Desired fec mode + * + * Return: + * 0 - configuration successful; + * 1 - no changes were needed; + * -ERRNO - configuration failed. + */ +int +nfp_eth_set_fec(struct nfp_cpp *cpp, unsigned int idx, enum nfp_eth_fec mode) +{ + struct nfp_nsp *nsp; + int err; + + nsp = nfp_eth_config_start(cpp, idx); + if (IS_ERR(nsp)) + return PTR_ERR(nsp); + + err = __nfp_eth_set_fec(nsp, mode); + if (err) { + nfp_eth_config_cleanup_end(nsp); + return err; + } + + return nfp_eth_config_commit_end(nsp); +} + +/** * __nfp_eth_set_speed() - set interface speed/rate * @nsp: NFP NSP handle returned from nfp_eth_config_start() * @speed: Desired speed (per lane) @@ -544,7 +615,7 @@ int __nfp_eth_set_speed(struct nfp_nsp *nsp, unsigned int speed) return -EINVAL; } - return nfp_eth_set_bit_config(nsp, NSP_ETH_RAW_STATE, + return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_STATE, NSP_ETH_STATE_RATE, rate, NSP_ETH_CTRL_SET_RATE); } @@ -561,6 +632,6 @@ int __nfp_eth_set_speed(struct nfp_nsp *nsp, unsigned int speed) */ int __nfp_eth_set_split(struct nfp_nsp *nsp, unsigned int lanes) { - return nfp_eth_set_bit_config(nsp, NSP_ETH_RAW_PORT, NSP_ETH_PORT_LANES, + return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_PORT, NSP_ETH_PORT_LANES, lanes, NSP_ETH_CTRL_SET_LANES); } diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index adb700512baa..a3a70ade411f 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -503,7 +503,7 @@ void qede_fill_rss_params(struct qede_dev *edev, void qede_udp_tunnel_add(struct net_device *dev, struct udp_tunnel_info *ti); void qede_udp_tunnel_del(struct net_device *dev, struct udp_tunnel_info *ti); -int qede_xdp(struct net_device *dev, struct netdev_xdp *xdp); +int qede_xdp(struct net_device *dev, struct netdev_bpf *xdp); #ifdef CONFIG_DCB void qede_set_dcbnl_ops(struct net_device *ndev); diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index f79e36e4060a..c1a0708a7d7c 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1065,7 +1065,7 @@ static int qede_xdp_set(struct qede_dev *edev, struct bpf_prog *prog) return 0; } -int qede_xdp(struct net_device *dev, struct netdev_xdp *xdp) +int qede_xdp(struct net_device *dev, struct netdev_bpf *xdp) { struct qede_dev *edev = netdev_priv(dev); diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index e5ee9f274a71..8f9b3eb82137 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -556,7 +556,7 @@ static const struct net_device_ops qede_netdev_ops = { .ndo_udp_tunnel_add = qede_udp_tunnel_add, .ndo_udp_tunnel_del = qede_udp_tunnel_del, .ndo_features_check = qede_features_check, - .ndo_xdp = qede_xdp, + .ndo_bpf = qede_xdp, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = qede_rx_flow_steer, #endif @@ -594,7 +594,7 @@ static const struct net_device_ops qede_netdev_vf_xdp_ops = { .ndo_udp_tunnel_add = qede_udp_tunnel_add, .ndo_udp_tunnel_del = qede_udp_tunnel_del, .ndo_features_check = qede_features_check, - .ndo_xdp = qede_xdp, + .ndo_bpf = qede_xdp, }; /* ------------------------------------------------------------------------- diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c index 3c55ea357f35..feb92ecd1880 100644 --- a/drivers/net/plip/plip.c +++ b/drivers/net/plip/plip.c @@ -502,6 +502,7 @@ plip_receive(unsigned short nibble_timeout, struct net_device *dev, *data_p = (c0 >> 3) & 0x0f; write_data (dev, 0x10); /* send ACK */ *ns_p = PLIP_NB_1; + /* fall through */ case PLIP_NB_1: cx = nibble_timeout; @@ -597,6 +598,7 @@ plip_receive_packet(struct net_device *dev, struct net_local *nl, printk(KERN_DEBUG "%s: receive start\n", dev->name); rcv->state = PLIP_PK_LENGTH_LSB; rcv->nibble = PLIP_NB_BEGIN; + /* fall through */ case PLIP_PK_LENGTH_LSB: if (snd->state != PLIP_PK_DONE) { @@ -617,6 +619,7 @@ plip_receive_packet(struct net_device *dev, struct net_local *nl, return TIMEOUT; } rcv->state = PLIP_PK_LENGTH_MSB; + /* fall through */ case PLIP_PK_LENGTH_MSB: if (plip_receive(nibble_timeout, dev, @@ -639,6 +642,7 @@ plip_receive_packet(struct net_device *dev, struct net_local *nl, rcv->state = PLIP_PK_DATA; rcv->byte = 0; rcv->checksum = 0; + /* fall through */ case PLIP_PK_DATA: lbuf = rcv->skb->data; @@ -651,6 +655,7 @@ plip_receive_packet(struct net_device *dev, struct net_local *nl, rcv->checksum += lbuf[--rcv->byte]; } while (rcv->byte); rcv->state = PLIP_PK_CHECKSUM; + /* fall through */ case PLIP_PK_CHECKSUM: if (plip_receive(nibble_timeout, dev, @@ -663,6 +668,7 @@ plip_receive_packet(struct net_device *dev, struct net_local *nl, return ERROR; } rcv->state = PLIP_PK_DONE; + /* fall through */ case PLIP_PK_DONE: /* Inform the upper layer for the arrival of a packet. */ @@ -708,6 +714,7 @@ plip_send(unsigned short nibble_timeout, struct net_device *dev, case PLIP_NB_BEGIN: write_data (dev, data & 0x0f); *ns_p = PLIP_NB_1; + /* fall through */ case PLIP_NB_1: write_data (dev, 0x10 | (data & 0x0f)); @@ -722,6 +729,7 @@ plip_send(unsigned short nibble_timeout, struct net_device *dev, } write_data (dev, 0x10 | (data >> 4)); *ns_p = PLIP_NB_2; + /* fall through */ case PLIP_NB_2: write_data (dev, (data >> 4)); @@ -810,6 +818,7 @@ plip_send_packet(struct net_device *dev, struct net_local *nl, &snd->nibble, snd->length.b.lsb)) return TIMEOUT; snd->state = PLIP_PK_LENGTH_MSB; + /* fall through */ case PLIP_PK_LENGTH_MSB: if (plip_send(nibble_timeout, dev, @@ -818,6 +827,7 @@ plip_send_packet(struct net_device *dev, struct net_local *nl, snd->state = PLIP_PK_DATA; snd->byte = 0; snd->checksum = 0; + /* fall through */ case PLIP_PK_DATA: do { @@ -829,6 +839,7 @@ plip_send_packet(struct net_device *dev, struct net_local *nl, snd->checksum += lbuf[--snd->byte]; } while (snd->byte); snd->state = PLIP_PK_CHECKSUM; + /* fall through */ case PLIP_PK_CHECKSUM: if (plip_send(nibble_timeout, dev, @@ -839,6 +850,7 @@ plip_send_packet(struct net_device *dev, struct net_local *nl, dev_kfree_skb(snd->skb); dev->stats.tx_packets++; snd->state = PLIP_PK_DONE; + /* fall through */ case PLIP_PK_DONE: /* Close the connection */ @@ -927,6 +939,7 @@ plip_interrupt(void *dev_id) switch (nl->connection) { case PLIP_CN_CLOSING: netif_wake_queue (dev); + /* fall through */ case PLIP_CN_NONE: case PLIP_CN_SEND: rcv->state = PLIP_PK_TRIGGER; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 8125956f62a1..1a326b697221 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1141,7 +1141,7 @@ static u32 tun_xdp_query(struct net_device *dev) return 0; } -static int tun_xdp(struct net_device *dev, struct netdev_xdp *xdp) +static int tun_xdp(struct net_device *dev, struct netdev_bpf *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: @@ -1185,7 +1185,7 @@ static const struct net_device_ops tap_netdev_ops = { .ndo_features_check = passthru_features_check, .ndo_set_rx_headroom = tun_set_headroom, .ndo_get_stats64 = tun_net_get_stats64, - .ndo_xdp = tun_xdp, + .ndo_bpf = tun_xdp, }; static void tun_flow_init(struct tun_struct *tun) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index fc059f193e7d..edf984406ba0 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2088,7 +2088,7 @@ static u32 virtnet_xdp_query(struct net_device *dev) return 0; } -static int virtnet_xdp(struct net_device *dev, struct netdev_xdp *xdp) +static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: @@ -2115,7 +2115,7 @@ static const struct net_device_ops virtnet_netdev = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = virtnet_netpoll, #endif - .ndo_xdp = virtnet_xdp, + .ndo_bpf = virtnet_xdp, .ndo_xdp_xmit = virtnet_xdp_xmit, .ndo_xdp_flush = virtnet_xdp_flush, .ndo_features_check = passthru_features_check, diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 520aeebe0d93..c397934f91dd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -15,6 +15,7 @@ #include <linux/err.h> #include <linux/rbtree_latch.h> #include <linux/numa.h> +#include <linux/wait.h> struct perf_event; struct bpf_prog; @@ -182,6 +183,16 @@ struct bpf_verifier_ops { struct bpf_prog *prog, u32 *target_size); }; +struct bpf_dev_offload { + struct bpf_prog *prog; + struct net_device *netdev; + void *dev_priv; + struct list_head offloads; + bool dev_state; + bool verifier_running; + wait_queue_head_t verifier_done; +}; + struct bpf_prog_aux { atomic_t refcnt; u32 used_map_cnt; @@ -199,6 +210,7 @@ struct bpf_prog_aux { #ifdef CONFIG_SECURITY void *security; #endif + struct bpf_dev_offload *offload; union { struct work_struct work; struct rcu_head rcu; @@ -317,11 +329,14 @@ extern const struct file_operations bpf_prog_fops; #undef BPF_PROG_TYPE #undef BPF_MAP_TYPE +extern const struct bpf_prog_ops bpf_offload_prog_ops; extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops; extern const struct bpf_verifier_ops xdp_analyzer_ops; struct bpf_prog *bpf_prog_get(u32 ufd); struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type); +struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, + struct net_device *netdev); struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i); void bpf_prog_sub(struct bpf_prog *prog, int i); struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog); @@ -415,6 +430,14 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, { return ERR_PTR(-EOPNOTSUPP); } + +static inline struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, + enum bpf_prog_type type, + struct net_device *netdev) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i) { @@ -491,6 +514,30 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, } #endif /* CONFIG_BPF_SYSCALL */ +int bpf_prog_offload_compile(struct bpf_prog *prog); +void bpf_prog_offload_destroy(struct bpf_prog *prog); +u32 bpf_prog_offload_ifindex(struct bpf_prog *prog); + +#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) +int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); + +static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux) +{ + return aux->offload; +} +#else +static inline int bpf_prog_offload_init(struct bpf_prog *prog, + union bpf_attr *attr) +{ + return -EOPNOTSUPP; +} + +static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux) +{ + return false; +} +#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ + #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key); int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 3b0976aaac75..07b96aaca256 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -152,8 +152,7 @@ struct bpf_verifier_env { bool strict_alignment; /* perform strict pointer alignment checks */ struct bpf_verifier_state *cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ - const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */ - void *analyzer_priv; /* pointer to external analyzer's private data */ + const struct bpf_ext_analyzer_ops *dev_ops; /* device analyzer ops */ struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ u32 used_map_cnt; /* number of used maps */ u32 id_gen; /* used to generate unique reg IDs */ @@ -169,7 +168,13 @@ static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) return env->cur_state->regs; } -int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, - void *priv); +#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) +int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env); +#else +int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env) +{ + return -EOPNOTSUPP; +} +#endif #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7de7656550c2..fda527ccb263 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -779,10 +779,10 @@ enum tc_setup_type { TC_SETUP_CBS, }; -/* These structures hold the attributes of xdp state that are being passed - * to the netdevice through the xdp op. +/* These structures hold the attributes of bpf state that are being passed + * to the netdevice through the bpf op. */ -enum xdp_netdev_command { +enum bpf_netdev_command { /* Set or clear a bpf program used in the earliest stages of packet * rx. The prog will have been loaded as BPF_PROG_TYPE_XDP. The callee * is responsible for calling bpf_prog_put on any old progs that are @@ -797,12 +797,17 @@ enum xdp_netdev_command { * is equivalent to XDP_ATTACHED_DRV. */ XDP_QUERY_PROG, + /* BPF program for offload callbacks, invoked at program load time. */ + BPF_OFFLOAD_VERIFIER_PREP, + BPF_OFFLOAD_TRANSLATE, + BPF_OFFLOAD_DESTROY, }; +struct bpf_ext_analyzer_ops; struct netlink_ext_ack; -struct netdev_xdp { - enum xdp_netdev_command command; +struct netdev_bpf { + enum bpf_netdev_command command; union { /* XDP_SETUP_PROG */ struct { @@ -815,6 +820,15 @@ struct netdev_xdp { u8 prog_attached; u32 prog_id; }; + /* BPF_OFFLOAD_VERIFIER_PREP */ + struct { + struct bpf_prog *prog; + const struct bpf_ext_analyzer_ops *ops; /* callee set */ + } verifier; + /* BPF_OFFLOAD_TRANSLATE, BPF_OFFLOAD_DESTROY */ + struct { + struct bpf_prog *prog; + } offload; }; }; @@ -1124,9 +1138,10 @@ struct dev_ifalias { * appropriate rx headroom value allows avoiding skb head copy on * forward. Setting a negative value resets the rx headroom to the * default value. - * int (*ndo_xdp)(struct net_device *dev, struct netdev_xdp *xdp); + * int (*ndo_bpf)(struct net_device *dev, struct netdev_bpf *bpf); * This function is used to set or query state related to XDP on the - * netdevice. See definition of enum xdp_netdev_command for details. + * netdevice and manage BPF offload. See definition of + * enum bpf_netdev_command for details. * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_buff *xdp); * This function is used to submit a XDP packet for transmit on a * netdevice. @@ -1315,8 +1330,8 @@ struct net_device_ops { struct sk_buff *skb); void (*ndo_set_rx_headroom)(struct net_device *dev, int needed_headroom); - int (*ndo_xdp)(struct net_device *dev, - struct netdev_xdp *xdp); + int (*ndo_bpf)(struct net_device *dev, + struct netdev_bpf *bpf); int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_buff *xdp); void (*ndo_xdp_flush)(struct net_device *dev); @@ -3311,10 +3326,10 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); -typedef int (*xdp_op_t)(struct net_device *dev, struct netdev_xdp *xdp); +typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, u32 flags); -u8 __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op, u32 *prog_id); +u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t xdp_op, u32 *prog_id); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 8c431385b272..22f40c96a15b 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -210,8 +210,13 @@ struct tcp_sock { u64 mstamp; /* (Re)sent time of the skb */ u32 rtt_us; /* Associated RTT */ u32 end_seq; /* Ending TCP sequence of the skb */ - u8 advanced; /* mstamp advanced since last lost marking */ - u8 reord; /* reordering detected */ + u32 last_delivered; /* tp->delivered at last reo_wnd adj */ + u8 reo_wnd_steps; /* Allowed reordering window */ +#define TCP_RACK_RECOVERY_THRESH 16 + u8 reo_wnd_persist:5, /* No. of recovery since last adj */ + dsack_seen:1, /* Whether DSACK seen after last adj */ + advanced:1, /* mstamp advanced since last lost marking */ + reord:1; /* reordering detected */ } rack; u16 advmss; /* Advertised MSS */ u32 chrono_start; /* Start time in jiffies of a TCP chrono */ diff --git a/include/net/dsa.h b/include/net/dsa.h index 50e276dc4c01..e54332968417 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -116,7 +116,7 @@ struct dsa_switch_tree { struct raw_notifier_head nh; /* Tree identifier */ - u32 tree; + unsigned int index; /* Number of switches attached to this tree */ struct kref refcount; @@ -209,7 +209,7 @@ struct dsa_switch { * Parent switch tree, and switch index. */ struct dsa_switch_tree *dst; - int index; + unsigned int index; /* Listener for switch fabric events */ struct notifier_block nb; diff --git a/include/net/tcp.h b/include/net/tcp.h index c2bf2a822b10..babfd4da1515 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -246,6 +246,7 @@ extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ +#define TCP_RACK_STATIC_REO_WND 0x2 /* Use static RACK reo wnd */ extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; @@ -1901,6 +1902,7 @@ extern void tcp_rack_mark_lost(struct sock *sk); extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, u64 xmit_time); extern void tcp_rack_reo_timeout(struct sock *sk); +extern void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs); /* At how many usecs into the future should the RTO fire? */ static inline s64 tcp_rto_delta_us(const struct sock *sk) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a9820677c2ff..4455dd195201 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -260,6 +260,7 @@ union bpf_attr { __u32 kern_version; /* checked when prog_type=kprobe */ __u32 prog_flags; char prog_name[BPF_OBJ_NAME_LEN]; + __u32 prog_target_ifindex; /* ifindex of netdev to prep for */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -894,6 +895,10 @@ enum sk_action { #define BPF_TAG_SIZE 8 +enum bpf_prog_status { + BPF_PROG_STATUS_DEV_BOUND = (1 << 0), +}; + struct bpf_prog_info { __u32 type; __u32 id; @@ -907,6 +912,8 @@ struct bpf_prog_info { __u32 nr_map_ids; __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u32 status; } __attribute__((aligned(8))); struct bpf_map_info { diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index b3cf5639ac8f..19fc02660e0c 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -160,6 +160,7 @@ enum { IFLA_XDP, IFLA_EVENT, IFLA_NEW_NETNSID, + IFLA_IF_NETNSID, __IFLA_MAX }; diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index ffe397daad49..501e4c4e2a03 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -258,6 +258,8 @@ enum ovs_vport_attr { /* receiving upcalls */ OVS_VPORT_ATTR_STATS, /* struct ovs_vport_stats */ OVS_VPORT_ATTR_PAD, + OVS_VPORT_ATTR_IFINDEX, + OVS_VPORT_ATTR_NETNSID, __OVS_VPORT_ATTR_MAX }; diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 16e95c8e749e..e691da0b3bab 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_BPF_SYSCALL) += disasm.o ifeq ($(CONFIG_NET),y) obj-$(CONFIG_BPF_SYSCALL) += devmap.o obj-$(CONFIG_BPF_SYSCALL) += cpumap.o +obj-$(CONFIG_BPF_SYSCALL) += offload.o ifeq ($(CONFIG_STREAM_PARSER),y) obj-$(CONFIG_BPF_SYSCALL) += sockmap.o endif diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 7fe448799d76..8a6c37762330 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1380,7 +1380,13 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) * valid program, which in this case would simply not * be JITed, but falls back to the interpreter. */ - fp = bpf_int_jit_compile(fp); + if (!bpf_prog_is_dev_bound(fp->aux)) { + fp = bpf_int_jit_compile(fp); + } else { + *err = bpf_prog_offload_compile(fp); + if (*err) + return fp; + } bpf_prog_lock_ro(fp); /* The tail call compatibility check can only be done at @@ -1549,6 +1555,8 @@ static void bpf_prog_free_deferred(struct work_struct *work) struct bpf_prog_aux *aux; aux = container_of(work, struct bpf_prog_aux, work); + if (bpf_prog_is_dev_bound(aux)) + bpf_prog_offload_destroy(aux->prog); bpf_jit_free(aux->prog); } diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c new file mode 100644 index 000000000000..2816feb38be1 --- /dev/null +++ b/kernel/bpf/offload.c @@ -0,0 +1,194 @@ +#include <linux/bpf.h> +#include <linux/bpf_verifier.h> +#include <linux/bug.h> +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/printk.h> +#include <linux/rtnetlink.h> + +/* protected by RTNL */ +static LIST_HEAD(bpf_prog_offload_devs); + +int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) +{ + struct net *net = current->nsproxy->net_ns; + struct bpf_dev_offload *offload; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (attr->prog_flags) + return -EINVAL; + + offload = kzalloc(sizeof(*offload), GFP_USER); + if (!offload) + return -ENOMEM; + + offload->prog = prog; + init_waitqueue_head(&offload->verifier_done); + + rtnl_lock(); + offload->netdev = __dev_get_by_index(net, attr->prog_target_ifindex); + if (!offload->netdev) { + rtnl_unlock(); + kfree(offload); + return -EINVAL; + } + + prog->aux->offload = offload; + list_add_tail(&offload->offloads, &bpf_prog_offload_devs); + rtnl_unlock(); + + return 0; +} + +static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd, + struct netdev_bpf *data) +{ + struct net_device *netdev = prog->aux->offload->netdev; + + ASSERT_RTNL(); + + if (!netdev) + return -ENODEV; + if (!netdev->netdev_ops->ndo_bpf) + return -EOPNOTSUPP; + + data->command = cmd; + + return netdev->netdev_ops->ndo_bpf(netdev, data); +} + +int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env) +{ + struct netdev_bpf data = {}; + int err; + + data.verifier.prog = env->prog; + + rtnl_lock(); + err = __bpf_offload_ndo(env->prog, BPF_OFFLOAD_VERIFIER_PREP, &data); + if (err) + goto exit_unlock; + + env->dev_ops = data.verifier.ops; + + env->prog->aux->offload->dev_state = true; + env->prog->aux->offload->verifier_running = true; +exit_unlock: + rtnl_unlock(); + return err; +} + +static void __bpf_prog_offload_destroy(struct bpf_prog *prog) +{ + struct bpf_dev_offload *offload = prog->aux->offload; + struct netdev_bpf data = {}; + + data.offload.prog = prog; + + if (offload->verifier_running) + wait_event(offload->verifier_done, !offload->verifier_running); + + if (offload->dev_state) + WARN_ON(__bpf_offload_ndo(prog, BPF_OFFLOAD_DESTROY, &data)); + + offload->dev_state = false; + list_del_init(&offload->offloads); + offload->netdev = NULL; +} + +void bpf_prog_offload_destroy(struct bpf_prog *prog) +{ + struct bpf_dev_offload *offload = prog->aux->offload; + + offload->verifier_running = false; + wake_up(&offload->verifier_done); + + rtnl_lock(); + __bpf_prog_offload_destroy(prog); + rtnl_unlock(); + + kfree(offload); +} + +static int bpf_prog_offload_translate(struct bpf_prog *prog) +{ + struct bpf_dev_offload *offload = prog->aux->offload; + struct netdev_bpf data = {}; + int ret; + + data.offload.prog = prog; + + offload->verifier_running = false; + wake_up(&offload->verifier_done); + + rtnl_lock(); + ret = __bpf_offload_ndo(prog, BPF_OFFLOAD_TRANSLATE, &data); + rtnl_unlock(); + + return ret; +} + +static unsigned int bpf_prog_warn_on_exec(const void *ctx, + const struct bpf_insn *insn) +{ + WARN(1, "attempt to execute device eBPF program on the host!"); + return 0; +} + +int bpf_prog_offload_compile(struct bpf_prog *prog) +{ + prog->bpf_func = bpf_prog_warn_on_exec; + + return bpf_prog_offload_translate(prog); +} + +u32 bpf_prog_offload_ifindex(struct bpf_prog *prog) +{ + struct bpf_dev_offload *offload = prog->aux->offload; + u32 ifindex; + + rtnl_lock(); + ifindex = offload->netdev ? offload->netdev->ifindex : 0; + rtnl_unlock(); + + return ifindex; +} + +const struct bpf_prog_ops bpf_offload_prog_ops = { +}; + +static int bpf_offload_notification(struct notifier_block *notifier, + ulong event, void *ptr) +{ + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + struct bpf_dev_offload *offload, *tmp; + + ASSERT_RTNL(); + + switch (event) { + case NETDEV_UNREGISTER: + list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs, + offloads) { + if (offload->netdev == netdev) + __bpf_prog_offload_destroy(offload->prog); + } + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block bpf_offload_notifier = { + .notifier_call = bpf_offload_notification, +}; + +static int __init bpf_offload_init(void) +{ + register_netdevice_notifier(&bpf_offload_notifier); + return 0; +} + +subsys_initcall(bpf_offload_init); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 323be2473c4b..416d70cdfc76 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -824,7 +824,10 @@ static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type]) return -EINVAL; - prog->aux->ops = bpf_prog_types[type]; + if (!bpf_prog_is_dev_bound(prog->aux)) + prog->aux->ops = bpf_prog_types[type]; + else + prog->aux->ops = &bpf_offload_prog_ops; prog->type = type; return 0; } @@ -1054,7 +1057,22 @@ struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) } EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero); -static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type) +static bool bpf_prog_can_attach(struct bpf_prog *prog, + enum bpf_prog_type *attach_type, + struct net_device *netdev) +{ + struct bpf_dev_offload *offload = prog->aux->offload; + + if (prog->type != *attach_type) + return false; + if (offload && offload->netdev != netdev) + return false; + + return true; +} + +static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type, + struct net_device *netdev) { struct fd f = fdget(ufd); struct bpf_prog *prog; @@ -1062,7 +1080,7 @@ static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type) prog = ____bpf_prog_get(f); if (IS_ERR(prog)) return prog; - if (type && prog->type != *type) { + if (attach_type && !bpf_prog_can_attach(prog, attach_type, netdev)) { prog = ERR_PTR(-EINVAL); goto out; } @@ -1075,12 +1093,12 @@ out: struct bpf_prog *bpf_prog_get(u32 ufd) { - return __bpf_prog_get(ufd, NULL); + return __bpf_prog_get(ufd, NULL, NULL); } struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type) { - struct bpf_prog *prog = __bpf_prog_get(ufd, &type); + struct bpf_prog *prog = __bpf_prog_get(ufd, &type, NULL); if (!IS_ERR(prog)) trace_bpf_prog_get_type(prog); @@ -1088,8 +1106,19 @@ struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type) } EXPORT_SYMBOL_GPL(bpf_prog_get_type); +struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, + struct net_device *netdev) +{ + struct bpf_prog *prog = __bpf_prog_get(ufd, &type, netdev); + + if (!IS_ERR(prog)) + trace_bpf_prog_get_type(prog); + return prog; +} +EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); + /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD prog_name +#define BPF_PROG_LOAD_LAST_FIELD prog_target_ifindex static int bpf_prog_load(union bpf_attr *attr) { @@ -1152,6 +1181,12 @@ static int bpf_prog_load(union bpf_attr *attr) atomic_set(&prog->aux->refcnt, 1); prog->gpl_compatible = is_gpl ? 1 : 0; + if (attr->prog_target_ifindex) { + err = bpf_prog_offload_init(prog, attr); + if (err) + goto free_prog; + } + /* find program type: socket_filter vs tracing_filter */ err = find_prog_type(type, prog); if (err < 0) @@ -1583,6 +1618,11 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, return -EFAULT; } + if (bpf_prog_is_dev_bound(prog->aux)) { + info.status |= BPF_PROG_STATUS_DEV_BOUND; + info.ifindex = bpf_prog_offload_ifindex(prog); + } + done: if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 04357ad5a812..add845fe788a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -949,9 +949,6 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, */ *reg_type = info.reg_type; - if (env->analyzer_ops) - return 0; - env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; /* remember the offset of last byte accessed in ctx */ if (env->prog->aux->max_ctx_offset < off + size) @@ -3736,10 +3733,10 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) static int ext_analyzer_insn_hook(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) { - if (!env->analyzer_ops || !env->analyzer_ops->insn_hook) - return 0; + if (env->dev_ops && env->dev_ops->insn_hook) + return env->dev_ops->insn_hook(env, insn_idx, prev_insn_idx); - return env->analyzer_ops->insn_hook(env, insn_idx, prev_insn_idx); + return 0; } static int do_check(struct bpf_verifier_env *env) @@ -4516,6 +4513,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) env->strict_alignment = true; + if (env->prog->aux->offload) { + ret = bpf_prog_offload_verifier_prep(env); + if (ret) + goto err_unlock; + } + ret = replace_map_fd_with_map_ptr(env); if (ret < 0) goto skip_full_check; @@ -4592,72 +4595,3 @@ err_free_env: kfree(env); return ret; } - -static const struct bpf_verifier_ops * const bpf_analyzer_ops[] = { -#ifdef CONFIG_NET - [BPF_PROG_TYPE_XDP] = &xdp_analyzer_ops, - [BPF_PROG_TYPE_SCHED_CLS] = &tc_cls_act_analyzer_ops, -#endif -}; - -int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, - void *priv) -{ - struct bpf_verifier_env *env; - int ret; - - if (prog->type >= ARRAY_SIZE(bpf_analyzer_ops) || - !bpf_analyzer_ops[prog->type]) - return -EOPNOTSUPP; - - env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); - if (!env) - return -ENOMEM; - - env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) * - prog->len); - ret = -ENOMEM; - if (!env->insn_aux_data) - goto err_free_env; - env->prog = prog; - env->ops = bpf_analyzer_ops[env->prog->type]; - env->analyzer_ops = ops; - env->analyzer_priv = priv; - - /* grab the mutex to protect few globals used by verifier */ - mutex_lock(&bpf_verifier_lock); - - env->strict_alignment = false; - if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) - env->strict_alignment = true; - - env->explored_states = kcalloc(env->prog->len, - sizeof(struct bpf_verifier_state_list *), - GFP_KERNEL); - ret = -ENOMEM; - if (!env->explored_states) - goto skip_full_check; - - ret = check_cfg(env); - if (ret < 0) - goto skip_full_check; - - env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); - - ret = do_check(env); - if (env->cur_state) { - free_verifier_state(env->cur_state, true); - env->cur_state = NULL; - } - -skip_full_check: - while (!pop_stack(env, NULL, NULL)); - free_states(env); - - mutex_unlock(&bpf_verifier_lock); - vfree(env->insn_aux_data); -err_free_env: - kfree(env); - return ret; -} -EXPORT_SYMBOL_GPL(bpf_analyzer); diff --git a/net/core/dev.c b/net/core/dev.c index 1423cf4d695c..30b5fe32c525 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4545,7 +4545,7 @@ static int __netif_receive_skb(struct sk_buff *skb) return ret; } -static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp) +static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp) { struct bpf_prog *old = rtnl_dereference(dev->xdp_prog); struct bpf_prog *new = xdp->prog; @@ -7090,26 +7090,26 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down) } EXPORT_SYMBOL(dev_change_proto_down); -u8 __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op, u32 *prog_id) +u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op, u32 *prog_id) { - struct netdev_xdp xdp; + struct netdev_bpf xdp; memset(&xdp, 0, sizeof(xdp)); xdp.command = XDP_QUERY_PROG; /* Query must always succeed. */ - WARN_ON(xdp_op(dev, &xdp) < 0); + WARN_ON(bpf_op(dev, &xdp) < 0); if (prog_id) *prog_id = xdp.prog_id; return xdp.prog_attached; } -static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op, +static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op, struct netlink_ext_ack *extack, u32 flags, struct bpf_prog *prog) { - struct netdev_xdp xdp; + struct netdev_bpf xdp; memset(&xdp, 0, sizeof(xdp)); if (flags & XDP_FLAGS_HW_MODE) @@ -7120,7 +7120,7 @@ static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op, xdp.flags = flags; xdp.prog = prog; - return xdp_op(dev, &xdp); + return bpf_op(dev, &xdp); } /** @@ -7137,32 +7137,36 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, { const struct net_device_ops *ops = dev->netdev_ops; struct bpf_prog *prog = NULL; - xdp_op_t xdp_op, xdp_chk; + bpf_op_t bpf_op, bpf_chk; int err; ASSERT_RTNL(); - xdp_op = xdp_chk = ops->ndo_xdp; - if (!xdp_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) + bpf_op = bpf_chk = ops->ndo_bpf; + if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) return -EOPNOTSUPP; - if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE)) - xdp_op = generic_xdp_install; - if (xdp_op == xdp_chk) - xdp_chk = generic_xdp_install; + if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE)) + bpf_op = generic_xdp_install; + if (bpf_op == bpf_chk) + bpf_chk = generic_xdp_install; if (fd >= 0) { - if (xdp_chk && __dev_xdp_attached(dev, xdp_chk, NULL)) + if (bpf_chk && __dev_xdp_attached(dev, bpf_chk, NULL)) return -EEXIST; if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && - __dev_xdp_attached(dev, xdp_op, NULL)) + __dev_xdp_attached(dev, bpf_op, NULL)) return -EBUSY; - prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP); + if (bpf_op == ops->ndo_bpf) + prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, + dev); + else + prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP); if (IS_ERR(prog)) return PTR_ERR(prog); } - err = dev_xdp_install(dev, xdp_op, extack, flags, prog); + err = dev_xdp_install(dev, bpf_op, extack, flags, prog); if (err < 0 && prog) bpf_prog_put(prog); diff --git a/net/core/filter.c b/net/core/filter.c index a0112168d6f9..1afa17935954 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3777,25 +3777,6 @@ static bool tc_cls_act_is_valid_access(int off, int size, return bpf_skb_is_valid_access(off, size, type, info); } -static bool -tc_cls_act_is_valid_access_analyzer(int off, int size, - enum bpf_access_type type, - struct bpf_insn_access_aux *info) -{ - switch (off) { - case offsetof(struct sk_buff, len): - return true; - case offsetof(struct sk_buff, data): - info->reg_type = PTR_TO_PACKET; - return true; - case offsetof(struct sk_buff, cb) + - offsetof(struct bpf_skb_data_end, data_end): - info->reg_type = PTR_TO_PACKET_END; - return true; - } - return false; -} - static bool __is_valid_xdp_access(int off, int size) { if (off < 0 || off >= sizeof(struct xdp_md)) @@ -3830,21 +3811,6 @@ static bool xdp_is_valid_access(int off, int size, return __is_valid_xdp_access(off, size); } -static bool xdp_is_valid_access_analyzer(int off, int size, - enum bpf_access_type type, - struct bpf_insn_access_aux *info) -{ - switch (off) { - case offsetof(struct xdp_buff, data): - info->reg_type = PTR_TO_PACKET; - return true; - case offsetof(struct xdp_buff, data_end): - info->reg_type = PTR_TO_PACKET_END; - return true; - } - return false; -} - void bpf_warn_invalid_xdp_action(u32 act) { const u32 act_max = XDP_REDIRECT; @@ -4516,10 +4482,6 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = { .gen_prologue = tc_cls_act_prologue, }; -const struct bpf_verifier_ops tc_cls_act_analyzer_ops = { - .is_valid_access = tc_cls_act_is_valid_access_analyzer, -}; - const struct bpf_prog_ops tc_cls_act_prog_ops = { .test_run = bpf_prog_test_run_skb, }; @@ -4530,10 +4492,6 @@ const struct bpf_verifier_ops xdp_verifier_ops = { .convert_ctx_access = xdp_convert_ctx_access, }; -const struct bpf_verifier_ops xdp_analyzer_ops = { - .is_valid_access = xdp_is_valid_access_analyzer, -}; - const struct bpf_prog_ops xdp_prog_ops = { .test_run = bpf_prog_test_run_xdp, }; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 6cfdc7c84c48..b797832565d3 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -234,6 +234,7 @@ int peernet2id_alloc(struct net *net, struct net *peer) rtnl_net_notifyid(net, RTM_NEWNSID, id); return id; } +EXPORT_SYMBOL_GPL(peernet2id_alloc); /* This function returns, if assigned, the id of a peer netns. */ int peernet2id(struct net *net, struct net *peer) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index de24d394c69e..dc5ad84ac096 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -921,7 +921,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_EVENT */ + nla_total_size(4) /* IFLA_NEW_NETNSID */ + nla_total_size(1); /* IFLA_PROTO_DOWN */ - + + nla_total_size(4) /* IFLA_IF_NETNSID */ + + 0; } static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) @@ -1269,10 +1270,10 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id) *prog_id = generic_xdp_prog->aux->id; return XDP_ATTACHED_SKB; } - if (!ops->ndo_xdp) + if (!ops->ndo_bpf) return XDP_ATTACHED_NONE; - return __dev_xdp_attached(dev, ops->ndo_xdp, prog_id); + return __dev_xdp_attached(dev, ops->ndo_bpf, prog_id); } static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) @@ -1370,13 +1371,14 @@ static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb, } static int rtnl_fill_link_netnsid(struct sk_buff *skb, - const struct net_device *dev) + const struct net_device *dev, + struct net *src_net) { if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) { struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); if (!net_eq(dev_net(dev), link_net)) { - int id = peernet2id_alloc(dev_net(dev), link_net); + int id = peernet2id_alloc(src_net, link_net); if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) return -EMSGSIZE; @@ -1427,10 +1429,11 @@ static int rtnl_fill_link_af(struct sk_buff *skb, return 0; } -static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, +static int rtnl_fill_ifinfo(struct sk_buff *skb, + struct net_device *dev, struct net *src_net, int type, u32 pid, u32 seq, u32 change, unsigned int flags, u32 ext_filter_mask, - u32 event, int *new_nsid) + u32 event, int *new_nsid, int tgt_netnsid) { struct ifinfomsg *ifm; struct nlmsghdr *nlh; @@ -1448,6 +1451,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, ifm->ifi_flags = dev_get_flags(dev); ifm->ifi_change = change; + if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_IF_NETNSID, tgt_netnsid)) + goto nla_put_failure; + if (nla_put_string(skb, IFLA_IFNAME, dev->name) || nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) || nla_put_u8(skb, IFLA_OPERSTATE, @@ -1513,7 +1519,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, goto nla_put_failure; } - if (rtnl_fill_link_netnsid(skb, dev)) + if (rtnl_fill_link_netnsid(skb, dev, src_net)) goto nla_put_failure; if (new_nsid && @@ -1571,6 +1577,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_XDP] = { .type = NLA_NESTED }, [IFLA_EVENT] = { .type = NLA_U32 }, [IFLA_GROUP] = { .type = NLA_U32 }, + [IFLA_IF_NETNSID] = { .type = NLA_S32 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -1674,9 +1681,28 @@ static bool link_dump_filtered(struct net_device *dev, return false; } +static struct net *get_target_net(struct sk_buff *skb, int netnsid) +{ + struct net *net; + + net = get_net_ns_by_id(sock_net(skb->sk), netnsid); + if (!net) + return ERR_PTR(-EINVAL); + + /* For now, the caller is required to have CAP_NET_ADMIN in + * the user namespace owning the target net ns. + */ + if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { + put_net(net); + return ERR_PTR(-EACCES); + } + return net; +} + static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); + struct net *tgt_net = net; int h, s_h; int idx = 0, s_idx; struct net_device *dev; @@ -1686,6 +1712,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) const struct rtnl_link_ops *kind_ops = NULL; unsigned int flags = NLM_F_MULTI; int master_idx = 0; + int netnsid = -1; int err; int hdrlen; @@ -1704,6 +1731,15 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX, ifla_policy, NULL) >= 0) { + if (tb[IFLA_IF_NETNSID]) { + netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); + tgt_net = get_target_net(skb, netnsid); + if (IS_ERR(tgt_net)) { + tgt_net = net; + netnsid = -1; + } + } + if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); @@ -1719,17 +1755,19 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; - head = &net->dev_index_head[h]; + head = &tgt_net->dev_index_head[h]; hlist_for_each_entry(dev, head, index_hlist) { if (link_dump_filtered(dev, master_idx, kind_ops)) goto cont; if (idx < s_idx) goto cont; - err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, + err = rtnl_fill_ifinfo(skb, dev, net, + RTM_NEWLINK, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 0, flags, - ext_filter_mask, 0, NULL); + ext_filter_mask, 0, NULL, + netnsid); if (err < 0) { if (likely(skb->len)) @@ -1748,6 +1786,8 @@ out_err: cb->args[0] = h; cb->seq = net->dev_base_seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + if (netnsid >= 0) + put_net(tgt_net); return err; } @@ -2360,6 +2400,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) goto errout; + if (tb[IFLA_IF_NETNSID]) + return -EOPNOTSUPP; + if (tb[IFLA_IFNAME]) nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); else @@ -2454,6 +2497,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) return err; + if (tb[IFLA_IF_NETNSID]) + return -EOPNOTSUPP; + if (tb[IFLA_IFNAME]) nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); @@ -2585,6 +2631,9 @@ replay: if (err < 0) return err; + if (tb[IFLA_IF_NETNSID]) + return -EOPNOTSUPP; + if (tb[IFLA_IFNAME]) nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); else @@ -2818,11 +2867,13 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); + struct net *tgt_net = net; struct ifinfomsg *ifm; char ifname[IFNAMSIZ]; struct nlattr *tb[IFLA_MAX+1]; struct net_device *dev = NULL; struct sk_buff *nskb; + int netnsid = -1; int err; u32 ext_filter_mask = 0; @@ -2830,35 +2881,50 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) return err; + if (tb[IFLA_IF_NETNSID]) { + netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); + tgt_net = get_target_net(skb, netnsid); + if (IS_ERR(tgt_net)) + return PTR_ERR(tgt_net); + } + if (tb[IFLA_IFNAME]) nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); + err = -EINVAL; ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) - dev = __dev_get_by_index(net, ifm->ifi_index); + dev = __dev_get_by_index(tgt_net, ifm->ifi_index); else if (tb[IFLA_IFNAME]) - dev = __dev_get_by_name(net, ifname); + dev = __dev_get_by_name(tgt_net, ifname); else - return -EINVAL; + goto out; + err = -ENODEV; if (dev == NULL) - return -ENODEV; + goto out; + err = -ENOBUFS; nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL); if (nskb == NULL) - return -ENOBUFS; + goto out; - err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid, - nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0, NULL); + err = rtnl_fill_ifinfo(nskb, dev, net, + RTM_NEWLINK, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, 0, 0, ext_filter_mask, + 0, NULL, netnsid); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); kfree_skb(nskb); } else err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid); +out: + if (netnsid >= 0) + put_net(tgt_net); return err; } @@ -2948,8 +3014,9 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, if (skb == NULL) goto errout; - err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event, - new_nsid); + err = rtnl_fill_ifinfo(skb, dev, dev_net(dev), + type, 0, 0, change, 0, 0, event, + new_nsid, -1); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 797d1156b4e6..283104e5ca6a 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -21,65 +21,77 @@ #include "dsa_priv.h" -static LIST_HEAD(dsa_switch_trees); +static LIST_HEAD(dsa_tree_list); static DEFINE_MUTEX(dsa2_mutex); static const struct devlink_ops dsa_devlink_ops = { }; -static struct dsa_switch_tree *dsa_get_dst(u32 tree) +static struct dsa_switch_tree *dsa_tree_find(int index) { struct dsa_switch_tree *dst; - list_for_each_entry(dst, &dsa_switch_trees, list) - if (dst->tree == tree) { - kref_get(&dst->refcount); + list_for_each_entry(dst, &dsa_tree_list, list) + if (dst->index == index) return dst; - } + return NULL; } -static void dsa_free_dst(struct kref *ref) +static struct dsa_switch_tree *dsa_tree_alloc(int index) { - struct dsa_switch_tree *dst = container_of(ref, struct dsa_switch_tree, - refcount); + struct dsa_switch_tree *dst; - list_del(&dst->list); - kfree(dst); + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (!dst) + return NULL; + + dst->index = index; + + INIT_LIST_HEAD(&dst->list); + list_add_tail(&dsa_tree_list, &dst->list); + + /* Initialize the reference counter to the number of switches, not 1 */ + kref_init(&dst->refcount); + refcount_set(&dst->refcount.refcount, 0); + + return dst; } -static void dsa_put_dst(struct dsa_switch_tree *dst) +static void dsa_tree_free(struct dsa_switch_tree *dst) { - kref_put(&dst->refcount, dsa_free_dst); + list_del(&dst->list); + kfree(dst); } -static struct dsa_switch_tree *dsa_add_dst(u32 tree) +static struct dsa_switch_tree *dsa_tree_touch(int index) { struct dsa_switch_tree *dst; - dst = kzalloc(sizeof(*dst), GFP_KERNEL); + dst = dsa_tree_find(index); if (!dst) - return NULL; - dst->tree = tree; - INIT_LIST_HEAD(&dst->list); - list_add_tail(&dsa_switch_trees, &dst->list); - kref_init(&dst->refcount); + dst = dsa_tree_alloc(index); return dst; } -static void dsa_dst_add_ds(struct dsa_switch_tree *dst, - struct dsa_switch *ds, u32 index) +static void dsa_tree_get(struct dsa_switch_tree *dst) { kref_get(&dst->refcount); - dst->ds[index] = ds; } -static void dsa_dst_del_ds(struct dsa_switch_tree *dst, - struct dsa_switch *ds, u32 index) +static void dsa_tree_release(struct kref *ref) { - dst->ds[index] = NULL; - kref_put(&dst->refcount, dsa_free_dst); + struct dsa_switch_tree *dst; + + dst = container_of(ref, struct dsa_switch_tree, refcount); + + dsa_tree_free(dst); +} + +static void dsa_tree_put(struct dsa_switch_tree *dst) +{ + kref_put(&dst->refcount, dsa_tree_release); } /* For platform data configurations, we need to have a valid name argument to @@ -454,20 +466,56 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst) dst->cpu_dp = NULL; - pr_info("DSA: tree %d unapplied\n", dst->tree); + pr_info("DSA: tree %d unapplied\n", dst->index); dst->applied = false; } -static int dsa_cpu_parse(struct dsa_port *port, u32 index, - struct dsa_switch_tree *dst, - struct dsa_switch *ds) +static void dsa_tree_remove_switch(struct dsa_switch_tree *dst, + unsigned int index) { + dst->ds[index] = NULL; + dsa_tree_put(dst); +} + +static int dsa_tree_add_switch(struct dsa_switch_tree *dst, + struct dsa_switch *ds) +{ + unsigned int index = ds->index; + + if (dst->ds[index]) + return -EBUSY; + + dsa_tree_get(dst); + dst->ds[index] = ds; + + return 0; +} + +static int dsa_port_parse_user(struct dsa_port *dp, const char *name) +{ + if (!name) + name = "eth%d"; + + dp->type = DSA_PORT_TYPE_USER; + dp->name = name; + + return 0; +} + +static int dsa_port_parse_dsa(struct dsa_port *dp) +{ + dp->type = DSA_PORT_TYPE_DSA; + + return 0; +} + +static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master) +{ + struct dsa_switch *ds = dp->ds; + struct dsa_switch_tree *dst = ds->dst; const struct dsa_device_ops *tag_ops; enum dsa_tag_protocol tag_protocol; - if (!dst->cpu_dp) - dst->cpu_dp = port; - tag_protocol = ds->ops->get_tag_protocol(ds); tag_ops = dsa_resolve_tag_protocol(tag_protocol); if (IS_ERR(tag_ops)) { @@ -475,11 +523,21 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, return PTR_ERR(tag_ops); } - dst->cpu_dp->tag_ops = tag_ops; + dp->type = DSA_PORT_TYPE_CPU; + dp->rcv = tag_ops->rcv; + dp->tag_ops = tag_ops; + dp->master = master; + dp->dst = dst; + + return 0; +} - /* Make a few copies for faster access in master receive hot path */ - dst->cpu_dp->rcv = dst->cpu_dp->tag_ops->rcv; - dst->cpu_dp->dst = dst; +static int dsa_cpu_parse(struct dsa_port *port, u32 index, + struct dsa_switch_tree *dst, + struct dsa_switch *ds) +{ + if (!dst->cpu_dp) + dst->cpu_dp = port; return 0; } @@ -504,7 +562,7 @@ static int dsa_ds_parse(struct dsa_switch_tree *dst, struct dsa_switch *ds) } - pr_info("DSA: switch %d %d parsed\n", dst->tree, ds->index); + pr_info("DSA: switch %d %d parsed\n", dst->index, ds->index); return 0; } @@ -549,7 +607,7 @@ static int dsa_dst_parse(struct dsa_switch_tree *dst) } } - pr_info("DSA: tree %d parsed\n", dst->tree); + pr_info("DSA: tree %d parsed\n", dst->index); return 0; } @@ -557,8 +615,10 @@ static int dsa_dst_parse(struct dsa_switch_tree *dst) static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) { struct device_node *ethernet = of_parse_phandle(dn, "ethernet", 0); - struct device_node *link = of_parse_phandle(dn, "link", 0); const char *name = of_get_property(dn, "label", NULL); + bool link = of_property_read_bool(dn, "link"); + + dp->dn = dn; if (ethernet) { struct net_device *master; @@ -567,24 +627,17 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) if (!master) return -EPROBE_DEFER; - dp->type = DSA_PORT_TYPE_CPU; - dp->master = master; - } else if (link) { - dp->type = DSA_PORT_TYPE_DSA; - } else { - if (!name) - name = "eth%d"; - - dp->type = DSA_PORT_TYPE_USER; - dp->name = name; + return dsa_port_parse_cpu(dp, master); } - dp->dn = dn; + if (link) + return dsa_port_parse_dsa(dp); - return 0; + return dsa_port_parse_user(dp, name); } -static int dsa_parse_ports_of(struct device_node *dn, struct dsa_switch *ds) +static int dsa_switch_parse_ports_of(struct dsa_switch *ds, + struct device_node *dn) { struct device_node *ports, *port; struct dsa_port *dp; @@ -615,6 +668,39 @@ static int dsa_parse_ports_of(struct device_node *dn, struct dsa_switch *ds) return 0; } +static int dsa_switch_parse_member_of(struct dsa_switch *ds, + struct device_node *dn) +{ + u32 m[2] = { 0, 0 }; + int sz; + + /* Don't error out if this optional property isn't found */ + sz = of_property_read_variable_u32_array(dn, "dsa,member", m, 2, 2); + if (sz < 0 && sz != -EINVAL) + return sz; + + ds->index = m[1]; + if (ds->index >= DSA_MAX_SWITCHES) + return -EINVAL; + + ds->dst = dsa_tree_touch(m[0]); + if (!ds->dst) + return -ENOMEM; + + return 0; +} + +static int dsa_switch_parse_of(struct dsa_switch *ds, struct device_node *dn) +{ + int err; + + err = dsa_switch_parse_member_of(ds, dn); + if (err) + return err; + + return dsa_switch_parse_ports_of(ds, dn); +} + static int dsa_port_parse(struct dsa_port *dp, const char *name, struct device *dev) { @@ -627,20 +713,17 @@ static int dsa_port_parse(struct dsa_port *dp, const char *name, dev_put(master); - dp->type = DSA_PORT_TYPE_CPU; - dp->master = master; - } else if (!strcmp(name, "dsa")) { - dp->type = DSA_PORT_TYPE_DSA; - } else { - dp->type = DSA_PORT_TYPE_USER; + return dsa_port_parse_cpu(dp, master); } - dp->name = name; + if (!strcmp(name, "dsa")) + return dsa_port_parse_dsa(dp); - return 0; + return dsa_port_parse_user(dp, name); } -static int dsa_parse_ports(struct dsa_chip_data *cd, struct dsa_switch *ds) +static int dsa_switch_parse_ports(struct dsa_switch *ds, + struct dsa_chip_data *cd) { bool valid_name_found = false; struct dsa_port *dp; @@ -670,40 +753,19 @@ static int dsa_parse_ports(struct dsa_chip_data *cd, struct dsa_switch *ds) return 0; } -static int dsa_parse_member_dn(struct device_node *np, u32 *tree, u32 *index) +static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd) { - int err; - - *tree = *index = 0; - - err = of_property_read_u32_index(np, "dsa,member", 0, tree); - if (err) { - /* Does not exist, but it is optional */ - if (err == -EINVAL) - return 0; - return err; - } + ds->cd = cd; - err = of_property_read_u32_index(np, "dsa,member", 1, index); - if (err) - return err; - - if (*index >= DSA_MAX_SWITCHES) - return -EINVAL; - - return 0; -} - -static int dsa_parse_member(struct dsa_chip_data *pd, u32 *tree, u32 *index) -{ - if (!pd) - return -ENODEV; - - /* We do not support complex trees with dsa_chip_data */ - *tree = 0; - *index = 0; + /* We don't support interconnected switches nor multiple trees via + * platform data, so this is the unique switch of the tree. + */ + ds->index = 0; + ds->dst = dsa_tree_touch(0); + if (!ds->dst) + return -ENOMEM; - return 0; + return dsa_switch_parse_ports(ds, cd); } static int _dsa_register_switch(struct dsa_switch *ds) @@ -711,58 +773,37 @@ static int _dsa_register_switch(struct dsa_switch *ds) struct dsa_chip_data *pdata = ds->dev->platform_data; struct device_node *np = ds->dev->of_node; struct dsa_switch_tree *dst; - u32 tree, index; + unsigned int index; int i, err; - if (np) { - err = dsa_parse_member_dn(np, &tree, &index); - if (err) - return err; + if (np) + err = dsa_switch_parse_of(ds, np); + else if (pdata) + err = dsa_switch_parse(ds, pdata); + else + err = -ENODEV; - err = dsa_parse_ports_of(np, ds); - if (err) - return err; - } else { - err = dsa_parse_member(pdata, &tree, &index); - if (err) - return err; - - err = dsa_parse_ports(pdata, ds); - if (err) - return err; - } - - dst = dsa_get_dst(tree); - if (!dst) { - dst = dsa_add_dst(tree); - if (!dst) - return -ENOMEM; - } - - if (dst->ds[index]) { - err = -EBUSY; - goto out; - } + if (err) + return err; - ds->dst = dst; - ds->index = index; - ds->cd = pdata; + index = ds->index; + dst = ds->dst; /* Initialize the routing table */ for (i = 0; i < DSA_MAX_SWITCHES; ++i) ds->rtable[i] = DSA_RTABLE_NONE; - dsa_dst_add_ds(dst, ds, index); + err = dsa_tree_add_switch(dst, ds); + if (err) + return err; err = dsa_dst_complete(dst); if (err < 0) goto out_del_dst; - if (err == 1) { - /* Not all switches registered yet */ - err = 0; - goto out; - } + /* Not all switches registered yet */ + if (err == 1) + return 0; if (dst->applied) { pr_info("DSA: Disjoint trees?\n"); @@ -779,13 +820,10 @@ static int _dsa_register_switch(struct dsa_switch *ds) goto out_del_dst; } - dsa_put_dst(dst); return 0; out_del_dst: - dsa_dst_del_ds(dst, ds, ds->index); -out: - dsa_put_dst(dst); + dsa_tree_remove_switch(dst, index); return err; } @@ -827,10 +865,11 @@ EXPORT_SYMBOL_GPL(dsa_register_switch); static void _dsa_unregister_switch(struct dsa_switch *ds) { struct dsa_switch_tree *dst = ds->dst; + unsigned int index = ds->index; dsa_dst_unapply(dst); - dsa_dst_del_ds(dst, ds, ds->index); + dsa_tree_remove_switch(dst, index); } void dsa_unregister_switch(struct dsa_switch *ds) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 9b75d0ac4092..814ced75a0cc 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -55,7 +55,7 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds) ds->slave_mii_bus->read = dsa_slave_phy_read; ds->slave_mii_bus->write = dsa_slave_phy_write; snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d.%d", - ds->dst->tree, ds->index); + ds->dst->index, ds->index); ds->slave_mii_bus->parent = ds->dev; ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a7a0f316eb86..c4cb19ed4628 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -447,6 +447,7 @@ void tcp_init_sock(struct sock *sk) tcp_assign_congestion_control(sk); tp->tsoffset = 0; + tp->rack.reo_wnd_steps = 1; sk->sk_state = TCP_CLOSE; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8393b405ea98..0ada8bfc2ebd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -856,6 +856,7 @@ void tcp_disable_fack(struct tcp_sock *tp) static void tcp_dsack_seen(struct tcp_sock *tp) { tp->rx_opt.sack_ok |= TCP_DSACK_SEEN; + tp->rack.dsack_seen = 1; } static void tcp_update_reordering(struct sock *sk, const int metric, @@ -2408,6 +2409,8 @@ static bool tcp_try_undo_recovery(struct sock *sk) mib_idx = LINUX_MIB_TCPFULLUNDO; NET_INC_STATS(sock_net(sk), mib_idx); + } else if (tp->rack.reo_wnd_persist) { + tp->rack.reo_wnd_persist--; } if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { /* Hold old state until something *above* high_seq @@ -2427,6 +2430,8 @@ static bool tcp_try_undo_dsack(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); if (tp->undo_marker && !tp->undo_retrans) { + tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH, + tp->rack.reo_wnd_persist + 1); DBGUNDO(sk, "D-SACK"); tcp_undo_cwnd_reduction(sk, false); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); @@ -3644,6 +3649,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked, &sack_state); + tcp_rack_update_reo_wnd(sk, &rs); + if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag); /* If needed, reset TLP/RTO timer; RACK may later override this. */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 3c65c1a3f944..4bb86580decd 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -551,6 +551,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->syn_data_acked = 0; newtp->rack.mstamp = 0; newtp->rack.advanced = 0; + newtp->rack.reo_wnd_steps = 1; + newtp->rack.last_delivered = 0; + newtp->rack.reo_wnd_persist = 0; + newtp->rack.dsack_seen = 0; __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b98b2f7f07e7..a9d917e4dad5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -381,7 +381,6 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) { skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum = 0; TCP_SKB_CB(skb)->tcp_flags = flags; TCP_SKB_CB(skb)->sacked = 0; diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index ac3e9c6d3a3d..d3ea89020c69 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -44,6 +44,7 @@ static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) { struct tcp_sock *tp = tcp_sk(sk); + u32 min_rtt = tcp_min_rtt(tp); struct sk_buff *skb, *n; u32 reo_wnd; @@ -54,8 +55,10 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) * to queuing or delayed ACKs. */ reo_wnd = 1000; - if ((tp->rack.reord || !tp->lost_out) && tcp_min_rtt(tp) != ~0U) - reo_wnd = max(tcp_min_rtt(tp) >> 2, reo_wnd); + if ((tp->rack.reord || !tp->lost_out) && min_rtt != ~0U) { + reo_wnd = max((min_rtt >> 2) * tp->rack.reo_wnd_steps, reo_wnd); + reo_wnd = min(reo_wnd, tp->srtt_us >> 3); + } list_for_each_entry_safe(skb, n, &tp->tsorted_sent_queue, tcp_tsorted_anchor) { @@ -160,3 +163,44 @@ void tcp_rack_reo_timeout(struct sock *sk) if (inet_csk(sk)->icsk_pending != ICSK_TIME_RETRANS) tcp_rearm_rto(sk); } + +/* Updates the RACK's reo_wnd based on DSACK and no. of recoveries. + * + * If DSACK is received, increment reo_wnd by min_rtt/4 (upper bounded + * by srtt), since there is possibility that spurious retransmission was + * due to reordering delay longer than reo_wnd. + * + * Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16) + * no. of successful recoveries (accounts for full DSACK-based loss + * recovery undo). After that, reset it to default (min_rtt/4). + * + * At max, reo_wnd is incremented only once per rtt. So that the new + * DSACK on which we are reacting, is due to the spurious retx (approx) + * after the reo_wnd has been updated last time. + * + * reo_wnd is tracked in terms of steps (of min_rtt/4), rather than + * absolute value to account for change in rtt. + */ +void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND || + !rs->prior_delivered) + return; + + /* Disregard DSACK if a rtt has not passed since we adjusted reo_wnd */ + if (before(rs->prior_delivered, tp->rack.last_delivered)) + tp->rack.dsack_seen = 0; + + /* Adjust the reo_wnd if update is pending */ + if (tp->rack.dsack_seen) { + tp->rack.reo_wnd_steps = min_t(u32, 0xFF, + tp->rack.reo_wnd_steps + 1); + tp->rack.dsack_seen = 0; + tp->rack.last_delivered = tp->delivered; + tp->rack.reo_wnd_persist = TCP_RACK_RECOVERY_THRESH; + } else if (!tp->rack.reo_wnd_persist) { + tp->rack.reo_wnd_steps = 1; + } +} diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 035a1ef1f2d8..16df6dd44b98 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -107,26 +107,23 @@ static int tcp_orphan_retries(struct sock *sk, bool alive) static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) { - struct net *net = sock_net(sk); + const struct net *net = sock_net(sk); + int mss; /* Black hole detection */ - if (net->ipv4.sysctl_tcp_mtu_probing) { - if (!icsk->icsk_mtup.enabled) { - icsk->icsk_mtup.enabled = 1; - icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; - tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); - } else { - struct net *net = sock_net(sk); - struct tcp_sock *tp = tcp_sk(sk); - int mss; - - mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; - mss = min(net->ipv4.sysctl_tcp_base_mss, mss); - mss = max(mss, 68 - tp->tcp_header_len); - icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); - tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); - } + if (!net->ipv4.sysctl_tcp_mtu_probing) + return; + + if (!icsk->icsk_mtup.enabled) { + icsk->icsk_mtup.enabled = 1; + icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; + } else { + mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; + mss = min(net->ipv4.sysctl_tcp_base_mss, mss); + mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len); + icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); } + tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index c3aec6227c91..4d38ac044cee 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1848,7 +1848,8 @@ static struct genl_family dp_datapath_genl_family __ro_after_init = { /* Called with ovs_mutex or RCU read lock. */ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, - u32 portid, u32 seq, u32 flags, u8 cmd) + struct net *net, u32 portid, u32 seq, + u32 flags, u8 cmd) { struct ovs_header *ovs_header; struct ovs_vport_stats vport_stats; @@ -1864,9 +1865,17 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) || nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) || nla_put_string(skb, OVS_VPORT_ATTR_NAME, - ovs_vport_name(vport))) + ovs_vport_name(vport)) || + nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex)) goto nla_put_failure; + if (!net_eq(net, dev_net(vport->dev))) { + int id = peernet2id_alloc(net, dev_net(vport->dev)); + + if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id)) + goto nla_put_failure; + } + ovs_vport_get_stats(vport, &vport_stats); if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats), &vport_stats, @@ -1896,8 +1905,8 @@ static struct sk_buff *ovs_vport_cmd_alloc_info(void) } /* Called with ovs_mutex, only via ovs_dp_notify_wq(). */ -struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, - u32 seq, u8 cmd) +struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net, + u32 portid, u32 seq, u8 cmd) { struct sk_buff *skb; int retval; @@ -1906,7 +1915,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd); + retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd); BUG_ON(retval < 0); return skb; @@ -1920,6 +1929,8 @@ static struct vport *lookup_vport(struct net *net, struct datapath *dp; struct vport *vport; + if (a[OVS_VPORT_ATTR_IFINDEX]) + return ERR_PTR(-EOPNOTSUPP); if (a[OVS_VPORT_ATTR_NAME]) { vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME])); if (!vport) @@ -1944,6 +1955,7 @@ static struct vport *lookup_vport(struct net *net, return vport; } else return ERR_PTR(-EINVAL); + } /* Called with ovs_mutex */ @@ -1983,6 +1995,8 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] || !a[OVS_VPORT_ATTR_UPCALL_PID]) return -EINVAL; + if (a[OVS_VPORT_ATTR_IFINDEX]) + return -EOPNOTSUPP; port_no = a[OVS_VPORT_ATTR_PORT_NO] ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0; @@ -2032,8 +2046,9 @@ restart: goto exit_unlock_free; } - err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, - info->snd_seq, 0, OVS_VPORT_CMD_NEW); + err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), + info->snd_portid, info->snd_seq, 0, + OVS_VPORT_CMD_NEW); if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom) update_headroom(dp); @@ -2090,8 +2105,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) goto exit_unlock_free; } - err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, - info->snd_seq, 0, OVS_VPORT_CMD_NEW); + err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), + info->snd_portid, info->snd_seq, 0, + OVS_VPORT_CMD_NEW); BUG_ON(err < 0); ovs_unlock(); @@ -2128,8 +2144,9 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) goto exit_unlock_free; } - err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, - info->snd_seq, 0, OVS_VPORT_CMD_DEL); + err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), + info->snd_portid, info->snd_seq, 0, + OVS_VPORT_CMD_DEL); BUG_ON(err < 0); /* the vport deletion may trigger dp headroom update */ @@ -2169,8 +2186,9 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) err = PTR_ERR(vport); if (IS_ERR(vport)) goto exit_unlock_free; - err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, - info->snd_seq, 0, OVS_VPORT_CMD_NEW); + err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), + info->snd_portid, info->snd_seq, 0, + OVS_VPORT_CMD_NEW); BUG_ON(err < 0); rcu_read_unlock(); @@ -2202,6 +2220,7 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) { if (j >= skip && ovs_vport_cmd_fill_info(vport, skb, + sock_net(skb->sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, @@ -2228,6 +2247,8 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 }, [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, + [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 }, + [OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 }, }; static const struct genl_ops dp_vport_genl_ops[] = { diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 480600649d0b..4a104ef9e12c 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -200,8 +200,8 @@ int ovs_dp_upcall(struct datapath *, struct sk_buff *, uint32_t cutlen); const char *ovs_dp_name(const struct datapath *dp); -struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq, - u8 cmd); +struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net, + u32 portid, u32 seq, u8 cmd); int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, const struct sw_flow_actions *, struct sw_flow_key *); diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c index 653d073bae45..f3ee2f2825c0 100644 --- a/net/openvswitch/dp_notify.c +++ b/net/openvswitch/dp_notify.c @@ -30,8 +30,8 @@ static void dp_detach_port_notify(struct vport *vport) struct datapath *dp; dp = vport->dp; - notify = ovs_vport_cmd_build_info(vport, 0, 0, - OVS_VPORT_CMD_DEL); + notify = ovs_vport_cmd_build_info(vport, ovs_dp_get_net(dp), + 0, 0, OVS_VPORT_CMD_DEL); ovs_dp_detach_port(vport); if (IS_ERR(notify)) { genl_set_err(&dp_vport_genl_family, ovs_dp_get_net(dp), 0, diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index bc3edde1b9d7..dc9bd9a0070b 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -374,7 +374,7 @@ static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog) } static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, - const struct tcf_proto *tp) + u32 gen_flags, const struct tcf_proto *tp) { struct bpf_prog *fp; char *name = NULL; @@ -382,7 +382,11 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, bpf_fd = nla_get_u32(tb[TCA_BPF_FD]); - fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS); + if (gen_flags & TCA_CLS_FLAGS_SKIP_SW) + fp = bpf_prog_get_type_dev(bpf_fd, BPF_PROG_TYPE_SCHED_CLS, + qdisc_dev(tp->q)); + else + fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS); if (IS_ERR(fp)) return PTR_ERR(fp); @@ -440,7 +444,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, prog->gen_flags = gen_flags; ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) : - cls_bpf_prog_from_efd(tb, prog, tp); + cls_bpf_prog_from_efd(tb, prog, gen_flags, tp); if (ret < 0) return ret; diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index f0288269dae8..aa7017098b2a 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -50,6 +50,37 @@ #include "main.h" +void p_err(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + if (json_output) { + jsonw_start_object(json_wtr); + jsonw_name(json_wtr, "error"); + jsonw_vprintf_enquote(json_wtr, fmt, ap); + jsonw_end_object(json_wtr); + } else { + fprintf(stderr, "Error: "); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + } + va_end(ap); +} + +void p_info(const char *fmt, ...) +{ + va_list ap; + + if (json_output) + return; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + va_end(ap); +} + static bool is_bpffs(char *path) { struct statfs st_fs; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index d315d01be645..ff5ad05b137b 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -71,6 +71,9 @@ extern const char *bin_name; extern json_writer_t *json_wtr; extern bool json_output; +void p_err(const char *fmt, ...); +void p_info(const char *fmt, ...); + bool is_prefix(const char *pfx, const char *str); void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep); void usage(void) __attribute__((noreturn)); @@ -97,35 +100,4 @@ int prog_parse_fd(int *argc, char ***argv); void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes); void print_hex_data_json(uint8_t *data, size_t len); -static inline void p_err(const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - if (json_output) { - jsonw_start_object(json_wtr); - jsonw_name(json_wtr, "error"); - jsonw_vprintf_enquote(json_wtr, fmt, ap); - jsonw_end_object(json_wtr); - } else { - fprintf(stderr, "Error: "); - vfprintf(stderr, fmt, ap); - fprintf(stderr, "\n"); - } - va_end(ap); -} - -static inline void p_info(const char *fmt, ...) -{ - va_list ap; - - if (json_output) - return; - - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - fprintf(stderr, "\n"); - va_end(ap); -} - #endif diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 250f80fd46aa..d3ab808dc882 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -41,6 +41,7 @@ #include <string.h> #include <time.h> #include <unistd.h> +#include <net/if.h> #include <sys/types.h> #include <sys/stat.h> @@ -229,6 +230,21 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) info->tag[0], info->tag[1], info->tag[2], info->tag[3], info->tag[4], info->tag[5], info->tag[6], info->tag[7]); + if (info->status & BPF_PROG_STATUS_DEV_BOUND) { + jsonw_name(json_wtr, "dev"); + if (info->ifindex) { + char name[IF_NAMESIZE]; + + if (!if_indextoname(info->ifindex, name)) + jsonw_printf(json_wtr, "\"ifindex:%d\"", + info->ifindex); + else + jsonw_printf(json_wtr, "\"%s\"", name); + } else { + jsonw_printf(json_wtr, "\"unknown\""); + } + } + if (info->load_time) { char buf[32]; @@ -274,6 +290,21 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) printf("tag "); fprint_hex(stdout, info->tag, BPF_TAG_SIZE, ""); + printf(" "); + + if (info->status & BPF_PROG_STATUS_DEV_BOUND) { + printf("dev "); + if (info->ifindex) { + char name[IF_NAMESIZE]; + + if (!if_indextoname(info->ifindex, name)) + printf("ifindex:%d ", info->ifindex); + else + printf("%s ", name); + } else { + printf("unknown "); + } + } printf("\n"); if (info->load_time) { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7cebba491011..e92f62cf933a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -259,6 +259,7 @@ union bpf_attr { __u32 kern_version; /* checked when prog_type=kprobe */ __u32 prog_flags; char prog_name[BPF_OBJ_NAME_LEN]; + __u32 prog_target_ifindex; /* ifindex of netdev to prep for */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -893,6 +894,10 @@ enum sk_action { #define BPF_TAG_SIZE 8 +enum bpf_prog_status { + BPF_PROG_STATUS_DEV_BOUND = (1 << 0), +}; + struct bpf_prog_info { __u32 type; __u32 id; @@ -906,6 +911,8 @@ struct bpf_prog_info { __u32 nr_map_ids; __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u32 status; } __attribute__((aligned(8))); struct bpf_map_info { |