Diffstat (limited to 'drivers/net/ethernet/mellanox')
168 files changed, 24218 insertions, 4586 deletions
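Before the raw diff: the mlx4 hunks below migrate to two newer kernel callback shapes, the `.ndo_tx_timeout` handler that receives the index of the queue that timed out, and the devlink `.reload` op split into `.reload_down`/`.reload_up`. The following is a minimal illustrative sketch of those callback signatures only, not code from the patch; the `my_*` names are hypothetical placeholders.

```c
/*
 * Illustrative sketch, not part of the patch below. It mirrors the two
 * callback shapes the mlx4 hunks move to: .ndo_tx_timeout now receives
 * the timed-out queue index, and devlink reload is split into
 * .reload_down/.reload_up. The my_* names are placeholders.
 */
#include <linux/netdevice.h>
#include <net/devlink.h>

static void my_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
	/* Only the queue that timed out is reported; no need to scan all rings. */
	netdev_warn(dev, "TX timeout on queue %u\n", txqueue);
}

static int my_reload_down(struct devlink *devlink, bool netns_change,
			  struct netlink_ext_ack *extack)
{
	if (netns_change) {
		NL_SET_ERR_MSG_MOD(extack, "Namespace change is not supported");
		return -EOPNOTSUPP;
	}
	/* Tear down device state here (mlx4 calls mlx4_unload_one()). */
	return 0;
}

static int my_reload_up(struct devlink *devlink, struct netlink_ext_ack *extack)
{
	/* Re-initialize the device here (mlx4 calls its load path again). */
	return 0;
}

static const struct devlink_ops my_devlink_ops = {
	.reload_down = my_reload_down,
	.reload_up   = my_reload_up,
};
```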
diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 87e90b5d4d7d..5b11557f1ae4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -210,7 +210,7 @@ static void mlx4_handle_error_state(struct mlx4_dev_persistent *persist) mutex_lock(&persist->interface_state_mutex); if (persist->interface_state & MLX4_INTERFACE_STATE_UP && !(persist->interface_state & MLX4_INTERFACE_STATE_DELETION)) { - err = mlx4_restart_one(persist->pdev, false, NULL); + err = mlx4_restart_one(persist->pdev); mlx4_info(persist->dev, "mlx4_restart_one was ended, ret=%d\n", err); } diff --git a/drivers/net/ethernet/mellanox/mlx4/crdump.c b/drivers/net/ethernet/mellanox/mlx4/crdump.c index 88316c743820..64ed725aec28 100644 --- a/drivers/net/ethernet/mellanox/mlx4/crdump.c +++ b/drivers/net/ethernet/mellanox/mlx4/crdump.c @@ -99,8 +99,7 @@ static void mlx4_crdump_collect_crspace(struct mlx4_dev *dev, readl(cr_space + offset); err = devlink_region_snapshot_create(crdump->region_crspace, - cr_res_size, crspace_data, - id, &kvfree); + crspace_data, id, &kvfree); if (err) { kvfree(crspace_data); mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n", @@ -139,9 +138,7 @@ static void mlx4_crdump_collect_fw_health(struct mlx4_dev *dev, readl(health_buf_start + offset); err = devlink_region_snapshot_create(crdump->region_fw_health, - HEALTH_BUFFER_SIZE, - health_data, - id, &kvfree); + health_data, id, &kvfree); if (err) { kvfree(health_data); mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n", @@ -185,7 +182,7 @@ int mlx4_crdump_collect(struct mlx4_dev *dev) crdump_enable_crspace_access(dev, cr_space); /* Get the available snapshot ID for the dumps */ - id = devlink_region_shapshot_id_get(devlink); + id = devlink_region_snapshot_id_get(devlink); /* Try to capture dumps */ mlx4_crdump_collect_crspace(dev, cr_space, id); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 94c59939a8cf..8bf1f08fdee2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -611,7 +611,7 @@ static u32 ptys_get_active_port(struct mlx4_ptys_reg *ptys_reg) } #define MLX4_LINK_MODES_SZ \ - (FIELD_SIZEOF(struct mlx4_ptys_reg, eth_proto_cap) * 8) + (sizeof_field(struct mlx4_ptys_reg, eth_proto_cap) * 8) enum ethtool_report { SUPPORTED = 0, @@ -639,7 +639,7 @@ static unsigned long *ptys2ethtool_link_mode(struct ptys2ethtool_config *cfg, #define MLX4_BUILD_PTYS2ETHTOOL_CONFIG(reg_, speed_, ...) 
\ ({ \ struct ptys2ethtool_config *cfg; \ - const unsigned int modes[] = { __VA_ARGS__ }; \ + static const unsigned int modes[] = { __VA_ARGS__ }; \ unsigned int i; \ cfg = &ptys2ethtool_map[reg_]; \ cfg->speed = speed_; \ @@ -1745,6 +1745,7 @@ static int mlx4_en_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, err = mlx4_en_get_flow(dev, cmd, cmd->fs.location); break; case ETHTOOL_GRXCLSRLALL: + cmd->data = MAX_NUM_OF_FS_RULES; while ((!err || err == -ENOENT) && priority < cmd->rule_cnt) { err = mlx4_en_get_flow(dev, cmd, i); if (!err) @@ -1811,6 +1812,7 @@ static int mlx4_en_set_channels(struct net_device *dev, struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_port_profile new_prof; struct mlx4_en_priv *tmp; + int total_tx_count; int port_up = 0; int xdp_count; int err = 0; @@ -1825,13 +1827,12 @@ static int mlx4_en_set_channels(struct net_device *dev, mutex_lock(&mdev->state_lock); xdp_count = priv->tx_ring_num[TX_XDP] ? channel->rx_count : 0; - if (channel->tx_count * priv->prof->num_up + xdp_count > - priv->mdev->profile.max_num_tx_rings_p_up * priv->prof->num_up) { + total_tx_count = channel->tx_count * priv->prof->num_up + xdp_count; + if (total_tx_count > MAX_TX_RINGS) { err = -EINVAL; en_err(priv, "Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n", - channel->tx_count * priv->prof->num_up + xdp_count, - MAX_TX_RINGS); + total_tx_count, MAX_TX_RINGS); goto out; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index c1438ae52a11..43dcbd8214c6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -91,6 +91,7 @@ int mlx4_en_alloc_tx_queue_per_tc(struct net_device *dev, u8 tc) struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_port_profile new_prof; struct mlx4_en_priv *tmp; + int total_count; int port_up = 0; int err = 0; @@ -104,6 +105,14 @@ int mlx4_en_alloc_tx_queue_per_tc(struct net_device *dev, u8 tc) MLX4_EN_NUM_UP_HIGH; new_prof.tx_ring_num[TX] = new_prof.num_tx_rings_p_up * new_prof.num_up; + total_count = new_prof.tx_ring_num[TX] + new_prof.tx_ring_num[TX_XDP]; + if (total_count > MAX_TX_RINGS) { + err = -EINVAL; + en_err(priv, + "Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n", + total_count, MAX_TX_RINGS); + goto out; + } err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true); if (err) goto out; @@ -1354,24 +1363,18 @@ static void mlx4_en_delete_rss_steer_rules(struct mlx4_en_priv *priv) } } -static void mlx4_en_tx_timeout(struct net_device *dev) +static void mlx4_en_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; - int i; + struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX][txqueue]; if (netif_msg_timer(priv)) en_warn(priv, "Tx timeout called on port:%d\n", priv->port); - for (i = 0; i < priv->tx_ring_num[TX]; i++) { - struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX][i]; - - if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, i))) - continue; - en_warn(priv, "TX timeout on queue: %d, QP: 0x%x, CQ: 0x%x, Cons: 0x%x, Prod: 0x%x\n", - i, tx_ring->qpn, tx_ring->sp_cqn, - tx_ring->cons, tx_ring->prod); - } + en_warn(priv, "TX timeout on queue: %d, QP: 0x%x, CQ: 0x%x, Cons: 0x%x, Prod: 0x%x\n", + txqueue, tx_ring->qpn, tx_ring->sp_cqn, + tx_ring->cons, tx_ring->prod); priv->port_stats.tx_timeout++; en_dbg(DRV, priv, "Scheduling watchdog\n"); @@ -2286,11 +2289,7 @@ int 
mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv, lockdep_is_held(&priv->mdev->state_lock)); if (xdp_prog && carry_xdp_prog) { - xdp_prog = bpf_prog_add(xdp_prog, tmp->rx_ring_num); - if (IS_ERR(xdp_prog)) { - mlx4_en_free_resources(tmp); - return PTR_ERR(xdp_prog); - } + bpf_prog_add(xdp_prog, tmp->rx_ring_num); for (i = 0; i < tmp->rx_ring_num; i++) rcu_assign_pointer(tmp->rx_ring[i]->xdp_prog, xdp_prog); @@ -2645,14 +2644,6 @@ out: en_err(priv, "failed setting L2 tunnel configuration ret %d\n", ret); return; } - - /* set offloads */ - priv->dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | - NETIF_F_RXCSUM | - NETIF_F_TSO | NETIF_F_TSO6 | - NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM | - NETIF_F_GSO_PARTIAL; } static void mlx4_en_del_vxlan_offloads(struct work_struct *work) @@ -2660,14 +2651,6 @@ static void mlx4_en_del_vxlan_offloads(struct work_struct *work) int ret; struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, vxlan_del_task); - /* unset offloads */ - priv->dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | - NETIF_F_RXCSUM | - NETIF_F_TSO | NETIF_F_TSO6 | - NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM | - NETIF_F_GSO_PARTIAL); - ret = mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port, VXLAN_STEER_BY_OUTER_MAC, 0); if (ret) @@ -2798,11 +2781,9 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) * program for a new one. */ if (priv->tx_ring_num[TX_XDP] == xdp_ring_num) { - if (prog) { - prog = bpf_prog_add(prog, priv->rx_ring_num - 1); - if (IS_ERR(prog)) - return PTR_ERR(prog); - } + if (prog) + bpf_prog_add(prog, priv->rx_ring_num - 1); + mutex_lock(&mdev->state_lock); for (i = 0; i < priv->rx_ring_num; i++) { old_prog = rcu_dereference_protected( @@ -2823,13 +2804,8 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) if (!tmp) return -ENOMEM; - if (prog) { - prog = bpf_prog_add(prog, priv->rx_ring_num - 1); - if (IS_ERR(prog)) { - err = PTR_ERR(prog); - goto out; - } - } + if (prog) + bpf_prog_add(prog, priv->rx_ring_num - 1); mutex_lock(&mdev->state_lock); memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile)); @@ -2878,7 +2854,6 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) unlock_out: mutex_unlock(&mdev->state_lock); -out: kfree(tmp); return err; } @@ -3415,6 +3390,23 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, if (mdev->LSO_support) dev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; + if (mdev->dev->caps.tunnel_offload_mode == + MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { + dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL; + dev->features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL; + dev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; + dev->hw_enc_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_RXCSUM | + NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL; + } + dev->vlan_features = dev->hw_features; dev->hw_features |= NETIF_F_RXCSUM | NETIF_F_RXHASH; @@ -3483,16 +3475,6 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv->rss_hash_fn = ETH_RSS_HASH_TOP; } - if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { - dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM | - NETIF_F_GSO_PARTIAL; - dev->features |= NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM | - NETIF_F_GSO_PARTIAL; - 
dev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; - } - /* MTU range: 68 - hw-specific max */ dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = priv->max_mtu; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 36a92b19e613..4d5ca302c067 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -772,9 +772,7 @@ static bool mlx4_en_build_dma_wqe(struct mlx4_en_priv *priv, /* Map fragments if any */ for (i_frag = shinfo->nr_frags - 1; i_frag >= 0; i_frag--) { - const struct skb_frag_struct *frag; - - frag = &shinfo->frags[i_frag]; + const skb_frag_t *frag = &shinfo->frags[i_frag]; byte_count = skb_frag_size(frag); dma = skb_frag_dma_map(ddev, frag, 0, byte_count, diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 1f6e16d5ea6b..5716c3d2bb86 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -514,8 +514,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz; /* * Subtract 1 from the limit because we need to allocate a - * spare CQE so the HCA HW can tell the difference between an - * empty CQ and a full CQ. + * spare CQE to enable resizing the CQ. */ dev->caps.max_cqes = dev_cap->max_cq_sz - 1; dev->caps.reserved_cqs = dev_cap->reserved_cqs; @@ -2240,7 +2239,7 @@ static int mlx4_validate_optimized_steering(struct mlx4_dev *dev) for (i = 1; i <= dev->caps.num_ports; i++) { if (mlx4_dev_port(dev, i, &port_cap)) { mlx4_err(dev, - "QUERY_DEV_CAP command failed, can't veify DMFS high rate steering.\n"); + "QUERY_DEV_CAP command failed, can't verify DMFS high rate steering.\n"); } else if ((dev->caps.dmfs_high_steer_mode != MLX4_STEERING_DMFS_A0_DEFAULT) && (port_cap.dmfs_optimized_state == @@ -2292,23 +2291,31 @@ static int mlx4_init_fw(struct mlx4_dev *dev) static int mlx4_init_hca(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_init_hca_param *init_hca = NULL; + struct mlx4_dev_cap *dev_cap = NULL; struct mlx4_adapter adapter; - struct mlx4_dev_cap dev_cap; struct mlx4_profile profile; - struct mlx4_init_hca_param init_hca; u64 icm_size; struct mlx4_config_dev_params params; int err; if (!mlx4_is_slave(dev)) { - err = mlx4_dev_cap(dev, &dev_cap); + dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL); + init_hca = kzalloc(sizeof(*init_hca), GFP_KERNEL); + + if (!dev_cap || !init_hca) { + err = -ENOMEM; + goto out_free; + } + + err = mlx4_dev_cap(dev, dev_cap); if (err) { mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n"); - return err; + goto out_free; } - choose_steering_mode(dev, &dev_cap); - choose_tunnel_offload_mode(dev, &dev_cap); + choose_steering_mode(dev, dev_cap); + choose_tunnel_offload_mode(dev, dev_cap); if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC && mlx4_is_master(dev)) @@ -2331,48 +2338,48 @@ static int mlx4_init_hca(struct mlx4_dev *dev) MLX4_STEERING_MODE_DEVICE_MANAGED) profile.num_mcg = MLX4_FS_NUM_MCG; - icm_size = mlx4_make_profile(dev, &profile, &dev_cap, - &init_hca); + icm_size = mlx4_make_profile(dev, &profile, dev_cap, + init_hca); if ((long long) icm_size < 0) { err = icm_size; - return err; + goto out_free; } dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1; if (enable_4k_uar || !dev->persist->num_vfs) { - init_hca.log_uar_sz = ilog2(dev->caps.num_uars) + + init_hca->log_uar_sz = ilog2(dev->caps.num_uars) + 
PAGE_SHIFT - DEFAULT_UAR_PAGE_SHIFT; - init_hca.uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12; + init_hca->uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12; } else { - init_hca.log_uar_sz = ilog2(dev->caps.num_uars); - init_hca.uar_page_sz = PAGE_SHIFT - 12; + init_hca->log_uar_sz = ilog2(dev->caps.num_uars); + init_hca->uar_page_sz = PAGE_SHIFT - 12; } - init_hca.mw_enabled = 0; + init_hca->mw_enabled = 0; if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) - init_hca.mw_enabled = INIT_HCA_TPT_MW_ENABLE; + init_hca->mw_enabled = INIT_HCA_TPT_MW_ENABLE; - err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size); + err = mlx4_init_icm(dev, dev_cap, init_hca, icm_size); if (err) - return err; + goto out_free; - err = mlx4_INIT_HCA(dev, &init_hca); + err = mlx4_INIT_HCA(dev, init_hca); if (err) { mlx4_err(dev, "INIT_HCA command failed, aborting\n"); goto err_free_icm; } - if (dev_cap.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) { - err = mlx4_query_func(dev, &dev_cap); + if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) { + err = mlx4_query_func(dev, dev_cap); if (err < 0) { mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n"); goto err_close; } else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) { - dev->caps.num_eqs = dev_cap.max_eqs; - dev->caps.reserved_eqs = dev_cap.reserved_eqs; - dev->caps.reserved_uars = dev_cap.reserved_uars; + dev->caps.num_eqs = dev_cap->max_eqs; + dev->caps.reserved_eqs = dev_cap->reserved_eqs; + dev->caps.reserved_uars = dev_cap->reserved_uars; } } @@ -2381,14 +2388,13 @@ static int mlx4_init_hca(struct mlx4_dev *dev) * read HCA frequency by QUERY_HCA command */ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) { - memset(&init_hca, 0, sizeof(init_hca)); - err = mlx4_QUERY_HCA(dev, &init_hca); + err = mlx4_QUERY_HCA(dev, init_hca); if (err) { mlx4_err(dev, "QUERY_HCA command failed, disable timestamp\n"); dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; } else { dev->caps.hca_core_clock = - init_hca.hca_core_clock; + init_hca->hca_core_clock; } /* In case we got HCA frequency 0 - disable timestamping @@ -2464,7 +2470,8 @@ static int mlx4_init_hca(struct mlx4_dev *dev) priv->eq_table.inta_pin = adapter.inta_pin; memcpy(dev->board_id, adapter.board_id, sizeof(dev->board_id)); - return 0; + err = 0; + goto out_free; unmap_bf: unmap_internal_clock(dev); @@ -2483,6 +2490,10 @@ err_free_icm: if (!mlx4_is_slave(dev)) mlx4_free_icms(dev); +out_free: + kfree(dev_cap); + kfree(init_hca); + return err; } @@ -3919,26 +3930,47 @@ static void mlx4_devlink_param_load_driverinit_values(struct devlink *devlink) } } -static int mlx4_devlink_reload(struct devlink *devlink, - struct netlink_ext_ack *extack) +static void mlx4_restart_one_down(struct pci_dev *pdev); +static int mlx4_restart_one_up(struct pci_dev *pdev, bool reload, + struct devlink *devlink); + +static int mlx4_devlink_reload_down(struct devlink *devlink, bool netns_change, + struct netlink_ext_ack *extack) { struct mlx4_priv *priv = devlink_priv(devlink); struct mlx4_dev *dev = &priv->dev; struct mlx4_dev_persistent *persist = dev->persist; - int err; + if (netns_change) { + NL_SET_ERR_MSG_MOD(extack, "Namespace change is not supported"); + return -EOPNOTSUPP; + } if (persist->num_vfs) mlx4_warn(persist->dev, "Reload performed on PF, will cause reset on operating Virtual Functions\n"); - err = mlx4_restart_one(persist->pdev, true, devlink); + mlx4_restart_one_down(persist->pdev); + return 0; +} + +static int mlx4_devlink_reload_up(struct devlink *devlink, + struct netlink_ext_ack *extack) +{ + 
struct mlx4_priv *priv = devlink_priv(devlink); + struct mlx4_dev *dev = &priv->dev; + struct mlx4_dev_persistent *persist = dev->persist; + int err; + + err = mlx4_restart_one_up(persist->pdev, true, devlink); if (err) - mlx4_err(persist->dev, "mlx4_restart_one failed, ret=%d\n", err); + mlx4_err(persist->dev, "mlx4_restart_one_up failed, ret=%d\n", + err); return err; } static const struct devlink_ops mlx4_devlink_ops = { .port_type_set = mlx4_devlink_port_type_set, - .reload = mlx4_devlink_reload, + .reload_down = mlx4_devlink_reload_down, + .reload_up = mlx4_devlink_reload_up, }; static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) @@ -3982,6 +4014,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) goto err_params_unregister; devlink_params_publish(devlink); + devlink_reload_enable(devlink); pci_save_state(pdev); return 0; @@ -4093,6 +4126,8 @@ static void mlx4_remove_one(struct pci_dev *pdev) struct devlink *devlink = priv_to_devlink(priv); int active_vfs = 0; + devlink_reload_disable(devlink); + if (mlx4_is_slave(dev)) persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT; @@ -4151,7 +4186,13 @@ static int restore_current_port_types(struct mlx4_dev *dev, return err; } -int mlx4_restart_one(struct pci_dev *pdev, bool reload, struct devlink *devlink) +static void mlx4_restart_one_down(struct pci_dev *pdev) +{ + mlx4_unload_one(pdev); +} + +static int mlx4_restart_one_up(struct pci_dev *pdev, bool reload, + struct devlink *devlink) { struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); struct mlx4_dev *dev = persist->dev; @@ -4163,7 +4204,6 @@ int mlx4_restart_one(struct pci_dev *pdev, bool reload, struct devlink *devlink) total_vfs = dev->persist->num_vfs; memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs)); - mlx4_unload_one(pdev); if (reload) mlx4_devlink_param_load_driverinit_values(devlink); err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 1); @@ -4182,6 +4222,12 @@ int mlx4_restart_one(struct pci_dev *pdev, bool reload, struct devlink *devlink) return err; } +int mlx4_restart_one(struct pci_dev *pdev) +{ + mlx4_restart_one_down(pdev); + return mlx4_restart_one_up(pdev, false, NULL); +} + #define MLX_SP(id) { PCI_VDEVICE(MELLANOX, id), MLX4_PCI_DEV_FORCE_SENSE_PORT } #define MLX_VF(id) { PCI_VDEVICE(MELLANOX, id), MLX4_PCI_DEV_IS_VF } #define MLX_GN(id) { PCI_VDEVICE(MELLANOX, id), 0 } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 23f1b5b512c2..527b52e48276 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1043,8 +1043,7 @@ int mlx4_catas_init(struct mlx4_dev *dev); void mlx4_catas_end(struct mlx4_dev *dev); int mlx4_crdump_init(struct mlx4_dev *dev); void mlx4_crdump_end(struct mlx4_dev *dev); -int mlx4_restart_one(struct pci_dev *pdev, bool reload, - struct devlink *devlink); +int mlx4_restart_one(struct pci_dev *pdev); int mlx4_register_device(struct mlx4_dev *dev); void mlx4_unregister_device(struct mlx4_dev *dev); void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 4356f3a58002..1187ef1375e2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -471,12 +471,31 @@ void mlx4_init_quotas(struct mlx4_dev *dev) 
priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[pf]; } -static int get_max_gauranteed_vfs_counter(struct mlx4_dev *dev) +static int +mlx4_calc_res_counter_guaranteed(struct mlx4_dev *dev, + struct resource_allocator *res_alloc, + int vf) { - /* reduce the sink counter */ - return (dev->caps.max_counters - 1 - - (MLX4_PF_COUNTERS_PER_PORT * MLX4_MAX_PORTS)) - / MLX4_MAX_PORTS; + struct mlx4_active_ports actv_ports; + int ports, counters_guaranteed; + + /* For master, only allocate according to the number of phys ports */ + if (vf == mlx4_master_func_num(dev)) + return MLX4_PF_COUNTERS_PER_PORT * dev->caps.num_ports; + + /* calculate real number of ports for the VF */ + actv_ports = mlx4_get_active_ports(dev, vf); + ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports); + counters_guaranteed = ports * MLX4_VF_COUNTERS_PER_PORT; + + /* If we do not have enough counters for this VF, do not + * allocate any for it. '-1' to reduce the sink counter. + */ + if ((res_alloc->res_reserved + counters_guaranteed) > + (dev->caps.max_counters - 1)) + return 0; + + return counters_guaranteed; } int mlx4_init_resource_tracker(struct mlx4_dev *dev) @@ -484,7 +503,6 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); int i, j; int t; - int max_vfs_guarantee_counter = get_max_gauranteed_vfs_counter(dev); priv->mfunc.master.res_tracker.slave_list = kcalloc(dev->num_slaves, sizeof(struct slave_list), @@ -603,16 +621,8 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) break; case RES_COUNTER: res_alloc->quota[t] = dev->caps.max_counters; - if (t == mlx4_master_func_num(dev)) - res_alloc->guaranteed[t] = - MLX4_PF_COUNTERS_PER_PORT * - MLX4_MAX_PORTS; - else if (t <= max_vfs_guarantee_counter) - res_alloc->guaranteed[t] = - MLX4_VF_COUNTERS_PER_PORT * - MLX4_MAX_PORTS; - else - res_alloc->guaranteed[t] = 0; + res_alloc->guaranteed[t] = + mlx4_calc_res_counter_guaranteed(dev, res_alloc, t); break; default: break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 37fef8cd25e3..a1f20b205299 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -10,6 +10,7 @@ config MLX5_CORE imply PTP_1588_CLOCK imply VXLAN imply MLXFW + imply PCI_HYPERV_INTERFACE default n ---help--- Core driver for low level functionality of the ConnectX-4 and @@ -19,20 +20,19 @@ config MLX5_ACCEL bool config MLX5_FPGA - bool "Mellanox Technologies Innova support" - depends on MLX5_CORE + bool "Mellanox Technologies Innova support" + depends on MLX5_CORE select MLX5_ACCEL - ---help--- - Build support for the Innova family of network cards by Mellanox - Technologies. Innova network cards are comprised of a ConnectX chip - and an FPGA chip on one board. If you select this option, the - mlx5_core driver will include the Innova FPGA core and allow building - sandbox-specific client drivers. + ---help--- + Build support for the Innova family of network cards by Mellanox + Technologies. Innova network cards are comprised of a ConnectX chip + and an FPGA chip on one board. If you select this option, the + mlx5_core driver will include the Innova FPGA core and allow building + sandbox-specific client drivers. 
config MLX5_CORE_EN bool "Mellanox 5th generation network adapters (ConnectX series) Ethernet support" depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE - depends on IPV6=y || IPV6=n || MLX5_CORE=m select PAGE_POOL select DIMLIB default n @@ -58,14 +58,14 @@ config MLX5_EN_RXNFC API. config MLX5_MPFS - bool "Mellanox Technologies MLX5 MPFS support" - depends on MLX5_CORE_EN + bool "Mellanox Technologies MLX5 MPFS support" + depends on MLX5_CORE_EN default y - ---help--- + ---help--- Mellanox Technologies Ethernet Multi-Physical Function Switch (MPFS) - support in ConnectX NIC. MPFs is required for when multi-PF configuration - is enabled to allow passing user configured unicast MAC addresses to the - requesting PF. + support in ConnectX NIC. MPFs is required for when multi-PF configuration + is enabled to allow passing user configured unicast MAC addresses to the + requesting PF. config MLX5_ESWITCH bool "Mellanox Technologies MLX5 SRIOV E-Switch support" @@ -73,10 +73,10 @@ config MLX5_ESWITCH default y ---help--- Mellanox Technologies Ethernet SRIOV E-Switch support in ConnectX NIC. - E-Switch provides internal SRIOV packet steering and switching for the - enabled VFs and PF in two available modes: - Legacy SRIOV mode (L2 mac vlan steering based). - Switchdev mode (eswitch offloads). + E-Switch provides internal SRIOV packet steering and switching for the + enabled VFs and PF in two available modes: + Legacy SRIOV mode (L2 mac vlan steering based). + Switchdev mode (eswitch offloads). config MLX5_CORE_EN_DCB bool "Data Center Bridging (DCB) Support" @@ -154,3 +154,10 @@ config MLX5_EN_TLS Build support for TLS cryptography-offload accelaration in the NIC. Note: Support for hardware with this capability needs to be selected for this option to become available. + +config MLX5_SW_STEERING + bool "Mellanox Technologies software-managed steering" + depends on MLX5_CORE_EN && MLX5_ESWITCH + default y + help + Build support for software-managed steering in the NIC. 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 57d2cc666fe3..d3e06cec8317 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -15,7 +15,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ - lib/devcom.o lib/pci_vsc.o diag/fs_tracepoint.o \ + lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \ diag/fw_tracer.o diag/crdump.o devlink.o # @@ -23,8 +23,9 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ # mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ - en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o \ - en/params.o en/xsk/umem.o en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o + en_selftest.o en/port.o en/monitor_stats.o en/health.o \ + en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/umem.o \ + en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o # # Netdev extra @@ -34,16 +35,18 @@ mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \ lib/geneve.o en/tc_tun_vxlan.o en/tc_tun_gre.o \ - en/tc_tun_geneve.o + en/tc_tun_geneve.o diag/en_tc_tracepoint.o +mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o # # Core extra # mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \ - ecpf.o rdma.o + ecpf.o rdma.o eswitch_offloads_chains.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o +mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o lib/hv_vhca.o # # Ipoib netdev @@ -64,3 +67,10 @@ mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \ mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o \ en_accel/ktls.o en_accel/ktls_tx.o + +mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o \ + steering/dr_matcher.o steering/dr_rule.o \ + steering/dr_icm_pool.o \ + steering/dr_ste.o steering/dr_send.o \ + steering/dr_cmd.o steering/dr_fw.o \ + steering/dr_action.o steering/fs_dr.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h index d787bc0a4155..e09bc3858d57 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h @@ -45,7 +45,7 @@ void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id); static inline bool mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev) { - if (!MLX5_CAP_GEN(mdev, tls)) + if (!MLX5_CAP_GEN(mdev, tls_tx)) return false; if (!MLX5_CAP_GEN(mdev, log_max_dek)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index 549f962cd86e..42198e64a7f4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -71,8 +71,8 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev, return cpu_handle; } -int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, - struct mlx5_frag_buf *buf, int node) +static int 
mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, + struct mlx5_frag_buf *buf, int node) { dma_addr_t t; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 8cdd7e66f8df..34cba97f7bf4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -446,6 +446,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_CREATE_UMEM: case MLX5_CMD_OP_DESTROY_UMEM: case MLX5_CMD_OP_ALLOC_MEMIC: + case MLX5_CMD_OP_MODIFY_XRQ: + case MLX5_CMD_OP_RELEASE_XRQ_ERROR: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -EIO; @@ -637,6 +639,8 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(DESTROY_UCTX); MLX5_COMMAND_STR_CASE(CREATE_UMEM); MLX5_COMMAND_STR_CASE(DESTROY_UMEM); + MLX5_COMMAND_STR_CASE(RELEASE_XRQ_ERROR); + MLX5_COMMAND_STR_CASE(MODIFY_XRQ); default: return "unknown command opcode"; } } @@ -862,7 +866,7 @@ static void cmd_work_handler(struct work_struct *work) if (!ent->page_queue) { alloc_ret = alloc_ent(cmd); if (alloc_ret < 0) { - mlx5_core_err(dev, "failed to allocate command entry\n"); + mlx5_core_err_rl(dev, "failed to allocate command entry\n"); if (ent->callback) { ent->callback(-EAGAIN, ent->context); mlx5_free_cmd_msg(dev, ent->out); @@ -1368,49 +1372,19 @@ static void clean_debug_files(struct mlx5_core_dev *dev) debugfs_remove_recursive(dbg->dbg_root); } -static int create_debugfs_files(struct mlx5_core_dev *dev) +static void create_debugfs_files(struct mlx5_core_dev *dev) { struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; - int err = -ENOMEM; - - if (!mlx5_debugfs_root) - return 0; dbg->dbg_root = debugfs_create_dir("cmd", dev->priv.dbg_root); - if (!dbg->dbg_root) - return err; - - dbg->dbg_in = debugfs_create_file("in", 0400, dbg->dbg_root, - dev, &dfops); - if (!dbg->dbg_in) - goto err_dbg; - dbg->dbg_out = debugfs_create_file("out", 0200, dbg->dbg_root, - dev, &dfops); - if (!dbg->dbg_out) - goto err_dbg; - - dbg->dbg_outlen = debugfs_create_file("out_len", 0600, dbg->dbg_root, - dev, &olfops); - if (!dbg->dbg_outlen) - goto err_dbg; - - dbg->dbg_status = debugfs_create_u8("status", 0600, dbg->dbg_root, - &dbg->status); - if (!dbg->dbg_status) - goto err_dbg; - - dbg->dbg_run = debugfs_create_file("run", 0200, dbg->dbg_root, dev, &fops); - if (!dbg->dbg_run) - goto err_dbg; + debugfs_create_file("in", 0400, dbg->dbg_root, dev, &dfops); + debugfs_create_file("out", 0200, dbg->dbg_root, dev, &dfops); + debugfs_create_file("out_len", 0600, dbg->dbg_root, dev, &olfops); + debugfs_create_u8("status", 0600, dbg->dbg_root, &dbg->status); + debugfs_create_file("run", 0200, dbg->dbg_root, dev, &fops); mlx5_cmdif_debugfs_init(dev); - - return 0; - -err_dbg: - clean_debug_files(dev); - return err; } static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode) @@ -2007,17 +1981,10 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) goto err_cache; } - err = create_debugfs_files(dev); - if (err) { - err = -ENOMEM; - goto err_wq; - } + create_debugfs_files(dev); return 0; -err_wq: - destroy_workqueue(cmd->wq); - err_cache: destroy_msg_cache(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index a11e22d0b0cc..04854e5fbcd7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -92,8 +92,6 @@ EXPORT_SYMBOL(mlx5_debugfs_root); void mlx5_register_debugfs(void) { 
mlx5_debugfs_root = debugfs_create_dir("mlx5", NULL); - if (IS_ERR_OR_NULL(mlx5_debugfs_root)) - mlx5_debugfs_root = NULL; } void mlx5_unregister_debugfs(void) @@ -101,45 +99,25 @@ void mlx5_unregister_debugfs(void) debugfs_remove(mlx5_debugfs_root); } -int mlx5_qp_debugfs_init(struct mlx5_core_dev *dev) +void mlx5_qp_debugfs_init(struct mlx5_core_dev *dev) { - if (!mlx5_debugfs_root) - return 0; - atomic_set(&dev->num_qps, 0); dev->priv.qp_debugfs = debugfs_create_dir("QPs", dev->priv.dbg_root); - if (!dev->priv.qp_debugfs) - return -ENOMEM; - - return 0; } void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev) { - if (!mlx5_debugfs_root) - return; - debugfs_remove_recursive(dev->priv.qp_debugfs); } -int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev) +void mlx5_eq_debugfs_init(struct mlx5_core_dev *dev) { - if (!mlx5_debugfs_root) - return 0; - dev->priv.eq_debugfs = debugfs_create_dir("EQs", dev->priv.dbg_root); - if (!dev->priv.eq_debugfs) - return -ENOMEM; - - return 0; } void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev) { - if (!mlx5_debugfs_root) - return; - debugfs_remove_recursive(dev->priv.eq_debugfs); } @@ -183,85 +161,41 @@ static const struct file_operations stats_fops = { .write = average_write, }; -int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev) +void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev) { struct mlx5_cmd_stats *stats; struct dentry **cmd; const char *namep; - int err; int i; - if (!mlx5_debugfs_root) - return 0; - cmd = &dev->priv.cmdif_debugfs; *cmd = debugfs_create_dir("commands", dev->priv.dbg_root); - if (!*cmd) - return -ENOMEM; for (i = 0; i < ARRAY_SIZE(dev->cmd.stats); i++) { stats = &dev->cmd.stats[i]; namep = mlx5_command_str(i); if (strcmp(namep, "unknown command opcode")) { stats->root = debugfs_create_dir(namep, *cmd); - if (!stats->root) { - mlx5_core_warn(dev, "failed adding command %d\n", - i); - err = -ENOMEM; - goto out; - } - - stats->avg = debugfs_create_file("average", 0400, - stats->root, stats, - &stats_fops); - if (!stats->avg) { - mlx5_core_warn(dev, "failed creating debugfs file\n"); - err = -ENOMEM; - goto out; - } - - stats->count = debugfs_create_u64("n", 0400, - stats->root, - &stats->n); - if (!stats->count) { - mlx5_core_warn(dev, "failed creating debugfs file\n"); - err = -ENOMEM; - goto out; - } + + debugfs_create_file("average", 0400, stats->root, stats, + &stats_fops); + debugfs_create_u64("n", 0400, stats->root, &stats->n); } } - - return 0; -out: - debugfs_remove_recursive(dev->priv.cmdif_debugfs); - return err; } void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev) { - if (!mlx5_debugfs_root) - return; - debugfs_remove_recursive(dev->priv.cmdif_debugfs); } -int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev) +void mlx5_cq_debugfs_init(struct mlx5_core_dev *dev) { - if (!mlx5_debugfs_root) - return 0; - dev->priv.cq_debugfs = debugfs_create_dir("CQs", dev->priv.dbg_root); - if (!dev->priv.cq_debugfs) - return -ENOMEM; - - return 0; } void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev) { - if (!mlx5_debugfs_root) - return; - debugfs_remove_recursive(dev->priv.cq_debugfs); } @@ -484,7 +418,6 @@ static int add_res_tree(struct mlx5_core_dev *dev, enum dbg_rsc_type type, { struct mlx5_rsc_debug *d; char resn[32]; - int err; int i; d = kzalloc(struct_size(d, fields, nfile), GFP_KERNEL); @@ -496,30 +429,15 @@ static int add_res_tree(struct mlx5_core_dev *dev, enum dbg_rsc_type type, d->type = type; sprintf(resn, "0x%x", rsn); d->root = debugfs_create_dir(resn, root); - if (!d->root) { - err = -ENOMEM; - 
goto out_free; - } for (i = 0; i < nfile; i++) { d->fields[i].i = i; - d->fields[i].dent = debugfs_create_file(field[i], 0400, - d->root, &d->fields[i], - &fops); - if (!d->fields[i].dent) { - err = -ENOMEM; - goto out_rem; - } + debugfs_create_file(field[i], 0400, d->root, &d->fields[i], + &fops); } *dbg = d; return 0; -out_rem: - debugfs_remove_recursive(d->root); - -out_free: - kfree(d); - return err; } static void rem_res_tree(struct mlx5_rsc_debug *d) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index a400f4430c28..ac108f1e5bd6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -4,6 +4,7 @@ #include <devlink.h> #include "mlx5_core.h" +#include "fs_core.h" #include "eswitch.h" static int mlx5_devlink_flash_update(struct devlink *devlink, @@ -84,6 +85,22 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, return 0; } +static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + return mlx5_unload_one(dev, false); +} + +static int mlx5_devlink_reload_up(struct devlink *devlink, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + return mlx5_load_one(dev, false); +} + static const struct devlink_ops mlx5_devlink_ops = { #ifdef CONFIG_MLX5_ESWITCH .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, @@ -95,6 +112,8 @@ static const struct devlink_ops mlx5_devlink_ops = { #endif .flash_update = mlx5_devlink_flash_update, .info_get = mlx5_devlink_info_get, + .reload_down = mlx5_devlink_reload_down, + .reload_up = mlx5_devlink_reload_up, }; struct devlink *mlx5_devlink_alloc(void) @@ -107,12 +126,145 @@ void mlx5_devlink_free(struct devlink *devlink) devlink_free(devlink); } +static int mlx5_devlink_fs_mode_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + char *value = val.vstr; + int err = 0; + + if (!strcmp(value, "dmfs")) { + return 0; + } else if (!strcmp(value, "smfs")) { + u8 eswitch_mode; + bool smfs_cap; + + eswitch_mode = mlx5_eswitch_mode(dev->priv.eswitch); + smfs_cap = mlx5_fs_dr_is_supported(dev); + + if (!smfs_cap) { + err = -EOPNOTSUPP; + NL_SET_ERR_MSG_MOD(extack, + "Software managed steering is not supported by current device"); + } + + else if (eswitch_mode == MLX5_ESWITCH_OFFLOADS) { + NL_SET_ERR_MSG_MOD(extack, + "Software managed steering is not supported when eswitch offloads enabled."); + err = -EOPNOTSUPP; + } + } else { + NL_SET_ERR_MSG_MOD(extack, + "Bad parameter: supported values are [\"dmfs\", \"smfs\"]"); + err = -EINVAL; + } + + return err; +} + +static int mlx5_devlink_fs_mode_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + enum mlx5_flow_steering_mode mode; + + if (!strcmp(ctx->val.vstr, "smfs")) + mode = MLX5_FLOW_STEERING_MODE_SMFS; + else + mode = MLX5_FLOW_STEERING_MODE_DMFS; + dev->priv.steering->mode = mode; + + return 0; +} + +static int mlx5_devlink_fs_mode_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) + strcpy(ctx->val.vstr, "smfs"); + else + strcpy(ctx->val.vstr, "dmfs"); + return 0; +} + 
+enum mlx5_devlink_param_id { + MLX5_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE, +}; + +static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + bool new_state = val.vbool; + + if (new_state && !MLX5_CAP_GEN(dev, roce)) { + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support RoCE"); + return -EOPNOTSUPP; + } + + return 0; +} + +static const struct devlink_param mlx5_devlink_params[] = { + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE, + "flow_steering_mode", DEVLINK_PARAM_TYPE_STRING, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlx5_devlink_fs_mode_get, mlx5_devlink_fs_mode_set, + mlx5_devlink_fs_mode_validate), + DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, mlx5_devlink_enable_roce_validate), +}; + +static void mlx5_devlink_set_params_init_values(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + union devlink_param_value value; + + if (dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_DMFS) + strcpy(value.vstr, "dmfs"); + else + strcpy(value.vstr, "smfs"); + devlink_param_driverinit_value_set(devlink, + MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE, + value); + + value.vbool = MLX5_CAP_GEN(dev, roce); + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, + value); +} + int mlx5_devlink_register(struct devlink *devlink, struct device *dev) { - return devlink_register(devlink, dev); + int err; + + err = devlink_register(devlink, dev); + if (err) + return err; + + err = devlink_params_register(devlink, mlx5_devlink_params, + ARRAY_SIZE(mlx5_devlink_params)); + if (err) + goto params_reg_err; + mlx5_devlink_set_params_init_values(devlink); + devlink_params_publish(devlink); + devlink_reload_enable(devlink); + return 0; + +params_reg_err: + devlink_unregister(devlink); + return err; } void mlx5_devlink_unregister(struct devlink *devlink) { + devlink_reload_disable(devlink); + devlink_params_unregister(devlink, mlx5_devlink_params, + ARRAY_SIZE(mlx5_devlink_params)); devlink_unregister(devlink); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile deleted file mode 100644 index c78512eed8d7..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h new file mode 100644 index 000000000000..1177860a2ee4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_EN_REP_TP_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_EN_REP_TP_ + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> +#include "en_rep.h" + +TRACE_EVENT(mlx5e_rep_neigh_update, + TP_PROTO(const struct mlx5e_neigh_hash_entry *nhe, const u8 *ha, + bool neigh_connected), + TP_ARGS(nhe, ha, neigh_connected), + TP_STRUCT__entry(__string(devname, nhe->m_neigh.dev->name) + __array(u8, ha, ETH_ALEN) + __array(u8, v4, 4) + __array(u8, v6, 16) + __field(bool, neigh_connected) + ), + TP_fast_assign(const struct mlx5e_neigh *mn = &nhe->m_neigh; + struct in6_addr *pin6; + __be32 *p32; + + __assign_str(devname, mn->dev->name); + __entry->neigh_connected = neigh_connected; + memcpy(__entry->ha, ha, ETH_ALEN); + + p32 = (__be32 *)__entry->v4; + pin6 = (struct in6_addr *)__entry->v6; + if (mn->family == AF_INET) { + *p32 = mn->dst_ip.v4; + ipv6_addr_set_v4mapped(*p32, pin6); + } else if (mn->family == AF_INET6) { + *pin6 = mn->dst_ip.v6; + } + ), + TP_printk("netdev: %s MAC: %pM IPv4: %pI4 IPv6: %pI6c neigh_connected=%d\n", + __get_str(devname), __entry->ha, + __entry->v4, __entry->v6, __entry->neigh_connected + ) +); + +#endif /* _MLX5_EN_REP_TP_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ./diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE en_rep_tracepoint +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.c new file mode 100644 index 000000000000..c5dc6c50fa87 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#define CREATE_TRACE_POINTS +#include "en_tc_tracepoint.h" + +void put_ids_to_array(int *ids, + const struct flow_action_entry *entries, + unsigned int num) +{ + unsigned int i; + + for (i = 0; i < num; i++) + ids[i] = entries[i].id; +} + +#define NAME_SIZE 16 + +static const char FLOWACT2STR[NUM_FLOW_ACTIONS][NAME_SIZE] = { + [FLOW_ACTION_ACCEPT] = "ACCEPT", + [FLOW_ACTION_DROP] = "DROP", + [FLOW_ACTION_TRAP] = "TRAP", + [FLOW_ACTION_GOTO] = "GOTO", + [FLOW_ACTION_REDIRECT] = "REDIRECT", + [FLOW_ACTION_MIRRED] = "MIRRED", + [FLOW_ACTION_VLAN_PUSH] = "VLAN_PUSH", + [FLOW_ACTION_VLAN_POP] = "VLAN_POP", + [FLOW_ACTION_VLAN_MANGLE] = "VLAN_MANGLE", + [FLOW_ACTION_TUNNEL_ENCAP] = "TUNNEL_ENCAP", + [FLOW_ACTION_TUNNEL_DECAP] = "TUNNEL_DECAP", + [FLOW_ACTION_MANGLE] = "MANGLE", + [FLOW_ACTION_ADD] = "ADD", + [FLOW_ACTION_CSUM] = "CSUM", + [FLOW_ACTION_MARK] = "MARK", + [FLOW_ACTION_WAKE] = "WAKE", + [FLOW_ACTION_QUEUE] = "QUEUE", + [FLOW_ACTION_SAMPLE] = "SAMPLE", + [FLOW_ACTION_POLICE] = "POLICE", + [FLOW_ACTION_CT] = "CT", +}; + +const char *parse_action(struct trace_seq *p, + int *ids, + unsigned int num) +{ + const char *ret = trace_seq_buffer_ptr(p); + unsigned int i; + + for (i = 0; i < num; i++) { + if (ids[i] < NUM_FLOW_ACTIONS) + trace_seq_printf(p, "%s ", FLOWACT2STR[ids[i]]); + else + trace_seq_printf(p, "UNKNOWN "); + } + + trace_seq_putc(p, 0); + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h new file mode 100644 index 000000000000..d4e6cfaaade3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_TC_TP_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_TC_TP_ + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> +#include <net/flow_offload.h> +#include "en_rep.h" + +#define __parse_action(ids, num) parse_action(p, ids, num) + +void put_ids_to_array(int *ids, + const struct flow_action_entry *entries, + unsigned int num); + +const char *parse_action(struct trace_seq *p, + int *ids, + unsigned int num); + +DECLARE_EVENT_CLASS(mlx5e_flower_template, + TP_PROTO(const struct flow_cls_offload *f), + TP_ARGS(f), + TP_STRUCT__entry(__field(void *, cookie) + __field(unsigned int, num) + __dynamic_array(int, ids, f->rule ? + f->rule->action.num_entries : 0) + ), + TP_fast_assign(__entry->cookie = (void *)f->cookie; + __entry->num = (f->rule ? + f->rule->action.num_entries : 0); + if (__entry->num) + put_ids_to_array(__get_dynamic_array(ids), + f->rule->action.entries, + f->rule->action.num_entries); + ), + TP_printk("cookie=%p actions= %s\n", + __entry->cookie, __entry->num ? 
+ __parse_action(__get_dynamic_array(ids), + __entry->num) : "NULL" + ) +); + +DEFINE_EVENT(mlx5e_flower_template, mlx5e_configure_flower, + TP_PROTO(const struct flow_cls_offload *f), + TP_ARGS(f) + ); + +DEFINE_EVENT(mlx5e_flower_template, mlx5e_delete_flower, + TP_PROTO(const struct flow_cls_offload *f), + TP_ARGS(f) + ); + +TRACE_EVENT(mlx5e_stats_flower, + TP_PROTO(const struct flow_cls_offload *f), + TP_ARGS(f), + TP_STRUCT__entry(__field(void *, cookie) + __field(u64, bytes) + __field(u64, packets) + __field(u64, lastused) + ), + TP_fast_assign(__entry->cookie = (void *)f->cookie; + __entry->bytes = f->stats.bytes; + __entry->packets = f->stats.pkts; + __entry->lastused = f->stats.lastused; + ), + TP_printk("cookie=%p bytes=%llu packets=%llu lastused=%llu\n", + __entry->cookie, __entry->bytes, + __entry->packets, __entry->lastused + ) +); + +TRACE_EVENT(mlx5e_tc_update_neigh_used_value, + TP_PROTO(const struct mlx5e_neigh_hash_entry *nhe, bool neigh_used), + TP_ARGS(nhe, neigh_used), + TP_STRUCT__entry(__string(devname, nhe->m_neigh.dev->name) + __array(u8, v4, 4) + __array(u8, v6, 16) + __field(bool, neigh_used) + ), + TP_fast_assign(const struct mlx5e_neigh *mn = &nhe->m_neigh; + struct in6_addr *pin6; + __be32 *p32; + + __assign_str(devname, mn->dev->name); + __entry->neigh_used = neigh_used; + + p32 = (__be32 *)__entry->v4; + pin6 = (struct in6_addr *)__entry->v6; + if (mn->family == AF_INET) { + *p32 = mn->dst_ip.v4; + ipv6_addr_set_v4mapped(*p32, pin6); + } else if (mn->family == AF_INET6) { + *pin6 = mn->dst_ip.v6; + } + ), + TP_printk("netdev: %s IPv4: %pI4 IPv6: %pI6c neigh_used=%d\n", + __get_str(devname), __entry->v4, __entry->v6, + __entry->neigh_used + ) +); + +#endif /* _MLX5_TC_TP_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ./diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE en_tc_tracepoint +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 8a4930c8bf62..94d7b69a95c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -546,16 +546,17 @@ static void mlx5_fw_tracer_save_trace(struct mlx5_fw_tracer *tracer, trace_data->timestamp = timestamp; trace_data->lost = lost; trace_data->event_id = event_id; - strncpy(trace_data->msg, msg, TRACE_STR_MSG); + strscpy_pad(trace_data->msg, msg, TRACE_STR_MSG); tracer->st_arr.saved_traces_index = (tracer->st_arr.saved_traces_index + 1) & (SAVED_TRACES_NUM - 1); mutex_unlock(&tracer->st_arr.lock); } -static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt, - struct mlx5_core_dev *dev, - u64 trace_timestamp) +static noinline +void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt, + struct mlx5_core_dev *dev, + u64 trace_timestamp) { char tmp[512]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 65bec19a438f..220ef9f06f84 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -54,6 +54,7 @@ #include "mlx5_core.h" #include "en_stats.h" #include "en/fs.h" +#include "lib/hv_vhca.h" extern const struct net_device_ops mlx5e_netdev_ops; struct page_pool; @@ -134,7 +135,7 @@ struct page_pool; #define MLX5E_LOG_INDIR_RQT_SIZE 0x7 #define MLX5E_INDIR_RQT_SIZE BIT(MLX5E_LOG_INDIR_RQT_SIZE) #define MLX5E_MIN_NUM_CHANNELS 0x1 -#define 
MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE >> 1) +#define MLX5E_MAX_NUM_CHANNELS MLX5E_INDIR_RQT_SIZE #define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 #define MLX5E_TX_XSK_POLL_BUDGET 64 @@ -162,6 +163,14 @@ enum mlx5e_rq_group { #define MLX5E_NUM_RQ_GROUPS(g) (1 + MLX5E_RQ_GROUP_##g) }; +static inline u8 mlx5e_get_num_lag_ports(struct mlx5_core_dev *mdev) +{ + if (mlx5_lag_is_lacp_owner(mdev)) + return 1; + + return clamp_t(u8, MLX5_CAP_GEN(mdev, num_lag_ports), 1, MLX5_MAX_PORTS); +} + static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) { switch (wq_type) { @@ -300,6 +309,7 @@ struct mlx5e_dcbx_dp { enum { MLX5E_RQ_STATE_ENABLED, + MLX5E_RQ_STATE_RECOVERING, MLX5E_RQ_STATE_AM, MLX5E_RQ_STATE_NO_CSUM_COMPLETE, MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */ @@ -335,7 +345,7 @@ struct mlx5e_tx_wqe_info { u8 num_wqebbs; u8 num_dma; #ifdef CONFIG_MLX5_EN_TLS - skb_frag_t *resync_dump_frag; + struct page *resync_dump_frag_page; #endif }; @@ -356,6 +366,7 @@ enum { MLX5E_SQ_STATE_IPSEC, MLX5E_SQ_STATE_AM, MLX5E_SQ_STATE_TLS, + MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, }; struct mlx5e_sq_wqe_info { @@ -399,6 +410,7 @@ struct mlx5e_txqsq { struct device *pdev; __be32 mkey_be; unsigned long state; + unsigned int hw_mtu; struct hwtstamp_config *tstamp; struct mlx5_clock *clock; @@ -480,8 +492,6 @@ struct mlx5e_xdp_mpwqe { struct mlx5e_tx_wqe *wqe; u8 ds_count; u8 pkt_count; - u8 max_ds_count; - u8 complete; u8 inline_on; }; @@ -552,6 +562,8 @@ struct mlx5e_icosq { /* control path */ struct mlx5_wq_ctrl wq_ctrl; struct mlx5e_channel *channel; + + struct work_struct recover_work; } ____cacheline_aligned_in_smp; struct mlx5e_wqe_frag_info { @@ -671,6 +683,8 @@ struct mlx5e_rq { struct zero_copy_allocator zca; struct xdp_umem *umem; + struct work_struct recover_work; + /* control */ struct mlx5_wq_ctrl wq_ctrl; __be32 mkey_be; @@ -700,6 +714,7 @@ struct mlx5e_channel { struct net_device *netdev; __be32 mkey_be; u8 num_tc; + u8 lag_port; /* XDP_REDIRECT */ struct mlx5e_xdpsq xdpsq; @@ -745,7 +760,7 @@ enum { MLX5E_STATE_OPENED, MLX5E_STATE_DESTROYING, MLX5E_STATE_XDP_TX_ENABLED, - MLX5E_STATE_XDP_OPEN, + MLX5E_STATE_XDP_ACTIVE, }; struct mlx5e_rqt { @@ -778,6 +793,15 @@ struct mlx5e_modify_sq_param { int rl_index; }; +#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) +struct mlx5e_hv_vhca_stats_agent { + struct mlx5_hv_vhca_agent *agent; + struct delayed_work work; + u16 delay; + void *buf; +}; +#endif + struct mlx5e_xsk { /* UMEMs are stored separately from channels, because we don't want to * lose them when channels are recreated. 
The kernel also stores UMEMs, @@ -792,7 +816,7 @@ struct mlx5e_xsk { struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC]; - int channel_tc2txq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; + int channel_tc2realtxq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; #ifdef CONFIG_MLX5_CORE_EN_DCB struct mlx5e_dcbx_dp dcbx_dp; #endif @@ -804,7 +828,7 @@ struct mlx5e_priv { struct mlx5e_rq drop_rq; struct mlx5e_channels channels; - u32 tisn[MLX5E_MAX_NUM_TC]; + u32 tisn[MLX5_MAX_PORTS][MLX5E_MAX_NUM_TC]; struct mlx5e_rqt indir_rqt; struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS]; @@ -847,7 +871,11 @@ struct mlx5e_priv { struct mlx5e_tls *tls; #endif struct devlink_health_reporter *tx_reporter; + struct devlink_health_reporter *rx_reporter; struct mlx5e_xsk xsk; +#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) + struct mlx5e_hv_vhca_stats_agent stats_agent; +#endif }; struct mlx5e_profile { @@ -864,6 +892,8 @@ struct mlx5e_profile { int (*update_rx)(struct mlx5e_priv *priv); void (*update_stats)(struct mlx5e_priv *priv); void (*update_carrier)(struct mlx5e_priv *priv); + unsigned int (*stats_grps_num)(struct mlx5e_priv *priv); + mlx5e_stats_grp_t *stats_grps; struct { mlx5e_fp_handle_rx_cqe handle_rx_cqe; mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe; @@ -888,6 +918,26 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq); +static inline u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq) +{ + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return mlx5_wq_ll_get_size(&rq->mpwqe.wq); + default: + return mlx5_wq_cyc_get_size(&rq->wqe.wq); + } +} + +static inline u32 mlx5e_rqwq_get_cur_sz(struct mlx5e_rq *rq) +{ + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return rq->mpwqe.wq.cur_sz; + default: + return rq->wqe.wq.cur_sz; + } +} + bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev); bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, struct mlx5e_params *params); @@ -916,7 +966,6 @@ struct sk_buff * mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt); -void mlx5e_update_stats(struct mlx5e_priv *priv); void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s); @@ -1006,18 +1055,18 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state); +void mlx5e_activate_rq(struct mlx5e_rq *rq); +void mlx5e_deactivate_rq(struct mlx5e_rq *rq); +void mlx5e_free_rx_descs(struct mlx5e_rq *rq); +void mlx5e_activate_icosq(struct mlx5e_icosq *icosq); +void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq); int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, struct mlx5e_modify_sq_param *p); void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq); void mlx5e_tx_disable_queue(struct netdev_queue *txq); -static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) -{ - return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) && - MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version)); 
-} - static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev) { return MLX5_CAP_ETH(mdev, swp) && @@ -1063,6 +1112,7 @@ int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn); void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn); int mlx5e_create_tises(struct mlx5e_priv *priv); +void mlx5e_destroy_tises(struct mlx5e_priv *priv); int mlx5e_update_nic_rx(struct mlx5e_priv *priv); void mlx5e_update_carrier(struct mlx5e_priv *priv); int mlx5e_close(struct net_device *netdev); @@ -1126,16 +1176,15 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv); void mlx5e_detach_netdev(struct mlx5e_priv *priv); void mlx5e_destroy_netdev(struct mlx5e_priv *priv); void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv); -void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, +void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, struct mlx5e_rss_params *rss_params, struct mlx5e_params *params, - u16 max_channels, u16 mtu); + u16 mtu); void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, u16 num_channels); -u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev); void mlx5e_rx_dim_work(struct work_struct *work); void mlx5e_tx_dim_work(struct work_struct *work); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en/Makefile deleted file mode 100644 index c78512eed8d7..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index be5961ff24cc..0416f7712109 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -10,14 +10,18 @@ enum { }; struct mlx5e_tc_table { + /* protects flow table */ + struct mutex t_lock; struct mlx5_flow_table *t; struct rhashtable ht; - DECLARE_HASHTABLE(mod_hdr_tbl, 8); + struct mod_hdr_tbl mod_hdr; + struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */ DECLARE_HASHTABLE(hairpin_tbl, 8); struct notifier_block netdevice_nb; + struct netdev_net_notifier netdevice_nn; }; struct mlx5e_flow_table { @@ -92,9 +96,15 @@ struct mlx5e_tirc_config { enum mlx5e_tunnel_types { MLX5E_TT_IPV4_GRE, MLX5E_TT_IPV6_GRE, + MLX5E_TT_IPV4_IPIP, + MLX5E_TT_IPV6_IPIP, + MLX5E_TT_IPV4_IPV6, + MLX5E_TT_IPV6_IPV6, MLX5E_NUM_TUNNEL_TT, }; +bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev); + /* L3/L4 traffic type classifier */ struct mlx5e_ttc_table { struct mlx5e_flow_table ft; @@ -113,6 +123,22 @@ enum { #endif }; +#define MLX5E_TTC_NUM_GROUPS 3 +#define MLX5E_TTC_GROUP1_SIZE (BIT(3) + MLX5E_NUM_TUNNEL_TT) +#define MLX5E_TTC_GROUP2_SIZE BIT(1) +#define MLX5E_TTC_GROUP3_SIZE BIT(0) +#define MLX5E_TTC_TABLE_SIZE (MLX5E_TTC_GROUP1_SIZE +\ + MLX5E_TTC_GROUP2_SIZE +\ + MLX5E_TTC_GROUP3_SIZE) + +#define MLX5E_INNER_TTC_NUM_GROUPS 3 +#define MLX5E_INNER_TTC_GROUP1_SIZE BIT(3) +#define MLX5E_INNER_TTC_GROUP2_SIZE BIT(1) +#define MLX5E_INNER_TTC_GROUP3_SIZE BIT(0) +#define MLX5E_INNER_TTC_TABLE_SIZE (MLX5E_INNER_TTC_GROUP1_SIZE +\ + MLX5E_INNER_TTC_GROUP2_SIZE +\ + MLX5E_INNER_TTC_GROUP3_SIZE) + #ifdef CONFIG_MLX5_EN_RXNFC struct mlx5e_ethtool_table { @@ -132,12 +158,17 @@ struct mlx5e_ethtool_steering { void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv); void 
mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv); -int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd); -int mlx5e_get_rxnfc(struct net_device *dev, - struct ethtool_rxnfc *info, u32 *rule_locs); +int mlx5e_ethtool_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd); +int mlx5e_ethtool_get_rxnfc(struct net_device *dev, + struct ethtool_rxnfc *info, u32 *rule_locs); #else static inline void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv) { } static inline void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv) { } +static inline int mlx5e_ethtool_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ return -EOPNOTSUPP; } +static inline int mlx5e_ethtool_get_rxnfc(struct net_device *dev, + struct ethtool_rxnfc *info, u32 *rule_locs) +{ return -EOPNOTSUPP; } #endif /* CONFIG_MLX5_EN_RXNFC */ #ifdef CONFIG_MLX5_EN_ARFS @@ -224,5 +255,8 @@ void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv); int mlx5e_create_flow_steering(struct mlx5e_priv *priv); void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv); +bool mlx5e_tunnel_proto_supported(struct mlx5_core_dev *mdev, u8 proto_type); +bool mlx5e_any_tunnel_proto_supported(struct mlx5_core_dev *mdev); + #endif /* __MLX5E_FLOW_STEER_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c new file mode 100644 index 000000000000..3a975641f902 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Mellanox Technologies. + +#include "health.h" +#include "lib/eq.h" + +int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name) +{ + int err; + + err = devlink_fmsg_pair_nest_start(fmsg, name); + if (err) + return err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + return 0; +} + +int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg) +{ + int err; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) +{ + struct mlx5e_priv *priv = cq->channel->priv; + u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {}; + u8 hw_status; + void *cqc; + int err; + + err = mlx5_core_query_cq(priv->mdev, &cq->mcq, out, sizeof(out)); + if (err) + return err; + + cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context); + hw_status = MLX5_GET(cqc, cqc, status); + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "CQ"); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "cqn", cq->mcq.cqn); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "HW status", hw_status); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) +{ + u8 cq_log_stride; + u32 cq_sz; + int err; + + cq_sz = mlx5_cqwq_get_size(&cq->wq); + cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq); + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "CQ"); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "stride size", BIT(cq_log_stride)); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "size", cq_sz); + if (err) + return err; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +int mlx5e_health_create_reporters(struct 
mlx5e_priv *priv) +{ + int err; + + err = mlx5e_reporter_tx_create(priv); + if (err) + return err; + + err = mlx5e_reporter_rx_create(priv); + if (err) + return err; + + return 0; +} + +void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv) +{ + mlx5e_reporter_rx_destroy(priv); + mlx5e_reporter_tx_destroy(priv); +} + +void mlx5e_health_channels_update(struct mlx5e_priv *priv) +{ + if (priv->tx_reporter) + devlink_health_reporter_state_update(priv->tx_reporter, + DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); + if (priv->rx_reporter) + devlink_health_reporter_state_update(priv->rx_reporter, + DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); +} + +int mlx5e_health_sq_to_ready(struct mlx5e_channel *channel, u32 sqn) +{ + struct mlx5_core_dev *mdev = channel->mdev; + struct net_device *dev = channel->netdev; + struct mlx5e_modify_sq_param msp = {}; + int err; + + msp.curr_state = MLX5_SQC_STATE_ERR; + msp.next_state = MLX5_SQC_STATE_RST; + + err = mlx5e_modify_sq(mdev, sqn, &msp); + if (err) { + netdev_err(dev, "Failed to move sq 0x%x to reset\n", sqn); + return err; + } + + memset(&msp, 0, sizeof(msp)); + msp.curr_state = MLX5_SQC_STATE_RST; + msp.next_state = MLX5_SQC_STATE_RDY; + + err = mlx5e_modify_sq(mdev, sqn, &msp); + if (err) { + netdev_err(dev, "Failed to move sq 0x%x to ready\n", sqn); + return err; + } + + return 0; +} + +int mlx5e_health_recover_channels(struct mlx5e_priv *priv) +{ + int err = 0; + + rtnl_lock(); + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto out; + + err = mlx5e_safe_reopen_channels(priv); + +out: + mutex_unlock(&priv->state_lock); + rtnl_unlock(); + + return err; +} + +int mlx5e_health_channel_eq_recover(struct mlx5_eq_comp *eq, struct mlx5e_channel *channel) +{ + u32 eqe_count; + + netdev_err(channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", + eq->core.eqn, eq->core.cons_index, eq->core.irqn); + + eqe_count = mlx5_eq_poll_irq_disabled(eq); + if (!eqe_count) + return -EIO; + + netdev_err(channel->netdev, "Recovered %d eqes on EQ 0x%x\n", + eqe_count, eq->core.eqn); + + channel->stats->eq_rearm++; + return 0; +} + +int mlx5e_health_report(struct mlx5e_priv *priv, + struct devlink_health_reporter *reporter, char *err_str, + struct mlx5e_err_ctx *err_ctx) +{ + netdev_err(priv->netdev, err_str); + + if (!reporter) + return err_ctx->recover(&err_ctx->ctx); + + return devlink_health_report(reporter, err_str, err_ctx); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h new file mode 100644 index 000000000000..d3693fa547ac --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5E_EN_HEALTH_H +#define __MLX5E_EN_HEALTH_H + +#include "en.h" + +#define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND) + +static inline bool cqe_syndrome_needs_recover(u8 syndrome) +{ + return syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR || + syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR || + syndrome == MLX5_CQE_SYNDROME_LOCAL_PROT_ERR || + syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR; +} + +int mlx5e_reporter_tx_create(struct mlx5e_priv *priv); +void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv); +void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq); +int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq); + +int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg); +int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg); +int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name); +int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg); + +int mlx5e_reporter_rx_create(struct mlx5e_priv *priv); +void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv); +void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq); +void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq); +void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq); + +#define MLX5E_REPORTER_PER_Q_MAX_LEN 256 + +struct mlx5e_err_ctx { + int (*recover)(void *ctx); + void *ctx; +}; + +int mlx5e_health_sq_to_ready(struct mlx5e_channel *channel, u32 sqn); +int mlx5e_health_channel_eq_recover(struct mlx5_eq_comp *eq, struct mlx5e_channel *channel); +int mlx5e_health_recover_channels(struct mlx5e_priv *priv); +int mlx5e_health_report(struct mlx5e_priv *priv, + struct devlink_health_reporter *reporter, char *err_str, + struct mlx5e_err_ctx *err_ctx); +int mlx5e_health_create_reporters(struct mlx5e_priv *priv); +void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv); +void mlx5e_health_channels_update(struct mlx5e_priv *priv); + + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c new file mode 100644 index 000000000000..ac44bbe95c5c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2018 Mellanox Technologies + +#include "en.h" +#include "en/hv_vhca_stats.h" +#include "lib/hv_vhca.h" +#include "lib/hv.h" + +struct mlx5e_hv_vhca_per_ring_stats { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; +}; + +static void +mlx5e_hv_vhca_fill_ring_stats(struct mlx5e_priv *priv, int ch, + struct mlx5e_hv_vhca_per_ring_stats *data) +{ + struct mlx5e_channel_stats *stats; + int tc; + + stats = &priv->channel_stats[ch]; + data->rx_packets = stats->rq.packets; + data->rx_bytes = stats->rq.bytes; + + for (tc = 0; tc < priv->max_opened_tc; tc++) { + data->tx_packets += stats->sq[tc].packets; + data->tx_bytes += stats->sq[tc].bytes; + } +} + +static void mlx5e_hv_vhca_fill_stats(struct mlx5e_priv *priv, void *data, + int buf_len) +{ + int ch, i = 0; + + for (ch = 0; ch < priv->max_nch; ch++) { + void *buf = data + i; + + if (WARN_ON_ONCE(buf + + sizeof(struct mlx5e_hv_vhca_per_ring_stats) > + data + buf_len)) + return; + + mlx5e_hv_vhca_fill_ring_stats(priv, ch, buf); + i += sizeof(struct mlx5e_hv_vhca_per_ring_stats); + } +} + +static int mlx5e_hv_vhca_stats_buf_size(struct mlx5e_priv *priv) +{ + return (sizeof(struct mlx5e_hv_vhca_per_ring_stats) * + priv->max_nch); +} + +static void 
mlx5e_hv_vhca_stats_work(struct work_struct *work) +{ + struct mlx5e_hv_vhca_stats_agent *sagent; + struct mlx5_hv_vhca_agent *agent; + struct delayed_work *dwork; + struct mlx5e_priv *priv; + int buf_len, rc; + void *buf; + + dwork = to_delayed_work(work); + sagent = container_of(dwork, struct mlx5e_hv_vhca_stats_agent, work); + priv = container_of(sagent, struct mlx5e_priv, stats_agent); + buf_len = mlx5e_hv_vhca_stats_buf_size(priv); + agent = sagent->agent; + buf = sagent->buf; + + memset(buf, 0, buf_len); + mlx5e_hv_vhca_fill_stats(priv, buf, buf_len); + + rc = mlx5_hv_vhca_agent_write(agent, buf, buf_len); + if (rc) { + mlx5_core_err(priv->mdev, + "%s: Failed to write stats, err = %d\n", + __func__, rc); + return; + } + + if (sagent->delay) + queue_delayed_work(priv->wq, &sagent->work, sagent->delay); +} + +enum { + MLX5_HV_VHCA_STATS_VERSION = 1, + MLX5_HV_VHCA_STATS_UPDATE_ONCE = 0xFFFF, +}; + +static void mlx5e_hv_vhca_stats_control(struct mlx5_hv_vhca_agent *agent, + struct mlx5_hv_vhca_control_block *block) +{ + struct mlx5e_hv_vhca_stats_agent *sagent; + struct mlx5e_priv *priv; + + priv = mlx5_hv_vhca_agent_priv(agent); + sagent = &priv->stats_agent; + + block->version = MLX5_HV_VHCA_STATS_VERSION; + block->rings = priv->max_nch; + + if (!block->command) { + cancel_delayed_work_sync(&priv->stats_agent.work); + return; + } + + sagent->delay = block->command == MLX5_HV_VHCA_STATS_UPDATE_ONCE ? 0 : + msecs_to_jiffies(block->command * 100); + + queue_delayed_work(priv->wq, &sagent->work, sagent->delay); +} + +static void mlx5e_hv_vhca_stats_cleanup(struct mlx5_hv_vhca_agent *agent) +{ + struct mlx5e_priv *priv = mlx5_hv_vhca_agent_priv(agent); + + cancel_delayed_work_sync(&priv->stats_agent.work); +} + +int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) +{ + int buf_len = mlx5e_hv_vhca_stats_buf_size(priv); + struct mlx5_hv_vhca_agent *agent; + + priv->stats_agent.buf = kvzalloc(buf_len, GFP_KERNEL); + if (!priv->stats_agent.buf) + return -ENOMEM; + + agent = mlx5_hv_vhca_agent_create(priv->mdev->hv_vhca, + MLX5_HV_VHCA_AGENT_STATS, + mlx5e_hv_vhca_stats_control, NULL, + mlx5e_hv_vhca_stats_cleanup, + priv); + + if (IS_ERR_OR_NULL(agent)) { + if (IS_ERR(agent)) + netdev_warn(priv->netdev, + "Failed to create hv vhca stats agent, err = %ld\n", + PTR_ERR(agent)); + + kvfree(priv->stats_agent.buf); + return IS_ERR_OR_NULL(agent); + } + + priv->stats_agent.agent = agent; + INIT_DELAYED_WORK(&priv->stats_agent.work, mlx5e_hv_vhca_stats_work); + + return 0; +} + +void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) +{ + if (IS_ERR_OR_NULL(priv->stats_agent.agent)) + return; + + mlx5_hv_vhca_agent_destroy(priv->stats_agent.agent); + kvfree(priv->stats_agent.buf); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h new file mode 100644 index 000000000000..664463faf77b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_EN_STATS_VHCA_H__ +#define __MLX5_EN_STATS_VHCA_H__ +#include "en.h" + +#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) + +int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv); +void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv); + +#else + +static inline int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) +{ + return 0; +} + +static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) +{ +} +#endif + +#endif /* __MLX5_EN_STATS_VHCA_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 79301d116667..eb2e1f2138e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -25,18 +25,33 @@ u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, return headroom; } -u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) +u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); u16 linear_rq_headroom = mlx5e_get_linear_rq_headroom(params, xsk); - u32 frag_sz = linear_rq_headroom + hw_mtu; + + return linear_rq_headroom + hw_mtu; +} + +u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u32 frag_sz = mlx5e_rx_get_min_frag_sz(params, xsk); /* AF_XDP doesn't build SKBs in place. */ if (!xsk) frag_sz = MLX5_SKB_FRAG_SZ(frag_sz); - /* XDP in mlx5e doesn't support multiple packets per page. */ + /* XDP in mlx5e doesn't support multiple packets per page. AF_XDP is a + * special case. It can run with frames smaller than a page, as it + * doesn't allocate pages dynamically. However, here we pretend that + * fragments are page-sized: it allows us to treat XSK frames like pages + * by redirecting alloc and free operations to XSK rings and by using + * the fact there are no multiple packets per "page" (which is a frame). + * The latter is important, because frames may come in a random order, + * and we will have trouble assembling a real page of multiple frames.
+ */ if (mlx5e_rx_is_xdp(params, xsk)) frag_sz = max_t(u32, frag_sz, PAGE_SIZE); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 3a615d663d84..989d8f429438 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -76,6 +76,8 @@ static inline bool mlx5e_qid_validate(const struct mlx5e_profile *profile, u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); +u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index f777994f3005..fce6eccdcf8b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -73,6 +73,7 @@ static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = { [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = 50000, [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR] = 50000, [MLX5E_CAUI_4_100GBASE_CR4_KR4] = 100000, + [MLX5E_100GAUI_2_100GBASE_CR2_KR2] = 100000, [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = 200000, [MLX5E_400GAUI_8] = 400000, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index 633b117eb13e..ae99fac08b53 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -155,8 +155,11 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, } if (port_buffer->buffer[i].size < - (xoff + max_mtu + (1 << MLX5E_BUFFER_CELL_SHIFT))) + (xoff + max_mtu + (1 << MLX5E_BUFFER_CELL_SHIFT))) { + pr_err("buffer_size[%d]=%d is not enough for lossless buffer\n", + i, port_buffer->buffer[i].size); return -ENOMEM; + } port_buffer->buffer[i].xoff = port_buffer->buffer[i].size - xoff; port_buffer->buffer[i].xon = @@ -175,7 +178,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, * @port_buffer: <output> port receive buffer configuration * @change: <output> * - * Update buffer configuration based on pfc configuraiton and + * Update buffer configuration based on pfc configuration and * priority to buffer mapping. * Buffer's lossy bit is changed to: * lossless if there is at least one PFC enabled priority @@ -232,6 +235,26 @@ static int update_buffer_lossy(unsigned int max_mtu, return 0; } +static int fill_pfc_en(struct mlx5_core_dev *mdev, u8 *pfc_en) +{ + u32 g_rx_pause, g_tx_pause; + int err; + + err = mlx5_query_port_pause(mdev, &g_rx_pause, &g_tx_pause); + if (err) + return err; + + /* If global pause enabled, set all active buffers to lossless. + * Otherwise, check PFC setting. 
+ */ + if (g_rx_pause || g_tx_pause) + *pfc_en = 0xff; + else + err = mlx5_query_port_pfc(mdev, pfc_en, NULL); + + return err; +} + #define MINIMUM_MAX_MTU 9216 int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, u32 change, unsigned int mtu, @@ -277,7 +300,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (change & MLX5E_PORT_BUFFER_PRIO2BUFFER) { update_prio2buffer = true; - err = mlx5_query_port_pfc(priv->mdev, &curr_pfc_en, NULL); + err = fill_pfc_en(priv->mdev, &curr_pfc_en); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h deleted file mode 100644 index e78e92753d73..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (c) 2019 Mellanox Technologies. */ - -#ifndef __MLX5E_EN_REPORTER_H -#define __MLX5E_EN_REPORTER_H - -#include <linux/mlx5/driver.h> -#include "en.h" - -int mlx5e_tx_reporter_create(struct mlx5e_priv *priv); -void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv); -void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq); -int mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq); - -#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c new file mode 100644 index 000000000000..6c72b592315b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -0,0 +1,406 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Mellanox Technologies. + +#include "health.h" +#include "params.h" + +static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state) +{ + int outlen = MLX5_ST_SZ_BYTES(query_rq_out); + void *out; + void *rqc; + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_core_query_rq(dev, rqn, out); + if (err) + goto out; + + rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context); + *state = MLX5_GET(rqc, rqc, state); + +out: + kvfree(out); + return err; +} + +static int mlx5e_wait_for_icosq_flush(struct mlx5e_icosq *icosq) +{ + unsigned long exp_time = jiffies + msecs_to_jiffies(2000); + + while (time_before(jiffies, exp_time)) { + if (icosq->cc == icosq->pc) + return 0; + + msleep(20); + } + + netdev_err(icosq->channel->netdev, + "Wait for ICOSQ 0x%x flush timeout (cc = 0x%x, pc = 0x%x)\n", + icosq->sqn, icosq->cc, icosq->pc); + + return -ETIMEDOUT; +} + +static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq) +{ + WARN_ONCE(icosq->cc != icosq->pc, "ICOSQ 0x%x: cc (0x%x) != pc (0x%x)\n", + icosq->sqn, icosq->cc, icosq->pc); + icosq->cc = 0; + icosq->pc = 0; +} + +static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) +{ + struct mlx5_core_dev *mdev; + struct mlx5e_icosq *icosq; + struct net_device *dev; + struct mlx5e_rq *rq; + u8 state; + int err; + + icosq = ctx; + rq = &icosq->channel->rq; + mdev = icosq->channel->mdev; + dev = icosq->channel->netdev; + err = mlx5_core_query_sq_state(mdev, icosq->sqn, &state); + if (err) { + netdev_err(dev, "Failed to query ICOSQ 0x%x state. 
err = %d\n", + icosq->sqn, err); + goto out; + } + + if (state != MLX5_SQC_STATE_ERR) + goto out; + + mlx5e_deactivate_rq(rq); + err = mlx5e_wait_for_icosq_flush(icosq); + if (err) + goto out; + + mlx5e_deactivate_icosq(icosq); + + /* At this point, both the rq and the icosq are disabled */ + + err = mlx5e_health_sq_to_ready(icosq->channel, icosq->sqn); + if (err) + goto out; + + mlx5e_reset_icosq_cc_pc(icosq); + mlx5e_free_rx_descs(rq); + clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); + mlx5e_activate_icosq(icosq); + mlx5e_activate_rq(rq); + + rq->stats->recover++; + return 0; +out: + clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); + return err; +} + +void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq) +{ + struct mlx5e_priv *priv = icosq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = icosq; + err_ctx.recover = mlx5e_rx_reporter_err_icosq_cqe_recover; + sprintf(err_str, "ERR CQE on ICOSQ: 0x%x", icosq->sqn); + + mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); +} + +static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state) +{ + struct net_device *dev = rq->netdev; + int err; + + err = mlx5e_modify_rq_state(rq, curr_state, MLX5_RQC_STATE_RST); + if (err) { + netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn); + return err; + } + err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); + if (err) { + netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn); + return err; + } + + return 0; +} + +static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx) +{ + struct mlx5_core_dev *mdev; + struct net_device *dev; + struct mlx5e_rq *rq; + u8 state; + int err; + + rq = ctx; + mdev = rq->mdev; + dev = rq->netdev; + err = mlx5e_query_rq_state(mdev, rq->rqn, &state); + if (err) { + netdev_err(dev, "Failed to query RQ 0x%x state. 
err = %d\n", + rq->rqn, err); + goto out; + } + + if (state != MLX5_RQC_STATE_ERR) + goto out; + + mlx5e_deactivate_rq(rq); + mlx5e_free_rx_descs(rq); + + err = mlx5e_rq_to_ready(rq, MLX5_RQC_STATE_ERR); + if (err) + goto out; + + clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state); + mlx5e_activate_rq(rq); + rq->stats->recover++; + return 0; +out: + clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state); + return err; +} + +void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq) +{ + struct mlx5e_priv *priv = rq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = rq; + err_ctx.recover = mlx5e_rx_reporter_err_rq_cqe_recover; + sprintf(err_str, "ERR CQE on RQ: 0x%x", rq->rqn); + + mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); +} + +static int mlx5e_rx_reporter_timeout_recover(void *ctx) +{ + struct mlx5e_icosq *icosq; + struct mlx5_eq_comp *eq; + struct mlx5e_rq *rq; + int err; + + rq = ctx; + icosq = &rq->channel->icosq; + eq = rq->cq.mcq.eq; + err = mlx5e_health_channel_eq_recover(eq, rq->channel); + if (err) + clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state); + + return err; +} + +void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq) +{ + struct mlx5e_icosq *icosq = &rq->channel->icosq; + struct mlx5e_priv *priv = rq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = rq; + err_ctx.recover = mlx5e_rx_reporter_timeout_recover; + sprintf(err_str, "RX timeout on channel: %d, ICOSQ: 0x%x RQ: 0x%x, CQ: 0x%x\n", + icosq->channel->ix, icosq->sqn, rq->rqn, rq->cq.mcq.cqn); + + mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); +} + +static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx) +{ + return err_ctx->recover(err_ctx->ctx); +} + +static int mlx5e_rx_reporter_recover(struct devlink_health_reporter *reporter, + void *context, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_err_ctx *err_ctx = context; + + return err_ctx ? mlx5e_rx_reporter_recover_from_ctx(err_ctx) : + mlx5e_health_recover_channels(priv); +} + +static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_priv *priv = rq->channel->priv; + struct mlx5e_params *params; + struct mlx5e_icosq *icosq; + u8 icosq_hw_state; + int wqes_sz; + u8 hw_state; + u16 wq_head; + int err; + + params = &priv->channels.params; + icosq = &rq->channel->icosq; + err = mlx5e_query_rq_state(priv->mdev, rq->rqn, &hw_state); + if (err) + return err; + + err = mlx5_core_query_sq_state(priv->mdev, icosq->sqn, &icosq_hw_state); + if (err) + return err; + + wqes_sz = mlx5e_rqwq_get_cur_sz(rq); + wq_head = params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? 
+ rq->mpwqe.wq.head : mlx5_wq_cyc_get_head(&rq->wqe.wq); + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", rq->channel->ix); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "rqn", rq->rqn); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "HW state", hw_state); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "SW state", rq->state); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "posted WQEs", wqes_sz); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "cc", wq_head); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "ICOSQ HW state", icosq_hw_state); + if (err) + return err; + + err = mlx5e_reporter_cq_diagnose(&rq->cq, fmsg); + if (err) + return err; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_rq *generic_rq; + u32 rq_stride, rq_sz; + int i, err = 0; + + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + generic_rq = &priv->channels.c[0]->rq; + rq_sz = mlx5e_rqwq_get_size(generic_rq); + rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(priv->mdev, params, NULL)); + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "Common config"); + if (err) + goto unlock; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "RQ"); + if (err) + goto unlock; + + err = devlink_fmsg_u8_pair_put(fmsg, "type", params->rq_wq_type); + if (err) + goto unlock; + + err = devlink_fmsg_u64_pair_put(fmsg, "stride size", rq_stride); + if (err) + goto unlock; + + err = devlink_fmsg_u32_pair_put(fmsg, "size", rq_sz); + if (err) + goto unlock; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + goto unlock; + + err = mlx5e_reporter_cq_common_diagnose(&generic_rq->cq, fmsg); + if (err) + goto unlock; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + goto unlock; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "RQs"); + if (err) + goto unlock; + + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_rq *rq = &priv->channels.c[i]->rq; + + err = mlx5e_rx_reporter_build_diagnose_output(rq, fmsg); + if (err) + goto unlock; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + goto unlock; +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = { + .name = "rx", + .recover = mlx5e_rx_reporter_recover, + .diagnose = mlx5e_rx_reporter_diagnose, +}; + +#define MLX5E_REPORTER_RX_GRACEFUL_PERIOD 500 + +int mlx5e_reporter_rx_create(struct mlx5e_priv *priv) +{ + struct devlink *devlink = priv_to_devlink(priv->mdev); + struct devlink_health_reporter *reporter; + + reporter = devlink_health_reporter_create(devlink, + &mlx5_rx_reporter_ops, + MLX5E_REPORTER_RX_GRACEFUL_PERIOD, + true, priv); + if (IS_ERR(reporter)) { + netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n", + PTR_ERR(reporter)); + return PTR_ERR(reporter); + } + priv->rx_reporter = reporter; + return 0; +} + +void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv) +{ + if (!priv->rx_reporter) + return; + + devlink_health_reporter_destroy(priv->rx_reporter); +} diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index c7f86453c638..b468549e96ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -1,16 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (c) 2019 Mellanox Technologies. */ -#include <net/devlink.h> -#include "reporter.h" -#include "lib/eq.h" - -#define MLX5E_TX_REPORTER_PER_SQ_MAX_LEN 256 - -struct mlx5e_tx_err_ctx { - int (*recover)(struct mlx5e_txqsq *sq); - struct mlx5e_txqsq *sq; -}; +#include "health.h" static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) { @@ -40,41 +31,20 @@ static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) sq->pc = 0; } -static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state) +static int mlx5e_tx_reporter_err_cqe_recover(void *ctx) { - struct mlx5_core_dev *mdev = sq->channel->mdev; - struct net_device *dev = sq->channel->netdev; - struct mlx5e_modify_sq_param msp = {0}; + struct mlx5_core_dev *mdev; + struct net_device *dev; + struct mlx5e_txqsq *sq; + u8 state; int err; - msp.curr_state = curr_state; - msp.next_state = MLX5_SQC_STATE_RST; - - err = mlx5e_modify_sq(mdev, sq->sqn, &msp); - if (err) { - netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn); - return err; - } - - memset(&msp, 0, sizeof(msp)); - msp.curr_state = MLX5_SQC_STATE_RST; - msp.next_state = MLX5_SQC_STATE_RDY; - - err = mlx5e_modify_sq(mdev, sq->sqn, &msp); - if (err) { - netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn); - return err; - } - - return 0; -} + sq = ctx; + mdev = sq->channel->mdev; + dev = sq->channel->netdev; -static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq) -{ - struct mlx5_core_dev *mdev = sq->channel->mdev; - struct net_device *dev = sq->channel->netdev; - u8 state; - int err; + if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) + return 0; err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); if (err) { @@ -97,7 +67,7 @@ static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq) * pending WQEs. SQ can safely reset the SQ. 
*/ - err = mlx5e_sq_to_ready(sq, state); + err = mlx5e_health_sq_to_ready(sq->channel, sq->sqn); if (err) goto out; @@ -112,115 +82,99 @@ out: return err; } -static int mlx5_tx_health_report(struct devlink_health_reporter *tx_reporter, - char *err_str, - struct mlx5e_tx_err_ctx *err_ctx) +void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq) { - if (IS_ERR_OR_NULL(tx_reporter)) { - netdev_err(err_ctx->sq->channel->netdev, err_str); - return err_ctx->recover(err_ctx->sq); - } - - return devlink_health_report(tx_reporter, err_str, err_ctx); -} + struct mlx5e_priv *priv = sq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx = {0}; -void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq) -{ - char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN]; - struct mlx5e_tx_err_ctx err_ctx = {0}; - - err_ctx.sq = sq; - err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; + err_ctx.ctx = sq; + err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; sprintf(err_str, "ERR CQE on SQ: 0x%x", sq->sqn); - mlx5_tx_health_report(sq->channel->priv->tx_reporter, err_str, - &err_ctx); + mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); } -static int mlx5e_tx_reporter_timeout_recover(struct mlx5e_txqsq *sq) +static int mlx5e_tx_reporter_timeout_recover(void *ctx) { - struct mlx5_eq_comp *eq = sq->cq.mcq.eq; - u32 eqe_count; - - netdev_err(sq->channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", - eq->core.eqn, eq->core.cons_index, eq->core.irqn); + struct mlx5_eq_comp *eq; + struct mlx5e_txqsq *sq; + int err; - eqe_count = mlx5_eq_poll_irq_disabled(eq); - if (!eqe_count) { + sq = ctx; + eq = sq->cq.mcq.eq; + err = mlx5e_health_channel_eq_recover(eq, sq->channel); + if (err) clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - return -EIO; - } - netdev_err(sq->channel->netdev, "Recover %d eqes on EQ 0x%x\n", - eqe_count, eq->core.eqn); - sq->channel->stats->eq_rearm++; - return 0; + return err; } -int mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq) +int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) { - char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN]; - struct mlx5e_tx_err_ctx err_ctx; + struct mlx5e_priv *priv = sq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx; - err_ctx.sq = sq; - err_ctx.recover = mlx5e_tx_reporter_timeout_recover; + err_ctx.ctx = sq; + err_ctx.recover = mlx5e_tx_reporter_timeout_recover; sprintf(err_str, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, jiffies_to_usecs(jiffies - sq->txq->trans_start)); - return mlx5_tx_health_report(sq->channel->priv->tx_reporter, err_str, - &err_ctx); + return mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); } /* state lock cannot be grabbed within this function. * It can cause a dead lock or a read-after-free. 
*/ -static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_tx_err_ctx *err_ctx) -{ - return err_ctx->recover(err_ctx->sq); -} - -static int mlx5e_tx_reporter_recover_all(struct mlx5e_priv *priv) +static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx) { - int err = 0; - - rtnl_lock(); - mutex_lock(&priv->state_lock); - - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) - goto out; - - err = mlx5e_safe_reopen_channels(priv); - -out: - mutex_unlock(&priv->state_lock); - rtnl_unlock(); - - return err; + return err_ctx->recover(err_ctx->ctx); } static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter, - void *context) + void *context, + struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); - struct mlx5e_tx_err_ctx *err_ctx = context; + struct mlx5e_err_ctx *err_ctx = context; return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) : - mlx5e_tx_reporter_recover_all(priv); + mlx5e_health_recover_channels(priv); } static int mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg, - u32 sqn, u8 state, bool stopped) + struct mlx5e_txqsq *sq, int tc) { + struct mlx5e_priv *priv = sq->channel->priv; + bool stopped = netif_xmit_stopped(sq->txq); + u8 state; int err; + err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state); + if (err) + return err; + err = devlink_fmsg_obj_nest_start(fmsg); if (err) return err; - err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sqn); + err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "tc", tc); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn); if (err) return err; @@ -232,6 +186,18 @@ mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg, if (err) return err; + err = devlink_fmsg_u32_pair_put(fmsg, "cc", sq->cc); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "pc", sq->pc); + if (err) + return err; + + err = mlx5e_reporter_cq_diagnose(&sq->cq, fmsg); + if (err) + return err; + err = devlink_fmsg_obj_nest_end(fmsg); if (err) return err; @@ -240,34 +206,65 @@ mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg, } static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg) + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); - int i, err = 0; + struct mlx5e_txqsq *generic_sq = priv->txq2sq[0]; + u32 sq_stride, sq_sz; + + int i, tc, err = 0; mutex_lock(&priv->state_lock); if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) goto unlock; + sq_sz = mlx5_wq_cyc_get_size(&generic_sq->wq); + sq_stride = MLX5_SEND_WQE_BB; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "Common Config"); + if (err) + goto unlock; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "SQ"); + if (err) + goto unlock; + + err = devlink_fmsg_u64_pair_put(fmsg, "stride size", sq_stride); + if (err) + goto unlock; + + err = devlink_fmsg_u32_pair_put(fmsg, "size", sq_sz); + if (err) + goto unlock; + + err = mlx5e_reporter_cq_common_diagnose(&generic_sq->cq, fmsg); + if (err) + goto unlock; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + goto unlock; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + goto unlock; + err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); if (err) goto unlock; - for (i 
= 0; i < priv->channels.num * priv->channels.params.num_tc; - i++) { - struct mlx5e_txqsq *sq = priv->txq2sq[i]; - u8 state; + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_channel *c = priv->channels.c[i]; - err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state); - if (err) - goto unlock; + for (tc = 0; tc < priv->channels.params.num_tc; tc++) { + struct mlx5e_txqsq *sq = &c->sq[tc]; - err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq->sqn, - state, - netif_xmit_stopped(sq->txq)); - if (err) - goto unlock; + err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc); + if (err) + goto unlock; + } } err = devlink_fmsg_arr_pair_nest_end(fmsg); if (err) @@ -286,25 +283,30 @@ static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500 -int mlx5e_tx_reporter_create(struct mlx5e_priv *priv) +int mlx5e_reporter_tx_create(struct mlx5e_priv *priv) { + struct devlink_health_reporter *reporter; struct mlx5_core_dev *mdev = priv->mdev; - struct devlink *devlink = priv_to_devlink(mdev); + struct devlink *devlink; - priv->tx_reporter = + devlink = priv_to_devlink(mdev); + reporter = devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops, MLX5_REPORTER_TX_GRACEFUL_PERIOD, true, priv); - if (IS_ERR(priv->tx_reporter)) + if (IS_ERR(reporter)) { netdev_warn(priv->netdev, "Failed to create tx reporter, err = %ld\n", - PTR_ERR(priv->tx_reporter)); - return IS_ERR_OR_NULL(priv->tx_reporter); + PTR_ERR(reporter)); + return PTR_ERR(reporter); + } + priv->tx_reporter = reporter; + return 0; } -void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv) +void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv) { - if (IS_ERR_OR_NULL(priv->tx_reporter)) + if (!priv->tx_reporter) return; devlink_health_reporter_destroy(priv->tx_reporter); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index a6a52806be45..af4ebd2951b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -31,29 +31,36 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, real_dev = is_vlan_dev(dev) ? vlan_dev_real_dev(dev) : dev; uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); - uplink_upper = netdev_master_upper_dev_get(uplink_dev); + + rcu_read_lock(); + uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev); + /* mlx5_lag_is_sriov() is a blocking function which can't be called + * while holding rcu read lock. Take the net_device for correctness + * sake. 
+ */ + if (uplink_upper) + dev_hold(uplink_upper); + rcu_read_unlock(); + dst_is_lag_dev = (uplink_upper && netif_is_lag_master(uplink_upper) && real_dev == uplink_upper && mlx5_lag_is_sriov(priv->mdev)); + if (uplink_upper) + dev_put(uplink_upper); /* if the egress device isn't on the same HW e-switch or * it's a LAG device, use the uplink */ + *route_dev = dev; if (!netdev_port_same_parent_id(priv->netdev, real_dev) || - dst_is_lag_dev) { - *route_dev = dev; + dst_is_lag_dev || is_vlan_dev(*route_dev)) *out_dev = uplink_dev; - } else { - *route_dev = dev; - if (is_vlan_dev(*route_dev)) - *out_dev = uplink_dev; - else if (mlx5e_eswitch_rep(dev) && - mlx5e_is_valid_eswitch_fwd_dev(priv, dev)) - *out_dev = *route_dev; - else - return -EOPNOTSUPP; - } + else if (mlx5e_eswitch_rep(dev) && + mlx5e_is_valid_eswitch_fwd_dev(priv, dev)) + *out_dev = *route_dev; + else + return -EOPNOTSUPP; if (!(mlx5e_eswitch_rep(*out_dev) && mlx5e_is_uplink_rep(netdev_priv(*out_dev)))) @@ -70,8 +77,8 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, struct neighbour **out_n, u8 *out_ttl) { + struct neighbour *n; struct rtable *rt; - struct neighbour *n = NULL; #if IS_ENABLED(CONFIG_INET) struct mlx5_core_dev *mdev = priv->mdev; @@ -90,15 +97,19 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, if (ret) return ret; - if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) + if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) { + ip_rt_put(rt); return -ENETUNREACH; + } #else return -EOPNOTSUPP; #endif ret = get_route_and_out_devs(priv, rt->dst.dev, route_dev, out_dev); - if (ret < 0) + if (ret < 0) { + ip_rt_put(rt); return ret; + } if (!(*out_ttl)) *out_ttl = ip4_dst_hoplimit(&rt->dst); @@ -119,44 +130,6 @@ static const char *mlx5e_netdev_kind(struct net_device *dev) return "unknown"; } -static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, - struct net_device *mirred_dev, - struct net_device **out_dev, - struct net_device **route_dev, - struct flowi6 *fl6, - struct neighbour **out_n, - u8 *out_ttl) -{ - struct neighbour *n = NULL; - struct dst_entry *dst; - -#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) - int ret; - - ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst, - fl6); - if (ret < 0) - return ret; - - if (!(*out_ttl)) - *out_ttl = ip6_dst_hoplimit(dst); - - ret = get_route_and_out_devs(priv, dst->dev, route_dev, out_dev); - if (ret < 0) - return ret; -#else - return -EOPNOTSUPP; -#endif - - n = dst_neigh_lookup(dst, &fl6->daddr); - dst_release(dst); - if (!n) - return -ENOMEM; - - *out_n = n; - return 0; -} - static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto, struct mlx5e_encap_entry *e) { @@ -199,8 +172,8 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); const struct ip_tunnel_key *tun_key = &e->tun_info->key; struct net_device *out_dev, *route_dev; - struct neighbour *n = NULL; struct flowi4 fl4 = {}; + struct neighbour *n; int ipv4_encap_size; char *encap_header; u8 nud_state, ttl; @@ -226,12 +199,15 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, if (max_encap_size < ipv4_encap_size) { mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", ipv4_encap_size, max_encap_size); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto release_neigh; } encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL); - if (!encap_header) - return -ENOMEM; + if (!encap_header) { + err = -ENOMEM; + goto release_neigh; + } 
/* used by mlx5e_detach_encap to lookup a neigh hash table * entry in the neigh hash table when a user deletes a rule @@ -282,16 +258,16 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, /* the encap entry will be made valid on neigh update event * and not used before that. */ - goto out; + goto release_neigh; } - - err = mlx5_packet_reformat_alloc(priv->mdev, - e->reformat_type, - ipv4_encap_size, encap_header, - MLX5_FLOW_NAMESPACE_FDB, - &e->encap_id); - if (err) + e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, + ipv4_encap_size, encap_header, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(e->pkt_reformat)) { + err = PTR_ERR(e->pkt_reformat); goto destroy_neigh_entry; + } e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); @@ -302,12 +278,48 @@ destroy_neigh_entry: mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); free_encap: kfree(encap_header); -out: - if (n) - neigh_release(n); +release_neigh: + neigh_release(n); return err; } +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) +static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct net_device **out_dev, + struct net_device **route_dev, + struct flowi6 *fl6, + struct neighbour **out_n, + u8 *out_ttl) +{ + struct dst_entry *dst; + struct neighbour *n; + + int ret; + + dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(mirred_dev), NULL, fl6, + NULL); + if (IS_ERR(dst)) + return PTR_ERR(dst); + + if (!(*out_ttl)) + *out_ttl = ip6_dst_hoplimit(dst); + + ret = get_route_and_out_devs(priv, dst->dev, route_dev, out_dev); + if (ret < 0) { + dst_release(dst); + return ret; + } + + n = dst_neigh_lookup(dst, &fl6->daddr); + dst_release(dst); + if (!n) + return -ENOMEM; + + *out_n = n; + return 0; +} + int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, struct net_device *mirred_dev, struct mlx5e_encap_entry *e) @@ -315,9 +327,9 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); const struct ip_tunnel_key *tun_key = &e->tun_info->key; struct net_device *out_dev, *route_dev; - struct neighbour *n = NULL; struct flowi6 fl6 = {}; struct ipv6hdr *ip6h; + struct neighbour *n = NULL; int ipv6_encap_size; char *encap_header; u8 nud_state, ttl; @@ -342,12 +354,15 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, if (max_encap_size < ipv6_encap_size) { mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", ipv6_encap_size, max_encap_size); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto release_neigh; } encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL); - if (!encap_header) - return -ENOMEM; + if (!encap_header) { + err = -ENOMEM; + goto release_neigh; + } /* used by mlx5e_detach_encap to lookup a neigh hash table * entry in the neigh hash table when a user deletes a rule @@ -397,16 +412,17 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, /* the encap entry will be made valid on neigh update event * and not used before that. 
*/ - goto out; + goto release_neigh; } - err = mlx5_packet_reformat_alloc(priv->mdev, - e->reformat_type, - ipv6_encap_size, encap_header, - MLX5_FLOW_NAMESPACE_FDB, - &e->encap_id); - if (err) + e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, + ipv6_encap_size, encap_header, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(e->pkt_reformat)) { + err = PTR_ERR(e->pkt_reformat); goto destroy_neigh_entry; + } e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); @@ -417,11 +433,11 @@ destroy_neigh_entry: mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); free_encap: kfree(encap_header); -out: - if (n) - neigh_release(n); +release_neigh: + neigh_release(n); return err; } +#endif bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, struct net_device *netdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index c362b9225dc2..6f9a78c85ffd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -58,9 +58,16 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, struct net_device *mirred_dev, struct mlx5e_encap_entry *e); +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, struct net_device *mirred_dev, struct mlx5e_encap_entry *e); +#else +static inline int +mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) { return -EOPNOTSUPP; } +#endif bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, struct net_device *netdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index ddfe19adb3d9..7c8796d9743f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -6,7 +6,7 @@ #include "en.h" -#define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS +#define MLX5E_SQ_NOPS_ROOM (MLX5_SEND_WQE_MAX_WQEBBS - 1) #define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ MLX5E_SQ_NOPS_ROOM) @@ -15,15 +15,14 @@ #else /* TLS offload requires additional stop_room for: * - a resync SKB. - * kTLS offload requires additional stop_room for: - * - static params WQE, - * - progress params WQE, and - * - resync DUMP per frag. + * kTLS offload requires fixed additional stop_room for: + * - a static params WQE, and a progress params WQE. + * The additional MTU-depending room for the resync DUMP WQEs + * will be calculated and added in runtime. 
*/ #define MLX5E_SQ_TLS_ROOM \ (MLX5_SEND_WQE_MAX_WQEBBS + \ - MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS + \ - MAX_SKB_FRAGS * MLX5E_KTLS_MAX_DUMP_WQEBBS) + MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS) #endif #define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start)) @@ -92,7 +91,7 @@ mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq, /* fill sq frag edge with nops to avoid wqe wrapping two pages */ for (; wi < edge_wi; wi++) { - wi->skb = NULL; + memset(wi, 0, sizeof(*wi)); wi->num_wqebbs = 1; mlx5e_post_nop(wq, sq->sqn, &sq->pc); } @@ -117,9 +116,27 @@ mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map, mlx5_write64((__be32 *)ctrl, uar_map); } -static inline bool mlx5e_transport_inline_tx_wqe(struct mlx5e_tx_wqe *wqe) +static inline bool mlx5e_transport_inline_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg) { - return !!wqe->ctrl.tisn; + return cseg && !!cseg->tisn; +} + +static inline u8 +mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct mlx5_wqe_ctrl_seg *cseg, + struct sk_buff *skb) +{ + u8 mode; + + if (mlx5e_transport_inline_tx_wqe(cseg)) + return MLX5_INLINE_MODE_TCP_UDP; + + mode = sq->min_inline_mode; + + if (skb_vlan_tag_present(skb) && + test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state)) + mode = max_t(u8, MLX5_INLINE_MODE_L2, mode); + + return mode; } static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index b0b982cf69bb..f049e0ac308a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -122,6 +122,7 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, void *va, u16 *rx_headroom, u32 *len, bool xsk) { struct bpf_prog *prog = READ_ONCE(rq->xdp_prog); + struct xdp_umem *umem = rq->umem; struct xdp_buff xdp; u32 act; int err; @@ -138,8 +139,11 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, xdp.rxq = &rq->xdp_rxq; act = bpf_prog_run_xdp(prog, &xdp); - if (xsk) - xdp.handle += xdp.data - xdp.data_hard_start; + if (xsk) { + u64 off = xdp.data - xdp.data_hard_start; + + xdp.handle = xsk_umem_adjust_offset(umem, xdp.handle, off); + } switch (act) { case XDP_PASS: *rx_headroom = xdp.data - xdp.data_hard_start; @@ -179,33 +183,19 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; struct mlx5e_xdpsq_stats *stats = sq->stats; struct mlx5_wq_cyc *wq = &sq->wq; - u8 wqebbs; - u16 pi; - - mlx5e_xdpsq_fetch_wqe(sq, &session->wqe); - - prefetchw(session->wqe->data); - session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT; - session->pkt_count = 0; - session->complete = 0; + u16 pi, contig_wqebbs; pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); -/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS - * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment. - * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a - * full-session WQE be cache-aligned. 
- */ -#if L1_CACHE_BYTES < 128 -#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1) -#else -#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2) -#endif + if (unlikely(contig_wqebbs < MLX5_SEND_WQE_MAX_WQEBBS)) + mlx5e_fill_xdpsq_frag_edge(sq, wq, pi, contig_wqebbs); - wqebbs = min_t(u16, mlx5_wq_cyc_get_contig_wqebbs(wq, pi), - MLX5E_XDP_MPW_MAX_WQEBBS); + session->wqe = mlx5e_xdpsq_fetch_wqe(sq, &pi); - session->max_ds_count = MLX5_SEND_WQEBB_NUM_DS * wqebbs; + prefetchw(session->wqe->data); + session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT; + session->pkt_count = 0; mlx5e_xdp_update_inline_state(sq); @@ -244,7 +234,7 @@ static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq) { if (unlikely(!sq->mpwqe.wqe)) { if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, - MLX5_SEND_WQE_MAX_WQEBBS))) { + MLX5E_XDPSQ_STOP_ROOM))) { /* SQ is full, ring doorbell */ mlx5e_xmit_xdp_doorbell(sq); sq->stats->full++; @@ -285,8 +275,8 @@ static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats); - if (unlikely(session->complete || - session->ds_count == session->max_ds_count)) + if (unlikely(mlx5e_xdp_no_room_for_inline_pkt(session) || + session->ds_count == MLX5E_XDP_MPW_MAX_NUM_DS)) mlx5e_xdp_mpwqe_complete(sq); mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index b90923932668..d7587f40ecae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -40,6 +40,26 @@ (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) #define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */) +#define MLX5E_XDPSQ_STOP_ROOM (MLX5E_SQ_STOP_ROOM) + +#define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg)) +#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT \ + DIV_ROUND_UP(MLX5E_XDP_INLINE_WQE_SZ_THRSD, MLX5_SEND_WQE_DS) + +/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS + * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment. + * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a + * full-session WQE be cache-aligned. + */ +#if L1_CACHE_BYTES < 128 +#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1) +#else +#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2) +#endif + +#define MLX5E_XDP_MPW_MAX_NUM_DS \ + (MLX5E_XDP_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS) + struct mlx5e_xsk_param; int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, @@ -55,12 +75,18 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, static inline void mlx5e_xdp_tx_enable(struct mlx5e_priv *priv) { set_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); + + if (priv->channels.params.xdp_prog) + set_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state); } static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv) { + if (priv->channels.params.xdp_prog) + clear_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state); + clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); - /* let other device's napi(s) see our new state */ + /* Let other device's napi(s) and XSK wakeups see our new state. 
*/ synchronize_rcu(); } @@ -69,19 +95,9 @@ static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv) return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); } -static inline void mlx5e_xdp_set_open(struct mlx5e_priv *priv) -{ - set_bit(MLX5E_STATE_XDP_OPEN, &priv->state); -} - -static inline void mlx5e_xdp_set_closed(struct mlx5e_priv *priv) -{ - clear_bit(MLX5E_STATE_XDP_OPEN, &priv->state); -} - -static inline bool mlx5e_xdp_is_open(struct mlx5e_priv *priv) +static inline bool mlx5e_xdp_is_active(struct mlx5e_priv *priv) { - return test_bit(MLX5E_STATE_XDP_OPEN, &priv->state); + return test_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state); } static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq) @@ -114,6 +130,30 @@ static inline void mlx5e_xdp_update_inline_state(struct mlx5e_xdpsq *sq) session->inline_on = 1; } +static inline bool +mlx5e_xdp_no_room_for_inline_pkt(struct mlx5e_xdp_mpwqe *session) +{ + return session->inline_on && + session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT > MLX5E_XDP_MPW_MAX_NUM_DS; +} + +static inline void +mlx5e_fill_xdpsq_frag_edge(struct mlx5e_xdpsq *sq, struct mlx5_wq_cyc *wq, + u16 pi, u16 nnops) +{ + struct mlx5e_xdp_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi]; + + edge_wi = wi + nnops; + /* fill sq frag edge with nops to avoid wqe wrapping two pages */ + for (; wi < edge_wi; wi++) { + wi->num_wqebbs = 1; + wi->num_pkts = 0; + mlx5e_post_nop(wq, sq->sqn, &sq->pc); + } + + sq->stats->nops += nnops; +} + static inline void mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *xdptxd, @@ -126,20 +166,12 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, session->pkt_count++; -#define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg)) - if (session->inline_on && dma_len <= MLX5E_XDP_INLINE_WQE_SZ_THRSD) { struct mlx5_wqe_inline_seg *inline_dseg = (struct mlx5_wqe_inline_seg *)dseg; u16 ds_len = sizeof(*inline_dseg) + dma_len; u16 ds_cnt = DIV_ROUND_UP(ds_len, MLX5_SEND_WQE_DS); - if (unlikely(session->ds_count + ds_cnt > session->max_ds_count)) { - /* Not enough space for inline wqe, send with memory pointer */ - session->complete = true; - goto no_inline; - } - inline_dseg->byte_count = cpu_to_be32(dma_len | MLX5_INLINE_SEG); memcpy(inline_dseg->data, xdptxd->data, dma_len); @@ -148,21 +180,23 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, return; } -no_inline: dseg->addr = cpu_to_be64(xdptxd->dma_addr); dseg->byte_count = cpu_to_be32(dma_len); dseg->lkey = sq->mkey_be; session->ds_count++; } -static inline void mlx5e_xdpsq_fetch_wqe(struct mlx5e_xdpsq *sq, - struct mlx5e_tx_wqe **wqe) +static inline struct mlx5e_tx_wqe * +mlx5e_xdpsq_fetch_wqe(struct mlx5e_xdpsq *sq, u16 *pi) { struct mlx5_wq_cyc *wq = &sq->wq; - u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + struct mlx5e_tx_wqe *wqe; + + *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + wqe = mlx5_wq_cyc_get_wqe(wq, *pi); + memset(wqe, 0, sizeof(*wqe)); - *wqe = mlx5_wq_cyc_get_wqe(wq, pi); - memset(*wqe, 0, sizeof(**wqe)); + return wqe; } static inline void diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile deleted file mode 100644 index 5ee42991900a..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile +++ /dev/null @@ -1 +0,0 @@ -subdir-ccflags-y += -I$(src)/../.. 
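The bound described in the comment moved into en/xdp.h above (MLX5E_XDP_MPW_MAX_WQEBBS and the derived MLX5E_XDP_MPW_MAX_NUM_DS) can be checked with plain arithmetic: a full 16-WQEBB multi-packet WQE would carry 16 * 4 = 64 data segments, one more than the 6-bit ds field of the control segment allows, so the session is capped one WQEBB short (two on 128-byte cache lines, to keep the full session cache-aligned). A minimal standalone sketch of that arithmetic follows; it is illustrative only, and the macro names are stand-ins for the kernel constants rather than the kernel headers themselves.

#include <stdio.h>

/*
 * Illustrative names, not the kernel macros: the values mirror
 * MLX5_SEND_WQE_MAX_WQEBBS (16), MLX5_SEND_WQEBB_NUM_DS (4) and the
 * 6-bit ds field of the WQE control segment referenced in the diff.
 */
#define SEND_WQE_MAX_WQEBBS  16   /* largest send WQE, in 64-byte WQEBBs */
#define SEND_WQEBB_NUM_DS     4   /* 16-byte data segments per WQEBB */
#define CTRL_SEG_DS_MAX      63   /* ds count is a 6-bit field */

int main(void)
{
	int full_session     = SEND_WQE_MAX_WQEBBS * SEND_WQEBB_NUM_DS;
	int capped_64b_line  = (SEND_WQE_MAX_WQEBBS - 1) * SEND_WQEBB_NUM_DS;
	int capped_128b_line = (SEND_WQE_MAX_WQEBBS - 2) * SEND_WQEBB_NUM_DS;

	printf("full session: %d DS (limit %d) -> does not fit\n",
	       full_session, CTRL_SEG_DS_MAX);
	printf("capped, <128B cache line: %d DS -> fits\n", capped_64b_line);
	printf("capped, >=128B cache line: %d DS -> fits\n", capped_128b_line);
	return 0;
}

With 60 (or 56) data segments the ds count always fits, which is why mlx5e_xmit_xdp_frame_mpwqe can compare session->ds_count directly against MLX5E_XDP_MPW_MAX_NUM_DS instead of tracking a per-session max_ds_count.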
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index 6a55573ec8f2..62fc8a128a8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -24,7 +24,8 @@ int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq, if (!xsk_umem_peek_addr_rq(umem, &handle)) return -ENOMEM; - dma_info->xsk.handle = handle + rq->buff.umem_headroom; + dma_info->xsk.handle = xsk_umem_adjust_offset(umem, handle, + rq->buff.umem_headroom); dma_info->xsk.data = xdp_umem_get_data(umem, dma_info->xsk.handle); /* No need to add headroom to the DMA address. In striding RQ case, we @@ -34,7 +35,7 @@ int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq, */ dma_info->addr = xdp_umem_get_dma(umem, handle); - xsk_umem_discard_addr_rq(umem); + xsk_umem_release_addr_rq(umem); dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE, DMA_BIDIRECTIONAL); @@ -104,7 +105,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, /* head_offset is not used in this function, because di->xsk.data and * di->addr point directly to the necessary place. Furthermore, in the - * current implementation, one page = one packet = one frame, so + * current implementation, UMR pages are mapped to XSK frames, so * head_offset should always be 0. */ WARN_ON_ONCE(head_offset); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h index 307b923a1361..cab0e93497ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h @@ -5,6 +5,7 @@ #define __MLX5_EN_XSK_RX_H__ #include "en.h" +#include <net/xdp_sock.h> /* RX data path */ @@ -24,4 +25,17 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt); +static inline bool mlx5e_xsk_update_rx_wakeup(struct mlx5e_rq *rq, bool alloc_err) +{ + if (!xsk_umem_uses_need_wakeup(rq->umem)) + return alloc_err; + + if (unlikely(alloc_err)) + xsk_set_rx_need_wakeup(rq->umem); + else + xsk_clear_rx_need_wakeup(rq->umem); + + return false; +} + #endif /* __MLX5_EN_XSK_RX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 7f78c004d12f..c28cbae42331 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -4,18 +4,23 @@ #include "setup.h" #include "en/params.h" +/* It matches XDP_UMEM_MIN_CHUNK_SIZE, but as this constant is private and may + * change unexpectedly, and mlx5e has a minimum valid stride size for striding + * RQ, keep this check in the driver. + */ +#define MLX5E_MIN_XSK_CHUNK_SIZE 2048 + bool mlx5e_validate_xsk_param(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, struct mlx5_core_dev *mdev) { - /* AF_XDP doesn't support frames larger than PAGE_SIZE, and the current - * mlx5e XDP implementation doesn't support multiple packets per page. - */ - if (xsk->chunk_size != PAGE_SIZE) + /* AF_XDP doesn't support frames larger than PAGE_SIZE. */ + if (xsk->chunk_size > PAGE_SIZE || + xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE) return false; /* Current MTU and XSK headroom don't allow packets to fit the frames. 
*/ - if (mlx5e_rx_get_linear_frag_sz(params, xsk) > xsk->chunk_size) + if (mlx5e_rx_get_min_frag_sz(params, xsk) > xsk->chunk_size) return false; /* frag_sz is different for regular and XSK RQs, so ensure that linear @@ -60,24 +65,28 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, struct xdp_umem *umem, struct mlx5e_channel *c) { - struct mlx5e_channel_param cparam = {}; + struct mlx5e_channel_param *cparam; struct dim_cq_moder icocq_moder = {}; int err; if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev)) return -EINVAL; - mlx5e_build_xsk_cparam(priv, params, xsk, &cparam); + cparam = kvzalloc(sizeof(*cparam), GFP_KERNEL); + if (!cparam) + return -ENOMEM; + + mlx5e_build_xsk_cparam(priv, params, xsk, cparam); - err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam.rx_cq, &c->xskrq.cq); + err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam->rx_cq, &c->xskrq.cq); if (unlikely(err)) - return err; + goto err_free_cparam; - err = mlx5e_open_rq(c, params, &cparam.rq, xsk, umem, &c->xskrq); + err = mlx5e_open_rq(c, params, &cparam->rq, xsk, umem, &c->xskrq); if (unlikely(err)) goto err_close_rx_cq; - err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam.tx_cq, &c->xsksq.cq); + err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam->tx_cq, &c->xsksq.cq); if (unlikely(err)) goto err_close_rq; @@ -87,21 +96,23 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, * is disabled and then reenabled, but the SQ continues receiving CQEs * from the old UMEM. */ - err = mlx5e_open_xdpsq(c, params, &cparam.xdp_sq, umem, &c->xsksq, true); + err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, umem, &c->xsksq, true); if (unlikely(err)) goto err_close_tx_cq; - err = mlx5e_open_cq(c, icocq_moder, &cparam.icosq_cq, &c->xskicosq.cq); + err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->xskicosq.cq); if (unlikely(err)) goto err_close_sq; /* Create a dedicated SQ for posting NOPs whenever we need an IRQ to be * triggered and NAPI to be called on the correct CPU. */ - err = mlx5e_open_icosq(c, params, &cparam.icosq, &c->xskicosq); + err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->xskicosq); if (unlikely(err)) goto err_close_icocq; + kvfree(cparam); + spin_lock_init(&c->xskicosq_lock); set_bit(MLX5E_CHANNEL_STATE_XSK, c->state); @@ -123,6 +134,9 @@ err_close_rq: err_close_rx_cq: mlx5e_close_cq(&c->xskrq.cq); +err_free_cparam: + kvfree(cparam); + return err; } @@ -130,6 +144,7 @@ void mlx5e_close_xsk(struct mlx5e_channel *c) { clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state); napi_synchronize(&c->napi); + synchronize_rcu(); /* Sync with the XSK wakeup. */ mlx5e_close_rq(&c->xskrq); mlx5e_close_cq(&c->xskrq.cq); @@ -141,6 +156,7 @@ void mlx5e_close_xsk(struct mlx5e_channel *c) void mlx5e_activate_xsk(struct mlx5e_channel *c) { + mlx5e_activate_icosq(&c->xskicosq); set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); /* TX queue is created active. */ @@ -153,6 +169,7 @@ void mlx5e_deactivate_xsk(struct mlx5e_channel *c) { mlx5e_deactivate_rq(&c->xskrq); /* TX queue is disabled on close. 
*/ + mlx5e_deactivate_icosq(&c->xskicosq); } static int mlx5e_redirect_xsk_rqt(struct mlx5e_priv *priv, u16 ix, u32 rqn) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c index 35e188cf4ea4..fe2d596cb361 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c @@ -7,14 +7,14 @@ #include "en/params.h" #include <net/xdp_sock.h> -int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid) +int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_params *params = &priv->channels.params; struct mlx5e_channel *c; u16 ix; - if (unlikely(!mlx5e_xdp_is_open(priv))) + if (unlikely(!mlx5e_xdp_is_active(priv))) return -ENETDOWN; if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix))) @@ -26,6 +26,13 @@ int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid) return -ENXIO; if (!napi_if_scheduled_mark_missed(&c->napi)) { + /* To avoid WQE overrun, don't post a NOP if XSKICOSQ is not + * active and not polled by NAPI. Return 0, because the upcoming + * activate will trigger the IRQ for us. + */ + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &c->xskicosq.state))) + return 0; + spin_lock(&c->xskicosq_lock); mlx5e_trigger_irq(&c->xskicosq); spin_unlock(&c->xskicosq_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h index 7add18bf78d8..79b487d89757 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h @@ -5,11 +5,23 @@ #define __MLX5_EN_XSK_TX_H__ #include "en.h" +#include <net/xdp_sock.h> /* TX data path */ -int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid); +int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags); bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget); +static inline void mlx5e_xsk_update_tx_wakeup(struct mlx5e_xdpsq *sq) +{ + if (!xsk_umem_uses_need_wakeup(sq->umem)) + return; + + if (sq->pc != sq->cc) + xsk_clear_tx_need_wakeup(sq->umem); + else + xsk_set_tx_need_wakeup(sq->umem); +} + #endif /* __MLX5_EN_XSK_TX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile deleted file mode 100644 index c78512eed8d7..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -subdir-ccflags-y += -I$(src)/.. 
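The wakeup helpers added in xsk/rx.h and xsk/tx.h above implement the AF_XDP need_wakeup handshake: the driver raises the flag when it has nothing in flight on TX, or when RX frame allocation fails, and clears it while it is actively processing, so the application only issues a wakeup syscall (which lands in mlx5e_xsk_wakeup) when a kick is actually needed. Below is a toy userspace model of the TX side only; the struct and function names are invented for the sketch and merely mirror the shape of mlx5e_xsk_update_tx_wakeup() shown in the tx.h hunk, not the real driver or AF_XDP API.

#include <stdbool.h>
#include <stdio.h>

/* Invented types: a stand-in for the XDP SQ and its UMEM flag. */
struct toy_xsk_sq {
	unsigned int pc;   /* producer counter */
	unsigned int cc;   /* completion (consumer) counter */
	bool need_wakeup;  /* stands in for the UMEM need_wakeup flag */
};

static void toy_update_tx_wakeup(struct toy_xsk_sq *sq)
{
	/* Outstanding work (pc != cc) means NAPI will keep polling, so the
	 * flag is cleared; an idle queue asks the application to kick it
	 * through the wakeup path instead. */
	sq->need_wakeup = (sq->pc == sq->cc);
}

int main(void)
{
	struct toy_xsk_sq sq = { .pc = 3, .cc = 1, .need_wakeup = false };

	toy_update_tx_wakeup(&sq);
	printf("busy queue: need_wakeup=%d\n", sq.need_wakeup);  /* 0 */

	sq.cc = sq.pc;
	toy_update_tx_wakeup(&sq);
	printf("idle queue: need_wakeup=%d\n", sq.need_wakeup);  /* 1 */
	return 0;
}

The RX variant in rx.h follows the same pattern with the allocation error as the trigger, returning false once the flag is set so the caller does not keep retrying a starved fill queue.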
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c index d2ff74d52720..46725cd743a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c @@ -38,7 +38,7 @@ static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk, return -ENOMEM; tx_priv->expected_seq = start_offload_tcp_sn; - tx_priv->crypto_info = crypto_info; + tx_priv->crypto_info = *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info; mlx5e_set_ktls_tx_priv_ctx(tls_ctx, tx_priv); /* tc and underlay_qpn values are not in use for tls tis */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h index b7298f9ee3d3..a3efa29a4629 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -21,7 +21,14 @@ MLX5_ST_SZ_BYTES(tls_progress_params)) #define MLX5E_KTLS_PROGRESS_WQEBBS \ (DIV_ROUND_UP(MLX5E_KTLS_PROGRESS_WQE_SZ, MLX5_SEND_WQE_BB)) -#define MLX5E_KTLS_MAX_DUMP_WQEBBS 2 + +struct mlx5e_dump_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_data_seg data; +}; + +#define MLX5E_KTLS_DUMP_WQEBBS \ + (DIV_ROUND_UP(sizeof(struct mlx5e_dump_wqe), MLX5_SEND_WQE_BB)) enum { MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD = 0, @@ -37,7 +44,7 @@ enum { struct mlx5e_ktls_offload_context_tx { struct tls_offload_context_tx *tx_ctx; - struct tls_crypto_info *crypto_info; + struct tls12_crypto_info_aes_gcm_128 crypto_info; u32 expected_seq; u32 tisn; u32 key_id; @@ -86,14 +93,28 @@ struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_tx_wqe **wqe, u16 *pi); void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, - struct mlx5e_sq_dma *dma); - + u32 *dma_fifo_cc); +static inline u8 +mlx5e_ktls_dumps_num_wqebbs(struct mlx5e_txqsq *sq, unsigned int nfrags, + unsigned int sync_len) +{ + /* Given the MTU and sync_len, calculates an upper bound for the + * number of WQEBBs needed for the TX resync DUMP WQEs of a record. 
+ */ + return MLX5E_KTLS_DUMP_WQEBBS * + (nfrags + DIV_ROUND_UP(sync_len, sq->hw_mtu)); +} #else static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv) { } +static inline void +mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, + struct mlx5e_tx_wqe_info *wi, + u32 *dma_fifo_cc) {} + #endif #endif /* __MLX5E_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 8b93101e1a09..f260dd96873b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -24,17 +24,12 @@ enum { static void fill_static_params_ctx(void *ctx, struct mlx5e_ktls_offload_context_tx *priv_tx) { - struct tls_crypto_info *crypto_info = priv_tx->crypto_info; - struct tls12_crypto_info_aes_gcm_128 *info; + struct tls12_crypto_info_aes_gcm_128 *info = &priv_tx->crypto_info; char *initial_rn, *gcm_iv; u16 salt_sz, rec_seq_sz; char *salt, *rec_seq; u8 tls_version; - if (WARN_ON(crypto_info->cipher_type != TLS_CIPHER_AES_GCM_128)) - return; - - info = (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; EXTRACT_INFO_FIELDS; gcm_iv = MLX5_ADDR_OF(tls_static_params, ctx, gcm_iv); @@ -108,14 +103,15 @@ build_progress_params(struct mlx5e_tx_wqe *wqe, u16 pc, u32 sqn, } static void tx_fill_wi(struct mlx5e_txqsq *sq, - u16 pi, u8 num_wqebbs, - skb_frag_t *resync_dump_frag) + u16 pi, u8 num_wqebbs, u32 num_bytes, + struct page *page) { struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi]; - wi->skb = NULL; - wi->num_wqebbs = num_wqebbs; - wi->resync_dump_frag = resync_dump_frag; + memset(wi, 0, sizeof(*wi)); + wi->num_wqebbs = num_wqebbs; + wi->num_bytes = num_bytes; + wi->resync_dump_frag_page = page; } void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx) @@ -143,7 +139,7 @@ post_static_params(struct mlx5e_txqsq *sq, umr_wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_STATIC_UMR_WQE_SZ, &pi); build_static_params(umr_wqe, sq->pc, sq->sqn, priv_tx, fence); - tx_fill_wi(sq, pi, MLX5E_KTLS_STATIC_WQEBBS, NULL); + tx_fill_wi(sq, pi, MLX5E_KTLS_STATIC_WQEBBS, 0, NULL); sq->pc += MLX5E_KTLS_STATIC_WQEBBS; } @@ -157,7 +153,7 @@ post_progress_params(struct mlx5e_txqsq *sq, wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_PROGRESS_WQE_SZ, &pi); build_progress_params(wqe, sq->pc, sq->sqn, priv_tx, fence); - tx_fill_wi(sq, pi, MLX5E_KTLS_PROGRESS_WQEBBS, NULL); + tx_fill_wi(sq, pi, MLX5E_KTLS_PROGRESS_WQEBBS, 0, NULL); sq->pc += MLX5E_KTLS_PROGRESS_WQEBBS; } @@ -167,6 +163,14 @@ mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq, bool skip_static_post, bool fence_first_post) { bool progress_fence = skip_static_post || !fence_first_post; + struct mlx5_wq_cyc *wq = &sq->wq; + u16 contig_wqebbs_room, pi; + + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs_room < + MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS)) + mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); if (!skip_static_post) post_static_params(sq, priv_tx, fence_first_post); @@ -176,31 +180,51 @@ mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq, struct tx_sync_info { u64 rcd_sn; - s32 sync_len; + u32 sync_len; int nr_frags; - skb_frag_t *frags[MAX_SKB_FRAGS]; + skb_frag_t frags[MAX_SKB_FRAGS]; +}; + +enum mlx5e_ktls_sync_retval { + MLX5E_KTLS_SYNC_DONE, + MLX5E_KTLS_SYNC_FAIL, + MLX5E_KTLS_SYNC_SKIP_NO_DATA, }; -static bool tx_sync_info_get(struct mlx5e_ktls_offload_context_tx 
*priv_tx, - u32 tcp_seq, struct tx_sync_info *info) +static enum mlx5e_ktls_sync_retval +tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx, + u32 tcp_seq, int datalen, struct tx_sync_info *info) { struct tls_offload_context_tx *tx_ctx = priv_tx->tx_ctx; + enum mlx5e_ktls_sync_retval ret = MLX5E_KTLS_SYNC_DONE; struct tls_record_info *record; int remaining, i = 0; unsigned long flags; - bool ret = true; + bool ends_before; spin_lock_irqsave(&tx_ctx->lock, flags); record = tls_get_record(tx_ctx, tcp_seq, &info->rcd_sn); if (unlikely(!record)) { - ret = false; + ret = MLX5E_KTLS_SYNC_FAIL; goto out; } - if (unlikely(tcp_seq < tls_record_start_seq(record))) { - if (!tls_record_is_start_marker(record)) - ret = false; + /* There are the following cases: + * 1. packet ends before start marker: bypass offload. + * 2. packet starts before start marker and ends after it: drop, + * not supported, breaks contract with kernel. + * 3. packet ends before tls record info starts: drop, + * this packet was already acknowledged and its record info + * was released. + */ + ends_before = before(tcp_seq + datalen, tls_record_start_seq(record)); + + if (unlikely(tls_record_is_start_marker(record))) { + ret = ends_before ? MLX5E_KTLS_SYNC_SKIP_NO_DATA : MLX5E_KTLS_SYNC_FAIL; + goto out; + } else if (ends_before) { + ret = MLX5E_KTLS_SYNC_FAIL; goto out; } @@ -209,13 +233,13 @@ static bool tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx, while (remaining > 0) { skb_frag_t *frag = &record->frags[i]; - __skb_frag_ref(frag); + get_page(skb_frag_page(frag)); remaining -= skb_frag_size(frag); - info->frags[i++] = frag; + info->frags[i++] = *frag; } /* reduce the part which will be sent with the original SKB */ if (remaining < 0) - skb_frag_size_add(info->frags[i - 1], remaining); + skb_frag_size_add(&info->frags[i - 1], remaining); info->nr_frags = i; out: spin_unlock_irqrestore(&tx_ctx->lock, flags); @@ -227,17 +251,12 @@ tx_post_resync_params(struct mlx5e_txqsq *sq, struct mlx5e_ktls_offload_context_tx *priv_tx, u64 rcd_sn) { - struct tls_crypto_info *crypto_info = priv_tx->crypto_info; - struct tls12_crypto_info_aes_gcm_128 *info; + struct tls12_crypto_info_aes_gcm_128 *info = &priv_tx->crypto_info; __be64 rn_be = cpu_to_be64(rcd_sn); bool skip_static_post; u16 rec_seq_sz; char *rec_seq; - if (WARN_ON(crypto_info->cipher_type != TLS_CIPHER_AES_GCM_128)) - return; - - info = (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; rec_seq = info->rec_seq; rec_seq_sz = sizeof(info->rec_seq); @@ -249,42 +268,28 @@ tx_post_resync_params(struct mlx5e_txqsq *sq, } static int -tx_post_resync_dump(struct mlx5e_txqsq *sq, struct sk_buff *skb, - skb_frag_t *frag, u32 tisn, bool first) +tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool first) { struct mlx5_wqe_ctrl_seg *cseg; - struct mlx5_wqe_eth_seg *eseg; struct mlx5_wqe_data_seg *dseg; - struct mlx5e_tx_wqe *wqe; + struct mlx5e_dump_wqe *wqe; dma_addr_t dma_addr = 0; - u16 ds_cnt, ds_cnt_inl; - u8 num_wqebbs; - u16 pi, ihs; + u16 ds_cnt; int fsz; - - ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; - ihs = eth_get_headlen(skb->dev, skb->data, skb_headlen(skb)); - ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS); - ds_cnt += ds_cnt_inl; - ds_cnt += 1; /* one frag */ + u16 pi; wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi); - num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); + ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; cseg = &wqe->ctrl; - eseg = &wqe->eth; - dseg = wqe->data; + dseg = &wqe->data; 
cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_DUMP); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); cseg->tisn = cpu_to_be32(tisn << 8); cseg->fm_ce_se = first ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; - eseg->inline_hdr.sz = cpu_to_be16(ihs); - memcpy(eseg->inline_hdr.start, skb->data, ihs); - dseg += ds_cnt_inl; - fsz = skb_frag_size(frag); dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, DMA_TO_DEVICE); @@ -296,24 +301,27 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, struct sk_buff *skb, dseg->byte_count = cpu_to_be32(fsz); mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE); - tx_fill_wi(sq, pi, num_wqebbs, frag); - sq->pc += num_wqebbs; - - WARN(num_wqebbs > MLX5E_KTLS_MAX_DUMP_WQEBBS, - "unexpected DUMP num_wqebbs, %d > %d", - num_wqebbs, MLX5E_KTLS_MAX_DUMP_WQEBBS); + tx_fill_wi(sq, pi, MLX5E_KTLS_DUMP_WQEBBS, fsz, skb_frag_page(frag)); + sq->pc += MLX5E_KTLS_DUMP_WQEBBS; return 0; } void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, - struct mlx5e_sq_dma *dma) + u32 *dma_fifo_cc) { - struct mlx5e_sq_stats *stats = sq->stats; + struct mlx5e_sq_stats *stats; + struct mlx5e_sq_dma *dma; + + if (!wi->resync_dump_frag_page) + return; + + dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++); + stats = sq->stats; mlx5e_tx_dma_unmap(sq->pdev, dma); - __skb_frag_unref(wi->resync_dump_frag); + put_page(wi->resync_dump_frag_page); stats->tls_dump_packets++; stats->tls_dump_bytes += wi->num_bytes; } @@ -323,25 +331,31 @@ static void tx_post_fence_nop(struct mlx5e_txqsq *sq) struct mlx5_wq_cyc *wq = &sq->wq; u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - tx_fill_wi(sq, pi, 1, NULL); + tx_fill_wi(sq, pi, 1, 0, NULL); mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc); } -static struct sk_buff * +static enum mlx5e_ktls_sync_retval mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, struct mlx5e_txqsq *sq, - struct sk_buff *skb, + int datalen, u32 seq) { struct mlx5e_sq_stats *stats = sq->stats; struct mlx5_wq_cyc *wq = &sq->wq; + enum mlx5e_ktls_sync_retval ret; struct tx_sync_info info = {}; u16 contig_wqebbs_room, pi; u8 num_wqebbs; - int i; - - if (!tx_sync_info_get(priv_tx, seq, &info)) { + int i = 0; + + ret = tx_sync_info_get(priv_tx, seq, datalen, &info); + if (unlikely(ret != MLX5E_KTLS_SYNC_DONE)) { + if (ret == MLX5E_KTLS_SYNC_SKIP_NO_DATA) { + stats->tls_skip_no_sync_data++; + return MLX5E_KTLS_SYNC_SKIP_NO_DATA; + } /* We might get here if a retransmission reaches the driver * after the relevant record is acked. * It should be safe to drop the packet in this case @@ -350,46 +364,62 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, goto err_out; } - if (unlikely(info.sync_len < 0)) { - u32 payload; - int headln; + stats->tls_ooo++; - headln = skb_transport_offset(skb) + tcp_hdrlen(skb); - payload = skb->len - headln; - if (likely(payload <= -info.sync_len)) - return skb; + tx_post_resync_params(sq, priv_tx, info.rcd_sn); - stats->tls_drop_bypass_req++; - goto err_out; + /* If no dump WQE was sent, we need to have a fence NOP WQE before the + * actual data xmit. + */ + if (!info.nr_frags) { + tx_post_fence_nop(sq); + return MLX5E_KTLS_SYNC_DONE; } - stats->tls_ooo++; - - num_wqebbs = MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS + - (info.nr_frags ? 
info.nr_frags * MLX5E_KTLS_MAX_DUMP_WQEBBS : 1); + num_wqebbs = mlx5e_ktls_dumps_num_wqebbs(sq, info.nr_frags, info.sync_len); pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs_room < num_wqebbs)) mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); - tx_post_resync_params(sq, priv_tx, info.rcd_sn); + for (; i < info.nr_frags; i++) { + unsigned int orig_fsz, frag_offset = 0, n = 0; + skb_frag_t *f = &info.frags[i]; - for (i = 0; i < info.nr_frags; i++) - if (tx_post_resync_dump(sq, skb, info.frags[i], - priv_tx->tisn, !i)) - goto err_out; + orig_fsz = skb_frag_size(f); - /* If no dump WQE was sent, we need to have a fence NOP WQE before the - * actual data xmit. - */ - if (!info.nr_frags) - tx_post_fence_nop(sq); + do { + bool fence = !(i || frag_offset); + unsigned int fsz; - return skb; + n++; + fsz = min_t(unsigned int, sq->hw_mtu, orig_fsz - frag_offset); + skb_frag_size_set(f, fsz); + if (tx_post_resync_dump(sq, f, priv_tx->tisn, fence)) { + page_ref_add(skb_frag_page(f), n - 1); + goto err_out; + } + + skb_frag_off_add(f, fsz); + frag_offset += fsz; + } while (frag_offset < orig_fsz); + + page_ref_add(skb_frag_page(f), n - 1); + } + + return MLX5E_KTLS_SYNC_DONE; err_out: - dev_kfree_skb_any(skb); - return NULL; + for (; i < info.nr_frags; i++) + /* The put_page() here undoes the page ref obtained in tx_sync_info_get(). + * Page refs obtained for the DUMP WQEs above (by page_ref_add) will be + * released only upon their completions (or in mlx5e_free_txqsq_descs, + * if channel closes). + */ + put_page(skb_frag_page(&info.frags[i])); + + return MLX5E_KTLS_SYNC_FAIL; } struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev, @@ -412,7 +442,7 @@ struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev, goto out; tls_ctx = tls_get_ctx(skb->sk); - if (unlikely(WARN_ON_ONCE(tls_ctx->netdev != netdev))) + if (WARN_ON_ONCE(tls_ctx->netdev != netdev)) goto err_out; priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx); @@ -425,10 +455,21 @@ struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev, seq = ntohl(tcp_hdr(skb)->seq); if (unlikely(priv_tx->expected_seq != seq)) { - skb = mlx5e_ktls_tx_handle_ooo(priv_tx, sq, skb, seq); - if (unlikely(!skb)) - goto out; - *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi); + enum mlx5e_ktls_sync_retval ret = + mlx5e_ktls_tx_handle_ooo(priv_tx, sq, datalen, seq); + + switch (ret) { + case MLX5E_KTLS_SYNC_DONE: + *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi); + break; + case MLX5E_KTLS_SYNC_SKIP_NO_DATA: + if (likely(!skb->decrypted)) + goto out; + WARN_ON_ONCE(1); + /* fall-through */ + default: /* MLX5E_KTLS_SYNC_FAIL */ + goto err_out; + } } priv_tx->expected_seq = seq + datalen; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c index 71384ad1a443..ef1ed15a53b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c @@ -269,7 +269,7 @@ struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev, int datalen; u32 skb_seq; - if (MLX5_CAP_GEN(sq->channel->mdev, tls)) { + if (MLX5_CAP_GEN(sq->channel->mdev, tls_tx)) { skb = mlx5e_ktls_handle_tx_skb(netdev, sq, skb, wqe, pi); goto out; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 1539cf3de5dc..f7890e0ce96c 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -180,15 +180,3 @@ out: return err; } - -u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev) -{ - u8 min_inline_mode; - - mlx5_query_min_inline(mdev, &min_inline_mode); - if (min_inline_mode == MLX5_INLINE_MODE_NONE && - !MLX5_CAP_ETH(mdev, wqe_vlan_insert)) - min_inline_mode = MLX5_INLINE_MODE_L2; - - return min_inline_mode; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 8dd31b5c740c..01f2918063af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -1101,7 +1101,7 @@ void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv) static void mlx5e_trust_update_tx_min_inline_mode(struct mlx5e_priv *priv, struct mlx5e_params *params) { - params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(priv->mdev); + mlx5_query_min_inline(priv->mdev, ¶ms->tx_min_inline_mode); if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP && params->tx_min_inline_mode == MLX5_INLINE_MODE_L2) params->tx_min_inline_mode = MLX5_INLINE_MODE_IP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 20e628c907e5..d674cb679895 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -218,13 +218,9 @@ static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS]; int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) { - int i, num_stats = 0; - switch (sset) { case ETH_SS_STATS: - for (i = 0; i < mlx5e_num_stats_grps; i++) - num_stats += mlx5e_stats_grps[i].get_num_stats(priv); - return num_stats; + return mlx5e_stats_total_num(priv); case ETH_SS_PRIV_FLAGS: return MLX5E_NUM_PFLAGS; case ETH_SS_TEST: @@ -242,14 +238,6 @@ static int mlx5e_get_sset_count(struct net_device *dev, int sset) return mlx5e_ethtool_get_sset_count(priv, sset); } -static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data) -{ - int i, idx = 0; - - for (i = 0; i < mlx5e_num_stats_grps; i++) - idx = mlx5e_stats_grps[i].fill_strings(priv, data, idx); -} - void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data) { int i; @@ -268,7 +256,7 @@ void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data) break; case ETH_SS_STATS: - mlx5e_fill_stats_strings(priv, data); + mlx5e_stats_fill_strings(priv, data); break; } } @@ -283,14 +271,13 @@ static void mlx5e_get_strings(struct net_device *dev, u32 stringset, u8 *data) void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, struct ethtool_stats *stats, u64 *data) { - int i, idx = 0; + int idx = 0; mutex_lock(&priv->state_lock); - mlx5e_update_stats(priv); + mlx5e_stats_update(priv); mutex_unlock(&priv->state_lock); - for (i = 0; i < mlx5e_num_stats_grps; i++) - idx = mlx5e_stats_grps[i].fill_stats(priv, data, idx); + mlx5e_stats_fill(priv, data, idx); } static void mlx5e_get_ethtool_stats(struct net_device *dev, @@ -708,9 +695,9 @@ static int get_fec_supported_advertised(struct mlx5_core_dev *dev, static void ptys2ethtool_supported_advertised_port(struct ethtool_link_ksettings *link_ksettings, u32 eth_proto_cap, - u8 connector_type) + u8 connector_type, bool ext) { - if (!connector_type || connector_type >= MLX5E_CONNECTOR_TYPE_NUMBER) { + if ((!connector_type && !ext) || connector_type >= 
MLX5E_CONNECTOR_TYPE_NUMBER) { if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_10GBASE_CR) | MLX5E_PROT_MASK(MLX5E_10GBASE_SR) | MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) @@ -842,9 +829,9 @@ static int ptys2connector_type[MLX5E_CONNECTOR_TYPE_NUMBER] = { [MLX5E_PORT_OTHER] = PORT_OTHER, }; -static u8 get_connector_port(u32 eth_proto, u8 connector_type) +static u8 get_connector_port(u32 eth_proto, u8 connector_type, bool ext) { - if (connector_type && connector_type < MLX5E_CONNECTOR_TYPE_NUMBER) + if ((connector_type || ext) && connector_type < MLX5E_CONNECTOR_TYPE_NUMBER) return ptys2connector_type[connector_type]; if (eth_proto & @@ -945,9 +932,9 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; link_ksettings->base.port = get_connector_port(eth_proto_oper, - connector_type); + connector_type, ext); ptys2ethtool_supported_advertised_port(link_ksettings, eth_proto_admin, - connector_type); + connector_type, ext); get_lp_advertising(mdev, eth_proto_lp, link_ksettings); if (an_status == MLX5_AN_COMPLETE) @@ -1021,24 +1008,17 @@ static bool ext_link_mode_requested(const unsigned long *adver) { #define MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT ETHTOOL_LINK_MODE_50000baseKR_Full_BIT int size = __ETHTOOL_LINK_MODE_MASK_NBITS - MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT; - __ETHTOOL_DECLARE_LINK_MODE_MASK(modes); + __ETHTOOL_DECLARE_LINK_MODE_MASK(modes) = {0,}; bitmap_set(modes, MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT, size); return bitmap_intersects(modes, adver, __ETHTOOL_LINK_MODE_MASK_NBITS); } -static bool ext_speed_requested(u32 speed) -{ -#define MLX5E_MAX_PTYS_LEGACY_SPEED 100000 - return !!(speed > MLX5E_MAX_PTYS_LEGACY_SPEED); -} - -static bool ext_requested(u8 autoneg, const unsigned long *adver, u32 speed) +static bool ext_requested(u8 autoneg, const unsigned long *adver, bool ext_supported) { bool ext_link_mode = ext_link_mode_requested(adver); - bool ext_speed = ext_speed_requested(speed); - return autoneg == AUTONEG_ENABLE ? ext_link_mode : ext_speed; + return autoneg == AUTONEG_ENABLE ? 
ext_link_mode : ext_supported; } int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, @@ -1065,8 +1045,8 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, autoneg = link_ksettings->base.autoneg; speed = link_ksettings->base.speed; - ext = ext_requested(autoneg, adver, speed), ext_supported = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + ext = ext_requested(autoneg, adver, ext_supported); if (!ext_supported && ext) return -EOPNOTSUPP; @@ -1431,7 +1411,7 @@ static __u32 mlx5e_get_wol_supported(struct mlx5_core_dev *mdev) return ret; } -static __u32 mlx5e_refomrat_wol_mode_mlx5_to_linux(u8 mode) +static __u32 mlx5e_reformat_wol_mode_mlx5_to_linux(u8 mode) { __u32 ret = 0; @@ -1459,7 +1439,7 @@ static __u32 mlx5e_refomrat_wol_mode_mlx5_to_linux(u8 mode) return ret; } -static u8 mlx5e_refomrat_wol_mode_linux_to_mlx5(__u32 mode) +static u8 mlx5e_reformat_wol_mode_linux_to_mlx5(__u32 mode) { u8 ret = 0; @@ -1505,7 +1485,7 @@ static void mlx5e_get_wol(struct net_device *netdev, if (err) return; - wol->wolopts = mlx5e_refomrat_wol_mode_mlx5_to_linux(mlx5_wol_mode); + wol->wolopts = mlx5e_reformat_wol_mode_mlx5_to_linux(mlx5_wol_mode); } static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) @@ -1521,7 +1501,7 @@ static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) if (wol->wolopts & ~wol_supported) return -EINVAL; - mlx5_wol_mode = mlx5e_refomrat_wol_mode_linux_to_mlx5(wol->wolopts); + mlx5_wol_mode = mlx5e_reformat_wol_mode_linux_to_mlx5(wol->wolopts); return mlx5_set_port_wol(mdev, mlx5_wol_mode); } @@ -1643,7 +1623,7 @@ static int mlx5e_get_module_info(struct net_device *netdev, break; case MLX5_MODULE_ID_SFP: modinfo->type = ETH_MODULE_SFF_8472; - modinfo->eeprom_len = MLX5_EEPROM_PAGE_LENGTH; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; break; default: netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n", @@ -1958,21 +1938,27 @@ static u32 mlx5e_get_priv_flags(struct net_device *netdev) return priv->channels.params.pflags; } -#ifndef CONFIG_MLX5_EN_RXNFC -/* When CONFIG_MLX5_EN_RXNFC=n we only support ETHTOOL_GRXRINGS - * otherwise this function will be defined from en_fs_ethtool.c - */ static int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) { struct mlx5e_priv *priv = netdev_priv(dev); - if (info->cmd != ETHTOOL_GRXRINGS) - return -EOPNOTSUPP; - /* ring_count is needed by ethtool -x */ - info->data = priv->channels.params.num_channels; - return 0; + /* ETHTOOL_GRXRINGS is needed by ethtool -x which is not part + * of rxnfc. We keep this logic out of mlx5e_ethtool_get_rxnfc, + * to avoid breaking "ethtool -x" when mlx5e_ethtool_get_rxnfc + * is compiled out via CONFIG_MLX5_EN_RXNFC=n. 
+ */ + if (info->cmd == ETHTOOL_GRXRINGS) { + info->data = priv->channels.params.num_channels; + return 0; + } + + return mlx5e_ethtool_get_rxnfc(dev, info, rule_locs); +} + +static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + return mlx5e_ethtool_set_rxnfc(dev, cmd); } -#endif const struct ethtool_ops mlx5e_ethtool_ops = { .get_drvinfo = mlx5e_get_drvinfo, @@ -1993,9 +1979,7 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .get_rxfh = mlx5e_get_rxfh, .set_rxfh = mlx5e_set_rxfh, .get_rxnfc = mlx5e_get_rxnfc, -#ifdef CONFIG_MLX5_EN_RXNFC .set_rxnfc = mlx5e_set_rxnfc, -#endif .get_tunable = mlx5e_get_tunable, .set_tunable = mlx5e_set_tunable, .get_pauseparam = mlx5e_get_pauseparam, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 76cc10e44080..73d3dc07331f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -747,8 +747,55 @@ static struct mlx5e_etype_proto ttc_tunnel_rules[] = { .etype = ETH_P_IPV6, .proto = IPPROTO_GRE, }, + [MLX5E_TT_IPV4_IPIP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_IPIP, + }, + [MLX5E_TT_IPV6_IPIP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_IPIP, + }, + [MLX5E_TT_IPV4_IPV6] = { + .etype = ETH_P_IP, + .proto = IPPROTO_IPV6, + }, + [MLX5E_TT_IPV6_IPV6] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_IPV6, + }, + }; +bool mlx5e_tunnel_proto_supported(struct mlx5_core_dev *mdev, u8 proto_type) +{ + switch (proto_type) { + case IPPROTO_GRE: + return MLX5_CAP_ETH(mdev, tunnel_stateless_gre); + case IPPROTO_IPIP: + case IPPROTO_IPV6: + return MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip); + default: + return false; + } +} + +bool mlx5e_any_tunnel_proto_supported(struct mlx5_core_dev *mdev) +{ + int tt; + + for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) { + if (mlx5e_tunnel_proto_supported(mdev, ttc_tunnel_rules[tt].proto)) + return true; + } + return false; +} + +bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) +{ + return (mlx5e_any_tunnel_proto_supported(mdev) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version)); +} + static u8 mlx5e_etype_to_ipv(u16 ethertype) { if (ethertype == ETH_P_IP) @@ -838,6 +885,9 @@ static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv, dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = params->inner_ttc->ft.t; for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) { + if (!mlx5e_tunnel_proto_supported(priv->mdev, + ttc_tunnel_rules[tt].proto)) + continue; rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest, ttc_tunnel_rules[tt].etype, ttc_tunnel_rules[tt].proto); @@ -854,22 +904,6 @@ del_rules: return err; } -#define MLX5E_TTC_NUM_GROUPS 3 -#define MLX5E_TTC_GROUP1_SIZE (BIT(3) + MLX5E_NUM_TUNNEL_TT) -#define MLX5E_TTC_GROUP2_SIZE BIT(1) -#define MLX5E_TTC_GROUP3_SIZE BIT(0) -#define MLX5E_TTC_TABLE_SIZE (MLX5E_TTC_GROUP1_SIZE +\ - MLX5E_TTC_GROUP2_SIZE +\ - MLX5E_TTC_GROUP3_SIZE) - -#define MLX5E_INNER_TTC_NUM_GROUPS 3 -#define MLX5E_INNER_TTC_GROUP1_SIZE BIT(3) -#define MLX5E_INNER_TTC_GROUP2_SIZE BIT(1) -#define MLX5E_INNER_TTC_GROUP3_SIZE BIT(0) -#define MLX5E_INNER_TTC_TABLE_SIZE (MLX5E_INNER_TTC_GROUP1_SIZE +\ - MLX5E_INNER_TTC_GROUP2_SIZE +\ - MLX5E_INNER_TTC_GROUP3_SIZE) - static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc, bool use_ipv) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 94304abc49e9..3bc2ac3d53fc 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -58,6 +58,7 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv, struct ethtool_rx_flow_spec *fs, int num_tuples) { + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5e_ethtool_table *eth_ft; struct mlx5_flow_namespace *ns; struct mlx5_flow_table *ft; @@ -102,9 +103,11 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv, table_size = min_t(u32, BIT(MLX5_CAP_FLOWTABLE(priv->mdev, flow_table_properties_nic_receive.log_max_ft_size)), MLX5E_ETHTOOL_NUM_ENTRIES); - ft = mlx5_create_auto_grouped_flow_table(ns, prio, - table_size, - MLX5E_ETHTOOL_NUM_GROUPS, 0, 0); + + ft_attr.prio = prio; + ft_attr.max_fte = table_size; + ft_attr.autogroup.max_num_groups = MLX5E_ETHTOOL_NUM_GROUPS; + ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); if (IS_ERR(ft)) return (void *)ft; @@ -399,10 +402,10 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, struct mlx5_flow_table *ft, struct ethtool_rx_flow_spec *fs) { + struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND }; struct mlx5_flow_destination *dst = NULL; - struct mlx5_flow_act flow_act = {0}; - struct mlx5_flow_spec *spec; struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; int err = 0; spec = kvzalloc(sizeof(*spec), GFP_KERNEL); @@ -888,10 +891,10 @@ static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv, return 0; } -int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +int mlx5e_ethtool_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) { - int err = 0; struct mlx5e_priv *priv = netdev_priv(dev); + int err = 0; switch (cmd->cmd) { case ETHTOOL_SRXCLSRLINS: @@ -911,16 +914,13 @@ int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) return err; } -int mlx5e_get_rxnfc(struct net_device *dev, - struct ethtool_rxnfc *info, u32 *rule_locs) +int mlx5e_ethtool_get_rxnfc(struct net_device *dev, + struct ethtool_rxnfc *info, u32 *rule_locs) { struct mlx5e_priv *priv = netdev_priv(dev); int err = 0; switch (info->cmd) { - case ETHTOOL_GRXRINGS: - info->data = priv->channels.params.num_channels; - break; case ETHTOOL_GRXCLSRLCNT: info->rule_cnt = priv->fs.ethtool.tot_num_rules; break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9d5f6e56188f..454d3459bd8b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -56,12 +56,14 @@ #include "en/xdp.h" #include "lib/eq.h" #include "en/monitor_stats.h" -#include "en/reporter.h" +#include "en/health.h" #include "en/params.h" #include "en/xsk/umem.h" #include "en/xsk/setup.h" #include "en/xsk/rx.h" #include "en/xsk/tx.h" +#include "en/hv_vhca_stats.h" +#include "lib/mlx5.h" bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) @@ -157,23 +159,14 @@ static void mlx5e_update_carrier_work(struct work_struct *work) mutex_unlock(&priv->state_lock); } -void mlx5e_update_stats(struct mlx5e_priv *priv) -{ - int i; - - for (i = mlx5e_num_stats_grps - 1; i >= 0; i--) - if (mlx5e_stats_grps[i].update_stats) - mlx5e_stats_grps[i].update_stats(priv); -} - void mlx5e_update_ndo_stats(struct mlx5e_priv *priv) { int i; - for (i = mlx5e_num_stats_grps - 1; i >= 0; i--) - if (mlx5e_stats_grps[i].update_stats_mask & + for (i = mlx5e_nic_stats_grps_num(priv) - 1; i >= 0; i--) + if (mlx5e_nic_stats_grps[i]->update_stats_mask & 
MLX5E_NDO_UPDATE_STATS) - mlx5e_stats_grps[i].update_stats(priv); + mlx5e_nic_stats_grps[i]->update_stats(priv); } static void mlx5e_update_stats_work(struct work_struct *work) @@ -247,26 +240,6 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); } -static u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq) -{ - switch (rq->wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - return mlx5_wq_ll_get_size(&rq->mpwqe.wq); - default: - return mlx5_wq_cyc_get_size(&rq->wqe.wq); - } -} - -static u32 mlx5e_rqwq_get_cur_sz(struct mlx5e_rq *rq) -{ - switch (rq->wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - return rq->mpwqe.wq.cur_sz; - default: - return rq->wqe.wq.cur_sz; - } -} - static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, struct mlx5e_channel *c) { @@ -382,6 +355,13 @@ static void mlx5e_free_di_list(struct mlx5e_rq *rq) kvfree(rq->wqe.di); } +static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work) +{ + struct mlx5e_rq *rq = container_of(recover_work, struct mlx5e_rq, recover_work); + + mlx5e_reporter_rq_cqe_err(rq); +} + static int mlx5e_alloc_rq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, @@ -418,13 +398,11 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->stats = &c->priv->channel_stats[c->ix].xskrq; else rq->stats = &c->priv->channel_stats[c->ix].rq; + INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work); - rq->xdp_prog = params->xdp_prog ? bpf_prog_inc(params->xdp_prog) : NULL; - if (IS_ERR(rq->xdp_prog)) { - err = PTR_ERR(rq->xdp_prog); - rq->xdp_prog = NULL; - goto err_rq_wq_destroy; - } + if (params->xdp_prog) + bpf_prog_inc(params->xdp_prog); + rq->xdp_prog = params->xdp_prog; rq_xdp_ix = rq->ix; if (xsk) @@ -720,8 +698,7 @@ static int mlx5e_create_rq(struct mlx5e_rq *rq, return err; } -static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, - int next_state) +int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state) { struct mlx5_core_dev *mdev = rq->mdev; @@ -829,10 +806,11 @@ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time) netdev_warn(c->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n", c->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes); + mlx5e_reporter_rx_timeout(rq); return -ETIMEDOUT; } -static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) +void mlx5e_free_rx_descs(struct mlx5e_rq *rq) { __be16 wqe_ix_be; u16 wqe_ix; @@ -911,7 +889,7 @@ err_free_rq: return err; } -static void mlx5e_activate_rq(struct mlx5e_rq *rq) +void mlx5e_activate_rq(struct mlx5e_rq *rq) { set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); mlx5e_trigger_irq(&rq->channel->icosq); @@ -926,6 +904,8 @@ void mlx5e_deactivate_rq(struct mlx5e_rq *rq) void mlx5e_close_rq(struct mlx5e_rq *rq) { cancel_work_sync(&rq->dim.work); + cancel_work_sync(&rq->channel->icosq.recover_work); + cancel_work_sync(&rq->recover_work); mlx5e_destroy_rq(rq); mlx5e_free_rx_descs(rq); mlx5e_free_rq(rq); @@ -1042,6 +1022,14 @@ static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa) return 0; } +static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work) +{ + struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq, + recover_work); + + mlx5e_reporter_icosq_cqe_err(sq); +} + static int mlx5e_alloc_icosq(struct mlx5e_channel *c, struct mlx5e_sq_param *param, struct mlx5e_icosq *sq) @@ -1064,6 +1052,8 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, if (err) goto 
err_sq_wq_destroy; + INIT_WORK(&sq->recover_work, mlx5e_icosq_err_cqe_work); + return 0; err_sq_wq_destroy: @@ -1127,15 +1117,22 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->txq_ix = txq_ix; sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; + sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; sq->stop_room = MLX5E_SQ_STOP_ROOM; INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); + if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) + set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); +#ifdef CONFIG_MLX5_EN_TLS if (mlx5_accel_is_tls_device(c->priv->mdev)) { set_bit(MLX5E_SQ_STATE_TLS, &sq->state); - sq->stop_room += MLX5E_SQ_TLS_ROOM; + sq->stop_room += MLX5E_SQ_TLS_ROOM + + mlx5e_ktls_dumps_num_wqebbs(sq, MAX_SKB_FRAGS, + TLS_MAX_PAYLOAD_SIZE); } +#endif param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -1312,7 +1309,6 @@ static int mlx5e_open_txqsq(struct mlx5e_channel *c, return 0; err_free_txqsq: - clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); mlx5e_free_txqsq(sq); return err; @@ -1347,9 +1343,13 @@ static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq) /* last doorbell out, godspeed .. */ if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) { u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + struct mlx5e_tx_wqe_info *wi; struct mlx5e_tx_wqe *nop; - sq->db.wqe_info[pi].skb = NULL; + wi = &sq->db.wqe_info[pi]; + + memset(wi, 0, sizeof(*wi)); + wi->num_wqebbs = 1; nop = mlx5e_post_nop(wq, sq->sqn, &sq->pc); mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nop->ctrl); } @@ -1377,7 +1377,7 @@ static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq, recover_work); - mlx5e_tx_reporter_err_cqe(sq); + mlx5e_reporter_tx_err_cqe(sq); } int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, @@ -1393,7 +1393,6 @@ int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, csp.cqn = sq->cq.mcq.cqn; csp.wq_ctrl = &sq->wq_ctrl; csp.min_inline_mode = params->tx_min_inline_mode; - set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); if (err) goto err_free_icosq; @@ -1401,18 +1400,27 @@ int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, return 0; err_free_icosq: - clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); mlx5e_free_icosq(sq); return err; } -void mlx5e_close_icosq(struct mlx5e_icosq *sq) +void mlx5e_activate_icosq(struct mlx5e_icosq *icosq) { - struct mlx5e_channel *c = sq->channel; + set_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state); +} - clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); +void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq) +{ + struct mlx5e_channel *c = icosq->channel; + + clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state); napi_synchronize(&c->napi); +} + +void mlx5e_close_icosq(struct mlx5e_icosq *sq) +{ + struct mlx5e_channel *c = sq->channel; mlx5e_destroy_sq(c->mdev, sq->sqn); mlx5e_free_icosq(sq); @@ -1430,7 +1438,7 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, return err; csp.tis_lst_sz = 1; - csp.tisn = c->priv->tisn[0]; /* tc = 0 */ + csp.tisn = c->priv->tisn[c->lag_port][0]; /* tc = 0 */ csp.cqn = sq->cq.mcq.cqn; csp.wq_ctrl = &sq->wq_ctrl; csp.min_inline_mode = sq->min_inline_mode; @@ -1674,13 +1682,12 @@ static int 
mlx5e_open_sqs(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_channel_param *cparam) { - struct mlx5e_priv *priv = c->priv; int err, tc; for (tc = 0; tc < params->num_tc; tc++) { - int txq_ix = c->ix + tc * priv->max_nch; + int txq_ix = c->ix + tc * params->num_channels; - err = mlx5e_open_txqsq(c, c->priv->tisn[tc], txq_ix, + err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix, params, &cparam->sq, &c->sq[tc], tc); if (err) goto err_close_sqs; @@ -1914,6 +1921,13 @@ static void mlx5e_close_queues(struct mlx5e_channel *c) mlx5e_close_cq(&c->icosq.cq); } +static u8 mlx5e_enumerate_lag_port(struct mlx5_core_dev *mdev, int ix) +{ + u16 port_aff_bias = mlx5_core_is_pf(mdev) ? 0 : MLX5_CAP_GEN(mdev, vhca_id); + + return (ix + port_aff_bias) % mlx5e_get_num_lag_ports(mdev); +} + static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_params *params, struct mlx5e_channel_param *cparam, @@ -1948,6 +1962,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->xdp = !!params->xdp_prog; c->stats = &priv->channel_stats[ix].ch; c->irq_desc = irq_to_desc(irq); + c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix); err = mlx5e_alloc_xps_cpumask(c, params); if (err) @@ -1989,6 +2004,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c) for (tc = 0; tc < c->num_tc; tc++) mlx5e_activate_txqsq(&c->sq[tc]); + mlx5e_activate_icosq(&c->icosq); mlx5e_activate_rq(&c->rq); netif_set_xps_queue(c->netdev, c->xps_cpumask, c->ix); @@ -2004,6 +2020,7 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c) mlx5e_deactivate_xsk(c); mlx5e_deactivate_rq(&c->rq); + mlx5e_deactivate_icosq(&c->icosq); for (tc = 0; tc < c->num_tc; tc++) mlx5e_deactivate_txqsq(&c->sq[tc]); } @@ -2321,10 +2338,7 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, goto err_close_channels; } - if (!IS_ERR_OR_NULL(priv->tx_reporter)) - devlink_health_reporter_state_update(priv->tx_reporter, - DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); - + mlx5e_health_channels_update(priv); kvfree(cparam); return 0; @@ -2852,26 +2866,21 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev) netdev_set_tc_queue(netdev, tc, nch, 0); } -static void mlx5e_build_tc2txq_maps(struct mlx5e_priv *priv) +static void mlx5e_build_txq_maps(struct mlx5e_priv *priv) { - int i, tc; + int i, ch; - for (i = 0; i < priv->max_nch; i++) - for (tc = 0; tc < priv->profile->max_tc; tc++) - priv->channel_tc2txq[i][tc] = i + tc * priv->max_nch; -} + ch = priv->channels.num; -static void mlx5e_build_tx2sq_maps(struct mlx5e_priv *priv) -{ - struct mlx5e_channel *c; - struct mlx5e_txqsq *sq; - int i, tc; + for (i = 0; i < ch; i++) { + int tc; + + for (tc = 0; tc < priv->channels.params.num_tc; tc++) { + struct mlx5e_channel *c = priv->channels.c[i]; + struct mlx5e_txqsq *sq = &c->sq[tc]; - for (i = 0; i < priv->channels.num; i++) { - c = priv->channels.c[i]; - for (tc = 0; tc < c->num_tc; tc++) { - sq = &c->sq[tc]; priv->txq2sq[sq->txq_ix] = sq; + priv->channel_tc2realtxq[i][tc] = i + tc * ch; } } } @@ -2886,7 +2895,7 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) netif_set_real_num_tx_queues(netdev, num_txqs); netif_set_real_num_rx_queues(netdev, num_rxqs); - mlx5e_build_tx2sq_maps(priv); + mlx5e_build_txq_maps(priv); mlx5e_activate_channels(&priv->channels); mlx5e_xdp_tx_enable(priv); netif_tx_start_all_queues(priv->netdev); @@ -2982,12 +2991,9 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv) int mlx5e_open_locked(struct net_device *netdev) { struct mlx5e_priv *priv = 
netdev_priv(netdev); - bool is_xdp = priv->channels.params.xdp_prog; int err; set_bit(MLX5E_STATE_OPENED, &priv->state); - if (is_xdp) - mlx5e_xdp_set_open(priv); err = mlx5e_open_channels(priv, &priv->channels); if (err) @@ -3002,8 +3008,6 @@ int mlx5e_open_locked(struct net_device *netdev) return 0; err_clear_state_opened_flag: - if (is_xdp) - mlx5e_xdp_set_closed(priv); clear_bit(MLX5E_STATE_OPENED, &priv->state); return err; } @@ -3035,8 +3039,6 @@ int mlx5e_close_locked(struct net_device *netdev) if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return 0; - if (priv->channels.params.xdp_prog) - mlx5e_xdp_set_closed(priv); clear_bit(MLX5E_STATE_OPENED, &priv->state); netif_carrier_off(priv->netdev); @@ -3168,40 +3170,58 @@ void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn) mlx5_core_destroy_tis(mdev, tisn); } +void mlx5e_destroy_tises(struct mlx5e_priv *priv) +{ + int tc, i; + + for (i = 0; i < mlx5e_get_num_lag_ports(priv->mdev); i++) + for (tc = 0; tc < priv->profile->max_tc; tc++) + mlx5e_destroy_tis(priv->mdev, priv->tisn[i][tc]); +} + +static bool mlx5e_lag_should_assign_affinity(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_GEN(mdev, lag_tx_port_affinity) && mlx5e_get_num_lag_ports(mdev) > 1; +} + int mlx5e_create_tises(struct mlx5e_priv *priv) { + int tc, i; int err; - int tc; - for (tc = 0; tc < priv->profile->max_tc; tc++) { - u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; - void *tisc; + for (i = 0; i < mlx5e_get_num_lag_ports(priv->mdev); i++) { + for (tc = 0; tc < priv->profile->max_tc; tc++) { + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + void *tisc; - tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); - MLX5_SET(tisc, tisc, prio, tc << 1); + MLX5_SET(tisc, tisc, prio, tc << 1); - err = mlx5e_create_tis(priv->mdev, in, &priv->tisn[tc]); - if (err) - goto err_close_tises; + if (mlx5e_lag_should_assign_affinity(priv->mdev)) + MLX5_SET(tisc, tisc, lag_tx_port_affinity, i + 1); + + err = mlx5e_create_tis(priv->mdev, in, &priv->tisn[i][tc]); + if (err) + goto err_close_tises; + } } return 0; err_close_tises: - for (tc--; tc >= 0; tc--) - mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); + for (; i >= 0; i--) { + for (tc--; tc >= 0; tc--) + mlx5e_destroy_tis(priv->mdev, priv->tisn[i][tc]); + tc = priv->profile->max_tc; + } return err; } static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) { - int tc; - - mlx5e_tx_reporter_destroy(priv); - for (tc = 0; tc < priv->profile->max_tc; tc++) - mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); + mlx5e_destroy_tises(priv); } static void mlx5e_build_indir_tir_ctx_common(struct mlx5e_priv *priv, @@ -3422,7 +3442,7 @@ out: #ifdef CONFIG_MLX5_ESWITCH static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, struct flow_cls_offload *cls_flower, - int flags) + unsigned long flags) { switch (cls_flower->command) { case FLOW_CLS_REPLACE: @@ -3442,12 +3462,12 @@ static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { + unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD); struct mlx5e_priv *priv = cb_priv; switch (type) { case TC_SETUP_CLSFLOWER: - return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS | - MLX5E_TC_NIC_OFFLOAD); + return mlx5e_setup_tc_cls_flower(priv, type_data, flags); default: return -EOPNOTSUPP; } @@ -3463,11 +3483,15 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, switch (type) { #ifdef CONFIG_MLX5_ESWITCH - case 
TC_SETUP_BLOCK: + case TC_SETUP_BLOCK: { + struct flow_block_offload *f = type_data; + + f->unlocked_driver_cb = true; return flow_block_cb_setup_simple(type_data, &mlx5e_block_cb_list, mlx5e_setup_tc_block_cb, priv, priv, true); + } #endif case TC_SETUP_QDISC_MQPRIO: return mlx5e_setup_tc_mqprio(priv, type_data); @@ -3640,7 +3664,7 @@ static int set_feature_tc_num_filters(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); - if (!enable && mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD)) { + if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) { netdev_err(netdev, "Active offloaded tc filters, can't turn hw_tc_offload off\n"); return -EINVAL; @@ -3781,9 +3805,10 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n"); } if (!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ)) { - features &= ~NETIF_F_LRO; - if (params->lro_en) + if (features & NETIF_F_LRO) { netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n"); + features &= ~NETIF_F_LRO; + } } if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { @@ -3950,7 +3975,8 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: case HWTSTAMP_FILTER_NTP_ALL: /* Disable CQE compression */ - netdev_warn(priv->netdev, "Disabling cqe compression"); + if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS)) + netdev_warn(priv->netdev, "Disabling RX cqe compression\n"); err = mlx5e_modify_rx_cqe_compression_locked(priv, false); if (err) { netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err); @@ -4203,6 +4229,11 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, switch (proto) { case IPPROTO_GRE: return features; + case IPPROTO_IPIP: + case IPPROTO_IPV6: + if (mlx5e_tunnel_proto_supported(priv->mdev, IPPROTO_IPIP)) + return features; + break; case IPPROTO_UDP: udph = udp_hdr(skb); port = be16_to_cpu(udph->dest); @@ -4267,7 +4298,7 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) if (!netif_xmit_stopped(dev_queue)) continue; - if (mlx5e_tx_reporter_timeout(sq)) + if (mlx5e_reporter_tx_timeout(sq)) report_failed = true; } @@ -4285,7 +4316,7 @@ unlock: rtnl_unlock(); } -static void mlx5e_tx_timeout(struct net_device *dev) +static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -4324,16 +4355,6 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) return 0; } -static int mlx5e_xdp_update_state(struct mlx5e_priv *priv) -{ - if (priv->channels.params.xdp_prog) - mlx5e_xdp_set_open(priv); - else - mlx5e_xdp_set_closed(priv); - - return 0; -} - static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -4354,16 +4375,11 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) /* no need for full reset when exchanging programs */ reset = (!priv->channels.params.xdp_prog || !prog); - if (was_opened && !reset) { + if (was_opened && !reset) /* num_channels is invariant here, so we can take the * batched reference right upfront. 
*/ - prog = bpf_prog_add(prog, priv->channels.num); - if (IS_ERR(prog)) { - err = PTR_ERR(prog); - goto unlock; - } - } + bpf_prog_add(prog, priv->channels.num); if (was_opened && reset) { struct mlx5e_channels new_channels = {}; @@ -4373,7 +4389,7 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) mlx5e_set_rq_type(priv->mdev, &new_channels.params); old_prog = priv->channels.params.xdp_prog; - err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_xdp_update_state); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); if (err) goto unlock; } else { @@ -4535,7 +4551,7 @@ const struct net_device_ops mlx5e_netdev_ops = { .ndo_tx_timeout = mlx5e_tx_timeout, .ndo_bpf = mlx5e_xdp, .ndo_xdp_xmit = mlx5e_xdp_xmit, - .ndo_xsk_async_xmit = mlx5e_xsk_async_xmit, + .ndo_xsk_wakeup = mlx5e_xsk_wakeup, #ifdef CONFIG_MLX5_EN_ARFS .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif @@ -4714,17 +4730,19 @@ void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, tirc_default_config[tt].rx_hash_fields; } -void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, +void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, struct mlx5e_rss_params *rss_params, struct mlx5e_params *params, - u16 max_channels, u16 mtu) + u16 mtu) { + struct mlx5_core_dev *mdev = priv->mdev; u8 rx_cq_period_mode; params->sw_mtu = mtu; params->hard_mtu = MLX5E_ETH_HARD_MTU; - params->num_channels = max_channels; + params->num_channels = min_t(unsigned int, MLX5E_MAX_NUM_CHANNELS / 2, + priv->max_nch); params->num_tc = 1; /* SQ */ @@ -4768,7 +4786,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); /* TX inline */ - params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev); + mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); /* RSS */ mlx5e_build_rss_params(rss_params, params->num_channels); @@ -4838,7 +4856,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev) || - MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { + mlx5e_any_tunnel_proto_supported(mdev)) { netdev->hw_enc_features |= NETIF_F_HW_CSUM; netdev->hw_enc_features |= NETIF_F_TSO; netdev->hw_enc_features |= NETIF_F_TSO6; @@ -4851,9 +4869,11 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM; netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; } - if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { + if (mlx5e_tunnel_proto_supported(mdev, IPPROTO_GRE)) { netdev->hw_features |= NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM; netdev->hw_enc_features |= NETIF_F_GSO_GRE | @@ -4862,6 +4882,15 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) NETIF_F_GSO_GRE_CSUM; } + if (mlx5e_tunnel_proto_supported(mdev, IPPROTO_IPIP)) { + netdev->hw_features |= NETIF_F_GSO_IPXIP4 | + NETIF_F_GSO_IPXIP6; + netdev->hw_enc_features |= NETIF_F_GSO_IPXIP4 | + NETIF_F_GSO_IPXIP6; + netdev->gso_partial_features |= NETIF_F_GSO_IPXIP4 | + NETIF_F_GSO_IPXIP6; + } + netdev->hw_features |= NETIF_F_GSO_PARTIAL; netdev->gso_partial_features |= NETIF_F_GSO_UDP_L4; netdev->hw_features |= NETIF_F_GSO_UDP_L4; @@ -4952,8 +4981,8 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, if (err) return err; - mlx5e_build_nic_params(mdev, 
&priv->xsk, rss, &priv->channels.params, - priv->max_nch, netdev->mtu); + mlx5e_build_nic_params(priv, &priv->xsk, rss, &priv->channels.params, + netdev->mtu); mlx5e_timestamp_init(priv); @@ -4964,13 +4993,14 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, if (err) mlx5_core_err(mdev, "TLS initialization failed, %d\n", err); mlx5e_build_nic_netdev(netdev); - mlx5e_build_tc2txq_maps(priv); + mlx5e_health_create_reporters(priv); return 0; } static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { + mlx5e_health_destroy_reporters(priv); mlx5e_tls_cleanup(priv); mlx5e_ipsec_cleanup(priv); mlx5e_netdev_cleanup(priv->netdev, priv); @@ -5073,7 +5103,6 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_initialize(priv); #endif - mlx5e_tx_reporter_create(priv); return 0; } @@ -5097,6 +5126,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) if (mlx5e_monitor_counter_supported(priv)) mlx5e_monitor_counter_init(priv); + mlx5e_hv_vhca_stats_create(priv); if (netdev->reg_state != NETREG_REGISTERED) return; #ifdef CONFIG_MLX5_CORE_EN_DCB @@ -5114,6 +5144,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) static void mlx5e_nic_disable(struct mlx5e_priv *priv) { + struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; #ifdef CONFIG_MLX5_CORE_EN_DCB @@ -5129,11 +5160,12 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_hv_vhca_stats_destroy(priv); if (mlx5e_monitor_counter_supported(priv)) mlx5e_monitor_counter_cleanup(priv); mlx5e_disable_async_events(priv); - mlx5_lag_remove(mdev); + mlx5_lag_remove(mdev, netdev); } int mlx5e_update_nic_rx(struct mlx5e_priv *priv) @@ -5157,6 +5189,8 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, .max_tc = MLX5E_MAX_NUM_TC, .rq_groups = MLX5E_NUM_RQ_GROUPS(XSK), + .stats_grps = mlx5e_nic_stats_grps, + .stats_grps_num = mlx5e_nic_stats_grps_num, }; /* mlx5e generic netdev management API (move to en_common.c) */ @@ -5363,6 +5397,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) return NULL; } + dev_net_set(netdev, mlx5_core_net(mdev)); priv = netdev_priv(netdev); err = mlx5e_attach(mdev, priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index d0684fdb69e1..7b48ccacebe2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -38,14 +38,19 @@ #include <net/netevent.h> #include <net/arp.h> #include <net/devlink.h> +#include <net/ipv6_stubs.h> #include "eswitch.h" +#include "eswitch_offloads_chains.h" #include "en.h" #include "en_rep.h" #include "en_tc.h" #include "en/tc_tun.h" #include "fs_core.h" #include "lib/port_tun.h" +#include "lib/mlx5.h" +#define CREATE_TRACE_POINTS +#include "diag/en_rep_tracepoint.h" #define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \ max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) @@ -112,24 +117,71 @@ static const struct counter_desc vport_rep_stats_desc[] = { #define NUM_VPORT_REP_SW_COUNTERS ARRAY_SIZE(sw_rep_stats_desc) #define NUM_VPORT_REP_HW_COUNTERS ARRAY_SIZE(vport_rep_stats_desc) -static void mlx5e_rep_get_strings(struct net_device *dev, - u32 stringset, uint8_t *data) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(sw_rep) { - int i, j; + return NUM_VPORT_REP_SW_COUNTERS; +} - switch (stringset) { - case ETH_SS_STATS: - for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++) - strcpy(data + 
(i * ETH_GSTRING_LEN), - sw_rep_stats_desc[i].format); - for (j = 0; j < NUM_VPORT_REP_HW_COUNTERS; j++, i++) - strcpy(data + (i * ETH_GSTRING_LEN), - vport_rep_stats_desc[j].format); - break; - } +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(sw_rep) +{ + int i; + + for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + sw_rep_stats_desc[i].format); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(sw_rep) +{ + int i; + + for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, + sw_rep_stats_desc, i); + return idx; } -static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw_rep) +{ + struct mlx5e_sw_stats *s = &priv->stats.sw; + struct rtnl_link_stats64 stats64 = {}; + + memset(s, 0, sizeof(*s)); + mlx5e_fold_sw_stats64(priv, &stats64); + + s->rx_packets = stats64.rx_packets; + s->rx_bytes = stats64.rx_bytes; + s->tx_packets = stats64.tx_packets; + s->tx_bytes = stats64.tx_bytes; + s->tx_queue_dropped = stats64.tx_dropped; +} + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(vport_rep) +{ + return NUM_VPORT_REP_HW_COUNTERS; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vport_rep) +{ + int i; + + for (i = 0; i < NUM_VPORT_REP_HW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, vport_rep_stats_desc[i].format); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vport_rep) +{ + int i; + + for (i = 0; i < NUM_VPORT_REP_HW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.vf_vport, + vport_rep_stats_desc, i); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport_rep) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -152,64 +204,33 @@ static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) vport_stats->tx_bytes = vf_stats.rx_bytes; } -static void mlx5e_uplink_rep_update_hw_counters(struct mlx5e_priv *priv) -{ - struct mlx5e_pport_stats *pstats = &priv->stats.pport; - struct rtnl_link_stats64 *vport_stats; - - mlx5e_grp_802_3_update_stats(priv); - - vport_stats = &priv->stats.vf_vport; - - vport_stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok); - vport_stats->rx_bytes = PPORT_802_3_GET(pstats, a_octets_received_ok); - vport_stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok); - vport_stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok); -} - -static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv) +static void mlx5e_rep_get_strings(struct net_device *dev, + u32 stringset, uint8_t *data) { - struct mlx5e_sw_stats *s = &priv->stats.sw; - struct rtnl_link_stats64 stats64 = {}; - - memset(s, 0, sizeof(*s)); - mlx5e_fold_sw_stats64(priv, &stats64); + struct mlx5e_priv *priv = netdev_priv(dev); - s->rx_packets = stats64.rx_packets; - s->rx_bytes = stats64.rx_bytes; - s->tx_packets = stats64.tx_packets; - s->tx_bytes = stats64.tx_bytes; - s->tx_queue_dropped = stats64.tx_dropped; + switch (stringset) { + case ETH_SS_STATS: + mlx5e_stats_fill_strings(priv, data); + break; + } } static void mlx5e_rep_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct mlx5e_priv *priv = netdev_priv(dev); - int i, j; - - if (!data) - return; - - mutex_lock(&priv->state_lock); - mlx5e_rep_update_sw_counters(priv); - priv->profile->update_stats(priv); - mutex_unlock(&priv->state_lock); - - for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++) - data[i] = 
MLX5E_READ_CTR64_CPU(&priv->stats.sw, - sw_rep_stats_desc, i); - for (j = 0; j < NUM_VPORT_REP_HW_COUNTERS; j++, i++) - data[i] = MLX5E_READ_CTR64_CPU(&priv->stats.vf_vport, - vport_rep_stats_desc, j); + mlx5e_ethtool_get_ethtool_stats(priv, stats, data); } static int mlx5e_rep_get_sset_count(struct net_device *dev, int sset) { + struct mlx5e_priv *priv = netdev_priv(dev); + switch (sset) { case ETH_SS_STATS: - return NUM_VPORT_REP_SW_COUNTERS + NUM_VPORT_REP_HW_COUNTERS; + return mlx5e_stats_total_num(priv); default: return -EOPNOTSUPP; } @@ -389,24 +410,17 @@ static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = { .set_pauseparam = mlx5e_uplink_rep_set_pauseparam, }; -static int mlx5e_rep_get_port_parent_id(struct net_device *dev, - struct netdev_phys_item_id *ppid) +static void mlx5e_rep_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid) { - struct mlx5_eswitch *esw; struct mlx5e_priv *priv; u64 parent_id; priv = netdev_priv(dev); - esw = priv->mdev->priv.eswitch; - - if (esw->mode == MLX5_ESWITCH_NONE) - return -EOPNOTSUPP; parent_id = mlx5_query_nic_system_image_guid(priv->mdev); ppid->id_len = sizeof(parent_id); memcpy(ppid->id, &parent_id, sizeof(parent_id)); - - return 0; } static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw, @@ -504,16 +518,18 @@ void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) mlx5e_sqs2vport_stop(esw, rep); } +static unsigned long mlx5e_rep_ipv6_interval(void) +{ + if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl) + return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME); + + return ~0UL; +} + static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv) { -#if IS_ENABLED(CONFIG_IPV6) - unsigned long ipv6_interval = NEIGH_VAR(&nd_tbl.parms, - DELAY_PROBE_TIME); -#else - unsigned long ipv6_interval = ~0UL; -#endif - unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, - DELAY_PROBE_TIME); + unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); + unsigned long ipv6_interval = mlx5e_rep_ipv6_interval(); struct net_device *netdev = rpriv->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); @@ -531,47 +547,97 @@ void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv) neigh_update->min_interval); } +static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe) +{ + return refcount_inc_not_zero(&nhe->refcnt); +} + +static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe); + +static void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe) +{ + if (refcount_dec_and_test(&nhe->refcnt)) { + mlx5e_rep_neigh_entry_remove(nhe); + kfree_rcu(nhe, rcu); + } +} + +static struct mlx5e_neigh_hash_entry * +mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv, + struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_neigh_hash_entry *next = NULL; + + rcu_read_lock(); + + for (next = nhe ? 
+ list_next_or_null_rcu(&rpriv->neigh_update.neigh_list, + &nhe->neigh_list, + struct mlx5e_neigh_hash_entry, + neigh_list) : + list_first_or_null_rcu(&rpriv->neigh_update.neigh_list, + struct mlx5e_neigh_hash_entry, + neigh_list); + next; + next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list, + &next->neigh_list, + struct mlx5e_neigh_hash_entry, + neigh_list)) + if (mlx5e_rep_neigh_entry_hold(next)) + break; + + rcu_read_unlock(); + + if (nhe) + mlx5e_rep_neigh_entry_release(nhe); + + return next; +} + static void mlx5e_rep_neigh_stats_work(struct work_struct *work) { struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv, neigh_update.neigh_stats_work.work); struct net_device *netdev = rpriv->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5e_neigh_hash_entry *nhe; + struct mlx5e_neigh_hash_entry *nhe = NULL; rtnl_lock(); if (!list_empty(&rpriv->neigh_update.neigh_list)) mlx5e_rep_queue_neigh_stats_work(priv); - list_for_each_entry(nhe, &rpriv->neigh_update.neigh_list, neigh_list) + while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL) mlx5e_tc_update_neigh_used_value(nhe); rtnl_unlock(); } -static void mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe) -{ - refcount_inc(&nhe->refcnt); -} - -static void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe) -{ - if (refcount_dec_and_test(&nhe->refcnt)) - kfree(nhe); -} - static void mlx5e_rep_update_flows(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e, bool neigh_connected, unsigned char ha[ETH_ALEN]) { struct ethhdr *eth = (struct ethhdr *)e->encap_header; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + bool encap_connected; + LIST_HEAD(flow_list); ASSERT_RTNL(); + /* wait for encap to be fully initialized */ + wait_for_completion(&e->res_ready); + + mutex_lock(&esw->offloads.encap_tbl_lock); + encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID); + if (e->compl_result < 0 || (encap_connected == neigh_connected && + ether_addr_equal(e->h_dest, ha))) + goto unlock; + + mlx5e_take_all_encap_flows(e, &flow_list); + if ((e->flags & MLX5_ENCAP_ENTRY_VALID) && (!neigh_connected || !ether_addr_equal(e->h_dest, ha))) - mlx5e_tc_encap_flows_del(priv, e); + mlx5e_tc_encap_flows_del(priv, e, &flow_list); if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) { ether_addr_copy(e->h_dest, ha); @@ -581,8 +647,11 @@ static void mlx5e_rep_update_flows(struct mlx5e_priv *priv, */ ether_addr_copy(eth->h_source, e->route_dev->dev_addr); - mlx5e_tc_encap_flows_add(priv, e); + mlx5e_tc_encap_flows_add(priv, e, &flow_list); } +unlock: + mutex_unlock(&esw->offloads.encap_tbl_lock); + mlx5e_put_encap_flow_list(priv, &flow_list); } static void mlx5e_rep_neigh_update(struct work_struct *work) @@ -594,7 +663,6 @@ static void mlx5e_rep_neigh_update(struct work_struct *work) unsigned char ha[ETH_ALEN]; struct mlx5e_priv *priv; bool neigh_connected; - bool encap_connected; u8 nud_state, dead; rtnl_lock(); @@ -612,13 +680,15 @@ static void mlx5e_rep_neigh_update(struct work_struct *work) neigh_connected = (nud_state & NUD_VALID) && !dead; + trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected); + list_for_each_entry(e, &nhe->encap_list, encap_list) { - encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID); - priv = netdev_priv(e->out_dev); + if (!mlx5e_encap_take(e)) + continue; - if (encap_connected != neigh_connected || - !ether_addr_equal(e->h_dest, ha)) - mlx5e_rep_update_flows(priv, e, neigh_connected, ha); + priv = netdev_priv(e->out_dev); + 
mlx5e_rep_update_flows(priv, e, neigh_connected, ha); + mlx5e_encap_put(priv, e); } mlx5e_rep_neigh_entry_release(nhe); rtnl_unlock(); @@ -659,8 +729,8 @@ mlx5e_rep_indr_offload(struct net_device *netdev, struct flow_cls_offload *flower, struct mlx5e_rep_indr_block_priv *indr_priv) { + unsigned long flags = MLX5_TC_FLAG(EGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD); struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev); - int flags = MLX5E_TC_EGRESS | MLX5E_TC_ESW_OFFLOAD; int err = 0; switch (flower->command) { @@ -714,6 +784,7 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) return -EOPNOTSUPP; + f->unlocked_driver_cb = true; f->driver_block_list = &mlx5e_block_cb_list; switch (f->command) { @@ -722,10 +793,6 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, if (indr_priv) return -EEXIST; - if (flow_block_cb_is_busy(mlx5e_rep_indr_setup_block_cb, - indr_priv, &mlx5e_block_cb_list)) - return -EBUSY; - indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL); if (!indr_priv) return -ENOMEM; @@ -785,9 +852,9 @@ static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv, { int err; - err = __tc_indr_block_cb_register(netdev, rpriv, - mlx5e_rep_indr_setup_tc_cb, - rpriv); + err = __flow_indr_block_cb_register(netdev, rpriv, + mlx5e_rep_indr_setup_tc_cb, + rpriv); if (err) { struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); @@ -800,8 +867,8 @@ static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv, static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv, struct net_device *netdev) { - __tc_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb, - rpriv); + __flow_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb, + rpriv); } static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb, @@ -827,6 +894,28 @@ static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb, return NOTIFY_OK; } +static void +mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv, + struct mlx5e_neigh_hash_entry *nhe, + struct neighbour *n) +{ + /* Take a reference to ensure the neighbour and mlx5 encap + * entry won't be destructed until we drop the reference in + * delayed work. + */ + neigh_hold(n); + + /* This assignment is valid as long as the the neigh reference + * is taken + */ + nhe->n = n; + + if (!queue_work(priv->wq, &nhe->neigh_update_work)) { + mlx5e_rep_neigh_entry_release(nhe); + neigh_release(n); + } +} + static struct mlx5e_neigh_hash_entry * mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, struct mlx5e_neigh *m_neigh); @@ -849,7 +938,7 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb, case NETEVENT_NEIGH_UPDATE: n = ptr; #if IS_ENABLED(CONFIG_IPV6) - if (n->tbl != &nd_tbl && n->tbl != &arp_tbl) + if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl) #else if (n->tbl != &arp_tbl) #endif @@ -859,34 +948,13 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb, m_neigh.family = n->ops->family; memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len); - /* We are in atomic context and can't take RTNL mutex, so use - * spin_lock_bh to lookup the neigh table. bh is used since - * netevent can be called from a softirq context. 
- */ - spin_lock_bh(&neigh_update->encap_lock); + rcu_read_lock(); nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh); - if (!nhe) { - spin_unlock_bh(&neigh_update->encap_lock); + rcu_read_unlock(); + if (!nhe) return NOTIFY_DONE; - } - - /* This assignment is valid as long as the the neigh reference - * is taken - */ - nhe->n = n; - /* Take a reference to ensure the neighbour and mlx5 encap - * entry won't be destructed until we drop the reference in - * delayed work. - */ - neigh_hold(n); - mlx5e_rep_neigh_entry_hold(nhe); - - if (!queue_work(priv->wq, &nhe->neigh_update_work)) { - mlx5e_rep_neigh_entry_release(nhe); - neigh_release(n); - } - spin_unlock_bh(&neigh_update->encap_lock); + mlx5e_rep_queue_neigh_update_work(priv, nhe, n); break; case NETEVENT_DELAY_PROBE_TIME_UPDATE: @@ -897,25 +965,21 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb, * done per device delay prob time parameter. */ #if IS_ENABLED(CONFIG_IPV6) - if (!p->dev || (p->tbl != &nd_tbl && p->tbl != &arp_tbl)) + if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl)) #else if (!p->dev || p->tbl != &arp_tbl) #endif return NOTIFY_DONE; - /* We are in atomic context and can't take RTNL mutex, - * so use spin_lock_bh to walk the neigh list and look for - * the relevant device. bh is used since netevent can be - * called from a softirq context. - */ - spin_lock_bh(&neigh_update->encap_lock); - list_for_each_entry(nhe, &neigh_update->neigh_list, neigh_list) { + rcu_read_lock(); + list_for_each_entry_rcu(nhe, &neigh_update->neigh_list, + neigh_list) { if (p->dev == nhe->m_neigh.dev) { found = true; break; } } - spin_unlock_bh(&neigh_update->encap_lock); + rcu_read_unlock(); if (!found) return NOTIFY_DONE; @@ -946,7 +1010,7 @@ static int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) return err; INIT_LIST_HEAD(&neigh_update->neigh_list); - spin_lock_init(&neigh_update->encap_lock); + mutex_init(&neigh_update->encap_lock); INIT_DELAYED_WORK(&neigh_update->neigh_stats_work, mlx5e_rep_neigh_stats_work); mlx5e_rep_neigh_update_init_interval(rpriv); @@ -973,6 +1037,7 @@ static void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work); + mutex_destroy(&neigh_update->encap_lock); rhashtable_destroy(&neigh_update->neigh_ht); } @@ -988,28 +1053,27 @@ static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv, if (err) return err; - list_add(&nhe->neigh_list, &rpriv->neigh_update.neigh_list); + list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list); return err; } -static void mlx5e_rep_neigh_entry_remove(struct mlx5e_priv *priv, - struct mlx5e_neigh_hash_entry *nhe) +static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe) { - struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv; - spin_lock_bh(&rpriv->neigh_update.encap_lock); + mutex_lock(&rpriv->neigh_update.encap_lock); - list_del(&nhe->neigh_list); + list_del_rcu(&nhe->neigh_list); rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht, &nhe->rhash_node, mlx5e_neigh_ht_params); - spin_unlock_bh(&rpriv->neigh_update.encap_lock); + mutex_unlock(&rpriv->neigh_update.encap_lock); } -/* This function must only be called under RTNL lock or under the - * representor's encap_lock in case RTNL mutex can't be held. +/* This function must only be called under the representor's encap_lock or + * inside rcu read lock section. 
*/ static struct mlx5e_neigh_hash_entry * mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, @@ -1017,9 +1081,11 @@ mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, { struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + struct mlx5e_neigh_hash_entry *nhe; - return rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh, - mlx5e_neigh_ht_params); + nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh, + mlx5e_neigh_ht_params); + return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL; } static int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, @@ -1032,8 +1098,10 @@ static int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, if (!*nhe) return -ENOMEM; + (*nhe)->priv = priv; memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh)); INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update); + spin_lock_init(&(*nhe)->encap_list_lock); INIT_LIST_HEAD(&(*nhe)->encap_list); refcount_set(&(*nhe)->refcnt, 1); @@ -1047,19 +1115,6 @@ out_free: return err; } -static void mlx5e_rep_neigh_entry_destroy(struct mlx5e_priv *priv, - struct mlx5e_neigh_hash_entry *nhe) -{ - /* The neigh hash entry must be removed from the hash table regardless - * of the reference count value, so it won't be found by the next - * neigh notification call. The neigh hash entry reference count is - * incremented only during creation and neigh notification calls and - * protects from freeing the nhe struct. - */ - mlx5e_rep_neigh_entry_remove(priv, nhe); - mlx5e_rep_neigh_entry_release(nhe); -} - int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { @@ -1072,16 +1127,26 @@ int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, err = mlx5_tun_entropy_refcount_inc(tun_entropy, e->reformat_type); if (err) return err; + + mutex_lock(&rpriv->neigh_update.encap_lock); nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh); if (!nhe) { err = mlx5e_rep_neigh_entry_create(priv, e, &nhe); if (err) { + mutex_unlock(&rpriv->neigh_update.encap_lock); mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type); return err; } } - list_add(&e->encap_list, &nhe->encap_list); + + e->nhe = nhe; + spin_lock(&nhe->encap_list_lock); + list_add_rcu(&e->encap_list, &nhe->encap_list); + spin_unlock(&nhe->encap_list_lock); + + mutex_unlock(&rpriv->neigh_update.encap_lock); + return 0; } @@ -1091,13 +1156,16 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy; - struct mlx5e_neigh_hash_entry *nhe; - list_del(&e->encap_list); - nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh); + if (!e->nhe) + return; + + spin_lock(&e->nhe->encap_list_lock); + list_del_rcu(&e->encap_list); + spin_unlock(&e->nhe->encap_list_lock); - if (list_empty(&nhe->encap_list)) - mlx5e_rep_neigh_entry_destroy(priv, nhe); + mlx5e_rep_neigh_entry_release(e->nhe); + e->nhe = NULL; mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type); } @@ -1160,33 +1228,106 @@ mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv, } } +static +int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + switch (ma->command) { + case TC_CLSMATCHALL_REPLACE: + return mlx5e_tc_configure_matchall(priv, ma); + case TC_CLSMATCHALL_DESTROY: + return mlx5e_tc_delete_matchall(priv, ma); + case TC_CLSMATCHALL_STATS: + 
mlx5e_tc_stats_matchall(priv, ma); + return 0; + default: + return -EOPNOTSUPP; + } +} + static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { + unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD); struct mlx5e_priv *priv = cb_priv; switch (type) { case TC_SETUP_CLSFLOWER: - return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS | - MLX5E_TC_ESW_OFFLOAD); + return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags); + case TC_SETUP_CLSMATCHALL: + return mlx5e_rep_setup_tc_cls_matchall(priv, type_data); default: return -EOPNOTSUPP; } } -static LIST_HEAD(mlx5e_rep_block_cb_list); +static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct flow_cls_offload tmp, *f = type_data; + struct mlx5e_priv *priv = cb_priv; + struct mlx5_eswitch *esw; + unsigned long flags; + int err; + + flags = MLX5_TC_FLAG(INGRESS) | + MLX5_TC_FLAG(ESW_OFFLOAD) | + MLX5_TC_FLAG(FT_OFFLOAD); + esw = priv->mdev->priv.eswitch; + + switch (type) { + case TC_SETUP_CLSFLOWER: + memcpy(&tmp, f, sizeof(*f)); + + if (!mlx5_esw_chains_prios_supported(esw) || + tmp.common.chain_index) + return -EOPNOTSUPP; + + /* Re-use tc offload path by moving the ft flow to the + * reserved ft chain. + * + * FT offload can use prio range [0, INT_MAX], so we normalize + * it to range [1, mlx5_esw_chains_get_prio_range(esw)] + * as with tc, where prio 0 isn't supported. + * + * We only support chain 0 of FT offload. + */ + if (tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw)) + return -EOPNOTSUPP; + if (tmp.common.chain_index != 0) + return -EOPNOTSUPP; + + tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw); + tmp.common.prio++; + err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags); + memcpy(&f->stats, &tmp.stats, sizeof(f->stats)); + return err; + default: + return -EOPNOTSUPP; + } +} +static LIST_HEAD(mlx5e_rep_block_tc_cb_list); +static LIST_HEAD(mlx5e_rep_block_ft_cb_list); static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { struct mlx5e_priv *priv = netdev_priv(dev); + struct flow_block_offload *f = type_data; + + f->unlocked_driver_cb = true; switch (type) { case TC_SETUP_BLOCK: return flow_block_cb_setup_simple(type_data, - &mlx5e_rep_block_cb_list, + &mlx5e_rep_block_tc_cb_list, mlx5e_rep_setup_tc_cb, priv, priv, true); + case TC_SETUP_FT: + return flow_block_cb_setup_simple(type_data, + &mlx5e_rep_block_ft_cb_list, + mlx5e_rep_setup_ft_cb, + priv, priv, true); default: return -EOPNOTSUPP; } @@ -1549,11 +1690,66 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) mlx5e_close_drop_rq(&priv->drop_rq); } +static int mlx5e_init_ul_rep_rx(struct mlx5e_priv *priv) +{ + int err = mlx5e_init_rep_rx(priv); + + if (err) + return err; + + mlx5e_create_q_counters(priv); + return 0; +} + +static void mlx5e_cleanup_ul_rep_rx(struct mlx5e_priv *priv) +{ + mlx5e_destroy_q_counters(priv); + mlx5e_cleanup_rep_rx(priv); +} + +static int mlx5e_init_uplink_rep_tx(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5_rep_uplink_priv *uplink_priv; + struct net_device *netdev; + struct mlx5e_priv *priv; + int err; + + netdev = rpriv->netdev; + priv = netdev_priv(netdev); + uplink_priv = &rpriv->uplink_priv; + + mutex_init(&uplink_priv->unready_flows_lock); + INIT_LIST_HEAD(&uplink_priv->unready_flows); + + /* init shared tc flow table */ + err = mlx5e_tc_esw_init(&uplink_priv->tc_ht); + if (err) + return err; + + mlx5_init_port_tun_entropy(&uplink_priv->tun_entropy, 
priv->mdev); + + /* init indirect block notifications */ + INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list); + uplink_priv->netdevice_nb.notifier_call = mlx5e_nic_rep_netdevice_event; + err = register_netdevice_notifier_dev_net(rpriv->netdev, + &uplink_priv->netdevice_nb, + &uplink_priv->netdevice_nn); + if (err) { + mlx5_core_err(priv->mdev, "Failed to register netdev notifier\n"); + goto tc_esw_cleanup; + } + + return 0; + +tc_esw_cleanup: + mlx5e_tc_esw_cleanup(&uplink_priv->tc_ht); + return err; +} + static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) { struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_rep_uplink_priv *uplink_priv; - int tc, err; + int err; err = mlx5e_create_tises(priv); if (err) { @@ -1562,53 +1758,41 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) } if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { - uplink_priv = &rpriv->uplink_priv; - - INIT_LIST_HEAD(&uplink_priv->unready_flows); - - /* init shared tc flow table */ - err = mlx5e_tc_esw_init(&uplink_priv->tc_ht); + err = mlx5e_init_uplink_rep_tx(rpriv); if (err) goto destroy_tises; - - mlx5_init_port_tun_entropy(&uplink_priv->tun_entropy, priv->mdev); - - /* init indirect block notifications */ - INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list); - uplink_priv->netdevice_nb.notifier_call = mlx5e_nic_rep_netdevice_event; - err = register_netdevice_notifier(&uplink_priv->netdevice_nb); - if (err) { - mlx5_core_err(priv->mdev, "Failed to register netdev notifier\n"); - goto tc_esw_cleanup; - } } return 0; -tc_esw_cleanup: - mlx5e_tc_esw_cleanup(&uplink_priv->tc_ht); destroy_tises: - for (tc = 0; tc < priv->profile->max_tc; tc++) - mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); + mlx5e_destroy_tises(priv); return err; } +static void mlx5e_cleanup_uplink_rep_tx(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; + + /* clean indirect TC block notifications */ + unregister_netdevice_notifier_dev_net(rpriv->netdev, + &uplink_priv->netdevice_nb, + &uplink_priv->netdevice_nn); + mlx5e_rep_indr_clean_block_privs(rpriv); + + /* delete shared tc flow table */ + mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht); + mutex_destroy(&rpriv->uplink_priv.unready_flows_lock); +} + static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) { struct mlx5e_rep_priv *rpriv = priv->ppriv; - int tc; - - for (tc = 0; tc < priv->profile->max_tc; tc++) - mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); - if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { - /* clean indirect TC block notifications */ - unregister_netdevice_notifier(&rpriv->uplink_priv.netdevice_nb); - mlx5e_rep_indr_clean_block_privs(rpriv); + mlx5e_destroy_tises(priv); - /* delete shared tc flow table */ - mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht); - } + if (rpriv->rep->vport == MLX5_VPORT_UPLINK) + mlx5e_cleanup_uplink_rep_tx(rpriv); } static void mlx5e_rep_enable(struct mlx5e_priv *priv) @@ -1677,6 +1861,7 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) { + struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -1685,7 +1870,44 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) #endif mlx5_notifier_unregister(mdev, &priv->events_nb); cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work); - mlx5_lag_remove(mdev); + mlx5_lag_remove(mdev, netdev); +} + +static MLX5E_DEFINE_STATS_GRP(sw_rep, 0); +static MLX5E_DEFINE_STATS_GRP(vport_rep, 
MLX5E_NDO_UPDATE_STATS); + +/* The stats groups order is opposite to the update_stats() order calls */ +static mlx5e_stats_grp_t mlx5e_rep_stats_grps[] = { + &MLX5E_STATS_GRP(sw_rep), + &MLX5E_STATS_GRP(vport_rep), +}; + +static unsigned int mlx5e_rep_stats_grps_num(struct mlx5e_priv *priv) +{ + return ARRAY_SIZE(mlx5e_rep_stats_grps); +} + +/* The stats groups order is opposite to the update_stats() order calls */ +static mlx5e_stats_grp_t mlx5e_ul_rep_stats_grps[] = { + &MLX5E_STATS_GRP(sw), + &MLX5E_STATS_GRP(qcnt), + &MLX5E_STATS_GRP(vnic_env), + &MLX5E_STATS_GRP(vport), + &MLX5E_STATS_GRP(802_3), + &MLX5E_STATS_GRP(2863), + &MLX5E_STATS_GRP(2819), + &MLX5E_STATS_GRP(phy), + &MLX5E_STATS_GRP(eth_ext), + &MLX5E_STATS_GRP(pcie), + &MLX5E_STATS_GRP(per_prio), + &MLX5E_STATS_GRP(pme), + &MLX5E_STATS_GRP(channels), + &MLX5E_STATS_GRP(per_port_buff_congest), +}; + +static unsigned int mlx5e_ul_rep_stats_grps_num(struct mlx5e_priv *priv) +{ + return ARRAY_SIZE(mlx5e_ul_rep_stats_grps); } static const struct mlx5e_profile mlx5e_rep_profile = { @@ -1697,29 +1919,33 @@ static const struct mlx5e_profile mlx5e_rep_profile = { .cleanup_tx = mlx5e_cleanup_rep_tx, .enable = mlx5e_rep_enable, .update_rx = mlx5e_update_rep_rx, - .update_stats = mlx5e_rep_update_hw_counters, + .update_stats = mlx5e_update_ndo_stats, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, .max_tc = 1, .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), + .stats_grps = mlx5e_rep_stats_grps, + .stats_grps_num = mlx5e_rep_stats_grps_num, }; static const struct mlx5e_profile mlx5e_uplink_rep_profile = { .init = mlx5e_init_rep, .cleanup = mlx5e_cleanup_rep, - .init_rx = mlx5e_init_rep_rx, - .cleanup_rx = mlx5e_cleanup_rep_rx, + .init_rx = mlx5e_init_ul_rep_rx, + .cleanup_rx = mlx5e_cleanup_ul_rep_rx, .init_tx = mlx5e_init_rep_tx, .cleanup_tx = mlx5e_cleanup_rep_tx, .enable = mlx5e_uplink_rep_enable, .disable = mlx5e_uplink_rep_disable, .update_rx = mlx5e_update_rep_rx, - .update_stats = mlx5e_uplink_rep_update_hw_counters, + .update_stats = mlx5e_update_ndo_stats, .update_carrier = mlx5e_update_carrier, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, .max_tc = MLX5E_MAX_NUM_TC, .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), + .stats_grps = mlx5e_ul_rep_stats_grps, + .stats_grps_num = mlx5e_ul_rep_stats_grps_num, }; static bool @@ -1731,37 +1957,46 @@ is_devlink_port_supported(const struct mlx5_core_dev *dev, mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport); } +static unsigned int +vport_to_devlink_port_index(const struct mlx5_core_dev *dev, u16 vport_num) +{ + return (MLX5_CAP_GEN(dev, vhca_id) << 16) | vport_num; +} + static int register_devlink_port(struct mlx5_core_dev *dev, struct mlx5e_rep_priv *rpriv) { struct devlink *devlink = priv_to_devlink(dev); struct mlx5_eswitch_rep *rep = rpriv->rep; struct netdev_phys_item_id ppid = {}; - int ret; + unsigned int dl_port_index = 0; if (!is_devlink_port_supported(dev, rpriv)) return 0; - ret = mlx5e_rep_get_port_parent_id(rpriv->netdev, &ppid); - if (ret) - return ret; + mlx5e_rep_get_port_parent_id(rpriv->netdev, &ppid); - if (rep->vport == MLX5_VPORT_UPLINK) + if (rep->vport == MLX5_VPORT_UPLINK) { devlink_port_attrs_set(&rpriv->dl_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, PCI_FUNC(dev->pdev->devfn), false, 0, &ppid.id[0], ppid.id_len); - else if (rep->vport == MLX5_VPORT_PF) + dl_port_index = vport_to_devlink_port_index(dev, rep->vport); + } else if 
(rep->vport == MLX5_VPORT_PF) { devlink_port_attrs_pci_pf_set(&rpriv->dl_port, &ppid.id[0], ppid.id_len, dev->pdev->devfn); - else if (mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport)) + dl_port_index = rep->vport; + } else if (mlx5_eswitch_is_vf_vport(dev->priv.eswitch, + rpriv->rep->vport)) { devlink_port_attrs_pci_vf_set(&rpriv->dl_port, &ppid.id[0], ppid.id_len, dev->pdev->devfn, rep->vport - 1); + dl_port_index = vport_to_devlink_port_index(dev, rep->vport); + } - return devlink_port_register(devlink, &rpriv->dl_port, rep->vport); + return devlink_port_register(devlink, &rpriv->dl_port, dl_port_index); } static void unregister_devlink_port(struct mlx5_core_dev *dev, @@ -1798,6 +2033,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) return -EINVAL; } + dev_net_set(netdev, mlx5_core_net(dev)); rpriv->netdev = netdev; rep->rep_data[REP_ETH].priv = rpriv; INIT_LIST_HEAD(&rpriv->vport_sqs_list); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index c56e6ee4350c..3f756d51435f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -35,6 +35,7 @@ #include <net/ip_tunnels.h> #include <linux/rhashtable.h> +#include <linux/mutex.h> #include "eswitch.h" #include "en.h" #include "lib/port_tun.h" @@ -48,7 +49,7 @@ struct mlx5e_neigh_update_table { */ struct list_head neigh_list; /* protect lookup/remove operations */ - spinlock_t encap_lock; + struct mutex encap_lock; struct notifier_block netevent_nb; struct delayed_work neigh_stats_work; unsigned long min_interval; /* jiffies */ @@ -72,9 +73,12 @@ struct mlx5_rep_uplink_priv { */ struct list_head tc_indr_block_priv_list; struct notifier_block netdevice_nb; + struct netdev_net_notifier netdevice_nn; struct mlx5_tun_entropy tun_entropy; + /* protects unready_flows */ + struct mutex unready_flows_lock; struct list_head unready_flows; struct work_struct reoffload_flows_work; }; @@ -86,6 +90,7 @@ struct mlx5e_rep_priv { struct mlx5_flow_handle *vport_rx_rule; struct list_head vport_sqs_list; struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */ + struct rtnl_link_stats64 prev_vf_vport_stats; struct devlink_port dl_port; }; @@ -107,6 +112,7 @@ struct mlx5e_neigh { struct mlx5e_neigh_hash_entry { struct rhash_head rhash_node; struct mlx5e_neigh m_neigh; + struct mlx5e_priv *priv; /* Save the neigh hash entry in a list on the representor in * addition to the hash table. In order to iterate easily over the @@ -114,6 +120,8 @@ struct mlx5e_neigh_hash_entry { */ struct list_head neigh_list; + /* protects encap list */ + spinlock_t encap_list_lock; /* encap list sharing the same neigh */ struct list_head encap_list; @@ -134,6 +142,8 @@ struct mlx5e_neigh_hash_entry { * 'used' value and avoid neigh deleting by the kernel. 
*/ unsigned long reported_lastuse; + + struct rcu_head rcu; }; enum { @@ -142,6 +152,8 @@ enum { }; struct mlx5e_encap_entry { + /* attached neigh hash entry */ + struct mlx5e_neigh_hash_entry *nhe; /* neigh hash entry list of encaps sharing the same neigh */ struct list_head encap_list; struct mlx5e_neigh m_neigh; @@ -150,7 +162,7 @@ struct mlx5e_encap_entry { */ struct hlist_node encap_hlist; struct list_head flows; - u32 encap_id; + struct mlx5_pkt_reformat *pkt_reformat; const struct ip_tunnel_info *tun_info; unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ @@ -161,6 +173,10 @@ struct mlx5e_encap_entry { u8 flags; char *encap_header; int encap_size; + refcount_t refcnt; + struct completion res_ready; + int compl_result; + struct rcu_head rcu; }; struct mlx5e_rep_sq { @@ -168,7 +184,6 @@ struct mlx5e_rep_sq { struct list_head list; }; -void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev); void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev); void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev); bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index ac6e586d403d..1c3ab69cbd96 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -48,6 +48,7 @@ #include "lib/clock.h" #include "en/xdp.h" #include "en/xsk/rx.h" +#include "en/health.h" static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) { @@ -612,11 +613,6 @@ void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) wqe_counter = be16_to_cpu(cqe->wqe_counter); - if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) { - netdev_WARN_ONCE(cq->channel->netdev, - "Bad OP in ICOSQ CQE: 0x%x\n", get_cqe_opcode(cqe)); - break; - } do { struct mlx5e_sq_wqe_info *wi; u16 ci; @@ -626,6 +622,15 @@ void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); wi = &sq->db.ico_wqe[ci]; + if (last_wqe && unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) { + netdev_WARN_ONCE(cq->channel->netdev, + "Bad OP in ICOSQ CQE: 0x%x\n", + get_cqe_opcode(cqe)); + if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) + queue_work(cq->channel->priv->wq, &sq->recover_work); + break; + } + if (likely(wi->opcode == MLX5_OPCODE_UMR)) { sqcc += MLX5E_UMR_WQEBBS; wi->umr.rq->mpwqe.umr_completed++; @@ -692,8 +697,11 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) rq->mpwqe.umr_in_progress += rq->mpwqe.umr_last_bulk; rq->mpwqe.actual_wq_head = head; - /* If XSK Fill Ring doesn't have enough frames, busy poll by - * rescheduling the NAPI poll. + /* If XSK Fill Ring doesn't have enough frames, report the error, so + * that one of the actions can be performed: + * 1. If need_wakeup is used, signal that the application has to kick + * the driver when it refills the Fill Ring. + * 2. Otherwise, busy poll by rescheduling the NAPI poll. */ if (unlikely(alloc_err == -ENOMEM && rq->umem)) return true; @@ -859,13 +867,24 @@ tail_padding_csum(struct sk_buff *skb, int offset, } static void -mlx5e_skb_padding_csum(struct sk_buff *skb, int network_depth, __be16 proto, - struct mlx5e_rq_stats *stats) +mlx5e_skb_csum_fixup(struct sk_buff *skb, int network_depth, __be16 proto, + struct mlx5e_rq_stats *stats) { struct ipv6hdr *ip6; struct iphdr *ip4; int pkt_len; + /* Fixup vlan headers, if any */ + if (network_depth > ETH_HLEN) + /* CQE csum is calculated from the IP header and does + * not cover VLAN headers (if present). 
This will add + * the checksum manually. + */ + skb->csum = csum_partial(skb->data + ETH_HLEN, + network_depth - ETH_HLEN, + skb->csum); + + /* Fixup tail padding, if any */ switch (proto) { case htons(ETH_P_IP): ip4 = (struct iphdr *)(skb->data + network_depth); @@ -931,16 +950,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, return; /* CQE csum covers all received bytes */ /* csum might need some fixups ...*/ - if (network_depth > ETH_HLEN) - /* CQE csum is calculated from the IP header and does - * not cover VLAN headers (if present). This will add - * the checksum manually. - */ - skb->csum = csum_partial(skb->data + ETH_HLEN, - network_depth - ETH_HLEN, - skb->csum); - - mlx5e_skb_padding_csum(skb, network_depth, proto, stats); + mlx5e_skb_csum_fixup(skb, network_depth, proto, stats); return; } @@ -1065,11 +1075,6 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, prefetchw(va); /* xdp_frame data area */ prefetch(data); - if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { - rq->stats->wqe_err++; - return NULL; - } - rcu_read_lock(); consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, false); rcu_read_unlock(); @@ -1097,11 +1102,6 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, u16 byte_cnt = cqe_bcnt - headlen; struct sk_buff *skb; - if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { - rq->stats->wqe_err++; - return NULL; - } - /* XDP is not supported in this configuration, as incoming packets * might spread among multiple pages. */ @@ -1135,6 +1135,15 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, return skb; } +static void trigger_report(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe; + + if (cqe_syndrome_needs_recover(err_cqe->syndrome) && + !test_and_set_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state)) + queue_work(rq->channel->priv->wq, &rq->recover_work); +} + void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { struct mlx5_wq_cyc *wq = &rq->wqe.wq; @@ -1147,6 +1156,12 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi = get_frag(rq, ci); cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + trigger_report(rq, cqe); + rq->stats->wqe_err++; + goto free_wqe; + } + skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, mlx5e_skb_from_cqe_linear, mlx5e_skb_from_cqe_nonlinear, @@ -1188,6 +1203,11 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi = get_frag(rq, ci); cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + rq->stats->wqe_err++; + goto free_wqe; + } + skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt); if (!skb) { /* probably for XDP */ @@ -1322,7 +1342,8 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi->consumed_strides += cstrides; - if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + trigger_report(rq, cqe); rq->stats->wqe_err++; goto mpwrq_cqe_out; } @@ -1367,8 +1388,14 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) return 0; - if (rq->cqd.left) + if (rq->page_pool) + page_pool_nid_changed(rq->page_pool, numa_mem_id()); + + if (rq->cqd.left) { work_done += mlx5e_decompress_cqes_cont(rq, cqwq, 0, budget); + if (rq->cqd.left || work_done >= budget) + goto out; + } cqe = mlx5_cqwq_get_cqe(cqwq); if (!cqe) { @@ -1498,6 
+1525,11 @@ void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi = get_frag(rq, ci); cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + rq->stats->wqe_err++; + goto wq_free_wqe; + } + skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, mlx5e_skb_from_cqe_linear, mlx5e_skb_from_cqe_nonlinear, @@ -1533,26 +1565,27 @@ void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi = get_frag(rq, ci); cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + rq->stats->wqe_err++; + goto wq_free_wqe; + } + skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, mlx5e_skb_from_cqe_linear, mlx5e_skb_from_cqe_nonlinear, rq, cqe, wi, cqe_bcnt); - if (unlikely(!skb)) { - /* a DROP, save the page-reuse checks */ - mlx5e_free_rx_wqe(rq, wi, true); - goto wq_cyc_pop; - } + if (unlikely(!skb)) /* a DROP, save the page-reuse checks */ + goto wq_free_wqe; + skb = mlx5e_ipsec_handle_rx_skb(rq->netdev, skb, &cqe_bcnt); - if (unlikely(!skb)) { - mlx5e_free_rx_wqe(rq, wi, true); - goto wq_cyc_pop; - } + if (unlikely(!skb)) + goto wq_free_wqe; mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); napi_gro_receive(rq->cq.napi, skb); +wq_free_wqe: mlx5e_free_rx_wqe(rq, wi, true); -wq_cyc_pop: mlx5_wq_cyc_pop(wq); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 840ec945ccba..bbff8d8ded76 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -35,6 +35,7 @@ #include <linux/udp.h> #include <net/udp.h> #include "en.h" +#include "en/port.h" enum { MLX5E_ST_LINK_STATE, @@ -80,22 +81,12 @@ static int mlx5e_test_link_state(struct mlx5e_priv *priv) static int mlx5e_test_link_speed(struct mlx5e_priv *priv) { - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - u32 eth_proto_oper; - int i; + u32 speed; if (!netif_carrier_ok(priv->netdev)) return 1; - if (mlx5_query_port_ptys(priv->mdev, out, sizeof(out), MLX5_PTYS_EN, 1)) - return 1; - - eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); - for (i = 0; i < MLX5E_LINK_MODES_NUMBER; i++) { - if (eth_proto_oper & MLX5E_PROT_MASK(i)) - return 0; - } - return 1; + return mlx5e_port_linkspeed(priv->mdev, &speed); } struct mlx5ehdr { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 57f9f346d213..30b216d9284c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -35,6 +35,58 @@ #include "en_accel/ipsec.h" #include "en_accel/tls.h" +static unsigned int stats_grps_num(struct mlx5e_priv *priv) +{ + return !priv->profile->stats_grps_num ? 
0 : + priv->profile->stats_grps_num(priv); +} + +unsigned int mlx5e_stats_total_num(struct mlx5e_priv *priv) +{ + mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps; + const unsigned int num_stats_grps = stats_grps_num(priv); + unsigned int total = 0; + int i; + + for (i = 0; i < num_stats_grps; i++) + total += stats_grps[i]->get_num_stats(priv); + + return total; +} + +void mlx5e_stats_update(struct mlx5e_priv *priv) +{ + mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps; + const unsigned int num_stats_grps = stats_grps_num(priv); + int i; + + for (i = num_stats_grps - 1; i >= 0; i--) + if (stats_grps[i]->update_stats) + stats_grps[i]->update_stats(priv); +} + +void mlx5e_stats_fill(struct mlx5e_priv *priv, u64 *data, int idx) +{ + mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps; + const unsigned int num_stats_grps = stats_grps_num(priv); + int i; + + for (i = 0; i < num_stats_grps; i++) + idx = stats_grps[i]->fill_stats(priv, data, idx); +} + +void mlx5e_stats_fill_strings(struct mlx5e_priv *priv, u8 *data) +{ + mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps; + const unsigned int num_stats_grps = stats_grps_num(priv); + int i, idx = 0; + + for (i = 0; i < num_stats_grps; i++) + idx = stats_grps[i]->fill_strings(priv, data, idx); +} + +/* Concrete NIC Stats */ + static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) }, @@ -52,11 +104,12 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_bytes) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ctx) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_bytes) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_resync_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_skip_no_sync_data) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_no_sync_data) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_bypass_req) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_bytes) }, #endif { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) }, @@ -74,6 +127,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_xmit) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_mpwqe) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_inlnw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_nops) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_cqe) }, @@ -90,6 +144,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_xmit) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_mpwqe) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_inlnw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_nops) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_full) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_cqes) }, @@ -107,6 +162,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) }, { 
MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_recover) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_events) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_poll) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_arm) }, @@ -142,12 +198,12 @@ static const struct counter_desc sw_stats_desc[] = { #define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc) -static int mlx5e_grp_sw_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(sw) { return NUM_SW_COUNTERS; } -static int mlx5e_grp_sw_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(sw) { int i; @@ -156,7 +212,7 @@ static int mlx5e_grp_sw_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx) return idx; } -static int mlx5e_grp_sw_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(sw) { int i; @@ -165,7 +221,7 @@ static int mlx5e_grp_sw_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) return idx; } -static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) { struct mlx5e_sw_stats *s = &priv->stats.sw; int i; @@ -200,6 +256,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->rx_xdp_tx_xmit += xdpsq_stats->xmit; s->rx_xdp_tx_mpwqe += xdpsq_stats->mpwqe; s->rx_xdp_tx_inlnw += xdpsq_stats->inlnw; + s->rx_xdp_tx_nops += xdpsq_stats->nops; s->rx_xdp_tx_full += xdpsq_stats->full; s->rx_xdp_tx_err += xdpsq_stats->err; s->rx_xdp_tx_cqe += xdpsq_stats->cqes; @@ -217,6 +274,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->rx_cache_waive += rq_stats->cache_waive; s->rx_congst_umr += rq_stats->congst_umr; s->rx_arfs_err += rq_stats->arfs_err; + s->rx_recover += rq_stats->recover; s->ch_events += ch_stats->events; s->ch_poll += ch_stats->poll; s->ch_arm += ch_stats->arm; @@ -227,6 +285,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_xdp_xmit += xdpsq_red_stats->xmit; s->tx_xdp_mpwqe += xdpsq_red_stats->mpwqe; s->tx_xdp_inlnw += xdpsq_red_stats->inlnw; + s->tx_xdp_nops += xdpsq_red_stats->nops; s->tx_xdp_full += xdpsq_red_stats->full; s->tx_xdp_err += xdpsq_red_stats->err; s->tx_xdp_cqes += xdpsq_red_stats->cqes; @@ -282,13 +341,17 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_tls_encrypted_bytes += sq_stats->tls_encrypted_bytes; s->tx_tls_ctx += sq_stats->tls_ctx; s->tx_tls_ooo += sq_stats->tls_ooo; + s->tx_tls_dump_bytes += sq_stats->tls_dump_bytes; + s->tx_tls_dump_packets += sq_stats->tls_dump_packets; s->tx_tls_resync_bytes += sq_stats->tls_resync_bytes; + s->tx_tls_skip_no_sync_data += sq_stats->tls_skip_no_sync_data; s->tx_tls_drop_no_sync_data += sq_stats->tls_drop_no_sync_data; s->tx_tls_drop_bypass_req += sq_stats->tls_drop_bypass_req; - s->tx_tls_dump_bytes += sq_stats->tls_dump_bytes; - s->tx_tls_dump_packets += sq_stats->tls_dump_packets; #endif s->tx_cqes += sq_stats->cqes; + + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */ + barrier(); } } } @@ -304,7 +367,7 @@ static const struct counter_desc drop_rq_stats_desc[] = { #define NUM_Q_COUNTERS ARRAY_SIZE(q_stats_desc) #define NUM_DROP_RQ_COUNTERS ARRAY_SIZE(drop_rq_stats_desc) -static int mlx5e_grp_q_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qcnt) { int num_stats = 0; @@ -317,7 +380,7 @@ static int mlx5e_grp_q_get_num_stats(struct 
mlx5e_priv *priv) return num_stats; } -static int mlx5e_grp_q_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(qcnt) { int i; @@ -332,7 +395,7 @@ static int mlx5e_grp_q_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx) return idx; } -static int mlx5e_grp_q_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qcnt) { int i; @@ -345,7 +408,7 @@ static int mlx5e_grp_q_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) return idx; } -static void mlx5e_grp_q_update_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qcnt) { struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt; u32 out[MLX5_ST_SZ_DW(query_q_counter_out)]; @@ -363,48 +426,58 @@ static void mlx5e_grp_q_update_stats(struct mlx5e_priv *priv) } #define VNIC_ENV_OFF(c) MLX5_BYTE_OFF(query_vnic_env_out, c) -static const struct counter_desc vnic_env_stats_desc[] = { +static const struct counter_desc vnic_env_stats_steer_desc[] = { { "rx_steer_missed_packets", VNIC_ENV_OFF(vport_env.nic_receive_steering_discard) }, }; -#define NUM_VNIC_ENV_COUNTERS ARRAY_SIZE(vnic_env_stats_desc) +static const struct counter_desc vnic_env_stats_dev_oob_desc[] = { + { "dev_internal_queue_oob", + VNIC_ENV_OFF(vport_env.internal_rq_out_of_buffer) }, +}; + +#define NUM_VNIC_ENV_STEER_COUNTERS(dev) \ + (MLX5_CAP_GEN(dev, nic_receive_steering_discard) ? \ + ARRAY_SIZE(vnic_env_stats_steer_desc) : 0) +#define NUM_VNIC_ENV_DEV_OOB_COUNTERS(dev) \ + (MLX5_CAP_GEN(dev, vnic_env_int_rq_oob) ? \ + ARRAY_SIZE(vnic_env_stats_dev_oob_desc) : 0) -static int mlx5e_grp_vnic_env_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(vnic_env) { - return MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard) ? 
- NUM_VNIC_ENV_COUNTERS : 0; + return NUM_VNIC_ENV_STEER_COUNTERS(priv->mdev) + + NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); } -static int mlx5e_grp_vnic_env_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vnic_env) { int i; - if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard)) - return idx; + for (i = 0; i < NUM_VNIC_ENV_STEER_COUNTERS(priv->mdev); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + vnic_env_stats_steer_desc[i].format); - for (i = 0; i < NUM_VNIC_ENV_COUNTERS; i++) + for (i = 0; i < NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, - vnic_env_stats_desc[i].format); + vnic_env_stats_dev_oob_desc[i].format); return idx; } -static int mlx5e_grp_vnic_env_fill_stats(struct mlx5e_priv *priv, u64 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vnic_env) { int i; - if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard)) - return idx; - - for (i = 0; i < NUM_VNIC_ENV_COUNTERS; i++) + for (i = 0; i < NUM_VNIC_ENV_STEER_COUNTERS(priv->mdev); i++) data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vnic.query_vnic_env_out, - vnic_env_stats_desc, i); + vnic_env_stats_steer_desc, i); + + for (i = 0; i < NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); i++) + data[idx++] = MLX5E_READ_CTR32_BE(priv->stats.vnic.query_vnic_env_out, + vnic_env_stats_dev_oob_desc, i); return idx; } -static void mlx5e_grp_vnic_env_update_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vnic_env) { u32 *out = (u32 *)priv->stats.vnic.query_vnic_env_out; int outlen = MLX5_ST_SZ_BYTES(query_vnic_env_out); @@ -467,13 +540,12 @@ static const struct counter_desc vport_stats_desc[] = { #define NUM_VPORT_COUNTERS ARRAY_SIZE(vport_stats_desc) -static int mlx5e_grp_vport_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(vport) { return NUM_VPORT_COUNTERS; } -static int mlx5e_grp_vport_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vport) { int i; @@ -482,8 +554,7 @@ static int mlx5e_grp_vport_fill_strings(struct mlx5e_priv *priv, u8 *data, return idx; } -static int mlx5e_grp_vport_fill_stats(struct mlx5e_priv *priv, u64 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vport) { int i; @@ -493,7 +564,7 @@ static int mlx5e_grp_vport_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } -static void mlx5e_grp_vport_update_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport) { int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); u32 *out = (u32 *)priv->stats.vport.query_vport_out; @@ -532,13 +603,12 @@ static const struct counter_desc pport_802_3_stats_desc[] = { #define NUM_PPORT_802_3_COUNTERS ARRAY_SIZE(pport_802_3_stats_desc) -static int mlx5e_grp_802_3_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(802_3) { return NUM_PPORT_802_3_COUNTERS; } -static int mlx5e_grp_802_3_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(802_3) { int i; @@ -547,8 +617,7 @@ static int mlx5e_grp_802_3_fill_strings(struct mlx5e_priv *priv, u8 *data, return idx; } -static int mlx5e_grp_802_3_fill_stats(struct mlx5e_priv *priv, u64 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(802_3) { int i; @@ -561,7 +630,7 @@ static int mlx5e_grp_802_3_fill_stats(struct mlx5e_priv *priv, u64 *data, #define MLX5_BASIC_PPCNT_SUPPORTED(mdev) \ (MLX5_CAP_GEN(mdev, 
pcam_reg) ? MLX5_CAP_PCAM_REG(mdev, ppcnt) : 1) -void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(802_3) { struct mlx5e_pport_stats *pstats = &priv->stats.pport; struct mlx5_core_dev *mdev = priv->mdev; @@ -589,13 +658,12 @@ static const struct counter_desc pport_2863_stats_desc[] = { #define NUM_PPORT_2863_COUNTERS ARRAY_SIZE(pport_2863_stats_desc) -static int mlx5e_grp_2863_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(2863) { return NUM_PPORT_2863_COUNTERS; } -static int mlx5e_grp_2863_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(2863) { int i; @@ -604,8 +672,7 @@ static int mlx5e_grp_2863_fill_strings(struct mlx5e_priv *priv, u8 *data, return idx; } -static int mlx5e_grp_2863_fill_stats(struct mlx5e_priv *priv, u64 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(2863) { int i; @@ -615,7 +682,7 @@ static int mlx5e_grp_2863_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } -static void mlx5e_grp_2863_update_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(2863) { struct mlx5e_pport_stats *pstats = &priv->stats.pport; struct mlx5_core_dev *mdev = priv->mdev; @@ -650,13 +717,12 @@ static const struct counter_desc pport_2819_stats_desc[] = { #define NUM_PPORT_2819_COUNTERS ARRAY_SIZE(pport_2819_stats_desc) -static int mlx5e_grp_2819_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(2819) { return NUM_PPORT_2819_COUNTERS; } -static int mlx5e_grp_2819_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(2819) { int i; @@ -665,8 +731,7 @@ static int mlx5e_grp_2819_fill_strings(struct mlx5e_priv *priv, u8 *data, return idx; } -static int mlx5e_grp_2819_fill_stats(struct mlx5e_priv *priv, u64 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(2819) { int i; @@ -676,7 +741,7 @@ static int mlx5e_grp_2819_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } -static void mlx5e_grp_2819_update_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(2819) { struct mlx5e_pport_stats *pstats = &priv->stats.pport; struct mlx5_core_dev *mdev = priv->mdev; @@ -714,7 +779,7 @@ pport_phy_statistical_err_lanes_stats_desc[] = { #define NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS \ ARRAY_SIZE(pport_phy_statistical_err_lanes_stats_desc) -static int mlx5e_grp_phy_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(phy) { struct mlx5_core_dev *mdev = priv->mdev; int num_stats; @@ -731,8 +796,7 @@ static int mlx5e_grp_phy_get_num_stats(struct mlx5e_priv *priv) return num_stats; } -static int mlx5e_grp_phy_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(phy) { struct mlx5_core_dev *mdev = priv->mdev; int i; @@ -754,7 +818,7 @@ static int mlx5e_grp_phy_fill_strings(struct mlx5e_priv *priv, u8 *data, return idx; } -static int mlx5e_grp_phy_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(phy) { struct mlx5_core_dev *mdev = priv->mdev; int i; @@ -780,7 +844,7 @@ static int mlx5e_grp_phy_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) return idx; } -static void mlx5e_grp_phy_update_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(phy) { struct mlx5e_pport_stats *pstats = &priv->stats.pport; struct 
mlx5_core_dev *mdev = priv->mdev; @@ -810,7 +874,7 @@ static const struct counter_desc pport_eth_ext_stats_desc[] = { #define NUM_PPORT_ETH_EXT_COUNTERS ARRAY_SIZE(pport_eth_ext_stats_desc) -static int mlx5e_grp_eth_ext_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(eth_ext) { if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) return NUM_PPORT_ETH_EXT_COUNTERS; @@ -818,8 +882,7 @@ static int mlx5e_grp_eth_ext_get_num_stats(struct mlx5e_priv *priv) return 0; } -static int mlx5e_grp_eth_ext_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(eth_ext) { int i; @@ -830,8 +893,7 @@ static int mlx5e_grp_eth_ext_fill_strings(struct mlx5e_priv *priv, u8 *data, return idx; } -static int mlx5e_grp_eth_ext_fill_stats(struct mlx5e_priv *priv, u64 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(eth_ext) { int i; @@ -843,7 +905,7 @@ static int mlx5e_grp_eth_ext_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } -static void mlx5e_grp_eth_ext_update_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(eth_ext) { struct mlx5e_pport_stats *pstats = &priv->stats.pport; struct mlx5_core_dev *mdev = priv->mdev; @@ -884,7 +946,7 @@ static const struct counter_desc pcie_perf_stall_stats_desc[] = { #define NUM_PCIE_PERF_COUNTERS64 ARRAY_SIZE(pcie_perf_stats_desc64) #define NUM_PCIE_PERF_STALL_COUNTERS ARRAY_SIZE(pcie_perf_stall_stats_desc) -static int mlx5e_grp_pcie_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(pcie) { int num_stats = 0; @@ -900,8 +962,7 @@ static int mlx5e_grp_pcie_get_num_stats(struct mlx5e_priv *priv) return num_stats; } -static int mlx5e_grp_pcie_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(pcie) { int i; @@ -922,8 +983,7 @@ static int mlx5e_grp_pcie_fill_strings(struct mlx5e_priv *priv, u8 *data, return idx; } -static int mlx5e_grp_pcie_fill_stats(struct mlx5e_priv *priv, u64 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(pcie) { int i; @@ -947,7 +1007,7 @@ static int mlx5e_grp_pcie_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } -static void mlx5e_grp_pcie_update_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(pcie) { struct mlx5e_pcie_stats *pcie_stats = &priv->stats.pcie; struct mlx5_core_dev *mdev = priv->mdev; @@ -963,12 +1023,152 @@ static void mlx5e_grp_pcie_update_stats(struct mlx5e_priv *priv) mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); } +#define PPORT_PER_TC_PRIO_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_per_tc_prio_grp_data_layout.c##_high) + +static const struct counter_desc pport_per_tc_prio_stats_desc[] = { + { "rx_prio%d_buf_discard", PPORT_PER_TC_PRIO_OFF(no_buffer_discard_uc) }, +}; + +#define NUM_PPORT_PER_TC_PRIO_COUNTERS ARRAY_SIZE(pport_per_tc_prio_stats_desc) + +#define PPORT_PER_TC_CONGEST_PRIO_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_per_tc_congest_prio_grp_data_layout.c##_high) + +static const struct counter_desc pport_per_tc_congest_prio_stats_desc[] = { + { "rx_prio%d_cong_discard", PPORT_PER_TC_CONGEST_PRIO_OFF(wred_discard) }, + { "rx_prio%d_marked", PPORT_PER_TC_CONGEST_PRIO_OFF(ecn_marked_tc) }, +}; + +#define NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS \ + ARRAY_SIZE(pport_per_tc_congest_prio_stats_desc) + +static int mlx5e_grp_per_tc_prio_get_num_stats(struct mlx5e_priv *priv) +{ + struct 
mlx5_core_dev *mdev = priv->mdev; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return 0; + + return NUM_PPORT_PER_TC_PRIO_COUNTERS * NUM_PPORT_PRIO; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(per_port_buff_congest) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int i, prio; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return idx; + + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + for (i = 0; i < NUM_PPORT_PER_TC_PRIO_COUNTERS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_tc_prio_stats_desc[i].format, prio); + for (i = 0; i < NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_tc_congest_prio_stats_desc[i].format, prio); + } + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(per_port_buff_congest) +{ + struct mlx5e_pport_stats *pport = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + int i, prio; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return idx; + + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + for (i = 0; i < NUM_PPORT_PER_TC_PRIO_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&pport->per_tc_prio_counters[prio], + pport_per_tc_prio_stats_desc, i); + for (i = 0; i < NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS ; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&pport->per_tc_congest_prio_counters[prio], + pport_per_tc_congest_prio_stats_desc, i); + } + + return idx; +} + +static void mlx5e_grp_per_tc_prio_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + int prio; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return; + + MLX5_SET(ppcnt_reg, in, pnat, 2); + MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + out = pstats->per_tc_prio_counters[prio]; + MLX5_SET(ppcnt_reg, in, prio_tc, prio); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + } +} + +static int mlx5e_grp_per_tc_congest_prio_get_num_stats(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return 0; + + return NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS * NUM_PPORT_PRIO; +} + +static void mlx5e_grp_per_tc_congest_prio_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + int prio; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return; + + MLX5_SET(ppcnt_reg, in, pnat, 2); + MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_TRAFFIC_CLASS_CONGESTION_GROUP); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + out = pstats->per_tc_congest_prio_counters[prio]; + MLX5_SET(ppcnt_reg, in, prio_tc, prio); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + } +} + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(per_port_buff_congest) +{ + return mlx5e_grp_per_tc_prio_get_num_stats(priv) + + mlx5e_grp_per_tc_congest_prio_get_num_stats(priv); +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(per_port_buff_congest) +{ + mlx5e_grp_per_tc_prio_update_stats(priv); + mlx5e_grp_per_tc_congest_prio_update_stats(priv); +} + #define PPORT_PER_PRIO_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.eth_per_prio_grp_data_layout.c##_high) static const struct counter_desc pport_per_prio_traffic_stats_desc[] = { { "rx_prio%d_bytes", 
PPORT_PER_PRIO_OFF(rx_octets) }, { "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) }, + { "rx_prio%d_discards", PPORT_PER_PRIO_OFF(rx_discards) }, { "tx_prio%d_bytes", PPORT_PER_PRIO_OFF(tx_octets) }, { "tx_prio%d_packets", PPORT_PER_PRIO_OFF(tx_frames) }, }; @@ -1131,29 +1331,27 @@ static int mlx5e_grp_per_prio_pfc_fill_stats(struct mlx5e_priv *priv, return idx; } -static int mlx5e_grp_per_prio_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(per_prio) { return mlx5e_grp_per_prio_traffic_get_num_stats() + mlx5e_grp_per_prio_pfc_get_num_stats(priv); } -static int mlx5e_grp_per_prio_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(per_prio) { idx = mlx5e_grp_per_prio_traffic_fill_strings(priv, data, idx); idx = mlx5e_grp_per_prio_pfc_fill_strings(priv, data, idx); return idx; } -static int mlx5e_grp_per_prio_fill_stats(struct mlx5e_priv *priv, u64 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(per_prio) { idx = mlx5e_grp_per_prio_traffic_fill_stats(priv, data, idx); idx = mlx5e_grp_per_prio_pfc_fill_stats(priv, data, idx); return idx; } -static void mlx5e_grp_per_prio_update_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(per_prio) { struct mlx5e_pport_stats *pstats = &priv->stats.pport; struct mlx5_core_dev *mdev = priv->mdev; @@ -1188,13 +1386,12 @@ static const struct counter_desc mlx5e_pme_error_desc[] = { #define NUM_PME_STATUS_STATS ARRAY_SIZE(mlx5e_pme_status_desc) #define NUM_PME_ERR_STATS ARRAY_SIZE(mlx5e_pme_error_desc) -static int mlx5e_grp_pme_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(pme) { return NUM_PME_STATUS_STATS + NUM_PME_ERR_STATS; } -static int mlx5e_grp_pme_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(pme) { int i; @@ -1207,8 +1404,7 @@ static int mlx5e_grp_pme_fill_strings(struct mlx5e_priv *priv, u8 *data, return idx; } -static int mlx5e_grp_pme_fill_stats(struct mlx5e_priv *priv, u64 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(pme) { struct mlx5_pme_stats pme_stats; int i; @@ -1226,45 +1422,46 @@ static int mlx5e_grp_pme_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } -static int mlx5e_grp_ipsec_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(pme) { return; } + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec) { return mlx5e_ipsec_get_count(priv); } -static int mlx5e_grp_ipsec_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ipsec) { return idx + mlx5e_ipsec_get_strings(priv, data + idx * ETH_GSTRING_LEN); } -static int mlx5e_grp_ipsec_fill_stats(struct mlx5e_priv *priv, u64 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec) { return idx + mlx5e_ipsec_get_stats(priv, data + idx); } -static void mlx5e_grp_ipsec_update_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ipsec) { mlx5e_ipsec_update_stats(priv); } -static int mlx5e_grp_tls_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(tls) { return mlx5e_tls_get_count(priv); } -static int mlx5e_grp_tls_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(tls) { return idx + mlx5e_tls_get_strings(priv, data + idx * ETH_GSTRING_LEN); } -static int mlx5e_grp_tls_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) +static 
MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(tls) { return idx + mlx5e_tls_get_stats(priv, data + idx); } +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(tls) { return; } + static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, @@ -1294,6 +1491,7 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, recover) }, }; static const struct counter_desc sq_stats_desc[] = { @@ -1312,10 +1510,12 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ctx) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ooo) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_resync_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_skip_no_sync_data) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) }, #endif { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) }, @@ -1331,6 +1531,7 @@ static const struct counter_desc rq_xdpsq_stats_desc[] = { { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) }, { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) }, { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) }, + { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, nops) }, { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) }, { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) }, { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, @@ -1340,6 +1541,7 @@ static const struct counter_desc xdpsq_stats_desc[] = { { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) }, { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) }, { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) }, + { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, nops) }, { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) }, { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) }, { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, @@ -1393,7 +1595,7 @@ static const struct counter_desc ch_stats_desc[] = { #define NUM_XSKSQ_STATS ARRAY_SIZE(xsksq_stats_desc) #define NUM_CH_STATS ARRAY_SIZE(ch_stats_desc) -static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(channels) { int max_nch = priv->max_nch; @@ -1406,8 +1608,7 @@ static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv) (NUM_XSKSQ_STATS * max_nch * priv->xsk.ever_used); } -static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(channels) { bool is_xsk = priv->xsk.ever_used; int max_nch = priv->max_nch; @@ -1435,7 +1636,7 @@ static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, for (j = 0; j < NUM_SQ_STATS; j++) 
sprintf(data + (idx++) * ETH_GSTRING_LEN, sq_stats_desc[j].format, - priv->channel_tc2txq[i][tc]); + i + tc * max_nch); for (i = 0; i < max_nch; i++) { for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++) @@ -1449,8 +1650,7 @@ static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, return idx; } -static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(channels) { bool is_xsk = priv->xsk.ever_used; int max_nch = priv->max_nch; @@ -1498,98 +1698,46 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(channels) { return; } + +MLX5E_DEFINE_STATS_GRP(sw, 0); +MLX5E_DEFINE_STATS_GRP(qcnt, MLX5E_NDO_UPDATE_STATS); +MLX5E_DEFINE_STATS_GRP(vnic_env, 0); +MLX5E_DEFINE_STATS_GRP(vport, MLX5E_NDO_UPDATE_STATS); +MLX5E_DEFINE_STATS_GRP(802_3, MLX5E_NDO_UPDATE_STATS); +MLX5E_DEFINE_STATS_GRP(2863, 0); +MLX5E_DEFINE_STATS_GRP(2819, 0); +MLX5E_DEFINE_STATS_GRP(phy, 0); +MLX5E_DEFINE_STATS_GRP(pcie, 0); +MLX5E_DEFINE_STATS_GRP(per_prio, 0); +MLX5E_DEFINE_STATS_GRP(pme, 0); +MLX5E_DEFINE_STATS_GRP(channels, 0); +MLX5E_DEFINE_STATS_GRP(per_port_buff_congest, 0); +MLX5E_DEFINE_STATS_GRP(eth_ext, 0); +static MLX5E_DEFINE_STATS_GRP(ipsec, 0); +static MLX5E_DEFINE_STATS_GRP(tls, 0); + /* The stats groups order is opposite to the update_stats() order calls */ -const struct mlx5e_stats_grp mlx5e_stats_grps[] = { - { - .get_num_stats = mlx5e_grp_sw_get_num_stats, - .fill_strings = mlx5e_grp_sw_fill_strings, - .fill_stats = mlx5e_grp_sw_fill_stats, - .update_stats = mlx5e_grp_sw_update_stats, - }, - { - .get_num_stats = mlx5e_grp_q_get_num_stats, - .fill_strings = mlx5e_grp_q_fill_strings, - .fill_stats = mlx5e_grp_q_fill_stats, - .update_stats_mask = MLX5E_NDO_UPDATE_STATS, - .update_stats = mlx5e_grp_q_update_stats, - }, - { - .get_num_stats = mlx5e_grp_vnic_env_get_num_stats, - .fill_strings = mlx5e_grp_vnic_env_fill_strings, - .fill_stats = mlx5e_grp_vnic_env_fill_stats, - .update_stats = mlx5e_grp_vnic_env_update_stats, - }, - { - .get_num_stats = mlx5e_grp_vport_get_num_stats, - .fill_strings = mlx5e_grp_vport_fill_strings, - .fill_stats = mlx5e_grp_vport_fill_stats, - .update_stats_mask = MLX5E_NDO_UPDATE_STATS, - .update_stats = mlx5e_grp_vport_update_stats, - }, - { - .get_num_stats = mlx5e_grp_802_3_get_num_stats, - .fill_strings = mlx5e_grp_802_3_fill_strings, - .fill_stats = mlx5e_grp_802_3_fill_stats, - .update_stats_mask = MLX5E_NDO_UPDATE_STATS, - .update_stats = mlx5e_grp_802_3_update_stats, - }, - { - .get_num_stats = mlx5e_grp_2863_get_num_stats, - .fill_strings = mlx5e_grp_2863_fill_strings, - .fill_stats = mlx5e_grp_2863_fill_stats, - .update_stats = mlx5e_grp_2863_update_stats, - }, - { - .get_num_stats = mlx5e_grp_2819_get_num_stats, - .fill_strings = mlx5e_grp_2819_fill_strings, - .fill_stats = mlx5e_grp_2819_fill_stats, - .update_stats = mlx5e_grp_2819_update_stats, - }, - { - .get_num_stats = mlx5e_grp_phy_get_num_stats, - .fill_strings = mlx5e_grp_phy_fill_strings, - .fill_stats = mlx5e_grp_phy_fill_stats, - .update_stats = mlx5e_grp_phy_update_stats, - }, - { - .get_num_stats = mlx5e_grp_eth_ext_get_num_stats, - .fill_strings = mlx5e_grp_eth_ext_fill_strings, - .fill_stats = mlx5e_grp_eth_ext_fill_stats, - .update_stats = mlx5e_grp_eth_ext_update_stats, - }, - { - .get_num_stats = mlx5e_grp_pcie_get_num_stats, - .fill_strings = mlx5e_grp_pcie_fill_strings, - .fill_stats = mlx5e_grp_pcie_fill_stats, - 
.update_stats = mlx5e_grp_pcie_update_stats, - }, - { - .get_num_stats = mlx5e_grp_per_prio_get_num_stats, - .fill_strings = mlx5e_grp_per_prio_fill_strings, - .fill_stats = mlx5e_grp_per_prio_fill_stats, - .update_stats = mlx5e_grp_per_prio_update_stats, - }, - { - .get_num_stats = mlx5e_grp_pme_get_num_stats, - .fill_strings = mlx5e_grp_pme_fill_strings, - .fill_stats = mlx5e_grp_pme_fill_stats, - }, - { - .get_num_stats = mlx5e_grp_ipsec_get_num_stats, - .fill_strings = mlx5e_grp_ipsec_fill_strings, - .fill_stats = mlx5e_grp_ipsec_fill_stats, - .update_stats = mlx5e_grp_ipsec_update_stats, - }, - { - .get_num_stats = mlx5e_grp_tls_get_num_stats, - .fill_strings = mlx5e_grp_tls_fill_strings, - .fill_stats = mlx5e_grp_tls_fill_stats, - }, - { - .get_num_stats = mlx5e_grp_channels_get_num_stats, - .fill_strings = mlx5e_grp_channels_fill_strings, - .fill_stats = mlx5e_grp_channels_fill_stats, - } +mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = { + &MLX5E_STATS_GRP(sw), + &MLX5E_STATS_GRP(qcnt), + &MLX5E_STATS_GRP(vnic_env), + &MLX5E_STATS_GRP(vport), + &MLX5E_STATS_GRP(802_3), + &MLX5E_STATS_GRP(2863), + &MLX5E_STATS_GRP(2819), + &MLX5E_STATS_GRP(phy), + &MLX5E_STATS_GRP(eth_ext), + &MLX5E_STATS_GRP(pcie), + &MLX5E_STATS_GRP(per_prio), + &MLX5E_STATS_GRP(pme), + &MLX5E_STATS_GRP(ipsec), + &MLX5E_STATS_GRP(tls), + &MLX5E_STATS_GRP(channels), + &MLX5E_STATS_GRP(per_port_buff_congest), }; -const int mlx5e_num_stats_grps = ARRAY_SIZE(mlx5e_stats_grps); +unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv) +{ + return ARRAY_SIZE(mlx5e_nic_stats_grps); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 76ac111e14d0..092b39ffa32a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -29,6 +29,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ + #ifndef __MLX5_EN_STATS_H__ #define __MLX5_EN_STATS_H__ @@ -55,6 +56,56 @@ struct counter_desc { size_t offset; /* Byte offset */ }; +enum { + MLX5E_NDO_UPDATE_STATS = BIT(0x1), +}; + +struct mlx5e_priv; +struct mlx5e_stats_grp { + u16 update_stats_mask; + int (*get_num_stats)(struct mlx5e_priv *priv); + int (*fill_strings)(struct mlx5e_priv *priv, u8 *data, int idx); + int (*fill_stats)(struct mlx5e_priv *priv, u64 *data, int idx); + void (*update_stats)(struct mlx5e_priv *priv); +}; + +typedef const struct mlx5e_stats_grp *const mlx5e_stats_grp_t; + +#define MLX5E_STATS_GRP_OP(grp, name) mlx5e_stats_grp_ ## grp ## _ ## name + +#define MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(grp) \ + int MLX5E_STATS_GRP_OP(grp, num_stats)(struct mlx5e_priv *priv) + +#define MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(grp) \ + void MLX5E_STATS_GRP_OP(grp, update_stats)(struct mlx5e_priv *priv) + +#define MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(grp) \ + int MLX5E_STATS_GRP_OP(grp, fill_strings)(struct mlx5e_priv *priv, u8 *data, int idx) + +#define MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(grp) \ + int MLX5E_STATS_GRP_OP(grp, fill_stats)(struct mlx5e_priv *priv, u64 *data, int idx) + +#define MLX5E_STATS_GRP(grp) mlx5e_stats_grp_ ## grp + +#define MLX5E_DECLARE_STATS_GRP(grp) \ + const struct mlx5e_stats_grp MLX5E_STATS_GRP(grp) + +#define MLX5E_DEFINE_STATS_GRP(grp, mask) \ +MLX5E_DECLARE_STATS_GRP(grp) = { \ + .get_num_stats = MLX5E_STATS_GRP_OP(grp, num_stats), \ + .fill_stats = MLX5E_STATS_GRP_OP(grp, fill_stats), \ + .fill_strings = MLX5E_STATS_GRP_OP(grp, fill_strings), \ + .update_stats = MLX5E_STATS_GRP_OP(grp, update_stats), \ + .update_stats_mask = mask, \ +} + +unsigned int mlx5e_stats_total_num(struct mlx5e_priv *priv); +void mlx5e_stats_update(struct mlx5e_priv *priv); +void mlx5e_stats_fill(struct mlx5e_priv *priv, u64 *data, int idx); +void mlx5e_stats_fill_strings(struct mlx5e_priv *priv, u8 *data); + +/* Concrete NIC Stats */ + struct mlx5e_sw_stats { u64 rx_packets; u64 rx_bytes; @@ -81,6 +132,7 @@ struct mlx5e_sw_stats { u64 rx_xdp_tx_xmit; u64 rx_xdp_tx_mpwqe; u64 rx_xdp_tx_inlnw; + u64 rx_xdp_tx_nops; u64 rx_xdp_tx_full; u64 rx_xdp_tx_err; u64 rx_xdp_tx_cqe; @@ -97,6 +149,7 @@ struct mlx5e_sw_stats { u64 tx_xdp_xmit; u64 tx_xdp_mpwqe; u64 tx_xdp_inlnw; + u64 tx_xdp_nops; u64 tx_xdp_full; u64 tx_xdp_err; u64 tx_xdp_cqes; @@ -114,6 +167,7 @@ struct mlx5e_sw_stats { u64 rx_cache_waive; u64 rx_congst_umr; u64 rx_arfs_err; + u64 rx_recover; u64 ch_events; u64 ch_poll; u64 ch_arm; @@ -126,11 +180,12 @@ struct mlx5e_sw_stats { u64 tx_tls_encrypted_bytes; u64 tx_tls_ctx; u64 tx_tls_ooo; + u64 tx_tls_dump_packets; + u64 tx_tls_dump_bytes; u64 tx_tls_resync_bytes; + u64 tx_tls_skip_no_sync_data; u64 tx_tls_drop_no_sync_data; u64 tx_tls_drop_bypass_req; - u64 tx_tls_dump_packets; - u64 tx_tls_dump_bytes; #endif u64 rx_xsk_packets; @@ -204,6 +259,8 @@ struct mlx5e_pport_stats { __be64 phy_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 phy_statistical_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 eth_ext_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 per_tc_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 per_tc_congest_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; }; #define PCIE_PERF_GET(pcie_stats, c) \ @@ -247,6 +304,7 @@ struct mlx5e_rq_stats { u64 cache_waive; u64 congst_umr; u64 arfs_err; + u64 recover; }; struct mlx5e_sq_stats { @@ -267,11 +325,12 @@ struct mlx5e_sq_stats { u64 tls_encrypted_bytes; u64 tls_ctx; u64 tls_ooo; + u64 tls_dump_packets; + u64 tls_dump_bytes; u64 
tls_resync_bytes; + u64 tls_skip_no_sync_data; u64 tls_drop_no_sync_data; u64 tls_drop_bypass_req; - u64 tls_dump_packets; - u64 tls_dump_bytes; #endif /* less likely accessed in data path */ u64 csum_none; @@ -288,6 +347,7 @@ struct mlx5e_xdpsq_stats { u64 xmit; u64 mpwqe; u64 inlnw; + u64 nops; u64 full; u64 err; /* dirtied @completion */ @@ -313,22 +373,22 @@ struct mlx5e_stats { struct mlx5e_pcie_stats pcie; }; -enum { - MLX5E_NDO_UPDATE_STATS = BIT(0x1), -}; - -struct mlx5e_priv; -struct mlx5e_stats_grp { - u16 update_stats_mask; - int (*get_num_stats)(struct mlx5e_priv *priv); - int (*fill_strings)(struct mlx5e_priv *priv, u8 *data, int idx); - int (*fill_stats)(struct mlx5e_priv *priv, u64 *data, int idx); - void (*update_stats)(struct mlx5e_priv *priv); -}; - -extern const struct mlx5e_stats_grp mlx5e_stats_grps[]; -extern const int mlx5e_num_stats_grps; +extern mlx5e_stats_grp_t mlx5e_nic_stats_grps[]; +unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv); -void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv); +extern MLX5E_DECLARE_STATS_GRP(sw); +extern MLX5E_DECLARE_STATS_GRP(qcnt); +extern MLX5E_DECLARE_STATS_GRP(vnic_env); +extern MLX5E_DECLARE_STATS_GRP(vport); +extern MLX5E_DECLARE_STATS_GRP(802_3); +extern MLX5E_DECLARE_STATS_GRP(2863); +extern MLX5E_DECLARE_STATS_GRP(2819); +extern MLX5E_DECLARE_STATS_GRP(phy); +extern MLX5E_DECLARE_STATS_GRP(eth_ext); +extern MLX5E_DECLARE_STATS_GRP(pcie); +extern MLX5E_DECLARE_STATS_GRP(per_prio); +extern MLX5E_DECLARE_STATS_GRP(pme); +extern MLX5E_DECLARE_STATS_GRP(channels); +extern MLX5E_DECLARE_STATS_GRP(per_port_buff_congest); #endif /* __MLX5_EN_STATS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 00b2d4a86159..74091f72c9a8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -38,6 +38,8 @@ #include <linux/mlx5/fs.h> #include <linux/mlx5/device.h> #include <linux/rhashtable.h> +#include <linux/refcount.h> +#include <linux/completion.h> #include <net/tc_act/tc_mirred.h> #include <net/tc_act/tc_vlan.h> #include <net/tc_act/tc_tunnel_key.h> @@ -49,35 +51,39 @@ #include "en_rep.h" #include "en_tc.h" #include "eswitch.h" +#include "eswitch_offloads_chains.h" #include "fs_core.h" #include "en/port.h" #include "en/tc_tun.h" #include "lib/devcom.h" #include "lib/geneve.h" +#include "diag/en_tc_tracepoint.h" struct mlx5_nic_flow_attr { u32 action; u32 flow_tag; - u32 mod_hdr_id; + struct mlx5_modify_hdr *modify_hdr; u32 hairpin_tirn; u8 match_level; struct mlx5_flow_table *hairpin_ft; struct mlx5_fc *counter; }; -#define MLX5E_TC_FLOW_BASE (MLX5E_TC_LAST_EXPORTED_BIT + 1) +#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1) enum { - MLX5E_TC_FLOW_INGRESS = MLX5E_TC_INGRESS, - MLX5E_TC_FLOW_EGRESS = MLX5E_TC_EGRESS, - MLX5E_TC_FLOW_ESWITCH = MLX5E_TC_ESW_OFFLOAD, - MLX5E_TC_FLOW_NIC = MLX5E_TC_NIC_OFFLOAD, - MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE), - MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 1), - MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 2), - MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 3), - MLX5E_TC_FLOW_DUP = BIT(MLX5E_TC_FLOW_BASE + 4), - MLX5E_TC_FLOW_NOT_READY = BIT(MLX5E_TC_FLOW_BASE + 5), + MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT, + MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT, + MLX5E_TC_FLOW_FLAG_ESWITCH = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT, + MLX5E_TC_FLOW_FLAG_FT = MLX5E_TC_FLAG_FT_OFFLOAD_BIT, + 
MLX5E_TC_FLOW_FLAG_NIC = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT, + MLX5E_TC_FLOW_FLAG_OFFLOADED = MLX5E_TC_FLOW_BASE, + MLX5E_TC_FLOW_FLAG_HAIRPIN = MLX5E_TC_FLOW_BASE + 1, + MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS = MLX5E_TC_FLOW_BASE + 2, + MLX5E_TC_FLOW_FLAG_SLOW = MLX5E_TC_FLOW_BASE + 3, + MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4, + MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5, + MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6, }; #define MLX5E_TC_MAX_SPLITS 1 @@ -100,6 +106,7 @@ enum { * container_of(helper item, containing struct type, helper field[index]) */ struct encap_flow_item { + struct mlx5e_encap_entry *e; /* attached encap instance */ struct list_head list; int index; }; @@ -108,7 +115,7 @@ struct mlx5e_tc_flow { struct rhash_head node; struct mlx5e_priv *priv; u64 cookie; - u16 flags; + unsigned long flags; struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1]; /* Flow can be associated with multiple encap IDs. * The number of encaps is bounded by the number of supported @@ -116,10 +123,17 @@ struct mlx5e_tc_flow { */ struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS]; struct mlx5e_tc_flow *peer_flow; + struct mlx5e_mod_hdr_entry *mh; /* attached mod header instance */ struct list_head mod_hdr; /* flows sharing the same mod hdr ID */ + struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */ struct list_head hairpin; /* flows sharing the same hairpin */ struct list_head peer; /* flows with peer flow */ struct list_head unready; /* flows not ready to be offloaded (e.g due to missing route) */ + int tmp_efi_index; + struct list_head tmp_list; /* temporary flow list used by neigh update */ + refcount_t refcnt; + struct rcu_head rcu_head; + struct completion init_done; union { struct mlx5_esw_flow_attr esw_attr[0]; struct mlx5_nic_flow_attr nic_attr[0]; @@ -157,12 +171,20 @@ struct mlx5e_hairpin_entry { /* a node of a hash table which keeps all the hairpin entries */ struct hlist_node hairpin_hlist; + /* protects flows list */ + spinlock_t flows_lock; /* flows sharing the same hairpin */ struct list_head flows; + /* hpe's that were not fully initialized when dead peer update event + * function traversed them. + */ + struct list_head dead_peer_wait_list; u16 peer_vhca_id; u8 prio; struct mlx5e_hairpin *hp; + refcount_t refcnt; + struct completion res_ready; }; struct mod_hdr_key { @@ -174,16 +196,98 @@ struct mlx5e_mod_hdr_entry { /* a node of a hash table which keeps all the mod_hdr entries */ struct hlist_node mod_hdr_hlist; + /* protects flows list */ + spinlock_t flows_lock; /* flows sharing the same mod_hdr entry */ struct list_head flows; struct mod_hdr_key key; - u32 mod_hdr_id; + struct mlx5_modify_hdr *modify_hdr; + + refcount_t refcnt; + struct completion res_ready; + int compl_result; }; #define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) +static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow); + +static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow) +{ + if (!flow || !refcount_inc_not_zero(&flow->refcnt)) + return ERR_PTR(-EINVAL); + return flow; +} + +static void mlx5e_flow_put(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + if (refcount_dec_and_test(&flow->refcnt)) { + mlx5e_tc_del_flow(priv, flow); + kfree_rcu(flow, rcu_head); + } +} + +static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag) +{ + /* Complete all memory stores before setting bit. 
*/ + smp_mb__before_atomic(); + set_bit(flag, &flow->flags); +} + +#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag) + +static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow, + unsigned long flag) +{ + /* test_and_set_bit() provides all necessary barriers */ + return test_and_set_bit(flag, &flow->flags); +} + +#define flow_flag_test_and_set(flow, flag) \ + __flow_flag_test_and_set(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + +static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag) +{ + /* Complete all memory stores before clearing bit. */ + smp_mb__before_atomic(); + clear_bit(flag, &flow->flags); +} + +#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + +static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag) +{ + bool ret = test_bit(flag, &flow->flags); + + /* Read fields of flow structure only after checking flags. */ + smp_mb__after_atomic(); + return ret; +} + +#define flow_flag_test(flow, flag) __flow_flag_test(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + +static bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow) +{ + return flow_flag_test(flow, ESWITCH); +} + +static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow) +{ + return flow_flag_test(flow, FT); +} + +static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow) +{ + return flow_flag_test(flow, OFFLOADED); +} + static inline u32 hash_mod_hdr_info(struct mod_hdr_key *key) { return jhash(key->actions, @@ -199,15 +303,62 @@ static inline int cmp_mod_hdr_info(struct mod_hdr_key *a, return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ); } +static struct mod_hdr_tbl * +get_mod_hdr_table(struct mlx5e_priv *priv, int namespace) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + return namespace == MLX5_FLOW_NAMESPACE_FDB ? &esw->offloads.mod_hdr : + &priv->fs.tc.mod_hdr; +} + +static struct mlx5e_mod_hdr_entry * +mlx5e_mod_hdr_get(struct mod_hdr_tbl *tbl, struct mod_hdr_key *key, u32 hash_key) +{ + struct mlx5e_mod_hdr_entry *mh, *found = NULL; + + hash_for_each_possible(tbl->hlist, mh, mod_hdr_hlist, hash_key) { + if (!cmp_mod_hdr_info(&mh->key, key)) { + refcount_inc(&mh->refcnt); + found = mh; + break; + } + } + + return found; +} + +static void mlx5e_mod_hdr_put(struct mlx5e_priv *priv, + struct mlx5e_mod_hdr_entry *mh, + int namespace) +{ + struct mod_hdr_tbl *tbl = get_mod_hdr_table(priv, namespace); + + if (!refcount_dec_and_mutex_lock(&mh->refcnt, &tbl->lock)) + return; + hash_del(&mh->mod_hdr_hlist); + mutex_unlock(&tbl->lock); + + WARN_ON(!list_empty(&mh->flows)); + if (mh->compl_result > 0) + mlx5_modify_header_dealloc(priv->mdev, mh->modify_hdr); + + kfree(mh); +} + +static int get_flow_name_space(struct mlx5e_tc_flow *flow) +{ + return mlx5e_is_eswitch_flow(flow) ? 
+ MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL; +} static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5e_tc_flow_parse_attr *parse_attr) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; int num_actions, actions_size, namespace, err; struct mlx5e_mod_hdr_entry *mh; + struct mod_hdr_tbl *tbl; struct mod_hdr_key key; - bool found = false; u32 hash_key; num_actions = parse_attr->num_mod_hdr_actions; @@ -218,80 +369,82 @@ static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv, hash_key = hash_mod_hdr_info(&key); - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { - namespace = MLX5_FLOW_NAMESPACE_FDB; - hash_for_each_possible(esw->offloads.mod_hdr_tbl, mh, - mod_hdr_hlist, hash_key) { - if (!cmp_mod_hdr_info(&mh->key, &key)) { - found = true; - break; - } - } - } else { - namespace = MLX5_FLOW_NAMESPACE_KERNEL; - hash_for_each_possible(priv->fs.tc.mod_hdr_tbl, mh, - mod_hdr_hlist, hash_key) { - if (!cmp_mod_hdr_info(&mh->key, &key)) { - found = true; - break; - } - } - } + namespace = get_flow_name_space(flow); + tbl = get_mod_hdr_table(priv, namespace); - if (found) + mutex_lock(&tbl->lock); + mh = mlx5e_mod_hdr_get(tbl, &key, hash_key); + if (mh) { + mutex_unlock(&tbl->lock); + wait_for_completion(&mh->res_ready); + + if (mh->compl_result < 0) { + err = -EREMOTEIO; + goto attach_header_err; + } goto attach_flow; + } mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL); - if (!mh) + if (!mh) { + mutex_unlock(&tbl->lock); return -ENOMEM; + } mh->key.actions = (void *)mh + sizeof(*mh); memcpy(mh->key.actions, key.actions, actions_size); mh->key.num_actions = num_actions; + spin_lock_init(&mh->flows_lock); INIT_LIST_HEAD(&mh->flows); - - err = mlx5_modify_header_alloc(priv->mdev, namespace, - mh->key.num_actions, - mh->key.actions, - &mh->mod_hdr_id); - if (err) - goto out_err; - - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) - hash_add(esw->offloads.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key); - else - hash_add(priv->fs.tc.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key); + refcount_set(&mh->refcnt, 1); + init_completion(&mh->res_ready); + + hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key); + mutex_unlock(&tbl->lock); + + mh->modify_hdr = mlx5_modify_header_alloc(priv->mdev, namespace, + mh->key.num_actions, + mh->key.actions); + if (IS_ERR(mh->modify_hdr)) { + err = PTR_ERR(mh->modify_hdr); + mh->compl_result = err; + goto alloc_header_err; + } + mh->compl_result = 1; + complete_all(&mh->res_ready); attach_flow: + flow->mh = mh; + spin_lock(&mh->flows_lock); list_add(&flow->mod_hdr, &mh->flows); - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) - flow->esw_attr->mod_hdr_id = mh->mod_hdr_id; + spin_unlock(&mh->flows_lock); + if (mlx5e_is_eswitch_flow(flow)) + flow->esw_attr->modify_hdr = mh->modify_hdr; else - flow->nic_attr->mod_hdr_id = mh->mod_hdr_id; + flow->nic_attr->modify_hdr = mh->modify_hdr; return 0; -out_err: - kfree(mh); +alloc_header_err: + complete_all(&mh->res_ready); +attach_header_err: + mlx5e_mod_hdr_put(priv, mh, namespace); return err; } static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { - struct list_head *next = flow->mod_hdr.next; + /* flow wasn't fully initialized */ + if (!flow->mh) + return; + spin_lock(&flow->mh->flows_lock); list_del(&flow->mod_hdr); + spin_unlock(&flow->mh->flows_lock); - if (list_empty(next)) { - struct mlx5e_mod_hdr_entry *mh; - - mh = list_entry(next, struct mlx5e_mod_hdr_entry, flows); - - mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id); - 
hash_del(&mh->mod_hdr_hlist); - kfree(mh); - } + mlx5e_mod_hdr_put(priv, flow->mh, get_flow_name_space(flow)); + flow->mh = NULL; } static @@ -440,7 +593,7 @@ static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp, for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) ttc_params->indir_tirn[tt] = hp->indir_tirn[tt]; - ft_attr->max_fte = MLX5E_NUM_TT; + ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE; ft_attr->level = MLX5E_TC_TTC_FT_LEVEL; ft_attr->prio = MLX5E_TC_PRIO; } @@ -555,13 +708,35 @@ static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv, hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe, hairpin_hlist, hash_key) { - if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) + if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) { + refcount_inc(&hpe->refcnt); return hpe; + } } return NULL; } +static void mlx5e_hairpin_put(struct mlx5e_priv *priv, + struct mlx5e_hairpin_entry *hpe) +{ + /* no more hairpin flows for us, release the hairpin pair */ + if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock)) + return; + hash_del(&hpe->hairpin_hlist); + mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); + + if (!IS_ERR_OR_NULL(hpe->hp)) { + netdev_dbg(priv->netdev, "del hairpin: peer %s\n", + dev_name(hpe->hp->pair->peer_mdev->device)); + + mlx5e_hairpin_destroy(hpe->hp); + } + + WARN_ON(!list_empty(&hpe->flows)); + kfree(hpe); +} + #define UNKNOWN_MATCH_PRIO 8 static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv, @@ -627,17 +802,37 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, extack); if (err) return err; + + mutex_lock(&priv->fs.tc.hairpin_tbl_lock); hpe = mlx5e_hairpin_get(priv, peer_id, match_prio); - if (hpe) + if (hpe) { + mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); + wait_for_completion(&hpe->res_ready); + + if (IS_ERR(hpe->hp)) { + err = -EREMOTEIO; + goto out_err; + } goto attach_flow; + } hpe = kzalloc(sizeof(*hpe), GFP_KERNEL); - if (!hpe) + if (!hpe) { + mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); return -ENOMEM; + } + spin_lock_init(&hpe->flows_lock); INIT_LIST_HEAD(&hpe->flows); + INIT_LIST_HEAD(&hpe->dead_peer_wait_list); hpe->peer_vhca_id = peer_id; hpe->prio = match_prio; + refcount_set(&hpe->refcnt, 1); + init_completion(&hpe->res_ready); + + hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist, + hash_hairpin_info(peer_id, match_prio)); + mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); params.log_data_size = 15; params.log_data_size = min_t(u8, params.log_data_size, @@ -659,9 +854,11 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, params.num_channels = link_speed64; hp = mlx5e_hairpin_create(priv, ¶ms, peer_ifindex); + hpe->hp = hp; + complete_all(&hpe->res_ready); if (IS_ERR(hp)) { err = PTR_ERR(hp); - goto create_hairpin_err; + goto out_err; } netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n", @@ -669,46 +866,39 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, dev_name(hp->pair->peer_mdev->device), hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets); - hpe->hp = hp; - hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist, - hash_hairpin_info(peer_id, match_prio)); - attach_flow: if (hpe->hp->num_channels > 1) { - flow->flags |= MLX5E_TC_FLOW_HAIRPIN_RSS; + flow_flag_set(flow, HAIRPIN_RSS); flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t; } else { flow->nic_attr->hairpin_tirn = hpe->hp->tirn; } + + flow->hpe = hpe; + spin_lock(&hpe->flows_lock); list_add(&flow->hairpin, &hpe->flows); + 
spin_unlock(&hpe->flows_lock); return 0; -create_hairpin_err: - kfree(hpe); +out_err: + mlx5e_hairpin_put(priv, hpe); return err; } static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { - struct list_head *next = flow->hairpin.next; + /* flow wasn't fully initialized */ + if (!flow->hpe) + return; + spin_lock(&flow->hpe->flows_lock); list_del(&flow->hairpin); + spin_unlock(&flow->hpe->flows_lock); - /* no more hairpin flows for us, release the hairpin pair */ - if (list_empty(next)) { - struct mlx5e_hairpin_entry *hpe; - - hpe = list_entry(next, struct mlx5e_hairpin_entry, flows); - - netdev_dbg(priv->netdev, "del hairpin: peer %s\n", - dev_name(hpe->hp->pair->peer_mdev->device)); - - mlx5e_hairpin_destroy(hpe->hp); - hash_del(&hpe->hairpin_hlist); - kfree(hpe); - } + mlx5e_hairpin_put(priv, flow->hpe); + flow->hpe = NULL; } static int @@ -723,22 +913,20 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5_flow_destination dest[2] = {}; struct mlx5_flow_act flow_act = { .action = attr->action, - .reformat_id = 0, .flags = FLOW_ACT_NO_APPEND, }; struct mlx5_fc *counter = NULL; - bool table_created = false; int err, dest_ix = 0; flow_context->flags |= FLOW_CONTEXT_HAS_TAG; flow_context->flow_tag = attr->flow_tag; - if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) { + if (flow_flag_test(flow, HAIRPIN)) { err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); - if (err) { - goto err_add_hairpin_flow; - } - if (flow->flags & MLX5E_TC_FLOW_HAIRPIN_RSS) { + if (err) + return err; + + if (flow_flag_test(flow, HAIRPIN_RSS)) { dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[dest_ix].ft = attr->hairpin_ft; } else { @@ -754,10 +942,9 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(dev, true); - if (IS_ERR(counter)) { - err = PTR_ERR(counter); - goto err_fc_create; - } + if (IS_ERR(counter)) + return PTR_ERR(counter); + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; dest[dest_ix].counter_id = mlx5_fc_id(counter); dest_ix++; @@ -766,14 +953,16 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); - flow_act.modify_id = attr->mod_hdr_id; + flow_act.modify_hdr = attr->modify_hdr; kfree(parse_attr->mod_hdr_actions); if (err) - goto err_create_mod_hdr_id; + return err; } + mutex_lock(&priv->fs.tc.t_lock); if (IS_ERR_OR_NULL(priv->fs.tc.t)) { - int tc_grp_size, tc_tbl_size; + struct mlx5_flow_table_attr ft_attr = {}; + int tc_grp_size, tc_tbl_size, tc_num_grps; u32 max_flow_counter; max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | @@ -783,23 +972,23 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS, BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size))); + tc_num_grps = MLX5E_TC_TABLE_NUM_GROUPS; + ft_attr.prio = MLX5E_TC_PRIO; + ft_attr.max_fte = tc_tbl_size; + ft_attr.level = MLX5E_TC_FT_LEVEL; + ft_attr.autogroup.max_num_groups = tc_num_grps; priv->fs.tc.t = mlx5_create_auto_grouped_flow_table(priv->fs.ns, - MLX5E_TC_PRIO, - tc_tbl_size, - MLX5E_TC_TABLE_NUM_GROUPS, - MLX5E_TC_FT_LEVEL, 0); + &ft_attr); if (IS_ERR(priv->fs.tc.t)) { + mutex_unlock(&priv->fs.tc.t_lock); NL_SET_ERR_MSG_MOD(extack, "Failed to create tc offload table\n"); netdev_err(priv->netdev, "Failed to create tc offload table\n"); - err = PTR_ERR(priv->fs.tc.t); - goto err_create_ft; + return 
PTR_ERR(priv->fs.tc.t); } - - table_created = true; } if (attr->match_level != MLX5_MATCH_NONE) @@ -807,29 +996,9 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec, &flow_act, dest, dest_ix); + mutex_unlock(&priv->fs.tc.t_lock); - if (IS_ERR(flow->rule[0])) { - err = PTR_ERR(flow->rule[0]); - goto err_add_rule; - } - - return 0; - -err_add_rule: - if (table_created) { - mlx5_destroy_flow_table(priv->fs.tc.t); - priv->fs.tc.t = NULL; - } -err_create_ft: - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - mlx5e_detach_mod_hdr(priv, flow); -err_create_mod_hdr_id: - mlx5_fc_destroy(dev, counter); -err_fc_create: - if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) - mlx5e_hairpin_flow_del(priv, flow); -err_add_hairpin_flow: - return err; + return PTR_ERR_OR_ZERO(flow->rule[0]); } static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, @@ -839,18 +1008,21 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, struct mlx5_fc *counter = NULL; counter = attr->counter; - mlx5_del_flow_rules(flow->rule[0]); + if (!IS_ERR_OR_NULL(flow->rule[0])) + mlx5_del_flow_rules(flow->rule[0]); mlx5_fc_destroy(priv->mdev, counter); - if (!mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD) && priv->fs.tc.t) { + mutex_lock(&priv->fs.tc.t_lock); + if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && priv->fs.tc.t) { mlx5_destroy_flow_table(priv->fs.tc.t); priv->fs.tc.t = NULL; } + mutex_unlock(&priv->fs.tc.t_lock); if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); - if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) + if (flow_flag_test(flow, HAIRPIN)) mlx5e_hairpin_flow_del(priv, flow); } @@ -885,7 +1057,6 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, } } - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; return rule; } @@ -894,7 +1065,7 @@ mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow, struct mlx5_esw_flow_attr *attr) { - flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; + flow_flag_clear(flow, OFFLOADED); if (attr->split_count) mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); @@ -913,11 +1084,11 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; slow_attr->split_count = 0; - slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN; + slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); if (!IS_ERR(rule)) - flow->flags |= MLX5E_TC_FLOW_SLOW; + flow_flag_set(flow, SLOW); return rule; } @@ -930,9 +1101,28 @@ mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; slow_attr->split_count = 0; - slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN; + slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); - flow->flags &= ~MLX5E_TC_FLOW_SLOW; + flow_flag_clear(flow, SLOW); +} + +/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this + * function. + */ +static void unready_flow_add(struct mlx5e_tc_flow *flow, + struct list_head *unready_flows) +{ + flow_flag_set(flow, NOT_READY); + list_add_tail(&flow->unready, unready_flows); +} + +/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this + * function. 
+ */ +static void unready_flow_del(struct mlx5e_tc_flow *flow) +{ + list_del(&flow->unready); + flow_flag_clear(flow, NOT_READY); } static void add_unready_flow(struct mlx5e_tc_flow *flow) @@ -945,14 +1135,24 @@ static void add_unready_flow(struct mlx5e_tc_flow *flow) rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); uplink_priv = &rpriv->uplink_priv; - flow->flags |= MLX5E_TC_FLOW_NOT_READY; - list_add_tail(&flow->unready, &uplink_priv->unready_flows); + mutex_lock(&uplink_priv->unready_flows_lock); + unready_flow_add(flow, &uplink_priv->unready_flows); + mutex_unlock(&uplink_priv->unready_flows_lock); } static void remove_unready_flow(struct mlx5e_tc_flow *flow) { - list_del(&flow->unready); - flow->flags &= ~MLX5E_TC_FLOW_NOT_READY; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *rpriv; + struct mlx5_eswitch *esw; + + esw = flow->priv->mdev->priv.eswitch; + rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &rpriv->uplink_priv; + + mutex_lock(&uplink_priv->unready_flows_lock); + unready_flow_del(flow); + mutex_unlock(&uplink_priv->unready_flows_lock); } static int @@ -961,33 +1161,37 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - u32 max_chain = mlx5_eswitch_get_chain_range(esw); struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; - u16 max_prio = mlx5_eswitch_get_prio_range(esw); struct net_device *out_dev, *encap_dev = NULL; struct mlx5_fc *counter = NULL; struct mlx5e_rep_priv *rpriv; struct mlx5e_priv *out_priv; bool encap_valid = true; + u32 max_prio, max_chain; int err = 0; int out_index; - if (!mlx5_eswitch_prios_supported(esw) && attr->prio != 1) { + if (!mlx5_esw_chains_prios_supported(esw) && attr->prio != 1) { NL_SET_ERR_MSG(extack, "E-switch priorities unsupported, upgrade FW"); return -EOPNOTSUPP; } - if (attr->chain > max_chain) { + /* We check chain range only for tc flows. + * For ft flows, we checked attr->chain was originally 0 and set it to + * FDB_FT_CHAIN which is outside tc range. + * See mlx5e_rep_setup_ft_cb(). 
+ */ + max_chain = mlx5_esw_chains_get_chain_range(esw); + if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) { NL_SET_ERR_MSG(extack, "Requested chain is out of supported range"); - err = -EOPNOTSUPP; - goto err_max_prio_chain; + return -EOPNOTSUPP; } + max_prio = mlx5_esw_chains_get_prio_range(esw); if (attr->prio > max_prio) { NL_SET_ERR_MSG(extack, "Requested priority is out of supported range"); - err = -EOPNOTSUPP; - goto err_max_prio_chain; + return -EOPNOTSUPP; } for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { @@ -1002,7 +1206,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, err = mlx5e_attach_encap(priv, flow, out_dev, out_index, extack, &encap_dev, &encap_valid); if (err) - goto err_attach_encap; + return err; out_priv = netdev_priv(encap_dev); rpriv = out_priv->ppriv; @@ -1012,21 +1216,19 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, err = mlx5_eswitch_add_vlan_action(esw, attr); if (err) - goto err_add_vlan; + return err; if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); kfree(parse_attr->mod_hdr_actions); if (err) - goto err_mod_hdr; + return err; } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(attr->counter_dev, true); - if (IS_ERR(counter)) { - err = PTR_ERR(counter); - goto err_create_counter; - } + if (IS_ERR(counter)) + return PTR_ERR(counter); attr->counter = counter; } @@ -1044,27 +1246,12 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr); } - if (IS_ERR(flow->rule[0])) { - err = PTR_ERR(flow->rule[0]); - goto err_add_rule; - } + if (IS_ERR(flow->rule[0])) + return PTR_ERR(flow->rule[0]); + else + flow_flag_set(flow, OFFLOADED); return 0; - -err_add_rule: - mlx5_fc_destroy(attr->counter_dev, counter); -err_create_counter: - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - mlx5e_detach_mod_hdr(priv, flow); -err_mod_hdr: - mlx5_eswitch_del_vlan_action(esw, attr); -err_add_vlan: - for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) - if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) - mlx5e_detach_encap(priv, flow, out_index); -err_attach_encap: -err_max_prio_chain: - return err; } static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) @@ -1088,14 +1275,14 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr slow_attr; int out_index; - if (flow->flags & MLX5E_TC_FLOW_NOT_READY) { + if (flow_flag_test(flow, NOT_READY)) { remove_unready_flow(flow); kvfree(attr->parse_attr); return; } - if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { - if (flow->flags & MLX5E_TC_FLOW_SLOW) + if (mlx5e_is_offloaded_flow(flow)) { + if (flow_flag_test(flow, SLOW)) mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr); else mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); @@ -1107,8 +1294,10 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, mlx5_eswitch_del_vlan_action(esw, attr); for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) - if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) + if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) { mlx5e_detach_encap(priv, flow, out_index); + kfree(attr->parse_attr->tun_info[out_index]); + } kvfree(attr->parse_attr); if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) @@ -1119,39 +1308,39 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, } void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e) + 
struct mlx5e_encap_entry *e, + struct list_head *flow_list) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr slow_attr, *esw_attr; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; - struct encap_flow_item *efi; struct mlx5e_tc_flow *flow; int err; - err = mlx5_packet_reformat_alloc(priv->mdev, - e->reformat_type, - e->encap_size, e->encap_header, - MLX5_FLOW_NAMESPACE_FDB, - &e->encap_id); - if (err) { - mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %d\n", - err); + e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, + e->encap_size, e->encap_header, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(e->pkt_reformat)) { + mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n", + PTR_ERR(e->pkt_reformat)); return; } e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(priv); - list_for_each_entry(efi, &e->flows, list) { + list_for_each_entry(flow, flow_list, tmp_list) { bool all_flow_encaps_valid = true; int i; - flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); + if (!mlx5e_is_offloaded_flow(flow)) + continue; esw_attr = flow->esw_attr; spec = &esw_attr->parse_attr->spec; - esw_attr->dests[efi->index].encap_id = e->encap_id; - esw_attr->dests[efi->index].flags |= MLX5_ESW_DEST_ENCAP_VALID; + esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat; + esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; /* Flow can be associated with multiple encap entries. * Before offloading the flow verify that all of them have * a valid neighbour. @@ -1177,30 +1366,32 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, } mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr); - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when slow path rule removed */ flow->rule[0] = rule; + /* was unset when slow path rule removed */ + flow_flag_set(flow, OFFLOADED); } } void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e) + struct mlx5e_encap_entry *e, + struct list_head *flow_list) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr slow_attr; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; - struct encap_flow_item *efi; struct mlx5e_tc_flow *flow; int err; - list_for_each_entry(efi, &e->flows, list) { - flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); + list_for_each_entry(flow, flow_list, tmp_list) { + if (!mlx5e_is_offloaded_flow(flow)) + continue; spec = &flow->esw_attr->parse_attr->spec; /* update from encap rule to slow path rule */ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec, &slow_attr); /* mark the flow's encap dest as non-valid */ - flow->esw_attr->dests[efi->index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; + flow->esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -1210,28 +1401,102 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, } mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr); - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when fast path rule removed */ flow->rule[0] = rule; + /* was unset when fast path rule removed */ + flow_flag_set(flow, OFFLOADED); } /* we know that the encap is valid */ e->flags &= ~MLX5_ENCAP_ENTRY_VALID; - mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id); + mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); } static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow) { - if 
(flow->flags & MLX5E_TC_FLOW_ESWITCH) + if (mlx5e_is_eswitch_flow(flow)) return flow->esw_attr->counter; else return flow->nic_attr->counter; } +/* Takes reference to all flows attached to encap and adds the flows to + * flow_list using 'tmp_list' list_head in mlx5e_tc_flow. + */ +void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list) +{ + struct encap_flow_item *efi; + struct mlx5e_tc_flow *flow; + + list_for_each_entry(efi, &e->flows, list) { + flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); + if (IS_ERR(mlx5e_flow_get(flow))) + continue; + wait_for_completion(&flow->init_done); + + flow->tmp_efi_index = efi->index; + list_add(&flow->tmp_list, flow_list); + } +} + +/* Iterate over tmp_list of flows attached to flow_list head. */ +void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list) +{ + struct mlx5e_tc_flow *flow, *tmp; + + list_for_each_entry_safe(flow, tmp, flow_list, tmp_list) + mlx5e_flow_put(priv, flow); +} + +static struct mlx5e_encap_entry * +mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_encap_entry *next = NULL; + +retry: + rcu_read_lock(); + + /* find encap with non-zero reference counter value */ + for (next = e ? + list_next_or_null_rcu(&nhe->encap_list, + &e->encap_list, + struct mlx5e_encap_entry, + encap_list) : + list_first_or_null_rcu(&nhe->encap_list, + struct mlx5e_encap_entry, + encap_list); + next; + next = list_next_or_null_rcu(&nhe->encap_list, + &next->encap_list, + struct mlx5e_encap_entry, + encap_list)) + if (mlx5e_encap_take(next)) + break; + + rcu_read_unlock(); + + /* release starting encap */ + if (e) + mlx5e_encap_put(netdev_priv(e->out_dev), e); + if (!next) + return next; + + /* wait for encap to be fully initialized */ + wait_for_completion(&next->res_ready); + /* continue searching if encap entry is not in valid state after completion */ + if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) { + e = next; + goto retry; + } + + return next; +} + void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) { struct mlx5e_neigh *m_neigh = &nhe->m_neigh; + struct mlx5e_encap_entry *e = NULL; struct mlx5e_tc_flow *flow; - struct mlx5e_encap_entry *e; struct mlx5_fc *counter; struct neigh_table *tbl; bool neigh_used = false; @@ -1242,19 +1507,30 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) tbl = &arp_tbl; #if IS_ENABLED(CONFIG_IPV6) else if (m_neigh->family == AF_INET6) - tbl = &nd_tbl; + tbl = ipv6_stub->nd_tbl; #endif else return; - list_for_each_entry(e, &nhe->encap_list, encap_list) { - struct encap_flow_item *efi; - if (!(e->flags & MLX5_ENCAP_ENTRY_VALID)) - continue; - list_for_each_entry(efi, &e->flows, list) { + /* mlx5e_get_next_valid_encap() releases previous encap before returning + * next one. 
+ */ + while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) { + struct mlx5e_priv *priv = netdev_priv(e->out_dev); + struct encap_flow_item *efi, *tmp; + struct mlx5_eswitch *esw; + LIST_HEAD(flow_list); + + esw = priv->mdev->priv.eswitch; + mutex_lock(&esw->offloads.encap_tbl_lock); + list_for_each_entry_safe(efi, tmp, &e->flows, list) { flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); - if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + if (IS_ERR(mlx5e_flow_get(flow))) + continue; + list_add(&flow->tmp_list, &flow_list); + + if (mlx5e_is_offloaded_flow(flow)) { counter = mlx5e_tc_get_counter(flow); lastuse = mlx5_fc_query_lastuse(counter); if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) { @@ -1263,10 +1539,18 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) } } } - if (neigh_used) + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_put_encap_flow_list(priv, &flow_list); + if (neigh_used) { + /* release current encap before breaking the loop */ + mlx5e_encap_put(priv, e); break; + } } + trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used); + if (neigh_used) { nhe->reported_lastuse = jiffies; @@ -1282,43 +1566,76 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) } } -static void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, int out_index) +static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { - struct list_head *next = flow->encaps[out_index].list.next; - - list_del(&flow->encaps[out_index].list); - if (list_empty(next)) { - struct mlx5e_encap_entry *e; + WARN_ON(!list_empty(&e->flows)); - e = list_entry(next, struct mlx5e_encap_entry, flows); + if (e->compl_result > 0) { mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); if (e->flags & MLX5_ENCAP_ENTRY_VALID) - mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id); + mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); + } + + kfree(e->tun_info); + kfree(e->encap_header); + kfree_rcu(e, rcu); +} + +void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock)) + return; + hash_del_rcu(&e->encap_hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_encap_dealloc(priv, e); +} + +static void mlx5e_detach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, int out_index) +{ + struct mlx5e_encap_entry *e = flow->encaps[out_index].e; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + /* flow wasn't fully initialized */ + if (!e) + return; - hash_del_rcu(&e->encap_hlist); - kfree(e->encap_header); - kfree(e); + mutex_lock(&esw->offloads.encap_tbl_lock); + list_del(&flow->encaps[out_index].list); + flow->encaps[out_index].e = NULL; + if (!refcount_dec_and_test(&e->refcnt)) { + mutex_unlock(&esw->offloads.encap_tbl_lock); + return; } + hash_del_rcu(&e->encap_hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_encap_dealloc(priv, e); } static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch; - if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) || - !(flow->flags & MLX5E_TC_FLOW_DUP)) + if (!flow_flag_test(flow, ESWITCH) || + !flow_flag_test(flow, DUP)) return; mutex_lock(&esw->offloads.peer_mutex); list_del(&flow->peer); mutex_unlock(&esw->offloads.peer_mutex); - flow->flags &= ~MLX5E_TC_FLOW_DUP; + flow_flag_clear(flow, DUP); 
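The reference counting introduced here — mlx5e_encap_put() above and the peer-flow release just below — follows one recurring idiom. The sketch that follows is illustrative only; foo_entry, foo_take, foo_put and foo_tbl_lock are made-up names, not symbols from the patch, and it simply mirrors the shape of mlx5e_encap_put() (refcount_dec_and_mutex_lock() against the table lock) versus the plain refcount_dec_and_test() used for the peer flow, where no table is involved.

#include <linux/hashtable.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/refcount.h>
#include <linux/slab.h>

struct foo_entry {
	struct hlist_node node;
	struct rcu_head rcu;
	refcount_t refcnt;
};

static DEFINE_MUTEX(foo_tbl_lock);	/* protects the hash table */

/* Take a reference only while the entry is still live (refcnt != 0). */
static bool foo_take(struct foo_entry *e)
{
	return refcount_inc_not_zero(&e->refcnt);
}

/* Drop a reference; only the last put unhashes and frees the entry,
 * and it does so with the table lock held so a concurrent lookup
 * cannot pick up a dying entry.
 */
static void foo_put(struct foo_entry *e)
{
	if (!refcount_dec_and_mutex_lock(&e->refcnt, &foo_tbl_lock))
		return;
	hash_del_rcu(&e->node);
	mutex_unlock(&foo_tbl_lock);
	kfree_rcu(e, rcu);
}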
+ + if (refcount_dec_and_test(&flow->peer_flow->refcnt)) { + mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow); + kfree(flow->peer_flow); + } - mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow); - kvfree(flow->peer_flow); flow->peer_flow = NULL; } @@ -1339,7 +1656,7 @@ static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { + if (mlx5e_is_eswitch_flow(flow)) { mlx5e_tc_del_fdb_peer_flow(flow); mlx5e_tc_del_fdb_flow(priv, flow); } else { @@ -1369,46 +1686,63 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, return err; } - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { - struct flow_match_ipv4_addrs match; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct flow_match_control match; + u16 addr_type; - flow_rule_match_enc_ipv4_addrs(rule, &match); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - src_ipv4_src_ipv6.ipv4_layout.ipv4, - ntohl(match.mask->src)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - src_ipv4_src_ipv6.ipv4_layout.ipv4, - ntohl(match.key->src)); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4, - ntohl(match.mask->dst)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4, - ntohl(match.key->dst)); - - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP); - } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { - struct flow_match_ipv6_addrs match; + flow_rule_match_enc_control(rule, &match); + addr_type = match.key->addr_type; - flow_rule_match_enc_ipv6_addrs(rule, &match); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - src_ipv4_src_ipv6.ipv6_layout.ipv6), - &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - src_ipv4_src_ipv6.ipv6_layout.ipv6), - &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + /* For tunnel addr_type used same key id`s as for non-tunnel */ + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + flow_rule_match_enc_ipv4_addrs(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4, + ntohl(match.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4, + ntohl(match.key->src)); - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4, + ntohl(match.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4, + ntohl(match.key->dst)); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, + ethertype); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, + ETH_P_IP); + } else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_enc_ipv6_addrs(rule, &match); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + 
src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, + ethertype); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, + ETH_P_IPV6); + } } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { @@ -1476,6 +1810,40 @@ static void *get_match_headers_value(u32 flags, outer_headers); } +static int mlx5e_flower_parse_meta(struct net_device *filter_dev, + struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct net_device *ingress_dev; + struct flow_match_meta match; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) + return 0; + + flow_rule_match_meta(rule, &match); + if (match.mask->ingress_ifindex != 0xFFFFFFFF) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask"); + return -EINVAL; + } + + ingress_dev = __dev_get_by_index(dev_net(filter_dev), + match.key->ingress_ifindex); + if (!ingress_dev) { + NL_SET_ERR_MSG_MOD(extack, + "Can't find the ingress port to match on"); + return -EINVAL; + } + + if (ingress_dev != filter_dev) { + NL_SET_ERR_MSG_MOD(extack, + "Can't match on the ingress filter port"); + return -EINVAL; + } + + return 0; +} + static int __parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct flow_cls_offload *f, @@ -1496,6 +1864,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, u16 addr_type = 0; u8 ip_proto = 0; u8 *match_level; + int err; match_level = outer_match_level; @@ -1539,6 +1908,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, spec); } + err = mlx5e_flower_parse_meta(filter_dev, f); + if (err) + return err; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { struct flow_match_basic match; @@ -1596,7 +1969,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, *match_level = MLX5_MATCH_L2; } } else if (*match_level != MLX5_MATCH_NONE) { - MLX5_SET(fte_match_set_lyr_2_4, headers_c, svlan_tag, 1); + /* cvlan_tag enabled in match criteria and + * disabled in match value means both S & C tags + * don't exist (untagged of both) + */ MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); *match_level = MLX5_MATCH_L2; } @@ -1840,6 +2216,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = dev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep; + bool is_eswitch_flow; int err; inner_match_level = MLX5_MATCH_NONE; @@ -1850,7 +2227,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv, non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ? 
outer_match_level : inner_match_level; - if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) { + is_eswitch_flow = mlx5e_is_eswitch_flow(flow); + if (!err && is_eswitch_flow) { rep = rpriv->rep; if (rep->vport != MLX5_VPORT_UPLINK && (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && @@ -1864,7 +2242,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, } } - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { + if (is_eswitch_flow) { flow->esw_attr->inner_match_level = inner_match_level; flow->esw_attr->outer_match_level = outer_match_level; } else { @@ -1921,13 +2299,14 @@ out_err: struct mlx5_fields { u8 field; - u8 size; + u8 field_bsize; + u32 field_mask; u32 offset; u32 match_offset; }; -#define OFFLOAD(fw_field, size, field, off, match_field) \ - {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, size, \ +#define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \ + {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \ offsetof(struct pedit_headers, field) + (off), \ MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)} @@ -1945,18 +2324,18 @@ struct mlx5_fields { }) static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp, - void *matchmaskp, int size) + void *matchmaskp, u8 bsize) { bool same = false; - switch (size) { - case sizeof(u8): + switch (bsize) { + case 8: same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp); break; - case sizeof(u16): + case 16: same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp); break; - case sizeof(u32): + case 32: same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp); break; } @@ -1965,41 +2344,43 @@ static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp, } static struct mlx5_fields fields[] = { - OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0, dmac_47_16), - OFFLOAD(DMAC_15_0, 2, eth.h_dest[4], 0, dmac_15_0), - OFFLOAD(SMAC_47_16, 4, eth.h_source[0], 0, smac_47_16), - OFFLOAD(SMAC_15_0, 2, eth.h_source[4], 0, smac_15_0), - OFFLOAD(ETHERTYPE, 2, eth.h_proto, 0, ethertype), - OFFLOAD(FIRST_VID, 2, vlan.h_vlan_TCI, 0, first_vid), - - OFFLOAD(IP_TTL, 1, ip4.ttl, 0, ttl_hoplimit), - OFFLOAD(SIPV4, 4, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4), - OFFLOAD(DIPV4, 4, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), - - OFFLOAD(SIPV6_127_96, 4, ip6.saddr.s6_addr32[0], 0, + OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16), + OFFLOAD(DMAC_15_0, 16, U16_MAX, eth.h_dest[4], 0, dmac_15_0), + OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16), + OFFLOAD(SMAC_15_0, 16, U16_MAX, eth.h_source[4], 0, smac_15_0), + OFFLOAD(ETHERTYPE, 16, U16_MAX, eth.h_proto, 0, ethertype), + OFFLOAD(FIRST_VID, 16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid), + + OFFLOAD(IP_DSCP, 8, 0xfc, ip4.tos, 0, ip_dscp), + OFFLOAD(IP_TTL, 8, U8_MAX, ip4.ttl, 0, ttl_hoplimit), + OFFLOAD(SIPV4, 32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4), + OFFLOAD(DIPV4, 32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + + OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0, src_ipv4_src_ipv6.ipv6_layout.ipv6[0]), - OFFLOAD(SIPV6_95_64, 4, ip6.saddr.s6_addr32[1], 0, + OFFLOAD(SIPV6_95_64, 32, U32_MAX, ip6.saddr.s6_addr32[1], 0, src_ipv4_src_ipv6.ipv6_layout.ipv6[4]), - OFFLOAD(SIPV6_63_32, 4, ip6.saddr.s6_addr32[2], 0, + OFFLOAD(SIPV6_63_32, 32, U32_MAX, ip6.saddr.s6_addr32[2], 0, src_ipv4_src_ipv6.ipv6_layout.ipv6[8]), - OFFLOAD(SIPV6_31_0, 4, ip6.saddr.s6_addr32[3], 0, + OFFLOAD(SIPV6_31_0, 32, U32_MAX, ip6.saddr.s6_addr32[3], 0, src_ipv4_src_ipv6.ipv6_layout.ipv6[12]), - 
OFFLOAD(DIPV6_127_96, 4, ip6.daddr.s6_addr32[0], 0, + OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0, dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]), - OFFLOAD(DIPV6_95_64, 4, ip6.daddr.s6_addr32[1], 0, + OFFLOAD(DIPV6_95_64, 32, U32_MAX, ip6.daddr.s6_addr32[1], 0, dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]), - OFFLOAD(DIPV6_63_32, 4, ip6.daddr.s6_addr32[2], 0, + OFFLOAD(DIPV6_63_32, 32, U32_MAX, ip6.daddr.s6_addr32[2], 0, dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]), - OFFLOAD(DIPV6_31_0, 4, ip6.daddr.s6_addr32[3], 0, + OFFLOAD(DIPV6_31_0, 32, U32_MAX, ip6.daddr.s6_addr32[3], 0, dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]), - OFFLOAD(IPV6_HOPLIMIT, 1, ip6.hop_limit, 0, ttl_hoplimit), + OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit), - OFFLOAD(TCP_SPORT, 2, tcp.source, 0, tcp_sport), - OFFLOAD(TCP_DPORT, 2, tcp.dest, 0, tcp_dport), - OFFLOAD(TCP_FLAGS, 1, tcp.ack_seq, 5, tcp_flags), + OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport), + OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest, 0, tcp_dport), + /* in linux iphdr tcp_flags is 8 bits long */ + OFFLOAD(TCP_FLAGS, 8, U8_MAX, tcp.ack_seq, 5, tcp_flags), - OFFLOAD(UDP_SPORT, 2, udp.source, 0, udp_sport), - OFFLOAD(UDP_DPORT, 2, udp.dest, 0, udp_dport), + OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport), + OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport), }; /* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at @@ -2012,19 +2393,17 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, struct netlink_ext_ack *extack) { struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; - void *headers_c = get_match_headers_criteria(*action_flags, - &parse_attr->spec); - void *headers_v = get_match_headers_value(*action_flags, - &parse_attr->spec); int i, action_size, nactions, max_actions, first, last, next_z; - void *s_masks_p, *a_masks_p, *vals_p; + void *headers_c, *headers_v, *action, *vals_p; + u32 *s_masks_p, *a_masks_p, s_mask, a_mask; struct mlx5_fields *f; - u8 cmd, field_bsize; - u32 s_mask, a_mask; unsigned long mask; __be32 mask_be32; __be16 mask_be16; - void *action; + u8 cmd; + + headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec); + headers_v = get_match_headers_value(*action_flags, &parse_attr->spec); set_masks = &hdrs[0].masks; add_masks = &hdrs[1].masks; @@ -2049,8 +2428,8 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, s_masks_p = (void *)set_masks + f->offset; a_masks_p = (void *)add_masks + f->offset; - memcpy(&s_mask, s_masks_p, f->size); - memcpy(&a_mask, a_masks_p, f->size); + s_mask = *s_masks_p & f->field_mask; + a_mask = *a_masks_p & f->field_mask; if (!s_mask && !a_mask) /* nothing to offload here */ continue; @@ -2079,38 +2458,34 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, vals_p = (void *)set_vals + f->offset; /* don't rewrite if we have a match on the same value */ if (cmp_val_mask(vals_p, s_masks_p, match_val, - match_mask, f->size)) + match_mask, f->field_bsize)) skip = true; /* clear to denote we consumed this field */ - memset(s_masks_p, 0, f->size); + *s_masks_p &= ~f->field_mask; } else { - u32 zero = 0; - cmd = MLX5_ACTION_TYPE_ADD; mask = a_mask; vals_p = (void *)add_vals + f->offset; /* add 0 is no change */ - if (!memcmp(vals_p, &zero, f->size)) + if ((*(u32 *)vals_p & f->field_mask) == 0) skip = true; /* clear to denote we consumed this field */ - memset(a_masks_p, 0, f->size); + *a_masks_p &= ~f->field_mask; } if (skip) continue; - field_bsize = f->size * 
BITS_PER_BYTE; - - if (field_bsize == 32) { + if (f->field_bsize == 32) { mask_be32 = *(__be32 *)&mask; mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32)); - } else if (field_bsize == 16) { + } else if (f->field_bsize == 16) { mask_be16 = *(__be16 *)&mask; mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16)); } - first = find_first_bit(&mask, field_bsize); - next_z = find_next_zero_bit(&mask, field_bsize, first); - last = find_last_bit(&mask, field_bsize); + first = find_first_bit(&mask, f->field_bsize); + next_z = find_next_zero_bit(&mask, f->field_bsize, first); + last = find_last_bit(&mask, f->field_bsize); if (first < next_z && next_z < last) { NL_SET_ERR_MSG_MOD(extack, "rewrite of few sub-fields isn't supported"); @@ -2123,16 +2498,22 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, MLX5_SET(set_action_in, action, field, f->field); if (cmd == MLX5_ACTION_TYPE_SET) { - MLX5_SET(set_action_in, action, offset, first); + int start; + + /* if field is bit sized it can start not from first bit */ + start = find_first_bit((unsigned long *)&f->field_mask, + f->field_bsize); + + MLX5_SET(set_action_in, action, offset, first - start); /* length is num of bits to be written, zero means length of 32 */ MLX5_SET(set_action_in, action, length, (last - first + 1)); } - if (field_bsize == 32) + if (f->field_bsize == 32) MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first); - else if (field_bsize == 16) + else if (f->field_bsize == 16) MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first); - else if (field_bsize == 8) + else if (f->field_bsize == 8) MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first); action += action_size; @@ -2385,14 +2766,15 @@ static bool actions_match_supported(struct mlx5e_priv *priv, { u32 actions; - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + if (mlx5e_is_eswitch_flow(flow)) actions = flow->esw_attr->action; else actions = flow->nic_attr->action; - if (flow->flags & MLX5E_TC_FLOW_EGRESS && + if (flow_flag_test(flow, EGRESS) && !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) || - (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP))) + (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || + (actions & MLX5_FLOW_CONTEXT_ACTION_DROP))) return false; if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) @@ -2504,6 +2886,10 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, flow_action_for_each(i, act, flow_action) { switch (act->id) { + case FLOW_ACTION_ACCEPT: + action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + break; case FLOW_ACTION_DROP: action |= MLX5_FLOW_CONTEXT_ACTION_DROP; if (MLX5_CAP_FLOWTABLE(priv->mdev, @@ -2542,7 +2928,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, if (priv->netdev->netdev_ops == peer_dev->netdev_ops && same_hw_devs(priv, netdev_priv(peer_dev))) { parse_attr->mirred_ifindex[0] = peer_dev->ifindex; - flow->flags |= MLX5E_TC_FLOW_HAIRPIN; + flow_flag_set(flow, HAIRPIN); action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; } else { @@ -2629,6 +3015,57 @@ static bool is_merged_eswitch_dev(struct mlx5e_priv *priv, +bool mlx5e_encap_take(struct mlx5e_encap_entry *e) +{ + return refcount_inc_not_zero(&e->refcnt); +} + +static struct mlx5e_encap_entry * +mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key, + uintptr_t hash_key) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_encap_entry *e; + struct encap_key e_key; + + 
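A worked example may help with the new field_bsize/field_mask handling in offload_pedit_fields() above. The function below is only an illustration (pedit_dscp_example is not a symbol from the patch); it walks the IP_DSCP entry from the fields[] table, whose mask is 0xfc inside the 8-bit tos byte.

#include <linux/bitops.h>
#include <linux/printk.h>

/* How a pedit rewrite of the whole DSCP value maps onto the firmware's
 * set_action_in offset/length encoding, given field_mask = 0xfc and
 * field_bsize = 8 from the fields[] table above.
 */
static void pedit_dscp_example(void)
{
	unsigned long field_mask = 0xfc;	/* fw field: ip_dscp */
	unsigned long pedit_mask = 0xfc;	/* user rewrites all of DSCP */
	unsigned int bsize = 8;
	unsigned int first, last, next_z, start, offset, length;

	first  = find_first_bit(&pedit_mask, bsize);		/* 2 */
	next_z = find_next_zero_bit(&pedit_mask, bsize, first);	/* 8 */
	last   = find_last_bit(&pedit_mask, bsize);		/* 7 */
	start  = find_first_bit(&field_mask, bsize);		/* 2 */

	/* first < next_z && next_z < last would mean a non-contiguous
	 * rewrite, which the driver rejects.  Here the run is contiguous.
	 */
	offset = first - start;		/* 0 */
	length = last - first + 1;	/* 6, the 6-bit ip_dscp field */

	/* The value itself is shifted right by 'first' bits before MLX5_SET. */
	pr_info("dscp rewrite -> offset %u, length %u\n", offset, length);
}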
hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, + encap_hlist, hash_key) { + e_key.ip_tun_key = &e->tun_info->key; + e_key.tc_tunnel = e->tunnel; + if (!cmp_encap_info(&e_key, key) && + mlx5e_encap_take(e)) + return e; + } + + return NULL; +} + +static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info) +{ + size_t tun_size = sizeof(*tun_info) + tun_info->options_len; + + return kmemdup(tun_info, tun_size, GFP_KERNEL); +} + +static bool is_duplicated_encap_entry(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + int out_index, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + int i; + + for (i = 0; i < out_index; i++) { + if (flow->encaps[i].e != e) + continue; + NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action"); + netdev_err(priv->netdev, "can't duplicate encap action\n"); + return true; + } + + return false; +} + static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct net_device *mirred_dev, @@ -2641,11 +3078,10 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; const struct ip_tunnel_info *tun_info; - struct encap_key key, e_key; + struct encap_key key; struct mlx5e_encap_entry *e; unsigned short family; uintptr_t hash_key; - bool found = false; int err = 0; parse_attr = attr->parse_attr; @@ -2660,56 +3096,91 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, hash_key = hash_encap_info(&key); - hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, - encap_hlist, hash_key) { - e_key.ip_tun_key = &e->tun_info->key; - e_key.tc_tunnel = e->tunnel; - if (!cmp_encap_info(&e_key, &key)) { - found = true; - break; - } - } + mutex_lock(&esw->offloads.encap_tbl_lock); + e = mlx5e_encap_get(priv, &key, hash_key); /* must verify if encap is valid or not */ - if (found) + if (e) { + /* Check that entry was not already attached to this flow */ + if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) { + err = -EOPNOTSUPP; + goto out_err; + } + + mutex_unlock(&esw->offloads.encap_tbl_lock); + wait_for_completion(&e->res_ready); + + /* Protect against concurrent neigh update. */ + mutex_lock(&esw->offloads.encap_tbl_lock); + if (e->compl_result < 0) { + err = -EREMOTEIO; + goto out_err; + } goto attach_flow; + } e = kzalloc(sizeof(*e), GFP_KERNEL); - if (!e) - return -ENOMEM; + if (!e) { + err = -ENOMEM; + goto out_err; + } + + refcount_set(&e->refcnt, 1); + init_completion(&e->res_ready); + tun_info = dup_tun_info(tun_info); + if (!tun_info) { + err = -ENOMEM; + goto out_err_init; + } e->tun_info = tun_info; err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack); if (err) - goto out_err; + goto out_err_init; INIT_LIST_HEAD(&e->flows); + hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); + mutex_unlock(&esw->offloads.encap_tbl_lock); if (family == AF_INET) err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e); else if (family == AF_INET6) err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e); - if (err) + /* Protect against concurrent neigh update. 
*/ + mutex_lock(&esw->offloads.encap_tbl_lock); + complete_all(&e->res_ready); + if (err) { + e->compl_result = err; goto out_err; - - hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); + } + e->compl_result = 1; attach_flow: + flow->encaps[out_index].e = e; list_add(&flow->encaps[out_index].list, &e->flows); flow->encaps[out_index].index = out_index; *encap_dev = e->out_dev; if (e->flags & MLX5_ENCAP_ENTRY_VALID) { - attr->dests[out_index].encap_id = e->encap_id; + attr->dests[out_index].pkt_reformat = e->pkt_reformat; attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; *encap_valid = true; } else { *encap_valid = false; } + mutex_unlock(&esw->offloads.encap_tbl_lock); return err; out_err: + mutex_unlock(&esw->offloads.encap_tbl_lock); + if (e) + mlx5e_encap_put(priv, e); + return err; + +out_err_init: + mutex_unlock(&esw->offloads.encap_tbl_lock); + kfree(tun_info); kfree(e); return err; } @@ -2797,7 +3268,7 @@ static int add_vlan_pop_action(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr *attr, u32 *action) { - int nest_level = vlan_get_encap_level(attr->parse_attr->filter_dev); + int nest_level = attr->parse_attr->filter_dev->lower_level; struct flow_action_entry vlan_act = { .id = FLOW_ACTION_VLAN_POP, }; @@ -2822,6 +3293,26 @@ bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, same_hw_devs(priv, netdev_priv(out_dev)); } +static bool is_duplicated_output_device(struct net_device *dev, + struct net_device *out_dev, + int *ifindexes, int if_count, + struct netlink_ext_ack *extack) +{ + int i; + + for (i = 0; i < if_count; i++) { + if (ifindexes[i] == out_dev->ifindex) { + NL_SET_ERR_MSG_MOD(extack, + "can't duplicate output to same device"); + netdev_err(dev, "can't duplicate output to same device: %s\n", + out_dev->name); + return true; + } + } + + return false; +} + static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, struct mlx5e_tc_flow *flow, @@ -2833,10 +3324,12 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; struct mlx5e_rep_priv *rpriv = priv->ppriv; const struct ip_tunnel_info *info = NULL; + int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS]; + bool ft_flow = mlx5e_is_ft_flow(flow); const struct flow_action_entry *act; + int err, i, if_count = 0; bool encap = false; u32 action = 0; - int err, i; if (!flow_action_has_entries(flow_action)) return -EINVAL; @@ -2877,6 +3370,14 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return -EINVAL; } + if (ft_flow && out_dev == priv->netdev) { + /* Ignore forward to self rules generated + * by adding both mlx5 devs to the flow table + * block on a normal nft offload setup. 
+ */ + return -EOPNOTSUPP; + } + if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) { NL_SET_ERR_MSG_MOD(extack, "can't support more output ports, can't offload forwarding"); @@ -2887,15 +3388,42 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; - if (netdev_port_same_parent_id(priv->netdev, out_dev)) { + if (encap) { + parse_attr->mirred_ifindex[attr->out_count] = + out_dev->ifindex; + parse_attr->tun_info[attr->out_count] = dup_tun_info(info); + if (!parse_attr->tun_info[attr->out_count]) + return -ENOMEM; + encap = false; + attr->dests[attr->out_count].flags |= + MLX5_ESW_DEST_ENCAP; + attr->out_count++; + /* attr->dests[].rep is resolved when we + * handle encap + */ + } else if (netdev_port_same_parent_id(priv->netdev, out_dev)) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); - struct net_device *uplink_upper = netdev_master_upper_dev_get(uplink_dev); + struct net_device *uplink_upper; + if (is_duplicated_output_device(priv->netdev, + out_dev, + ifindexes, + if_count, + extack)) + return -EOPNOTSUPP; + + ifindexes[if_count] = out_dev->ifindex; + if_count++; + + rcu_read_lock(); + uplink_upper = + netdev_master_upper_dev_get_rcu(uplink_dev); if (uplink_upper && netif_is_lag_master(uplink_upper) && uplink_upper == out_dev) out_dev = uplink_dev; + rcu_read_unlock(); if (is_vlan_dev(out_dev)) { err = add_vlan_push_action(priv, attr, @@ -2925,17 +3453,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, attr->dests[attr->out_count].rep = rpriv->rep; attr->dests[attr->out_count].mdev = out_priv->mdev; attr->out_count++; - } else if (encap) { - parse_attr->mirred_ifindex[attr->out_count] = - out_dev->ifindex; - parse_attr->tun_info[attr->out_count] = info; - encap = false; - attr->dests[attr->out_count].flags |= - MLX5_ESW_DEST_ENCAP; - attr->out_count++; - /* attr->dests[].rep is resolved when we - * handle encap - */ } else if (parse_attr->filter_dev != priv->netdev) { /* All mlx5 devices are called to configure * high level device filters. 
Therefore, the @@ -2993,8 +3510,12 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, break; case FLOW_ACTION_GOTO: { u32 dest_chain = act->chain_index; - u32 max_chain = mlx5_eswitch_get_chain_range(esw); + u32 max_chain = mlx5_esw_chains_get_chain_range(esw); + if (ft_flow) { + NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported"); + return -EOPNOTSUPP; + } if (dest_chain <= attr->chain) { NL_SET_ERR_MSG(extack, "Goto earlier chain isn't supported"); return -EOPNOTSUPP; @@ -3056,6 +3577,12 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } + if (!(attr->action & + (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { + NL_SET_ERR_MSG(extack, "Rule must have at least one forward/drop action"); + return -EOPNOTSUPP; + } + if (attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) { NL_SET_ERR_MSG_MOD(extack, "current firmware doesn't support split rule for port mirroring"); @@ -3066,19 +3593,21 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return 0; } -static void get_flags(int flags, u16 *flow_flags) +static void get_flags(int flags, unsigned long *flow_flags) { - u16 __flow_flags = 0; + unsigned long __flow_flags = 0; - if (flags & MLX5E_TC_INGRESS) - __flow_flags |= MLX5E_TC_FLOW_INGRESS; - if (flags & MLX5E_TC_EGRESS) - __flow_flags |= MLX5E_TC_FLOW_EGRESS; + if (flags & MLX5_TC_FLAG(INGRESS)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS); + if (flags & MLX5_TC_FLAG(EGRESS)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS); - if (flags & MLX5E_TC_ESW_OFFLOAD) - __flow_flags |= MLX5E_TC_FLOW_ESWITCH; - if (flags & MLX5E_TC_NIC_OFFLOAD) - __flow_flags |= MLX5E_TC_FLOW_NIC; + if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH); + if (flags & MLX5_TC_FLAG(NIC_OFFLOAD)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC); + if (flags & MLX5_TC_FLAG(FT_OFFLOAD)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT); *flow_flags = __flow_flags; } @@ -3090,12 +3619,13 @@ static const struct rhashtable_params tc_ht_params = { .automatic_shrinking = true, }; -static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, int flags) +static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, + unsigned long flags) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_rep_priv *uplink_rpriv; - if (flags & MLX5E_TC_ESW_OFFLOAD) { + if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) { uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); return &uplink_rpriv->uplink_priv.tc_ht; } else /* NIC offload */ @@ -3106,7 +3636,7 @@ static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) { struct mlx5_esw_flow_attr *attr = flow->esw_attr; bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK && - flow->flags & MLX5E_TC_FLOW_INGRESS; + flow_flag_test(flow, INGRESS); bool act_is_encap = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT); bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom, @@ -3125,13 +3655,13 @@ static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) static int mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, - struct flow_cls_offload *f, u16 flow_flags, + struct flow_cls_offload *f, unsigned long flow_flags, struct mlx5e_tc_flow_parse_attr **__parse_attr, struct mlx5e_tc_flow **__flow) { struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_flow *flow; - int err; + int out_index, err; flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL); parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); 
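The switch from a u16 flags word to an unsigned long plus flow_flag_set()/flow_flag_test() is the backbone of this rework; the helpers themselves are defined earlier in en_tc.c, outside this hunk. Under that assumption, a minimal equivalent looks like the sketch below — demo_flow and the demo_* helpers are made-up names, not the driver's.

#include <linux/bitops.h>
#include <linux/types.h>

struct demo_flow {
	unsigned long flags;	/* bit numbers, e.g. MLX5E_TC_FLOW_FLAG_ESWITCH */
};

/* Atomic bitops let concurrent paths (add, delete, neigh update, stats)
 * flip individual state bits without a lock, which is why 'flags' grew
 * from u16 bit masks to unsigned long bit numbers.
 */
static inline void demo_flow_flag_set(struct demo_flow *flow, int bit)
{
	set_bit(bit, &flow->flags);
}

static inline void demo_flow_flag_clear(struct demo_flow *flow, int bit)
{
	clear_bit(bit, &flow->flags);
}

static inline bool demo_flow_flag_test(const struct demo_flow *flow, int bit)
{
	return test_bit(bit, &flow->flags);
}

static inline bool demo_flow_flag_test_and_set(struct demo_flow *flow, int bit)
{
	return test_and_set_bit(bit, &flow->flags);
}

The MLX5_TC_FLAG() values passed in from the classifier entry points remain plain BIT() masks, and get_flags() above translates them into the per-flow bit numbers before the flow is allocated.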
@@ -3143,6 +3673,12 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, flow->cookie = f->cookie; flow->flags = flow_flags; flow->priv = priv; + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) + INIT_LIST_HEAD(&flow->encaps[out_index].list); + INIT_LIST_HEAD(&flow->mod_hdr); + INIT_LIST_HEAD(&flow->hairpin); + refcount_set(&flow->refcnt, 1); + init_completion(&flow->init_done); *__flow = flow; *__parse_attr = parse_attr; @@ -3182,7 +3718,7 @@ mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr, static struct mlx5e_tc_flow * __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct flow_cls_offload *f, - u16 flow_flags, + unsigned long flow_flags, struct net_device *filter_dev, struct mlx5_eswitch_rep *in_rep, struct mlx5_core_dev *in_mdev) @@ -3193,7 +3729,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow; int attr_size, err; - flow_flags |= MLX5E_TC_FLOW_ESWITCH; + flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH); attr_size = sizeof(struct mlx5_esw_flow_attr); err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, &parse_attr, &flow); @@ -3215,6 +3751,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, goto err_free; err = mlx5e_tc_add_fdb_flow(priv, flow, extack); + complete_all(&flow->init_done); if (err) { if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev))) goto err_free; @@ -3225,15 +3762,14 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, return flow; err_free: - kfree(flow); - kvfree(parse_attr); + mlx5e_flow_put(priv, flow); out: return ERR_PTR(err); } static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, struct mlx5e_tc_flow *flow, - u16 flow_flags) + unsigned long flow_flags) { struct mlx5e_priv *priv = flow->priv, *peer_priv; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw; @@ -3271,7 +3807,7 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, } flow->peer_flow = peer_flow; - flow->flags |= MLX5E_TC_FLOW_DUP; + flow_flag_set(flow, DUP); mutex_lock(&esw->offloads.peer_mutex); list_add_tail(&flow->peer, &esw->offloads.peer_flows); mutex_unlock(&esw->offloads.peer_mutex); @@ -3284,7 +3820,7 @@ out: static int mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct flow_cls_offload *f, - u16 flow_flags, + unsigned long flow_flags, struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) { @@ -3318,7 +3854,7 @@ out: static int mlx5e_add_nic_flow(struct mlx5e_priv *priv, struct flow_cls_offload *f, - u16 flow_flags, + unsigned long flow_flags, struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) { @@ -3332,7 +3868,7 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv, if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common)) return -EOPNOTSUPP; - flow_flags |= MLX5E_TC_FLOW_NIC; + flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC); attr_size = sizeof(struct mlx5_nic_flow_attr); err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, &parse_attr, &flow); @@ -3353,14 +3889,14 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv, if (err) goto err_free; - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + flow_flag_set(flow, OFFLOADED); kvfree(parse_attr); *__flow = flow; return 0; err_free: - kfree(flow); + mlx5e_flow_put(priv, flow); kvfree(parse_attr); out: return err; @@ -3369,12 +3905,12 @@ out: static int mlx5e_tc_add_flow(struct mlx5e_priv *priv, struct flow_cls_offload *f, - int flags, + unsigned long flags, struct net_device *filter_dev, struct mlx5e_tc_flow **flow) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - u16 flow_flags; + unsigned long flow_flags; int err; 
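Several objects in this patch (flow->init_done just above, the encap entry's res_ready earlier) become visible to concurrent readers before their setup finishes, so readers synchronize on a completion and then check a result field. A rough sketch of that handshake, using a hypothetical demo_obj rather than the driver's structures:

#include <linux/completion.h>
#include <linux/errno.h>

struct demo_obj {
	struct completion ready;
	int compl_result;	/* <0 error, >0 success; set before complete_all() */
};

static void demo_obj_init(struct demo_obj *obj)
{
	init_completion(&obj->ready);
	obj->compl_result = 0;
}

/* Creator: publish to the table first, finish setup, then wake waiters. */
static void demo_obj_publish(struct demo_obj *obj, int setup_err)
{
	obj->compl_result = setup_err ? setup_err : 1;
	complete_all(&obj->ready);
}

/* Concurrent user that found 'obj' via a table lookup. */
static int demo_obj_use(struct demo_obj *obj)
{
	wait_for_completion(&obj->ready);
	if (obj->compl_result < 0)
		return -EREMOTEIO;	/* same bail-out mlx5e_attach_encap() uses */
	return 0;
}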
get_flags(flags, &flow_flags); @@ -3393,14 +3929,16 @@ mlx5e_tc_add_flow(struct mlx5e_priv *priv, } int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct flow_cls_offload *f, int flags) + struct flow_cls_offload *f, unsigned long flags) { struct netlink_ext_ack *extack = f->common.extack; struct rhashtable *tc_ht = get_tc_ht(priv, flags); struct mlx5e_tc_flow *flow; int err = 0; - flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); + rcu_read_lock(); + flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params); + rcu_read_unlock(); if (flow) { NL_SET_ERR_MSG_MOD(extack, "flow cookie already exists, ignoring"); @@ -3411,55 +3949,68 @@ int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, goto out; } + trace_mlx5e_configure_flower(f); err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow); if (err) goto out; - err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params); + err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params); if (err) goto err_free; return 0; err_free: - mlx5e_tc_del_flow(priv, flow); - kfree(flow); + mlx5e_flow_put(priv, flow); out: return err; } -#define DIRECTION_MASK (MLX5E_TC_INGRESS | MLX5E_TC_EGRESS) -#define FLOW_DIRECTION_MASK (MLX5E_TC_FLOW_INGRESS | MLX5E_TC_FLOW_EGRESS) - static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags) { - if ((flow->flags & FLOW_DIRECTION_MASK) == (flags & DIRECTION_MASK)) - return true; + bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS)); + bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS)); - return false; + return flow_flag_test(flow, INGRESS) == dir_ingress && + flow_flag_test(flow, EGRESS) == dir_egress; } int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct flow_cls_offload *f, int flags) + struct flow_cls_offload *f, unsigned long flags) { struct rhashtable *tc_ht = get_tc_ht(priv, flags); struct mlx5e_tc_flow *flow; + int err; - flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); - if (!flow || !same_flow_direction(flow, flags)) - return -EINVAL; + rcu_read_lock(); + flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params); + if (!flow || !same_flow_direction(flow, flags)) { + err = -EINVAL; + goto errout; + } + /* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag + * set. 
+ */ + if (flow_flag_test_and_set(flow, DELETED)) { + err = -EINVAL; + goto errout; + } rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params); + rcu_read_unlock(); - mlx5e_tc_del_flow(priv, flow); - - kfree(flow); + trace_mlx5e_delete_flower(f); + mlx5e_flow_put(priv, flow); return 0; + +errout: + rcu_read_unlock(); + return err; } int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct flow_cls_offload *f, int flags) + struct flow_cls_offload *f, unsigned long flags) { struct mlx5_devcom *devcom = priv->mdev->priv.devcom; struct rhashtable *tc_ht = get_tc_ht(priv, flags); @@ -3469,15 +4020,24 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, u64 lastuse = 0; u64 packets = 0; u64 bytes = 0; + int err = 0; - flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); - if (!flow || !same_flow_direction(flow, flags)) - return -EINVAL; + rcu_read_lock(); + flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie, + tc_ht_params)); + rcu_read_unlock(); + if (IS_ERR(flow)) + return PTR_ERR(flow); - if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + if (!same_flow_direction(flow, flags)) { + err = -EINVAL; + goto errout; + } + + if (mlx5e_is_offloaded_flow(flow)) { counter = mlx5e_tc_get_counter(flow); if (!counter) - return 0; + goto errout; mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); } @@ -3489,8 +4049,8 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, if (!peer_esw) goto out; - if ((flow->flags & MLX5E_TC_FLOW_DUP) && - (flow->peer_flow->flags & MLX5E_TC_FLOW_OFFLOADED)) { + if (flow_flag_test(flow, DUP) && + flow_flag_test(flow->peer_flow, OFFLOADED)) { u64 bytes2; u64 packets2; u64 lastuse2; @@ -3509,15 +4069,122 @@ no_peer_counter: mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); out: flow_stats_update(&f->stats, bytes, packets, lastuse); + trace_mlx5e_stats_flower(f); +errout: + mlx5e_flow_put(priv, flow); + return err; +} + +static int apply_police_params(struct mlx5e_priv *priv, u32 rate, + struct netlink_ext_ack *extack) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch *esw; + u16 vport_num; + u32 rate_mbps; + int err; + + vport_num = rpriv->rep->vport; + if (vport_num >= MLX5_VPORT_ECPF) { + NL_SET_ERR_MSG_MOD(extack, + "Ingress rate limit is supported only for Eswitch ports connected to VFs"); + return -EOPNOTSUPP; + } + + esw = priv->mdev->priv.eswitch; + /* rate is given in bytes/sec. + * First convert to bits/sec and then round to the nearest mbit/secs. + * mbit means million bits. + * Moreover, if rate is non zero we choose to configure to a minimum of + * 1 mbit/sec. + */ + rate_mbps = rate ? 
max_t(u32, (rate * 8 + 500000) / 1000000, 1) : 0; + err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps); + if (err) + NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware"); + + return err; +} + +static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, + struct flow_action *flow_action, + struct netlink_ext_ack *extack) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + const struct flow_action_entry *act; + int err; + int i; + + if (!flow_action_has_entries(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, "matchall called with no action"); + return -EINVAL; + } + + if (!flow_offload_has_one_action(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action"); + return -EOPNOTSUPP; + } + + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_POLICE: + err = apply_police_params(priv, act->police.rate_bytes_ps, extack); + if (err) + return err; + + rpriv->prev_vf_vport_stats = priv->stats.vf_vport; + break; + default: + NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall"); + return -EOPNOTSUPP; + } + } return 0; } +int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + struct netlink_ext_ack *extack = ma->common.extack; + + if (ma->common.prio != 1) { + NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported"); + return -EINVAL; + } + + return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack); +} + +int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + struct netlink_ext_ack *extack = ma->common.extack; + + return apply_police_params(priv, 0, extack); +} + +void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct rtnl_link_stats64 cur_stats; + u64 dbytes; + u64 dpkts; + + cur_stats = priv->stats.vf_vport; + dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets; + dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes; + rpriv->prev_vf_vport_stats = cur_stats; + flow_stats_update(&ma->stats, dpkts, dbytes, jiffies); +} + static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) { struct mlx5_core_dev *peer_mdev = peer_priv->mdev; - struct mlx5e_hairpin_entry *hpe; + struct mlx5e_hairpin_entry *hpe, *tmp; + LIST_HEAD(init_wait_list); u16 peer_vhca_id; int bkt; @@ -3526,9 +4193,18 @@ static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv, peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id); - hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) { - if (hpe->peer_vhca_id == peer_vhca_id) + mutex_lock(&priv->fs.tc.hairpin_tbl_lock); + hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) + if (refcount_inc_not_zero(&hpe->refcnt)) + list_add(&hpe->dead_peer_wait_list, &init_wait_list); + mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); + + list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) { + wait_for_completion(&hpe->res_ready); + if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id) hpe->hp->pair->peer_gone = true; + + mlx5e_hairpin_put(priv, hpe); } } @@ -3564,7 +4240,10 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) struct mlx5e_tc_table *tc = &priv->fs.tc; int err; - hash_init(tc->mod_hdr_tbl); + mutex_init(&tc->t_lock); + mutex_init(&tc->mod_hdr.lock); + hash_init(tc->mod_hdr.hlist); + mutex_init(&tc->hairpin_tbl_lock); hash_init(tc->hairpin_tbl); err = 
rhashtable_init(&tc->ht, &tc_ht_params); @@ -3572,7 +4251,10 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) return err; tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; - if (register_netdevice_notifier(&tc->netdevice_nb)) { + err = register_netdevice_notifier_dev_net(priv->netdev, + &tc->netdevice_nb, + &tc->netdevice_nn); + if (err) { tc->netdevice_nb.notifier_call = NULL; mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n"); } @@ -3594,7 +4276,12 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) struct mlx5e_tc_table *tc = &priv->fs.tc; if (tc->netdevice_nb.notifier_call) - unregister_netdevice_notifier(&tc->netdevice_nb); + unregister_netdevice_notifier_dev_net(priv->netdev, + &tc->netdevice_nb, + &tc->netdevice_nn); + + mutex_destroy(&tc->mod_hdr.lock); + mutex_destroy(&tc->hairpin_tbl_lock); rhashtable_destroy(&tc->ht); @@ -3602,6 +4289,7 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) mlx5_destroy_flow_table(tc->t); tc->t = NULL; } + mutex_destroy(&tc->t_lock); } int mlx5e_tc_esw_init(struct rhashtable *tc_ht) @@ -3614,7 +4302,7 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); } -int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags) +int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags) { struct rhashtable *tc_ht = get_tc_ht(priv, flags); @@ -3636,10 +4324,10 @@ void mlx5e_tc_reoffload_flows_work(struct work_struct *work) reoffload_flows_work); struct mlx5e_tc_flow *flow, *tmp; - rtnl_lock(); + mutex_lock(&rpriv->unready_flows_lock); list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) { if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL)) - remove_unready_flow(flow); + unready_flow_del(flow); } - rtnl_unlock(); + mutex_unlock(&rpriv->unready_flows_lock); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 3ab39275ca7d..262cdb7b69b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -40,13 +40,16 @@ #ifdef CONFIG_MLX5_ESWITCH enum { - MLX5E_TC_INGRESS = BIT(0), - MLX5E_TC_EGRESS = BIT(1), - MLX5E_TC_NIC_OFFLOAD = BIT(2), - MLX5E_TC_ESW_OFFLOAD = BIT(3), - MLX5E_TC_LAST_EXPORTED_BIT = 3, + MLX5E_TC_FLAG_INGRESS_BIT, + MLX5E_TC_FLAG_EGRESS_BIT, + MLX5E_TC_FLAG_NIC_OFFLOAD_BIT, + MLX5E_TC_FLAG_ESW_OFFLOAD_BIT, + MLX5E_TC_FLAG_FT_OFFLOAD_BIT, + MLX5E_TC_FLAG_LAST_EXPORTED_BIT = MLX5E_TC_FLAG_FT_OFFLOAD_BIT, }; +#define MLX5_TC_FLAG(flag) BIT(MLX5E_TC_FLAG_##flag##_BIT) + int mlx5e_tc_nic_init(struct mlx5e_priv *priv); void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv); @@ -54,23 +57,37 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht); void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht); int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct flow_cls_offload *f, int flags); + struct flow_cls_offload *f, unsigned long flags); int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct flow_cls_offload *f, int flags); + struct flow_cls_offload *f, unsigned long flags); int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct flow_cls_offload *f, int flags); + struct flow_cls_offload *f, unsigned long flags); + +int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *f); +int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *f); +void mlx5e_tc_stats_matchall(struct mlx5e_priv 
*priv, + struct tc_cls_matchall_offload *ma); struct mlx5e_encap_entry; void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e); + struct mlx5e_encap_entry *e, + struct list_head *flow_list); void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e); + struct mlx5e_encap_entry *e, + struct list_head *flow_list); +bool mlx5e_encap_take(struct mlx5e_encap_entry *e); +void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e); + +void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list); +void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list); struct mlx5e_neigh_hash_entry; void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe); -int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags); +int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags); void mlx5e_tc_reoffload_flows_work(struct work_struct *work); @@ -80,7 +97,11 @@ bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, #else /* CONFIG_MLX5_ESWITCH */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} -static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags) { return 0; } +static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv, + unsigned long flags) +{ + return 0; +} #endif #endif /* __MLX5_EN_TC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 600e92cb629a..ee60383adc5b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -93,7 +93,7 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, if (txq_ix >= num_channels) txq_ix = priv->txq2sq[txq_ix]->ch_ix; - return priv->channel_tc2txq[txq_ix][up]; + return priv->channel_tc2realtxq[txq_ix][up]; } static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb) @@ -210,7 +210,7 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb, } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; int fsz = skb_frag_size(frag); dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, @@ -292,8 +292,7 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; stats->packets += skb_shinfo(skb)->gso_segs; } else { - u8 mode = mlx5e_transport_inline_tx_wqe(wqe) ? 
- MLX5_INLINE_MODE_TCP_UDP : sq->min_inline_mode; + u8 mode = mlx5e_tx_wqe_inline_mode(sq, &wqe->ctrl, skb); opcode = MLX5_OPCODE_SEND; mss = 0; @@ -404,7 +403,10 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) static void mlx5e_dump_error_cqe(struct mlx5e_txqsq *sq, struct mlx5_err_cqe *err_cqe) { - u32 ci = mlx5_cqwq_get_ci(&sq->cq.wq); + struct mlx5_cqwq *wq = &sq->cq.wq; + u32 ci; + + ci = mlx5_cqwq_ctr2ix(wq, wq->cc - 1); netdev_err(sq->channel->netdev, "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, opcode 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n", @@ -449,28 +451,17 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) i = 0; do { + struct mlx5e_tx_wqe_info *wi; u16 wqe_counter; bool last_wqe; + u16 ci; mlx5_cqwq_pop(&cq->wq); wqe_counter = be16_to_cpu(cqe->wqe_counter); - if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) { - if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, - &sq->state)) { - mlx5e_dump_error_cqe(sq, - (struct mlx5_err_cqe *)cqe); - queue_work(cq->channel->priv->wq, - &sq->recover_work); - } - stats->cqe_err++; - } - do { - struct mlx5e_tx_wqe_info *wi; struct sk_buff *skb; - u16 ci; int j; last_wqe = (sqcc == wqe_counter); @@ -480,14 +471,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) skb = wi->skb; if (unlikely(!skb)) { -#ifdef CONFIG_MLX5_EN_TLS - if (wi->resync_dump_frag) { - struct mlx5e_sq_dma *dma = - mlx5e_dma_get(sq, dma_fifo_cc++); - - mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma); - } -#endif + mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, &dma_fifo_cc); sqcc += wi->num_wqebbs; continue; } @@ -515,6 +499,18 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) napi_consume_skb(skb, napi_budget); } while (!last_wqe); + if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) { + if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, + &sq->state)) { + mlx5e_dump_error_cqe(sq, + (struct mlx5_err_cqe *)cqe); + mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs); + queue_work(cq->channel->priv->wq, + &sq->recover_work); + } + stats->cqe_err++; + } + } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); stats->cqes += i; @@ -543,29 +539,38 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq) { struct mlx5e_tx_wqe_info *wi; struct sk_buff *skb; + u32 dma_fifo_cc; + u16 sqcc; u16 ci; int i; - while (sq->cc != sq->pc) { - ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc); + sqcc = sq->cc; + dma_fifo_cc = sq->dma_fifo_cc; + + while (sqcc != sq->pc) { + ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); wi = &sq->db.wqe_info[ci]; skb = wi->skb; - if (!skb) { /* nop */ - sq->cc++; + if (!skb) { + mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, &dma_fifo_cc); + sqcc += wi->num_wqebbs; continue; } for (i = 0; i < wi->num_dma; i++) { struct mlx5e_sq_dma *dma = - mlx5e_dma_get(sq, sq->dma_fifo_cc++); + mlx5e_dma_get(sq, dma_fifo_cc++); mlx5e_tx_dma_unmap(sq->pdev, dma); } dev_kfree_skb_any(skb); - sq->cc += wi->num_wqebbs; + sqcc += wi->num_wqebbs; } + + sq->dma_fifo_cc = dma_fifo_cc; + sq->cc = sqcc; } #ifdef CONFIG_MLX5_CORE_IPOIB @@ -608,9 +613,11 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; stats->packets += skb_shinfo(skb)->gso_segs; } else { + u8 mode = mlx5e_tx_wqe_inline_mode(sq, NULL, skb); + opcode = MLX5_OPCODE_SEND; mss = 0; - ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb); + ihs = mlx5e_calc_min_inline(mode, skb); num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); stats->packets++; } diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 49b06b256c92..257a7c9f7a14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -33,6 +33,7 @@ #include <linux/irq.h> #include "en.h" #include "en/xdp.h" +#include "en/xsk/rx.h" #include "en/xsk/tx.h" static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) @@ -81,6 +82,29 @@ void mlx5e_trigger_irq(struct mlx5e_icosq *sq) mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nopwqe->ctrl); } +static bool mlx5e_napi_xsk_post(struct mlx5e_xdpsq *xsksq, struct mlx5e_rq *xskrq) +{ + bool busy_xsk = false, xsk_rx_alloc_err; + + /* Handle the race between the application querying need_wakeup and the + * driver setting it: + * 1. Update need_wakeup both before and after the TX. If it goes to + * "yes", it can only happen with the first update. + * 2. If the application queried need_wakeup before we set it, the + * packets will be transmitted anyway, even w/o a wakeup. + * 3. Give a chance to clear need_wakeup after new packets were queued + * for TX. + */ + mlx5e_xsk_update_tx_wakeup(xsksq); + busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET); + mlx5e_xsk_update_tx_wakeup(xsksq); + + xsk_rx_alloc_err = xskrq->post_wqes(xskrq); + busy_xsk |= mlx5e_xsk_update_rx_wakeup(xskrq, xsk_rx_alloc_err); + + return busy_xsk; +} + int mlx5e_napi_poll(struct napi_struct *napi, int budget) { struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, @@ -122,8 +146,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) if (xsk_open) { mlx5e_poll_ico_cq(&c->xskicosq.cq); busy |= mlx5e_poll_xdpsq_cq(&xsksq->cq); - busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET); - busy_xsk |= xskrq->post_wqes(xskrq); + busy_xsk |= mlx5e_napi_xsk_post(xsksq, xskrq); } busy |= busy_xsk; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 41f25ea2e8d9..cccea3a8eddd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -156,7 +156,8 @@ static int mlx5_eq_comp_int(struct notifier_block *nb, cq->comp(cq, eqe); mlx5_cq_put(cq); } else { - mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn); + dev_dbg_ratelimited(eq->dev->device, + "Completion event for bogus CQ 0x%x\n", cqn); } ++eq->cons_index; @@ -215,11 +216,7 @@ static int mlx5_eq_async_int(struct notifier_block *nb, */ dma_rmb(); - if (likely(eqe->type < MLX5_EVENT_TYPE_MAX)) - atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe); - else - mlx5_core_warn_once(dev, "notifier_call_chain is not setup for eqe: %d\n", eqe->type); - + atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe); atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe); ++eq->cons_index; @@ -328,10 +325,13 @@ err_buf: /** * mlx5_eq_enable - Enable EQ for receiving EQEs - * @dev - Device which owns the eq - * @eq - EQ to enable - * @nb - notifier call block - * mlx5_eq_enable - must be called after EQ is created in device. + * @dev : Device which owns the eq + * @eq : EQ to enable + * @nb : Notifier call block + * + * Must be called after EQ is created in device. 
+ * + * @return: 0 if no error */ int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct notifier_block *nb) @@ -348,11 +348,12 @@ int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, EXPORT_SYMBOL(mlx5_eq_enable); /** - * mlx5_eq_disable - Enable EQ for receiving EQEs - * @dev - Device which owns the eq - * @eq - EQ to disable - * @nb - notifier call block - * mlx5_eq_disable - must be called before EQ is destroyed. + * mlx5_eq_disable - Disable EQ for receiving EQEs + * @dev : Device which owns the eq + * @eq : EQ to disable + * @nb : Notifier call block + * + * Must be called before EQ is destroyed. */ void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct notifier_block *nb) @@ -415,7 +416,7 @@ void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) int mlx5_eq_table_init(struct mlx5_core_dev *dev) { struct mlx5_eq_table *eq_table; - int i, err; + int i; eq_table = kvzalloc(sizeof(*eq_table), GFP_KERNEL); if (!eq_table) @@ -423,9 +424,7 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev) dev->priv.eq_table = eq_table; - err = mlx5_eq_debugfs_init(dev); - if (err) - goto kvfree_eq_table; + mlx5_eq_debugfs_init(dev); mutex_init(&eq_table->lock); for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++) @@ -433,11 +432,6 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev) eq_table->irq_table = dev->priv.irq_table; return 0; - -kvfree_eq_table: - kvfree(eq_table); - dev->priv.eq_table = NULL; - return err; } void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev) @@ -570,6 +564,39 @@ static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4]) gather_user_async_events(dev, mask); } +static int +setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq, + struct mlx5_eq_param *param, const char *name) +{ + int err; + + eq->irq_nb.notifier_call = mlx5_eq_async_int; + + err = create_async_eq(dev, &eq->core, param); + if (err) { + mlx5_core_warn(dev, "failed to create %s EQ %d\n", name, err); + return err; + } + err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable %s EQ %d\n", name, err); + destroy_async_eq(dev, &eq->core); + } + return err; +} + +static void cleanup_async_eq(struct mlx5_core_dev *dev, + struct mlx5_eq_async *eq, const char *name) +{ + int err; + + mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); + err = destroy_async_eq(dev, &eq->core); + if (err) + mlx5_core_err(dev, "failed to destroy %s eq, err(%d)\n", + name, err); +} + static int create_async_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; @@ -579,77 +606,45 @@ static int create_async_eqs(struct mlx5_core_dev *dev) MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR); mlx5_eq_notifier_register(dev, &table->cq_err_nb); - table->cmd_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { .irq_index = 0, .nent = MLX5_NUM_CMD_EQE, + .mask[0] = 1ull << MLX5_EVENT_TYPE_CMD, }; - - param.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD; - err = create_async_eq(dev, &table->cmd_eq.core, ¶m); - if (err) { - mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); - goto err0; - } - err = mlx5_eq_enable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); - if (err) { - mlx5_core_warn(dev, "failed to enable cmd EQ %d\n", err); + err = setup_async_eq(dev, &table->cmd_eq, ¶m, "cmd"); + if (err) goto err1; - } + mlx5_cmd_use_events(dev); - table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { .irq_index = 0, .nent = 
MLX5_NUM_ASYNC_EQE, }; gather_async_events_mask(dev, param.mask); - err = create_async_eq(dev, &table->async_eq.core, ¶m); - if (err) { - mlx5_core_warn(dev, "failed to create async EQ %d\n", err); + err = setup_async_eq(dev, &table->async_eq, ¶m, "async"); + if (err) goto err2; - } - err = mlx5_eq_enable(dev, &table->async_eq.core, - &table->async_eq.irq_nb); - if (err) { - mlx5_core_warn(dev, "failed to enable async EQ %d\n", err); - goto err3; - } - table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { .irq_index = 0, .nent = /* TODO: sriov max_vf + */ 1, + .mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST, }; - param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST; - err = create_async_eq(dev, &table->pages_eq.core, ¶m); - if (err) { - mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); - goto err4; - } - err = mlx5_eq_enable(dev, &table->pages_eq.core, - &table->pages_eq.irq_nb); - if (err) { - mlx5_core_warn(dev, "failed to enable pages EQ %d\n", err); - goto err5; - } + err = setup_async_eq(dev, &table->pages_eq, ¶m, "pages"); + if (err) + goto err3; - return err; + return 0; -err5: - destroy_async_eq(dev, &table->pages_eq.core); -err4: - mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb); err3: - destroy_async_eq(dev, &table->async_eq.core); + cleanup_async_eq(dev, &table->async_eq, "async"); err2: mlx5_cmd_use_polling(dev); - mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); + cleanup_async_eq(dev, &table->cmd_eq, "cmd"); err1: - destroy_async_eq(dev, &table->cmd_eq.core); -err0: mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); return err; } @@ -657,28 +652,11 @@ err0: static void destroy_async_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - int err; - - mlx5_eq_disable(dev, &table->pages_eq.core, &table->pages_eq.irq_nb); - err = destroy_async_eq(dev, &table->pages_eq.core); - if (err) - mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", - err); - - mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb); - err = destroy_async_eq(dev, &table->async_eq.core); - if (err) - mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n", - err); + cleanup_async_eq(dev, &table->pages_eq, "pages"); + cleanup_async_eq(dev, &table->async_eq, "async"); mlx5_cmd_use_polling(dev); - - mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); - err = destroy_async_eq(dev, &table->cmd_eq.core); - if (err) - mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", - err); - + cleanup_async_eq(dev, &table->cmd_eq, "cmd"); mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); } @@ -945,9 +923,6 @@ int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb) { struct mlx5_eq_table *eqt = dev->priv.eq_table; - if (nb->event_type >= MLX5_EVENT_TYPE_MAX) - return -EINVAL; - return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb); } EXPORT_SYMBOL(mlx5_eq_notifier_register); @@ -956,9 +931,6 @@ int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb) { struct mlx5_eq_table *eqt = dev->priv.eq_table; - if (nb->event_type >= MLX5_EVENT_TYPE_MAX) - return -EINVAL; - return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb); } EXPORT_SYMBOL(mlx5_eq_notifier_unregister); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 1f3891fde2eb..5acf60b1bbfe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -58,20 +58,9 @@ struct vport_addr { bool mc_promisc; }; -enum { - UC_ADDR_CHANGE = BIT(0), - MC_ADDR_CHANGE = BIT(1), - PROMISC_CHANGE = BIT(3), -}; - static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw); static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw); -/* Vport context events */ -#define SRIOV_VPORT_EVENTS (UC_ADDR_CHANGE | \ - MC_ADDR_CHANGE | \ - PROMISC_CHANGE) - struct mlx5_vport *__must_check mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num) { @@ -108,13 +97,13 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, MLX5_SET(nic_vport_context, nic_vport_ctx, arm_change_event, 1); - if (events_mask & UC_ADDR_CHANGE) + if (events_mask & MLX5_VPORT_UC_ADDR_CHANGE) MLX5_SET(nic_vport_context, nic_vport_ctx, event_on_uc_address_change, 1); - if (events_mask & MC_ADDR_CHANGE) + if (events_mask & MLX5_VPORT_MC_ADDR_CHANGE) MLX5_SET(nic_vport_context, nic_vport_ctx, event_on_mc_address_change, 1); - if (events_mask & PROMISC_CHANGE) + if (events_mask & MLX5_VPORT_PROMISC_CHANGE) MLX5_SET(nic_vport_context, nic_vport_ctx, event_on_promisc_change, 1); @@ -122,42 +111,32 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, } /* E-Switch vport context HW commands */ -static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, - void *in, int inlen) +int mlx5_eswitch_modify_esw_vport_context(struct mlx5_core_dev *dev, u16 vport, + bool other_vport, + void *in, int inlen) { u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)] = {0}; MLX5_SET(modify_esw_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT); MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); - MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + MLX5_SET(modify_esw_vport_context_in, in, other_vport, other_vport); return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } -int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, - void *in, int inlen) -{ - return modify_esw_vport_context_cmd(esw->dev, vport, in, inlen); -} - -static int query_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, - void *out, int outlen) +int mlx5_eswitch_query_esw_vport_context(struct mlx5_core_dev *dev, u16 vport, + bool other_vport, + void *out, int outlen) { u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {}; MLX5_SET(query_esw_vport_context_in, in, opcode, MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); - MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + MLX5_SET(modify_esw_vport_context_in, in, other_vport, other_vport); return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } -int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, - void *out, int outlen) -{ - return query_esw_vport_context_cmd(esw->dev, vport, out, outlen); -} - static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport, u16 vlan, u8 qos, u8 set_flags) { @@ -190,7 +169,8 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport, MLX5_SET(modify_esw_vport_context_in, in, field_select.vport_cvlan_insert, 1); - return modify_esw_vport_context_cmd(dev, vport, in, sizeof(in)); + return mlx5_eswitch_modify_esw_vport_context(dev, vport, true, + in, sizeof(in)); } /* E-Switch FDB */ @@ -297,6 +277,7 @@ enum { static int esw_create_legacy_vepa_table(struct mlx5_eswitch *esw) { + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_core_dev 
*dev = esw->dev; struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb; @@ -309,8 +290,10 @@ static int esw_create_legacy_vepa_table(struct mlx5_eswitch *esw) } /* num FTE 2, num FG 2 */ - fdb = mlx5_create_auto_grouped_flow_table(root_ns, LEGACY_VEPA_PRIO, - 2, 2, 0, 0); + ft_attr.prio = LEGACY_VEPA_PRIO; + ft_attr.max_fte = 2; + ft_attr.autogroup.max_num_groups = 2; + fdb = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); if (IS_ERR(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create VEPA FDB err %d\n", err); @@ -470,6 +453,37 @@ static void esw_destroy_legacy_table(struct mlx5_eswitch *esw) esw_destroy_legacy_vepa_table(esw); } +#define MLX5_LEGACY_SRIOV_VPORT_EVENTS (MLX5_VPORT_UC_ADDR_CHANGE | \ + MLX5_VPORT_MC_ADDR_CHANGE | \ + MLX5_VPORT_PROMISC_CHANGE) + +static int esw_legacy_enable(struct mlx5_eswitch *esw) +{ + int ret; + + ret = esw_create_legacy_table(esw); + if (ret) + return ret; + + ret = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_LEGACY_SRIOV_VPORT_EVENTS); + if (ret) + esw_destroy_legacy_table(esw); + return ret; +} + +static void esw_legacy_disable(struct mlx5_eswitch *esw) +{ + struct esw_mc_addr *mc_promisc; + + mlx5_eswitch_disable_pf_vf_vports(esw); + + mc_promisc = &esw->mc_promisc; + if (mc_promisc->uplink_rule) + mlx5_del_flow_rules(mc_promisc->uplink_rule); + + esw_destroy_legacy_table(esw); +} + /* E-Switch vport UC/MC lists management */ typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, struct vport_addr *vaddr); @@ -483,7 +497,7 @@ static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) /* Skip mlx5_mpfs_add_mac for eswitch_managers, * it is already done by its netdev in mlx5e_execute_l2_action */ - if (esw->manager_vport == vport) + if (mlx5_esw_is_manager_vport(esw, vport)) goto fdb_add; err = mlx5_mpfs_add_mac(esw->dev, mac); @@ -512,10 +526,10 @@ static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) u16 vport = vaddr->vport; int err = 0; - /* Skip mlx5_mpfs_del_mac for eswitch managerss, + /* Skip mlx5_mpfs_del_mac for eswitch managers, * it is already done by its netdev in mlx5e_execute_l2_action */ - if (!vaddr->mpfs || esw->manager_vport == vport) + if (!vaddr->mpfs || mlx5_esw_is_manager_vport(esw, vport)) goto fdb_del; err = mlx5_mpfs_del_mac(esw->dev, mac); @@ -901,21 +915,21 @@ static void esw_vport_change_handle_locked(struct mlx5_vport *vport) esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n", vport->vport, mac); - if (vport->enabled_events & UC_ADDR_CHANGE) { + if (vport->enabled_events & MLX5_VPORT_UC_ADDR_CHANGE) { esw_update_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_UC); esw_apply_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_UC); } - if (vport->enabled_events & MC_ADDR_CHANGE) + if (vport->enabled_events & MLX5_VPORT_MC_ADDR_CHANGE) esw_update_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_MC); - if (vport->enabled_events & PROMISC_CHANGE) { + if (vport->enabled_events & MLX5_VPORT_PROMISC_CHANGE) { esw_update_vport_rx_mode(esw, vport); if (!IS_ERR_OR_NULL(vport->allmulti_rule)) esw_update_vport_mc_promisc(esw, vport); } - if (vport->enabled_events & (PROMISC_CHANGE | MC_ADDR_CHANGE)) + if (vport->enabled_events & (MLX5_VPORT_PROMISC_CHANGE | MLX5_VPORT_MC_ADDR_CHANGE)) esw_apply_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_MC); esw_debug(esw->dev, "vport[%d] Context Changed: Done\n", vport->vport); @@ -1022,14 +1036,15 @@ out: void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - if 
(!IS_ERR_OR_NULL(vport->egress.allowed_vlan)) + if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan)) { mlx5_del_flow_rules(vport->egress.allowed_vlan); + vport->egress.allowed_vlan = NULL; + } - if (!IS_ERR_OR_NULL(vport->egress.drop_rule)) - mlx5_del_flow_rules(vport->egress.drop_rule); - - vport->egress.allowed_vlan = NULL; - vport->egress.drop_rule = NULL; + if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_rule)) { + mlx5_del_flow_rules(vport->egress.legacy.drop_rule); + vport->egress.legacy.drop_rule = NULL; + } } void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw, @@ -1049,57 +1064,21 @@ void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw, vport->egress.acl = NULL; } -int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +static int +esw_vport_create_legacy_ingress_acl_groups(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_core_dev *dev = esw->dev; - struct mlx5_flow_namespace *root_ns; - struct mlx5_flow_table *acl; struct mlx5_flow_group *g; void *match_criteria; u32 *flow_group_in; - /* The ingress acl table contains 4 groups - * (2 active rules at the same time - - * 1 allow rule from one of the first 3 groups. - * 1 drop rule from the last group): - * 1)Allow untagged traffic with smac=original mac. - * 2)Allow untagged traffic. - * 3)Allow traffic with smac=original mac. - * 4)Drop all other traffic. - */ - int table_size = 4; - int err = 0; - - if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) - return -EOPNOTSUPP; - - if (!IS_ERR_OR_NULL(vport->ingress.acl)) - return 0; - - esw_debug(dev, "Create vport[%d] ingress ACL log_max_size(%d)\n", - vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size)); - - root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, - mlx5_eswitch_vport_num_to_index(esw, vport->vport)); - if (!root_ns) { - esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n", vport->vport); - return -EOPNOTSUPP; - } + int err; flow_group_in = kvzalloc(inlen, GFP_KERNEL); if (!flow_group_in) return -ENOMEM; - acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport); - if (IS_ERR(acl)) { - err = PTR_ERR(acl); - esw_warn(dev, "Failed to create E-Switch vport[%d] ingress flow Table, err(%d)\n", - vport->vport, err); - goto out; - } - vport->ingress.acl = acl; - match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); @@ -1109,14 +1088,14 @@ int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); - g = mlx5_create_flow_group(acl, flow_group_in); + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); if (IS_ERR(g)) { err = PTR_ERR(g); - esw_warn(dev, "Failed to create E-Switch vport[%d] ingress untagged spoofchk flow group, err(%d)\n", + esw_warn(dev, "vport[%d] ingress create untagged spoofchk flow group, err(%d)\n", vport->vport, err); - goto out; + goto spoof_err; } - vport->ingress.allow_untagged_spoofchk_grp = g; + vport->ingress.legacy.allow_untagged_spoofchk_grp = g; memset(flow_group_in, 0, inlen); MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); @@ -1124,14 +1103,14 @@ int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, 
start_flow_index, 1); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); - g = mlx5_create_flow_group(acl, flow_group_in); + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); if (IS_ERR(g)) { err = PTR_ERR(g); - esw_warn(dev, "Failed to create E-Switch vport[%d] ingress untagged flow group, err(%d)\n", + esw_warn(dev, "vport[%d] ingress create untagged flow group, err(%d)\n", vport->vport, err); - goto out; + goto untagged_err; } - vport->ingress.allow_untagged_only_grp = g; + vport->ingress.legacy.allow_untagged_only_grp = g; memset(flow_group_in, 0, inlen); MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); @@ -1140,108 +1119,178 @@ int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 2); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 2); - g = mlx5_create_flow_group(acl, flow_group_in); + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); if (IS_ERR(g)) { err = PTR_ERR(g); - esw_warn(dev, "Failed to create E-Switch vport[%d] ingress spoofchk flow group, err(%d)\n", + esw_warn(dev, "vport[%d] ingress create spoofchk flow group, err(%d)\n", vport->vport, err); - goto out; + goto allow_spoof_err; } - vport->ingress.allow_spoofchk_only_grp = g; + vport->ingress.legacy.allow_spoofchk_only_grp = g; memset(flow_group_in, 0, inlen); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 3); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 3); - g = mlx5_create_flow_group(acl, flow_group_in); + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); if (IS_ERR(g)) { err = PTR_ERR(g); - esw_warn(dev, "Failed to create E-Switch vport[%d] ingress drop flow group, err(%d)\n", + esw_warn(dev, "vport[%d] ingress create drop flow group, err(%d)\n", vport->vport, err); - goto out; + goto drop_err; } - vport->ingress.drop_grp = g; + vport->ingress.legacy.drop_grp = g; + kvfree(flow_group_in); + return 0; -out: - if (err) { - if (!IS_ERR_OR_NULL(vport->ingress.allow_spoofchk_only_grp)) - mlx5_destroy_flow_group( - vport->ingress.allow_spoofchk_only_grp); - if (!IS_ERR_OR_NULL(vport->ingress.allow_untagged_only_grp)) - mlx5_destroy_flow_group( - vport->ingress.allow_untagged_only_grp); - if (!IS_ERR_OR_NULL(vport->ingress.allow_untagged_spoofchk_grp)) - mlx5_destroy_flow_group( - vport->ingress.allow_untagged_spoofchk_grp); - if (!IS_ERR_OR_NULL(vport->ingress.acl)) - mlx5_destroy_flow_table(vport->ingress.acl); +drop_err: + if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_spoofchk_only_grp)) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp); + vport->ingress.legacy.allow_spoofchk_only_grp = NULL; } - +allow_spoof_err: + if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_only_grp)) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp); + vport->ingress.legacy.allow_untagged_only_grp = NULL; + } +untagged_err: + if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_spoofchk_grp)) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp); + vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL; + } +spoof_err: kvfree(flow_group_in); return err; } +int esw_vport_create_ingress_acl_table(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, int table_size) +{ + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *acl; + int vport_index; + int err; + + if (!MLX5_CAP_ESW_INGRESS_ACL(dev, 
ft_support)) + return -EOPNOTSUPP; + + esw_debug(dev, "Create vport[%d] ingress ACL log_max_size(%d)\n", + vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size)); + + vport_index = mlx5_eswitch_vport_num_to_index(esw, vport->vport); + root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, + vport_index); + if (!root_ns) { + esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n", + vport->vport); + return -EOPNOTSUPP; + } + + acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport); + if (IS_ERR(acl)) { + err = PTR_ERR(acl); + esw_warn(dev, "vport[%d] ingress create flow Table, err(%d)\n", + vport->vport, err); + return err; + } + vport->ingress.acl = acl; + return 0; +} + +void esw_vport_destroy_ingress_acl_table(struct mlx5_vport *vport) +{ + if (!vport->ingress.acl) + return; + + mlx5_destroy_flow_table(vport->ingress.acl); + vport->ingress.acl = NULL; +} + void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - if (!IS_ERR_OR_NULL(vport->ingress.drop_rule)) - mlx5_del_flow_rules(vport->ingress.drop_rule); + if (vport->ingress.legacy.drop_rule) { + mlx5_del_flow_rules(vport->ingress.legacy.drop_rule); + vport->ingress.legacy.drop_rule = NULL; + } - if (!IS_ERR_OR_NULL(vport->ingress.allow_rule)) + if (vport->ingress.allow_rule) { mlx5_del_flow_rules(vport->ingress.allow_rule); - - vport->ingress.drop_rule = NULL; - vport->ingress.allow_rule = NULL; - - esw_vport_del_ingress_acl_modify_metadata(esw, vport); + vport->ingress.allow_rule = NULL; + } } -void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +static void esw_vport_disable_legacy_ingress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { - if (IS_ERR_OR_NULL(vport->ingress.acl)) + if (!vport->ingress.acl) return; esw_debug(esw->dev, "Destroy vport[%d] E-Switch ingress ACL\n", vport->vport); esw_vport_cleanup_ingress_rules(esw, vport); - mlx5_destroy_flow_group(vport->ingress.allow_spoofchk_only_grp); - mlx5_destroy_flow_group(vport->ingress.allow_untagged_only_grp); - mlx5_destroy_flow_group(vport->ingress.allow_untagged_spoofchk_grp); - mlx5_destroy_flow_group(vport->ingress.drop_grp); - mlx5_destroy_flow_table(vport->ingress.acl); - vport->ingress.acl = NULL; - vport->ingress.drop_grp = NULL; - vport->ingress.allow_spoofchk_only_grp = NULL; - vport->ingress.allow_untagged_only_grp = NULL; - vport->ingress.allow_untagged_spoofchk_grp = NULL; + if (vport->ingress.legacy.allow_spoofchk_only_grp) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp); + vport->ingress.legacy.allow_spoofchk_only_grp = NULL; + } + if (vport->ingress.legacy.allow_untagged_only_grp) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp); + vport->ingress.legacy.allow_untagged_only_grp = NULL; + } + if (vport->ingress.legacy.allow_untagged_spoofchk_grp) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp); + vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL; + } + if (vport->ingress.legacy.drop_grp) { + mlx5_destroy_flow_group(vport->ingress.legacy.drop_grp); + vport->ingress.legacy.drop_grp = NULL; + } + esw_vport_destroy_ingress_acl_table(vport); } static int esw_vport_ingress_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - struct mlx5_fc *counter = vport->ingress.drop_counter; + struct mlx5_fc *counter = vport->ingress.legacy.drop_counter; struct mlx5_flow_destination drop_ctr_dst = {0}; struct 
mlx5_flow_destination *dst = NULL; struct mlx5_flow_act flow_act = {0}; - struct mlx5_flow_spec *spec; + struct mlx5_flow_spec *spec = NULL; int dest_num = 0; int err = 0; u8 *smac_v; + /* The ingress acl table contains 4 groups + * (2 active rules at the same time - + * 1 allow rule from one of the first 3 groups. + * 1 drop rule from the last group): + * 1)Allow untagged traffic with smac=original mac. + * 2)Allow untagged traffic. + * 3)Allow traffic with smac=original mac. + * 4)Drop all other traffic. + */ + int table_size = 4; + esw_vport_cleanup_ingress_rules(esw, vport); if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) { - esw_vport_disable_ingress_acl(esw, vport); + esw_vport_disable_legacy_ingress_acl(esw, vport); return 0; } - err = esw_vport_enable_ingress_acl(esw, vport); - if (err) { - mlx5_core_warn(esw->dev, - "failed to enable ingress acl (%d) on vport[%d]\n", - err, vport->vport); - return err; + if (!vport->ingress.acl) { + err = esw_vport_create_ingress_acl_table(esw, vport, table_size); + if (err) { + esw_warn(esw->dev, + "vport[%d] enable ingress acl err (%d)\n", + err, vport->vport); + return err; + } + + err = esw_vport_create_legacy_ingress_acl_groups(esw, vport); + if (err) + goto out; } esw_debug(esw->dev, @@ -1291,21 +1340,59 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, dst = &drop_ctr_dst; dest_num++; } - vport->ingress.drop_rule = + vport->ingress.legacy.drop_rule = mlx5_add_flow_rules(vport->ingress.acl, spec, &flow_act, dst, dest_num); - if (IS_ERR(vport->ingress.drop_rule)) { - err = PTR_ERR(vport->ingress.drop_rule); + if (IS_ERR(vport->ingress.legacy.drop_rule)) { + err = PTR_ERR(vport->ingress.legacy.drop_rule); esw_warn(esw->dev, "vport[%d] configure ingress drop rule, err(%d)\n", vport->vport, err); - vport->ingress.drop_rule = NULL; + vport->ingress.legacy.drop_rule = NULL; goto out; } + kvfree(spec); + return 0; out: - if (err) - esw_vport_cleanup_ingress_rules(esw, vport); + esw_vport_disable_legacy_ingress_acl(esw, vport); + kvfree(spec); + return err; +} + +int mlx5_esw_create_vport_egress_acl_vlan(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + u16 vlan_id, u32 flow_action) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *spec; + int err = 0; + + if (vport->egress.allowed_vlan) + return -EEXIST; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vlan_id); + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + flow_act.action = flow_action; + vport->egress.allowed_vlan = + mlx5_add_flow_rules(vport->egress.acl, spec, + &flow_act, NULL, 0); + if (IS_ERR(vport->egress.allowed_vlan)) { + err = PTR_ERR(vport->egress.allowed_vlan); + esw_warn(esw->dev, + "vport[%d] configure egress vlan rule failed, err(%d)\n", + vport->vport, err); + vport->egress.allowed_vlan = NULL; + } + kvfree(spec); return err; } @@ -1313,7 +1400,7 @@ out: static int esw_vport_egress_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - struct mlx5_fc *counter = vport->egress.drop_counter; + struct mlx5_fc *counter = vport->egress.legacy.drop_counter; struct mlx5_flow_destination drop_ctr_dst = {0}; struct mlx5_flow_destination *dst = NULL; struct 
mlx5_flow_act flow_act = {0}; @@ -1340,34 +1427,17 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, "vport[%d] configure egress rules, vlan(%d) qos(%d)\n", vport->vport, vport->info.vlan, vport->info.qos); - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) { - err = -ENOMEM; - goto out; - } - /* Allowed vlan rule */ - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); - MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag); - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid); - MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->info.vlan); + err = mlx5_esw_create_vport_egress_acl_vlan(esw, vport, vport->info.vlan, + MLX5_FLOW_CONTEXT_ACTION_ALLOW); + if (err) + return err; - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; - vport->egress.allowed_vlan = - mlx5_add_flow_rules(vport->egress.acl, spec, - &flow_act, NULL, 0); - if (IS_ERR(vport->egress.allowed_vlan)) { - err = PTR_ERR(vport->egress.allowed_vlan); - esw_warn(esw->dev, - "vport[%d] configure egress allowed vlan rule failed, err(%d)\n", - vport->vport, err); - vport->egress.allowed_vlan = NULL; + /* Drop others rule (star rule) */ + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) goto out; - } - /* Drop others rule (star rule) */ - memset(spec, 0, sizeof(*spec)); flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; /* Attach egress drop flow counter */ @@ -1378,33 +1448,64 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, dst = &drop_ctr_dst; dest_num++; } - vport->egress.drop_rule = + vport->egress.legacy.drop_rule = mlx5_add_flow_rules(vport->egress.acl, spec, &flow_act, dst, dest_num); - if (IS_ERR(vport->egress.drop_rule)) { - err = PTR_ERR(vport->egress.drop_rule); + if (IS_ERR(vport->egress.legacy.drop_rule)) { + err = PTR_ERR(vport->egress.legacy.drop_rule); esw_warn(esw->dev, "vport[%d] configure egress drop rule failed, err(%d)\n", vport->vport, err); - vport->egress.drop_rule = NULL; + vport->egress.legacy.drop_rule = NULL; } out: kvfree(spec); return err; } +static bool element_type_supported(struct mlx5_eswitch *esw, int type) +{ + const struct mlx5_core_dev *dev = esw->dev; + + switch (type) { + case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_TASR; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_VPORT; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_VPORT_TC; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC; + } + return false; +} + /* Vport QoS management */ -static int esw_create_tsar(struct mlx5_eswitch *esw) +static void esw_create_tsar(struct mlx5_eswitch *esw) { u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; struct mlx5_core_dev *dev = esw->dev; + __be32 *attr; int err; if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) - return 0; + return; + + if (!element_type_supported(esw, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR)) + return; if (esw->qos.enabled) - return -EEXIST; + return; + + MLX5_SET(scheduling_context, tsar_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); + + attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); + *attr = 
cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16); err = mlx5_create_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH, @@ -1412,11 +1513,10 @@ static int esw_create_tsar(struct mlx5_eswitch *esw) &esw->qos.root_tsar_id); if (err) { esw_warn(esw->dev, "E-Switch create TSAR failed (%d)\n", err); - return err; + return; } esw->qos.enabled = true; - return 0; } static void esw_destroy_tsar(struct mlx5_eswitch *esw) @@ -1537,6 +1637,22 @@ static int esw_vport_qos_config(struct mlx5_eswitch *esw, return 0; } +int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, + u32 rate_mbps) +{ + u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps); + + return mlx5_modify_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + ctx, + vport->qos.esw_tsar_ix, + MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW); +} + static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) { ((u8 *)node_guid)[7] = mac[0]; @@ -1555,7 +1671,7 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw, u16 vport_num = vport->vport; int flags; - if (esw->manager_vport == vport_num) + if (mlx5_esw_is_manager_vport(esw, vport_num)) return; mlx5_modify_vport_admin_state(esw->dev, @@ -1575,79 +1691,125 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw, SET_VLAN_STRIP | SET_VLAN_INSERT : 0; modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos, flags); - - /* Only legacy mode needs ACLs */ - if (esw->mode == MLX5_ESWITCH_LEGACY) { - esw_vport_ingress_config(esw, vport); - esw_vport_egress_config(esw, vport); - } } -static void esw_vport_create_drop_counters(struct mlx5_vport *vport) +static int esw_vport_create_legacy_acl_tables(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { - struct mlx5_core_dev *dev = vport->dev; + int ret; + + /* Only non manager vports need ACL in legacy mode */ + if (mlx5_esw_is_manager_vport(esw, vport->vport)) + return 0; - if (MLX5_CAP_ESW_INGRESS_ACL(dev, flow_counter)) { - vport->ingress.drop_counter = mlx5_fc_create(dev, false); - if (IS_ERR(vport->ingress.drop_counter)) { - esw_warn(dev, + if (!mlx5_esw_is_manager_vport(esw, vport->vport) && + MLX5_CAP_ESW_INGRESS_ACL(esw->dev, flow_counter)) { + vport->ingress.legacy.drop_counter = mlx5_fc_create(esw->dev, false); + if (IS_ERR(vport->ingress.legacy.drop_counter)) { + esw_warn(esw->dev, "vport[%d] configure ingress drop rule counter failed\n", vport->vport); - vport->ingress.drop_counter = NULL; + vport->ingress.legacy.drop_counter = NULL; } } - if (MLX5_CAP_ESW_EGRESS_ACL(dev, flow_counter)) { - vport->egress.drop_counter = mlx5_fc_create(dev, false); - if (IS_ERR(vport->egress.drop_counter)) { - esw_warn(dev, + ret = esw_vport_ingress_config(esw, vport); + if (ret) + goto ingress_err; + + if (!mlx5_esw_is_manager_vport(esw, vport->vport) && + MLX5_CAP_ESW_EGRESS_ACL(esw->dev, flow_counter)) { + vport->egress.legacy.drop_counter = mlx5_fc_create(esw->dev, false); + if (IS_ERR(vport->egress.legacy.drop_counter)) { + esw_warn(esw->dev, "vport[%d] configure egress drop rule counter failed\n", vport->vport); - vport->egress.drop_counter = NULL; + vport->egress.legacy.drop_counter = NULL; } } + + ret = esw_vport_egress_config(esw, vport); + if (ret) + goto egress_err; + + return 0; + +egress_err: + esw_vport_disable_legacy_ingress_acl(esw, vport); + mlx5_fc_destroy(esw->dev, vport->egress.legacy.drop_counter); + 
vport->egress.legacy.drop_counter = NULL; + +ingress_err: + mlx5_fc_destroy(esw->dev, vport->ingress.legacy.drop_counter); + vport->ingress.legacy.drop_counter = NULL; + return ret; } -static void esw_vport_destroy_drop_counters(struct mlx5_vport *vport) +static int esw_vport_setup_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { - struct mlx5_core_dev *dev = vport->dev; + if (esw->mode == MLX5_ESWITCH_LEGACY) + return esw_vport_create_legacy_acl_tables(esw, vport); + else + return esw_vport_create_offloads_acl_tables(esw, vport); +} - if (vport->ingress.drop_counter) - mlx5_fc_destroy(dev, vport->ingress.drop_counter); - if (vport->egress.drop_counter) - mlx5_fc_destroy(dev, vport->egress.drop_counter); +static void esw_vport_destroy_legacy_acl_tables(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) + +{ + if (mlx5_esw_is_manager_vport(esw, vport->vport)) + return; + + esw_vport_disable_egress_acl(esw, vport); + mlx5_fc_destroy(esw->dev, vport->egress.legacy.drop_counter); + vport->egress.legacy.drop_counter = NULL; + + esw_vport_disable_legacy_ingress_acl(esw, vport); + mlx5_fc_destroy(esw->dev, vport->ingress.legacy.drop_counter); + vport->ingress.legacy.drop_counter = NULL; +} + +static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (esw->mode == MLX5_ESWITCH_LEGACY) + esw_vport_destroy_legacy_acl_tables(esw, vport); + else + esw_vport_destroy_offloads_acl_tables(esw, vport); } -static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, - int enable_events) +static int esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + enum mlx5_eswitch_vport_event enabled_events) { u16 vport_num = vport->vport; + int ret; mutex_lock(&esw->state_lock); WARN_ON(vport->enabled); esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); - /* Create steering drop counters for ingress and egress ACLs */ - if (vport_num && esw->mode == MLX5_ESWITCH_LEGACY) - esw_vport_create_drop_counters(vport); - /* Restore old vport configuration */ esw_apply_vport_conf(esw, vport); + ret = esw_vport_setup_acl(esw, vport); + if (ret) + goto done; + /* Attach vport to the eswitch rate limiter */ if (esw_vport_enable_qos(esw, vport, vport->info.max_rate, vport->qos.bw_share)) esw_warn(esw->dev, "Failed to attach vport %d to eswitch rate limiter", vport_num); /* Sync with current vport context */ - vport->enabled_events = enable_events; + vport->enabled_events = enabled_events; vport->enabled = true; /* Esw manager is trusted by default. Host PF (vport 0) is trusted as well * in smartNIC as it's a vport group manager. 
*/ - if (esw->manager_vport == vport_num || + if (mlx5_esw_is_manager_vport(esw, vport_num) || (!vport_num && mlx5_core_is_ecpf(esw->dev))) vport->info.trusted = true; @@ -1655,7 +1817,9 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, esw->enabled_vports++; esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num); +done: mutex_unlock(&esw->state_lock); + return ret; } static void esw_disable_vport(struct mlx5_eswitch *esw, @@ -1663,18 +1827,16 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, { u16 vport_num = vport->vport; + mutex_lock(&esw->state_lock); if (!vport->enabled) - return; + goto done; esw_debug(esw->dev, "Disabling vport(%d)\n", vport_num); /* Mark this vport as disabled to discard new events */ vport->enabled = false; - /* Wait for current already scheduled events to complete */ - flush_workqueue(esw->work_queue); /* Disable events from this vport */ arm_vport_context_events_cmd(esw->dev, vport->vport, 0); - mutex_lock(&esw->state_lock); /* We don't assume VFs will cleanup after themselves. * Calling vport change handler while vport is disabled will cleanup * the vport resources. @@ -1682,17 +1844,18 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, esw_vport_change_handle_locked(vport); vport->enabled_events = 0; esw_vport_disable_qos(esw, vport); - if (esw->manager_vport != vport_num && - esw->mode == MLX5_ESWITCH_LEGACY) { + + if (!mlx5_esw_is_manager_vport(esw, vport->vport) && + esw->mode == MLX5_ESWITCH_LEGACY) mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, vport_num, 1, MLX5_VPORT_ADMIN_STATE_DOWN); - esw_vport_disable_egress_acl(esw, vport); - esw_vport_disable_ingress_acl(esw, vport); - esw_vport_destroy_drop_counters(vport); - } + + esw_vport_cleanup_acl(esw, vport); esw->enabled_vports--; + +done: mutex_unlock(&esw->state_lock); } @@ -1706,12 +1869,8 @@ static int eswitch_vport_event(struct notifier_block *nb, vport_num = be16_to_cpu(eqe->data.vport_change.vport_num); vport = mlx5_eswitch_get_vport(esw, vport_num); - if (IS_ERR(vport)) - return NOTIFY_OK; - - if (vport->enabled) + if (!IS_ERR(vport)) queue_work(esw->work_queue, &vport->vport_change_handler); - return NOTIFY_OK; } @@ -1767,14 +1926,85 @@ static void mlx5_eswitch_event_handlers_unregister(struct mlx5_eswitch *esw) flush_workqueue(esw->work_queue); } +static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + int i; + + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) { + memset(&vport->info, 0, sizeof(vport->info)); + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; + } +} + /* Public E-Switch API */ #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev)) -int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) +/* mlx5_eswitch_enable_pf_vf_vports() enables vports of PF, ECPF and VFs + * whichever are present on the eswitch. 
+ */ +int +mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, + enum mlx5_eswitch_vport_event enabled_events) { struct mlx5_vport *vport; + int num_vfs; + int ret; + int i; + + /* Enable PF vport */ + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); + ret = esw_enable_vport(esw, vport, enabled_events); + if (ret) + return ret; + + /* Enable ECPF vport */ + if (mlx5_ecpf_vport_exists(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); + ret = esw_enable_vport(esw, vport, enabled_events); + if (ret) + goto ecpf_err; + } + + /* Enable VF vports */ + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) { + ret = esw_enable_vport(esw, vport, enabled_events); + if (ret) + goto vf_err; + } + return 0; + +vf_err: + num_vfs = i - 1; + mlx5_esw_for_each_vf_vport_reverse(esw, i, vport, num_vfs) + esw_disable_vport(esw, vport); + + if (mlx5_ecpf_vport_exists(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); + esw_disable_vport(esw, vport); + } + +ecpf_err: + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); + esw_disable_vport(esw, vport); + return ret; +} + +/* mlx5_eswitch_disable_pf_vf_vports() disables vports of PF, ECPF and VFs + * whichever are previously enabled on the eswitch. + */ +void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + int i; + + mlx5_esw_for_all_vports_reverse(esw, i, vport) + esw_disable_vport(esw, vport); +} + +int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) +{ int err; - int i, enabled_events; if (!ESW_ALLOWED(esw) || !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { @@ -1788,44 +2018,23 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support)) esw_warn(esw->dev, "engress ACL is not supported by FW\n"); + esw_create_tsar(esw); + esw->mode = mode; mlx5_lag_update(esw->dev); if (mode == MLX5_ESWITCH_LEGACY) { - err = esw_create_legacy_table(esw); - if (err) - goto abort; + err = esw_legacy_enable(esw); } else { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - err = esw_offloads_init(esw); + err = esw_offloads_enable(esw); } if (err) goto abort; - err = esw_create_tsar(esw); - if (err) - esw_warn(esw->dev, "Failed to create eswitch TSAR"); - - enabled_events = (mode == MLX5_ESWITCH_LEGACY) ? SRIOV_VPORT_EVENTS : - UC_ADDR_CHANGE; - - /* Enable PF vport */ - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); - esw_enable_vport(esw, vport, enabled_events); - - /* Enable ECPF vports */ - if (mlx5_ecpf_vport_exists(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); - esw_enable_vport(esw, vport, enabled_events); - } - - /* Enable VF vports */ - mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) - esw_enable_vport(esw, vport, enabled_events); - mlx5_eswitch_event_handlers_register(esw); esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), active vports(%d)\n", @@ -1845,12 +2054,9 @@ abort: return err; } -void mlx5_eswitch_disable(struct mlx5_eswitch *esw) +void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) { - struct esw_mc_addr *mc_promisc; - struct mlx5_vport *vport; int old_mode; - int i; if (!ESW_ALLOWED(esw) || esw->mode == MLX5_ESWITCH_NONE) return; @@ -1859,21 +2065,14 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw) esw->mode == MLX5_ESWITCH_LEGACY ? 
"LEGACY" : "OFFLOADS", esw->esw_funcs.num_vfs, esw->enabled_vports); - mc_promisc = &esw->mc_promisc; mlx5_eswitch_event_handlers_unregister(esw); - mlx5_esw_for_all_vports(esw, i, vport) - esw_disable_vport(esw, vport); - - if (mc_promisc && mc_promisc->uplink_rule) - mlx5_del_flow_rules(mc_promisc->uplink_rule); - - esw_destroy_tsar(esw); - if (esw->mode == MLX5_ESWITCH_LEGACY) - esw_destroy_legacy_table(esw); + esw_legacy_disable(esw); else if (esw->mode == MLX5_ESWITCH_OFFLOADS) - esw_offloads_cleanup(esw); + esw_offloads_disable(esw); + + esw_destroy_tsar(esw); old_mode = esw->mode; esw->mode = MLX5_ESWITCH_NONE; @@ -1884,6 +2083,8 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw) mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); } + if (clear_vf) + mlx5_eswitch_clear_vf_vports_info(esw); } int mlx5_eswitch_init(struct mlx5_core_dev *dev) @@ -1931,8 +2132,11 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) if (err) goto abort; + mutex_init(&esw->offloads.encap_tbl_lock); hash_init(esw->offloads.encap_tbl); - hash_init(esw->offloads.mod_hdr_tbl); + mutex_init(&esw->offloads.mod_hdr.lock); + hash_init(esw->offloads.mod_hdr.hlist); + atomic64_set(&esw->offloads.num_flows, 0); mutex_init(&esw->state_lock); mlx5_esw_for_all_vports(esw, i, vport) { @@ -1968,6 +2172,8 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); esw_offloads_cleanup_reps(esw); + mutex_destroy(&esw->offloads.mod_hdr.lock); + mutex_destroy(&esw->offloads.encap_tbl_lock); kfree(esw->vports); kfree(esw); } @@ -2044,7 +2250,7 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, unlock: mutex_unlock(&esw->state_lock); - return 0; + return err; } int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, @@ -2085,23 +2291,19 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, if (vlan > 4095 || qos > 7) return -EINVAL; - mutex_lock(&esw->state_lock); - err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags); if (err) - goto unlock; + return err; evport->info.vlan = vlan; evport->info.qos = qos; if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) { err = esw_vport_ingress_config(esw, evport); if (err) - goto unlock; + return err; err = esw_vport_egress_config(esw, evport); } -unlock: - mutex_unlock(&esw->state_lock); return err; } @@ -2109,11 +2311,16 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, u16 vport, u16 vlan, u8 qos) { u8 set_flags = 0; + int err; if (vlan || qos) set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT; - return __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags); + mutex_lock(&esw->state_lock); + err = __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags); + mutex_unlock(&esw->state_lock); + + return err; } int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, @@ -2400,12 +2607,12 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, if (!vport->enabled || esw->mode != MLX5_ESWITCH_LEGACY) return 0; - if (vport->egress.drop_counter) - mlx5_fc_query(dev, vport->egress.drop_counter, + if (vport->egress.legacy.drop_counter) + mlx5_fc_query(dev, vport->egress.legacy.drop_counter, &stats->rx_dropped, &bytes); - if (vport->ingress.drop_counter) - mlx5_fc_query(dev, vport->ingress.drop_counter, + if (vport->ingress.legacy.drop_counter) + mlx5_fc_query(dev, vport->ingress.legacy.drop_counter, &stats->tx_dropped, &bytes); if (!MLX5_CAP_GEN(dev, 
receive_discard_vport_down) && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 04685dbb280c..4472710ccc9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -35,6 +35,7 @@ #include <linux/if_ether.h> #include <linux/if_link.h> +#include <linux/atomic.h> #include <net/devlink.h> #include <linux/mlx5/device.h> #include <linux/mlx5/eswitch.h> @@ -42,6 +43,16 @@ #include <linux/mlx5/fs.h> #include "lib/mpfs.h" +#define FDB_TC_MAX_CHAIN 3 +#define FDB_FT_CHAIN (FDB_TC_MAX_CHAIN + 1) +#define FDB_TC_SLOW_PATH_CHAIN (FDB_FT_CHAIN + 1) + +/* The index of the last real chain (FT) + 1 as chain zero is valid as well */ +#define FDB_NUM_CHAINS (FDB_FT_CHAIN + 1) + +#define FDB_TC_MAX_PRIO 16 +#define FDB_TC_LEVELS_PER_PRIO 2 + #ifdef CONFIG_MLX5_ESWITCH #define MLX5_MAX_UC_PER_VPORT(dev) \ @@ -58,21 +69,29 @@ #define mlx5_esw_has_fwd_fdb(dev) \ MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table) -#define FDB_MAX_CHAIN 3 -#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) -#define FDB_MAX_PRIO 16 - struct vport_ingress { struct mlx5_flow_table *acl; - struct mlx5_flow_group *allow_untagged_spoofchk_grp; - struct mlx5_flow_group *allow_spoofchk_only_grp; - struct mlx5_flow_group *allow_untagged_only_grp; - struct mlx5_flow_group *drop_grp; - int modify_metadata_id; - struct mlx5_flow_handle *modify_metadata_rule; - struct mlx5_flow_handle *allow_rule; - struct mlx5_flow_handle *drop_rule; - struct mlx5_fc *drop_counter; + struct mlx5_flow_handle *allow_rule; + struct { + struct mlx5_flow_group *allow_spoofchk_only_grp; + struct mlx5_flow_group *allow_untagged_spoofchk_grp; + struct mlx5_flow_group *allow_untagged_only_grp; + struct mlx5_flow_group *drop_grp; + struct mlx5_flow_handle *drop_rule; + struct mlx5_fc *drop_counter; + } legacy; + struct { + /* Optional group to add an FTE to do internal priority + * tagging on ingress packets. + */ + struct mlx5_flow_group *metadata_prio_tag_grp; + /* Group to add default match-all FTE entry to tag ingress + * packet with metadata. 
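For reference, the chain-numbering macros introduced in eswitch.h above resolve to: TC-visible chains 0..FDB_TC_MAX_CHAIN (3), FDB_FT_CHAIN = 4 reserved for the flow-table offload hook, FDB_TC_SLOW_PATH_CHAIN = 5, and FDB_NUM_CHAINS = 5 covering the valid chains 0..4. A trivial standalone check of those derived values:

#include <stdio.h>

#define FDB_TC_MAX_CHAIN       3
#define FDB_FT_CHAIN           (FDB_TC_MAX_CHAIN + 1)
#define FDB_TC_SLOW_PATH_CHAIN (FDB_FT_CHAIN + 1)
#define FDB_NUM_CHAINS         (FDB_FT_CHAIN + 1)

int main(void)
{
	/* TC chains are 0..3, chain 4 is the FT hook, chain 5 is the slow path;
	 * chains 0..4 are the "real" chains counted by FDB_NUM_CHAINS. */
	printf("tc chains: 0..%d, ft chain: %d, slow path chain: %d, count: %d\n",
	       FDB_TC_MAX_CHAIN, FDB_FT_CHAIN, FDB_TC_SLOW_PATH_CHAIN,
	       FDB_NUM_CHAINS);
	return 0;
}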
+ */ + struct mlx5_flow_group *metadata_allmatch_grp; + struct mlx5_modify_hdr *modify_metadata; + struct mlx5_flow_handle *modify_metadata_rule; + } offloads; }; struct vport_egress { @@ -80,8 +99,10 @@ struct vport_egress { struct mlx5_flow_group *allowed_vlans_grp; struct mlx5_flow_group *drop_grp; struct mlx5_flow_handle *allowed_vlan; - struct mlx5_flow_handle *drop_rule; - struct mlx5_fc *drop_counter; + struct { + struct mlx5_flow_handle *drop_rule; + struct mlx5_fc *drop_counter; + } legacy; }; struct mlx5_vport_drop_stats { @@ -101,6 +122,13 @@ struct mlx5_vport_info { bool trusted; }; +/* Vport context events */ +enum mlx5_eswitch_vport_event { + MLX5_VPORT_UC_ADDR_CHANGE = BIT(0), + MLX5_VPORT_MC_ADDR_CHANGE = BIT(1), + MLX5_VPORT_PROMISC_CHANGE = BIT(3), +}; + struct mlx5_vport { struct mlx5_core_dev *dev; int vport; @@ -122,16 +150,15 @@ struct mlx5_vport { } qos; bool enabled; - u16 enabled_events; + enum mlx5_eswitch_vport_event enabled_events; }; enum offloads_fdb_flags { ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED = BIT(0), }; -extern const unsigned int ESW_POOLS[4]; +struct mlx5_esw_chains_priv; -#define PRIO_LEVELS 2 struct mlx5_eswitch_fdb { union { struct legacy_fdb { @@ -145,6 +172,7 @@ struct mlx5_eswitch_fdb { } legacy; struct offloads_fdb { + struct mlx5_flow_namespace *ns; struct mlx5_flow_table *slow_fdb; struct mlx5_flow_group *send_to_vport_grp; struct mlx5_flow_group *peer_miss_grp; @@ -154,14 +182,7 @@ struct mlx5_eswitch_fdb { struct mlx5_flow_handle *miss_rule_multi; int vlan_push_pop_refcount; - struct { - struct mlx5_flow_table *fdb; - u32 num_rules; - } fdb_prio[FDB_MAX_CHAIN + 1][FDB_MAX_PRIO + 1][PRIO_LEVELS]; - /* Protects fdb_prio table */ - struct mutex fdb_prio_lock; - - int fdb_left[ARRAY_SIZE(ESW_POOLS)]; + struct mlx5_esw_chains_priv *esw_chains_priv; } offloads; }; u32 flags; @@ -173,13 +194,14 @@ struct mlx5_esw_offload { struct mlx5_eswitch_rep *vport_reps; struct list_head peer_flows; struct mutex peer_mutex; + struct mutex encap_tbl_lock; /* protects encap_tbl */ DECLARE_HASHTABLE(encap_tbl, 8); - DECLARE_HASHTABLE(mod_hdr_tbl, 8); + struct mod_hdr_tbl mod_hdr; DECLARE_HASHTABLE(termtbl_tbl, 8); struct mutex termtbl_mutex; /* protects termtbl hash */ const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES]; u8 inline_mode; - u64 num_flows; + atomic64_t num_flows; enum devlink_eswitch_encap_mode encap; }; @@ -208,7 +230,10 @@ struct mlx5_eswitch { struct mlx5_core_dev *dev; struct mlx5_nb nb; struct mlx5_eswitch_fdb fdb_table; + /* legacy data structures */ struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; + struct esw_mc_addr mc_promisc; + /* end of legacy */ struct workqueue_struct *work_queue; struct mlx5_vport *vports; u32 flags; @@ -218,7 +243,6 @@ struct mlx5_eswitch { * and async SRIOV admin state changes */ struct mutex state_lock; - struct esw_mc_addr mc_promisc; struct { bool enabled; @@ -233,30 +257,30 @@ struct mlx5_eswitch { struct mlx5_esw_functions esw_funcs; }; -void esw_offloads_cleanup(struct mlx5_eswitch *esw); -int esw_offloads_init(struct mlx5_eswitch *esw); +void esw_offloads_disable(struct mlx5_eswitch *esw); +int esw_offloads_enable(struct mlx5_eswitch *esw); void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw); int esw_offloads_init_reps(struct mlx5_eswitch *esw); void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, struct mlx5_vport *vport); -int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, - struct mlx5_vport *vport); +int esw_vport_create_ingress_acl_table(struct mlx5_eswitch *esw, + struct 
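enum mlx5_eswitch_vport_event turns the previously opaque enabled_events value into named bits, so legacy mode can arm UC, MC and promiscuous change handling while offloads mode passes only MLX5_VPORT_UC_ADDR_CHANGE. A standalone sketch of the same bitmask pattern, with illustrative names that mirror (but are not) the driver's enum:

#include <stdio.h>

enum vport_event {
	VPORT_UC_ADDR_CHANGE = 1u << 0,
	VPORT_MC_ADDR_CHANGE = 1u << 1,
	VPORT_PROMISC_CHANGE = 1u << 3,
};

static void arm_events(unsigned int enabled_events)
{
	if (enabled_events & VPORT_UC_ADDR_CHANGE)
		printf("arm UC address change\n");
	if (enabled_events & VPORT_MC_ADDR_CHANGE)
		printf("arm MC address change\n");
	if (enabled_events & VPORT_PROMISC_CHANGE)
		printf("arm promiscuous change\n");
}

int main(void)
{
	/* Legacy SR-IOV wants all three; offloads mode requests only UC changes. */
	arm_events(VPORT_UC_ADDR_CHANGE | VPORT_MC_ADDR_CHANGE | VPORT_PROMISC_CHANGE);
	arm_events(VPORT_UC_ADDR_CHANGE);
	return 0;
}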
mlx5_vport *vport, + int table_size); +void esw_vport_destroy_ingress_acl_table(struct mlx5_vport *vport); void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw, struct mlx5_vport *vport); int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, struct mlx5_vport *vport); void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw, struct mlx5_vport *vport); -void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, - struct mlx5_vport *vport); -void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, - struct mlx5_vport *vport); +int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, + u32 rate_mbps); /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode); -void mlx5_eswitch_disable(struct mlx5_eswitch *esw); +void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf); int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, u16 vport, u8 mac[ETH_ALEN]); int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, @@ -278,9 +302,11 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, struct ifla_vf_stats *vf_stats); void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule); -int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, +int mlx5_eswitch_modify_esw_vport_context(struct mlx5_core_dev *dev, u16 vport, + bool other_vport, void *in, int inlen); -int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, +int mlx5_eswitch_query_esw_vport_context(struct mlx5_core_dev *dev, u16 vport, + bool other_vport, void *out, int outlen); struct mlx5_flow_spec; @@ -322,15 +348,6 @@ mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule, struct mlx5_esw_flow_attr *attr); -bool -mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw); - -u16 -mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw); - -u32 -mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw); - struct mlx5_flow_handle * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, struct mlx5_flow_destination *dest); @@ -355,6 +372,11 @@ enum { MLX5_ESW_DEST_ENCAP_VALID = BIT(1), }; +enum { + MLX5_ESW_ATTR_FLAG_VLAN_HANDLED = BIT(0), + MLX5_ESW_ATTR_FLAG_SLOW_PATH = BIT(1), +}; + struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *in_rep; struct mlx5_core_dev *in_mdev; @@ -368,21 +390,21 @@ struct mlx5_esw_flow_attr { u16 vlan_vid[MLX5_FS_VLAN_DEPTH]; u8 vlan_prio[MLX5_FS_VLAN_DEPTH]; u8 total_vlan; - bool vlan_handled; struct { u32 flags; struct mlx5_eswitch_rep *rep; + struct mlx5_pkt_reformat *pkt_reformat; struct mlx5_core_dev *mdev; - u32 encap_id; struct mlx5_termtbl_handle *termtbl; } dests[MLX5_MAX_FLOW_FWD_VPORTS]; - u32 mod_hdr_id; + struct mlx5_modify_hdr *modify_hdr; u8 inner_match_level; u8 outer_match_level; struct mlx5_fc *counter; u32 chain; u16 prio; u32 dest_chain; + u32 flags; struct mlx5e_tc_flow_parse_attr *parse_attr; }; @@ -407,6 +429,10 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, u16 vport, u16 vlan, u8 qos, u8 set_flags); +int mlx5_esw_create_vport_egress_acl_vlan(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + u16 vlan_id, u32 flow_action); + static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev, u8 vlan_depth) { @@ -445,6 +471,12 @@ static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) MLX5_VPORT_ECPF : 
MLX5_VPORT_PF; } +static inline bool +mlx5_esw_is_manager_vport(const struct mlx5_eswitch *esw, u16 vport_num) +{ + return esw->manager_vport == vport_num; +} + static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev) { return mlx5_core_is_ecpf_esw_manager(dev) ? @@ -513,6 +545,11 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); (vport) = &(esw)->vports[i], \ (i) < (esw)->total_vports; (i)++) +#define mlx5_esw_for_all_vports_reverse(esw, i, vport) \ + for ((i) = (esw)->total_vports - 1; \ + (vport) = &(esw)->vports[i], \ + (i) >= MLX5_VPORT_PF; (i)--) + #define mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) \ for ((i) = MLX5_VPORT_FIRST_VF; \ (vport) = &(esw)->vports[(i)], \ @@ -574,12 +611,24 @@ bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num); void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs); int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data); +int +mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, + enum mlx5_eswitch_vport_event enabled_events); +void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw); + +int +esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); +void +esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) { return 0; } -static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {} +static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) {} static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; } static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) @@ -589,10 +638,6 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) static inline void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) {} -#define FDB_MAX_CHAIN 1 -#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) -#define FDB_MAX_PRIO 1 - #endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 0323fd078271..979f13bdc203 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -37,6 +37,7 @@ #include <linux/mlx5/fs.h> #include "mlx5_core.h" #include "eswitch.h" +#include "eswitch_offloads_chains.h" #include "rdma.h" #include "en.h" #include "fs_core.h" @@ -47,10 +48,6 @@ * one for multicast. 
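mlx5_esw_for_all_vports_reverse() added above iterates the vport array from the last entry down to the PF so that teardown can mirror setup order. A compilable approximation of the macro over a plain array; the condition below checks the index before forming the pointer, a slight reordering of the kernel macro so the self-contained example stays well-defined at the end of the walk:

#include <stdio.h>

struct vport { int num; };

#define for_all_vports_reverse(arr, total, i, vp) \
	for ((i) = (total) - 1; (i) >= 0 && ((vp) = &(arr)[i], 1); (i)--)

int main(void)
{
	struct vport vports[4] = { {0}, {1}, {2}, {3} };
	struct vport *vp;
	int i;

	/* Disable in the opposite order of enabling: 3, 2, 1, 0. */
	for_all_vports_reverse(vports, 4, i, vp)
		printf("disable vport %d\n", vp->num);
	return 0;
}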
*/ #define MLX5_ESW_MISS_FLOWS (2) - -#define fdb_prio_table(esw, chain, prio, level) \ - (esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)] - #define UPLINK_REP_INDEX 0 static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, @@ -62,30 +59,12 @@ static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, return &esw->offloads.vport_reps[idx]; } -static struct mlx5_flow_table * -esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level); -static void -esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level); - -bool mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw) -{ - return (!!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)); -} - -u32 mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw) -{ - if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED) - return FDB_MAX_CHAIN; - - return 0; -} - -u16 mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw) +static bool +esw_check_ingress_prio_tag_enabled(const struct mlx5_eswitch *esw, + const struct mlx5_vport *vport) { - if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED) - return FDB_MAX_PRIO; - - return 1; + return (MLX5_CAP_GEN(esw->dev, prio_tag_required) && + mlx5_eswitch_is_vf_vport(esw, vport->vport)); } static void @@ -167,10 +146,17 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, } if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { - if (attr->dest_chain) { - struct mlx5_flow_table *ft; + struct mlx5_flow_table *ft; - ft = esw_get_prio_table(esw, attr->dest_chain, 1, 0); + if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) { + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = mlx5_esw_chains_get_tc_end_ft(esw); + i++; + } else if (attr->dest_chain) { + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + ft = mlx5_esw_chains_get_table(esw, attr->dest_chain, + 1, 0); if (IS_ERR(ft)) { rule = ERR_CAST(ft); goto err_create_goto_table; @@ -190,10 +176,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, MLX5_FLOW_DEST_VPORT_VHCA_ID; if (attr->dests[j].flags & MLX5_ESW_DEST_ENCAP) { flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - flow_act.reformat_id = attr->dests[j].encap_id; + flow_act.pkt_reformat = attr->dests[j].pkt_reformat; dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; - dest[i].vport.reformat_id = - attr->dests[j].encap_id; + dest[i].vport.pkt_reformat = + attr->dests[j].pkt_reformat; } i++; } @@ -213,9 +199,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - flow_act.modify_id = attr->mod_hdr_id; + flow_act.modify_hdr = attr->modify_hdr; - fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!split); + fdb = mlx5_esw_chains_get_table(esw, attr->chain, attr->prio, + !!split); if (IS_ERR(fdb)) { rule = ERR_CAST(fdb); goto err_esw_get; @@ -229,15 +216,15 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (IS_ERR(rule)) goto err_add_rule; else - esw->offloads.num_flows++; + atomic64_inc(&esw->offloads.num_flows); return rule; err_add_rule: - esw_put_prio_table(esw, attr->chain, attr->prio, !!split); + mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, !!split); err_esw_get: - if (attr->dest_chain) - esw_put_prio_table(esw, attr->dest_chain, 1, 0); + if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) && attr->dest_chain) + mlx5_esw_chains_put_table(esw, 
attr->dest_chain, 1, 0); err_create_goto_table: return rule; } @@ -254,13 +241,13 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule; int i; - fast_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 0); + fast_fdb = mlx5_esw_chains_get_table(esw, attr->chain, attr->prio, 0); if (IS_ERR(fast_fdb)) { rule = ERR_CAST(fast_fdb); goto err_get_fast; } - fwd_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 1); + fwd_fdb = mlx5_esw_chains_get_table(esw, attr->chain, attr->prio, 1); if (IS_ERR(fwd_fdb)) { rule = ERR_CAST(fwd_fdb); goto err_get_fwd; @@ -276,7 +263,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; if (attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) { dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; - dest[i].vport.reformat_id = attr->dests[i].encap_id; + dest[i].vport.pkt_reformat = attr->dests[i].pkt_reformat; } } dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; @@ -285,22 +272,22 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, mlx5_eswitch_set_rule_source_port(esw, spec, attr); - spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; if (attr->outer_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i); if (IS_ERR(rule)) goto add_err; - esw->offloads.num_flows++; + atomic64_inc(&esw->offloads.num_flows); return rule; add_err: - esw_put_prio_table(esw, attr->chain, attr->prio, 1); + mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 1); err_get_fwd: - esw_put_prio_table(esw, attr->chain, attr->prio, 0); + mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0); err_get_fast: return rule; } @@ -322,15 +309,16 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl); } - esw->offloads.num_flows--; + atomic64_dec(&esw->offloads.num_flows); if (fwd_rule) { - esw_put_prio_table(esw, attr->chain, attr->prio, 1); - esw_put_prio_table(esw, attr->chain, attr->prio, 0); + mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 1); + mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0); } else { - esw_put_prio_table(esw, attr->chain, attr->prio, !!split); + mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, + !!split); if (attr->dest_chain) - esw_put_prio_table(esw, attr->dest_chain, 1, 0); + mlx5_esw_chains_put_table(esw, attr->dest_chain, 1, 0); } } @@ -438,11 +426,13 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, fwd = !!((attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && !attr->dest_chain); + mutex_lock(&esw->state_lock); + err = esw_add_vlan_action_check(attr, push, pop, fwd); if (err) - return err; + goto unlock; - attr->vlan_handled = false; + attr->flags &= ~MLX5_ESW_ATTR_FLAG_VLAN_HANDLED; vport = esw_vlan_action_get_vport(attr, push, pop); @@ -450,14 +440,14 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, /* tracks VF --> wire rules without vlan push action */ if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) { vport->vlan_refcount++; - attr->vlan_handled = true; + attr->flags |= MLX5_ESW_ATTR_FLAG_VLAN_HANDLED; } - return 0; + goto unlock; } if (!push && !pop) - return 0; + goto unlock; if (!(offloads->vlan_push_pop_refcount)) { /* it's the 1st vlan rule, apply global vlan pop policy */ @@ -481,7 +471,9 @@ skip_set_push: } out: if (!err) - attr->vlan_handled = true; + attr->flags |= MLX5_ESW_ATTR_FLAG_VLAN_HANDLED; 
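mlx5_eswitch_add_vlan_action()/mlx5_eswitch_del_vlan_action() now take esw->state_lock and route every early return through a single unlock label, so the vlan_refcount and vlan_push_pop_refcount updates always happen under the lock. A standalone pthread sketch of that single-exit shape, with a made-up refcount and error condition:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;
static int vlan_refcount;   /* hypothetical shared state guarded by state_lock */

static int add_vlan_action(int push, int pop)
{
	int err = 0;

	pthread_mutex_lock(&state_lock);

	if (!push && !pop)        /* nothing to do, but still one exit path */
		goto unlock;

	if (push && pop) {        /* unsupported combination in this sketch */
		err = -1;
		goto unlock;
	}

	vlan_refcount++;
unlock:
	pthread_mutex_unlock(&state_lock);
	return err;
}

int main(void)
{
	printf("err=%d refcount=%d\n", add_vlan_action(1, 0), vlan_refcount);
	return 0;
}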
+unlock: + mutex_unlock(&esw->state_lock); return err; } @@ -497,13 +489,15 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) return 0; - if (!attr->vlan_handled) + if (!(attr->flags & MLX5_ESW_ATTR_FLAG_VLAN_HANDLED)) return 0; push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + mutex_lock(&esw->state_lock); + vport = esw_vlan_action_get_vport(attr, push, pop); if (!push && !pop && fwd) { @@ -511,7 +505,7 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) vport->vlan_refcount--; - return 0; + goto out; } if (push) { @@ -529,12 +523,13 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, skip_unset_push: offloads->vlan_push_pop_refcount--; if (offloads->vlan_push_pop_refcount) - return 0; + goto out; /* no more vlan rules, stop global vlan pop policy */ err = esw_set_global_vlan_pop(esw, 0); out: + mutex_unlock(&esw->state_lock); return err; } @@ -568,8 +563,8 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, u16 vport, dest.vport.num = vport; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, - &flow_act, &dest, 1); + flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); if (IS_ERR(flow_rule)) esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule)); out: @@ -583,40 +578,17 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule) mlx5_del_flow_rules(rule); } -static int mlx5_eswitch_enable_passing_vport_metadata(struct mlx5_eswitch *esw) +static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable) { u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; u8 fdb_to_vport_reg_c_id; int err; - err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport, - out, sizeof(out)); - if (err) - return err; - - fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out, - esw_vport_context.fdb_to_vport_reg_c_id); - - fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0; - MLX5_SET(modify_esw_vport_context_in, in, - esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id); - - MLX5_SET(modify_esw_vport_context_in, in, - field_select.fdb_to_vport_reg_c_id, 1); - - return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport, - in, sizeof(in)); -} - -static int mlx5_eswitch_disable_passing_vport_metadata(struct mlx5_eswitch *esw) -{ - u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; - u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; - u8 fdb_to_vport_reg_c_id; - int err; + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) + return 0; - err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport, + err = mlx5_eswitch_query_esw_vport_context(esw->dev, 0, false, out, sizeof(out)); if (err) return err; @@ -624,7 +596,10 @@ static int mlx5_eswitch_disable_passing_vport_metadata(struct mlx5_eswitch *esw) fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out, esw_vport_context.fdb_to_vport_reg_c_id); - fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0; + if (enable) + fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0; + else + fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0; MLX5_SET(modify_esw_vport_context_in, 
in, esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id); @@ -632,7 +607,7 @@ static int mlx5_eswitch_disable_passing_vport_metadata(struct mlx5_eswitch *esw) MLX5_SET(modify_esw_vport_context_in, in, field_select.fdb_to_vport_reg_c_id, 1); - return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport, + return mlx5_eswitch_modify_esw_vport_context(esw->dev, 0, false, in, sizeof(in)); } @@ -830,8 +805,8 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) dest.vport.num = esw->manager_vport; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, - &flow_act, &dest, 1); + flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); esw_warn(esw->dev, "FDB: Failed to add unicast miss flow rule err %d\n", err); @@ -845,8 +820,8 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16); dmac_v[0] = 0x01; - flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, - &flow_act, &dest, 1); + flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); esw_warn(esw->dev, "FDB: Failed to add multicast miss flow rule err %d\n", err); @@ -861,174 +836,6 @@ out: return err; } -#define ESW_OFFLOADS_NUM_GROUPS 4 - -/* Firmware currently has 4 pool of 4 sizes that it supports (ESW_POOLS), - * and a virtual memory region of 16M (ESW_SIZE), this region is duplicated - * for each flow table pool. We can allocate up to 16M of each pool, - * and we keep track of how much we used via put/get_sz_to_pool. - * Firmware doesn't report any of this for now. 
- * ESW_POOL is expected to be sorted from large to small - */ -#define ESW_SIZE (16 * 1024 * 1024) -const unsigned int ESW_POOLS[4] = { 4 * 1024 * 1024, 1 * 1024 * 1024, - 64 * 1024, 4 * 1024 }; - -static int -get_sz_from_pool(struct mlx5_eswitch *esw) -{ - int sz = 0, i; - - for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) { - if (esw->fdb_table.offloads.fdb_left[i]) { - --esw->fdb_table.offloads.fdb_left[i]; - sz = ESW_POOLS[i]; - break; - } - } - - return sz; -} - -static void -put_sz_to_pool(struct mlx5_eswitch *esw, int sz) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) { - if (sz >= ESW_POOLS[i]) { - ++esw->fdb_table.offloads.fdb_left[i]; - break; - } - } -} - -static struct mlx5_flow_table * -create_next_size_table(struct mlx5_eswitch *esw, - struct mlx5_flow_namespace *ns, - u16 table_prio, - int level, - u32 flags) -{ - struct mlx5_flow_table *fdb; - int sz; - - sz = get_sz_from_pool(esw); - if (!sz) - return ERR_PTR(-ENOSPC); - - fdb = mlx5_create_auto_grouped_flow_table(ns, - table_prio, - sz, - ESW_OFFLOADS_NUM_GROUPS, - level, - flags); - if (IS_ERR(fdb)) { - esw_warn(esw->dev, "Failed to create FDB Table err %d (table prio: %d, level: %d, size: %d)\n", - (int)PTR_ERR(fdb), table_prio, level, sz); - put_sz_to_pool(esw, sz); - } - - return fdb; -} - -static struct mlx5_flow_table * -esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level) -{ - struct mlx5_core_dev *dev = esw->dev; - struct mlx5_flow_table *fdb = NULL; - struct mlx5_flow_namespace *ns; - int table_prio, l = 0; - u32 flags = 0; - - if (chain == FDB_SLOW_PATH_CHAIN) - return esw->fdb_table.offloads.slow_fdb; - - mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock); - - fdb = fdb_prio_table(esw, chain, prio, level).fdb; - if (fdb) { - /* take ref on earlier levels as well */ - while (level >= 0) - fdb_prio_table(esw, chain, prio, level--).num_rules++; - mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); - return fdb; - } - - ns = mlx5_get_fdb_sub_ns(dev, chain); - if (!ns) { - esw_warn(dev, "Failed to get FDB sub namespace\n"); - mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); - return ERR_PTR(-EOPNOTSUPP); - } - - if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) - flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | - MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); - - table_prio = (chain * FDB_MAX_PRIO) + prio - 1; - - /* create earlier levels for correct fs_core lookup when - * connecting tables - */ - for (l = 0; l <= level; l++) { - if (fdb_prio_table(esw, chain, prio, l).fdb) { - fdb_prio_table(esw, chain, prio, l).num_rules++; - continue; - } - - fdb = create_next_size_table(esw, ns, table_prio, l, flags); - if (IS_ERR(fdb)) { - l--; - goto err_create_fdb; - } - - fdb_prio_table(esw, chain, prio, l).fdb = fdb; - fdb_prio_table(esw, chain, prio, l).num_rules = 1; - } - - mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); - return fdb; - -err_create_fdb: - mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); - if (l >= 0) - esw_put_prio_table(esw, chain, prio, l); - - return fdb; -} - -static void -esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level) -{ - int l; - - if (chain == FDB_SLOW_PATH_CHAIN) - return; - - mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock); - - for (l = level; l >= 0; l--) { - if (--(fdb_prio_table(esw, chain, prio, l).num_rules) > 0) - continue; - - put_sz_to_pool(esw, fdb_prio_table(esw, chain, prio, l).fdb->max_fte); - mlx5_destroy_flow_table(fdb_prio_table(esw, chain, prio, l).fdb); - fdb_prio_table(esw, chain, prio, l).fdb = 
NULL; - } - - mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); -} - -static void esw_destroy_offloads_fast_fdb_tables(struct mlx5_eswitch *esw) -{ - /* If lazy creation isn't supported, deref the fast path tables */ - if (!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)) { - esw_put_prio_table(esw, 0, 1, 1); - esw_put_prio_table(esw, 0, 1, 0); - } -} - #define MAX_PF_SQ 256 #define MAX_SQ_NVPORTS 32 @@ -1061,16 +868,16 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_core_dev *dev = esw->dev; - u32 *flow_group_in, max_flow_counter; struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb = NULL; - int table_size, ix, err = 0, i; + u32 flags = 0, *flow_group_in; + int table_size, ix, err = 0; struct mlx5_flow_group *g; - u32 flags = 0, fdb_max; void *match_criteria; u8 *dmac; esw_debug(esw->dev, "Create offloads FDB Tables\n"); + flow_group_in = kvzalloc(inlen, GFP_KERNEL); if (!flow_group_in) return -ENOMEM; @@ -1081,19 +888,13 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) err = -EOPNOTSUPP; goto ns_err; } - - max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | - MLX5_CAP_GEN(dev, max_flow_counter_15_0); - fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); - - esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d), groups(%d), max flow table size(2^%d))\n", - MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size), - max_flow_counter, ESW_OFFLOADS_NUM_GROUPS, - fdb_max); - - for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) - esw->fdb_table.offloads.fdb_left[i] = - ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0; + esw->fdb_table.offloads.ns = root_ns; + err = mlx5_flow_namespace_set_mode(root_ns, + esw->dev->priv.steering->mode); + if (err) { + esw_warn(dev, "Failed to set FDB namespace steering mode\n"); + goto ns_err; + } table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + MLX5_ESW_MISS_FLOWS + esw->total_vports; @@ -1117,16 +918,10 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) } esw->fdb_table.offloads.slow_fdb = fdb; - /* If lazy creation isn't supported, open the fast path tables now */ - if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, multi_fdb_encap) && - esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { - esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; - esw_warn(dev, "Lazy creation of flow tables isn't supported, ignoring priorities\n"); - esw_get_prio_table(esw, 0, 1, 0); - esw_get_prio_table(esw, 0, 1, 1); - } else { - esw_debug(dev, "Lazy creation of flow tables supported, deferring table opening\n"); - esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; + err = mlx5_esw_chains_create(esw); + if (err) { + esw_warn(dev, "Failed to create fdb chains err(%d)\n", err); + goto fdb_chains_err; } /* create send-to-vport group */ @@ -1217,9 +1012,12 @@ miss_err: peer_miss_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); send_vport_err: - esw_destroy_offloads_fast_fdb_tables(esw); + mlx5_esw_chains_destroy(esw); +fdb_chains_err: mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); slow_fdb_err: + /* Holds true only as long as DMFS is the default */ + mlx5_flow_namespace_set_mode(root_ns, MLX5_FLOW_STEERING_MODE_DMFS); ns_err: kvfree(flow_group_in); return err; @@ -1237,8 +1035,11 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch 
*esw) mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); + mlx5_esw_chains_destroy(esw); mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); - esw_destroy_offloads_fast_fdb_tables(esw); + /* Holds true only as long as DMFS is the default */ + mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns, + MLX5_FLOW_STEERING_MODE_DMFS); } static int esw_create_offloads_table(struct mlx5_eswitch *esw, int nvports) @@ -1371,7 +1172,7 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, return -EINVAL; } - mlx5_eswitch_disable(esw); + mlx5_eswitch_disable(esw, true); mlx5_eswitch_update_num_of_vfs(esw, esw->dev->priv.sriov.num_vfs); err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS); if (err) { @@ -1402,10 +1203,9 @@ void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw) int esw_offloads_init_reps(struct mlx5_eswitch *esw) { int total_vports = esw->total_vports; - struct mlx5_core_dev *dev = esw->dev; struct mlx5_eswitch_rep *rep; - u8 hw_id[ETH_ALEN], rep_type; int vport_index; + u8 rep_type; esw->offloads.vport_reps = kcalloc(total_vports, sizeof(struct mlx5_eswitch_rep), @@ -1413,12 +1213,9 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) if (!esw->offloads.vport_reps) return -ENOMEM; - mlx5_query_mac_address(dev, hw_id); - mlx5_esw_for_all_reps(esw, vport_index, rep) { rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport_index); rep->vport_index = vport_index; - ether_addr_copy(rep->hw_id, hw_id); for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) atomic_set(&rep->rep_data[rep_type].state, @@ -1640,13 +1437,42 @@ static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw) esw_del_fdb_peer_miss_rules(esw); } +static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw, + bool pair) +{ + struct mlx5_flow_root_namespace *peer_ns; + struct mlx5_flow_root_namespace *ns; + int err; + + peer_ns = peer_esw->dev->priv.steering->fdb_root_ns; + ns = esw->dev->priv.steering->fdb_root_ns; + + if (pair) { + err = mlx5_flow_namespace_set_peer(ns, peer_ns); + if (err) + return err; + + err = mlx5_flow_namespace_set_peer(peer_ns, ns); + if (err) { + mlx5_flow_namespace_set_peer(ns, NULL); + return err; + } + } else { + mlx5_flow_namespace_set_peer(ns, NULL); + mlx5_flow_namespace_set_peer(peer_ns, NULL); + } + + return 0; +} + static int mlx5_esw_offloads_devcom_event(int event, void *my_data, void *event_data) { struct mlx5_eswitch *esw = my_data; - struct mlx5_eswitch *peer_esw = event_data; struct mlx5_devcom *devcom = esw->dev->priv.devcom; + struct mlx5_eswitch *peer_esw = event_data; int err; switch (event) { @@ -1655,9 +1481,12 @@ static int mlx5_esw_offloads_devcom_event(int event, mlx5_eswitch_vport_match_metadata_enabled(peer_esw)) break; - err = mlx5_esw_offloads_pair(esw, peer_esw); + err = mlx5_esw_offloads_set_ns_peer(esw, peer_esw, true); if (err) goto err_out; + err = mlx5_esw_offloads_pair(esw, peer_esw); + if (err) + goto err_peer; err = mlx5_esw_offloads_pair(peer_esw, esw); if (err) @@ -1673,6 +1502,7 @@ static int mlx5_esw_offloads_devcom_event(int event, mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false); mlx5_esw_offloads_unpair(peer_esw); mlx5_esw_offloads_unpair(esw); + mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false); break; } @@ -1680,7 +1510,8 @@ static int mlx5_esw_offloads_devcom_event(int event, err_pair: mlx5_esw_offloads_unpair(esw); - +err_peer: + mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false); err_out: 
mlx5_core_err(esw->dev, "esw offloads devcom event failure, event %u err %d", event, err); @@ -1732,12 +1563,9 @@ static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw, * required, allow * Unmatched traffic is allowed by default */ - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) { - err = -ENOMEM; - goto out_no_mem; - } + if (!spec) + return -ENOMEM; /* Untagged packets - push prio tag VLAN, allow */ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); @@ -1749,9 +1577,9 @@ static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw, flow_act.vlan[0].vid = 0; flow_act.vlan[0].prio = 0; - if (vport->ingress.modify_metadata_rule) { + if (vport->ingress.offloads.modify_metadata_rule) { flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - flow_act.modify_id = vport->ingress.modify_metadata_id; + flow_act.modify_hdr = vport->ingress.offloads.modify_metadata; } vport->ingress.allow_rule = @@ -1763,14 +1591,9 @@ static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw, "vport[%d] configure ingress untagged allow rule, err(%d)\n", vport->vport, err); vport->ingress.allow_rule = NULL; - goto out; } -out: kvfree(spec); -out_no_mem: - if (err) - esw_vport_cleanup_ingress_rules(esw, vport); return err; } @@ -1787,9 +1610,11 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, MLX5_SET(set_action_in, action, data, mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport)); - err = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, - 1, action, &vport->ingress.modify_metadata_id); - if (err) { + vport->ingress.offloads.modify_metadata = + mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, + 1, action); + if (IS_ERR(vport->ingress.offloads.modify_metadata)) { + err = PTR_ERR(vport->ingress.offloads.modify_metadata); esw_warn(esw->dev, "failed to alloc modify header for vport %d ingress acl (%d)\n", vport->vport, err); @@ -1797,110 +1622,134 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, } flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW; - flow_act.modify_id = vport->ingress.modify_metadata_id; - vport->ingress.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl, - &spec, &flow_act, NULL, 0); - if (IS_ERR(vport->ingress.modify_metadata_rule)) { - err = PTR_ERR(vport->ingress.modify_metadata_rule); + flow_act.modify_hdr = vport->ingress.offloads.modify_metadata; + vport->ingress.offloads.modify_metadata_rule = + mlx5_add_flow_rules(vport->ingress.acl, + &spec, &flow_act, NULL, 0); + if (IS_ERR(vport->ingress.offloads.modify_metadata_rule)) { + err = PTR_ERR(vport->ingress.offloads.modify_metadata_rule); esw_warn(esw->dev, "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n", vport->vport, err); - vport->ingress.modify_metadata_rule = NULL; - goto out; + mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata); + vport->ingress.offloads.modify_metadata_rule = NULL; } - -out: - if (err) - mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id); return err; } -void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +static void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { - if (vport->ingress.modify_metadata_rule) { - mlx5_del_flow_rules(vport->ingress.modify_metadata_rule); - mlx5_modify_header_dealloc(esw->dev, 
vport->ingress.modify_metadata_id); + if (vport->ingress.offloads.modify_metadata_rule) { + mlx5_del_flow_rules(vport->ingress.offloads.modify_metadata_rule); + mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata); - vport->ingress.modify_metadata_rule = NULL; + vport->ingress.offloads.modify_metadata_rule = NULL; } } -static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +static int esw_vport_create_ingress_acl_group(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { - struct mlx5_flow_act flow_act = {0}; - struct mlx5_flow_spec *spec; - int err = 0; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + void *match_criteria; + u32 *flow_group_in; + u32 flow_index = 0; + int ret = 0; - if (!MLX5_CAP_GEN(esw->dev, prio_tag_required)) - return 0; + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; - /* For prio tag mode, there is only 1 FTEs: - * 1) prio tag packets - pop the prio tag VLAN, allow - * Unmatched traffic is allowed by default - */ + if (esw_check_ingress_prio_tag_enabled(esw, vport)) { + /* This group is to hold FTE to match untagged packets when prio_tag + * is enabled. + */ + memset(flow_group_in, 0, inlen); - esw_vport_cleanup_egress_rules(esw, vport); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, match_criteria); + MLX5_SET(create_flow_group_in, flow_group_in, + match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + ret = PTR_ERR(g); + esw_warn(esw->dev, "vport[%d] ingress create untagged flow group, err(%d)\n", + vport->vport, ret); + goto prio_tag_err; + } + vport->ingress.offloads.metadata_prio_tag_grp = g; + flow_index++; + } - err = esw_vport_enable_egress_acl(esw, vport); - if (err) { - mlx5_core_warn(esw->dev, - "failed to enable egress acl (%d) on vport[%d]\n", - err, vport->vport); - return err; + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + /* This group holds an FTE with no matches for add metadata for + * tagged packets, if prio-tag is enabled (as a fallthrough), + * or all traffic in case prio-tag is disabled. 
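The modify-header object is now handled as an opaque struct mlx5_modify_hdr * returned by mlx5_modify_header_alloc() and checked with IS_ERR()/PTR_ERR(), instead of an int return plus a raw u32 id out-parameter, so there is no separate id to keep in sync with the allocation. A minimal userspace imitation of that pointer-encoded-error convention; err_ptr()/is_err()/ptr_err() and alloc_thing() are simplified stand-ins, not the kernel's linux/err.h helpers:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-ins for ERR_PTR()/IS_ERR()/PTR_ERR(). */
static inline void *err_ptr(long err)      { return (void *)(intptr_t)err; }
static inline int   is_err(const void *p)  { return (uintptr_t)p >= (uintptr_t)-4095; }
static inline long  ptr_err(const void *p) { return (long)(intptr_t)p; }

struct thing { int id; };

static struct thing *alloc_thing(int want_failure)
{
	struct thing *t;

	if (want_failure)
		return err_ptr(-ENOMEM);   /* the error travels inside the pointer */
	t = malloc(sizeof(*t));
	if (!t)
		return err_ptr(-ENOMEM);
	t->id = 42;
	return t;
}

int main(void)
{
	struct thing *t = alloc_thing(0);

	if (is_err(t)) {
		printf("alloc failed: %ld\n", ptr_err(t));
		return 1;
	}
	printf("got handle id=%d\n", t->id);
	free(t);
	return 0;
}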
+ */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + ret = PTR_ERR(g); + esw_warn(esw->dev, "vport[%d] ingress create drop flow group, err(%d)\n", + vport->vport, ret); + goto metadata_err; + } + vport->ingress.offloads.metadata_allmatch_grp = g; } - esw_debug(esw->dev, - "vport[%d] configure prio tag egress rules\n", vport->vport); + kvfree(flow_group_in); + return 0; - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) { - err = -ENOMEM; - goto out_no_mem; +metadata_err: + if (!IS_ERR_OR_NULL(vport->ingress.offloads.metadata_prio_tag_grp)) { + mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp); + vport->ingress.offloads.metadata_prio_tag_grp = NULL; } +prio_tag_err: + kvfree(flow_group_in); + return ret; +} - /* prio tag vlan rule - pop it so VF receives untagged packets */ - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); - MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag); - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid); - MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, 0); - - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_VLAN_POP | - MLX5_FLOW_CONTEXT_ACTION_ALLOW; - vport->egress.allowed_vlan = - mlx5_add_flow_rules(vport->egress.acl, spec, - &flow_act, NULL, 0); - if (IS_ERR(vport->egress.allowed_vlan)) { - err = PTR_ERR(vport->egress.allowed_vlan); - esw_warn(esw->dev, - "vport[%d] configure egress pop prio tag vlan rule failed, err(%d)\n", - vport->vport, err); - vport->egress.allowed_vlan = NULL; - goto out; +static void esw_vport_destroy_ingress_acl_group(struct mlx5_vport *vport) +{ + if (vport->ingress.offloads.metadata_allmatch_grp) { + mlx5_destroy_flow_group(vport->ingress.offloads.metadata_allmatch_grp); + vport->ingress.offloads.metadata_allmatch_grp = NULL; } -out: - kvfree(spec); -out_no_mem: - if (err) - esw_vport_cleanup_egress_rules(esw, vport); - return err; + if (vport->ingress.offloads.metadata_prio_tag_grp) { + mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp); + vport->ingress.offloads.metadata_prio_tag_grp = NULL; + } } -static int esw_vport_ingress_common_config(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +static int esw_vport_ingress_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { + int num_ftes = 0; int err; if (!mlx5_eswitch_vport_match_metadata_enabled(esw) && - !MLX5_CAP_GEN(esw->dev, prio_tag_required)) + !esw_check_ingress_prio_tag_enabled(esw, vport)) return 0; esw_vport_cleanup_ingress_rules(esw, vport); - err = esw_vport_enable_ingress_acl(esw, vport); + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) + num_ftes++; + if (esw_check_ingress_prio_tag_enabled(esw, vport)) + num_ftes++; + + err = esw_vport_create_ingress_acl_table(esw, vport, num_ftes); if (err) { esw_warn(esw->dev, "failed to enable ingress acl (%d) on vport[%d]\n", @@ -1908,25 +1757,63 @@ static int esw_vport_ingress_common_config(struct mlx5_eswitch *esw, return err; } + err = esw_vport_create_ingress_acl_group(esw, vport); + if (err) + goto group_err; + esw_debug(esw->dev, "vport[%d] configure ingress rules\n", vport->vport); if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { err = 
esw_vport_add_ingress_acl_modify_metadata(esw, vport); if (err) - goto out; + goto metadata_err; } - if (MLX5_CAP_GEN(esw->dev, prio_tag_required) && - mlx5_eswitch_is_vf_vport(esw, vport->vport)) { + if (esw_check_ingress_prio_tag_enabled(esw, vport)) { err = esw_vport_ingress_prio_tag_config(esw, vport); if (err) - goto out; + goto prio_tag_err; } + return 0; -out: +prio_tag_err: + esw_vport_del_ingress_acl_modify_metadata(esw, vport); +metadata_err: + esw_vport_destroy_ingress_acl_group(vport); +group_err: + esw_vport_destroy_ingress_acl_table(vport); + return err; +} + +static int esw_vport_egress_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int err; + + if (!MLX5_CAP_GEN(esw->dev, prio_tag_required)) + return 0; + + esw_vport_cleanup_egress_rules(esw, vport); + + err = esw_vport_enable_egress_acl(esw, vport); if (err) - esw_vport_disable_ingress_acl(esw, vport); + return err; + + /* For prio tag mode, there is only 1 FTEs: + * 1) prio tag packets - pop the prio tag VLAN, allow + * Unmatched traffic is allowed by default + */ + esw_debug(esw->dev, + "vport[%d] configure prio tag egress rules\n", vport->vport); + + /* prio tag vlan rule - pop it so VF receives untagged packets */ + err = mlx5_esw_create_vport_egress_acl_vlan(esw, vport, 0, + MLX5_FLOW_CONTEXT_ACTION_VLAN_POP | + MLX5_FLOW_CONTEXT_ACTION_ALLOW); + if (err) + esw_vport_disable_egress_acl(esw, vport); + return err; } @@ -1950,54 +1837,60 @@ esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw) return true; } -static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw) +int +esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { - struct mlx5_vport *vport; - int i, j; int err; - if (esw_check_vport_match_metadata_supported(esw)) - esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; - - mlx5_esw_for_all_vports(esw, i, vport) { - err = esw_vport_ingress_common_config(esw, vport); - if (err) - goto err_ingress; + err = esw_vport_ingress_config(esw, vport); + if (err) + return err; - if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) { - err = esw_vport_egress_prio_tag_config(esw, vport); - if (err) - goto err_egress; + if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) { + err = esw_vport_egress_config(esw, vport); + if (err) { + esw_vport_cleanup_ingress_rules(esw, vport); + esw_vport_del_ingress_acl_modify_metadata(esw, vport); + esw_vport_destroy_ingress_acl_group(vport); + esw_vport_destroy_ingress_acl_table(vport); } } + return err; +} - if (mlx5_eswitch_vport_match_metadata_enabled(esw)) - esw_info(esw->dev, "Use metadata reg_c as source vport to match\n"); +void +esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + esw_vport_disable_egress_acl(esw, vport); + esw_vport_cleanup_ingress_rules(esw, vport); + esw_vport_del_ingress_acl_modify_metadata(esw, vport); + esw_vport_destroy_ingress_acl_group(vport); + esw_vport_destroy_ingress_acl_table(vport); +} - return 0; +static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + int err; -err_egress: - esw_vport_disable_ingress_acl(esw, vport); -err_ingress: - for (j = MLX5_VPORT_PF; j < i; j++) { - vport = &esw->vports[j]; - esw_vport_disable_egress_acl(esw, vport); - esw_vport_disable_ingress_acl(esw, vport); - } + if (esw_check_vport_match_metadata_supported(esw)) + esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + err = 
esw_vport_create_offloads_acl_tables(esw, vport); + if (err) + esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; return err; } -static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw) +static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) { struct mlx5_vport *vport; - int i; - - mlx5_esw_for_all_vports(esw, i, vport) { - esw_vport_disable_egress_acl(esw, vport); - esw_vport_disable_ingress_acl(esw, vport); - } + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + esw_vport_destroy_offloads_acl_tables(esw, vport); esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; } @@ -2013,9 +1906,8 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw) total_vports = num_vfs + MLX5_SPECIAL_VPORTS(esw->dev); memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); - mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); - err = esw_create_offloads_acl_tables(esw); + err = esw_create_uplink_offloads_acl_tables(esw); if (err) return err; @@ -2040,7 +1932,7 @@ create_ft_err: esw_destroy_offloads_fdb_tables(esw); create_fdb_err: - esw_destroy_offloads_acl_tables(esw); + esw_destroy_uplink_offloads_acl_tables(esw); return err; } @@ -2050,7 +1942,7 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) esw_destroy_vport_rx_group(esw); esw_destroy_offloads_table(esw); esw_destroy_offloads_fdb_tables(esw); - esw_destroy_offloads_acl_tables(esw); + esw_destroy_uplink_offloads_acl_tables(esw); } static void @@ -2120,9 +2012,10 @@ int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type return NOTIFY_OK; } -int esw_offloads_init(struct mlx5_eswitch *esw) +int esw_offloads_enable(struct mlx5_eswitch *esw) { - int err; + struct mlx5_vport *vport; + int err, i; if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) && MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, decap)) @@ -2130,15 +2023,22 @@ int esw_offloads_init(struct mlx5_eswitch *esw) else esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; + mlx5_rdma_enable_roce(esw->dev); err = esw_offloads_steering_init(esw); if (err) - return err; + goto err_steering_init; - if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { - err = mlx5_eswitch_enable_passing_vport_metadata(esw); - if (err) - goto err_vport_metadata; - } + err = esw_set_passing_vport_metadata(esw, true); + if (err) + goto err_vport_metadata; + + /* Representor will control the vport link state */ + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN; + + err = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE); + if (err) + goto err_vports; err = esw_offloads_load_all_reps(esw); if (err) @@ -2147,15 +2047,16 @@ int esw_offloads_init(struct mlx5_eswitch *esw) esw_offloads_devcom_init(esw); mutex_init(&esw->offloads.termtbl_mutex); - mlx5_rdma_enable_roce(esw->dev); - return 0; err_reps: - if (mlx5_eswitch_vport_match_metadata_enabled(esw)) - mlx5_eswitch_disable_passing_vport_metadata(esw); + mlx5_eswitch_disable_pf_vf_vports(esw); +err_vports: + esw_set_passing_vport_metadata(esw, false); err_vport_metadata: esw_offloads_steering_cleanup(esw); +err_steering_init: + mlx5_rdma_disable_roce(esw->dev); return err; } @@ -2164,7 +2065,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, { int err, err1; - mlx5_eswitch_disable(esw); + mlx5_eswitch_disable(esw, true); err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); @@ -2178,14 +2079,14 @@ static int 
esw_offloads_stop(struct mlx5_eswitch *esw, return err; } -void esw_offloads_cleanup(struct mlx5_eswitch *esw) +void esw_offloads_disable(struct mlx5_eswitch *esw) { - mlx5_rdma_disable_roce(esw->dev); esw_offloads_devcom_cleanup(esw); esw_offloads_unload_all_reps(esw); - if (mlx5_eswitch_vport_match_metadata_enabled(esw)) - mlx5_eswitch_disable_passing_vport_metadata(esw); + mlx5_eswitch_disable_pf_vf_vports(esw); + esw_set_passing_vport_metadata(esw, false); esw_offloads_steering_cleanup(esw); + mlx5_rdma_disable_roce(esw->dev); esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; } @@ -2345,7 +2246,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, break; } - if (esw->offloads.num_flows > 0) { + if (atomic64_read(&esw->offloads.num_flows) > 0) { NL_SET_ERR_MSG_MOD(extack, "Can't set inline mode when flows are configured"); return -EOPNOTSUPP; @@ -2455,7 +2356,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, if (esw->offloads.encap == encap) return 0; - if (esw->offloads.num_flows > 0) { + if (atomic64_read(&esw->offloads.num_flows) > 0) { NL_SET_ERR_MSG_MOD(extack, "Can't set encapsulation when flows are configured"); return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c new file mode 100644 index 000000000000..c5a446e295aa --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c @@ -0,0 +1,758 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2020 Mellanox Technologies. + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/fs.h> + +#include "eswitch_offloads_chains.h" +#include "mlx5_core.h" +#include "fs_core.h" +#include "eswitch.h" +#include "en.h" + +#define esw_chains_priv(esw) ((esw)->fdb_table.offloads.esw_chains_priv) +#define esw_chains_lock(esw) (esw_chains_priv(esw)->lock) +#define esw_chains_ht(esw) (esw_chains_priv(esw)->chains_ht) +#define esw_prios_ht(esw) (esw_chains_priv(esw)->prios_ht) +#define fdb_pool_left(esw) (esw_chains_priv(esw)->fdb_left) +#define tc_slow_fdb(esw) ((esw)->fdb_table.offloads.slow_fdb) +#define tc_end_fdb(esw) (esw_chains_priv(esw)->tc_end_fdb) +#define fdb_ignore_flow_level_supported(esw) \ + (MLX5_CAP_ESW_FLOWTABLE_FDB((esw)->dev, ignore_flow_level)) + +#define ESW_OFFLOADS_NUM_GROUPS 4 + +/* Firmware currently has 4 pool of 4 sizes that it supports (ESW_POOLS), + * and a virtual memory region of 16M (ESW_SIZE), this region is duplicated + * for each flow table pool. We can allocate up to 16M of each pool, + * and we keep track of how much we used via get_next_avail_sz_from_pool. + * Firmware doesn't report any of this for now. + * ESW_POOL is expected to be sorted from large to small and match firmware + * pools. 
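The pool comment above (and the ESW_POOLS table that follows it) budgets a 16 MB virtual region per table size, so each fdb_left[] slot starts at ESW_SIZE / ESW_POOLS[i] tables when that size fits under the firmware's log_max_ft_size. A worked standalone calculation, assuming all four sizes fit:

#include <stdio.h>

#define ESW_SIZE (16 * 1024 * 1024)

int main(void)
{
	const unsigned int pools[] = { 4 * 1024 * 1024, 1 * 1024 * 1024,
				       64 * 1024, 4 * 1024 };
	unsigned int i;

	/* 16M / {4M, 1M, 64K, 4K} -> 4, 16, 256 and 4096 tables per pool. */
	for (i = 0; i < sizeof(pools) / sizeof(pools[0]); i++)
		printf("pool %u: %u tables of %u entries\n",
		       i, ESW_SIZE / pools[i], pools[i]);
	return 0;
}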
+ */ +#define ESW_SIZE (16 * 1024 * 1024) +static const unsigned int ESW_POOLS[] = { 4 * 1024 * 1024, + 1 * 1024 * 1024, + 64 * 1024, + 4 * 1024, }; + +struct mlx5_esw_chains_priv { + struct rhashtable chains_ht; + struct rhashtable prios_ht; + /* Protects above chains_ht and prios_ht */ + struct mutex lock; + + struct mlx5_flow_table *tc_end_fdb; + + int fdb_left[ARRAY_SIZE(ESW_POOLS)]; +}; + +struct fdb_chain { + struct rhash_head node; + + u32 chain; + + int ref; + + struct mlx5_eswitch *esw; + struct list_head prios_list; +}; + +struct fdb_prio_key { + u32 chain; + u32 prio; + u32 level; +}; + +struct fdb_prio { + struct rhash_head node; + struct list_head list; + + struct fdb_prio_key key; + + int ref; + + struct fdb_chain *fdb_chain; + struct mlx5_flow_table *fdb; + struct mlx5_flow_table *next_fdb; + struct mlx5_flow_group *miss_group; + struct mlx5_flow_handle *miss_rule; +}; + +static const struct rhashtable_params chain_params = { + .head_offset = offsetof(struct fdb_chain, node), + .key_offset = offsetof(struct fdb_chain, chain), + .key_len = sizeof_field(struct fdb_chain, chain), + .automatic_shrinking = true, +}; + +static const struct rhashtable_params prio_params = { + .head_offset = offsetof(struct fdb_prio, node), + .key_offset = offsetof(struct fdb_prio, key), + .key_len = sizeof_field(struct fdb_prio, key), + .automatic_shrinking = true, +}; + +bool mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw) +{ + return esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; +} + +u32 mlx5_esw_chains_get_chain_range(struct mlx5_eswitch *esw) +{ + if (!mlx5_esw_chains_prios_supported(esw)) + return 1; + + if (fdb_ignore_flow_level_supported(esw)) + return UINT_MAX - 1; + + return FDB_TC_MAX_CHAIN; +} + +u32 mlx5_esw_chains_get_ft_chain(struct mlx5_eswitch *esw) +{ + return mlx5_esw_chains_get_chain_range(esw) + 1; +} + +u32 mlx5_esw_chains_get_prio_range(struct mlx5_eswitch *esw) +{ + if (!mlx5_esw_chains_prios_supported(esw)) + return 1; + + if (fdb_ignore_flow_level_supported(esw)) + return UINT_MAX; + + return FDB_TC_MAX_PRIO; +} + +static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw) +{ + if (fdb_ignore_flow_level_supported(esw)) + return UINT_MAX; + + return FDB_TC_LEVELS_PER_PRIO; +} + +#define POOL_NEXT_SIZE 0 +static int +mlx5_esw_chains_get_avail_sz_from_pool(struct mlx5_eswitch *esw, + int desired_size) +{ + int i, found_i = -1; + + for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--) { + if (fdb_pool_left(esw)[i] && ESW_POOLS[i] > desired_size) { + found_i = i; + if (desired_size != POOL_NEXT_SIZE) + break; + } + } + + if (found_i != -1) { + --fdb_pool_left(esw)[found_i]; + return ESW_POOLS[found_i]; + } + + return 0; +} + +static void +mlx5_esw_chains_put_sz_to_pool(struct mlx5_eswitch *esw, int sz) +{ + int i; + + for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--) { + if (sz == ESW_POOLS[i]) { + ++fdb_pool_left(esw)[i]; + return; + } + } + + WARN_ONCE(1, "Couldn't find size %d in fdb size pool", sz); +} + +static void +mlx5_esw_chains_init_sz_pool(struct mlx5_eswitch *esw) +{ + u32 fdb_max; + int i; + + fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, log_max_ft_size); + + for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--) + fdb_pool_left(esw)[i] = + ESW_POOLS[i] <= fdb_max ? 
ESW_SIZE / ESW_POOLS[i] : 0; +} + +static struct mlx5_flow_table * +mlx5_esw_chains_create_fdb_table(struct mlx5_eswitch *esw, + u32 chain, u32 prio, u32 level) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *fdb; + int sz; + + if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) + ft_attr.flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + + sz = mlx5_esw_chains_get_avail_sz_from_pool(esw, POOL_NEXT_SIZE); + if (!sz) + return ERR_PTR(-ENOSPC); + ft_attr.max_fte = sz; + + /* We use tc_slow_fdb(esw) as the table's next_ft till + * ignore_flow_level is allowed on FT creation and not just for FTEs. + * Instead caller should add an explicit miss rule if needed. + */ + ft_attr.next_ft = tc_slow_fdb(esw); + + /* The root table(chain 0, prio 1, level 0) is required to be + * connected to the previous prio (FDB_BYPASS_PATH if exists). + * We always create it, as a managed table, in order to align with + * fs_core logic. + */ + if (!fdb_ignore_flow_level_supported(esw) || + (chain == 0 && prio == 1 && level == 0)) { + ft_attr.level = level; + ft_attr.prio = prio - 1; + ns = mlx5_get_fdb_sub_ns(esw->dev, chain); + } else { + ft_attr.flags |= MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.prio = FDB_TC_OFFLOAD; + /* Firmware doesn't allow us to create another level 0 table, + * so we create all unmanaged tables as level 1. + * + * To connect them, we use explicit miss rules with + * ignore_flow_level. Caller is responsible to create + * these rules (if needed). + */ + ft_attr.level = 1; + ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB); + } + + ft_attr.autogroup.num_reserved_entries = 2; + ft_attr.autogroup.max_num_groups = ESW_OFFLOADS_NUM_GROUPS; + fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(fdb)) { + esw_warn(esw->dev, + "Failed to create FDB table err %d (chain: %d, prio: %d, level: %d, size: %d)\n", + (int)PTR_ERR(fdb), chain, prio, level, sz); + mlx5_esw_chains_put_sz_to_pool(esw, sz); + return fdb; + } + + return fdb; +} + +static void +mlx5_esw_chains_destroy_fdb_table(struct mlx5_eswitch *esw, + struct mlx5_flow_table *fdb) +{ + mlx5_esw_chains_put_sz_to_pool(esw, fdb->max_fte); + mlx5_destroy_flow_table(fdb); +} + +static struct fdb_chain * +mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain) +{ + struct fdb_chain *fdb_chain = NULL; + int err; + + fdb_chain = kvzalloc(sizeof(*fdb_chain), GFP_KERNEL); + if (!fdb_chain) + return ERR_PTR(-ENOMEM); + + fdb_chain->esw = esw; + fdb_chain->chain = chain; + INIT_LIST_HEAD(&fdb_chain->prios_list); + + err = rhashtable_insert_fast(&esw_chains_ht(esw), &fdb_chain->node, + chain_params); + if (err) + goto err_insert; + + return fdb_chain; + +err_insert: + kvfree(fdb_chain); + return ERR_PTR(err); +} + +static void +mlx5_esw_chains_destroy_fdb_chain(struct fdb_chain *fdb_chain) +{ + struct mlx5_eswitch *esw = fdb_chain->esw; + + rhashtable_remove_fast(&esw_chains_ht(esw), &fdb_chain->node, + chain_params); + kvfree(fdb_chain); +} + +static struct fdb_chain * +mlx5_esw_chains_get_fdb_chain(struct mlx5_eswitch *esw, u32 chain) +{ + struct fdb_chain *fdb_chain; + + fdb_chain = rhashtable_lookup_fast(&esw_chains_ht(esw), &chain, + chain_params); + if (!fdb_chain) { + fdb_chain = mlx5_esw_chains_create_fdb_chain(esw, chain); + if (IS_ERR(fdb_chain)) + return fdb_chain; + } + + fdb_chain->ref++; + + return fdb_chain; +} + +static struct mlx5_flow_handle * +mlx5_esw_chains_add_miss_rule(struct mlx5_flow_table 
*fdb, + struct mlx5_flow_table *next_fdb) +{ + static const struct mlx5_flow_spec spec = {}; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act act = {}; + + act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND; + act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = next_fdb; + + return mlx5_add_flow_rules(fdb, &spec, &act, &dest, 1); +} + +static int +mlx5_esw_chains_update_prio_prevs(struct fdb_prio *fdb_prio, + struct mlx5_flow_table *next_fdb) +{ + struct mlx5_flow_handle *miss_rules[FDB_TC_LEVELS_PER_PRIO + 1] = {}; + struct fdb_chain *fdb_chain = fdb_prio->fdb_chain; + struct fdb_prio *pos; + int n = 0, err; + + if (fdb_prio->key.level) + return 0; + + /* Iterate in reverse order until reaching the level 0 rule of + * the previous priority, adding all the miss rules first, so we can + * revert them if any of them fails. + */ + pos = fdb_prio; + list_for_each_entry_continue_reverse(pos, + &fdb_chain->prios_list, + list) { + miss_rules[n] = mlx5_esw_chains_add_miss_rule(pos->fdb, + next_fdb); + if (IS_ERR(miss_rules[n])) { + err = PTR_ERR(miss_rules[n]); + goto err_prev_rule; + } + + n++; + if (!pos->key.level) + break; + } + + /* Success, delete old miss rules, and update the pointers. */ + n = 0; + pos = fdb_prio; + list_for_each_entry_continue_reverse(pos, + &fdb_chain->prios_list, + list) { + mlx5_del_flow_rules(pos->miss_rule); + + pos->miss_rule = miss_rules[n]; + pos->next_fdb = next_fdb; + + n++; + if (!pos->key.level) + break; + } + + return 0; + +err_prev_rule: + while (--n >= 0) + mlx5_del_flow_rules(miss_rules[n]); + + return err; +} + +static void +mlx5_esw_chains_put_fdb_chain(struct fdb_chain *fdb_chain) +{ + if (--fdb_chain->ref == 0) + mlx5_esw_chains_destroy_fdb_chain(fdb_chain); +} + +static struct fdb_prio * +mlx5_esw_chains_create_fdb_prio(struct mlx5_eswitch *esw, + u32 chain, u32 prio, u32 level) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_handle *miss_rule = NULL; + struct mlx5_flow_group *miss_group; + struct fdb_prio *fdb_prio = NULL; + struct mlx5_flow_table *next_fdb; + struct fdb_chain *fdb_chain; + struct mlx5_flow_table *fdb; + struct list_head *pos; + u32 *flow_group_in; + int err; + + fdb_chain = mlx5_esw_chains_get_fdb_chain(esw, chain); + if (IS_ERR(fdb_chain)) + return ERR_CAST(fdb_chain); + + fdb_prio = kvzalloc(sizeof(*fdb_prio), GFP_KERNEL); + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!fdb_prio || !flow_group_in) { + err = -ENOMEM; + goto err_alloc; + } + + /* Chain's prio list is sorted by prio and level. + * And all levels of some prio point to the next prio's level 0. + * Example list (prio, level): + * (3,0)->(3,1)->(5,0)->(5,1)->(6,1)->(7,0) + * In hardware, we will we have the following pointers: + * (3,0) -> (5,0) -> (7,0) -> Slow path + * (3,1) -> (5,0) + * (5,1) -> (7,0) + * (6,1) -> (7,0) + */ + + /* Default miss for each chain: */ + next_fdb = (chain == mlx5_esw_chains_get_ft_chain(esw)) ? + tc_slow_fdb(esw) : + tc_end_fdb(esw); + list_for_each(pos, &fdb_chain->prios_list) { + struct fdb_prio *p = list_entry(pos, struct fdb_prio, list); + + /* exit on first pos that is larger */ + if (prio < p->key.prio || (prio == p->key.prio && + level < p->key.level)) { + /* Get next level 0 table */ + next_fdb = p->key.level == 0 ? 
p->fdb : p->next_fdb; + break; + } + } + + fdb = mlx5_esw_chains_create_fdb_table(esw, chain, prio, level); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + goto err_create; + } + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, + fdb->max_fte - 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + fdb->max_fte - 1); + miss_group = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(miss_group)) { + err = PTR_ERR(miss_group); + goto err_group; + } + + /* Add miss rule to next_fdb */ + miss_rule = mlx5_esw_chains_add_miss_rule(fdb, next_fdb); + if (IS_ERR(miss_rule)) { + err = PTR_ERR(miss_rule); + goto err_miss_rule; + } + + fdb_prio->miss_group = miss_group; + fdb_prio->miss_rule = miss_rule; + fdb_prio->next_fdb = next_fdb; + fdb_prio->fdb_chain = fdb_chain; + fdb_prio->key.chain = chain; + fdb_prio->key.prio = prio; + fdb_prio->key.level = level; + fdb_prio->fdb = fdb; + + err = rhashtable_insert_fast(&esw_prios_ht(esw), &fdb_prio->node, + prio_params); + if (err) + goto err_insert; + + list_add(&fdb_prio->list, pos->prev); + + /* Table is ready, connect it */ + err = mlx5_esw_chains_update_prio_prevs(fdb_prio, fdb); + if (err) + goto err_update; + + kvfree(flow_group_in); + return fdb_prio; + +err_update: + list_del(&fdb_prio->list); + rhashtable_remove_fast(&esw_prios_ht(esw), &fdb_prio->node, + prio_params); +err_insert: + mlx5_del_flow_rules(miss_rule); +err_miss_rule: + mlx5_destroy_flow_group(miss_group); +err_group: + mlx5_esw_chains_destroy_fdb_table(esw, fdb); +err_create: +err_alloc: + kvfree(fdb_prio); + kvfree(flow_group_in); + mlx5_esw_chains_put_fdb_chain(fdb_chain); + return ERR_PTR(err); +} + +static void +mlx5_esw_chains_destroy_fdb_prio(struct mlx5_eswitch *esw, + struct fdb_prio *fdb_prio) +{ + struct fdb_chain *fdb_chain = fdb_prio->fdb_chain; + + WARN_ON(mlx5_esw_chains_update_prio_prevs(fdb_prio, + fdb_prio->next_fdb)); + + list_del(&fdb_prio->list); + rhashtable_remove_fast(&esw_prios_ht(esw), &fdb_prio->node, + prio_params); + mlx5_del_flow_rules(fdb_prio->miss_rule); + mlx5_destroy_flow_group(fdb_prio->miss_group); + mlx5_esw_chains_destroy_fdb_table(esw, fdb_prio->fdb); + mlx5_esw_chains_put_fdb_chain(fdb_chain); + kvfree(fdb_prio); +} + +struct mlx5_flow_table * +mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, + u32 level) +{ + struct mlx5_flow_table *prev_fts; + struct fdb_prio *fdb_prio; + struct fdb_prio_key key; + int l = 0; + + if ((chain > mlx5_esw_chains_get_chain_range(esw) && + chain != mlx5_esw_chains_get_ft_chain(esw)) || + prio > mlx5_esw_chains_get_prio_range(esw) || + level > mlx5_esw_chains_get_level_range(esw)) + return ERR_PTR(-EOPNOTSUPP); + + /* create earlier levels for correct fs_core lookup when + * connecting tables. 
+ */ + for (l = 0; l < level; l++) { + prev_fts = mlx5_esw_chains_get_table(esw, chain, prio, l); + if (IS_ERR(prev_fts)) { + fdb_prio = ERR_CAST(prev_fts); + goto err_get_prevs; + } + } + + key.chain = chain; + key.prio = prio; + key.level = level; + + mutex_lock(&esw_chains_lock(esw)); + fdb_prio = rhashtable_lookup_fast(&esw_prios_ht(esw), &key, + prio_params); + if (!fdb_prio) { + fdb_prio = mlx5_esw_chains_create_fdb_prio(esw, chain, + prio, level); + if (IS_ERR(fdb_prio)) + goto err_create_prio; + } + + ++fdb_prio->ref; + mutex_unlock(&esw_chains_lock(esw)); + + return fdb_prio->fdb; + +err_create_prio: + mutex_unlock(&esw_chains_lock(esw)); +err_get_prevs: + while (--l >= 0) + mlx5_esw_chains_put_table(esw, chain, prio, l); + return ERR_CAST(fdb_prio); +} + +void +mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, + u32 level) +{ + struct fdb_prio *fdb_prio; + struct fdb_prio_key key; + + key.chain = chain; + key.prio = prio; + key.level = level; + + mutex_lock(&esw_chains_lock(esw)); + fdb_prio = rhashtable_lookup_fast(&esw_prios_ht(esw), &key, + prio_params); + if (!fdb_prio) + goto err_get_prio; + + if (--fdb_prio->ref == 0) + mlx5_esw_chains_destroy_fdb_prio(esw, fdb_prio); + mutex_unlock(&esw_chains_lock(esw)); + + while (level-- > 0) + mlx5_esw_chains_put_table(esw, chain, prio, level); + + return; + +err_get_prio: + mutex_unlock(&esw_chains_lock(esw)); + WARN_ONCE(1, + "Couldn't find table: (chain: %d prio: %d level: %d)", + chain, prio, level); +} + +struct mlx5_flow_table * +mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw) +{ + return tc_end_fdb(esw); +} + +static int +mlx5_esw_chains_init(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_chains_priv *chains_priv; + struct mlx5_core_dev *dev = esw->dev; + u32 max_flow_counter, fdb_max; + int err; + + chains_priv = kzalloc(sizeof(*chains_priv), GFP_KERNEL); + if (!chains_priv) + return -ENOMEM; + esw_chains_priv(esw) = chains_priv; + + max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | + MLX5_CAP_GEN(dev, max_flow_counter_15_0); + fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); + + esw_debug(dev, + "Init esw offloads chains, max counters(%d), groups(%d), max flow table size(%d)\n", + max_flow_counter, ESW_OFFLOADS_NUM_GROUPS, fdb_max); + + mlx5_esw_chains_init_sz_pool(esw); + + if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, multi_fdb_encap) && + esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { + esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; + esw_warn(dev, "Tc chains and priorities offload aren't supported, update firmware if needed\n"); + } else { + esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; + esw_info(dev, "Supported tc offload range - chains: %u, prios: %u\n", + mlx5_esw_chains_get_chain_range(esw), + mlx5_esw_chains_get_prio_range(esw)); + } + + err = rhashtable_init(&esw_chains_ht(esw), &chain_params); + if (err) + goto init_chains_ht_err; + + err = rhashtable_init(&esw_prios_ht(esw), &prio_params); + if (err) + goto init_prios_ht_err; + + mutex_init(&esw_chains_lock(esw)); + + return 0; + +init_prios_ht_err: + rhashtable_destroy(&esw_chains_ht(esw)); +init_chains_ht_err: + kfree(chains_priv); + return err; +} + +static void +mlx5_esw_chains_cleanup(struct mlx5_eswitch *esw) +{ + mutex_destroy(&esw_chains_lock(esw)); + rhashtable_destroy(&esw_prios_ht(esw)); + rhashtable_destroy(&esw_chains_ht(esw)); + + kfree(esw_chains_priv(esw)); +} + +static int +mlx5_esw_chains_open(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_table 
*ft; + int err; + + /* Create tc_end_fdb(esw) which is the always created ft chain */ + ft = mlx5_esw_chains_get_table(esw, mlx5_esw_chains_get_ft_chain(esw), + 1, 0); + if (IS_ERR(ft)) + return PTR_ERR(ft); + + tc_end_fdb(esw) = ft; + + /* Always open the root for fast path */ + ft = mlx5_esw_chains_get_table(esw, 0, 1, 0); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto level_0_err; + } + + /* Open level 1 for split rules now if prios isn't supported */ + if (!mlx5_esw_chains_prios_supported(esw)) { + ft = mlx5_esw_chains_get_table(esw, 0, 1, 1); + + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto level_1_err; + } + } + + return 0; + +level_1_err: + mlx5_esw_chains_put_table(esw, 0, 1, 0); +level_0_err: + mlx5_esw_chains_put_table(esw, mlx5_esw_chains_get_ft_chain(esw), 1, 0); + return err; +} + +static void +mlx5_esw_chains_close(struct mlx5_eswitch *esw) +{ + if (!mlx5_esw_chains_prios_supported(esw)) + mlx5_esw_chains_put_table(esw, 0, 1, 1); + mlx5_esw_chains_put_table(esw, 0, 1, 0); + mlx5_esw_chains_put_table(esw, mlx5_esw_chains_get_ft_chain(esw), 1, 0); +} + +int +mlx5_esw_chains_create(struct mlx5_eswitch *esw) +{ + int err; + + err = mlx5_esw_chains_init(esw); + if (err) + return err; + + err = mlx5_esw_chains_open(esw); + if (err) + goto err_open; + + return 0; + +err_open: + mlx5_esw_chains_cleanup(esw); + return err; +} + +void +mlx5_esw_chains_destroy(struct mlx5_eswitch *esw) +{ + mlx5_esw_chains_close(esw); + mlx5_esw_chains_cleanup(esw); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h new file mode 100644 index 000000000000..2e13097fe348 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies. 
*/ + +#ifndef __ML5_ESW_CHAINS_H__ +#define __ML5_ESW_CHAINS_H__ + +bool +mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw); +u32 +mlx5_esw_chains_get_prio_range(struct mlx5_eswitch *esw); +u32 +mlx5_esw_chains_get_chain_range(struct mlx5_eswitch *esw); +u32 +mlx5_esw_chains_get_ft_chain(struct mlx5_eswitch *esw); + +struct mlx5_flow_table * +mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, + u32 level); +void +mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, + u32 level); + +struct mlx5_flow_table * +mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw); + +int mlx5_esw_chains_create(struct mlx5_eswitch *esw); +void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw); + +#endif /* __ML5_ESW_CHAINS_H__ */ + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index 1d55a324a17e..dc08ed9339ab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -50,8 +50,8 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, struct mlx5_flow_act *flow_act) { static const struct mlx5_flow_spec spec = {}; + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_namespace *root_ns; - int prio, flags; int err; root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); @@ -63,10 +63,11 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, /* As this is the terminating action then the termination table is the * same prio as the slow path */ - prio = FDB_SLOW_PATH; - flags = MLX5_FLOW_TABLE_TERMINATION; - tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, prio, 1, 1, - 0, flags); + ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION; + ft_attr.prio = FDB_SLOW_PATH; + ft_attr.max_fte = 1; + ft_attr.autogroup.max_num_groups = 1; + tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); if (IS_ERR(tt->termtbl)) { esw_warn(dev, "Failed to create termination table\n"); return -EOPNOTSUPP; @@ -177,22 +178,33 @@ mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src, memset(&src->vlan[1], 0, sizeof(src->vlan[1])); } +static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw, + const struct mlx5_flow_spec *spec) +{ + u32 port_mask, port_value; + + if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source)) + return spec->flow_context.flow_source == + MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + + port_mask = MLX5_GET(fte_match_param, spec->match_criteria, + misc_parameters.source_port); + port_value = MLX5_GET(fte_match_param, spec->match_value, + misc_parameters.source_port); + return (port_mask & port_value & 0xffff) == MLX5_VPORT_UPLINK; +} + bool mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, struct mlx5_flow_act *flow_act, struct mlx5_flow_spec *spec) { - u32 port_mask = MLX5_GET(fte_match_param, spec->match_criteria, - misc_parameters.source_port); - u32 port_value = MLX5_GET(fte_match_param, spec->match_value, - misc_parameters.source_port); - if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table)) return false; /* push vlan on RX */ return (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) && - ((port_mask & port_value) == MLX5_VPORT_UPLINK); + mlx5_eswitch_offload_is_uplink_port(esw, spec); } struct mlx5_flow_handle * diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile deleted file mode 100644 index c78512eed8d7..000000000000 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h index eb8b0fe0b4e1..11621d265d7e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h @@ -35,11 +35,11 @@ #include <linux/mlx5/driver.h> -enum mlx5_fpga_device_id { - MLX5_FPGA_DEVICE_UNKNOWN = 0, - MLX5_FPGA_DEVICE_KU040 = 1, - MLX5_FPGA_DEVICE_KU060 = 2, - MLX5_FPGA_DEVICE_KU060_2 = 3, +enum mlx5_fpga_id { + MLX5_FPGA_NEWTON = 0, + MLX5_FPGA_EDISON = 1, + MLX5_FPGA_MORSE = 2, + MLX5_FPGA_MORSEQ = 3, }; enum mlx5_fpga_image { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index 4c50efe4e7f1..61021133029e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -464,8 +464,10 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) } err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn); - if (err) + if (err) { + kvfree(in); goto err_cqwq; + } cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c index d046d1ec2a86..2ce4241459ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c @@ -81,19 +81,28 @@ static const char *mlx5_fpga_image_name(enum mlx5_fpga_image image) } } -static const char *mlx5_fpga_device_name(u32 device) +static const char *mlx5_fpga_name(u32 fpga_id) { - switch (device) { - case MLX5_FPGA_DEVICE_KU040: - return "ku040"; - case MLX5_FPGA_DEVICE_KU060: - return "ku060"; - case MLX5_FPGA_DEVICE_KU060_2: - return "ku060_2"; - case MLX5_FPGA_DEVICE_UNKNOWN: - default: - return "unknown"; + static char ret[32]; + + switch (fpga_id) { + case MLX5_FPGA_NEWTON: + return "Newton"; + case MLX5_FPGA_EDISON: + return "Edison"; + case MLX5_FPGA_MORSE: + return "Morse"; + case MLX5_FPGA_MORSEQ: + return "MorseQ"; } + + snprintf(ret, sizeof(ret), "Unknown %d", fpga_id); + return ret; +} + +static int mlx5_is_fpga_lookaside(u32 fpga_id) +{ + return fpga_id != MLX5_FPGA_NEWTON && fpga_id != MLX5_FPGA_EDISON; } static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev) @@ -110,8 +119,12 @@ static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev) fdev->last_admin_image = query.admin_image; fdev->last_oper_image = query.oper_image; - mlx5_fpga_dbg(fdev, "Status %u; Admin image %u; Oper image %u\n", - query.status, query.admin_image, query.oper_image); + mlx5_fpga_info(fdev, "Status %u; Admin image %u; Oper image %u\n", + query.status, query.admin_image, query.oper_image); + + /* for FPGA lookaside projects FPGA load status is not important */ + if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id))) + return 0; if (query.status != MLX5_FPGA_STATUS_SUCCESS) { mlx5_fpga_err(fdev, "%s image failed to load; status %u\n", @@ -167,25 +180,30 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev) struct mlx5_fpga_device *fdev = mdev->fpga; unsigned int max_num_qps; unsigned long flags; - u32 fpga_device_id; + u32 fpga_id; int err; if (!fdev) return 0; - err = mlx5_fpga_device_load_check(fdev); + err = 
mlx5_fpga_caps(fdev->mdev); if (err) goto out; - err = mlx5_fpga_caps(fdev->mdev); + err = mlx5_fpga_device_load_check(fdev); if (err) goto out; - fpga_device_id = MLX5_CAP_FPGA(fdev->mdev, fpga_device); - mlx5_fpga_info(fdev, "%s:%u; %s image, version %u; SBU %06x:%04x version %d\n", - mlx5_fpga_device_name(fpga_device_id), - fpga_device_id, + fpga_id = MLX5_CAP_FPGA(fdev->mdev, fpga_id); + mlx5_fpga_info(fdev, "FPGA card %s:%u\n", mlx5_fpga_name(fpga_id), fpga_id); + + /* No QPs if FPGA does not participate in net processing */ + if (mlx5_is_fpga_lookaside(fpga_id)) + goto out; + + mlx5_fpga_info(fdev, "%s(%d): image, version %u; SBU %06x:%04x version %d\n", mlx5_fpga_image_name(fdev->last_oper_image), + fdev->last_oper_image, MLX5_CAP_FPGA(fdev->mdev, image_version), MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id), MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id), @@ -264,6 +282,9 @@ void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev) if (!fdev) return; + if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id))) + return; + spin_lock_irqsave(&fdev->state_lock, flags); if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) { spin_unlock_irqrestore(&fdev->state_lock, flags); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index c76da309506b..4c61d25d2e88 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -87,10 +87,10 @@ static const struct rhashtable_params rhash_sa = { * value is not constant during the lifetime * of the key object. */ - .key_len = FIELD_SIZEOF(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) - - FIELD_SIZEOF(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd), + .key_len = sizeof_field(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) - + sizeof_field(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd), .key_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) + - FIELD_SIZEOF(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd), + sizeof_field(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd), .head_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hash), .automatic_shrinking = true, .min_size = 1, @@ -850,6 +850,7 @@ void mlx5_fpga_ipsec_delete_sa_ctx(void *context) mutex_lock(&fpga_xfrm->lock); if (!--fpga_xfrm->num_rules) { mlx5_fpga_ipsec_release_sa_ctx(fpga_xfrm->sa_ctx); + kfree(fpga_xfrm->sa_ctx); fpga_xfrm->sa_ctx = NULL; } mutex_unlock(&fpga_xfrm->lock); @@ -1478,7 +1479,7 @@ int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, if (!memcmp(&xfrm->attrs, attrs, sizeof(xfrm->attrs))) return 0; - if (!mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) { + if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) { mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n"); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 7ac1249eadc3..b25465d9e030 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -107,6 +107,50 @@ static int mlx5_cmd_stub_delete_fte(struct mlx5_flow_root_namespace *ns, return 0; } +static int mlx5_cmd_stub_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, + int reformat_type, + size_t size, + void *reformat_data, + enum mlx5_flow_namespace_type namespace, + struct mlx5_pkt_reformat *pkt_reformat) +{ + return 0; +} + +static void mlx5_cmd_stub_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat *pkt_reformat) +{ +} + +static int 
mlx5_cmd_stub_modify_header_alloc(struct mlx5_flow_root_namespace *ns, + u8 namespace, u8 num_actions, + void *modify_actions, + struct mlx5_modify_hdr *modify_hdr) +{ + return 0; +} + +static void mlx5_cmd_stub_modify_header_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_modify_hdr *modify_hdr) +{ +} + +static int mlx5_cmd_stub_set_peer(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns) +{ + return 0; +} + +static int mlx5_cmd_stub_create_ns(struct mlx5_flow_root_namespace *ns) +{ + return 0; +} + +static int mlx5_cmd_stub_destroy_ns(struct mlx5_flow_root_namespace *ns) +{ + return 0; +} + static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, u32 underlay_qpn, bool disconnect) @@ -182,7 +226,7 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns, } else { MLX5_SET(create_flow_table_in, in, flow_table_context.table_miss_action, - ns->def_miss_action); + ft->def_miss_action); } break; @@ -262,7 +306,7 @@ static int mlx5_cmd_modify_flow_table(struct mlx5_flow_root_namespace *ns, } else { MLX5_SET(modify_flow_table_in, in, flow_table_context.table_miss_action, - ns->def_miss_action); + ft->def_miss_action); } } @@ -388,6 +432,9 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(set_fte_in, in, table_type, ft->type); MLX5_SET(set_fte_in, in, table_id, ft->id); MLX5_SET(set_fte_in, in, flow_index, fte->index); + MLX5_SET(set_fte_in, in, ignore_flow_level, + !!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL)); + if (ft->vport) { MLX5_SET(set_fte_in, in, vport_number, ft->vport); MLX5_SET(set_fte_in, in, other_vport, 1); @@ -412,11 +459,13 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, } else { MLX5_SET(flow_context, in_flow_context, action, fte->action.action); - MLX5_SET(flow_context, in_flow_context, packet_reformat_id, - fte->action.reformat_id); + if (fte->action.pkt_reformat) + MLX5_SET(flow_context, in_flow_context, packet_reformat_id, + fte->action.pkt_reformat->id); } - MLX5_SET(flow_context, in_flow_context, modify_header_id, - fte->action.modify_id); + if (fte->action.modify_hdr) + MLX5_SET(flow_context, in_flow_context, modify_header_id, + fte->action.modify_hdr->id); vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan); @@ -461,14 +510,15 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(dest_format_struct, in_dests, destination_eswitch_owner_vhca_id, dst->dest_attr.vport.vhca_id); - if (extended_dest) { + if (extended_dest && + dst->dest_attr.vport.pkt_reformat) { MLX5_SET(dest_format_struct, in_dests, packet_reformat, !!(dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID)); MLX5_SET(extended_dest_format, in_dests, packet_reformat_id, - dst->dest_attr.vport.reformat_id); + dst->dest_attr.vport.pkt_reformat->id); } break; default: @@ -566,7 +616,9 @@ static int mlx5_cmd_delete_fte(struct mlx5_flow_root_namespace *ns, return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id) +int mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, + enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask, + u32 *id) { u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)] = {0}; u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0}; @@ -574,6 +626,7 @@ int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id) MLX5_SET(alloc_flow_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_FLOW_COUNTER); + MLX5_SET(alloc_flow_counter_in, in, flow_counter_bulk, alloc_bitmask); err = mlx5_cmd_exec(dev, in, 
sizeof(in), out, sizeof(out)); if (!err) @@ -581,6 +634,11 @@ int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id) return err; } +int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id) +{ + return mlx5_cmd_fc_bulk_alloc(dev, 0, id); +} + int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id) { u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)] = {0}; @@ -615,77 +673,35 @@ int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id, return 0; } -struct mlx5_cmd_fc_bulk { - u32 id; - int num; - int outlen; - u32 out[0]; -}; - -struct mlx5_cmd_fc_bulk * -mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num) -{ - struct mlx5_cmd_fc_bulk *b; - int outlen = - MLX5_ST_SZ_BYTES(query_flow_counter_out) + - MLX5_ST_SZ_BYTES(traffic_counter) * num; - - b = kzalloc(sizeof(*b) + outlen, GFP_KERNEL); - if (!b) - return NULL; - - b->id = id; - b->num = num; - b->outlen = outlen; - - return b; -} - -void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b) +int mlx5_cmd_fc_get_bulk_query_out_len(int bulk_len) { - kfree(b); + return MLX5_ST_SZ_BYTES(query_flow_counter_out) + + MLX5_ST_SZ_BYTES(traffic_counter) * bulk_len; } -int -mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b) +int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len, + u32 *out) { + int outlen = mlx5_cmd_fc_get_bulk_query_out_len(bulk_len); u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0}; MLX5_SET(query_flow_counter_in, in, opcode, MLX5_CMD_OP_QUERY_FLOW_COUNTER); MLX5_SET(query_flow_counter_in, in, op_mod, 0); - MLX5_SET(query_flow_counter_in, in, flow_counter_id, b->id); - MLX5_SET(query_flow_counter_in, in, num_of_counters, b->num); - return mlx5_cmd_exec(dev, in, sizeof(in), b->out, b->outlen); + MLX5_SET(query_flow_counter_in, in, flow_counter_id, base_id); + MLX5_SET(query_flow_counter_in, in, num_of_counters, bulk_len); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } -void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, - struct mlx5_cmd_fc_bulk *b, u32 id, - u64 *packets, u64 *bytes) -{ - int index = id - b->id; - void *stats; - - if (index < 0 || index >= b->num) { - mlx5_core_warn(dev, "Flow counter id (0x%x) out of range (0x%x..0x%x). 
Counter ignored.\n", - id, b->id, b->id + b->num - 1); - return; - } - - stats = MLX5_ADDR_OF(query_flow_counter_out, b->out, - flow_statistics[index]); - *packets = MLX5_GET64(traffic_counter, stats, packets); - *bytes = MLX5_GET64(traffic_counter, stats, octets); -} - -int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev, - int reformat_type, - size_t size, - void *reformat_data, - enum mlx5_flow_namespace_type namespace, - u32 *packet_reformat_id) +static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, + int reformat_type, + size_t size, + void *reformat_data, + enum mlx5_flow_namespace_type namespace, + struct mlx5_pkt_reformat *pkt_reformat) { u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)]; + struct mlx5_core_dev *dev = ns->dev; void *packet_reformat_context_in; int max_encap_size; void *reformat; @@ -728,35 +744,36 @@ int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev, memset(out, 0, sizeof(out)); err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); - *packet_reformat_id = MLX5_GET(alloc_packet_reformat_context_out, - out, packet_reformat_id); + pkt_reformat->id = MLX5_GET(alloc_packet_reformat_context_out, + out, packet_reformat_id); kfree(in); return err; } -EXPORT_SYMBOL(mlx5_packet_reformat_alloc); -void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev, - u32 packet_reformat_id) +static void mlx5_cmd_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat *pkt_reformat) { u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)]; u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_out)]; + struct mlx5_core_dev *dev = ns->dev; memset(in, 0, sizeof(in)); MLX5_SET(dealloc_packet_reformat_context_in, in, opcode, MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT); MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id, - packet_reformat_id); + pkt_reformat->id); mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -EXPORT_SYMBOL(mlx5_packet_reformat_dealloc); -int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, - u8 namespace, u8 num_actions, - void *modify_actions, u32 *modify_header_id) +static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns, + u8 namespace, u8 num_actions, + void *modify_actions, + struct mlx5_modify_hdr *modify_hdr) { u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)]; int max_actions, actions_size, inlen, err; + struct mlx5_core_dev *dev = ns->dev; void *actions_in; u8 table_type; u32 *in; @@ -807,26 +824,26 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, memset(out, 0, sizeof(out)); err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); - *modify_header_id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id); + modify_hdr->id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id); kfree(in); return err; } -EXPORT_SYMBOL(mlx5_modify_header_alloc); -void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id) +static void mlx5_cmd_modify_header_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_modify_hdr *modify_hdr) { u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)]; u32 out[MLX5_ST_SZ_DW(dealloc_modify_header_context_out)]; + struct mlx5_core_dev *dev = ns->dev; memset(in, 0, sizeof(in)); MLX5_SET(dealloc_modify_header_context_in, in, opcode, MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT); MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id, - modify_header_id); + modify_hdr->id); mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } 
-EXPORT_SYMBOL(mlx5_modify_header_dealloc); static const struct mlx5_flow_cmds mlx5_flow_cmds = { .create_flow_table = mlx5_cmd_create_flow_table, @@ -838,6 +855,13 @@ static const struct mlx5_flow_cmds mlx5_flow_cmds = { .update_fte = mlx5_cmd_update_fte, .delete_fte = mlx5_cmd_delete_fte, .update_root_ft = mlx5_cmd_update_root_ft, + .packet_reformat_alloc = mlx5_cmd_packet_reformat_alloc, + .packet_reformat_dealloc = mlx5_cmd_packet_reformat_dealloc, + .modify_header_alloc = mlx5_cmd_modify_header_alloc, + .modify_header_dealloc = mlx5_cmd_modify_header_dealloc, + .set_peer = mlx5_cmd_stub_set_peer, + .create_ns = mlx5_cmd_stub_create_ns, + .destroy_ns = mlx5_cmd_stub_destroy_ns, }; static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = { @@ -850,9 +874,16 @@ static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = { .update_fte = mlx5_cmd_stub_update_fte, .delete_fte = mlx5_cmd_stub_delete_fte, .update_root_ft = mlx5_cmd_stub_update_root_ft, + .packet_reformat_alloc = mlx5_cmd_stub_packet_reformat_alloc, + .packet_reformat_dealloc = mlx5_cmd_stub_packet_reformat_dealloc, + .modify_header_alloc = mlx5_cmd_stub_modify_header_alloc, + .modify_header_dealloc = mlx5_cmd_stub_modify_header_dealloc, + .set_peer = mlx5_cmd_stub_set_peer, + .create_ns = mlx5_cmd_stub_create_ns, + .destroy_ns = mlx5_cmd_stub_destroy_ns, }; -static const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void) +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void) { return &mlx5_flow_cmds; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index e340f9af2f5a..d62de642eca9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -75,24 +75,45 @@ struct mlx5_flow_cmds { struct mlx5_flow_table *ft, u32 underlay_qpn, bool disconnect); + + int (*packet_reformat_alloc)(struct mlx5_flow_root_namespace *ns, + int reformat_type, + size_t size, + void *reformat_data, + enum mlx5_flow_namespace_type namespace, + struct mlx5_pkt_reformat *pkt_reformat); + + void (*packet_reformat_dealloc)(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat *pkt_reformat); + + int (*modify_header_alloc)(struct mlx5_flow_root_namespace *ns, + u8 namespace, u8 num_actions, + void *modify_actions, + struct mlx5_modify_hdr *modify_hdr); + + void (*modify_header_dealloc)(struct mlx5_flow_root_namespace *ns, + struct mlx5_modify_hdr *modify_hdr); + + int (*set_peer)(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns); + + int (*create_ns)(struct mlx5_flow_root_namespace *ns); + int (*destroy_ns)(struct mlx5_flow_root_namespace *ns); }; int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id); +int mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, + enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask, + u32 *id); int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id); int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id, u64 *packets, u64 *bytes); -struct mlx5_cmd_fc_bulk; - -struct mlx5_cmd_fc_bulk * -mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num); -void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b); -int -mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b); -void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, - struct mlx5_cmd_fc_bulk *b, u32 id, - u64 *packets, u64 *bytes); +int mlx5_cmd_fc_get_bulk_query_out_len(int bulk_len); +int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len, + u32 *out); const 
struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type); +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 3e99799bdb40..9dc24241dc91 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -60,7 +60,8 @@ ADD_PRIO(num_prios_val, 0, num_levels_val, {},\ __VA_ARGS__)\ -#define ADD_NS(...) {.type = FS_TYPE_NAMESPACE,\ +#define ADD_NS(def_miss_act, ...) {.type = FS_TYPE_NAMESPACE, \ + .def_miss_action = def_miss_act,\ .children = (struct init_tree_node[]) {__VA_ARGS__},\ .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \ } @@ -131,33 +132,41 @@ static struct init_tree_node { int num_leaf_prios; int prio; int num_levels; + enum mlx5_flow_table_miss_action def_miss_action; } root_fs = { .type = FS_TYPE_NAMESPACE, .ar_size = 7, - .children = (struct init_tree_node[]) { - ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, - FS_CHAINING_CAPS, - ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, - BY_PASS_PRIO_NUM_LEVELS))), - ADD_PRIO(0, LAG_MIN_LEVEL, 0, - FS_CHAINING_CAPS, - ADD_NS(ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS, - LAG_PRIO_NUM_LEVELS))), - ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {}, - ADD_NS(ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, OFFLOADS_MAX_FT))), - ADD_PRIO(0, ETHTOOL_MIN_LEVEL, 0, - FS_CHAINING_CAPS, - ADD_NS(ADD_MULTIPLE_PRIO(ETHTOOL_NUM_PRIOS, - ETHTOOL_PRIO_NUM_LEVELS))), - ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {}, - ADD_NS(ADD_MULTIPLE_PRIO(KERNEL_NIC_TC_NUM_PRIOS, KERNEL_NIC_TC_NUM_LEVELS), - ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS, - KERNEL_NIC_PRIO_NUM_LEVELS))), - ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, - FS_CHAINING_CAPS, - ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_NUM_LEVELS))), - ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {}, - ADD_NS(ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, ANCHOR_NUM_LEVELS))), + .children = (struct init_tree_node[]){ + ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, + BY_PASS_PRIO_NUM_LEVELS))), + ADD_PRIO(0, LAG_MIN_LEVEL, 0, FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS, + LAG_PRIO_NUM_LEVELS))), + ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {}, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, + OFFLOADS_MAX_FT))), + ADD_PRIO(0, ETHTOOL_MIN_LEVEL, 0, FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(ETHTOOL_NUM_PRIOS, + ETHTOOL_PRIO_NUM_LEVELS))), + ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {}, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(KERNEL_NIC_TC_NUM_PRIOS, + KERNEL_NIC_TC_NUM_LEVELS), + ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS, + KERNEL_NIC_PRIO_NUM_LEVELS))), + ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, + LEFTOVERS_NUM_LEVELS))), + ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {}, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, + ANCHOR_NUM_LEVELS))), } }; @@ -167,8 +176,29 @@ static struct init_tree_node egress_root_fs = { .children = (struct init_tree_node[]) { ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0, FS_CHAINING_CAPS_EGRESS, - ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, + BY_PASS_PRIO_NUM_LEVELS))), + } +}; + +#define RDMA_RX_BYPASS_PRIO 0 +#define RDMA_RX_KERNEL_PRIO 1 
+static struct init_tree_node rdma_rx_root_fs = { + .type = FS_TYPE_NAMESPACE, + .ar_size = 2, + .children = (struct init_tree_node[]) { + [RDMA_RX_BYPASS_PRIO] = + ADD_PRIO(0, MLX5_BY_PASS_NUM_REGULAR_PRIOS, 0, + FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_REGULAR_PRIOS, BY_PASS_PRIO_NUM_LEVELS))), + [RDMA_RX_KERNEL_PRIO] = + ADD_PRIO(0, MLX5_BY_PASS_NUM_REGULAR_PRIOS + 1, 0, + FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_SWITCH_DOMAIN, + ADD_MULTIPLE_PRIO(1, 1))), } }; @@ -179,7 +209,7 @@ enum fs_i_lock_class { }; static const struct rhashtable_params rhash_fte = { - .key_len = FIELD_SIZEOF(struct fs_fte, val), + .key_len = sizeof_field(struct fs_fte, val), .key_offset = offsetof(struct fs_fte, val), .head_offset = offsetof(struct fs_fte, hash), .automatic_shrinking = true, @@ -187,7 +217,7 @@ static const struct rhashtable_params rhash_fte = { }; static const struct rhashtable_params rhash_fg = { - .key_len = FIELD_SIZEOF(struct mlx5_flow_group, mask), + .key_len = sizeof_field(struct mlx5_flow_group, mask), .key_offset = offsetof(struct mlx5_flow_group, mask), .head_offset = offsetof(struct mlx5_flow_group, hash), .automatic_shrinking = true, @@ -549,7 +579,9 @@ static void del_sw_flow_group(struct fs_node *node) rhashtable_destroy(&fg->ftes_hash); ida_destroy(&fg->fte_allocator); - if (ft->autogroup.active) + if (ft->autogroup.active && + fg->max_ftes == ft->autogroup.group_size && + fg->start_index < ft->autogroup.max_fte) ft->autogroup.num_groups--; err = rhltable_remove(&ft->fgs_hash, &fg->hash, @@ -976,7 +1008,8 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa u16 vport) { struct mlx5_flow_root_namespace *root = find_root(&ns->node); - struct mlx5_flow_table *next_ft = NULL; + bool unmanaged = ft_attr->flags & MLX5_FLOW_TABLE_UNMANAGED; + struct mlx5_flow_table *next_ft; struct fs_prio *fs_prio = NULL; struct mlx5_flow_table *ft; int log_table_sz; @@ -993,14 +1026,21 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa err = -EINVAL; goto unlock_root; } - if (ft_attr->level >= fs_prio->num_levels) { - err = -ENOSPC; - goto unlock_root; + if (!unmanaged) { + /* The level is related to the + * priority level range. + */ + if (ft_attr->level >= fs_prio->num_levels) { + err = -ENOSPC; + goto unlock_root; + } + + ft_attr->level += fs_prio->start_level; } + /* The level is related to the * priority level range. */ - ft_attr->level += fs_prio->start_level; ft = alloc_flow_table(ft_attr->level, vport, ft_attr->max_fte ? roundup_pow_of_two(ft_attr->max_fte) : 0, @@ -1013,18 +1053,27 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table); log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0; - next_ft = find_next_chained_ft(fs_prio); + next_ft = unmanaged ? 
ft_attr->next_ft : + find_next_chained_ft(fs_prio); + ft->def_miss_action = ns->def_miss_action; err = root->cmds->create_flow_table(root, ft, log_table_sz, next_ft); if (err) goto free_ft; - err = connect_flow_table(root->dev, ft, fs_prio); - if (err) - goto destroy_ft; + if (!unmanaged) { + err = connect_flow_table(root->dev, ft, fs_prio); + if (err) + goto destroy_ft; + } + ft->node.active = true; down_write_ref_node(&fs_prio->node, false); - tree_add_node(&ft->node, &fs_prio->node); - list_add_flow_table(ft, fs_prio); + if (!unmanaged) { + tree_add_node(&ft->node, &fs_prio->node); + list_add_flow_table(ft, fs_prio); + } else { + ft->node.root = fs_prio->node.root; + } fs_prio->num_ft++; up_write_ref_node(&fs_prio->node, false); mutex_unlock(&root->chain_lock); @@ -1072,29 +1121,27 @@ EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table); struct mlx5_flow_table* mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, - int prio, - int num_flow_table_entries, - int max_num_groups, - u32 level, - u32 flags) + struct mlx5_flow_table_attr *ft_attr) { - struct mlx5_flow_table_attr ft_attr = {}; + int num_reserved_entries = ft_attr->autogroup.num_reserved_entries; + int autogroups_max_fte = ft_attr->max_fte - num_reserved_entries; + int max_num_groups = ft_attr->autogroup.max_num_groups; struct mlx5_flow_table *ft; - if (max_num_groups > num_flow_table_entries) + if (max_num_groups > autogroups_max_fte) + return ERR_PTR(-EINVAL); + if (num_reserved_entries > ft_attr->max_fte) return ERR_PTR(-EINVAL); - ft_attr.max_fte = num_flow_table_entries; - ft_attr.prio = prio; - ft_attr.level = level; - ft_attr.flags = flags; - - ft = mlx5_create_flow_table(ns, &ft_attr); + ft = mlx5_create_flow_table(ns, ft_attr); if (IS_ERR(ft)) return ft; ft->autogroup.active = true; ft->autogroup.required_groups = max_num_groups; + ft->autogroup.max_fte = autogroups_max_fte; + /* We save place for flow groups in addition to max types */ + ft->autogroup.group_size = autogroups_max_fte / (max_num_groups + 1); return ft; } @@ -1116,7 +1163,7 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, struct mlx5_flow_group *fg; int err; - if (ft->autogroup.active) + if (ft->autogroup.active && start_index < ft->autogroup.max_fte) return ERR_PTR(-EPERM); down_write_ref_node(&ft->node, false); @@ -1289,18 +1336,18 @@ static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft const struct mlx5_flow_spec *spec) { struct list_head *prev = &ft->node.children; - struct mlx5_flow_group *fg; + u32 max_fte = ft->autogroup.max_fte; unsigned int candidate_index = 0; unsigned int group_size = 0; + struct mlx5_flow_group *fg; if (!ft->autogroup.active) return ERR_PTR(-ENOENT); if (ft->autogroup.num_groups < ft->autogroup.required_groups) - /* We save place for flow groups in addition to max types */ - group_size = ft->max_fte / (ft->autogroup.required_groups + 1); + group_size = ft->autogroup.group_size; - /* ft->max_fte == ft->autogroup.max_types */ + /* max_fte == ft->autogroup.max_types */ if (group_size == 0) group_size = 1; @@ -1313,7 +1360,7 @@ static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft prev = &fg->node.list; } - if (candidate_index + group_size > ft->max_fte) + if (candidate_index + group_size > max_fte) return ERR_PTR(-ENOSPC); fg = alloc_insert_flow_group(ft, @@ -1325,7 +1372,8 @@ static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft if (IS_ERR(fg)) goto out; - ft->autogroup.num_groups++; + if (group_size == 
ft->autogroup.group_size) + ft->autogroup.num_groups++; out: return fg; @@ -1384,7 +1432,8 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ? (d1->vport.vhca_id == d2->vport.vhca_id) : true) && ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ? - (d1->vport.reformat_id == d2->vport.reformat_id) : true)) || + (d1->vport.pkt_reformat->id == + d2->vport.pkt_reformat->id) : true)) || (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && d1->ft == d2->ft) || (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR && @@ -1495,18 +1544,30 @@ static bool counter_is_valid(u32 action) } static bool dest_is_valid(struct mlx5_flow_destination *dest, - u32 action, + struct mlx5_flow_act *flow_act, struct mlx5_flow_table *ft) { + bool ignore_level = flow_act->flags & FLOW_ACT_IGNORE_FLOW_LEVEL; + u32 action = flow_act->action; + if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)) return counter_is_valid(action); if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) return true; + if (ignore_level) { + if (ft->type != FS_FT_FDB) + return false; + + if (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && + dest->ft->type != FS_FT_FDB) + return false; + } + if (!dest || ((dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) && - (dest->ft->level <= ft->level))) + (dest->ft->level <= ft->level && !ignore_level))) return false; return true; } @@ -1521,16 +1582,16 @@ struct match_list_head { struct match_list first; }; -static void free_match_list(struct match_list_head *head) +static void free_match_list(struct match_list_head *head, bool ft_locked) { if (!list_empty(&head->list)) { struct match_list *iter, *match_tmp; list_del(&head->first.list); - tree_put_node(&head->first.g->node, false); + tree_put_node(&head->first.g->node, ft_locked); list_for_each_entry_safe(iter, match_tmp, &head->list, list) { - tree_put_node(&iter->g->node, false); + tree_put_node(&iter->g->node, ft_locked); list_del(&iter->list); kfree(iter); } @@ -1539,7 +1600,8 @@ static void free_match_list(struct match_list_head *head) static int build_match_list(struct match_list_head *match_head, struct mlx5_flow_table *ft, - const struct mlx5_flow_spec *spec) + const struct mlx5_flow_spec *spec, + bool ft_locked) { struct rhlist_head *tmp, *list; struct mlx5_flow_group *g; @@ -1564,7 +1626,7 @@ static int build_match_list(struct match_list_head *match_head, curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC); if (!curr_match) { - free_match_list(match_head); + free_match_list(match_head, ft_locked); err = -ENOMEM; goto out; } @@ -1736,7 +1798,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, return ERR_PTR(-EINVAL); for (i = 0; i < dest_num; i++) { - if (!dest_is_valid(&dest[i], flow_act->action, ft)) + if (!dest_is_valid(&dest[i], flow_act, ft)) return ERR_PTR(-EINVAL); } nested_down_read_ref_node(&ft->node, FS_LOCK_GRANDPARENT); @@ -1744,7 +1806,7 @@ search_again_locked: version = atomic_read(&ft->node.version); /* Collect all fgs which has a matching match_criteria */ - err = build_match_list(&match_head, ft, spec); + err = build_match_list(&match_head, ft, spec, take_write); if (err) { if (take_write) up_write_ref_node(&ft->node, false); @@ -1758,7 +1820,7 @@ search_again_locked: rule = try_add_to_existing_fg(ft, &match_head.list, spec, flow_act, dest, dest_num, version); - free_match_list(&match_head); + free_match_list(&match_head, take_write); if (!IS_ERR(rule) || (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) { if (take_write) @@ -1782,6 
+1844,13 @@ search_again_locked: return rule; } + fte = alloc_fte(ft, spec, flow_act); + if (IS_ERR(fte)) { + up_write_ref_node(&ft->node, false); + err = PTR_ERR(fte); + goto err_alloc_fte; + } + nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); up_write_ref_node(&ft->node, false); @@ -1789,17 +1858,9 @@ search_again_locked: if (err) goto err_release_fg; - fte = alloc_fte(ft, spec, flow_act); - if (IS_ERR(fte)) { - err = PTR_ERR(fte); - goto err_release_fg; - } - err = insert_fte(g, fte); - if (err) { - kmem_cache_free(steering->ftes_cache, fte); + if (err) goto err_release_fg; - } nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); up_write_ref_node(&g->node, false); @@ -1811,6 +1872,8 @@ search_again_locked: err_release_fg: up_write_ref_node(&g->node, false); + kmem_cache_free(steering->ftes_cache, fte); +err_alloc_fte: tree_put_node(&g->node, false); return ERR_PTR(err); } @@ -1998,7 +2061,8 @@ int mlx5_destroy_flow_table(struct mlx5_flow_table *ft) int err = 0; mutex_lock(&root->chain_lock); - err = disconnect_flow_table(ft); + if (!(ft->flags & MLX5_FLOW_TABLE_UNMANAGED)) + err = disconnect_flow_table(ft); if (err) { mutex_unlock(&root->chain_lock); return err; @@ -2056,16 +2120,18 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, if (steering->sniffer_tx_root_ns) return &steering->sniffer_tx_root_ns->ns; return NULL; - case MLX5_FLOW_NAMESPACE_RDMA_RX: - if (steering->rdma_rx_root_ns) - return &steering->rdma_rx_root_ns->ns; - return NULL; default: break; } if (type == MLX5_FLOW_NAMESPACE_EGRESS) { root_ns = steering->egress_root_ns; + } else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX) { + root_ns = steering->rdma_rx_root_ns; + prio = RDMA_RX_BYPASS_PRIO; + } else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL) { + root_ns = steering->rdma_rx_root_ns; + prio = RDMA_RX_KERNEL_PRIO; } else { /* Must be NIC RX */ root_ns = steering->root_ns; prio = type; @@ -2155,7 +2221,8 @@ static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace return ns; } -static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio) +static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio, + int def_miss_act) { struct mlx5_flow_namespace *ns; @@ -2164,6 +2231,7 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio) return ERR_PTR(-ENOMEM); fs_init_namespace(ns); + ns->def_miss_action = def_miss_act; tree_init_node(&ns->node, NULL, del_sw_ns); tree_add_node(&ns->node, &prio->node); list_add_tail(&ns->node.list, &prio->node.children); @@ -2230,7 +2298,7 @@ static int init_root_tree_recursive(struct mlx5_flow_steering *steering, base = &fs_prio->node; } else if (init_node->type == FS_TYPE_NAMESPACE) { fs_get_obj(fs_prio, fs_parent_node); - fs_ns = fs_create_namespace(fs_prio); + fs_ns = fs_create_namespace(fs_prio, init_node->def_miss_action); if (IS_ERR(fs_ns)) return PTR_ERR(fs_ns); base = &fs_ns->node; @@ -2323,9 +2391,17 @@ static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level) int acc_level_ns = acc_level; prio->start_level = acc_level; - fs_for_each_ns(ns, prio) + fs_for_each_ns(ns, prio) { /* This updates start_level and num_levels of ns's priority descendants */ acc_level_ns = set_prio_attrs_in_ns(ns, acc_level); + + /* If this is a prio with chains, we can jump from one chain + * (namespace) to another, so we accumulate the levels + */ + if (prio->node.type == FS_TYPE_PRIO_CHAINS) + acc_level = acc_level_ns; + } + if (!prio->num_levels) prio->num_levels = acc_level_ns -
prio->start_level; WARN_ON(prio->num_levels < acc_level_ns - prio->start_level); @@ -2494,71 +2570,129 @@ static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering) static int init_rdma_rx_root_ns(struct mlx5_flow_steering *steering) { - struct fs_prio *prio; + int err; steering->rdma_rx_root_ns = create_root_ns(steering, FS_FT_RDMA_RX); if (!steering->rdma_rx_root_ns) return -ENOMEM; - steering->rdma_rx_root_ns->def_miss_action = - MLX5_FLOW_TABLE_MISS_ACTION_SWITCH_DOMAIN; + err = init_root_tree(steering, &rdma_rx_root_fs, + &steering->rdma_rx_root_ns->ns.node); + if (err) + goto out_err; - /* Create single prio */ - prio = fs_create_prio(&steering->rdma_rx_root_ns->ns, 0, 1); - return PTR_ERR_OR_ZERO(prio); + set_prio_attrs(steering->rdma_rx_root_ns); + + return 0; + +out_err: + cleanup_root_ns(steering->rdma_rx_root_ns); + steering->rdma_rx_root_ns = NULL; + return err; } -static int init_fdb_root_ns(struct mlx5_flow_steering *steering) + +/* FT and tc chains are stored in the same array so we can re-use the + * mlx5_get_fdb_sub_ns() and tc api for FT chains. + * When creating a new ns for each chain store it in the first available slot. + * Assume tc chains are created and stored first and only then the FT chain. + */ +static void store_fdb_sub_ns_prio_chain(struct mlx5_flow_steering *steering, + struct mlx5_flow_namespace *ns) +{ + int chain = 0; + + while (steering->fdb_sub_ns[chain]) + ++chain; + + steering->fdb_sub_ns[chain] = ns; +} + +static int create_fdb_sub_ns_prio_chain(struct mlx5_flow_steering *steering, + struct fs_prio *maj_prio) { struct mlx5_flow_namespace *ns; - struct fs_prio *maj_prio; struct fs_prio *min_prio; + int prio; + + ns = fs_create_namespace(maj_prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF); + if (IS_ERR(ns)) + return PTR_ERR(ns); + + for (prio = 0; prio < FDB_TC_MAX_PRIO; prio++) { + min_prio = fs_create_prio(ns, prio, FDB_TC_LEVELS_PER_PRIO); + if (IS_ERR(min_prio)) + return PTR_ERR(min_prio); + } + + store_fdb_sub_ns_prio_chain(steering, ns); + + return 0; +} + +static int create_fdb_chains(struct mlx5_flow_steering *steering, + int fs_prio, + int chains) +{ + struct fs_prio *maj_prio; int levels; int chain; - int prio; int err; - steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB); - if (!steering->fdb_root_ns) - return -ENOMEM; + levels = FDB_TC_LEVELS_PER_PRIO * FDB_TC_MAX_PRIO * chains; + maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns, + fs_prio, + levels); + if (IS_ERR(maj_prio)) + return PTR_ERR(maj_prio); - steering->fdb_sub_ns = kzalloc(sizeof(steering->fdb_sub_ns) * - (FDB_MAX_CHAIN + 1), GFP_KERNEL); + for (chain = 0; chain < chains; chain++) { + err = create_fdb_sub_ns_prio_chain(steering, maj_prio); + if (err) + return err; + } + + return 0; +} + +static int create_fdb_fast_path(struct mlx5_flow_steering *steering) +{ + int err; + + steering->fdb_sub_ns = kcalloc(FDB_NUM_CHAINS, + sizeof(*steering->fdb_sub_ns), + GFP_KERNEL); if (!steering->fdb_sub_ns) return -ENOMEM; + err = create_fdb_chains(steering, FDB_TC_OFFLOAD, FDB_TC_MAX_CHAIN + 1); + if (err) + return err; + + err = create_fdb_chains(steering, FDB_FT_OFFLOAD, 1); + if (err) + return err; + + return 0; +} + +static int init_fdb_root_ns(struct mlx5_flow_steering *steering) +{ + struct fs_prio *maj_prio; + int err; + + steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB); + if (!steering->fdb_root_ns) + return -ENOMEM; + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BYPASS_PATH, 1); if (IS_ERR(maj_prio)) { err = PTR_ERR(maj_prio); goto 
out_err; } - - levels = 2 * FDB_MAX_PRIO * (FDB_MAX_CHAIN + 1); - maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns, - FDB_FAST_PATH, - levels); - if (IS_ERR(maj_prio)) { - err = PTR_ERR(maj_prio); + err = create_fdb_fast_path(steering); + if (err) goto out_err; - } - - for (chain = 0; chain <= FDB_MAX_CHAIN; chain++) { - ns = fs_create_namespace(maj_prio); - if (IS_ERR(ns)) { - err = PTR_ERR(ns); - goto out_err; - } - - for (prio = 0; prio < FDB_MAX_PRIO * (chain + 1); prio++) { - min_prio = fs_create_prio(ns, prio, 2); - if (IS_ERR(min_prio)) { - err = PTR_ERR(min_prio); - goto out_err; - } - } - - steering->fdb_sub_ns[chain] = ns; - } maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_SLOW_PATH, 1); if (IS_ERR(maj_prio)) { @@ -2846,3 +2980,160 @@ out: return err; } EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn); + +static struct mlx5_flow_root_namespace +*get_root_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type ns_type) +{ + struct mlx5_flow_namespace *ns; + + if (ns_type == MLX5_FLOW_NAMESPACE_ESW_EGRESS || + ns_type == MLX5_FLOW_NAMESPACE_ESW_INGRESS) + ns = mlx5_get_flow_vport_acl_namespace(dev, ns_type, 0); + else + ns = mlx5_get_flow_namespace(dev, ns_type); + if (!ns) + return NULL; + + return find_root(&ns->node); +} + +struct mlx5_modify_hdr *mlx5_modify_header_alloc(struct mlx5_core_dev *dev, + u8 ns_type, u8 num_actions, + void *modify_actions) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_modify_hdr *modify_hdr; + int err; + + root = get_root_namespace(dev, ns_type); + if (!root) + return ERR_PTR(-EOPNOTSUPP); + + modify_hdr = kzalloc(sizeof(*modify_hdr), GFP_KERNEL); + if (!modify_hdr) + return ERR_PTR(-ENOMEM); + + modify_hdr->ns_type = ns_type; + err = root->cmds->modify_header_alloc(root, ns_type, num_actions, + modify_actions, modify_hdr); + if (err) { + kfree(modify_hdr); + return ERR_PTR(err); + } + + return modify_hdr; +} +EXPORT_SYMBOL(mlx5_modify_header_alloc); + +void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, + struct mlx5_modify_hdr *modify_hdr) +{ + struct mlx5_flow_root_namespace *root; + + root = get_root_namespace(dev, modify_hdr->ns_type); + if (WARN_ON(!root)) + return; + root->cmds->modify_header_dealloc(root, modify_hdr); + kfree(modify_hdr); +} +EXPORT_SYMBOL(mlx5_modify_header_dealloc); + +struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev, + int reformat_type, + size_t size, + void *reformat_data, + enum mlx5_flow_namespace_type ns_type) +{ + struct mlx5_pkt_reformat *pkt_reformat; + struct mlx5_flow_root_namespace *root; + int err; + + root = get_root_namespace(dev, ns_type); + if (!root) + return ERR_PTR(-EOPNOTSUPP); + + pkt_reformat = kzalloc(sizeof(*pkt_reformat), GFP_KERNEL); + if (!pkt_reformat) + return ERR_PTR(-ENOMEM); + + pkt_reformat->ns_type = ns_type; + pkt_reformat->reformat_type = reformat_type; + err = root->cmds->packet_reformat_alloc(root, reformat_type, size, + reformat_data, ns_type, + pkt_reformat); + if (err) { + kfree(pkt_reformat); + return ERR_PTR(err); + } + + return pkt_reformat; +} +EXPORT_SYMBOL(mlx5_packet_reformat_alloc); + +void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev, + struct mlx5_pkt_reformat *pkt_reformat) +{ + struct mlx5_flow_root_namespace *root; + + root = get_root_namespace(dev, pkt_reformat->ns_type); + if (WARN_ON(!root)) + return; + root->cmds->packet_reformat_dealloc(root, pkt_reformat); + kfree(pkt_reformat); +} +EXPORT_SYMBOL(mlx5_packet_reformat_dealloc); + +int mlx5_flow_namespace_set_peer(struct 
mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns) +{ + if (peer_ns && ns->mode != peer_ns->mode) { + mlx5_core_err(ns->dev, + "Can't peer namespace of different steering mode\n"); + return -EINVAL; + } + + return ns->cmds->set_peer(ns, peer_ns); +} + +/* This function should be called only at init stage of the namespace. + * It is not safe to call this function while steering operations + * are executed in the namespace. + */ +int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns, + enum mlx5_flow_steering_mode mode) +{ + struct mlx5_flow_root_namespace *root; + const struct mlx5_flow_cmds *cmds; + int err; + + root = find_root(&ns->node); + if (&root->ns != ns) + /* Can't set cmds to non root namespace */ + return -EINVAL; + + if (root->table_type != FS_FT_FDB) + return -EOPNOTSUPP; + + if (root->mode == mode) + return 0; + + if (mode == MLX5_FLOW_STEERING_MODE_SMFS) + cmds = mlx5_fs_cmd_get_dr_cmds(); + else + cmds = mlx5_fs_cmd_get_fw_cmds(); + if (!cmds) + return -EOPNOTSUPP; + + err = cmds->create_ns(root); + if (err) { + mlx5_core_err(root->dev, "Failed to create flow namespace (%d)\n", + err); + return err; + } + + root->cmds->destroy_ns(root); + root->cmds = cmds; + root->mode = mode; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index c1252d6be0ef..be5f5e32c1e8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -37,6 +37,24 @@ #include <linux/mlx5/fs.h> #include <linux/rhashtable.h> #include <linux/llist.h> +#include <steering/fs_dr.h> + +struct mlx5_modify_hdr { + enum mlx5_flow_namespace_type ns_type; + union { + struct mlx5_fs_dr_action action; + u32 id; + }; +}; + +struct mlx5_pkt_reformat { + enum mlx5_flow_namespace_type ns_type; + int reformat_type; /* from mlx5_ifc */ + union { + struct mlx5_fs_dr_action action; + u32 id; + }; +}; /* FS_TYPE_PRIO_CHAINS is a PRIO that will have namespaces only, * and those are in parallel to one another when going over them to connect @@ -80,9 +98,15 @@ enum fs_fte_status { FS_FTE_STATUS_EXISTING = 1UL << 0, }; +enum mlx5_flow_steering_mode { + MLX5_FLOW_STEERING_MODE_DMFS, + MLX5_FLOW_STEERING_MODE_SMFS +}; + struct mlx5_flow_steering { struct mlx5_core_dev *dev; - struct kmem_cache *fgs_cache; + enum mlx5_flow_steering_mode mode; + struct kmem_cache *fgs_cache; struct kmem_cache *ftes_cache; struct mlx5_flow_root_namespace *root_ns; struct mlx5_flow_root_namespace *fdb_root_ns; @@ -128,6 +152,7 @@ struct mlx5_flow_handle { /* Type of children is mlx5_flow_group */ struct mlx5_flow_table { struct fs_node node; + struct mlx5_fs_dr_table fs_dr_table; u32 id; u16 vport; unsigned int max_fte; @@ -137,7 +162,9 @@ struct mlx5_flow_table { struct { bool active; unsigned int required_groups; + unsigned int group_size; unsigned int num_groups; + unsigned int max_fte; } autogroup; /* Protect fwd_rules */ struct mutex lock; @@ -145,6 +172,7 @@ struct mlx5_flow_table { struct list_head fwd_rules; u32 flags; struct rhltable fgs_hash; + enum mlx5_flow_table_miss_action def_miss_action; }; struct mlx5_ft_underlay_qp { @@ -167,6 +195,7 @@ struct mlx5_ft_underlay_qp { /* Type of children is mlx5_flow_rule */ struct fs_fte { struct fs_node node; + struct mlx5_fs_dr_rule fs_dr_rule; u32 val[MLX5_ST_SZ_DW_MATCH_PARAM]; u32 dests_size; u32 index; @@ -191,6 +220,7 @@ struct fs_prio { struct mlx5_flow_namespace { /* parent == NULL => root ns */ struct fs_node node; + enum 
mlx5_flow_table_miss_action def_miss_action; }; struct mlx5_flow_group_mask { @@ -201,6 +231,7 @@ struct mlx5_flow_group_mask { /* Type of children is fs_fte */ struct mlx5_flow_group { struct fs_node node; + struct mlx5_fs_dr_matcher fs_dr_matcher; struct mlx5_flow_group_mask mask; u32 start_index; u32 max_ftes; @@ -212,6 +243,8 @@ struct mlx5_flow_group { struct mlx5_flow_root_namespace { struct mlx5_flow_namespace ns; + enum mlx5_flow_steering_mode mode; + struct mlx5_fs_dr_domain fs_dr_domain; enum fs_flow_table_type table_type; struct mlx5_core_dev *dev; struct mlx5_flow_table *root_ft; @@ -219,7 +252,6 @@ struct mlx5_flow_root_namespace { struct mutex chain_lock; struct list_head underlay_qpns; const struct mlx5_flow_cmds *cmds; - enum mlx5_flow_table_miss_action def_miss_action; }; int mlx5_init_fc_stats(struct mlx5_core_dev *dev); @@ -230,6 +262,14 @@ void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev, void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, unsigned long interval); +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void); + +int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns); + +int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns, + enum mlx5_flow_steering_mode mode); + int mlx5_init_fs(struct mlx5_core_dev *dev); void mlx5_cleanup_fs(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 1834d9f3aa1c..ab69effb056d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -40,6 +40,8 @@ #define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000) /* Max number of counters to query in bulk read is 32K */ #define MLX5_SW_MAX_COUNTERS_BULK BIT(15) +#define MLX5_FC_POOL_MAX_THRESHOLD BIT(18) +#define MLX5_FC_POOL_USED_BUFF_RATIO 10 struct mlx5_fc_cache { u64 packets; @@ -58,12 +60,18 @@ struct mlx5_fc { u64 lastpackets; u64 lastbytes; + struct mlx5_fc_bulk *bulk; u32 id; bool aging; struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; }; +static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev); +static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool); +static struct mlx5_fc *mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool); +static void mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc); + /* locking scheme: * * It is the responsibility of the user to prevent concurrent calls or bad @@ -75,7 +83,7 @@ struct mlx5_fc { * access to counter list: * - create (user context) * - mlx5_fc_create() only adds to an addlist to be used by - * mlx5_fc_stats_query_work(). addlist is a lockless single linked list + * mlx5_fc_stats_work(). addlist is a lockless single linked list * that doesn't require any additional synchronization when adding single * node. * - spawn thread to do the actual destroy @@ -136,81 +144,87 @@ static void mlx5_fc_stats_remove(struct mlx5_core_dev *dev, spin_unlock(&fc_stats->counters_idr_lock); } -/* The function returns the last counter that was queried so the caller - * function can continue calling it till all counters are queried. 
- */ -static struct mlx5_fc *mlx5_fc_stats_query(struct mlx5_core_dev *dev, - struct mlx5_fc *first, - u32 last_id) +static int get_max_bulk_query_len(struct mlx5_core_dev *dev) { - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; - struct mlx5_fc *counter = NULL; - struct mlx5_cmd_fc_bulk *b; - bool more = false; - u32 afirst_id; - int num; - int err; + return min_t(int, MLX5_SW_MAX_COUNTERS_BULK, + (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk))); +} - int max_bulk = min_t(int, MLX5_SW_MAX_COUNTERS_BULK, - (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk))); +static void update_counter_cache(int index, u32 *bulk_raw_data, + struct mlx5_fc_cache *cache) +{ + void *stats = MLX5_ADDR_OF(query_flow_counter_out, bulk_raw_data, + flow_statistics[index]); + u64 packets = MLX5_GET64(traffic_counter, stats, packets); + u64 bytes = MLX5_GET64(traffic_counter, stats, octets); - /* first id must be aligned to 4 when using bulk query */ - afirst_id = first->id & ~0x3; + if (cache->packets == packets) + return; - /* number of counters to query inc. the last counter */ - num = ALIGN(last_id - afirst_id + 1, 4); - if (num > max_bulk) { - num = max_bulk; - last_id = afirst_id + num - 1; - } + cache->packets = packets; + cache->bytes = bytes; + cache->lastuse = jiffies; +} - b = mlx5_cmd_fc_bulk_alloc(dev, afirst_id, num); - if (!b) { - mlx5_core_err(dev, "Error allocating resources for bulk query\n"); - return NULL; - } +static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev, + struct mlx5_fc *first, + u32 last_id) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + bool query_more_counters = (first->id <= last_id); + int max_bulk_len = get_max_bulk_query_len(dev); + u32 *data = fc_stats->bulk_query_out; + struct mlx5_fc *counter = first; + u32 bulk_base_id; + int bulk_len; + int err; - err = mlx5_cmd_fc_bulk_query(dev, b); - if (err) { - mlx5_core_err(dev, "Error doing bulk query: %d\n", err); - goto out; - } + while (query_more_counters) { + /* first id must be aligned to 4 when using bulk query */ + bulk_base_id = counter->id & ~0x3; - counter = first; - list_for_each_entry_from(counter, &fc_stats->counters, list) { - struct mlx5_fc_cache *c = &counter->cache; - u64 packets; - u64 bytes; + /* number of counters to query inc. the last counter */ + bulk_len = min_t(int, max_bulk_len, + ALIGN(last_id - bulk_base_id + 1, 4)); - if (counter->id > last_id) { - more = true; - break; + err = mlx5_cmd_fc_bulk_query(dev, bulk_base_id, bulk_len, + data); + if (err) { + mlx5_core_err(dev, "Error doing bulk query: %d\n", err); + return; } + query_more_counters = false; - mlx5_cmd_fc_bulk_get(dev, b, - counter->id, &packets, &bytes); + list_for_each_entry_from(counter, &fc_stats->counters, list) { + int counter_index = counter->id - bulk_base_id; + struct mlx5_fc_cache *cache = &counter->cache; - if (c->packets == packets) - continue; + if (counter->id >= bulk_base_id + bulk_len) { + query_more_counters = true; + break; + } - c->packets = packets; - c->bytes = bytes; - c->lastuse = jiffies; + update_counter_cache(counter_index, data, cache); + } } - -out: - mlx5_cmd_fc_bulk_free(b); - - return more ? 
counter : NULL; } -static void mlx5_free_fc(struct mlx5_core_dev *dev, - struct mlx5_fc *counter) +static void mlx5_fc_free(struct mlx5_core_dev *dev, struct mlx5_fc *counter) { mlx5_cmd_fc_free(dev, counter->id); kfree(counter); } +static void mlx5_fc_release(struct mlx5_core_dev *dev, struct mlx5_fc *counter) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + if (counter->bulk) + mlx5_fc_pool_release_counter(&fc_stats->fc_pool, counter); + else + mlx5_fc_free(dev, counter); +} + static void mlx5_fc_stats_work(struct work_struct *work) { struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev, @@ -234,7 +248,7 @@ static void mlx5_fc_stats_work(struct work_struct *work) llist_for_each_entry_safe(counter, tmp, dellist, dellist) { mlx5_fc_stats_remove(dev, counter); - mlx5_free_fc(dev, counter); + mlx5_fc_release(dev, counter); } if (time_before(now, fc_stats->next_query) || @@ -244,32 +258,62 @@ static void mlx5_fc_stats_work(struct work_struct *work) counter = list_first_entry(&fc_stats->counters, struct mlx5_fc, list); - while (counter) - counter = mlx5_fc_stats_query(dev, counter, last->id); + if (counter) + mlx5_fc_stats_query_counter_range(dev, counter, last->id); fc_stats->next_query = now + fc_stats->sampling_interval; } -struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) +static struct mlx5_fc *mlx5_fc_single_alloc(struct mlx5_core_dev *dev) { - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; struct mlx5_fc *counter; int err; counter = kzalloc(sizeof(*counter), GFP_KERNEL); if (!counter) return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(&counter->list); err = mlx5_cmd_fc_alloc(dev, &counter->id); - if (err) - goto err_out; + if (err) { + kfree(counter); + return ERR_PTR(err); + } + + return counter; +} + +static struct mlx5_fc *mlx5_fc_acquire(struct mlx5_core_dev *dev, bool aging) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc *counter; + + if (aging && MLX5_CAP_GEN(dev, flow_counter_bulk_alloc) != 0) { + counter = mlx5_fc_pool_acquire_counter(&fc_stats->fc_pool); + if (!IS_ERR(counter)) + return counter; + } + + return mlx5_fc_single_alloc(dev); +} + +struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) +{ + struct mlx5_fc *counter = mlx5_fc_acquire(dev, aging); + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + int err; + + if (IS_ERR(counter)) + return counter; + + INIT_LIST_HEAD(&counter->list); + counter->aging = aging; if (aging) { u32 id = counter->id; counter->cache.lastuse = jiffies; - counter->aging = true; + counter->lastbytes = counter->cache.bytes; + counter->lastpackets = counter->cache.packets; idr_preload(GFP_KERNEL); spin_lock(&fc_stats->counters_idr_lock); @@ -290,10 +334,7 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) return counter; err_out_alloc: - mlx5_cmd_fc_free(dev, counter->id); -err_out: - kfree(counter); - + mlx5_fc_release(dev, counter); return ERR_PTR(err); } EXPORT_SYMBOL(mlx5_fc_create); @@ -317,13 +358,15 @@ void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter) return; } - mlx5_free_fc(dev, counter); + mlx5_fc_release(dev, counter); } EXPORT_SYMBOL(mlx5_fc_destroy); int mlx5_init_fc_stats(struct mlx5_core_dev *dev) { struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + int max_bulk_len; + int max_out_len; spin_lock_init(&fc_stats->counters_idr_lock); idr_init(&fc_stats->counters_idr); @@ -331,14 +374,25 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev) init_llist_head(&fc_stats->addlist); 
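
The bulk-query rewrite above replaces the one-shot mlx5_fc_stats_query() with a loop that reads counters in firmware-sized windows whose base id is rounded down to a multiple of 4. A standalone sketch of that windowing arithmetic is shown below; it is illustrative only (plain C, no mlx5 types, hypothetical helper names), not driver code.

/*
 * Sketch of the window computation used by the bulk counter query:
 * the base id is rounded down to a multiple of 4, and the window length
 * is the 4-aligned count of counters left to query (including last_id),
 * clamped to the firmware's maximum bulk length.
 */
#include <stdio.h>

#define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((a) - 1))
#define MIN(a, b)       ((a) < (b) ? (a) : (b))

static void bulk_window(unsigned int first_id, unsigned int last_id,
			unsigned int max_bulk_len,
			unsigned int *base, unsigned int *len)
{
	*base = first_id & ~0x3u;                      /* 4-aligned base id */
	*len  = MIN(max_bulk_len,
		    ALIGN_UP(last_id - *base + 1, 4)); /* counters incl. last */
}

int main(void)
{
	unsigned int base, len;

	bulk_window(/*first_id*/ 5, /*last_id*/ 70, /*max_bulk_len*/ 32,
		    &base, &len);
	printf("query ids [%u, %u)\n", base, base + len); /* prints [4, 36) */
	return 0;
}
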
init_llist_head(&fc_stats->dellist); + max_bulk_len = get_max_bulk_query_len(dev); + max_out_len = mlx5_cmd_fc_get_bulk_query_out_len(max_bulk_len); + fc_stats->bulk_query_out = kzalloc(max_out_len, GFP_KERNEL); + if (!fc_stats->bulk_query_out) + return -ENOMEM; + fc_stats->wq = create_singlethread_workqueue("mlx5_fc"); if (!fc_stats->wq) - return -ENOMEM; + goto err_wq_create; fc_stats->sampling_interval = MLX5_FC_STATS_PERIOD; INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work); + mlx5_fc_pool_init(&fc_stats->fc_pool, dev); return 0; + +err_wq_create: + kfree(fc_stats->bulk_query_out); + return -ENOMEM; } void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev) @@ -352,14 +406,16 @@ void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev) destroy_workqueue(dev->priv.fc_stats.wq); dev->priv.fc_stats.wq = NULL; - idr_destroy(&fc_stats->counters_idr); - tmplist = llist_del_all(&fc_stats->addlist); llist_for_each_entry_safe(counter, tmp, tmplist, addlist) - mlx5_free_fc(dev, counter); + mlx5_fc_release(dev, counter); list_for_each_entry_safe(counter, tmp, &fc_stats->counters, list) - mlx5_free_fc(dev, counter); + mlx5_fc_release(dev, counter); + + mlx5_fc_pool_cleanup(&fc_stats->fc_pool); + idr_destroy(&fc_stats->counters_idr); + kfree(fc_stats->bulk_query_out); } int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter, @@ -406,3 +462,243 @@ void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, fc_stats->sampling_interval = min_t(unsigned long, interval, fc_stats->sampling_interval); } + +/* Flow counter bluks */ + +struct mlx5_fc_bulk { + struct list_head pool_list; + u32 base_id; + int bulk_len; + unsigned long *bitmask; + struct mlx5_fc fcs[0]; +}; + +static void mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk, + u32 id) +{ + counter->bulk = bulk; + counter->id = id; +} + +static int mlx5_fc_bulk_get_free_fcs_amount(struct mlx5_fc_bulk *bulk) +{ + return bitmap_weight(bulk->bitmask, bulk->bulk_len); +} + +static struct mlx5_fc_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev) +{ + enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask; + struct mlx5_fc_bulk *bulk; + int err = -ENOMEM; + int bulk_len; + u32 base_id; + int i; + + alloc_bitmask = MLX5_CAP_GEN(dev, flow_counter_bulk_alloc); + bulk_len = alloc_bitmask > 0 ? 
MLX5_FC_BULK_NUM_FCS(alloc_bitmask) : 1; + + bulk = kzalloc(sizeof(*bulk) + bulk_len * sizeof(struct mlx5_fc), + GFP_KERNEL); + if (!bulk) + goto err_alloc_bulk; + + bulk->bitmask = kcalloc(BITS_TO_LONGS(bulk_len), sizeof(unsigned long), + GFP_KERNEL); + if (!bulk->bitmask) + goto err_alloc_bitmask; + + err = mlx5_cmd_fc_bulk_alloc(dev, alloc_bitmask, &base_id); + if (err) + goto err_mlx5_cmd_bulk_alloc; + + bulk->base_id = base_id; + bulk->bulk_len = bulk_len; + for (i = 0; i < bulk_len; i++) { + mlx5_fc_init(&bulk->fcs[i], bulk, base_id + i); + set_bit(i, bulk->bitmask); + } + + return bulk; + +err_mlx5_cmd_bulk_alloc: + kfree(bulk->bitmask); +err_alloc_bitmask: + kfree(bulk); +err_alloc_bulk: + return ERR_PTR(err); +} + +static int +mlx5_fc_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fc_bulk *bulk) +{ + if (mlx5_fc_bulk_get_free_fcs_amount(bulk) < bulk->bulk_len) { + mlx5_core_err(dev, "Freeing bulk before all counters were released\n"); + return -EBUSY; + } + + mlx5_cmd_fc_free(dev, bulk->base_id); + kfree(bulk->bitmask); + kfree(bulk); + + return 0; +} + +static struct mlx5_fc *mlx5_fc_bulk_acquire_fc(struct mlx5_fc_bulk *bulk) +{ + int free_fc_index = find_first_bit(bulk->bitmask, bulk->bulk_len); + + if (free_fc_index >= bulk->bulk_len) + return ERR_PTR(-ENOSPC); + + clear_bit(free_fc_index, bulk->bitmask); + return &bulk->fcs[free_fc_index]; +} + +static int mlx5_fc_bulk_release_fc(struct mlx5_fc_bulk *bulk, struct mlx5_fc *fc) +{ + int fc_index = fc->id - bulk->base_id; + + if (test_bit(fc_index, bulk->bitmask)) + return -EINVAL; + + set_bit(fc_index, bulk->bitmask); + return 0; +} + +/* Flow counters pool API */ + +static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev) +{ + fc_pool->dev = dev; + mutex_init(&fc_pool->pool_lock); + INIT_LIST_HEAD(&fc_pool->fully_used); + INIT_LIST_HEAD(&fc_pool->partially_used); + INIT_LIST_HEAD(&fc_pool->unused); + fc_pool->available_fcs = 0; + fc_pool->used_fcs = 0; + fc_pool->threshold = 0; +} + +static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool) +{ + struct mlx5_core_dev *dev = fc_pool->dev; + struct mlx5_fc_bulk *bulk; + struct mlx5_fc_bulk *tmp; + + list_for_each_entry_safe(bulk, tmp, &fc_pool->fully_used, pool_list) + mlx5_fc_bulk_destroy(dev, bulk); + list_for_each_entry_safe(bulk, tmp, &fc_pool->partially_used, pool_list) + mlx5_fc_bulk_destroy(dev, bulk); + list_for_each_entry_safe(bulk, tmp, &fc_pool->unused, pool_list) + mlx5_fc_bulk_destroy(dev, bulk); +} + +static void mlx5_fc_pool_update_threshold(struct mlx5_fc_pool *fc_pool) +{ + fc_pool->threshold = min_t(int, MLX5_FC_POOL_MAX_THRESHOLD, + fc_pool->used_fcs / MLX5_FC_POOL_USED_BUFF_RATIO); +} + +static struct mlx5_fc_bulk * +mlx5_fc_pool_alloc_new_bulk(struct mlx5_fc_pool *fc_pool) +{ + struct mlx5_core_dev *dev = fc_pool->dev; + struct mlx5_fc_bulk *new_bulk; + + new_bulk = mlx5_fc_bulk_create(dev); + if (!IS_ERR(new_bulk)) + fc_pool->available_fcs += new_bulk->bulk_len; + mlx5_fc_pool_update_threshold(fc_pool); + return new_bulk; +} + +static void +mlx5_fc_pool_free_bulk(struct mlx5_fc_pool *fc_pool, struct mlx5_fc_bulk *bulk) +{ + struct mlx5_core_dev *dev = fc_pool->dev; + + fc_pool->available_fcs -= bulk->bulk_len; + mlx5_fc_bulk_destroy(dev, bulk); + mlx5_fc_pool_update_threshold(fc_pool); +} + +static struct mlx5_fc * +mlx5_fc_pool_acquire_from_list(struct list_head *src_list, + struct list_head *next_list, + bool move_non_full_bulk) +{ + struct mlx5_fc_bulk *bulk; + struct mlx5_fc *fc; + + if (list_empty(src_list)) + 
return ERR_PTR(-ENODATA); + + bulk = list_first_entry(src_list, struct mlx5_fc_bulk, pool_list); + fc = mlx5_fc_bulk_acquire_fc(bulk); + if (move_non_full_bulk || mlx5_fc_bulk_get_free_fcs_amount(bulk) == 0) + list_move(&bulk->pool_list, next_list); + return fc; +} + +static struct mlx5_fc * +mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool) +{ + struct mlx5_fc_bulk *new_bulk; + struct mlx5_fc *fc; + + mutex_lock(&fc_pool->pool_lock); + + fc = mlx5_fc_pool_acquire_from_list(&fc_pool->partially_used, + &fc_pool->fully_used, false); + if (IS_ERR(fc)) + fc = mlx5_fc_pool_acquire_from_list(&fc_pool->unused, + &fc_pool->partially_used, + true); + if (IS_ERR(fc)) { + new_bulk = mlx5_fc_pool_alloc_new_bulk(fc_pool); + if (IS_ERR(new_bulk)) { + fc = ERR_CAST(new_bulk); + goto out; + } + fc = mlx5_fc_bulk_acquire_fc(new_bulk); + list_add(&new_bulk->pool_list, &fc_pool->partially_used); + } + fc_pool->available_fcs--; + fc_pool->used_fcs++; + +out: + mutex_unlock(&fc_pool->pool_lock); + return fc; +} + +static void +mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc) +{ + struct mlx5_core_dev *dev = fc_pool->dev; + struct mlx5_fc_bulk *bulk = fc->bulk; + int bulk_free_fcs_amount; + + mutex_lock(&fc_pool->pool_lock); + + if (mlx5_fc_bulk_release_fc(bulk, fc)) { + mlx5_core_warn(dev, "Attempted to release a counter which is not acquired\n"); + goto unlock; + } + + fc_pool->available_fcs++; + fc_pool->used_fcs--; + + bulk_free_fcs_amount = mlx5_fc_bulk_get_free_fcs_amount(bulk); + if (bulk_free_fcs_amount == 1) + list_move_tail(&bulk->pool_list, &fc_pool->partially_used); + if (bulk_free_fcs_amount == bulk->bulk_len) { + list_del(&bulk->pool_list); + if (fc_pool->available_fcs > fc_pool->threshold) + mlx5_fc_pool_free_bulk(fc_pool, bulk); + else + list_add(&bulk->pool_list, &fc_pool->unused); + } + +unlock: + mutex_unlock(&fc_pool->pool_lock); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index a19790dee7b2..909a7f284614 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -131,11 +131,11 @@ static int mlx5_get_pcam_reg(struct mlx5_core_dev *dev) MLX5_PCAM_REGS_5000_TO_507F); } -static int mlx5_get_mcam_reg(struct mlx5_core_dev *dev) +static int mlx5_get_mcam_access_reg_group(struct mlx5_core_dev *dev, + enum mlx5_mcam_reg_groups group) { - return mlx5_query_mcam_reg(dev, dev->caps.mcam, - MLX5_MCAM_FEATURE_ENHANCED_FEATURES, - MLX5_MCAM_REGS_FIRST_128); + return mlx5_query_mcam_reg(dev, dev->caps.mcam[group], + MLX5_MCAM_FEATURE_ENHANCED_FEATURES, group); } static int mlx5_get_qcam_reg(struct mlx5_core_dev *dev) @@ -221,8 +221,11 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, pcam_reg)) mlx5_get_pcam_reg(dev); - if (MLX5_CAP_GEN(dev, mcam_reg)) - mlx5_get_mcam_reg(dev); + if (MLX5_CAP_GEN(dev, mcam_reg)) { + mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_FIRST_128); + mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_0x9080_0x90FF); + mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_0x9100_0x917F); + } if (MLX5_CAP_GEN(dev, qcam_reg)) mlx5_get_qcam_reg(dev); @@ -239,12 +242,19 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } - if (MLX5_CAP_GEN(dev, tls)) { + if (MLX5_CAP_GEN(dev, tls_tx)) { err = mlx5_core_get_caps(dev, MLX5_CAP_TLS); if (err) return err; } + if (MLX5_CAP_GEN_64(dev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) { + err = mlx5_core_get_caps(dev, 
MLX5_CAP_VDPA_EMULATION); + if (err) + return err; + } + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 9314777d99e3..d9f4e8c59c1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -390,7 +390,8 @@ static void print_health_info(struct mlx5_core_dev *dev) static int mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg) + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); struct mlx5_core_health *health = &dev->priv.health; @@ -491,7 +492,8 @@ mlx5_fw_reporter_heath_buffer_data_put(struct mlx5_core_dev *dev, static int mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg, void *priv_ctx) + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); int err; @@ -545,23 +547,22 @@ static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = { static int mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter, - void *priv_ctx) + void *priv_ctx, + struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); return mlx5_health_try_recover(dev); } -#define MLX5_CR_DUMP_CHUNK_SIZE 256 static int mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg, void *priv_ctx) + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); u32 crdump_size = dev->priv.health.crdump_size; u32 *cr_data; - u32 data_size; - u32 offset; int err; if (!mlx5_core_is_pf(dev)) @@ -572,7 +573,7 @@ mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter, return -ENOMEM; err = mlx5_crdump_collect(dev, cr_data); if (err) - return err; + goto free_data; if (priv_ctx) { struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx; @@ -582,19 +583,7 @@ mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter, goto free_data; } - err = devlink_fmsg_arr_pair_nest_start(fmsg, "crdump_data"); - if (err) - goto free_data; - for (offset = 0; offset < crdump_size; offset += data_size) { - if (crdump_size - offset < MLX5_CR_DUMP_CHUNK_SIZE) - data_size = crdump_size - offset; - else - data_size = MLX5_CR_DUMP_CHUNK_SIZE; - err = devlink_fmsg_binary_put(fmsg, cr_data, data_size); - if (err) - goto free_data; - } - err = devlink_fmsg_arr_pair_nest_end(fmsg); + err = devlink_fmsg_binary_pair_put(fmsg, "crdump_data", cr_data, crdump_size); free_data: kvfree(cr_data); @@ -700,6 +689,16 @@ static void poll_health(struct timer_list *t) if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) goto out; + fatal_error = check_fatal_sensors(dev); + + if (fatal_error && !health->fatal_error) { + mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error); + dev->priv.health.fatal_error = fatal_error; + print_health_info(dev); + mlx5_trigger_health_work(dev); + goto out; + } + count = ioread32be(health->health_counter); if (count == health->prev) ++health->miss_counter; @@ -718,15 +717,6 @@ static void poll_health(struct timer_list *t) if (health->synd && health->synd != prev_synd) queue_work(health->wq, &health->report_work); - fatal_error = check_fatal_sensors(dev); - - if (fatal_error && !health->fatal_error) { - mlx5_core_err(dev, "Fatal 
error %u detected\n", fatal_error); - dev->priv.health.fatal_error = fatal_error; - print_health_info(dev); - mlx5_trigger_health_work(dev); - } - out: mod_timer(&health->timer, get_next_poll_jiffies()); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile deleted file mode 100644 index c78512eed8d7..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 1a2560e3bf7c..56078b23f1a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -87,8 +87,8 @@ int mlx5i_init(struct mlx5_core_dev *mdev, mlx5e_set_netdev_mtu_boundaries(priv); netdev->mtu = netdev->max_mtu; - mlx5e_build_nic_params(mdev, NULL, &priv->rss_params, &priv->channels.params, - priv->max_nch, netdev->mtu); + mlx5e_build_nic_params(priv, NULL, &priv->rss_params, &priv->channels.params, + netdev->mtu); mlx5i_build_nic_params(mdev, &priv->channels.params); mlx5e_timestamp_init(priv); @@ -279,7 +279,7 @@ static int mlx5i_init_tx(struct mlx5e_priv *priv) return err; } - err = mlx5i_create_tis(priv->mdev, ipriv->qp.qpn, &priv->tisn[0]); + err = mlx5i_create_tis(priv->mdev, ipriv->qp.qpn, &priv->tisn[0][0]); if (err) { mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err); goto err_destroy_underlay_qp; @@ -296,7 +296,7 @@ static void mlx5i_cleanup_tx(struct mlx5e_priv *priv) { struct mlx5i_priv *ipriv = priv->ppriv; - mlx5e_destroy_tis(priv->mdev, priv->tisn[0]); + mlx5e_destroy_tis(priv->mdev, priv->tisn[0][0]); mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp); } @@ -419,6 +419,28 @@ static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) mlx5e_destroy_q_counters(priv); } +/* The stats groups order is opposite to the update_stats() order calls */ +static mlx5e_stats_grp_t mlx5i_stats_grps[] = { + &MLX5E_STATS_GRP(sw), + &MLX5E_STATS_GRP(qcnt), + &MLX5E_STATS_GRP(vnic_env), + &MLX5E_STATS_GRP(vport), + &MLX5E_STATS_GRP(802_3), + &MLX5E_STATS_GRP(2863), + &MLX5E_STATS_GRP(2819), + &MLX5E_STATS_GRP(phy), + &MLX5E_STATS_GRP(pcie), + &MLX5E_STATS_GRP(per_prio), + &MLX5E_STATS_GRP(pme), + &MLX5E_STATS_GRP(channels), + &MLX5E_STATS_GRP(per_port_buff_congest), +}; + +static unsigned int mlx5i_stats_grps_num(struct mlx5e_priv *priv) +{ + return ARRAY_SIZE(mlx5i_stats_grps); +} + static const struct mlx5e_profile mlx5i_nic_profile = { .init = mlx5i_init, .cleanup = mlx5i_cleanup, @@ -435,6 +457,8 @@ static const struct mlx5e_profile mlx5i_nic_profile = { .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */ .max_tc = MLX5I_MAX_NUM_TC, .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), + .stats_grps = mlx5i_stats_grps, + .stats_grps_num = mlx5i_stats_grps_num, }; /* mlx5i netdev NDos */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index c5a491e22e55..96e64187c089 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -210,7 +210,7 @@ static int mlx5i_pkey_open(struct net_device *netdev) goto err_unint_underlay_qp; } - err = mlx5i_create_tis(mdev, ipriv->qp.qpn, &epriv->tisn[0]); + err = mlx5i_create_tis(mdev, ipriv->qp.qpn, &epriv->tisn[0][0]); if (err) { mlx5_core_warn(mdev, 
"create child tis failed, %d\n", err); goto err_remove_rx_uderlay_qp; @@ -228,7 +228,7 @@ static int mlx5i_pkey_open(struct net_device *netdev) return 0; err_clear_state_opened_flag: - mlx5e_destroy_tis(mdev, epriv->tisn[0]); + mlx5e_destroy_tis(mdev, epriv->tisn[0][0]); err_remove_rx_uderlay_qp: mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn); err_unint_underlay_qp: @@ -257,7 +257,7 @@ static int mlx5i_pkey_close(struct net_device *netdev) mlx5i_uninit_underlay_qp(priv); mlx5e_deactivate_priv_channels(priv); mlx5e_close_channels(&priv->channels); - mlx5e_destroy_tis(mdev, priv->tisn[0]); + mlx5e_destroy_tis(mdev, priv->tisn[0][0]); unlock: mutex_unlock(&priv->state_lock); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index c5ef2ff26465..b91eabc09fbc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -145,34 +145,35 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, { *port1 = 1; *port2 = 2; - if (!tracker->netdev_state[0].tx_enabled || - !tracker->netdev_state[0].link_up) { + if (!tracker->netdev_state[MLX5_LAG_P1].tx_enabled || + !tracker->netdev_state[MLX5_LAG_P1].link_up) { *port1 = 2; return; } - if (!tracker->netdev_state[1].tx_enabled || - !tracker->netdev_state[1].link_up) + if (!tracker->netdev_state[MLX5_LAG_P2].tx_enabled || + !tracker->netdev_state[MLX5_LAG_P2].link_up) *port2 = 1; } void mlx5_modify_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker) { - struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; u8 v2p_port1, v2p_port2; int err; mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1, &v2p_port2); - if (v2p_port1 != ldev->v2p_map[0] || - v2p_port2 != ldev->v2p_map[1]) { - ldev->v2p_map[0] = v2p_port1; - ldev->v2p_map[1] = v2p_port2; + if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] || + v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) { + ldev->v2p_map[MLX5_LAG_P1] = v2p_port1; + ldev->v2p_map[MLX5_LAG_P2] = v2p_port2; mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d", - ldev->v2p_map[0], ldev->v2p_map[1]); + ldev->v2p_map[MLX5_LAG_P1], + ldev->v2p_map[MLX5_LAG_P2]); err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2); if (err) @@ -185,16 +186,17 @@ void mlx5_modify_lag(struct mlx5_lag *ldev, static int mlx5_create_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker) { - struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; int err; - mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[0], - &ldev->v2p_map[1]); + mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1], + &ldev->v2p_map[MLX5_LAG_P2]); mlx5_core_info(dev0, "lag map port 1:%d port 2:%d", - ldev->v2p_map[0], ldev->v2p_map[1]); + ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]); - err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[0], ldev->v2p_map[1]); + err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1], + ldev->v2p_map[MLX5_LAG_P2]); if (err) mlx5_core_err(dev0, "Failed to create LAG (%d)\n", @@ -207,7 +209,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, u8 flags) { bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE); - struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; int err; err = mlx5_create_lag(ldev, tracker); @@ -229,7 +231,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, static int mlx5_deactivate_lag(struct mlx5_lag *ldev) { - struct mlx5_core_dev *dev0 = 
ldev->pf[0].dev; + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; bool roce_lag = __mlx5_lag_is_roce(ldev); int err; @@ -252,14 +254,15 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev) static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) { - if (!ldev->pf[0].dev || !ldev->pf[1].dev) + if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev) return false; #ifdef CONFIG_MLX5_ESWITCH - return mlx5_esw_lag_prereq(ldev->pf[0].dev, ldev->pf[1].dev); + return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev, + ldev->pf[MLX5_LAG_P2].dev); #else - return (!mlx5_sriov_is_enabled(ldev->pf[0].dev) && - !mlx5_sriov_is_enabled(ldev->pf[1].dev)); + return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) && + !mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev)); #endif } @@ -285,8 +288,8 @@ static void mlx5_lag_remove_ib_devices(struct mlx5_lag *ldev) static void mlx5_do_bond(struct mlx5_lag *ldev) { - struct mlx5_core_dev *dev0 = ldev->pf[0].dev; - struct mlx5_core_dev *dev1 = ldev->pf[1].dev; + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; struct lag_tracker tracker; bool do_bond, roce_lag; int err; @@ -583,7 +586,8 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) if (!ldev->nb.notifier_call) { ldev->nb.notifier_call = mlx5_lag_netdev_event; - if (register_netdevice_notifier(&ldev->nb)) { + if (register_netdevice_notifier_dev_net(netdev, &ldev->nb, + &ldev->nn)) { ldev->nb.notifier_call = NULL; mlx5_core_err(dev, "Failed to register LAG netdev notifier\n"); } @@ -596,7 +600,7 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) } /* Must be called with intf_mutex held */ -void mlx5_lag_remove(struct mlx5_core_dev *dev) +void mlx5_lag_remove(struct mlx5_core_dev *dev, struct net_device *netdev) { struct mlx5_lag *ldev; int i; @@ -616,7 +620,8 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev) if (i == MLX5_MAX_PORTS) { if (ldev->nb.notifier_call) - unregister_netdevice_notifier(&ldev->nb); + unregister_netdevice_notifier_dev_net(netdev, &ldev->nb, + &ldev->nn); mlx5_lag_mp_cleanup(ldev); cancel_delayed_work_sync(&ldev->bond_work); mlx5_lag_dev_free(ldev); @@ -692,10 +697,11 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) goto unlock; if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { - ndev = ldev->tracker.netdev_state[0].tx_enabled ? - ldev->pf[0].netdev : ldev->pf[1].netdev; + ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ? + ldev->pf[MLX5_LAG_P1].netdev : + ldev->pf[MLX5_LAG_P2].netdev; } else { - ndev = ldev->pf[0].netdev; + ndev = ldev->pf[MLX5_LAG_P1].netdev; } if (ndev) dev_hold(ndev); @@ -717,7 +723,8 @@ bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv) return true; ldev = mlx5_lag_dev_get(dev); - if (!ldev || !__mlx5_lag_is_roce(ldev) || ldev->pf[0].dev == dev) + if (!ldev || !__mlx5_lag_is_roce(ldev) || + ldev->pf[MLX5_LAG_P1].dev == dev) return true; /* If bonded, we do not add an IB device for PF1. 
*/ @@ -746,11 +753,11 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, ldev = mlx5_lag_dev_get(dev); if (ldev && __mlx5_lag_is_roce(ldev)) { num_ports = MLX5_MAX_PORTS; - mdev[0] = ldev->pf[0].dev; - mdev[1] = ldev->pf[1].dev; + mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev; + mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev; } else { num_ports = 1; - mdev[0] = dev; + mdev[MLX5_LAG_P1] = dev; } for (i = 0; i < num_ports; ++i) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag.h index 1dea0b1c9826..316ab09e2664 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.h @@ -8,6 +8,11 @@ #include "lag_mp.h" enum { + MLX5_LAG_P1, + MLX5_LAG_P2, +}; + +enum { MLX5_LAG_FLAG_ROCE = 1 << 0, MLX5_LAG_FLAG_SRIOV = 1 << 1, MLX5_LAG_FLAG_MULTIPATH = 1 << 2, @@ -39,6 +44,7 @@ struct mlx5_lag { struct workqueue_struct *wq; struct delayed_work bond_work; struct notifier_block nb; + struct netdev_net_notifier nn; struct lag_mp lag_mp; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index e69766393990..416676c35b1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -11,10 +11,11 @@ static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev) { - if (!ldev->pf[0].dev || !ldev->pf[1].dev) + if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev) return false; - return mlx5_esw_multipath_prereq(ldev->pf[0].dev, ldev->pf[1].dev); + return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev, + ldev->pf[MLX5_LAG_P2].dev); } static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev) @@ -43,7 +44,8 @@ bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) * 2 - set affinity to port 2. 
* **/ -static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, int port) +static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, + enum mlx5_lag_port_affinity port) { struct lag_tracker tracker; @@ -51,37 +53,37 @@ static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, int port) return; switch (port) { - case 0: - tracker.netdev_state[0].tx_enabled = true; - tracker.netdev_state[1].tx_enabled = true; - tracker.netdev_state[0].link_up = true; - tracker.netdev_state[1].link_up = true; + case MLX5_LAG_NORMAL_AFFINITY: + tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true; + tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true; + tracker.netdev_state[MLX5_LAG_P1].link_up = true; + tracker.netdev_state[MLX5_LAG_P2].link_up = true; break; - case 1: - tracker.netdev_state[0].tx_enabled = true; - tracker.netdev_state[0].link_up = true; - tracker.netdev_state[1].tx_enabled = false; - tracker.netdev_state[1].link_up = false; + case MLX5_LAG_P1_AFFINITY: + tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true; + tracker.netdev_state[MLX5_LAG_P1].link_up = true; + tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false; + tracker.netdev_state[MLX5_LAG_P2].link_up = false; break; - case 2: - tracker.netdev_state[0].tx_enabled = false; - tracker.netdev_state[0].link_up = false; - tracker.netdev_state[1].tx_enabled = true; - tracker.netdev_state[1].link_up = true; + case MLX5_LAG_P2_AFFINITY: + tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false; + tracker.netdev_state[MLX5_LAG_P1].link_up = false; + tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true; + tracker.netdev_state[MLX5_LAG_P2].link_up = true; break; default: - mlx5_core_warn(ldev->pf[0].dev, "Invalid affinity port %d", - port); + mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev, + "Invalid affinity port %d", port); return; } - if (tracker.netdev_state[0].tx_enabled) - mlx5_notifier_call_chain(ldev->pf[0].dev->priv.events, + if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled) + mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events, MLX5_DEV_EVENT_PORT_AFFINITY, (void *)0); - if (tracker.netdev_state[1].tx_enabled) - mlx5_notifier_call_chain(ldev->pf[1].dev->priv.events, + if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled) + mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events, MLX5_DEV_EVENT_PORT_AFFINITY, (void *)0); @@ -141,11 +143,12 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, /* Verify next hops are ports of the same hca */ fib_nh0 = fib_info_nh(fi, 0); fib_nh1 = fib_info_nh(fi, 1); - if (!(fib_nh0->fib_nh_dev == ldev->pf[0].netdev && - fib_nh1->fib_nh_dev == ldev->pf[1].netdev) && - !(fib_nh0->fib_nh_dev == ldev->pf[1].netdev && - fib_nh1->fib_nh_dev == ldev->pf[0].netdev)) { - mlx5_core_warn(ldev->pf[0].dev, "Multipath offload require two ports of the same HCA\n"); + if (!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev && + fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev) && + !(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev && + fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev)) { + mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev, + "Multipath offload require two ports of the same HCA\n"); return; } @@ -157,7 +160,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH); } - mlx5_lag_set_port_affinity(ldev, 0); + mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY); mp->mfi = fi; } @@ -182,7 +185,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, } } else if (event == FIB_EVENT_NH_ADD && 
fib_info_num_path(fi) == 2) { - mlx5_lag_set_port_affinity(ldev, 0); + mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY); } } @@ -197,8 +200,6 @@ static void mlx5_lag_fib_update(struct work_struct *work) rtnl_lock(); switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: mlx5_lag_fib_route_event(ldev, fib_work->event, fib_work->fen_info.fi); @@ -256,8 +257,6 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, switch (event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: fen_info = container_of(info, struct fib_entry_notifier_info, info); @@ -267,8 +266,8 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, return notifier_from_errno(-EINVAL); } fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; - if (fib_dev != ldev->pf[0].netdev && - fib_dev != ldev->pf[1].netdev) { + if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev && + fib_dev != ldev->pf[MLX5_LAG_P2].netdev) { return NOTIFY_DONE; } fib_work = mlx5_lag_init_fib_work(ldev, event); @@ -308,8 +307,8 @@ int mlx5_lag_mp_init(struct mlx5_lag *ldev) return 0; mp->fib_nb.notifier_call = mlx5_lag_fib_event; - err = register_fib_notifier(&mp->fib_nb, - mlx5_lag_fib_event_flush); + err = register_fib_notifier(&init_net, &mp->fib_nb, + mlx5_lag_fib_event_flush, NULL); if (err) mp->fib_nb.notifier_call = NULL; @@ -323,6 +322,6 @@ void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) if (!mp->fib_nb.notifier_call) return; - unregister_fib_notifier(&mp->fib_nb); + unregister_fib_notifier(&init_net, &mp->fib_nb); mp->fib_nb.notifier_call = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h index 6d14b1100be9..79be89e9c7a4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h @@ -7,6 +7,12 @@ #include "lag.h" #include "mlx5_core.h" +enum mlx5_lag_port_affinity { + MLX5_LAG_NORMAL_AFFINITY, + MLX5_LAG_P1_AFFINITY, + MLX5_LAG_P2_AFFINITY, +}; + struct lag_mp { struct notifier_block fib_nb; struct fib_info *mfi; /* used in tracking fib events */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile deleted file mode 100644 index c78512eed8d7..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 0059b290e095..43f97601b500 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -236,6 +236,19 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp, if (!MLX5_PPS_CAP(mdev)) return -EOPNOTSUPP; + /* Reject requests with unsupported flags */ + if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | + PTP_RISING_EDGE | + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS)) + return -EOPNOTSUPP; + + /* Reject requests to enable time stamping on both edges. 
*/ + if ((rq->extts.flags & PTP_STRICT_FLAGS) && + (rq->extts.flags & PTP_ENABLE_FEATURE) && + (rq->extts.flags & PTP_EXTTS_EDGES) == PTP_EXTTS_EDGES) + return -EOPNOTSUPP; + if (rq->extts.index >= clock->ptp_info.n_pins) return -EINVAL; @@ -290,6 +303,10 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, if (!MLX5_PPS_CAP(mdev)) return -EOPNOTSUPP; + /* Reject requests with unsupported flags */ + if (rq->perout.flags) + return -EOPNOTSUPP; + if (rq->perout.index >= clock->ptp_info.n_pins) return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c index ea1d4d26ece0..3fc575d1c3ec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c @@ -2,6 +2,7 @@ // Copyright (c) 2019 Mellanox Technologies. #include "mlx5_core.h" +#include "lib/mlx5.h" int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, void *key, u32 sz_bytes, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c new file mode 100644 index 000000000000..e065c2f68f5a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/device.h> + +#include "mlx5_core.h" +#include "lib/mlx5.h" + +struct mlx5_dm { + /* protect access to icm bitmask */ + spinlock_t lock; + unsigned long *steering_sw_icm_alloc_blocks; + unsigned long *header_modify_sw_icm_alloc_blocks; +}; + +struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev) +{ + u64 header_modify_icm_blocks = 0; + u64 steering_icm_blocks = 0; + struct mlx5_dm *dm; + + if (!(MLX5_CAP_GEN_64(dev, general_obj_types) & MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM)) + return 0; + + dm = kzalloc(sizeof(*dm), GFP_KERNEL); + if (!dm) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&dm->lock); + + if (MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address)) { + steering_icm_blocks = + BIT(MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); + + dm->steering_sw_icm_alloc_blocks = + kcalloc(BITS_TO_LONGS(steering_icm_blocks), + sizeof(unsigned long), GFP_KERNEL); + if (!dm->steering_sw_icm_alloc_blocks) + goto err_steering; + } + + if (MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address)) { + header_modify_icm_blocks = + BIT(MLX5_CAP_DEV_MEM(dev, log_header_modify_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); + + dm->header_modify_sw_icm_alloc_blocks = + kcalloc(BITS_TO_LONGS(header_modify_icm_blocks), + sizeof(unsigned long), GFP_KERNEL); + if (!dm->header_modify_sw_icm_alloc_blocks) + goto err_modify_hdr; + } + + return dm; + +err_modify_hdr: + kfree(dm->steering_sw_icm_alloc_blocks); + +err_steering: + kfree(dm); + + return ERR_PTR(-ENOMEM); +} + +void mlx5_dm_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_dm *dm = dev->dm; + + if (!dev->dm) + return; + + if (dm->steering_sw_icm_alloc_blocks) { + WARN_ON(!bitmap_empty(dm->steering_sw_icm_alloc_blocks, + BIT(MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)))); + kfree(dm->steering_sw_icm_alloc_blocks); + } + + if (dm->header_modify_sw_icm_alloc_blocks) { + WARN_ON(!bitmap_empty(dm->header_modify_sw_icm_alloc_blocks, + BIT(MLX5_CAP_DEV_MEM(dev, + log_header_modify_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)))); + kfree(dm->header_modify_sw_icm_alloc_blocks); + } + + 
kfree(dm); +} + +int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, + u64 length, u16 uid, phys_addr_t *addr, u32 *obj_id) +{ + u32 num_blocks = DIV_ROUND_UP_ULL(length, MLX5_SW_ICM_BLOCK_SIZE(dev)); + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {}; + struct mlx5_dm *dm = dev->dm; + unsigned long *block_map; + u64 icm_start_addr; + u32 log_icm_size; + u32 max_blocks; + u64 block_idx; + void *sw_icm; + int ret; + + if (!dev->dm) + return -EOPNOTSUPP; + + if (!length || (length & (length - 1)) || + length & (MLX5_SW_ICM_BLOCK_SIZE(dev) - 1)) + return -EINVAL; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM); + MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid); + + switch (type) { + case MLX5_SW_ICM_TYPE_STEERING: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address); + log_icm_size = MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size); + block_map = dm->steering_sw_icm_alloc_blocks; + break; + case MLX5_SW_ICM_TYPE_HEADER_MODIFY: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address); + log_icm_size = MLX5_CAP_DEV_MEM(dev, + log_header_modify_sw_icm_size); + block_map = dm->header_modify_sw_icm_alloc_blocks; + break; + default: + return -EINVAL; + } + + if (!block_map) + return -EOPNOTSUPP; + + max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); + spin_lock(&dm->lock); + block_idx = bitmap_find_next_zero_area(block_map, + max_blocks, + 0, + num_blocks, 0); + + if (block_idx < max_blocks) + bitmap_set(block_map, + block_idx, num_blocks); + + spin_unlock(&dm->lock); + + if (block_idx >= max_blocks) + return -ENOMEM; + + sw_icm = MLX5_ADDR_OF(create_sw_icm_in, in, sw_icm); + icm_start_addr += block_idx << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); + MLX5_SET64(sw_icm, sw_icm, sw_icm_start_addr, + icm_start_addr); + MLX5_SET(sw_icm, sw_icm, log_sw_icm_size, ilog2(length)); + + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (ret) { + spin_lock(&dm->lock); + bitmap_clear(block_map, + block_idx, num_blocks); + spin_unlock(&dm->lock); + + return ret; + } + + *addr = icm_start_addr; + *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_dm_sw_icm_alloc); + +int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, + u64 length, u16 uid, phys_addr_t addr, u32 obj_id) +{ + u32 num_blocks = DIV_ROUND_UP_ULL(length, MLX5_SW_ICM_BLOCK_SIZE(dev)); + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + struct mlx5_dm *dm = dev->dm; + unsigned long *block_map; + u64 icm_start_addr; + u64 start_idx; + int err; + + if (!dev->dm) + return -EOPNOTSUPP; + + switch (type) { + case MLX5_SW_ICM_TYPE_STEERING: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address); + block_map = dm->steering_sw_icm_alloc_blocks; + break; + case MLX5_SW_ICM_TYPE_HEADER_MODIFY: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address); + block_map = dm->header_modify_sw_icm_alloc_blocks; + break; + default: + return -EINVAL; + } + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); + MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid); + + err = mlx5_cmd_exec(dev, in, 
sizeof(in), out, sizeof(out)); + if (err) + return err; + + start_idx = (addr - icm_start_addr) >> MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); + spin_lock(&dm->lock); + bitmap_clear(block_map, + start_idx, num_blocks); + spin_unlock(&dm->lock); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_dm_sw_icm_dealloc); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index 3dfab91ae5f2..4be4d2d36218 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -87,7 +87,7 @@ void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev); int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); /* This function should only be called after mlx5_cmd_force_teardown_hca */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c new file mode 100644 index 000000000000..583dc7e2aca8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2018 Mellanox Technologies + +#include <linux/hyperv.h> +#include "mlx5_core.h" +#include "lib/hv.h" + +static int mlx5_hv_config_common(struct mlx5_core_dev *dev, void *buf, int len, + int offset, bool read) +{ + int rc = -EOPNOTSUPP; + int bytes_returned; + int block_id; + + if (offset % HV_CONFIG_BLOCK_SIZE_MAX || len != HV_CONFIG_BLOCK_SIZE_MAX) + return -EINVAL; + + block_id = offset / HV_CONFIG_BLOCK_SIZE_MAX; + + rc = read ? + hyperv_read_cfg_blk(dev->pdev, buf, + HV_CONFIG_BLOCK_SIZE_MAX, block_id, + &bytes_returned) : + hyperv_write_cfg_blk(dev->pdev, buf, + HV_CONFIG_BLOCK_SIZE_MAX, block_id); + + /* Make sure len bytes were read successfully */ + if (read && !rc && len != bytes_returned) + rc = -EIO; + + if (rc) { + mlx5_core_err(dev, "Failed to %s hv config, err = %d, len = %d, offset = %d\n", + read ? "read" : "write", rc, len, + offset); + return rc; + } + + return 0; +} + +int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len, + int offset) +{ + return mlx5_hv_config_common(dev, buf, len, offset, true); +} + +int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len, + int offset) +{ + return mlx5_hv_config_common(dev, buf, len, offset, false); +} + +int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context, + void (*block_invalidate)(void *context, + u64 block_mask)) +{ + return hyperv_reg_block_invalidate(dev->pdev, context, + block_invalidate); +} + +void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev) +{ + hyperv_reg_block_invalidate(dev->pdev, NULL, NULL); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h new file mode 100644 index 000000000000..f9a45573f459 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __LIB_HV_H__ +#define __LIB_HV_H__ + +#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) + +#include <linux/hyperv.h> +#include <linux/mlx5/driver.h> + +int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len, + int offset); +int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len, + int offset); +int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context, + void (*block_invalidate)(void *context, + u64 block_mask)); +void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev); +#endif + +#endif /* __LIB_HV_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c new file mode 100644 index 000000000000..4047629a876b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c @@ -0,0 +1,371 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2018 Mellanox Technologies + +#include <linux/hyperv.h> +#include "mlx5_core.h" +#include "lib/hv.h" +#include "lib/hv_vhca.h" + +struct mlx5_hv_vhca { + struct mlx5_core_dev *dev; + struct workqueue_struct *work_queue; + struct mlx5_hv_vhca_agent *agents[MLX5_HV_VHCA_AGENT_MAX]; + struct mutex agents_lock; /* Protect agents array */ +}; + +struct mlx5_hv_vhca_work { + struct work_struct invalidate_work; + struct mlx5_hv_vhca *hv_vhca; + u64 block_mask; +}; + +struct mlx5_hv_vhca_data_block { + u16 sequence; + u16 offset; + u8 reserved[4]; + u64 data[15]; +}; + +struct mlx5_hv_vhca_agent { + enum mlx5_hv_vhca_agent_type type; + struct mlx5_hv_vhca *hv_vhca; + void *priv; + u16 seq; + void (*control)(struct mlx5_hv_vhca_agent *agent, + struct mlx5_hv_vhca_control_block *block); + void (*invalidate)(struct mlx5_hv_vhca_agent *agent, + u64 block_mask); + void (*cleanup)(struct mlx5_hv_vhca_agent *agent); +}; + +struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev) +{ + struct mlx5_hv_vhca *hv_vhca = NULL; + + hv_vhca = kzalloc(sizeof(*hv_vhca), GFP_KERNEL); + if (!hv_vhca) + return ERR_PTR(-ENOMEM); + + hv_vhca->work_queue = create_singlethread_workqueue("mlx5_hv_vhca"); + if (!hv_vhca->work_queue) { + kfree(hv_vhca); + return ERR_PTR(-ENOMEM); + } + + hv_vhca->dev = dev; + mutex_init(&hv_vhca->agents_lock); + + return hv_vhca; +} + +void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca) +{ + if (IS_ERR_OR_NULL(hv_vhca)) + return; + + destroy_workqueue(hv_vhca->work_queue); + kfree(hv_vhca); +} + +static void mlx5_hv_vhca_invalidate_work(struct work_struct *work) +{ + struct mlx5_hv_vhca_work *hwork; + struct mlx5_hv_vhca *hv_vhca; + int i; + + hwork = container_of(work, struct mlx5_hv_vhca_work, invalidate_work); + hv_vhca = hwork->hv_vhca; + + mutex_lock(&hv_vhca->agents_lock); + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) { + struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i]; + + if (!agent || !agent->invalidate) + continue; + + if (!(BIT(agent->type) & hwork->block_mask)) + continue; + + agent->invalidate(agent, hwork->block_mask); + } + mutex_unlock(&hv_vhca->agents_lock); + + kfree(hwork); +} + +void mlx5_hv_vhca_invalidate(void *context, u64 block_mask) +{ + struct mlx5_hv_vhca *hv_vhca = (struct mlx5_hv_vhca *)context; + struct mlx5_hv_vhca_work *work; + + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return; + + INIT_WORK(&work->invalidate_work, mlx5_hv_vhca_invalidate_work); + work->hv_vhca = hv_vhca; + work->block_mask = block_mask; + + queue_work(hv_vhca->work_queue, &work->invalidate_work); +} + +#define AGENT_MASK(type) (type ? 
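One note on mlx5_hv_config_common() in lib/hv.c above: the Hyper-V channel only moves whole configuration blocks, so the helper rejects any transfer that is not exactly one block-aligned block and converts the byte offset into a block index. A tiny sketch of that check, with an arbitrary block size standing in for HV_CONFIG_BLOCK_SIZE_MAX; illustrative only.

/* Sketch of the offset -> block_id validation done above. */
#include <stdio.h>

#define CFG_BLOCK_SIZE 128	/* arbitrary stand-in for HV_CONFIG_BLOCK_SIZE_MAX */

static int offset_to_block_id(int offset, int len, int *block_id)
{
	/* only whole, block-aligned transfers are accepted */
	if (offset % CFG_BLOCK_SIZE || len != CFG_BLOCK_SIZE)
		return -1;	/* -EINVAL in the driver */

	*block_id = offset / CFG_BLOCK_SIZE;
	return 0;
}

int main(void)
{
	int id;

	if (!offset_to_block_id(2 * CFG_BLOCK_SIZE, CFG_BLOCK_SIZE, &id))
		printf("block_id = %d\n", id);	/* prints 2 */
	return 0;
}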
BIT(type - 1) : 0 /* control */) + +static void mlx5_hv_vhca_agents_control(struct mlx5_hv_vhca *hv_vhca, + struct mlx5_hv_vhca_control_block *block) +{ + int i; + + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) { + struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i]; + + if (!agent || !agent->control) + continue; + + if (!(AGENT_MASK(agent->type) & block->control)) + continue; + + agent->control(agent, block); + } +} + +static void mlx5_hv_vhca_capabilities(struct mlx5_hv_vhca *hv_vhca, + u32 *capabilities) +{ + int i; + + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) { + struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i]; + + if (agent) + *capabilities |= AGENT_MASK(agent->type); + } +} + +static void +mlx5_hv_vhca_control_agent_invalidate(struct mlx5_hv_vhca_agent *agent, + u64 block_mask) +{ + struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca; + struct mlx5_core_dev *dev = hv_vhca->dev; + struct mlx5_hv_vhca_control_block *block; + u32 capabilities = 0; + int err; + + block = kzalloc(sizeof(*block), GFP_KERNEL); + if (!block) + return; + + err = mlx5_hv_read_config(dev, block, sizeof(*block), 0); + if (err) + goto free_block; + + mlx5_hv_vhca_capabilities(hv_vhca, &capabilities); + + /* In case no capabilities, send empty block in return */ + if (!capabilities) { + memset(block, 0, sizeof(*block)); + goto write; + } + + if (block->capabilities != capabilities) + block->capabilities = capabilities; + + if (block->control & ~capabilities) + goto free_block; + + mlx5_hv_vhca_agents_control(hv_vhca, block); + block->command_ack = block->command; + +write: + mlx5_hv_write_config(dev, block, sizeof(*block), 0); + +free_block: + kfree(block); +} + +static struct mlx5_hv_vhca_agent * +mlx5_hv_vhca_control_agent_create(struct mlx5_hv_vhca *hv_vhca) +{ + return mlx5_hv_vhca_agent_create(hv_vhca, MLX5_HV_VHCA_AGENT_CONTROL, + NULL, + mlx5_hv_vhca_control_agent_invalidate, + NULL, NULL); +} + +static void mlx5_hv_vhca_control_agent_destroy(struct mlx5_hv_vhca_agent *agent) +{ + mlx5_hv_vhca_agent_destroy(agent); +} + +int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca) +{ + struct mlx5_hv_vhca_agent *agent; + int err; + + if (IS_ERR_OR_NULL(hv_vhca)) + return IS_ERR_OR_NULL(hv_vhca); + + err = mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca, + mlx5_hv_vhca_invalidate); + if (err) + return err; + + agent = mlx5_hv_vhca_control_agent_create(hv_vhca); + if (IS_ERR_OR_NULL(agent)) { + mlx5_hv_unregister_invalidate(hv_vhca->dev); + return IS_ERR_OR_NULL(agent); + } + + hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL] = agent; + + return 0; +} + +void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca) +{ + struct mlx5_hv_vhca_agent *agent; + int i; + + if (IS_ERR_OR_NULL(hv_vhca)) + return; + + agent = hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL]; + if (agent) + mlx5_hv_vhca_control_agent_destroy(agent); + + mutex_lock(&hv_vhca->agents_lock); + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) + WARN_ON(hv_vhca->agents[i]); + + mutex_unlock(&hv_vhca->agents_lock); + + mlx5_hv_unregister_invalidate(hv_vhca->dev); +} + +static void mlx5_hv_vhca_agents_update(struct mlx5_hv_vhca *hv_vhca) +{ + mlx5_hv_vhca_invalidate(hv_vhca, BIT(MLX5_HV_VHCA_AGENT_CONTROL)); +} + +struct mlx5_hv_vhca_agent * +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca, + enum mlx5_hv_vhca_agent_type type, + void (*control)(struct mlx5_hv_vhca_agent*, + struct mlx5_hv_vhca_control_block *block), + void (*invalidate)(struct mlx5_hv_vhca_agent*, + u64 block_mask), + void (*cleaup)(struct mlx5_hv_vhca_agent *agent), + void *priv) +{ + 
struct mlx5_hv_vhca_agent *agent; + + if (IS_ERR_OR_NULL(hv_vhca)) + return ERR_PTR(-ENOMEM); + + if (type >= MLX5_HV_VHCA_AGENT_MAX) + return ERR_PTR(-EINVAL); + + mutex_lock(&hv_vhca->agents_lock); + if (hv_vhca->agents[type]) { + mutex_unlock(&hv_vhca->agents_lock); + return ERR_PTR(-EINVAL); + } + mutex_unlock(&hv_vhca->agents_lock); + + agent = kzalloc(sizeof(*agent), GFP_KERNEL); + if (!agent) + return ERR_PTR(-ENOMEM); + + agent->type = type; + agent->hv_vhca = hv_vhca; + agent->priv = priv; + agent->control = control; + agent->invalidate = invalidate; + agent->cleanup = cleaup; + + mutex_lock(&hv_vhca->agents_lock); + hv_vhca->agents[type] = agent; + mutex_unlock(&hv_vhca->agents_lock); + + mlx5_hv_vhca_agents_update(hv_vhca); + + return agent; +} + +void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent) +{ + struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca; + + mutex_lock(&hv_vhca->agents_lock); + + if (WARN_ON(agent != hv_vhca->agents[agent->type])) { + mutex_unlock(&hv_vhca->agents_lock); + return; + } + + hv_vhca->agents[agent->type] = NULL; + mutex_unlock(&hv_vhca->agents_lock); + + if (agent->cleanup) + agent->cleanup(agent); + + kfree(agent); + + mlx5_hv_vhca_agents_update(hv_vhca); +} + +static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent, + struct mlx5_hv_vhca_data_block *data_block, + void *src, int len, int *offset) +{ + int bytes = min_t(int, (int)sizeof(data_block->data), len); + + data_block->sequence = agent->seq; + data_block->offset = (*offset)++; + memcpy(data_block->data, src, bytes); + + return bytes; +} + +static void mlx5_hv_vhca_agent_seq_update(struct mlx5_hv_vhca_agent *agent) +{ + agent->seq++; +} + +int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent, + void *buf, int len) +{ + int offset = agent->type * HV_CONFIG_BLOCK_SIZE_MAX; + int block_offset = 0; + int total = 0; + int err; + + while (len) { + struct mlx5_hv_vhca_data_block data_block = {0}; + int bytes; + + bytes = mlx5_hv_vhca_data_block_prepare(agent, &data_block, + buf + total, + len, &block_offset); + if (!bytes) + return -ENOMEM; + + err = mlx5_hv_write_config(agent->hv_vhca->dev, &data_block, + sizeof(data_block), offset); + if (err) + return err; + + total += bytes; + len -= bytes; + } + + mlx5_hv_vhca_agent_seq_update(agent); + + return 0; +} + +void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent) +{ + return agent->priv; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h new file mode 100644 index 000000000000..4bad6a5fde56 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
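mlx5_hv_vhca_agent_write() above streams an arbitrary buffer through a single per-agent config block: each pass copies at most sizeof(data_block->data) bytes (120 bytes with the layout above), stamps the chunk with the agent's sequence number and a running chunk index, and rewrites the same block. A minimal sketch of that loop, with the Hyper-V write replaced by a printf; it is an illustration, not the driver code.

/* Sketch of the chunking done by mlx5_hv_vhca_agent_write() above. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct data_block {
	uint16_t sequence;
	uint16_t offset;	/* running chunk index, not a byte offset */
	uint8_t  reserved[4];
	uint64_t data[15];	/* 120-byte payload per chunk */
};

static int agent_write(uint16_t seq, const void *buf, int len)
{
	int chunk_idx = 0, total = 0;

	while (len) {
		struct data_block blk = { .sequence = seq };
		int bytes = len < (int)sizeof(blk.data) ? len : (int)sizeof(blk.data);

		blk.offset = (uint16_t)chunk_idx++;
		memcpy(blk.data, (const char *)buf + total, bytes);
		/* the driver writes the whole block to the agent's config slot here */
		printf("chunk %d: %d bytes (seq %d)\n", (int)blk.offset, bytes,
		       (int)blk.sequence);

		total += bytes;
		len -= bytes;
	}
	return 0;
}

int main(void)
{
	char payload[300] = { 0 };

	return agent_write(7, payload, (int)sizeof(payload));	/* 3 chunks: 120 + 120 + 60 */
}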
*/ + +#ifndef __LIB_HV_VHCA_H__ +#define __LIB_HV_VHCA_H__ + +#include "en.h" +#include "lib/hv.h" + +struct mlx5_hv_vhca_agent; +struct mlx5_hv_vhca; +struct mlx5_hv_vhca_control_block; + +enum mlx5_hv_vhca_agent_type { + MLX5_HV_VHCA_AGENT_CONTROL = 0, + MLX5_HV_VHCA_AGENT_STATS = 1, + MLX5_HV_VHCA_AGENT_MAX = 32, +}; + +#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) + +struct mlx5_hv_vhca_control_block { + u32 capabilities; + u32 control; + u16 command; + u16 command_ack; + u16 version; + u16 rings; + u32 reserved1[28]; +}; + +struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev); +void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca); +int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca); +void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca); +void mlx5_hv_vhca_invalidate(void *context, u64 block_mask); + +struct mlx5_hv_vhca_agent * +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca, + enum mlx5_hv_vhca_agent_type type, + void (*control)(struct mlx5_hv_vhca_agent*, + struct mlx5_hv_vhca_control_block *block), + void (*invalidate)(struct mlx5_hv_vhca_agent*, + u64 block_mask), + void (*cleanup)(struct mlx5_hv_vhca_agent *agent), + void *context); + +void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent); +int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent, + void *buf, int len); +void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent); + +#else + +static inline struct mlx5_hv_vhca * +mlx5_hv_vhca_create(struct mlx5_core_dev *dev) +{ + return NULL; +} + +static inline void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca) +{ +} + +static inline int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca) +{ + return 0; +} + +static inline void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca) +{ +} + +static inline void mlx5_hv_vhca_invalidate(void *context, + u64 block_mask) +{ +} + +static inline struct mlx5_hv_vhca_agent * +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca, + enum mlx5_hv_vhca_agent_type type, + void (*control)(struct mlx5_hv_vhca_agent*, + struct mlx5_hv_vhca_control_block *block), + void (*invalidate)(struct mlx5_hv_vhca_agent*, + u64 block_mask), + void (*cleanup)(struct mlx5_hv_vhca_agent *agent), + void *context) +{ + return NULL; +} + +static inline void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent) +{ +} + +static inline int +mlx5_hv_vhca_write_agent(struct mlx5_hv_vhca_agent *agent, + void *buf, int len) +{ + return 0; +} +#endif + +#endif /* __LIB_HV_VHCA_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index b99d469e4e64..249539247e2e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -84,4 +84,9 @@ int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, void *key, u32 sz_bytes, u32 *p_key_id); void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id); +static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev) +{ + return devlink_net(priv_to_devlink(dev)); +} + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c index b9d4f4e19ff9..148b55c3db7a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c @@ -32,6 +32,7 @@ #include <linux/kernel.h> #include <linux/module.h> +#include <linux/refcount.h> #include <linux/mlx5/driver.h> #include <net/vxlan.h> #include "mlx5_core.h" @@ -48,7 
+49,7 @@ struct mlx5_vxlan { struct mlx5_vxlan_port { struct hlist_node hlist; - atomic_t refcount; + refcount_t refcount; u16 udp_port; }; @@ -113,7 +114,7 @@ int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port) vxlanp = mlx5_vxlan_lookup_port(vxlan, port); if (vxlanp) { - atomic_inc(&vxlanp->refcount); + refcount_inc(&vxlanp->refcount); return 0; } @@ -137,7 +138,7 @@ int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port) } vxlanp->udp_port = port; - atomic_set(&vxlanp->refcount, 1); + refcount_set(&vxlanp->refcount, 1); spin_lock_bh(&vxlan->lock); hash_add(vxlan->htable, &vxlanp->hlist, port); @@ -170,7 +171,7 @@ int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port) goto out_unlock; } - if (atomic_dec_and_test(&vxlanp->refcount)) { + if (refcount_dec_and_test(&vxlanp->refcount)) { hash_del(&vxlanp->hlist); remove = true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index b15b27a497fc..f554cfddcf4e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -69,6 +69,7 @@ #include "lib/pci_vsc.h" #include "diag/fw_tracer.h" #include "ecpf.h" +#include "lib/hv_vhca.h" MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver"); @@ -495,6 +496,12 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev) ODP_CAP_SET_MAX(dev, xrc_odp_caps.write); ODP_CAP_SET_MAX(dev, xrc_odp_caps.read); ODP_CAP_SET_MAX(dev, xrc_odp_caps.atomic); + ODP_CAP_SET_MAX(dev, dc_odp_caps.srq_receive); + ODP_CAP_SET_MAX(dev, dc_odp_caps.send); + ODP_CAP_SET_MAX(dev, dc_odp_caps.receive); + ODP_CAP_SET_MAX(dev, dc_odp_caps.write); + ODP_CAP_SET_MAX(dev, dc_odp_caps.read); + ODP_CAP_SET_MAX(dev, dc_odp_caps.atomic); if (do_set) err = set_caps(dev, set_ctx, set_sz, @@ -826,16 +833,10 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_eq_cleanup; } - err = mlx5_cq_debugfs_init(dev); - if (err) { - mlx5_core_err(dev, "failed to initialize cq debugfs\n"); - goto err_events_cleanup; - } + mlx5_cq_debugfs_init(dev); mlx5_init_qp_table(dev); - mlx5_init_mkey_table(dev); - mlx5_init_reserved_gids(dev); mlx5_init_clock(dev); @@ -873,7 +874,12 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_eswitch_cleanup; } + dev->dm = mlx5_dm_create(dev); + if (IS_ERR(dev->dm)) + mlx5_core_warn(dev, "Failed to init device memory%d\n", err); + dev->tracer = mlx5_fw_tracer_create(dev); + dev->hv_vhca = mlx5_hv_vhca_create(dev); return 0; @@ -888,10 +894,8 @@ err_rl_cleanup: err_tables_cleanup: mlx5_geneve_destroy(dev->geneve); mlx5_vxlan_destroy(dev->vxlan); - mlx5_cleanup_mkey_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cq_debugfs_cleanup(dev); -err_events_cleanup: mlx5_events_cleanup(dev); err_eq_cleanup: mlx5_eq_table_cleanup(dev); @@ -905,7 +909,9 @@ err_devcom: static void mlx5_cleanup_once(struct mlx5_core_dev *dev) { + mlx5_hv_vhca_destroy(dev->hv_vhca); mlx5_fw_tracer_destroy(dev->tracer); + mlx5_dm_cleanup(dev); mlx5_fpga_cleanup(dev); mlx5_eswitch_cleanup(dev->priv.eswitch); mlx5_sriov_cleanup(dev); @@ -915,7 +921,6 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_vxlan_destroy(dev->vxlan); mlx5_cleanup_clock(dev); mlx5_cleanup_reserved_gids(dev); - mlx5_cleanup_mkey_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cq_debugfs_cleanup(dev); mlx5_events_cleanup(dev); @@ -1072,6 +1077,8 @@ static int mlx5_load(struct mlx5_core_dev *dev) goto err_fw_tracer; } + 
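Several hunks in this section (the VXLAN port table above, and the QP resource table further down) convert open-coded atomic_t reference counts to refcount_t, which keeps the same take/put shape but saturates and warns on overflow or underflow instead of wrapping silently. A compact userspace sketch of the pattern being converted, with simple stand-ins for the kernel helpers:

/* Stand-ins for refcount_set()/refcount_inc()/refcount_dec_and_test(). */
#include <stdbool.h>
#include <stdio.h>

struct port {
	unsigned int refcount;	/* refcount_t in the kernel */
	unsigned int udp_port;
};

static void ref_set(struct port *p, unsigned int v) { p->refcount = v; }
static void ref_inc(struct port *p)                 { p->refcount++; }
static bool ref_dec_and_test(struct port *p)        { return --p->refcount == 0; }

int main(void)
{
	struct port vxlan = { .udp_port = 4789 };

	ref_set(&vxlan, 1);		/* first add_port: entry created */
	ref_inc(&vxlan);		/* second add_port: just take a reference */

	if (!ref_dec_and_test(&vxlan))	/* first del_port: still in use */
		printf("port %u still referenced\n", vxlan.udp_port);
	if (ref_dec_and_test(&vxlan))	/* last del_port: safe to remove from the table */
		printf("port %u removed\n", vxlan.udp_port);
	return 0;
}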
mlx5_hv_vhca_init(dev->hv_vhca); + err = mlx5_fpga_device_start(dev); if (err) { mlx5_core_err(dev, "fpga device start failed %d\n", err); @@ -1127,6 +1134,7 @@ err_tls_start: err_ipsec_start: mlx5_fpga_device_stop(dev); err_fpga_start: + mlx5_hv_vhca_cleanup(dev->hv_vhca); mlx5_fw_tracer_cleanup(dev->tracer); err_fw_tracer: mlx5_eq_table_destroy(dev); @@ -1147,6 +1155,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_accel_ipsec_cleanup(dev); mlx5_accel_tls_cleanup(dev); mlx5_fpga_device_stop(dev); + mlx5_hv_vhca_cleanup(dev->hv_vhca); mlx5_fw_tracer_cleanup(dev->tracer); mlx5_eq_table_destroy(dev); mlx5_irq_table_destroy(dev); @@ -1155,7 +1164,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_put_uars_page(dev, dev->priv.uar); } -static int mlx5_load_one(struct mlx5_core_dev *dev, bool boot) +int mlx5_load_one(struct mlx5_core_dev *dev, bool boot) { int err = 0; @@ -1184,6 +1193,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, bool boot) if (err) goto err_load; + if (boot) { + err = mlx5_devlink_register(priv_to_devlink(dev), dev->device); + if (err) + goto err_devlink_reg; + } + if (mlx5_device_registered(dev)) { mlx5_attach_device(dev); } else { @@ -1201,6 +1216,9 @@ out: return err; err_reg_dev: + if (boot) + mlx5_devlink_unregister(priv_to_devlink(dev)); +err_devlink_reg: mlx5_unload(dev); err_load: if (boot) @@ -1213,12 +1231,12 @@ function_teardown: return err; } -static int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) +int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) { - int err = 0; - - if (cleanup) + if (cleanup) { + mlx5_unregister_device(dev); mlx5_drain_health_wq(dev); + } mutex_lock(&dev->intf_state_mutex); if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { @@ -1242,7 +1260,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) mlx5_function_teardown(dev, cleanup); out: mutex_unlock(&dev->intf_state_mutex); - return err; + return 0; } static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) @@ -1338,10 +1356,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) request_module_nowait(MLX5_IB_MOD); - err = mlx5_devlink_register(devlink, &pdev->dev); - if (err) - goto clean_load; - err = mlx5_crdump_enable(dev); if (err) dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err); @@ -1349,9 +1363,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) pci_save_state(pdev); return 0; -clean_load: - mlx5_unload_one(dev, true); - err_load_one: mlx5_pci_close(dev); pci_init_err: @@ -1369,7 +1380,6 @@ static void remove_one(struct pci_dev *pdev) mlx5_crdump_disable(dev); mlx5_devlink_unregister(devlink); - mlx5_unregister_device(dev); if (mlx5_unload_one(dev, true)) { mlx5_core_err(dev, "mlx5_unload_one failed\n"); @@ -1552,8 +1562,11 @@ static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x101c), MLX5_PCI_DEV_IS_VF}, /* ConnectX-6 VF */ { PCI_VDEVICE(MELLANOX, 0x101d) }, /* ConnectX-6 Dx */ { PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF}, /* ConnectX Family mlx5Gen Virtual Function */ + { PCI_VDEVICE(MELLANOX, 0x101f) }, /* ConnectX-6 LX */ + { PCI_VDEVICE(MELLANOX, 0x1021) }, /* ConnectX-7 */ { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */ { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */ + { PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller 
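The mlx5_load()/mlx5_unload() hunks above slot mlx5_hv_vhca_init()/mlx5_hv_vhca_cleanup() in right after the FW tracer, and the failure path tears stages down in the reverse order they came up. A generic sketch of that goto-unwind ladder follows; the stage names and the error checking are invented for illustration only.

/* Generic unwind ladder; shows where a newly added stage's label lands. */
#include <stdio.h>

static int  init_tracer(void) { puts("tracer up");        return 0; }
static void fini_tracer(void) { puts("tracer down"); }
static int  init_hv(void)     { puts("hv up");            return 0; }
static void fini_hv(void)     { puts("hv down"); }
static int  init_fpga(void)   { puts("fpga init failed"); return -1; }

static int load(void)
{
	int err;

	err = init_tracer();
	if (err)
		return err;

	err = init_hv();	/* new stage goes after the tracer ... */
	if (err)
		goto err_tracer;

	err = init_fpga();
	if (err)
		goto err_hv;	/* ... so its unwind label sits before the tracer's */

	return 0;

err_hv:
	fini_hv();
err_tracer:
	fini_tracer();
	return err;
}

int main(void)
{
	return load() ? 1 : 0;
}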
*/ { 0, } }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 471bbc48bc1f..fcce9e0fc82c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -146,7 +146,7 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev, void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev); void mlx5_cmd_flush(struct mlx5_core_dev *dev); -int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group, @@ -157,7 +157,7 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, u8 feature_group, u8 access_reg_group); void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev); -void mlx5_lag_remove(struct mlx5_core_dev *dev); +void mlx5_lag_remove(struct mlx5_core_dev *dev, struct net_device *netdev); int mlx5_irq_table_init(struct mlx5_core_dev *dev); void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev); @@ -198,6 +198,9 @@ int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size); int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode); int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode); +struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev); +void mlx5_dm_cleanup(struct mlx5_core_dev *dev); + #define MLX5_PPS_CAP(mdev) (MLX5_CAP_GEN((mdev), pps) && \ MLX5_CAP_GEN((mdev), pps_modify) && \ MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) && \ @@ -240,4 +243,7 @@ enum { u8 mlx5_get_nic_state(struct mlx5_core_dev *dev); void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state); + +int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup); +int mlx5_load_one(struct mlx5_core_dev *dev, bool boot); #endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 9231b39d18b2..42cc3c7ac5b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -36,16 +36,6 @@ #include <linux/mlx5/cmd.h> #include "mlx5_core.h" -void mlx5_init_mkey_table(struct mlx5_core_dev *dev) -{ - xa_init_flags(&dev->priv.mkey_table, XA_FLAGS_LOCK_IRQ); -} - -void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev) -{ - WARN_ON(!xa_empty(&dev->priv.mkey_table)); -} - int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, struct mlx5_async_ctx *async_ctx, u32 *in, @@ -54,7 +44,6 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, struct mlx5_async_work *context) { u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; - struct xarray *mkeys = &dev->priv.mkey_table; u32 mkey_index; void *mkc; int err; @@ -84,16 +73,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", mkey_index, key, mkey->key); - - err = xa_err(xa_store_irq(mkeys, mlx5_base_mkey(mkey->key), mkey, - GFP_KERNEL)); - if (err) { - mlx5_core_warn(dev, "failed xarray insert of mkey 0x%x, %d\n", - mlx5_base_mkey(mkey->key), err); - mlx5_core_destroy_mkey(dev, mkey); - } - - return err; + return 0; } EXPORT_SYMBOL(mlx5_core_create_mkey_cb); @@ -111,18 +91,6 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, { u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0}; u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0}; - struct xarray *mkeys = &dev->priv.mkey_table; - 
struct mlx5_core_mkey *deleted_mkey; - unsigned long flags; - - xa_lock_irqsave(mkeys, flags); - deleted_mkey = __xa_erase(mkeys, mlx5_base_mkey(mkey->key)); - xa_unlock_irqrestore(mkeys, flags); - if (!deleted_mkey) { - mlx5_core_dbg(dev, "failed xarray delete of mkey 0x%x\n", - mlx5_base_mkey(mkey->key)); - return -ENOENT; - } MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY); MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index b8ba74de9555..c3aea4cc2fff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -53,7 +53,7 @@ mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn) common = radix_tree_lookup(&table->tree, rsn); if (common) - atomic_inc(&common->refcount); + refcount_inc(&common->refcount); spin_unlock_irqrestore(&table->lock, flags); @@ -62,7 +62,7 @@ mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn) void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common) { - if (atomic_dec_and_test(&common->refcount)) + if (refcount_dec_and_test(&common->refcount)) complete(&common->free); } @@ -162,7 +162,7 @@ static int rsc_event_notifier(struct notifier_block *nb, common = mlx5_get_rsc(table, rsn); if (!common) { - mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n", rsn); + mlx5_core_dbg(dev, "Async event for unknown resource 0x%x\n", rsn); return NOTIFY_OK; } @@ -209,7 +209,7 @@ static int create_resource_common(struct mlx5_core_dev *dev, if (err) return err; - atomic_set(&qp->common.refcount, 1); + refcount_set(&qp->common.refcount, 1); init_completion(&qp->common.free); qp->pid = current->pid; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index 17ce9dd56b13..0fc7de4aa572 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -14,9 +14,6 @@ static void mlx5_rdma_disable_roce_steering(struct mlx5_core_dev *dev) { struct mlx5_core_roce *roce = &dev->priv.roce; - if (!roce->ft) - return; - mlx5_del_flow_rules(roce->allow_rule); mlx5_destroy_flow_group(roce->fg); mlx5_destroy_flow_table(roce->ft); @@ -51,7 +48,7 @@ static int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev) return -ENOMEM; } - ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_RDMA_RX); + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL); if (!ns) { mlx5_core_err(dev, "Failed to get RDMA RX namespace"); err = -EOPNOTSUPP; @@ -145,6 +142,11 @@ static int mlx5_rdma_add_roce_addr(struct mlx5_core_dev *dev) void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) { + struct mlx5_core_roce *roce = &dev->priv.roce; + + if (!roce->ft) + return; + mlx5_rdma_disable_roce_steering(dev); mlx5_rdma_del_roce_addr(dev); mlx5_nic_vport_disable_roce(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c index bc86dffdc43c..01c380425f9d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -188,8 +188,7 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u16 *index, /* new rate limit */ err = mlx5_set_pp_rate_limit_cmd(dev, entry->index, rl); if (err) { - mlx5_core_err(dev, "Failed configuring rate limit(err %d): \ - rate %u, max_burst_sz %u, typical_pkt_sz %u\n", + mlx5_core_err(dev, "Failed configuring rate limit(err %d): rate %u, max_burst_sz %u, typical_pkt_sz %u\n", 
err, rl->rate, rl->max_burst_sz, rl->typical_pkt_sz); goto out; @@ -218,8 +217,7 @@ void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, struct mlx5_rate_limit *rl) mutex_lock(&table->rl_lock); entry = find_rl_entry(table, rl); if (!entry || !entry->refcount) { - mlx5_core_warn(dev, "Rate %u, max_burst_sz %u typical_pkt_sz %u \ - are not configured\n", + mlx5_core_warn(dev, "Rate %u, max_burst_sz %u typical_pkt_sz %u are not configured\n", rl->rate, rl->max_burst_sz, rl->typical_pkt_sz); goto out; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 61fcfd8b39b4..03f037811f1d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -108,10 +108,10 @@ enable_vfs_hca: return 0; } -static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev) +static void +mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; - int num_vfs = pci_num_vf(dev->pdev); int err; int vf; @@ -127,7 +127,7 @@ static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev) } if (MLX5_ESWITCH_MANAGER(dev)) - mlx5_eswitch_disable(dev->priv.eswitch); + mlx5_eswitch_disable(dev->priv.eswitch, clear_vf); if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages)) mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); @@ -147,7 +147,7 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) err = pci_enable_sriov(pdev, num_vfs); if (err) { mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err); - mlx5_device_disable_sriov(dev); + mlx5_device_disable_sriov(dev, num_vfs, true); } return err; } @@ -155,9 +155,10 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) static void mlx5_sriov_disable(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + int num_vfs = pci_num_vf(dev->pdev); pci_disable_sriov(pdev); - mlx5_device_disable_sriov(dev); + mlx5_device_disable_sriov(dev, num_vfs, true); } int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) @@ -192,7 +193,7 @@ void mlx5_sriov_detach(struct mlx5_core_dev *dev) if (!mlx5_core_is_pf(dev)) return; - mlx5_device_disable_sriov(dev); + mlx5_device_disable_sriov(dev, pci_num_vf(dev->pdev), false); } static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile index c78512eed8d7..c78512eed8d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c new file mode 100644 index 000000000000..6dec2a550a10 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -0,0 +1,1935 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
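The two rl.c hunks above fix a subtle logging problem rather than just reflowing text: a backslash-newline inside a string literal splices the lines but keeps the continuation line's leading whitespace, so the old messages were printed with a run of embedded spaces. A tiny illustration of the difference:

/* Why the rl.c format strings were joined onto a single line. */
#include <stdio.h>

int main(void)
{
	/* old style: the indentation of the continued line ends up in the string */
	printf("Failed configuring rate limit: \
			rate %u\n", 100u);

	/* fixed style: one long literal, no stray spaces in the output */
	printf("Failed configuring rate limit: rate %u\n", 100u);
	return 0;
}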
*/ + +#include "dr_types.h" + +enum dr_action_domain { + DR_ACTION_DOMAIN_NIC_INGRESS, + DR_ACTION_DOMAIN_NIC_EGRESS, + DR_ACTION_DOMAIN_FDB_INGRESS, + DR_ACTION_DOMAIN_FDB_EGRESS, + DR_ACTION_DOMAIN_MAX, +}; + +enum dr_action_valid_state { + DR_ACTION_STATE_ERR, + DR_ACTION_STATE_NO_ACTION, + DR_ACTION_STATE_REFORMAT, + DR_ACTION_STATE_MODIFY_HDR, + DR_ACTION_STATE_MODIFY_VLAN, + DR_ACTION_STATE_NON_TERM, + DR_ACTION_STATE_TERM, + DR_ACTION_STATE_MAX, +}; + +static const enum dr_action_valid_state +next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] = { + [DR_ACTION_DOMAIN_NIC_INGRESS] = { + [DR_ACTION_STATE_NO_ACTION] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + }, + [DR_ACTION_STATE_REFORMAT] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + }, + [DR_ACTION_STATE_MODIFY_HDR] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR, + }, + [DR_ACTION_STATE_MODIFY_VLAN] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + }, + [DR_ACTION_STATE_NON_TERM] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + }, + [DR_ACTION_STATE_TERM] = { + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM, + }, + }, + [DR_ACTION_DOMAIN_NIC_EGRESS] = { + [DR_ACTION_STATE_NO_ACTION] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + }, + [DR_ACTION_STATE_REFORMAT] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_REFORMAT, + }, + [DR_ACTION_STATE_MODIFY_HDR] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = 
DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + }, + [DR_ACTION_STATE_MODIFY_VLAN] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT, + }, + [DR_ACTION_STATE_NON_TERM] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + }, + [DR_ACTION_STATE_TERM] = { + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM, + }, + }, + [DR_ACTION_DOMAIN_FDB_INGRESS] = { + [DR_ACTION_STATE_NO_ACTION] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_REFORMAT] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_MODIFY_HDR] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_MODIFY_VLAN] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + }, + [DR_ACTION_STATE_NON_TERM] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_TERM] = { + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM, + }, + }, + [DR_ACTION_DOMAIN_FDB_EGRESS] = { + [DR_ACTION_STATE_NO_ACTION] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + 
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_REFORMAT] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_MODIFY_HDR] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_MODIFY_VLAN] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_NON_TERM] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_TERM] = { + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM, + }, + }, +}; + +struct dr_action_modify_field_conv { + u16 hw_field; + u8 start; + u8 end; + u8 l3_type; + u8 l4_type; +}; + +static const struct dr_action_modify_field_conv dr_action_conv_arr[] = { + [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_1, .start = 16, .end = 47, + }, + [MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_1, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_2, .start = 32, .end = 47, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_0, .start = 16, .end = 47, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_0, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_1, .start = 0, .end = 5, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 48, .end = 56, + .l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 0, .end = 15, + .l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 16, .end = 31, + .l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_1, .start = 8, .end = 15, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_1, .start = 8, .end = 15, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 0, .end = 15, + .l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_UDP, + }, + 
[MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 16, .end = 31, + .l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_UDP, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_3, .start = 32, .end = 63, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_3, .start = 0, .end = 31, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_4, .start = 32, .end = 63, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_4, .start = 0, .end = 31, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_0, .start = 32, .end = 63, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_0, .start = 0, .end = 31, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_2, .start = 32, .end = 63, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_2, .start = 0, .end = 31, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV4] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_0, .start = 0, .end = 31, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV4] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_0, .start = 32, .end = 63, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_A] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_METADATA, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_B] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_METADATA, .start = 32, .end = 63, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_0, .start = 32, .end = 63, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_1, .start = 32, .end = 63, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_2, .start = 32, .end = 63, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_2, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_1, .start = 32, .end = 63, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_2, .start = 0, .end = 15, + }, +}; + +#define MAX_VLANS 2 +struct dr_action_vlan_info { + int count; + u32 headers[MAX_VLANS]; +}; + +struct dr_action_apply_attr { + u32 modify_index; + u16 modify_actions; + u32 decap_index; + u16 decap_actions; + u8 decap_with_vlan:1; + u64 final_icm_addr; + u32 flow_tag; + u32 ctr_id; + u16 gvmi; + u16 hit_gvmi; + u32 reformat_id; + u32 reformat_size; + struct dr_action_vlan_info vlans; +}; 
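The dr_action_conv_arr[] table above maps each MLX5_ACTION_IN_FIELD_* selector onto a hardware field plus the bit range it covers (and, where relevant, an L3/L4 type guard). A small sketch of how such an entry could be consumed when building a SET rewrite: the two sample rows mirror the TCP source/destination port entries above, while the consuming function and the numeric hw_field value are purely illustrative assumptions.

/* Hypothetical consumer of a dr_action_conv_arr-style entry. */
#include <stdint.h>
#include <stdio.h>

struct field_conv {
	uint16_t hw_field;
	uint8_t  start;
	uint8_t  end;
};

enum { FLD_L4_0 = 9 };	/* numeric stand-in for MLX5DR_ACTION_MDFY_HW_FLD_L4_0 */

static const struct field_conv conv[] = {
	[0] = { .hw_field = FLD_L4_0, .start = 0,  .end = 15 },	/* tcp sport */
	[1] = { .hw_field = FLD_L4_0, .start = 16, .end = 31 },	/* tcp dport */
};

static void build_set(int sel, uint32_t value)
{
	const struct field_conv *f = &conv[sel];
	unsigned int width = (unsigned int)(f->end - f->start + 1);	/* bits covered */

	printf("SET hw_field=%u shift=%u width=%u data=0x%x\n",
	       (unsigned int)f->hw_field, (unsigned int)f->start, width,
	       (unsigned int)value);
}

int main(void)
{
	build_set(1, 443);	/* rewrite the 16-bit destination port */
	return 0;
}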
+ +static int +dr_action_reformat_to_action_type(enum mlx5dr_action_reformat_type reformat_type, + enum mlx5dr_action_type *action_type) +{ + switch (reformat_type) { + case DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2: + *action_type = DR_ACTION_TYP_TNL_L2_TO_L2; + break; + case DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2: + *action_type = DR_ACTION_TYP_L2_TO_TNL_L2; + break; + case DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2: + *action_type = DR_ACTION_TYP_TNL_L3_TO_L2; + break; + case DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3: + *action_type = DR_ACTION_TYP_L2_TO_TNL_L3; + break; + default: + return -EINVAL; + } + + return 0; +} + +static void dr_actions_init_next_ste(u8 **last_ste, + u32 *added_stes, + enum mlx5dr_ste_entry_type entry_type, + u16 gvmi) +{ + (*added_stes)++; + *last_ste += DR_STE_SIZE; + mlx5dr_ste_init(*last_ste, MLX5DR_STE_LU_TYPE_DONT_CARE, entry_type, gvmi); +} + +static void dr_actions_apply_tx(struct mlx5dr_domain *dmn, + u8 *action_type_set, + u8 *last_ste, + struct dr_action_apply_attr *attr, + u32 *added_stes) +{ + bool encap = action_type_set[DR_ACTION_TYP_L2_TO_TNL_L2] || + action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]; + + /* We want to make sure the modify header comes before L2 + * encapsulation. The reason for that is that we support + * modify headers for outer headers only + */ + if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) { + mlx5dr_ste_set_entry_type(last_ste, MLX5DR_STE_TYPE_MODIFY_PKT); + mlx5dr_ste_set_rewrite_actions(last_ste, + attr->modify_actions, + attr->modify_index); + } + + if (action_type_set[DR_ACTION_TYP_PUSH_VLAN]) { + int i; + + for (i = 0; i < attr->vlans.count; i++) { + if (i || action_type_set[DR_ACTION_TYP_MODIFY_HDR]) + dr_actions_init_next_ste(&last_ste, + added_stes, + MLX5DR_STE_TYPE_TX, + attr->gvmi); + + mlx5dr_ste_set_tx_push_vlan(last_ste, + attr->vlans.headers[i], + encap); + } + } + + if (encap) { + /* Modify header and encapsulation require a different STEs. + * Since modify header STE format doesn't support encapsulation + * tunneling_action. + */ + if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] || + action_type_set[DR_ACTION_TYP_PUSH_VLAN]) + dr_actions_init_next_ste(&last_ste, + added_stes, + MLX5DR_STE_TYPE_TX, + attr->gvmi); + + mlx5dr_ste_set_tx_encap(last_ste, + attr->reformat_id, + attr->reformat_size, + action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]); + /* Whenever prio_tag_required enabled, we can be sure that the + * previous table (ACL) already push vlan to our packet, + * And due to HW limitation we need to set this bit, otherwise + * push vlan + reformat will not work. 
+ */ + if (MLX5_CAP_GEN(dmn->mdev, prio_tag_required)) + mlx5dr_ste_set_go_back_bit(last_ste); + } + + if (action_type_set[DR_ACTION_TYP_CTR]) + mlx5dr_ste_set_counter_id(last_ste, attr->ctr_id); +} + +static void dr_actions_apply_rx(u8 *action_type_set, + u8 *last_ste, + struct dr_action_apply_attr *attr, + u32 *added_stes) +{ + if (action_type_set[DR_ACTION_TYP_CTR]) + mlx5dr_ste_set_counter_id(last_ste, attr->ctr_id); + + if (action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) { + mlx5dr_ste_set_entry_type(last_ste, MLX5DR_STE_TYPE_MODIFY_PKT); + mlx5dr_ste_set_rx_decap_l3(last_ste, attr->decap_with_vlan); + mlx5dr_ste_set_rewrite_actions(last_ste, + attr->decap_actions, + attr->decap_index); + } + + if (action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2]) + mlx5dr_ste_set_rx_decap(last_ste); + + if (action_type_set[DR_ACTION_TYP_POP_VLAN]) { + int i; + + for (i = 0; i < attr->vlans.count; i++) { + if (i || + action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2] || + action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) + dr_actions_init_next_ste(&last_ste, + added_stes, + MLX5DR_STE_TYPE_RX, + attr->gvmi); + + mlx5dr_ste_set_rx_pop_vlan(last_ste); + } + } + + if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) { + if (mlx5dr_ste_get_entry_type(last_ste) == MLX5DR_STE_TYPE_MODIFY_PKT) + dr_actions_init_next_ste(&last_ste, + added_stes, + MLX5DR_STE_TYPE_MODIFY_PKT, + attr->gvmi); + else + mlx5dr_ste_set_entry_type(last_ste, MLX5DR_STE_TYPE_MODIFY_PKT); + + mlx5dr_ste_set_rewrite_actions(last_ste, + attr->modify_actions, + attr->modify_index); + } + + if (action_type_set[DR_ACTION_TYP_TAG]) { + if (mlx5dr_ste_get_entry_type(last_ste) == MLX5DR_STE_TYPE_MODIFY_PKT) + dr_actions_init_next_ste(&last_ste, + added_stes, + MLX5DR_STE_TYPE_RX, + attr->gvmi); + + mlx5dr_ste_rx_set_flow_tag(last_ste, attr->flow_tag); + } +} + +/* Apply the actions on the rule STE array starting from the last_ste. + * Actions might require more than one STE, new_num_stes will return + * the new size of the STEs array, rule with actions. 
+ */ +static void dr_actions_apply(struct mlx5dr_domain *dmn, + enum mlx5dr_ste_entry_type ste_type, + u8 *action_type_set, + u8 *last_ste, + struct dr_action_apply_attr *attr, + u32 *new_num_stes) +{ + u32 added_stes = 0; + + if (ste_type == MLX5DR_STE_TYPE_RX) + dr_actions_apply_rx(action_type_set, last_ste, attr, &added_stes); + else + dr_actions_apply_tx(dmn, action_type_set, last_ste, attr, &added_stes); + + last_ste += added_stes * DR_STE_SIZE; + *new_num_stes += added_stes; + + mlx5dr_ste_set_hit_gvmi(last_ste, attr->hit_gvmi); + mlx5dr_ste_set_hit_addr(last_ste, attr->final_icm_addr, 1); +} + +static enum dr_action_domain +dr_action_get_action_domain(enum mlx5dr_domain_type domain, + enum mlx5dr_ste_entry_type ste_type) +{ + switch (domain) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + return DR_ACTION_DOMAIN_NIC_INGRESS; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + return DR_ACTION_DOMAIN_NIC_EGRESS; + case MLX5DR_DOMAIN_TYPE_FDB: + if (ste_type == MLX5DR_STE_TYPE_RX) + return DR_ACTION_DOMAIN_FDB_INGRESS; + return DR_ACTION_DOMAIN_FDB_EGRESS; + default: + WARN_ON(true); + return DR_ACTION_DOMAIN_MAX; + } +} + +static +int dr_action_validate_and_get_next_state(enum dr_action_domain action_domain, + u32 action_type, + u32 *state) +{ + u32 cur_state = *state; + + /* Check action state machine is valid */ + *state = next_action_state[action_domain][cur_state][action_type]; + + if (*state == DR_ACTION_STATE_ERR) + return -EOPNOTSUPP; + + return 0; +} + +static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn, + struct mlx5dr_action *dest_action, + u64 *final_icm_addr) +{ + int ret; + + switch (dest_action->action_type) { + case DR_ACTION_TYP_FT: + /* Allow destination flow table only if table is a terminating + * table, since there is an *assumption* that in such case FW + * will recalculate the CS. + */ + if (dest_action->dest_tbl.is_fw_tbl) { + *final_icm_addr = dest_action->dest_tbl.fw_tbl.rx_icm_addr; + } else { + mlx5dr_dbg(dmn, + "Destination FT should be terminating when modify TTL is used\n"); + return -EINVAL; + } + break; + + case DR_ACTION_TYP_VPORT: + /* If destination is vport we will get the FW flow table + * that recalculates the CS and forwards to the vport. 
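A note on dr_action_validate_and_get_next_state() and the table it walks: DR_ACTION_STATE_ERR is the first enumerator, so every combination not explicitly listed in next_action_state[][][] is already the error state, and validation is nothing more than indexing the table per action and checking for ERR. A condensed toy version of that walk (toy states and actions, not the real ones):

/* Toy version of the next_action_state[][] walk used above. */
#include <stdio.h>

enum { ST_ERR, ST_START, ST_TERM, ST_MAX };	/* ERR first => the default for gaps */
enum { ACT_CTR, ACT_DROP, ACT_MAX };

static const int next_state[ST_MAX][ACT_MAX] = {
	[ST_START] = { [ACT_CTR] = ST_START, [ACT_DROP] = ST_TERM },
	[ST_TERM]  = { [ACT_CTR] = ST_TERM },	/* DROP after TERM stays 0 == ERR */
};

static int validate(const int *actions, int num)
{
	int state = ST_START;
	int i;

	for (i = 0; i < num; i++) {
		state = next_state[state][actions[i]];
		if (state == ST_ERR)
			return -1;	/* -EOPNOTSUPP in the driver */
	}
	return 0;
}

int main(void)
{
	int ok[]  = { ACT_CTR, ACT_DROP };
	int bad[] = { ACT_DROP, ACT_DROP };

	printf("ok:  %d\n", validate(ok, 2));	/* 0  */
	printf("bad: %d\n", validate(bad, 2));	/* -1 */
	return 0;
}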
+ */ + ret = mlx5dr_domain_cache_get_recalc_cs_ft_addr(dest_action->vport.dmn, + dest_action->vport.caps->num, + final_icm_addr); + if (ret) { + mlx5dr_err(dmn, "Failed to get FW cs recalc flow table\n"); + return ret; + } + break; + + default: + break; + } + + return 0; +} + +#define WITH_VLAN_NUM_HW_ACTIONS 6 + +int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_action *actions[], + u32 num_actions, + u8 *ste_arr, + u32 *new_hw_ste_arr_sz) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn; + bool rx_rule = nic_dmn->ste_type == MLX5DR_STE_TYPE_RX; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + u8 action_type_set[DR_ACTION_TYP_MAX] = {}; + struct mlx5dr_action *dest_action = NULL; + u32 state = DR_ACTION_STATE_NO_ACTION; + struct dr_action_apply_attr attr = {}; + enum dr_action_domain action_domain; + bool recalc_cs_required = false; + u8 *last_ste; + int i, ret; + + attr.gvmi = dmn->info.caps.gvmi; + attr.hit_gvmi = dmn->info.caps.gvmi; + attr.final_icm_addr = nic_dmn->default_icm_addr; + action_domain = dr_action_get_action_domain(dmn->type, nic_dmn->ste_type); + + for (i = 0; i < num_actions; i++) { + struct mlx5dr_action *action; + int max_actions_type = 1; + u32 action_type; + + action = actions[i]; + action_type = action->action_type; + + switch (action_type) { + case DR_ACTION_TYP_DROP: + attr.final_icm_addr = nic_dmn->drop_icm_addr; + break; + case DR_ACTION_TYP_FT: + dest_action = action; + if (!action->dest_tbl.is_fw_tbl) { + if (action->dest_tbl.tbl->dmn != dmn) { + mlx5dr_dbg(dmn, + "Destination table belongs to a different domain\n"); + goto out_invalid_arg; + } + if (action->dest_tbl.tbl->level <= matcher->tbl->level) { + mlx5_core_warn_once(dmn->mdev, + "Connecting table to a lower/same level destination table\n"); + mlx5dr_dbg(dmn, + "Connecting table at level %d to a destination table at level %d\n", + matcher->tbl->level, + action->dest_tbl.tbl->level); + } + attr.final_icm_addr = rx_rule ? + action->dest_tbl.tbl->rx.s_anchor->chunk->icm_addr : + action->dest_tbl.tbl->tx.s_anchor->chunk->icm_addr; + } else { + struct mlx5dr_cmd_query_flow_table_details output; + int ret; + + /* get the relevant addresses */ + if (!action->dest_tbl.fw_tbl.rx_icm_addr) { + ret = mlx5dr_cmd_query_flow_table(dmn->mdev, + action->dest_tbl.fw_tbl.type, + action->dest_tbl.fw_tbl.id, + &output); + if (!ret) { + action->dest_tbl.fw_tbl.tx_icm_addr = + output.sw_owner_icm_root_1; + action->dest_tbl.fw_tbl.rx_icm_addr = + output.sw_owner_icm_root_0; + } else { + mlx5dr_dbg(dmn, + "Failed mlx5_cmd_query_flow_table ret: %d\n", + ret); + return ret; + } + } + attr.final_icm_addr = rx_rule ? 
+ action->dest_tbl.fw_tbl.rx_icm_addr : + action->dest_tbl.fw_tbl.tx_icm_addr; + } + break; + case DR_ACTION_TYP_QP: + mlx5dr_info(dmn, "Domain doesn't support QP\n"); + goto out_invalid_arg; + case DR_ACTION_TYP_CTR: + attr.ctr_id = action->ctr.ctr_id + + action->ctr.offeset; + break; + case DR_ACTION_TYP_TAG: + attr.flow_tag = action->flow_tag; + break; + case DR_ACTION_TYP_TNL_L2_TO_L2: + break; + case DR_ACTION_TYP_TNL_L3_TO_L2: + attr.decap_index = action->rewrite.index; + attr.decap_actions = action->rewrite.num_of_actions; + attr.decap_with_vlan = + attr.decap_actions == WITH_VLAN_NUM_HW_ACTIONS; + break; + case DR_ACTION_TYP_MODIFY_HDR: + attr.modify_index = action->rewrite.index; + attr.modify_actions = action->rewrite.num_of_actions; + recalc_cs_required = action->rewrite.modify_ttl; + break; + case DR_ACTION_TYP_L2_TO_TNL_L2: + case DR_ACTION_TYP_L2_TO_TNL_L3: + attr.reformat_size = action->reformat.reformat_size; + attr.reformat_id = action->reformat.reformat_id; + break; + case DR_ACTION_TYP_VPORT: + attr.hit_gvmi = action->vport.caps->vhca_gvmi; + dest_action = action; + if (rx_rule) { + /* Loopback on WIRE vport is not supported */ + if (action->vport.caps->num == WIRE_PORT) + goto out_invalid_arg; + + attr.final_icm_addr = action->vport.caps->icm_address_rx; + } else { + attr.final_icm_addr = action->vport.caps->icm_address_tx; + } + break; + case DR_ACTION_TYP_POP_VLAN: + max_actions_type = MAX_VLANS; + attr.vlans.count++; + break; + case DR_ACTION_TYP_PUSH_VLAN: + max_actions_type = MAX_VLANS; + if (attr.vlans.count == MAX_VLANS) + return -EINVAL; + + attr.vlans.headers[attr.vlans.count++] = action->push_vlan.vlan_hdr; + break; + default: + goto out_invalid_arg; + } + + /* Check action duplication */ + if (++action_type_set[action_type] > max_actions_type) { + mlx5dr_dbg(dmn, "Action type %d supports only max %d time(s)\n", + action_type, max_actions_type); + goto out_invalid_arg; + } + + /* Check action state machine is valid */ + if (dr_action_validate_and_get_next_state(action_domain, + action_type, + &state)) { + mlx5dr_dbg(dmn, "Invalid action sequence provided\n"); + return -EOPNOTSUPP; + } + } + + *new_hw_ste_arr_sz = nic_matcher->num_of_builders; + last_ste = ste_arr + DR_STE_SIZE * (nic_matcher->num_of_builders - 1); + + /* Due to a HW bug, modifying TTL on RX flows will cause an incorrect + * checksum calculation. In this case we will use a FW table to + * recalculate. 
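The action_type_set[] bookkeeping just above doubles as a duplicate-action limit: most action types may appear once per rule, while the VLAN push/pop actions may appear up to MAX_VLANS times. A minimal sketch of that counting check, for illustration only:

/* Sketch of the per-type duplication limit enforced just above. */
#include <stdio.h>

enum { ACT_DROP, ACT_CTR, ACT_PUSH_VLAN, ACT_MAX };
#define MAX_VLANS 2

int main(void)
{
	int actions[] = { ACT_PUSH_VLAN, ACT_PUSH_VLAN, ACT_PUSH_VLAN };
	unsigned char seen[ACT_MAX] = { 0 };
	unsigned int i;

	for (i = 0; i < sizeof(actions) / sizeof(actions[0]); i++) {
		int type = actions[i];
		/* most actions once; VLAN push/pop up to MAX_VLANS times */
		int max = (type == ACT_PUSH_VLAN) ? MAX_VLANS : 1;

		if (++seen[type] > max) {
			printf("action %d used more than %d time(s)\n", type, max);
			return 1;	/* the -EINVAL path in the driver */
		}
	}
	return 0;
}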
+ */ + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB && + rx_rule && recalc_cs_required && dest_action) { + ret = dr_action_handle_cs_recalc(dmn, dest_action, &attr.final_icm_addr); + if (ret) { + mlx5dr_dbg(dmn, + "Failed to handle checksum recalculation err %d\n", + ret); + return ret; + } + } + + dr_actions_apply(dmn, + nic_dmn->ste_type, + action_type_set, + last_ste, + &attr, + new_hw_ste_arr_sz); + + return 0; + +out_invalid_arg: + return -EINVAL; +} + +#define CVLAN_ETHERTYPE 0x8100 +#define SVLAN_ETHERTYPE 0x88a8 +#define HDR_LEN_L2_ONLY 14 +#define HDR_LEN_L2_VLAN 18 +#define REWRITE_HW_ACTION_NUM 6 + +static int dr_actions_l2_rewrite(struct mlx5dr_domain *dmn, + struct mlx5dr_action *action, + void *data, size_t data_sz) +{ + struct mlx5_ifc_l2_hdr_bits *l2_hdr = data; + u64 ops[REWRITE_HW_ACTION_NUM] = {}; + u32 hdr_fld_4b; + u16 hdr_fld_2b; + u16 vlan_type; + bool vlan; + int i = 0; + int ret; + + vlan = (data_sz != HDR_LEN_L2_ONLY); + + /* dmac_47_16 */ + MLX5_SET(dr_action_hw_set, ops + i, + opcode, MLX5DR_ACTION_MDFY_HW_OP_SET); + MLX5_SET(dr_action_hw_set, ops + i, + destination_length, 0); + MLX5_SET(dr_action_hw_set, ops + i, + destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_0); + MLX5_SET(dr_action_hw_set, ops + i, + destination_left_shifter, 16); + hdr_fld_4b = MLX5_GET(l2_hdr, l2_hdr, dmac_47_16); + MLX5_SET(dr_action_hw_set, ops + i, + inline_data, hdr_fld_4b); + i++; + + /* smac_47_16 */ + MLX5_SET(dr_action_hw_set, ops + i, + opcode, MLX5DR_ACTION_MDFY_HW_OP_SET); + MLX5_SET(dr_action_hw_set, ops + i, + destination_length, 0); + MLX5_SET(dr_action_hw_set, ops + i, + destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_1); + MLX5_SET(dr_action_hw_set, ops + i, + destination_left_shifter, 16); + hdr_fld_4b = (MLX5_GET(l2_hdr, l2_hdr, smac_31_0) >> 16 | + MLX5_GET(l2_hdr, l2_hdr, smac_47_32) << 16); + MLX5_SET(dr_action_hw_set, ops + i, + inline_data, hdr_fld_4b); + i++; + + /* dmac_15_0 */ + MLX5_SET(dr_action_hw_set, ops + i, + opcode, MLX5DR_ACTION_MDFY_HW_OP_SET); + MLX5_SET(dr_action_hw_set, ops + i, + destination_length, 16); + MLX5_SET(dr_action_hw_set, ops + i, + destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_0); + MLX5_SET(dr_action_hw_set, ops + i, + destination_left_shifter, 0); + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, dmac_15_0); + MLX5_SET(dr_action_hw_set, ops + i, + inline_data, hdr_fld_2b); + i++; + + /* ethertype + (optional) vlan */ + MLX5_SET(dr_action_hw_set, ops + i, + opcode, MLX5DR_ACTION_MDFY_HW_OP_SET); + MLX5_SET(dr_action_hw_set, ops + i, + destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_2); + MLX5_SET(dr_action_hw_set, ops + i, + destination_left_shifter, 32); + if (!vlan) { + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, ethertype); + MLX5_SET(dr_action_hw_set, ops + i, inline_data, hdr_fld_2b); + MLX5_SET(dr_action_hw_set, ops + i, destination_length, 16); + } else { + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, ethertype); + vlan_type = hdr_fld_2b == SVLAN_ETHERTYPE ? 
DR_STE_SVLAN : DR_STE_CVLAN; + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, vlan); + hdr_fld_4b = (vlan_type << 16) | hdr_fld_2b; + MLX5_SET(dr_action_hw_set, ops + i, inline_data, hdr_fld_4b); + MLX5_SET(dr_action_hw_set, ops + i, destination_length, 18); + } + i++; + + /* smac_15_0 */ + MLX5_SET(dr_action_hw_set, ops + i, + opcode, MLX5DR_ACTION_MDFY_HW_OP_SET); + MLX5_SET(dr_action_hw_set, ops + i, + destination_length, 16); + MLX5_SET(dr_action_hw_set, ops + i, + destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_1); + MLX5_SET(dr_action_hw_set, ops + i, + destination_left_shifter, 0); + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, smac_31_0); + MLX5_SET(dr_action_hw_set, ops + i, + inline_data, hdr_fld_2b); + i++; + + if (vlan) { + MLX5_SET(dr_action_hw_set, ops + i, + opcode, MLX5DR_ACTION_MDFY_HW_OP_SET); + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, vlan_type); + MLX5_SET(dr_action_hw_set, ops + i, + inline_data, hdr_fld_2b); + MLX5_SET(dr_action_hw_set, ops + i, + destination_length, 16); + MLX5_SET(dr_action_hw_set, ops + i, + destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_2); + MLX5_SET(dr_action_hw_set, ops + i, + destination_left_shifter, 0); + i++; + } + + action->rewrite.data = (void *)ops; + action->rewrite.num_of_actions = i; + action->rewrite.chunk->byte_size = i * sizeof(*ops); + + ret = mlx5dr_send_postsend_action(dmn, action); + if (ret) { + mlx5dr_dbg(dmn, "Writing encapsulation action to ICM failed\n"); + return ret; + } + + return 0; +} + +static struct mlx5dr_action * +dr_action_create_generic(enum mlx5dr_action_type action_type) +{ + struct mlx5dr_action *action; + + action = kzalloc(sizeof(*action), GFP_KERNEL); + if (!action) + return NULL; + + action->action_type = action_type; + refcount_set(&action->refcount, 1); + + return action; +} + +struct mlx5dr_action *mlx5dr_action_create_drop(void) +{ + return dr_action_create_generic(DR_ACTION_TYP_DROP); +} + +struct mlx5dr_action * +mlx5dr_action_create_dest_table(struct mlx5dr_table *tbl) +{ + struct mlx5dr_action *action; + + refcount_inc(&tbl->refcount); + + action = dr_action_create_generic(DR_ACTION_TYP_FT); + if (!action) + goto dec_ref; + + action->dest_tbl.tbl = tbl; + + return action; + +dec_ref: + refcount_dec(&tbl->refcount); + return NULL; +} + +struct mlx5dr_action * +mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, + struct mlx5dr_action_dest *dests, + u32 num_of_dests) +{ + struct mlx5dr_cmd_flow_destination_hw_info *hw_dests; + struct mlx5dr_action **ref_actions; + struct mlx5dr_action *action; + bool reformat_req = false; + u32 num_of_ref = 0; + int ret; + int i; + + if (dmn->type != MLX5DR_DOMAIN_TYPE_FDB) { + mlx5dr_err(dmn, "Multiple destination support is for FDB only\n"); + return NULL; + } + + hw_dests = kzalloc(sizeof(*hw_dests) * num_of_dests, GFP_KERNEL); + if (!hw_dests) + return NULL; + + ref_actions = kzalloc(sizeof(*ref_actions) * num_of_dests * 2, GFP_KERNEL); + if (!ref_actions) + goto free_hw_dests; + + for (i = 0; i < num_of_dests; i++) { + struct mlx5dr_action *reformat_action = dests[i].reformat; + struct mlx5dr_action *dest_action = dests[i].dest; + + ref_actions[num_of_ref++] = dest_action; + + switch (dest_action->action_type) { + case DR_ACTION_TYP_VPORT: + hw_dests[i].vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; + hw_dests[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + hw_dests[i].vport.num = dest_action->vport.caps->num; + hw_dests[i].vport.vhca_id = dest_action->vport.caps->vhca_gvmi; + if (reformat_action) { + reformat_req = true; + hw_dests[i].vport.reformat_id = + 
reformat_action->reformat.reformat_id; + ref_actions[num_of_ref++] = reformat_action; + hw_dests[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; + } + break; + + case DR_ACTION_TYP_FT: + hw_dests[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + if (dest_action->dest_tbl.is_fw_tbl) + hw_dests[i].ft_id = dest_action->dest_tbl.fw_tbl.id; + else + hw_dests[i].ft_id = dest_action->dest_tbl.tbl->table_id; + break; + + default: + mlx5dr_dbg(dmn, "Invalid multiple destinations action\n"); + goto free_ref_actions; + } + } + + action = dr_action_create_generic(DR_ACTION_TYP_FT); + if (!action) + goto free_ref_actions; + + ret = mlx5dr_fw_create_md_tbl(dmn, + hw_dests, + num_of_dests, + reformat_req, + &action->dest_tbl.fw_tbl.id, + &action->dest_tbl.fw_tbl.group_id); + if (ret) + goto free_action; + + refcount_inc(&dmn->refcount); + + for (i = 0; i < num_of_ref; i++) + refcount_inc(&ref_actions[i]->refcount); + + action->dest_tbl.is_fw_tbl = true; + action->dest_tbl.fw_tbl.dmn = dmn; + action->dest_tbl.fw_tbl.type = FS_FT_FDB; + action->dest_tbl.fw_tbl.ref_actions = ref_actions; + action->dest_tbl.fw_tbl.num_of_ref_actions = num_of_ref; + + kfree(hw_dests); + + return action; + +free_action: + kfree(action); +free_ref_actions: + kfree(ref_actions); +free_hw_dests: + kfree(hw_dests); + return NULL; +} + +struct mlx5dr_action * +mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *dmn, + struct mlx5_flow_table *ft) +{ + struct mlx5dr_action *action; + + action = dr_action_create_generic(DR_ACTION_TYP_FT); + if (!action) + return NULL; + + action->dest_tbl.is_fw_tbl = 1; + action->dest_tbl.fw_tbl.type = ft->type; + action->dest_tbl.fw_tbl.id = ft->id; + action->dest_tbl.fw_tbl.dmn = dmn; + + refcount_inc(&dmn->refcount); + + return action; +} + +struct mlx5dr_action * +mlx5dr_action_create_flow_counter(u32 counter_id) +{ + struct mlx5dr_action *action; + + action = dr_action_create_generic(DR_ACTION_TYP_CTR); + if (!action) + return NULL; + + action->ctr.ctr_id = counter_id; + + return action; +} + +struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value) +{ + struct mlx5dr_action *action; + + action = dr_action_create_generic(DR_ACTION_TYP_TAG); + if (!action) + return NULL; + + action->flow_tag = tag_value & 0xffffff; + + return action; +} + +static int +dr_action_verify_reformat_params(enum mlx5dr_action_type reformat_type, + struct mlx5dr_domain *dmn, + size_t data_sz, + void *data) +{ + if ((!data && data_sz) || (data && !data_sz) || reformat_type > + DR_ACTION_TYP_L2_TO_TNL_L3) { + mlx5dr_dbg(dmn, "Invalid reformat parameter!\n"); + goto out_err; + } + + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB) + return 0; + + if (dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) { + if (reformat_type != DR_ACTION_TYP_TNL_L2_TO_L2 && + reformat_type != DR_ACTION_TYP_TNL_L3_TO_L2) { + mlx5dr_dbg(dmn, "Action reformat type not support on RX domain\n"); + goto out_err; + } + } else if (dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) { + if (reformat_type != DR_ACTION_TYP_L2_TO_TNL_L2 && + reformat_type != DR_ACTION_TYP_L2_TO_TNL_L3) { + mlx5dr_dbg(dmn, "Action reformat type not support on TX domain\n"); + goto out_err; + } + } + + return 0; + +out_err: + return -EINVAL; +} + +#define ACTION_CACHE_LINE_SIZE 64 + +static int +dr_action_create_reformat_action(struct mlx5dr_domain *dmn, + size_t data_sz, void *data, + struct mlx5dr_action *action) +{ + u32 reformat_id; + int ret; + + switch (action->action_type) { + case DR_ACTION_TYP_L2_TO_TNL_L2: + case DR_ACTION_TYP_L2_TO_TNL_L3: + { + enum mlx5_reformat_ctx_type 
rt; + + if (action->action_type == DR_ACTION_TYP_L2_TO_TNL_L2) + rt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL; + else + rt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL; + + ret = mlx5dr_cmd_create_reformat_ctx(dmn->mdev, rt, data_sz, data, + &reformat_id); + if (ret) + return ret; + + action->reformat.reformat_id = reformat_id; + action->reformat.reformat_size = data_sz; + return 0; + } + case DR_ACTION_TYP_TNL_L2_TO_L2: + { + return 0; + } + case DR_ACTION_TYP_TNL_L3_TO_L2: + { + /* Only Ethernet frame is supported, with VLAN (18) or without (14) */ + if (data_sz != HDR_LEN_L2_ONLY && data_sz != HDR_LEN_L2_VLAN) + return -EINVAL; + + action->rewrite.chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool, + DR_CHUNK_SIZE_8); + if (!action->rewrite.chunk) + return -ENOMEM; + + action->rewrite.index = (action->rewrite.chunk->icm_addr - + dmn->info.caps.hdr_modify_icm_addr) / + ACTION_CACHE_LINE_SIZE; + + ret = dr_actions_l2_rewrite(dmn, action, data, data_sz); + if (ret) { + mlx5dr_icm_free_chunk(action->rewrite.chunk); + return ret; + } + return 0; + } + default: + mlx5dr_info(dmn, "Reformat type is not supported %d\n", action->action_type); + return -EINVAL; + } +} + +struct mlx5dr_action *mlx5dr_action_create_pop_vlan(void) +{ + return dr_action_create_generic(DR_ACTION_TYP_POP_VLAN); +} + +struct mlx5dr_action *mlx5dr_action_create_push_vlan(struct mlx5dr_domain *dmn, + __be32 vlan_hdr) +{ + u32 vlan_hdr_h = ntohl(vlan_hdr); + u16 ethertype = vlan_hdr_h >> 16; + struct mlx5dr_action *action; + + if (ethertype != SVLAN_ETHERTYPE && ethertype != CVLAN_ETHERTYPE) { + mlx5dr_dbg(dmn, "Invalid vlan ethertype\n"); + return NULL; + } + + action = dr_action_create_generic(DR_ACTION_TYP_PUSH_VLAN); + if (!action) + return NULL; + + action->push_vlan.vlan_hdr = vlan_hdr_h; + return action; +} + +struct mlx5dr_action * +mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn, + enum mlx5dr_action_reformat_type reformat_type, + size_t data_sz, + void *data) +{ + enum mlx5dr_action_type action_type; + struct mlx5dr_action *action; + int ret; + + refcount_inc(&dmn->refcount); + + /* General checks */ + ret = dr_action_reformat_to_action_type(reformat_type, &action_type); + if (ret) { + mlx5dr_dbg(dmn, "Invalid reformat_type provided\n"); + goto dec_ref; + } + + ret = dr_action_verify_reformat_params(action_type, dmn, data_sz, data); + if (ret) + goto dec_ref; + + action = dr_action_create_generic(action_type); + if (!action) + goto dec_ref; + + action->reformat.dmn = dmn; + + ret = dr_action_create_reformat_action(dmn, + data_sz, + data, + action); + if (ret) { + mlx5dr_dbg(dmn, "Failed creating reformat action %d\n", ret); + goto free_action; + } + + return action; + +free_action: + kfree(action); +dec_ref: + refcount_dec(&dmn->refcount); + return NULL; +} + +static const struct dr_action_modify_field_conv * +dr_action_modify_get_hw_info(u16 sw_field) +{ + const struct dr_action_modify_field_conv *hw_action_info; + + if (sw_field >= ARRAY_SIZE(dr_action_conv_arr)) + goto not_found; + + hw_action_info = &dr_action_conv_arr[sw_field]; + if (!hw_action_info->end && !hw_action_info->start) + goto not_found; + + return hw_action_info; + +not_found: + return NULL; +} + +static int +dr_action_modify_sw_to_hw_add(struct mlx5dr_domain *dmn, + __be64 *sw_action, + __be64 *hw_action, + const struct dr_action_modify_field_conv **ret_hw_info) +{ + const struct dr_action_modify_field_conv *hw_action_info; + u8 max_length; + u16 sw_field; + u32 data; + + /* Get SW modify action data */ + sw_field = MLX5_GET(set_action_in, 
sw_action, field); + data = MLX5_GET(set_action_in, sw_action, data); + + /* Convert SW data to HW modify action format */ + hw_action_info = dr_action_modify_get_hw_info(sw_field); + if (!hw_action_info) { + mlx5dr_dbg(dmn, "Modify add action invalid field given\n"); + return -EINVAL; + } + + max_length = hw_action_info->end - hw_action_info->start + 1; + + MLX5_SET(dr_action_hw_set, hw_action, + opcode, MLX5DR_ACTION_MDFY_HW_OP_ADD); + + MLX5_SET(dr_action_hw_set, hw_action, destination_field_code, + hw_action_info->hw_field); + + MLX5_SET(dr_action_hw_set, hw_action, destination_left_shifter, + hw_action_info->start); + + /* PRM defines that length zero specific length of 32bits */ + MLX5_SET(dr_action_hw_set, hw_action, destination_length, + max_length == 32 ? 0 : max_length); + + MLX5_SET(dr_action_hw_set, hw_action, inline_data, data); + + *ret_hw_info = hw_action_info; + + return 0; +} + +static int +dr_action_modify_sw_to_hw_set(struct mlx5dr_domain *dmn, + __be64 *sw_action, + __be64 *hw_action, + const struct dr_action_modify_field_conv **ret_hw_info) +{ + const struct dr_action_modify_field_conv *hw_action_info; + u8 offset, length, max_length; + u16 sw_field; + u32 data; + + /* Get SW modify action data */ + length = MLX5_GET(set_action_in, sw_action, length); + offset = MLX5_GET(set_action_in, sw_action, offset); + sw_field = MLX5_GET(set_action_in, sw_action, field); + data = MLX5_GET(set_action_in, sw_action, data); + + /* Convert SW data to HW modify action format */ + hw_action_info = dr_action_modify_get_hw_info(sw_field); + if (!hw_action_info) { + mlx5dr_dbg(dmn, "Modify set action invalid field given\n"); + return -EINVAL; + } + + /* PRM defines that length zero specific length of 32bits */ + length = length ? length : 32; + + max_length = hw_action_info->end - hw_action_info->start + 1; + + if (length + offset > max_length) { + mlx5dr_dbg(dmn, "Modify action length + offset exceeds limit\n"); + return -EINVAL; + } + + MLX5_SET(dr_action_hw_set, hw_action, + opcode, MLX5DR_ACTION_MDFY_HW_OP_SET); + + MLX5_SET(dr_action_hw_set, hw_action, destination_field_code, + hw_action_info->hw_field); + + MLX5_SET(dr_action_hw_set, hw_action, destination_left_shifter, + hw_action_info->start + offset); + + MLX5_SET(dr_action_hw_set, hw_action, destination_length, + length == 32 ? 
0 : length); + + MLX5_SET(dr_action_hw_set, hw_action, inline_data, data); + + *ret_hw_info = hw_action_info; + + return 0; +} + +static int +dr_action_modify_sw_to_hw_copy(struct mlx5dr_domain *dmn, + __be64 *sw_action, + __be64 *hw_action, + const struct dr_action_modify_field_conv **ret_dst_hw_info, + const struct dr_action_modify_field_conv **ret_src_hw_info) +{ + u8 src_offset, dst_offset, src_max_length, dst_max_length, length; + const struct dr_action_modify_field_conv *hw_dst_action_info; + const struct dr_action_modify_field_conv *hw_src_action_info; + u16 src_field, dst_field; + + /* Get SW modify action data */ + src_field = MLX5_GET(copy_action_in, sw_action, src_field); + dst_field = MLX5_GET(copy_action_in, sw_action, dst_field); + src_offset = MLX5_GET(copy_action_in, sw_action, src_offset); + dst_offset = MLX5_GET(copy_action_in, sw_action, dst_offset); + length = MLX5_GET(copy_action_in, sw_action, length); + + /* Convert SW data to HW modify action format */ + hw_src_action_info = dr_action_modify_get_hw_info(src_field); + hw_dst_action_info = dr_action_modify_get_hw_info(dst_field); + if (!hw_src_action_info || !hw_dst_action_info) { + mlx5dr_dbg(dmn, "Modify copy action invalid field given\n"); + return -EINVAL; + } + + /* PRM defines that length zero specific length of 32bits */ + length = length ? length : 32; + + src_max_length = hw_src_action_info->end - + hw_src_action_info->start + 1; + dst_max_length = hw_dst_action_info->end - + hw_dst_action_info->start + 1; + + if (length + src_offset > src_max_length || + length + dst_offset > dst_max_length) { + mlx5dr_dbg(dmn, "Modify action length + offset exceeds limit\n"); + return -EINVAL; + } + + MLX5_SET(dr_action_hw_copy, hw_action, + opcode, MLX5DR_ACTION_MDFY_HW_OP_COPY); + + MLX5_SET(dr_action_hw_copy, hw_action, destination_field_code, + hw_dst_action_info->hw_field); + + MLX5_SET(dr_action_hw_copy, hw_action, destination_left_shifter, + hw_dst_action_info->start + dst_offset); + + MLX5_SET(dr_action_hw_copy, hw_action, destination_length, + length == 32 ? 
0 : length); + + MLX5_SET(dr_action_hw_copy, hw_action, source_field_code, + hw_src_action_info->hw_field); + + MLX5_SET(dr_action_hw_copy, hw_action, source_left_shifter, + hw_src_action_info->start + dst_offset); + + *ret_dst_hw_info = hw_dst_action_info; + *ret_src_hw_info = hw_src_action_info; + + return 0; +} + +static int +dr_action_modify_sw_to_hw(struct mlx5dr_domain *dmn, + __be64 *sw_action, + __be64 *hw_action, + const struct dr_action_modify_field_conv **ret_dst_hw_info, + const struct dr_action_modify_field_conv **ret_src_hw_info) +{ + u8 action; + int ret; + + *hw_action = 0; + *ret_src_hw_info = NULL; + + /* Get SW modify action type */ + action = MLX5_GET(set_action_in, sw_action, action_type); + + switch (action) { + case MLX5_ACTION_TYPE_SET: + ret = dr_action_modify_sw_to_hw_set(dmn, sw_action, + hw_action, + ret_dst_hw_info); + break; + + case MLX5_ACTION_TYPE_ADD: + ret = dr_action_modify_sw_to_hw_add(dmn, sw_action, + hw_action, + ret_dst_hw_info); + break; + + case MLX5_ACTION_TYPE_COPY: + ret = dr_action_modify_sw_to_hw_copy(dmn, sw_action, + hw_action, + ret_dst_hw_info, + ret_src_hw_info); + break; + + default: + mlx5dr_info(dmn, "Unsupported action_type for modify action\n"); + ret = -EOPNOTSUPP; + } + + return ret; +} + +static int +dr_action_modify_check_set_field_limitation(struct mlx5dr_action *action, + const __be64 *sw_action) +{ + u16 sw_field = MLX5_GET(set_action_in, sw_action, field); + struct mlx5dr_domain *dmn = action->rewrite.dmn; + + if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_A) { + action->rewrite.allow_rx = 0; + if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_TX) { + mlx5dr_dbg(dmn, "Unsupported field %d for RX/FDB set action\n", + sw_field); + return -EINVAL; + } + } else if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_B) { + action->rewrite.allow_tx = 0; + if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_RX) { + mlx5dr_dbg(dmn, "Unsupported field %d for TX/FDB set action\n", + sw_field); + return -EINVAL; + } + } + + if (!action->rewrite.allow_rx && !action->rewrite.allow_tx) { + mlx5dr_dbg(dmn, "Modify SET actions not supported on both RX and TX\n"); + return -EINVAL; + } + + return 0; +} + +static int +dr_action_modify_check_add_field_limitation(struct mlx5dr_action *action, + const __be64 *sw_action) +{ + u16 sw_field = MLX5_GET(set_action_in, sw_action, field); + struct mlx5dr_domain *dmn = action->rewrite.dmn; + + if (sw_field != MLX5_ACTION_IN_FIELD_OUT_IP_TTL && + sw_field != MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT && + sw_field != MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM && + sw_field != MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM) { + mlx5dr_dbg(dmn, "Unsupported field %d for add action\n", + sw_field); + return -EINVAL; + } + + return 0; +} + +static int +dr_action_modify_check_copy_field_limitation(struct mlx5dr_action *action, + const __be64 *sw_action) +{ + struct mlx5dr_domain *dmn = action->rewrite.dmn; + u16 sw_fields[2]; + int i; + + sw_fields[0] = MLX5_GET(copy_action_in, sw_action, src_field); + sw_fields[1] = MLX5_GET(copy_action_in, sw_action, dst_field); + + for (i = 0; i < 2; i++) { + if (sw_fields[i] == MLX5_ACTION_IN_FIELD_METADATA_REG_A) { + action->rewrite.allow_rx = 0; + if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_TX) { + mlx5dr_dbg(dmn, "Unsupported field %d for RX/FDB set action\n", + sw_fields[i]); + return -EINVAL; + } + } else if (sw_fields[i] == MLX5_ACTION_IN_FIELD_METADATA_REG_B) { + action->rewrite.allow_tx = 0; + if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_RX) { + mlx5dr_dbg(dmn, "Unsupported field %d for TX/FDB set action\n", + 
sw_fields[i]); + return -EINVAL; + } + } + } + + if (!action->rewrite.allow_rx && !action->rewrite.allow_tx) { + mlx5dr_dbg(dmn, "Modify copy actions not supported on both RX and TX\n"); + return -EINVAL; + } + + return 0; +} + +static int +dr_action_modify_check_field_limitation(struct mlx5dr_action *action, + const __be64 *sw_action) +{ + struct mlx5dr_domain *dmn = action->rewrite.dmn; + u8 action_type; + int ret; + + action_type = MLX5_GET(set_action_in, sw_action, action_type); + + switch (action_type) { + case MLX5_ACTION_TYPE_SET: + ret = dr_action_modify_check_set_field_limitation(action, + sw_action); + break; + + case MLX5_ACTION_TYPE_ADD: + ret = dr_action_modify_check_add_field_limitation(action, + sw_action); + break; + + case MLX5_ACTION_TYPE_COPY: + ret = dr_action_modify_check_copy_field_limitation(action, + sw_action); + break; + + default: + mlx5dr_info(dmn, "Unsupported action %d modify action\n", + action_type); + ret = -EOPNOTSUPP; + } + + return ret; +} + +static bool +dr_action_modify_check_is_ttl_modify(const u64 *sw_action) +{ + u16 sw_field = MLX5_GET(set_action_in, sw_action, field); + + return sw_field == MLX5_ACTION_IN_FIELD_OUT_IP_TTL; +} + +static int dr_actions_convert_modify_header(struct mlx5dr_action *action, + u32 max_hw_actions, + u32 num_sw_actions, + __be64 sw_actions[], + __be64 hw_actions[], + u32 *num_hw_actions, + bool *modify_ttl) +{ + const struct dr_action_modify_field_conv *hw_dst_action_info; + const struct dr_action_modify_field_conv *hw_src_action_info; + u16 hw_field = MLX5DR_ACTION_MDFY_HW_FLD_RESERVED; + u32 l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_NONE; + u32 l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_NONE; + struct mlx5dr_domain *dmn = action->rewrite.dmn; + int ret, i, hw_idx = 0; + __be64 *sw_action; + __be64 hw_action; + + *modify_ttl = false; + + action->rewrite.allow_rx = 1; + action->rewrite.allow_tx = 1; + + for (i = 0; i < num_sw_actions; i++) { + sw_action = &sw_actions[i]; + + ret = dr_action_modify_check_field_limitation(action, + sw_action); + if (ret) + return ret; + + if (!(*modify_ttl)) + *modify_ttl = dr_action_modify_check_is_ttl_modify(sw_action); + + /* Convert SW action to HW action */ + ret = dr_action_modify_sw_to_hw(dmn, + sw_action, + &hw_action, + &hw_dst_action_info, + &hw_src_action_info); + if (ret) + return ret; + + /* Due to a HW limitation we cannot modify 2 different L3 types */ + if (l3_type && hw_dst_action_info->l3_type && + hw_dst_action_info->l3_type != l3_type) { + mlx5dr_dbg(dmn, "Action list can't support two different L3 types\n"); + return -EINVAL; + } + if (hw_dst_action_info->l3_type) + l3_type = hw_dst_action_info->l3_type; + + /* Due to a HW limitation we cannot modify two different L4 types */ + if (l4_type && hw_dst_action_info->l4_type && + hw_dst_action_info->l4_type != l4_type) { + mlx5dr_dbg(dmn, "Action list can't support two different L4 types\n"); + return -EINVAL; + } + if (hw_dst_action_info->l4_type) + l4_type = hw_dst_action_info->l4_type; + + /* HW reads and executes two actions at once this means we + * need to create a gap if two actions access the same field + */ + if ((hw_idx % 2) && (hw_field == hw_dst_action_info->hw_field || + (hw_src_action_info && + hw_field == hw_src_action_info->hw_field))) { + /* Check if after gap insertion the total number of HW + * modify actions doesn't exceeds the limit + */ + hw_idx++; + if ((num_sw_actions + hw_idx - i) >= max_hw_actions) { + mlx5dr_dbg(dmn, "Modify header action number exceeds HW limit\n"); + return -EINVAL; + } + } + hw_field = 
hw_dst_action_info->hw_field; + + hw_actions[hw_idx] = hw_action; + hw_idx++; + } + + *num_hw_actions = hw_idx; + + return 0; +} + +static int dr_action_create_modify_action(struct mlx5dr_domain *dmn, + size_t actions_sz, + __be64 actions[], + struct mlx5dr_action *action) +{ + struct mlx5dr_icm_chunk *chunk; + u32 max_hw_actions; + u32 num_hw_actions; + u32 num_sw_actions; + __be64 *hw_actions; + bool modify_ttl; + int ret; + + num_sw_actions = actions_sz / DR_MODIFY_ACTION_SIZE; + max_hw_actions = mlx5dr_icm_pool_chunk_size_to_entries(DR_CHUNK_SIZE_16); + + if (num_sw_actions > max_hw_actions) { + mlx5dr_dbg(dmn, "Max number of actions %d exceeds limit %d\n", + num_sw_actions, max_hw_actions); + return -EINVAL; + } + + chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool, DR_CHUNK_SIZE_16); + if (!chunk) + return -ENOMEM; + + hw_actions = kcalloc(1, max_hw_actions * DR_MODIFY_ACTION_SIZE, GFP_KERNEL); + if (!hw_actions) { + ret = -ENOMEM; + goto free_chunk; + } + + ret = dr_actions_convert_modify_header(action, + max_hw_actions, + num_sw_actions, + actions, + hw_actions, + &num_hw_actions, + &modify_ttl); + if (ret) + goto free_hw_actions; + + action->rewrite.chunk = chunk; + action->rewrite.modify_ttl = modify_ttl; + action->rewrite.data = (u8 *)hw_actions; + action->rewrite.num_of_actions = num_hw_actions; + action->rewrite.index = (chunk->icm_addr - + dmn->info.caps.hdr_modify_icm_addr) / + ACTION_CACHE_LINE_SIZE; + + ret = mlx5dr_send_postsend_action(dmn, action); + if (ret) + goto free_hw_actions; + + return 0; + +free_hw_actions: + kfree(hw_actions); +free_chunk: + mlx5dr_icm_free_chunk(chunk); + return ret; +} + +struct mlx5dr_action * +mlx5dr_action_create_modify_header(struct mlx5dr_domain *dmn, + u32 flags, + size_t actions_sz, + __be64 actions[]) +{ + struct mlx5dr_action *action; + int ret = 0; + + refcount_inc(&dmn->refcount); + + if (actions_sz % DR_MODIFY_ACTION_SIZE) { + mlx5dr_dbg(dmn, "Invalid modify actions size provided\n"); + goto dec_ref; + } + + action = dr_action_create_generic(DR_ACTION_TYP_MODIFY_HDR); + if (!action) + goto dec_ref; + + action->rewrite.dmn = dmn; + + ret = dr_action_create_modify_action(dmn, + actions_sz, + actions, + action); + if (ret) { + mlx5dr_dbg(dmn, "Failed creating modify header action %d\n", ret); + goto free_action; + } + + return action; + +free_action: + kfree(action); +dec_ref: + refcount_dec(&dmn->refcount); + return NULL; +} + +struct mlx5dr_action * +mlx5dr_action_create_dest_vport(struct mlx5dr_domain *dmn, + u32 vport, u8 vhca_id_valid, + u16 vhca_id) +{ + struct mlx5dr_cmd_vport_cap *vport_cap; + struct mlx5dr_domain *vport_dmn; + struct mlx5dr_action *action; + u8 peer_vport; + + peer_vport = vhca_id_valid && (vhca_id != dmn->info.caps.gvmi); + vport_dmn = peer_vport ? 
dmn->peer_dmn : dmn; + if (!vport_dmn) { + mlx5dr_dbg(dmn, "No peer vport domain for given vhca_id\n"); + return NULL; + } + + if (vport_dmn->type != MLX5DR_DOMAIN_TYPE_FDB) { + mlx5dr_dbg(dmn, "Domain doesn't support vport actions\n"); + return NULL; + } + + vport_cap = mlx5dr_get_vport_cap(&vport_dmn->info.caps, vport); + if (!vport_cap) { + mlx5dr_dbg(dmn, "Failed to get vport %d caps\n", vport); + return NULL; + } + + action = dr_action_create_generic(DR_ACTION_TYP_VPORT); + if (!action) + return NULL; + + action->vport.dmn = vport_dmn; + action->vport.caps = vport_cap; + + return action; +} + +int mlx5dr_action_destroy(struct mlx5dr_action *action) +{ + if (refcount_read(&action->refcount) > 1) + return -EBUSY; + + switch (action->action_type) { + case DR_ACTION_TYP_FT: + if (action->dest_tbl.is_fw_tbl) + refcount_dec(&action->dest_tbl.fw_tbl.dmn->refcount); + else + refcount_dec(&action->dest_tbl.tbl->refcount); + + if (action->dest_tbl.is_fw_tbl && + action->dest_tbl.fw_tbl.num_of_ref_actions) { + struct mlx5dr_action **ref_actions; + int i; + + ref_actions = action->dest_tbl.fw_tbl.ref_actions; + for (i = 0; i < action->dest_tbl.fw_tbl.num_of_ref_actions; i++) + refcount_dec(&ref_actions[i]->refcount); + + kfree(ref_actions); + + mlx5dr_fw_destroy_md_tbl(action->dest_tbl.fw_tbl.dmn, + action->dest_tbl.fw_tbl.id, + action->dest_tbl.fw_tbl.group_id); + } + break; + case DR_ACTION_TYP_TNL_L2_TO_L2: + refcount_dec(&action->reformat.dmn->refcount); + break; + case DR_ACTION_TYP_TNL_L3_TO_L2: + mlx5dr_icm_free_chunk(action->rewrite.chunk); + refcount_dec(&action->reformat.dmn->refcount); + break; + case DR_ACTION_TYP_L2_TO_TNL_L2: + case DR_ACTION_TYP_L2_TO_TNL_L3: + mlx5dr_cmd_destroy_reformat_ctx((action->reformat.dmn)->mdev, + action->reformat.reformat_id); + refcount_dec(&action->reformat.dmn->refcount); + break; + case DR_ACTION_TYP_MODIFY_HDR: + mlx5dr_icm_free_chunk(action->rewrite.chunk); + kfree(action->rewrite.data); + refcount_dec(&action->rewrite.dmn->refcount); + break; + default: + break; + } + + kfree(action); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c new file mode 100644 index 000000000000..461b39376daf --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -0,0 +1,686 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
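mlx5dr_action_destroy() above refuses to free an action that is still referenced (refcount > 1 returns -EBUSY) and only then releases the per-type resources. The following is a minimal single-threaded sketch of that guard, assuming a plain integer counter instead of the kernel's refcount_t; the sk_* names are hypothetical.

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical object with a simple reference count. */
struct sk_action {
	int refcount;
};

static struct sk_action *sk_action_create(void)
{
	struct sk_action *a = calloc(1, sizeof(*a));

	if (a)
		a->refcount = 1;
	return a;
}

static void sk_action_get(struct sk_action *a)
{
	a->refcount++;
}

/* Destroy only succeeds when the caller holds the last reference,
 * mirroring the -EBUSY check in mlx5dr_action_destroy(). */
static int sk_action_destroy(struct sk_action *a)
{
	if (a->refcount > 1)
		return -1;	/* still referenced, e.g. by a rule */

	free(a);
	return 0;
}

int main(void)
{
	struct sk_action *a = sk_action_create();

	sk_action_get(a);	/* a rule takes a reference */
	printf("destroy while busy: %d\n", sk_action_destroy(a));
	a->refcount--;		/* the rule releases it */
	printf("destroy when free: %d\n", sk_action_destroy(a));
	return 0;
}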
*/ + +#include "dr_types.h" + +int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev, + bool other_vport, + u16 vport_number, + u64 *icm_address_rx, + u64 *icm_address_tx) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {}; + int err; + + MLX5_SET(query_esw_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); + MLX5_SET(query_esw_vport_context_in, in, other_vport, other_vport); + MLX5_SET(query_esw_vport_context_in, in, vport_number, vport_number); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + *icm_address_rx = + MLX5_GET64(query_esw_vport_context_out, out, + esw_vport_context.sw_steering_vport_icm_address_rx); + *icm_address_tx = + MLX5_GET64(query_esw_vport_context_out, out, + esw_vport_context.sw_steering_vport_icm_address_tx); + return 0; +} + +int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_vport, + u16 vport_number, u16 *gvmi) +{ + u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; + int out_size; + void *out; + int err; + + out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out); + out = kzalloc(out_size, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); + MLX5_SET(query_hca_cap_in, in, other_function, other_vport); + MLX5_SET(query_hca_cap_in, in, function_id, vport_number); + MLX5_SET(query_hca_cap_in, in, op_mod, + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 | + HCA_CAP_OPMOD_GET_CUR); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, out_size); + if (err) { + kfree(out); + return err; + } + + *gvmi = MLX5_GET(query_hca_cap_out, out, capability.cmd_hca_cap.vhca_id); + + kfree(out); + return 0; +} + +int mlx5dr_cmd_query_esw_caps(struct mlx5_core_dev *mdev, + struct mlx5dr_esw_caps *caps) +{ + caps->drop_icm_address_rx = + MLX5_CAP64_ESW_FLOWTABLE(mdev, + sw_steering_fdb_action_drop_icm_address_rx); + caps->drop_icm_address_tx = + MLX5_CAP64_ESW_FLOWTABLE(mdev, + sw_steering_fdb_action_drop_icm_address_tx); + caps->uplink_icm_address_rx = + MLX5_CAP64_ESW_FLOWTABLE(mdev, + sw_steering_uplink_icm_address_rx); + caps->uplink_icm_address_tx = + MLX5_CAP64_ESW_FLOWTABLE(mdev, + sw_steering_uplink_icm_address_tx); + caps->sw_owner = + MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, + sw_owner); + + return 0; +} + +int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, + struct mlx5dr_cmd_caps *caps) +{ + caps->prio_tag_required = MLX5_CAP_GEN(mdev, prio_tag_required); + caps->eswitch_manager = MLX5_CAP_GEN(mdev, eswitch_manager); + caps->gvmi = MLX5_CAP_GEN(mdev, vhca_id); + caps->flex_protocols = MLX5_CAP_GEN(mdev, flex_parser_protocols); + + if (mlx5dr_matcher_supp_flex_parser_icmp_v4(caps)) { + caps->flex_parser_id_icmp_dw0 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw0); + caps->flex_parser_id_icmp_dw1 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw1); + } + + if (mlx5dr_matcher_supp_flex_parser_icmp_v6(caps)) { + caps->flex_parser_id_icmpv6_dw0 = + MLX5_CAP_GEN(mdev, flex_parser_id_icmpv6_dw0); + caps->flex_parser_id_icmpv6_dw1 = + MLX5_CAP_GEN(mdev, flex_parser_id_icmpv6_dw1); + } + + caps->nic_rx_drop_address = + MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_rx_action_drop_icm_address); + caps->nic_tx_drop_address = + MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_tx_action_drop_icm_address); + caps->nic_tx_allow_address = + MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_tx_action_allow_icm_address); + + caps->rx_sw_owner = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, sw_owner); + caps->max_ft_level = 
MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_ft_level); + + caps->tx_sw_owner = MLX5_CAP_FLOWTABLE_NIC_TX(mdev, sw_owner); + + caps->log_icm_size = MLX5_CAP_DEV_MEM(mdev, log_steering_sw_icm_size); + caps->hdr_modify_icm_addr = + MLX5_CAP64_DEV_MEM(mdev, header_modify_sw_icm_start_address); + + caps->roce_min_src_udp = MLX5_CAP_ROCE(mdev, r_roce_min_src_udp_port); + + return 0; +} + +int mlx5dr_cmd_query_flow_table(struct mlx5_core_dev *dev, + enum fs_flow_table_type type, + u32 table_id, + struct mlx5dr_cmd_query_flow_table_details *output) +{ + u32 out[MLX5_ST_SZ_DW(query_flow_table_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_flow_table_in)] = {}; + int err; + + MLX5_SET(query_flow_table_in, in, opcode, + MLX5_CMD_OP_QUERY_FLOW_TABLE); + + MLX5_SET(query_flow_table_in, in, table_type, type); + MLX5_SET(query_flow_table_in, in, table_id, table_id); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + output->status = MLX5_GET(query_flow_table_out, out, status); + output->level = MLX5_GET(query_flow_table_out, out, flow_table_context.level); + + output->sw_owner_icm_root_1 = MLX5_GET64(query_flow_table_out, out, + flow_table_context.sw_owner_icm_root_1); + output->sw_owner_icm_root_0 = MLX5_GET64(query_flow_table_out, out, + flow_table_context.sw_owner_icm_root_0); + + return 0; +} + +int mlx5dr_cmd_sync_steering(struct mlx5_core_dev *mdev) +{ + u32 out[MLX5_ST_SZ_DW(sync_steering_out)] = {}; + u32 in[MLX5_ST_SZ_DW(sync_steering_in)] = {}; + + MLX5_SET(sync_steering_in, in, opcode, MLX5_CMD_OP_SYNC_STEERING); + + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5dr_cmd_set_fte_modify_and_vport(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 group_id, + u32 modify_header_id, + u32 vport_id) +{ + u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {}; + void *in_flow_context; + unsigned int inlen; + void *in_dests; + u32 *in; + int err; + + inlen = MLX5_ST_SZ_BYTES(set_fte_in) + + 1 * MLX5_ST_SZ_BYTES(dest_format_struct); /* One destination only */ + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY); + MLX5_SET(set_fte_in, in, table_type, table_type); + MLX5_SET(set_fte_in, in, table_id, table_id); + + in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); + MLX5_SET(flow_context, in_flow_context, group_id, group_id); + MLX5_SET(flow_context, in_flow_context, modify_header_id, modify_header_id); + MLX5_SET(flow_context, in_flow_context, destination_list_size, 1); + MLX5_SET(flow_context, in_flow_context, action, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR); + + in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); + MLX5_SET(dest_format_struct, in_dests, destination_type, + MLX5_FLOW_DESTINATION_TYPE_VPORT); + MLX5_SET(dest_format_struct, in_dests, destination_id, vport_id); + + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + kvfree(in); + + return err; +} + +int mlx5dr_cmd_del_flow_table_entry(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id) +{ + u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {}; + u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {}; + + MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); + MLX5_SET(delete_fte_in, in, table_type, table_type); + MLX5_SET(delete_fte_in, in, table_id, table_id); + + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5dr_cmd_alloc_modify_header(struct mlx5_core_dev *mdev, + u32 table_type, + u8 
num_of_actions, + u64 *actions, + u32 *modify_header_id) +{ + u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)] = {}; + void *p_actions; + u32 inlen; + u32 *in; + int err; + + inlen = MLX5_ST_SZ_BYTES(alloc_modify_header_context_in) + + num_of_actions * sizeof(u64); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(alloc_modify_header_context_in, in, opcode, + MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(alloc_modify_header_context_in, in, table_type, table_type); + MLX5_SET(alloc_modify_header_context_in, in, num_of_actions, num_of_actions); + p_actions = MLX5_ADDR_OF(alloc_modify_header_context_in, in, actions); + memcpy(p_actions, actions, num_of_actions * sizeof(u64)); + + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + if (err) + goto out; + + *modify_header_id = MLX5_GET(alloc_modify_header_context_out, out, + modify_header_id); +out: + kvfree(in); + return err; +} + +int mlx5dr_cmd_dealloc_modify_header(struct mlx5_core_dev *mdev, + u32 modify_header_id) +{ + u32 out[MLX5_ST_SZ_DW(dealloc_modify_header_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)] = {}; + + MLX5_SET(dealloc_modify_header_context_in, in, opcode, + MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id, + modify_header_id); + + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 *group_id) +{ + u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {}; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + u32 *in; + int err; + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_GROUP); + MLX5_SET(create_flow_group_in, in, table_type, table_type); + MLX5_SET(create_flow_group_in, in, table_id, table_id); + + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + if (err) + goto out; + + *group_id = MLX5_GET(create_flow_group_out, out, group_id); + +out: + kfree(in); + return err; +} + +int mlx5dr_cmd_destroy_flow_group(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 group_id) +{ + u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {}; + + MLX5_SET(create_flow_group_in, in, opcode, MLX5_CMD_OP_DESTROY_FLOW_GROUP); + MLX5_SET(destroy_flow_group_in, in, table_type, table_type); + MLX5_SET(destroy_flow_group_in, in, table_id, table_id); + MLX5_SET(destroy_flow_group_in, in, group_id, group_id); + + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev, + struct mlx5dr_cmd_create_flow_table_attr *attr, + u64 *fdb_rx_icm_addr, + u32 *table_id) +{ + u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {}; + u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {}; + void *ft_mdev; + int err; + + MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE); + MLX5_SET(create_flow_table_in, in, table_type, attr->table_type); + + ft_mdev = MLX5_ADDR_OF(create_flow_table_in, in, flow_table_context); + MLX5_SET(flow_table_context, ft_mdev, termination_table, attr->term_tbl); + MLX5_SET(flow_table_context, ft_mdev, sw_owner, attr->sw_owner); + MLX5_SET(flow_table_context, ft_mdev, level, attr->level); + + if (attr->sw_owner) { + /* icm_addr_0 used for FDB RX / NIC TX / NIC_RX + * icm_addr_1 used for FDB TX + */ + if (attr->table_type == 
MLX5_FLOW_TABLE_TYPE_NIC_RX) { + MLX5_SET64(flow_table_context, ft_mdev, + sw_owner_icm_root_0, attr->icm_addr_rx); + } else if (attr->table_type == MLX5_FLOW_TABLE_TYPE_NIC_TX) { + MLX5_SET64(flow_table_context, ft_mdev, + sw_owner_icm_root_0, attr->icm_addr_tx); + } else if (attr->table_type == MLX5_FLOW_TABLE_TYPE_FDB) { + MLX5_SET64(flow_table_context, ft_mdev, + sw_owner_icm_root_0, attr->icm_addr_rx); + MLX5_SET64(flow_table_context, ft_mdev, + sw_owner_icm_root_1, attr->icm_addr_tx); + } + } + + MLX5_SET(create_flow_table_in, in, flow_table_context.decap_en, + attr->decap_en); + MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en, + attr->reformat_en); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + *table_id = MLX5_GET(create_flow_table_out, out, table_id); + if (!attr->sw_owner && attr->table_type == MLX5_FLOW_TABLE_TYPE_FDB && + fdb_rx_icm_addr) + *fdb_rx_icm_addr = + (u64)MLX5_GET(create_flow_table_out, out, icm_address_31_0) | + (u64)MLX5_GET(create_flow_table_out, out, icm_address_39_32) << 32 | + (u64)MLX5_GET(create_flow_table_out, out, icm_address_63_40) << 40; + + return 0; +} + +int mlx5dr_cmd_destroy_flow_table(struct mlx5_core_dev *mdev, + u32 table_id, + u32 table_type) +{ + u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {}; + u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {}; + + MLX5_SET(destroy_flow_table_in, in, opcode, + MLX5_CMD_OP_DESTROY_FLOW_TABLE); + MLX5_SET(destroy_flow_table_in, in, table_type, table_type); + MLX5_SET(destroy_flow_table_in, in, table_id, table_id); + + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev, + enum mlx5_reformat_ctx_type rt, + size_t reformat_size, + void *reformat_data, + u32 *reformat_id) +{ + u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)] = {}; + size_t inlen, cmd_data_sz, cmd_total_sz; + void *prctx; + void *pdata; + void *in; + int err; + + cmd_total_sz = MLX5_ST_SZ_BYTES(alloc_packet_reformat_context_in); + cmd_data_sz = MLX5_FLD_SZ_BYTES(alloc_packet_reformat_context_in, + packet_reformat_context.reformat_data); + inlen = ALIGN(cmd_total_sz + reformat_size - cmd_data_sz, 4); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(alloc_packet_reformat_context_in, in, opcode, + MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT); + + prctx = MLX5_ADDR_OF(alloc_packet_reformat_context_in, in, packet_reformat_context); + pdata = MLX5_ADDR_OF(packet_reformat_context_in, prctx, reformat_data); + + MLX5_SET(packet_reformat_context_in, prctx, reformat_type, rt); + MLX5_SET(packet_reformat_context_in, prctx, reformat_data_size, reformat_size); + memcpy(pdata, reformat_data, reformat_size); + + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + if (err) + return err; + + *reformat_id = MLX5_GET(alloc_packet_reformat_context_out, out, packet_reformat_id); + kvfree(in); + + return err; +} + +void mlx5dr_cmd_destroy_reformat_ctx(struct mlx5_core_dev *mdev, + u32 reformat_id) +{ + u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)] = {}; + + MLX5_SET(dealloc_packet_reformat_context_in, in, opcode, + MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id, + reformat_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num, + u16 index, struct 
mlx5dr_cmd_gid_attr *attr) +{ + u32 out[MLX5_ST_SZ_DW(query_roce_address_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_roce_address_in)] = {}; + int err; + + MLX5_SET(query_roce_address_in, in, opcode, + MLX5_CMD_OP_QUERY_ROCE_ADDRESS); + + MLX5_SET(query_roce_address_in, in, roce_address_index, index); + MLX5_SET(query_roce_address_in, in, vhca_port_num, vhca_port_num); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + memcpy(&attr->gid, + MLX5_ADDR_OF(query_roce_address_out, + out, roce_address.source_l3_address), + sizeof(attr->gid)); + memcpy(attr->mac, + MLX5_ADDR_OF(query_roce_address_out, out, + roce_address.source_mac_47_32), + sizeof(attr->mac)); + + if (MLX5_GET(query_roce_address_out, out, + roce_address.roce_version) == MLX5_ROCE_VERSION_2) + attr->roce_ver = MLX5_ROCE_VERSION_2; + else + attr->roce_ver = MLX5_ROCE_VERSION_1; + + return 0; +} + +static int mlx5dr_cmd_set_extended_dest(struct mlx5_core_dev *dev, + struct mlx5dr_cmd_fte_info *fte, + bool *extended_dest) +{ + int fw_log_max_fdb_encap_uplink = MLX5_CAP_ESW(dev, log_max_fdb_encap_uplink); + int num_fwd_destinations = 0; + int num_encap = 0; + int i; + + *extended_dest = false; + if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) + return 0; + for (i = 0; i < fte->dests_size; i++) { + if (fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + if (fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_VPORT && + fte->dest_arr[i].vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) + num_encap++; + num_fwd_destinations++; + } + + if (num_fwd_destinations > 1 && num_encap > 0) + *extended_dest = true; + + if (*extended_dest && !fw_log_max_fdb_encap_uplink) { + mlx5_core_warn(dev, "FW does not support extended destination"); + return -EOPNOTSUPP; + } + if (num_encap > (1 << fw_log_max_fdb_encap_uplink)) { + mlx5_core_warn(dev, "FW does not support more than %d encaps", + 1 << fw_log_max_fdb_encap_uplink); + return -EOPNOTSUPP; + } + + return 0; +} + +int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev, + int opmod, int modify_mask, + struct mlx5dr_cmd_ft_info *ft, + u32 group_id, + struct mlx5dr_cmd_fte_info *fte) +{ + u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {}; + void *in_flow_context, *vlan; + bool extended_dest = false; + void *in_match_value; + unsigned int inlen; + int dst_cnt_size; + void *in_dests; + u32 *in; + int err; + int i; + + if (mlx5dr_cmd_set_extended_dest(dev, fte, &extended_dest)) + return -EOPNOTSUPP; + + if (!extended_dest) + dst_cnt_size = MLX5_ST_SZ_BYTES(dest_format_struct); + else + dst_cnt_size = MLX5_ST_SZ_BYTES(extended_dest_format); + + inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fte->dests_size * dst_cnt_size; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY); + MLX5_SET(set_fte_in, in, op_mod, opmod); + MLX5_SET(set_fte_in, in, modify_enable_mask, modify_mask); + MLX5_SET(set_fte_in, in, table_type, ft->type); + MLX5_SET(set_fte_in, in, table_id, ft->id); + MLX5_SET(set_fte_in, in, flow_index, fte->index); + if (ft->vport) { + MLX5_SET(set_fte_in, in, vport_number, ft->vport); + MLX5_SET(set_fte_in, in, other_vport, 1); + } + + in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); + MLX5_SET(flow_context, in_flow_context, group_id, group_id); + + MLX5_SET(flow_context, in_flow_context, flow_tag, + fte->flow_context.flow_tag); + MLX5_SET(flow_context, in_flow_context, flow_source, + fte->flow_context.flow_source); + + MLX5_SET(flow_context, 
in_flow_context, extended_destination, + extended_dest); + if (extended_dest) { + u32 action; + + action = fte->action.action & + ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + MLX5_SET(flow_context, in_flow_context, action, action); + } else { + MLX5_SET(flow_context, in_flow_context, action, + fte->action.action); + if (fte->action.pkt_reformat) + MLX5_SET(flow_context, in_flow_context, packet_reformat_id, + fte->action.pkt_reformat->id); + } + if (fte->action.modify_hdr) + MLX5_SET(flow_context, in_flow_context, modify_header_id, + fte->action.modify_hdr->id); + + vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan); + + MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[0].ethtype); + MLX5_SET(vlan, vlan, vid, fte->action.vlan[0].vid); + MLX5_SET(vlan, vlan, prio, fte->action.vlan[0].prio); + + vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan_2); + + MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[1].ethtype); + MLX5_SET(vlan, vlan, vid, fte->action.vlan[1].vid); + MLX5_SET(vlan, vlan, prio, fte->action.vlan[1].prio); + + in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context, + match_value); + memcpy(in_match_value, fte->val, sizeof(u32) * MLX5_ST_SZ_DW_MATCH_PARAM); + + in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + int list_size = 0; + + for (i = 0; i < fte->dests_size; i++) { + unsigned int id, type = fte->dest_arr[i].type; + + if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + switch (type) { + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: + id = fte->dest_arr[i].ft_num; + type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: + id = fte->dest_arr[i].ft_id; + break; + case MLX5_FLOW_DESTINATION_TYPE_VPORT: + id = fte->dest_arr[i].vport.num; + MLX5_SET(dest_format_struct, in_dests, + destination_eswitch_owner_vhca_id_valid, + !!(fte->dest_arr[i].vport.flags & + MLX5_FLOW_DEST_VPORT_VHCA_ID)); + MLX5_SET(dest_format_struct, in_dests, + destination_eswitch_owner_vhca_id, + fte->dest_arr[i].vport.vhca_id); + if (extended_dest && (fte->dest_arr[i].vport.flags & + MLX5_FLOW_DEST_VPORT_REFORMAT_ID)) { + MLX5_SET(dest_format_struct, in_dests, + packet_reformat, + !!(fte->dest_arr[i].vport.flags & + MLX5_FLOW_DEST_VPORT_REFORMAT_ID)); + MLX5_SET(extended_dest_format, in_dests, + packet_reformat_id, + fte->dest_arr[i].vport.reformat_id); + } + break; + default: + id = fte->dest_arr[i].tir_num; + } + + MLX5_SET(dest_format_struct, in_dests, destination_type, + type); + MLX5_SET(dest_format_struct, in_dests, destination_id, id); + in_dests += dst_cnt_size; + list_size++; + } + + MLX5_SET(flow_context, in_flow_context, destination_list_size, + list_size); + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + int max_list_size = BIT(MLX5_CAP_FLOWTABLE_TYPE(dev, + log_max_flow_counter, + ft->type)); + int list_size = 0; + + for (i = 0; i < fte->dests_size; i++) { + if (fte->dest_arr[i].type != + MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + MLX5_SET(flow_counter_list, in_dests, flow_counter_id, + fte->dest_arr[i].counter_id); + in_dests += dst_cnt_size; + list_size++; + } + if (list_size > max_list_size) { + err = -EINVAL; + goto err_out; + } + + MLX5_SET(flow_context, in_flow_context, flow_counter_list_size, + list_size); + } + + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +err_out: + kvfree(in); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c 
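mlx5dr_cmd_set_fte() sizes its command buffer as the fixed set_fte_in header plus dests_size entries of either dest_format_struct or extended_dest_format, then fills the entries by advancing in_dests by dst_cnt_size per destination. Below is a small standalone sketch of that header-plus-variable-array layout; the sk_* structures are made up and only stand in for the firmware formats.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical fixed-size layouts standing in for the command header
 * and one destination entry. */
struct sk_cmd_hdr { unsigned int table_id; unsigned int dest_count; };
struct sk_dest    { unsigned int type; unsigned int id; };

/* Allocate the header plus n trailing destination entries in one buffer
 * and fill the entries by walking a cursor, as the driver does with
 * in_dests += dst_cnt_size. */
static void *sk_build_cmd(unsigned int table_id,
			  const struct sk_dest *dests, unsigned int n,
			  size_t *out_len)
{
	size_t inlen = sizeof(struct sk_cmd_hdr) + n * sizeof(struct sk_dest);
	struct sk_cmd_hdr *hdr = calloc(1, inlen);
	unsigned char *cursor;
	unsigned int i;

	if (!hdr)
		return NULL;

	hdr->table_id = table_id;
	hdr->dest_count = n;

	cursor = (unsigned char *)(hdr + 1);
	for (i = 0; i < n; i++) {
		memcpy(cursor, &dests[i], sizeof(dests[i]));
		cursor += sizeof(struct sk_dest);
	}

	*out_len = inlen;
	return hdr;
}

int main(void)
{
	struct sk_dest dests[] = { { 1, 42 }, { 2, 7 } };
	size_t len;
	void *cmd = sk_build_cmd(3, dests, 2, &len);

	printf("command length: %zu bytes\n", len);
	free(cmd);
	return 0;
}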
b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c new file mode 100644 index 000000000000..a9da961d4d2f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -0,0 +1,390 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <linux/mlx5/eswitch.h> +#include "dr_types.h" + +static int dr_domain_init_cache(struct mlx5dr_domain *dmn) +{ + /* Per vport cached FW FT for checksum recalculation, this + * recalculation is needed due to a HW bug. + */ + dmn->cache.recalc_cs_ft = kcalloc(dmn->info.caps.num_vports, + sizeof(dmn->cache.recalc_cs_ft[0]), + GFP_KERNEL); + if (!dmn->cache.recalc_cs_ft) + return -ENOMEM; + + return 0; +} + +static void dr_domain_uninit_cache(struct mlx5dr_domain *dmn) +{ + int i; + + for (i = 0; i < dmn->info.caps.num_vports; i++) { + if (!dmn->cache.recalc_cs_ft[i]) + continue; + + mlx5dr_fw_destroy_recalc_cs_ft(dmn, dmn->cache.recalc_cs_ft[i]); + } + + kfree(dmn->cache.recalc_cs_ft); +} + +int mlx5dr_domain_cache_get_recalc_cs_ft_addr(struct mlx5dr_domain *dmn, + u32 vport_num, + u64 *rx_icm_addr) +{ + struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft; + + recalc_cs_ft = dmn->cache.recalc_cs_ft[vport_num]; + if (!recalc_cs_ft) { + /* Table not in cache, need to allocate a new one */ + recalc_cs_ft = mlx5dr_fw_create_recalc_cs_ft(dmn, vport_num); + if (!recalc_cs_ft) + return -EINVAL; + + dmn->cache.recalc_cs_ft[vport_num] = recalc_cs_ft; + } + + *rx_icm_addr = recalc_cs_ft->rx_icm_addr; + + return 0; +} + +static int dr_domain_init_resources(struct mlx5dr_domain *dmn) +{ + int ret; + + ret = mlx5_core_alloc_pd(dmn->mdev, &dmn->pdn); + if (ret) { + mlx5dr_dbg(dmn, "Couldn't allocate PD\n"); + return ret; + } + + dmn->uar = mlx5_get_uars_page(dmn->mdev); + if (!dmn->uar) { + mlx5dr_err(dmn, "Couldn't allocate UAR\n"); + ret = -ENOMEM; + goto clean_pd; + } + + dmn->ste_icm_pool = mlx5dr_icm_pool_create(dmn, DR_ICM_TYPE_STE); + if (!dmn->ste_icm_pool) { + mlx5dr_err(dmn, "Couldn't get icm memory\n"); + ret = -ENOMEM; + goto clean_uar; + } + + dmn->action_icm_pool = mlx5dr_icm_pool_create(dmn, DR_ICM_TYPE_MODIFY_ACTION); + if (!dmn->action_icm_pool) { + mlx5dr_err(dmn, "Couldn't get action icm memory\n"); + ret = -ENOMEM; + goto free_ste_icm_pool; + } + + ret = mlx5dr_send_ring_alloc(dmn); + if (ret) { + mlx5dr_err(dmn, "Couldn't create send-ring\n"); + goto free_action_icm_pool; + } + + return 0; + +free_action_icm_pool: + mlx5dr_icm_pool_destroy(dmn->action_icm_pool); +free_ste_icm_pool: + mlx5dr_icm_pool_destroy(dmn->ste_icm_pool); +clean_uar: + mlx5_put_uars_page(dmn->mdev, dmn->uar); +clean_pd: + mlx5_core_dealloc_pd(dmn->mdev, dmn->pdn); + + return ret; +} + +static void dr_domain_uninit_resources(struct mlx5dr_domain *dmn) +{ + mlx5dr_send_ring_free(dmn, dmn->send_ring); + mlx5dr_icm_pool_destroy(dmn->action_icm_pool); + mlx5dr_icm_pool_destroy(dmn->ste_icm_pool); + mlx5_put_uars_page(dmn->mdev, dmn->uar); + mlx5_core_dealloc_pd(dmn->mdev, dmn->pdn); +} + +static int dr_domain_query_vport(struct mlx5dr_domain *dmn, + bool other_vport, + u16 vport_number) +{ + struct mlx5dr_cmd_vport_cap *vport_caps; + int ret; + + vport_caps = &dmn->info.caps.vports_caps[vport_number]; + + ret = mlx5dr_cmd_query_esw_vport_context(dmn->mdev, + other_vport, + vport_number, + &vport_caps->icm_address_rx, + &vport_caps->icm_address_tx); + if (ret) + return ret; + + ret = mlx5dr_cmd_query_gvmi(dmn->mdev, + other_vport, + vport_number, + &vport_caps->vport_gvmi); + if (ret) + return ret; + + 
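mlx5dr_domain_cache_get_recalc_cs_ft_addr() above is a get-or-create lookup: the per-vport recalc-CS table is built only on first use and then reused from the domain cache. A minimal sketch of the same pattern, assuming a fixed-size array and invented sk_* helpers in place of mlx5dr_fw_create_recalc_cs_ft().

#include <stdio.h>
#include <stdlib.h>

#define SK_NUM_VPORTS 4

/* Hypothetical stand-in for the per-vport FW table cached by the domain. */
struct sk_recalc_ft {
	unsigned long long rx_icm_addr;
};

static struct sk_recalc_ft *sk_cache[SK_NUM_VPORTS];

static struct sk_recalc_ft *sk_create_ft(unsigned int vport)
{
	struct sk_recalc_ft *ft = calloc(1, sizeof(*ft));

	if (ft)
		ft->rx_icm_addr = 0x1000 + vport;	/* made-up address */
	return ft;
}

/* Get-or-create: allocate the table the first time a vport needs it,
 * then return the cached entry on every later call. */
static int sk_get_recalc_addr(unsigned int vport, unsigned long long *addr)
{
	if (vport >= SK_NUM_VPORTS)
		return -1;

	if (!sk_cache[vport]) {
		sk_cache[vport] = sk_create_ft(vport);
		if (!sk_cache[vport])
			return -1;
	}

	*addr = sk_cache[vport]->rx_icm_addr;
	return 0;
}

int main(void)
{
	unsigned long long addr = 0;

	sk_get_recalc_addr(2, &addr);	/* first call creates the entry */
	sk_get_recalc_addr(2, &addr);	/* second call hits the cache */
	printf("vport 2 rx_icm_addr: 0x%llx\n", addr);
	return 0;
}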
vport_caps->num = vport_number; + vport_caps->vhca_gvmi = dmn->info.caps.gvmi; + + return 0; +} + +static int dr_domain_query_vports(struct mlx5dr_domain *dmn) +{ + struct mlx5dr_esw_caps *esw_caps = &dmn->info.caps.esw_caps; + struct mlx5dr_cmd_vport_cap *wire_vport; + int vport; + int ret; + + /* Query vports (except wire vport) */ + for (vport = 0; vport < dmn->info.caps.num_esw_ports - 1; vport++) { + ret = dr_domain_query_vport(dmn, !!vport, vport); + if (ret) + return ret; + } + + /* Last vport is the wire port */ + wire_vport = &dmn->info.caps.vports_caps[vport]; + wire_vport->num = WIRE_PORT; + wire_vport->icm_address_rx = esw_caps->uplink_icm_address_rx; + wire_vport->icm_address_tx = esw_caps->uplink_icm_address_tx; + wire_vport->vport_gvmi = 0; + wire_vport->vhca_gvmi = dmn->info.caps.gvmi; + + return 0; +} + +static int dr_domain_query_fdb_caps(struct mlx5_core_dev *mdev, + struct mlx5dr_domain *dmn) +{ + int ret; + + if (!dmn->info.caps.eswitch_manager) + return -EOPNOTSUPP; + + ret = mlx5dr_cmd_query_esw_caps(mdev, &dmn->info.caps.esw_caps); + if (ret) + return ret; + + dmn->info.caps.fdb_sw_owner = dmn->info.caps.esw_caps.sw_owner; + dmn->info.caps.esw_rx_drop_address = dmn->info.caps.esw_caps.drop_icm_address_rx; + dmn->info.caps.esw_tx_drop_address = dmn->info.caps.esw_caps.drop_icm_address_tx; + + dmn->info.caps.vports_caps = kcalloc(dmn->info.caps.num_esw_ports, + sizeof(dmn->info.caps.vports_caps[0]), + GFP_KERNEL); + if (!dmn->info.caps.vports_caps) + return -ENOMEM; + + ret = dr_domain_query_vports(dmn); + if (ret) { + mlx5dr_dbg(dmn, "Failed to query vports caps\n"); + goto free_vports_caps; + } + + dmn->info.caps.num_vports = dmn->info.caps.num_esw_ports - 1; + + return 0; + +free_vports_caps: + kfree(dmn->info.caps.vports_caps); + dmn->info.caps.vports_caps = NULL; + return ret; +} + +static int dr_domain_caps_init(struct mlx5_core_dev *mdev, + struct mlx5dr_domain *dmn) +{ + struct mlx5dr_cmd_vport_cap *vport_cap; + int ret; + + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) { + mlx5dr_dbg(dmn, "Failed to allocate domain, bad link type\n"); + return -EOPNOTSUPP; + } + + dmn->info.caps.num_esw_ports = mlx5_eswitch_get_total_vports(mdev); + + ret = mlx5dr_cmd_query_device(mdev, &dmn->info.caps); + if (ret) + return ret; + + ret = dr_domain_query_fdb_caps(mdev, dmn); + if (ret) + return ret; + + switch (dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + if (!dmn->info.caps.rx_sw_owner) + return -ENOTSUPP; + + dmn->info.supp_sw_steering = true; + dmn->info.rx.ste_type = MLX5DR_STE_TYPE_RX; + dmn->info.rx.default_icm_addr = dmn->info.caps.nic_rx_drop_address; + dmn->info.rx.drop_icm_addr = dmn->info.caps.nic_rx_drop_address; + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + if (!dmn->info.caps.tx_sw_owner) + return -ENOTSUPP; + + dmn->info.supp_sw_steering = true; + dmn->info.tx.ste_type = MLX5DR_STE_TYPE_TX; + dmn->info.tx.default_icm_addr = dmn->info.caps.nic_tx_allow_address; + dmn->info.tx.drop_icm_addr = dmn->info.caps.nic_tx_drop_address; + break; + case MLX5DR_DOMAIN_TYPE_FDB: + if (!dmn->info.caps.eswitch_manager) + return -ENOTSUPP; + + if (!dmn->info.caps.fdb_sw_owner) + return -ENOTSUPP; + + dmn->info.rx.ste_type = MLX5DR_STE_TYPE_RX; + dmn->info.tx.ste_type = MLX5DR_STE_TYPE_TX; + vport_cap = mlx5dr_get_vport_cap(&dmn->info.caps, 0); + if (!vport_cap) { + mlx5dr_dbg(dmn, "Failed to get esw manager vport\n"); + return -ENOENT; + } + + dmn->info.supp_sw_steering = true; + dmn->info.tx.default_icm_addr = vport_cap->icm_address_tx; + 
dmn->info.rx.default_icm_addr = vport_cap->icm_address_rx; + dmn->info.rx.drop_icm_addr = dmn->info.caps.esw_rx_drop_address; + dmn->info.tx.drop_icm_addr = dmn->info.caps.esw_tx_drop_address; + break; + default: + mlx5dr_dbg(dmn, "Invalid domain\n"); + ret = -EINVAL; + break; + } + + return ret; +} + +static void dr_domain_caps_uninit(struct mlx5dr_domain *dmn) +{ + kfree(dmn->info.caps.vports_caps); +} + +struct mlx5dr_domain * +mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) +{ + struct mlx5dr_domain *dmn; + int ret; + + if (type > MLX5DR_DOMAIN_TYPE_FDB) + return NULL; + + dmn = kzalloc(sizeof(*dmn), GFP_KERNEL); + if (!dmn) + return NULL; + + dmn->mdev = mdev; + dmn->type = type; + refcount_set(&dmn->refcount, 1); + mutex_init(&dmn->mutex); + + if (dr_domain_caps_init(mdev, dmn)) { + mlx5dr_dbg(dmn, "Failed init domain, no caps\n"); + goto free_domain; + } + + dmn->info.max_log_action_icm_sz = DR_CHUNK_SIZE_4K; + dmn->info.max_log_sw_icm_sz = min_t(u32, DR_CHUNK_SIZE_1024K, + dmn->info.caps.log_icm_size); + + if (!dmn->info.supp_sw_steering) { + mlx5dr_err(dmn, "SW steering is not supported\n"); + goto uninit_caps; + } + + /* Allocate resources */ + ret = dr_domain_init_resources(dmn); + if (ret) { + mlx5dr_err(dmn, "Failed init domain resources\n"); + goto uninit_caps; + } + + ret = dr_domain_init_cache(dmn); + if (ret) { + mlx5dr_err(dmn, "Failed initialize domain cache\n"); + goto uninit_resourses; + } + + return dmn; + +uninit_resourses: + dr_domain_uninit_resources(dmn); +uninit_caps: + dr_domain_caps_uninit(dmn); +free_domain: + kfree(dmn); + return NULL; +} + +/* Assure synchronization of the device steering tables with updates made by SW + * insertion. + */ +int mlx5dr_domain_sync(struct mlx5dr_domain *dmn, u32 flags) +{ + int ret = 0; + + if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_SW) { + mutex_lock(&dmn->mutex); + ret = mlx5dr_send_ring_force_drain(dmn); + mutex_unlock(&dmn->mutex); + if (ret) + return ret; + } + + if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_HW) + ret = mlx5dr_cmd_sync_steering(dmn->mdev); + + return ret; +} + +int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn) +{ + if (refcount_read(&dmn->refcount) > 1) + return -EBUSY; + + /* make sure resources are not used by the hardware */ + mlx5dr_cmd_sync_steering(dmn->mdev); + dr_domain_uninit_cache(dmn); + dr_domain_uninit_resources(dmn); + dr_domain_caps_uninit(dmn); + mutex_destroy(&dmn->mutex); + kfree(dmn); + return 0; +} + +void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, + struct mlx5dr_domain *peer_dmn) +{ + mutex_lock(&dmn->mutex); + + if (dmn->peer_dmn) + refcount_dec(&dmn->peer_dmn->refcount); + + dmn->peer_dmn = peer_dmn; + + if (dmn->peer_dmn) + refcount_inc(&dmn->peer_dmn->refcount); + + mutex_unlock(&dmn->mutex); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c new file mode 100644 index 000000000000..1fbcd012bb85 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
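mlx5dr_domain_sync() above honours two independent flags: MLX5DR_DOMAIN_SYNC_FLAGS_SW drains the software send ring and MLX5DR_DOMAIN_SYNC_FLAGS_HW issues the FW sync_steering command, returning early if the SW drain fails. The following is a compact standalone sketch of that flag handling with the locking omitted; the sk_* stubs only print instead of touching hardware.

#include <stdio.h>

#define SK_SYNC_FLAGS_SW (1 << 0)
#define SK_SYNC_FLAGS_HW (1 << 1)

/* Stand-ins for the two sync mechanisms: draining the SW send ring and
 * issuing the firmware sync command. */
static int sk_drain_send_ring(void)   { puts("drain send ring");  return 0; }
static int sk_cmd_sync_steering(void) { puts("FW sync_steering"); return 0; }

/* Honour each requested flag in order and stop early if the SW drain
 * fails, mirroring mlx5dr_domain_sync(). */
static int sk_domain_sync(unsigned int flags)
{
	int ret = 0;

	if (flags & SK_SYNC_FLAGS_SW) {
		ret = sk_drain_send_ring();
		if (ret)
			return ret;
	}

	if (flags & SK_SYNC_FLAGS_HW)
		ret = sk_cmd_sync_steering();

	return ret;
}

int main(void)
{
	return sk_domain_sync(SK_SYNC_FLAGS_SW | SK_SYNC_FLAGS_HW);
}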
*/ + +#include <linux/types.h> +#include "dr_types.h" + +struct mlx5dr_fw_recalc_cs_ft * +mlx5dr_fw_create_recalc_cs_ft(struct mlx5dr_domain *dmn, u32 vport_num) +{ + struct mlx5dr_cmd_create_flow_table_attr ft_attr = {}; + struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft; + u32 table_id, group_id, modify_hdr_id; + u64 rx_icm_addr, modify_ttl_action; + int ret; + + recalc_cs_ft = kzalloc(sizeof(*recalc_cs_ft), GFP_KERNEL); + if (!recalc_cs_ft) + return NULL; + + ft_attr.table_type = MLX5_FLOW_TABLE_TYPE_FDB; + ft_attr.level = dmn->info.caps.max_ft_level - 1; + ft_attr.term_tbl = true; + + ret = mlx5dr_cmd_create_flow_table(dmn->mdev, + &ft_attr, + &rx_icm_addr, + &table_id); + if (ret) { + mlx5dr_err(dmn, "Failed creating TTL W/A FW flow table %d\n", ret); + goto free_ttl_tbl; + } + + ret = mlx5dr_cmd_create_empty_flow_group(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + table_id, &group_id); + if (ret) { + mlx5dr_err(dmn, "Failed creating TTL W/A FW flow group %d\n", ret); + goto destroy_flow_table; + } + + /* Modify TTL action by adding zero to trigger CS recalculation */ + modify_ttl_action = 0; + MLX5_SET(set_action_in, &modify_ttl_action, action_type, MLX5_ACTION_TYPE_ADD); + MLX5_SET(set_action_in, &modify_ttl_action, field, MLX5_ACTION_IN_FIELD_OUT_IP_TTL); + + ret = mlx5dr_cmd_alloc_modify_header(dmn->mdev, MLX5_FLOW_TABLE_TYPE_FDB, 1, + &modify_ttl_action, + &modify_hdr_id); + if (ret) { + mlx5dr_err(dmn, "Failed modify header TTL %d\n", ret); + goto destroy_flow_group; + } + + ret = mlx5dr_cmd_set_fte_modify_and_vport(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + table_id, group_id, modify_hdr_id, + vport_num); + if (ret) { + mlx5dr_err(dmn, "Failed setting TTL W/A flow table entry %d\n", ret); + goto dealloc_modify_header; + } + + recalc_cs_ft->modify_hdr_id = modify_hdr_id; + recalc_cs_ft->rx_icm_addr = rx_icm_addr; + recalc_cs_ft->table_id = table_id; + recalc_cs_ft->group_id = group_id; + + return recalc_cs_ft; + +dealloc_modify_header: + mlx5dr_cmd_dealloc_modify_header(dmn->mdev, modify_hdr_id); +destroy_flow_group: + mlx5dr_cmd_destroy_flow_group(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + table_id, group_id); +destroy_flow_table: + mlx5dr_cmd_destroy_flow_table(dmn->mdev, table_id, MLX5_FLOW_TABLE_TYPE_FDB); +free_ttl_tbl: + kfree(recalc_cs_ft); + return NULL; +} + +void mlx5dr_fw_destroy_recalc_cs_ft(struct mlx5dr_domain *dmn, + struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft) +{ + mlx5dr_cmd_del_flow_table_entry(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + recalc_cs_ft->table_id); + mlx5dr_cmd_dealloc_modify_header(dmn->mdev, recalc_cs_ft->modify_hdr_id); + mlx5dr_cmd_destroy_flow_group(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + recalc_cs_ft->table_id, + recalc_cs_ft->group_id); + mlx5dr_cmd_destroy_flow_table(dmn->mdev, + recalc_cs_ft->table_id, + MLX5_FLOW_TABLE_TYPE_FDB); + + kfree(recalc_cs_ft); +} + +int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn, + struct mlx5dr_cmd_flow_destination_hw_info *dest, + int num_dest, + bool reformat_req, + u32 *tbl_id, + u32 *group_id) +{ + struct mlx5dr_cmd_create_flow_table_attr ft_attr = {}; + struct mlx5dr_cmd_fte_info fte_info = {}; + u32 val[MLX5_ST_SZ_DW_MATCH_PARAM] = {}; + struct mlx5dr_cmd_ft_info ft_info = {}; + int ret; + + ft_attr.table_type = MLX5_FLOW_TABLE_TYPE_FDB; + ft_attr.level = dmn->info.caps.max_ft_level - 2; + ft_attr.reformat_en = reformat_req; + ft_attr.decap_en = reformat_req; + + ret = mlx5dr_cmd_create_flow_table(dmn->mdev, &ft_attr, NULL, tbl_id); + if (ret) { + mlx5dr_err(dmn, "Failed creating multi dest FW flow table %d\n", 
ret); + return ret; + } + + ret = mlx5dr_cmd_create_empty_flow_group(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + *tbl_id, group_id); + if (ret) { + mlx5dr_err(dmn, "Failed creating multi dest FW flow group %d\n", ret); + goto free_flow_table; + } + + ft_info.id = *tbl_id; + ft_info.type = FS_FT_FDB; + fte_info.action.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + fte_info.dests_size = num_dest; + fte_info.val = val; + fte_info.dest_arr = dest; + + ret = mlx5dr_cmd_set_fte(dmn->mdev, 0, 0, &ft_info, *group_id, &fte_info); + if (ret) { + mlx5dr_err(dmn, "Failed setting fte into table %d\n", ret); + goto free_flow_group; + } + + return 0; + +free_flow_group: + mlx5dr_cmd_destroy_flow_group(dmn->mdev, MLX5_FLOW_TABLE_TYPE_FDB, + *tbl_id, *group_id); +free_flow_table: + mlx5dr_cmd_destroy_flow_table(dmn->mdev, *tbl_id, + MLX5_FLOW_TABLE_TYPE_FDB); + return ret; +} + +void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn, + u32 tbl_id, u32 group_id) +{ + mlx5dr_cmd_del_flow_table_entry(dmn->mdev, FS_FT_FDB, tbl_id); + mlx5dr_cmd_destroy_flow_group(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + tbl_id, group_id); + mlx5dr_cmd_destroy_flow_table(dmn->mdev, tbl_id, + MLX5_FLOW_TABLE_TYPE_FDB); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c new file mode 100644 index 000000000000..d7c7467e2d53 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c @@ -0,0 +1,571 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "dr_types.h" + +#define DR_ICM_MODIFY_HDR_ALIGN_BASE 64 +#define DR_ICM_SYNC_THRESHOLD (64 * 1024 * 1024) + +struct mlx5dr_icm_pool; + +struct mlx5dr_icm_bucket { + struct mlx5dr_icm_pool *pool; + + /* Chunks that aren't visible to HW not directly and not in cache */ + struct list_head free_list; + unsigned int free_list_count; + + /* Used chunks, HW may be accessing this memory */ + struct list_head used_list; + unsigned int used_list_count; + + /* HW may be accessing this memory but at some future, + * undetermined time, it might cease to do so. Before deciding to call + * sync_ste, this list is moved to sync_list + */ + struct list_head hot_list; + unsigned int hot_list_count; + + /* Pending sync list, entries from the hot list are moved to this list. 
+ * sync_ste is executed and then sync_list is concatenated to the free list + */ + struct list_head sync_list; + unsigned int sync_list_count; + + u32 total_chunks; + u32 num_of_entries; + u32 entry_size; + /* protect the ICM bucket */ + struct mutex mutex; +}; + +struct mlx5dr_icm_pool { + struct mlx5dr_icm_bucket *buckets; + enum mlx5dr_icm_type icm_type; + enum mlx5dr_icm_chunk_size max_log_chunk_sz; + enum mlx5dr_icm_chunk_size num_of_buckets; + struct list_head icm_mr_list; + /* protect the ICM MR list */ + struct mutex mr_mutex; + struct mlx5dr_domain *dmn; +}; + +struct mlx5dr_icm_dm { + u32 obj_id; + enum mlx5_sw_icm_type type; + phys_addr_t addr; + size_t length; +}; + +struct mlx5dr_icm_mr { + struct mlx5dr_icm_pool *pool; + struct mlx5_core_mkey mkey; + struct mlx5dr_icm_dm dm; + size_t used_length; + size_t length; + u64 icm_start_addr; + struct list_head mr_list; +}; + +static int dr_icm_create_dm_mkey(struct mlx5_core_dev *mdev, + u32 pd, u64 length, u64 start_addr, int mode, + struct mlx5_core_mkey *mkey) +{ + u32 inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {}; + void *mkc; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + MLX5_SET(mkc, mkc, access_mode_1_0, mode); + MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + if (mode == MLX5_MKC_ACCESS_MODE_SW_ICM) { + MLX5_SET(mkc, mkc, rw, 1); + MLX5_SET(mkc, mkc, rr, 1); + } + + MLX5_SET64(mkc, mkc, len, length); + MLX5_SET(mkc, mkc, pd, pd); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET64(mkc, mkc, start_addr, start_addr); + + return mlx5_core_create_mkey(mdev, mkey, in, inlen); +} + +static struct mlx5dr_icm_mr * +dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool, + enum mlx5_sw_icm_type type, + size_t align_base) +{ + struct mlx5_core_dev *mdev = pool->dmn->mdev; + struct mlx5dr_icm_mr *icm_mr; + size_t align_diff; + int err; + + icm_mr = kvzalloc(sizeof(*icm_mr), GFP_KERNEL); + if (!icm_mr) + return NULL; + + icm_mr->pool = pool; + INIT_LIST_HEAD(&icm_mr->mr_list); + + icm_mr->dm.type = type; + + /* 2^log_biggest_table * entry-size * double-for-alignment */ + icm_mr->dm.length = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, + pool->icm_type) * 2; + + err = mlx5_dm_sw_icm_alloc(mdev, icm_mr->dm.type, icm_mr->dm.length, 0, + &icm_mr->dm.addr, &icm_mr->dm.obj_id); + if (err) { + mlx5dr_err(pool->dmn, "Failed to allocate SW ICM memory, err (%d)\n", err); + goto free_icm_mr; + } + + /* Register device memory */ + err = dr_icm_create_dm_mkey(mdev, pool->dmn->pdn, + icm_mr->dm.length, + icm_mr->dm.addr, + MLX5_MKC_ACCESS_MODE_SW_ICM, + &icm_mr->mkey); + if (err) { + mlx5dr_err(pool->dmn, "Failed to create SW ICM MKEY, err (%d)\n", err); + goto free_dm; + } + + icm_mr->icm_start_addr = icm_mr->dm.addr; + + /* align_base is always a power of 2 */ + align_diff = icm_mr->icm_start_addr & (align_base - 1); + if (align_diff) + icm_mr->used_length = align_base - align_diff; + + list_add_tail(&icm_mr->mr_list, &pool->icm_mr_list); + + return icm_mr; + +free_dm: + mlx5_dm_sw_icm_dealloc(mdev, icm_mr->dm.type, icm_mr->dm.length, 0, + icm_mr->dm.addr, icm_mr->dm.obj_id); +free_icm_mr: + kvfree(icm_mr); + return NULL; +} + +static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr) +{ + struct mlx5_core_dev *mdev = icm_mr->pool->dmn->mdev; + struct mlx5dr_icm_dm *dm = &icm_mr->dm; + + list_del(&icm_mr->mr_list); + mlx5_core_destroy_mkey(mdev, &icm_mr->mkey); + mlx5_dm_sw_icm_dealloc(mdev, dm->type, 
dm->length, 0, + dm->addr, dm->obj_id); + kvfree(icm_mr); +} + +static int dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk) +{ + struct mlx5dr_icm_bucket *bucket = chunk->bucket; + + chunk->ste_arr = kvzalloc(bucket->num_of_entries * + sizeof(chunk->ste_arr[0]), GFP_KERNEL); + if (!chunk->ste_arr) + return -ENOMEM; + + chunk->hw_ste_arr = kvzalloc(bucket->num_of_entries * + DR_STE_SIZE_REDUCED, GFP_KERNEL); + if (!chunk->hw_ste_arr) + goto out_free_ste_arr; + + chunk->miss_list = kvmalloc(bucket->num_of_entries * + sizeof(chunk->miss_list[0]), GFP_KERNEL); + if (!chunk->miss_list) + goto out_free_hw_ste_arr; + + return 0; + +out_free_hw_ste_arr: + kvfree(chunk->hw_ste_arr); +out_free_ste_arr: + kvfree(chunk->ste_arr); + return -ENOMEM; +} + +static int dr_icm_chunks_create(struct mlx5dr_icm_bucket *bucket) +{ + size_t mr_free_size, mr_req_size, mr_row_size; + struct mlx5dr_icm_pool *pool = bucket->pool; + struct mlx5dr_icm_mr *icm_mr = NULL; + struct mlx5dr_icm_chunk *chunk; + enum mlx5_sw_icm_type dm_type; + size_t align_base; + int i, err = 0; + + mr_req_size = bucket->num_of_entries * bucket->entry_size; + mr_row_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, + pool->icm_type); + + if (pool->icm_type == DR_ICM_TYPE_STE) { + dm_type = MLX5_SW_ICM_TYPE_STEERING; + /* Align base is the biggest chunk size / row size */ + align_base = mr_row_size; + } else { + dm_type = MLX5_SW_ICM_TYPE_HEADER_MODIFY; + /* Align base is 64B */ + align_base = DR_ICM_MODIFY_HDR_ALIGN_BASE; + } + + mutex_lock(&pool->mr_mutex); + if (!list_empty(&pool->icm_mr_list)) { + icm_mr = list_last_entry(&pool->icm_mr_list, + struct mlx5dr_icm_mr, mr_list); + + if (icm_mr) + mr_free_size = icm_mr->dm.length - icm_mr->used_length; + } + + if (!icm_mr || mr_free_size < mr_row_size) { + icm_mr = dr_icm_pool_mr_create(pool, dm_type, align_base); + if (!icm_mr) { + err = -ENOMEM; + goto out_err; + } + } + + /* Create memory aligned chunks */ + for (i = 0; i < mr_row_size / mr_req_size; i++) { + chunk = kvzalloc(sizeof(*chunk), GFP_KERNEL); + if (!chunk) { + err = -ENOMEM; + goto out_err; + } + + chunk->bucket = bucket; + chunk->rkey = icm_mr->mkey.key; + /* mr start addr is zero based */ + chunk->mr_addr = icm_mr->used_length; + chunk->icm_addr = (uintptr_t)icm_mr->icm_start_addr + icm_mr->used_length; + icm_mr->used_length += mr_req_size; + chunk->num_of_entries = bucket->num_of_entries; + chunk->byte_size = chunk->num_of_entries * bucket->entry_size; + + if (pool->icm_type == DR_ICM_TYPE_STE) { + err = dr_icm_chunk_ste_init(chunk); + if (err) + goto out_free_chunk; + } + + INIT_LIST_HEAD(&chunk->chunk_list); + list_add(&chunk->chunk_list, &bucket->free_list); + bucket->free_list_count++; + bucket->total_chunks++; + } + mutex_unlock(&pool->mr_mutex); + return 0; + +out_free_chunk: + kvfree(chunk); +out_err: + mutex_unlock(&pool->mr_mutex); + return err; +} + +static void dr_icm_chunk_ste_cleanup(struct mlx5dr_icm_chunk *chunk) +{ + kvfree(chunk->miss_list); + kvfree(chunk->hw_ste_arr); + kvfree(chunk->ste_arr); +} + +static void dr_icm_chunk_destroy(struct mlx5dr_icm_chunk *chunk) +{ + struct mlx5dr_icm_bucket *bucket = chunk->bucket; + + list_del(&chunk->chunk_list); + bucket->total_chunks--; + + if (bucket->pool->icm_type == DR_ICM_TYPE_STE) + dr_icm_chunk_ste_cleanup(chunk); + + kvfree(chunk); +} + +static void dr_icm_bucket_init(struct mlx5dr_icm_pool *pool, + struct mlx5dr_icm_bucket *bucket, + enum mlx5dr_icm_chunk_size chunk_size) +{ + if (pool->icm_type == DR_ICM_TYPE_STE) + bucket->entry_size 
= DR_STE_SIZE; + else + bucket->entry_size = DR_MODIFY_ACTION_SIZE; + + bucket->num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(chunk_size); + bucket->pool = pool; + mutex_init(&bucket->mutex); + INIT_LIST_HEAD(&bucket->free_list); + INIT_LIST_HEAD(&bucket->used_list); + INIT_LIST_HEAD(&bucket->hot_list); + INIT_LIST_HEAD(&bucket->sync_list); +} + +static void dr_icm_bucket_cleanup(struct mlx5dr_icm_bucket *bucket) +{ + struct mlx5dr_icm_chunk *chunk, *next; + + mutex_destroy(&bucket->mutex); + list_splice_tail_init(&bucket->sync_list, &bucket->free_list); + list_splice_tail_init(&bucket->hot_list, &bucket->free_list); + + list_for_each_entry_safe(chunk, next, &bucket->free_list, chunk_list) + dr_icm_chunk_destroy(chunk); + + WARN_ON(bucket->total_chunks != 0); + + /* Cleanup of unreturned chunks */ + list_for_each_entry_safe(chunk, next, &bucket->used_list, chunk_list) + dr_icm_chunk_destroy(chunk); +} + +static u64 dr_icm_hot_mem_size(struct mlx5dr_icm_pool *pool) +{ + u64 hot_size = 0; + int chunk_order; + + for (chunk_order = 0; chunk_order < pool->num_of_buckets; chunk_order++) + hot_size += pool->buckets[chunk_order].hot_list_count * + mlx5dr_icm_pool_chunk_size_to_byte(chunk_order, pool->icm_type); + + return hot_size; +} + +static bool dr_icm_reuse_hot_entries(struct mlx5dr_icm_pool *pool, + struct mlx5dr_icm_bucket *bucket) +{ + u64 bytes_for_sync; + + bytes_for_sync = dr_icm_hot_mem_size(pool); + if (bytes_for_sync < DR_ICM_SYNC_THRESHOLD || !bucket->hot_list_count) + return false; + + return true; +} + +static void dr_icm_chill_bucket_start(struct mlx5dr_icm_bucket *bucket) +{ + list_splice_tail_init(&bucket->hot_list, &bucket->sync_list); + bucket->sync_list_count += bucket->hot_list_count; + bucket->hot_list_count = 0; +} + +static void dr_icm_chill_bucket_end(struct mlx5dr_icm_bucket *bucket) +{ + list_splice_tail_init(&bucket->sync_list, &bucket->free_list); + bucket->free_list_count += bucket->sync_list_count; + bucket->sync_list_count = 0; +} + +static void dr_icm_chill_bucket_abort(struct mlx5dr_icm_bucket *bucket) +{ + list_splice_tail_init(&bucket->sync_list, &bucket->hot_list); + bucket->hot_list_count += bucket->sync_list_count; + bucket->sync_list_count = 0; +} + +static void dr_icm_chill_buckets_start(struct mlx5dr_icm_pool *pool, + struct mlx5dr_icm_bucket *cb, + bool buckets[DR_CHUNK_SIZE_MAX]) +{ + struct mlx5dr_icm_bucket *bucket; + int i; + + for (i = 0; i < pool->num_of_buckets; i++) { + bucket = &pool->buckets[i]; + if (bucket == cb) { + dr_icm_chill_bucket_start(bucket); + continue; + } + + /* Freeing the mutex is done at the end of that process, after + * sync_ste was executed at dr_icm_chill_buckets_end func. 
+ */ + if (mutex_trylock(&bucket->mutex)) { + dr_icm_chill_bucket_start(bucket); + buckets[i] = true; + } + } +} + +static void dr_icm_chill_buckets_end(struct mlx5dr_icm_pool *pool, + struct mlx5dr_icm_bucket *cb, + bool buckets[DR_CHUNK_SIZE_MAX]) +{ + struct mlx5dr_icm_bucket *bucket; + int i; + + for (i = 0; i < pool->num_of_buckets; i++) { + bucket = &pool->buckets[i]; + if (bucket == cb) { + dr_icm_chill_bucket_end(bucket); + continue; + } + + if (!buckets[i]) + continue; + + dr_icm_chill_bucket_end(bucket); + mutex_unlock(&bucket->mutex); + } +} + +static void dr_icm_chill_buckets_abort(struct mlx5dr_icm_pool *pool, + struct mlx5dr_icm_bucket *cb, + bool buckets[DR_CHUNK_SIZE_MAX]) +{ + struct mlx5dr_icm_bucket *bucket; + int i; + + for (i = 0; i < pool->num_of_buckets; i++) { + bucket = &pool->buckets[i]; + if (bucket == cb) { + dr_icm_chill_bucket_abort(bucket); + continue; + } + + if (!buckets[i]) + continue; + + dr_icm_chill_bucket_abort(bucket); + mutex_unlock(&bucket->mutex); + } +} + +/* Allocate an ICM chunk, each chunk holds a piece of ICM memory and + * also memory used for HW STE management for optimizations. + */ +struct mlx5dr_icm_chunk * +mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size) +{ + struct mlx5dr_icm_chunk *chunk = NULL; /* Fix compilation warning */ + bool buckets[DR_CHUNK_SIZE_MAX] = {}; + struct mlx5dr_icm_bucket *bucket; + int err; + + if (chunk_size > pool->max_log_chunk_sz) + return NULL; + + bucket = &pool->buckets[chunk_size]; + + mutex_lock(&bucket->mutex); + + /* Take chunk from pool if available, otherwise allocate new chunks */ + if (list_empty(&bucket->free_list)) { + if (dr_icm_reuse_hot_entries(pool, bucket)) { + dr_icm_chill_buckets_start(pool, bucket, buckets); + err = mlx5dr_cmd_sync_steering(pool->dmn->mdev); + if (err) { + dr_icm_chill_buckets_abort(pool, bucket, buckets); + mlx5dr_dbg(pool->dmn, "Sync_steering failed\n"); + chunk = NULL; + goto out; + } + dr_icm_chill_buckets_end(pool, bucket, buckets); + } else { + dr_icm_chunks_create(bucket); + } + } + + if (!list_empty(&bucket->free_list)) { + chunk = list_last_entry(&bucket->free_list, + struct mlx5dr_icm_chunk, + chunk_list); + if (chunk) { + list_del_init(&chunk->chunk_list); + list_add_tail(&chunk->chunk_list, &bucket->used_list); + bucket->free_list_count--; + bucket->used_list_count++; + } + } +out: + mutex_unlock(&bucket->mutex); + return chunk; +} + +void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk) +{ + struct mlx5dr_icm_bucket *bucket = chunk->bucket; + + if (bucket->pool->icm_type == DR_ICM_TYPE_STE) { + memset(chunk->ste_arr, 0, + bucket->num_of_entries * sizeof(chunk->ste_arr[0])); + memset(chunk->hw_ste_arr, 0, + bucket->num_of_entries * DR_STE_SIZE_REDUCED); + } + + mutex_lock(&bucket->mutex); + list_del_init(&chunk->chunk_list); + list_add_tail(&chunk->chunk_list, &bucket->hot_list); + bucket->hot_list_count++; + bucket->used_list_count--; + mutex_unlock(&bucket->mutex); +} + +struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn, + enum mlx5dr_icm_type icm_type) +{ + enum mlx5dr_icm_chunk_size max_log_chunk_sz; + struct mlx5dr_icm_pool *pool; + int i; + + if (icm_type == DR_ICM_TYPE_STE) + max_log_chunk_sz = dmn->info.max_log_sw_icm_sz; + else + max_log_chunk_sz = dmn->info.max_log_action_icm_sz; + + pool = kvzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + return NULL; + + pool->buckets = kcalloc(max_log_chunk_sz + 1, + sizeof(pool->buckets[0]), + GFP_KERNEL); + if (!pool->buckets) + goto 
free_pool; + + pool->dmn = dmn; + pool->icm_type = icm_type; + pool->max_log_chunk_sz = max_log_chunk_sz; + pool->num_of_buckets = max_log_chunk_sz + 1; + INIT_LIST_HEAD(&pool->icm_mr_list); + + for (i = 0; i < pool->num_of_buckets; i++) + dr_icm_bucket_init(pool, &pool->buckets[i], i); + + mutex_init(&pool->mr_mutex); + + return pool; + +free_pool: + kvfree(pool); + return NULL; +} + +void mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool) +{ + struct mlx5dr_icm_mr *icm_mr, *next; + int i; + + mutex_destroy(&pool->mr_mutex); + + list_for_each_entry_safe(icm_mr, next, &pool->icm_mr_list, mr_list) + dr_icm_pool_mr_destroy(icm_mr); + + for (i = 0; i < pool->num_of_buckets; i++) + dr_icm_bucket_cleanup(&pool->buckets[i]); + + kfree(pool->buckets); + kvfree(pool); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c new file mode 100644 index 000000000000..c6dbd856df94 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c @@ -0,0 +1,804 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "dr_types.h" + +static bool dr_mask_is_smac_set(struct mlx5dr_match_spec *spec) +{ + return (spec->smac_47_16 || spec->smac_15_0); +} + +static bool dr_mask_is_dmac_set(struct mlx5dr_match_spec *spec) +{ + return (spec->dmac_47_16 || spec->dmac_15_0); +} + +static bool dr_mask_is_src_addr_set(struct mlx5dr_match_spec *spec) +{ + return (spec->src_ip_127_96 || spec->src_ip_95_64 || + spec->src_ip_63_32 || spec->src_ip_31_0); +} + +static bool dr_mask_is_dst_addr_set(struct mlx5dr_match_spec *spec) +{ + return (spec->dst_ip_127_96 || spec->dst_ip_95_64 || + spec->dst_ip_63_32 || spec->dst_ip_31_0); +} + +static bool dr_mask_is_l3_base_set(struct mlx5dr_match_spec *spec) +{ + return (spec->ip_protocol || spec->frag || spec->tcp_flags || + spec->ip_ecn || spec->ip_dscp); +} + +static bool dr_mask_is_tcp_udp_base_set(struct mlx5dr_match_spec *spec) +{ + return (spec->tcp_sport || spec->tcp_dport || + spec->udp_sport || spec->udp_dport); +} + +static bool dr_mask_is_ipv4_set(struct mlx5dr_match_spec *spec) +{ + return (spec->dst_ip_31_0 || spec->src_ip_31_0); +} + +static bool dr_mask_is_ipv4_5_tuple_set(struct mlx5dr_match_spec *spec) +{ + return (dr_mask_is_l3_base_set(spec) || + dr_mask_is_tcp_udp_base_set(spec) || + dr_mask_is_ipv4_set(spec)); +} + +static bool dr_mask_is_eth_l2_tnl_set(struct mlx5dr_match_misc *misc) +{ + return misc->vxlan_vni; +} + +static bool dr_mask_is_ttl_set(struct mlx5dr_match_spec *spec) +{ + return spec->ttl_hoplimit; +} + +#define DR_MASK_IS_L2_DST(_spec, _misc, _inner_outer) (_spec.first_vid || \ + (_spec).first_cfi || (_spec).first_prio || (_spec).cvlan_tag || \ + (_spec).svlan_tag || (_spec).dmac_47_16 || (_spec).dmac_15_0 || \ + (_spec).ethertype || (_spec).ip_version || \ + (_misc)._inner_outer##_second_vid || \ + (_misc)._inner_outer##_second_cfi || \ + (_misc)._inner_outer##_second_prio || \ + (_misc)._inner_outer##_second_cvlan_tag || \ + (_misc)._inner_outer##_second_svlan_tag) + +#define DR_MASK_IS_ETH_L4_SET(_spec, _misc, _inner_outer) ( \ + dr_mask_is_l3_base_set(&(_spec)) || \ + dr_mask_is_tcp_udp_base_set(&(_spec)) || \ + dr_mask_is_ttl_set(&(_spec)) || \ + (_misc)._inner_outer##_ipv6_flow_label) + +#define DR_MASK_IS_ETH_L4_MISC_SET(_misc3, _inner_outer) ( \ + (_misc3)._inner_outer##_tcp_seq_num || \ + (_misc3)._inner_outer##_tcp_ack_num) + +#define DR_MASK_IS_FIRST_MPLS_SET(_misc2, 
_inner_outer) ( \ + (_misc2)._inner_outer##_first_mpls_label || \ + (_misc2)._inner_outer##_first_mpls_exp || \ + (_misc2)._inner_outer##_first_mpls_s_bos || \ + (_misc2)._inner_outer##_first_mpls_ttl) + +static bool dr_mask_is_gre_set(struct mlx5dr_match_misc *misc) +{ + return (misc->gre_key_h || misc->gre_key_l || + misc->gre_protocol || misc->gre_c_present || + misc->gre_k_present || misc->gre_s_present); +} + +#define DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET(_misc2, gre_udp) ( \ + (_misc2).outer_first_mpls_over_##gre_udp##_label || \ + (_misc2).outer_first_mpls_over_##gre_udp##_exp || \ + (_misc2).outer_first_mpls_over_##gre_udp##_s_bos || \ + (_misc2).outer_first_mpls_over_##gre_udp##_ttl) + +#define DR_MASK_IS_FLEX_PARSER_0_SET(_misc2) ( \ + DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET((_misc2), gre) || \ + DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET((_misc2), udp)) + +static bool +dr_mask_is_misc3_vxlan_gpe_set(struct mlx5dr_match_misc3 *misc3) +{ + return (misc3->outer_vxlan_gpe_vni || + misc3->outer_vxlan_gpe_next_protocol || + misc3->outer_vxlan_gpe_flags); +} + +static bool +dr_matcher_supp_flex_parser_vxlan_gpe(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & + MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED; +} + +static bool +dr_mask_is_flex_parser_tnl_vxlan_gpe_set(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return dr_mask_is_misc3_vxlan_gpe_set(&mask->misc3) && + dr_matcher_supp_flex_parser_vxlan_gpe(&dmn->info.caps); +} + +static bool dr_mask_is_misc_geneve_set(struct mlx5dr_match_misc *misc) +{ + return misc->geneve_vni || + misc->geneve_oam || + misc->geneve_protocol_type || + misc->geneve_opt_len; +} + +static bool +dr_matcher_supp_flex_parser_geneve(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & + MLX5_FLEX_PARSER_GENEVE_ENABLED; +} + +static bool +dr_mask_is_flex_parser_tnl_geneve_set(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return dr_mask_is_misc_geneve_set(&mask->misc) && + dr_matcher_supp_flex_parser_geneve(&dmn->info.caps); +} + +static bool dr_mask_is_flex_parser_icmpv6_set(struct mlx5dr_match_misc3 *misc3) +{ + return (misc3->icmpv6_type || misc3->icmpv6_code || + misc3->icmpv6_header_data); +} + +static bool dr_mask_is_wqe_metadata_set(struct mlx5dr_match_misc2 *misc2) +{ + return misc2->metadata_reg_a; +} + +static bool dr_mask_is_reg_c_0_3_set(struct mlx5dr_match_misc2 *misc2) +{ + return (misc2->metadata_reg_c_0 || misc2->metadata_reg_c_1 || + misc2->metadata_reg_c_2 || misc2->metadata_reg_c_3); +} + +static bool dr_mask_is_reg_c_4_7_set(struct mlx5dr_match_misc2 *misc2) +{ + return (misc2->metadata_reg_c_4 || misc2->metadata_reg_c_5 || + misc2->metadata_reg_c_6 || misc2->metadata_reg_c_7); +} + +static bool dr_mask_is_gvmi_or_qpn_set(struct mlx5dr_match_misc *misc) +{ + return (misc->source_sqn || misc->source_port); +} + +int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + enum mlx5dr_ipv outer_ipv, + enum mlx5dr_ipv inner_ipv) +{ + nic_matcher->ste_builder = + nic_matcher->ste_builder_arr[outer_ipv][inner_ipv]; + nic_matcher->num_of_builders = + nic_matcher->num_of_builders_arr[outer_ipv][inner_ipv]; + + if (!nic_matcher->num_of_builders) { + mlx5dr_dbg(matcher->tbl->dmn, + "Rule not supported on this matcher due to IP related fields\n"); + return -EINVAL; + } + + return 0; +} + +static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + enum mlx5dr_ipv outer_ipv, + enum mlx5dr_ipv 
inner_ipv) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_match_param mask = {}; + struct mlx5dr_match_misc3 *misc3; + struct mlx5dr_ste_build *sb; + bool inner, rx; + int idx = 0; + int ret, i; + + sb = nic_matcher->ste_builder_arr[outer_ipv][inner_ipv]; + rx = nic_dmn->ste_type == MLX5DR_STE_TYPE_RX; + + /* Create a temporary mask to track and clear used mask fields */ + if (matcher->match_criteria & DR_MATCHER_CRITERIA_OUTER) + mask.outer = matcher->mask.outer; + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC) + mask.misc = matcher->mask.misc; + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_INNER) + mask.inner = matcher->mask.inner; + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC2) + mask.misc2 = matcher->mask.misc2; + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC3) + mask.misc3 = matcher->mask.misc3; + + ret = mlx5dr_ste_build_pre_check(dmn, matcher->match_criteria, + &matcher->mask, NULL); + if (ret) + return ret; + + /* Outer */ + if (matcher->match_criteria & (DR_MATCHER_CRITERIA_OUTER | + DR_MATCHER_CRITERIA_MISC | + DR_MATCHER_CRITERIA_MISC2 | + DR_MATCHER_CRITERIA_MISC3)) { + inner = false; + + if (dr_mask_is_wqe_metadata_set(&mask.misc2)) + mlx5dr_ste_build_general_purpose(&sb[idx++], &mask, inner, rx); + + if (dr_mask_is_reg_c_0_3_set(&mask.misc2)) + mlx5dr_ste_build_register_0(&sb[idx++], &mask, inner, rx); + + if (dr_mask_is_reg_c_4_7_set(&mask.misc2)) + mlx5dr_ste_build_register_1(&sb[idx++], &mask, inner, rx); + + if (dr_mask_is_gvmi_or_qpn_set(&mask.misc) && + (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || + dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX)) { + ret = mlx5dr_ste_build_src_gvmi_qpn(&sb[idx++], &mask, + dmn, inner, rx); + if (ret) + return ret; + } + + if (dr_mask_is_smac_set(&mask.outer) && + dr_mask_is_dmac_set(&mask.outer)) { + ret = mlx5dr_ste_build_eth_l2_src_des(&sb[idx++], &mask, + inner, rx); + if (ret) + return ret; + } + + if (dr_mask_is_smac_set(&mask.outer)) + mlx5dr_ste_build_eth_l2_src(&sb[idx++], &mask, inner, rx); + + if (DR_MASK_IS_L2_DST(mask.outer, mask.misc, outer)) + mlx5dr_ste_build_eth_l2_dst(&sb[idx++], &mask, inner, rx); + + if (outer_ipv == DR_RULE_IPV6) { + if (dr_mask_is_dst_addr_set(&mask.outer)) + mlx5dr_ste_build_eth_l3_ipv6_dst(&sb[idx++], &mask, + inner, rx); + + if (dr_mask_is_src_addr_set(&mask.outer)) + mlx5dr_ste_build_eth_l3_ipv6_src(&sb[idx++], &mask, + inner, rx); + + if (DR_MASK_IS_ETH_L4_SET(mask.outer, mask.misc, outer)) + mlx5dr_ste_build_ipv6_l3_l4(&sb[idx++], &mask, + inner, rx); + } else { + if (dr_mask_is_ipv4_5_tuple_set(&mask.outer)) + mlx5dr_ste_build_eth_l3_ipv4_5_tuple(&sb[idx++], &mask, + inner, rx); + + if (dr_mask_is_ttl_set(&mask.outer)) + mlx5dr_ste_build_eth_l3_ipv4_misc(&sb[idx++], &mask, + inner, rx); + } + + if (dr_mask_is_flex_parser_tnl_vxlan_gpe_set(&mask, dmn)) + mlx5dr_ste_build_flex_parser_tnl_vxlan_gpe(&sb[idx++], + &mask, + inner, rx); + else if (dr_mask_is_flex_parser_tnl_geneve_set(&mask, dmn)) + mlx5dr_ste_build_flex_parser_tnl_geneve(&sb[idx++], + &mask, + inner, rx); + + if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, outer)) + mlx5dr_ste_build_eth_l4_misc(&sb[idx++], &mask, inner, rx); + + if (DR_MASK_IS_FIRST_MPLS_SET(mask.misc2, outer)) + mlx5dr_ste_build_mpls(&sb[idx++], &mask, inner, rx); + + if (DR_MASK_IS_FLEX_PARSER_0_SET(mask.misc2)) + mlx5dr_ste_build_flex_parser_0(&sb[idx++], &mask, + inner, rx); + + misc3 = &mask.misc3; + if ((DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(misc3) 
&& + mlx5dr_matcher_supp_flex_parser_icmp_v4(&dmn->info.caps)) || + (dr_mask_is_flex_parser_icmpv6_set(&mask.misc3) && + mlx5dr_matcher_supp_flex_parser_icmp_v6(&dmn->info.caps))) { + ret = mlx5dr_ste_build_flex_parser_1(&sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + if (ret) + return ret; + } + if (dr_mask_is_gre_set(&mask.misc)) + mlx5dr_ste_build_gre(&sb[idx++], &mask, inner, rx); + } + + /* Inner */ + if (matcher->match_criteria & (DR_MATCHER_CRITERIA_INNER | + DR_MATCHER_CRITERIA_MISC | + DR_MATCHER_CRITERIA_MISC2 | + DR_MATCHER_CRITERIA_MISC3)) { + inner = true; + + if (dr_mask_is_eth_l2_tnl_set(&mask.misc)) + mlx5dr_ste_build_eth_l2_tnl(&sb[idx++], &mask, inner, rx); + + if (dr_mask_is_smac_set(&mask.inner) && + dr_mask_is_dmac_set(&mask.inner)) { + ret = mlx5dr_ste_build_eth_l2_src_des(&sb[idx++], + &mask, inner, rx); + if (ret) + return ret; + } + + if (dr_mask_is_smac_set(&mask.inner)) + mlx5dr_ste_build_eth_l2_src(&sb[idx++], &mask, inner, rx); + + if (DR_MASK_IS_L2_DST(mask.inner, mask.misc, inner)) + mlx5dr_ste_build_eth_l2_dst(&sb[idx++], &mask, inner, rx); + + if (inner_ipv == DR_RULE_IPV6) { + if (dr_mask_is_dst_addr_set(&mask.inner)) + mlx5dr_ste_build_eth_l3_ipv6_dst(&sb[idx++], &mask, + inner, rx); + + if (dr_mask_is_src_addr_set(&mask.inner)) + mlx5dr_ste_build_eth_l3_ipv6_src(&sb[idx++], &mask, + inner, rx); + + if (DR_MASK_IS_ETH_L4_SET(mask.inner, mask.misc, inner)) + mlx5dr_ste_build_ipv6_l3_l4(&sb[idx++], &mask, + inner, rx); + } else { + if (dr_mask_is_ipv4_5_tuple_set(&mask.inner)) + mlx5dr_ste_build_eth_l3_ipv4_5_tuple(&sb[idx++], &mask, + inner, rx); + + if (dr_mask_is_ttl_set(&mask.inner)) + mlx5dr_ste_build_eth_l3_ipv4_misc(&sb[idx++], &mask, + inner, rx); + } + + if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, inner)) + mlx5dr_ste_build_eth_l4_misc(&sb[idx++], &mask, inner, rx); + + if (DR_MASK_IS_FIRST_MPLS_SET(mask.misc2, inner)) + mlx5dr_ste_build_mpls(&sb[idx++], &mask, inner, rx); + + if (DR_MASK_IS_FLEX_PARSER_0_SET(mask.misc2)) + mlx5dr_ste_build_flex_parser_0(&sb[idx++], &mask, inner, rx); + } + /* Empty matcher, takes all */ + if (matcher->match_criteria == DR_MATCHER_CRITERIA_EMPTY) + mlx5dr_ste_build_empty_always_hit(&sb[idx++], rx); + + if (idx == 0) { + mlx5dr_dbg(dmn, "Cannot generate any valid rules from mask\n"); + return -EINVAL; + } + + /* Check that all mask fields were consumed */ + for (i = 0; i < sizeof(struct mlx5dr_match_param); i++) { + if (((u8 *)&mask)[i] != 0) { + mlx5dr_info(dmn, "Mask contains unsupported parameters\n"); + return -EOPNOTSUPP; + } + } + + nic_matcher->ste_builder = sb; + nic_matcher->num_of_builders_arr[outer_ipv][inner_ipv] = idx; + + return 0; +} + +static int dr_matcher_connect(struct mlx5dr_domain *dmn, + struct mlx5dr_matcher_rx_tx *curr_nic_matcher, + struct mlx5dr_matcher_rx_tx *next_nic_matcher, + struct mlx5dr_matcher_rx_tx *prev_nic_matcher) +{ + struct mlx5dr_table_rx_tx *nic_tbl = curr_nic_matcher->nic_tbl; + struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_ste_htbl *prev_htbl; + int ret; + + /* Connect end anchor hash table to next_htbl or to the default address */ + if (next_nic_matcher) { + info.type = CONNECT_HIT; + info.hit_next_htbl = next_nic_matcher->s_htbl; + } else { + info.type = CONNECT_MISS; + info.miss_icm_addr = nic_tbl->default_icm_addr; + } + ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, + curr_nic_matcher->e_anchor, + &info, info.type == CONNECT_HIT); + if (ret) + return ret; + + /* Connect start hash table to 
end anchor */ + info.type = CONNECT_MISS; + info.miss_icm_addr = curr_nic_matcher->e_anchor->chunk->icm_addr; + ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, + curr_nic_matcher->s_htbl, + &info, false); + if (ret) + return ret; + + /* Connect previous hash table to matcher start hash table */ + if (prev_nic_matcher) + prev_htbl = prev_nic_matcher->e_anchor; + else + prev_htbl = nic_tbl->s_anchor; + + info.type = CONNECT_HIT; + info.hit_next_htbl = curr_nic_matcher->s_htbl; + ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, prev_htbl, + &info, true); + if (ret) + return ret; + + /* Update the pointing ste and next hash table */ + curr_nic_matcher->s_htbl->pointing_ste = prev_htbl->ste_arr; + prev_htbl->ste_arr[0].next_htbl = curr_nic_matcher->s_htbl; + + if (next_nic_matcher) { + next_nic_matcher->s_htbl->pointing_ste = curr_nic_matcher->e_anchor->ste_arr; + curr_nic_matcher->e_anchor->ste_arr[0].next_htbl = next_nic_matcher->s_htbl; + } + + return 0; +} + +static int dr_matcher_add_to_tbl(struct mlx5dr_matcher *matcher) +{ + struct mlx5dr_matcher *next_matcher, *prev_matcher, *tmp_matcher; + struct mlx5dr_table *tbl = matcher->tbl; + struct mlx5dr_domain *dmn = tbl->dmn; + bool first = true; + int ret; + + next_matcher = NULL; + if (!list_empty(&tbl->matcher_list)) + list_for_each_entry(tmp_matcher, &tbl->matcher_list, matcher_list) { + if (tmp_matcher->prio >= matcher->prio) { + next_matcher = tmp_matcher; + break; + } + first = false; + } + + prev_matcher = NULL; + if (next_matcher && !first) + prev_matcher = list_prev_entry(next_matcher, matcher_list); + else if (!first) + prev_matcher = list_last_entry(&tbl->matcher_list, + struct mlx5dr_matcher, + matcher_list); + + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || + dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) { + ret = dr_matcher_connect(dmn, &matcher->rx, + next_matcher ? &next_matcher->rx : NULL, + prev_matcher ? &prev_matcher->rx : NULL); + if (ret) + return ret; + } + + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || + dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) { + ret = dr_matcher_connect(dmn, &matcher->tx, + next_matcher ? &next_matcher->tx : NULL, + prev_matcher ? 
&prev_matcher->tx : NULL); + if (ret) + return ret; + } + + if (prev_matcher) + list_add(&matcher->matcher_list, &prev_matcher->matcher_list); + else if (next_matcher) + list_add_tail(&matcher->matcher_list, + &next_matcher->matcher_list); + else + list_add(&matcher->matcher_list, &tbl->matcher_list); + + return 0; +} + +static void dr_matcher_uninit_nic(struct mlx5dr_matcher_rx_tx *nic_matcher) +{ + mlx5dr_htbl_put(nic_matcher->s_htbl); + mlx5dr_htbl_put(nic_matcher->e_anchor); +} + +static void dr_matcher_uninit_fdb(struct mlx5dr_matcher *matcher) +{ + dr_matcher_uninit_nic(&matcher->rx); + dr_matcher_uninit_nic(&matcher->tx); +} + +static void dr_matcher_uninit(struct mlx5dr_matcher *matcher) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + + switch (dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + dr_matcher_uninit_nic(&matcher->rx); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + dr_matcher_uninit_nic(&matcher->tx); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + dr_matcher_uninit_fdb(matcher); + break; + default: + WARN_ON(true); + break; + } +} + +static int dr_matcher_set_all_ste_builders(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + + dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV4, DR_RULE_IPV4); + dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV4, DR_RULE_IPV6); + dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV6, DR_RULE_IPV4); + dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV6, DR_RULE_IPV6); + + if (!nic_matcher->ste_builder) { + mlx5dr_dbg(dmn, "Cannot generate IPv4 or IPv6 rules with given mask\n"); + return -EINVAL; + } + + return 0; +} + +static int dr_matcher_init_nic(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + int ret; + + ret = dr_matcher_set_all_ste_builders(matcher, nic_matcher); + if (ret) + return ret; + + nic_matcher->e_anchor = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + DR_CHUNK_SIZE_1, + MLX5DR_STE_LU_TYPE_DONT_CARE, + 0); + if (!nic_matcher->e_anchor) + return -ENOMEM; + + nic_matcher->s_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + DR_CHUNK_SIZE_1, + nic_matcher->ste_builder[0].lu_type, + nic_matcher->ste_builder[0].byte_mask); + if (!nic_matcher->s_htbl) { + ret = -ENOMEM; + goto free_e_htbl; + } + + /* make sure the tables exist while empty */ + mlx5dr_htbl_get(nic_matcher->s_htbl); + mlx5dr_htbl_get(nic_matcher->e_anchor); + + return 0; + +free_e_htbl: + mlx5dr_ste_htbl_free(nic_matcher->e_anchor); + return ret; +} + +static int dr_matcher_init_fdb(struct mlx5dr_matcher *matcher) +{ + int ret; + + ret = dr_matcher_init_nic(matcher, &matcher->rx); + if (ret) + return ret; + + ret = dr_matcher_init_nic(matcher, &matcher->tx); + if (ret) + goto uninit_nic_rx; + + return 0; + +uninit_nic_rx: + dr_matcher_uninit_nic(&matcher->rx); + return ret; +} + +static int dr_matcher_init(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *mask) +{ + struct mlx5dr_table *tbl = matcher->tbl; + struct mlx5dr_domain *dmn = tbl->dmn; + int ret; + + if (matcher->match_criteria >= DR_MATCHER_CRITERIA_MAX) { + mlx5dr_info(dmn, "Invalid match criteria attribute\n"); + return -EINVAL; + } + + if (mask) { + if (mask->match_sz > sizeof(struct mlx5dr_match_param)) { + mlx5dr_info(dmn, "Invalid match size attribute\n"); + return -EINVAL; + } + mlx5dr_ste_copy_param(matcher->match_criteria, + &matcher->mask, mask); + } + + switch (dmn->type) { + case 
MLX5DR_DOMAIN_TYPE_NIC_RX: + matcher->rx.nic_tbl = &tbl->rx; + ret = dr_matcher_init_nic(matcher, &matcher->rx); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + matcher->tx.nic_tbl = &tbl->tx; + ret = dr_matcher_init_nic(matcher, &matcher->tx); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + matcher->rx.nic_tbl = &tbl->rx; + matcher->tx.nic_tbl = &tbl->tx; + ret = dr_matcher_init_fdb(matcher); + break; + default: + WARN_ON(true); + return -EINVAL; + } + + return ret; +} + +struct mlx5dr_matcher * +mlx5dr_matcher_create(struct mlx5dr_table *tbl, + u16 priority, + u8 match_criteria_enable, + struct mlx5dr_match_parameters *mask) +{ + struct mlx5dr_matcher *matcher; + int ret; + + refcount_inc(&tbl->refcount); + + matcher = kzalloc(sizeof(*matcher), GFP_KERNEL); + if (!matcher) + goto dec_ref; + + matcher->tbl = tbl; + matcher->prio = priority; + matcher->match_criteria = match_criteria_enable; + refcount_set(&matcher->refcount, 1); + INIT_LIST_HEAD(&matcher->matcher_list); + + mutex_lock(&tbl->dmn->mutex); + + ret = dr_matcher_init(matcher, mask); + if (ret) + goto free_matcher; + + ret = dr_matcher_add_to_tbl(matcher); + if (ret) + goto matcher_uninit; + + mutex_unlock(&tbl->dmn->mutex); + + return matcher; + +matcher_uninit: + dr_matcher_uninit(matcher); +free_matcher: + mutex_unlock(&tbl->dmn->mutex); + kfree(matcher); +dec_ref: + refcount_dec(&tbl->refcount); + return NULL; +} + +static int dr_matcher_disconnect(struct mlx5dr_domain *dmn, + struct mlx5dr_table_rx_tx *nic_tbl, + struct mlx5dr_matcher_rx_tx *next_nic_matcher, + struct mlx5dr_matcher_rx_tx *prev_nic_matcher) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_ste_htbl *prev_anchor; + + if (prev_nic_matcher) + prev_anchor = prev_nic_matcher->e_anchor; + else + prev_anchor = nic_tbl->s_anchor; + + /* Connect previous anchor hash table to next matcher or to the default address */ + if (next_nic_matcher) { + info.type = CONNECT_HIT; + info.hit_next_htbl = next_nic_matcher->s_htbl; + next_nic_matcher->s_htbl->pointing_ste = prev_anchor->ste_arr; + prev_anchor->ste_arr[0].next_htbl = next_nic_matcher->s_htbl; + } else { + info.type = CONNECT_MISS; + info.miss_icm_addr = nic_tbl->default_icm_addr; + prev_anchor->ste_arr[0].next_htbl = NULL; + } + + return mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, prev_anchor, + &info, true); +} + +static int dr_matcher_remove_from_tbl(struct mlx5dr_matcher *matcher) +{ + struct mlx5dr_matcher *prev_matcher, *next_matcher; + struct mlx5dr_table *tbl = matcher->tbl; + struct mlx5dr_domain *dmn = tbl->dmn; + int ret = 0; + + if (list_is_last(&matcher->matcher_list, &tbl->matcher_list)) + next_matcher = NULL; + else + next_matcher = list_next_entry(matcher, matcher_list); + + if (matcher->matcher_list.prev == &tbl->matcher_list) + prev_matcher = NULL; + else + prev_matcher = list_prev_entry(matcher, matcher_list); + + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || + dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) { + ret = dr_matcher_disconnect(dmn, &tbl->rx, + next_matcher ? &next_matcher->rx : NULL, + prev_matcher ? &prev_matcher->rx : NULL); + if (ret) + return ret; + } + + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || + dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) { + ret = dr_matcher_disconnect(dmn, &tbl->tx, + next_matcher ? &next_matcher->tx : NULL, + prev_matcher ? 
&prev_matcher->tx : NULL); + if (ret) + return ret; + } + + list_del(&matcher->matcher_list); + + return 0; +} + +int mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher) +{ + struct mlx5dr_table *tbl = matcher->tbl; + + if (refcount_read(&matcher->refcount) > 1) + return -EBUSY; + + mutex_lock(&tbl->dmn->mutex); + + dr_matcher_remove_from_tbl(matcher); + dr_matcher_uninit(matcher); + refcount_dec(&matcher->tbl->refcount); + + mutex_unlock(&tbl->dmn->mutex); + kfree(matcher); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c new file mode 100644 index 000000000000..e4cff7abb348 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -0,0 +1,1262 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "dr_types.h" + +#define DR_RULE_MAX_STE_CHAIN (DR_RULE_MAX_STES + DR_ACTION_MAX_STES) + +struct mlx5dr_rule_action_member { + struct mlx5dr_action *action; + struct list_head list; +}; + +static int dr_rule_append_to_miss_list(struct mlx5dr_ste *new_last_ste, + struct list_head *miss_list, + struct list_head *send_list) +{ + struct mlx5dr_ste_send_info *ste_info_last; + struct mlx5dr_ste *last_ste; + + /* The new entry will be inserted after the last */ + last_ste = list_last_entry(miss_list, struct mlx5dr_ste, miss_list_node); + WARN_ON(!last_ste); + + ste_info_last = kzalloc(sizeof(*ste_info_last), GFP_KERNEL); + if (!ste_info_last) + return -ENOMEM; + + mlx5dr_ste_set_miss_addr(last_ste->hw_ste, + mlx5dr_ste_get_icm_addr(new_last_ste)); + list_add_tail(&new_last_ste->miss_list_node, miss_list); + + mlx5dr_send_fill_and_append_ste_send_info(last_ste, DR_STE_SIZE_REDUCED, + 0, last_ste->hw_ste, + ste_info_last, send_list, true); + + return 0; +} + +static struct mlx5dr_ste * +dr_rule_create_collision_htbl(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + u8 *hw_ste) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste_htbl *new_htbl; + struct mlx5dr_ste *ste; + + /* Create new table for miss entry */ + new_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + DR_CHUNK_SIZE_1, + MLX5DR_STE_LU_TYPE_DONT_CARE, + 0); + if (!new_htbl) { + mlx5dr_dbg(dmn, "Failed allocating collision table\n"); + return NULL; + } + + /* One and only entry, never grows */ + ste = new_htbl->ste_arr; + mlx5dr_ste_set_miss_addr(hw_ste, nic_matcher->e_anchor->chunk->icm_addr); + mlx5dr_htbl_get(new_htbl); + + return ste; +} + +static struct mlx5dr_ste * +dr_rule_create_collision_entry(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + u8 *hw_ste, + struct mlx5dr_ste *orig_ste) +{ + struct mlx5dr_ste *ste; + + ste = dr_rule_create_collision_htbl(matcher, nic_matcher, hw_ste); + if (!ste) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed creating collision entry\n"); + return NULL; + } + + ste->ste_chain_location = orig_ste->ste_chain_location; + + /* In collision entry, all members share the same miss_list_head */ + ste->htbl->miss_list = mlx5dr_ste_get_miss_list(orig_ste); + + /* Next table */ + if (mlx5dr_ste_create_next_htbl(matcher, nic_matcher, ste, hw_ste, + DR_CHUNK_SIZE_1)) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed allocating table\n"); + goto free_tbl; + } + + return ste; + +free_tbl: + mlx5dr_ste_free(ste, matcher, nic_matcher); + return NULL; +} + +static int +dr_rule_handle_one_ste_in_update_list(struct mlx5dr_ste_send_info *ste_info, + struct mlx5dr_domain *dmn) 
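+ /* Pops one pending STE write off the update list, posts it to the device, and on success mirrors the reduced-size STE (control + tag, without the trailing mask) into the shadow hw_ste copy; the ste_info is freed in all cases. */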
+{ + int ret; + + list_del(&ste_info->send_list); + ret = mlx5dr_send_postsend_ste(dmn, ste_info->ste, ste_info->data, + ste_info->size, ste_info->offset); + if (ret) + goto out; + /* Copy data to ste, only reduced size, the last 16B (mask) + * is already written to the hw. + */ + memcpy(ste_info->ste->hw_ste, ste_info->data, DR_STE_SIZE_REDUCED); + +out: + kfree(ste_info); + return ret; +} + +static int dr_rule_send_update_list(struct list_head *send_ste_list, + struct mlx5dr_domain *dmn, + bool is_reverse) +{ + struct mlx5dr_ste_send_info *ste_info, *tmp_ste_info; + int ret; + + if (is_reverse) { + list_for_each_entry_safe_reverse(ste_info, tmp_ste_info, + send_ste_list, send_list) { + ret = dr_rule_handle_one_ste_in_update_list(ste_info, + dmn); + if (ret) + return ret; + } + } else { + list_for_each_entry_safe(ste_info, tmp_ste_info, + send_ste_list, send_list) { + ret = dr_rule_handle_one_ste_in_update_list(ste_info, + dmn); + if (ret) + return ret; + } + } + + return 0; +} + +static struct mlx5dr_ste * +dr_rule_find_ste_in_miss_list(struct list_head *miss_list, u8 *hw_ste) +{ + struct mlx5dr_ste *ste; + + if (list_empty(miss_list)) + return NULL; + + /* Check if hw_ste is present in the list */ + list_for_each_entry(ste, miss_list, miss_list_node) { + if (mlx5dr_ste_equal_tag(ste->hw_ste, hw_ste)) + return ste; + } + + return NULL; +} + +static struct mlx5dr_ste * +dr_rule_rehash_handle_collision(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct list_head *update_list, + struct mlx5dr_ste *col_ste, + u8 *hw_ste) +{ + struct mlx5dr_ste *new_ste; + int ret; + + new_ste = dr_rule_create_collision_htbl(matcher, nic_matcher, hw_ste); + if (!new_ste) + return NULL; + + /* In collision entry, all members share the same miss_list_head */ + new_ste->htbl->miss_list = mlx5dr_ste_get_miss_list(col_ste); + + /* Update the previous from the list */ + ret = dr_rule_append_to_miss_list(new_ste, + mlx5dr_ste_get_miss_list(col_ste), + update_list); + if (ret) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed update dup entry\n"); + goto err_exit; + } + + return new_ste; + +err_exit: + mlx5dr_ste_free(new_ste, matcher, nic_matcher); + return NULL; +} + +static void dr_rule_rehash_copy_ste_ctrl(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *cur_ste, + struct mlx5dr_ste *new_ste) +{ + new_ste->next_htbl = cur_ste->next_htbl; + new_ste->ste_chain_location = cur_ste->ste_chain_location; + + if (!mlx5dr_ste_is_last_in_rule(nic_matcher, new_ste->ste_chain_location)) + new_ste->next_htbl->pointing_ste = new_ste; + + /* We need to copy the refcount since this ste + * may have been traversed several times + */ + new_ste->refcount = cur_ste->refcount; + + /* Link old STEs rule_mem list to the new ste */ + mlx5dr_rule_update_rule_member(cur_ste, new_ste); + INIT_LIST_HEAD(&new_ste->rule_list); + list_splice_tail_init(&cur_ste->rule_list, &new_ste->rule_list); +} + +static struct mlx5dr_ste * +dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *cur_ste, + struct mlx5dr_ste_htbl *new_htbl, + struct list_head *update_list) +{ + struct mlx5dr_ste_send_info *ste_info; + bool use_update_list = false; + u8 hw_ste[DR_STE_SIZE] = {}; + struct mlx5dr_ste *new_ste; + int new_idx; + u8 sb_idx; + + /* Copy STE mask from the matcher */ + sb_idx = cur_ste->ste_chain_location - 1; + mlx5dr_ste_set_bit_mask(hw_ste, nic_matcher->ste_builder[sb_idx].bit_mask); + + /* Copy STE control and 
tag */ + memcpy(hw_ste, cur_ste->hw_ste, DR_STE_SIZE_REDUCED); + mlx5dr_ste_set_miss_addr(hw_ste, nic_matcher->e_anchor->chunk->icm_addr); + + new_idx = mlx5dr_ste_calc_hash_index(hw_ste, new_htbl); + new_ste = &new_htbl->ste_arr[new_idx]; + + if (mlx5dr_ste_not_used_ste(new_ste)) { + mlx5dr_htbl_get(new_htbl); + list_add_tail(&new_ste->miss_list_node, + mlx5dr_ste_get_miss_list(new_ste)); + } else { + new_ste = dr_rule_rehash_handle_collision(matcher, + nic_matcher, + update_list, + new_ste, + hw_ste); + if (!new_ste) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed adding collision entry, index: %d\n", + new_idx); + return NULL; + } + new_htbl->ctrl.num_of_collisions++; + use_update_list = true; + } + + memcpy(new_ste->hw_ste, hw_ste, DR_STE_SIZE_REDUCED); + + new_htbl->ctrl.num_of_valid_entries++; + + if (use_update_list) { + ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL); + if (!ste_info) + goto err_exit; + + mlx5dr_send_fill_and_append_ste_send_info(new_ste, DR_STE_SIZE, 0, + hw_ste, ste_info, + update_list, true); + } + + dr_rule_rehash_copy_ste_ctrl(matcher, nic_matcher, cur_ste, new_ste); + + return new_ste; + +err_exit: + mlx5dr_ste_free(new_ste, matcher, nic_matcher); + return NULL; +} + +static int dr_rule_rehash_copy_miss_list(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct list_head *cur_miss_list, + struct mlx5dr_ste_htbl *new_htbl, + struct list_head *update_list) +{ + struct mlx5dr_ste *tmp_ste, *cur_ste, *new_ste; + + if (list_empty(cur_miss_list)) + return 0; + + list_for_each_entry_safe(cur_ste, tmp_ste, cur_miss_list, miss_list_node) { + new_ste = dr_rule_rehash_copy_ste(matcher, + nic_matcher, + cur_ste, + new_htbl, + update_list); + if (!new_ste) + goto err_insert; + + list_del(&cur_ste->miss_list_node); + mlx5dr_htbl_put(cur_ste->htbl); + } + return 0; + +err_insert: + mlx5dr_err(matcher->tbl->dmn, "Fatal error during resize\n"); + WARN_ON(true); + return -EINVAL; +} + +static int dr_rule_rehash_copy_htbl(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste_htbl *cur_htbl, + struct mlx5dr_ste_htbl *new_htbl, + struct list_head *update_list) +{ + struct mlx5dr_ste *cur_ste; + int cur_entries; + int err = 0; + int i; + + cur_entries = mlx5dr_icm_pool_chunk_size_to_entries(cur_htbl->chunk_size); + + if (cur_entries < 1) { + mlx5dr_dbg(matcher->tbl->dmn, "Invalid number of entries\n"); + return -EINVAL; + } + + for (i = 0; i < cur_entries; i++) { + cur_ste = &cur_htbl->ste_arr[i]; + if (mlx5dr_ste_not_used_ste(cur_ste)) /* Empty, nothing to copy */ + continue; + + err = dr_rule_rehash_copy_miss_list(matcher, + nic_matcher, + mlx5dr_ste_get_miss_list(cur_ste), + new_htbl, + update_list); + if (err) + goto clean_copy; + } + +clean_copy: + return err; +} + +static struct mlx5dr_ste_htbl * +dr_rule_rehash_htbl(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule, + struct mlx5dr_ste_htbl *cur_htbl, + u8 ste_location, + struct list_head *update_list, + enum mlx5dr_icm_chunk_size new_size) +{ + struct mlx5dr_ste_send_info *del_ste_info, *tmp_ste_info; + struct mlx5dr_matcher *matcher = rule->matcher; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_matcher_rx_tx *nic_matcher; + struct mlx5dr_ste_send_info *ste_info; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_domain_rx_tx *nic_dmn; + u8 formatted_ste[DR_STE_SIZE] = {}; + LIST_HEAD(rehash_table_send_list); + struct mlx5dr_ste *ste_to_update; + struct mlx5dr_ste_htbl *new_htbl; + int err; + + nic_matcher = 
nic_rule->nic_matcher; + nic_dmn = nic_matcher->nic_tbl->nic_dmn; + + ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL); + if (!ste_info) + return NULL; + + new_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + new_size, + cur_htbl->lu_type, + cur_htbl->byte_mask); + if (!new_htbl) { + mlx5dr_err(dmn, "Failed to allocate new hash table\n"); + goto free_ste_info; + } + + /* Write new table to HW */ + info.type = CONNECT_MISS; + info.miss_icm_addr = nic_matcher->e_anchor->chunk->icm_addr; + mlx5dr_ste_set_formatted_ste(dmn->info.caps.gvmi, + nic_dmn, + new_htbl, + formatted_ste, + &info); + + new_htbl->pointing_ste = cur_htbl->pointing_ste; + new_htbl->pointing_ste->next_htbl = new_htbl; + err = dr_rule_rehash_copy_htbl(matcher, + nic_matcher, + cur_htbl, + new_htbl, + &rehash_table_send_list); + if (err) + goto free_new_htbl; + + if (mlx5dr_send_postsend_htbl(dmn, new_htbl, formatted_ste, + nic_matcher->ste_builder[ste_location - 1].bit_mask)) { + mlx5dr_err(dmn, "Failed writing table to HW\n"); + goto free_new_htbl; + } + + /* Writing to the hw is done in regular order of rehash_table_send_list, + * in order to have the origin data written before the miss address of + * collision entries, if exists. + */ + if (dr_rule_send_update_list(&rehash_table_send_list, dmn, false)) { + mlx5dr_err(dmn, "Failed updating table to HW\n"); + goto free_ste_list; + } + + /* Connect previous hash table to current */ + if (ste_location == 1) { + /* The previous table is an anchor, anchors size is always one STE */ + struct mlx5dr_ste_htbl *prev_htbl = cur_htbl->pointing_ste->htbl; + + /* On matcher s_anchor we keep an extra refcount */ + mlx5dr_htbl_get(new_htbl); + mlx5dr_htbl_put(cur_htbl); + + nic_matcher->s_htbl = new_htbl; + + /* It is safe to operate dr_ste_set_hit_addr on the hw_ste here + * (48B len) which works only on first 32B + */ + mlx5dr_ste_set_hit_addr(prev_htbl->ste_arr[0].hw_ste, + new_htbl->chunk->icm_addr, + new_htbl->chunk->num_of_entries); + + ste_to_update = &prev_htbl->ste_arr[0]; + } else { + mlx5dr_ste_set_hit_addr_by_next_htbl(cur_htbl->pointing_ste->hw_ste, + new_htbl); + ste_to_update = cur_htbl->pointing_ste; + } + + mlx5dr_send_fill_and_append_ste_send_info(ste_to_update, DR_STE_SIZE_REDUCED, + 0, ste_to_update->hw_ste, ste_info, + update_list, false); + + return new_htbl; + +free_ste_list: + /* Clean all ste_info's from the new table */ + list_for_each_entry_safe(del_ste_info, tmp_ste_info, + &rehash_table_send_list, send_list) { + list_del(&del_ste_info->send_list); + kfree(del_ste_info); + } + +free_new_htbl: + mlx5dr_ste_htbl_free(new_htbl); +free_ste_info: + kfree(ste_info); + mlx5dr_info(dmn, "Failed creating rehash table\n"); + return NULL; +} + +static struct mlx5dr_ste_htbl *dr_rule_rehash(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule, + struct mlx5dr_ste_htbl *cur_htbl, + u8 ste_location, + struct list_head *update_list) +{ + struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn; + enum mlx5dr_icm_chunk_size new_size; + + new_size = mlx5dr_icm_next_higher_chunk(cur_htbl->chunk_size); + new_size = min_t(u32, new_size, dmn->info.max_log_sw_icm_sz); + + if (new_size == cur_htbl->chunk_size) + return NULL; /* Skip rehash, we already at the max size */ + + return dr_rule_rehash_htbl(rule, nic_rule, cur_htbl, ste_location, + update_list, new_size); +} + +static struct mlx5dr_ste * +dr_rule_handle_collision(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *ste, + u8 *hw_ste, + struct list_head *miss_list, + struct 
list_head *send_list) +{ + struct mlx5dr_ste_send_info *ste_info; + struct mlx5dr_ste *new_ste; + + ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL); + if (!ste_info) + return NULL; + + new_ste = dr_rule_create_collision_entry(matcher, nic_matcher, hw_ste, ste); + if (!new_ste) + goto free_send_info; + + if (dr_rule_append_to_miss_list(new_ste, miss_list, send_list)) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed to update prev miss_list\n"); + goto err_exit; + } + + mlx5dr_send_fill_and_append_ste_send_info(new_ste, DR_STE_SIZE, 0, hw_ste, + ste_info, send_list, false); + + ste->htbl->ctrl.num_of_collisions++; + ste->htbl->ctrl.num_of_valid_entries++; + + return new_ste; + +err_exit: + mlx5dr_ste_free(new_ste, matcher, nic_matcher); +free_send_info: + kfree(ste_info); + return NULL; +} + +static void dr_rule_remove_action_members(struct mlx5dr_rule *rule) +{ + struct mlx5dr_rule_action_member *action_mem; + struct mlx5dr_rule_action_member *tmp; + + list_for_each_entry_safe(action_mem, tmp, &rule->rule_actions_list, list) { + list_del(&action_mem->list); + refcount_dec(&action_mem->action->refcount); + kvfree(action_mem); + } +} + +static int dr_rule_add_action_members(struct mlx5dr_rule *rule, + size_t num_actions, + struct mlx5dr_action *actions[]) +{ + struct mlx5dr_rule_action_member *action_mem; + int i; + + for (i = 0; i < num_actions; i++) { + action_mem = kvzalloc(sizeof(*action_mem), GFP_KERNEL); + if (!action_mem) + goto free_action_members; + + action_mem->action = actions[i]; + INIT_LIST_HEAD(&action_mem->list); + list_add_tail(&action_mem->list, &rule->rule_actions_list); + refcount_inc(&action_mem->action->refcount); + } + + return 0; + +free_action_members: + dr_rule_remove_action_members(rule); + return -ENOMEM; +} + +/* While the pointer of ste is no longer valid, like while moving ste to be + * the first in the miss_list, and to be in the origin table, + * all rule-members that are attached to this ste should update their ste member + * to the new pointer + */ +void mlx5dr_rule_update_rule_member(struct mlx5dr_ste *ste, + struct mlx5dr_ste *new_ste) +{ + struct mlx5dr_rule_member *rule_mem; + + if (!list_empty(&ste->rule_list)) + list_for_each_entry(rule_mem, &ste->rule_list, use_ste_list) + rule_mem->ste = new_ste; +} + +static void dr_rule_clean_rule_members(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule) +{ + struct mlx5dr_rule_member *rule_mem; + struct mlx5dr_rule_member *tmp_mem; + + if (list_empty(&nic_rule->rule_members_list)) + return; + list_for_each_entry_safe(rule_mem, tmp_mem, &nic_rule->rule_members_list, list) { + list_del(&rule_mem->list); + list_del(&rule_mem->use_ste_list); + mlx5dr_ste_put(rule_mem->ste, rule->matcher, nic_rule->nic_matcher); + kvfree(rule_mem); + } +} + +static u16 dr_get_bits_per_mask(u16 byte_mask) +{ + u16 bits = 0; + + while (byte_mask) { + byte_mask = byte_mask & (byte_mask - 1); + bits++; + } + + return bits; +} + +static bool dr_rule_need_enlarge_hash(struct mlx5dr_ste_htbl *htbl, + struct mlx5dr_domain *dmn, + struct mlx5dr_domain_rx_tx *nic_dmn) +{ + struct mlx5dr_ste_htbl_ctrl *ctrl = &htbl->ctrl; + + if (dmn->info.max_log_sw_icm_sz <= htbl->chunk_size) + return false; + + if (!ctrl->may_grow) + return false; + + if (dr_get_bits_per_mask(htbl->byte_mask) * BITS_PER_BYTE <= htbl->chunk_size) + return false; + + if (ctrl->num_of_collisions >= ctrl->increase_threshold && + (ctrl->num_of_valid_entries - ctrl->num_of_collisions) >= ctrl->increase_threshold) + return true; + + return false; +} + +static int 
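+ /* dr_rule_add_member() records an STE as part of this rule: the member is linked both into the rule's member list and into the STE's rule_list, so rule deletion can later drop the STE references. */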
dr_rule_add_member(struct mlx5dr_rule_rx_tx *nic_rule, + struct mlx5dr_ste *ste) +{ + struct mlx5dr_rule_member *rule_mem; + + rule_mem = kvzalloc(sizeof(*rule_mem), GFP_KERNEL); + if (!rule_mem) + return -ENOMEM; + + INIT_LIST_HEAD(&rule_mem->list); + INIT_LIST_HEAD(&rule_mem->use_ste_list); + + rule_mem->ste = ste; + list_add_tail(&rule_mem->list, &nic_rule->rule_members_list); + + list_add_tail(&rule_mem->use_ste_list, &ste->rule_list); + + return 0; +} + +static int dr_rule_handle_action_stes(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule, + struct list_head *send_ste_list, + struct mlx5dr_ste *last_ste, + u8 *hw_ste_arr, + u32 new_hw_ste_arr_sz) +{ + struct mlx5dr_matcher_rx_tx *nic_matcher = nic_rule->nic_matcher; + struct mlx5dr_ste_send_info *ste_info_arr[DR_ACTION_MAX_STES]; + u8 num_of_builders = nic_matcher->num_of_builders; + struct mlx5dr_matcher *matcher = rule->matcher; + u8 *curr_hw_ste, *prev_hw_ste; + struct mlx5dr_ste *action_ste; + int i, k, ret; + + /* Two cases: + * 1. num_of_builders is equal to new_hw_ste_arr_sz, the action in the ste + * 2. num_of_builders is less then new_hw_ste_arr_sz, new ste was added + * to support the action. + */ + if (num_of_builders == new_hw_ste_arr_sz) + return 0; + + for (i = num_of_builders, k = 0; i < new_hw_ste_arr_sz; i++, k++) { + curr_hw_ste = hw_ste_arr + i * DR_STE_SIZE; + prev_hw_ste = (i == 0) ? curr_hw_ste : hw_ste_arr + ((i - 1) * DR_STE_SIZE); + action_ste = dr_rule_create_collision_htbl(matcher, + nic_matcher, + curr_hw_ste); + if (!action_ste) + return -ENOMEM; + + mlx5dr_ste_get(action_ste); + + /* While free ste we go over the miss list, so add this ste to the list */ + list_add_tail(&action_ste->miss_list_node, + mlx5dr_ste_get_miss_list(action_ste)); + + ste_info_arr[k] = kzalloc(sizeof(*ste_info_arr[k]), + GFP_KERNEL); + if (!ste_info_arr[k]) + goto err_exit; + + /* Point current ste to the new action */ + mlx5dr_ste_set_hit_addr_by_next_htbl(prev_hw_ste, action_ste->htbl); + ret = dr_rule_add_member(nic_rule, action_ste); + if (ret) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed adding rule member\n"); + goto free_ste_info; + } + mlx5dr_send_fill_and_append_ste_send_info(action_ste, DR_STE_SIZE, 0, + curr_hw_ste, + ste_info_arr[k], + send_ste_list, false); + } + + return 0; + +free_ste_info: + kfree(ste_info_arr[k]); +err_exit: + mlx5dr_ste_put(action_ste, matcher, nic_matcher); + return -ENOMEM; +} + +static int dr_rule_handle_empty_entry(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste_htbl *cur_htbl, + struct mlx5dr_ste *ste, + u8 ste_location, + u8 *hw_ste, + struct list_head *miss_list, + struct list_head *send_list) +{ + struct mlx5dr_ste_send_info *ste_info; + + /* Take ref on table, only on first time this ste is used */ + mlx5dr_htbl_get(cur_htbl); + + /* new entry -> new branch */ + list_add_tail(&ste->miss_list_node, miss_list); + + mlx5dr_ste_set_miss_addr(hw_ste, nic_matcher->e_anchor->chunk->icm_addr); + + ste->ste_chain_location = ste_location; + + ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL); + if (!ste_info) + goto clean_ste_setting; + + if (mlx5dr_ste_create_next_htbl(matcher, + nic_matcher, + ste, + hw_ste, + DR_CHUNK_SIZE_1)) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed allocating table\n"); + goto clean_ste_info; + } + + cur_htbl->ctrl.num_of_valid_entries++; + + mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE, 0, hw_ste, + ste_info, send_list, false); + + return 0; + +clean_ste_info: + kfree(ste_info); +clean_ste_setting: + 
list_del_init(&ste->miss_list_node); + mlx5dr_htbl_put(cur_htbl); + + return -ENOMEM; +} + +static struct mlx5dr_ste * +dr_rule_handle_ste_branch(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule, + struct list_head *send_ste_list, + struct mlx5dr_ste_htbl *cur_htbl, + u8 *hw_ste, + u8 ste_location, + struct mlx5dr_ste_htbl **put_htbl) +{ + struct mlx5dr_matcher *matcher = rule->matcher; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_matcher_rx_tx *nic_matcher; + struct mlx5dr_domain_rx_tx *nic_dmn; + struct mlx5dr_ste_htbl *new_htbl; + struct mlx5dr_ste *matched_ste; + struct list_head *miss_list; + bool skip_rehash = false; + struct mlx5dr_ste *ste; + int index; + + nic_matcher = nic_rule->nic_matcher; + nic_dmn = nic_matcher->nic_tbl->nic_dmn; + +again: + index = mlx5dr_ste_calc_hash_index(hw_ste, cur_htbl); + miss_list = &cur_htbl->chunk->miss_list[index]; + ste = &cur_htbl->ste_arr[index]; + + if (mlx5dr_ste_not_used_ste(ste)) { + if (dr_rule_handle_empty_entry(matcher, nic_matcher, cur_htbl, + ste, ste_location, + hw_ste, miss_list, + send_ste_list)) + return NULL; + } else { + /* Hash table index in use, check if this ste is in the miss list */ + matched_ste = dr_rule_find_ste_in_miss_list(miss_list, hw_ste); + if (matched_ste) { + /* If it is last STE in the chain, and has the same tag + * it means that all the previous stes are the same, + * if so, this rule is duplicated. + */ + if (!mlx5dr_ste_is_last_in_rule(nic_matcher, ste_location)) + return matched_ste; + + mlx5dr_dbg(dmn, "Duplicate rule inserted\n"); + } + + if (!skip_rehash && dr_rule_need_enlarge_hash(cur_htbl, dmn, nic_dmn)) { + /* Hash table index in use, try to resize of the hash */ + skip_rehash = true; + + /* Hold the table till we update. + * Release in dr_rule_create_rule() + */ + *put_htbl = cur_htbl; + mlx5dr_htbl_get(cur_htbl); + + new_htbl = dr_rule_rehash(rule, nic_rule, cur_htbl, + ste_location, send_ste_list); + if (!new_htbl) { + mlx5dr_htbl_put(cur_htbl); + mlx5dr_info(dmn, "failed creating rehash table, htbl-log_size: %d\n", + cur_htbl->chunk_size); + } else { + cur_htbl = new_htbl; + } + goto again; + } else { + /* Hash table index in use, add another collision (miss) */ + ste = dr_rule_handle_collision(matcher, + nic_matcher, + ste, + hw_ste, + miss_list, + send_ste_list); + if (!ste) { + mlx5dr_dbg(dmn, "failed adding collision entry, index: %d\n", + index); + return NULL; + } + } + } + return ste; +} + +static bool dr_rule_cmp_value_to_mask(u8 *mask, u8 *value, + u32 s_idx, u32 e_idx) +{ + u32 i; + + for (i = s_idx; i < e_idx; i++) { + if (value[i] & ~mask[i]) { + pr_info("Rule parameters contains a value not specified by mask\n"); + return false; + } + } + return true; +} + +static bool dr_rule_verify(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *value, + struct mlx5dr_match_param *param) +{ + u8 match_criteria = matcher->match_criteria; + size_t value_size = value->match_sz; + u8 *mask_p = (u8 *)&matcher->mask; + u8 *param_p = (u8 *)param; + u32 s_idx, e_idx; + + if (!value_size || + (value_size > sizeof(struct mlx5dr_match_param) || + (value_size % sizeof(u32)))) { + mlx5dr_dbg(matcher->tbl->dmn, "Rule parameters length is incorrect\n"); + return false; + } + + mlx5dr_ste_copy_param(matcher->match_criteria, param, value); + + if (match_criteria & DR_MATCHER_CRITERIA_OUTER) { + s_idx = offsetof(struct mlx5dr_match_param, outer); + e_idx = min(s_idx + sizeof(param->outer), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, 
e_idx)) { + mlx5dr_dbg(matcher->tbl->dmn, "Rule outer parameters contains a value not specified by mask\n"); + return false; + } + } + + if (match_criteria & DR_MATCHER_CRITERIA_MISC) { + s_idx = offsetof(struct mlx5dr_match_param, misc); + e_idx = min(s_idx + sizeof(param->misc), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_dbg(matcher->tbl->dmn, "Rule misc parameters contains a value not specified by mask\n"); + return false; + } + } + + if (match_criteria & DR_MATCHER_CRITERIA_INNER) { + s_idx = offsetof(struct mlx5dr_match_param, inner); + e_idx = min(s_idx + sizeof(param->inner), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_dbg(matcher->tbl->dmn, "Rule inner parameters contains a value not specified by mask\n"); + return false; + } + } + + if (match_criteria & DR_MATCHER_CRITERIA_MISC2) { + s_idx = offsetof(struct mlx5dr_match_param, misc2); + e_idx = min(s_idx + sizeof(param->misc2), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_dbg(matcher->tbl->dmn, "Rule misc2 parameters contains a value not specified by mask\n"); + return false; + } + } + + if (match_criteria & DR_MATCHER_CRITERIA_MISC3) { + s_idx = offsetof(struct mlx5dr_match_param, misc3); + e_idx = min(s_idx + sizeof(param->misc3), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_dbg(matcher->tbl->dmn, "Rule misc3 parameters contains a value not specified by mask\n"); + return false; + } + } + return true; +} + +static int dr_rule_destroy_rule_nic(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule) +{ + dr_rule_clean_rule_members(rule, nic_rule); + return 0; +} + +static int dr_rule_destroy_rule_fdb(struct mlx5dr_rule *rule) +{ + dr_rule_destroy_rule_nic(rule, &rule->rx); + dr_rule_destroy_rule_nic(rule, &rule->tx); + return 0; +} + +static int dr_rule_destroy_rule(struct mlx5dr_rule *rule) +{ + struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn; + + switch (dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + dr_rule_destroy_rule_nic(rule, &rule->rx); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + dr_rule_destroy_rule_nic(rule, &rule->tx); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + dr_rule_destroy_rule_fdb(rule); + break; + default: + return -EINVAL; + } + + dr_rule_remove_action_members(rule); + kfree(rule); + return 0; +} + +static enum mlx5dr_ipv dr_rule_get_ipv(struct mlx5dr_match_spec *spec) +{ + if (spec->ip_version == 6 || spec->ethertype == ETH_P_IPV6) + return DR_RULE_IPV6; + + return DR_RULE_IPV4; +} + +static bool dr_rule_skip(enum mlx5dr_domain_type domain, + enum mlx5dr_ste_entry_type ste_type, + struct mlx5dr_match_param *mask, + struct mlx5dr_match_param *value) +{ + if (domain != MLX5DR_DOMAIN_TYPE_FDB) + return false; + + if (mask->misc.source_port) { + if (ste_type == MLX5DR_STE_TYPE_RX) + if (value->misc.source_port != WIRE_PORT) + return true; + + if (ste_type == MLX5DR_STE_TYPE_TX) + if (value->misc.source_port == WIRE_PORT) + return true; + } + + /* Metadata C can be used to describe the source vport */ + if (mask->misc2.metadata_reg_c_0) { + if (ste_type == MLX5DR_STE_TYPE_RX) + if ((value->misc2.metadata_reg_c_0 & WIRE_PORT) != WIRE_PORT) + return true; + + if (ste_type == MLX5DR_STE_TYPE_TX) + if ((value->misc2.metadata_reg_c_0 & WIRE_PORT) == WIRE_PORT) + return true; + } + return false; +} + +static int +dr_rule_create_rule_nic(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule, + struct 
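/* dr_rule_verify() above rejects a rule whenever a value byte carries bits
 * that the matcher mask does not cover; per criteria range the test reduces
 * to the byte check below.  Standalone sketch, illustrative names only.
 */
static int sketch_value_covered_by_mask(const unsigned char *mask,
                                        const unsigned char *value,
                                        unsigned int start, unsigned int end)
{
        unsigned int i;

        for (i = start; i < end; i++) {
                if (value[i] & ~mask[i])
                        return 0;       /* value sets a bit outside the mask */
        }
        return 1;
}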
mlx5dr_match_param *param, + size_t num_actions, + struct mlx5dr_action *actions[]) +{ + struct mlx5dr_ste_send_info *ste_info, *tmp_ste_info; + struct mlx5dr_matcher *matcher = rule->matcher; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_matcher_rx_tx *nic_matcher; + struct mlx5dr_domain_rx_tx *nic_dmn; + struct mlx5dr_ste_htbl *htbl = NULL; + struct mlx5dr_ste_htbl *cur_htbl; + struct mlx5dr_ste *ste = NULL; + LIST_HEAD(send_ste_list); + u8 *hw_ste_arr = NULL; + u32 new_hw_ste_arr_sz; + int ret, i; + + nic_matcher = nic_rule->nic_matcher; + nic_dmn = nic_matcher->nic_tbl->nic_dmn; + + INIT_LIST_HEAD(&nic_rule->rule_members_list); + + if (dr_rule_skip(dmn->type, nic_dmn->ste_type, &matcher->mask, param)) + return 0; + + ret = mlx5dr_matcher_select_builders(matcher, + nic_matcher, + dr_rule_get_ipv(¶m->outer), + dr_rule_get_ipv(¶m->inner)); + if (ret) + goto out_err; + + hw_ste_arr = kzalloc(DR_RULE_MAX_STE_CHAIN * DR_STE_SIZE, GFP_KERNEL); + if (!hw_ste_arr) { + ret = -ENOMEM; + goto out_err; + } + + /* Set the tag values inside the ste array */ + ret = mlx5dr_ste_build_ste_arr(matcher, nic_matcher, param, hw_ste_arr); + if (ret) + goto free_hw_ste; + + /* Set the actions values/addresses inside the ste array */ + ret = mlx5dr_actions_build_ste_arr(matcher, nic_matcher, actions, + num_actions, hw_ste_arr, + &new_hw_ste_arr_sz); + if (ret) + goto free_hw_ste; + + cur_htbl = nic_matcher->s_htbl; + + /* Go over the array of STEs, and build dr_ste accordingly. + * The loop is over only the builders which are equal or less to the + * number of stes, in case we have actions that lives in other stes. + */ + for (i = 0; i < nic_matcher->num_of_builders; i++) { + /* Calculate CRC and keep new ste entry */ + u8 *cur_hw_ste_ent = hw_ste_arr + (i * DR_STE_SIZE); + + ste = dr_rule_handle_ste_branch(rule, + nic_rule, + &send_ste_list, + cur_htbl, + cur_hw_ste_ent, + i + 1, + &htbl); + if (!ste) { + mlx5dr_err(dmn, "Failed creating next branch\n"); + ret = -ENOENT; + goto free_rule; + } + + cur_htbl = ste->next_htbl; + + /* Keep all STEs in the rule struct */ + ret = dr_rule_add_member(nic_rule, ste); + if (ret) { + mlx5dr_dbg(dmn, "Failed adding rule member index %d\n", i); + goto free_ste; + } + + mlx5dr_ste_get(ste); + } + + /* Connect actions */ + ret = dr_rule_handle_action_stes(rule, nic_rule, &send_ste_list, + ste, hw_ste_arr, new_hw_ste_arr_sz); + if (ret) { + mlx5dr_dbg(dmn, "Failed apply actions\n"); + goto free_rule; + } + ret = dr_rule_send_update_list(&send_ste_list, dmn, true); + if (ret) { + mlx5dr_err(dmn, "Failed sending ste!\n"); + goto free_rule; + } + + if (htbl) + mlx5dr_htbl_put(htbl); + + kfree(hw_ste_arr); + + return 0; + +free_ste: + mlx5dr_ste_put(ste, matcher, nic_matcher); +free_rule: + dr_rule_clean_rule_members(rule, nic_rule); + /* Clean all ste_info's */ + list_for_each_entry_safe(ste_info, tmp_ste_info, &send_ste_list, send_list) { + list_del(&ste_info->send_list); + kfree(ste_info); + } +free_hw_ste: + kfree(hw_ste_arr); +out_err: + return ret; +} + +static int +dr_rule_create_rule_fdb(struct mlx5dr_rule *rule, + struct mlx5dr_match_param *param, + size_t num_actions, + struct mlx5dr_action *actions[]) +{ + struct mlx5dr_match_param copy_param = {}; + int ret; + + /* Copy match_param since they will be consumed during the first + * nic_rule insertion. 
+ */ + memcpy(©_param, param, sizeof(struct mlx5dr_match_param)); + + ret = dr_rule_create_rule_nic(rule, &rule->rx, param, + num_actions, actions); + if (ret) + return ret; + + ret = dr_rule_create_rule_nic(rule, &rule->tx, ©_param, + num_actions, actions); + if (ret) + goto destroy_rule_nic_rx; + + return 0; + +destroy_rule_nic_rx: + dr_rule_destroy_rule_nic(rule, &rule->rx); + return ret; +} + +static struct mlx5dr_rule * +dr_rule_create_rule(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *value, + size_t num_actions, + struct mlx5dr_action *actions[]) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_match_param param = {}; + struct mlx5dr_rule *rule; + int ret; + + if (!dr_rule_verify(matcher, value, ¶m)) + return NULL; + + rule = kzalloc(sizeof(*rule), GFP_KERNEL); + if (!rule) + return NULL; + + rule->matcher = matcher; + INIT_LIST_HEAD(&rule->rule_actions_list); + + ret = dr_rule_add_action_members(rule, num_actions, actions); + if (ret) + goto free_rule; + + switch (dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + rule->rx.nic_matcher = &matcher->rx; + ret = dr_rule_create_rule_nic(rule, &rule->rx, ¶m, + num_actions, actions); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + rule->tx.nic_matcher = &matcher->tx; + ret = dr_rule_create_rule_nic(rule, &rule->tx, ¶m, + num_actions, actions); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + rule->rx.nic_matcher = &matcher->rx; + rule->tx.nic_matcher = &matcher->tx; + ret = dr_rule_create_rule_fdb(rule, ¶m, + num_actions, actions); + break; + default: + ret = -EINVAL; + break; + } + + if (ret) + goto remove_action_members; + + return rule; + +remove_action_members: + dr_rule_remove_action_members(rule); +free_rule: + kfree(rule); + mlx5dr_info(dmn, "Failed creating rule\n"); + return NULL; +} + +struct mlx5dr_rule *mlx5dr_rule_create(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *value, + size_t num_actions, + struct mlx5dr_action *actions[]) +{ + struct mlx5dr_rule *rule; + + mutex_lock(&matcher->tbl->dmn->mutex); + refcount_inc(&matcher->refcount); + + rule = dr_rule_create_rule(matcher, value, num_actions, actions); + if (!rule) + refcount_dec(&matcher->refcount); + + mutex_unlock(&matcher->tbl->dmn->mutex); + + return rule; +} + +int mlx5dr_rule_destroy(struct mlx5dr_rule *rule) +{ + struct mlx5dr_matcher *matcher = rule->matcher; + struct mlx5dr_table *tbl = rule->matcher->tbl; + int ret; + + mutex_lock(&tbl->dmn->mutex); + + ret = dr_rule_destroy_rule(rule); + + mutex_unlock(&tbl->dmn->mutex); + + if (!ret) + refcount_dec(&matcher->refcount); + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c new file mode 100644 index 000000000000..c7f10d4f8f8d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -0,0 +1,978 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#include <linux/smp.h> +#include "dr_types.h" + +#define QUEUE_SIZE 128 +#define SIGNAL_PER_DIV_QUEUE 16 +#define TH_NUMS_TO_DRAIN 2 + +enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 }; + +struct dr_data_seg { + u64 addr; + u32 length; + u32 lkey; + unsigned int send_flags; +}; + +struct postsend_info { + struct dr_data_seg write; + struct dr_data_seg read; + u64 remote_addr; + u32 rkey; +}; + +struct dr_qp_rtr_attr { + struct mlx5dr_cmd_gid_attr dgid_attr; + enum ib_mtu mtu; + u32 qp_num; + u16 port_num; + u8 min_rnr_timer; + u8 sgid_index; + u16 udp_src_port; +}; + +struct dr_qp_rts_attr { + u8 timeout; + u8 retry_cnt; + u8 rnr_retry; +}; + +struct dr_qp_init_attr { + u32 cqn; + u32 pdn; + u32 max_send_wr; + struct mlx5_uars_page *uar; +}; + +static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64) +{ + unsigned int idx; + u8 opcode; + + opcode = get_cqe_opcode(cqe64); + if (opcode == MLX5_CQE_REQ_ERR) { + idx = be16_to_cpu(cqe64->wqe_counter) & + (dr_cq->qp->sq.wqe_cnt - 1); + dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1; + } else if (opcode == MLX5_CQE_RESP_ERR) { + ++dr_cq->qp->sq.cc; + } else { + idx = be16_to_cpu(cqe64->wqe_counter) & + (dr_cq->qp->sq.wqe_cnt - 1); + dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1; + + return CQ_OK; + } + + return CQ_POLL_ERR; +} + +static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq) +{ + struct mlx5_cqe64 *cqe64; + int err; + + cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq); + if (!cqe64) + return CQ_EMPTY; + + mlx5_cqwq_pop(&dr_cq->wq); + err = dr_parse_cqe(dr_cq, cqe64); + mlx5_cqwq_update_db_record(&dr_cq->wq); + + return err; +} + +static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne) +{ + int npolled; + int err = 0; + + for (npolled = 0; npolled < ne; ++npolled) { + err = dr_cq_poll_one(dr_cq); + if (err != CQ_OK) + break; + } + + return err == CQ_POLL_ERR ? 
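/* With QUEUE_SIZE 128 and SIGNAL_PER_DIV_QUEUE 16 defined above, the send
 * ring requests a completion only on every 128 / 16 = 8th posted WQE, and
 * dr_handle_pending_wc() further down starts draining the CQ once
 * TH_NUMS_TO_DRAIN * 8 = 16 WQEs are outstanding.  Standalone sketch of that
 * cadence, illustrative names only (signal_th is assumed non-zero).
 */
static int sketch_wqe_needs_signal(unsigned int pending_wqe,
                                   unsigned int signal_th)
{
        return (pending_wqe % signal_th) == 0;  /* every signal_th-th WQE */
}

static int sketch_queue_needs_drain(unsigned int pending_wqe,
                                    unsigned int signal_th,
                                    unsigned int drain_factor)
{
        return pending_wqe >= signal_th * drain_factor;
}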
err : npolled; +} + +static void dr_qp_event(struct mlx5_core_qp *mqp, int event) +{ + pr_info("DR QP event %u on QP #%u\n", event, mqp->qpn); +} + +static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, + struct dr_qp_init_attr *attr) +{ + u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {}; + struct mlx5_wq_param wqp; + struct mlx5dr_qp *dr_qp; + int inlen; + void *qpc; + void *in; + int err; + + dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL); + if (!dr_qp) + return NULL; + + wqp.buf_numa_node = mdev->priv.numa_node; + wqp.db_numa_node = mdev->priv.numa_node; + + dr_qp->rq.pc = 0; + dr_qp->rq.cc = 0; + dr_qp->rq.wqe_cnt = 4; + dr_qp->sq.pc = 0; + dr_qp->sq.cc = 0; + dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr); + + MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4); + MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt)); + MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt)); + err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq, + &dr_qp->wq_ctrl); + if (err) { + mlx5_core_info(mdev, "Can't create QP WQ\n"); + goto err_wq; + } + + dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt, + sizeof(dr_qp->sq.wqe_head[0]), + GFP_KERNEL); + + if (!dr_qp->sq.wqe_head) { + mlx5_core_warn(mdev, "Can't allocate wqe head\n"); + goto err_wqe_head; + } + + inlen = MLX5_ST_SZ_BYTES(create_qp_in) + + MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * + dr_qp->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_in; + } + + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, pd, attr->pdn); + MLX5_SET(qpc, qpc, uar_page, attr->uar->index); + MLX5_SET(qpc, qpc, log_page_size, + dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(qpc, qpc, fre, 1); + MLX5_SET(qpc, qpc, rlky, 1); + MLX5_SET(qpc, qpc, cqn_snd, attr->cqn); + MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn); + MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4); + MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt)); + MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ); + MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt)); + MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma); + if (MLX5_CAP_GEN(mdev, cqe_version) == 1) + MLX5_SET(qpc, qpc, user_index, 0xFFFFFF); + mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(create_qp_in, + in, pas)); + + err = mlx5_core_create_qp(mdev, &dr_qp->mqp, in, inlen); + kfree(in); + + if (err) { + mlx5_core_warn(mdev, " Can't create QP\n"); + goto err_in; + } + dr_qp->mqp.event = dr_qp_event; + dr_qp->uar = attr->uar; + + return dr_qp; + +err_in: + kfree(dr_qp->sq.wqe_head); +err_wqe_head: + mlx5_wq_destroy(&dr_qp->wq_ctrl); +err_wq: + kfree(dr_qp); + return NULL; +} + +static void dr_destroy_qp(struct mlx5_core_dev *mdev, + struct mlx5dr_qp *dr_qp) +{ + mlx5_core_destroy_qp(mdev, &dr_qp->mqp); + kfree(dr_qp->sq.wqe_head); + mlx5_wq_destroy(&dr_qp->wq_ctrl); + kfree(dr_qp); +} + +static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl) +{ + dma_wmb(); + *dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xfffff); + + /* After wmb() the hw aware of new work */ + wmb(); + + mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET); +} + +static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr, + u32 rkey, struct dr_data_seg *data_seg, + u32 opcode, int nreq) +{ + struct mlx5_wqe_raddr_seg *wq_raddr; + struct mlx5_wqe_ctrl_seg *wq_ctrl; + struct mlx5_wqe_data_seg 
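/* dr_create_rc_qp() above rounds the SQ size up to a power of two, so the
 * free-running producer/consumer counters can be mapped to ring slots with a
 * simple mask rather than a modulo, exactly as dr_parse_cqe() above and
 * dr_rdma_segments() below do with "counter & (wqe_cnt - 1)".  Standalone
 * sketch, illustrative names only.
 */
static unsigned int sketch_ring_slot(unsigned int counter,
                                     unsigned int ring_size_pow2)
{
        /* valid only because ring_size_pow2 is a power of two */
        return counter & (ring_size_pow2 - 1);
}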
*wq_dseg; + unsigned int size; + unsigned int idx; + + size = sizeof(*wq_ctrl) / 16 + sizeof(*wq_dseg) / 16 + + sizeof(*wq_raddr) / 16; + + idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1); + + wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx); + wq_ctrl->imm = 0; + wq_ctrl->fm_ce_se = (data_seg->send_flags) ? + MLX5_WQE_CTRL_CQ_UPDATE : 0; + wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) | + opcode); + wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->mqp.qpn << 8); + wq_raddr = (void *)(wq_ctrl + 1); + wq_raddr->raddr = cpu_to_be64(remote_addr); + wq_raddr->rkey = cpu_to_be32(rkey); + wq_raddr->reserved = 0; + + wq_dseg = (void *)(wq_raddr + 1); + wq_dseg->byte_count = cpu_to_be32(data_seg->length); + wq_dseg->lkey = cpu_to_be32(data_seg->lkey); + wq_dseg->addr = cpu_to_be64(data_seg->addr); + + dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++; + + if (nreq) + dr_cmd_notify_hw(dr_qp, wq_ctrl); +} + +static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info) +{ + dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey, + &send_info->write, MLX5_OPCODE_RDMA_WRITE, 0); + dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey, + &send_info->read, MLX5_OPCODE_RDMA_READ, 1); +} + +/** + * mlx5dr_send_fill_and_append_ste_send_info: Add data to be sent + * with send_list parameters: + * + * @ste: The data that attached to this specific ste + * @size: of data to write + * @offset: of the data from start of the hw_ste entry + * @data: data + * @ste_info: ste to be sent with send_list + * @send_list: to append into it + * @copy_data: if true indicates that the data should be kept because + * it's not backuped any where (like in re-hash). + * if false, it lets the data to be updated after + * it was added to the list. + */ +void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size, + u16 offset, u8 *data, + struct mlx5dr_ste_send_info *ste_info, + struct list_head *send_list, + bool copy_data) +{ + ste_info->size = size; + ste_info->ste = ste; + ste_info->offset = offset; + + if (copy_data) { + memcpy(ste_info->data_cont, data, size); + ste_info->data = ste_info->data_cont; + } else { + ste_info->data = data; + } + + list_add_tail(&ste_info->send_list, send_list); +} + +/* The function tries to consume one wc each time, unless the queue is full, in + * that case, which means that the hw is behind the sw in a full queue len + * the function will drain the cq till it empty. 
+ */ +static int dr_handle_pending_wc(struct mlx5dr_domain *dmn, + struct mlx5dr_send_ring *send_ring) +{ + bool is_drain = false; + int ne; + + if (send_ring->pending_wqe < send_ring->signal_th) + return 0; + + /* Queue is full start drain it */ + if (send_ring->pending_wqe >= + dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN) + is_drain = true; + + do { + ne = dr_poll_cq(send_ring->cq, 1); + if (ne < 0) + return ne; + else if (ne == 1) + send_ring->pending_wqe -= send_ring->signal_th; + } while (is_drain && send_ring->pending_wqe); + + return 0; +} + +static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring, + struct postsend_info *send_info) +{ + send_ring->pending_wqe++; + + if (send_ring->pending_wqe % send_ring->signal_th == 0) + send_info->write.send_flags |= IB_SEND_SIGNALED; + + send_ring->pending_wqe++; + send_info->read.length = send_info->write.length; + /* Read into the same write area */ + send_info->read.addr = (uintptr_t)send_info->write.addr; + send_info->read.lkey = send_ring->mr->mkey.key; + + if (send_ring->pending_wqe % send_ring->signal_th == 0) + send_info->read.send_flags = IB_SEND_SIGNALED; + else + send_info->read.send_flags = 0; +} + +static int dr_postsend_icm_data(struct mlx5dr_domain *dmn, + struct postsend_info *send_info) +{ + struct mlx5dr_send_ring *send_ring = dmn->send_ring; + u32 buff_offset; + int ret; + + ret = dr_handle_pending_wc(dmn, send_ring); + if (ret) + return ret; + + if (send_info->write.length > dmn->info.max_inline_size) { + buff_offset = (send_ring->tx_head & + (dmn->send_ring->signal_th - 1)) * + send_ring->max_post_send_size; + /* Copy to ring mr */ + memcpy(send_ring->buf + buff_offset, + (void *)(uintptr_t)send_info->write.addr, + send_info->write.length); + send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset; + send_info->write.lkey = send_ring->mr->mkey.key; + } + + send_ring->tx_head++; + dr_fill_data_segs(send_ring, send_info); + dr_post_send(send_ring->qp, send_info); + + return 0; +} + +static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_htbl *htbl, + u8 **data, + u32 *byte_size, + int *iterations, + int *num_stes) +{ + int alloc_size; + + if (htbl->chunk->byte_size > dmn->send_ring->max_post_send_size) { + *iterations = htbl->chunk->byte_size / + dmn->send_ring->max_post_send_size; + *byte_size = dmn->send_ring->max_post_send_size; + alloc_size = *byte_size; + *num_stes = *byte_size / DR_STE_SIZE; + } else { + *iterations = 1; + *num_stes = htbl->chunk->num_of_entries; + alloc_size = *num_stes * DR_STE_SIZE; + } + + *data = kzalloc(alloc_size, GFP_KERNEL); + if (!*data) + return -ENOMEM; + + return 0; +} + +/** + * mlx5dr_send_postsend_ste: write size bytes into offset from the hw cm. + * + * @dmn: Domain + * @ste: The ste struct that contains the data (at + * least part of it) + * @data: The real data to send size data + * @size: for writing. + * @offset: The offset from the icm mapped data to + * start write to this for write only part of the + * buffer. + * + * Return: 0 on success. 
+ */ +int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste, + u8 *data, u16 size, u16 offset) +{ + struct postsend_info send_info = {}; + + send_info.write.addr = (uintptr_t)data; + send_info.write.length = size; + send_info.write.lkey = 0; + send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset; + send_info.rkey = ste->htbl->chunk->rkey; + + return dr_postsend_icm_data(dmn, &send_info); +} + +int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_htbl *htbl, + u8 *formatted_ste, u8 *mask) +{ + u32 byte_size = htbl->chunk->byte_size; + int num_stes_per_iter; + int iterations; + u8 *data; + int ret; + int i; + int j; + + ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size, + &iterations, &num_stes_per_iter); + if (ret) + return ret; + + /* Send the data iteration times */ + for (i = 0; i < iterations; i++) { + u32 ste_index = i * (byte_size / DR_STE_SIZE); + struct postsend_info send_info = {}; + + /* Copy all ste's on the data buffer + * need to add the bit_mask + */ + for (j = 0; j < num_stes_per_iter; j++) { + u8 *hw_ste = htbl->ste_arr[ste_index + j].hw_ste; + u32 ste_off = j * DR_STE_SIZE; + + if (mlx5dr_ste_is_not_valid_entry(hw_ste)) { + memcpy(data + ste_off, + formatted_ste, DR_STE_SIZE); + } else { + /* Copy data */ + memcpy(data + ste_off, + htbl->ste_arr[ste_index + j].hw_ste, + DR_STE_SIZE_REDUCED); + /* Copy bit_mask */ + memcpy(data + ste_off + DR_STE_SIZE_REDUCED, + mask, DR_STE_SIZE_MASK); + } + } + + send_info.write.addr = (uintptr_t)data; + send_info.write.length = byte_size; + send_info.write.lkey = 0; + send_info.remote_addr = + mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index); + send_info.rkey = htbl->chunk->rkey; + + ret = dr_postsend_icm_data(dmn, &send_info); + if (ret) + goto out_free; + } + +out_free: + kfree(data); + return ret; +} + +/* Initialize htble with default STEs */ +int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_htbl *htbl, + u8 *ste_init_data, + bool update_hw_ste) +{ + u32 byte_size = htbl->chunk->byte_size; + int iterations; + int num_stes; + u8 *data; + int ret; + int i; + + ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size, + &iterations, &num_stes); + if (ret) + return ret; + + for (i = 0; i < num_stes; i++) { + u8 *copy_dst; + + /* Copy the same ste on the data buffer */ + copy_dst = data + i * DR_STE_SIZE; + memcpy(copy_dst, ste_init_data, DR_STE_SIZE); + + if (update_hw_ste) { + /* Copy the reduced ste to hash table ste_arr */ + copy_dst = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED; + memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED); + } + } + + /* Send the data iteration times */ + for (i = 0; i < iterations; i++) { + u8 ste_index = i * (byte_size / DR_STE_SIZE); + struct postsend_info send_info = {}; + + send_info.write.addr = (uintptr_t)data; + send_info.write.length = byte_size; + send_info.write.lkey = 0; + send_info.remote_addr = + mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index); + send_info.rkey = htbl->chunk->rkey; + + ret = dr_postsend_icm_data(dmn, &send_info); + if (ret) + goto out_free; + } + +out_free: + kfree(data); + return ret; +} + +int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn, + struct mlx5dr_action *action) +{ + struct postsend_info send_info = {}; + int ret; + + send_info.write.addr = (uintptr_t)action->rewrite.data; + send_info.write.length = action->rewrite.chunk->byte_size; + send_info.write.lkey = 0; + send_info.remote_addr = action->rewrite.chunk->mr_addr; + send_info.rkey = 
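/* mlx5dr_send_postsend_htbl() above pushes a whole hash table through the
 * send ring; when the table is larger than the ring's maximum post size the
 * copy is split into equal iterations, as computed by
 * dr_get_tbl_copy_details() above.  Standalone sketch of that split,
 * illustrative names only (ICM chunk and post sizes are powers of two, so
 * the division is assumed exact).
 */
static void sketch_tbl_copy_split(unsigned int tbl_bytes,
                                  unsigned int max_post_bytes,
                                  unsigned int *iterations,
                                  unsigned int *bytes_per_iter)
{
        if (tbl_bytes > max_post_bytes) {
                *iterations = tbl_bytes / max_post_bytes;
                *bytes_per_iter = max_post_bytes;
        } else {
                *iterations = 1;
                *bytes_per_iter = tbl_bytes;
        }
}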
action->rewrite.chunk->rkey; + + mutex_lock(&dmn->mutex); + ret = dr_postsend_icm_data(dmn, &send_info); + mutex_unlock(&dmn->mutex); + + return ret; +} + +static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev, + struct mlx5dr_qp *dr_qp, + int port) +{ + u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {}; + void *qpc; + + qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc); + + MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port); + MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED); + MLX5_SET(qpc, qpc, rre, 1); + MLX5_SET(qpc, qpc, rwe, 1); + + return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, qpc, + &dr_qp->mqp); +} + +static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev, + struct mlx5dr_qp *dr_qp, + struct dr_qp_rts_attr *attr) +{ + u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {}; + void *qpc; + + qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc); + + MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->mqp.qpn); + + MLX5_SET(qpc, qpc, log_ack_req_freq, 0); + MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt); + MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry); + + return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, qpc, + &dr_qp->mqp); +} + +static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev, + struct mlx5dr_qp *dr_qp, + struct dr_qp_rtr_attr *attr) +{ + u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {}; + void *qpc; + + qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc); + + MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->mqp.qpn); + + MLX5_SET(qpc, qpc, mtu, attr->mtu); + MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1); + MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num); + memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32), + attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac)); + memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip), + attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid)); + MLX5_SET(qpc, qpc, primary_address_path.src_addr_index, + attr->sgid_index); + + if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2) + MLX5_SET(qpc, qpc, primary_address_path.udp_sport, + attr->udp_src_port); + + MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num); + MLX5_SET(qpc, qpc, min_rnr_nak, 1); + + return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, qpc, + &dr_qp->mqp); +} + +static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn) +{ + struct mlx5dr_qp *dr_qp = dmn->send_ring->qp; + struct dr_qp_rts_attr rts_attr = {}; + struct dr_qp_rtr_attr rtr_attr = {}; + enum ib_mtu mtu = IB_MTU_1024; + u16 gid_index = 0; + int port = 1; + int ret; + + /* Init */ + ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port); + if (ret) + return ret; + + /* RTR */ + ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, &rtr_attr.dgid_attr); + if (ret) + return ret; + + rtr_attr.mtu = mtu; + rtr_attr.qp_num = dr_qp->mqp.qpn; + rtr_attr.min_rnr_timer = 12; + rtr_attr.port_num = port; + rtr_attr.sgid_index = gid_index; + rtr_attr.udp_src_port = dmn->info.caps.roce_min_src_udp; + + ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr); + if (ret) + return ret; + + /* RTS */ + rts_attr.timeout = 14; + rts_attr.retry_cnt = 7; + rts_attr.rnr_retry = 7; + + ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr); + if (ret) + return ret; + + return 0; +} + +static void dr_cq_event(struct mlx5_core_cq *mcq, + enum mlx5_event event) +{ + pr_info("CQ event %u on CQ #%u\n", event, mcq->cqn); +} + +static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, + struct mlx5_uars_page *uar, + size_t ncqe) +{ + u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {}; + u32 
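/* dr_prepare_qp_to_rts() above walks the send-ring QP through the standard
 * RC bring-up sequence: RESET -> INIT (dr_modify_qp_rst2init), INIT -> RTR
 * (dr_cmd_modify_qp_init2rtr) and RTR -> RTS (dr_cmd_modify_qp_rtr2rts).
 * Schematic only; the names below are illustrative, not driver API.
 */
enum sketch_qp_state {
        SKETCH_QP_RESET,
        SKETCH_QP_INIT,
        SKETCH_QP_RTR,
        SKETCH_QP_RTS,
};

static const enum sketch_qp_state sketch_qp_bringup_order[] = {
        SKETCH_QP_RESET, SKETCH_QP_INIT, SKETCH_QP_RTR, SKETCH_QP_RTS,
};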
out[MLX5_ST_SZ_DW(create_cq_out)]; + struct mlx5_wq_param wqp; + struct mlx5_cqe64 *cqe; + struct mlx5dr_cq *cq; + int inlen, err, eqn; + unsigned int irqn; + void *cqc, *in; + __be64 *pas; + int vector; + u32 i; + + cq = kzalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) + return NULL; + + ncqe = roundup_pow_of_two(ncqe); + MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe)); + + wqp.buf_numa_node = mdev->priv.numa_node; + wqp.db_numa_node = mdev->priv.numa_node; + + err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq, + &cq->wq_ctrl); + if (err) + goto out; + + for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { + cqe = mlx5_cqwq_get_wqe(&cq->wq, i); + cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK; + } + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + sizeof(u64) * cq->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + goto err_cqwq; + + vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev); + err = mlx5_vector2eqn(mdev, vector, &eqn, &irqn); + if (err) { + kvfree(in); + goto err_cqwq; + } + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe)); + MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET(cqc, cqc, uar_page, uar->index); + MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); + + pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); + mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas); + + cq->mcq.event = dr_cq_event; + + err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out)); + kvfree(in); + + if (err) + goto err_cqwq; + + cq->mcq.cqe_sz = 64; + cq->mcq.set_ci_db = cq->wq_ctrl.db.db; + cq->mcq.arm_db = cq->wq_ctrl.db.db + 1; + *cq->mcq.set_ci_db = 0; + *cq->mcq.arm_db = 0; + cq->mcq.vector = 0; + cq->mcq.irqn = irqn; + cq->mcq.uar = uar; + + return cq; + +err_cqwq: + mlx5_wq_destroy(&cq->wq_ctrl); +out: + kfree(cq); + return NULL; +} + +static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq) +{ + mlx5_core_destroy_cq(mdev, &cq->mcq); + mlx5_wq_destroy(&cq->wq_ctrl); + kfree(cq); +} + +static int +dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, struct mlx5_core_mkey *mkey) +{ + u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {}; + void *mkc; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, a, 1); + MLX5_SET(mkc, mkc, rw, 1); + MLX5_SET(mkc, mkc, rr, 1); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET(mkc, mkc, pd, pdn); + MLX5_SET(mkc, mkc, length64, 1); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + + return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in)); +} + +static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev, + u32 pdn, void *buf, size_t size) +{ + struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL); + struct device *dma_device; + dma_addr_t dma_addr; + int err; + + if (!mr) + return NULL; + + dma_device = &mdev->pdev->dev; + dma_addr = dma_map_single(dma_device, buf, size, + DMA_BIDIRECTIONAL); + err = dma_mapping_error(dma_device, dma_addr); + if (err) { + mlx5_core_warn(mdev, "Can't dma buf\n"); + kfree(mr); + return NULL; + } + + err = dr_create_mkey(mdev, pdn, &mr->mkey); + if (err) { + mlx5_core_warn(mdev, "Can't create mkey\n"); + dma_unmap_single(dma_device, dma_addr, size, + DMA_BIDIRECTIONAL); + kfree(mr); + return NULL; + } + + mr->dma_addr = dma_addr; + mr->size = size; + mr->addr = buf; + + return mr; +} + +static void dr_dereg_mr(struct 
mlx5_core_dev *mdev, struct mlx5dr_mr *mr) +{ + mlx5_core_destroy_mkey(mdev, &mr->mkey); + dma_unmap_single(&mdev->pdev->dev, mr->dma_addr, mr->size, + DMA_BIDIRECTIONAL); + kfree(mr); +} + +int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn) +{ + struct dr_qp_init_attr init_attr = {}; + int cq_size; + int size; + int ret; + + dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL); + if (!dmn->send_ring) + return -ENOMEM; + + cq_size = QUEUE_SIZE + 1; + dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size); + if (!dmn->send_ring->cq) { + ret = -ENOMEM; + goto free_send_ring; + } + + init_attr.cqn = dmn->send_ring->cq->mcq.cqn; + init_attr.pdn = dmn->pdn; + init_attr.uar = dmn->uar; + init_attr.max_send_wr = QUEUE_SIZE; + + dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr); + if (!dmn->send_ring->qp) { + ret = -ENOMEM; + goto clean_cq; + } + + dmn->send_ring->cq->qp = dmn->send_ring->qp; + + dmn->info.max_send_wr = QUEUE_SIZE; + dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data, + DR_STE_SIZE); + + dmn->send_ring->signal_th = dmn->info.max_send_wr / + SIGNAL_PER_DIV_QUEUE; + + /* Prepare qp to be used */ + ret = dr_prepare_qp_to_rts(dmn); + if (ret) + goto clean_qp; + + dmn->send_ring->max_post_send_size = + mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K, + DR_ICM_TYPE_STE); + + /* Allocating the max size as a buffer for writing */ + size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size; + dmn->send_ring->buf = kzalloc(size, GFP_KERNEL); + if (!dmn->send_ring->buf) { + ret = -ENOMEM; + goto clean_qp; + } + + dmn->send_ring->buf_size = size; + + dmn->send_ring->mr = dr_reg_mr(dmn->mdev, + dmn->pdn, dmn->send_ring->buf, size); + if (!dmn->send_ring->mr) { + ret = -ENOMEM; + goto free_mem; + } + + dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev, + dmn->pdn, dmn->send_ring->sync_buff, + MIN_READ_SYNC); + if (!dmn->send_ring->sync_mr) { + ret = -ENOMEM; + goto clean_mr; + } + + return 0; + +clean_mr: + dr_dereg_mr(dmn->mdev, dmn->send_ring->mr); +free_mem: + kfree(dmn->send_ring->buf); +clean_qp: + dr_destroy_qp(dmn->mdev, dmn->send_ring->qp); +clean_cq: + dr_destroy_cq(dmn->mdev, dmn->send_ring->cq); +free_send_ring: + kfree(dmn->send_ring); + + return ret; +} + +void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn, + struct mlx5dr_send_ring *send_ring) +{ + dr_destroy_qp(dmn->mdev, send_ring->qp); + dr_destroy_cq(dmn->mdev, send_ring->cq); + dr_dereg_mr(dmn->mdev, send_ring->sync_mr); + dr_dereg_mr(dmn->mdev, send_ring->mr); + kfree(send_ring->buf); + kfree(send_ring); +} + +int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn) +{ + struct mlx5dr_send_ring *send_ring = dmn->send_ring; + struct postsend_info send_info = {}; + u8 data[DR_STE_SIZE]; + int num_of_sends_req; + int ret; + int i; + + /* Sending this amount of requests makes sure we will get drain */ + num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2; + + /* Send fake requests forcing the last to be signaled */ + send_info.write.addr = (uintptr_t)data; + send_info.write.length = DR_STE_SIZE; + send_info.write.lkey = 0; + /* Using the sync_mr in order to write/read */ + send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr; + send_info.rkey = send_ring->sync_mr->mkey.key; + + for (i = 0; i < num_of_sends_req; i++) { + ret = dr_postsend_icm_data(dmn, &send_info); + if (ret) + return ret; + } + + ret = dr_handle_pending_wc(dmn, send_ring); + + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c 
b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c new file mode 100644 index 000000000000..c6c7d1defbd7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -0,0 +1,2362 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <linux/types.h> +#include <linux/crc32.h> +#include "dr_types.h" + +#define DR_STE_CRC_POLY 0xEDB88320L +#define STE_IPV4 0x1 +#define STE_IPV6 0x2 +#define STE_TCP 0x1 +#define STE_UDP 0x2 +#define STE_SPI 0x3 +#define IP_VERSION_IPV4 0x4 +#define IP_VERSION_IPV6 0x6 +#define STE_SVLAN 0x1 +#define STE_CVLAN 0x2 + +#define DR_STE_ENABLE_FLOW_TAG BIT(31) + +/* Set to STE a specific value using DR_STE_SET */ +#define DR_STE_SET_VAL(lookup_type, tag, t_fname, spec, s_fname, value) do { \ + if ((spec)->s_fname) { \ + MLX5_SET(ste_##lookup_type, tag, t_fname, value); \ + (spec)->s_fname = 0; \ + } \ +} while (0) + +/* Set to STE spec->s_fname to tag->t_fname */ +#define DR_STE_SET_TAG(lookup_type, tag, t_fname, spec, s_fname) \ + DR_STE_SET_VAL(lookup_type, tag, t_fname, spec, s_fname, spec->s_fname) + +/* Set to STE -1 to bit_mask->bm_fname and set spec->s_fname as used */ +#define DR_STE_SET_MASK(lookup_type, bit_mask, bm_fname, spec, s_fname) \ + DR_STE_SET_VAL(lookup_type, bit_mask, bm_fname, spec, s_fname, -1) + +/* Set to STE spec->s_fname to bit_mask->bm_fname and set spec->s_fname as used */ +#define DR_STE_SET_MASK_V(lookup_type, bit_mask, bm_fname, spec, s_fname) \ + DR_STE_SET_VAL(lookup_type, bit_mask, bm_fname, spec, s_fname, (spec)->s_fname) + +#define DR_STE_SET_TCP_FLAGS(lookup_type, tag, spec) do { \ + MLX5_SET(ste_##lookup_type, tag, tcp_ns, !!((spec)->tcp_flags & (1 << 8))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_cwr, !!((spec)->tcp_flags & (1 << 7))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_ece, !!((spec)->tcp_flags & (1 << 6))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_urg, !!((spec)->tcp_flags & (1 << 5))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_ack, !!((spec)->tcp_flags & (1 << 4))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_psh, !!((spec)->tcp_flags & (1 << 3))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_rst, !!((spec)->tcp_flags & (1 << 2))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_syn, !!((spec)->tcp_flags & (1 << 1))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_fin, !!((spec)->tcp_flags & (1 << 0))); \ +} while (0) + +#define DR_STE_SET_MPLS_MASK(lookup_type, mask, in_out, bit_mask) do { \ + DR_STE_SET_MASK_V(lookup_type, mask, mpls0_label, mask, \ + in_out##_first_mpls_label);\ + DR_STE_SET_MASK_V(lookup_type, mask, mpls0_s_bos, mask, \ + in_out##_first_mpls_s_bos); \ + DR_STE_SET_MASK_V(lookup_type, mask, mpls0_exp, mask, \ + in_out##_first_mpls_exp); \ + DR_STE_SET_MASK_V(lookup_type, mask, mpls0_ttl, mask, \ + in_out##_first_mpls_ttl); \ +} while (0) + +#define DR_STE_SET_MPLS_TAG(lookup_type, mask, in_out, tag) do { \ + DR_STE_SET_TAG(lookup_type, tag, mpls0_label, mask, \ + in_out##_first_mpls_label);\ + DR_STE_SET_TAG(lookup_type, tag, mpls0_s_bos, mask, \ + in_out##_first_mpls_s_bos); \ + DR_STE_SET_TAG(lookup_type, tag, mpls0_exp, mask, \ + in_out##_first_mpls_exp); \ + DR_STE_SET_TAG(lookup_type, tag, mpls0_ttl, mask, \ + in_out##_first_mpls_ttl); \ +} while (0) + +#define DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(_misc) (\ + (_misc)->outer_first_mpls_over_gre_label || \ + (_misc)->outer_first_mpls_over_gre_exp || \ + (_misc)->outer_first_mpls_over_gre_s_bos || \ + (_misc)->outer_first_mpls_over_gre_ttl) +#define 
DR_STE_IS_OUTER_MPLS_OVER_UDP_SET(_misc) (\ + (_misc)->outer_first_mpls_over_udp_label || \ + (_misc)->outer_first_mpls_over_udp_exp || \ + (_misc)->outer_first_mpls_over_udp_s_bos || \ + (_misc)->outer_first_mpls_over_udp_ttl) + +#define DR_STE_CALC_LU_TYPE(lookup_type, rx, inner) \ + ((inner) ? MLX5DR_STE_LU_TYPE_##lookup_type##_I : \ + (rx) ? MLX5DR_STE_LU_TYPE_##lookup_type##_D : \ + MLX5DR_STE_LU_TYPE_##lookup_type##_O) + +enum dr_ste_tunl_action { + DR_STE_TUNL_ACTION_NONE = 0, + DR_STE_TUNL_ACTION_ENABLE = 1, + DR_STE_TUNL_ACTION_DECAP = 2, + DR_STE_TUNL_ACTION_L3_DECAP = 3, + DR_STE_TUNL_ACTION_POP_VLAN = 4, +}; + +enum dr_ste_action_type { + DR_STE_ACTION_TYPE_PUSH_VLAN = 1, + DR_STE_ACTION_TYPE_ENCAP_L3 = 3, + DR_STE_ACTION_TYPE_ENCAP = 4, +}; + +struct dr_hw_ste_format { + u8 ctrl[DR_STE_SIZE_CTRL]; + u8 tag[DR_STE_SIZE_TAG]; + u8 mask[DR_STE_SIZE_MASK]; +}; + +static u32 dr_ste_crc32_calc(const void *input_data, size_t length) +{ + u32 crc = crc32(0, input_data, length); + + return htonl(crc); +} + +u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + u8 masked[DR_STE_SIZE_TAG] = {}; + u32 crc32, index; + u16 bit; + int i; + + /* Don't calculate CRC if the result is predicted */ + if (htbl->chunk->num_of_entries == 1 || htbl->byte_mask == 0) + return 0; + + /* Mask tag using byte mask, bit per byte */ + bit = 1 << (DR_STE_SIZE_TAG - 1); + for (i = 0; i < DR_STE_SIZE_TAG; i++) { + if (htbl->byte_mask & bit) + masked[i] = hw_ste->tag[i]; + + bit = bit >> 1; + } + + crc32 = dr_ste_crc32_calc(masked, DR_STE_SIZE_TAG); + index = crc32 & (htbl->chunk->num_of_entries - 1); + + return index; +} + +static u16 dr_ste_conv_bit_to_byte_mask(u8 *bit_mask) +{ + u16 byte_mask = 0; + int i; + + for (i = 0; i < DR_STE_SIZE_MASK; i++) { + byte_mask = byte_mask << 1; + if (bit_mask[i] == 0xff) + byte_mask |= 1; + } + return byte_mask; +} + +void mlx5dr_ste_set_bit_mask(u8 *hw_ste_p, u8 *bit_mask) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + + memcpy(hw_ste->mask, bit_mask, DR_STE_SIZE_MASK); +} + +void mlx5dr_ste_rx_set_flow_tag(u8 *hw_ste_p, u32 flow_tag) +{ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, qp_list_pointer, + DR_STE_ENABLE_FLOW_TAG | flow_tag); +} + +void mlx5dr_ste_set_counter_id(u8 *hw_ste_p, u32 ctr_id) +{ + /* This can be used for both rx_steering_mult and for sx_transmit */ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, counter_trigger_15_0, ctr_id); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, counter_trigger_23_16, ctr_id >> 16); +} + +void mlx5dr_ste_set_go_back_bit(u8 *hw_ste_p) +{ + MLX5_SET(ste_sx_transmit, hw_ste_p, go_back, 1); +} + +void mlx5dr_ste_set_tx_push_vlan(u8 *hw_ste_p, u32 vlan_hdr, + bool go_back) +{ + MLX5_SET(ste_sx_transmit, hw_ste_p, action_type, + DR_STE_ACTION_TYPE_PUSH_VLAN); + MLX5_SET(ste_sx_transmit, hw_ste_p, encap_pointer_vlan_data, vlan_hdr); + /* Due to HW limitation we need to set this bit, otherwise reforamt + + * push vlan will not work. + */ + if (go_back) + mlx5dr_ste_set_go_back_bit(hw_ste_p); +} + +void mlx5dr_ste_set_tx_encap(void *hw_ste_p, u32 reformat_id, int size, bool encap_l3) +{ + MLX5_SET(ste_sx_transmit, hw_ste_p, action_type, + encap_l3 ? 
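/* mlx5dr_ste_calc_hash_index() above hashes only the tag bytes selected by
 * the table's byte_mask (one mask bit per tag byte, most-significant bit
 * first), then folds the CRC into the power-of-two table with a mask; the
 * driver additionally byte-swaps the CRC to big-endian before masking.
 * Standalone sketch, illustrative names only; the CRC below uses the same
 * 0xEDB88320 polynomial, though the kernel crc32() helper's seed and
 * finalization conventions may differ.
 */
static unsigned int sketch_crc32(const unsigned char *data, unsigned int len)
{
        unsigned int crc = 0xFFFFFFFFu;
        unsigned int i;
        int bit;

        for (i = 0; i < len; i++) {
                crc ^= data[i];
                for (bit = 0; bit < 8; bit++)
                        crc = (crc >> 1) ^ (0xEDB88320u & (0u - (crc & 1u)));
        }
        return ~crc;
}

static unsigned int sketch_hash_index(const unsigned char *tag,
                                      unsigned int tag_len,
                                      unsigned int byte_mask,
                                      unsigned int num_entries_pow2)
{
        unsigned char masked[32];
        unsigned int len = tag_len < sizeof(masked) ?
                           tag_len : (unsigned int)sizeof(masked);
        unsigned int bit;
        unsigned int i;

        if (!len || !byte_mask || num_entries_pow2 == 1)
                return 0;       /* result is predictable, skip the CRC */

        bit = 1u << (len - 1);  /* MSB of the byte mask selects tag[0] */
        for (i = 0; i < len; i++) {
                masked[i] = (byte_mask & bit) ? tag[i] : 0;
                bit >>= 1;
        }

        return sketch_crc32(masked, len) & (num_entries_pow2 - 1);
}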
DR_STE_ACTION_TYPE_ENCAP_L3 : DR_STE_ACTION_TYPE_ENCAP); + /* The hardware expects here size in words (2 byte) */ + MLX5_SET(ste_sx_transmit, hw_ste_p, action_description, size / 2); + MLX5_SET(ste_sx_transmit, hw_ste_p, encap_pointer_vlan_data, reformat_id); +} + +void mlx5dr_ste_set_rx_decap(u8 *hw_ste_p) +{ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action, + DR_STE_TUNL_ACTION_DECAP); +} + +void mlx5dr_ste_set_rx_pop_vlan(u8 *hw_ste_p) +{ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action, + DR_STE_TUNL_ACTION_POP_VLAN); +} + +void mlx5dr_ste_set_rx_decap_l3(u8 *hw_ste_p, bool vlan) +{ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action, + DR_STE_TUNL_ACTION_L3_DECAP); + MLX5_SET(ste_modify_packet, hw_ste_p, action_description, vlan ? 1 : 0); +} + +void mlx5dr_ste_set_entry_type(u8 *hw_ste_p, u8 entry_type) +{ + MLX5_SET(ste_general, hw_ste_p, entry_type, entry_type); +} + +u8 mlx5dr_ste_get_entry_type(u8 *hw_ste_p) +{ + return MLX5_GET(ste_general, hw_ste_p, entry_type); +} + +void mlx5dr_ste_set_rewrite_actions(u8 *hw_ste_p, u16 num_of_actions, + u32 re_write_index) +{ + MLX5_SET(ste_modify_packet, hw_ste_p, number_of_re_write_actions, + num_of_actions); + MLX5_SET(ste_modify_packet, hw_ste_p, header_re_write_actions_pointer, + re_write_index); +} + +void mlx5dr_ste_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi) +{ + MLX5_SET(ste_general, hw_ste_p, next_table_base_63_48, gvmi); +} + +void mlx5dr_ste_init(u8 *hw_ste_p, u8 lu_type, u8 entry_type, + u16 gvmi) +{ + MLX5_SET(ste_general, hw_ste_p, entry_type, entry_type); + MLX5_SET(ste_general, hw_ste_p, entry_sub_type, lu_type); + MLX5_SET(ste_general, hw_ste_p, next_lu_type, MLX5DR_STE_LU_TYPE_DONT_CARE); + + /* Set GVMI once, this is the same for RX/TX + * bits 63_48 of next table base / miss address encode the next GVMI + */ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, gvmi, gvmi); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, next_table_base_63_48, gvmi); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_63_48, gvmi); +} + +static void dr_ste_set_always_hit(struct dr_hw_ste_format *hw_ste) +{ + memset(&hw_ste->tag, 0, sizeof(hw_ste->tag)); + memset(&hw_ste->mask, 0, sizeof(hw_ste->mask)); +} + +static void dr_ste_set_always_miss(struct dr_hw_ste_format *hw_ste) +{ + hw_ste->tag[0] = 0xdc; + hw_ste->mask[0] = 0; +} + +u64 mlx5dr_ste_get_miss_addr(u8 *hw_ste) +{ + u64 index = + (MLX5_GET(ste_rx_steering_mult, hw_ste, miss_address_31_6) | + MLX5_GET(ste_rx_steering_mult, hw_ste, miss_address_39_32) << 26); + + return index << 6; +} + +void mlx5dr_ste_set_hit_addr(u8 *hw_ste, u64 icm_addr, u32 ht_size) +{ + u64 index = (icm_addr >> 5) | ht_size; + + MLX5_SET(ste_general, hw_ste, next_table_base_39_32_size, index >> 27); + MLX5_SET(ste_general, hw_ste, next_table_base_31_5_size, index); +} + +u64 mlx5dr_ste_get_icm_addr(struct mlx5dr_ste *ste) +{ + u32 index = ste - ste->htbl->ste_arr; + + return ste->htbl->chunk->icm_addr + DR_STE_SIZE * index; +} + +u64 mlx5dr_ste_get_mr_addr(struct mlx5dr_ste *ste) +{ + u32 index = ste - ste->htbl->ste_arr; + + return ste->htbl->chunk->mr_addr + DR_STE_SIZE * index; +} + +struct list_head *mlx5dr_ste_get_miss_list(struct mlx5dr_ste *ste) +{ + u32 index = ste - ste->htbl->ste_arr; + + return &ste->htbl->miss_list[index]; +} + +static void dr_ste_always_hit_htbl(struct mlx5dr_ste *ste, + struct mlx5dr_ste_htbl *next_htbl) +{ + struct mlx5dr_icm_chunk *chunk = next_htbl->chunk; + u8 *hw_ste = ste->hw_ste; + + MLX5_SET(ste_general, hw_ste, byte_mask, next_htbl->byte_mask); + 
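/* mlx5dr_ste_get_miss_addr() above (and mlx5dr_ste_set_miss_addr() further
 * down) split a 64-byte-aligned ICM miss address across the STE's
 * miss_address_31_6 (26 bits) and miss_address_39_32 (8 bits) fields.
 * Standalone sketch of that packing, illustrative names only.
 */
static void sketch_pack_miss_addr(unsigned long long miss_addr,
                                  unsigned int *addr_31_6,
                                  unsigned int *addr_39_32)
{
        unsigned long long index = miss_addr >> 6;      /* STEs are 64B aligned */

        *addr_31_6 = (unsigned int)(index & 0x3FFFFFFu);        /* low 26 bits */
        *addr_39_32 = (unsigned int)(index >> 26);              /* next 8 bits */
}

static unsigned long long sketch_unpack_miss_addr(unsigned int addr_31_6,
                                                  unsigned int addr_39_32)
{
        unsigned long long index = addr_31_6 |
                                   ((unsigned long long)addr_39_32 << 26);

        return index << 6;
}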
MLX5_SET(ste_general, hw_ste, next_lu_type, next_htbl->lu_type); + mlx5dr_ste_set_hit_addr(hw_ste, chunk->icm_addr, chunk->num_of_entries); + + dr_ste_set_always_hit((struct dr_hw_ste_format *)ste->hw_ste); +} + +bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher, + u8 ste_location) +{ + return ste_location == nic_matcher->num_of_builders; +} + +/* Replace relevant fields, except of: + * htbl - keep the origin htbl + * miss_list + list - already took the src from the list. + * icm_addr/mr_addr - depends on the hosting table. + * + * Before: + * | a | -> | b | -> | c | -> + * + * After: + * | a | -> | c | -> + * While the data that was in b copied to a. + */ +static void dr_ste_replace(struct mlx5dr_ste *dst, struct mlx5dr_ste *src) +{ + memcpy(dst->hw_ste, src->hw_ste, DR_STE_SIZE_REDUCED); + dst->next_htbl = src->next_htbl; + if (dst->next_htbl) + dst->next_htbl->pointing_ste = dst; + + dst->refcount = src->refcount; + + INIT_LIST_HEAD(&dst->rule_list); + list_splice_tail_init(&src->rule_list, &dst->rule_list); +} + +/* Free ste which is the head and the only one in miss_list */ +static void +dr_ste_remove_head_ste(struct mlx5dr_ste *ste, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste_send_info *ste_info_head, + struct list_head *send_ste_list, + struct mlx5dr_ste_htbl *stats_tbl) +{ + u8 tmp_data_ste[DR_STE_SIZE] = {}; + struct mlx5dr_ste tmp_ste = {}; + u64 miss_addr; + + tmp_ste.hw_ste = tmp_data_ste; + + /* Use temp ste because dr_ste_always_miss_addr + * touches bit_mask area which doesn't exist at ste->hw_ste. + */ + memcpy(tmp_ste.hw_ste, ste->hw_ste, DR_STE_SIZE_REDUCED); + miss_addr = nic_matcher->e_anchor->chunk->icm_addr; + mlx5dr_ste_always_miss_addr(&tmp_ste, miss_addr); + memcpy(ste->hw_ste, tmp_ste.hw_ste, DR_STE_SIZE_REDUCED); + + list_del_init(&ste->miss_list_node); + + /* Write full STE size in order to have "always_miss" */ + mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE, + 0, tmp_data_ste, + ste_info_head, + send_ste_list, + true /* Copy data */); + + stats_tbl->ctrl.num_of_valid_entries--; +} + +/* Free ste which is the head but NOT the only one in miss_list: + * |_ste_| --> |_next_ste_| -->|__| -->|__| -->/0 + */ +static void +dr_ste_replace_head_ste(struct mlx5dr_ste *ste, struct mlx5dr_ste *next_ste, + struct mlx5dr_ste_send_info *ste_info_head, + struct list_head *send_ste_list, + struct mlx5dr_ste_htbl *stats_tbl) + +{ + struct mlx5dr_ste_htbl *next_miss_htbl; + + next_miss_htbl = next_ste->htbl; + + /* Remove from the miss_list the next_ste before copy */ + list_del_init(&next_ste->miss_list_node); + + /* All rule-members that use next_ste should know about that */ + mlx5dr_rule_update_rule_member(next_ste, ste); + + /* Move data from next into ste */ + dr_ste_replace(ste, next_ste); + + /* Del the htbl that contains the next_ste. + * The origin htbl stay with the same number of entries. 
+ */ + mlx5dr_htbl_put(next_miss_htbl); + + mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE_REDUCED, + 0, ste->hw_ste, + ste_info_head, + send_ste_list, + true /* Copy data */); + + stats_tbl->ctrl.num_of_collisions--; + stats_tbl->ctrl.num_of_valid_entries--; +} + +/* Free ste that is located in the middle of the miss list: + * |__| -->|_prev_ste_|->|_ste_|-->|_next_ste_| + */ +static void dr_ste_remove_middle_ste(struct mlx5dr_ste *ste, + struct mlx5dr_ste_send_info *ste_info, + struct list_head *send_ste_list, + struct mlx5dr_ste_htbl *stats_tbl) +{ + struct mlx5dr_ste *prev_ste; + u64 miss_addr; + + prev_ste = list_prev_entry(ste, miss_list_node); + if (WARN_ON(!prev_ste)) + return; + + miss_addr = mlx5dr_ste_get_miss_addr(ste->hw_ste); + mlx5dr_ste_set_miss_addr(prev_ste->hw_ste, miss_addr); + + mlx5dr_send_fill_and_append_ste_send_info(prev_ste, DR_STE_SIZE_REDUCED, 0, + prev_ste->hw_ste, ste_info, + send_ste_list, true /* Copy data*/); + + list_del_init(&ste->miss_list_node); + + stats_tbl->ctrl.num_of_valid_entries--; + stats_tbl->ctrl.num_of_collisions--; +} + +void mlx5dr_ste_free(struct mlx5dr_ste *ste, + struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher) +{ + struct mlx5dr_ste_send_info *cur_ste_info, *tmp_ste_info; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste_send_info ste_info_head; + struct mlx5dr_ste *next_ste, *first_ste; + bool put_on_origin_table = true; + struct mlx5dr_ste_htbl *stats_tbl; + LIST_HEAD(send_ste_list); + + first_ste = list_first_entry(mlx5dr_ste_get_miss_list(ste), + struct mlx5dr_ste, miss_list_node); + stats_tbl = first_ste->htbl; + + /* Two options: + * 1. ste is head: + * a. head ste is the only ste in the miss list + * b. head ste is not the only ste in the miss-list + * 2. 
ste is not head + */ + if (first_ste == ste) { /* Ste is the head */ + struct mlx5dr_ste *last_ste; + + last_ste = list_last_entry(mlx5dr_ste_get_miss_list(ste), + struct mlx5dr_ste, miss_list_node); + if (last_ste == first_ste) + next_ste = NULL; + else + next_ste = list_next_entry(ste, miss_list_node); + + if (!next_ste) { + /* One and only entry in the list */ + dr_ste_remove_head_ste(ste, nic_matcher, + &ste_info_head, + &send_ste_list, + stats_tbl); + } else { + /* First but not only entry in the list */ + dr_ste_replace_head_ste(ste, next_ste, &ste_info_head, + &send_ste_list, stats_tbl); + put_on_origin_table = false; + } + } else { /* Ste in the middle of the list */ + dr_ste_remove_middle_ste(ste, &ste_info_head, &send_ste_list, stats_tbl); + } + + /* Update HW */ + list_for_each_entry_safe(cur_ste_info, tmp_ste_info, + &send_ste_list, send_list) { + list_del(&cur_ste_info->send_list); + mlx5dr_send_postsend_ste(dmn, cur_ste_info->ste, + cur_ste_info->data, cur_ste_info->size, + cur_ste_info->offset); + } + + if (put_on_origin_table) + mlx5dr_htbl_put(ste->htbl); +} + +bool mlx5dr_ste_equal_tag(void *src, void *dst) +{ + struct dr_hw_ste_format *s_hw_ste = (struct dr_hw_ste_format *)src; + struct dr_hw_ste_format *d_hw_ste = (struct dr_hw_ste_format *)dst; + + return !memcmp(s_hw_ste->tag, d_hw_ste->tag, DR_STE_SIZE_TAG); +} + +void mlx5dr_ste_set_hit_addr_by_next_htbl(u8 *hw_ste, + struct mlx5dr_ste_htbl *next_htbl) +{ + struct mlx5dr_icm_chunk *chunk = next_htbl->chunk; + + mlx5dr_ste_set_hit_addr(hw_ste, chunk->icm_addr, chunk->num_of_entries); +} + +void mlx5dr_ste_set_miss_addr(u8 *hw_ste_p, u64 miss_addr) +{ + u64 index = miss_addr >> 6; + + /* Miss address for TX and RX STEs located in the same offsets */ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_39_32, index >> 26); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_31_6, index); +} + +void mlx5dr_ste_always_miss_addr(struct mlx5dr_ste *ste, u64 miss_addr) +{ + u8 *hw_ste = ste->hw_ste; + + MLX5_SET(ste_rx_steering_mult, hw_ste, next_lu_type, MLX5DR_STE_LU_TYPE_DONT_CARE); + mlx5dr_ste_set_miss_addr(hw_ste, miss_addr); + dr_ste_set_always_miss((struct dr_hw_ste_format *)ste->hw_ste); +} + +/* The assumption here is that we don't update the ste->hw_ste if it is not + * used ste, so it will be all zero, checking the next_lu_type. 
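
mlx5dr_ste_set_miss_addr() above stores a 64-byte-aligned ICM address as a block index (the ">> 6") split across two fields named after the address bits they carry: miss_address_31_6 (26 bits) and miss_address_39_32 (8 bits). A standalone sketch of that split and its inverse, with plain structs instead of MLX5_SET():

#include <stdio.h>
#include <stdint.h>

struct miss_addr_fields {
	uint32_t addr_31_6;	/* low 26 bits of (addr >> 6)  */
	uint8_t addr_39_32;	/* next 8 bits of (addr >> 6)  */
};

static struct miss_addr_fields split_miss_addr(uint64_t icm_addr)
{
	uint64_t index = icm_addr >> 6;	/* drop the 64-byte alignment bits */

	return (struct miss_addr_fields){
		.addr_31_6 = (uint32_t)(index & ((1u << 26) - 1)),
		.addr_39_32 = (uint8_t)(index >> 26),
	};
}

static uint64_t join_miss_addr(struct miss_addr_fields f)
{
	return (((uint64_t)f.addr_39_32 << 26) | f.addr_31_6) << 6;
}

int main(void)
{
	uint64_t addr = 0x12345678c0ULL;	/* arbitrary 64-byte-aligned address */
	struct miss_addr_fields f = split_miss_addr(addr);

	printf("round trip: %#llx -> %#llx\n",
	       (unsigned long long)addr,
	       (unsigned long long)join_miss_addr(f));
	return 0;
}
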
+ */ +bool mlx5dr_ste_is_not_valid_entry(u8 *p_hw_ste) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)p_hw_ste; + + if (MLX5_GET(ste_general, hw_ste, next_lu_type) == + MLX5DR_STE_LU_TYPE_NOP) + return true; + + return false; +} + +bool mlx5dr_ste_not_used_ste(struct mlx5dr_ste *ste) +{ + return !ste->refcount; +} + +/* Init one ste as a pattern for ste data array */ +void mlx5dr_ste_set_formatted_ste(u16 gvmi, + struct mlx5dr_domain_rx_tx *nic_dmn, + struct mlx5dr_ste_htbl *htbl, + u8 *formatted_ste, + struct mlx5dr_htbl_connect_info *connect_info) +{ + struct mlx5dr_ste ste = {}; + + mlx5dr_ste_init(formatted_ste, htbl->lu_type, nic_dmn->ste_type, gvmi); + ste.hw_ste = formatted_ste; + + if (connect_info->type == CONNECT_HIT) + dr_ste_always_hit_htbl(&ste, connect_info->hit_next_htbl); + else + mlx5dr_ste_always_miss_addr(&ste, connect_info->miss_icm_addr); +} + +int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn, + struct mlx5dr_domain_rx_tx *nic_dmn, + struct mlx5dr_ste_htbl *htbl, + struct mlx5dr_htbl_connect_info *connect_info, + bool update_hw_ste) +{ + u8 formatted_ste[DR_STE_SIZE] = {}; + + mlx5dr_ste_set_formatted_ste(dmn->info.caps.gvmi, + nic_dmn, + htbl, + formatted_ste, + connect_info); + + return mlx5dr_send_postsend_formatted_htbl(dmn, htbl, formatted_ste, update_hw_ste); +} + +int mlx5dr_ste_create_next_htbl(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *ste, + u8 *cur_hw_ste, + enum mlx5dr_icm_chunk_size log_table_size) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)cur_hw_ste; + struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_ste_htbl *next_htbl; + + if (!mlx5dr_ste_is_last_in_rule(nic_matcher, ste->ste_chain_location)) { + u8 next_lu_type; + u16 byte_mask; + + next_lu_type = MLX5_GET(ste_general, hw_ste, next_lu_type); + byte_mask = MLX5_GET(ste_general, hw_ste, byte_mask); + + next_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + log_table_size, + next_lu_type, + byte_mask); + if (!next_htbl) { + mlx5dr_dbg(dmn, "Failed allocating table\n"); + return -ENOMEM; + } + + /* Write new table to HW */ + info.type = CONNECT_MISS; + info.miss_icm_addr = nic_matcher->e_anchor->chunk->icm_addr; + if (mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, next_htbl, + &info, false)) { + mlx5dr_info(dmn, "Failed writing table to HW\n"); + goto free_table; + } + + mlx5dr_ste_set_hit_addr_by_next_htbl(cur_hw_ste, next_htbl); + ste->next_htbl = next_htbl; + next_htbl->pointing_ste = ste; + } + + return 0; + +free_table: + mlx5dr_ste_htbl_free(next_htbl); + return -ENOENT; +} + +static void dr_ste_set_ctrl(struct mlx5dr_ste_htbl *htbl) +{ + struct mlx5dr_ste_htbl_ctrl *ctrl = &htbl->ctrl; + int num_of_entries; + + htbl->ctrl.may_grow = true; + + if (htbl->chunk_size == DR_CHUNK_SIZE_MAX - 1 || !htbl->byte_mask) + htbl->ctrl.may_grow = false; + + /* Threshold is 50%, one is added to table of size 1 */ + num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(htbl->chunk_size); + ctrl->increase_threshold = (num_of_entries + 1) / 2; +} + +struct mlx5dr_ste_htbl *mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size, + u8 lu_type, u16 byte_mask) +{ + struct mlx5dr_icm_chunk *chunk; + struct mlx5dr_ste_htbl *htbl; + int i; + + htbl = kzalloc(sizeof(*htbl), GFP_KERNEL); + if (!htbl) + return NULL; + + chunk = mlx5dr_icm_alloc_chunk(pool, 
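
dr_ste_set_ctrl() above arms a hash table for rehashing once half of its entries are in use (the "+ 1" keeps a one-entry table from ending up with a threshold of zero), and disables growth for tables already at the largest chunk size or with an empty byte mask. A quick sketch of the threshold arithmetic, assuming for illustration that chunk size n corresponds to 2^n entries:

#include <stdio.h>

/* Illustration only: assume chunk size n holds 2^n STEs. */
static int chunk_entries(int chunk_size)
{
	return 1 << chunk_size;
}

/* Same rule as dr_ste_set_ctrl(): grow once half the entries are used;
 * the "+ 1" gives a 1-entry table a threshold of 1 instead of 0.
 */
static int increase_threshold(int num_of_entries)
{
	return (num_of_entries + 1) / 2;
}

int main(void)
{
	int sizes[] = { 0, 1, 4, 10 };

	for (unsigned int i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		int n = chunk_entries(sizes[i]);

		printf("chunk_size %2d: %5d entries, rehash threshold %d\n",
		       sizes[i], n, increase_threshold(n));
	}
	return 0;
}
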
chunk_size); + if (!chunk) + goto out_free_htbl; + + htbl->chunk = chunk; + htbl->lu_type = lu_type; + htbl->byte_mask = byte_mask; + htbl->ste_arr = chunk->ste_arr; + htbl->hw_ste_arr = chunk->hw_ste_arr; + htbl->miss_list = chunk->miss_list; + htbl->refcount = 0; + + for (i = 0; i < chunk->num_of_entries; i++) { + struct mlx5dr_ste *ste = &htbl->ste_arr[i]; + + ste->hw_ste = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED; + ste->htbl = htbl; + ste->refcount = 0; + INIT_LIST_HEAD(&ste->miss_list_node); + INIT_LIST_HEAD(&htbl->miss_list[i]); + INIT_LIST_HEAD(&ste->rule_list); + } + + htbl->chunk_size = chunk_size; + dr_ste_set_ctrl(htbl); + return htbl; + +out_free_htbl: + kfree(htbl); + return NULL; +} + +int mlx5dr_ste_htbl_free(struct mlx5dr_ste_htbl *htbl) +{ + if (htbl->refcount) + return -EBUSY; + + mlx5dr_icm_free_chunk(htbl->chunk); + kfree(htbl); + return 0; +} + +int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn, + u8 match_criteria, + struct mlx5dr_match_param *mask, + struct mlx5dr_match_param *value) +{ + if (!value && (match_criteria & DR_MATCHER_CRITERIA_MISC)) { + if (mask->misc.source_port && mask->misc.source_port != 0xffff) { + mlx5dr_dbg(dmn, "Partial mask source_port is not supported\n"); + return -EINVAL; + } + } + + return 0; +} + +int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_match_param *value, + u8 *ste_arr) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste_build *sb; + int ret, i; + + ret = mlx5dr_ste_build_pre_check(dmn, matcher->match_criteria, + &matcher->mask, value); + if (ret) + return ret; + + sb = nic_matcher->ste_builder; + for (i = 0; i < nic_matcher->num_of_builders; i++) { + mlx5dr_ste_init(ste_arr, + sb->lu_type, + nic_dmn->ste_type, + dmn->info.caps.gvmi); + + mlx5dr_ste_set_bit_mask(ste_arr, sb->bit_mask); + + ret = sb->ste_build_tag_func(value, sb, ste_arr); + if (ret) + return ret; + + /* Connect the STEs */ + if (i < (nic_matcher->num_of_builders - 1)) { + /* Need the next builder for these fields, + * not relevant for the last ste in the chain. + */ + sb++; + MLX5_SET(ste_general, ste_arr, next_lu_type, sb->lu_type); + MLX5_SET(ste_general, ste_arr, byte_mask, sb->byte_mask); + } + ste_arr += DR_STE_SIZE; + } + return 0; +} + +static int dr_ste_build_eth_l2_src_des_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? 
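
mlx5dr_ste_build_ste_arr() above emits one STE per builder and chains them by writing the *next* builder's lookup type and byte mask into the current STE; connecting the last STE is left to the caller. A simplified sketch of that chaining loop over a hypothetical builder array (not the driver's types):

#include <stdio.h>

#define MAX_BUILDERS 8

/* Hypothetical, simplified stand-ins for the driver's builder/STE types. */
struct toy_builder {
	int lu_type;
	unsigned int byte_mask;
};

struct toy_ste {
	int lu_type;		/* what this STE matches on        */
	int next_lu_type;	/* how HW parses the *next* STE    */
	unsigned int next_byte_mask;
};

static void build_ste_chain(const struct toy_builder *sb, int num_builders,
			    struct toy_ste *arr)
{
	for (int i = 0; i < num_builders; i++) {
		arr[i].lu_type = sb[i].lu_type;

		/* Connect to the following STE; the last one in the rule
		 * is left for the caller to hook up.
		 */
		if (i < num_builders - 1) {
			arr[i].next_lu_type = sb[i + 1].lu_type;
			arr[i].next_byte_mask = sb[i + 1].byte_mask;
		}
	}
}

int main(void)
{
	struct toy_builder sb[] = { { 1, 0x30 }, { 2, 0x0c }, { 3, 0x03 } };
	struct toy_ste arr[MAX_BUILDERS] = { 0 };

	build_ste_chain(sb, 3, arr);
	for (int i = 0; i < 3; i++)
		printf("ste[%d]: lu=%d next_lu=%d next_mask=%#x\n",
		       i, arr[i].lu_type, arr[i].next_lu_type,
		       arr[i].next_byte_mask);
	return 0;
}
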
&value->inner : &value->outer; + + DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, dmac_47_16, mask, dmac_47_16); + DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, dmac_15_0, mask, dmac_15_0); + + if (mask->smac_47_16 || mask->smac_15_0) { + MLX5_SET(ste_eth_l2_src_dst, bit_mask, smac_47_32, + mask->smac_47_16 >> 16); + MLX5_SET(ste_eth_l2_src_dst, bit_mask, smac_31_0, + mask->smac_47_16 << 16 | mask->smac_15_0); + mask->smac_47_16 = 0; + mask->smac_15_0 = 0; + } + + DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, first_vlan_id, mask, first_vid); + DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, first_cfi, mask, first_cfi); + DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, first_priority, mask, first_prio); + DR_STE_SET_MASK(eth_l2_src_dst, bit_mask, l3_type, mask, ip_version); + + if (mask->cvlan_tag) { + MLX5_SET(ste_eth_l2_src_dst, bit_mask, first_vlan_qualifier, -1); + mask->cvlan_tag = 0; + } else if (mask->svlan_tag) { + MLX5_SET(ste_eth_l2_src_dst, bit_mask, first_vlan_qualifier, -1); + mask->svlan_tag = 0; + } + + if (mask->cvlan_tag || mask->svlan_tag) { + pr_info("Invalid c/svlan mask configuration\n"); + return -EINVAL; + } + + return 0; +} + +static void dr_ste_copy_mask_misc(char *mask, struct mlx5dr_match_misc *spec) +{ + spec->gre_c_present = MLX5_GET(fte_match_set_misc, mask, gre_c_present); + spec->gre_k_present = MLX5_GET(fte_match_set_misc, mask, gre_k_present); + spec->gre_s_present = MLX5_GET(fte_match_set_misc, mask, gre_s_present); + spec->source_vhca_port = MLX5_GET(fte_match_set_misc, mask, source_vhca_port); + spec->source_sqn = MLX5_GET(fte_match_set_misc, mask, source_sqn); + + spec->source_port = MLX5_GET(fte_match_set_misc, mask, source_port); + spec->source_eswitch_owner_vhca_id = MLX5_GET(fte_match_set_misc, mask, + source_eswitch_owner_vhca_id); + + spec->outer_second_prio = MLX5_GET(fte_match_set_misc, mask, outer_second_prio); + spec->outer_second_cfi = MLX5_GET(fte_match_set_misc, mask, outer_second_cfi); + spec->outer_second_vid = MLX5_GET(fte_match_set_misc, mask, outer_second_vid); + spec->inner_second_prio = MLX5_GET(fte_match_set_misc, mask, inner_second_prio); + spec->inner_second_cfi = MLX5_GET(fte_match_set_misc, mask, inner_second_cfi); + spec->inner_second_vid = MLX5_GET(fte_match_set_misc, mask, inner_second_vid); + + spec->outer_second_cvlan_tag = + MLX5_GET(fte_match_set_misc, mask, outer_second_cvlan_tag); + spec->inner_second_cvlan_tag = + MLX5_GET(fte_match_set_misc, mask, inner_second_cvlan_tag); + spec->outer_second_svlan_tag = + MLX5_GET(fte_match_set_misc, mask, outer_second_svlan_tag); + spec->inner_second_svlan_tag = + MLX5_GET(fte_match_set_misc, mask, inner_second_svlan_tag); + + spec->gre_protocol = MLX5_GET(fte_match_set_misc, mask, gre_protocol); + + spec->gre_key_h = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.hi); + spec->gre_key_l = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.lo); + + spec->vxlan_vni = MLX5_GET(fte_match_set_misc, mask, vxlan_vni); + + spec->geneve_vni = MLX5_GET(fte_match_set_misc, mask, geneve_vni); + spec->geneve_oam = MLX5_GET(fte_match_set_misc, mask, geneve_oam); + + spec->outer_ipv6_flow_label = + MLX5_GET(fte_match_set_misc, mask, outer_ipv6_flow_label); + + spec->inner_ipv6_flow_label = + MLX5_GET(fte_match_set_misc, mask, inner_ipv6_flow_label); + + spec->geneve_opt_len = MLX5_GET(fte_match_set_misc, mask, geneve_opt_len); + spec->geneve_protocol_type = + MLX5_GET(fte_match_set_misc, mask, geneve_protocol_type); + + spec->bth_dst_qp = MLX5_GET(fte_match_set_misc, mask, bth_dst_qp); +} + +static void 
dr_ste_copy_mask_spec(char *mask, struct mlx5dr_match_spec *spec) +{ + u32 raw_ip[4]; + + spec->smac_47_16 = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_47_16); + + spec->smac_15_0 = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_15_0); + spec->ethertype = MLX5_GET(fte_match_set_lyr_2_4, mask, ethertype); + + spec->dmac_47_16 = MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_47_16); + + spec->dmac_15_0 = MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_15_0); + spec->first_prio = MLX5_GET(fte_match_set_lyr_2_4, mask, first_prio); + spec->first_cfi = MLX5_GET(fte_match_set_lyr_2_4, mask, first_cfi); + spec->first_vid = MLX5_GET(fte_match_set_lyr_2_4, mask, first_vid); + + spec->ip_protocol = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_protocol); + spec->ip_dscp = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_dscp); + spec->ip_ecn = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_ecn); + spec->cvlan_tag = MLX5_GET(fte_match_set_lyr_2_4, mask, cvlan_tag); + spec->svlan_tag = MLX5_GET(fte_match_set_lyr_2_4, mask, svlan_tag); + spec->frag = MLX5_GET(fte_match_set_lyr_2_4, mask, frag); + spec->ip_version = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_version); + spec->tcp_flags = MLX5_GET(fte_match_set_lyr_2_4, mask, tcp_flags); + spec->tcp_sport = MLX5_GET(fte_match_set_lyr_2_4, mask, tcp_sport); + spec->tcp_dport = MLX5_GET(fte_match_set_lyr_2_4, mask, tcp_dport); + + spec->ttl_hoplimit = MLX5_GET(fte_match_set_lyr_2_4, mask, ttl_hoplimit); + + spec->udp_sport = MLX5_GET(fte_match_set_lyr_2_4, mask, udp_sport); + spec->udp_dport = MLX5_GET(fte_match_set_lyr_2_4, mask, udp_dport); + + memcpy(raw_ip, MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + sizeof(raw_ip)); + + spec->src_ip_127_96 = be32_to_cpu(raw_ip[0]); + spec->src_ip_95_64 = be32_to_cpu(raw_ip[1]); + spec->src_ip_63_32 = be32_to_cpu(raw_ip[2]); + spec->src_ip_31_0 = be32_to_cpu(raw_ip[3]); + + memcpy(raw_ip, MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + sizeof(raw_ip)); + + spec->dst_ip_127_96 = be32_to_cpu(raw_ip[0]); + spec->dst_ip_95_64 = be32_to_cpu(raw_ip[1]); + spec->dst_ip_63_32 = be32_to_cpu(raw_ip[2]); + spec->dst_ip_31_0 = be32_to_cpu(raw_ip[3]); +} + +static void dr_ste_copy_mask_misc2(char *mask, struct mlx5dr_match_misc2 *spec) +{ + spec->outer_first_mpls_label = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_label); + spec->outer_first_mpls_exp = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_exp); + spec->outer_first_mpls_s_bos = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_s_bos); + spec->outer_first_mpls_ttl = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_ttl); + spec->inner_first_mpls_label = + MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_label); + spec->inner_first_mpls_exp = + MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_exp); + spec->inner_first_mpls_s_bos = + MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_s_bos); + spec->inner_first_mpls_ttl = + MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_ttl); + spec->outer_first_mpls_over_gre_label = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_label); + spec->outer_first_mpls_over_gre_exp = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_exp); + spec->outer_first_mpls_over_gre_s_bos = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_s_bos); + spec->outer_first_mpls_over_gre_ttl = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_ttl); + 
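
dr_ste_copy_mask_spec() above reads the 128-bit IPv6 mask as four big-endian 32-bit words and stores them host-ordered as src_ip_127_96 down to src_ip_31_0 (likewise for the destination). A standalone sketch of that conversion, using manual byte loads in place of be32_to_cpu() so it runs anywhere:

#include <stdio.h>
#include <stdint.h>

static uint32_t load_be32(const uint8_t *p)
{
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	       ((uint32_t)p[2] << 8) | p[3];
}

/* Split a 16-byte, network-order IPv6 address into the four host-order
 * words the match_spec uses (bits 127:96 down to 31:0).
 */
static void ipv6_to_words(const uint8_t addr[16], uint32_t out[4])
{
	for (int i = 0; i < 4; i++)
		out[i] = load_be32(addr + 4 * i);
}

int main(void)
{
	/* 2001:db8::1 */
	const uint8_t addr[16] = { 0x20, 0x01, 0x0d, 0xb8, [15] = 0x01 };
	uint32_t w[4];

	ipv6_to_words(addr, w);
	printf("ip_127_96=%#x ip_95_64=%#x ip_63_32=%#x ip_31_0=%#x\n",
	       w[0], w[1], w[2], w[3]);
	return 0;
}
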
spec->outer_first_mpls_over_udp_label = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_label); + spec->outer_first_mpls_over_udp_exp = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_exp); + spec->outer_first_mpls_over_udp_s_bos = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_s_bos); + spec->outer_first_mpls_over_udp_ttl = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_ttl); + spec->metadata_reg_c_7 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_7); + spec->metadata_reg_c_6 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_6); + spec->metadata_reg_c_5 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_5); + spec->metadata_reg_c_4 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_4); + spec->metadata_reg_c_3 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_3); + spec->metadata_reg_c_2 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_2); + spec->metadata_reg_c_1 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_1); + spec->metadata_reg_c_0 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_0); + spec->metadata_reg_a = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_a); + spec->metadata_reg_b = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_b); +} + +static void dr_ste_copy_mask_misc3(char *mask, struct mlx5dr_match_misc3 *spec) +{ + spec->inner_tcp_seq_num = MLX5_GET(fte_match_set_misc3, mask, inner_tcp_seq_num); + spec->outer_tcp_seq_num = MLX5_GET(fte_match_set_misc3, mask, outer_tcp_seq_num); + spec->inner_tcp_ack_num = MLX5_GET(fte_match_set_misc3, mask, inner_tcp_ack_num); + spec->outer_tcp_ack_num = MLX5_GET(fte_match_set_misc3, mask, outer_tcp_ack_num); + spec->outer_vxlan_gpe_vni = + MLX5_GET(fte_match_set_misc3, mask, outer_vxlan_gpe_vni); + spec->outer_vxlan_gpe_next_protocol = + MLX5_GET(fte_match_set_misc3, mask, outer_vxlan_gpe_next_protocol); + spec->outer_vxlan_gpe_flags = + MLX5_GET(fte_match_set_misc3, mask, outer_vxlan_gpe_flags); + spec->icmpv4_header_data = MLX5_GET(fte_match_set_misc3, mask, icmp_header_data); + spec->icmpv6_header_data = + MLX5_GET(fte_match_set_misc3, mask, icmpv6_header_data); + spec->icmpv4_type = MLX5_GET(fte_match_set_misc3, mask, icmp_type); + spec->icmpv4_code = MLX5_GET(fte_match_set_misc3, mask, icmp_code); + spec->icmpv6_type = MLX5_GET(fte_match_set_misc3, mask, icmpv6_type); + spec->icmpv6_code = MLX5_GET(fte_match_set_misc3, mask, icmpv6_code); +} + +void mlx5dr_ste_copy_param(u8 match_criteria, + struct mlx5dr_match_param *set_param, + struct mlx5dr_match_parameters *mask) +{ + u8 tail_param[MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)] = {}; + u8 *data = (u8 *)mask->match_buf; + size_t param_location; + void *buff; + + if (match_criteria & DR_MATCHER_CRITERIA_OUTER) { + if (mask->match_sz < sizeof(struct mlx5dr_match_spec)) { + memcpy(tail_param, data, mask->match_sz); + buff = tail_param; + } else { + buff = mask->match_buf; + } + dr_ste_copy_mask_spec(buff, &set_param->outer); + } + param_location = sizeof(struct mlx5dr_match_spec); + + if (match_criteria & DR_MATCHER_CRITERIA_MISC) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_misc)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_misc(buff, &set_param->misc); + } + param_location += sizeof(struct mlx5dr_match_misc); + + if (match_criteria & DR_MATCHER_CRITERIA_INNER) { + if (mask->match_sz < param_location + + sizeof(struct 
mlx5dr_match_spec)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_spec(buff, &set_param->inner); + } + param_location += sizeof(struct mlx5dr_match_spec); + + if (match_criteria & DR_MATCHER_CRITERIA_MISC2) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_misc2)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_misc2(buff, &set_param->misc2); + } + + param_location += sizeof(struct mlx5dr_match_misc2); + + if (match_criteria & DR_MATCHER_CRITERIA_MISC3) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_misc3)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_misc3(buff, &set_param->misc3); + } +} + +static int dr_ste_build_eth_l2_src_des_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_47_16, spec, dmac_47_16); + DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_15_0, spec, dmac_15_0); + + if (spec->smac_47_16 || spec->smac_15_0) { + MLX5_SET(ste_eth_l2_src_dst, tag, smac_47_32, + spec->smac_47_16 >> 16); + MLX5_SET(ste_eth_l2_src_dst, tag, smac_31_0, + spec->smac_47_16 << 16 | spec->smac_15_0); + spec->smac_47_16 = 0; + spec->smac_15_0 = 0; + } + + if (spec->ip_version) { + if (spec->ip_version == IP_VERSION_IPV4) { + MLX5_SET(ste_eth_l2_src_dst, tag, l3_type, STE_IPV4); + spec->ip_version = 0; + } else if (spec->ip_version == IP_VERSION_IPV6) { + MLX5_SET(ste_eth_l2_src_dst, tag, l3_type, STE_IPV6); + spec->ip_version = 0; + } else { + pr_info("Unsupported ip_version value\n"); + return -EINVAL; + } + } + + DR_STE_SET_TAG(eth_l2_src_dst, tag, first_vlan_id, spec, first_vid); + DR_STE_SET_TAG(eth_l2_src_dst, tag, first_cfi, spec, first_cfi); + DR_STE_SET_TAG(eth_l2_src_dst, tag, first_priority, spec, first_prio); + + if (spec->cvlan_tag) { + MLX5_SET(ste_eth_l2_src_dst, tag, first_vlan_qualifier, DR_STE_CVLAN); + spec->cvlan_tag = 0; + } else if (spec->svlan_tag) { + MLX5_SET(ste_eth_l2_src_dst, tag, first_vlan_qualifier, DR_STE_SVLAN); + spec->svlan_tag = 0; + } + return 0; +} + +int mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + int ret; + + ret = dr_ste_build_eth_l2_src_des_bit_mask(mask, inner, sb->bit_mask); + if (ret) + return ret; + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_SRC_DST, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_eth_l2_src_des_tag; + + return 0; +} + +static void dr_ste_build_eth_l3_ipv6_dst_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? 
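
mlx5dr_ste_copy_param() above walks the caller's mask buffer as a fixed sequence of blocks (outer spec, misc, inner spec, misc2, misc3), and whenever the buffer ends inside a block it copies the remaining bytes into a zero-filled scratch buffer (tail_param) so the per-block parser always sees a full structure. A minimal sketch of that truncation-safe walk, with a hypothetical fixed block size instead of the real structure sizes:

#include <stdio.h>
#include <string.h>
#include <stddef.h>

#define BLOCK_SZ 16	/* hypothetical size of one parameter block */

/* Return a full BLOCK_SZ-byte view of the block starting at 'offset':
 * either directly into the caller's buffer, or a zero-padded scratch copy
 * when the buffer is too short (mirroring tail_param).
 */
static const unsigned char *block_at(const unsigned char *buf, size_t buf_sz,
				     size_t offset,
				     unsigned char scratch[BLOCK_SZ])
{
	if (buf_sz >= offset + BLOCK_SZ)
		return buf + offset;

	memset(scratch, 0, BLOCK_SZ);
	if (buf_sz > offset)
		memcpy(scratch, buf + offset, buf_sz - offset);
	return scratch;
}

int main(void)
{
	unsigned char buf[24];
	unsigned char scratch[BLOCK_SZ];
	const unsigned char *b;

	memset(buf, 0xab, sizeof(buf));

	b = block_at(buf, sizeof(buf), 0, scratch);	   /* fully present  */
	printf("block 0: first=%#x last=%#x\n", b[0], b[BLOCK_SZ - 1]);

	b = block_at(buf, sizeof(buf), BLOCK_SZ, scratch); /* truncated      */
	printf("block 1: first=%#x last=%#x (zero padded)\n",
	       b[0], b[BLOCK_SZ - 1]);
	return 0;
}
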
&value->inner : &value->outer; + + DR_STE_SET_MASK_V(eth_l3_ipv6_dst, bit_mask, dst_ip_127_96, mask, dst_ip_127_96); + DR_STE_SET_MASK_V(eth_l3_ipv6_dst, bit_mask, dst_ip_95_64, mask, dst_ip_95_64); + DR_STE_SET_MASK_V(eth_l3_ipv6_dst, bit_mask, dst_ip_63_32, mask, dst_ip_63_32); + DR_STE_SET_MASK_V(eth_l3_ipv6_dst, bit_mask, dst_ip_31_0, mask, dst_ip_31_0); +} + +static int dr_ste_build_eth_l3_ipv6_dst_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_127_96, spec, dst_ip_127_96); + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_95_64, spec, dst_ip_95_64); + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_63_32, spec, dst_ip_63_32); + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_31_0, spec, dst_ip_31_0); + + return 0; +} + +void mlx5dr_ste_build_eth_l3_ipv6_dst(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_eth_l3_ipv6_dst_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV6_DST, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_eth_l3_ipv6_dst_tag; +} + +static void dr_ste_build_eth_l3_ipv6_src_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_MASK_V(eth_l3_ipv6_src, bit_mask, src_ip_127_96, mask, src_ip_127_96); + DR_STE_SET_MASK_V(eth_l3_ipv6_src, bit_mask, src_ip_95_64, mask, src_ip_95_64); + DR_STE_SET_MASK_V(eth_l3_ipv6_src, bit_mask, src_ip_63_32, mask, src_ip_63_32); + DR_STE_SET_MASK_V(eth_l3_ipv6_src, bit_mask, src_ip_31_0, mask, src_ip_31_0); +} + +static int dr_ste_build_eth_l3_ipv6_src_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_127_96, spec, src_ip_127_96); + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_95_64, spec, src_ip_95_64); + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_63_32, spec, src_ip_63_32); + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_31_0, spec, src_ip_31_0); + + return 0; +} + +void mlx5dr_ste_build_eth_l3_ipv6_src(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_eth_l3_ipv6_src_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV6_SRC, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_eth_l3_ipv6_src_tag; +} + +static void dr_ste_build_eth_l3_ipv4_5_tuple_bit_mask(struct mlx5dr_match_param *value, + bool inner, + u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? 
&value->inner : &value->outer; + + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + destination_address, mask, dst_ip_31_0); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + source_address, mask, src_ip_31_0); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + destination_port, mask, tcp_dport); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + destination_port, mask, udp_dport); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + source_port, mask, tcp_sport); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + source_port, mask, udp_sport); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + protocol, mask, ip_protocol); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + fragmented, mask, frag); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + dscp, mask, ip_dscp); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + ecn, mask, ip_ecn); + + if (mask->tcp_flags) { + DR_STE_SET_TCP_FLAGS(eth_l3_ipv4_5_tuple, bit_mask, mask); + mask->tcp_flags = 0; + } +} + +static int dr_ste_build_eth_l3_ipv4_5_tuple_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_address, spec, dst_ip_31_0); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_address, spec, src_ip_31_0); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_port, spec, tcp_dport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_port, spec, udp_dport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_port, spec, tcp_sport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_port, spec, udp_sport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, protocol, spec, ip_protocol); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, fragmented, spec, frag); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, dscp, spec, ip_dscp); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, ecn, spec, ip_ecn); + + if (spec->tcp_flags) { + DR_STE_SET_TCP_FLAGS(eth_l3_ipv4_5_tuple, tag, spec); + spec->tcp_flags = 0; + } + + return 0; +} + +void mlx5dr_ste_build_eth_l3_ipv4_5_tuple(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_eth_l3_ipv4_5_tuple_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV4_5_TUPLE, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_eth_l3_ipv4_5_tuple_tag; +} + +static void +dr_ste_build_eth_l2_src_or_dst_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? 
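
A pattern worth noting throughout these builders: once a field of the match spec (or of its mask) has been encoded into the STE tag or bit mask, the source field is zeroed, so a later builder does not encode it again and anything still non-zero afterwards is visibly unconsumed. A toy sketch of that consume-as-you-go idea with hypothetical fields, not the driver's structures:

#include <stdio.h>
#include <string.h>

struct toy_spec {
	unsigned int dport;
	unsigned int sport;
	unsigned int exotic_field;	/* nothing consumes this */
};

struct toy_tag {
	unsigned int dport;
	unsigned int sport;
};

static void build_tag(struct toy_spec *spec, struct toy_tag *tag)
{
	tag->dport = spec->dport;
	spec->dport = 0;	/* consumed */
	tag->sport = spec->sport;
	spec->sport = 0;	/* consumed */
}

/* After all builders ran, any non-zero leftover is a field nothing knew
 * how to encode.
 */
static int spec_fully_consumed(const struct toy_spec *spec)
{
	static const struct toy_spec zero;

	return !memcmp(spec, &zero, sizeof(zero));
}

int main(void)
{
	struct toy_spec spec = { .dport = 443, .exotic_field = 7 };
	struct toy_tag tag;

	build_tag(&spec, &tag);
	printf("tag dport=%u, fully consumed=%d\n", tag.dport,
	       spec_fully_consumed(&spec));
	return 0;
}
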
&value->inner : &value->outer; + struct mlx5dr_match_misc *misc_mask = &value->misc; + + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, first_vlan_id, mask, first_vid); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, first_cfi, mask, first_cfi); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, first_priority, mask, first_prio); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, ip_fragmented, mask, frag); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, l3_ethertype, mask, ethertype); + DR_STE_SET_MASK(eth_l2_src, bit_mask, l3_type, mask, ip_version); + + if (mask->svlan_tag || mask->cvlan_tag) { + MLX5_SET(ste_eth_l2_src, bit_mask, first_vlan_qualifier, -1); + mask->cvlan_tag = 0; + mask->svlan_tag = 0; + } + + if (inner) { + if (misc_mask->inner_second_cvlan_tag || + misc_mask->inner_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src, bit_mask, second_vlan_qualifier, -1); + misc_mask->inner_second_cvlan_tag = 0; + misc_mask->inner_second_svlan_tag = 0; + } + + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, + second_vlan_id, misc_mask, inner_second_vid); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, + second_cfi, misc_mask, inner_second_cfi); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, + second_priority, misc_mask, inner_second_prio); + } else { + if (misc_mask->outer_second_cvlan_tag || + misc_mask->outer_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src, bit_mask, second_vlan_qualifier, -1); + misc_mask->outer_second_cvlan_tag = 0; + misc_mask->outer_second_svlan_tag = 0; + } + + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, + second_vlan_id, misc_mask, outer_second_vid); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, + second_cfi, misc_mask, outer_second_cfi); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, + second_priority, misc_mask, outer_second_prio); + } +} + +static int dr_ste_build_eth_l2_src_or_dst_tag(struct mlx5dr_match_param *value, + bool inner, u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_spec *spec = inner ? 
&value->inner : &value->outer; + struct mlx5dr_match_misc *misc_spec = &value->misc; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l2_src, tag, first_vlan_id, spec, first_vid); + DR_STE_SET_TAG(eth_l2_src, tag, first_cfi, spec, first_cfi); + DR_STE_SET_TAG(eth_l2_src, tag, first_priority, spec, first_prio); + DR_STE_SET_TAG(eth_l2_src, tag, ip_fragmented, spec, frag); + DR_STE_SET_TAG(eth_l2_src, tag, l3_ethertype, spec, ethertype); + + if (spec->ip_version) { + if (spec->ip_version == IP_VERSION_IPV4) { + MLX5_SET(ste_eth_l2_src, tag, l3_type, STE_IPV4); + spec->ip_version = 0; + } else if (spec->ip_version == IP_VERSION_IPV6) { + MLX5_SET(ste_eth_l2_src, tag, l3_type, STE_IPV6); + spec->ip_version = 0; + } else { + pr_info("Unsupported ip_version value\n"); + return -EINVAL; + } + } + + if (spec->cvlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, first_vlan_qualifier, DR_STE_CVLAN); + spec->cvlan_tag = 0; + } else if (spec->svlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, first_vlan_qualifier, DR_STE_SVLAN); + spec->svlan_tag = 0; + } + + if (inner) { + if (misc_spec->inner_second_cvlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_CVLAN); + misc_spec->inner_second_cvlan_tag = 0; + } else if (misc_spec->inner_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_SVLAN); + misc_spec->inner_second_svlan_tag = 0; + } + + DR_STE_SET_TAG(eth_l2_src, tag, second_vlan_id, misc_spec, inner_second_vid); + DR_STE_SET_TAG(eth_l2_src, tag, second_cfi, misc_spec, inner_second_cfi); + DR_STE_SET_TAG(eth_l2_src, tag, second_priority, misc_spec, inner_second_prio); + } else { + if (misc_spec->outer_second_cvlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_CVLAN); + misc_spec->outer_second_cvlan_tag = 0; + } else if (misc_spec->outer_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_SVLAN); + misc_spec->outer_second_svlan_tag = 0; + } + DR_STE_SET_TAG(eth_l2_src, tag, second_vlan_id, misc_spec, outer_second_vid); + DR_STE_SET_TAG(eth_l2_src, tag, second_cfi, misc_spec, outer_second_cfi); + DR_STE_SET_TAG(eth_l2_src, tag, second_priority, misc_spec, outer_second_prio); + } + + return 0; +} + +static void dr_ste_build_eth_l2_src_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, smac_47_16, mask, smac_47_16); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, smac_15_0, mask, smac_15_0); + + dr_ste_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask); +} + +static int dr_ste_build_eth_l2_src_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_spec *spec = sb->inner ? 
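
The VLAN handling above collapses the two mutually exclusive flow-spec flags (cvlan_tag, svlan_tag) into a single qualifier field: the bit-mask builders write an all-ones qualifier whenever either flag is masked, while the tag builders write a C-VLAN or S-VLAN code and consume the flag. A small sketch of that encoding, with placeholder codes instead of the driver's DR_STE_CVLAN/DR_STE_SVLAN values:

#include <stdio.h>

enum { VLAN_QUAL_NONE = 0, VLAN_QUAL_CVLAN = 1, VLAN_QUAL_SVLAN = 2 };

struct vlan_flags {
	int cvlan_tag;
	int svlan_tag;
};

/* Value side: pick one qualifier code and mark the flag consumed. */
static int vlan_qualifier(struct vlan_flags *f)
{
	if (f->cvlan_tag) {
		f->cvlan_tag = 0;
		return VLAN_QUAL_CVLAN;
	}
	if (f->svlan_tag) {
		f->svlan_tag = 0;
		return VLAN_QUAL_SVLAN;
	}
	return VLAN_QUAL_NONE;
}

/* Mask side: either flag being set means "match on the qualifier at all",
 * hence the all-ones (-1) written by the bit-mask builders.
 */
static int vlan_qualifier_mask(const struct vlan_flags *f)
{
	return (f->cvlan_tag || f->svlan_tag) ? -1 : 0;
}

int main(void)
{
	struct vlan_flags f = { .svlan_tag = 1 };

	printf("mask=%d value=%d\n", vlan_qualifier_mask(&f),
	       vlan_qualifier(&f));
	return 0;
}
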
&value->inner : &value->outer; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l2_src, tag, smac_47_16, spec, smac_47_16); + DR_STE_SET_TAG(eth_l2_src, tag, smac_15_0, spec, smac_15_0); + + return dr_ste_build_eth_l2_src_or_dst_tag(value, sb->inner, hw_ste_p); +} + +void mlx5dr_ste_build_eth_l2_src(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_eth_l2_src_bit_mask(mask, inner, sb->bit_mask); + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_SRC, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_eth_l2_src_tag; +} + +static void dr_ste_build_eth_l2_dst_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_MASK_V(eth_l2_dst, bit_mask, dmac_47_16, mask, dmac_47_16); + DR_STE_SET_MASK_V(eth_l2_dst, bit_mask, dmac_15_0, mask, dmac_15_0); + + dr_ste_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask); +} + +static int dr_ste_build_eth_l2_dst_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l2_dst, tag, dmac_47_16, spec, dmac_47_16); + DR_STE_SET_TAG(eth_l2_dst, tag, dmac_15_0, spec, dmac_15_0); + + return dr_ste_build_eth_l2_src_or_dst_tag(value, sb->inner, hw_ste_p); +} + +void mlx5dr_ste_build_eth_l2_dst(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_eth_l2_dst_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_DST, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_eth_l2_dst_tag; +} + +static void dr_ste_build_eth_l2_tnl_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, dmac_47_16, mask, dmac_47_16); + DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, dmac_15_0, mask, dmac_15_0); + DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, first_vlan_id, mask, first_vid); + DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, first_cfi, mask, first_cfi); + DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, first_priority, mask, first_prio); + DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, ip_fragmented, mask, frag); + DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, l3_ethertype, mask, ethertype); + DR_STE_SET_MASK(eth_l2_tnl, bit_mask, l3_type, mask, ip_version); + + if (misc->vxlan_vni) { + MLX5_SET(ste_eth_l2_tnl, bit_mask, + l2_tunneling_network_id, (misc->vxlan_vni << 8)); + misc->vxlan_vni = 0; + } + + if (mask->svlan_tag || mask->cvlan_tag) { + MLX5_SET(ste_eth_l2_tnl, bit_mask, first_vlan_qualifier, -1); + mask->cvlan_tag = 0; + mask->svlan_tag = 0; + } +} + +static int dr_ste_build_eth_l2_tnl_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct mlx5dr_match_spec *spec = sb->inner ? 
&value->inner : &value->outer; + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc *misc = &value->misc; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_47_16, spec, dmac_47_16); + DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_15_0, spec, dmac_15_0); + DR_STE_SET_TAG(eth_l2_tnl, tag, first_vlan_id, spec, first_vid); + DR_STE_SET_TAG(eth_l2_tnl, tag, first_cfi, spec, first_cfi); + DR_STE_SET_TAG(eth_l2_tnl, tag, ip_fragmented, spec, frag); + DR_STE_SET_TAG(eth_l2_tnl, tag, first_priority, spec, first_prio); + DR_STE_SET_TAG(eth_l2_tnl, tag, l3_ethertype, spec, ethertype); + + if (misc->vxlan_vni) { + MLX5_SET(ste_eth_l2_tnl, tag, l2_tunneling_network_id, + (misc->vxlan_vni << 8)); + misc->vxlan_vni = 0; + } + + if (spec->cvlan_tag) { + MLX5_SET(ste_eth_l2_tnl, tag, first_vlan_qualifier, DR_STE_CVLAN); + spec->cvlan_tag = 0; + } else if (spec->svlan_tag) { + MLX5_SET(ste_eth_l2_tnl, tag, first_vlan_qualifier, DR_STE_SVLAN); + spec->svlan_tag = 0; + } + + if (spec->ip_version) { + if (spec->ip_version == IP_VERSION_IPV4) { + MLX5_SET(ste_eth_l2_tnl, tag, l3_type, STE_IPV4); + spec->ip_version = 0; + } else if (spec->ip_version == IP_VERSION_IPV6) { + MLX5_SET(ste_eth_l2_tnl, tag, l3_type, STE_IPV6); + spec->ip_version = 0; + } else { + return -EINVAL; + } + } + + return 0; +} + +void mlx5dr_ste_build_eth_l2_tnl(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, bool inner, bool rx) +{ + dr_ste_build_eth_l2_tnl_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = MLX5DR_STE_LU_TYPE_ETHL2_TUNNELING_I; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_eth_l2_tnl_tag; +} + +static void dr_ste_build_eth_l3_ipv4_misc_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_MASK_V(eth_l3_ipv4_misc, bit_mask, time_to_live, mask, ttl_hoplimit); +} + +static int dr_ste_build_eth_l3_ipv4_misc_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l3_ipv4_misc, tag, time_to_live, spec, ttl_hoplimit); + + return 0; +} + +void mlx5dr_ste_build_eth_l3_ipv4_misc(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_eth_l3_ipv4_misc_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV4_MISC, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_eth_l3_ipv4_misc_tag; +} + +static void dr_ste_build_ipv6_l3_l4_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? 
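
Several of the tag builders above translate the flow spec's ip_version (IP_VERSION_IPV4 / IP_VERSION_IPV6) into the STE's two-valued l3_type and reject anything else with -EINVAL. A standalone sketch of that mapping, with placeholder constants instead of the driver's STE_IPV4/STE_IPV6:

#include <stdio.h>
#include <errno.h>

/* Placeholder l3_type codes; the real STE_IPV4/STE_IPV6 values come from
 * the steering headers.
 */
enum { L3_TYPE_IPV4 = 1, L3_TYPE_IPV6 = 2 };

static int ip_version_to_l3_type(unsigned int ip_version, int *l3_type)
{
	switch (ip_version) {
	case 4:
		*l3_type = L3_TYPE_IPV4;
		return 0;
	case 6:
		*l3_type = L3_TYPE_IPV6;
		return 0;
	default:
		return -EINVAL;	/* same rejection as the tag builders */
	}
}

int main(void)
{
	int l3_type = 0;

	for (unsigned int v = 4; v <= 6; v++)
		printf("ip_version %u -> %d\n", v,
		       ip_version_to_l3_type(v, &l3_type) ? -1 : l3_type);
	return 0;
}
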
&value->inner : &value->outer; + + DR_STE_SET_MASK_V(eth_l4, bit_mask, dst_port, mask, tcp_dport); + DR_STE_SET_MASK_V(eth_l4, bit_mask, src_port, mask, tcp_sport); + DR_STE_SET_MASK_V(eth_l4, bit_mask, dst_port, mask, udp_dport); + DR_STE_SET_MASK_V(eth_l4, bit_mask, src_port, mask, udp_sport); + DR_STE_SET_MASK_V(eth_l4, bit_mask, protocol, mask, ip_protocol); + DR_STE_SET_MASK_V(eth_l4, bit_mask, fragmented, mask, frag); + DR_STE_SET_MASK_V(eth_l4, bit_mask, dscp, mask, ip_dscp); + DR_STE_SET_MASK_V(eth_l4, bit_mask, ecn, mask, ip_ecn); + DR_STE_SET_MASK_V(eth_l4, bit_mask, ipv6_hop_limit, mask, ttl_hoplimit); + + if (mask->tcp_flags) { + DR_STE_SET_TCP_FLAGS(eth_l4, bit_mask, mask); + mask->tcp_flags = 0; + } +} + +static int dr_ste_build_ipv6_l3_l4_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l4, tag, dst_port, spec, tcp_dport); + DR_STE_SET_TAG(eth_l4, tag, src_port, spec, tcp_sport); + DR_STE_SET_TAG(eth_l4, tag, dst_port, spec, udp_dport); + DR_STE_SET_TAG(eth_l4, tag, src_port, spec, udp_sport); + DR_STE_SET_TAG(eth_l4, tag, protocol, spec, ip_protocol); + DR_STE_SET_TAG(eth_l4, tag, fragmented, spec, frag); + DR_STE_SET_TAG(eth_l4, tag, dscp, spec, ip_dscp); + DR_STE_SET_TAG(eth_l4, tag, ecn, spec, ip_ecn); + DR_STE_SET_TAG(eth_l4, tag, ipv6_hop_limit, spec, ttl_hoplimit); + + if (spec->tcp_flags) { + DR_STE_SET_TCP_FLAGS(eth_l4, tag, spec); + spec->tcp_flags = 0; + } + + return 0; +} + +void mlx5dr_ste_build_ipv6_l3_l4(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_ipv6_l3_l4_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL4, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_ipv6_l3_l4_tag; +} + +static int dr_ste_build_empty_always_hit_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + return 0; +} + +void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx) +{ + sb->rx = rx; + sb->lu_type = MLX5DR_STE_LU_TYPE_DONT_CARE; + sb->byte_mask = 0; + sb->ste_build_tag_func = &dr_ste_build_empty_always_hit_tag; +} + +static void dr_ste_build_mpls_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_misc2 *misc2_mask = &value->misc2; + + if (inner) + DR_STE_SET_MPLS_MASK(mpls, misc2_mask, inner, bit_mask); + else + DR_STE_SET_MPLS_MASK(mpls, misc2_mask, outer, bit_mask); +} + +static int dr_ste_build_mpls_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc2 *misc2_mask = &value->misc2; + u8 *tag = hw_ste->tag; + + if (sb->inner) + DR_STE_SET_MPLS_TAG(mpls, misc2_mask, inner, tag); + else + DR_STE_SET_MPLS_TAG(mpls, misc2_mask, outer, tag); + + return 0; +} + +void mlx5dr_ste_build_mpls(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_mpls_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(MPLS_FIRST, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = 
&dr_ste_build_mpls_tag; +} + +static void dr_ste_build_gre_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_misc *misc_mask = &value->misc; + + DR_STE_SET_MASK_V(gre, bit_mask, gre_protocol, misc_mask, gre_protocol); + DR_STE_SET_MASK_V(gre, bit_mask, gre_k_present, misc_mask, gre_k_present); + DR_STE_SET_MASK_V(gre, bit_mask, gre_key_h, misc_mask, gre_key_h); + DR_STE_SET_MASK_V(gre, bit_mask, gre_key_l, misc_mask, gre_key_l); + + DR_STE_SET_MASK_V(gre, bit_mask, gre_c_present, misc_mask, gre_c_present); + DR_STE_SET_MASK_V(gre, bit_mask, gre_s_present, misc_mask, gre_s_present); +} + +static int dr_ste_build_gre_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc *misc = &value->misc; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(gre, tag, gre_protocol, misc, gre_protocol); + + DR_STE_SET_TAG(gre, tag, gre_k_present, misc, gre_k_present); + DR_STE_SET_TAG(gre, tag, gre_key_h, misc, gre_key_h); + DR_STE_SET_TAG(gre, tag, gre_key_l, misc, gre_key_l); + + DR_STE_SET_TAG(gre, tag, gre_c_present, misc, gre_c_present); + + DR_STE_SET_TAG(gre, tag, gre_s_present, misc, gre_s_present); + + return 0; +} + +void mlx5dr_ste_build_gre(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, bool inner, bool rx) +{ + dr_ste_build_gre_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = MLX5DR_STE_LU_TYPE_GRE; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_gre_tag; +} + +static void dr_ste_build_flex_parser_0_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2; + + if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc_2_mask)) { + DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_label, + misc_2_mask, outer_first_mpls_over_gre_label); + + DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_exp, + misc_2_mask, outer_first_mpls_over_gre_exp); + + DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_s_bos, + misc_2_mask, outer_first_mpls_over_gre_s_bos); + + DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_ttl, + misc_2_mask, outer_first_mpls_over_gre_ttl); + } else { + DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_label, + misc_2_mask, outer_first_mpls_over_udp_label); + + DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_exp, + misc_2_mask, outer_first_mpls_over_udp_exp); + + DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_s_bos, + misc_2_mask, outer_first_mpls_over_udp_s_bos); + + DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_ttl, + misc_2_mask, outer_first_mpls_over_udp_ttl); + } +} + +static int dr_ste_build_flex_parser_0_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2; + u8 *tag = hw_ste->tag; + + if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc_2_mask)) { + DR_STE_SET_TAG(flex_parser_0, tag, parser_3_label, + misc_2_mask, outer_first_mpls_over_gre_label); + + DR_STE_SET_TAG(flex_parser_0, tag, parser_3_exp, + misc_2_mask, outer_first_mpls_over_gre_exp); + + DR_STE_SET_TAG(flex_parser_0, tag, parser_3_s_bos, + misc_2_mask, outer_first_mpls_over_gre_s_bos); + + DR_STE_SET_TAG(flex_parser_0, tag, parser_3_ttl, + misc_2_mask, outer_first_mpls_over_gre_ttl); + } else { + 
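		/* No MPLS-over-GRE fields selected, so parser 3 is
		 * programmed from the MPLS-over-UDP fields instead
		 * (see DR_STE_IS_OUTER_MPLS_OVER_GRE_SET above).
		 */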
DR_STE_SET_TAG(flex_parser_0, tag, parser_3_label, + misc_2_mask, outer_first_mpls_over_udp_label); + + DR_STE_SET_TAG(flex_parser_0, tag, parser_3_exp, + misc_2_mask, outer_first_mpls_over_udp_exp); + + DR_STE_SET_TAG(flex_parser_0, tag, parser_3_s_bos, + misc_2_mask, outer_first_mpls_over_udp_s_bos); + + DR_STE_SET_TAG(flex_parser_0, tag, parser_3_ttl, + misc_2_mask, outer_first_mpls_over_udp_ttl); + } + return 0; +} + +void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_flex_parser_0_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = MLX5DR_STE_LU_TYPE_FLEX_PARSER_0; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_flex_parser_0_tag; +} + +#define ICMP_TYPE_OFFSET_FIRST_DW 24 +#define ICMP_CODE_OFFSET_FIRST_DW 16 +#define ICMP_HEADER_DATA_OFFSET_SECOND_DW 0 + +static int dr_ste_build_flex_parser_1_bit_mask(struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + u8 *bit_mask) +{ + struct mlx5dr_match_misc3 *misc_3_mask = &mask->misc3; + bool is_ipv4_mask = DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(misc_3_mask); + u32 icmp_header_data_mask; + u32 icmp_type_mask; + u32 icmp_code_mask; + int dw0_location; + int dw1_location; + + if (is_ipv4_mask) { + icmp_header_data_mask = misc_3_mask->icmpv4_header_data; + icmp_type_mask = misc_3_mask->icmpv4_type; + icmp_code_mask = misc_3_mask->icmpv4_code; + dw0_location = caps->flex_parser_id_icmp_dw0; + dw1_location = caps->flex_parser_id_icmp_dw1; + } else { + icmp_header_data_mask = misc_3_mask->icmpv6_header_data; + icmp_type_mask = misc_3_mask->icmpv6_type; + icmp_code_mask = misc_3_mask->icmpv6_code; + dw0_location = caps->flex_parser_id_icmpv6_dw0; + dw1_location = caps->flex_parser_id_icmpv6_dw1; + } + + switch (dw0_location) { + case 4: + if (icmp_type_mask) { + MLX5_SET(ste_flex_parser_1, bit_mask, flex_parser_4, + (icmp_type_mask << ICMP_TYPE_OFFSET_FIRST_DW)); + if (is_ipv4_mask) + misc_3_mask->icmpv4_type = 0; + else + misc_3_mask->icmpv6_type = 0; + } + if (icmp_code_mask) { + u32 cur_val = MLX5_GET(ste_flex_parser_1, bit_mask, + flex_parser_4); + MLX5_SET(ste_flex_parser_1, bit_mask, flex_parser_4, + cur_val | (icmp_code_mask << ICMP_CODE_OFFSET_FIRST_DW)); + if (is_ipv4_mask) + misc_3_mask->icmpv4_code = 0; + else + misc_3_mask->icmpv6_code = 0; + } + break; + default: + return -EINVAL; + } + + switch (dw1_location) { + case 5: + if (icmp_header_data_mask) { + MLX5_SET(ste_flex_parser_1, bit_mask, flex_parser_5, + (icmp_header_data_mask << ICMP_HEADER_DATA_OFFSET_SECOND_DW)); + if (is_ipv4_mask) + misc_3_mask->icmpv4_header_data = 0; + else + misc_3_mask->icmpv6_header_data = 0; + } + break; + default: + return -EINVAL; + } + + return 0; +} + +static int dr_ste_build_flex_parser_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc3 *misc_3 = &value->misc3; + u8 *tag = hw_ste->tag; + u32 icmp_header_data; + int dw0_location; + int dw1_location; + u32 icmp_type; + u32 icmp_code; + bool is_ipv4; + + is_ipv4 = DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(misc_3); + if (is_ipv4) { + icmp_header_data = misc_3->icmpv4_header_data; + icmp_type = misc_3->icmpv4_type; + icmp_code = misc_3->icmpv4_code; + dw0_location = sb->caps->flex_parser_id_icmp_dw0; + dw1_location = sb->caps->flex_parser_id_icmp_dw1; + } else { + icmp_header_data = 
misc_3->icmpv6_header_data; + icmp_type = misc_3->icmpv6_type; + icmp_code = misc_3->icmpv6_code; + dw0_location = sb->caps->flex_parser_id_icmpv6_dw0; + dw1_location = sb->caps->flex_parser_id_icmpv6_dw1; + } + + switch (dw0_location) { + case 4: + if (icmp_type) { + MLX5_SET(ste_flex_parser_1, tag, flex_parser_4, + (icmp_type << ICMP_TYPE_OFFSET_FIRST_DW)); + if (is_ipv4) + misc_3->icmpv4_type = 0; + else + misc_3->icmpv6_type = 0; + } + + if (icmp_code) { + u32 cur_val = MLX5_GET(ste_flex_parser_1, tag, + flex_parser_4); + MLX5_SET(ste_flex_parser_1, tag, flex_parser_4, + cur_val | (icmp_code << ICMP_CODE_OFFSET_FIRST_DW)); + if (is_ipv4) + misc_3->icmpv4_code = 0; + else + misc_3->icmpv6_code = 0; + } + break; + default: + return -EINVAL; + } + + switch (dw1_location) { + case 5: + if (icmp_header_data) { + MLX5_SET(ste_flex_parser_1, tag, flex_parser_5, + (icmp_header_data << ICMP_HEADER_DATA_OFFSET_SECOND_DW)); + if (is_ipv4) + misc_3->icmpv4_header_data = 0; + else + misc_3->icmpv6_header_data = 0; + } + break; + default: + return -EINVAL; + } + + return 0; +} + +int mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + int ret; + + ret = dr_ste_build_flex_parser_1_bit_mask(mask, caps, sb->bit_mask); + if (ret) + return ret; + + sb->rx = rx; + sb->inner = inner; + sb->caps = caps; + sb->lu_type = MLX5DR_STE_LU_TYPE_FLEX_PARSER_1; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_flex_parser_1_tag; + + return 0; +} + +static void dr_ste_build_general_purpose_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2; + + DR_STE_SET_MASK_V(general_purpose, bit_mask, + general_purpose_lookup_field, misc_2_mask, + metadata_reg_a); +} + +static int dr_ste_build_general_purpose_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(general_purpose, tag, general_purpose_lookup_field, + misc_2_mask, metadata_reg_a); + + return 0; +} + +void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_general_purpose_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = MLX5DR_STE_LU_TYPE_GENERAL_PURPOSE; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_general_purpose_tag; +} + +static void dr_ste_build_eth_l4_misc_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_misc3 *misc_3_mask = &value->misc3; + + if (inner) { + DR_STE_SET_MASK_V(eth_l4_misc, bit_mask, seq_num, misc_3_mask, + inner_tcp_seq_num); + DR_STE_SET_MASK_V(eth_l4_misc, bit_mask, ack_num, misc_3_mask, + inner_tcp_ack_num); + } else { + DR_STE_SET_MASK_V(eth_l4_misc, bit_mask, seq_num, misc_3_mask, + outer_tcp_seq_num); + DR_STE_SET_MASK_V(eth_l4_misc, bit_mask, ack_num, misc_3_mask, + outer_tcp_ack_num); + } +} + +static int dr_ste_build_eth_l4_misc_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + u8 *tag = hw_ste->tag; + + if 
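
The ICMP builders above pack type and code into one flex-parser dword (type in bits 31:24, code in bits 23:16, per the offsets #defined earlier) and the header data into a second dword, for the case where the firmware places the ICMP dwords at flex parsers 4 and 5 (the only layout handled here). A standalone sketch of that packing:

#include <stdio.h>
#include <stdint.h>

#define ICMP_TYPE_OFFSET_FIRST_DW		24
#define ICMP_CODE_OFFSET_FIRST_DW		16
#define ICMP_HEADER_DATA_OFFSET_SECOND_DW	0

/* Pack ICMP type/code/header-data the way the flex_parser_1 builders lay
 * them out across two parser dwords.
 */
static void pack_icmp(uint8_t type, uint8_t code, uint32_t header_data,
		      uint32_t *dw0, uint32_t *dw1)
{
	*dw0 = ((uint32_t)type << ICMP_TYPE_OFFSET_FIRST_DW) |
	       ((uint32_t)code << ICMP_CODE_OFFSET_FIRST_DW);
	*dw1 = header_data << ICMP_HEADER_DATA_OFFSET_SECOND_DW;
}

int main(void)
{
	uint32_t dw0, dw1;

	/* ICMP echo request: type 8, code 0, id/seq as header data. */
	pack_icmp(8, 0, 0x12340001, &dw0, &dw1);
	printf("dw0=%#010x dw1=%#010x\n", dw0, dw1);
	return 0;
}
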
(sb->inner) { + DR_STE_SET_TAG(eth_l4_misc, tag, seq_num, misc3, inner_tcp_seq_num); + DR_STE_SET_TAG(eth_l4_misc, tag, ack_num, misc3, inner_tcp_ack_num); + } else { + DR_STE_SET_TAG(eth_l4_misc, tag, seq_num, misc3, outer_tcp_seq_num); + DR_STE_SET_TAG(eth_l4_misc, tag, ack_num, misc3, outer_tcp_ack_num); + } + + return 0; +} + +void mlx5dr_ste_build_eth_l4_misc(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_eth_l4_misc_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL4_MISC, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_eth_l4_misc_tag; +} + +static void +dr_ste_build_flex_parser_tnl_vxlan_gpe_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_misc3 *misc_3_mask = &value->misc3; + + DR_STE_SET_MASK_V(flex_parser_tnl_vxlan_gpe, bit_mask, + outer_vxlan_gpe_flags, + misc_3_mask, outer_vxlan_gpe_flags); + DR_STE_SET_MASK_V(flex_parser_tnl_vxlan_gpe, bit_mask, + outer_vxlan_gpe_next_protocol, + misc_3_mask, outer_vxlan_gpe_next_protocol); + DR_STE_SET_MASK_V(flex_parser_tnl_vxlan_gpe, bit_mask, + outer_vxlan_gpe_vni, + misc_3_mask, outer_vxlan_gpe_vni); +} + +static int +dr_ste_build_flex_parser_tnl_vxlan_gpe_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag, + outer_vxlan_gpe_flags, misc3, + outer_vxlan_gpe_flags); + DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag, + outer_vxlan_gpe_next_protocol, misc3, + outer_vxlan_gpe_next_protocol); + DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag, + outer_vxlan_gpe_vni, misc3, + outer_vxlan_gpe_vni); + + return 0; +} + +void mlx5dr_ste_build_flex_parser_tnl_vxlan_gpe(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_flex_parser_tnl_vxlan_gpe_bit_mask(mask, inner, + sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = MLX5DR_STE_LU_TYPE_FLEX_PARSER_TNL_HEADER; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_flex_parser_tnl_vxlan_gpe_tag; +} + +static void +dr_ste_build_flex_parser_tnl_geneve_bit_mask(struct mlx5dr_match_param *value, + u8 *bit_mask) +{ + struct mlx5dr_match_misc *misc_mask = &value->misc; + + DR_STE_SET_MASK_V(flex_parser_tnl_geneve, bit_mask, + geneve_protocol_type, + misc_mask, geneve_protocol_type); + DR_STE_SET_MASK_V(flex_parser_tnl_geneve, bit_mask, + geneve_oam, + misc_mask, geneve_oam); + DR_STE_SET_MASK_V(flex_parser_tnl_geneve, bit_mask, + geneve_opt_len, + misc_mask, geneve_opt_len); + DR_STE_SET_MASK_V(flex_parser_tnl_geneve, bit_mask, + geneve_vni, + misc_mask, geneve_vni); +} + +static int +dr_ste_build_flex_parser_tnl_geneve_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc *misc = &value->misc; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, + geneve_protocol_type, misc, geneve_protocol_type); + DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, + geneve_oam, misc, geneve_oam); + DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, + geneve_opt_len, misc, geneve_opt_len); + 
DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, + geneve_vni, misc, geneve_vni); + + return 0; +} + +void mlx5dr_ste_build_flex_parser_tnl_geneve(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_flex_parser_tnl_geneve_bit_mask(mask, sb->bit_mask); + sb->rx = rx; + sb->inner = inner; + sb->lu_type = MLX5DR_STE_LU_TYPE_FLEX_PARSER_TNL_HEADER; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_flex_parser_tnl_geneve_tag; +} + +static void dr_ste_build_register_0_bit_mask(struct mlx5dr_match_param *value, + u8 *bit_mask) +{ + struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2; + + DR_STE_SET_MASK_V(register_0, bit_mask, register_0_h, + misc_2_mask, metadata_reg_c_0); + DR_STE_SET_MASK_V(register_0, bit_mask, register_0_l, + misc_2_mask, metadata_reg_c_1); + DR_STE_SET_MASK_V(register_0, bit_mask, register_1_h, + misc_2_mask, metadata_reg_c_2); + DR_STE_SET_MASK_V(register_0, bit_mask, register_1_l, + misc_2_mask, metadata_reg_c_3); +} + +static int dr_ste_build_register_0_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(register_0, tag, register_0_h, misc2, metadata_reg_c_0); + DR_STE_SET_TAG(register_0, tag, register_0_l, misc2, metadata_reg_c_1); + DR_STE_SET_TAG(register_0, tag, register_1_h, misc2, metadata_reg_c_2); + DR_STE_SET_TAG(register_0, tag, register_1_l, misc2, metadata_reg_c_3); + + return 0; +} + +void mlx5dr_ste_build_register_0(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_register_0_bit_mask(mask, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = MLX5DR_STE_LU_TYPE_STEERING_REGISTERS_0; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_register_0_tag; +} + +static void dr_ste_build_register_1_bit_mask(struct mlx5dr_match_param *value, + u8 *bit_mask) +{ + struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2; + + DR_STE_SET_MASK_V(register_1, bit_mask, register_2_h, + misc_2_mask, metadata_reg_c_4); + DR_STE_SET_MASK_V(register_1, bit_mask, register_2_l, + misc_2_mask, metadata_reg_c_5); + DR_STE_SET_MASK_V(register_1, bit_mask, register_3_h, + misc_2_mask, metadata_reg_c_6); + DR_STE_SET_MASK_V(register_1, bit_mask, register_3_l, + misc_2_mask, metadata_reg_c_7); +} + +static int dr_ste_build_register_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(register_1, tag, register_2_h, misc2, metadata_reg_c_4); + DR_STE_SET_TAG(register_1, tag, register_2_l, misc2, metadata_reg_c_5); + DR_STE_SET_TAG(register_1, tag, register_3_h, misc2, metadata_reg_c_6); + DR_STE_SET_TAG(register_1, tag, register_3_l, misc2, metadata_reg_c_7); + + return 0; +} + +void mlx5dr_ste_build_register_1(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_register_1_bit_mask(mask, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = MLX5DR_STE_LU_TYPE_STEERING_REGISTERS_1; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_register_1_tag; 
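/*
 * dr_ste_build_register_0() and dr_ste_build_register_1() above split the
 * eight metadata_reg_c_* values across two lookup types: reg_c_0..reg_c_3
 * go to STEERING_REGISTERS_0 and reg_c_4..reg_c_7 to STEERING_REGISTERS_1.
 * A minimal standalone sketch of that mapping; the enum names are
 * illustrative stand-ins, not the mlx5dr definitions.
 */
#include <stdio.h>

enum reg_ste { STEERING_REGISTERS_0, STEERING_REGISTERS_1 };

static enum reg_ste reg_c_to_ste(unsigned int reg_c_idx)
{
	return reg_c_idx < 4 ? STEERING_REGISTERS_0 : STEERING_REGISTERS_1;
}

int main(void)
{
	unsigned int i;

	for (i = 0; i < 8; i++)
		printf("metadata_reg_c_%u -> registers STE %d\n",
		       i, (int)reg_c_to_ste(i));
	return 0;
}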
+} + +static int dr_ste_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *value, + u8 *bit_mask) +{ + struct mlx5dr_match_misc *misc_mask = &value->misc; + + /* Partial misc source_port is not supported */ + if (misc_mask->source_port && misc_mask->source_port != 0xffff) + return -EINVAL; + + /* Partial misc source_eswitch_owner_vhca_id is not supported */ + if (misc_mask->source_eswitch_owner_vhca_id && + misc_mask->source_eswitch_owner_vhca_id != 0xffff) + return -EINVAL; + + DR_STE_SET_MASK(src_gvmi_qp, bit_mask, source_gvmi, misc_mask, source_port); + DR_STE_SET_MASK(src_gvmi_qp, bit_mask, source_qp, misc_mask, source_sqn); + misc_mask->source_eswitch_owner_vhca_id = 0; + + return 0; +} + +static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc *misc = &value->misc; + struct mlx5dr_cmd_vport_cap *vport_cap; + struct mlx5dr_domain *dmn = sb->dmn; + struct mlx5dr_cmd_caps *caps; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn); + + if (sb->vhca_id_valid) { + /* Find port GVMI based on the eswitch_owner_vhca_id */ + if (misc->source_eswitch_owner_vhca_id == dmn->info.caps.gvmi) + caps = &dmn->info.caps; + else if (dmn->peer_dmn && (misc->source_eswitch_owner_vhca_id == + dmn->peer_dmn->info.caps.gvmi)) + caps = &dmn->peer_dmn->info.caps; + else + return -EINVAL; + } else { + caps = &dmn->info.caps; + } + + vport_cap = mlx5dr_get_vport_cap(caps, misc->source_port); + if (!vport_cap) + return -EINVAL; + + if (vport_cap->vport_gvmi) + MLX5_SET(ste_src_gvmi_qp, tag, source_gvmi, vport_cap->vport_gvmi); + + misc->source_eswitch_owner_vhca_id = 0; + misc->source_port = 0; + + return 0; +} + +int mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn, + bool inner, bool rx) +{ + int ret; + + /* Set vhca_id_valid before we reset source_eswitch_owner_vhca_id */ + sb->vhca_id_valid = mask->misc.source_eswitch_owner_vhca_id; + + ret = dr_ste_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask); + if (ret) + return ret; + + sb->rx = rx; + sb->dmn = dmn; + sb->inner = inner; + sb->lu_type = MLX5DR_STE_LU_TYPE_SRC_GVMI_AND_QP; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_src_gvmi_qpn_tag; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c new file mode 100644 index 000000000000..14ce2d7dbb66 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c @@ -0,0 +1,301 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#include "dr_types.h" + +int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, + struct mlx5dr_action *action) +{ + struct mlx5dr_matcher *last_matcher = NULL; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_ste_htbl *last_htbl; + int ret; + + if (action && action->action_type != DR_ACTION_TYP_FT) + return -EOPNOTSUPP; + + mutex_lock(&tbl->dmn->mutex); + + if (!list_empty(&tbl->matcher_list)) + last_matcher = list_last_entry(&tbl->matcher_list, + struct mlx5dr_matcher, + matcher_list); + + if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX || + tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) { + if (last_matcher) + last_htbl = last_matcher->rx.e_anchor; + else + last_htbl = tbl->rx.s_anchor; + + tbl->rx.default_icm_addr = action ? + action->dest_tbl.tbl->rx.s_anchor->chunk->icm_addr : + tbl->rx.nic_dmn->default_icm_addr; + + info.type = CONNECT_MISS; + info.miss_icm_addr = tbl->rx.default_icm_addr; + + ret = mlx5dr_ste_htbl_init_and_postsend(tbl->dmn, + tbl->rx.nic_dmn, + last_htbl, + &info, true); + if (ret) { + mlx5dr_dbg(tbl->dmn, "Failed to set RX miss action, ret %d\n", ret); + goto out; + } + } + + if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX || + tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) { + if (last_matcher) + last_htbl = last_matcher->tx.e_anchor; + else + last_htbl = tbl->tx.s_anchor; + + tbl->tx.default_icm_addr = action ? + action->dest_tbl.tbl->tx.s_anchor->chunk->icm_addr : + tbl->tx.nic_dmn->default_icm_addr; + + info.type = CONNECT_MISS; + info.miss_icm_addr = tbl->tx.default_icm_addr; + + ret = mlx5dr_ste_htbl_init_and_postsend(tbl->dmn, + tbl->tx.nic_dmn, + last_htbl, &info, true); + if (ret) { + mlx5dr_dbg(tbl->dmn, "Failed to set TX miss action, ret %d\n", ret); + goto out; + } + } + + /* Release old action */ + if (tbl->miss_action) + refcount_dec(&tbl->miss_action->refcount); + + /* Set new miss action */ + tbl->miss_action = action; + if (tbl->miss_action) + refcount_inc(&action->refcount); + +out: + mutex_unlock(&tbl->dmn->mutex); + return ret; +} + +static void dr_table_uninit_nic(struct mlx5dr_table_rx_tx *nic_tbl) +{ + mlx5dr_htbl_put(nic_tbl->s_anchor); +} + +static void dr_table_uninit_fdb(struct mlx5dr_table *tbl) +{ + dr_table_uninit_nic(&tbl->rx); + dr_table_uninit_nic(&tbl->tx); +} + +static void dr_table_uninit(struct mlx5dr_table *tbl) +{ + mutex_lock(&tbl->dmn->mutex); + + switch (tbl->dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + dr_table_uninit_nic(&tbl->rx); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + dr_table_uninit_nic(&tbl->tx); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + dr_table_uninit_fdb(tbl); + break; + default: + WARN_ON(true); + break; + } + + mutex_unlock(&tbl->dmn->mutex); +} + +static int dr_table_init_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_table_rx_tx *nic_tbl) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn; + struct mlx5dr_htbl_connect_info info; + int ret; + + nic_tbl->default_icm_addr = nic_dmn->default_icm_addr; + + nic_tbl->s_anchor = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + DR_CHUNK_SIZE_1, + MLX5DR_STE_LU_TYPE_DONT_CARE, + 0); + if (!nic_tbl->s_anchor) + return -ENOMEM; + + info.type = CONNECT_MISS; + info.miss_icm_addr = nic_dmn->default_icm_addr; + ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, + nic_tbl->s_anchor, + &info, true); + if (ret) + goto free_s_anchor; + + mlx5dr_htbl_get(nic_tbl->s_anchor); + + return 0; + +free_s_anchor: + mlx5dr_ste_htbl_free(nic_tbl->s_anchor); + return ret; +} + +static int dr_table_init_fdb(struct mlx5dr_table *tbl) +{ + int ret; + + ret = 
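/*
 * mlx5dr_table_set_miss_action() above picks the miss address of the last
 * hash table on each side: the start anchor of the destination table when
 * a forward-to-table action is supplied, otherwise the NIC domain default.
 * A minimal sketch of that selection with plain integers standing in for
 * the mlx5dr structures.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t pick_miss_icm_addr(bool have_dest_tbl_action,
				   uint64_t dest_s_anchor_icm_addr,
				   uint64_t nic_dmn_default_icm_addr)
{
	return have_dest_tbl_action ? dest_s_anchor_icm_addr
				    : nic_dmn_default_icm_addr;
}

int main(void)
{
	printf("miss -> 0x%llx\n", (unsigned long long)
	       pick_miss_icm_addr(true, 0x1000, 0x2000));
	return 0;
}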
dr_table_init_nic(tbl->dmn, &tbl->rx); + if (ret) + return ret; + + ret = dr_table_init_nic(tbl->dmn, &tbl->tx); + if (ret) + goto destroy_rx; + + return 0; + +destroy_rx: + dr_table_uninit_nic(&tbl->rx); + return ret; +} + +static int dr_table_init(struct mlx5dr_table *tbl) +{ + int ret = 0; + + INIT_LIST_HEAD(&tbl->matcher_list); + + mutex_lock(&tbl->dmn->mutex); + + switch (tbl->dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + tbl->table_type = MLX5_FLOW_TABLE_TYPE_NIC_RX; + tbl->rx.nic_dmn = &tbl->dmn->info.rx; + ret = dr_table_init_nic(tbl->dmn, &tbl->rx); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + tbl->table_type = MLX5_FLOW_TABLE_TYPE_NIC_TX; + tbl->tx.nic_dmn = &tbl->dmn->info.tx; + ret = dr_table_init_nic(tbl->dmn, &tbl->tx); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + tbl->table_type = MLX5_FLOW_TABLE_TYPE_FDB; + tbl->rx.nic_dmn = &tbl->dmn->info.rx; + tbl->tx.nic_dmn = &tbl->dmn->info.tx; + ret = dr_table_init_fdb(tbl); + break; + default: + WARN_ON(true); + break; + } + + mutex_unlock(&tbl->dmn->mutex); + + return ret; +} + +static int dr_table_destroy_sw_owned_tbl(struct mlx5dr_table *tbl) +{ + return mlx5dr_cmd_destroy_flow_table(tbl->dmn->mdev, + tbl->table_id, + tbl->table_type); +} + +static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl) +{ + bool en_encap = !!(tbl->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT); + bool en_decap = !!(tbl->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + struct mlx5dr_cmd_create_flow_table_attr ft_attr = {}; + u64 icm_addr_rx = 0; + u64 icm_addr_tx = 0; + int ret; + + if (tbl->rx.s_anchor) + icm_addr_rx = tbl->rx.s_anchor->chunk->icm_addr; + + if (tbl->tx.s_anchor) + icm_addr_tx = tbl->tx.s_anchor->chunk->icm_addr; + + ft_attr.table_type = tbl->table_type; + ft_attr.icm_addr_rx = icm_addr_rx; + ft_attr.icm_addr_tx = icm_addr_tx; + ft_attr.level = tbl->dmn->info.caps.max_ft_level - 1; + ft_attr.sw_owner = true; + ft_attr.decap_en = en_decap; + ft_attr.reformat_en = en_encap; + + ret = mlx5dr_cmd_create_flow_table(tbl->dmn->mdev, &ft_attr, + NULL, &tbl->table_id); + + return ret; +} + +struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level, u32 flags) +{ + struct mlx5dr_table *tbl; + int ret; + + refcount_inc(&dmn->refcount); + + tbl = kzalloc(sizeof(*tbl), GFP_KERNEL); + if (!tbl) + goto dec_ref; + + tbl->dmn = dmn; + tbl->level = level; + tbl->flags = flags; + refcount_set(&tbl->refcount, 1); + + ret = dr_table_init(tbl); + if (ret) + goto free_tbl; + + ret = dr_table_create_sw_owned_tbl(tbl); + if (ret) + goto uninit_tbl; + + return tbl; + +uninit_tbl: + dr_table_uninit(tbl); +free_tbl: + kfree(tbl); +dec_ref: + refcount_dec(&dmn->refcount); + return NULL; +} + +int mlx5dr_table_destroy(struct mlx5dr_table *tbl) +{ + int ret; + + if (refcount_read(&tbl->refcount) > 1) + return -EBUSY; + + ret = dr_table_destroy_sw_owned_tbl(tbl); + if (ret) + return ret; + + dr_table_uninit(tbl); + + if (tbl->miss_action) + refcount_dec(&tbl->miss_action->refcount); + + refcount_dec(&tbl->dmn->refcount); + kfree(tbl); + + return ret; +} + +u32 mlx5dr_table_get_id(struct mlx5dr_table *tbl) +{ + return tbl->table_id; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h new file mode 100644 index 000000000000..dffe35145d19 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -0,0 +1,1126 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019, Mellanox Technologies */ + +#ifndef _DR_TYPES_ 
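/*
 * dr_table_create_sw_owned_tbl() above derives the firmware flow-table
 * attributes from the table flags and the RX/TX ICM anchors, placing the
 * SW-owned table at the deepest supported level.  A minimal sketch of that
 * derivation; the flag bit values are placeholders, since the real
 * MLX5_FLOW_TABLE_TUNNEL_EN_* defines come from the core flow-steering
 * headers rather than this hunk.
 */
#include <stdbool.h>
#include <stdint.h>

#define TUNNEL_EN_REFORMAT	(1u << 0)	/* placeholder bit */
#define TUNNEL_EN_DECAP		(1u << 1)	/* placeholder bit */

struct sw_owned_ft_attr {
	uint64_t icm_addr_rx;
	uint64_t icm_addr_tx;
	uint8_t  level;
	bool     sw_owner;
	bool     decap_en;
	bool     reformat_en;
};

static struct sw_owned_ft_attr
build_ft_attr(uint32_t tbl_flags, uint64_t icm_rx, uint64_t icm_tx,
	      uint8_t max_ft_level)
{
	struct sw_owned_ft_attr attr = {
		.icm_addr_rx = icm_rx,
		.icm_addr_tx = icm_tx,
		.level       = (uint8_t)(max_ft_level - 1),
		.sw_owner    = true,
		.decap_en    = !!(tbl_flags & TUNNEL_EN_DECAP),
		.reformat_en = !!(tbl_flags & TUNNEL_EN_REFORMAT),
	};

	return attr;
}

int main(void)
{
	struct sw_owned_ft_attr a =
		build_ft_attr(TUNNEL_EN_DECAP, 0x1000, 0x2000, 8);

	return (a.decap_en && !a.reformat_en && a.level == 7) ? 0 : 1;
}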
+#define _DR_TYPES_ + +#include <linux/mlx5/driver.h> +#include <linux/refcount.h> +#include "fs_core.h" +#include "wq.h" +#include "lib/mlx5.h" +#include "mlx5_ifc_dr.h" +#include "mlx5dr.h" + +#define DR_RULE_MAX_STES 17 +#define DR_ACTION_MAX_STES 5 +#define WIRE_PORT 0xFFFF +#define DR_STE_SVLAN 0x1 +#define DR_STE_CVLAN 0x2 + +#define mlx5dr_err(dmn, arg...) mlx5_core_err((dmn)->mdev, ##arg) +#define mlx5dr_info(dmn, arg...) mlx5_core_info((dmn)->mdev, ##arg) +#define mlx5dr_dbg(dmn, arg...) mlx5_core_dbg((dmn)->mdev, ##arg) + +enum mlx5dr_icm_chunk_size { + DR_CHUNK_SIZE_1, + DR_CHUNK_SIZE_MIN = DR_CHUNK_SIZE_1, /* keep updated when changing */ + DR_CHUNK_SIZE_2, + DR_CHUNK_SIZE_4, + DR_CHUNK_SIZE_8, + DR_CHUNK_SIZE_16, + DR_CHUNK_SIZE_32, + DR_CHUNK_SIZE_64, + DR_CHUNK_SIZE_128, + DR_CHUNK_SIZE_256, + DR_CHUNK_SIZE_512, + DR_CHUNK_SIZE_1K, + DR_CHUNK_SIZE_2K, + DR_CHUNK_SIZE_4K, + DR_CHUNK_SIZE_8K, + DR_CHUNK_SIZE_16K, + DR_CHUNK_SIZE_32K, + DR_CHUNK_SIZE_64K, + DR_CHUNK_SIZE_128K, + DR_CHUNK_SIZE_256K, + DR_CHUNK_SIZE_512K, + DR_CHUNK_SIZE_1024K, + DR_CHUNK_SIZE_2048K, + DR_CHUNK_SIZE_MAX, +}; + +enum mlx5dr_icm_type { + DR_ICM_TYPE_STE, + DR_ICM_TYPE_MODIFY_ACTION, +}; + +static inline enum mlx5dr_icm_chunk_size +mlx5dr_icm_next_higher_chunk(enum mlx5dr_icm_chunk_size chunk) +{ + chunk += 2; + if (chunk < DR_CHUNK_SIZE_MAX) + return chunk; + + return DR_CHUNK_SIZE_MAX; +} + +enum { + DR_STE_SIZE = 64, + DR_STE_SIZE_CTRL = 32, + DR_STE_SIZE_TAG = 16, + DR_STE_SIZE_MASK = 16, +}; + +enum { + DR_STE_SIZE_REDUCED = DR_STE_SIZE - DR_STE_SIZE_MASK, +}; + +enum { + DR_MODIFY_ACTION_SIZE = 8, +}; + +enum mlx5dr_matcher_criteria { + DR_MATCHER_CRITERIA_EMPTY = 0, + DR_MATCHER_CRITERIA_OUTER = 1 << 0, + DR_MATCHER_CRITERIA_MISC = 1 << 1, + DR_MATCHER_CRITERIA_INNER = 1 << 2, + DR_MATCHER_CRITERIA_MISC2 = 1 << 3, + DR_MATCHER_CRITERIA_MISC3 = 1 << 4, + DR_MATCHER_CRITERIA_MAX = 1 << 5, +}; + +enum mlx5dr_action_type { + DR_ACTION_TYP_TNL_L2_TO_L2, + DR_ACTION_TYP_L2_TO_TNL_L2, + DR_ACTION_TYP_TNL_L3_TO_L2, + DR_ACTION_TYP_L2_TO_TNL_L3, + DR_ACTION_TYP_DROP, + DR_ACTION_TYP_QP, + DR_ACTION_TYP_FT, + DR_ACTION_TYP_CTR, + DR_ACTION_TYP_TAG, + DR_ACTION_TYP_MODIFY_HDR, + DR_ACTION_TYP_VPORT, + DR_ACTION_TYP_POP_VLAN, + DR_ACTION_TYP_PUSH_VLAN, + DR_ACTION_TYP_MAX, +}; + +enum mlx5dr_ipv { + DR_RULE_IPV4, + DR_RULE_IPV6, + DR_RULE_IPV_MAX, +}; + +struct mlx5dr_icm_pool; +struct mlx5dr_icm_chunk; +struct mlx5dr_icm_bucket; +struct mlx5dr_ste_htbl; +struct mlx5dr_match_param; +struct mlx5dr_cmd_caps; +struct mlx5dr_matcher_rx_tx; + +struct mlx5dr_ste { + u8 *hw_ste; + /* refcount: indicates the num of rules that using this ste */ + u32 refcount; + + /* attached to the miss_list head at each htbl entry */ + struct list_head miss_list_node; + + /* each rule member that uses this ste attached here */ + struct list_head rule_list; + + /* this ste is member of htbl */ + struct mlx5dr_ste_htbl *htbl; + + struct mlx5dr_ste_htbl *next_htbl; + + /* this ste is part of a rule, located in ste's chain */ + u8 ste_chain_location; +}; + +struct mlx5dr_ste_htbl_ctrl { + /* total number of valid entries belonging to this hash table. 
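/*
 * mlx5dr_icm_next_higher_chunk() above steps the log2 chunk-size enum by
 * two, so each growth step quadruples the entry count and saturates at
 * DR_CHUNK_SIZE_MAX.  A minimal user-space sketch of the same behaviour
 * with an abbreviated enum.
 */
#include <stdio.h>

enum chunk_size {		/* log2 of the entry count */
	CHUNK_SIZE_1,
	CHUNK_SIZE_2,
	CHUNK_SIZE_4,
	/* ...intermediate sizes elided... */
	CHUNK_SIZE_MAX = 22,
};

static enum chunk_size next_higher_chunk(enum chunk_size chunk)
{
	chunk += 2;		/* four times as many entries */
	return chunk < CHUNK_SIZE_MAX ? chunk : CHUNK_SIZE_MAX;
}

int main(void)
{
	enum chunk_size c = next_higher_chunk(CHUNK_SIZE_1);

	printf("entries after one growth step: %u\n", 1u << c);	/* 4 */
	return 0;
}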
This + * includes the non collision and collision entries + */ + unsigned int num_of_valid_entries; + + /* total number of collisions entries attached to this table */ + unsigned int num_of_collisions; + unsigned int increase_threshold; + u8 may_grow:1; +}; + +struct mlx5dr_ste_htbl { + u8 lu_type; + u16 byte_mask; + u32 refcount; + struct mlx5dr_icm_chunk *chunk; + struct mlx5dr_ste *ste_arr; + u8 *hw_ste_arr; + + struct list_head *miss_list; + + enum mlx5dr_icm_chunk_size chunk_size; + struct mlx5dr_ste *pointing_ste; + + struct mlx5dr_ste_htbl_ctrl ctrl; +}; + +struct mlx5dr_ste_send_info { + struct mlx5dr_ste *ste; + struct list_head send_list; + u16 size; + u16 offset; + u8 data_cont[DR_STE_SIZE]; + u8 *data; +}; + +void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size, + u16 offset, u8 *data, + struct mlx5dr_ste_send_info *ste_info, + struct list_head *send_list, + bool copy_data); + +struct mlx5dr_ste_build { + u8 inner:1; + u8 rx:1; + u8 vhca_id_valid:1; + struct mlx5dr_domain *dmn; + struct mlx5dr_cmd_caps *caps; + u8 lu_type; + u16 byte_mask; + u8 bit_mask[DR_STE_SIZE_MASK]; + int (*ste_build_tag_func)(struct mlx5dr_match_param *spec, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p); +}; + +struct mlx5dr_ste_htbl * +mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size, + u8 lu_type, u16 byte_mask); + +int mlx5dr_ste_htbl_free(struct mlx5dr_ste_htbl *htbl); + +static inline void mlx5dr_htbl_put(struct mlx5dr_ste_htbl *htbl) +{ + htbl->refcount--; + if (!htbl->refcount) + mlx5dr_ste_htbl_free(htbl); +} + +static inline void mlx5dr_htbl_get(struct mlx5dr_ste_htbl *htbl) +{ + htbl->refcount++; +} + +/* STE utils */ +u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl); +void mlx5dr_ste_init(u8 *hw_ste_p, u8 lu_type, u8 entry_type, u16 gvmi); +void mlx5dr_ste_always_hit_htbl(struct mlx5dr_ste *ste, + struct mlx5dr_ste_htbl *next_htbl); +void mlx5dr_ste_set_miss_addr(u8 *hw_ste, u64 miss_addr); +u64 mlx5dr_ste_get_miss_addr(u8 *hw_ste); +void mlx5dr_ste_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi); +void mlx5dr_ste_set_hit_addr(u8 *hw_ste, u64 icm_addr, u32 ht_size); +void mlx5dr_ste_always_miss_addr(struct mlx5dr_ste *ste, u64 miss_addr); +void mlx5dr_ste_set_bit_mask(u8 *hw_ste_p, u8 *bit_mask); +bool mlx5dr_ste_not_used_ste(struct mlx5dr_ste *ste); +bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher, + u8 ste_location); +void mlx5dr_ste_rx_set_flow_tag(u8 *hw_ste_p, u32 flow_tag); +void mlx5dr_ste_set_counter_id(u8 *hw_ste_p, u32 ctr_id); +void mlx5dr_ste_set_tx_encap(void *hw_ste_p, u32 reformat_id, + int size, bool encap_l3); +void mlx5dr_ste_set_rx_decap(u8 *hw_ste_p); +void mlx5dr_ste_set_rx_decap_l3(u8 *hw_ste_p, bool vlan); +void mlx5dr_ste_set_rx_pop_vlan(u8 *hw_ste_p); +void mlx5dr_ste_set_tx_push_vlan(u8 *hw_ste_p, u32 vlan_tpid_pcp_dei_vid, + bool go_back); +void mlx5dr_ste_set_entry_type(u8 *hw_ste_p, u8 entry_type); +u8 mlx5dr_ste_get_entry_type(u8 *hw_ste_p); +void mlx5dr_ste_set_rewrite_actions(u8 *hw_ste_p, u16 num_of_actions, + u32 re_write_index); +void mlx5dr_ste_set_go_back_bit(u8 *hw_ste_p); +u64 mlx5dr_ste_get_icm_addr(struct mlx5dr_ste *ste); +u64 mlx5dr_ste_get_mr_addr(struct mlx5dr_ste *ste); +struct list_head *mlx5dr_ste_get_miss_list(struct mlx5dr_ste *ste); + +void mlx5dr_ste_free(struct mlx5dr_ste *ste, + struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher); +static inline void mlx5dr_ste_put(struct mlx5dr_ste *ste, + struct 
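/*
 * mlx5dr_htbl_get()/mlx5dr_htbl_put() above, like the mlx5dr_ste_get()/
 * mlx5dr_ste_put() pair that follows, keep a plain (non-atomic) reference
 * count and free the object once it drops to zero; presumably the domain
 * mutex serializes the callers, which is an inference rather than
 * something stated in this hunk.  A minimal sketch of the same get/put
 * idiom on a toy structure.
 */
#include <stdlib.h>

struct toy_htbl {
	unsigned int refcount;
	/* ...chunk, ste_arr, miss_list in the real structure... */
};

static void toy_htbl_free(struct toy_htbl *htbl)
{
	free(htbl);
}

static void toy_htbl_get(struct toy_htbl *htbl)
{
	htbl->refcount++;
}

static void toy_htbl_put(struct toy_htbl *htbl)
{
	/* Last reference frees the table, as in mlx5dr_htbl_put(). */
	if (!--htbl->refcount)
		toy_htbl_free(htbl);
}

int main(void)
{
	struct toy_htbl *htbl = calloc(1, sizeof(*htbl));

	if (!htbl)
		return 1;
	toy_htbl_get(htbl);	/* refcount = 1 */
	toy_htbl_put(htbl);	/* refcount = 0, table freed */
	return 0;
}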
mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher) +{ + ste->refcount--; + if (!ste->refcount) + mlx5dr_ste_free(ste, matcher, nic_matcher); +} + +/* initial as 0, increased only when ste appears in a new rule */ +static inline void mlx5dr_ste_get(struct mlx5dr_ste *ste) +{ + ste->refcount++; +} + +void mlx5dr_ste_set_hit_addr_by_next_htbl(u8 *hw_ste, + struct mlx5dr_ste_htbl *next_htbl); +bool mlx5dr_ste_equal_tag(void *src, void *dst); +int mlx5dr_ste_create_next_htbl(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *ste, + u8 *cur_hw_ste, + enum mlx5dr_icm_chunk_size log_table_size); + +/* STE build functions */ +int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn, + u8 match_criteria, + struct mlx5dr_match_param *mask, + struct mlx5dr_match_param *value); +int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_match_param *value, + u8 *ste_arr); +int mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *builder, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l3_ipv4_5_tuple(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l3_ipv4_misc(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l3_ipv6_dst(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l3_ipv6_src(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l2_src(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l2_dst(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l2_tnl(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_ipv6_l3_l4(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l4_misc(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_gre(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_mpls(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +int mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_flex_parser_tnl_vxlan_gpe(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_flex_parser_tnl_geneve(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_register_0(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_register_1(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +int mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn, + bool inner, bool 
rx); +void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx); + +/* Actions utils */ +int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_action *actions[], + u32 num_actions, + u8 *ste_arr, + u32 *new_hw_ste_arr_sz); + +struct mlx5dr_match_spec { + u32 smac_47_16; /* Source MAC address of incoming packet */ + /* Incoming packet Ethertype - this is the Ethertype + * following the last VLAN tag of the packet + */ + u32 ethertype:16; + u32 smac_15_0:16; /* Source MAC address of incoming packet */ + u32 dmac_47_16; /* Destination MAC address of incoming packet */ + /* VLAN ID of first VLAN tag in the incoming packet. + * Valid only when cvlan_tag==1 or svlan_tag==1 + */ + u32 first_vid:12; + /* CFI bit of first VLAN tag in the incoming packet. + * Valid only when cvlan_tag==1 or svlan_tag==1 + */ + u32 first_cfi:1; + /* Priority of first VLAN tag in the incoming packet. + * Valid only when cvlan_tag==1 or svlan_tag==1 + */ + u32 first_prio:3; + u32 dmac_15_0:16; /* Destination MAC address of incoming packet */ + /* TCP flags. ;Bit 0: FIN;Bit 1: SYN;Bit 2: RST;Bit 3: PSH;Bit 4: ACK; + * Bit 5: URG;Bit 6: ECE;Bit 7: CWR;Bit 8: NS + */ + u32 tcp_flags:9; + u32 ip_version:4; /* IP version */ + u32 frag:1; /* Packet is an IP fragment */ + /* The first vlan in the packet is s-vlan (0x8a88). + * cvlan_tag and svlan_tag cannot be set together + */ + u32 svlan_tag:1; + /* The first vlan in the packet is c-vlan (0x8100). + * cvlan_tag and svlan_tag cannot be set together + */ + u32 cvlan_tag:1; + /* Explicit Congestion Notification derived from + * Traffic Class/TOS field of IPv6/v4 + */ + u32 ip_ecn:2; + /* Differentiated Services Code Point derived from + * Traffic Class/TOS field of IPv6/v4 + */ + u32 ip_dscp:6; + u32 ip_protocol:8; /* IP protocol */ + /* TCP destination port. 
+ * tcp and udp sport/dport are mutually exclusive + */ + u32 tcp_dport:16; + /* TCP source port.;tcp and udp sport/dport are mutually exclusive */ + u32 tcp_sport:16; + u32 ttl_hoplimit:8; + u32 reserved:24; + /* UDP destination port.;tcp and udp sport/dport are mutually exclusive */ + u32 udp_dport:16; + /* UDP source port.;tcp and udp sport/dport are mutually exclusive */ + u32 udp_sport:16; + /* IPv6 source address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 src_ip_127_96; + /* IPv6 source address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 src_ip_95_64; + /* IPv6 source address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 src_ip_63_32; + /* IPv6 source address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 src_ip_31_0; + /* IPv6 destination address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 dst_ip_127_96; + /* IPv6 destination address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 dst_ip_95_64; + /* IPv6 destination address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 dst_ip_63_32; + /* IPv6 destination address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 dst_ip_31_0; +}; + +struct mlx5dr_match_misc { + u32 source_sqn:24; /* Source SQN */ + u32 source_vhca_port:4; + /* used with GRE, sequence number exist when gre_s_present == 1 */ + u32 gre_s_present:1; + /* used with GRE, key exist when gre_k_present == 1 */ + u32 gre_k_present:1; + u32 reserved_auto1:1; + /* used with GRE, checksum exist when gre_c_present == 1 */ + u32 gre_c_present:1; + /* Source port.;0xffff determines wire port */ + u32 source_port:16; + u32 source_eswitch_owner_vhca_id:16; + /* VLAN ID of first VLAN tag the inner header of the incoming packet. + * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 + */ + u32 inner_second_vid:12; + /* CFI bit of first VLAN tag in the inner header of the incoming packet. + * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 + */ + u32 inner_second_cfi:1; + /* Priority of second VLAN tag in the inner header of the incoming packet. + * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 + */ + u32 inner_second_prio:3; + /* VLAN ID of first VLAN tag the outer header of the incoming packet. + * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1 + */ + u32 outer_second_vid:12; + /* CFI bit of first VLAN tag in the outer header of the incoming packet. + * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1 + */ + u32 outer_second_cfi:1; + /* Priority of second VLAN tag in the outer header of the incoming packet. 
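/*
 * The mlx5dr_match_spec fields above mirror the device match layout:
 * MAC addresses split into _47_16/_15_0 halves, a 4-bit ip_version,
 * 16-bit L4 ports, and IPv4 addresses carried in the low 32 bits of the
 * IPv6-sized address fields.  A minimal sketch that fills a reduced
 * stand-in for an outer IPv4/TCP destination-port match; the struct below
 * is an illustration, not the full definition.
 */
#include <stdint.h>
#include <string.h>

struct toy_match_spec {
	uint32_t ip_version : 4;
	uint32_t ip_protocol : 8;
	uint32_t tcp_dport : 16;
	uint32_t dst_ip_31_0;		/* IPv4 uses only bits 31:0 */
};

static void match_outer_tcp_dport(struct toy_match_spec *spec,
				  struct toy_match_spec *mask,
				  uint32_t dst_ip, uint16_t dport)
{
	memset(spec, 0, sizeof(*spec));
	memset(mask, 0, sizeof(*mask));

	spec->ip_version = 4;		mask->ip_version = 0xf;
	spec->ip_protocol = 6;		mask->ip_protocol = 0xff;	/* TCP */
	spec->tcp_dport = dport;	mask->tcp_dport = 0xffff;
	spec->dst_ip_31_0 = dst_ip;	mask->dst_ip_31_0 = 0xffffffff;
}

int main(void)
{
	struct toy_match_spec spec, mask;

	match_outer_tcp_dport(&spec, &mask, 0x0a000001 /* 10.0.0.1 */, 443);
	return spec.tcp_dport == 443 ? 0 : 1;
}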
+ * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1 + */ + u32 outer_second_prio:3; + u32 gre_protocol:16; /* GRE Protocol (outer) */ + u32 reserved_auto3:12; + /* The second vlan in the inner header of the packet is s-vlan (0x8a88). + * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together + */ + u32 inner_second_svlan_tag:1; + /* The second vlan in the outer header of the packet is s-vlan (0x8a88). + * outer_second_cvlan_tag and outer_second_svlan_tag cannot be set together + */ + u32 outer_second_svlan_tag:1; + /* The second vlan in the inner header of the packet is c-vlan (0x8100). + * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together + */ + u32 inner_second_cvlan_tag:1; + /* The second vlan in the outer header of the packet is c-vlan (0x8100). + * outer_second_cvlan_tag and outer_second_svlan_tag cannot be set together + */ + u32 outer_second_cvlan_tag:1; + u32 gre_key_l:8; /* GRE Key [7:0] (outer) */ + u32 gre_key_h:24; /* GRE Key[31:8] (outer) */ + u32 reserved_auto4:8; + u32 vxlan_vni:24; /* VXLAN VNI (outer) */ + u32 geneve_oam:1; /* GENEVE OAM field (outer) */ + u32 reserved_auto5:7; + u32 geneve_vni:24; /* GENEVE VNI field (outer) */ + u32 outer_ipv6_flow_label:20; /* Flow label of incoming IPv6 packet (outer) */ + u32 reserved_auto6:12; + u32 inner_ipv6_flow_label:20; /* Flow label of incoming IPv6 packet (inner) */ + u32 reserved_auto7:12; + u32 geneve_protocol_type:16; /* GENEVE protocol type (outer) */ + u32 geneve_opt_len:6; /* GENEVE OptLen (outer) */ + u32 reserved_auto8:10; + u32 bth_dst_qp:24; /* Destination QP in BTH header */ + u32 reserved_auto9:8; + u8 reserved_auto10[20]; +}; + +struct mlx5dr_match_misc2 { + u32 outer_first_mpls_ttl:8; /* First MPLS TTL (outer) */ + u32 outer_first_mpls_s_bos:1; /* First MPLS S_BOS (outer) */ + u32 outer_first_mpls_exp:3; /* First MPLS EXP (outer) */ + u32 outer_first_mpls_label:20; /* First MPLS LABEL (outer) */ + u32 inner_first_mpls_ttl:8; /* First MPLS TTL (inner) */ + u32 inner_first_mpls_s_bos:1; /* First MPLS S_BOS (inner) */ + u32 inner_first_mpls_exp:3; /* First MPLS EXP (inner) */ + u32 inner_first_mpls_label:20; /* First MPLS LABEL (inner) */ + u32 outer_first_mpls_over_gre_ttl:8; /* last MPLS TTL (outer) */ + u32 outer_first_mpls_over_gre_s_bos:1; /* last MPLS S_BOS (outer) */ + u32 outer_first_mpls_over_gre_exp:3; /* last MPLS EXP (outer) */ + u32 outer_first_mpls_over_gre_label:20; /* last MPLS LABEL (outer) */ + u32 outer_first_mpls_over_udp_ttl:8; /* last MPLS TTL (outer) */ + u32 outer_first_mpls_over_udp_s_bos:1; /* last MPLS S_BOS (outer) */ + u32 outer_first_mpls_over_udp_exp:3; /* last MPLS EXP (outer) */ + u32 outer_first_mpls_over_udp_label:20; /* last MPLS LABEL (outer) */ + u32 metadata_reg_c_7; /* metadata_reg_c_7 */ + u32 metadata_reg_c_6; /* metadata_reg_c_6 */ + u32 metadata_reg_c_5; /* metadata_reg_c_5 */ + u32 metadata_reg_c_4; /* metadata_reg_c_4 */ + u32 metadata_reg_c_3; /* metadata_reg_c_3 */ + u32 metadata_reg_c_2; /* metadata_reg_c_2 */ + u32 metadata_reg_c_1; /* metadata_reg_c_1 */ + u32 metadata_reg_c_0; /* metadata_reg_c_0 */ + u32 metadata_reg_a; /* metadata_reg_a */ + u32 metadata_reg_b; /* metadata_reg_b */ + u8 reserved_auto2[8]; +}; + +struct mlx5dr_match_misc3 { + u32 inner_tcp_seq_num; + u32 outer_tcp_seq_num; + u32 inner_tcp_ack_num; + u32 outer_tcp_ack_num; + u32 outer_vxlan_gpe_vni:24; + u32 reserved_auto1:8; + u32 reserved_auto2:16; + u32 outer_vxlan_gpe_flags:8; + u32 outer_vxlan_gpe_next_protocol:8; + u32 
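/*
 * mlx5dr_match_misc above carries the 32-bit GRE key split across
 * gre_key_h (bits 31:8) and gre_key_l (bits 7:0).  A minimal sketch of
 * splitting and recombining the key with stand-in bitfields.
 */
#include <stdint.h>
#include <stdio.h>

struct toy_gre_key {
	uint32_t gre_key_l : 8;		/* GRE Key [7:0]  */
	uint32_t gre_key_h : 24;	/* GRE Key [31:8] */
};

static struct toy_gre_key split_gre_key(uint32_t key)
{
	struct toy_gre_key k = {
		.gre_key_l = key & 0xff,
		.gre_key_h = key >> 8,
	};

	return k;
}

static uint32_t join_gre_key(struct toy_gre_key k)
{
	return ((uint32_t)k.gre_key_h << 8) | k.gre_key_l;
}

int main(void)
{
	struct toy_gre_key k = split_gre_key(0x12345678);

	printf("rejoined key: 0x%08x\n", join_gre_key(k));
	return 0;
}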
icmpv4_header_data; + u32 icmpv6_header_data; + u32 icmpv6_code:8; + u32 icmpv6_type:8; + u32 icmpv4_code:8; + u32 icmpv4_type:8; + u8 reserved_auto3[0x1c]; +}; + +struct mlx5dr_match_param { + struct mlx5dr_match_spec outer; + struct mlx5dr_match_misc misc; + struct mlx5dr_match_spec inner; + struct mlx5dr_match_misc2 misc2; + struct mlx5dr_match_misc3 misc3; +}; + +#define DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(_misc3) ((_misc3)->icmpv4_type || \ + (_misc3)->icmpv4_code || \ + (_misc3)->icmpv4_header_data) + +struct mlx5dr_esw_caps { + u64 drop_icm_address_rx; + u64 drop_icm_address_tx; + u64 uplink_icm_address_rx; + u64 uplink_icm_address_tx; + bool sw_owner; +}; + +struct mlx5dr_cmd_vport_cap { + u16 vport_gvmi; + u16 vhca_gvmi; + u64 icm_address_rx; + u64 icm_address_tx; + u32 num; +}; + +struct mlx5dr_cmd_caps { + u16 gvmi; + u64 nic_rx_drop_address; + u64 nic_tx_drop_address; + u64 nic_tx_allow_address; + u64 esw_rx_drop_address; + u64 esw_tx_drop_address; + u32 log_icm_size; + u64 hdr_modify_icm_addr; + u32 flex_protocols; + u8 flex_parser_id_icmp_dw0; + u8 flex_parser_id_icmp_dw1; + u8 flex_parser_id_icmpv6_dw0; + u8 flex_parser_id_icmpv6_dw1; + u8 max_ft_level; + u16 roce_min_src_udp; + u8 num_esw_ports; + bool eswitch_manager; + bool rx_sw_owner; + bool tx_sw_owner; + bool fdb_sw_owner; + u32 num_vports; + struct mlx5dr_esw_caps esw_caps; + struct mlx5dr_cmd_vport_cap *vports_caps; + bool prio_tag_required; +}; + +struct mlx5dr_domain_rx_tx { + u64 drop_icm_addr; + u64 default_icm_addr; + enum mlx5dr_ste_entry_type ste_type; +}; + +struct mlx5dr_domain_info { + bool supp_sw_steering; + u32 max_inline_size; + u32 max_send_wr; + u32 max_log_sw_icm_sz; + u32 max_log_action_icm_sz; + struct mlx5dr_domain_rx_tx rx; + struct mlx5dr_domain_rx_tx tx; + struct mlx5dr_cmd_caps caps; +}; + +struct mlx5dr_domain_cache { + struct mlx5dr_fw_recalc_cs_ft **recalc_cs_ft; +}; + +struct mlx5dr_domain { + struct mlx5dr_domain *peer_dmn; + struct mlx5_core_dev *mdev; + u32 pdn; + struct mlx5_uars_page *uar; + enum mlx5dr_domain_type type; + refcount_t refcount; + struct mutex mutex; /* protect domain */ + struct mlx5dr_icm_pool *ste_icm_pool; + struct mlx5dr_icm_pool *action_icm_pool; + struct mlx5dr_send_ring *send_ring; + struct mlx5dr_domain_info info; + struct mlx5dr_domain_cache cache; +}; + +struct mlx5dr_table_rx_tx { + struct mlx5dr_ste_htbl *s_anchor; + struct mlx5dr_domain_rx_tx *nic_dmn; + u64 default_icm_addr; +}; + +struct mlx5dr_table { + struct mlx5dr_domain *dmn; + struct mlx5dr_table_rx_tx rx; + struct mlx5dr_table_rx_tx tx; + u32 level; + u32 table_type; + u32 table_id; + u32 flags; + struct list_head matcher_list; + struct mlx5dr_action *miss_action; + refcount_t refcount; +}; + +struct mlx5dr_matcher_rx_tx { + struct mlx5dr_ste_htbl *s_htbl; + struct mlx5dr_ste_htbl *e_anchor; + struct mlx5dr_ste_build *ste_builder; + struct mlx5dr_ste_build ste_builder_arr[DR_RULE_IPV_MAX] + [DR_RULE_IPV_MAX] + [DR_RULE_MAX_STES]; + u8 num_of_builders; + u8 num_of_builders_arr[DR_RULE_IPV_MAX][DR_RULE_IPV_MAX]; + u64 default_icm_addr; + struct mlx5dr_table_rx_tx *nic_tbl; +}; + +struct mlx5dr_matcher { + struct mlx5dr_table *tbl; + struct mlx5dr_matcher_rx_tx rx; + struct mlx5dr_matcher_rx_tx tx; + struct list_head matcher_list; + u16 prio; + struct mlx5dr_match_param mask; + u8 match_criteria; + refcount_t refcount; + struct mlx5dv_flow_matcher *dv_matcher; +}; + +struct mlx5dr_rule_member { + struct mlx5dr_ste *ste; + /* attached to mlx5dr_rule via this */ + struct list_head list; + /* attached to 
mlx5dr_ste via this */ + struct list_head use_ste_list; +}; + +struct mlx5dr_action { + enum mlx5dr_action_type action_type; + refcount_t refcount; + union { + struct { + struct mlx5dr_domain *dmn; + struct mlx5dr_icm_chunk *chunk; + u8 *data; + u32 data_size; + u16 num_of_actions; + u32 index; + u8 allow_rx:1; + u8 allow_tx:1; + u8 modify_ttl:1; + } rewrite; + struct { + struct mlx5dr_domain *dmn; + u32 reformat_id; + u32 reformat_size; + } reformat; + struct { + u8 is_fw_tbl:1; + union { + struct mlx5dr_table *tbl; + struct { + struct mlx5dr_domain *dmn; + u32 id; + u32 group_id; + enum fs_flow_table_type type; + u64 rx_icm_addr; + u64 tx_icm_addr; + struct mlx5dr_action **ref_actions; + u32 num_of_ref_actions; + } fw_tbl; + }; + } dest_tbl; + struct { + u32 ctr_id; + u32 offeset; + } ctr; + struct { + struct mlx5dr_domain *dmn; + struct mlx5dr_cmd_vport_cap *caps; + } vport; + struct { + u32 vlan_hdr; /* tpid_pcp_dei_vid */ + } push_vlan; + u32 flow_tag; + }; +}; + +enum mlx5dr_connect_type { + CONNECT_HIT = 1, + CONNECT_MISS = 2, +}; + +struct mlx5dr_htbl_connect_info { + enum mlx5dr_connect_type type; + union { + struct mlx5dr_ste_htbl *hit_next_htbl; + u64 miss_icm_addr; + }; +}; + +struct mlx5dr_rule_rx_tx { + struct list_head rule_members_list; + struct mlx5dr_matcher_rx_tx *nic_matcher; +}; + +struct mlx5dr_rule { + struct mlx5dr_matcher *matcher; + struct mlx5dr_rule_rx_tx rx; + struct mlx5dr_rule_rx_tx tx; + struct list_head rule_actions_list; +}; + +void mlx5dr_rule_update_rule_member(struct mlx5dr_ste *new_ste, + struct mlx5dr_ste *ste); + +struct mlx5dr_icm_chunk { + struct mlx5dr_icm_bucket *bucket; + struct list_head chunk_list; + u32 rkey; + u32 num_of_entries; + u32 byte_size; + u64 icm_addr; + u64 mr_addr; + + /* Memory optimisation */ + struct mlx5dr_ste *ste_arr; + u8 *hw_ste_arr; + struct list_head *miss_list; +}; + +static inline int +mlx5dr_matcher_supp_flex_parser_icmp_v4(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED; +} + +static inline int +mlx5dr_matcher_supp_flex_parser_icmp_v6(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED; +} + +int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + enum mlx5dr_ipv outer_ipv, + enum mlx5dr_ipv inner_ipv); + +static inline u32 +mlx5dr_icm_pool_chunk_size_to_entries(enum mlx5dr_icm_chunk_size chunk_size) +{ + return 1 << chunk_size; +} + +static inline int +mlx5dr_icm_pool_chunk_size_to_byte(enum mlx5dr_icm_chunk_size chunk_size, + enum mlx5dr_icm_type icm_type) +{ + int num_of_entries; + int entry_size; + + if (icm_type == DR_ICM_TYPE_STE) + entry_size = DR_STE_SIZE; + else + entry_size = DR_MODIFY_ACTION_SIZE; + + num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(chunk_size); + + return entry_size * num_of_entries; +} + +static inline struct mlx5dr_cmd_vport_cap * +mlx5dr_get_vport_cap(struct mlx5dr_cmd_caps *caps, u32 vport) +{ + if (!caps->vports_caps || + (vport >= caps->num_vports && vport != WIRE_PORT)) + return NULL; + + if (vport == WIRE_PORT) + vport = caps->num_vports; + + return &caps->vports_caps[vport]; +} + +struct mlx5dr_cmd_query_flow_table_details { + u8 status; + u8 level; + u64 sw_owner_icm_root_1; + u64 sw_owner_icm_root_0; +}; + +struct mlx5dr_cmd_create_flow_table_attr { + u32 table_type; + u64 icm_addr_rx; + u64 icm_addr_tx; + u8 level; + bool sw_owner; + bool term_tbl; + bool decap_en; + bool reformat_en; +}; + +/* internal API functions */ 
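/*
 * mlx5dr_icm_pool_chunk_size_to_byte() above multiplies the log2-derived
 * entry count by the per-entry size: DR_STE_SIZE (64) for STE memory and
 * DR_MODIFY_ACTION_SIZE (8) for modify-header memory.  A minimal
 * user-space sketch of the same arithmetic.
 */
#include <stdio.h>

#define DR_STE_SIZE		64
#define DR_MODIFY_ACTION_SIZE	8

enum icm_type { ICM_TYPE_STE, ICM_TYPE_MODIFY_ACTION };

static int chunk_size_to_byte(int chunk_size /* log2 of the entry count */,
			      enum icm_type icm_type)
{
	int entry_size = icm_type == ICM_TYPE_STE ? DR_STE_SIZE
						  : DR_MODIFY_ACTION_SIZE;

	return entry_size * (1 << chunk_size);
}

int main(void)
{
	/* A 1K-entry STE chunk: 1024 entries of 64 bytes = 65536 bytes. */
	printf("%d bytes\n", chunk_size_to_byte(10, ICM_TYPE_STE));
	return 0;
}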
+int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, + struct mlx5dr_cmd_caps *caps); +int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev, + bool other_vport, u16 vport_number, + u64 *icm_address_rx, + u64 *icm_address_tx); +int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev, + bool other_vport, u16 vport_number, u16 *gvmi); +int mlx5dr_cmd_query_esw_caps(struct mlx5_core_dev *mdev, + struct mlx5dr_esw_caps *caps); +int mlx5dr_cmd_sync_steering(struct mlx5_core_dev *mdev); +int mlx5dr_cmd_set_fte_modify_and_vport(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 group_id, + u32 modify_header_id, + u32 vport_id); +int mlx5dr_cmd_del_flow_table_entry(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id); +int mlx5dr_cmd_alloc_modify_header(struct mlx5_core_dev *mdev, + u32 table_type, + u8 num_of_actions, + u64 *actions, + u32 *modify_header_id); +int mlx5dr_cmd_dealloc_modify_header(struct mlx5_core_dev *mdev, + u32 modify_header_id); +int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 *group_id); +int mlx5dr_cmd_destroy_flow_group(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 group_id); +int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev, + struct mlx5dr_cmd_create_flow_table_attr *attr, + u64 *fdb_rx_icm_addr, + u32 *table_id); +int mlx5dr_cmd_destroy_flow_table(struct mlx5_core_dev *mdev, + u32 table_id, + u32 table_type); +int mlx5dr_cmd_query_flow_table(struct mlx5_core_dev *dev, + enum fs_flow_table_type type, + u32 table_id, + struct mlx5dr_cmd_query_flow_table_details *output); +int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev, + enum mlx5_reformat_ctx_type rt, + size_t reformat_size, + void *reformat_data, + u32 *reformat_id); +void mlx5dr_cmd_destroy_reformat_ctx(struct mlx5_core_dev *mdev, + u32 reformat_id); + +struct mlx5dr_cmd_gid_attr { + u8 gid[16]; + u8 mac[6]; + u32 roce_ver; +}; + +struct mlx5dr_cmd_qp_create_attr { + u32 page_id; + u32 pdn; + u32 cqn; + u32 pm_state; + u32 service_type; + u32 buff_umem_id; + u32 db_umem_id; + u32 sq_wqe_cnt; + u32 rq_wqe_cnt; + u32 rq_wqe_shift; +}; + +int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num, + u16 index, struct mlx5dr_cmd_gid_attr *attr); + +struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn, + enum mlx5dr_icm_type icm_type); +void mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool); + +struct mlx5dr_icm_chunk * +mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size); +void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk); +bool mlx5dr_ste_is_not_valid_entry(u8 *p_hw_ste); +int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn, + struct mlx5dr_domain_rx_tx *nic_dmn, + struct mlx5dr_ste_htbl *htbl, + struct mlx5dr_htbl_connect_info *connect_info, + bool update_hw_ste); +void mlx5dr_ste_set_formatted_ste(u16 gvmi, + struct mlx5dr_domain_rx_tx *nic_dmn, + struct mlx5dr_ste_htbl *htbl, + u8 *formatted_ste, + struct mlx5dr_htbl_connect_info *connect_info); +void mlx5dr_ste_copy_param(u8 match_criteria, + struct mlx5dr_match_param *set_param, + struct mlx5dr_match_parameters *mask); + +struct mlx5dr_qp { + struct mlx5_core_dev *mdev; + struct mlx5_wq_qp wq; + struct mlx5_uars_page *uar; + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5_core_qp mqp; + struct { + unsigned int pc; + unsigned int cc; + unsigned int size; + unsigned int *wqe_head; + unsigned int wqe_cnt; + } sq; + struct { + unsigned 
int pc; + unsigned int cc; + unsigned int size; + unsigned int wqe_cnt; + } rq; + int max_inline_data; +}; + +struct mlx5dr_cq { + struct mlx5_core_dev *mdev; + struct mlx5_cqwq wq; + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5_core_cq mcq; + struct mlx5dr_qp *qp; +}; + +struct mlx5dr_mr { + struct mlx5_core_dev *mdev; + struct mlx5_core_mkey mkey; + dma_addr_t dma_addr; + void *addr; + size_t size; +}; + +#define MAX_SEND_CQE 64 +#define MIN_READ_SYNC 64 + +struct mlx5dr_send_ring { + struct mlx5dr_cq *cq; + struct mlx5dr_qp *qp; + struct mlx5dr_mr *mr; + /* How much wqes are waiting for completion */ + u32 pending_wqe; + /* Signal request per this trash hold value */ + u16 signal_th; + /* Each post_send_size less than max_post_send_size */ + u32 max_post_send_size; + /* manage the send queue */ + u32 tx_head; + void *buf; + u32 buf_size; + struct ib_wc wc[MAX_SEND_CQE]; + u8 sync_buff[MIN_READ_SYNC]; + struct mlx5dr_mr *sync_mr; +}; + +int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn); +void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn, + struct mlx5dr_send_ring *send_ring); +int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn); +int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, + struct mlx5dr_ste *ste, + u8 *data, + u16 size, + u16 offset); +int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_htbl *htbl, + u8 *formatted_ste, u8 *mask); +int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_htbl *htbl, + u8 *ste_init_data, + bool update_hw_ste); +int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn, + struct mlx5dr_action *action); + +struct mlx5dr_cmd_ft_info { + u32 id; + u16 vport; + enum fs_flow_table_type type; +}; + +struct mlx5dr_cmd_flow_destination_hw_info { + enum mlx5_flow_destination_type type; + union { + u32 tir_num; + u32 ft_num; + u32 ft_id; + u32 counter_id; + struct { + u16 num; + u16 vhca_id; + u32 reformat_id; + u8 flags; + } vport; + }; +}; + +struct mlx5dr_cmd_fte_info { + u32 dests_size; + u32 index; + struct mlx5_flow_context flow_context; + u32 *val; + struct mlx5_flow_act action; + struct mlx5dr_cmd_flow_destination_hw_info *dest_arr; +}; + +int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev, + int opmod, int modify_mask, + struct mlx5dr_cmd_ft_info *ft, + u32 group_id, + struct mlx5dr_cmd_fte_info *fte); + +struct mlx5dr_fw_recalc_cs_ft { + u64 rx_icm_addr; + u32 table_id; + u32 group_id; + u32 modify_hdr_id; +}; + +struct mlx5dr_fw_recalc_cs_ft * +mlx5dr_fw_create_recalc_cs_ft(struct mlx5dr_domain *dmn, u32 vport_num); +void mlx5dr_fw_destroy_recalc_cs_ft(struct mlx5dr_domain *dmn, + struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft); +int mlx5dr_domain_cache_get_recalc_cs_ft_addr(struct mlx5dr_domain *dmn, + u32 vport_num, + u64 *rx_icm_addr); +int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn, + struct mlx5dr_cmd_flow_destination_hw_info *dest, + int num_dest, + bool reformat_req, + u32 *tbl_id, + u32 *group_id); +void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn, u32 tbl_id, + u32 group_id); +#endif /* _DR_TYPES_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c new file mode 100644 index 000000000000..3abfc8125926 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -0,0 +1,672 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include "mlx5_core.h" +#include "fs_core.h" +#include "fs_cmd.h" +#include 
"mlx5dr.h" +#include "fs_dr.h" + +static bool mlx5_dr_is_fw_table(u32 flags) +{ + if (flags & MLX5_FLOW_TABLE_TERMINATION) + return true; + + return false; +} + +static int mlx5_cmd_dr_update_root_ft(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + u32 underlay_qpn, + bool disconnect) +{ + return mlx5_fs_cmd_get_fw_cmds()->update_root_ft(ns, ft, underlay_qpn, + disconnect); +} + +static int set_miss_action(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + struct mlx5dr_action *old_miss_action; + struct mlx5dr_action *action = NULL; + struct mlx5dr_table *next_tbl; + int err; + + next_tbl = next_ft ? next_ft->fs_dr_table.dr_table : NULL; + if (next_tbl) { + action = mlx5dr_action_create_dest_table(next_tbl); + if (!action) + return -EINVAL; + } + old_miss_action = ft->fs_dr_table.miss_action; + err = mlx5dr_table_set_miss_action(ft->fs_dr_table.dr_table, action); + if (err && action) { + err = mlx5dr_action_destroy(action); + if (err) { + action = NULL; + mlx5_core_err(ns->dev, "Failed to destroy action (%d)\n", + err); + } + } + ft->fs_dr_table.miss_action = action; + if (old_miss_action) { + err = mlx5dr_action_destroy(old_miss_action); + if (err) + mlx5_core_err(ns->dev, "Failed to destroy action (%d)\n", + err); + } + + return err; +} + +static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + unsigned int log_size, + struct mlx5_flow_table *next_ft) +{ + struct mlx5dr_table *tbl; + int err; + + if (mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->create_flow_table(ns, ft, + log_size, + next_ft); + + tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, + ft->level, ft->flags); + if (!tbl) { + mlx5_core_err(ns->dev, "Failed creating dr flow_table\n"); + return -EINVAL; + } + + ft->fs_dr_table.dr_table = tbl; + ft->id = mlx5dr_table_get_id(tbl); + + if (next_ft) { + err = set_miss_action(ns, ft, next_ft); + if (err) { + mlx5dr_table_destroy(tbl); + ft->fs_dr_table.dr_table = NULL; + return err; + } + } + + return 0; +} + +static int mlx5_cmd_dr_destroy_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft) +{ + struct mlx5dr_action *action = ft->fs_dr_table.miss_action; + int err; + + if (mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_table(ns, ft); + + err = mlx5dr_table_destroy(ft->fs_dr_table.dr_table); + if (err) { + mlx5_core_err(ns->dev, "Failed to destroy flow_table (%d)\n", + err); + return err; + } + if (action) { + err = mlx5dr_action_destroy(action); + if (err) { + mlx5_core_err(ns->dev, "Failed to destroy action(%d)\n", + err); + return err; + } + } + + return err; +} + +static int mlx5_cmd_dr_modify_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + return set_miss_action(ns, ft, next_ft); +} + +static int mlx5_cmd_dr_create_flow_group(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + u32 *in, + struct mlx5_flow_group *fg) +{ + struct mlx5dr_matcher *matcher; + u16 priority = MLX5_GET(create_flow_group_in, in, + start_flow_index); + u8 match_criteria_enable = MLX5_GET(create_flow_group_in, + in, + match_criteria_enable); + struct mlx5dr_match_parameters mask; + + if (mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->create_flow_group(ns, ft, in, + fg); + + mask.match_buf = MLX5_ADDR_OF(create_flow_group_in, + in, match_criteria); + mask.match_sz = 
sizeof(fg->mask.match_criteria); + + matcher = mlx5dr_matcher_create(ft->fs_dr_table.dr_table, + priority, + match_criteria_enable, + &mask); + if (!matcher) { + mlx5_core_err(ns->dev, "Failed creating matcher\n"); + return -EINVAL; + } + + fg->fs_dr_matcher.dr_matcher = matcher; + return 0; +} + +static int mlx5_cmd_dr_destroy_flow_group(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg) +{ + if (mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_group(ns, ft, fg); + + return mlx5dr_matcher_destroy(fg->fs_dr_matcher.dr_matcher); +} + +static struct mlx5dr_action *create_vport_action(struct mlx5dr_domain *domain, + struct mlx5_flow_rule *dst) +{ + struct mlx5_flow_destination *dest_attr = &dst->dest_attr; + + return mlx5dr_action_create_dest_vport(domain, dest_attr->vport.num, + dest_attr->vport.flags & + MLX5_FLOW_DEST_VPORT_VHCA_ID, + dest_attr->vport.vhca_id); +} + +static struct mlx5dr_action *create_ft_action(struct mlx5dr_domain *domain, + struct mlx5_flow_rule *dst) +{ + struct mlx5_flow_table *dest_ft = dst->dest_attr.ft; + + if (mlx5_dr_is_fw_table(dest_ft->flags)) + return mlx5dr_action_create_dest_flow_fw_table(domain, dest_ft); + return mlx5dr_action_create_dest_table(dest_ft->fs_dr_table.dr_table); +} + +static struct mlx5dr_action *create_action_push_vlan(struct mlx5dr_domain *domain, + struct mlx5_fs_vlan *vlan) +{ + u16 n_ethtype = vlan->ethtype; + u8 prio = vlan->prio; + u16 vid = vlan->vid; + u32 vlan_hdr; + + vlan_hdr = (u32)n_ethtype << 16 | (u32)(prio) << 12 | (u32)vid; + return mlx5dr_action_create_push_vlan(domain, htonl(vlan_hdr)); +} + +static bool contain_vport_reformat_action(struct mlx5_flow_rule *dst) +{ + return dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT && + dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID; +} + +#define MLX5_FLOW_CONTEXT_ACTION_MAX 20 +static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *group, + struct fs_fte *fte) +{ + struct mlx5dr_domain *domain = ns->fs_dr_domain.dr_domain; + struct mlx5dr_action_dest *term_actions; + struct mlx5dr_match_parameters params; + struct mlx5_core_dev *dev = ns->dev; + struct mlx5dr_action **fs_dr_actions; + struct mlx5dr_action *tmp_action; + struct mlx5dr_action **actions; + bool delay_encap_set = false; + struct mlx5dr_rule *rule; + struct mlx5_flow_rule *dst; + int fs_dr_num_actions = 0; + int num_term_actions = 0; + int num_actions = 0; + size_t match_sz; + int err = 0; + int i; + + if (mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->create_fte(ns, ft, group, fte); + + actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, sizeof(*actions), + GFP_KERNEL); + if (!actions) { + err = -ENOMEM; + goto out_err; + } + + fs_dr_actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, + sizeof(*fs_dr_actions), GFP_KERNEL); + if (!fs_dr_actions) { + err = -ENOMEM; + goto free_actions_alloc; + } + + term_actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, + sizeof(*term_actions), GFP_KERNEL); + if (!term_actions) { + err = -ENOMEM; + goto free_fs_dr_actions_alloc; + } + + match_sz = sizeof(fte->val); + + /* Drop reformat action bit if destination vport set with reformat */ + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + list_for_each_entry(dst, &fte->node.children, node.list) { + if (!contain_vport_reformat_action(dst)) + continue; + + fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + break; + } + } + + /* 
The order of the actions are must to be keep, only the following + * order is supported by SW steering: + * TX: push vlan -> modify header -> encap + * RX: decap -> pop vlan -> modify header + */ + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) { + tmp_action = create_action_push_vlan(domain, &fte->action.vlan[0]); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) { + tmp_action = create_action_push_vlan(domain, &fte->action.vlan[1]); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { + enum mlx5dr_action_reformat_type decap_type = + DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2; + + tmp_action = mlx5dr_action_create_packet_reformat(domain, + decap_type, 0, + NULL); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { + bool is_decap = fte->action.pkt_reformat->reformat_type == + MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; + + if (is_decap) + actions[num_actions++] = + fte->action.pkt_reformat->action.dr_action; + else + delay_encap_set = true; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) { + tmp_action = + mlx5dr_action_create_pop_vlan(); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2) { + tmp_action = + mlx5dr_action_create_pop_vlan(); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + actions[num_actions++] = + fte->action.modify_hdr->action.dr_action; + + if (delay_encap_set) + actions[num_actions++] = + fte->action.pkt_reformat->action.dr_action; + + /* The order of the actions below is not important */ + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_DROP) { + tmp_action = mlx5dr_action_create_drop(); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + term_actions[num_term_actions++].dest = tmp_action; + } + + if (fte->flow_context.flow_tag) { + tmp_action = + mlx5dr_action_create_tag(fte->flow_context.flow_tag); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + list_for_each_entry(dst, &fte->node.children, node.list) { + enum mlx5_flow_destination_type type = dst->dest_attr.type; + + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || + num_term_actions >= MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -ENOSPC; + goto free_actions; + } + + if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + switch (type) { + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: + tmp_action = create_ft_action(domain, dst); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + term_actions[num_term_actions++].dest = tmp_action; + break; 
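/*
 * The push-VLAN actions assembled above come from create_action_push_vlan()
 * earlier in this file, which packs ethtype, priority and VLAN id into the
 * tpid_pcp_dei_vid dword exactly as sketched below; the htonl() applied to
 * the result there is omitted in this user-space sketch.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t pack_vlan_hdr(uint16_t ethtype, uint8_t prio, uint16_t vid)
{
	/* Same expression as create_action_push_vlan(). */
	return (uint32_t)ethtype << 16 | (uint32_t)prio << 12 | (uint32_t)vid;
}

int main(void)
{
	/* 802.1Q tag, priority 3, VLAN 100. */
	printf("vlan_hdr = 0x%08x\n", pack_vlan_hdr(0x8100, 3, 100));
	return 0;
}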
+ case MLX5_FLOW_DESTINATION_TYPE_VPORT: + tmp_action = create_vport_action(domain, dst); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + term_actions[num_term_actions].dest = tmp_action; + + if (dst->dest_attr.vport.flags & + MLX5_FLOW_DEST_VPORT_REFORMAT_ID) + term_actions[num_term_actions].reformat = + dst->dest_attr.vport.pkt_reformat->action.dr_action; + + num_term_actions++; + break; + default: + err = -EOPNOTSUPP; + goto free_actions; + } + } + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + list_for_each_entry(dst, &fte->node.children, node.list) { + u32 id; + + if (dst->dest_attr.type != + MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -ENOSPC; + goto free_actions; + } + + id = dst->dest_attr.counter_id; + tmp_action = + mlx5dr_action_create_flow_counter(id); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + } + + params.match_sz = match_sz; + params.match_buf = (u64 *)fte->val; + if (num_term_actions == 1) { + if (term_actions->reformat) + actions[num_actions++] = term_actions->reformat; + + actions[num_actions++] = term_actions->dest; + } else if (num_term_actions > 1) { + tmp_action = mlx5dr_action_create_mult_dest_tbl(domain, + term_actions, + num_term_actions); + if (!tmp_action) { + err = -EOPNOTSUPP; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + rule = mlx5dr_rule_create(group->fs_dr_matcher.dr_matcher, + ¶ms, + num_actions, + actions); + if (!rule) { + err = -EINVAL; + goto free_actions; + } + + kfree(term_actions); + kfree(actions); + + fte->fs_dr_rule.dr_rule = rule; + fte->fs_dr_rule.num_actions = fs_dr_num_actions; + fte->fs_dr_rule.dr_actions = fs_dr_actions; + + return 0; + +free_actions: + /* Free in reverse order to handle action dependencies */ + for (i = fs_dr_num_actions - 1; i >= 0; i--) + if (!IS_ERR_OR_NULL(fs_dr_actions[i])) + mlx5dr_action_destroy(fs_dr_actions[i]); + + kfree(term_actions); +free_fs_dr_actions_alloc: + kfree(fs_dr_actions); +free_actions_alloc: + kfree(actions); +out_err: + mlx5_core_err(dev, "Failed to create dr rule err(%d)\n", err); + return err; +} + +static int mlx5_cmd_dr_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, + int reformat_type, + size_t size, + void *reformat_data, + enum mlx5_flow_namespace_type namespace, + struct mlx5_pkt_reformat *pkt_reformat) +{ + struct mlx5dr_domain *dr_domain = ns->fs_dr_domain.dr_domain; + struct mlx5dr_action *action; + int dr_reformat; + + switch (reformat_type) { + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_NVGRE: + case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL: + dr_reformat = DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2; + break; + case MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2: + dr_reformat = DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2; + break; + case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL: + dr_reformat = DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3; + break; + default: + mlx5_core_err(ns->dev, "Packet-reformat not supported(%d)\n", + reformat_type); + return -EOPNOTSUPP; + } + + action = mlx5dr_action_create_packet_reformat(dr_domain, + dr_reformat, + size, + reformat_data); + if (!action) { + mlx5_core_err(ns->dev, "Failed allocating packet-reformat action\n"); + return -EINVAL; + } + + pkt_reformat->action.dr_action = action; + + return 0; +} + +static void 
mlx5_cmd_dr_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat *pkt_reformat) +{ + mlx5dr_action_destroy(pkt_reformat->action.dr_action); +} + +static int mlx5_cmd_dr_modify_header_alloc(struct mlx5_flow_root_namespace *ns, + u8 namespace, u8 num_actions, + void *modify_actions, + struct mlx5_modify_hdr *modify_hdr) +{ + struct mlx5dr_domain *dr_domain = ns->fs_dr_domain.dr_domain; + struct mlx5dr_action *action; + size_t actions_sz; + + actions_sz = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) * + num_actions; + action = mlx5dr_action_create_modify_header(dr_domain, 0, + actions_sz, + modify_actions); + if (!action) { + mlx5_core_err(ns->dev, "Failed allocating modify-header action\n"); + return -EINVAL; + } + + modify_hdr->action.dr_action = action; + + return 0; +} + +static void mlx5_cmd_dr_modify_header_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_modify_hdr *modify_hdr) +{ + mlx5dr_action_destroy(modify_hdr->action.dr_action); +} + +static int mlx5_cmd_dr_update_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *group, + int modify_mask, + struct fs_fte *fte) +{ + return -EOPNOTSUPP; +} + +static int mlx5_cmd_dr_delete_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct fs_fte *fte) +{ + struct mlx5_fs_dr_rule *rule = &fte->fs_dr_rule; + int err; + int i; + + if (mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->delete_fte(ns, ft, fte); + + err = mlx5dr_rule_destroy(rule->dr_rule); + if (err) + return err; + + /* Free in reverse order to handle action dependencies */ + for (i = rule->num_actions - 1; i >= 0; i--) + if (!IS_ERR_OR_NULL(rule->dr_actions[i])) + mlx5dr_action_destroy(rule->dr_actions[i]); + + kfree(rule->dr_actions); + return 0; +} + +static int mlx5_cmd_dr_set_peer(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns) +{ + struct mlx5dr_domain *peer_domain = NULL; + + if (peer_ns) + peer_domain = peer_ns->fs_dr_domain.dr_domain; + mlx5dr_domain_set_peer(ns->fs_dr_domain.dr_domain, + peer_domain); + return 0; +} + +static int mlx5_cmd_dr_create_ns(struct mlx5_flow_root_namespace *ns) +{ + ns->fs_dr_domain.dr_domain = + mlx5dr_domain_create(ns->dev, + MLX5DR_DOMAIN_TYPE_FDB); + if (!ns->fs_dr_domain.dr_domain) { + mlx5_core_err(ns->dev, "Failed to create dr flow namespace\n"); + return -EOPNOTSUPP; + } + return 0; +} + +static int mlx5_cmd_dr_destroy_ns(struct mlx5_flow_root_namespace *ns) +{ + return mlx5dr_domain_destroy(ns->fs_dr_domain.dr_domain); +} + +bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev) +{ + return mlx5dr_is_supported(dev); +} + +static const struct mlx5_flow_cmds mlx5_flow_cmds_dr = { + .create_flow_table = mlx5_cmd_dr_create_flow_table, + .destroy_flow_table = mlx5_cmd_dr_destroy_flow_table, + .modify_flow_table = mlx5_cmd_dr_modify_flow_table, + .create_flow_group = mlx5_cmd_dr_create_flow_group, + .destroy_flow_group = mlx5_cmd_dr_destroy_flow_group, + .create_fte = mlx5_cmd_dr_create_fte, + .update_fte = mlx5_cmd_dr_update_fte, + .delete_fte = mlx5_cmd_dr_delete_fte, + .update_root_ft = mlx5_cmd_dr_update_root_ft, + .packet_reformat_alloc = mlx5_cmd_dr_packet_reformat_alloc, + .packet_reformat_dealloc = mlx5_cmd_dr_packet_reformat_dealloc, + .modify_header_alloc = mlx5_cmd_dr_modify_header_alloc, + .modify_header_dealloc = mlx5_cmd_dr_modify_header_dealloc, + .set_peer = mlx5_cmd_dr_set_peer, + .create_ns = mlx5_cmd_dr_create_ns, + .destroy_ns = mlx5_cmd_dr_destroy_ns, 
+}; + +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void) +{ + return &mlx5_flow_cmds_dr; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h new file mode 100644 index 000000000000..1fb185d6ac7f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB + * Copyright (c) 2019 Mellanox Technologies + */ + +#ifndef _MLX5_FS_DR_ +#define _MLX5_FS_DR_ + +#include "mlx5dr.h" + +struct mlx5_flow_root_namespace; +struct fs_fte; + +struct mlx5_fs_dr_action { + struct mlx5dr_action *dr_action; +}; + +struct mlx5_fs_dr_ns { + struct mlx5_dr_ns *dr_ns; +}; + +struct mlx5_fs_dr_rule { + struct mlx5dr_rule *dr_rule; + /* Only actions created by fs_dr */ + struct mlx5dr_action **dr_actions; + int num_actions; +}; + +struct mlx5_fs_dr_domain { + struct mlx5dr_domain *dr_domain; +}; + +struct mlx5_fs_dr_matcher { + struct mlx5dr_matcher *dr_matcher; +}; + +struct mlx5_fs_dr_table { + struct mlx5dr_table *dr_table; + struct mlx5dr_action *miss_action; +}; + +#ifdef CONFIG_MLX5_SW_STEERING + +bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev); + +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void); + +#else + +static inline const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void) +{ + return NULL; +} + +static inline bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev) +{ + return false; +} + +#endif /* CONFIG_MLX5_SW_STEERING */ +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h new file mode 100644 index 000000000000..e01c3766c7de --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h @@ -0,0 +1,644 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019, Mellanox Technologies */ + +#ifndef MLX5_IFC_DR_H +#define MLX5_IFC_DR_H + +enum { + MLX5DR_ACTION_MDFY_HW_FLD_L2_0 = 0, + MLX5DR_ACTION_MDFY_HW_FLD_L2_1 = 1, + MLX5DR_ACTION_MDFY_HW_FLD_L2_2 = 2, + MLX5DR_ACTION_MDFY_HW_FLD_L3_0 = 3, + MLX5DR_ACTION_MDFY_HW_FLD_L3_1 = 4, + MLX5DR_ACTION_MDFY_HW_FLD_L3_2 = 5, + MLX5DR_ACTION_MDFY_HW_FLD_L3_3 = 6, + MLX5DR_ACTION_MDFY_HW_FLD_L3_4 = 7, + MLX5DR_ACTION_MDFY_HW_FLD_L4_0 = 8, + MLX5DR_ACTION_MDFY_HW_FLD_L4_1 = 9, + MLX5DR_ACTION_MDFY_HW_FLD_MPLS = 10, + MLX5DR_ACTION_MDFY_HW_FLD_L2_TNL_0 = 11, + MLX5DR_ACTION_MDFY_HW_FLD_REG_0 = 12, + MLX5DR_ACTION_MDFY_HW_FLD_REG_1 = 13, + MLX5DR_ACTION_MDFY_HW_FLD_REG_2 = 14, + MLX5DR_ACTION_MDFY_HW_FLD_REG_3 = 15, + MLX5DR_ACTION_MDFY_HW_FLD_L4_2 = 16, + MLX5DR_ACTION_MDFY_HW_FLD_FLEX_0 = 17, + MLX5DR_ACTION_MDFY_HW_FLD_FLEX_1 = 18, + MLX5DR_ACTION_MDFY_HW_FLD_FLEX_2 = 19, + MLX5DR_ACTION_MDFY_HW_FLD_FLEX_3 = 20, + MLX5DR_ACTION_MDFY_HW_FLD_L2_TNL_1 = 21, + MLX5DR_ACTION_MDFY_HW_FLD_METADATA = 22, + MLX5DR_ACTION_MDFY_HW_FLD_RESERVED = 23, +}; + +enum { + MLX5DR_ACTION_MDFY_HW_OP_COPY = 0x1, + MLX5DR_ACTION_MDFY_HW_OP_SET = 0x2, + MLX5DR_ACTION_MDFY_HW_OP_ADD = 0x3, +}; + +enum { + MLX5DR_ACTION_MDFY_HW_HDR_L3_NONE = 0x0, + MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV4 = 0x1, + MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6 = 0x2, +}; + +enum { + MLX5DR_ACTION_MDFY_HW_HDR_L4_NONE = 0x0, + MLX5DR_ACTION_MDFY_HW_HDR_L4_TCP = 0x1, + MLX5DR_ACTION_MDFY_HW_HDR_L4_UDP = 0x2, +}; + +enum { + MLX5DR_STE_LU_TYPE_NOP = 0x00, + MLX5DR_STE_LU_TYPE_SRC_GVMI_AND_QP = 0x05, + MLX5DR_STE_LU_TYPE_ETHL2_TUNNELING_I = 0x0a, + MLX5DR_STE_LU_TYPE_ETHL2_DST_O = 0x06, + 
MLX5DR_STE_LU_TYPE_ETHL2_DST_I = 0x07, + MLX5DR_STE_LU_TYPE_ETHL2_DST_D = 0x1b, + MLX5DR_STE_LU_TYPE_ETHL2_SRC_O = 0x08, + MLX5DR_STE_LU_TYPE_ETHL2_SRC_I = 0x09, + MLX5DR_STE_LU_TYPE_ETHL2_SRC_D = 0x1c, + MLX5DR_STE_LU_TYPE_ETHL2_SRC_DST_O = 0x36, + MLX5DR_STE_LU_TYPE_ETHL2_SRC_DST_I = 0x37, + MLX5DR_STE_LU_TYPE_ETHL2_SRC_DST_D = 0x38, + MLX5DR_STE_LU_TYPE_ETHL3_IPV6_DST_O = 0x0d, + MLX5DR_STE_LU_TYPE_ETHL3_IPV6_DST_I = 0x0e, + MLX5DR_STE_LU_TYPE_ETHL3_IPV6_DST_D = 0x1e, + MLX5DR_STE_LU_TYPE_ETHL3_IPV6_SRC_O = 0x0f, + MLX5DR_STE_LU_TYPE_ETHL3_IPV6_SRC_I = 0x10, + MLX5DR_STE_LU_TYPE_ETHL3_IPV6_SRC_D = 0x1f, + MLX5DR_STE_LU_TYPE_ETHL3_IPV4_5_TUPLE_O = 0x11, + MLX5DR_STE_LU_TYPE_ETHL3_IPV4_5_TUPLE_I = 0x12, + MLX5DR_STE_LU_TYPE_ETHL3_IPV4_5_TUPLE_D = 0x20, + MLX5DR_STE_LU_TYPE_ETHL3_IPV4_MISC_O = 0x29, + MLX5DR_STE_LU_TYPE_ETHL3_IPV4_MISC_I = 0x2a, + MLX5DR_STE_LU_TYPE_ETHL3_IPV4_MISC_D = 0x2b, + MLX5DR_STE_LU_TYPE_ETHL4_O = 0x13, + MLX5DR_STE_LU_TYPE_ETHL4_I = 0x14, + MLX5DR_STE_LU_TYPE_ETHL4_D = 0x21, + MLX5DR_STE_LU_TYPE_ETHL4_MISC_O = 0x2c, + MLX5DR_STE_LU_TYPE_ETHL4_MISC_I = 0x2d, + MLX5DR_STE_LU_TYPE_ETHL4_MISC_D = 0x2e, + MLX5DR_STE_LU_TYPE_MPLS_FIRST_O = 0x15, + MLX5DR_STE_LU_TYPE_MPLS_FIRST_I = 0x24, + MLX5DR_STE_LU_TYPE_MPLS_FIRST_D = 0x25, + MLX5DR_STE_LU_TYPE_GRE = 0x16, + MLX5DR_STE_LU_TYPE_FLEX_PARSER_0 = 0x22, + MLX5DR_STE_LU_TYPE_FLEX_PARSER_1 = 0x23, + MLX5DR_STE_LU_TYPE_FLEX_PARSER_TNL_HEADER = 0x19, + MLX5DR_STE_LU_TYPE_GENERAL_PURPOSE = 0x18, + MLX5DR_STE_LU_TYPE_STEERING_REGISTERS_0 = 0x2f, + MLX5DR_STE_LU_TYPE_STEERING_REGISTERS_1 = 0x30, + MLX5DR_STE_LU_TYPE_DONT_CARE = 0x0f, +}; + +enum mlx5dr_ste_entry_type { + MLX5DR_STE_TYPE_TX = 1, + MLX5DR_STE_TYPE_RX = 2, + MLX5DR_STE_TYPE_MODIFY_PKT = 6, +}; + +struct mlx5_ifc_ste_general_bits { + u8 entry_type[0x4]; + u8 reserved_at_4[0x4]; + u8 entry_sub_type[0x8]; + u8 byte_mask[0x10]; + + u8 next_table_base_63_48[0x10]; + u8 next_lu_type[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 linear_hash_enable[0x1]; + u8 reserved_at_5c[0x2]; + u8 next_table_rank[0x2]; + + u8 reserved_at_60[0xa0]; + u8 tag_value[0x60]; + u8 bit_mask[0x60]; +}; + +struct mlx5_ifc_ste_sx_transmit_bits { + u8 entry_type[0x4]; + u8 reserved_at_4[0x4]; + u8 entry_sub_type[0x8]; + u8 byte_mask[0x10]; + + u8 next_table_base_63_48[0x10]; + u8 next_lu_type[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 linear_hash_enable[0x1]; + u8 reserved_at_5c[0x2]; + u8 next_table_rank[0x2]; + + u8 sx_wire[0x1]; + u8 sx_func_lb[0x1]; + u8 sx_sniffer[0x1]; + u8 sx_wire_enable[0x1]; + u8 sx_func_lb_enable[0x1]; + u8 sx_sniffer_enable[0x1]; + u8 action_type[0x3]; + u8 reserved_at_69[0x1]; + u8 action_description[0x6]; + u8 gvmi[0x10]; + + u8 encap_pointer_vlan_data[0x20]; + + u8 loopback_syndome_en[0x8]; + u8 loopback_syndome[0x8]; + u8 counter_trigger[0x10]; + + u8 miss_address_63_48[0x10]; + u8 counter_trigger_23_16[0x8]; + u8 miss_address_39_32[0x8]; + + u8 miss_address_31_6[0x1a]; + u8 learning_point[0x1]; + u8 go_back[0x1]; + u8 match_polarity[0x1]; + u8 mask_mode[0x1]; + u8 miss_rank[0x2]; +}; + +struct mlx5_ifc_ste_rx_steering_mult_bits { + u8 entry_type[0x4]; + u8 reserved_at_4[0x4]; + u8 entry_sub_type[0x8]; + u8 byte_mask[0x10]; + + u8 next_table_base_63_48[0x10]; + u8 next_lu_type[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 linear_hash_enable[0x1]; + u8 reserved_at_[0x2]; + u8 next_table_rank[0x2]; + + u8 member_count[0x10]; + u8 
gvmi[0x10]; + + u8 qp_list_pointer[0x20]; + + u8 reserved_at_a0[0x1]; + u8 tunneling_action[0x3]; + u8 action_description[0x4]; + u8 reserved_at_a8[0x8]; + u8 counter_trigger_15_0[0x10]; + + u8 miss_address_63_48[0x10]; + u8 counter_trigger_23_16[0x08]; + u8 miss_address_39_32[0x8]; + + u8 miss_address_31_6[0x1a]; + u8 learning_point[0x1]; + u8 fail_on_error[0x1]; + u8 match_polarity[0x1]; + u8 mask_mode[0x1]; + u8 miss_rank[0x2]; +}; + +struct mlx5_ifc_ste_modify_packet_bits { + u8 entry_type[0x4]; + u8 reserved_at_4[0x4]; + u8 entry_sub_type[0x8]; + u8 byte_mask[0x10]; + + u8 next_table_base_63_48[0x10]; + u8 next_lu_type[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 linear_hash_enable[0x1]; + u8 reserved_at_[0x2]; + u8 next_table_rank[0x2]; + + u8 number_of_re_write_actions[0x10]; + u8 gvmi[0x10]; + + u8 header_re_write_actions_pointer[0x20]; + + u8 reserved_at_a0[0x1]; + u8 tunneling_action[0x3]; + u8 action_description[0x4]; + u8 reserved_at_a8[0x8]; + u8 counter_trigger_15_0[0x10]; + + u8 miss_address_63_48[0x10]; + u8 counter_trigger_23_16[0x08]; + u8 miss_address_39_32[0x8]; + + u8 miss_address_31_6[0x1a]; + u8 learning_point[0x1]; + u8 fail_on_error[0x1]; + u8 match_polarity[0x1]; + u8 mask_mode[0x1]; + u8 miss_rank[0x2]; +}; + +struct mlx5_ifc_ste_eth_l2_src_bits { + u8 smac_47_16[0x20]; + + u8 smac_15_0[0x10]; + u8 l3_ethertype[0x10]; + + u8 qp_type[0x2]; + u8 ethertype_filter[0x1]; + u8 reserved_at_43[0x1]; + u8 sx_sniffer[0x1]; + u8 force_lb[0x1]; + u8 functional_lb[0x1]; + u8 port[0x1]; + u8 reserved_at_48[0x4]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_qualifier[0x2]; + u8 reserved_at_52[0x2]; + u8 first_vlan_id[0xc]; + + u8 ip_fragmented[0x1]; + u8 tcp_syn[0x1]; + u8 encp_type[0x2]; + u8 l3_type[0x2]; + u8 l4_type[0x2]; + u8 reserved_at_68[0x4]; + u8 second_priority[0x3]; + u8 second_cfi[0x1]; + u8 second_vlan_qualifier[0x2]; + u8 reserved_at_72[0x2]; + u8 second_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l2_dst_bits { + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 l3_ethertype[0x10]; + + u8 qp_type[0x2]; + u8 ethertype_filter[0x1]; + u8 reserved_at_43[0x1]; + u8 sx_sniffer[0x1]; + u8 force_lb[0x1]; + u8 functional_lb[0x1]; + u8 port[0x1]; + u8 reserved_at_48[0x4]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_qualifier[0x2]; + u8 reserved_at_52[0x2]; + u8 first_vlan_id[0xc]; + + u8 ip_fragmented[0x1]; + u8 tcp_syn[0x1]; + u8 encp_type[0x2]; + u8 l3_type[0x2]; + u8 l4_type[0x2]; + u8 reserved_at_68[0x4]; + u8 second_priority[0x3]; + u8 second_cfi[0x1]; + u8 second_vlan_qualifier[0x2]; + u8 reserved_at_72[0x2]; + u8 second_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l2_src_dst_bits { + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 smac_47_32[0x10]; + + u8 smac_31_0[0x20]; + + u8 sx_sniffer[0x1]; + u8 force_lb[0x1]; + u8 functional_lb[0x1]; + u8 port[0x1]; + u8 l3_type[0x2]; + u8 reserved_at_66[0x6]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_qualifier[0x2]; + u8 reserved_at_72[0x2]; + u8 first_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv4_5_tuple_bits { + u8 destination_address[0x20]; + + u8 source_address[0x20]; + + u8 source_port[0x10]; + u8 destination_port[0x10]; + + u8 fragmented[0x1]; + u8 first_fragment[0x1]; + u8 reserved_at_62[0x2]; + u8 reserved_at_64[0x1]; + u8 ecn[0x2]; + u8 tcp_ns[0x1]; + u8 tcp_cwr[0x1]; + u8 tcp_ece[0x1]; + u8 tcp_urg[0x1]; + u8 tcp_ack[0x1]; + u8 tcp_psh[0x1]; + u8 tcp_rst[0x1]; + u8 tcp_syn[0x1]; + u8 tcp_fin[0x1]; + u8 
dscp[0x6]; + u8 reserved_at_76[0x2]; + u8 protocol[0x8]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv6_dst_bits { + u8 dst_ip_127_96[0x20]; + + u8 dst_ip_95_64[0x20]; + + u8 dst_ip_63_32[0x20]; + + u8 dst_ip_31_0[0x20]; +}; + +struct mlx5_ifc_ste_eth_l2_tnl_bits { + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 l3_ethertype[0x10]; + + u8 l2_tunneling_network_id[0x20]; + + u8 ip_fragmented[0x1]; + u8 tcp_syn[0x1]; + u8 encp_type[0x2]; + u8 l3_type[0x2]; + u8 l4_type[0x2]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 reserved_at_6c[0x3]; + u8 gre_key_flag[0x1]; + u8 first_vlan_qualifier[0x2]; + u8 reserved_at_72[0x2]; + u8 first_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv6_src_bits { + u8 src_ip_127_96[0x20]; + + u8 src_ip_95_64[0x20]; + + u8 src_ip_63_32[0x20]; + + u8 src_ip_31_0[0x20]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv4_misc_bits { + u8 version[0x4]; + u8 ihl[0x4]; + u8 reserved_at_8[0x8]; + u8 total_length[0x10]; + + u8 identification[0x10]; + u8 flags[0x3]; + u8 fragment_offset[0xd]; + + u8 time_to_live[0x8]; + u8 reserved_at_48[0x8]; + u8 checksum[0x10]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_ste_eth_l4_bits { + u8 fragmented[0x1]; + u8 first_fragment[0x1]; + u8 reserved_at_2[0x6]; + u8 protocol[0x8]; + u8 dst_port[0x10]; + + u8 ipv6_version[0x4]; + u8 reserved_at_24[0x1]; + u8 ecn[0x2]; + u8 tcp_ns[0x1]; + u8 tcp_cwr[0x1]; + u8 tcp_ece[0x1]; + u8 tcp_urg[0x1]; + u8 tcp_ack[0x1]; + u8 tcp_psh[0x1]; + u8 tcp_rst[0x1]; + u8 tcp_syn[0x1]; + u8 tcp_fin[0x1]; + u8 src_port[0x10]; + + u8 ipv6_payload_length[0x10]; + u8 ipv6_hop_limit[0x8]; + u8 dscp[0x6]; + u8 reserved_at_5e[0x2]; + + u8 tcp_data_offset[0x4]; + u8 reserved_at_64[0x8]; + u8 flow_label[0x14]; +}; + +struct mlx5_ifc_ste_eth_l4_misc_bits { + u8 checksum[0x10]; + u8 length[0x10]; + + u8 seq_num[0x20]; + + u8 ack_num[0x20]; + + u8 urgent_pointer[0x10]; + u8 window_size[0x10]; +}; + +struct mlx5_ifc_ste_mpls_bits { + u8 mpls0_label[0x14]; + u8 mpls0_exp[0x3]; + u8 mpls0_s_bos[0x1]; + u8 mpls0_ttl[0x8]; + + u8 mpls1_label[0x20]; + + u8 mpls2_label[0x20]; + + u8 reserved_at_60[0x16]; + u8 mpls4_s_bit[0x1]; + u8 mpls4_qualifier[0x1]; + u8 mpls3_s_bit[0x1]; + u8 mpls3_qualifier[0x1]; + u8 mpls2_s_bit[0x1]; + u8 mpls2_qualifier[0x1]; + u8 mpls1_s_bit[0x1]; + u8 mpls1_qualifier[0x1]; + u8 mpls0_s_bit[0x1]; + u8 mpls0_qualifier[0x1]; +}; + +struct mlx5_ifc_ste_register_0_bits { + u8 register_0_h[0x20]; + + u8 register_0_l[0x20]; + + u8 register_1_h[0x20]; + + u8 register_1_l[0x20]; +}; + +struct mlx5_ifc_ste_register_1_bits { + u8 register_2_h[0x20]; + + u8 register_2_l[0x20]; + + u8 register_3_h[0x20]; + + u8 register_3_l[0x20]; +}; + +struct mlx5_ifc_ste_gre_bits { + u8 gre_c_present[0x1]; + u8 reserved_at_30[0x1]; + u8 gre_k_present[0x1]; + u8 gre_s_present[0x1]; + u8 strict_src_route[0x1]; + u8 recur[0x3]; + u8 flags[0x5]; + u8 version[0x3]; + u8 gre_protocol[0x10]; + + u8 checksum[0x10]; + u8 offset[0x10]; + + u8 gre_key_h[0x18]; + u8 gre_key_l[0x8]; + + u8 seq_num[0x20]; +}; + +struct mlx5_ifc_ste_flex_parser_0_bits { + u8 parser_3_label[0x14]; + u8 parser_3_exp[0x3]; + u8 parser_3_s_bos[0x1]; + u8 parser_3_ttl[0x8]; + + u8 flex_parser_2[0x20]; + + u8 flex_parser_1[0x20]; + + u8 flex_parser_0[0x20]; +}; + +struct mlx5_ifc_ste_flex_parser_1_bits { + u8 flex_parser_7[0x20]; + + u8 flex_parser_6[0x20]; + + u8 flex_parser_5[0x20]; + + u8 flex_parser_4[0x20]; +}; + +struct mlx5_ifc_ste_flex_parser_tnl_bits { + u8 flex_parser_tunneling_header_63_32[0x20]; + + u8 flex_parser_tunneling_header_31_0[0x20]; + + u8 
reserved_at_40[0x40]; +}; + +struct mlx5_ifc_ste_flex_parser_tnl_vxlan_gpe_bits { + u8 outer_vxlan_gpe_flags[0x8]; + u8 reserved_at_8[0x10]; + u8 outer_vxlan_gpe_next_protocol[0x8]; + + u8 outer_vxlan_gpe_vni[0x18]; + u8 reserved_at_38[0x8]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_ste_flex_parser_tnl_geneve_bits { + u8 reserved_at_0[0x2]; + u8 geneve_opt_len[0x6]; + u8 geneve_oam[0x1]; + u8 reserved_at_9[0x7]; + u8 geneve_protocol_type[0x10]; + + u8 geneve_vni[0x18]; + u8 reserved_at_38[0x8]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_ste_general_purpose_bits { + u8 general_purpose_lookup_field[0x20]; + + u8 reserved_at_20[0x20]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_ste_src_gvmi_qp_bits { + u8 loopback_syndrome[0x8]; + u8 reserved_at_8[0x8]; + u8 source_gvmi[0x10]; + + u8 reserved_at_20[0x5]; + u8 force_lb[0x1]; + u8 functional_lb[0x1]; + u8 source_is_requestor[0x1]; + u8 source_qp[0x18]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_l2_hdr_bits { + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 smac_47_32[0x10]; + + u8 smac_31_0[0x20]; + + u8 ethertype[0x10]; + u8 vlan_type[0x10]; + + u8 vlan[0x10]; + u8 reserved_at_90[0x10]; +}; + +/* Both HW set and HW add share the same HW format with different opcodes */ +struct mlx5_ifc_dr_action_hw_set_bits { + u8 opcode[0x8]; + u8 destination_field_code[0x8]; + u8 reserved_at_10[0x2]; + u8 destination_left_shifter[0x6]; + u8 reserved_at_18[0x3]; + u8 destination_length[0x5]; + + u8 inline_data[0x20]; +}; + +struct mlx5_ifc_dr_action_hw_copy_bits { + u8 opcode[0x8]; + u8 destination_field_code[0x8]; + u8 reserved_at_10[0x2]; + u8 destination_left_shifter[0x6]; + u8 reserved_at_18[0x2]; + u8 destination_length[0x6]; + + u8 reserved_at_20[0x8]; + u8 source_field_code[0x8]; + u8 reserved_at_30[0x2]; + u8 source_left_shifter[0x6]; + u8 reserved_at_38[0x8]; +}; + +#endif /* MLX5_IFC_DR_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h new file mode 100644 index 000000000000..e1edc9c247b7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -0,0 +1,227 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019, Mellanox Technologies */ + +#ifndef _MLX5DR_H_ +#define _MLX5DR_H_ + +struct mlx5dr_domain; +struct mlx5dr_table; +struct mlx5dr_matcher; +struct mlx5dr_rule; +struct mlx5dr_action; + +enum mlx5dr_domain_type { + MLX5DR_DOMAIN_TYPE_NIC_RX, + MLX5DR_DOMAIN_TYPE_NIC_TX, + MLX5DR_DOMAIN_TYPE_FDB, +}; + +enum mlx5dr_domain_sync_flags { + MLX5DR_DOMAIN_SYNC_FLAGS_SW = 1 << 0, + MLX5DR_DOMAIN_SYNC_FLAGS_HW = 1 << 1, +}; + +enum mlx5dr_action_reformat_type { + DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2, + DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2, + DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2, + DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3, +}; + +struct mlx5dr_match_parameters { + size_t match_sz; + u64 *match_buf; /* Device spec format */ +}; + +struct mlx5dr_action_dest { + struct mlx5dr_action *dest; + struct mlx5dr_action *reformat; +}; + +#ifdef CONFIG_MLX5_SW_STEERING + +struct mlx5dr_domain * +mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type); + +int mlx5dr_domain_destroy(struct mlx5dr_domain *domain); + +int mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags); + +void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, + struct mlx5dr_domain *peer_dmn); + +struct mlx5dr_table * +mlx5dr_table_create(struct 
mlx5dr_domain *domain, u32 level, u32 flags); + +int mlx5dr_table_destroy(struct mlx5dr_table *table); + +u32 mlx5dr_table_get_id(struct mlx5dr_table *table); + +struct mlx5dr_matcher * +mlx5dr_matcher_create(struct mlx5dr_table *table, + u16 priority, + u8 match_criteria_enable, + struct mlx5dr_match_parameters *mask); + +int mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher); + +struct mlx5dr_rule * +mlx5dr_rule_create(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *value, + size_t num_actions, + struct mlx5dr_action *actions[]); + +int mlx5dr_rule_destroy(struct mlx5dr_rule *rule); + +int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, + struct mlx5dr_action *action); + +struct mlx5dr_action * +mlx5dr_action_create_dest_table(struct mlx5dr_table *table); + +struct mlx5dr_action * +mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *domain, + struct mlx5_flow_table *ft); + +struct mlx5dr_action * +mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain, + u32 vport, u8 vhca_id_valid, + u16 vhca_id); + +struct mlx5dr_action * +mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, + struct mlx5dr_action_dest *dests, + u32 num_of_dests); + +struct mlx5dr_action *mlx5dr_action_create_drop(void); + +struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value); + +struct mlx5dr_action * +mlx5dr_action_create_flow_counter(u32 counter_id); + +struct mlx5dr_action * +mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn, + enum mlx5dr_action_reformat_type reformat_type, + size_t data_sz, + void *data); + +struct mlx5dr_action * +mlx5dr_action_create_modify_header(struct mlx5dr_domain *domain, + u32 flags, + size_t actions_sz, + __be64 actions[]); + +struct mlx5dr_action *mlx5dr_action_create_pop_vlan(void); + +struct mlx5dr_action * +mlx5dr_action_create_push_vlan(struct mlx5dr_domain *domain, __be32 vlan_hdr); + +int mlx5dr_action_destroy(struct mlx5dr_action *action); + +static inline bool +mlx5dr_is_supported(struct mlx5_core_dev *dev) +{ + return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner); +} + +#else /* CONFIG_MLX5_SW_STEERING */ + +static inline struct mlx5dr_domain * +mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) { return NULL; } + +static inline int +mlx5dr_domain_destroy(struct mlx5dr_domain *domain) { return 0; } + +static inline int +mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags) { return 0; } + +static inline void +mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, + struct mlx5dr_domain *peer_dmn) { } + +static inline struct mlx5dr_table * +mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags) { return NULL; } + +static inline int +mlx5dr_table_destroy(struct mlx5dr_table *table) { return 0; } + +static inline u32 +mlx5dr_table_get_id(struct mlx5dr_table *table) { return 0; } + +static inline struct mlx5dr_matcher * +mlx5dr_matcher_create(struct mlx5dr_table *table, + u16 priority, + u8 match_criteria_enable, + struct mlx5dr_match_parameters *mask) { return NULL; } + +static inline int +mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher) { return 0; } + +static inline struct mlx5dr_rule * +mlx5dr_rule_create(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *value, + size_t num_actions, + struct mlx5dr_action *actions[]) { return NULL; } + +static inline int +mlx5dr_rule_destroy(struct mlx5dr_rule *rule) { return 0; } + +static inline int +mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, + struct mlx5dr_action *action) { return 0; } + 
+static inline struct mlx5dr_action * +mlx5dr_action_create_dest_table(struct mlx5dr_table *table) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *domain, + struct mlx5_flow_table *ft) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain, + u32 vport, u8 vhca_id_valid, + u16 vhca_id) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, + struct mlx5dr_action_dest *dests, + u32 num_of_dests) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_drop(void) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_tag(u32 tag_value) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_flow_counter(u32 counter_id) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn, + enum mlx5dr_action_reformat_type reformat_type, + size_t data_sz, + void *data) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_modify_header(struct mlx5dr_domain *domain, + u32 flags, + size_t actions_sz, + __be64 actions[]) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_pop_vlan(void) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_push_vlan(struct mlx5dr_domain *domain, + __be32 vlan_hdr) { return NULL; } + +static inline int +mlx5dr_action_destroy(struct mlx5dr_action *action) { return 0; } + +static inline bool +mlx5dr_is_supported(struct mlx5_core_dev *dev) { return false; } + +#endif /* CONFIG_MLX5_SW_STEERING */ + +#endif /* _MLX5DR_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index c912d82ca64b..1faac31f74d0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -122,12 +122,13 @@ void mlx5_query_min_inline(struct mlx5_core_dev *mdev, u8 *min_inline_mode) { switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) { + case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: + if (!mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode)) + break; + /* fall through */ case MLX5_CAP_INLINE_MODE_L2: *min_inline_mode = MLX5_INLINE_MODE_L2; break; - case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: - mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode); - break; case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: *min_inline_mode = MLX5_INLINE_MODE_NONE; break; @@ -1063,26 +1064,13 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev, ctx = MLX5_ADDR_OF(modify_hca_vport_context_in, in, hca_vport_context); MLX5_SET(hca_vport_context, ctx, field_select, req->field_select); - MLX5_SET(hca_vport_context, ctx, sm_virt_aware, req->sm_virt_aware); - MLX5_SET(hca_vport_context, ctx, has_smi, req->has_smi); - MLX5_SET(hca_vport_context, ctx, has_raw, req->has_raw); - MLX5_SET(hca_vport_context, ctx, vport_state_policy, req->policy); - MLX5_SET(hca_vport_context, ctx, port_physical_state, req->phys_state); - MLX5_SET(hca_vport_context, ctx, vport_state, req->vport_state); - MLX5_SET64(hca_vport_context, ctx, port_guid, req->port_guid); - MLX5_SET64(hca_vport_context, ctx, node_guid, req->node_guid); - MLX5_SET(hca_vport_context, ctx, cap_mask1, req->cap_mask1); - MLX5_SET(hca_vport_context, ctx, cap_mask1_field_select, req->cap_mask1_perm); - MLX5_SET(hca_vport_context, ctx, cap_mask2, 
req->cap_mask2); - MLX5_SET(hca_vport_context, ctx, cap_mask2_field_select, req->cap_mask2_perm); - MLX5_SET(hca_vport_context, ctx, lid, req->lid); - MLX5_SET(hca_vport_context, ctx, init_type_reply, req->init_type_reply); - MLX5_SET(hca_vport_context, ctx, lmc, req->lmc); - MLX5_SET(hca_vport_context, ctx, subnet_timeout, req->subnet_timeout); - MLX5_SET(hca_vport_context, ctx, sm_lid, req->sm_lid); - MLX5_SET(hca_vport_context, ctx, sm_sl, req->sm_sl); - MLX5_SET(hca_vport_context, ctx, qkey_violation_counter, req->qkey_violation_counter); - MLX5_SET(hca_vport_context, ctx, pkey_violation_counter, req->pkey_violation_counter); + if (req->field_select & MLX5_HCA_VPORT_SEL_STATE_POLICY) + MLX5_SET(hca_vport_context, ctx, vport_state_policy, + req->policy); + if (req->field_select & MLX5_HCA_VPORT_SEL_PORT_GUID) + MLX5_SET64(hca_vport_context, ctx, port_guid, req->port_guid); + if (req->field_select & MLX5_HCA_VPORT_SEL_NODE_GUID) + MLX5_SET64(hca_vport_context, ctx, node_guid, req->node_guid); err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); ex: kfree(in); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index 953cc8efba69..02f7e4a39578 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -34,21 +34,6 @@ #include "wq.h" #include "mlx5_core.h" -u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq) -{ - return (u32)wq->fbc.sz_m1 + 1; -} - -u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq) -{ - return wq->fbc.sz_m1 + 1; -} - -u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq) -{ - return (u32)wq->fbc.sz_m1 + 1; -} - static u32 wq_get_byte_sz(u8 log_sz, u8 log_stride) { return ((u32)1 << log_sz) << log_stride; @@ -91,6 +76,24 @@ err_db_free: return err; } +void mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides) +{ + size_t len; + void *wqe; + + if (!net_ratelimit()) + return; + + nstrides = max_t(u8, nstrides, 1); + + len = nstrides << wq->fbc.log_stride; + wqe = mlx5_wq_cyc_get_wqe(wq, ix); + + pr_info("WQE DUMP: WQ size %d WQ cur size %d, WQE index 0x%x, len: %zu\n", + mlx5_wq_cyc_get_size(wq), wq->cur_sz, ix, len); + print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, wqe, len, false); +} + int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *qpc, struct mlx5_wq_qp *wq, struct mlx5_wq_ctrl *wq_ctrl) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index f1ec58c9e9e3..d9a94bc223c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -79,7 +79,7 @@ struct mlx5_wq_ll { int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_cyc *wq, struct mlx5_wq_ctrl *wq_ctrl); -u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq); +void mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides); int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *qpc, struct mlx5_wq_qp *wq, @@ -88,15 +88,18 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *cqc, struct mlx5_cqwq *wq, struct mlx5_wq_ctrl *wq_ctrl); -u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq); int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_ll *wq, struct mlx5_wq_ctrl *wq_ctrl); -u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq); void 
mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl); +static inline u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq) +{ + return (u32)wq->fbc.sz_m1 + 1; +} + static inline int mlx5_wq_cyc_is_full(struct mlx5_wq_cyc *wq) { return wq->cur_sz == wq->sz; @@ -167,6 +170,16 @@ static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2) return !equal && !smaller; } +static inline u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq) +{ + return wq->fbc.sz_m1 + 1; +} + +static inline u8 mlx5_cqwq_get_log_stride_size(struct mlx5_cqwq *wq) +{ + return wq->fbc.log_stride; +} + static inline u32 mlx5_cqwq_ctr2ix(struct mlx5_cqwq *wq, u32 ctr) { return ctr & wq->fbc.sz_m1; @@ -223,6 +236,11 @@ static inline struct mlx5_cqe64 *mlx5_cqwq_get_cqe(struct mlx5_cqwq *wq) return cqe; } +static inline u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq) +{ + return (u32)wq->fbc.sz_m1 + 1; +} + static inline int mlx5_wq_ll_is_full(struct mlx5_wq_ll *wq) { return wq->cur_sz == wq->fbc.sz_m1; diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c index 67990406cba2..29e95d0a6ad1 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c @@ -66,6 +66,8 @@ retry: return err; if (fsm_state_err != MLXFW_FSM_STATE_ERR_OK) { + fsm_state_err = min_t(enum mlxfw_fsm_state_err, + fsm_state_err, MLXFW_FSM_STATE_ERR_MAX); pr_err("Firmware flash failed: %s\n", mlxfw_fsm_state_err_str[fsm_state_err]); NL_SET_ERR_MSG_MOD(extack, "Firmware flash failed"); diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c index 544344ac4894..79057af4fe99 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c @@ -6,6 +6,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/netlink.h> +#include <linux/vmalloc.h> #include <linux/xz.h> #include "mlxfw_mfa2.h" #include "mlxfw_mfa2_file.h" @@ -548,7 +549,7 @@ mlxfw_mfa2_file_component_get(const struct mlxfw_mfa2_file *mfa2_file, comp_size = be32_to_cpu(comp->size); comp_buf_size = comp_size + mlxfw_mfa2_comp_magic_len; - comp_data = kmalloc(sizeof(*comp_data) + comp_buf_size, GFP_KERNEL); + comp_data = vzalloc(sizeof(*comp_data) + comp_buf_size); if (!comp_data) return ERR_PTR(-ENOMEM); comp_data->comp.data_size = comp_size; @@ -570,7 +571,7 @@ mlxfw_mfa2_file_component_get(const struct mlxfw_mfa2_file *mfa2_file, comp_data->comp.data = comp_data->buff + mlxfw_mfa2_comp_magic_len; return &comp_data->comp; err_out: - kfree(comp_data); + vfree(comp_data); return ERR_PTR(err); } @@ -579,7 +580,7 @@ void mlxfw_mfa2_file_component_put(struct mlxfw_mfa2_component *comp) const struct mlxfw_mfa2_comp_data *comp_data; comp_data = container_of(comp, struct mlxfw_mfa2_comp_data, comp); - kfree(comp_data); + vfree(comp_data); } void mlxfw_mfa2_file_fini(struct mlxfw_mfa2_file *mfa2_file) diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index 06c80343d9ed..f458fd1ce9f8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -71,7 +71,7 @@ config MLXSW_SWITCHX2 module will be called mlxsw_switchx2. 
config MLXSW_SPECTRUM - tristate "Mellanox Technologies Spectrum support" + tristate "Mellanox Technologies Spectrum family support" depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV && VLAN_8021Q depends on PSAMPLE || PSAMPLE=n depends on BRIDGE || BRIDGE=n @@ -87,8 +87,8 @@ config MLXSW_SPECTRUM select NET_PTP_CLASSIFY if PTP_1588_CLOCK default m ---help--- - This driver supports Mellanox Technologies Spectrum Ethernet - Switch ASICs. + This driver supports Mellanox Technologies + Spectrum/Spectrum-2/Spectrum-3 Ethernet Switch ASICs. To compile this driver as a module, choose M here: the module will be called mlxsw_spectrum. diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 171b36bd8a4e..0e86a581d45b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -29,7 +29,7 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_mr_tcam.o spectrum_mr.o \ spectrum_qdisc.o spectrum_span.o \ spectrum_nve.o spectrum_nve_vxlan.o \ - spectrum_dpipe.o + spectrum_dpipe.o spectrum_trap.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o mlxsw_spectrum-$(CONFIG_PTP_1588_CLOCK) += spectrum_ptp.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 17ceac7505e5..e9f791c43f20 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -71,6 +71,7 @@ struct mlxsw_core { struct list_head trans_list; spinlock_t trans_list_lock; /* protects trans_list writes */ bool use_emad; + bool enable_string_tlv; } emad; struct { u8 *mapping; /* lag_id+port_index to local_port mapping */ @@ -80,7 +81,6 @@ struct mlxsw_core { struct mlxsw_thermal *thermal; struct mlxsw_core_port *ports; unsigned int max_ports; - bool reload_fail; bool fw_flash_in_progress; unsigned long driver_priv[0]; /* driver_priv has to be always the last item */ @@ -128,6 +128,16 @@ bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core) } EXPORT_SYMBOL(mlxsw_core_res_query_enabled); +bool +mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev, + const struct mlxsw_fw_rev *req_rev) +{ + return rev->minor > req_rev->minor || + (rev->minor == req_rev->minor && + rev->subminor >= req_rev->subminor); +} +EXPORT_SYMBOL(mlxsw_core_fw_rev_minor_subminor_validate); + struct mlxsw_rx_listener_item { struct list_head list; struct mlxsw_rx_listener rxl; @@ -240,6 +250,25 @@ MLXSW_ITEM32(emad, op_tlv, class, 0x04, 0, 8); */ MLXSW_ITEM64(emad, op_tlv, tid, 0x08, 0, 64); +/* emad_string_tlv_type + * Type of the TLV. + * Must be set to 0x2 (string TLV). + */ +MLXSW_ITEM32(emad, string_tlv, type, 0x00, 27, 5); + +/* emad_string_tlv_len + * Length of the string TLV in u32. + */ +MLXSW_ITEM32(emad, string_tlv, len, 0x00, 16, 11); + +#define MLXSW_EMAD_STRING_TLV_STRING_LEN 128 + +/* emad_string_tlv_string + * String provided by the device's firmware in case of erroneous register access + */ +MLXSW_ITEM_BUF(emad, string_tlv, string, 0x04, + MLXSW_EMAD_STRING_TLV_STRING_LEN); + /* emad_reg_tlv_type * Type of the TLV. * Must be set to 0x3 (register TLV). 
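The MLXSW_ITEM32()/MLXSW_ITEM_BUF() definitions in the hunk above only describe the bit layout of the new string TLV; the accessors used further down (mlxsw_emad_string_tlv_type_set(), mlxsw_emad_string_tlv_string_data(), and so on) are generated by those macros. A rough sketch, not part of the patch, of what the generated getters amount to for the string TLV header word, assuming the usual big-endian register layout; the example_* names are illustrative only, the real helpers come from item.h:

#include <linux/types.h>
#include <asm/byteorder.h>

static inline u32 example_string_tlv_type_get(const char *string_tlv)
{
	/* 5-bit type at bits [31:27] of the first big-endian u32; 0x2 = string TLV */
	return (be32_to_cpu(*(const __be32 *)string_tlv) >> 27) & 0x1f;
}

static inline u32 example_string_tlv_len_get(const char *string_tlv)
{
	/* 11-bit TLV length at bits [26:16], counted in u32s */
	return (be32_to_cpu(*(const __be32 *)string_tlv) >> 16) & 0x7ff;
}

static inline const char *example_string_tlv_string(const char *string_tlv)
{
	/* the 128-byte firmware-provided error string starts after the header word */
	return string_tlv + 0x04;
}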
@@ -295,6 +324,12 @@ static void mlxsw_emad_pack_reg_tlv(char *reg_tlv, memcpy(reg_tlv + sizeof(u32), payload, reg->len); } +static void mlxsw_emad_pack_string_tlv(char *string_tlv) +{ + mlxsw_emad_string_tlv_type_set(string_tlv, MLXSW_EMAD_TLV_TYPE_STRING); + mlxsw_emad_string_tlv_len_set(string_tlv, MLXSW_EMAD_STRING_TLV_LEN); +} + static void mlxsw_emad_pack_op_tlv(char *op_tlv, const struct mlxsw_reg_info *reg, enum mlxsw_core_reg_access_type type, @@ -336,7 +371,7 @@ static void mlxsw_emad_construct(struct sk_buff *skb, const struct mlxsw_reg_info *reg, char *payload, enum mlxsw_core_reg_access_type type, - u64 tid) + u64 tid, bool enable_string_tlv) { char *buf; @@ -346,26 +381,82 @@ static void mlxsw_emad_construct(struct sk_buff *skb, buf = skb_push(skb, reg->len + sizeof(u32)); mlxsw_emad_pack_reg_tlv(buf, reg, payload); + if (enable_string_tlv) { + buf = skb_push(skb, MLXSW_EMAD_STRING_TLV_LEN * sizeof(u32)); + mlxsw_emad_pack_string_tlv(buf); + } + buf = skb_push(skb, MLXSW_EMAD_OP_TLV_LEN * sizeof(u32)); mlxsw_emad_pack_op_tlv(buf, reg, type, tid); mlxsw_emad_construct_eth_hdr(skb); } +struct mlxsw_emad_tlv_offsets { + u16 op_tlv; + u16 string_tlv; + u16 reg_tlv; +}; + +static bool mlxsw_emad_tlv_is_string_tlv(const char *tlv) +{ + u8 tlv_type = mlxsw_emad_string_tlv_type_get(tlv); + + return tlv_type == MLXSW_EMAD_TLV_TYPE_STRING; +} + +static void mlxsw_emad_tlv_parse(struct sk_buff *skb) +{ + struct mlxsw_emad_tlv_offsets *offsets = + (struct mlxsw_emad_tlv_offsets *) skb->cb; + + offsets->op_tlv = MLXSW_EMAD_ETH_HDR_LEN; + offsets->string_tlv = 0; + offsets->reg_tlv = MLXSW_EMAD_ETH_HDR_LEN + + MLXSW_EMAD_OP_TLV_LEN * sizeof(u32); + + /* If string TLV is present, it must come after the operation TLV. */ + if (mlxsw_emad_tlv_is_string_tlv(skb->data + offsets->reg_tlv)) { + offsets->string_tlv = offsets->reg_tlv; + offsets->reg_tlv += MLXSW_EMAD_STRING_TLV_LEN * sizeof(u32); + } +} + static char *mlxsw_emad_op_tlv(const struct sk_buff *skb) { - return ((char *) (skb->data + MLXSW_EMAD_ETH_HDR_LEN)); + struct mlxsw_emad_tlv_offsets *offsets = + (struct mlxsw_emad_tlv_offsets *) skb->cb; + + return ((char *) (skb->data + offsets->op_tlv)); +} + +static char *mlxsw_emad_string_tlv(const struct sk_buff *skb) +{ + struct mlxsw_emad_tlv_offsets *offsets = + (struct mlxsw_emad_tlv_offsets *) skb->cb; + + if (!offsets->string_tlv) + return NULL; + + return ((char *) (skb->data + offsets->string_tlv)); } static char *mlxsw_emad_reg_tlv(const struct sk_buff *skb) { - return ((char *) (skb->data + MLXSW_EMAD_ETH_HDR_LEN + - MLXSW_EMAD_OP_TLV_LEN * sizeof(u32))); + struct mlxsw_emad_tlv_offsets *offsets = + (struct mlxsw_emad_tlv_offsets *) skb->cb; + + return ((char *) (skb->data + offsets->reg_tlv)); +} + +static char *mlxsw_emad_reg_payload(const char *reg_tlv) +{ + return ((char *) (reg_tlv + sizeof(u32))); } -static char *mlxsw_emad_reg_payload(const char *op_tlv) +static char *mlxsw_emad_reg_payload_cmd(const char *mbox) { - return ((char *) (op_tlv + (MLXSW_EMAD_OP_TLV_LEN + 1) * sizeof(u32))); + return ((char *) (mbox + (MLXSW_EMAD_OP_TLV_LEN + 1) * sizeof(u32))); } static u64 mlxsw_emad_get_tid(const struct sk_buff *skb) @@ -431,10 +522,31 @@ struct mlxsw_reg_trans { const struct mlxsw_reg_info *reg; enum mlxsw_core_reg_access_type type; int err; + char *emad_err_string; enum mlxsw_emad_op_tlv_status emad_status; struct rcu_head rcu; }; +static void mlxsw_emad_process_string_tlv(const struct sk_buff *skb, + struct mlxsw_reg_trans *trans) +{ + char *string_tlv; + char *string; + 
+ string_tlv = mlxsw_emad_string_tlv(skb); + if (!string_tlv) + return; + + trans->emad_err_string = kzalloc(MLXSW_EMAD_STRING_TLV_STRING_LEN, + GFP_ATOMIC); + if (!trans->emad_err_string) + return; + + string = mlxsw_emad_string_tlv_string_data(string_tlv); + strlcpy(trans->emad_err_string, string, + MLXSW_EMAD_STRING_TLV_STRING_LEN); +} + #define MLXSW_EMAD_TIMEOUT_DURING_FW_FLASH_MS 3000 #define MLXSW_EMAD_TIMEOUT_MS 200 @@ -526,12 +638,14 @@ static void mlxsw_emad_process_response(struct mlxsw_core *mlxsw_core, mlxsw_emad_transmit_retry(mlxsw_core, trans); } else { if (err == 0) { - char *op_tlv = mlxsw_emad_op_tlv(skb); + char *reg_tlv = mlxsw_emad_reg_tlv(skb); if (trans->cb) trans->cb(mlxsw_core, - mlxsw_emad_reg_payload(op_tlv), + mlxsw_emad_reg_payload(reg_tlv), trans->reg->len, trans->cb_priv); + } else { + mlxsw_emad_process_string_tlv(skb, trans); } mlxsw_emad_trans_finish(trans, err); } @@ -547,6 +661,8 @@ static void mlxsw_emad_rx_listener_func(struct sk_buff *skb, u8 local_port, trace_devlink_hwmsg(priv_to_devlink(mlxsw_core), true, 0, skb->data, skb->len); + mlxsw_emad_tlv_parse(skb); + if (!mlxsw_emad_is_resp(skb)) goto free_skb; @@ -622,7 +738,7 @@ static void mlxsw_emad_fini(struct mlxsw_core *mlxsw_core) } static struct sk_buff *mlxsw_emad_alloc(const struct mlxsw_core *mlxsw_core, - u16 reg_len) + u16 reg_len, bool enable_string_tlv) { struct sk_buff *skb; u16 emad_len; @@ -630,6 +746,8 @@ static struct sk_buff *mlxsw_emad_alloc(const struct mlxsw_core *mlxsw_core, emad_len = (reg_len + sizeof(u32) + MLXSW_EMAD_ETH_HDR_LEN + (MLXSW_EMAD_OP_TLV_LEN + MLXSW_EMAD_END_TLV_LEN) * sizeof(u32) + mlxsw_core->driver->txhdr_len); + if (enable_string_tlv) + emad_len += MLXSW_EMAD_STRING_TLV_LEN * sizeof(u32); if (emad_len > MLXSW_EMAD_MAX_FRAME_LEN) return NULL; @@ -651,6 +769,7 @@ static int mlxsw_emad_reg_access(struct mlxsw_core *mlxsw_core, mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv, u64 tid) { + bool enable_string_tlv; struct sk_buff *skb; int err; @@ -658,7 +777,12 @@ static int mlxsw_emad_reg_access(struct mlxsw_core *mlxsw_core, tid, reg->id, mlxsw_reg_id_str(reg->id), mlxsw_core_reg_access_type_str(type)); - skb = mlxsw_emad_alloc(mlxsw_core, reg->len); + /* Since this can be changed during emad_reg_access, read it once and + * use the value all the way. 
+ */ + enable_string_tlv = mlxsw_core->emad.enable_string_tlv; + + skb = mlxsw_emad_alloc(mlxsw_core, reg->len, enable_string_tlv); if (!skb) return -ENOMEM; @@ -675,7 +799,8 @@ static int mlxsw_emad_reg_access(struct mlxsw_core *mlxsw_core, trans->reg = reg; trans->type = type; - mlxsw_emad_construct(skb, reg, payload, type, trans->tid); + mlxsw_emad_construct(skb, reg, payload, type, trans->tid, + enable_string_tlv); mlxsw_core->driver->txhdr_construct(skb, &trans->tx_info); spin_lock_bh(&mlxsw_core->emad.trans_list_lock); @@ -984,23 +1109,30 @@ mlxsw_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, return 0; } -static int mlxsw_devlink_core_bus_device_reload(struct devlink *devlink, - struct netlink_ext_ack *extack) +static int +mlxsw_devlink_core_bus_device_reload_down(struct devlink *devlink, + bool netns_change, + struct netlink_ext_ack *extack) { struct mlxsw_core *mlxsw_core = devlink_priv(devlink); - int err; if (!(mlxsw_core->bus->features & MLXSW_BUS_F_RESET)) return -EOPNOTSUPP; mlxsw_core_bus_device_unregister(mlxsw_core, true); - err = mlxsw_core_bus_device_register(mlxsw_core->bus_info, - mlxsw_core->bus, - mlxsw_core->bus_priv, true, - devlink); - mlxsw_core->reload_fail = !!err; + return 0; +} - return err; +static int +mlxsw_devlink_core_bus_device_reload_up(struct devlink *devlink, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + + return mlxsw_core_bus_device_register(mlxsw_core->bus_info, + mlxsw_core->bus, + mlxsw_core->bus_priv, true, + devlink, extack); } static int mlxsw_devlink_flash_update(struct devlink *devlink, @@ -1017,8 +1149,57 @@ static int mlxsw_devlink_flash_update(struct devlink *devlink, component, extack); } +static int mlxsw_devlink_trap_init(struct devlink *devlink, + const struct devlink_trap *trap, + void *trap_ctx) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_init) + return -EOPNOTSUPP; + return mlxsw_driver->trap_init(mlxsw_core, trap, trap_ctx); +} + +static void mlxsw_devlink_trap_fini(struct devlink *devlink, + const struct devlink_trap *trap, + void *trap_ctx) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_fini) + return; + mlxsw_driver->trap_fini(mlxsw_core, trap, trap_ctx); +} + +static int mlxsw_devlink_trap_action_set(struct devlink *devlink, + const struct devlink_trap *trap, + enum devlink_trap_action action) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_action_set) + return -EOPNOTSUPP; + return mlxsw_driver->trap_action_set(mlxsw_core, trap, action); +} + +static int +mlxsw_devlink_trap_group_init(struct devlink *devlink, + const struct devlink_trap_group *group) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_group_init) + return -EOPNOTSUPP; + return mlxsw_driver->trap_group_init(mlxsw_core, group); +} + static const struct devlink_ops mlxsw_devlink_ops = { - .reload = mlxsw_devlink_core_bus_device_reload, + .reload_down = mlxsw_devlink_core_bus_device_reload_down, + .reload_up = mlxsw_devlink_core_bus_device_reload_up, .port_type_set = mlxsw_devlink_port_type_set, .port_split = mlxsw_devlink_port_split, .port_unsplit = mlxsw_devlink_port_unsplit, @@ -1034,13 +1215,18 @@ 
static const struct devlink_ops mlxsw_devlink_ops = { .sb_occ_tc_port_bind_get = mlxsw_devlink_sb_occ_tc_port_bind_get, .info_get = mlxsw_devlink_info_get, .flash_update = mlxsw_devlink_flash_update, + .trap_init = mlxsw_devlink_trap_init, + .trap_fini = mlxsw_devlink_trap_fini, + .trap_action_set = mlxsw_devlink_trap_action_set, + .trap_group_init = mlxsw_devlink_trap_group_init, }; static int __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, const struct mlxsw_bus *mlxsw_bus, void *bus_priv, bool reload, - struct devlink *devlink) + struct devlink *devlink, + struct netlink_ext_ack *extack) { const char *device_kind = mlxsw_bus_info->device_kind; struct mlxsw_core *mlxsw_core; @@ -1114,7 +1300,7 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, } if (mlxsw_driver->init) { - err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info); + err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info, extack); if (err) goto err_driver_init; } @@ -1128,9 +1314,12 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, if (err) goto err_thermal_init; - if (mlxsw_driver->params_register && !reload) + if (mlxsw_driver->params_register) devlink_params_publish(devlink); + if (!reload) + devlink_reload_enable(devlink); + return 0; err_thermal_init: @@ -1165,14 +1354,16 @@ err_devlink_alloc: int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, const struct mlxsw_bus *mlxsw_bus, void *bus_priv, bool reload, - struct devlink *devlink) + struct devlink *devlink, + struct netlink_ext_ack *extack) { bool called_again = false; int err; again: err = __mlxsw_core_bus_device_register(mlxsw_bus_info, mlxsw_bus, - bus_priv, reload, devlink); + bus_priv, reload, + devlink, extack); /* -EAGAIN is returned in case the FW was updated. FW needs * a reset, so lets try to call __mlxsw_core_bus_device_register() * again. @@ -1191,7 +1382,9 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, { struct devlink *devlink = priv_to_devlink(mlxsw_core); - if (mlxsw_core->reload_fail) { + if (!reload) + devlink_reload_disable(devlink); + if (devlink_is_reload_failed(devlink)) { if (!reload) /* Only the parts that were not de-initialized in the * failed reload attempt need to be de-initialized. 
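The devlink trap ops wired up above only dispatch to optional callbacks in struct mlxsw_driver; the driver-side half is outside this hunk. A minimal, hypothetical sketch of what such a trap_action_set callback could look like, assuming the caller has already resolved the devlink trap to the mlxsw_listener registered for it and that the HPKT action mapping shown is merely illustrative (mlxsw_core_trap_action_set() is the helper added later in this patch):

static int example_trap_action_set(struct mlxsw_core *mlxsw_core,
				   const struct mlxsw_listener *listener,
				   enum devlink_trap_action action)
{
	enum mlxsw_reg_hpkt_action hpkt_action;

	switch (action) {
	case DEVLINK_TRAP_ACTION_DROP:
		/* stop delivering the trapped packets to the CPU */
		hpkt_action = MLXSW_REG_HPKT_ACTION_DISCARD;
		break;
	case DEVLINK_TRAP_ACTION_TRAP:
		/* deliver trapped packets to the CPU again */
		hpkt_action = MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU;
		break;
	default:
		return -EOPNOTSUPP;
	}

	/* rewrites the HPKT register for the listener's trap ID and group */
	return mlxsw_core_trap_action_set(mlxsw_core, listener, hpkt_action);
}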
@@ -1201,7 +1394,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, return; } - if (mlxsw_core->driver->params_unregister && !reload) + if (mlxsw_core->driver->params_unregister) devlink_params_unpublish(devlink); mlxsw_thermal_fini(mlxsw_core->thermal); mlxsw_hwmon_fini(mlxsw_core->hwmon); @@ -1318,12 +1511,16 @@ static void mlxsw_core_event_listener_func(struct sk_buff *skb, u8 local_port, struct mlxsw_event_listener_item *event_listener_item = priv; struct mlxsw_reg_info reg; char *payload; - char *op_tlv = mlxsw_emad_op_tlv(skb); - char *reg_tlv = mlxsw_emad_reg_tlv(skb); + char *reg_tlv; + char *op_tlv; + + mlxsw_emad_tlv_parse(skb); + op_tlv = mlxsw_emad_op_tlv(skb); + reg_tlv = mlxsw_emad_reg_tlv(skb); reg.id = mlxsw_emad_op_tlv_register_id_get(op_tlv); reg.len = (mlxsw_emad_reg_tlv_len_get(reg_tlv) - 1) * sizeof(u32); - payload = mlxsw_emad_reg_payload(op_tlv); + payload = mlxsw_emad_reg_payload(reg_tlv); event_listener_item->el.func(®, payload, event_listener_item->priv); dev_kfree_skb(skb); } @@ -1477,6 +1674,18 @@ void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_trap_unregister); +int mlxsw_core_trap_action_set(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listener, + enum mlxsw_reg_hpkt_action action) +{ + char hpkt_pl[MLXSW_REG_HPKT_LEN]; + + mlxsw_reg_hpkt_pack(hpkt_pl, action, listener->trap_id, + listener->trap_group, listener->is_ctrl); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl); +} +EXPORT_SYMBOL(mlxsw_core_trap_action_set); + static u64 mlxsw_core_tid_get(struct mlxsw_core *mlxsw_core) { return atomic64_inc_return(&mlxsw_core->emad.tid); @@ -1529,8 +1738,11 @@ int mlxsw_reg_trans_write(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_reg_trans_write); +#define MLXSW_REG_TRANS_ERR_STRING_SIZE 256 + static int mlxsw_reg_trans_wait(struct mlxsw_reg_trans *trans) { + char err_string[MLXSW_REG_TRANS_ERR_STRING_SIZE]; struct mlxsw_core *mlxsw_core = trans->core; int err; @@ -1548,9 +1760,17 @@ static int mlxsw_reg_trans_wait(struct mlxsw_reg_trans *trans) mlxsw_core_reg_access_type_str(trans->type), trans->emad_status, mlxsw_emad_op_tlv_status_str(trans->emad_status)); + + snprintf(err_string, MLXSW_REG_TRANS_ERR_STRING_SIZE, + "(tid=%llx,reg_id=%x(%s)) %s (%s)\n", trans->tid, + trans->reg->id, mlxsw_reg_id_str(trans->reg->id), + mlxsw_emad_op_tlv_status_str(trans->emad_status), + trans->emad_err_string ? 
trans->emad_err_string : ""); + trace_devlink_hwerr(priv_to_devlink(mlxsw_core), - trans->emad_status, - mlxsw_emad_op_tlv_status_str(trans->emad_status)); + trans->emad_status, err_string); + + kfree(trans->emad_err_string); } list_del(&trans->bulk_list); @@ -1624,7 +1844,7 @@ retry: } if (!err) - memcpy(payload, mlxsw_emad_reg_payload(out_mbox), + memcpy(payload, mlxsw_emad_reg_payload_cmd(out_mbox), reg->len); mlxsw_cmd_mbox_free(out_mbox); @@ -1794,11 +2014,12 @@ u64 mlxsw_core_res_get(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_res_get); -int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port, - u32 port_number, bool split, - u32 split_port_subnumber, - const unsigned char *switch_id, - unsigned char switch_id_len) +static int __mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port, + enum devlink_port_flavour flavour, + u32 port_number, bool split, + u32 split_port_subnumber, + const unsigned char *switch_id, + unsigned char switch_id_len) { struct devlink *devlink = priv_to_devlink(mlxsw_core); struct mlxsw_core_port *mlxsw_core_port = @@ -1807,17 +2028,16 @@ int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port, int err; mlxsw_core_port->local_port = local_port; - devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, - port_number, split, split_port_subnumber, + devlink_port_attrs_set(devlink_port, flavour, port_number, + split, split_port_subnumber, switch_id, switch_id_len); err = devlink_port_register(devlink, devlink_port, local_port); if (err) memset(mlxsw_core_port, 0, sizeof(*mlxsw_core_port)); return err; } -EXPORT_SYMBOL(mlxsw_core_port_init); -void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port) +static void __mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port) { struct mlxsw_core_port *mlxsw_core_port = &mlxsw_core->ports[local_port]; @@ -1826,8 +2046,53 @@ void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port) devlink_port_unregister(devlink_port); memset(mlxsw_core_port, 0, sizeof(*mlxsw_core_port)); } + +int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port, + u32 port_number, bool split, + u32 split_port_subnumber, + const unsigned char *switch_id, + unsigned char switch_id_len) +{ + return __mlxsw_core_port_init(mlxsw_core, local_port, + DEVLINK_PORT_FLAVOUR_PHYSICAL, + port_number, split, split_port_subnumber, + switch_id, switch_id_len); +} +EXPORT_SYMBOL(mlxsw_core_port_init); + +void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port) +{ + __mlxsw_core_port_fini(mlxsw_core, local_port); +} EXPORT_SYMBOL(mlxsw_core_port_fini); +int mlxsw_core_cpu_port_init(struct mlxsw_core *mlxsw_core, + void *port_driver_priv, + const unsigned char *switch_id, + unsigned char switch_id_len) +{ + struct mlxsw_core_port *mlxsw_core_port = + &mlxsw_core->ports[MLXSW_PORT_CPU_PORT]; + int err; + + err = __mlxsw_core_port_init(mlxsw_core, MLXSW_PORT_CPU_PORT, + DEVLINK_PORT_FLAVOUR_CPU, + 0, false, 0, + switch_id, switch_id_len); + if (err) + return err; + + mlxsw_core_port->port_driver_priv = port_driver_priv; + return 0; +} +EXPORT_SYMBOL(mlxsw_core_cpu_port_init); + +void mlxsw_core_cpu_port_fini(struct mlxsw_core *mlxsw_core) +{ + __mlxsw_core_port_fini(mlxsw_core, MLXSW_PORT_CPU_PORT); +} +EXPORT_SYMBOL(mlxsw_core_cpu_port_fini); + void mlxsw_core_port_eth_set(struct mlxsw_core *mlxsw_core, u8 local_port, void *port_driver_priv, struct net_device *dev) { @@ -1888,6 +2153,35 @@ mlxsw_core_port_devlink_port_get(struct mlxsw_core 
*mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_port_devlink_port_get); +int mlxsw_core_module_max_width(struct mlxsw_core *mlxsw_core, u8 module) +{ + enum mlxsw_reg_pmtm_module_type module_type; + char pmtm_pl[MLXSW_REG_PMTM_LEN]; + int err; + + mlxsw_reg_pmtm_pack(pmtm_pl, module); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(pmtm), pmtm_pl); + if (err) + return err; + mlxsw_reg_pmtm_unpack(pmtm_pl, &module_type); + + /* Here we need to get the module width according to the module type. */ + + switch (module_type) { + case MLXSW_REG_PMTM_MODULE_TYPE_BP_4X: /* fall through */ + case MLXSW_REG_PMTM_MODULE_TYPE_BP_QSFP: + return 4; + case MLXSW_REG_PMTM_MODULE_TYPE_BP_2X: + return 2; + case MLXSW_REG_PMTM_MODULE_TYPE_BP_SFP: /* fall through */ + case MLXSW_REG_PMTM_MODULE_TYPE_BP_1X: + return 1; + default: + return -EINVAL; + } +} +EXPORT_SYMBOL(mlxsw_core_module_max_width); + static void mlxsw_core_buf_dump_dbg(struct mlxsw_core *mlxsw_core, const char *buf, size_t size) { @@ -2047,6 +2341,12 @@ u32 mlxsw_core_read_frc_l(struct mlxsw_core *mlxsw_core) } EXPORT_SYMBOL(mlxsw_core_read_frc_l); +void mlxsw_core_emad_string_tlv_enable(struct mlxsw_core *mlxsw_core) +{ + mlxsw_core->emad.enable_string_tlv = true; +} +EXPORT_SYMBOL(mlxsw_core_emad_string_tlv_enable); + static int __init mlxsw_core_module_init(void) { int err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 8efcff4b59cb..543476a2e503 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -11,6 +11,7 @@ #include <linux/types.h> #include <linux/skbuff.h> #include <linux/workqueue.h> +#include <linux/net_namespace.h> #include <net/devlink.h> #include "trap.h" @@ -23,6 +24,7 @@ struct mlxsw_core_port; struct mlxsw_driver; struct mlxsw_bus; struct mlxsw_bus_info; +struct mlxsw_fw_rev; unsigned int mlxsw_core_max_ports(const struct mlxsw_core *mlxsw_core); @@ -30,13 +32,18 @@ void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core); bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core); +bool +mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev, + const struct mlxsw_fw_rev *req_rev); + int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver); void mlxsw_core_driver_unregister(struct mlxsw_driver *mlxsw_driver); int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, const struct mlxsw_bus *mlxsw_bus, void *bus_priv, bool reload, - struct devlink *devlink); + struct devlink *devlink, + struct netlink_ext_ack *extack); void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, bool reload); struct mlxsw_tx_info { @@ -128,6 +135,9 @@ int mlxsw_core_trap_register(struct mlxsw_core *mlxsw_core, void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core, const struct mlxsw_listener *listener, void *priv); +int mlxsw_core_trap_action_set(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listener, + enum mlxsw_reg_hpkt_action action); typedef void mlxsw_reg_trans_cb_t(struct mlxsw_core *mlxsw_core, char *payload, size_t payload_len, unsigned long cb_priv); @@ -174,6 +184,11 @@ int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port, const unsigned char *switch_id, unsigned char switch_id_len); void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port); +int mlxsw_core_cpu_port_init(struct mlxsw_core *mlxsw_core, + void *port_driver_priv, + const unsigned char *switch_id, + unsigned char switch_id_len); +void 
mlxsw_core_cpu_port_fini(struct mlxsw_core *mlxsw_core); void mlxsw_core_port_eth_set(struct mlxsw_core *mlxsw_core, u8 local_port, void *port_driver_priv, struct net_device *dev); void mlxsw_core_port_ib_set(struct mlxsw_core *mlxsw_core, u8 local_port, @@ -185,6 +200,7 @@ enum devlink_port_type mlxsw_core_port_type_get(struct mlxsw_core *mlxsw_core, struct devlink_port * mlxsw_core_port_devlink_port_get(struct mlxsw_core *mlxsw_core, u8 local_port); +int mlxsw_core_module_max_width(struct mlxsw_core *mlxsw_core, u8 module); int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay); bool mlxsw_core_schedule_work(struct work_struct *work); @@ -244,7 +260,8 @@ struct mlxsw_driver { const char *kind; size_t priv_size; int (*init)(struct mlxsw_core *mlxsw_core, - const struct mlxsw_bus_info *mlxsw_bus_info); + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack); void (*fini)(struct mlxsw_core *mlxsw_core); int (*basic_trap_groups_set)(struct mlxsw_core *mlxsw_core); int (*port_type_set)(struct mlxsw_core *mlxsw_core, u8 local_port, @@ -289,6 +306,15 @@ struct mlxsw_driver { int (*flash_update)(struct mlxsw_core *mlxsw_core, const char *file_name, const char *component, struct netlink_ext_ack *extack); + int (*trap_init)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap *trap, void *trap_ctx); + void (*trap_fini)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap *trap, void *trap_ctx); + int (*trap_action_set)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap *trap, + enum devlink_trap_action action); + int (*trap_group_init)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_group *group); void (*txhdr_construct)(struct sk_buff *skb, const struct mlxsw_tx_info *tx_info); int (*resources_register)(struct mlxsw_core *mlxsw_core); @@ -321,6 +347,8 @@ void mlxsw_core_fw_flash_end(struct mlxsw_core *mlxsw_core); u32 mlxsw_core_read_frc_h(struct mlxsw_core *mlxsw_core); u32 mlxsw_core_read_frc_l(struct mlxsw_core *mlxsw_core); +void mlxsw_core_emad_string_tlv_enable(struct mlxsw_core *mlxsw_core); + bool mlxsw_core_res_valid(struct mlxsw_core *mlxsw_core, enum mlxsw_res_id res_id); @@ -333,6 +361,11 @@ u64 mlxsw_core_res_get(struct mlxsw_core *mlxsw_core, #define MLXSW_CORE_RES_GET(mlxsw_core, short_res_id) \ mlxsw_core_res_get(mlxsw_core, MLXSW_RES_ID_##short_res_id) +static inline struct net *mlxsw_core_net(struct mlxsw_core *mlxsw_core) +{ + return devlink_net(priv_to_devlink(mlxsw_core)); +} + #define MLXSW_BUS_F_TXRX BIT(0) #define MLXSW_BUS_F_RESET BIT(1) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c index d2c7ce67c300..08215fed193d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c @@ -50,6 +50,7 @@ mlxsw_env_query_module_eeprom(struct mlxsw_core *mlxsw_core, int module, char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE]; char mcia_pl[MLXSW_REG_MCIA_LEN]; u16 i2c_addr; + u8 page = 0; int status; int err; @@ -62,11 +63,21 @@ mlxsw_env_query_module_eeprom(struct mlxsw_core *mlxsw_core, int module, i2c_addr = MLXSW_REG_MCIA_I2C_ADDR_LOW; if (offset >= MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) { - i2c_addr = MLXSW_REG_MCIA_I2C_ADDR_HIGH; - offset -= MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH; + page = MLXSW_REG_MCIA_PAGE_GET(offset); + offset -= MLXSW_REG_MCIA_EEPROM_UP_PAGE_LENGTH * page; + /* When reading upper pages 1, 2 and 3 the offset starts at + * 128. 
Please refer to "QSFP+ Memory Map" figure in SFF-8436 + * specification for graphical depiction. + * MCIA register accepts buffer size <= 48. Page of size 128 + * should be read by chunks of size 48, 48, 32. Align the size + * of the last chunk to avoid reading after the end of the + * page. + */ + if (offset + size > MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) + size = MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH - offset; } - mlxsw_reg_mcia_pack(mcia_pl, module, 0, 0, offset, size, i2c_addr); + mlxsw_reg_mcia_pack(mcia_pl, module, 0, page, offset, size, i2c_addr); err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mcia), mcia_pl); if (err) @@ -168,7 +179,7 @@ int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module, switch (module_id) { case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP: modinfo->type = ETH_MODULE_SFF_8436; - modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN; break; case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_PLUS: /* fall-through */ case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28: @@ -176,10 +187,10 @@ int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module, module_rev_id >= MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_8636) { modinfo->type = ETH_MODULE_SFF_8636; - modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_MAX_LEN; } else { modinfo->type = ETH_MODULE_SFF_8436; - modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN; } break; case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_SFP: diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index 5b00726c4346..3fe878d7c94c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -41,7 +41,7 @@ struct mlxsw_hwmon { struct mlxsw_hwmon_attr hwmon_attrs[MLXSW_HWMON_ATTR_COUNT]; unsigned int attrs_count; u8 sensor_count; - u8 module_sensor_count; + u8 module_sensor_max; }; static ssize_t mlxsw_hwmon_temp_show(struct device *dev, @@ -56,7 +56,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev, int err; index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, - mlxsw_hwmon->module_sensor_count); + mlxsw_hwmon->module_sensor_max); mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { @@ -79,7 +79,7 @@ static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev, int err; index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, - mlxsw_hwmon->module_sensor_count); + mlxsw_hwmon->module_sensor_max); mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { @@ -109,7 +109,7 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, return -EINVAL; index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, - mlxsw_hwmon->module_sensor_count); + mlxsw_hwmon->module_sensor_max); mlxsw_reg_mtmp_pack(mtmp_pl, index, true, true); err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { @@ -336,7 +336,7 @@ mlxsw_hwmon_gbox_temp_label_show(struct device *dev, container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; int index = mlwsw_hwmon_attr->type_index - - mlxsw_hwmon->module_sensor_count + 1; + mlxsw_hwmon->module_sensor_max + 1; return sprintf(buf, "gearbox %03u\n", index); } @@ -528,57 +528,52 @@ static int 
mlxsw_hwmon_fans_init(struct mlxsw_hwmon *mlxsw_hwmon) static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon) { - unsigned int module_count = mlxsw_core_max_ports(mlxsw_hwmon->core); - char pmlp_pl[MLXSW_REG_PMLP_LEN] = {0}; - int i, index; - u8 width; - int err; + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; + u8 module_sensor_max; + int i, err; if (!mlxsw_core_res_query_enabled(mlxsw_hwmon->core)) return 0; + mlxsw_reg_mgpir_pack(mgpir_pl); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + return err; + + mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, + &module_sensor_max); + /* Add extra attributes for module temperature. Sensor index is * assigned to sensor_count value, while all indexed before * sensor_count are already utilized by the sensors connected through * mtmp register by mlxsw_hwmon_temp_init(). */ - index = mlxsw_hwmon->sensor_count; - for (i = 1; i < module_count; i++) { - mlxsw_reg_pmlp_pack(pmlp_pl, i); - err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(pmlp), - pmlp_pl); - if (err) { - dev_err(mlxsw_hwmon->bus_info->dev, "Failed to read module index %d\n", - i); - return err; - } - width = mlxsw_reg_pmlp_width_get(pmlp_pl); - if (!width) - continue; + mlxsw_hwmon->module_sensor_max = mlxsw_hwmon->sensor_count + + module_sensor_max; + for (i = mlxsw_hwmon->sensor_count; + i < mlxsw_hwmon->module_sensor_max; i++) { mlxsw_hwmon_attr_add(mlxsw_hwmon, - MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE, index, - index); + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE, i, i); mlxsw_hwmon_attr_add(mlxsw_hwmon, MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_FAULT, - index, index); + i, i); mlxsw_hwmon_attr_add(mlxsw_hwmon, - MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT, - index, index); + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT, i, + i); mlxsw_hwmon_attr_add(mlxsw_hwmon, MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG, - index, index); + i, i); mlxsw_hwmon_attr_add(mlxsw_hwmon, MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL, - index, index); - index++; + i, i); } - mlxsw_hwmon->module_sensor_count = index; return 0; } static int mlxsw_hwmon_gearbox_init(struct mlxsw_hwmon *mlxsw_hwmon) { + enum mlxsw_reg_mgpir_device_type device_type; int index, max_index, sensor_index; char mgpir_pl[MLXSW_REG_MGPIR_LEN]; char mtmp_pl[MLXSW_REG_MTMP_LEN]; @@ -590,14 +585,15 @@ static int mlxsw_hwmon_gearbox_init(struct mlxsw_hwmon *mlxsw_hwmon) if (err) return err; - mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, NULL, NULL); - if (!gbox_num) + mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, &device_type, NULL, NULL); + if (device_type != MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE || + !gbox_num) return 0; - index = mlxsw_hwmon->module_sensor_count; - max_index = mlxsw_hwmon->module_sensor_count + gbox_num; + index = mlxsw_hwmon->module_sensor_max; + max_index = mlxsw_hwmon->module_sensor_max + gbox_num; while (index < max_index) { - sensor_index = index % mlxsw_hwmon->module_sensor_count + + sensor_index = index % mlxsw_hwmon->module_sensor_max + MLXSW_REG_MTMP_GBOX_INDEX_MIN; mlxsw_reg_mtmp_pack(mtmp_pl, sensor_index, true, true); err = mlxsw_reg_write(mlxsw_hwmon->core, diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 35a1dc89c28a..ce0a6837daa3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -112,6 +112,7 @@ struct mlxsw_thermal { struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; enum thermal_device_mode mode; struct mlxsw_thermal_module *tz_module_arr; + u8 
tz_module_num; struct mlxsw_thermal_module *tz_gearbox_arr; u8 tz_gearbox_num; unsigned int tz_highest_score; @@ -775,23 +776,10 @@ static void mlxsw_thermal_module_tz_fini(struct thermal_zone_device *tzdev) static int mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core, - struct mlxsw_thermal *thermal, u8 local_port) + struct mlxsw_thermal *thermal, u8 module) { struct mlxsw_thermal_module *module_tz; - char pmlp_pl[MLXSW_REG_PMLP_LEN]; - u8 width, module; - int err; - - mlxsw_reg_pmlp_pack(pmlp_pl, local_port); - err = mlxsw_reg_query(core, MLXSW_REG(pmlp), pmlp_pl); - if (err) - return err; - width = mlxsw_reg_pmlp_width_get(pmlp_pl); - if (!width) - return 0; - - module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0); module_tz = &thermal->tz_module_arr[module]; /* Skip if parent is already set (case of port split). */ if (module_tz->parent) @@ -819,26 +807,34 @@ static int mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, struct mlxsw_thermal *thermal) { - unsigned int module_count = mlxsw_core_max_ports(core); struct mlxsw_thermal_module *module_tz; + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; int i, err; if (!mlxsw_core_res_query_enabled(core)) return 0; - thermal->tz_module_arr = kcalloc(module_count, + mlxsw_reg_mgpir_pack(mgpir_pl); + err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + return err; + + mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, + &thermal->tz_module_num); + + thermal->tz_module_arr = kcalloc(thermal->tz_module_num, sizeof(*thermal->tz_module_arr), GFP_KERNEL); if (!thermal->tz_module_arr) return -ENOMEM; - for (i = 1; i < module_count; i++) { + for (i = 0; i < thermal->tz_module_num; i++) { err = mlxsw_thermal_module_init(dev, core, thermal, i); if (err) goto err_unreg_tz_module_arr; } - for (i = 0; i < module_count - 1; i++) { + for (i = 0; i < thermal->tz_module_num; i++) { module_tz = &thermal->tz_module_arr[i]; if (!module_tz->parent) continue; @@ -850,7 +846,7 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, return 0; err_unreg_tz_module_arr: - for (i = module_count - 1; i >= 0; i--) + for (i = thermal->tz_module_num - 1; i >= 0; i--) mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]); kfree(thermal->tz_module_arr); return err; @@ -859,13 +855,12 @@ err_unreg_tz_module_arr: static void mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal) { - unsigned int module_count = mlxsw_core_max_ports(thermal->core); int i; if (!mlxsw_core_res_query_enabled(thermal->core)) return; - for (i = module_count - 1; i >= 0; i--) + for (i = thermal->tz_module_num - 1; i >= 0; i--) mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]); kfree(thermal->tz_module_arr); } @@ -900,8 +895,10 @@ static int mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core, struct mlxsw_thermal *thermal) { + enum mlxsw_reg_mgpir_device_type device_type; struct mlxsw_thermal_module *gearbox_tz; char mgpir_pl[MLXSW_REG_MGPIR_LEN]; + u8 gbox_num; int i; int err; @@ -913,10 +910,13 @@ mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core, if (err) return err; - mlxsw_reg_mgpir_unpack(mgpir_pl, &thermal->tz_gearbox_num, NULL, NULL); - if (!thermal->tz_gearbox_num) + mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, &device_type, NULL, + NULL); + if (device_type != MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE || + !gbox_num) return 0; + thermal->tz_gearbox_num = gbox_num; thermal->tz_gearbox_arr = kcalloc(thermal->tz_gearbox_num, sizeof(*thermal->tz_gearbox_arr), GFP_KERNEL); diff --git 
a/drivers/net/ethernet/mellanox/mlxsw/emad.h b/drivers/net/ethernet/mellanox/mlxsw/emad.h index a33b896f4bb8..acfbbec52424 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/emad.h +++ b/drivers/net/ethernet/mellanox/mlxsw/emad.h @@ -19,10 +19,8 @@ enum { MLXSW_EMAD_TLV_TYPE_END, MLXSW_EMAD_TLV_TYPE_OP, - MLXSW_EMAD_TLV_TYPE_DR, + MLXSW_EMAD_TLV_TYPE_STRING, MLXSW_EMAD_TLV_TYPE_REG, - MLXSW_EMAD_TLV_TYPE_USERDATA, - MLXSW_EMAD_TLV_TYPE_OOBETH, }; /* OP TLV */ @@ -89,6 +87,9 @@ enum { MLXSW_EMAD_OP_TLV_METHOD_EVENT = 5, }; +/* STRING TLV */ +#define MLXSW_EMAD_STRING_TLV_LEN 33 /* Length in u32 */ + /* END TLV */ #define MLXSW_EMAD_END_TLV_LEN 1 /* Length in u32 */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index 95f408d0e103..34566eb62c47 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -640,7 +640,7 @@ static int mlxsw_i2c_probe(struct i2c_client *client, err = mlxsw_core_bus_device_register(&mlxsw_i2c->bus_info, &mlxsw_i2c_bus, mlxsw_i2c, false, - NULL); + NULL, NULL); if (err) { dev_err(&client->dev, "Fail to register core bus\n"); return err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c index 471b0ca6d69a..c4caeeadcba9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c @@ -16,6 +16,14 @@ static const char mlxsw_m_driver_name[] = "mlxsw_minimal"; +#define MLXSW_M_FWREV_MINOR 2000 +#define MLXSW_M_FWREV_SUBMINOR 1886 + +static const struct mlxsw_fw_rev mlxsw_m_fw_rev = { + .minor = MLXSW_M_FWREV_MINOR, + .subminor = MLXSW_M_FWREV_SUBMINOR, +}; + struct mlxsw_m_port; struct mlxsw_m { @@ -172,6 +180,7 @@ mlxsw_m_port_create(struct mlxsw_m *mlxsw_m, u8 local_port, u8 module) } SET_NETDEV_DEV(dev, mlxsw_m->bus_info->dev); + dev_net_set(dev, mlxsw_core_net(mlxsw_m->core)); mlxsw_m_port = netdev_priv(dev); mlxsw_m_port->dev = dev; mlxsw_m_port->mlxsw_m = mlxsw_m; @@ -204,8 +213,8 @@ mlxsw_m_port_create(struct mlxsw_m *mlxsw_m, u8 local_port, u8 module) err_register_netdev: mlxsw_m->ports[local_port] = NULL; - free_netdev(dev); err_dev_addr_get: + free_netdev(dev); err_alloc_etherdev: mlxsw_core_port_fini(mlxsw_m->core, local_port); return err; @@ -325,8 +334,27 @@ static void mlxsw_m_ports_remove(struct mlxsw_m *mlxsw_m) kfree(mlxsw_m->ports); } +static int mlxsw_m_fw_rev_validate(struct mlxsw_m *mlxsw_m) +{ + const struct mlxsw_fw_rev *rev = &mlxsw_m->bus_info->fw_rev; + + /* Validate driver and FW are compatible. + * Do not check major version, since it defines chip type, while + * driver is supposed to support any type. 
+ */ + if (mlxsw_core_fw_rev_minor_subminor_validate(rev, &mlxsw_m_fw_rev)) + return 0; + + dev_err(mlxsw_m->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver (required >= %d.%d.%d)\n", + rev->major, rev->minor, rev->subminor, rev->major, + mlxsw_m_fw_rev.minor, mlxsw_m_fw_rev.subminor); + + return -EINVAL; +} + static int mlxsw_m_init(struct mlxsw_core *mlxsw_core, - const struct mlxsw_bus_info *mlxsw_bus_info) + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack) { struct mlxsw_m *mlxsw_m = mlxsw_core_driver_priv(mlxsw_core); int err; @@ -334,6 +362,10 @@ static int mlxsw_m_init(struct mlxsw_core *mlxsw_core, mlxsw_m->core = mlxsw_core; mlxsw_m->bus_info = mlxsw_bus_info; + err = mlxsw_m_fw_rev_validate(mlxsw_m); + if (err) + return err; + err = mlxsw_m_base_mac_get(mlxsw_m); if (err) { dev_err(mlxsw_m->bus_info->dev, "Failed to get base mac\n"); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 615455a21567..914c33e46fb4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -284,15 +284,18 @@ static dma_addr_t __mlxsw_pci_queue_page_get(struct mlxsw_pci_queue *q, static int mlxsw_pci_sdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, struct mlxsw_pci_queue *q) { + int tclass; int i; int err; q->producer_counter = 0; q->consumer_counter = 0; + tclass = q->num == MLXSW_PCI_SDQ_EMAD_INDEX ? MLXSW_PCI_SDQ_EMAD_TC : + MLXSW_PCI_SDQ_CTL_TC; /* Set CQ of same number of this SDQ. */ mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, q->num); - mlxsw_cmd_mbox_sw2hw_dq_sdq_tclass_set(mbox, 3); + mlxsw_cmd_mbox_sw2hw_dq_sdq_tclass_set(mbox, tclass); mlxsw_cmd_mbox_sw2hw_dq_log2_dq_sz_set(mbox, 3); /* 8 pages */ for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) { dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i); @@ -963,6 +966,7 @@ static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox) eq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_eq_sz_get(mbox); if (num_sdqs + num_rdqs > num_cqs || + num_sdqs < MLXSW_PCI_SDQS_MIN || num_cqs > MLXSW_PCI_CQS_MAX || num_eqs != MLXSW_PCI_EQS_COUNT) { dev_err(&pdev->dev, "Unsupported number of queues\n"); return -EINVAL; @@ -1520,7 +1524,15 @@ static struct mlxsw_pci_queue * mlxsw_pci_sdq_pick(struct mlxsw_pci *mlxsw_pci, const struct mlxsw_tx_info *tx_info) { - u8 sdqn = tx_info->local_port % mlxsw_pci_sdq_count(mlxsw_pci); + u8 ctl_sdq_count = mlxsw_pci_sdq_count(mlxsw_pci) - 1; + u8 sdqn; + + if (tx_info->is_emad) { + sdqn = MLXSW_PCI_SDQ_EMAD_INDEX; + } else { + BUILD_BUG_ON(MLXSW_PCI_SDQ_EMAD_INDEX != 0); + sdqn = 1 + (tx_info->local_port % ctl_sdq_count); + } return mlxsw_pci_sdq_get(mlxsw_pci, sdqn); } @@ -1790,7 +1802,7 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) err = mlxsw_core_bus_device_register(&mlxsw_pci->bus_info, &mlxsw_pci_bus, mlxsw_pci, false, - NULL); + NULL, NULL); if (err) { dev_err(&pdev->dev, "cannot register bus device\n"); goto err_bus_device_register; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.h b/drivers/net/ethernet/mellanox/mlxsw/pci.h index 946339e13eb9..5b1323645a5d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.h @@ -9,6 +9,7 @@ #define PCI_DEVICE_ID_MELLANOX_SWITCHX2 0xc738 #define PCI_DEVICE_ID_MELLANOX_SPECTRUM 0xcb84 #define PCI_DEVICE_ID_MELLANOX_SPECTRUM2 0xcf6c +#define PCI_DEVICE_ID_MELLANOX_SPECTRUM3 0xcf70 #define PCI_DEVICE_ID_MELLANOX_SWITCHIB 0xcb20 #define 
PCI_DEVICE_ID_MELLANOX_SWITCHIB2 0xcf08 diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index e57e42e2d2b2..e0d7d2d9a0c8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -27,7 +27,7 @@ #define MLXSW_PCI_SW_RESET 0xF0010 #define MLXSW_PCI_SW_RESET_RST_BIT BIT(0) -#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 20000 +#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 900000 #define MLXSW_PCI_SW_RESET_WAIT_MSECS 100 #define MLXSW_PCI_FW_READY 0xA1844 #define MLXSW_PCI_FW_READY_MASK 0xFFFF @@ -51,6 +51,11 @@ #define MLXSW_PCI_EQ_ASYNC_NUM 0 #define MLXSW_PCI_EQ_COMP_NUM 1 +#define MLXSW_PCI_SDQS_MIN 2 /* EMAD and control traffic */ +#define MLXSW_PCI_SDQ_EMAD_INDEX 0 +#define MLXSW_PCI_SDQ_EMAD_TC 0 +#define MLXSW_PCI_SDQ_CTL_TC 3 + #define MLXSW_PCI_AQ_PAGES 8 #define MLXSW_PCI_AQ_SIZE (MLXSW_PCI_PAGE_SIZE * MLXSW_PCI_AQ_PAGES) #define MLXSW_PCI_WQE_SIZE 32 /* 32 bytes per element */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/port.h b/drivers/net/ethernet/mellanox/mlxsw/port.h index a33eeef0b00c..741fd2989d12 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/port.h +++ b/drivers/net/ethernet/mellanox/mlxsw/port.h @@ -24,8 +24,6 @@ #define MLXSW_PORT_DONT_CARE 0xFF -#define MLXSW_PORT_MODULE_MAX_WIDTH 4 - enum mlxsw_port_admin_status { MLXSW_PORT_ADMIN_STATUS_UP = 1, MLXSW_PORT_ADMIN_STATUS_DOWN = 2, diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index ead36702549a..dd6685156396 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3477,10 +3477,10 @@ MLXSW_REG_DEFINE(qeec, MLXSW_REG_QEEC_ID, MLXSW_REG_QEEC_LEN); MLXSW_ITEM32(reg, qeec, local_port, 0x00, 16, 8); enum mlxsw_reg_qeec_hr { - MLXSW_REG_QEEC_HIERARCY_PORT, - MLXSW_REG_QEEC_HIERARCY_GROUP, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, - MLXSW_REG_QEEC_HIERARCY_TC, + MLXSW_REG_QEEC_HR_PORT, + MLXSW_REG_QEEC_HR_GROUP, + MLXSW_REG_QEEC_HR_SUBGROUP, + MLXSW_REG_QEEC_HR_TC, }; /* reg_qeec_element_hierarchy @@ -3563,8 +3563,8 @@ MLXSW_ITEM32(reg, qeec, min_shaper_rate, 0x0C, 0, 28); */ MLXSW_ITEM32(reg, qeec, mase, 0x10, 31, 1); -/* A large max rate will disable the max shaper. */ -#define MLXSW_REG_QEEC_MAS_DIS 200000000 /* Kbps */ +/* The largest max shaper value possible to disable the shaper. */ +#define MLXSW_REG_QEEC_MAS_DIS ((1u << 31) - 1) /* Kbps */ /* reg_qeec_max_shaper_rate * Max shaper information rate. 
@@ -3602,6 +3602,21 @@ MLXSW_ITEM32(reg, qeec, dwrr, 0x18, 15, 1); */ MLXSW_ITEM32(reg, qeec, dwrr_weight, 0x18, 0, 8); +/* reg_qeec_max_shaper_bs + * Max shaper burst size + * Burst size is 2^max_shaper_bs * 512 bits + * For Spectrum-1: Range is: 5..25 + * For Spectrum-2: Range is: 11..25 + * Reserved when ptps = 1 + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, max_shaper_bs, 0x1C, 0, 6); + +#define MLXSW_REG_QEEC_HIGHEST_SHAPER_BS 25 +#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1 5 +#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2 11 +#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3 5 + static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port, enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index) @@ -3618,8 +3633,7 @@ static inline void mlxsw_reg_qeec_ptps_pack(char *payload, u8 local_port, { MLXSW_REG_ZERO(qeec, payload); mlxsw_reg_qeec_local_port_set(payload, local_port); - mlxsw_reg_qeec_element_hierarchy_set(payload, - MLXSW_REG_QEEC_HIERARCY_PORT); + mlxsw_reg_qeec_element_hierarchy_set(payload, MLXSW_REG_QEEC_HR_PORT); mlxsw_reg_qeec_ptps_set(payload, ptps); } @@ -3749,6 +3763,38 @@ mlxsw_reg_qpdsm_prio_pack(char *payload, unsigned short prio, u8 dscp) mlxsw_reg_qpdsm_prio_entry_color2_dscp_set(payload, prio, dscp); } +/* QPDP - QoS Port DSCP to Priority Mapping Register + * ------------------------------------------------- + * This register controls the port default Switch Priority and Color. The + * default Switch Priority and Color are used for frames where the trust state + * uses default values. All member ports of a LAG should be configured with the + * same default values. + */ +#define MLXSW_REG_QPDP_ID 0x4007 +#define MLXSW_REG_QPDP_LEN 0x8 + +MLXSW_REG_DEFINE(qpdp, MLXSW_REG_QPDP_ID, MLXSW_REG_QPDP_LEN); + +/* reg_qpdp_local_port + * Local Port. Supported for data packets from CPU port. + * Access: Index + */ +MLXSW_ITEM32(reg, qpdp, local_port, 0x00, 16, 8); + +/* reg_qpdp_switch_prio + * Default port Switch Priority (default 0) + * Access: RW + */ +MLXSW_ITEM32(reg, qpdp, switch_prio, 0x04, 0, 4); + +static inline void mlxsw_reg_qpdp_pack(char *payload, u8 local_port, + u8 switch_prio) +{ + MLXSW_REG_ZERO(qpdp, payload); + mlxsw_reg_qpdp_local_port_set(payload, local_port); + mlxsw_reg_qpdp_switch_prio_set(payload, switch_prio); +} + /* QPDPM - QoS Port DSCP to Priority Mapping Register * -------------------------------------------------- * This register controls the mapping from DSCP field to @@ -3969,6 +4015,7 @@ MLXSW_ITEM32(reg, pmlp, local_port, 0x00, 16, 8); * 1 - Lane 0 is used. * 2 - Lanes 0 and 1 are used. * 4 - Lanes 0, 1, 2 and 3 are used. + * 8 - Lanes 0-7 are used. * Access: RW */ MLXSW_ITEM32(reg, pmlp, width, 0x00, 0, 8); @@ -3983,14 +4030,14 @@ MLXSW_ITEM32_INDEXED(reg, pmlp, module, 0x04, 0, 8, 0x04, 0x00, false); * Tx Lane. When rxtx field is cleared, this field is used for Rx as well. * Access: RW */ -MLXSW_ITEM32_INDEXED(reg, pmlp, tx_lane, 0x04, 16, 2, 0x04, 0x00, false); +MLXSW_ITEM32_INDEXED(reg, pmlp, tx_lane, 0x04, 16, 4, 0x04, 0x00, false); /* reg_pmlp_rx_lane * Rx Lane. When rxtx field is cleared, this field is ignored and Rx lane is * equal to Tx lane. 
* Access: RW */ -MLXSW_ITEM32_INDEXED(reg, pmlp, rx_lane, 0x04, 24, 2, 0x04, 0x00, false); +MLXSW_ITEM32_INDEXED(reg, pmlp, rx_lane, 0x04, 24, 4, 0x04, 0x00, false); static inline void mlxsw_reg_pmlp_pack(char *payload, u8 local_port) { @@ -4111,6 +4158,7 @@ MLXSW_ITEM32(reg, ptys, an_status, 0x04, 28, 4); #define MLXSW_REG_PTYS_EXT_ETH_SPEED_CAUI_4_100GBASE_CR4_KR4 BIT(9) #define MLXSW_REG_PTYS_EXT_ETH_SPEED_100GAUI_2_100GBASE_CR2_KR2 BIT(10) #define MLXSW_REG_PTYS_EXT_ETH_SPEED_200GAUI_4_200GBASE_CR4_KR4 BIT(12) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_400GAUI_8 BIT(15) /* reg_ptys_ext_eth_proto_cap * Extended Ethernet port supported speeds and protocols. @@ -4126,7 +4174,6 @@ MLXSW_ITEM32(reg, ptys, ext_eth_proto_cap, 0x08, 0, 32); #define MLXSW_REG_PTYS_ETH_SPEED_20GBASE_KR2 BIT(5) #define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4 BIT(6) #define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 BIT(7) -#define MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4 BIT(8) #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR BIT(12) #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR BIT(13) #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_ER_LR BIT(14) @@ -5374,6 +5421,55 @@ static inline void mlxsw_reg_pplr_pack(char *payload, u8 local_port, MLXSW_REG_PPLR_LB_TYPE_BIT_PHY_LOCAL : 0); } +/* PMTM - Port Module Type Mapping Register + * ---------------------------------------- + * The PMTM allows query or configuration of module types. + */ +#define MLXSW_REG_PMTM_ID 0x5067 +#define MLXSW_REG_PMTM_LEN 0x10 + +MLXSW_REG_DEFINE(pmtm, MLXSW_REG_PMTM_ID, MLXSW_REG_PMTM_LEN); + +/* reg_pmtm_module + * Module number. + * Access: Index + */ +MLXSW_ITEM32(reg, pmtm, module, 0x00, 16, 8); + +enum mlxsw_reg_pmtm_module_type { + /* Backplane with 4 lanes */ + MLXSW_REG_PMTM_MODULE_TYPE_BP_4X, + /* QSFP */ + MLXSW_REG_PMTM_MODULE_TYPE_BP_QSFP, + /* SFP */ + MLXSW_REG_PMTM_MODULE_TYPE_BP_SFP, + /* Backplane with single lane */ + MLXSW_REG_PMTM_MODULE_TYPE_BP_1X = 4, + /* Backplane with two lane */ + MLXSW_REG_PMTM_MODULE_TYPE_BP_2X = 8, + /* Chip2Chip */ + MLXSW_REG_PMTM_MODULE_TYPE_C2C = 10, +}; + +/* reg_pmtm_module_type + * Module type. + * Access: RW + */ +MLXSW_ITEM32(reg, pmtm, module_type, 0x04, 0, 4); + +static inline void mlxsw_reg_pmtm_pack(char *payload, u8 module) +{ + MLXSW_REG_ZERO(pmtm, payload); + mlxsw_reg_pmtm_module_set(payload, module); +} + +static inline void +mlxsw_reg_pmtm_unpack(char *payload, + enum mlxsw_reg_pmtm_module_type *module_type) +{ + *module_type = mlxsw_reg_pmtm_module_type_get(payload); +} + /* HTGT - Host Trap Group Table * ---------------------------- * Configures the properties for forwarding to CPU. @@ -5422,6 +5518,17 @@ enum mlxsw_reg_htgt_trap_group { MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR, MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0, MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1, + MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP, + + __MLXSW_REG_HTGT_TRAP_GROUP_MAX, + MLXSW_REG_HTGT_TRAP_GROUP_MAX = __MLXSW_REG_HTGT_TRAP_GROUP_MAX - 1 +}; + +enum mlxsw_reg_htgt_discard_trap_group { + MLXSW_REG_HTGT_DISCARD_TRAP_GROUP_BASE = MLXSW_REG_HTGT_TRAP_GROUP_MAX, + MLXSW_REG_HTGT_TRAP_GROUP_SP_L2_DISCARDS, + MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_DISCARDS, + MLXSW_REG_HTGT_TRAP_GROUP_SP_TUNNEL_DISCARDS, }; /* reg_htgt_trap_group @@ -5559,6 +5666,8 @@ enum mlxsw_reg_hpkt_action { MLXSW_REG_HPKT_ACTION_DISCARD, MLXSW_REG_HPKT_ACTION_SOFT_DISCARD, MLXSW_REG_HPKT_ACTION_TRAP_AND_SOFT_DISCARD, + MLXSW_REG_HPKT_ACTION_TRAP_EXCEPTION_TO_CPU, + MLXSW_REG_HPKT_ACTION_SET_FW_DEFAULT = 15, }; /* reg_hpkt_action @@ -5569,6 +5678,8 @@ enum mlxsw_reg_hpkt_action { * 3 - Discard. 
* 4 - Soft discard (allow other traps to act on the packet). * 5 - Trap and soft discard (allow other traps to overwrite this trap). + * 6 - Trap to CPU (CPU receives sole copy) and count it as error. + * 15 - Restore the firmware's default action. * Access: RW * * Note: Must be set to 0 (forward) for event trap IDs, as they are already @@ -8400,6 +8511,7 @@ MLXSW_ITEM32(reg, mcia, device_address, 0x04, 0, 16); MLXSW_ITEM32(reg, mcia, size, 0x08, 0, 16); #define MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH 256 +#define MLXSW_REG_MCIA_EEPROM_UP_PAGE_LENGTH 128 #define MLXSW_REG_MCIA_EEPROM_SIZE 48 #define MLXSW_REG_MCIA_I2C_ADDR_LOW 0x50 #define MLXSW_REG_MCIA_I2C_ADDR_HIGH 0x51 @@ -8435,6 +8547,14 @@ enum mlxsw_reg_mcia_eeprom_module_info { */ MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, MLXSW_REG_MCIA_EEPROM_SIZE); +/* This is used to access the optional upper pages (1-3) in the QSFP+ + * memory map. Page 1 is available on offset 256 through 383, page 2 - + * on offset 384 through 511, page 3 - on offset 512 through 639. + */ +#define MLXSW_REG_MCIA_PAGE_GET(off) (((off) - \ + MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) / \ + MLXSW_REG_MCIA_EEPROM_UP_PAGE_LENGTH + 1) + static inline void mlxsw_reg_mcia_pack(char *payload, u8 module, u8 lock, u8 page_number, u16 device_addr, u8 size, u8 i2c_device_addr) @@ -8659,7 +8779,7 @@ mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(char *payload, u8 ttl, * properties. */ #define MLXSW_REG_MPAR_ID 0x901B -#define MLXSW_REG_MPAR_LEN 0x08 +#define MLXSW_REG_MPAR_LEN 0x0C MLXSW_REG_DEFINE(mpar, MLXSW_REG_MPAR_ID, MLXSW_REG_MPAR_LEN); @@ -9520,6 +9640,12 @@ MLXSW_ITEM32(reg, mgpir, devices_per_flash, 0x00, 16, 8); */ MLXSW_ITEM32(reg, mgpir, num_of_devices, 0x00, 0, 8); +/* num_of_modules + * Number of modules. + * Access: RO + */ +MLXSW_ITEM32(reg, mgpir, num_of_modules, 0x04, 0, 8); + static inline void mlxsw_reg_mgpir_pack(char *payload) { MLXSW_REG_ZERO(mgpir, payload); @@ -9528,7 +9654,7 @@ static inline void mlxsw_reg_mgpir_pack(char *payload) static inline void mlxsw_reg_mgpir_unpack(char *payload, u8 *num_of_devices, enum mlxsw_reg_mgpir_device_type *device_type, - u8 *devices_per_flash) + u8 *devices_per_flash, u8 *num_of_modules) { if (num_of_devices) *num_of_devices = mlxsw_reg_mgpir_num_of_devices_get(payload); @@ -9537,6 +9663,8 @@ mlxsw_reg_mgpir_unpack(char *payload, u8 *num_of_devices, if (devices_per_flash) *devices_per_flash = mlxsw_reg_mgpir_devices_per_flash_get(payload); + if (num_of_modules) + *num_of_modules = mlxsw_reg_mgpir_num_of_modules_get(payload); } /* TNGCR - Tunneling NVE General Configuration Register @@ -10028,6 +10156,92 @@ static inline void mlxsw_reg_tigcr_pack(char *payload, bool ttlc, u8 ttl_uc) mlxsw_reg_tigcr_ttl_uc_set(payload, ttl_uc); } +/* TIEEM - Tunneling IPinIP Encapsulation ECN Mapping Register + * ----------------------------------------------------------- + * The TIEEM register maps ECN of the IP header at the ingress to the + * encapsulation to the ECN of the underlay network. + */ +#define MLXSW_REG_TIEEM_ID 0xA812 +#define MLXSW_REG_TIEEM_LEN 0x0C + +MLXSW_REG_DEFINE(tieem, MLXSW_REG_TIEEM_ID, MLXSW_REG_TIEEM_LEN); + +/* reg_tieem_overlay_ecn + * ECN of the IP header in the overlay network. + * Access: Index + */ +MLXSW_ITEM32(reg, tieem, overlay_ecn, 0x04, 24, 2); + +/* reg_tineem_underlay_ecn + * ECN of the IP header in the underlay network. 
+ * Access: RW + */ +MLXSW_ITEM32(reg, tieem, underlay_ecn, 0x04, 16, 2); + +static inline void mlxsw_reg_tieem_pack(char *payload, u8 overlay_ecn, + u8 underlay_ecn) +{ + MLXSW_REG_ZERO(tieem, payload); + mlxsw_reg_tieem_overlay_ecn_set(payload, overlay_ecn); + mlxsw_reg_tieem_underlay_ecn_set(payload, underlay_ecn); +} + +/* TIDEM - Tunneling IPinIP Decapsulation ECN Mapping Register + * ----------------------------------------------------------- + * The TIDEM register configures the actions that are done in the + * decapsulation. + */ +#define MLXSW_REG_TIDEM_ID 0xA813 +#define MLXSW_REG_TIDEM_LEN 0x0C + +MLXSW_REG_DEFINE(tidem, MLXSW_REG_TIDEM_ID, MLXSW_REG_TIDEM_LEN); + +/* reg_tidem_underlay_ecn + * ECN field of the IP header in the underlay network. + * Access: Index + */ +MLXSW_ITEM32(reg, tidem, underlay_ecn, 0x04, 24, 2); + +/* reg_tidem_overlay_ecn + * ECN field of the IP header in the overlay network. + * Access: Index + */ +MLXSW_ITEM32(reg, tidem, overlay_ecn, 0x04, 16, 2); + +/* reg_tidem_eip_ecn + * Egress IP ECN. ECN field of the IP header of the packet which goes out + * from the decapsulation. + * Access: RW + */ +MLXSW_ITEM32(reg, tidem, eip_ecn, 0x04, 8, 2); + +/* reg_tidem_trap_en + * Trap enable: + * 0 - No trap due to decap ECN + * 1 - Trap enable with trap_id + * Access: RW + */ +MLXSW_ITEM32(reg, tidem, trap_en, 0x08, 28, 4); + +/* reg_tidem_trap_id + * Trap ID. Either DECAP_ECN0 or DECAP_ECN1. + * Reserved when trap_en is '0'. + * Access: RW + */ +MLXSW_ITEM32(reg, tidem, trap_id, 0x08, 0, 9); + +static inline void mlxsw_reg_tidem_pack(char *payload, u8 underlay_ecn, + u8 overlay_ecn, u8 eip_ecn, + bool trap_en, u16 trap_id) +{ + MLXSW_REG_ZERO(tidem, payload); + mlxsw_reg_tidem_underlay_ecn_set(payload, underlay_ecn); + mlxsw_reg_tidem_overlay_ecn_set(payload, overlay_ecn); + mlxsw_reg_tidem_eip_ecn_set(payload, eip_ecn); + mlxsw_reg_tidem_trap_en_set(payload, trap_en); + mlxsw_reg_tidem_trap_id_set(payload, trap_id); +} + /* SBPR - Shared Buffer Pools Register * ----------------------------------- * The SBPR configures and retrieves the shared buffer pools and configuration. 
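
A minimal sketch (not part of the patch) of the upper-page arithmetic behind the MCIA changes above, assuming only the MLXSW_REG_MCIA_* definitions this patch adds; the helper name mcia_page_offset() is hypothetical:

/* Illustrative only: map a flat ethtool EEPROM offset to the MCIA page
 * number, in-page offset and chunk size, the same way
 * mlxsw_env_query_module_eeprom() does with the definitions added above.
 */
static void mcia_page_offset(u16 flat_off, u16 want, u8 *page, u16 *off,
			     u16 *size)
{
	*page = 0;
	*off = flat_off;
	*size = want < MLXSW_REG_MCIA_EEPROM_SIZE ?
		want : MLXSW_REG_MCIA_EEPROM_SIZE;

	if (flat_off >= MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) {
		/* e.g. flat offset 384 -> page 2, in-page offset 128 */
		*page = MLXSW_REG_MCIA_PAGE_GET(flat_off);
		*off = flat_off - MLXSW_REG_MCIA_EEPROM_UP_PAGE_LENGTH * *page;
		/* Upper pages occupy in-page offsets 128..255; clamp the
		 * last chunk so a 128-byte page is read as 48 + 48 + 32.
		 */
		if (*off + *size > MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH)
			*size = MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH - *off;
	}
}

With these definitions a flat offset of 384 lands on page 2 at in-page offset 128, and a full 128-byte upper page is fetched in chunks of 48, 48 and 32 bytes.
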
@@ -10500,6 +10714,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(qeec), MLXSW_REG(qrwe), MLXSW_REG(qpdsm), + MLXSW_REG(qpdp), MLXSW_REG(qpdpm), MLXSW_REG(qtctm), MLXSW_REG(qpsc), @@ -10515,6 +10730,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(pbmc), MLXSW_REG(pspa), MLXSW_REG(pplr), + MLXSW_REG(pmtm), MLXSW_REG(htgt), MLXSW_REG(hpkt), MLXSW_REG(rgcr), @@ -10570,6 +10786,8 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(tndem), MLXSW_REG(tnpc), MLXSW_REG(tigcr), + MLXSW_REG(tieem), + MLXSW_REG(tidem), MLXSW_REG(sbpr), MLXSW_REG(sbcm), MLXSW_REG(sbpm), diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h index 33a9fc9ef6a4..6534184cb942 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/resources.h +++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h @@ -26,7 +26,8 @@ enum mlxsw_res_id { MLXSW_RES_ID_MAX_LAG_MEMBERS, MLXSW_RES_ID_LOCAL_PORTS_IN_1X, MLXSW_RES_ID_LOCAL_PORTS_IN_2X, - MLXSW_RES_ID_MAX_BUFFER_SIZE, + MLXSW_RES_ID_LOCAL_PORTS_IN_4X, + MLXSW_RES_ID_GUARANTEED_SHARED_BUFFER, MLXSW_RES_ID_CELL_SIZE, MLXSW_RES_ID_MAX_HEADROOM_SIZE, MLXSW_RES_ID_ACL_MAX_TCAM_REGIONS, @@ -82,7 +83,8 @@ static u16 mlxsw_res_ids[] = { [MLXSW_RES_ID_MAX_LAG_MEMBERS] = 0x2521, [MLXSW_RES_ID_LOCAL_PORTS_IN_1X] = 0x2610, [MLXSW_RES_ID_LOCAL_PORTS_IN_2X] = 0x2611, - [MLXSW_RES_ID_MAX_BUFFER_SIZE] = 0x2802, /* Bytes */ + [MLXSW_RES_ID_LOCAL_PORTS_IN_4X] = 0x2612, + [MLXSW_RES_ID_GUARANTEED_SHARED_BUFFER] = 0x2805, /* Bytes */ [MLXSW_RES_ID_CELL_SIZE] = 0x2803, /* Bytes */ [MLXSW_RES_ID_MAX_HEADROOM_SIZE] = 0x2811, /* Bytes */ [MLXSW_RES_ID_ACL_MAX_TCAM_REGIONS] = 0x2901, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index eda9c23e87b2..7358b5bc7eb6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -22,6 +22,7 @@ #include <linux/inetdevice.h> #include <linux/netlink.h> #include <linux/jhash.h> +#include <linux/log2.h> #include <net/switchdev.h> #include <net/pkt_cls.h> #include <net/tc_act/tc_mirred.h> @@ -44,11 +45,9 @@ #include "spectrum_ptp.h" #include "../mlxfw/mlxfw.h" -#define MLXSW_SP_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100) - #define MLXSW_SP1_FWREV_MAJOR 13 #define MLXSW_SP1_FWREV_MINOR 2000 -#define MLXSW_SP1_FWREV_SUBMINOR 1122 +#define MLXSW_SP1_FWREV_SUBMINOR 2714 #define MLXSW_SP1_FWREV_CAN_RESET_MINOR 1702 static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = { @@ -63,8 +62,24 @@ static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = { "." __stringify(MLXSW_SP1_FWREV_MINOR) \ "." __stringify(MLXSW_SP1_FWREV_SUBMINOR) ".mfa2" +#define MLXSW_SP2_FWREV_MAJOR 29 +#define MLXSW_SP2_FWREV_MINOR 2000 +#define MLXSW_SP2_FWREV_SUBMINOR 2714 + +static const struct mlxsw_fw_rev mlxsw_sp2_fw_rev = { + .major = MLXSW_SP2_FWREV_MAJOR, + .minor = MLXSW_SP2_FWREV_MINOR, + .subminor = MLXSW_SP2_FWREV_SUBMINOR, +}; + +#define MLXSW_SP2_FW_FILENAME \ + "mellanox/mlxsw_spectrum2-" __stringify(MLXSW_SP2_FWREV_MAJOR) \ + "." __stringify(MLXSW_SP2_FWREV_MINOR) \ + "." 
__stringify(MLXSW_SP2_FWREV_SUBMINOR) ".mfa2" + static const char mlxsw_sp1_driver_name[] = "mlxsw_spectrum"; static const char mlxsw_sp2_driver_name[] = "mlxsw_spectrum2"; +static const char mlxsw_sp3_driver_name[] = "mlxsw_spectrum3"; static const char mlxsw_sp_driver_version[] = "1.0"; static const unsigned char mlxsw_sp1_mac_mask[ETH_ALEN] = { @@ -174,6 +189,14 @@ struct mlxsw_sp_ptp_ops { void (*shaper_work)(struct work_struct *work); int (*get_ts_info)(struct mlxsw_sp *mlxsw_sp, struct ethtool_ts_info *info); + int (*get_stats_count)(void); + void (*get_stats_strings)(u8 **p); + void (*get_stats)(struct mlxsw_sp_port *mlxsw_sp_port, + u64 *data, int data_index); +}; + +struct mlxsw_sp_span_ops { + u32 (*buffsize_get)(int mtu, u32 speed); }; static int mlxsw_sp_component_query(struct mlxfw_dev *mlxfw_dev, @@ -402,15 +425,12 @@ static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp) rev->major, req_rev->major); return -EINVAL; } - if (MLXSW_SP_FWREV_MINOR_TO_BRANCH(rev->minor) == - MLXSW_SP_FWREV_MINOR_TO_BRANCH(req_rev->minor) && - (rev->minor > req_rev->minor || - (rev->minor == req_rev->minor && - rev->subminor >= req_rev->subminor))) + if (mlxsw_core_fw_rev_minor_subminor_validate(rev, req_rev)) return 0; - dev_info(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver\n", - rev->major, rev->minor, rev->subminor); + dev_err(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver (required >= %d.%d.%d)\n", + rev->major, rev->minor, rev->subminor, req_rev->major, + req_rev->minor, req_rev->subminor); dev_info(mlxsw_sp->bus_info->dev, "Flashing firmware using file %s\n", fw_filename); @@ -730,35 +750,69 @@ mlxsw_sp_port_system_port_mapping_set(struct mlxsw_sp_port *mlxsw_sp_port) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sspr), sspr_pl); } -static int mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp, - u8 local_port, u8 *p_module, - u8 *p_width, u8 *p_lane) +static int +mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp, u8 local_port, + struct mlxsw_sp_port_mapping *port_mapping) { char pmlp_pl[MLXSW_REG_PMLP_LEN]; + bool separate_rxtx; + u8 module; + u8 width; int err; + int i; mlxsw_reg_pmlp_pack(pmlp_pl, local_port); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pmlp), pmlp_pl); if (err) return err; - *p_module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0); - *p_width = mlxsw_reg_pmlp_width_get(pmlp_pl); - *p_lane = mlxsw_reg_pmlp_tx_lane_get(pmlp_pl, 0); + module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0); + width = mlxsw_reg_pmlp_width_get(pmlp_pl); + separate_rxtx = mlxsw_reg_pmlp_rxtx_get(pmlp_pl); + + if (width && !is_power_of_2(width)) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Unsupported module config: width value is not power of 2\n", + local_port); + return -EINVAL; + } + + for (i = 0; i < width; i++) { + if (mlxsw_reg_pmlp_module_get(pmlp_pl, i) != module) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Unsupported module config: contains multiple modules\n", + local_port); + return -EINVAL; + } + if (separate_rxtx && + mlxsw_reg_pmlp_tx_lane_get(pmlp_pl, i) != + mlxsw_reg_pmlp_rx_lane_get(pmlp_pl, i)) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Unsupported module config: TX and RX lane numbers are different\n", + local_port); + return -EINVAL; + } + if (mlxsw_reg_pmlp_tx_lane_get(pmlp_pl, i) != i) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Unsupported module config: TX and RX lane numbers are not sequential\n", + local_port); + return -EINVAL; + } + } + + port_mapping->module 
= module; + port_mapping->width = width; + port_mapping->lane = mlxsw_reg_pmlp_tx_lane_get(pmlp_pl, 0); return 0; } -static int mlxsw_sp_port_module_map(struct mlxsw_sp_port *mlxsw_sp_port, - u8 module, u8 width, u8 lane) +static int mlxsw_sp_port_module_map(struct mlxsw_sp_port *mlxsw_sp_port) { + struct mlxsw_sp_port_mapping *port_mapping = &mlxsw_sp_port->mapping; struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char pmlp_pl[MLXSW_REG_PMLP_LEN]; int i; mlxsw_reg_pmlp_pack(pmlp_pl, mlxsw_sp_port->local_port); - mlxsw_reg_pmlp_width_set(pmlp_pl, width); - for (i = 0; i < width; i++) { - mlxsw_reg_pmlp_module_set(pmlp_pl, i, module); - mlxsw_reg_pmlp_tx_lane_set(pmlp_pl, i, lane + i); /* Rx & Tx */ + mlxsw_reg_pmlp_width_set(pmlp_pl, port_mapping->width); + for (i = 0; i < port_mapping->width; i++) { + mlxsw_reg_pmlp_module_set(pmlp_pl, i, port_mapping->module); + mlxsw_reg_pmlp_tx_lane_set(pmlp_pl, i, port_mapping->lane + i); /* Rx & Tx */ } return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmlp), pmlp_pl); @@ -807,23 +861,17 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, u64 len; int err; + if (skb_cow_head(skb, MLXSW_TXHDR_LEN)) { + this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped); + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + memset(skb->cb, 0, sizeof(struct mlxsw_skb_cb)); if (mlxsw_core_skb_transmit_busy(mlxsw_sp->core, &tx_info)) return NETDEV_TX_BUSY; - if (unlikely(skb_headroom(skb) < MLXSW_TXHDR_LEN)) { - struct sk_buff *skb_orig = skb; - - skb = skb_realloc_headroom(skb, MLXSW_TXHDR_LEN); - if (!skb) { - this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped); - dev_kfree_skb_any(skb_orig); - return NETDEV_TX_OK; - } - dev_consume_skb_any(skb_orig); - } - if (eth_skb_pad(skb)) { this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped); return NETDEV_TX_OK; @@ -1162,6 +1210,9 @@ static void update_stats_cache(struct work_struct *work) periodic_hw_stats.update_dw.work); if (!netif_carrier_ok(mlxsw_sp_port->dev)) + /* Note: mlxsw_sp_port_down_wipe_counters() clears the cache as + * necessary when port goes down. 
+ */ goto out; mlxsw_sp_port_get_hw_stats(mlxsw_sp_port->dev, @@ -1625,7 +1676,7 @@ mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port, } flow_block_cb_incref(block_cb); err = mlxsw_sp_acl_block_bind(mlxsw_sp, acl_block, - mlxsw_sp_port, ingress); + mlxsw_sp_port, ingress, f->extack); if (err) goto err_block_bind; @@ -1743,6 +1794,10 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type, return mlxsw_sp_setup_tc_red(mlxsw_sp_port, type_data); case TC_SETUP_QDISC_PRIO: return mlxsw_sp_setup_tc_prio(mlxsw_sp_port, type_data); + case TC_SETUP_QDISC_ETS: + return mlxsw_sp_setup_tc_ets(mlxsw_sp_port, type_data); + case TC_SETUP_QDISC_TBF: + return mlxsw_sp_setup_tc_tbf(mlxsw_sp_port, type_data); default: return -EOPNOTSUPP; } @@ -2328,6 +2383,7 @@ static void mlxsw_sp_port_get_tc_strings(u8 **p, int tc) static void mlxsw_sp_port_get_strings(struct net_device *dev, u32 stringset, u8 *data) { + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); u8 *p = data; int i; @@ -2369,6 +2425,7 @@ static void mlxsw_sp_port_get_strings(struct net_device *dev, for (i = 0; i < TC_MAX_QUEUE; i++) mlxsw_sp_port_get_tc_strings(&p, i); + mlxsw_sp_port->mlxsw_sp->ptp_ops->get_stats_strings(&p); break; } } @@ -2463,6 +2520,7 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev, static void mlxsw_sp_port_get_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); int i, data_index = 0; /* IEEE 802.3 Counters */ @@ -2503,13 +2561,21 @@ static void mlxsw_sp_port_get_stats(struct net_device *dev, data, data_index); data_index += MLXSW_SP_PORT_HW_TC_STATS_LEN; } + + /* PTP counters */ + mlxsw_sp_port->mlxsw_sp->ptp_ops->get_stats(mlxsw_sp_port, + data, data_index); + data_index += mlxsw_sp_port->mlxsw_sp->ptp_ops->get_stats_count(); } static int mlxsw_sp_port_get_sset_count(struct net_device *dev, int sset) { + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + switch (sset) { case ETH_SS_STATS: - return MLXSW_SP_PORT_ETHTOOL_STATS_LEN; + return MLXSW_SP_PORT_ETHTOOL_STATS_LEN + + mlxsw_sp_port->mlxsw_sp->ptp_ops->get_stats_count(); default: return -EOPNOTSUPP; } @@ -2608,26 +2674,6 @@ static const struct mlxsw_sp1_port_link_mode mlxsw_sp1_port_link_mode[] = { .speed = SPEED_50000, }, { - .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4, - .mask_ethtool = ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT, - .speed = SPEED_56000, - }, - { - .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4, - .mask_ethtool = ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT, - .speed = SPEED_56000, - }, - { - .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4, - .mask_ethtool = ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT, - .speed = SPEED_56000, - }, - { - .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4, - .mask_ethtool = ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT, - .speed = SPEED_56000, - }, - { .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4, .mask_ethtool = ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, .speed = SPEED_100000, @@ -2674,7 +2720,7 @@ mlxsw_sp1_from_ptys_supported_port(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp1_from_ptys_link(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto, - unsigned long *mode) + u8 width, unsigned long *mode) { int i; @@ -2715,7 +2761,7 @@ mlxsw_sp1_from_ptys_speed_duplex(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, } static u32 -mlxsw_sp1_to_ptys_advert_link(struct mlxsw_sp *mlxsw_sp, +mlxsw_sp1_to_ptys_advert_link(struct mlxsw_sp *mlxsw_sp, u8 width, const struct ethtool_link_ksettings *cmd) { u32 ptys_proto = 
0; @@ -2729,7 +2775,8 @@ mlxsw_sp1_to_ptys_advert_link(struct mlxsw_sp *mlxsw_sp, return ptys_proto; } -static u32 mlxsw_sp1_to_ptys_speed(struct mlxsw_sp *mlxsw_sp, u32 speed) +static u32 mlxsw_sp1_to_ptys_speed(struct mlxsw_sp *mlxsw_sp, u8 width, + u32 speed) { u32 ptys_proto = 0; int i; @@ -2917,11 +2964,46 @@ mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4[] = { #define MLXSW_SP2_MASK_ETHTOOL_200GAUI_4_200GBASE_CR4_KR4_LEN \ ARRAY_SIZE(mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4) +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_400gaui_8[] = { + ETHTOOL_LINK_MODE_400000baseKR8_Full_BIT, + ETHTOOL_LINK_MODE_400000baseSR8_Full_BIT, + ETHTOOL_LINK_MODE_400000baseLR8_ER8_FR8_Full_BIT, + ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT, + ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_400GAUI_8_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_400gaui_8) + +#define MLXSW_SP_PORT_MASK_WIDTH_1X BIT(0) +#define MLXSW_SP_PORT_MASK_WIDTH_2X BIT(1) +#define MLXSW_SP_PORT_MASK_WIDTH_4X BIT(2) +#define MLXSW_SP_PORT_MASK_WIDTH_8X BIT(3) + +static u8 mlxsw_sp_port_mask_width_get(u8 width) +{ + switch (width) { + case 1: + return MLXSW_SP_PORT_MASK_WIDTH_1X; + case 2: + return MLXSW_SP_PORT_MASK_WIDTH_2X; + case 4: + return MLXSW_SP_PORT_MASK_WIDTH_4X; + case 8: + return MLXSW_SP_PORT_MASK_WIDTH_8X; + default: + WARN_ON_ONCE(1); + return 0; + } +} + struct mlxsw_sp2_port_link_mode { const enum ethtool_link_mode_bit_indices *mask_ethtool; int m_ethtool_len; u32 mask; u32 speed; + u8 mask_width; }; static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = { @@ -2929,74 +3011,116 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_SGMII_100M, .mask_ethtool = mlxsw_sp2_mask_ethtool_sgmii_100m, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_SGMII_100M_LEN, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X | + MLXSW_SP_PORT_MASK_WIDTH_2X | + MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_100, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_1000BASE_X_SGMII, .mask_ethtool = mlxsw_sp2_mask_ethtool_1000base_x_sgmii, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_1000BASE_X_SGMII_LEN, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X | + MLXSW_SP_PORT_MASK_WIDTH_2X | + MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_1000, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_2_5GBASE_X_2_5GMII, .mask_ethtool = mlxsw_sp2_mask_ethtool_2_5gbase_x_2_5gmii, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_2_5GBASE_X_2_5GMII_LEN, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X | + MLXSW_SP_PORT_MASK_WIDTH_2X | + MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_2500, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_5GBASE_R, .mask_ethtool = mlxsw_sp2_mask_ethtool_5gbase_r, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_5GBASE_R_LEN, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X | + MLXSW_SP_PORT_MASK_WIDTH_2X | + MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_5000, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_XFI_XAUI_1_10G, .mask_ethtool = mlxsw_sp2_mask_ethtool_xfi_xaui_1_10g, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_XFI_XAUI_1_10G_LEN, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X | + MLXSW_SP_PORT_MASK_WIDTH_2X | + MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_10000, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_XLAUI_4_XLPPI_4_40G, .mask_ethtool = mlxsw_sp2_mask_ethtool_xlaui_4_xlppi_4_40g, .m_ethtool_len = 
MLXSW_SP2_MASK_ETHTOOL_XLAUI_4_XLPPI_4_40G_LEN, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_40000, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_25GAUI_1_25GBASE_CR_KR, .mask_ethtool = mlxsw_sp2_mask_ethtool_25gaui_1_25gbase_cr_kr, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_25GAUI_1_25GBASE_CR_KR_LEN, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X | + MLXSW_SP_PORT_MASK_WIDTH_2X | + MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_25000, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_50GAUI_2_LAUI_2_50GBASE_CR2_KR2, .mask_ethtool = mlxsw_sp2_mask_ethtool_50gaui_2_laui_2_50gbase_cr2_kr2, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_50GAUI_2_LAUI_2_50GBASE_CR2_KR2_LEN, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_2X | + MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_50000, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_50GAUI_1_LAUI_1_50GBASE_CR_KR, .mask_ethtool = mlxsw_sp2_mask_ethtool_50gaui_1_laui_1_50gbase_cr_kr, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_50GAUI_1_LAUI_1_50GBASE_CR_KR_LEN, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X, .speed = SPEED_50000, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_CAUI_4_100GBASE_CR4_KR4, .mask_ethtool = mlxsw_sp2_mask_ethtool_caui_4_100gbase_cr4_kr4, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_CAUI_4_100GBASE_CR4_KR4_LEN, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_100000, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_100GAUI_2_100GBASE_CR2_KR2, .mask_ethtool = mlxsw_sp2_mask_ethtool_100gaui_2_100gbase_cr2_kr2, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_100GAUI_2_100GBASE_CR2_KR2_LEN, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_2X, .speed = SPEED_100000, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_200GAUI_4_200GBASE_CR4_KR4, .mask_ethtool = mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_200GAUI_4_200GBASE_CR4_KR4_LEN, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_4X | + MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_200000, }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_400GAUI_8, + .mask_ethtool = mlxsw_sp2_mask_ethtool_400gaui_8, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_400GAUI_8_LEN, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_8X, + .speed = SPEED_400000, + }, }; #define MLXSW_SP2_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp2_port_link_mode) @@ -3022,12 +3146,14 @@ mlxsw_sp2_set_bit_ethtool(const struct mlxsw_sp2_port_link_mode *link_mode, static void mlxsw_sp2_from_ptys_link(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto, - unsigned long *mode) + u8 width, unsigned long *mode) { + u8 mask_width = mlxsw_sp_port_mask_width_get(width); int i; for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { - if (ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask) + if ((ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask) && + (mask_width & mlxsw_sp2_port_link_mode[i].mask_width)) mlxsw_sp2_set_bit_ethtool(&mlxsw_sp2_port_link_mode[i], mode); } @@ -3078,27 +3204,32 @@ mlxsw_sp2_test_bit_ethtool(const struct mlxsw_sp2_port_link_mode *link_mode, } static u32 -mlxsw_sp2_to_ptys_advert_link(struct mlxsw_sp *mlxsw_sp, +mlxsw_sp2_to_ptys_advert_link(struct mlxsw_sp *mlxsw_sp, u8 width, const struct ethtool_link_ksettings *cmd) { + u8 mask_width = mlxsw_sp_port_mask_width_get(width); u32 ptys_proto = 0; int i; for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { - if (mlxsw_sp2_test_bit_ethtool(&mlxsw_sp2_port_link_mode[i], + if ((mask_width & mlxsw_sp2_port_link_mode[i].mask_width) && + mlxsw_sp2_test_bit_ethtool(&mlxsw_sp2_port_link_mode[i], 
cmd->link_modes.advertising)) ptys_proto |= mlxsw_sp2_port_link_mode[i].mask; } return ptys_proto; } -static u32 mlxsw_sp2_to_ptys_speed(struct mlxsw_sp *mlxsw_sp, u32 speed) +static u32 mlxsw_sp2_to_ptys_speed(struct mlxsw_sp *mlxsw_sp, + u8 width, u32 speed) { + u8 mask_width = mlxsw_sp_port_mask_width_get(width); u32 ptys_proto = 0; int i; for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { - if (speed == mlxsw_sp2_port_link_mode[i].speed) + if ((speed == mlxsw_sp2_port_link_mode[i].speed) && + (mask_width & mlxsw_sp2_port_link_mode[i].mask_width)) ptys_proto |= mlxsw_sp2_port_link_mode[i].mask; } return ptys_proto; @@ -3182,7 +3313,7 @@ mlxsw_sp2_port_type_speed_ops = { static void mlxsw_sp_port_get_link_supported(struct mlxsw_sp *mlxsw_sp, u32 eth_proto_cap, - struct ethtool_link_ksettings *cmd) + u8 width, struct ethtool_link_ksettings *cmd) { const struct mlxsw_sp_port_type_speed_ops *ops; @@ -3193,12 +3324,13 @@ mlxsw_sp_port_get_link_supported(struct mlxsw_sp *mlxsw_sp, u32 eth_proto_cap, ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); ops->from_ptys_supported_port(mlxsw_sp, eth_proto_cap, cmd); - ops->from_ptys_link(mlxsw_sp, eth_proto_cap, cmd->link_modes.supported); + ops->from_ptys_link(mlxsw_sp, eth_proto_cap, width, + cmd->link_modes.supported); } static void mlxsw_sp_port_get_link_advertise(struct mlxsw_sp *mlxsw_sp, - u32 eth_proto_admin, bool autoneg, + u32 eth_proto_admin, bool autoneg, u8 width, struct ethtool_link_ksettings *cmd) { const struct mlxsw_sp_port_type_speed_ops *ops; @@ -3209,7 +3341,7 @@ mlxsw_sp_port_get_link_advertise(struct mlxsw_sp *mlxsw_sp, return; ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg); - ops->from_ptys_link(mlxsw_sp, eth_proto_admin, + ops->from_ptys_link(mlxsw_sp, eth_proto_admin, width, cmd->link_modes.advertising); } @@ -3264,10 +3396,11 @@ static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev, ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, &eth_proto_cap, &eth_proto_admin, &eth_proto_oper); - mlxsw_sp_port_get_link_supported(mlxsw_sp, eth_proto_cap, cmd); + mlxsw_sp_port_get_link_supported(mlxsw_sp, eth_proto_cap, + mlxsw_sp_port->mapping.width, cmd); mlxsw_sp_port_get_link_advertise(mlxsw_sp, eth_proto_admin, autoneg, - cmd); + mlxsw_sp_port->mapping.width, cmd); cmd->base.autoneg = autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE; connector_type = mlxsw_reg_ptys_connector_type_get(ptys_pl); @@ -3300,13 +3433,11 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev, ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, &eth_proto_cap, NULL, NULL); autoneg = cmd->base.autoneg == AUTONEG_ENABLE; - if (!autoneg && cmd->base.speed == SPEED_56000) { - netdev_err(dev, "56G not supported with autoneg off\n"); - return -EINVAL; - } eth_proto_new = autoneg ?
- ops->to_ptys_advert_link(mlxsw_sp, cmd) : - ops->to_ptys_speed(mlxsw_sp, cmd->base.speed); + ops->to_ptys_advert_link(mlxsw_sp, mlxsw_sp_port->mapping.width, + cmd) : + ops->to_ptys_speed(mlxsw_sp, mlxsw_sp_port->mapping.width, + cmd->base.speed); eth_proto_new = eth_proto_new & eth_proto_cap; if (!eth_proto_new) { @@ -3386,7 +3517,7 @@ static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { }; static int -mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 width) +mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; const struct mlxsw_sp_port_type_speed_ops *ops; @@ -3402,7 +3533,7 @@ mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 width) &base_speed); if (err) return err; - upper_speed = base_speed * width; + upper_speed = base_speed * mlxsw_sp_port->mapping.width; eth_proto_admin = ops->to_ptys_upper_speed(mlxsw_sp, upper_speed); ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port, @@ -3410,6 +3541,27 @@ mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 width) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); } +int mlxsw_sp_port_speed_get(struct mlxsw_sp_port *mlxsw_sp_port, u32 *speed) +{ + const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char ptys_pl[MLXSW_REG_PTYS_LEN]; + u32 eth_proto_oper; + int err; + + port_type_speed_ops = mlxsw_sp->port_type_speed_ops; + port_type_speed_ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, + mlxsw_sp_port->local_port, 0, + false); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); + if (err) + return err; + port_type_speed_ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, NULL, NULL, + &eth_proto_oper); + *speed = port_type_speed_ops->from_ptys_speed(mlxsw_sp, eth_proto_oper); + return 0; +} + int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, bool dwrr, u8 dwrr_weight) @@ -3427,7 +3579,7 @@ int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, - u8 next_index, u32 maxrate) + u8 next_index, u32 maxrate, u8 burst_size) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char qeec_pl[MLXSW_REG_QEEC_LEN]; @@ -3436,6 +3588,7 @@ int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port, next_index); mlxsw_reg_qeec_mase_set(qeec_pl, true); mlxsw_reg_qeec_max_shaper_rate_set(qeec_pl, maxrate); + mlxsw_reg_qeec_max_shaper_bs_set(qeec_pl, burst_size); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl); } @@ -3473,26 +3626,25 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) * one subgroup, which are all member in the same group.
*/ err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_GROUP, 0, 0, false, - 0); + MLXSW_REG_QEEC_HR_GROUP, 0, 0, false, 0); if (err) return err; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, false, 0); if (err) return err; } for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_TC, i, i, + MLXSW_REG_QEEC_HR_TC, i, i, false, 0); if (err) return err; err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_TC, + MLXSW_REG_QEEC_HR_TC, i + 8, i, true, 100); if (err) @@ -3504,30 +3656,30 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) * for the initial configuration. */ err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_PORT, 0, 0, - MLXSW_REG_QEEC_MAS_DIS); + MLXSW_REG_QEEC_HR_PORT, 0, 0, + MLXSW_REG_QEEC_MAS_DIS, 0); if (err) return err; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, - MLXSW_REG_QEEC_MAS_DIS); + MLXSW_REG_QEEC_MAS_DIS, 0); if (err) return err; } for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_TC, + MLXSW_REG_QEEC_HR_TC, i, i, - MLXSW_REG_QEEC_MAS_DIS); + MLXSW_REG_QEEC_MAS_DIS, 0); if (err) return err; err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_TC, + MLXSW_REG_QEEC_HR_TC, i + 8, i, - MLXSW_REG_QEEC_MAS_DIS); + MLXSW_REG_QEEC_MAS_DIS, 0); if (err) return err; } @@ -3535,7 +3687,7 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) /* Configure the min shaper for multicast TCs. 
*/ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_min_bw_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_TC, + MLXSW_REG_QEEC_HR_TC, i + 8, i, MLXSW_REG_QEEC_MIS_MIN); if (err) @@ -3563,15 +3715,18 @@ static int mlxsw_sp_port_tc_mc_mode_set(struct mlxsw_sp_port *mlxsw_sp_port, } static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, - bool split, u8 module, u8 width, u8 lane) + u8 split_base_local_port, + struct mlxsw_sp_port_mapping *port_mapping) { struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + bool split = !!split_base_local_port; struct mlxsw_sp_port *mlxsw_sp_port; struct net_device *dev; int err; err = mlxsw_core_port_init(mlxsw_sp->core, local_port, - module + 1, split, lane / width, + port_mapping->module + 1, split, + port_mapping->lane / port_mapping->width, mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac)); if (err) { @@ -3586,15 +3741,15 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_alloc_etherdev; } SET_NETDEV_DEV(dev, mlxsw_sp->bus_info->dev); + dev_net_set(dev, mlxsw_sp_net(mlxsw_sp)); mlxsw_sp_port = netdev_priv(dev); mlxsw_sp_port->dev = dev; mlxsw_sp_port->mlxsw_sp = mlxsw_sp; mlxsw_sp_port->local_port = local_port; mlxsw_sp_port->pvid = MLXSW_SP_DEFAULT_VID; mlxsw_sp_port->split = split; - mlxsw_sp_port->mapping.module = module; - mlxsw_sp_port->mapping.width = width; - mlxsw_sp_port->mapping.lane = lane; + mlxsw_sp_port->split_base_local_port = split_base_local_port; + mlxsw_sp_port->mapping = *port_mapping; mlxsw_sp_port->link.autoneg = 1; INIT_LIST_HEAD(&mlxsw_sp_port->vlans_list); INIT_LIST_HEAD(&mlxsw_sp_port->mall_tc_list); @@ -3619,7 +3774,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, dev->netdev_ops = &mlxsw_sp_port_netdev_ops; dev->ethtool_ops = &mlxsw_sp_port_ethtool_ops; - err = mlxsw_sp_port_module_map(mlxsw_sp_port, module, width, lane); + err = mlxsw_sp_port_module_map(mlxsw_sp_port); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to map module\n", mlxsw_sp_port->local_port); @@ -3661,7 +3816,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_port_system_port_mapping_set; } - err = mlxsw_sp_port_speed_by_width_set(mlxsw_sp_port, width); + err = mlxsw_sp_port_speed_by_width_set(mlxsw_sp_port); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to enable speeds\n", mlxsw_sp_port->local_port); @@ -3722,6 +3877,14 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_port_qdiscs_init; } + err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, 0, VLAN_N_VID - 1, false, + false); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to clear VLAN filter\n", + mlxsw_sp_port->local_port); + goto err_port_vlan_clear; + } + err = mlxsw_sp_port_nve_init(mlxsw_sp_port); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize NVE\n", @@ -3748,6 +3911,8 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, INIT_DELAYED_WORK(&mlxsw_sp_port->ptp.shaper_dw, mlxsw_sp->ptp_ops->shaper_work); + INIT_DELAYED_WORK(&mlxsw_sp_port->span.speed_update_dw, + mlxsw_sp_span_speed_update_work); mlxsw_sp->ports[local_port] = mlxsw_sp_port; err = register_netdev(dev); @@ -3769,6 +3934,7 @@ err_port_vlan_create: err_port_pvid_set: mlxsw_sp_port_nve_fini(mlxsw_sp_port); err_port_nve_init: +err_port_vlan_clear: mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port); err_port_qdiscs_init: mlxsw_sp_port_fids_fini(mlxsw_sp_port); @@ -3803,6 +3969,7 @@ static void 
mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; cancel_delayed_work_sync(&mlxsw_sp_port->periodic_hw_stats.update_dw); + cancel_delayed_work_sync(&mlxsw_sp_port->span.speed_update_dw); cancel_delayed_work_sync(&mlxsw_sp_port->ptp.shaper_dw); mlxsw_sp_port_ptp_clear(mlxsw_sp_port); mlxsw_core_port_clear(mlxsw_sp->core, local_port, mlxsw_sp); @@ -3823,6 +3990,45 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) mlxsw_core_port_fini(mlxsw_sp->core, local_port); } +static int mlxsw_sp_cpu_port_create(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + int err; + + mlxsw_sp_port = kzalloc(sizeof(*mlxsw_sp_port), GFP_KERNEL); + if (!mlxsw_sp_port) + return -ENOMEM; + + mlxsw_sp_port->mlxsw_sp = mlxsw_sp; + mlxsw_sp_port->local_port = MLXSW_PORT_CPU_PORT; + + err = mlxsw_core_cpu_port_init(mlxsw_sp->core, + mlxsw_sp_port, + mlxsw_sp->base_mac, + sizeof(mlxsw_sp->base_mac)); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize core CPU port\n"); + goto err_core_cpu_port_init; + } + + mlxsw_sp->ports[MLXSW_PORT_CPU_PORT] = mlxsw_sp_port; + return 0; + +err_core_cpu_port_init: + kfree(mlxsw_sp_port); + return err; +} + +static void mlxsw_sp_cpu_port_remove(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_sp->ports[MLXSW_PORT_CPU_PORT]; + + mlxsw_core_cpu_port_fini(mlxsw_sp->core); + mlxsw_sp->ports[MLXSW_PORT_CPU_PORT] = NULL; + kfree(mlxsw_sp_port); +} + static bool mlxsw_sp_port_created(struct mlxsw_sp *mlxsw_sp, u8 local_port) { return mlxsw_sp->ports[local_port] != NULL; @@ -3835,14 +4041,14 @@ static void mlxsw_sp_ports_remove(struct mlxsw_sp *mlxsw_sp) for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) if (mlxsw_sp_port_created(mlxsw_sp, i)) mlxsw_sp_port_remove(mlxsw_sp, i); - kfree(mlxsw_sp->port_to_module); + mlxsw_sp_cpu_port_remove(mlxsw_sp); kfree(mlxsw_sp->ports); } static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp) { unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); - u8 module, width, lane; + struct mlxsw_sp_port_mapping *port_mapping; size_t alloc_size; int i; int err; @@ -3852,60 +4058,100 @@ static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp) if (!mlxsw_sp->ports) return -ENOMEM; - mlxsw_sp->port_to_module = kmalloc_array(max_ports, sizeof(int), - GFP_KERNEL); - if (!mlxsw_sp->port_to_module) { - err = -ENOMEM; - goto err_port_to_module_alloc; - } + err = mlxsw_sp_cpu_port_create(mlxsw_sp); + if (err) + goto err_cpu_port_create; for (i = 1; i < max_ports; i++) { - /* Mark as invalid */ - mlxsw_sp->port_to_module[i] = -1; - - err = mlxsw_sp_port_module_info_get(mlxsw_sp, i, &module, - &width, &lane); - if (err) - goto err_port_module_info_get; - if (!width) + port_mapping = mlxsw_sp->port_mapping[i]; + if (!port_mapping) continue; - mlxsw_sp->port_to_module[i] = module; - err = mlxsw_sp_port_create(mlxsw_sp, i, false, - module, width, lane); + err = mlxsw_sp_port_create(mlxsw_sp, i, 0, port_mapping); if (err) goto err_port_create; } return 0; err_port_create: -err_port_module_info_get: for (i--; i >= 1; i--) if (mlxsw_sp_port_created(mlxsw_sp, i)) mlxsw_sp_port_remove(mlxsw_sp, i); - kfree(mlxsw_sp->port_to_module); -err_port_to_module_alloc: + mlxsw_sp_cpu_port_remove(mlxsw_sp); +err_cpu_port_create: kfree(mlxsw_sp->ports); return err; } -static u8 mlxsw_sp_cluster_base_port_get(u8 local_port) +static int mlxsw_sp_port_module_info_init(struct mlxsw_sp *mlxsw_sp) { - u8 
offset = (local_port - 1) % MLXSW_SP_PORTS_PER_CLUSTER_MAX; + unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); + struct mlxsw_sp_port_mapping port_mapping; + int i; + int err; + + mlxsw_sp->port_mapping = kcalloc(max_ports, + sizeof(struct mlxsw_sp_port_mapping *), + GFP_KERNEL); + if (!mlxsw_sp->port_mapping) + return -ENOMEM; + + for (i = 1; i < max_ports; i++) { + err = mlxsw_sp_port_module_info_get(mlxsw_sp, i, &port_mapping); + if (err) + goto err_port_module_info_get; + if (!port_mapping.width) + continue; + + mlxsw_sp->port_mapping[i] = kmemdup(&port_mapping, + sizeof(port_mapping), + GFP_KERNEL); + if (!mlxsw_sp->port_mapping[i]) { + err = -ENOMEM; + goto err_port_module_info_dup; + } + } + return 0; + +err_port_module_info_get: +err_port_module_info_dup: + for (i--; i >= 1; i--) + kfree(mlxsw_sp->port_mapping[i]); + kfree(mlxsw_sp->port_mapping); + return err; +} + +static void mlxsw_sp_port_module_info_fini(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) + kfree(mlxsw_sp->port_mapping[i]); + kfree(mlxsw_sp->port_mapping); +} + +static u8 mlxsw_sp_cluster_base_port_get(u8 local_port, unsigned int max_width) +{ + u8 offset = (local_port - 1) % max_width; return local_port - offset; } -static int mlxsw_sp_port_split_create(struct mlxsw_sp *mlxsw_sp, u8 base_port, - u8 module, unsigned int count, u8 offset) +static int +mlxsw_sp_port_split_create(struct mlxsw_sp *mlxsw_sp, u8 base_port, + struct mlxsw_sp_port_mapping *port_mapping, + unsigned int count, u8 offset) { - u8 width = MLXSW_PORT_MODULE_MAX_WIDTH / count; + struct mlxsw_sp_port_mapping split_port_mapping; int err, i; + split_port_mapping = *port_mapping; + split_port_mapping.width /= count; for (i = 0; i < count; i++) { err = mlxsw_sp_port_create(mlxsw_sp, base_port + i * offset, - true, module, width, i * width); + base_port, &split_port_mapping); if (err) goto err_port_create; + split_port_mapping.lane += split_port_mapping.width; } return 0; @@ -3918,45 +4164,55 @@ err_port_create: } static void mlxsw_sp_port_unsplit_create(struct mlxsw_sp *mlxsw_sp, - u8 base_port, unsigned int count) + u8 base_port, + unsigned int count, u8 offset) { - u8 local_port, module, width = MLXSW_PORT_MODULE_MAX_WIDTH; + struct mlxsw_sp_port_mapping *port_mapping; int i; - /* Split by four means we need to re-create two ports, otherwise - * only one. - */ - count = count / 2; - - for (i = 0; i < count; i++) { - local_port = base_port + i * 2; - if (mlxsw_sp->port_to_module[local_port] < 0) + /* Go over original unsplit ports in the gap and recreate them. 
*/ + for (i = 0; i < count * offset; i++) { + port_mapping = mlxsw_sp->port_mapping[base_port + i]; + if (!port_mapping) continue; - module = mlxsw_sp->port_to_module[local_port]; - - mlxsw_sp_port_create(mlxsw_sp, local_port, false, module, - width, 0); + mlxsw_sp_port_create(mlxsw_sp, base_port + i, 0, port_mapping); } } +static int mlxsw_sp_local_ports_offset(struct mlxsw_core *mlxsw_core, + unsigned int count, + unsigned int max_width) +{ + enum mlxsw_res_id local_ports_in_x_res_id; + int split_width = max_width / count; + + if (split_width == 1) + local_ports_in_x_res_id = MLXSW_RES_ID_LOCAL_PORTS_IN_1X; + else if (split_width == 2) + local_ports_in_x_res_id = MLXSW_RES_ID_LOCAL_PORTS_IN_2X; + else if (split_width == 4) + local_ports_in_x_res_id = MLXSW_RES_ID_LOCAL_PORTS_IN_4X; + else + return -EINVAL; + + if (!mlxsw_core_res_valid(mlxsw_core, local_ports_in_x_res_id)) + return -EINVAL; + return mlxsw_core_res_get(mlxsw_core, local_ports_in_x_res_id); +} + static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port, unsigned int count, struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); - u8 local_ports_in_1x, local_ports_in_2x, offset; + struct mlxsw_sp_port_mapping port_mapping; struct mlxsw_sp_port *mlxsw_sp_port; - u8 module, cur_width, base_port; + int max_width; + u8 base_port; + int offset; int i; int err; - if (!MLXSW_CORE_RES_VALID(mlxsw_core, LOCAL_PORTS_IN_1X) || - !MLXSW_CORE_RES_VALID(mlxsw_core, LOCAL_PORTS_IN_2X)) - return -EIO; - - local_ports_in_1x = MLXSW_CORE_RES_GET(mlxsw_core, LOCAL_PORTS_IN_1X); - local_ports_in_2x = MLXSW_CORE_RES_GET(mlxsw_core, LOCAL_PORTS_IN_2X); - mlxsw_sp_port = mlxsw_sp->ports[local_port]; if (!mlxsw_sp_port) { dev_err(mlxsw_sp->bus_info->dev, "Port number \"%d\" does not exist\n", @@ -3965,47 +4221,70 @@ static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port, return -EINVAL; } - module = mlxsw_sp_port->mapping.module; - cur_width = mlxsw_sp_port->mapping.width; + /* Split ports cannot be split. */ + if (mlxsw_sp_port->split) { + netdev_err(mlxsw_sp_port->dev, "Port cannot be split further\n"); + NL_SET_ERR_MSG_MOD(extack, "Port cannot be split further"); + return -EINVAL; + } + + max_width = mlxsw_core_module_max_width(mlxsw_core, + mlxsw_sp_port->mapping.module); + if (max_width < 0) { + netdev_err(mlxsw_sp_port->dev, "Cannot get max width of port module\n"); + NL_SET_ERR_MSG_MOD(extack, "Cannot get max width of port module"); + return max_width; + } - if (count != 2 && count != 4) { - netdev_err(mlxsw_sp_port->dev, "Port can only be split into 2 or 4 ports\n"); - NL_SET_ERR_MSG_MOD(extack, "Port can only be split into 2 or 4 ports"); + /* Split port with non-max and 1 module width cannot be split. */ + if (mlxsw_sp_port->mapping.width != max_width || max_width == 1) { + netdev_err(mlxsw_sp_port->dev, "Port cannot be split\n"); + NL_SET_ERR_MSG_MOD(extack, "Port cannot be split"); return -EINVAL; } - if (cur_width != MLXSW_PORT_MODULE_MAX_WIDTH) { - netdev_err(mlxsw_sp_port->dev, "Port cannot be split further\n"); - NL_SET_ERR_MSG_MOD(extack, "Port cannot be split further"); + if (count == 1 || !is_power_of_2(count) || count > max_width) { + netdev_err(mlxsw_sp_port->dev, "Invalid split count\n"); + NL_SET_ERR_MSG_MOD(extack, "Invalid split count"); return -EINVAL; } - /* Make sure we have enough slave (even) ports for the split. 
*/ - if (count == 2) { - offset = local_ports_in_2x; - base_port = local_port; - if (mlxsw_sp->ports[base_port + local_ports_in_2x]) { - netdev_err(mlxsw_sp_port->dev, "Invalid split configuration\n"); - NL_SET_ERR_MSG_MOD(extack, "Invalid split configuration"); - return -EINVAL; - } - } else { - offset = local_ports_in_1x; - base_port = mlxsw_sp_cluster_base_port_get(local_port); - if (mlxsw_sp->ports[base_port + 1] || - mlxsw_sp->ports[base_port + 3]) { + offset = mlxsw_sp_local_ports_offset(mlxsw_core, count, max_width); + if (offset < 0) { + netdev_err(mlxsw_sp_port->dev, "Cannot obtain local port offset\n"); + NL_SET_ERR_MSG_MOD(extack, "Cannot obtain local port offset"); + return -EINVAL; + } + + /* Only in case max split is being done, the local port and + * base port may differ. + */ + base_port = count == max_width ? + mlxsw_sp_cluster_base_port_get(local_port, max_width) : + local_port; + + for (i = 0; i < count * offset; i++) { + /* Expect base port to exist and also the one in the middle in + * case of maximal split count. + */ + if (i == 0 || (count == max_width && i == count / 2)) + continue; + + if (mlxsw_sp_port_created(mlxsw_sp, base_port + i)) { netdev_err(mlxsw_sp_port->dev, "Invalid split configuration\n"); NL_SET_ERR_MSG_MOD(extack, "Invalid split configuration"); return -EINVAL; } } + port_mapping = mlxsw_sp_port->mapping; + for (i = 0; i < count; i++) if (mlxsw_sp_port_created(mlxsw_sp, base_port + i * offset)) mlxsw_sp_port_remove(mlxsw_sp, base_port + i * offset); - err = mlxsw_sp_port_split_create(mlxsw_sp, base_port, module, count, - offset); + err = mlxsw_sp_port_split_create(mlxsw_sp, base_port, &port_mapping, + count, offset); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to create split ports\n"); goto err_port_split_create; @@ -4014,7 +4293,7 @@ static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port, return 0; err_port_split_create: - mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count); + mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count, offset); return err; } @@ -4022,19 +4301,13 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port, struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); - u8 local_ports_in_1x, local_ports_in_2x, offset; struct mlxsw_sp_port *mlxsw_sp_port; - u8 cur_width, base_port; unsigned int count; + int max_width; + u8 base_port; + int offset; int i; - if (!MLXSW_CORE_RES_VALID(mlxsw_core, LOCAL_PORTS_IN_1X) || - !MLXSW_CORE_RES_VALID(mlxsw_core, LOCAL_PORTS_IN_2X)) - return -EIO; - - local_ports_in_1x = MLXSW_CORE_RES_GET(mlxsw_core, LOCAL_PORTS_IN_1X); - local_ports_in_2x = MLXSW_CORE_RES_GET(mlxsw_core, LOCAL_PORTS_IN_2X); - mlxsw_sp_port = mlxsw_sp->ports[local_port]; if (!mlxsw_sp_port) { dev_err(mlxsw_sp->bus_info->dev, "Port number \"%d\" does not exist\n", @@ -4049,29 +4322,43 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port, return -EINVAL; } - cur_width = mlxsw_sp_port->mapping.width; - count = cur_width == 1 ? 
4 : 2; + max_width = mlxsw_core_module_max_width(mlxsw_core, + mlxsw_sp_port->mapping.module); + if (max_width < 0) { + netdev_err(mlxsw_sp_port->dev, "Cannot get max width of port module\n"); + NL_SET_ERR_MSG_MOD(extack, "Cannot get max width of port module"); + return max_width; + } - if (count == 2) - offset = local_ports_in_2x; - else - offset = local_ports_in_1x; + count = max_width / mlxsw_sp_port->mapping.width; - base_port = mlxsw_sp_cluster_base_port_get(local_port); + offset = mlxsw_sp_local_ports_offset(mlxsw_core, count, max_width); + if (WARN_ON(offset < 0)) { + netdev_err(mlxsw_sp_port->dev, "Cannot obtain local port offset\n"); + NL_SET_ERR_MSG_MOD(extack, "Cannot obtain local port offset"); + return -EINVAL; + } - /* Determine which ports to remove. */ - if (count == 2 && local_port >= base_port + 2) - base_port = base_port + 2; + base_port = mlxsw_sp_port->split_base_local_port; for (i = 0; i < count; i++) if (mlxsw_sp_port_created(mlxsw_sp, base_port + i * offset)) mlxsw_sp_port_remove(mlxsw_sp, base_port + i * offset); - mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count); + mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count, offset); return 0; } +static void +mlxsw_sp_port_down_wipe_counters(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int i; + + for (i = 0; i < TC_MAX_QUEUE; i++) + mlxsw_sp_port->periodic_hw_stats.xstats.backlog[i] = 0; +} + static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg, char *pude_pl, void *priv) { @@ -4090,9 +4377,11 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg, netdev_info(mlxsw_sp_port->dev, "link up\n"); netif_carrier_on(mlxsw_sp_port->dev); mlxsw_core_schedule_dw(&mlxsw_sp_port->ptp.shaper_dw, 0); + mlxsw_core_schedule_dw(&mlxsw_sp_port->span.speed_update_dw, 0); } else { netdev_info(mlxsw_sp_port->dev, "link down\n"); netif_carrier_off(mlxsw_sp_port->dev); + mlxsw_sp_port_down_wipe_counters(mlxsw_sp_port); } } @@ -4260,8 +4549,6 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { MLXSW_SP_RXL_NO_MARK(IPV6_MLDV2_LISTENER_REPORT, TRAP_TO_CPU, IPV6_MLD, false), /* L3 traps */ - MLXSW_SP_RXL_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false), MLXSW_SP_RXL_L3_MARK(LBERROR, MIRROR_TO_CPU, LBERROR, false), MLXSW_SP_RXL_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false), MLXSW_SP_RXL_MARK(IPV6_UNSPECIFIED_ADDRESS, TRAP_TO_CPU, ROUTER_EXP, @@ -4288,14 +4575,18 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { MLXSW_SP_RXL_MARK(L3_IPV6_REDIRECTION, TRAP_TO_CPU, IPV6_ND, false), MLXSW_SP_RXL_MARK(IPV6_MC_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_MARK(HOST_MISS_IPV4, TRAP_TO_CPU, HOST_MISS, false), - MLXSW_SP_RXL_MARK(HOST_MISS_IPV6, TRAP_TO_CPU, HOST_MISS, false), MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, ROUTER_EXP, false), MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_MARK(IPIP_DECAP_ERROR, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_MARK(DECAP_ECN0, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_MARK(IPV4_VRRP, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_MARK(IPV6_VRRP, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(IPV4_VRRP, TRAP_TO_CPU, VRRP, false), + MLXSW_SP_RXL_MARK(IPV6_VRRP, TRAP_TO_CPU, VRRP, false), + MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_SIP_CLASS_E, FORWARD, + ROUTER_EXP, false), + MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_MC_DMAC, FORWARD, + ROUTER_EXP, false), + MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_SIP_DIP, FORWARD, + 
ROUTER_EXP, false), + MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_DIP_LINK_LOCAL, FORWARD, + ROUTER_EXP, false), /* PKT Sample trap */ MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU, false, SP_IP2ME, DISCARD), @@ -4304,7 +4595,6 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { /* Multicast Router Traps */ MLXSW_SP_RXL_MARK(IPV4_PIM, TRAP_TO_CPU, PIM, false), MLXSW_SP_RXL_MARK(IPV6_PIM, TRAP_TO_CPU, PIM, false), - MLXSW_SP_RXL_MARK(RPF, TRAP_TO_CPU, RPF, false), MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false), MLXSW_SP_RXL_L3_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false), /* NVE traps */ @@ -4379,6 +4669,10 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) rate = 19 * 1024; burst_size = 12; break; + case MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP: + rate = 360; + burst_size = 7; + break; default: continue; } @@ -4418,6 +4712,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF: case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM: case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP: priority = 5; tc = 5; break; @@ -4609,6 +4904,9 @@ static const struct mlxsw_sp_ptp_ops mlxsw_sp1_ptp_ops = { .hwtstamp_set = mlxsw_sp1_ptp_hwtstamp_set, .shaper_work = mlxsw_sp1_ptp_shaper_work, .get_ts_info = mlxsw_sp1_ptp_get_ts_info, + .get_stats_count = mlxsw_sp1_get_stats_count, + .get_stats_strings = mlxsw_sp1_get_stats_strings, + .get_stats = mlxsw_sp1_get_stats, }; static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = { @@ -4622,13 +4920,44 @@ static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = { .hwtstamp_set = mlxsw_sp2_ptp_hwtstamp_set, .shaper_work = mlxsw_sp2_ptp_shaper_work, .get_ts_info = mlxsw_sp2_ptp_get_ts_info, + .get_stats_count = mlxsw_sp2_get_stats_count, + .get_stats_strings = mlxsw_sp2_get_stats_strings, + .get_stats = mlxsw_sp2_get_stats, }; +static u32 mlxsw_sp1_span_buffsize_get(int mtu, u32 speed) +{ + return mtu * 5 / 2; +} + +static const struct mlxsw_sp_span_ops mlxsw_sp1_span_ops = { + .buffsize_get = mlxsw_sp1_span_buffsize_get, +}; + +#define MLXSW_SP2_SPAN_EG_MIRROR_BUFFER_FACTOR 38 + +static u32 mlxsw_sp2_span_buffsize_get(int mtu, u32 speed) +{ + return 3 * mtu + MLXSW_SP2_SPAN_EG_MIRROR_BUFFER_FACTOR * speed / 1000; +} + +static const struct mlxsw_sp_span_ops mlxsw_sp2_span_ops = { + .buffsize_get = mlxsw_sp2_span_buffsize_get, +}; + +u32 mlxsw_sp_span_buffsize_get(struct mlxsw_sp *mlxsw_sp, int mtu, u32 speed) +{ + u32 buffsize = mlxsw_sp->span_ops->buffsize_get(speed, mtu); + + return mlxsw_sp_bytes_cells(mlxsw_sp, buffsize) + 1; +} + static int mlxsw_sp_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr); static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, - const struct mlxsw_bus_info *mlxsw_bus_info) + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); int err; @@ -4640,6 +4969,8 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, if (err) return err; + mlxsw_core_emad_string_tlv_enable(mlxsw_core); + err = mlxsw_sp_base_mac_get(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to get base mac\n"); @@ -4664,6 +4995,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_traps_init; } + err = mlxsw_sp_devlink_traps_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize devlink traps\n"); + goto err_devlink_traps_init; + } + err = 
mlxsw_sp_buffers_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize buffers\n"); @@ -4715,7 +5052,7 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_acl_init; } - err = mlxsw_sp_router_init(mlxsw_sp); + err = mlxsw_sp_router_init(mlxsw_sp, extack); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n"); goto err_router_init; @@ -4748,7 +5085,8 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, * respin. */ mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event; - err = register_netdevice_notifier(&mlxsw_sp->netdevice_nb); + err = register_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->netdevice_nb); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to register netdev notifier\n"); goto err_netdev_notifier; @@ -4760,6 +5098,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_dpipe_init; } + err = mlxsw_sp_port_module_info_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to init port module info\n"); + goto err_port_module_info_init; + } + err = mlxsw_sp_ports_create(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n"); @@ -4769,9 +5113,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, return 0; err_ports_create: + mlxsw_sp_port_module_info_fini(mlxsw_sp); +err_port_module_info_init: mlxsw_sp_dpipe_fini(mlxsw_sp); err_dpipe_init: - unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); + unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->netdevice_nb); err_netdev_notifier: if (mlxsw_sp->clock) mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state); @@ -4797,6 +5144,8 @@ err_span_init: err_lag_init: mlxsw_sp_buffers_fini(mlxsw_sp); err_buffers_init: + mlxsw_sp_devlink_traps_fini(mlxsw_sp); +err_devlink_traps_init: mlxsw_sp_traps_fini(mlxsw_sp); err_traps_init: mlxsw_sp_fids_fini(mlxsw_sp); @@ -4806,7 +5155,8 @@ err_fids_init: } static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, - const struct mlxsw_bus_info *mlxsw_bus_info) + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); @@ -4823,17 +5173,22 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->sb_vals = &mlxsw_sp1_sb_vals; mlxsw_sp->port_type_speed_ops = &mlxsw_sp1_port_type_speed_ops; mlxsw_sp->ptp_ops = &mlxsw_sp1_ptp_ops; + mlxsw_sp->span_ops = &mlxsw_sp1_span_ops; mlxsw_sp->listeners = mlxsw_sp1_listener; mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp1_listener); + mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1; - return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); + return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); } static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, - const struct mlxsw_bus_info *mlxsw_bus_info) + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + mlxsw_sp->req_rev = &mlxsw_sp2_fw_rev; + mlxsw_sp->fw_filename = MLXSW_SP2_FW_FILENAME; mlxsw_sp->kvdl_ops = &mlxsw_sp2_kvdl_ops; mlxsw_sp->afa_ops = &mlxsw_sp2_act_afa_ops; mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops; @@ -4845,8 +5200,33 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals; mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops; mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops; + mlxsw_sp->span_ops = &mlxsw_sp2_span_ops; + mlxsw_sp->lowest_shaper_bs = 
MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2; - return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); + return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); +} + +static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + + mlxsw_sp->kvdl_ops = &mlxsw_sp2_kvdl_ops; + mlxsw_sp->afa_ops = &mlxsw_sp2_act_afa_ops; + mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops; + mlxsw_sp->mr_tcam_ops = &mlxsw_sp2_mr_tcam_ops; + mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops; + mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr; + mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask; + mlxsw_sp->rif_ops_arr = mlxsw_sp2_rif_ops_arr; + mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals; + mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops; + mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops; + mlxsw_sp->span_ops = &mlxsw_sp2_span_ops; + mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3; + + return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); } static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) @@ -4854,8 +5234,10 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); mlxsw_sp_ports_remove(mlxsw_sp); + mlxsw_sp_port_module_info_fini(mlxsw_sp); mlxsw_sp_dpipe_fini(mlxsw_sp); - unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); + unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->netdevice_nb); if (mlxsw_sp->clock) { mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state); mlxsw_sp->ptp_ops->clock_fini(mlxsw_sp->clock); @@ -4869,6 +5251,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_span_fini(mlxsw_sp); mlxsw_sp_lag_fini(mlxsw_sp); mlxsw_sp_buffers_fini(mlxsw_sp); + mlxsw_sp_devlink_traps_fini(mlxsw_sp); mlxsw_sp_traps_fini(mlxsw_sp); mlxsw_sp_fids_fini(mlxsw_sp); mlxsw_sp_kvdl_fini(mlxsw_sp); @@ -5026,14 +5409,81 @@ static int mlxsw_sp1_resources_kvd_register(struct mlxsw_core *mlxsw_core) return 0; } +static int mlxsw_sp2_resources_kvd_register(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + struct devlink_resource_size_params kvd_size_params; + u32 kvd_size; + + if (!MLXSW_CORE_RES_VALID(mlxsw_core, KVD_SIZE)) + return -EIO; + + kvd_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE); + devlink_resource_size_params_init(&kvd_size_params, kvd_size, kvd_size, + MLXSW_SP_KVD_GRANULARITY, + DEVLINK_RESOURCE_UNIT_ENTRY); + + return devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD, + kvd_size, MLXSW_SP_RESOURCE_KVD, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &kvd_size_params); +} + +static int mlxsw_sp_resources_span_register(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + struct devlink_resource_size_params span_size_params; + u32 max_span; + + if (!MLXSW_CORE_RES_VALID(mlxsw_core, MAX_SPAN)) + return -EIO; + + max_span = MLXSW_CORE_RES_GET(mlxsw_core, MAX_SPAN); + devlink_resource_size_params_init(&span_size_params, max_span, max_span, + 1, DEVLINK_RESOURCE_UNIT_ENTRY); + + return devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_SPAN, + max_span, MLXSW_SP_RESOURCE_SPAN, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &span_size_params); +} + static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core) { - return mlxsw_sp1_resources_kvd_register(mlxsw_core); + int err; + + err = mlxsw_sp1_resources_kvd_register(mlxsw_core); + if (err) + return err; + + err = 
mlxsw_sp_resources_span_register(mlxsw_core); + if (err) + goto err_resources_span_register; + + return 0; + +err_resources_span_register: + devlink_resources_unregister(priv_to_devlink(mlxsw_core), NULL); + return err; } static int mlxsw_sp2_resources_register(struct mlxsw_core *mlxsw_core) { + int err; + + err = mlxsw_sp2_resources_kvd_register(mlxsw_core); + if (err) + return err; + + err = mlxsw_sp_resources_span_register(mlxsw_core); + if (err) + goto err_resources_span_register; + return 0; + +err_resources_span_register: + devlink_resources_unregister(priv_to_devlink(mlxsw_core), NULL); + return err; } static int mlxsw_sp_kvd_sizes_get(struct mlxsw_core *mlxsw_core, @@ -5230,6 +5680,10 @@ static struct mlxsw_driver mlxsw_sp1_driver = { .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get, .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, .flash_update = mlxsw_sp_flash_update, + .trap_init = mlxsw_sp_trap_init, + .trap_fini = mlxsw_sp_trap_fini, + .trap_action_set = mlxsw_sp_trap_action_set, + .trap_group_init = mlxsw_sp_trap_group_init, .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp1_resources_register, .kvd_sizes_get = mlxsw_sp_kvd_sizes_get, @@ -5260,6 +5714,43 @@ static struct mlxsw_driver mlxsw_sp2_driver = { .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get, .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, .flash_update = mlxsw_sp_flash_update, + .trap_init = mlxsw_sp_trap_init, + .trap_fini = mlxsw_sp_trap_fini, + .trap_action_set = mlxsw_sp_trap_action_set, + .trap_group_init = mlxsw_sp_trap_group_init, + .txhdr_construct = mlxsw_sp_txhdr_construct, + .resources_register = mlxsw_sp2_resources_register, + .params_register = mlxsw_sp2_params_register, + .params_unregister = mlxsw_sp2_params_unregister, + .ptp_transmitted = mlxsw_sp_ptp_transmitted, + .txhdr_len = MLXSW_TXHDR_LEN, + .profile = &mlxsw_sp2_config_profile, + .res_query_enabled = true, +}; + +static struct mlxsw_driver mlxsw_sp3_driver = { + .kind = mlxsw_sp3_driver_name, + .priv_size = sizeof(struct mlxsw_sp), + .init = mlxsw_sp3_init, + .fini = mlxsw_sp_fini, + .basic_trap_groups_set = mlxsw_sp_basic_trap_groups_set, + .port_split = mlxsw_sp_port_split, + .port_unsplit = mlxsw_sp_port_unsplit, + .sb_pool_get = mlxsw_sp_sb_pool_get, + .sb_pool_set = mlxsw_sp_sb_pool_set, + .sb_port_pool_get = mlxsw_sp_sb_port_pool_get, + .sb_port_pool_set = mlxsw_sp_sb_port_pool_set, + .sb_tc_pool_bind_get = mlxsw_sp_sb_tc_pool_bind_get, + .sb_tc_pool_bind_set = mlxsw_sp_sb_tc_pool_bind_set, + .sb_occ_snapshot = mlxsw_sp_sb_occ_snapshot, + .sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear, + .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get, + .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, + .flash_update = mlxsw_sp_flash_update, + .trap_init = mlxsw_sp_trap_init, + .trap_fini = mlxsw_sp_trap_fini, + .trap_action_set = mlxsw_sp_trap_action_set, + .trap_group_init = mlxsw_sp_trap_group_init, .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp2_resources_register, .params_register = mlxsw_sp2_params_register, @@ -6304,6 +6795,16 @@ static struct pci_driver mlxsw_sp2_pci_driver = { .id_table = mlxsw_sp2_pci_id_table, }; +static const struct pci_device_id mlxsw_sp3_pci_id_table[] = { + {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM3), 0}, + {0, }, +}; + +static struct pci_driver mlxsw_sp3_pci_driver = { + .name = mlxsw_sp3_driver_name, + .id_table = mlxsw_sp3_pci_id_table, +}; + static int __init mlxsw_sp_module_init(void) { int err; 
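The next hunk wires the new Spectrum-3 (mlxsw_sp3) core and PCI drivers into mlxsw_sp_module_init(), keeping the usual last-in-first-out goto unwinding: each added registration gets its own error label, and every later failure path unregisters, in reverse order, everything registered before it. A minimal stand-alone sketch of that idiom follows; the stub register/unregister functions are hypothetical placeholders for mlxsw_core_driver_register()/mlxsw_pci_driver_register(), not the real API.

/* Hypothetical stubs; only the register-then-unwind pattern is the point. */
static int register_sp1(void) { return 0; }
static int register_sp2(void) { return 0; }
static int register_sp3(void) { return 0; }
static void unregister_sp1(void) { }
static void unregister_sp2(void) { }

static int module_init_sketch(void)
{
	int err;

	err = register_sp1();
	if (err)
		return err;

	err = register_sp2();
	if (err)
		goto err_sp2_register;

	/* Newly added step: on failure, unwind sp2 first, then sp1. */
	err = register_sp3();
	if (err)
		goto err_sp3_register;

	return 0;

err_sp3_register:
	unregister_sp2();
err_sp2_register:
	unregister_sp1();
	return err;
}

int main(void)
{
	return module_init_sketch();
}

The matching module_exit path then simply calls the unregister helpers in the same reverse order, which is what the tail of this file's diff does for the three PCI and core drivers.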
@@ -6319,6 +6820,10 @@ static int __init mlxsw_sp_module_init(void) if (err) goto err_sp2_core_driver_register; + err = mlxsw_core_driver_register(&mlxsw_sp3_driver); + if (err) + goto err_sp3_core_driver_register; + err = mlxsw_pci_driver_register(&mlxsw_sp1_pci_driver); if (err) goto err_sp1_pci_driver_register; @@ -6327,11 +6832,19 @@ static int __init mlxsw_sp_module_init(void) if (err) goto err_sp2_pci_driver_register; + err = mlxsw_pci_driver_register(&mlxsw_sp3_pci_driver); + if (err) + goto err_sp3_pci_driver_register; + return 0; +err_sp3_pci_driver_register: + mlxsw_pci_driver_unregister(&mlxsw_sp2_pci_driver); err_sp2_pci_driver_register: mlxsw_pci_driver_unregister(&mlxsw_sp1_pci_driver); err_sp1_pci_driver_register: + mlxsw_core_driver_unregister(&mlxsw_sp3_driver); +err_sp3_core_driver_register: mlxsw_core_driver_unregister(&mlxsw_sp2_driver); err_sp2_core_driver_register: mlxsw_core_driver_unregister(&mlxsw_sp1_driver); @@ -6343,8 +6856,10 @@ err_sp1_core_driver_register: static void __exit mlxsw_sp_module_exit(void) { + mlxsw_pci_driver_unregister(&mlxsw_sp3_pci_driver); mlxsw_pci_driver_unregister(&mlxsw_sp2_pci_driver); mlxsw_pci_driver_unregister(&mlxsw_sp1_pci_driver); + mlxsw_core_driver_unregister(&mlxsw_sp3_driver); mlxsw_core_driver_unregister(&mlxsw_sp2_driver); mlxsw_core_driver_unregister(&mlxsw_sp1_driver); unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); @@ -6359,4 +6874,6 @@ MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>"); MODULE_DESCRIPTION("Mellanox Spectrum driver"); MODULE_DEVICE_TABLE(pci, mlxsw_sp1_pci_id_table); MODULE_DEVICE_TABLE(pci, mlxsw_sp2_pci_id_table); +MODULE_DEVICE_TABLE(pci, mlxsw_sp3_pci_id_table); MODULE_FIRMWARE(MLXSW_SP1_FW_FILENAME); +MODULE_FIRMWARE(MLXSW_SP2_FW_FILENAME); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 6664119fb0c8..a0f1f9dceec5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -14,6 +14,7 @@ #include <linux/dcbnl.h> #include <linux/in6.h> #include <linux/notifier.h> +#include <linux/net_namespace.h> #include <net/psample.h> #include <net/pkt_cls.h> #include <net/red.h> @@ -31,8 +32,6 @@ #define MLXSW_SP_MID_MAX 7000 -#define MLXSW_SP_PORTS_PER_CLUSTER_MAX 4 - #define MLXSW_SP_PORT_BASE_SPEED_25G 25000 /* Mb/s */ #define MLXSW_SP_PORT_BASE_SPEED_50G 50000 /* Mb/s */ @@ -47,6 +46,8 @@ #define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS "chunks" #define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS "large_chunks" +#define MLXSW_SP_RESOURCE_NAME_SPAN "span_agents" + enum mlxsw_sp_resource_id { MLXSW_SP_RESOURCE_KVD = 1, MLXSW_SP_RESOURCE_KVD_LINEAR, @@ -55,6 +56,7 @@ enum mlxsw_sp_resource_id { MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE, MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS, MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS, + MLXSW_SP_RESOURCE_SPAN, }; struct mlxsw_sp_port; @@ -138,6 +140,13 @@ struct mlxsw_sp_sb_vals; struct mlxsw_sp_port_type_speed_ops; struct mlxsw_sp_ptp_state; struct mlxsw_sp_ptp_ops; +struct mlxsw_sp_span_ops; + +struct mlxsw_sp_port_mapping { + u8 module; + u8 width; + u8 lane; +}; struct mlxsw_sp { struct mlxsw_sp_port **ports; @@ -146,7 +155,7 @@ struct mlxsw_sp { unsigned char base_mac[ETH_ALEN]; const unsigned char *mac_mask; struct mlxsw_sp_upper *lags; - int *port_to_module; + struct mlxsw_sp_port_mapping **port_mapping; struct mlxsw_sp_sb *sb; struct mlxsw_sp_bridge *bridge; struct mlxsw_sp_router *router; @@ -177,8 +186,10 @@ struct mlxsw_sp { const 
struct mlxsw_sp_sb_vals *sb_vals; const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops; const struct mlxsw_sp_ptp_ops *ptp_ops; + const struct mlxsw_sp_span_ops *span_ops; const struct mlxsw_listener *listeners; size_t listeners_count; + u32 lowest_shaper_bs; }; static inline struct mlxsw_sp_upper * @@ -225,6 +236,16 @@ struct mlxsw_sp_port_xstats { u64 tx_packets[IEEE_8021QAZ_MAX_TCS]; }; +struct mlxsw_sp_ptp_port_dir_stats { + u64 packets; + u64 timestamps; +}; + +struct mlxsw_sp_ptp_port_stats { + struct mlxsw_sp_ptp_port_dir_stats rx_gcd; + struct mlxsw_sp_ptp_port_dir_stats tx_gcd; +}; + struct mlxsw_sp_port { struct net_device *dev; struct mlxsw_sp_port_pcpu_stats __percpu *pcpu_stats; @@ -245,11 +266,11 @@ struct mlxsw_sp_port { struct ieee_pfc *pfc; enum mlxsw_reg_qpts_trust_state trust_state; } dcb; - struct { - u8 module; - u8 width; - u8 lane; - } mapping; + struct mlxsw_sp_port_mapping mapping; /* mapping is constant during the + * mlxsw_sp_port lifetime, however + * the same localport can have + * different mapping. + */ /* TC handles */ struct list_head mall_tc_list; struct { @@ -271,7 +292,12 @@ struct mlxsw_sp_port { struct hwtstamp_config hwtstamp_config; u16 ing_types; u16 egr_types; + struct mlxsw_sp_ptp_port_stats stats; } ptp; + u8 split_base_local_port; + struct { + struct delayed_work speed_update_dw; + } span; }; struct mlxsw_sp_port_type_speed_ops { @@ -279,14 +305,14 @@ struct mlxsw_sp_port_type_speed_ops { u32 ptys_eth_proto, struct ethtool_link_ksettings *cmd); void (*from_ptys_link)(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto, - unsigned long *mode); + u8 width, unsigned long *mode); u32 (*from_ptys_speed)(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto); void (*from_ptys_speed_duplex)(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, u32 ptys_eth_proto, struct ethtool_link_ksettings *cmd); - u32 (*to_ptys_advert_link)(struct mlxsw_sp *mlxsw_sp, + u32 (*to_ptys_advert_link)(struct mlxsw_sp *mlxsw_sp, u8 width, const struct ethtool_link_ksettings *cmd); - u32 (*to_ptys_speed)(struct mlxsw_sp *mlxsw_sp, u32 speed); + u32 (*to_ptys_speed)(struct mlxsw_sp *mlxsw_sp, u8 width, u32 speed); u32 (*to_ptys_upper_speed)(struct mlxsw_sp *mlxsw_sp, u32 upper_speed); int (*port_speed_base)(struct mlxsw_sp *mlxsw_sp, u8 local_port, u32 *base_speed); @@ -451,6 +477,7 @@ extern struct notifier_block mlxsw_sp_switchdev_notifier; /* spectrum.c */ void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb, u8 local_port, void *priv); +int mlxsw_sp_port_speed_get(struct mlxsw_sp_port *mlxsw_sp_port, u32 *speed); int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, bool dwrr, u8 dwrr_weight); @@ -461,7 +488,7 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, struct ieee_pfc *my_pfc); int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, - u8 next_index, u32 maxrate); + u8 next_index, u32 maxrate, u8 burst_size); enum mlxsw_reg_spms_state mlxsw_sp_stp_spms_state(u8 stp_state); int mlxsw_sp_port_vid_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, u8 state); @@ -481,6 +508,7 @@ int mlxsw_sp_flow_counter_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int *p_counter_index); void mlxsw_sp_flow_counter_free(struct mlxsw_sp *mlxsw_sp, unsigned int counter_index); +u32 mlxsw_sp_span_buffsize_get(struct mlxsw_sp *mlxsw_sp, int mtu, u32 speed); bool mlxsw_sp_port_dev_check(const struct net_device *dev); struct mlxsw_sp 
*mlxsw_sp_lower_get(struct net_device *dev); struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev); @@ -513,7 +541,8 @@ union mlxsw_sp_l3addr { struct in6_addr addr6; }; -int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, + struct netlink_ext_ack *extack); void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_netdevice_router_port_event(struct net_device *dev, unsigned long event, void *ptr); @@ -623,7 +652,8 @@ struct mlxsw_sp_acl_rule_info { unsigned int priority; struct mlxsw_afk_element_values values; struct mlxsw_afa_block *act_block; - u8 action_created:1; + u8 action_created:1, + egress_bind_blocker:1; unsigned int counter_index; }; @@ -642,6 +672,7 @@ struct mlxsw_sp_acl_block { struct mlxsw_sp *mlxsw_sp; unsigned int rule_count; unsigned int disable_count; + unsigned int egress_blocker_rule_count; struct net *net; }; @@ -657,7 +688,8 @@ void mlxsw_sp_acl_block_destroy(struct mlxsw_sp_acl_block *block); int mlxsw_sp_acl_block_bind(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, struct mlxsw_sp_port *mlxsw_sp_port, - bool ingress); + bool ingress, + struct netlink_ext_ack *extack); int mlxsw_sp_acl_block_unbind(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, struct mlxsw_sp_port *mlxsw_sp_port, @@ -828,6 +860,10 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_red_qopt_offload *p); int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_prio_qopt_offload *p); +int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_ets_qopt_offload *p); +int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_tbf_qopt_offload *p); /* spectrum_fid.c */ bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index); @@ -955,4 +991,22 @@ void mlxsw_sp_nve_fini(struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_nve_inc_parsing_depth_get(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_nve_inc_parsing_depth_put(struct mlxsw_sp *mlxsw_sp); +/* spectrum_trap.c */ +int mlxsw_sp_devlink_traps_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_devlink_traps_fini(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_trap_init(struct mlxsw_core *mlxsw_core, + const struct devlink_trap *trap, void *trap_ctx); +void mlxsw_sp_trap_fini(struct mlxsw_core *mlxsw_core, + const struct devlink_trap *trap, void *trap_ctx); +int mlxsw_sp_trap_action_set(struct mlxsw_core *mlxsw_core, + const struct devlink_trap *trap, + enum devlink_trap_action action); +int mlxsw_sp_trap_group_init(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_group *group); + +static inline struct net *mlxsw_sp_net(struct mlxsw_sp *mlxsw_sp) +{ + return mlxsw_core_net(mlxsw_sp->core); +} + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 84a87d059333..3d3cca596116 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -8,6 +8,7 @@ #include <linux/string.h> #include <linux/rhashtable.h> #include <linux/netdevice.h> +#include <linux/mutex.h> #include <net/net_namespace.h> #include <net/tc_act/tc_vlan.h> @@ -25,6 +26,7 @@ struct mlxsw_sp_acl { struct mlxsw_sp_fid *dummy_fid; struct rhashtable ruleset_ht; struct list_head rules; + struct mutex rules_lock; /* Protects rules list */ struct { struct delayed_work dw; unsigned long interval; /* ms */ @@ -239,7 +241,8 @@ mlxsw_sp_acl_block_lookup(struct mlxsw_sp_acl_block *block, int 
mlxsw_sp_acl_block_bind(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, struct mlxsw_sp_port *mlxsw_sp_port, - bool ingress) + bool ingress, + struct netlink_ext_ack *extack) { struct mlxsw_sp_acl_block_binding *binding; int err; @@ -247,6 +250,11 @@ int mlxsw_sp_acl_block_bind(struct mlxsw_sp *mlxsw_sp, if (WARN_ON(mlxsw_sp_acl_block_lookup(block, mlxsw_sp_port, ingress))) return -EEXIST; + if (!ingress && block->egress_blocker_rule_count) { + NL_SET_ERR_MSG_MOD(extack, "Block cannot be bound to egress because it contains unsupported rules"); + return -EOPNOTSUPP; + } + binding = kzalloc(sizeof(*binding), GFP_KERNEL); if (!binding) return -ENOMEM; @@ -672,6 +680,7 @@ int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset; const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + struct mlxsw_sp_acl_block *block = ruleset->ht_key.block; int err; err = ops->rule_add(mlxsw_sp, ruleset->priv, rule->priv, rule->rulei); @@ -689,14 +698,16 @@ int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp, * one, to be directly bound to device. The rest of the * rulesets are bound by "Goto action set". */ - err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset, - ruleset->ht_key.block); + err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset, block); if (err) goto err_ruleset_block_bind; } + mutex_lock(&mlxsw_sp->acl->rules_lock); list_add_tail(&rule->list, &mlxsw_sp->acl->rules); - ruleset->ht_key.block->rule_count++; + mutex_unlock(&mlxsw_sp->acl->rules_lock); + block->rule_count++; + block->egress_blocker_rule_count += rule->rulei->egress_bind_blocker; return 0; err_ruleset_block_bind: @@ -712,9 +723,13 @@ void mlxsw_sp_acl_rule_del(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset; const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + struct mlxsw_sp_acl_block *block = ruleset->ht_key.block; + block->egress_blocker_rule_count -= rule->rulei->egress_bind_blocker; ruleset->ht_key.block->rule_count--; + mutex_lock(&mlxsw_sp->acl->rules_lock); list_del(&rule->list); + mutex_unlock(&mlxsw_sp->acl->rules_lock); if (!ruleset->ht_key.chain_index && mlxsw_sp_acl_ruleset_is_singular(ruleset)) mlxsw_sp_acl_ruleset_block_unbind(mlxsw_sp, ruleset, @@ -774,19 +789,18 @@ static int mlxsw_sp_acl_rules_activity_update(struct mlxsw_sp_acl *acl) struct mlxsw_sp_acl_rule *rule; int err; - /* Protect internal structures from changes */ - rtnl_lock(); + mutex_lock(&acl->rules_lock); list_for_each_entry(rule, &acl->rules, list) { err = mlxsw_sp_acl_rule_activity_update(acl->mlxsw_sp, rule); if (err) goto err_rule_update; } - rtnl_unlock(); + mutex_unlock(&acl->rules_lock); return 0; err_rule_update: - rtnl_unlock(); + mutex_unlock(&acl->rules_lock); return err; } @@ -871,6 +885,7 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp) acl->dummy_fid = fid; INIT_LIST_HEAD(&acl->rules); + mutex_init(&acl->rules_lock); err = mlxsw_sp_acl_tcam_init(mlxsw_sp, &acl->tcam); if (err) goto err_acl_ops_init; @@ -883,6 +898,7 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp) return 0; err_acl_ops_init: + mutex_destroy(&acl->rules_lock); mlxsw_sp_fid_put(fid); err_fid_get: rhashtable_destroy(&acl->ruleset_ht); @@ -899,6 +915,7 @@ void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp) cancel_delayed_work_sync(&mlxsw_sp->acl->rule_activity_update.dw); mlxsw_sp_acl_tcam_fini(mlxsw_sp, &acl->tcam); + mutex_destroy(&acl->rules_lock); WARN_ON(!list_empty(&acl->rules)); mlxsw_sp_fid_put(acl->dummy_fid); 
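The spectrum_acl.c hunks above replace the rtnl_lock()-based protection of the acl->rules list with a dedicated rules_lock mutex, held both around the list_add_tail()/list_del() calls in rule add/del and around the periodic rule-activity walk, so the walk no longer needs the much broader RTNL lock. A stand-alone user-space sketch of the same pattern is shown below; the names (rule, rules_lock, rules_activity_update) are illustrative stand-ins, not the mlxsw code itself.

#include <pthread.h>
#include <stdlib.h>

struct rule {
	struct rule *next;
	int hits;
};

static struct rule *rules;			/* protected by rules_lock */
static pthread_mutex_t rules_lock = PTHREAD_MUTEX_INITIALIZER;

static void rule_add(struct rule *r)
{
	pthread_mutex_lock(&rules_lock);
	r->next = rules;
	rules = r;
	pthread_mutex_unlock(&rules_lock);
}

static void rule_del(struct rule *r)
{
	struct rule **pp;

	pthread_mutex_lock(&rules_lock);
	for (pp = &rules; *pp; pp = &(*pp)->next) {
		if (*pp == r) {
			*pp = r->next;
			break;
		}
	}
	pthread_mutex_unlock(&rules_lock);
}

/* Periodic worker: walk the list under the same lock, like the
 * rule-activity update in the hunk above.
 */
static void rules_activity_update(void)
{
	struct rule *r;

	pthread_mutex_lock(&rules_lock);
	for (r = rules; r; r = r->next)
		r->hits++;
	pthread_mutex_unlock(&rules_lock);
}

int main(void)
{
	struct rule *r = calloc(1, sizeof(*r));

	if (!r)
		return 1;
	rule_add(r);
	rules_activity_update();
	rule_del(r);
	free(r);
	return 0;
}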
rhashtable_destroy(&acl->ruleset_ht); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 888ba4300bcc..968f0902e4fe 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -35,6 +35,7 @@ struct mlxsw_sp_sb_cm { }; #define MLXSW_SP_SB_INFI -1U +#define MLXSW_SP_SB_REST -2U struct mlxsw_sp_sb_pm { u32 min_buff; @@ -250,6 +251,10 @@ static int mlxsw_sp_sb_pm_occ_clear(struct mlxsw_sp *mlxsw_sp, u8 local_port, &mlxsw_sp->sb_vals->pool_dess[pool_index]; char sbpm_pl[MLXSW_REG_SBPM_LEN]; + if (local_port == MLXSW_PORT_CPU_PORT && + des->dir == MLXSW_REG_SBXX_DIR_INGRESS) + return 0; + mlxsw_reg_sbpm_pack(sbpm_pl, local_port, des->pool, des->dir, true, 0, 0); return mlxsw_reg_trans_query(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl, @@ -273,6 +278,10 @@ static int mlxsw_sp_sb_pm_occ_query(struct mlxsw_sp *mlxsw_sp, u8 local_port, char sbpm_pl[MLXSW_REG_SBPM_LEN]; struct mlxsw_sp_sb_pm *pm; + if (local_port == MLXSW_PORT_CPU_PORT && + des->dir == MLXSW_REG_SBXX_DIR_INGRESS) + return 0; + pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool_index); mlxsw_reg_sbpm_pack(sbpm_pl, local_port, des->pool, des->dir, false, 0, 0); @@ -413,19 +422,16 @@ static void mlxsw_sp_sb_ports_fini(struct mlxsw_sp *mlxsw_sp) .freeze_size = _freeze_size, \ } -#define MLXSW_SP1_SB_PR_INGRESS_SIZE 12440000 -#define MLXSW_SP1_SB_PR_EGRESS_SIZE 13232000 #define MLXSW_SP1_SB_PR_CPU_SIZE (256 * 1000) /* Order according to mlxsw_sp1_sb_pool_dess */ static const struct mlxsw_sp_sb_pr mlxsw_sp1_sb_prs[] = { - MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, - MLXSW_SP1_SB_PR_INGRESS_SIZE), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_REST), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), - MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC, - MLXSW_SP1_SB_PR_EGRESS_SIZE, true, false), + MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_REST, + true, false), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), @@ -437,19 +443,16 @@ static const struct mlxsw_sp_sb_pr mlxsw_sp1_sb_prs[] = { MLXSW_SP1_SB_PR_CPU_SIZE, true, false), }; -#define MLXSW_SP2_SB_PR_INGRESS_SIZE 35297568 -#define MLXSW_SP2_SB_PR_EGRESS_SIZE 35297568 #define MLXSW_SP2_SB_PR_CPU_SIZE (256 * 1000) /* Order according to mlxsw_sp2_sb_pool_dess */ static const struct mlxsw_sp_sb_pr mlxsw_sp2_sb_prs[] = { - MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, - MLXSW_SP2_SB_PR_INGRESS_SIZE), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_REST), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), - MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC, - MLXSW_SP2_SB_PR_EGRESS_SIZE, true, false), + MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_REST, + true, false), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), @@ -463,11 +466,33 @@ static const struct mlxsw_sp_sb_pr mlxsw_sp2_sb_prs[] = { static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_sb_pr *prs, + const struct mlxsw_sp_sb_pool_des *pool_dess, size_t prs_len) { + /* Round down, unlike mlxsw_sp_bytes_cells(). 
*/ + u32 sb_cells = div_u64(mlxsw_sp->sb->sb_size, mlxsw_sp->sb->cell_size); + u32 rest_cells[2] = {sb_cells, sb_cells}; int i; int err; + /* Calculate how much space to give to the "REST" pools in either + * direction. + */ + for (i = 0; i < prs_len; i++) { + enum mlxsw_reg_sbxx_dir dir = pool_dess[i].dir; + u32 size = prs[i].size; + u32 size_cells; + + if (size == MLXSW_SP_SB_INFI || size == MLXSW_SP_SB_REST) + continue; + + size_cells = mlxsw_sp_bytes_cells(mlxsw_sp, size); + if (WARN_ON_ONCE(size_cells > rest_cells[dir])) + continue; + + rest_cells[dir] -= size_cells; + } + for (i = 0; i < prs_len; i++) { u32 size = prs[i].size; u32 size_cells; @@ -475,6 +500,10 @@ static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp, if (size == MLXSW_SP_SB_INFI) { err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, prs[i].mode, 0, true); + } else if (size == MLXSW_SP_SB_REST) { + size_cells = rest_cells[pool_dess[i].dir]; + err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, prs[i].mode, + size_cells, false); } else { size_cells = mlxsw_sp_bytes_cells(mlxsw_sp, size); err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, prs[i].mode, @@ -896,7 +925,7 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, CELL_SIZE)) return -EIO; - if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_BUFFER_SIZE)) + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, GUARANTEED_SHARED_BUFFER)) return -EIO; if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_HEADROOM_SIZE)) @@ -907,7 +936,7 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) return -ENOMEM; mlxsw_sp->sb->cell_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, CELL_SIZE); mlxsw_sp->sb->sb_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, - MAX_BUFFER_SIZE); + GUARANTEED_SHARED_BUFFER); max_headroom_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_HEADROOM_SIZE); /* Round down, because this limit must not be overstepped. 
*/ @@ -918,6 +947,7 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_sb_ports_init; err = mlxsw_sp_sb_prs_init(mlxsw_sp, mlxsw_sp->sb_vals->prs, + mlxsw_sp->sb_vals->pool_dess, mlxsw_sp->sb_vals->pool_count); if (err) goto err_sb_prs_init; @@ -1005,7 +1035,8 @@ int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core, mode = (enum mlxsw_reg_sbpr_mode) threshold_type; pr = &mlxsw_sp->sb_vals->prs[pool_index]; - if (size > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE)) { + if (size > MLXSW_CORE_RES_GET(mlxsw_sp->core, + GUARANTEED_SHARED_BUFFER)) { NL_SET_ERR_MSG_MOD(extack, "Exceeded shared buffer size"); return -EINVAL; } @@ -1013,12 +1044,12 @@ int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core, if (pr->freeze_mode && pr->mode != mode) { NL_SET_ERR_MSG_MOD(extack, "Changing this pool's threshold type is forbidden"); return -EINVAL; - }; + } if (pr->freeze_size && pr->size != size) { NL_SET_ERR_MSG_MOD(extack, "Changing this pool's size is forbidden"); return -EINVAL; - }; + } return mlxsw_sp_sb_pr_write(mlxsw_sp, pool_index, mode, pool_size, false); @@ -1085,6 +1116,11 @@ int mlxsw_sp_sb_port_pool_set(struct mlxsw_core_port *mlxsw_core_port, u32 max_buff; int err; + if (local_port == MLXSW_PORT_CPU_PORT) { + NL_SET_ERR_MSG_MOD(extack, "Changing CPU port's threshold is forbidden"); + return -EINVAL; + } + err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool_index, threshold, &max_buff, extack); if (err) @@ -1130,6 +1166,11 @@ int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port, u32 max_buff; int err; + if (local_port == MLXSW_PORT_CPU_PORT) { + NL_SET_ERR_MSG_MOD(extack, "Changing CPU port's binding is forbidden"); + return -EINVAL; + } + if (dir != mlxsw_sp->sb_vals->pool_dess[pool_index].dir) { NL_SET_ERR_MSG_MOD(extack, "Binding egress TC to ingress pool and vice versa is forbidden"); return -EINVAL; @@ -1187,6 +1228,11 @@ static void mlxsw_sp_sb_sr_occ_query_cb(struct mlxsw_core *mlxsw_core, local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) { if (!mlxsw_sp->ports[local_port]) continue; + if (local_port == MLXSW_PORT_CPU_PORT) { + /* Ingress quotas are not supported for the CPU port */ + masked_count++; + continue; + } for (i = 0; i < MLXSW_SP_SB_ING_TC_COUNT; i++) { cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, i, MLXSW_REG_SBXX_DIR_INGRESS); @@ -1222,7 +1268,7 @@ int mlxsw_sp_sb_occ_snapshot(struct mlxsw_core *mlxsw_core, char *sbsr_pl; u8 masked_count; u8 local_port_1; - u8 local_port = 0; + u8 local_port; int i; int err; int err2; @@ -1231,8 +1277,8 @@ int mlxsw_sp_sb_occ_snapshot(struct mlxsw_core *mlxsw_core, if (!sbsr_pl) return -ENOMEM; + local_port = MLXSW_PORT_CPU_PORT; next_batch: - local_port++; local_port_1 = local_port; masked_count = 0; mlxsw_reg_sbsr_pack(sbsr_pl, false); @@ -1243,7 +1289,11 @@ next_batch: for (; local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) { if (!mlxsw_sp->ports[local_port]) continue; - mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1); + if (local_port != MLXSW_PORT_CPU_PORT) { + /* Ingress quotas are not supported for the CPU port */ + mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, + local_port, 1); + } mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1); for (i = 0; i < mlxsw_sp->sb_vals->pool_count; i++) { err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i, @@ -1264,8 +1314,10 @@ do_query: cb_priv); if (err) goto out; - if (local_port < mlxsw_core_max_ports(mlxsw_core)) + if (local_port < mlxsw_core_max_ports(mlxsw_core)) { + local_port++; goto 
next_batch; + } out: err2 = mlxsw_reg_trans_bulk_wait(&bulk_list); @@ -1282,7 +1334,7 @@ int mlxsw_sp_sb_occ_max_clear(struct mlxsw_core *mlxsw_core, LIST_HEAD(bulk_list); char *sbsr_pl; unsigned int masked_count; - u8 local_port = 0; + u8 local_port; int i; int err; int err2; @@ -1291,8 +1343,8 @@ int mlxsw_sp_sb_occ_max_clear(struct mlxsw_core *mlxsw_core, if (!sbsr_pl) return -ENOMEM; + local_port = MLXSW_PORT_CPU_PORT; next_batch: - local_port++; masked_count = 0; mlxsw_reg_sbsr_pack(sbsr_pl, true); for (i = 0; i < MLXSW_SP_SB_ING_TC_COUNT; i++) @@ -1302,7 +1354,11 @@ next_batch: for (; local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) { if (!mlxsw_sp->ports[local_port]) continue; - mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1); + if (local_port != MLXSW_PORT_CPU_PORT) { + /* Ingress quotas are not supported for the CPU port */ + mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, + local_port, 1); + } mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1); for (i = 0; i < mlxsw_sp->sb_vals->pool_count; i++) { err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i, @@ -1319,8 +1375,10 @@ do_query: &bulk_list, NULL, 0); if (err) goto out; - if (local_port < mlxsw_core_max_ports(mlxsw_core)) + if (local_port < mlxsw_core_max_ports(mlxsw_core)) { + local_port++; goto next_batch; + } out: err2 = mlxsw_reg_trans_bulk_wait(&bulk_list); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c index 21296fa7f7fb..49a72a8f1f57 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c @@ -160,7 +160,7 @@ static int __mlxsw_sp_dcbnl_ieee_setets(struct mlxsw_sp_port *mlxsw_sp_port, u8 weight = ets->tc_tx_bw[i]; err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, dwrr, weight); if (err) { netdev_err(dev, "Failed to link subgroup ETS element %d to group\n", @@ -198,7 +198,7 @@ err_port_ets_set: u8 weight = my_ets->tc_tx_bw[i]; err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, dwrr, weight); } return err; @@ -369,6 +369,17 @@ err_update_qrwe: } static int +mlxsw_sp_port_dcb_app_update_qpdp(struct mlxsw_sp_port *mlxsw_sp_port, + u8 default_prio) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qpdp_pl[MLXSW_REG_QPDP_LEN]; + + mlxsw_reg_qpdp_pack(qpdp_pl, mlxsw_sp_port->local_port, default_prio); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpdp), qpdp_pl); +} + +static int mlxsw_sp_port_dcb_app_update_qpdpm(struct mlxsw_sp_port *mlxsw_sp_port, struct dcb_ieee_app_dscp_map *map) { @@ -405,6 +416,12 @@ static int mlxsw_sp_port_dcb_app_update(struct mlxsw_sp_port *mlxsw_sp_port) int err; default_prio = mlxsw_sp_port_dcb_app_default_prio(mlxsw_sp_port); + err = mlxsw_sp_port_dcb_app_update_qpdp(mlxsw_sp_port, default_prio); + if (err) { + netdev_err(mlxsw_sp_port->dev, "Couldn't configure port default priority\n"); + return err; + } + have_dscp = mlxsw_sp_port_dcb_app_prio_dscp_map(mlxsw_sp_port, &prio_map); @@ -507,9 +524,9 @@ static int mlxsw_sp_dcbnl_ieee_setmaxrate(struct net_device *dev, for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, - maxrate->tc_maxrate[i]); + maxrate->tc_maxrate[i], 0); if (err) { netdev_err(dev, "Failed to set maxrate for TC %d\n", i); goto 
err_port_ets_maxrate_set; @@ -523,8 +540,9 @@ static int mlxsw_sp_dcbnl_ieee_setmaxrate(struct net_device *dev, err_port_ets_maxrate_set: for (i--; i >= 0; i--) mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, - i, 0, my_maxrate->tc_maxrate[i]); + MLXSW_REG_QEEC_HR_SUBGROUP, + i, 0, + my_maxrate->tc_maxrate[i], 0); return err; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 49933818c6f5..2dc0978428e6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -215,7 +215,7 @@ mlxsw_sp_dpipe_table_erif_entries_dump(void *priv, bool counters_enabled, start_again: err = devlink_dpipe_entry_ctx_prepare(dump_ctx); if (err) - return err; + goto err_ctx_prepare; j = 0; for (; i < rif_count; i++) { struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); @@ -247,6 +247,7 @@ start_again: return 0; err_entry_append: err_entry_get: +err_ctx_prepare: rtnl_unlock(); devlink_dpipe_entry_clear(&entry); return err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 202e9a246019..b607919c8ad0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -21,6 +21,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, struct netlink_ext_ack *extack) { const struct flow_action_entry *act; + int mirror_act_count = 0; int err, i; if (!flow_action_has_entries(flow_action)) @@ -78,6 +79,16 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid; u16 fid_index; + if (mlxsw_sp_acl_block_is_egress_bound(block)) { + NL_SET_ERR_MSG_MOD(extack, "Redirect action is not supported on egress"); + return -EOPNOTSUPP; + } + + /* Forbid block with this rulei to be bound + * to egress in future. + */ + rulei->egress_bind_blocker = 1; + fid = mlxsw_sp_acl_dummy_fid(mlxsw_sp); fid_index = mlxsw_sp_fid_index(fid); err = mlxsw_sp_acl_rulei_act_fid_set(mlxsw_sp, rulei, @@ -95,6 +106,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, case FLOW_ACTION_MIRRED: { struct net_device *out_dev = act->dev; + if (mirror_act_count++) { + NL_SET_ERR_MSG_MOD(extack, "Multiple mirror actions per rule are not supported"); + return -EOPNOTSUPP; + } + err = mlxsw_sp_acl_rulei_act_mirror(mlxsw_sp, rulei, block, out_dev, extack); @@ -257,6 +273,12 @@ static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp, flow_rule_match_tcp(rule, &match); + if (match.mask->flags & htons(0x0E00)) { + NL_SET_ERR_MSG_MOD(f->common.extack, "TCP flags match not supported on reserved bits"); + dev_err(mlxsw_sp->bus_info->dev, "TCP flags match not supported on reserved bits\n"); + return -EINVAL; + } + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_TCP_FLAGS, ntohs(match.key->flags), ntohs(match.mask->flags)); @@ -390,6 +412,12 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, NL_SET_ERR_MSG_MOD(f->common.extack, "vlan_id key is not supported on egress"); return -EOPNOTSUPP; } + + /* Forbid block with this rulei to be bound + * to egress in future. 
+ */ + rulei->egress_bind_blocker = 1; + if (match.mask->vlan_id != 0) mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_VID, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index 6400cd644b7a..a8525992528f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -3,8 +3,10 @@ #include <net/ip_tunnels.h> #include <net/ip6_tunnel.h> +#include <net/inet_ecn.h> #include "spectrum_ipip.h" +#include "reg.h" struct ip_tunnel_parm mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev) @@ -338,3 +340,61 @@ static const struct mlxsw_sp_ipip_ops mlxsw_sp_ipip_gre4_ops = { const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[] = { [MLXSW_SP_IPIP_TYPE_GRE4] = &mlxsw_sp_ipip_gre4_ops, }; + +static int mlxsw_sp_ipip_ecn_encap_init_one(struct mlxsw_sp *mlxsw_sp, + u8 inner_ecn, u8 outer_ecn) +{ + char tieem_pl[MLXSW_REG_TIEEM_LEN]; + + mlxsw_reg_tieem_pack(tieem_pl, inner_ecn, outer_ecn); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tieem), tieem_pl); +} + +int mlxsw_sp_ipip_ecn_encap_init(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + /* Iterate over inner ECN values */ + for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) { + u8 outer_ecn = INET_ECN_encapsulate(0, i); + int err; + + err = mlxsw_sp_ipip_ecn_encap_init_one(mlxsw_sp, i, outer_ecn); + if (err) + return err; + } + + return 0; +} + +static int mlxsw_sp_ipip_ecn_decap_init_one(struct mlxsw_sp *mlxsw_sp, + u8 inner_ecn, u8 outer_ecn) +{ + char tidem_pl[MLXSW_REG_TIDEM_LEN]; + bool trap_en, set_ce = false; + u8 new_inner_ecn; + + trap_en = __INET_ECN_decapsulate(outer_ecn, inner_ecn, &set_ce); + new_inner_ecn = set_ce ? INET_ECN_CE : inner_ecn; + + mlxsw_reg_tidem_pack(tidem_pl, outer_ecn, inner_ecn, new_inner_ecn, + trap_en, trap_en ? 
MLXSW_TRAP_ID_DECAP_ECN0 : 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tidem), tidem_pl); +} + +int mlxsw_sp_ipip_ecn_decap_init(struct mlxsw_sp *mlxsw_sp) +{ + int i, j, err; + + /* Iterate over inner ECN values */ + for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) { + /* Iterate over outer ECN values */ + for (j = INET_ECN_NOT_ECT; j <= INET_ECN_CE; j++) { + err = mlxsw_sp_ipip_ecn_decap_init_one(mlxsw_sp, i, j); + if (err) + return err; + } + } + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c index 17f334b46c40..2153bcc4b585 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c @@ -870,7 +870,7 @@ void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fid_vni(fid, &vni))) goto out; - nve_dev = dev_get_by_index(&init_net, nve_ifindex); + nve_dev = dev_get_by_index(mlxsw_sp_net(mlxsw_sp), nve_ifindex); if (!nve_dev) goto out; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c index 38bb1cfe4e8c..34f7c3501b08 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -630,6 +630,8 @@ static void mlxsw_sp1_ptp_ht_gc_collect(struct mlxsw_sp_ptp_state *ptp_state, struct mlxsw_sp1_ptp_unmatched *unmatched) { + struct mlxsw_sp_ptp_port_dir_stats *stats; + struct mlxsw_sp_port *mlxsw_sp_port; int err; /* If an unmatched entry has an SKB, it has to be handed over to the @@ -650,6 +652,17 @@ mlxsw_sp1_ptp_ht_gc_collect(struct mlxsw_sp_ptp_state *ptp_state, /* The packet was matched with timestamp during the walk. */ goto out; + mlxsw_sp_port = ptp_state->mlxsw_sp->ports[unmatched->key.local_port]; + if (mlxsw_sp_port) { + stats = unmatched->key.ingress ? + &mlxsw_sp_port->ptp.stats.rx_gcd : + &mlxsw_sp_port->ptp.stats.tx_gcd; + if (unmatched->skb) + stats->packets++; + else + stats->timestamps++; + } + /* mlxsw_sp1_ptp_unmatched_finish() invokes netif_receive_skb(). 
While * the comment at that function states that it can only be called in * soft IRQ context, this pattern of local_bh_disable() + @@ -907,6 +920,7 @@ static int mlxsw_sp_ptp_get_message_types(const struct hwtstamp_config *config, egr_types = 0xff; break; case HWTSTAMP_TX_ONESTEP_SYNC: + case HWTSTAMP_TX_ONESTEP_P2P: return -ERANGE; } @@ -1002,27 +1016,17 @@ mlxsw_sp1_ptp_port_shaper_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable) static int mlxsw_sp1_ptp_port_shaper_check(struct mlxsw_sp_port *mlxsw_sp_port) { - const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops; - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char ptys_pl[MLXSW_REG_PTYS_LEN]; - u32 eth_proto_oper, speed; bool ptps = false; int err, i; + u32 speed; if (!mlxsw_sp1_ptp_hwtstamp_enabled(mlxsw_sp_port)) return mlxsw_sp1_ptp_port_shaper_set(mlxsw_sp_port, false); - port_type_speed_ops = mlxsw_sp->port_type_speed_ops; - port_type_speed_ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, - mlxsw_sp_port->local_port, 0, - false); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); + err = mlxsw_sp_port_speed_get(mlxsw_sp_port, &speed); if (err) return err; - port_type_speed_ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, NULL, NULL, - ð_proto_oper); - speed = port_type_speed_ops->from_ptys_speed(mlxsw_sp, eth_proto_oper); for (i = 0; i < MLXSW_SP1_PTP_SHAPER_PARAMS_LEN; i++) { if (mlxsw_sp1_ptp_shaper_params[i].ethtool_speed == speed) { ptps = true; @@ -1098,3 +1102,57 @@ int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, return 0; } + +struct mlxsw_sp_ptp_port_stat { + char str[ETH_GSTRING_LEN]; + ptrdiff_t offset; +}; + +#define MLXSW_SP_PTP_PORT_STAT(NAME, FIELD) \ + { \ + .str = NAME, \ + .offset = offsetof(struct mlxsw_sp_ptp_port_stats, \ + FIELD), \ + } + +static const struct mlxsw_sp_ptp_port_stat mlxsw_sp_ptp_port_stats[] = { + MLXSW_SP_PTP_PORT_STAT("ptp_rx_gcd_packets", rx_gcd.packets), + MLXSW_SP_PTP_PORT_STAT("ptp_rx_gcd_timestamps", rx_gcd.timestamps), + MLXSW_SP_PTP_PORT_STAT("ptp_tx_gcd_packets", tx_gcd.packets), + MLXSW_SP_PTP_PORT_STAT("ptp_tx_gcd_timestamps", tx_gcd.timestamps), +}; + +#undef MLXSW_SP_PTP_PORT_STAT + +#define MLXSW_SP_PTP_PORT_STATS_LEN \ + ARRAY_SIZE(mlxsw_sp_ptp_port_stats) + +int mlxsw_sp1_get_stats_count(void) +{ + return MLXSW_SP_PTP_PORT_STATS_LEN; +} + +void mlxsw_sp1_get_stats_strings(u8 **p) +{ + int i; + + for (i = 0; i < MLXSW_SP_PTP_PORT_STATS_LEN; i++) { + memcpy(*p, mlxsw_sp_ptp_port_stats[i].str, + ETH_GSTRING_LEN); + *p += ETH_GSTRING_LEN; + } +} + +void mlxsw_sp1_get_stats(struct mlxsw_sp_port *mlxsw_sp_port, + u64 *data, int data_index) +{ + void *stats = &mlxsw_sp_port->ptp.stats; + ptrdiff_t offset; + int i; + + data += data_index; + for (i = 0; i < MLXSW_SP_PTP_PORT_STATS_LEN; i++) { + offset = mlxsw_sp_ptp_port_stats[i].offset; + *data++ = *(u64 *)(stats + offset); + } +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h index 72e55f6926b9..8c386571afce 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h @@ -59,6 +59,11 @@ void mlxsw_sp1_ptp_shaper_work(struct work_struct *work); int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, struct ethtool_ts_info *info); +int mlxsw_sp1_get_stats_count(void); +void mlxsw_sp1_get_stats_strings(u8 **p); +void mlxsw_sp1_get_stats(struct mlxsw_sp_port *mlxsw_sp_port, + u64 *data, int data_index); + #else static inline struct mlxsw_sp_ptp_clock * @@ -125,6 +130,19 @@ 
static inline int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_ptp_get_ts_info_noptp(info); } +static inline int mlxsw_sp1_get_stats_count(void) +{ + return 0; +} + +static inline void mlxsw_sp1_get_stats_strings(u8 **p) +{ +} + +static inline void mlxsw_sp1_get_stats(struct mlxsw_sp_port *mlxsw_sp_port, + u64 *data, int data_index) +{ +} #endif static inline struct mlxsw_sp_ptp_clock * @@ -183,4 +201,18 @@ static inline int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_ptp_get_ts_info_noptp(info); } +static inline int mlxsw_sp2_get_stats_count(void) +{ + return 0; +} + +static inline void mlxsw_sp2_get_stats_strings(u8 **p) +{ +} + +static inline void mlxsw_sp2_get_stats(struct mlxsw_sp_port *mlxsw_sp_port, + u64 *data, int data_index) +{ +} + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index bdf53cf350f6..02526c53d4f5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -18,6 +18,8 @@ enum mlxsw_sp_qdisc_type { MLXSW_SP_QDISC_NO_QDISC, MLXSW_SP_QDISC_RED, MLXSW_SP_QDISC_PRIO, + MLXSW_SP_QDISC_ETS, + MLXSW_SP_QDISC_TBF, }; struct mlxsw_sp_qdisc_ops { @@ -195,6 +197,20 @@ mlxsw_sp_qdisc_get_xstats(struct mlxsw_sp_port *mlxsw_sp_port, return -EOPNOTSUPP; } +static u64 +mlxsw_sp_xstats_backlog(struct mlxsw_sp_port_xstats *xstats, int tclass_num) +{ + return xstats->backlog[tclass_num] + + xstats->backlog[tclass_num + 8]; +} + +static u64 +mlxsw_sp_xstats_tail_drop(struct mlxsw_sp_port_xstats *xstats, int tclass_num) +{ + return xstats->tail_drop[tclass_num] + + xstats->tail_drop[tclass_num + 8]; +} + static void mlxsw_sp_qdisc_bstats_per_priority_get(struct mlxsw_sp_port_xstats *xstats, u8 prio_bitmap, u64 *tx_packets, @@ -212,6 +228,70 @@ mlxsw_sp_qdisc_bstats_per_priority_get(struct mlxsw_sp_port_xstats *xstats, } } +static void +mlxsw_sp_qdisc_collect_tc_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u64 *p_tx_bytes, u64 *p_tx_packets, + u64 *p_drops, u64 *p_backlog) +{ + u8 tclass_num = mlxsw_sp_qdisc->tclass_num; + struct mlxsw_sp_port_xstats *xstats; + u64 tx_bytes, tx_packets; + + xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; + mlxsw_sp_qdisc_bstats_per_priority_get(xstats, + mlxsw_sp_qdisc->prio_bitmap, + &tx_packets, &tx_bytes); + + *p_tx_packets += tx_packets; + *p_tx_bytes += tx_bytes; + *p_drops += xstats->wred_drop[tclass_num] + + mlxsw_sp_xstats_tail_drop(xstats, tclass_num); + *p_backlog += mlxsw_sp_xstats_backlog(xstats, tclass_num); +} + +static void +mlxsw_sp_qdisc_update_stats(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u64 tx_bytes, u64 tx_packets, + u64 drops, u64 backlog, + struct tc_qopt_offload_stats *stats_ptr) +{ + struct mlxsw_sp_qdisc_stats *stats_base = &mlxsw_sp_qdisc->stats_base; + + tx_bytes -= stats_base->tx_bytes; + tx_packets -= stats_base->tx_packets; + drops -= stats_base->drops; + backlog -= stats_base->backlog; + + _bstats_update(stats_ptr->bstats, tx_bytes, tx_packets); + stats_ptr->qstats->drops += drops; + stats_ptr->qstats->backlog += mlxsw_sp_cells_bytes(mlxsw_sp, backlog); + + stats_base->backlog += backlog; + stats_base->drops += drops; + stats_base->tx_bytes += tx_bytes; + stats_base->tx_packets += tx_packets; +} + +static void +mlxsw_sp_qdisc_get_tc_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_qopt_offload_stats *stats_ptr) 
+{ + u64 tx_packets = 0; + u64 tx_bytes = 0; + u64 backlog = 0; + u64 drops = 0; + + mlxsw_sp_qdisc_collect_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &tx_bytes, &tx_packets, + &drops, &backlog); + mlxsw_sp_qdisc_update_stats(mlxsw_sp_port->mlxsw_sp, mlxsw_sp_qdisc, + tx_bytes, tx_packets, drops, backlog, + stats_ptr); +} + static int mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port, int tclass_num, u32 min, u32 max, @@ -269,7 +349,7 @@ mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, &stats_base->tx_bytes); red_base->prob_mark = xstats->ecn; red_base->prob_drop = xstats->wred_drop[tclass_num]; - red_base->pdrop = xstats->tail_drop[tclass_num]; + red_base->pdrop = mlxsw_sp_xstats_tail_drop(xstats, tclass_num); stats_base->overlimits = red_base->prob_drop + red_base->prob_mark; stats_base->drops = red_base->prob_drop + red_base->pdrop; @@ -305,7 +385,8 @@ mlxsw_sp_qdisc_red_check_params(struct mlxsw_sp_port *mlxsw_sp_port, p->max); return -EINVAL; } - if (p->max > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE)) { + if (p->max > MLXSW_CORE_RES_GET(mlxsw_sp->core, + GUARANTEED_SHARED_BUFFER)) { dev_err(mlxsw_sp->bus_info->dev, "spectrum: RED: max value %u is too big\n", p->max); return -EINVAL; @@ -341,19 +422,28 @@ mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, } static void -mlxsw_sp_qdisc_red_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - void *params) +mlxsw_sp_qdisc_leaf_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct gnet_stats_queue *qstats) { - struct tc_red_qopt_offload_params *p = params; u64 backlog; backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, mlxsw_sp_qdisc->stats_base.backlog); - p->qstats->backlog -= backlog; + qstats->backlog -= backlog; mlxsw_sp_qdisc->stats_base.backlog = 0; } +static void +mlxsw_sp_qdisc_red_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_red_qopt_offload_params *p = params; + + mlxsw_sp_qdisc_leaf_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, p->qstats); +} + static int mlxsw_sp_qdisc_get_red_xstats(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, @@ -369,7 +459,8 @@ mlxsw_sp_qdisc_get_red_xstats(struct mlxsw_sp_port *mlxsw_sp_port, early_drops = xstats->wred_drop[tclass_num] - xstats_base->prob_drop; marks = xstats->ecn - xstats_base->prob_mark; - pdrops = xstats->tail_drop[tclass_num] - xstats_base->pdrop; + pdrops = mlxsw_sp_xstats_tail_drop(xstats, tclass_num) - + xstats_base->pdrop; res->pdrop += pdrops; res->prob_drop += early_drops; @@ -386,40 +477,21 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, struct tc_qopt_offload_stats *stats_ptr) { - u64 tx_bytes, tx_packets, overlimits, drops, backlog; u8 tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_qdisc_stats *stats_base; struct mlxsw_sp_port_xstats *xstats; + u64 overlimits; xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; stats_base = &mlxsw_sp_qdisc->stats_base; - mlxsw_sp_qdisc_bstats_per_priority_get(xstats, - mlxsw_sp_qdisc->prio_bitmap, - &tx_packets, &tx_bytes); - tx_bytes = tx_bytes - stats_base->tx_bytes; - tx_packets = tx_packets - stats_base->tx_packets; - + mlxsw_sp_qdisc_get_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, stats_ptr); overlimits = xstats->wred_drop[tclass_num] + xstats->ecn - stats_base->overlimits; - drops = xstats->wred_drop[tclass_num] + 
xstats->tail_drop[tclass_num] - - stats_base->drops; - backlog = xstats->backlog[tclass_num]; - _bstats_update(stats_ptr->bstats, tx_bytes, tx_packets); stats_ptr->qstats->overlimits += overlimits; - stats_ptr->qstats->drops += drops; - stats_ptr->qstats->backlog += - mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, - backlog) - - mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, - stats_base->backlog); - - stats_base->backlog = backlog; - stats_base->drops += drops; stats_base->overlimits += overlimits; - stats_base->tx_bytes += tx_bytes; - stats_base->tx_packets += tx_packets; + return 0; } @@ -469,15 +541,215 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, } } +static void +mlxsw_sp_setup_tc_qdisc_leaf_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + u64 backlog_cells = 0; + u64 tx_packets = 0; + u64 tx_bytes = 0; + u64 drops = 0; + + mlxsw_sp_qdisc_collect_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &tx_bytes, &tx_packets, + &drops, &backlog_cells); + + mlxsw_sp_qdisc->stats_base.tx_packets = tx_packets; + mlxsw_sp_qdisc->stats_base.tx_bytes = tx_bytes; + mlxsw_sp_qdisc->stats_base.drops = drops; + mlxsw_sp_qdisc->stats_base.backlog = 0; +} + static int -mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +mlxsw_sp_qdisc_tbf_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + struct mlxsw_sp_qdisc *root_qdisc = mlxsw_sp_port->root_qdisc; + + if (root_qdisc != mlxsw_sp_qdisc) + root_qdisc->stats_base.backlog -= + mlxsw_sp_qdisc->stats_base.backlog; + + return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + mlxsw_sp_qdisc->tclass_num, 0, + MLXSW_REG_QEEC_MAS_DIS, 0); +} + +static int +mlxsw_sp_qdisc_tbf_bs(struct mlxsw_sp_port *mlxsw_sp_port, + u32 max_size, u8 *p_burst_size) +{ + /* TBF burst size is configured in bytes. The ASIC burst size value is + * ((2 ^ bs) * 512 bits. Convert the TBF bytes to 512-bit units. + */ + u32 bs512 = max_size / 64; + u8 bs = fls(bs512); + + if (!bs) + return -EINVAL; + --bs; + + /* Demand a power of two. */ + if ((1 << bs) != bs512) + return -EINVAL; + + if (bs < mlxsw_sp_port->mlxsw_sp->lowest_shaper_bs || + bs > MLXSW_REG_QEEC_HIGHEST_SHAPER_BS) + return -EINVAL; + + *p_burst_size = bs; + return 0; +} + +static u32 +mlxsw_sp_qdisc_tbf_max_size(u8 bs) +{ + return (1U << bs) * 64; +} + +static u64 +mlxsw_sp_qdisc_tbf_rate_kbps(struct tc_tbf_qopt_offload_replace_params *p) +{ + /* TBF interface is in bytes/s, whereas Spectrum ASIC is configured in + * Kbits/s. 
+ */ + return div_u64(p->rate.rate_bytes_ps, 1000) * 8; +} + +static int +mlxsw_sp_qdisc_tbf_check_params(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_tbf_qopt_offload_replace_params *p = params; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u64 rate_kbps = mlxsw_sp_qdisc_tbf_rate_kbps(p); + u8 burst_size; + int err; + + if (rate_kbps >= MLXSW_REG_QEEC_MAS_DIS) { + dev_err(mlxsw_sp_port->mlxsw_sp->bus_info->dev, + "spectrum: TBF: rate of %lluKbps must be below %u\n", + rate_kbps, MLXSW_REG_QEEC_MAS_DIS); + return -EINVAL; + } + + err = mlxsw_sp_qdisc_tbf_bs(mlxsw_sp_port, p->max_size, &burst_size); + if (err) { + u8 highest_shaper_bs = MLXSW_REG_QEEC_HIGHEST_SHAPER_BS; + + dev_err(mlxsw_sp->bus_info->dev, + "spectrum: TBF: invalid burst size of %u, must be a power of two between %u and %u", + p->max_size, + mlxsw_sp_qdisc_tbf_max_size(mlxsw_sp->lowest_shaper_bs), + mlxsw_sp_qdisc_tbf_max_size(highest_shaper_bs)); + return -EINVAL; + } + + return 0; +} + +static int +mlxsw_sp_qdisc_tbf_replace(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_tbf_qopt_offload_replace_params *p = params; + u64 rate_kbps = mlxsw_sp_qdisc_tbf_rate_kbps(p); + u8 burst_size; + int err; + + err = mlxsw_sp_qdisc_tbf_bs(mlxsw_sp_port, p->max_size, &burst_size); + if (WARN_ON_ONCE(err)) + /* check_params above was supposed to reject this value. */ + return -EINVAL; + + /* Configure subgroup shaper, so that both UC and MC traffic is subject + * to shaping. That is unlike RED, however UC queue lengths are going to + * be different than MC ones due to different pool and quota + * configurations, so the configuration is not applicable. For shaper on + * the other hand, subjecting the overall stream to the configured + * shaper makes sense. Also note that that is what we do for + * ieee_setmaxrate(). 
+ */ + return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + mlxsw_sp_qdisc->tclass_num, 0, + rate_kbps, burst_size); +} + +static void +mlxsw_sp_qdisc_tbf_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_tbf_qopt_offload_replace_params *p = params; + + mlxsw_sp_qdisc_leaf_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, p->qstats); +} + +static int +mlxsw_sp_qdisc_get_tbf_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_qopt_offload_stats *stats_ptr) +{ + mlxsw_sp_qdisc_get_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + stats_ptr); + return 0; +} + +static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_tbf = { + .type = MLXSW_SP_QDISC_TBF, + .check_params = mlxsw_sp_qdisc_tbf_check_params, + .replace = mlxsw_sp_qdisc_tbf_replace, + .unoffload = mlxsw_sp_qdisc_tbf_unoffload, + .destroy = mlxsw_sp_qdisc_tbf_destroy, + .get_stats = mlxsw_sp_qdisc_get_tbf_stats, + .clean_stats = mlxsw_sp_setup_tc_qdisc_leaf_clean_stats, +}; + +int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_tbf_qopt_offload *p) +{ + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false); + if (!mlxsw_sp_qdisc) + return -EOPNOTSUPP; + + if (p->command == TC_TBF_REPLACE) + return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, + &mlxsw_sp_qdisc_ops_tbf, + &p->replace_params); + + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle, + MLXSW_SP_QDISC_TBF)) + return -EOPNOTSUPP; + + switch (p->command) { + case TC_TBF_DESTROY: + return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + case TC_TBF_STATS: + return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &p->stats); + default: + return -EOPNOTSUPP; + } +} + +static int +__mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port) { int i; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, MLXSW_SP_PORT_DEFAULT_TCLASS); + mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + i, 0, false, 0); mlxsw_sp_qdisc_destroy(mlxsw_sp_port, &mlxsw_sp_port->tclass_qdiscs[i]); mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0; @@ -487,36 +759,58 @@ mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port, } static int -mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - void *params) +mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { - struct tc_prio_qopt_offload_params *p = params; + return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port); +} - if (p->bands > IEEE_8021QAZ_MAX_TCS) +static int +__mlxsw_sp_qdisc_ets_check_params(unsigned int nbands) +{ + if (nbands > IEEE_8021QAZ_MAX_TCS) return -EOPNOTSUPP; return 0; } static int -mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - void *params) +mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) { struct tc_prio_qopt_offload_params *p = params; + + return __mlxsw_sp_qdisc_ets_check_params(p->bands); +} + +static int +__mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, + unsigned int nbands, + const unsigned int *quanta, + const unsigned int *weights, + const u8 *priomap) +{ struct mlxsw_sp_qdisc *child_qdisc; int tclass, i, band, backlog; u8 old_priomap; int err; - for 
(band = 0; band < p->bands; band++) { + for (band = 0; band < nbands; band++) { tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass]; old_priomap = child_qdisc->prio_bitmap; child_qdisc->prio_bitmap = 0; + + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + tclass, 0, !!quanta[band], + weights[band]); + if (err) + return err; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - if (p->priomap[i] == band) { + if (priomap[i] == band) { child_qdisc->prio_bitmap |= BIT(i); if (BIT(i) & old_priomap) continue; @@ -539,21 +833,46 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass]; child_qdisc->prio_bitmap = 0; mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc); + mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + tclass, 0, false, 0); } return 0; } +static int +mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_prio_qopt_offload_params *p = params; + unsigned int zeroes[TCQ_ETS_MAX_BANDS] = {0}; + + return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, p->bands, + zeroes, zeroes, p->priomap); +} + +static void +__mlxsw_sp_qdisc_ets_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct gnet_stats_queue *qstats) +{ + u64 backlog; + + backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_qdisc->stats_base.backlog); + qstats->backlog -= backlog; +} + static void mlxsw_sp_qdisc_prio_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) { struct tc_prio_qopt_offload_params *p = params; - u64 backlog; - backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, - mlxsw_sp_qdisc->stats_base.backlog); - p->qstats->backlog -= backlog; + __mlxsw_sp_qdisc_ets_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, + p->qstats); } static int @@ -561,37 +880,23 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, struct tc_qopt_offload_stats *stats_ptr) { - u64 tx_bytes, tx_packets, drops = 0, backlog = 0; - struct mlxsw_sp_qdisc_stats *stats_base; - struct mlxsw_sp_port_xstats *xstats; - struct rtnl_link_stats64 *stats; + struct mlxsw_sp_qdisc *tc_qdisc; + u64 tx_packets = 0; + u64 tx_bytes = 0; + u64 backlog = 0; + u64 drops = 0; int i; - xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; - stats = &mlxsw_sp_port->periodic_hw_stats.stats; - stats_base = &mlxsw_sp_qdisc->stats_base; - - tx_bytes = stats->tx_bytes - stats_base->tx_bytes; - tx_packets = stats->tx_packets - stats_base->tx_packets; - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - drops += xstats->tail_drop[i]; - drops += xstats->wred_drop[i]; - backlog += xstats->backlog[i]; + tc_qdisc = &mlxsw_sp_port->tclass_qdiscs[i]; + mlxsw_sp_qdisc_collect_tc_stats(mlxsw_sp_port, tc_qdisc, + &tx_bytes, &tx_packets, + &drops, &backlog); } - drops = drops - stats_base->drops; - _bstats_update(stats_ptr->bstats, tx_bytes, tx_packets); - stats_ptr->qstats->drops += drops; - stats_ptr->qstats->backlog += - mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, - backlog) - - mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, - stats_base->backlog); - stats_base->backlog = backlog; - stats_base->drops += drops; - stats_base->tx_bytes += tx_bytes; - stats_base->tx_packets += tx_packets; + mlxsw_sp_qdisc_update_stats(mlxsw_sp_port->mlxsw_sp, mlxsw_sp_qdisc, + tx_bytes, tx_packets, drops, backlog, + stats_ptr); 
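/* Editor's note: a hedged sketch of the delta-against-base accounting used by
 * the get_stats helpers above (the per-TC collection feeding
 * mlxsw_sp_qdisc_update_stats()). Hardware counters are free-running, so the
 * driver keeps a base snapshot and reports only the growth since the previous
 * query, then advances the base. Types and names below are illustrative, not
 * the driver's structs.
 */
#include <linux/types.h>

struct example_stats_base {
	u64 tx_bytes;
	u64 tx_packets;
};

static void example_report_delta(struct example_stats_base *base,
				 u64 hw_bytes, u64 hw_packets,
				 u64 *delta_bytes, u64 *delta_packets)
{
	*delta_bytes = hw_bytes - base->tx_bytes;
	*delta_packets = hw_packets - base->tx_packets;

	/* Advance the base so the next query reports a fresh delta. */
	base->tx_bytes = hw_bytes;
	base->tx_packets = hw_packets;
}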
return 0; } @@ -613,7 +918,7 @@ mlxsw_sp_setup_tc_qdisc_prio_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, stats_base->drops = 0; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - stats_base->drops += xstats->tail_drop[i]; + stats_base->drops += mlxsw_sp_xstats_tail_drop(xstats, i); stats_base->drops += xstats->wred_drop[i]; } @@ -630,31 +935,104 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = { .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats, }; -/* Grafting is not supported in mlxsw. It will result in un-offloading of the - * grafted qdisc as well as the qdisc in the qdisc new location. - * (However, if the graft is to the location where the qdisc is already at, it - * will be ignored completely and won't cause un-offloading). +static int +mlxsw_sp_qdisc_ets_check_params(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_ets_qopt_offload_replace_params *p = params; + + return __mlxsw_sp_qdisc_ets_check_params(p->bands); +} + +static int +mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_ets_qopt_offload_replace_params *p = params; + + return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, p->bands, + p->quanta, p->weights, p->priomap); +} + +static void +mlxsw_sp_qdisc_ets_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_ets_qopt_offload_replace_params *p = params; + + __mlxsw_sp_qdisc_ets_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, + p->qstats); +} + +static int +mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port); +} + +static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_ets = { + .type = MLXSW_SP_QDISC_ETS, + .check_params = mlxsw_sp_qdisc_ets_check_params, + .replace = mlxsw_sp_qdisc_ets_replace, + .unoffload = mlxsw_sp_qdisc_ets_unoffload, + .destroy = mlxsw_sp_qdisc_ets_destroy, + .get_stats = mlxsw_sp_qdisc_get_prio_stats, + .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats, +}; + +/* Linux allows linking of Qdiscs to arbitrary classes (so long as the resulting + * graph is free of cycles). These operations do not change the parent handle + * though, which means it can be incomplete (if there is more than one class + * where the Qdisc in question is grafted) or outright wrong (if the Qdisc was + * linked to a different class and then removed from the original class). + * + * E.g. consider this sequence of operations: + * + * # tc qdisc add dev swp1 root handle 1: prio + * # tc qdisc add dev swp1 parent 1:3 handle 13: red limit 1000000 avpkt 10000 + * RED: set bandwidth to 10Mbit + * # tc qdisc link dev swp1 handle 13: parent 1:2 + * + * At this point, both 1:2 and 1:3 have the same RED Qdisc instance as their + * child. But RED will still only claim that 1:3 is its parent. If it's removed + * from that band, its only parent will be 1:2, but it will continue to claim + * that it is in fact 1:3. + * + * The notification for child Qdisc replace (e.g. TC_RED_REPLACE) comes before + * the notification for parent graft (e.g. TC_PRIO_GRAFT). We take the replace + * notification to offload the child Qdisc, based on its parent handle, and use + * the graft operation to validate that the class where the child is actually + * grafted corresponds to the parent handle. If the two don't match, we + * unoffload the child. 
*/ static int -mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - struct tc_prio_qopt_offload_graft_params *p) +__mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u8 band, u32 child_handle) { - int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->band); + int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); struct mlxsw_sp_qdisc *old_qdisc; - /* Check if the grafted qdisc is already in its "new" location. If so - - * nothing needs to be done. - */ - if (p->band < IEEE_8021QAZ_MAX_TCS && - mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == p->child_handle) + if (band < IEEE_8021QAZ_MAX_TCS && + mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == child_handle) return 0; + if (!child_handle) { + /* This is an invisible FIFO replacing the original Qdisc. + * Ignore it--the original Qdisc's destroy will follow. + */ + return 0; + } + /* See if the grafted qdisc is already offloaded on any tclass. If so, * unoffload it. */ old_qdisc = mlxsw_sp_qdisc_find_by_handle(mlxsw_sp_port, - p->child_handle); + child_handle); if (old_qdisc) mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc); @@ -663,6 +1041,15 @@ mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port, return -EOPNOTSUPP; } +static int +mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_prio_qopt_offload_graft_params *p) +{ + return __mlxsw_sp_qdisc_ets_graft(mlxsw_sp_port, mlxsw_sp_qdisc, + p->band, p->child_handle); +} + int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_prio_qopt_offload *p) { @@ -696,6 +1083,40 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, } } +int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_ets_qopt_offload *p) +{ + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true); + if (!mlxsw_sp_qdisc) + return -EOPNOTSUPP; + + if (p->command == TC_ETS_REPLACE) + return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, + &mlxsw_sp_qdisc_ops_ets, + &p->replace_params); + + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle, + MLXSW_SP_QDISC_ETS)) + return -EOPNOTSUPP; + + switch (p->command) { + case TC_ETS_DESTROY: + return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + case TC_ETS_STATS: + return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &p->stats); + case TC_ETS_GRAFT: + return __mlxsw_sp_qdisc_ets_graft(mlxsw_sp_port, mlxsw_sp_qdisc, + p->graft_params.band, + p->graft_params.child_handle); + default: + return -EOPNOTSUPP; + } +} + int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port) { struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index e618be7ce6c6..4a77b511ead2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -16,6 +16,7 @@ #include <linux/if_macvlan.h> #include <linux/refcount.h> #include <linux/jhash.h> +#include <linux/net_namespace.h> #include <net/netevent.h> #include <net/neighbour.h> #include <net/arp.h> @@ -76,6 +77,8 @@ struct mlxsw_sp_router { struct notifier_block inet6addr_nb; const struct mlxsw_sp_rif_ops **rif_ops_arr; const struct mlxsw_sp_ipip_ops **ipip_ops_arr; + u32 adj_discard_index; + bool adj_discard_index_valid; }; struct mlxsw_sp_rif { @@ -366,6 +369,7 
@@ enum mlxsw_sp_fib_entry_type { MLXSW_SP_FIB_ENTRY_TYPE_LOCAL, MLXSW_SP_FIB_ENTRY_TYPE_TRAP, MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE, + MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE, /* This is a special case of local delivery, where a packet should be * decapsulated on reception. Note that there is no corresponding ENCAP, @@ -378,9 +382,10 @@ enum mlxsw_sp_fib_entry_type { }; struct mlxsw_sp_nexthop_group; +struct mlxsw_sp_fib_entry; struct mlxsw_sp_fib_node { - struct list_head entry_list; + struct mlxsw_sp_fib_entry *fib_entry; struct list_head list; struct rhash_head ht_node; struct mlxsw_sp_fib *fib; @@ -393,7 +398,6 @@ struct mlxsw_sp_fib_entry_decap { }; struct mlxsw_sp_fib_entry { - struct list_head list; struct mlxsw_sp_fib_node *fib_node; enum mlxsw_sp_fib_entry_type type; struct list_head nexthop_group_node; @@ -994,7 +998,7 @@ u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev) if (d) return l3mdev_fib_table(d) ? : RT_TABLE_MAIN; else - return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN; + return RT_TABLE_MAIN; } static struct mlxsw_sp_rif * @@ -1158,7 +1162,6 @@ mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const union mlxsw_sp_l3addr *addr, enum mlxsw_sp_fib_entry_type type) { - struct mlxsw_sp_fib_entry *fib_entry; struct mlxsw_sp_fib_node *fib_node; unsigned char addr_prefix_len; struct mlxsw_sp_fib *fib; @@ -1187,15 +1190,10 @@ mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id, fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len, addr_prefix_len); - if (!fib_node || list_empty(&fib_node->entry_list)) - return NULL; - - fib_entry = list_first_entry(&fib_node->entry_list, - struct mlxsw_sp_fib_entry, list); - if (fib_entry->type != type) + if (!fib_node || fib_node->fib_entry->type != type) return NULL; - return fib_entry; + return fib_node->fib_entry; } /* Given an IPIP entry, find the corresponding decap route. */ @@ -1205,7 +1203,6 @@ mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp, { static struct mlxsw_sp_fib_node *fib_node; const struct mlxsw_sp_ipip_ops *ipip_ops; - struct mlxsw_sp_fib_entry *fib_entry; unsigned char saddr_prefix_len; union mlxsw_sp_l3addr saddr; struct mlxsw_sp_fib *ul_fib; @@ -1240,15 +1237,11 @@ mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp, fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len, saddr_prefix_len); - if (!fib_node || list_empty(&fib_node->entry_list)) - return NULL; - - fib_entry = list_first_entry(&fib_node->entry_list, - struct mlxsw_sp_fib_entry, list); - if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP) + if (!fib_node || + fib_node->fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP) return NULL; - return fib_entry; + return fib_node->fib_entry; } static struct mlxsw_sp_ipip_entry * @@ -1598,27 +1591,10 @@ static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_ipip_entry *ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); - enum mlxsw_sp_l3proto ul_proto; - union mlxsw_sp_l3addr saddr; - u32 ul_tb_id; if (!ipip_entry) return 0; - /* For flat configuration cases, moving overlay to a different VRF might - * cause local address conflict, and the conflicting tunnels need to be - * demoted. 
- */ - ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev); - ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto; - saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev); - if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto, - saddr, ul_tb_id, - ipip_entry)) { - mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry); - return 0; - } - return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, true, false, false, extack); } @@ -1627,8 +1603,25 @@ static int mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry *ipip_entry, struct net_device *ul_dev, + bool *demote_this, struct netlink_ext_ack *extack) { + u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN; + enum mlxsw_sp_l3proto ul_proto; + union mlxsw_sp_l3addr saddr; + + /* Moving underlay to a different VRF might cause local address + * conflict, and the conflicting tunnels need to be demoted. + */ + ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto; + saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev); + if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto, + saddr, ul_tb_id, + ipip_entry)) { + *demote_this = true; + return 0; + } + return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, true, true, false, extack); } @@ -1779,6 +1772,7 @@ static int __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry *ipip_entry, struct net_device *ul_dev, + bool *demote_this, unsigned long event, struct netdev_notifier_info *info) { @@ -1793,6 +1787,7 @@ __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp, ipip_entry, ul_dev, + demote_this, extack); break; @@ -1819,13 +1814,31 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, ul_dev, ipip_entry))) { + struct mlxsw_sp_ipip_entry *prev; + bool demote_this = false; + err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry, - ul_dev, event, info); + ul_dev, &demote_this, + event, info); if (err) { mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp, ul_dev); return err; } + + if (demote_this) { + if (list_is_first(&ipip_entry->ipip_list_node, + &mlxsw_sp->router->ipip_list)) + prev = NULL; + else + /* This can't be cached from previous iteration, + * because that entry could be gone now. 
+ */ + prev = list_prev_entry(ipip_entry, + ipip_list_node); + mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry); + ipip_entry = prev; + } } return 0; @@ -2551,14 +2564,14 @@ static int mlxsw_sp_router_schedule_work(struct net *net, struct mlxsw_sp_netevent_work *net_work; struct mlxsw_sp_router *router; - if (!net_eq(net, &init_net)) + router = container_of(nb, struct mlxsw_sp_router, netevent_nb); + if (!net_eq(net, mlxsw_sp_net(router->mlxsw_sp))) return NOTIFY_DONE; net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC); if (!net_work) return NOTIFY_BAD; - router = container_of(nb, struct mlxsw_sp_router, netevent_nb); INIT_WORK(&net_work->work, cb); net_work->mlxsw_sp = router->mlxsw_sp; mlxsw_core_schedule_work(&net_work->work); @@ -2943,7 +2956,7 @@ static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed) val = nh_grp->count; for (i = 0; i < nh_grp->count; i++) { nh = &nh_grp->nexthops[i]; - val ^= nh->ifindex; + val ^= jhash(&nh->ifindex, sizeof(nh->ifindex), seed); } return jhash(&val, sizeof(val), seed); default: @@ -2961,7 +2974,7 @@ mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed) list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { dev = mlxsw_sp_rt6->rt->fib6_nh->fib_nh_dev; - val ^= dev->ifindex; + val ^= jhash(&dev->ifindex, sizeof(dev->ifindex), seed); } return jhash(&val, sizeof(val), seed); @@ -3207,10 +3220,6 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, return 0; } -static bool -mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node, - const struct mlxsw_sp_fib_entry *fib_entry); - static int mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) @@ -3219,9 +3228,6 @@ mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp, int err; list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) { - if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node, - fib_entry)) - continue; err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); if (err) return err; @@ -3229,24 +3235,6 @@ mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp, return 0; } -static void -mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op, int err); - -static void -mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp) -{ - enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE; - struct mlxsw_sp_fib_entry *fib_entry; - - list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) { - if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node, - fib_entry)) - continue; - mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0); - } -} - static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size) { /* Valid sizes for an adjacency group are: @@ -3350,6 +3338,73 @@ mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp) } } +static struct mlxsw_sp_nexthop * +mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, + const struct mlxsw_sp_rt6 *mlxsw_sp_rt6); + +static void +mlxsw_sp_nexthop4_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + int i; + + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + + if (nh->offloaded) + nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD; + else + nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; + } +} + +static void +__mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp_nexthop_group *nh_grp, + struct 
mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh; + struct mlxsw_sp_nexthop *nh; + + nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6); + if (nh && nh->offloaded) + fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD; + else + fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; + } +} + +static void +mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + + /* Unfortunately, in IPv6 the route and the nexthop are described by + * the same struct, so we need to iterate over all the routes using the + * nexthop group and set / clear the offload indication for them. + */ + list_for_each_entry(fib6_entry, &nh_grp->fib_list, + common.nexthop_group_node) + __mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry); +} + +static void +mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + switch (mlxsw_sp_nexthop_group_type(nh_grp)) { + case AF_INET: + mlxsw_sp_nexthop4_group_offload_refresh(mlxsw_sp, nh_grp); + break; + case AF_INET6: + mlxsw_sp_nexthop6_group_offload_refresh(mlxsw_sp, nh_grp); + break; + } +} + static void mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) @@ -3423,6 +3478,8 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, goto set_trap; } + mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp); + if (!old_adj_index_valid) { /* The trap was set for fib entries, so we have to call * fib entry update to unset it and use adjacency index. @@ -3444,9 +3501,6 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, goto set_trap; } - /* Offload state within the group changed, so update the flags. 
*/ - mlxsw_sp_nexthop_fib_entries_refresh(nh_grp); - return; set_trap: @@ -3459,6 +3513,7 @@ set_trap: err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp); if (err) dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n"); + mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp); if (old_adj_index_valid) mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, nh_grp->ecmp_size, nh_grp->adj_index); @@ -3821,7 +3876,7 @@ static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp, key.fib_nh = fib_nh; nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key); - if (WARN_ON_ONCE(!nh)) + if (!nh) return; switch (event) { @@ -4041,131 +4096,128 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, } static void -mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { - struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; - int i; - - if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL || - fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE || - fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP || - fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) { - nh_grp->nexthops->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD; - return; - } - - for (i = 0; i < nh_grp->count; i++) { - struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + struct fib_info *fi = mlxsw_sp_nexthop4_group_fi(fib_entry->nh_group); + u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr; + int dst_len = fib_entry->fib_node->key.prefix_len; + struct mlxsw_sp_fib4_entry *fib4_entry; + struct fib_rt_info fri; + bool should_offload; - if (nh->offloaded) - nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD; - else - nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; - } + should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry); + fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry, + common); + fri.fi = fi; + fri.tb_id = fib4_entry->tb_id; + fri.dst = cpu_to_be32(*p_dst); + fri.dst_len = dst_len; + fri.tos = fib4_entry->tos; + fri.type = fib4_entry->type; + fri.offload = should_offload; + fri.trap = !should_offload; + fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri); } static void -mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { - struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; - int i; - - if (!list_is_singular(&nh_grp->fib_list)) - return; - - for (i = 0; i < nh_grp->count; i++) { - struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + struct fib_info *fi = mlxsw_sp_nexthop4_group_fi(fib_entry->nh_group); + u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr; + int dst_len = fib_entry->fib_node->key.prefix_len; + struct mlxsw_sp_fib4_entry *fib4_entry; + struct fib_rt_info fri; - nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; - } + fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry, + common); + fri.fi = fi; + fri.tb_id = fib4_entry->tb_id; + fri.dst = cpu_to_be32(*p_dst); + fri.dst_len = dst_len; + fri.tos = fib4_entry->tos; + fri.type = fib4_entry->type; + fri.offload = false; + fri.trap = false; + fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri); } static void -mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_rt6 
*mlxsw_sp_rt6; + bool should_offload; + + should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry); + /* In IPv6 a multipath route is represented using multiple routes, so + * we need to set the flags on all of them. + */ fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry, common); - - if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL || - fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE) { - list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, - list)->rt->fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD; - return; - } - - list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { - struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; - struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh; - struct mlxsw_sp_nexthop *nh; - - nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6); - if (nh && nh->offloaded) - fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD; - else - fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; - } + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) + fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, should_offload, + !should_offload); } static void -mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_rt6 *mlxsw_sp_rt6; fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry, common); - list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { - struct fib6_info *rt = mlxsw_sp_rt6->rt; - - rt->fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; - } + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) + fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, false, false); } -static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +static void +mlxsw_sp_fib_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { switch (fib_entry->fib_node->fib->proto) { case MLXSW_SP_L3_PROTO_IPV4: - mlxsw_sp_fib4_entry_offload_set(fib_entry); + mlxsw_sp_fib4_entry_hw_flags_set(mlxsw_sp, fib_entry); break; case MLXSW_SP_L3_PROTO_IPV6: - mlxsw_sp_fib6_entry_offload_set(fib_entry); + mlxsw_sp_fib6_entry_hw_flags_set(mlxsw_sp, fib_entry); break; } } static void -mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { switch (fib_entry->fib_node->fib->proto) { case MLXSW_SP_L3_PROTO_IPV4: - mlxsw_sp_fib4_entry_offload_unset(fib_entry); + mlxsw_sp_fib4_entry_hw_flags_clear(mlxsw_sp, fib_entry); break; case MLXSW_SP_L3_PROTO_IPV6: - mlxsw_sp_fib6_entry_offload_unset(fib_entry); + mlxsw_sp_fib6_entry_hw_flags_clear(mlxsw_sp, fib_entry); break; } } static void -mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op, int err) +mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) { switch (op) { - case MLXSW_REG_RALUE_OP_WRITE_DELETE: - return mlxsw_sp_fib_entry_offload_unset(fib_entry); case MLXSW_REG_RALUE_OP_WRITE_WRITE: - if (err) - return; - if (mlxsw_sp_fib_entry_should_offload(fib_entry)) - mlxsw_sp_fib_entry_offload_set(fib_entry); - else - mlxsw_sp_fib_entry_offload_unset(fib_entry); - return; + mlxsw_sp_fib_entry_hw_flags_set(mlxsw_sp, fib_entry); + break; + case MLXSW_REG_RALUE_OP_WRITE_DELETE: + mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, fib_entry); + break; default: - return; + break; } } 
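The hunks above replace the driver's direct manipulation of RTNH_F_OFFLOAD on nexthops with the kernel's route-level reporting interfaces: fib_alias_hw_flags_set() for IPv4 and fib6_info_hw_flags_set() for IPv6, with the offload/trap indication refreshed only after a successful RALUE write. As a minimal illustrative sketch of that IPv4 reporting pattern (not part of the patch; the helper name and its parameters are hypothetical, while struct fib_rt_info and fib_alias_hw_flags_set() are taken from the hunk above and assume a kernel that already provides them):

#include <net/ip_fib.h>

/* Sketch: report the hardware state of one IPv4 route to the FIB core,
 * mirroring mlxsw_sp_fib4_entry_hw_flags_set() above. All values are
 * expected to come from the driver's own bookkeeping of the entry.
 */
static void example_report_ipv4_route(struct net *net, struct fib_info *fi,
				      u32 tb_id, __be32 dst, int dst_len,
				      u8 tos, u8 type, bool offloaded)
{
	struct fib_rt_info fri = {
		.fi	 = fi,
		.tb_id	 = tb_id,
		.dst	 = dst,
		.dst_len = dst_len,
		.tos	 = tos,
		.type	 = type,
		/* In this scheme a route is either fully offloaded or
		 * trapped to the CPU, so the two flags are complementary.
		 */
		.offload = offloaded,
		.trap	 = !offloaded,
	};

	fib_alias_hw_flags_set(net, &fri);
}

The IPv6 side needs no such descriptor: fib6_info_hw_flags_set() takes the offload and trap booleans directly, applied per fib6_info since an IPv6 multipath route is represented by multiple routes, as the hunk above shows.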
@@ -4195,15 +4247,50 @@ mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl, } } +static int mlxsw_sp_adj_discard_write(struct mlxsw_sp *mlxsw_sp, u16 rif_index) +{ + enum mlxsw_reg_ratr_trap_action trap_action; + char ratr_pl[MLXSW_REG_RATR_LEN]; + int err; + + if (mlxsw_sp->router->adj_discard_index_valid) + return 0; + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, + &mlxsw_sp->router->adj_discard_index); + if (err) + return err; + + trap_action = MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS; + mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, true, + MLXSW_REG_RATR_TYPE_ETHERNET, + mlxsw_sp->router->adj_discard_index, rif_index); + mlxsw_reg_ratr_trap_action_set(ratr_pl, trap_action); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); + if (err) + goto err_ratr_write; + + mlxsw_sp->router->adj_discard_index_valid = true; + + return 0; + +err_ratr_write: + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, + mlxsw_sp->router->adj_discard_index); + return err; +} + static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) { + struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group; char ralue_pl[MLXSW_REG_RALUE_LEN]; enum mlxsw_reg_ralue_trap_action trap_action; u16 trap_id = 0; u32 adjacency_index = 0; u16 ecmp_size = 0; + int err; /* In case the nexthop group adjacency index is valid, use it * with provided ECMP size. Otherwise, setup trap and pass @@ -4213,6 +4300,15 @@ static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp, trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP; adjacency_index = fib_entry->nh_group->adj_index; ecmp_size = fib_entry->nh_group->ecmp_size; + } else if (!nh_group->adj_index_valid && nh_group->count && + nh_group->nh_rif) { + err = mlxsw_sp_adj_discard_write(mlxsw_sp, + nh_group->nh_rif->rif_index); + if (err) + return err; + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP; + adjacency_index = mlxsw_sp->router->adj_discard_index; + ecmp_size = 1; } else { trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP; trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; @@ -4273,6 +4369,23 @@ static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp, } static int +mlxsw_sp_fib_entry_op_unreachable(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + enum mlxsw_reg_ralue_trap_action trap_action; + char ralue_pl[MLXSW_REG_RALUE_LEN]; + u16 trap_id; + + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP; + trap_id = MLXSW_TRAP_ID_RTR_INGRESS1; + + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); + mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + +static int mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) @@ -4313,6 +4426,9 @@ static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE: return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op); + case MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE: + return mlxsw_sp_fib_entry_op_unreachable(mlxsw_sp, fib_entry, + op); case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp, fib_entry, op); @@ -4328,7 +4444,10 @@ static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, { int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op); - 
mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err); + if (err) + return err; + + mlxsw_sp_fib_entry_hw_flags_refresh(mlxsw_sp, fib_entry, op); return err; } @@ -4390,7 +4509,7 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, * can do so with a lower priority than packets directed * at the host, so use action type local instead of trap. */ - fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE; return 0; case RTN_UNICAST: if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi)) @@ -4403,6 +4522,19 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, } } +static void +mlxsw_sp_fib4_entry_type_unset(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + switch (fib_entry->type) { + case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: + mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry); + break; + default: + break; + } +} + static struct mlxsw_sp_fib4_entry * mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node, @@ -4435,6 +4567,7 @@ mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp, return fib4_entry; err_nexthop4_group_get: + mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, fib_entry); err_fib4_entry_type_set: kfree(fib4_entry); return ERR_PTR(err); @@ -4444,6 +4577,7 @@ static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib4_entry *fib4_entry) { mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common); + mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, &fib4_entry->common); kfree(fib4_entry); } @@ -4467,15 +4601,14 @@ mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp, if (!fib_node) return NULL; - list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) { - if (fib4_entry->tb_id == fen_info->tb_id && - fib4_entry->tos == fen_info->tos && - fib4_entry->type == fen_info->type && - mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) == - fen_info->fi) { - return fib4_entry; - } - } + fib4_entry = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib4_entry, common); + if (fib4_entry->tb_id == fen_info->tb_id && + fib4_entry->tos == fen_info->tos && + fib4_entry->type == fen_info->type && + mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) == + fen_info->fi) + return fib4_entry; return NULL; } @@ -4523,7 +4656,6 @@ mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr, if (!fib_node) return NULL; - INIT_LIST_HEAD(&fib_node->entry_list); list_add(&fib_node->list, &fib->node_list); memcpy(fib_node->key.addr, addr, addr_len); fib_node->key.prefix_len = prefix_len; @@ -4534,18 +4666,9 @@ mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr, static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node) { list_del(&fib_node->list); - WARN_ON(!list_empty(&fib_node->entry_list)); kfree(fib_node); } -static bool -mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node, - const struct mlxsw_sp_fib_entry *fib_entry) -{ - return list_first_entry(&fib_node->entry_list, - struct mlxsw_sp_fib_entry, list) == fib_entry; -} - static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { @@ -4685,200 +4808,65 @@ static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_vr *vr = fib_node->fib->vr; - if (!list_empty(&fib_node->entry_list)) + if (fib_node->fib_entry) return; mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node); mlxsw_sp_fib_node_destroy(fib_node); mlxsw_sp_vr_put(mlxsw_sp, vr); } -static struct mlxsw_sp_fib4_entry * 
-mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct mlxsw_sp_fib4_entry *new4_entry) -{ - struct mlxsw_sp_fib4_entry *fib4_entry; - - list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) { - if (fib4_entry->tb_id > new4_entry->tb_id) - continue; - if (fib4_entry->tb_id != new4_entry->tb_id) - break; - if (fib4_entry->tos > new4_entry->tos) - continue; - if (fib4_entry->prio >= new4_entry->prio || - fib4_entry->tos < new4_entry->tos) - return fib4_entry; - } - - return NULL; -} - -static int -mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry, - struct mlxsw_sp_fib4_entry *new4_entry) -{ - struct mlxsw_sp_fib_node *fib_node; - - if (WARN_ON(!fib4_entry)) - return -EINVAL; - - fib_node = fib4_entry->common.fib_node; - list_for_each_entry_from(fib4_entry, &fib_node->entry_list, - common.list) { - if (fib4_entry->tb_id != new4_entry->tb_id || - fib4_entry->tos != new4_entry->tos || - fib4_entry->prio != new4_entry->prio) - break; - } - - list_add_tail(&new4_entry->common.list, &fib4_entry->common.list); - return 0; -} - -static int -mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry, - bool replace, bool append) -{ - struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node; - struct mlxsw_sp_fib4_entry *fib4_entry; - - fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry); - - if (append) - return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry); - if (replace && WARN_ON(!fib4_entry)) - return -EINVAL; - - /* Insert new entry before replaced one, so that we can later - * remove the second. - */ - if (fib4_entry) { - list_add_tail(&new4_entry->common.list, - &fib4_entry->common.list); - } else { - struct mlxsw_sp_fib4_entry *last; - - list_for_each_entry(last, &fib_node->entry_list, common.list) { - if (new4_entry->tb_id > last->tb_id) - break; - fib4_entry = last; - } - - if (fib4_entry) - list_add(&new4_entry->common.list, - &fib4_entry->common.list); - else - list_add(&new4_entry->common.list, - &fib_node->entry_list); - } - - return 0; -} - -static void -mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry) -{ - list_del(&fib4_entry->common.list); -} - -static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) -{ - struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; - - if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry)) - return 0; - - /* To prevent packet loss, overwrite the previously offloaded - * entry. 
- */ - if (!list_is_singular(&fib_node->entry_list)) { - enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE; - struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list); - - mlxsw_sp_fib_entry_offload_refresh(n, op, 0); - } - - return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); -} - -static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp, +static int mlxsw_sp_fib_node_entry_link(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; - - if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry)) - return; - - /* Promote the next entry by overwriting the deleted entry */ - if (!list_is_singular(&fib_node->entry_list)) { - struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list); - enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE; - - mlxsw_sp_fib_entry_update(mlxsw_sp, n); - mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0); - return; - } - - mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry); -} - -static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib4_entry *fib4_entry, - bool replace, bool append) -{ int err; - err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append); - if (err) - return err; + fib_node->fib_entry = fib_entry; - err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common); + err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); if (err) - goto err_fib_node_entry_add; + goto err_fib_entry_update; return 0; -err_fib_node_entry_add: - mlxsw_sp_fib4_node_list_remove(fib4_entry); +err_fib_entry_update: + fib_node->fib_entry = NULL; return err; } static void -mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib4_entry *fib4_entry) +mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { - mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common); - mlxsw_sp_fib4_node_list_remove(fib4_entry); + struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; - if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) - mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common); + mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry); + fib_node->fib_entry = NULL; } -static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib4_entry *fib4_entry, - bool replace) +static bool mlxsw_sp_fib4_allow_replace(struct mlxsw_sp_fib4_entry *fib4_entry) { struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node; - struct mlxsw_sp_fib4_entry *replaced; + struct mlxsw_sp_fib4_entry *fib4_replaced; - if (!replace) - return; + if (!fib_node->fib_entry) + return true; - /* We inserted the new entry before replaced one */ - replaced = list_next_entry(fib4_entry, common.list); + fib4_replaced = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib4_entry, common); + if (fib4_entry->tb_id == RT_TABLE_MAIN && + fib4_replaced->tb_id == RT_TABLE_LOCAL) + return false; - mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced); - mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced); - mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return true; } static int -mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp, - const struct fib_entry_notifier_info *fen_info, - bool replace, bool append) +mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp, + const struct fib_entry_notifier_info *fen_info) { - struct mlxsw_sp_fib4_entry *fib4_entry; + struct mlxsw_sp_fib4_entry *fib4_entry, *fib4_replaced; + struct mlxsw_sp_fib_entry *replaced; struct 
mlxsw_sp_fib_node *fib_node; int err; @@ -4901,18 +4889,32 @@ mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp, goto err_fib4_entry_create; } - err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace, - append); + if (!mlxsw_sp_fib4_allow_replace(fib4_entry)) { + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return 0; + } + + replaced = fib_node->fib_entry; + err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib4_entry->common); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n"); - goto err_fib4_node_entry_link; + goto err_fib_node_entry_link; } - mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace); + /* Nothing to replace */ + if (!replaced) + return 0; + + mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced); + fib4_replaced = container_of(replaced, struct mlxsw_sp_fib4_entry, + common); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_replaced); return 0; -err_fib4_node_entry_link: +err_fib_node_entry_link: + fib_node->fib_entry = replaced; mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); err_fib4_entry_create: mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); @@ -4929,11 +4931,11 @@ static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, return; fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info); - if (WARN_ON(!fib4_entry)) + if (!fib4_entry) return; fib_node = fib4_entry->common.fib_node; - mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry); + mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib4_entry->common); mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } @@ -4991,17 +4993,13 @@ static void mlxsw_sp_rt6_release(struct fib6_info *rt) static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6) { + struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh; + + fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt); kfree(mlxsw_sp_rt6); } -static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt) -{ - /* RTF_CACHE routes are ignored */ - return !(rt->fib6_flags & RTF_ADDRCONF) && - rt->fib6_nh->fib_nh_gw_family; -} - static struct fib6_info * mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry) { @@ -5009,37 +5007,6 @@ mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry) list)->rt; } -static struct mlxsw_sp_fib6_entry * -mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct fib6_info *nrt, bool replace) -{ - struct mlxsw_sp_fib6_entry *fib6_entry; - - if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace) - return NULL; - - list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { - struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); - - /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same - * virtual router. 
- */ - if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id) - continue; - if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id) - break; - if (rt->fib6_metric < nrt->fib6_metric) - continue; - if (rt->fib6_metric == nrt->fib6_metric && - mlxsw_sp_fib6_rt_can_mp(rt)) - return fib6_entry; - if (rt->fib6_metric > nrt->fib6_metric) - break; - } - - return NULL; -} - static struct mlxsw_sp_rt6 * mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry, const struct fib6_info *rt) @@ -5225,6 +5192,11 @@ static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp, &nh_grp->fib_list); fib6_entry->common.nh_group = nh_grp; + /* The route and the nexthop are described by the same struct, so we + * need to the update the nexthop offload indication for the new route. + */ + __mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry); + return 0; } @@ -5257,16 +5229,16 @@ mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp, * currently associated with it in the device's table is that * of the old group. Start using the new one instead. */ - err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common); + err = mlxsw_sp_fib_entry_update(mlxsw_sp, &fib6_entry->common); if (err) - goto err_fib_node_entry_add; + goto err_fib_entry_update; if (list_empty(&old_nh_grp->fib_list)) mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp); return 0; -err_fib_node_entry_add: +err_fib_entry_update: mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common); err_nexthop6_group_get: list_add_tail(&fib6_entry->common.nexthop_group_node, @@ -5350,7 +5322,7 @@ static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp, else if (rt->fib6_type == RTN_BLACKHOLE) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE; else if (rt->fib6_flags & RTF_REJECT) - fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE; else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt)) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; else @@ -5431,112 +5403,13 @@ static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp, } static struct mlxsw_sp_fib6_entry * -mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct fib6_info *nrt, bool replace) -{ - struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL; - - list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { - struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); - - if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id) - continue; - if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id) - break; - if (replace && rt->fib6_metric == nrt->fib6_metric) { - if (mlxsw_sp_fib6_rt_can_mp(rt) == - mlxsw_sp_fib6_rt_can_mp(nrt)) - return fib6_entry; - if (mlxsw_sp_fib6_rt_can_mp(nrt)) - fallback = fallback ?: fib6_entry; - } - if (rt->fib6_metric > nrt->fib6_metric) - return fallback ?: fib6_entry; - } - - return fallback; -} - -static int -mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry, - bool *p_replace) -{ - struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node; - struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry); - struct mlxsw_sp_fib6_entry *fib6_entry; - - fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, *p_replace); - - if (*p_replace && !fib6_entry) - *p_replace = false; - - if (fib6_entry) { - list_add_tail(&new6_entry->common.list, - &fib6_entry->common.list); - } else { - struct mlxsw_sp_fib6_entry *last; - - list_for_each_entry(last, &fib_node->entry_list, common.list) { - struct fib6_info *rt = 
mlxsw_sp_fib6_entry_rt(last); - - if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id) - break; - fib6_entry = last; - } - - if (fib6_entry) - list_add(&new6_entry->common.list, - &fib6_entry->common.list); - else - list_add(&new6_entry->common.list, - &fib_node->entry_list); - } - - return 0; -} - -static void -mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry) -{ - list_del(&fib6_entry->common.list); -} - -static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib6_entry *fib6_entry, - bool *p_replace) -{ - int err; - - err = mlxsw_sp_fib6_node_list_insert(fib6_entry, p_replace); - if (err) - return err; - - err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common); - if (err) - goto err_fib_node_entry_add; - - return 0; - -err_fib_node_entry_add: - mlxsw_sp_fib6_node_list_remove(fib6_entry); - return err; -} - -static void -mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib6_entry *fib6_entry) -{ - mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common); - mlxsw_sp_fib6_node_list_remove(fib6_entry); -} - -static struct mlxsw_sp_fib6_entry * mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp, const struct fib6_info *rt) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; struct mlxsw_sp_fib *fib; + struct fib6_info *cmp_rt; struct mlxsw_sp_vr *vr; vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id); @@ -5550,40 +5423,44 @@ mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp, if (!fib_node) return NULL; - list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { - struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry); - - if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id && - rt->fib6_metric == iter_rt->fib6_metric && - mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt)) - return fib6_entry; - } + fib6_entry = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib6_entry, common); + cmp_rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + if (rt->fib6_table->tb6_id == cmp_rt->fib6_table->tb6_id && + rt->fib6_metric == cmp_rt->fib6_metric && + mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt)) + return fib6_entry; return NULL; } -static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib6_entry *fib6_entry, - bool replace) +static bool mlxsw_sp_fib6_allow_replace(struct mlxsw_sp_fib6_entry *fib6_entry) { struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node; - struct mlxsw_sp_fib6_entry *replaced; + struct mlxsw_sp_fib6_entry *fib6_replaced; + struct fib6_info *rt, *rt_replaced; - if (!replace) - return; + if (!fib_node->fib_entry) + return true; - replaced = list_next_entry(fib6_entry, common.list); + fib6_replaced = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib6_entry, + common); + rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + rt_replaced = mlxsw_sp_fib6_entry_rt(fib6_replaced); + if (rt->fib6_table->tb6_id == RT_TABLE_MAIN && + rt_replaced->fib6_table->tb6_id == RT_TABLE_LOCAL) + return false; - mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced); - mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced); - mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return true; } -static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, - struct fib6_info **rt_arr, - unsigned int nrt6, bool replace) +static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp, + struct fib6_info **rt_arr, + unsigned int nrt6) { - struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_fib6_entry *fib6_entry, *fib6_replaced; + struct 
mlxsw_sp_fib_entry *replaced; struct mlxsw_sp_fib_node *fib_node; struct fib6_info *rt = rt_arr[0]; int err; @@ -5605,18 +5482,6 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, if (IS_ERR(fib_node)) return PTR_ERR(fib_node); - /* Before creating a new entry, try to append route to an existing - * multipath entry. - */ - fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace); - if (fib6_entry) { - err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, - rt_arr, nrt6); - if (err) - goto err_fib6_entry_nexthop_add; - return 0; - } - fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr, nrt6); if (IS_ERR(fib6_entry)) { @@ -5624,17 +5489,76 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, goto err_fib6_entry_create; } - err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, &replace); + if (!mlxsw_sp_fib6_allow_replace(fib6_entry)) { + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return 0; + } + + replaced = fib_node->fib_entry; + err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib6_entry->common); if (err) - goto err_fib6_node_entry_link; + goto err_fib_node_entry_link; - mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace); + /* Nothing to replace */ + if (!replaced) + return 0; + + mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced); + fib6_replaced = container_of(replaced, struct mlxsw_sp_fib6_entry, + common); + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_replaced); return 0; -err_fib6_node_entry_link: +err_fib_node_entry_link: + fib_node->fib_entry = replaced; mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); err_fib6_entry_create: + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return err; +} + +static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp, + struct fib6_info **rt_arr, + unsigned int nrt6) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_fib_node *fib_node; + struct fib6_info *rt = rt_arr[0]; + int err; + + if (mlxsw_sp->router->aborted) + return 0; + + if (rt->fib6_src.plen) + return -EINVAL; + + if (mlxsw_sp_fib6_rt_should_ignore(rt)) + return 0; + + fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id, + &rt->fib6_dst.addr, + sizeof(rt->fib6_dst.addr), + rt->fib6_dst.plen, + MLXSW_SP_L3_PROTO_IPV6); + if (IS_ERR(fib_node)) + return PTR_ERR(fib_node); + + if (WARN_ON_ONCE(!fib_node->fib_entry)) { + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return -EINVAL; + } + + fib6_entry = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib6_entry, common); + err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt_arr, + nrt6); + if (err) + goto err_fib6_entry_nexthop_add; + + return 0; + err_fib6_entry_nexthop_add: mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); return err; @@ -5654,8 +5578,13 @@ static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, if (mlxsw_sp_fib6_rt_should_ignore(rt)) return; + /* Multipath routes are first added to the FIB trie and only then + * notified. If we vetoed the addition, we will get a delete + * notification for a route we do not have. Therefore, do not warn if + * route was not found. 
+ */ fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt); - if (WARN_ON(!fib6_entry)) + if (!fib6_entry) return; /* If not all the nexthops are deleted, then only reduce the nexthop @@ -5669,7 +5598,7 @@ static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, fib_node = fib6_entry->common.fib_node; - mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry); + mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib6_entry->common); mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } @@ -5823,39 +5752,25 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { - struct mlxsw_sp_fib4_entry *fib4_entry, *tmp; - - list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list, - common.list) { - bool do_break = &tmp->common.list == &fib_node->entry_list; + struct mlxsw_sp_fib4_entry *fib4_entry; - mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry); - mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); - mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); - /* Break when entry list is empty and node was freed. - * Otherwise, we'll access freed memory in the next - * iteration. - */ - if (do_break) - break; - } + fib4_entry = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib4_entry, common); + mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { - struct mlxsw_sp_fib6_entry *fib6_entry, *tmp; - - list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list, - common.list) { - bool do_break = &tmp->common.list == &fib_node->entry_list; + struct mlxsw_sp_fib6_entry *fib6_entry; - mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry); - mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); - mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); - if (do_break) - break; - } + fib6_entry = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib6_entry, common); + mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry); + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp, @@ -5908,6 +5823,16 @@ static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) continue; mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6); } + + /* After flushing all the routes, it is not possible anyone is still + * using the adjacency index that is discarding packets, so free it in + * case it was allocated. 
+ */ + if (!mlxsw_sp->router->adj_discard_index_valid) + return; + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, + mlxsw_sp->router->adj_discard_index); + mlxsw_sp->router->adj_discard_index_valid = false; } static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp) @@ -5996,7 +5921,6 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) struct mlxsw_sp_fib_event_work *fib_work = container_of(work, struct mlxsw_sp_fib_event_work, work); struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; - bool replace, append; int err; /* Protect internal structures from changes */ @@ -6004,13 +5928,9 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) mlxsw_sp_span_respin(mlxsw_sp); switch (fib_work->event) { - case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: - replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; - append = fib_work->event == FIB_EVENT_ENTRY_APPEND; - err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info, - replace, append); + case FIB_EVENT_ENTRY_REPLACE: + err = mlxsw_sp_router_fib4_replace(mlxsw_sp, + &fib_work->fen_info); if (err) mlxsw_sp_router_fib_abort(mlxsw_sp); fib_info_put(fib_work->fen_info.fi); @@ -6019,12 +5939,6 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info); fib_info_put(fib_work->fen_info.fi); break; - case FIB_EVENT_RULE_ADD: - /* if we get here, a rule was added that we do not support. - * just do the fib_abort - */ - mlxsw_sp_router_fib_abort(mlxsw_sp); - break; case FIB_EVENT_NH_ADD: /* fall through */ case FIB_EVENT_NH_DEL: mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event, @@ -6041,20 +5955,24 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) struct mlxsw_sp_fib_event_work *fib_work = container_of(work, struct mlxsw_sp_fib_event_work, work); struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; - bool replace; int err; rtnl_lock(); mlxsw_sp_span_respin(mlxsw_sp); switch (fib_work->event) { - case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_ADD: - replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; - err = mlxsw_sp_router_fib6_add(mlxsw_sp, - fib_work->fib6_work.rt_arr, - fib_work->fib6_work.nrt6, - replace); + case FIB_EVENT_ENTRY_REPLACE: + err = mlxsw_sp_router_fib6_replace(mlxsw_sp, + fib_work->fib6_work.rt_arr, + fib_work->fib6_work.nrt6); + if (err) + mlxsw_sp_router_fib_abort(mlxsw_sp); + mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); + break; + case FIB_EVENT_ENTRY_APPEND: + err = mlxsw_sp_router_fib6_append(mlxsw_sp, + fib_work->fib6_work.rt_arr, + fib_work->fib6_work.nrt6); if (err) mlxsw_sp_router_fib_abort(mlxsw_sp); mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); @@ -6065,12 +5983,6 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) fib_work->fib6_work.nrt6); mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); break; - case FIB_EVENT_RULE_ADD: - /* if we get here, a rule was added that we do not support. - * just do the fib_abort - */ - mlxsw_sp_router_fib_abort(mlxsw_sp); - break; } rtnl_unlock(); kfree(fib_work); @@ -6112,12 +6024,6 @@ static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work) &fib_work->ven_info); dev_put(fib_work->ven_info.dev); break; - case FIB_EVENT_RULE_ADD: - /* if we get here, a rule was added that we do not support. 
- * just do the fib_abort - */ - mlxsw_sp_router_fib_abort(mlxsw_sp); - break; } rtnl_unlock(); kfree(fib_work); @@ -6131,8 +6037,6 @@ static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: fen_info = container_of(info, struct fib_entry_notifier_info, info); @@ -6160,7 +6064,7 @@ static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_APPEND: /* fall through */ case FIB_EVENT_ENTRY_DEL: fen6_info = container_of(info, struct fib6_entry_notifier_info, info); @@ -6213,7 +6117,7 @@ static int mlxsw_sp_router_fib_rule_event(unsigned long event, rule = fr_info->rule; /* Rule only affects locally generated traffic */ - if (rule->iifindex == info->net->loopback_dev->ifindex) + if (rule->iifindex == mlxsw_sp_net(mlxsw_sp)->loopback_dev->ifindex) return 0; switch (info->family) { @@ -6250,8 +6154,7 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, struct mlxsw_sp_router *router; int err; - if (!net_eq(info->net, &init_net) || - (info->family != AF_INET && info->family != AF_INET6 && + if ((info->family != AF_INET && info->family != AF_INET6 && info->family != RTNL_FAMILY_IPMR && info->family != RTNL_FAMILY_IP6MR)) return NOTIFY_DONE; @@ -6263,12 +6166,10 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, case FIB_EVENT_RULE_DEL: err = mlxsw_sp_router_fib_rule_event(event, info, router->mlxsw_sp); - if (!err || info->extack) - return notifier_from_errno(err); - break; - case FIB_EVENT_ENTRY_ADD: + return notifier_from_errno(err); + case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ + case FIB_EVENT_ENTRY_APPEND: if (router->aborted) { NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. 
Not configuring route"); return notifier_from_errno(-EINVAL); @@ -6997,6 +6898,9 @@ static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp, for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { rif = mlxsw_sp->router->rifs[i]; + if (rif && rif->ops && + rif->ops->type == MLXSW_SP_RIF_TYPE_IPIP_LB) + continue; if (rif && rif->dev && rif->dev != dev && !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr, mlxsw_sp->mac_mask)) { @@ -7940,8 +7844,18 @@ mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp) { + int err; + mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr; INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list); + + err = mlxsw_sp_ipip_ecn_encap_init(mlxsw_sp); + if (err) + return err; + err = mlxsw_sp_ipip_ecn_decap_init(mlxsw_sp); + if (err) + return err; + return mlxsw_sp_ipip_config_tigcr(mlxsw_sp); } @@ -7974,9 +7888,10 @@ static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field) mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true); } -static void mlxsw_sp_mp4_hash_init(char *recr2_pl) +static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp, char *recr2_pl) { - bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy; + struct net *net = mlxsw_sp_net(mlxsw_sp); + bool only_l3 = !net->ipv4.sysctl_fib_multipath_hash_policy; mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP); @@ -7991,9 +7906,9 @@ static void mlxsw_sp_mp4_hash_init(char *recr2_pl) mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT); } -static void mlxsw_sp_mp6_hash_init(char *recr2_pl) +static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp, char *recr2_pl) { - bool only_l3 = !ip6_multipath_hash_policy(&init_net); + bool only_l3 = !ip6_multipath_hash_policy(mlxsw_sp_net(mlxsw_sp)); mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP); @@ -8021,8 +7936,8 @@ static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp) seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0); mlxsw_reg_recr2_pack(recr2_pl, seed); - mlxsw_sp_mp4_hash_init(recr2_pl); - mlxsw_sp_mp6_hash_init(recr2_pl); + mlxsw_sp_mp4_hash_init(mlxsw_sp, recr2_pl); + mlxsw_sp_mp6_hash_init(mlxsw_sp, recr2_pl); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl); } @@ -8053,7 +7968,8 @@ static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp) static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) { - bool usp = init_net.ipv4.sysctl_ip_fwd_update_priority; + struct net *net = mlxsw_sp_net(mlxsw_sp); + bool usp = net->ipv4.sysctl_ip_fwd_update_priority; char rgcr_pl[MLXSW_REG_RGCR_LEN]; u64 max_rifs; int err; @@ -8079,7 +7995,8 @@ static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); } -int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) +int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, + struct netlink_ext_ack *extack) { struct mlxsw_sp_router *router; int err; @@ -8155,8 +8072,9 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) goto err_dscp_init; mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event; - err = register_fib_notifier(&mlxsw_sp->router->fib_nb, - mlxsw_sp_router_fib_dump_flush); + err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->router->fib_nb, + mlxsw_sp_router_fib_dump_flush, extack); if (err) goto err_register_fib_notifier; @@ -8195,7 +8113,8 @@ err_register_inetaddr_notifier: void mlxsw_sp_router_fini(struct 
mlxsw_sp *mlxsw_sp) { - unregister_fib_notifier(&mlxsw_sp->router->fib_nb); + unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->router->fib_nb); unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb); mlxsw_sp_neigh_fini(mlxsw_sp); mlxsw_sp_vrs_fini(mlxsw_sp); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index cc1de91e8217..c9b94f435cdd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -104,4 +104,7 @@ static inline bool mlxsw_sp_l3addr_eq(const union mlxsw_sp_l3addr *addr1, return !memcmp(addr1, addr2, sizeof(*addr1)); } +int mlxsw_sp_ipip_ecn_encap_init(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_ipip_ecn_decap_init(struct mlxsw_sp *mlxsw_sp); + #endif /* _MLXSW_ROUTER_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index 560a60e522f9..0cdd7954a085 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -14,8 +14,23 @@ #include "spectrum_span.h" #include "spectrum_switchdev.h" +static u64 mlxsw_sp_span_occ_get(void *priv) +{ + const struct mlxsw_sp *mlxsw_sp = priv; + u64 occ = 0; + int i; + + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + if (mlxsw_sp->span.entries[i].ref_count) + occ++; + } + + return occ; +} + int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp) { + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); int i; if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN)) @@ -36,13 +51,19 @@ int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp) curr->id = i; } + devlink_resource_occ_get_register(devlink, MLXSW_SP_RESOURCE_SPAN, + mlxsw_sp_span_occ_get, mlxsw_sp); + return 0; } void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp) { + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); int i; + devlink_resource_occ_get_unregister(devlink, MLXSW_SP_RESOURCE_SPAN); + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; @@ -727,33 +748,50 @@ static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port) return false; } -static int mlxsw_sp_span_mtu_to_buffsize(const struct mlxsw_sp *mlxsw_sp, - int mtu) +static int +mlxsw_sp_span_port_buffsize_update(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu) { - return mlxsw_sp_bytes_cells(mlxsw_sp, mtu * 5 / 2) + 1; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char sbib_pl[MLXSW_REG_SBIB_LEN]; + u32 buffsize; + u32 speed; + int err; + + err = mlxsw_sp_port_speed_get(mlxsw_sp_port, &speed); + if (err) + return err; + if (speed == SPEED_UNKNOWN) + speed = 0; + + buffsize = mlxsw_sp_span_buffsize_get(mlxsw_sp, speed, mtu); + mlxsw_reg_sbib_pack(sbib_pl, mlxsw_sp_port->local_port, buffsize); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); } int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu) { - struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; - char sbib_pl[MLXSW_REG_SBIB_LEN]; - int err; - /* If port is egress mirrored, the shared buffer size should be * updated according to the mtu value */ - if (mlxsw_sp_span_is_egress_mirror(port)) { - u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, mtu); + if (mlxsw_sp_span_is_egress_mirror(port)) + return mlxsw_sp_span_port_buffsize_update(port, mtu); + return 0; +} - mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize); - err = mlxsw_reg_write(mlxsw_sp->core, 
MLXSW_REG(sbib), sbib_pl); - if (err) { - netdev_err(port->dev, "Could not update shared buffer for mirroring\n"); - return err; - } - } +void mlxsw_sp_span_speed_update_work(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct mlxsw_sp_port *mlxsw_sp_port; - return 0; + mlxsw_sp_port = container_of(dwork, struct mlxsw_sp_port, + span.speed_update_dw); + + /* If port is egress mirrored, the shared buffer size should be + * updated according to the speed value. + */ + if (mlxsw_sp_span_is_egress_mirror(mlxsw_sp_port)) + mlxsw_sp_span_port_buffsize_update(mlxsw_sp_port, + mlxsw_sp_port->dev->mtu); } static struct mlxsw_sp_span_inspected_port * @@ -815,15 +853,9 @@ mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port, /* if it is an egress SPAN, bind a shared buffer to it */ if (type == MLXSW_SP_SPAN_EGRESS) { - u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, - port->dev->mtu); - - mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); - if (err) { - netdev_err(port->dev, "Could not create shared buffer for mirroring\n"); + err = mlxsw_sp_span_port_buffsize_update(port, port->dev->mtu); + if (err) return err; - } } if (bind) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h index 5e04252f2a11..59724335525f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h @@ -74,5 +74,6 @@ void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_span_entry *span_entry); int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu); +void mlxsw_sp_span_speed_update_work(struct work_struct *work); #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 5ecb45118400..a3af171c6358 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -2591,7 +2591,7 @@ __mlxsw_sp_fdb_notify_mac_uc_tunnel_process(struct mlxsw_sp *mlxsw_sp, if (err) return err; - dev = __dev_get_by_index(&init_net, nve_ifindex); + dev = __dev_get_by_index(mlxsw_sp_net(mlxsw_sp), nve_ifindex); if (!dev) return -EINVAL; *nve_dev = dev; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c new file mode 100644 index 000000000000..60205aa3f6a5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c @@ -0,0 +1,416 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2019 Mellanox Technologies. 
All rights reserved */ + +#include <linux/kernel.h> +#include <net/devlink.h> +#include <uapi/linux/devlink.h> + +#include "core.h" +#include "reg.h" +#include "spectrum.h" + +/* All driver-specific traps must be documented in + * Documentation/networking/devlink/mlxsw.rst + */ +enum { + DEVLINK_MLXSW_TRAP_ID_BASE = DEVLINK_TRAP_GENERIC_ID_MAX, + DEVLINK_MLXSW_TRAP_ID_IRIF_DISABLED, + DEVLINK_MLXSW_TRAP_ID_ERIF_DISABLED, +}; + +#define DEVLINK_MLXSW_TRAP_NAME_IRIF_DISABLED \ + "irif_disabled" +#define DEVLINK_MLXSW_TRAP_NAME_ERIF_DISABLED \ + "erif_disabled" + +#define MLXSW_SP_TRAP_METADATA DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT + +static void mlxsw_sp_rx_drop_listener(struct sk_buff *skb, u8 local_port, + void *priv); +static void mlxsw_sp_rx_exception_listener(struct sk_buff *skb, u8 local_port, + void *trap_ctx); + +#define MLXSW_SP_TRAP_DROP(_id, _group_id) \ + DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \ + DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + MLXSW_SP_TRAP_METADATA) + +#define MLXSW_SP_TRAP_DRIVER_DROP(_id, _group_id) \ + DEVLINK_TRAP_DRIVER(DROP, DROP, DEVLINK_MLXSW_TRAP_ID_##_id, \ + DEVLINK_MLXSW_TRAP_NAME_##_id, \ + DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + MLXSW_SP_TRAP_METADATA) + +#define MLXSW_SP_TRAP_EXCEPTION(_id, _group_id) \ + DEVLINK_TRAP_GENERIC(EXCEPTION, TRAP, _id, \ + DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + MLXSW_SP_TRAP_METADATA) + +#define MLXSW_SP_RXL_DISCARD(_id, _group_id) \ + MLXSW_RXL(mlxsw_sp_rx_drop_listener, DISCARD_##_id, SET_FW_DEFAULT, \ + false, SP_##_group_id, DISCARD) + +#define MLXSW_SP_RXL_EXCEPTION(_id, _group_id, _action) \ + MLXSW_RXL(mlxsw_sp_rx_exception_listener, _id, \ + _action, false, SP_##_group_id, DISCARD) + +static struct devlink_trap mlxsw_sp_traps_arr[] = { + MLXSW_SP_TRAP_DROP(SMAC_MC, L2_DROPS), + MLXSW_SP_TRAP_DROP(VLAN_TAG_MISMATCH, L2_DROPS), + MLXSW_SP_TRAP_DROP(INGRESS_VLAN_FILTER, L2_DROPS), + MLXSW_SP_TRAP_DROP(INGRESS_STP_FILTER, L2_DROPS), + MLXSW_SP_TRAP_DROP(EMPTY_TX_LIST, L2_DROPS), + MLXSW_SP_TRAP_DROP(PORT_LOOPBACK_FILTER, L2_DROPS), + MLXSW_SP_TRAP_DROP(BLACKHOLE_ROUTE, L3_DROPS), + MLXSW_SP_TRAP_DROP(NON_IP_PACKET, L3_DROPS), + MLXSW_SP_TRAP_DROP(UC_DIP_MC_DMAC, L3_DROPS), + MLXSW_SP_TRAP_DROP(DIP_LB, L3_DROPS), + MLXSW_SP_TRAP_DROP(SIP_MC, L3_DROPS), + MLXSW_SP_TRAP_DROP(SIP_LB, L3_DROPS), + MLXSW_SP_TRAP_DROP(CORRUPTED_IP_HDR, L3_DROPS), + MLXSW_SP_TRAP_DROP(IPV4_SIP_BC, L3_DROPS), + MLXSW_SP_TRAP_DROP(IPV6_MC_DIP_RESERVED_SCOPE, L3_DROPS), + MLXSW_SP_TRAP_DROP(IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE, L3_DROPS), + MLXSW_SP_TRAP_EXCEPTION(MTU_ERROR, L3_DROPS), + MLXSW_SP_TRAP_EXCEPTION(TTL_ERROR, L3_DROPS), + MLXSW_SP_TRAP_EXCEPTION(RPF, L3_DROPS), + MLXSW_SP_TRAP_EXCEPTION(REJECT_ROUTE, L3_DROPS), + MLXSW_SP_TRAP_EXCEPTION(UNRESOLVED_NEIGH, L3_DROPS), + MLXSW_SP_TRAP_EXCEPTION(IPV4_LPM_UNICAST_MISS, L3_DROPS), + MLXSW_SP_TRAP_EXCEPTION(IPV6_LPM_UNICAST_MISS, L3_DROPS), + MLXSW_SP_TRAP_DRIVER_DROP(IRIF_DISABLED, L3_DROPS), + MLXSW_SP_TRAP_DRIVER_DROP(ERIF_DISABLED, L3_DROPS), + MLXSW_SP_TRAP_DROP(NON_ROUTABLE, L3_DROPS), + MLXSW_SP_TRAP_EXCEPTION(DECAP_ERROR, TUNNEL_DROPS), + MLXSW_SP_TRAP_DROP(OVERLAY_SMAC_MC, TUNNEL_DROPS), +}; + +static struct mlxsw_listener mlxsw_sp_listeners_arr[] = { + MLXSW_SP_RXL_DISCARD(ING_PACKET_SMAC_MC, L2_DISCARDS), + MLXSW_SP_RXL_DISCARD(ING_SWITCH_VTAG_ALLOW, L2_DISCARDS), + MLXSW_SP_RXL_DISCARD(ING_SWITCH_VLAN, L2_DISCARDS), + MLXSW_SP_RXL_DISCARD(ING_SWITCH_STP, L2_DISCARDS), + MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_UC, L2_DISCARDS), + MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_MC_NULL, 
L2_DISCARDS), + MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_LB, L2_DISCARDS), + MLXSW_SP_RXL_DISCARD(ROUTER2, L3_DISCARDS), + MLXSW_SP_RXL_DISCARD(ING_ROUTER_NON_IP_PACKET, L3_DISCARDS), + MLXSW_SP_RXL_DISCARD(ING_ROUTER_UC_DIP_MC_DMAC, L3_DISCARDS), + MLXSW_SP_RXL_DISCARD(ING_ROUTER_DIP_LB, L3_DISCARDS), + MLXSW_SP_RXL_DISCARD(ING_ROUTER_SIP_MC, L3_DISCARDS), + MLXSW_SP_RXL_DISCARD(ING_ROUTER_SIP_LB, L3_DISCARDS), + MLXSW_SP_RXL_DISCARD(ING_ROUTER_CORRUPTED_IP_HDR, L3_DISCARDS), + MLXSW_SP_RXL_DISCARD(ING_ROUTER_IPV4_SIP_BC, L3_DISCARDS), + MLXSW_SP_RXL_DISCARD(IPV6_MC_DIP_RESERVED_SCOPE, L3_DISCARDS), + MLXSW_SP_RXL_DISCARD(IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE, L3_DISCARDS), + MLXSW_SP_RXL_EXCEPTION(MTUERROR, ROUTER_EXP, TRAP_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(TTLERROR, ROUTER_EXP, TRAP_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(RPF, RPF, TRAP_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(RTR_INGRESS1, REMOTE_ROUTE, TRAP_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV4, HOST_MISS, TRAP_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV6, HOST_MISS, TRAP_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER3, REMOTE_ROUTE, + TRAP_EXCEPTION_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM4, ROUTER_EXP, + TRAP_EXCEPTION_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM6, ROUTER_EXP, + TRAP_EXCEPTION_TO_CPU), + MLXSW_SP_RXL_DISCARD(ROUTER_IRIF_EN, L3_DISCARDS), + MLXSW_SP_RXL_DISCARD(ROUTER_ERIF_EN, L3_DISCARDS), + MLXSW_SP_RXL_DISCARD(NON_ROUTABLE, L3_DISCARDS), + MLXSW_SP_RXL_EXCEPTION(DECAP_ECN0, ROUTER_EXP, TRAP_EXCEPTION_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(IPIP_DECAP_ERROR, ROUTER_EXP, + TRAP_EXCEPTION_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(DISCARD_DEC_PKT, TUNNEL_DISCARDS, + TRAP_EXCEPTION_TO_CPU), + MLXSW_SP_RXL_DISCARD(OVERLAY_SMAC_MC, TUNNEL_DISCARDS), +}; + +/* Mapping between hardware trap and devlink trap. Multiple hardware traps can + * be mapped to the same devlink trap. Order is according to + * 'mlxsw_sp_listeners_arr'. 
+ */
+static u16 mlxsw_sp_listener_devlink_map[] = {
+	DEVLINK_TRAP_GENERIC_ID_SMAC_MC,
+	DEVLINK_TRAP_GENERIC_ID_VLAN_TAG_MISMATCH,
+	DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER,
+	DEVLINK_TRAP_GENERIC_ID_INGRESS_STP_FILTER,
+	DEVLINK_TRAP_GENERIC_ID_EMPTY_TX_LIST,
+	DEVLINK_TRAP_GENERIC_ID_EMPTY_TX_LIST,
+	DEVLINK_TRAP_GENERIC_ID_PORT_LOOPBACK_FILTER,
+	DEVLINK_TRAP_GENERIC_ID_BLACKHOLE_ROUTE,
+	DEVLINK_TRAP_GENERIC_ID_NON_IP_PACKET,
+	DEVLINK_TRAP_GENERIC_ID_UC_DIP_MC_DMAC,
+	DEVLINK_TRAP_GENERIC_ID_DIP_LB,
+	DEVLINK_TRAP_GENERIC_ID_SIP_MC,
+	DEVLINK_TRAP_GENERIC_ID_SIP_LB,
+	DEVLINK_TRAP_GENERIC_ID_CORRUPTED_IP_HDR,
+	DEVLINK_TRAP_GENERIC_ID_IPV4_SIP_BC,
+	DEVLINK_TRAP_GENERIC_ID_IPV6_MC_DIP_RESERVED_SCOPE,
+	DEVLINK_TRAP_GENERIC_ID_IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE,
+	DEVLINK_TRAP_GENERIC_ID_MTU_ERROR,
+	DEVLINK_TRAP_GENERIC_ID_TTL_ERROR,
+	DEVLINK_TRAP_GENERIC_ID_RPF,
+	DEVLINK_TRAP_GENERIC_ID_REJECT_ROUTE,
+	DEVLINK_TRAP_GENERIC_ID_UNRESOLVED_NEIGH,
+	DEVLINK_TRAP_GENERIC_ID_UNRESOLVED_NEIGH,
+	DEVLINK_TRAP_GENERIC_ID_UNRESOLVED_NEIGH,
+	DEVLINK_TRAP_GENERIC_ID_IPV4_LPM_UNICAST_MISS,
+	DEVLINK_TRAP_GENERIC_ID_IPV6_LPM_UNICAST_MISS,
+	DEVLINK_MLXSW_TRAP_ID_IRIF_DISABLED,
+	DEVLINK_MLXSW_TRAP_ID_ERIF_DISABLED,
+	DEVLINK_TRAP_GENERIC_ID_NON_ROUTABLE,
+	DEVLINK_TRAP_GENERIC_ID_DECAP_ERROR,
+	DEVLINK_TRAP_GENERIC_ID_DECAP_ERROR,
+	DEVLINK_TRAP_GENERIC_ID_DECAP_ERROR,
+	DEVLINK_TRAP_GENERIC_ID_OVERLAY_SMAC_MC,
+};
+
+static int mlxsw_sp_rx_listener(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
+				u8 local_port,
+				struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	struct mlxsw_sp_port_pcpu_stats *pcpu_stats;
+
+	if (unlikely(!mlxsw_sp_port)) {
+		dev_warn_ratelimited(mlxsw_sp->bus_info->dev, "Port %d: skb received for non-existent port\n",
+				     local_port);
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	skb->dev = mlxsw_sp_port->dev;
+
+	pcpu_stats = this_cpu_ptr(mlxsw_sp_port->pcpu_stats);
+	u64_stats_update_begin(&pcpu_stats->syncp);
+	pcpu_stats->rx_packets++;
+	pcpu_stats->rx_bytes += skb->len;
+	u64_stats_update_end(&pcpu_stats->syncp);
+
+	skb->protocol = eth_type_trans(skb, skb->dev);
+
+	return 0;
+}
+
+static void mlxsw_sp_rx_drop_listener(struct sk_buff *skb, u8 local_port,
+				      void *trap_ctx)
+{
+	struct devlink_port *in_devlink_port;
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	struct mlxsw_sp *mlxsw_sp;
+	struct devlink *devlink;
+
+	mlxsw_sp = devlink_trap_ctx_priv(trap_ctx);
+	mlxsw_sp_port = mlxsw_sp->ports[local_port];
+
+	if (mlxsw_sp_rx_listener(mlxsw_sp, skb, local_port, mlxsw_sp_port))
+		return;
+
+	devlink = priv_to_devlink(mlxsw_sp->core);
+	in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core,
+							   local_port);
+	skb_push(skb, ETH_HLEN);
+	devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port);
+	consume_skb(skb);
+}
+
+static void mlxsw_sp_rx_exception_listener(struct sk_buff *skb, u8 local_port,
+					   void *trap_ctx)
+{
+	struct devlink_port *in_devlink_port;
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	struct mlxsw_sp *mlxsw_sp;
+	struct devlink *devlink;
+
+	mlxsw_sp = devlink_trap_ctx_priv(trap_ctx);
+	mlxsw_sp_port = mlxsw_sp->ports[local_port];
+
+	if (mlxsw_sp_rx_listener(mlxsw_sp, skb, local_port, mlxsw_sp_port))
+		return;
+
+	devlink = priv_to_devlink(mlxsw_sp->core);
+	in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core,
+							   local_port);
+	skb_push(skb, ETH_HLEN);
+	devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port);
+	skb_pull(skb, ETH_HLEN);
+	skb->offload_fwd_mark = 1;
+	netif_receive_skb(skb);
+}
+
+int mlxsw_sp_devlink_traps_init(struct mlxsw_sp *mlxsw_sp)
+{
+	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+
+	if (WARN_ON(ARRAY_SIZE(mlxsw_sp_listener_devlink_map) !=
+		    ARRAY_SIZE(mlxsw_sp_listeners_arr)))
+		return -EINVAL;
+
+	return devlink_traps_register(devlink, mlxsw_sp_traps_arr,
+				      ARRAY_SIZE(mlxsw_sp_traps_arr),
+				      mlxsw_sp);
+}
+
+void mlxsw_sp_devlink_traps_fini(struct mlxsw_sp *mlxsw_sp)
+{
+	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+
+	devlink_traps_unregister(devlink, mlxsw_sp_traps_arr,
+				 ARRAY_SIZE(mlxsw_sp_traps_arr));
+}
+
+int mlxsw_sp_trap_init(struct mlxsw_core *mlxsw_core,
+		       const struct devlink_trap *trap, void *trap_ctx)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener_devlink_map); i++) {
+		struct mlxsw_listener *listener;
+		int err;
+
+		if (mlxsw_sp_listener_devlink_map[i] != trap->id)
+			continue;
+		listener = &mlxsw_sp_listeners_arr[i];
+
+		err = mlxsw_core_trap_register(mlxsw_core, listener, trap_ctx);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+void mlxsw_sp_trap_fini(struct mlxsw_core *mlxsw_core,
+			const struct devlink_trap *trap, void *trap_ctx)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener_devlink_map); i++) {
+		struct mlxsw_listener *listener;
+
+		if (mlxsw_sp_listener_devlink_map[i] != trap->id)
+			continue;
+		listener = &mlxsw_sp_listeners_arr[i];
+
+		mlxsw_core_trap_unregister(mlxsw_core, listener, trap_ctx);
+	}
+}
+
+int mlxsw_sp_trap_action_set(struct mlxsw_core *mlxsw_core,
+			     const struct devlink_trap *trap,
+			     enum devlink_trap_action action)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener_devlink_map); i++) {
+		enum mlxsw_reg_hpkt_action hw_action;
+		struct mlxsw_listener *listener;
+		int err;
+
+		if (mlxsw_sp_listener_devlink_map[i] != trap->id)
+			continue;
+		listener = &mlxsw_sp_listeners_arr[i];
+
+		switch (action) {
+		case DEVLINK_TRAP_ACTION_DROP:
+			hw_action = MLXSW_REG_HPKT_ACTION_SET_FW_DEFAULT;
+			break;
+		case DEVLINK_TRAP_ACTION_TRAP:
+			hw_action = MLXSW_REG_HPKT_ACTION_TRAP_EXCEPTION_TO_CPU;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		err = mlxsw_core_trap_action_set(mlxsw_core, listener,
+						 hw_action);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+#define MLXSW_SP_DISCARD_POLICER_ID (MLXSW_REG_HTGT_TRAP_GROUP_MAX + 1)
+
+static int
+mlxsw_sp_trap_group_policer_init(struct mlxsw_sp *mlxsw_sp,
+				 const struct devlink_trap_group *group)
+{
+	enum mlxsw_reg_qpcr_ir_units ir_units;
+	char qpcr_pl[MLXSW_REG_QPCR_LEN];
+	u16 policer_id;
+	u8 burst_size;
+	bool is_bytes;
+	u32 rate;
+
+	switch (group->id) {
+	case DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS: /* fall through */
+	case DEVLINK_TRAP_GROUP_GENERIC_ID_L3_DROPS: /* fall through */
+	case DEVLINK_TRAP_GROUP_GENERIC_ID_TUNNEL_DROPS:
+		policer_id = MLXSW_SP_DISCARD_POLICER_ID;
+		ir_units = MLXSW_REG_QPCR_IR_UNITS_M;
+		is_bytes = false;
+		rate = 10 * 1024; /* 10Kpps */
+		burst_size = 7;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	mlxsw_reg_qpcr_pack(qpcr_pl, policer_id, ir_units, is_bytes, rate,
+			    burst_size);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpcr), qpcr_pl);
+}
+
+static int
+__mlxsw_sp_trap_group_init(struct mlxsw_sp *mlxsw_sp,
+			   const struct devlink_trap_group *group)
+{
+	char htgt_pl[MLXSW_REG_HTGT_LEN];
+	u8 priority, tc, group_id;
+	u16 policer_id;
+
+	switch (group->id) {
+	case DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS:
+		group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L2_DISCARDS;
+		policer_id = MLXSW_SP_DISCARD_POLICER_ID;
+		priority = 0;
+		tc = 1;
+		break;
+	case DEVLINK_TRAP_GROUP_GENERIC_ID_L3_DROPS:
+		group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_DISCARDS;
+		policer_id = MLXSW_SP_DISCARD_POLICER_ID;
+		priority = 0;
+		tc = 1;
+		break;
+	case DEVLINK_TRAP_GROUP_GENERIC_ID_TUNNEL_DROPS:
+		group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_TUNNEL_DISCARDS;
+		policer_id = MLXSW_SP_DISCARD_POLICER_ID;
+		priority = 0;
+		tc = 1;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	mlxsw_reg_htgt_pack(htgt_pl, group_id, policer_id, priority, tc);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(htgt), htgt_pl);
+}
+
+int mlxsw_sp_trap_group_init(struct mlxsw_core *mlxsw_core,
+			     const struct devlink_trap_group *group)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+	int err;
+
+	err = mlxsw_sp_trap_group_policer_init(mlxsw_sp, group);
+	if (err)
+		return err;
+
+	err = __mlxsw_sp_trap_group_init(mlxsw_sp, group);
+	if (err)
+		return err;
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchib.c b/drivers/net/ethernet/mellanox/mlxsw/switchib.c
index 0d9356b3f65d..4ff1e623aa76 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchib.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchib.c
@@ -446,7 +446,8 @@ static int mlxsw_sib_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
 }
 
 static int mlxsw_sib_init(struct mlxsw_core *mlxsw_core,
-			  const struct mlxsw_bus_info *mlxsw_bus_info)
+			  const struct mlxsw_bus_info *mlxsw_bus_info,
+			  struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sib *mlxsw_sib = mlxsw_core_driver_priv(mlxsw_core);
 	int err;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index bdab96f5bc70..f0e98ec8f1ee 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -299,22 +299,17 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb,
 	u64 len;
 	int err;
 
+	if (skb_cow_head(skb, MLXSW_TXHDR_LEN)) {
+		this_cpu_inc(mlxsw_sx_port->pcpu_stats->tx_dropped);
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
 	memset(skb->cb, 0, sizeof(struct mlxsw_skb_cb));
 
 	if (mlxsw_core_skb_transmit_busy(mlxsw_sx->core, &tx_info))
 		return NETDEV_TX_BUSY;
 
-	if (unlikely(skb_headroom(skb) < MLXSW_TXHDR_LEN)) {
-		struct sk_buff *skb_orig = skb;
-
-		skb = skb_realloc_headroom(skb, MLXSW_TXHDR_LEN);
-		if (!skb) {
-			this_cpu_inc(mlxsw_sx_port->pcpu_stats->tx_dropped);
-			dev_kfree_skb_any(skb_orig);
-			return NETDEV_TX_OK;
-		}
-		dev_consume_skb_any(skb_orig);
-	}
 	mlxsw_sx_txhdr_construct(skb, &tx_info);
 	/* TX header is consumed by HW on the way so we shouldn't count its
 	 * bytes as being sent.
@@ -637,12 +632,6 @@ static const struct mlxsw_sx_port_link_mode mlxsw_sx_port_link_mode[] = {
 		.speed = 50000,
 	},
 	{
-		.mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4,
-		.supported = SUPPORTED_56000baseKR4_Full,
-		.advertised = ADVERTISED_56000baseKR4_Full,
-		.speed = 56000,
-	},
-	{
 		.mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 |
 			MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 |
 			MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 |
@@ -998,6 +987,7 @@ static int __mlxsw_sx_port_eth_create(struct mlxsw_sx *mlxsw_sx, u8 local_port,
 	if (!dev)
 		return -ENOMEM;
 	SET_NETDEV_DEV(dev, mlxsw_sx->bus_info->dev);
+	dev_net_set(dev, mlxsw_core_net(mlxsw_sx->core));
 	mlxsw_sx_port = netdev_priv(dev);
 	mlxsw_sx_port->dev = dev;
 	mlxsw_sx_port->mlxsw_sx = mlxsw_sx;
@@ -1569,7 +1559,8 @@ static int mlxsw_sx_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
 }
 
 static int mlxsw_sx_init(struct mlxsw_core *mlxsw_core,
-			 const struct mlxsw_bus_info *mlxsw_bus_info)
+			 const struct mlxsw_bus_info *mlxsw_bus_info,
+			 struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sx *mlxsw_sx = mlxsw_core_driver_priv(mlxsw_core);
 	int err;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index 19202bdb5105..12e1fa998d42 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -49,6 +49,7 @@ enum {
 	MLXSW_TRAP_ID_IPV6_DHCP = 0x69,
 	MLXSW_TRAP_ID_IPV6_ALL_ROUTERS_LINK = 0x6F,
 	MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70,
+	MLXSW_TRAP_ID_RTR_INGRESS1 = 0x71,
 	MLXSW_TRAP_ID_IPV6_PIM = 0x79,
 	MLXSW_TRAP_ID_IPV6_VRRP = 0x7A,
 	MLXSW_TRAP_ID_IPV4_BGP = 0x88,
@@ -66,6 +67,36 @@ enum {
 	MLXSW_TRAP_ID_NVE_ENCAP_ARP = 0xBD,
 	MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6,
 	MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7,
+	MLXSW_TRAP_ID_DISCARD_NON_ROUTABLE = 0x11A,
+	MLXSW_TRAP_ID_DISCARD_ROUTER2 = 0x130,
+	MLXSW_TRAP_ID_DISCARD_ROUTER3 = 0x131,
+	MLXSW_TRAP_ID_DISCARD_ING_PACKET_SMAC_MC = 0x140,
+	MLXSW_TRAP_ID_DISCARD_ING_SWITCH_VTAG_ALLOW = 0x148,
+	MLXSW_TRAP_ID_DISCARD_ING_SWITCH_VLAN = 0x149,
+	MLXSW_TRAP_ID_DISCARD_ING_SWITCH_STP = 0x14A,
+	MLXSW_TRAP_ID_DISCARD_LOOKUP_SWITCH_UC = 0x150,
+	MLXSW_TRAP_ID_DISCARD_LOOKUP_SWITCH_MC_NULL = 0x151,
+	MLXSW_TRAP_ID_DISCARD_LOOKUP_SWITCH_LB = 0x152,
+	MLXSW_TRAP_ID_DISCARD_ING_ROUTER_NON_IP_PACKET = 0x160,
+	MLXSW_TRAP_ID_DISCARD_ING_ROUTER_UC_DIP_MC_DMAC = 0x161,
+	MLXSW_TRAP_ID_DISCARD_ING_ROUTER_DIP_LB = 0x162,
+	MLXSW_TRAP_ID_DISCARD_ING_ROUTER_SIP_MC = 0x163,
+	MLXSW_TRAP_ID_DISCARD_ING_ROUTER_SIP_CLASS_E = 0x164,
+	MLXSW_TRAP_ID_DISCARD_ING_ROUTER_SIP_LB = 0x165,
+	MLXSW_TRAP_ID_DISCARD_ING_ROUTER_CORRUPTED_IP_HDR = 0x167,
+	MLXSW_TRAP_ID_DISCARD_ING_ROUTER_MC_DMAC = 0x168,
+	MLXSW_TRAP_ID_DISCARD_ING_ROUTER_SIP_DIP = 0x169,
+	MLXSW_TRAP_ID_DISCARD_ING_ROUTER_IPV4_SIP_BC = 0x16A,
+	MLXSW_TRAP_ID_DISCARD_ING_ROUTER_IPV4_DIP_LOCAL_NET = 0x16B,
+	MLXSW_TRAP_ID_DISCARD_ING_ROUTER_DIP_LINK_LOCAL = 0x16C,
+	MLXSW_TRAP_ID_DISCARD_ROUTER_IRIF_EN = 0x178,
+	MLXSW_TRAP_ID_DISCARD_ROUTER_ERIF_EN = 0x179,
+	MLXSW_TRAP_ID_DISCARD_ROUTER_LPM4 = 0x17B,
+	MLXSW_TRAP_ID_DISCARD_ROUTER_LPM6 = 0x17C,
+	MLXSW_TRAP_ID_DISCARD_DEC_PKT = 0x188,
+	MLXSW_TRAP_ID_DISCARD_OVERLAY_SMAC_MC = 0x190,
+	MLXSW_TRAP_ID_DISCARD_IPV6_MC_DIP_RESERVED_SCOPE = 0x1B0,
+	MLXSW_TRAP_ID_DISCARD_IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE = 0x1B1,
 	MLXSW_TRAP_ID_ACL0 = 0x1C0,
 	/* Multicast trap used for routes with trap action */
 	MLXSW_TRAP_ID_ACL1 = 0x1C1,
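Note (illustration, not part of the patch): the trap code above keeps two parallel arrays, 'mlxsw_sp_listeners_arr' and 'mlxsw_sp_listener_devlink_map', so that entry i of the map names the devlink trap served by hardware listener i, and several listeners may share one devlink trap (e.g. both HOST_MISS traps map to UNRESOLVED_NEIGH). The short user-space C sketch below mirrors only that lookup pattern; the names hw_listener, listeners, listener_devlink_map and trap_init are hypothetical stand-ins, not kernel or devlink APIs.

/* Minimal sketch of the parallel-array mapping, with hypothetical types. */
#include <stdio.h>
#include <stddef.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

struct hw_listener {
	const char *name;	/* hardware trap name (illustrative only) */
};

enum devlink_trap_id {
	TRAP_ID_UNRESOLVED_NEIGH,
	TRAP_ID_DECAP_ERROR,
};

static const struct hw_listener listeners[] = {
	{ "HOST_MISS_IPV4" },
	{ "HOST_MISS_IPV6" },
	{ "IPIP_DECAP_ERROR" },
	{ "DISCARD_DEC_PKT" },
};

/* Same order as 'listeners': entry i gives the devlink trap for listener i. */
static const enum devlink_trap_id listener_devlink_map[] = {
	TRAP_ID_UNRESOLVED_NEIGH,
	TRAP_ID_UNRESOLVED_NEIGH,
	TRAP_ID_DECAP_ERROR,
	TRAP_ID_DECAP_ERROR,
};

/* Mirrors the loop shape of mlxsw_sp_trap_init(): act on every hardware
 * listener whose map entry matches the requested devlink trap.
 */
static int trap_init(enum devlink_trap_id id)
{
	size_t i;

	for (i = 0; i < ARRAY_SIZE(listener_devlink_map); i++) {
		if (listener_devlink_map[i] != id)
			continue;
		printf("registering hw listener %s\n", listeners[i].name);
	}
	return 0;
}

int main(void)
{
	/* Enabling one devlink trap touches both HOST_MISS listeners. */
	return trap_init(TRAP_ID_UNRESOLVED_NEIGH);
}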