diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-06 14:45:08 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-06 14:45:08 -0700 |
commit | aae3dbb4776e7916b6cd442d00159bea27a695c1 (patch) | |
tree | d074c5d783a81e7e2e084b1eba77f57459da7e37 /drivers/net/ethernet/mellanox | |
parent | ec3604c7a5aae8953545b0d05495357009a960e5 (diff) | |
parent | 66bed8465a808400eb14562510e26c8818082cb8 (diff) | |
download | blackbird-op-linux-aae3dbb4776e7916b6cd442d00159bea27a695c1.tar.gz blackbird-op-linux-aae3dbb4776e7916b6cd442d00159bea27a695c1.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
1) Support ipv6 checksum offload in sunvnet driver, from Shannon
Nelson.
2) Move to RB-tree instead of custom AVL code in inetpeer, from Eric
Dumazet.
3) Allow generic XDP to work on virtual devices, from John Fastabend.
4) Add bpf device maps and XDP_REDIRECT, which can be used to build
arbitrary switching frameworks using XDP. From John Fastabend.
5) Remove UFO offloads from the tree, gave us little other than bugs.
6) Remove the IPSEC flow cache, from Florian Westphal.
7) Support ipv6 route offload in mlxsw driver.
8) Support VF representors in bnxt_en, from Sathya Perla.
9) Add support for forward error correction modes to ethtool, from
Vidya Sagar Ravipati.
10) Add time filter for packet scheduler action dumping, from Jamal Hadi
Salim.
11) Extend the zerocopy sendmsg() used by virtio and tap to regular
sockets via MSG_ZEROCOPY. From Willem de Bruijn.
12) Significantly rework value tracking in the BPF verifier, from Edward
Cree.
13) Add new jump instructions to eBPF, from Daniel Borkmann.
14) Rework rtnetlink plumbing so that operations can be run without
taking the RTNL semaphore. From Florian Westphal.
15) Support XDP in tap driver, from Jason Wang.
16) Add 32-bit eBPF JIT for ARM, from Shubham Bansal.
17) Add Huawei hinic ethernet driver.
18) Allow to report MD5 keys in TCP inet_diag dumps, from Ivan
Delalande.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1780 commits)
i40e: point wb_desc at the nvm_wb_desc during i40e_read_nvm_aq
i40e: avoid NVM acquire deadlock during NVM update
drivers: net: xgene: Remove return statement from void function
drivers: net: xgene: Configure tx/rx delay for ACPI
drivers: net: xgene: Read tx/rx delay for ACPI
rocker: fix kcalloc parameter order
rds: Fix non-atomic operation on shared flag variable
net: sched: don't use GFP_KERNEL under spin lock
vhost_net: correctly check tx avail during rx busy polling
net: mdio-mux: add mdio_mux parameter to mdio_mux_init()
rxrpc: Make service connection lookup always check for retry
net: stmmac: Delete dead code for MDIO registration
gianfar: Fix Tx flow control deactivation
cxgb4: Ignore MPS_TX_INT_CAUSE[Bubble] for T6
cxgb4: Fix pause frame count in t4_get_port_stats
cxgb4: fix memory leak
tun: rename generic_xdp to skb_xdp
tun: reserve extra headroom only when XDP is set
net: dsa: bcm_sf2: Configure IMP port TC2QOS mapping
net: dsa: bcm_sf2: Advertise number of egress queues
...
Diffstat (limited to 'drivers/net/ethernet/mellanox')
81 files changed, 7030 insertions, 1754 deletions
diff --git a/drivers/net/ethernet/mellanox/Kconfig b/drivers/net/ethernet/mellanox/Kconfig index 84a200764111..872548cd9431 100644 --- a/drivers/net/ethernet/mellanox/Kconfig +++ b/drivers/net/ethernet/mellanox/Kconfig @@ -5,9 +5,10 @@ config NET_VENDOR_MELLANOX bool "Mellanox devices" default y - depends on PCI + depends on PCI || I2C ---help--- - If you have a network (Ethernet) card belonging to this class, say Y. + If you have a network (Ethernet or RDMA) device belonging to this + class, say Y. Note that the answer to this question doesn't directly affect the kernel: saying N will just cause the configurator to skip all diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index b651c1210555..6dabd983e7e0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -186,7 +186,7 @@ int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, bitmap->effective_len = bitmap->avail; spin_lock_init(&bitmap->lock); bitmap->table = kzalloc(BITS_TO_LONGS(bitmap->max) * - sizeof (long), GFP_KERNEL); + sizeof(long), GFP_KERNEL); if (!bitmap->table) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 78b89ceb4f46..6a9086dc1e92 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1958,19 +1958,19 @@ static void mlx4_allocate_port_vpps(struct mlx4_dev *dev, int port) int i; int err; int num_vfs; - u16 availible_vpp; + u16 available_vpp; u8 vpp_param[MLX4_NUM_UP]; struct mlx4_qos_manager *port_qos; struct mlx4_priv *priv = mlx4_priv(dev); - err = mlx4_ALLOCATE_VPP_get(dev, port, &availible_vpp, vpp_param); + err = mlx4_ALLOCATE_VPP_get(dev, port, &available_vpp, vpp_param); if (err) { - mlx4_info(dev, "Failed query availible VPPs\n"); + mlx4_info(dev, "Failed query available VPPs\n"); return; } port_qos = &priv->mfunc.master.qos_ctl[port]; - num_vfs = (availible_vpp / + num_vfs = (available_vpp / bitmap_weight(port_qos->priority_bm, MLX4_NUM_UP)); for (i = 0; i < MLX4_NUM_UP; i++) { @@ -1985,14 +1985,14 @@ static void mlx4_allocate_port_vpps(struct mlx4_dev *dev, int port) } /* Query actual allocated VPP, just to make sure */ - err = mlx4_ALLOCATE_VPP_get(dev, port, &availible_vpp, vpp_param); + err = mlx4_ALLOCATE_VPP_get(dev, port, &available_vpp, vpp_param); if (err) { - mlx4_info(dev, "Failed query availible VPPs\n"); + mlx4_info(dev, "Failed query available VPPs\n"); return; } port_qos->num_of_qos_vfs = num_vfs; - mlx4_dbg(dev, "Port %d Availible VPPs %d\n", port, availible_vpp); + mlx4_dbg(dev, "Port %d Available VPPs %d\n", port, available_vpp); for (i = 0; i < MLX4_NUM_UP; i++) mlx4_dbg(dev, "Port %d UP %d Allocated %d VPPs\n", port, i, @@ -2637,7 +2637,7 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) int err = 0; priv->cmd.context = kmalloc(priv->cmd.max_cmds * - sizeof (struct mlx4_cmd_context), + sizeof(struct mlx4_cmd_context), GFP_KERNEL); if (!priv->cmd.context) return -ENOMEM; @@ -2695,7 +2695,7 @@ struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev) { struct mlx4_cmd_mailbox *mailbox; - mailbox = kmalloc(sizeof *mailbox, GFP_KERNEL); + mailbox = kmalloc(sizeof(*mailbox), GFP_KERNEL); if (!mailbox) return ERR_PTR(-ENOMEM); @@ -2891,7 +2891,7 @@ static int mlx4_set_vport_qos(struct mlx4_priv *priv, int slave, int port, memset(vpp_qos, 0, sizeof(struct mlx4_vport_qos_param) * MLX4_NUM_UP); if (slave > port_qos->num_of_qos_vfs) { - mlx4_info(dev, "No availible VPP resources for this VF\n"); + mlx4_info(dev, "No available VPP resources for this VF\n"); return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index f849eec21824..1e487acb4667 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -209,12 +209,10 @@ int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) cq->moder_cnt, cq->moder_time); } -int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) +void mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) { mlx4_cq_arm(&cq->mcq, MLX4_CQ_DB_REQ_NOT, priv->mdev->uar_map, &priv->mdev->uar_lock); - - return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index 2b0cbca4beb5..686e18de9a97 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -147,7 +147,7 @@ void mlx4_en_update_loopback_state(struct net_device *dev, mutex_unlock(&priv->mdev->state_lock); } -static int mlx4_en_get_profile(struct mlx4_en_dev *mdev) +static void mlx4_en_get_profile(struct mlx4_en_dev *mdev) { struct mlx4_en_profile *params = &mdev->profile; int i; @@ -176,8 +176,6 @@ static int mlx4_en_get_profile(struct mlx4_en_dev *mdev) params->prof[i].rss_rings = 0; params->prof[i].inline_thold = inline_thold; } - - return 0; } static void *mlx4_en_get_netdev(struct mlx4_dev *dev, void *ctx, u8 port) @@ -309,10 +307,7 @@ static void *mlx4_en_add(struct mlx4_dev *dev) } /* Build device profile according to supplied module parameters */ - if (mlx4_en_get_profile(mdev)) { - mlx4_err(mdev, "Bad module parameters, aborting\n"); - goto err_mr; - } + mlx4_en_get_profile(mdev); /* Configure which ports to start according to module parameters */ mdev->port_cnt = 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index e3e6d9fa69fd..9c218f1cfc6c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -130,19 +130,20 @@ out: return err; } -static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle, - u32 chain_index, __be16 proto, - struct tc_to_netdev *tc) +static int __mlx4_en_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) { - if (tc->type != TC_SETUP_MQPRIO) - return -EINVAL; + struct tc_mqprio_qopt *mqprio = type_data; + + if (type != TC_SETUP_MQPRIO) + return -EOPNOTSUPP; - if (tc->mqprio->num_tc && tc->mqprio->num_tc != MLX4_EN_NUM_UP_HIGH) + if (mqprio->num_tc && mqprio->num_tc != MLX4_EN_NUM_UP_HIGH) return -EINVAL; - tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; + mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; - return mlx4_en_alloc_tx_queue_per_tc(dev, tc->mqprio->num_tc); + return mlx4_en_alloc_tx_queue_per_tc(dev, mqprio->num_tc); } #ifdef CONFIG_RFS_ACCEL @@ -732,6 +733,21 @@ static int mlx4_en_replace_mac(struct mlx4_en_priv *priv, int qpn, return __mlx4_replace_mac(dev, priv->port, qpn, new_mac_u64); } +static void mlx4_en_update_user_mac(struct mlx4_en_priv *priv, + unsigned char new_mac[ETH_ALEN + 2]) +{ + struct mlx4_en_dev *mdev = priv->mdev; + int err; + + if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_USER_MAC_EN)) + return; + + err = mlx4_SET_PORT_user_mac(mdev->dev, priv->port, new_mac); + if (err) + en_err(priv, "Failed to pass user MAC(%pM) to Firmware for port %d, with error %d\n", + new_mac, priv->port, err); +} + static int mlx4_en_do_set_mac(struct mlx4_en_priv *priv, unsigned char new_mac[ETH_ALEN + 2]) { @@ -766,8 +782,12 @@ static int mlx4_en_set_mac(struct net_device *dev, void *addr) mutex_lock(&mdev->state_lock); memcpy(new_mac, saddr->sa_data, ETH_ALEN); err = mlx4_en_do_set_mac(priv, new_mac); - if (!err) - memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN); + if (err) + goto out; + + memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN); + mlx4_en_update_user_mac(priv, new_mac); +out: mutex_unlock(&mdev->state_lock); return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c index 86d2d42d658d..5a47f9669621 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c @@ -44,7 +44,7 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, struct mlx4_en_dev *mdev = priv->mdev; struct net_device *dev = priv->dev; - memset(context, 0, sizeof *context); + memset(context, 0, sizeof(*context)); context->flags = cpu_to_be32(7 << 16 | rss << MLX4_RSS_QPC_FLAG_OFFSET); context->pd = cpu_to_be32(mdev->priv_pdn); context->mtu_msgmax = 0xff; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index ec24c4057be6..b97a55c827eb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -1056,7 +1056,7 @@ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn, } qp->event = mlx4_en_sqp_event; - memset(context, 0, sizeof *context); + memset(context, 0, sizeof(*context)); mlx4_en_fill_qp_context(priv, ring->actual_size, ring->stride, 0, 0, qpn, ring->cqn, -1, context); context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index a81db2582555..8a32a8f7f9c0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -644,7 +644,7 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, void *fragptr) { struct mlx4_wqe_inline_seg *inl = &tx_desc->inl; - int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl; + int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof(*inl); unsigned int hlen = skb_headlen(skb); if (skb->len <= spc) { diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 07406cf2eacd..6f57c052053e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -259,7 +259,7 @@ int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port) if (!s_slave->active) return 0; - memset(&eqe, 0, sizeof eqe); + memset(&eqe, 0, sizeof(eqe)); eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE; @@ -276,7 +276,7 @@ int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port) /*don't send if we don't have the that slave */ if (dev->persist->num_vfs < slave) return 0; - memset(&eqe, 0, sizeof eqe); + memset(&eqe, 0, sizeof(eqe)); eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; eqe.subtype = MLX4_DEV_PMC_SUBTYPE_GUID_INFO; @@ -295,7 +295,7 @@ int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, /*don't send if we don't have the that slave */ if (dev->persist->num_vfs < slave) return 0; - memset(&eqe, 0, sizeof eqe); + memset(&eqe, 0, sizeof(eqe)); eqe.type = MLX4_EVENT_TYPE_PORT_CHANGE; eqe.subtype = port_subtype_change; @@ -432,7 +432,7 @@ int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr) { struct mlx4_eqe eqe; - memset(&eqe, 0, sizeof eqe); + memset(&eqe, 0, sizeof(eqe)); eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PORT_INFO; @@ -726,7 +726,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) } memcpy(&priv->mfunc.master.comm_arm_bit_vector, eqe->event.comm_channel_arm.bit_vec, - sizeof eqe->event.comm_channel_arm.bit_vec); + sizeof(eqe->event.comm_channel_arm.bit_vec)); queue_work(priv->mfunc.master.comm_wq, &priv->mfunc.master.comm_work); break; @@ -984,15 +984,15 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent, */ npages = PAGE_ALIGN(eq->nent * dev->caps.eqe_size) / PAGE_SIZE; - eq->page_list = kmalloc(npages * sizeof *eq->page_list, - GFP_KERNEL); + eq->page_list = kmalloc_array(npages, sizeof(*eq->page_list), + GFP_KERNEL); if (!eq->page_list) goto err_out; for (i = 0; i < npages; ++i) eq->page_list[i].buf = NULL; - dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); + dma_list = kmalloc_array(npages, sizeof(*dma_list), GFP_KERNEL); if (!dma_list) goto err_out_free; @@ -1161,7 +1161,7 @@ int mlx4_alloc_eq_table(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); priv->eq_table.eq = kcalloc(dev->caps.num_eqs - dev->caps.reserved_eqs, - sizeof *priv->eq_table.eq, GFP_KERNEL); + sizeof(*priv->eq_table.eq), GFP_KERNEL); if (!priv->eq_table.eq) return -ENOMEM; @@ -1180,7 +1180,7 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) int i; priv->eq_table.uar_map = kcalloc(mlx4_num_eq_uar(dev), - sizeof *priv->eq_table.uar_map, + sizeof(*priv->eq_table.uar_map), GFP_KERNEL); if (!priv->eq_table.uar_map) { err = -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 041c0ed65929..16c09949afd5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -36,6 +36,7 @@ #include <linux/mlx4/cmd.h> #include <linux/module.h> #include <linux/cache.h> +#include <linux/kernel.h> #include "fw.h" #include "icm.h" @@ -57,7 +58,7 @@ MODULE_PARM_DESC(enable_qos, "Enable Enhanced QoS support (default: off)"); do { \ void *__p = (char *) (source) + (offset); \ u64 val; \ - switch (sizeof (dest)) { \ + switch (sizeof(dest)) { \ case 1: (dest) = *(u8 *) __p; break; \ case 2: (dest) = be16_to_cpup(__p); break; \ case 4: (dest) = be32_to_cpup(__p); break; \ @@ -162,6 +163,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [35] = "Diag counters per port", [36] = "QinQ VST mode support", [37] = "sl to vl mapping table change event support", + [38] = "user MAC support", }; int i; @@ -679,22 +681,22 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port, if (func_cap->flags1 & QUERY_FUNC_CAP_VF_ENABLE_QP0) { MLX4_GET(qkey, outbox, QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET); - func_cap->qp0_qkey = qkey; + func_cap->spec_qps.qp0_qkey = qkey; } else { - func_cap->qp0_qkey = 0; + func_cap->spec_qps.qp0_qkey = 0; } MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL); - func_cap->qp0_tunnel_qpn = size & 0xFFFFFF; + func_cap->spec_qps.qp0_tunnel = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_PROXY); - func_cap->qp0_proxy_qpn = size & 0xFFFFFF; + func_cap->spec_qps.qp0_proxy = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_TUNNEL); - func_cap->qp1_tunnel_qpn = size & 0xFFFFFF; + func_cap->spec_qps.qp1_tunnel = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_PROXY); - func_cap->qp1_proxy_qpn = size & 0xFFFFFF; + func_cap->spec_qps.qp1_proxy = size & 0xFFFFFF; if (func_cap->flags1 & QUERY_FUNC_CAP_FLAGS1_NIC_INFO) MLX4_GET(func_cap->phys_port_id, outbox, @@ -778,6 +780,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET 0x52 #define QUERY_DEV_CAP_MAX_SG_RQ_OFFSET 0x55 #define QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET 0x56 +#define QUERY_DEV_CAP_USER_MAC_EN_OFFSET 0x5C #define QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET 0x5D #define QUERY_DEV_CAP_MAX_QP_MCG_OFFSET 0x61 #define QUERY_DEV_CAP_RSVD_MCG_OFFSET 0x62 @@ -949,6 +952,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET); dev_cap->max_sq_desc_sz = size; + MLX4_GET(field, outbox, QUERY_DEV_CAP_USER_MAC_EN_OFFSET); + if (field & (1 << 2)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_USER_MAC_EN; MLX4_GET(field, outbox, QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET); if (field & 0x1) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP; @@ -1534,7 +1540,7 @@ int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt) for (i = 0; i < mlx4_icm_size(&iter) >> lg; ++i) { if (virt != -1) { pages[nent * 2] = cpu_to_be64(virt); - virt += 1 << lg; + virt += 1ULL << lg; } pages[nent * 2 + 1] = @@ -2450,14 +2456,14 @@ int mlx4_config_dev_retrieval(struct mlx4_dev *dev, csum_mask = (config_dev.rx_checksum_val >> CONFIG_DEV_RX_CSUM_MODE_PORT1_BIT_OFFSET) & CONFIG_DEV_RX_CSUM_MODE_MASK; - if (csum_mask >= sizeof(config_dev_csum_flags)/sizeof(config_dev_csum_flags[0])) + if (csum_mask >= ARRAY_SIZE(config_dev_csum_flags)) return -EINVAL; params->rx_csum_flags_port_1 = config_dev_csum_flags[csum_mask]; csum_mask = (config_dev.rx_checksum_val >> CONFIG_DEV_RX_CSUM_MODE_PORT2_BIT_OFFSET) & CONFIG_DEV_RX_CSUM_MODE_MASK; - if (csum_mask >= sizeof(config_dev_csum_flags)/sizeof(config_dev_csum_flags[0])) + if (csum_mask >= ARRAY_SIZE(config_dev_csum_flags)) return -EINVAL; params->rx_csum_flags_port_2 = config_dev_csum_flags[csum_mask]; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index b52ba01aa486..cd6399c76bfd 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -144,11 +144,7 @@ struct mlx4_func_cap { int max_eq; int reserved_eq; int mcg_quota; - u32 qp0_qkey; - u32 qp0_tunnel_qpn; - u32 qp0_proxy_qpn; - u32 qp1_tunnel_qpn; - u32 qp1_proxy_qpn; + struct mlx4_spec_qps spec_qps; u32 reserved_lkey; u8 physical_port; u8 flags0; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.c b/drivers/net/ethernet/mellanox/mlx4/fw_qos.c index 8f2fde0487c4..3a09d7122d3b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw_qos.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.c @@ -65,7 +65,7 @@ struct mlx4_set_port_scheduler_context { /* Granular Qos (per VF) section */ struct mlx4_alloc_vpp_param { - __be32 availible_vpp; + __be32 available_vpp; __be32 vpp_p_up[MLX4_NUM_UP]; }; @@ -157,7 +157,7 @@ int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, EXPORT_SYMBOL(mlx4_SET_PORT_SCHEDULER); int mlx4_ALLOCATE_VPP_get(struct mlx4_dev *dev, u8 port, - u16 *availible_vpp, u8 *vpp_p_up) + u16 *available_vpp, u8 *vpp_p_up) { int i; int err; @@ -179,7 +179,7 @@ int mlx4_ALLOCATE_VPP_get(struct mlx4_dev *dev, u8 port, goto out; /* Total number of supported VPPs */ - *availible_vpp = (u16)be32_to_cpu(out_param->availible_vpp); + *available_vpp = (u16)be32_to_cpu(out_param->available_vpp); for (i = 0; i < MLX4_NUM_UP; i++) vpp_p_up[i] = (u8)be32_to_cpu(out_param->vpp_p_up[i]); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h index ac1f331878e6..582997577a04 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h @@ -84,23 +84,23 @@ int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc); int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, u8 *pg, u16 *ratelimit); /** - * mlx4_ALLOCATE_VPP_get - Query port VPP availible resources and allocation. - * Before distribution of VPPs to priorities, only availible_vpp is returned. + * mlx4_ALLOCATE_VPP_get - Query port VPP available resources and allocation. + * Before distribution of VPPs to priorities, only available_vpp is returned. * After initialization it returns the distribution of VPPs among priorities. * * @dev: mlx4_dev. * @port: Physical port number. - * @availible_vpp: Pointer to variable where number of availible VPPs is stored + * @available_vpp: Pointer to variable where number of available VPPs is stored * @vpp_p_up: Distribution of VPPs to priorities is stored in this array * * Returns 0 on success or a negative mlx4_core errno code. **/ int mlx4_ALLOCATE_VPP_get(struct mlx4_dev *dev, u8 port, - u16 *availible_vpp, u8 *vpp_p_up); + u16 *available_vpp, u8 *vpp_p_up); /** * mlx4_ALLOCATE_VPP_set - Distribution of VPPs among differnt priorities. * The total number of VPPs assigned to all for a port must not exceed - * the value reported by availible_vpp in mlx4_ALLOCATE_VPP_get. + * the value reported by available_vpp in mlx4_ALLOCATE_VPP_get. * VPP allocation is allowed only after the port type has been set, * and while no QPs are open for this port. * diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index 5a7816e7c7b4..a822f7a56bc5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -400,7 +400,7 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, obj_per_chunk = MLX4_TABLE_CHUNK_SIZE / obj_size; num_icm = (nobj + obj_per_chunk - 1) / obj_per_chunk; - table->icm = kcalloc(num_icm, sizeof *table->icm, GFP_KERNEL); + table->icm = kcalloc(num_icm, sizeof(*table->icm), GFP_KERNEL); if (!table->icm) return -ENOMEM; table->virt = virt; diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.h b/drivers/net/ethernet/mellanox/mlx4/icm.h index dee67fa39107..c9169a490557 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.h +++ b/drivers/net/ethernet/mellanox/mlx4/icm.h @@ -39,8 +39,8 @@ #include <linux/mutex.h> #define MLX4_ICM_CHUNK_LEN \ - ((256 - sizeof (struct list_head) - 2 * sizeof (int)) / \ - (sizeof (struct scatterlist))) + ((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \ + (sizeof(struct scatterlist))) enum { MLX4_ICM_PAGE_SHIFT = 12, diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index e00f627331cb..2edcce98ab2d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -53,7 +53,7 @@ static void mlx4_add_device(struct mlx4_interface *intf, struct mlx4_priv *priv) { struct mlx4_device_context *dev_ctx; - dev_ctx = kmalloc(sizeof *dev_ctx, GFP_KERNEL); + dev_ctx = kmalloc(sizeof(*dev_ctx), GFP_KERNEL); if (!dev_ctx) return; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index a594bfd9e095..e61c99ef741d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -34,6 +34,7 @@ */ #include <linux/module.h> +#include <linux/kernel.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/pci.h> @@ -121,7 +122,7 @@ static char mlx4_version[] = DRV_NAME ": Mellanox ConnectX core driver v" DRV_VERSION "\n"; -static struct mlx4_profile default_profile = { +static const struct mlx4_profile default_profile = { .num_qp = 1 << 18, .num_srq = 1 << 16, .rdmarc_per_qp = 1 << 4, @@ -131,7 +132,7 @@ static struct mlx4_profile default_profile = { .num_mtt = 1 << 20, /* It is really num mtt segements */ }; -static struct mlx4_profile low_mem_profile = { +static const struct mlx4_profile low_mem_profile = { .num_qp = 1 << 17, .num_srq = 1 << 6, .rdmarc_per_qp = 1 << 4, @@ -819,38 +820,93 @@ static void slave_adjust_steering_mode(struct mlx4_dev *dev, mlx4_steering_mode_str(dev->caps.steering_mode)); } +static void mlx4_slave_destroy_special_qp_cap(struct mlx4_dev *dev) +{ + kfree(dev->caps.spec_qps); + dev->caps.spec_qps = NULL; +} + +static int mlx4_slave_special_qp_cap(struct mlx4_dev *dev) +{ + struct mlx4_func_cap *func_cap = NULL; + struct mlx4_caps *caps = &dev->caps; + int i, err = 0; + + func_cap = kzalloc(sizeof(*func_cap), GFP_KERNEL); + caps->spec_qps = kcalloc(caps->num_ports, sizeof(*caps->spec_qps), GFP_KERNEL); + + if (!func_cap || !caps->spec_qps) { + mlx4_err(dev, "Failed to allocate memory for special qps cap\n"); + err = -ENOMEM; + goto err_mem; + } + + for (i = 1; i <= caps->num_ports; ++i) { + err = mlx4_QUERY_FUNC_CAP(dev, i, func_cap); + if (err) { + mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n", + i, err); + goto err_mem; + } + caps->spec_qps[i - 1] = func_cap->spec_qps; + caps->port_mask[i] = caps->port_type[i]; + caps->phys_port_id[i] = func_cap->phys_port_id; + err = mlx4_get_slave_pkey_gid_tbl_len(dev, i, + &caps->gid_table_len[i], + &caps->pkey_table_len[i]); + if (err) { + mlx4_err(dev, "QUERY_PORT command failed for port %d, aborting (%d)\n", + i, err); + goto err_mem; + } + } + +err_mem: + if (err) + mlx4_slave_destroy_special_qp_cap(dev); + kfree(func_cap); + return err; +} + static int mlx4_slave_cap(struct mlx4_dev *dev) { int err; u32 page_size; - struct mlx4_dev_cap dev_cap; - struct mlx4_func_cap func_cap; - struct mlx4_init_hca_param hca_param; - u8 i; + struct mlx4_dev_cap *dev_cap = NULL; + struct mlx4_func_cap *func_cap = NULL; + struct mlx4_init_hca_param *hca_param = NULL; + + hca_param = kzalloc(sizeof(*hca_param), GFP_KERNEL); + func_cap = kzalloc(sizeof(*func_cap), GFP_KERNEL); + dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL); + if (!hca_param || !func_cap || !dev_cap) { + mlx4_err(dev, "Failed to allocate memory for slave_cap\n"); + err = -ENOMEM; + goto free_mem; + } - memset(&hca_param, 0, sizeof(hca_param)); - err = mlx4_QUERY_HCA(dev, &hca_param); + err = mlx4_QUERY_HCA(dev, hca_param); if (err) { mlx4_err(dev, "QUERY_HCA command failed, aborting\n"); - return err; + goto free_mem; } /* fail if the hca has an unknown global capability * at this time global_caps should be always zeroed */ - if (hca_param.global_caps) { + if (hca_param->global_caps) { mlx4_err(dev, "Unknown hca global capabilities\n"); - return -EINVAL; + err = -EINVAL; + goto free_mem; } - dev->caps.hca_core_clock = hca_param.hca_core_clock; + dev->caps.hca_core_clock = hca_param->hca_core_clock; - memset(&dev_cap, 0, sizeof(dev_cap)); - dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp; - err = mlx4_dev_cap(dev, &dev_cap); + dev->caps.max_qp_dest_rdma = 1 << hca_param->log_rd_per_qp; + err = mlx4_dev_cap(dev, dev_cap); if (err) { mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n"); - return err; + goto free_mem; } err = mlx4_QUERY_FW(dev); @@ -862,21 +918,23 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) if (page_size > PAGE_SIZE) { mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n", page_size, PAGE_SIZE); - return -ENODEV; + err = -ENODEV; + goto free_mem; } /* Set uar_page_shift for VF */ - dev->uar_page_shift = hca_param.uar_page_sz + 12; + dev->uar_page_shift = hca_param->uar_page_sz + 12; /* Make sure the master uar page size is valid */ if (dev->uar_page_shift > PAGE_SHIFT) { mlx4_err(dev, "Invalid configuration: uar page size is larger than system page size\n"); - return -ENODEV; + err = -ENODEV; + goto free_mem; } /* Set reserved_uars based on the uar_page_shift */ - mlx4_set_num_reserved_uars(dev, &dev_cap); + mlx4_set_num_reserved_uars(dev, dev_cap); /* Although uar page size in FW differs from system page size, * upper software layers (mlx4_ib, mlx4_en and part of mlx4_core) @@ -884,34 +942,35 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) */ dev->caps.uar_page_size = PAGE_SIZE; - memset(&func_cap, 0, sizeof(func_cap)); - err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap); + err = mlx4_QUERY_FUNC_CAP(dev, 0, func_cap); if (err) { mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d)\n", err); - return err; + goto free_mem; } - if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) != + if ((func_cap->pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) != PF_CONTEXT_BEHAVIOUR_MASK) { mlx4_err(dev, "Unknown pf context behaviour %x known flags %x\n", - func_cap.pf_context_behaviour, PF_CONTEXT_BEHAVIOUR_MASK); - return -EINVAL; - } - - dev->caps.num_ports = func_cap.num_ports; - dev->quotas.qp = func_cap.qp_quota; - dev->quotas.srq = func_cap.srq_quota; - dev->quotas.cq = func_cap.cq_quota; - dev->quotas.mpt = func_cap.mpt_quota; - dev->quotas.mtt = func_cap.mtt_quota; - dev->caps.num_qps = 1 << hca_param.log_num_qps; - dev->caps.num_srqs = 1 << hca_param.log_num_srqs; - dev->caps.num_cqs = 1 << hca_param.log_num_cqs; - dev->caps.num_mpts = 1 << hca_param.log_mpt_sz; - dev->caps.num_eqs = func_cap.max_eq; - dev->caps.reserved_eqs = func_cap.reserved_eq; - dev->caps.reserved_lkey = func_cap.reserved_lkey; + func_cap->pf_context_behaviour, + PF_CONTEXT_BEHAVIOUR_MASK); + err = -EINVAL; + goto free_mem; + } + + dev->caps.num_ports = func_cap->num_ports; + dev->quotas.qp = func_cap->qp_quota; + dev->quotas.srq = func_cap->srq_quota; + dev->quotas.cq = func_cap->cq_quota; + dev->quotas.mpt = func_cap->mpt_quota; + dev->quotas.mtt = func_cap->mtt_quota; + dev->caps.num_qps = 1 << hca_param->log_num_qps; + dev->caps.num_srqs = 1 << hca_param->log_num_srqs; + dev->caps.num_cqs = 1 << hca_param->log_num_cqs; + dev->caps.num_mpts = 1 << hca_param->log_mpt_sz; + dev->caps.num_eqs = func_cap->max_eq; + dev->caps.reserved_eqs = func_cap->reserved_eq; + dev->caps.reserved_lkey = func_cap->reserved_lkey; dev->caps.num_pds = MLX4_NUM_PDS; dev->caps.num_mgms = 0; dev->caps.num_amgms = 0; @@ -919,43 +978,16 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) if (dev->caps.num_ports > MLX4_MAX_PORTS) { mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n", dev->caps.num_ports, MLX4_MAX_PORTS); - return -ENODEV; + err = -ENODEV; + goto free_mem; } mlx4_replace_zero_macs(dev); - dev->caps.qp0_qkey = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL); - dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - - if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || - !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy || - !dev->caps.qp0_qkey) { - err = -ENOMEM; - goto err_mem; - } - - for (i = 1; i <= dev->caps.num_ports; ++i) { - err = mlx4_QUERY_FUNC_CAP(dev, i, &func_cap); - if (err) { - mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n", - i, err); - goto err_mem; - } - dev->caps.qp0_qkey[i - 1] = func_cap.qp0_qkey; - dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn; - dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn; - dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn; - dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn; - dev->caps.port_mask[i] = dev->caps.port_type[i]; - dev->caps.phys_port_id[i] = func_cap.phys_port_id; - err = mlx4_get_slave_pkey_gid_tbl_len(dev, i, - &dev->caps.gid_table_len[i], - &dev->caps.pkey_table_len[i]); - if (err) - goto err_mem; + err = mlx4_slave_special_qp_cap(dev); + if (err) { + mlx4_err(dev, "Set special QP caps failed. aborting\n"); + goto free_mem; } if (dev->caps.uar_page_size * (dev->caps.num_uars - @@ -970,7 +1002,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) goto err_mem; } - if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) { + if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) { dev->caps.eqe_size = 64; dev->caps.eqe_factor = 1; } else { @@ -978,20 +1010,20 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) dev->caps.eqe_factor = 0; } - if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) { + if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) { dev->caps.cqe_size = 64; dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE; } else { dev->caps.cqe_size = 32; } - if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) { - dev->caps.eqe_size = hca_param.eqe_size; + if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) { + dev->caps.eqe_size = hca_param->eqe_size; dev->caps.eqe_factor = 0; } - if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) { - dev->caps.cqe_size = hca_param.cqe_size; + if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) { + dev->caps.cqe_size = hca_param->cqe_size; /* User still need to know when CQE > 32B */ dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE; } @@ -999,31 +1031,27 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; mlx4_warn(dev, "Timestamping is not supported in slave mode\n"); - slave_adjust_steering_mode(dev, &dev_cap, &hca_param); + dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_USER_MAC_EN; + mlx4_dbg(dev, "User MAC FW update is not supported in slave mode\n"); + + slave_adjust_steering_mode(dev, dev_cap, hca_param); mlx4_dbg(dev, "RSS support for IP fragments is %s\n", - hca_param.rss_ip_frags ? "on" : "off"); + hca_param->rss_ip_frags ? "on" : "off"); - if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP && + if (func_cap->extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP && dev->caps.bf_reg_size) dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP; - if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_A0_RES_QP) + if (func_cap->extra_flags & MLX4_QUERY_FUNC_FLAGS_A0_RES_QP) dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_A0_QP; - return 0; - err_mem: - kfree(dev->caps.qp0_qkey); - kfree(dev->caps.qp0_tunnel); - kfree(dev->caps.qp0_proxy); - kfree(dev->caps.qp1_tunnel); - kfree(dev->caps.qp1_proxy); - dev->caps.qp0_qkey = NULL; - dev->caps.qp0_tunnel = NULL; - dev->caps.qp0_proxy = NULL; - dev->caps.qp1_tunnel = NULL; - dev->caps.qp1_proxy = NULL; - + if (err) + mlx4_slave_destroy_special_qp_cap(dev); +free_mem: + kfree(hca_param); + kfree(func_cap); + kfree(dev_cap); return err; } @@ -2399,7 +2427,7 @@ static int mlx4_init_hca(struct mlx4_dev *dev) dev->caps.rx_checksum_flags_port[2] = params.rx_csum_flags_port_2; } priv->eq_table.inta_pin = adapter.inta_pin; - memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id); + memcpy(dev->board_id, adapter.board_id, sizeof(dev->board_id)); return 0; @@ -2407,13 +2435,8 @@ unmap_bf: unmap_internal_clock(dev); unmap_bf_area(dev); - if (mlx4_is_slave(dev)) { - kfree(dev->caps.qp0_qkey); - kfree(dev->caps.qp0_tunnel); - kfree(dev->caps.qp0_proxy); - kfree(dev->caps.qp1_tunnel); - kfree(dev->caps.qp1_proxy); - } + if (mlx4_is_slave(dev)) + mlx4_slave_destroy_special_qp_cap(dev); err_close: if (mlx4_is_slave(dev)) @@ -2870,7 +2893,7 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) dev->caps.num_eqs - dev->caps.reserved_eqs, MAX_MSIX); - entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL); + entries = kcalloc(nreq, sizeof(*entries), GFP_KERNEL); if (!entries) goto no_msi; @@ -3597,13 +3620,8 @@ err_master_mfunc: mlx4_multi_func_cleanup(dev); } - if (mlx4_is_slave(dev)) { - kfree(dev->caps.qp0_qkey); - kfree(dev->caps.qp0_tunnel); - kfree(dev->caps.qp0_proxy); - kfree(dev->caps.qp1_tunnel); - kfree(dev->caps.qp1_proxy); - } + if (mlx4_is_slave(dev)) + mlx4_slave_destroy_special_qp_cap(dev); err_close: mlx4_close_hca(dev); @@ -3659,7 +3677,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data, * per port, we must limit the number of VFs to 63 (since their are * 128 MACs) */ - for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) && i < num_vfs_argc; + for (i = 0; i < ARRAY_SIZE(nvfs) && i < num_vfs_argc; total_vfs += nvfs[param_map[num_vfs_argc - 1][i]], i++) { nvfs[param_map[num_vfs_argc - 1][i]] = num_vfs[i]; if (nvfs[i] < 0) { @@ -3668,7 +3686,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data, goto err_disable_pdev; } } - for (i = 0; i < sizeof(prb_vf)/sizeof(prb_vf[0]) && i < probe_vfs_argc; + for (i = 0; i < ARRAY_SIZE(prb_vf) && i < probe_vfs_argc; i++) { prb_vf[param_map[probe_vfs_argc - 1][i]] = probe_vf[i]; if (prb_vf[i] < 0 || prb_vf[i] > nvfs[i]) { @@ -3747,11 +3765,11 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data, if (total_vfs) { unsigned vfs_offset = 0; - for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) && + for (i = 0; i < ARRAY_SIZE(nvfs) && vfs_offset + nvfs[i] < extended_func_num(pdev); vfs_offset += nvfs[i], i++) ; - if (i == sizeof(nvfs)/sizeof(nvfs[0])) { + if (i == ARRAY_SIZE(nvfs)) { err = -ENODEV; goto err_release_regions; } @@ -3783,7 +3801,6 @@ err_release_regions: err_disable_pdev: mlx4_pci_disable_device(&priv->dev); - pci_set_drvdata(pdev, NULL); return err; } @@ -3944,11 +3961,7 @@ static void mlx4_unload_one(struct pci_dev *pdev) if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); - kfree(dev->caps.qp0_qkey); - kfree(dev->caps.qp0_tunnel); - kfree(dev->caps.qp0_proxy); - kfree(dev->caps.qp1_tunnel); - kfree(dev->caps.qp1_proxy); + mlx4_slave_destroy_special_qp_cap(dev); kfree(dev->dev_vfs); mlx4_clean_dev(dev); @@ -3998,7 +4011,6 @@ static void mlx4_remove_one(struct pci_dev *pdev) devlink_unregister(devlink); kfree(dev->persist); devlink_free(devlink); - pci_set_drvdata(pdev, NULL); } static int restore_current_port_types(struct mlx4_dev *dev, diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 0710b3677464..4c5306dbcf11 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -162,7 +162,7 @@ static int new_steering_entry(struct mlx4_dev *dev, u8 port, return -EINVAL; s_steer = &mlx4_priv(dev)->steer[port - 1]; - new_entry = kzalloc(sizeof *new_entry, GFP_KERNEL); + new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL); if (!new_entry) return -ENOMEM; @@ -175,7 +175,7 @@ static int new_steering_entry(struct mlx4_dev *dev, u8 port, */ pqp = get_promisc_qp(dev, port, steer, qpn); if (pqp) { - dqp = kmalloc(sizeof *dqp, GFP_KERNEL); + dqp = kmalloc(sizeof(*dqp), GFP_KERNEL); if (!dqp) { err = -ENOMEM; goto out_alloc; @@ -274,7 +274,7 @@ static int existing_steering_entry(struct mlx4_dev *dev, u8 port, } /* add the qp as a duplicate on this index */ - dqp = kmalloc(sizeof *dqp, GFP_KERNEL); + dqp = kmalloc(sizeof(*dqp), GFP_KERNEL); if (!dqp) return -ENOMEM; dqp->qpn = qpn; @@ -443,7 +443,7 @@ static int add_promisc_qp(struct mlx4_dev *dev, u8 port, goto out_mutex; } - pqp = kmalloc(sizeof *pqp, GFP_KERNEL); + pqp = kmalloc(sizeof(*pqp), GFP_KERNEL); if (!pqp) { err = -ENOMEM; goto out_mutex; @@ -514,7 +514,7 @@ static int add_promisc_qp(struct mlx4_dev *dev, u8 port, /* add the new qpn to list of promisc qps */ list_add_tail(&pqp->list, &s_steer->promisc_qps[steer]); /* now need to add all the promisc qps to default entry */ - memset(mgm, 0, sizeof *mgm); + memset(mgm, 0, sizeof(*mgm)); members_count = 0; list_for_each_entry(dqp, &s_steer->promisc_qps[steer], list) { if (members_count == dev->caps.num_qp_per_mgm) { @@ -1144,7 +1144,7 @@ int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], index += dev->caps.num_mgms; new_entry = 1; - memset(mgm, 0, sizeof *mgm); + memset(mgm, 0, sizeof(*mgm)); memcpy(mgm->gid, gid, 16); } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 852d00a5b016..c68da1986e51 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -807,6 +807,8 @@ struct mlx4_set_port_general_context { u8 phv_en; u8 reserved6[5]; __be16 user_mtu; + u16 reserved7; + u8 user_mac[6]; }; struct mlx4_set_port_rqp_calc_context { diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index d350b2158104..fdb3ad0cbe54 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -685,7 +685,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, int cq_idx); void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); -int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); +void mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); void mlx4_en_tx_irq(struct mlx4_cq *mcq); u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 24282cd017d3..c7c0764991c9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -106,9 +106,9 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order) buddy->max_order = max_order; spin_lock_init(&buddy->lock); - buddy->bits = kcalloc(buddy->max_order + 1, sizeof (long *), + buddy->bits = kcalloc(buddy->max_order + 1, sizeof(long *), GFP_KERNEL); - buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free, + buddy->num_free = kcalloc(buddy->max_order + 1, sizeof(*buddy->num_free), GFP_KERNEL); if (!buddy->bits || !buddy->num_free) goto err_out; @@ -703,13 +703,13 @@ static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt, return -ENOMEM; dma_sync_single_for_cpu(&dev->persist->pdev->dev, dma_handle, - npages * sizeof (u64), DMA_TO_DEVICE); + npages * sizeof(u64), DMA_TO_DEVICE); for (i = 0; i < npages; ++i) mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT); dma_sync_single_for_device(&dev->persist->pdev->dev, dma_handle, - npages * sizeof (u64), DMA_TO_DEVICE); + npages * sizeof(u64), DMA_TO_DEVICE); return 0; } @@ -1052,7 +1052,7 @@ int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages, return -EINVAL; /* All MTTs must fit in the same page */ - if (max_pages * sizeof *fmr->mtts > PAGE_SIZE) + if (max_pages * sizeof(*fmr->mtts) > PAGE_SIZE) return -EINVAL; fmr->page_shift = page_shift; diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 4e36e287d605..3ef3406ff4cb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -52,6 +52,7 @@ #define MLX4_FLAG2_V_IGNORE_FCS_MASK BIT(1) #define MLX4_FLAG2_V_USER_MTU_MASK BIT(5) +#define MLX4_FLAG2_V_USER_MAC_MASK BIT(6) #define MLX4_FLAG_V_MTU_MASK BIT(0) #define MLX4_FLAG_V_PPRX_MASK BIT(1) #define MLX4_FLAG_V_PPTX_MASK BIT(2) @@ -1700,6 +1701,30 @@ int mlx4_SET_PORT_user_mtu(struct mlx4_dev *dev, u8 port, u16 user_mtu) } EXPORT_SYMBOL(mlx4_SET_PORT_user_mtu); +int mlx4_SET_PORT_user_mac(struct mlx4_dev *dev, u8 port, u8 *user_mac) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_port_general_context *context; + u32 in_mod; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + context = mailbox->buf; + context->flags2 |= MLX4_FLAG2_V_USER_MAC_MASK; + memcpy(context->user_mac, user_mac, sizeof(context->user_mac)); + + in_mod = MLX4_SET_PORT_GENERAL << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE, + MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_SET_PORT_user_mac); + int mlx4_SET_PORT_fcs_check(struct mlx4_dev *dev, u8 port, u8 ignore_fcs_value) { struct mlx4_cmd_mailbox *mailbox; diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 5e5b4475b85e..728a2fb1f5c0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -174,7 +174,7 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, cpu_to_be16(mlx4_qp_roce_entropy(dev, qp->qpn)); *(__be32 *) mailbox->buf = cpu_to_be32(optpar); - memcpy(mailbox->buf + 8, context, sizeof *context); + memcpy(mailbox->buf + 8, context, sizeof(*context)); ((struct mlx4_qp_context *) (mailbox->buf + 8))->local_qpn = cpu_to_be32(qp->qpn); @@ -845,24 +845,21 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) /* In mfunc, calculate proxy and tunnel qp offsets for the PF here, * since the PF does not call mlx4_slave_caps */ - dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - - if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || - !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) { + dev->caps.spec_qps = kcalloc(dev->caps.num_ports, + sizeof(*dev->caps.spec_qps), + GFP_KERNEL); + if (!dev->caps.spec_qps) { err = -ENOMEM; goto err_mem; } for (k = 0; k < dev->caps.num_ports; k++) { - dev->caps.qp0_proxy[k] = dev->phys_caps.base_proxy_sqpn + + dev->caps.spec_qps[k].qp0_proxy = dev->phys_caps.base_proxy_sqpn + 8 * mlx4_master_func_num(dev) + k; - dev->caps.qp0_tunnel[k] = dev->caps.qp0_proxy[k] + 8 * MLX4_MFUNC_MAX; - dev->caps.qp1_proxy[k] = dev->phys_caps.base_proxy_sqpn + + dev->caps.spec_qps[k].qp0_tunnel = dev->caps.spec_qps[k].qp0_proxy + 8 * MLX4_MFUNC_MAX; + dev->caps.spec_qps[k].qp1_proxy = dev->phys_caps.base_proxy_sqpn + 8 * mlx4_master_func_num(dev) + MLX4_MAX_PORTS + k; - dev->caps.qp1_tunnel[k] = dev->caps.qp1_proxy[k] + 8 * MLX4_MFUNC_MAX; + dev->caps.spec_qps[k].qp1_tunnel = dev->caps.spec_qps[k].qp1_proxy + 8 * MLX4_MFUNC_MAX; } } @@ -874,12 +871,8 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) return err; err_mem: - kfree(dev->caps.qp0_tunnel); - kfree(dev->caps.qp0_proxy); - kfree(dev->caps.qp1_tunnel); - kfree(dev->caps.qp1_proxy); - dev->caps.qp0_tunnel = dev->caps.qp0_proxy = - dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; + kfree(dev->caps.spec_qps); + dev->caps.spec_qps = NULL; mlx4_cleanup_qp_zones(dev); return err; } @@ -908,7 +901,7 @@ int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp, MLX4_CMD_QUERY_QP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (!err) - memcpy(context, mailbox->buf + 8, sizeof *context); + memcpy(context, mailbox->buf + 8, sizeof(*context)); mlx4_free_cmd_mailbox(dev, mailbox); return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 215e21c3dc8a..fabb53379727 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -1040,7 +1040,7 @@ static struct res_common *alloc_qp_tr(int id) { struct res_qp *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1058,7 +1058,7 @@ static struct res_common *alloc_mtt_tr(int id, int order) { struct res_mtt *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1074,7 +1074,7 @@ static struct res_common *alloc_mpt_tr(int id, int key) { struct res_mpt *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1089,7 +1089,7 @@ static struct res_common *alloc_eq_tr(int id) { struct res_eq *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1103,7 +1103,7 @@ static struct res_common *alloc_cq_tr(int id) { struct res_cq *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1118,7 +1118,7 @@ static struct res_common *alloc_srq_tr(int id) { struct res_srq *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1133,7 +1133,7 @@ static struct res_common *alloc_counter_tr(int id, int port) { struct res_counter *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1148,7 +1148,7 @@ static struct res_common *alloc_xrcdn_tr(int id) { struct res_xrcdn *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1162,7 +1162,7 @@ static struct res_common *alloc_fs_rule_tr(u64 id, int qpn) { struct res_fs_rule *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1274,7 +1274,7 @@ static int add_res_range(struct mlx4_dev *dev, int slave, u64 base, int count, struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct rb_root *root = &tracker->res_tree[type]; - res_arr = kzalloc(count * sizeof *res_arr, GFP_KERNEL); + res_arr = kcalloc(count, sizeof(*res_arr), GFP_KERNEL); if (!res_arr) return -ENOMEM; @@ -2027,7 +2027,7 @@ static int mac_add_to_slave(struct mlx4_dev *dev, int slave, u64 mac, int port, if (mlx4_grant_resource(dev, slave, RES_MAC, 1, port)) return -EINVAL; - res = kzalloc(sizeof *res, GFP_KERNEL); + res = kzalloc(sizeof(*res), GFP_KERNEL); if (!res) { mlx4_release_resource(dev, slave, RES_MAC, 1, port); return -ENOMEM; @@ -4020,7 +4020,7 @@ static int add_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp, struct res_gid *res; int err; - res = kzalloc(sizeof *res, GFP_KERNEL); + res = kzalloc(sizeof(*res), GFP_KERNEL); if (!res) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 5aee05992f27..fdaef00465d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -34,6 +34,27 @@ config MLX5_CORE_EN ---help--- Ethernet support in Mellanox Technologies ConnectX-4 NIC. +config MLX5_MPFS + bool "Mellanox Technologies MLX5 MPFS support" + depends on MLX5_CORE_EN + default y + ---help--- + Mellanox Technologies Ethernet Multi-Physical Function Switch (MPFS) + support in ConnectX NIC. MPFs is required for when multi-PF configuration + is enabled to allow passing user configured unicast MAC addresses to the + requesting PF. + +config MLX5_ESWITCH + bool "Mellanox Technologies MLX5 SRIOV E-Switch support" + depends on MLX5_CORE_EN + default y + ---help--- + Mellanox Technologies Ethernet SRIOV E-Switch support in ConnectX NIC. + E-Switch provides internal SRIOV packet steering and switching for the + enabled VFs and PF in two available modes: + Legacy SRIOV mode (L2 mac vlan steering based). + Switchdev mode (eswitch offloads). + config MLX5_CORE_EN_DCB bool "Data Center Bridging (DCB) Support" default y diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 9d17e4e76d3a..87a3099808f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -4,17 +4,21 @@ subdir-ccflags-y += -I$(src) mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ - fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o + fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o \ + diag/fs_tracepoint.o mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \ fpga/ipsec.o -mlx5_core-$(CONFIG_MLX5_CORE_EN) += eswitch.o eswitch_offloads.o \ - en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \ - en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \ - en_tc.o en_arfs.o en_rep.o en_fs_ethtool.o en_selftest.o +mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ + en_tx.o en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \ + en_arfs.o en_fs_ethtool.o en_selftest.o + +mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o + +mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o en_rep.o en_tc.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o @@ -22,3 +26,5 @@ mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \ en_accel/ipsec_stats.o + +CFLAGS_tracepoint.o := -I$(src) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index 3c95f7f53802..47239bf7bf43 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -258,6 +258,7 @@ EXPORT_SYMBOL_GPL(mlx5_db_alloc); void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db) { u32 db_per_page = PAGE_SIZE / cache_line_size(); + mutex_lock(&dev->priv.pgdir_mutex); __set_bit(db->index, db->u.pgdir->bitmap); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 1acbb721f38d..1fffdebbc9e8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -802,7 +802,6 @@ static void cmd_work_handler(struct work_struct *work) bool poll_cmd = ent->polling; int alloc_ret; - sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; down(sem); if (!ent->page_queue) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index a62f4b6a21a5..ff60cf7342ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -45,11 +45,70 @@ struct mlx5_device_context { unsigned long state; }; +struct mlx5_delayed_event { + struct list_head list; + struct mlx5_core_dev *dev; + enum mlx5_dev_event event; + unsigned long param; +}; + enum { MLX5_INTERFACE_ADDED, MLX5_INTERFACE_ATTACHED, }; +static void add_delayed_event(struct mlx5_priv *priv, + struct mlx5_core_dev *dev, + enum mlx5_dev_event event, + unsigned long param) +{ + struct mlx5_delayed_event *delayed_event; + + delayed_event = kzalloc(sizeof(*delayed_event), GFP_ATOMIC); + if (!delayed_event) { + mlx5_core_err(dev, "event %d is missed\n", event); + return; + } + + mlx5_core_dbg(dev, "Accumulating event %d\n", event); + delayed_event->dev = dev; + delayed_event->event = event; + delayed_event->param = param; + list_add_tail(&delayed_event->list, &priv->waiting_events_list); +} + +static void fire_delayed_event_locked(struct mlx5_device_context *dev_ctx, + struct mlx5_core_dev *dev, + struct mlx5_priv *priv) +{ + struct mlx5_delayed_event *de; + struct mlx5_delayed_event *n; + + /* stop delaying events */ + priv->is_accum_events = false; + + /* fire all accumulated events before new event comes */ + list_for_each_entry_safe(de, n, &priv->waiting_events_list, list) { + dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param); + list_del(&de->list); + kfree(de); + } +} + +static void cleanup_delayed_evets(struct mlx5_priv *priv) +{ + struct mlx5_delayed_event *de; + struct mlx5_delayed_event *n; + + spin_lock_irq(&priv->ctx_lock); + priv->is_accum_events = false; + list_for_each_entry_safe(de, n, &priv->waiting_events_list, list) { + list_del(&de->list); + kfree(de); + } + spin_unlock_irq(&priv->ctx_lock); +} + void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) { struct mlx5_device_context *dev_ctx; @@ -63,6 +122,12 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) return; dev_ctx->intf = intf; + /* accumulating events that can come after mlx5_ib calls to + * ib_register_device, till adding that interface to the events list. + */ + + priv->is_accum_events = true; + dev_ctx->context = intf->add(dev); set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); if (intf->attach) @@ -71,6 +136,9 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) if (dev_ctx->context) { spin_lock_irq(&priv->ctx_lock); list_add_tail(&dev_ctx->list, &priv->ctx_list); + + fire_delayed_event_locked(dev_ctx, dev, priv); + #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING if (dev_ctx->intf->pfault) { if (priv->pfault) { @@ -84,6 +152,8 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) spin_unlock_irq(&priv->ctx_lock); } else { kfree(dev_ctx); + /* delete all accumulated events */ + cleanup_delayed_evets(priv); } } @@ -341,6 +411,9 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, spin_lock_irqsave(&priv->ctx_lock, flags); + if (priv->is_accum_events) + add_delayed_event(priv, dev, event, param); + list_for_each_entry(dev_ctx, &priv->ctx_list, list) if (dev_ctx->intf->event) dev_ctx->intf->event(dev, dev_ctx->context, event, param); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile new file mode 100644 index 000000000000..d8e17110f25d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile @@ -0,0 +1 @@ +subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c new file mode 100644 index 000000000000..0be4575b58a2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define CREATE_TRACE_POINTS + +#include "fs_tracepoint.h" +#include <linux/stringify.h> + +#define DECLARE_MASK_VAL(type, name) struct {type m; type v; } name +#define MASK_VAL(type, spec, name, mask, val, fld) \ + DECLARE_MASK_VAL(type, name) = \ + {.m = MLX5_GET(spec, mask, fld),\ + .v = MLX5_GET(spec, val, fld)} +#define MASK_VAL_BE(type, spec, name, mask, val, fld) \ + DECLARE_MASK_VAL(type, name) = \ + {.m = MLX5_GET_BE(type, spec, mask, fld),\ + .v = MLX5_GET_BE(type, spec, val, fld)} +#define GET_MASKED_VAL(name) (name.m & name.v) + +#define GET_MASK_VAL(name, type, mask, val, fld) \ + (name.m = MLX5_GET(type, mask, fld), \ + name.v = MLX5_GET(type, val, fld), \ + name.m & name.v) +#define PRINT_MASKED_VAL(name, p, format) { \ + if (name.m) \ + trace_seq_printf(p, __stringify(name) "=" format " ", name.v); \ + } +#define PRINT_MASKED_VALP(name, cast, p, format) { \ + if (name.m) \ + trace_seq_printf(p, __stringify(name) "=" format " ", \ + (cast)&name.v);\ + } + +static void print_lyr_2_4_hdrs(struct trace_seq *p, + const u32 *mask, const u32 *value) +{ +#define MASK_VAL_L2(type, name, fld) \ + MASK_VAL(type, fte_match_set_lyr_2_4, name, mask, value, fld) + DECLARE_MASK_VAL(u64, smac) = { + .m = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, mask, smac_15_0), + .v = MLX5_GET(fte_match_set_lyr_2_4, value, smac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, value, smac_15_0)}; + DECLARE_MASK_VAL(u64, dmac) = { + .m = MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_15_0), + .v = MLX5_GET(fte_match_set_lyr_2_4, value, dmac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, value, dmac_15_0)}; + MASK_VAL_L2(u16, ethertype, ethertype); + + PRINT_MASKED_VALP(smac, u8 *, p, "%pM"); + PRINT_MASKED_VALP(dmac, u8 *, p, "%pM"); + PRINT_MASKED_VAL(ethertype, p, "%04x"); + + if (ethertype.m == 0xffff) { + if (ethertype.v == ETH_P_IP) { +#define MASK_VAL_L2_BE(type, name, fld) \ + MASK_VAL_BE(type, fte_match_set_lyr_2_4, name, mask, value, fld) + MASK_VAL_L2_BE(u32, src_ipv4, + src_ipv4_src_ipv6.ipv4_layout.ipv4); + MASK_VAL_L2_BE(u32, dst_ipv4, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + + PRINT_MASKED_VALP(src_ipv4, typeof(&src_ipv4.v), p, + "%pI4"); + PRINT_MASKED_VALP(dst_ipv4, typeof(&dst_ipv4.v), p, + "%pI4"); + } else if (ethertype.v == ETH_P_IPV6) { + static const struct in6_addr full_ones = { + .in6_u.u6_addr32 = {htonl(0xffffffff), + htonl(0xffffffff), + htonl(0xffffffff), + htonl(0xffffffff)}, + }; + DECLARE_MASK_VAL(struct in6_addr, src_ipv6); + DECLARE_MASK_VAL(struct in6_addr, dst_ipv6); + + memcpy(src_ipv6.m.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + sizeof(src_ipv6.m)); + memcpy(dst_ipv6.m.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + sizeof(dst_ipv6.m)); + memcpy(src_ipv6.v.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, value, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + sizeof(src_ipv6.v)); + memcpy(dst_ipv6.v.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, value, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + sizeof(dst_ipv6.v)); + + if (!memcmp(&src_ipv6.m, &full_ones, sizeof(full_ones))) + trace_seq_printf(p, "src_ipv6=%pI6 ", + &src_ipv6.v); + if (!memcmp(&dst_ipv6.m, &full_ones, sizeof(full_ones))) + trace_seq_printf(p, "dst_ipv6=%pI6 ", + &dst_ipv6.v); + } + } + +#define PRINT_MASKED_VAL_L2(type, name, fld, p, format) {\ + MASK_VAL_L2(type, name, fld); \ + PRINT_MASKED_VAL(name, p, format); \ +} + + PRINT_MASKED_VAL_L2(u8, ip_protocol, ip_protocol, p, "%02x"); + PRINT_MASKED_VAL_L2(u16, tcp_flags, tcp_flags, p, "%x"); + PRINT_MASKED_VAL_L2(u16, tcp_sport, tcp_sport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, tcp_dport, tcp_dport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, udp_sport, udp_sport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, udp_dport, udp_dport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, first_vid, first_vid, p, "%04x"); + PRINT_MASKED_VAL_L2(u8, first_prio, first_prio, p, "%x"); + PRINT_MASKED_VAL_L2(u8, first_cfi, first_cfi, p, "%d"); + PRINT_MASKED_VAL_L2(u8, ip_dscp, ip_dscp, p, "%02x"); + PRINT_MASKED_VAL_L2(u8, ip_ecn, ip_ecn, p, "%x"); + PRINT_MASKED_VAL_L2(u8, cvlan_tag, cvlan_tag, p, "%d"); + PRINT_MASKED_VAL_L2(u8, svlan_tag, svlan_tag, p, "%d"); + PRINT_MASKED_VAL_L2(u8, frag, frag, p, "%d"); +} + +static void print_misc_parameters_hdrs(struct trace_seq *p, + const u32 *mask, const u32 *value) +{ +#define MASK_VAL_MISC(type, name, fld) \ + MASK_VAL(type, fte_match_set_misc, name, mask, value, fld) +#define PRINT_MASKED_VAL_MISC(type, name, fld, p, format) {\ + MASK_VAL_MISC(type, name, fld); \ + PRINT_MASKED_VAL(name, p, format); \ +} + DECLARE_MASK_VAL(u64, gre_key) = { + .m = MLX5_GET(fte_match_set_misc, mask, gre_key_h) << 8 | + MLX5_GET(fte_match_set_misc, mask, gre_key_l), + .v = MLX5_GET(fte_match_set_misc, value, gre_key_h) << 8 | + MLX5_GET(fte_match_set_misc, value, gre_key_l)}; + + PRINT_MASKED_VAL(gre_key, p, "%llu"); + PRINT_MASKED_VAL_MISC(u32, source_sqn, source_sqn, p, "%u"); + PRINT_MASKED_VAL_MISC(u16, source_port, source_port, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, outer_second_prio, outer_second_prio, + p, "%u"); + PRINT_MASKED_VAL_MISC(u8, outer_second_cfi, outer_second_cfi, p, "%u"); + PRINT_MASKED_VAL_MISC(u16, outer_second_vid, outer_second_vid, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_prio, inner_second_prio, + p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_cfi, inner_second_cfi, p, "%u"); + PRINT_MASKED_VAL_MISC(u16, inner_second_vid, inner_second_vid, p, "%u"); + + PRINT_MASKED_VAL_MISC(u8, outer_second_cvlan_tag, + outer_second_cvlan_tag, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_cvlan_tag, + inner_second_cvlan_tag, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, outer_second_svlan_tag, + outer_second_svlan_tag, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_svlan_tag, + inner_second_svlan_tag, p, "%u"); + + PRINT_MASKED_VAL_MISC(u8, gre_protocol, gre_protocol, p, "%u"); + + PRINT_MASKED_VAL_MISC(u32, vxlan_vni, vxlan_vni, p, "%u"); + PRINT_MASKED_VAL_MISC(u32, outer_ipv6_flow_label, outer_ipv6_flow_label, + p, "%x"); + PRINT_MASKED_VAL_MISC(u32, inner_ipv6_flow_label, inner_ipv6_flow_label, + p, "%x"); +} + +const char *parse_fs_hdrs(struct trace_seq *p, + u8 match_criteria_enable, + const u32 *mask_outer, + const u32 *mask_misc, + const u32 *mask_inner, + const u32 *value_outer, + const u32 *value_misc, + const u32 *value_inner) +{ + const char *ret = trace_seq_buffer_ptr(p); + + if (match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) { + trace_seq_printf(p, "[outer] "); + print_lyr_2_4_hdrs(p, mask_outer, value_outer); + } + + if (match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) { + trace_seq_printf(p, "[misc] "); + print_misc_parameters_hdrs(p, mask_misc, value_misc); + } + if (match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) { + trace_seq_printf(p, "[inner] "); + print_lyr_2_4_hdrs(p, mask_inner, value_inner); + } + trace_seq_putc(p, 0); + return ret; +} + +const char *parse_fs_dst(struct trace_seq *p, + const struct mlx5_flow_destination *dst, + u32 counter_id) +{ + const char *ret = trace_seq_buffer_ptr(p); + + switch (dst->type) { + case MLX5_FLOW_DESTINATION_TYPE_VPORT: + trace_seq_printf(p, "vport=%u\n", dst->vport_num); + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: + trace_seq_printf(p, "ft=%p\n", dst->ft); + break; + case MLX5_FLOW_DESTINATION_TYPE_TIR: + trace_seq_printf(p, "tir=%u\n", dst->tir_num); + break; + case MLX5_FLOW_DESTINATION_TYPE_COUNTER: + trace_seq_printf(p, "counter_id=%u\n", counter_id); + break; + } + + trace_seq_putc(p, 0); + return ret; +} + +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_fg); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fg); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_set_fte); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fte); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_rule); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_rule); + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h new file mode 100644 index 000000000000..1e3a6c3e4132 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#if !defined(_MLX5_FS_TP_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_FS_TP_ + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> +#include "../fs_core.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#define __parse_fs_hdrs(match_criteria_enable, mouter, mmisc, minner, vouter, \ + vinner, vmisc) \ + parse_fs_hdrs(p, match_criteria_enable, mouter, mmisc, minner, vouter,\ + vinner, vmisc) + +const char *parse_fs_hdrs(struct trace_seq *p, + u8 match_criteria_enable, + const u32 *mask_outer, + const u32 *mask_misc, + const u32 *mask_inner, + const u32 *value_outer, + const u32 *value_misc, + const u32 *value_inner); + +#define __parse_fs_dst(dst, counter_id) \ + parse_fs_dst(p, (const struct mlx5_flow_destination *)dst, counter_id) + +const char *parse_fs_dst(struct trace_seq *p, + const struct mlx5_flow_destination *dst, + u32 counter_id); + +TRACE_EVENT(mlx5_fs_add_fg, + TP_PROTO(const struct mlx5_flow_group *fg), + TP_ARGS(fg), + TP_STRUCT__entry( + __field(const struct mlx5_flow_group *, fg) + __field(const struct mlx5_flow_table *, ft) + __field(u32, start_index) + __field(u32, end_index) + __field(u32, id) + __field(u8, mask_enable) + __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_misc, MLX5_ST_SZ_DW(fte_match_set_misc)) + ), + TP_fast_assign( + __entry->fg = fg; + fs_get_obj(__entry->ft, fg->node.parent); + __entry->start_index = fg->start_index; + __entry->end_index = fg->start_index + fg->max_ftes; + __entry->id = fg->id; + __entry->mask_enable = fg->mask.match_criteria_enable; + memcpy(__entry->mask_outer, + MLX5_ADDR_OF(fte_match_param, + &fg->mask.match_criteria, + outer_headers), + sizeof(__entry->mask_outer)); + memcpy(__entry->mask_inner, + MLX5_ADDR_OF(fte_match_param, + &fg->mask.match_criteria, + inner_headers), + sizeof(__entry->mask_inner)); + memcpy(__entry->mask_misc, + MLX5_ADDR_OF(fte_match_param, + &fg->mask.match_criteria, + misc_parameters), + sizeof(__entry->mask_misc)); + + ), + TP_printk("fg=%p ft=%p id=%u start=%u end=%u bit_mask=%02x %s\n", + __entry->fg, __entry->ft, __entry->id, + __entry->start_index, __entry->end_index, + __entry->mask_enable, + __parse_fs_hdrs(__entry->mask_enable, + __entry->mask_outer, + __entry->mask_misc, + __entry->mask_inner, + __entry->mask_outer, + __entry->mask_misc, + __entry->mask_inner)) + ); + +TRACE_EVENT(mlx5_fs_del_fg, + TP_PROTO(const struct mlx5_flow_group *fg), + TP_ARGS(fg), + TP_STRUCT__entry( + __field(const struct mlx5_flow_group *, fg) + __field(u32, id) + ), + TP_fast_assign( + __entry->fg = fg; + __entry->id = fg->id; + + ), + TP_printk("fg=%p id=%u\n", + __entry->fg, __entry->id) + ); + +#define ACTION_FLAGS \ + {MLX5_FLOW_CONTEXT_ACTION_ALLOW, "ALLOW"},\ + {MLX5_FLOW_CONTEXT_ACTION_DROP, "DROP"},\ + {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, "FWD"},\ + {MLX5_FLOW_CONTEXT_ACTION_COUNT, "CNT"},\ + {MLX5_FLOW_CONTEXT_ACTION_ENCAP, "ENCAP"},\ + {MLX5_FLOW_CONTEXT_ACTION_DECAP, "DECAP"},\ + {MLX5_FLOW_CONTEXT_ACTION_MOD_HDR, "MOD_HDR"},\ + {MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO, "NEXT_PRIO"} + +TRACE_EVENT(mlx5_fs_set_fte, + TP_PROTO(const struct fs_fte *fte, bool new_fte), + TP_ARGS(fte, new_fte), + TP_STRUCT__entry( + __field(const struct fs_fte *, fte) + __field(const struct mlx5_flow_group *, fg) + __field(u32, group_index) + __field(u32, index) + __field(u32, action) + __field(u32, flow_tag) + __field(u8, mask_enable) + __field(bool, new_fte) + __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_misc, MLX5_ST_SZ_DW(fte_match_set_misc)) + __array(u32, value_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, value_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, value_misc, MLX5_ST_SZ_DW(fte_match_set_misc)) + ), + TP_fast_assign( + __entry->fte = fte; + __entry->new_fte = new_fte; + fs_get_obj(__entry->fg, fte->node.parent); + __entry->group_index = __entry->fg->id; + __entry->index = fte->index; + __entry->action = fte->action; + __entry->mask_enable = __entry->fg->mask.match_criteria_enable; + __entry->flow_tag = fte->flow_tag; + memcpy(__entry->mask_outer, + MLX5_ADDR_OF(fte_match_param, + &__entry->fg->mask.match_criteria, + outer_headers), + sizeof(__entry->mask_outer)); + memcpy(__entry->mask_inner, + MLX5_ADDR_OF(fte_match_param, + &__entry->fg->mask.match_criteria, + inner_headers), + sizeof(__entry->mask_inner)); + memcpy(__entry->mask_misc, + MLX5_ADDR_OF(fte_match_param, + &__entry->fg->mask.match_criteria, + misc_parameters), + sizeof(__entry->mask_misc)); + memcpy(__entry->value_outer, + MLX5_ADDR_OF(fte_match_param, + &fte->val, + outer_headers), + sizeof(__entry->value_outer)); + memcpy(__entry->value_inner, + MLX5_ADDR_OF(fte_match_param, + &fte->val, + inner_headers), + sizeof(__entry->value_inner)); + memcpy(__entry->value_misc, + MLX5_ADDR_OF(fte_match_param, + &fte->val, + misc_parameters), + sizeof(__entry->value_misc)); + + ), + TP_printk("op=%s fte=%p fg=%p index=%u group_index=%u action=<%s> flow_tag=%x %s\n", + __entry->new_fte ? "add" : "set", + __entry->fte, __entry->fg, __entry->index, + __entry->group_index, __print_flags(__entry->action, "|", + ACTION_FLAGS), + __entry->flow_tag, + __parse_fs_hdrs(__entry->mask_enable, + __entry->mask_outer, + __entry->mask_misc, + __entry->mask_inner, + __entry->value_outer, + __entry->value_misc, + __entry->value_inner)) + ); + +TRACE_EVENT(mlx5_fs_del_fte, + TP_PROTO(const struct fs_fte *fte), + TP_ARGS(fte), + TP_STRUCT__entry( + __field(const struct fs_fte *, fte) + __field(u32, index) + ), + TP_fast_assign( + __entry->fte = fte; + __entry->index = fte->index; + + ), + TP_printk("fte=%p index=%u\n", + __entry->fte, __entry->index) + ); + +TRACE_EVENT(mlx5_fs_add_rule, + TP_PROTO(const struct mlx5_flow_rule *rule), + TP_ARGS(rule), + TP_STRUCT__entry( + __field(const struct mlx5_flow_rule *, rule) + __field(const struct fs_fte *, fte) + __field(u32, sw_action) + __field(u32, index) + __field(u32, counter_id) + __array(u8, destination, sizeof(struct mlx5_flow_destination)) + ), + TP_fast_assign( + __entry->rule = rule; + fs_get_obj(__entry->fte, rule->node.parent); + __entry->index = __entry->fte->dests_size - 1; + __entry->sw_action = rule->sw_action; + memcpy(__entry->destination, + &rule->dest_attr, + sizeof(__entry->destination)); + if (rule->dest_attr.type & MLX5_FLOW_DESTINATION_TYPE_COUNTER && + rule->dest_attr.counter) + __entry->counter_id = + rule->dest_attr.counter->id; + ), + TP_printk("rule=%p fte=%p index=%u sw_action=<%s> [dst] %s\n", + __entry->rule, __entry->fte, __entry->index, + __print_flags(__entry->sw_action, "|", ACTION_FLAGS), + __parse_fs_dst(__entry->destination, __entry->counter_id)) + ); + +TRACE_EVENT(mlx5_fs_del_rule, + TP_PROTO(const struct mlx5_flow_rule *rule), + TP_ARGS(rule), + TP_STRUCT__entry( + __field(const struct mlx5_flow_rule *, rule) + __field(const struct fs_fte *, fte) + ), + TP_fast_assign( + __entry->rule = rule; + fs_get_obj(__entry->fte, rule->node.parent); + ), + TP_printk("rule=%p fte=%p\n", + __entry->rule, __entry->fte) + ); +#endif + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ./diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE fs_tracepoint +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 040d1af310b4..cc13d3dbd366 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -291,10 +291,11 @@ struct mlx5e_tstamp { enum { MLX5E_RQ_STATE_ENABLED, - MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, MLX5E_RQ_STATE_AM, }; +#define MLX5E_TEST_BIT(state, nr) (state & BIT(nr)) + struct mlx5e_cq { /* data path - accessed per cqe */ struct mlx5_cqwq wq; @@ -342,7 +343,6 @@ enum { struct mlx5e_sq_wqe_info { u8 opcode; - u8 num_wqebbs; }; struct mlx5e_txqsq { @@ -418,13 +418,8 @@ struct mlx5e_xdpsq { struct mlx5e_icosq { /* data path */ - /* dirtied @completion */ - u16 cc; - /* dirtied @xmit */ u16 pc ____cacheline_aligned_in_smp; - u32 dma_fifo_pc; - u16 prev_cc; struct mlx5e_cq cq; @@ -438,7 +433,6 @@ struct mlx5e_icosq { void __iomem *uar_map; u32 sqn; u16 edge; - struct device *pdev; __be32 mkey_be; unsigned long state; @@ -507,7 +501,7 @@ struct mlx5e_rx_am { /* Adaptive Moderation */ */ #define MLX5E_CACHE_UNIT (MLX5_MPWRQ_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \ MLX5_MPWRQ_PAGES_PER_WQE : NAPI_POLL_WEIGHT) -#define MLX5E_CACHE_SIZE (2 * roundup_pow_of_two(MLX5E_CACHE_UNIT)) +#define MLX5E_CACHE_SIZE (4 * roundup_pow_of_two(MLX5E_CACHE_UNIT)) struct mlx5e_page_cache { u32 head; u32 tail; @@ -516,7 +510,7 @@ struct mlx5e_page_cache { struct mlx5e_rq; typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*); -typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq*, struct mlx5e_rx_wqe*, u16); +typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq); typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16); struct mlx5e_rq { @@ -527,21 +521,26 @@ struct mlx5e_rq { struct { struct mlx5e_wqe_frag_info *frag_info; u32 frag_sz; /* max possible skb frag_sz */ - bool page_reuse; - bool xdp_xmit; + union { + bool page_reuse; + bool xdp_xmit; + }; } wqe; struct { struct mlx5e_mpw_info *info; void *mtt_no_align; + u16 num_strides; + u8 log_stride_sz; + bool umr_in_progress; } mpwqe; }; struct { + u16 headroom; u8 page_order; - u32 wqe_sz; /* wqe data buffer size */ u8 map_dir; /* dma map direction */ } buff; - __be32 mkey_be; + struct mlx5e_channel *channel; struct device *pdev; struct net_device *netdev; struct mlx5e_tstamp *tstamp; @@ -550,12 +549,11 @@ struct mlx5e_rq { struct mlx5e_page_cache page_cache; mlx5e_fp_handle_rx_cqe handle_rx_cqe; - mlx5e_fp_alloc_wqe alloc_wqe; + mlx5e_fp_post_rx_wqes post_wqes; mlx5e_fp_dealloc_wqe dealloc_wqe; unsigned long state; int ix; - u16 rx_headroom; struct mlx5e_rx_am am; /* Adaptive Moderation */ @@ -565,19 +563,13 @@ struct mlx5e_rq { /* control */ struct mlx5_wq_ctrl wq_ctrl; + __be32 mkey_be; u8 wq_type; - u32 mpwqe_stride_sz; - u32 mpwqe_num_strides; u32 rqn; - struct mlx5e_channel *channel; struct mlx5_core_dev *mdev; struct mlx5_core_mkey umr_mkey; } ____cacheline_aligned_in_smp; -enum channel_flags { - MLX5E_CHANNEL_NAPI_SCHED = 1, -}; - struct mlx5e_channel { /* data path */ struct mlx5e_rq rq; @@ -589,7 +581,9 @@ struct mlx5e_channel { struct net_device *netdev; __be32 mkey_be; u8 num_tc; - unsigned long flags; + + /* data path - accessed per napi poll */ + struct irq_desc *irq_desc; /* control */ struct mlx5e_priv *priv; @@ -620,6 +614,12 @@ enum mlx5e_traffic_types { MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY, }; +enum mlx5e_tunnel_types { + MLX5E_TT_IPV4_GRE, + MLX5E_TT_IPV6_GRE, + MLX5E_NUM_TUNNEL_TT, +}; + enum { MLX5E_STATE_ASYNC_EVENTS_ENABLED, MLX5E_STATE_OPENED, @@ -679,6 +679,7 @@ struct mlx5e_l2_table { struct mlx5e_ttc_table { struct mlx5e_flow_table ft; struct mlx5_flow_handle *rules[MLX5E_NUM_TT]; + struct mlx5_flow_handle *tunnel_rules[MLX5E_NUM_TUNNEL_TT]; }; #define ARFS_HASH_SHIFT BITS_PER_BYTE @@ -711,6 +712,7 @@ enum { MLX5E_VLAN_FT_LEVEL = 0, MLX5E_L2_FT_LEVEL, MLX5E_TTC_FT_LEVEL, + MLX5E_INNER_TTC_FT_LEVEL, MLX5E_ARFS_FT_LEVEL }; @@ -736,6 +738,7 @@ struct mlx5e_flow_steering { struct mlx5e_vlan_table vlan; struct mlx5e_l2_table l2; struct mlx5e_ttc_table ttc; + struct mlx5e_ttc_table inner_ttc; struct mlx5e_arfs_tables arfs; }; @@ -769,6 +772,7 @@ struct mlx5e_priv { u32 tisn[MLX5E_MAX_NUM_TC]; struct mlx5e_rqt indir_rqt; struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; u32 tx_rates[MLX5E_MAX_NUM_SQS]; int hard_mtu; @@ -839,11 +843,9 @@ void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); -int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); -int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); +bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq); void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); -void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq); void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi); void mlx5e_rx_am(struct mlx5e_rq *rq); @@ -903,7 +905,7 @@ int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, struct mlx5e_redirect_rqt_param rrp); void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params, enum mlx5e_traffic_types tt, - void *tirc); + void *tirc, bool inner); int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); @@ -922,8 +924,7 @@ void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv); -void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, - u32 *indirection_rqt, int len, +void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, int num_channels); int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); @@ -932,6 +933,12 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params, u8 rq_type); +static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) +{ + return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version)); +} + static inline struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index f5594014715b..d12e9fc0d76b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -176,7 +176,6 @@ static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv) int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) { - switch (sset) { case ETH_SS_STATS: return NUM_SW_COUNTERS + @@ -207,7 +206,7 @@ static int mlx5e_get_sset_count(struct net_device *dev, int sset) return mlx5e_ethtool_get_sset_count(priv, sset); } -static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) +static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data) { int i, j, tc, prio, idx = 0; unsigned long pfc_combined; @@ -242,10 +241,22 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_phy_statistical_stats_desc[i].format); + for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS(priv); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_eth_ext_stats_desc[i].format); + for (i = 0; i < NUM_PCIE_PERF_COUNTERS(priv); i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, pcie_perf_stats_desc[i].format); + for (i = 0; i < NUM_PCIE_PERF_COUNTERS64(priv); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pcie_perf_stats_desc64[i].format); + + for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS(priv); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pcie_perf_stall_stats_desc[i].format); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) sprintf(data + (idx++) * ETH_GSTRING_LEN, @@ -297,8 +308,7 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) priv->channel_tc2txq[i][tc]); } -void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, - uint32_t stringset, uint8_t *data) +void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data) { int i; @@ -320,8 +330,7 @@ void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, } } -static void mlx5e_get_strings(struct net_device *dev, - uint32_t stringset, uint8_t *data) +static void mlx5e_get_strings(struct net_device *dev, u32 stringset, u8 *data) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -373,10 +382,22 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters, pport_phy_statistical_stats_desc, i); + for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS(priv); i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.eth_ext_counters, + pport_eth_ext_stats_desc, i); + for (i = 0; i < NUM_PCIE_PERF_COUNTERS(priv); i++) data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, pcie_perf_stats_desc, i); + for (i = 0; i < NUM_PCIE_PERF_COUNTERS64(priv); i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pcie.pcie_perf_counters, + pcie_perf_stats_desc64, i); + + for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS(priv); i++) + data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, + pcie_perf_stall_stats_desc, i); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], @@ -642,8 +663,7 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, new_channels.params = priv->channels.params; new_channels.params.num_channels = count; if (!netif_is_rxfh_configured(priv->netdev)) - mlx5e_build_default_indir_rqt(priv->mdev, - new_channels.params.indirection_rqt, + mlx5e_build_default_indir_rqt(new_channels.params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, count); if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { @@ -966,24 +986,27 @@ static u8 get_connector_port(u32 eth_proto, u8 connector_type) if (connector_type && connector_type < MLX5E_CONNECTOR_TYPE_NUMBER) return ptys2connector_type[connector_type]; - if (eth_proto & (MLX5E_PROT_MASK(MLX5E_10GBASE_SR) - | MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) - | MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) - | MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) { - return PORT_FIBRE; + if (eth_proto & + (MLX5E_PROT_MASK(MLX5E_10GBASE_SR) | + MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) | + MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) | + MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) { + return PORT_FIBRE; } - if (eth_proto & (MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) - | MLX5E_PROT_MASK(MLX5E_10GBASE_CR) - | MLX5E_PROT_MASK(MLX5E_100GBASE_CR4))) { - return PORT_DA; + if (eth_proto & + (MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) | + MLX5E_PROT_MASK(MLX5E_10GBASE_CR) | + MLX5E_PROT_MASK(MLX5E_100GBASE_CR4))) { + return PORT_DA; } - if (eth_proto & (MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) - | MLX5E_PROT_MASK(MLX5E_10GBASE_KR) - | MLX5E_PROT_MASK(MLX5E_40GBASE_KR4) - | MLX5E_PROT_MASK(MLX5E_100GBASE_KR4))) { - return PORT_NONE; + if (eth_proto & + (MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) | + MLX5E_PROT_MASK(MLX5E_10GBASE_KR) | + MLX5E_PROT_MASK(MLX5E_40GBASE_KR4) | + MLX5E_PROT_MASK(MLX5E_100GBASE_KR4))) { + return PORT_NONE; } return PORT_OTHER; @@ -1190,9 +1213,18 @@ static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { memset(tirc, 0, ctxlen); - mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false); mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen); } + + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + memset(tirc, 0, ctxlen); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true); + mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in, inlen); + } } static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index dfccb5305e9c..f11fd07ac4dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -36,6 +36,7 @@ #include <linux/tcp.h> #include <linux/mlx5/fs.h> #include "en.h" +#include "lib/mpfs.h" static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, struct mlx5e_l2_rule *ai, int type); @@ -65,6 +66,7 @@ struct mlx5e_l2_hash_node { struct hlist_node hlist; u8 action; struct mlx5e_l2_rule ai; + bool mpfs; }; static inline int mlx5e_hash_l2(u8 *addr) @@ -362,17 +364,30 @@ static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv) static void mlx5e_execute_l2_action(struct mlx5e_priv *priv, struct mlx5e_l2_hash_node *hn) { - switch (hn->action) { + u8 action = hn->action; + int l2_err = 0; + + switch (action) { case MLX5E_ACTION_ADD: mlx5e_add_l2_flow_rule(priv, &hn->ai, MLX5E_FULLMATCH); + if (!is_multicast_ether_addr(hn->ai.addr)) { + l2_err = mlx5_mpfs_add_mac(priv->mdev, hn->ai.addr); + hn->mpfs = !l2_err; + } hn->action = MLX5E_ACTION_NONE; break; case MLX5E_ACTION_DEL: + if (!is_multicast_ether_addr(hn->ai.addr) && hn->mpfs) + l2_err = mlx5_mpfs_del_mac(priv->mdev, hn->ai.addr); mlx5e_del_l2_flow_rule(priv, &hn->ai); mlx5e_del_l2_from_hash(hn); break; } + + if (l2_err) + netdev_warn(priv->netdev, "MPFS, failed to %s mac %pM, err(%d)\n", + action == MLX5E_ACTION_ADD ? "add" : "del", hn->ai.addr, l2_err); } static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv) @@ -593,12 +608,21 @@ static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc) ttc->rules[i] = NULL; } } + + for (i = 0; i < MLX5E_NUM_TUNNEL_TT; i++) { + if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) { + mlx5_del_flow_rules(ttc->tunnel_rules[i]); + ttc->tunnel_rules[i] = NULL; + } + } } -static struct { +struct mlx5e_etype_proto { u16 etype; u8 proto; -} ttc_rules[] = { +}; + +static struct mlx5e_etype_proto ttc_rules[] = { [MLX5E_TT_IPV4_TCP] = { .etype = ETH_P_IP, .proto = IPPROTO_TCP, @@ -645,6 +669,28 @@ static struct { }, }; +static struct mlx5e_etype_proto ttc_tunnel_rules[] = { + [MLX5E_TT_IPV4_GRE] = { + .etype = ETH_P_IP, + .proto = IPPROTO_GRE, + }, + [MLX5E_TT_IPV6_GRE] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_GRE, + }, +}; + +static u8 mlx5e_etype_to_ipv(u16 ethertype) +{ + if (ethertype == ETH_P_IP) + return 4; + + if (ethertype == ETH_P_IPV6) + return 6; + + return 0; +} + static struct mlx5_flow_handle * mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, struct mlx5_flow_table *ft, @@ -652,10 +698,12 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, u16 etype, u8 proto) { + int match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version); MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; int err = 0; + u8 ipv; spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) @@ -666,7 +714,13 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto); } - if (etype) { + + ipv = mlx5e_etype_to_ipv(etype); + if (match_ipv_outer && ipv) { + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv); + } else if (etype) { spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype); @@ -708,6 +762,20 @@ static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv) goto del_rules; } + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return 0; + + rules = ttc->tunnel_rules; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = priv->fs.inner_ttc.ft.t; + for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) { + rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest, + ttc_tunnel_rules[tt].etype, + ttc_tunnel_rules[tt].proto); + if (IS_ERR(rules[tt])) + goto del_rules; + } + return 0; del_rules: @@ -718,13 +786,23 @@ del_rules: } #define MLX5E_TTC_NUM_GROUPS 3 -#define MLX5E_TTC_GROUP1_SIZE BIT(3) -#define MLX5E_TTC_GROUP2_SIZE BIT(1) -#define MLX5E_TTC_GROUP3_SIZE BIT(0) +#define MLX5E_TTC_GROUP1_SIZE (BIT(3) + MLX5E_NUM_TUNNEL_TT) +#define MLX5E_TTC_GROUP2_SIZE BIT(1) +#define MLX5E_TTC_GROUP3_SIZE BIT(0) #define MLX5E_TTC_TABLE_SIZE (MLX5E_TTC_GROUP1_SIZE +\ MLX5E_TTC_GROUP2_SIZE +\ MLX5E_TTC_GROUP3_SIZE) -static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc) + +#define MLX5E_INNER_TTC_NUM_GROUPS 3 +#define MLX5E_INNER_TTC_GROUP1_SIZE BIT(3) +#define MLX5E_INNER_TTC_GROUP2_SIZE BIT(1) +#define MLX5E_INNER_TTC_GROUP3_SIZE BIT(0) +#define MLX5E_INNER_TTC_TABLE_SIZE (MLX5E_INNER_TTC_GROUP1_SIZE +\ + MLX5E_INNER_TTC_GROUP2_SIZE +\ + MLX5E_INNER_TTC_GROUP3_SIZE) + +static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc, + bool use_ipv) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5e_flow_table *ft = &ttc->ft; @@ -746,7 +824,10 @@ static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc) /* L4 Group */ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + if (use_ipv) + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version); + else + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_TTC_GROUP1_SIZE; @@ -787,6 +868,190 @@ err: return err; } +static struct mlx5_flow_handle * +mlx5e_generate_inner_ttc_rule(struct mlx5e_priv *priv, + struct mlx5_flow_table *ft, + struct mlx5_flow_destination *dest, + u16 etype, u8 proto) +{ + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + u8 ipv; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + ipv = mlx5e_etype_to_ipv(etype); + if (etype && ipv) { + spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv); + } + + if (proto) { + spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto); + } + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "%s: add rule failed\n", __func__); + } + + kvfree(spec); + return err ? ERR_PTR(err) : rule; +} + +static int mlx5e_generate_inner_ttc_table_rules(struct mlx5e_priv *priv) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_handle **rules; + struct mlx5e_ttc_table *ttc; + struct mlx5_flow_table *ft; + int err; + int tt; + + ttc = &priv->fs.inner_ttc; + ft = ttc->ft.t; + rules = ttc->rules; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + for (tt = 0; tt < MLX5E_NUM_TT; tt++) { + if (tt == MLX5E_TT_ANY) + dest.tir_num = priv->direct_tir[0].tirn; + else + dest.tir_num = priv->inner_indir_tir[tt].tirn; + + rules[tt] = mlx5e_generate_inner_ttc_rule(priv, ft, &dest, + ttc_rules[tt].etype, + ttc_rules[tt].proto); + if (IS_ERR(rules[tt])) + goto del_rules; + } + + return 0; + +del_rules: + err = PTR_ERR(rules[tt]); + rules[tt] = NULL; + mlx5e_cleanup_ttc_rules(ttc); + return err; +} + +static int mlx5e_create_inner_ttc_table_groups(struct mlx5e_ttc_table *ttc) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5e_flow_table *ft = &ttc->ft; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_INNER_TTC_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + kfree(ft->g); + return -ENOMEM; + } + + /* L4 Group */ + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_INNER_TTC_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* L3 Group */ + MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_INNER_TTC_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Any Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_INNER_TTC_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + kvfree(in); + + return err; +} + +static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv) +{ + struct mlx5e_ttc_table *ttc = &priv->fs.inner_ttc; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5e_flow_table *ft = &ttc->ft; + int err; + + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return 0; + + ft_attr.max_fte = MLX5E_INNER_TTC_TABLE_SIZE; + ft_attr.level = MLX5E_INNER_TTC_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + err = mlx5e_create_inner_ttc_table_groups(ttc); + if (err) + goto err; + + err = mlx5e_generate_inner_ttc_table_rules(priv); + if (err) + goto err; + + return 0; + +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv) +{ + struct mlx5e_ttc_table *ttc = &priv->fs.inner_ttc; + + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return; + + mlx5e_cleanup_ttc_rules(ttc); + mlx5e_destroy_flow_table(&ttc->ft); +} + void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) { struct mlx5e_ttc_table *ttc = &priv->fs.ttc; @@ -797,6 +1062,7 @@ void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) int mlx5e_create_ttc_table(struct mlx5e_priv *priv) { + bool match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version); struct mlx5e_ttc_table *ttc = &priv->fs.ttc; struct mlx5_flow_table_attr ft_attr = {}; struct mlx5e_flow_table *ft = &ttc->ft; @@ -813,7 +1079,7 @@ int mlx5e_create_ttc_table(struct mlx5e_priv *priv) return err; } - err = mlx5e_create_ttc_table_groups(ttc); + err = mlx5e_create_ttc_table_groups(ttc, match_ipv_outer); if (err) goto err; @@ -1139,11 +1405,18 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv) priv->netdev->hw_features &= ~NETIF_F_NTUPLE; } + err = mlx5e_create_inner_ttc_table(priv); + if (err) { + netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n", + err); + goto err_destroy_arfs_tables; + } + err = mlx5e_create_ttc_table(priv); if (err) { netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); - goto err_destroy_arfs_tables; + goto err_destroy_inner_ttc_table; } err = mlx5e_create_l2_table(priv); @@ -1168,6 +1441,8 @@ err_destroy_l2_table: mlx5e_destroy_l2_table(priv); err_destroy_ttc_table: mlx5e_destroy_ttc_table(priv); +err_destroy_inner_ttc_table: + mlx5e_destroy_inner_ttc_table(priv); err_destroy_arfs_tables: mlx5e_arfs_destroy_tables(priv); @@ -1179,6 +1454,7 @@ void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv) mlx5e_destroy_vlan_table(priv); mlx5e_destroy_l2_table(priv); mlx5e_destroy_ttc_table(priv); + mlx5e_destroy_inner_ttc_table(priv); mlx5e_arfs_destroy_tables(priv); mlx5e_ethtool_cleanup_steering(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d75f3099d164..dfc29720ab77 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -213,6 +213,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_cache_full += rq_stats->cache_full; s->rx_cache_empty += rq_stats->cache_empty; s->rx_cache_busy += rq_stats->cache_busy; + s->rx_cache_waive += rq_stats->cache_waive; for (j = 0; j < priv->channels.params.num_tc; j++) { sq_stats = &c->sq[j].stats; @@ -293,6 +294,12 @@ static void mlx5e_update_pport_counters(struct mlx5e_priv *priv, bool full) mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); } + if (MLX5_CAP_PCAM_FEATURE(mdev, rx_buffer_fullness_counters)) { + out = pstats->eth_ext_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + } + MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP); for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { out = pstats->per_prio_counters[prio]; @@ -593,12 +600,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, } rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; - rq->rx_headroom = params->rq_headroom; + rq->buff.headroom = params->rq_headroom; switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; + rq->post_wqes = mlx5e_post_rx_mpwqes; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe_mpwqe; @@ -615,11 +622,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - rq->mpwqe_stride_sz = BIT(params->mpwqe_log_stride_sz); - rq->mpwqe_num_strides = BIT(params->mpwqe_log_num_strides); + rq->mpwqe.log_stride_sz = params->mpwqe_log_stride_sz; + rq->mpwqe.num_strides = BIT(params->mpwqe_log_num_strides); - rq->buff.wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides; - byte_count = rq->buff.wqe_sz; + byte_count = rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz; err = mlx5e_create_rq_umr_mkey(mdev, rq); if (err) @@ -639,7 +645,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, err = -ENOMEM; goto err_rq_wq_destroy; } - rq->alloc_wqe = mlx5e_alloc_rx_wqe; + rq->post_wqes = mlx5e_post_rx_wqes; rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; #ifdef CONFIG_MLX5_EN_IPSEC @@ -655,18 +661,17 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - rq->buff.wqe_sz = params->lro_en ? + byte_count = params->lro_en ? params->lro_wqe_sz : MLX5E_SW2HW_MTU(c->priv, c->netdev->mtu); #ifdef CONFIG_MLX5_EN_IPSEC if (MLX5_IPSEC_DEV(mdev)) - rq->buff.wqe_sz += MLX5E_METADATA_ETHER_LEN; + byte_count += MLX5E_METADATA_ETHER_LEN; #endif rq->wqe.page_reuse = !params->xdp_prog && !params->lro_en; - byte_count = rq->buff.wqe_sz; /* calc the required page order */ - rq->wqe.frag_sz = MLX5_SKB_FRAG_SZ(rq->rx_headroom + byte_count); + rq->wqe.frag_sz = MLX5_SKB_FRAG_SZ(rq->buff.headroom + byte_count); npages = DIV_ROUND_UP(rq->wqe.frag_sz, PAGE_SIZE); rq->buff.page_order = order_base_2(npages); @@ -677,6 +682,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, for (i = 0; i < wq_sz; i++) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, i) << PAGE_SHIFT; + + wqe->data.addr = cpu_to_be64(dma_offset); + } + wqe->data.byte_count = cpu_to_be32(byte_count); wqe->data.lkey = rq->mkey_be; } @@ -883,7 +894,8 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) u16 wqe_ix; /* UMR WQE (if in progress) is always at wq->head */ - if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && + rq->mpwqe.umr_in_progress) mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]); while (!mlx5_wq_ll_is_empty(wq)) { @@ -926,7 +938,7 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, goto err_destroy_rq; if (params->rx_am_enabled) - set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); + c->rq.state |= BIT(MLX5E_RQ_STATE_AM); return 0; @@ -946,7 +958,6 @@ static void mlx5e_activate_rq(struct mlx5e_rq *rq) set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; - sq->db.ico_wqe[pi].num_wqebbs = 1; nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &nopwqe->ctrl); } @@ -1048,7 +1059,6 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, struct mlx5_core_dev *mdev = c->mdev; int err; - sq->pdev = c->pdev; sq->mkey_be = c->mkey_be; sq->channel = c; sq->uar_map = mdev->mlx5e_res.bfreg.map; @@ -1752,7 +1762,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_cq_moder icocq_moder = {0, 0}; struct net_device *netdev = priv->netdev; struct mlx5e_channel *c; + unsigned int irq; int err; + int eqn; c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix)); if (!c) @@ -1768,6 +1780,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->num_tc = params->num_tc; c->xdp = !!params->xdp_prog; + mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq); + c->irq_desc = irq_to_desc(irq); + netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq); @@ -2345,9 +2360,10 @@ static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc) void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params, enum mlx5e_traffic_types tt, - void *tirc) + void *tirc, bool inner) { - void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + void *hfso = inner ? MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner) : + MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); #define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP) @@ -2496,6 +2512,21 @@ free_in: return err; } +static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv, + enum mlx5e_traffic_types tt, + u32 *tirc) +{ + MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); + + mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc); + + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); + MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); + MLX5_SET(tirc, tirc, tunneled_offload_en, 0x1); + + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true); +} + static int mlx5e_set_mtu(struct mlx5e_priv *priv, u16 mtu) { struct mlx5_core_dev *mdev = priv->mdev; @@ -2583,12 +2614,6 @@ static void mlx5e_build_channels_tx_maps(struct mlx5e_priv *priv) } } -static bool mlx5e_is_eswitch_vport_mngr(struct mlx5_core_dev *mdev) -{ - return (MLX5_CAP_GEN(mdev, vport_group_manager) && - MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH); -} - void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) { int num_txqs = priv->channels.num * priv->channels.params.num_tc; @@ -2602,7 +2627,7 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) mlx5e_activate_channels(&priv->channels); netif_tx_start_all_queues(priv->netdev); - if (mlx5e_is_eswitch_vport_mngr(priv->mdev)) + if (MLX5_VPORT_MANAGER(priv->mdev)) mlx5e_add_sqs_fwd_rules(priv); mlx5e_wait_channels_min_rx_wqes(&priv->channels); @@ -2613,7 +2638,7 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) { mlx5e_redirect_rqts_to_drop(priv); - if (mlx5e_is_eswitch_vport_mngr(priv->mdev)) + if (MLX5_VPORT_MANAGER(priv->mdev)) mlx5e_remove_sqs_fwd_rules(priv); /* FIXME: This is a W/A only for tx timeout watch dog false alarm when @@ -2690,6 +2715,8 @@ int mlx5e_open(struct net_device *netdev) mutex_lock(&priv->state_lock); err = mlx5e_open_locked(netdev); + if (!err) + mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_UP); mutex_unlock(&priv->state_lock); return err; @@ -2724,6 +2751,7 @@ int mlx5e_close(struct net_device *netdev) return -ENODEV; mutex_lock(&priv->state_lock); + mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_DOWN); err = mlx5e_close_locked(netdev); mutex_unlock(&priv->state_lock); @@ -2864,7 +2892,7 @@ static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); - mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false); } static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc) @@ -2883,6 +2911,7 @@ int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) struct mlx5e_tir *tir; void *tirc; int inlen; + int i = 0; int err; u32 *in; int tt; @@ -2898,16 +2927,36 @@ int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); mlx5e_build_indir_tir_ctx(priv, tt, tirc); err = mlx5e_create_tir(priv->mdev, tir, in, inlen); - if (err) - goto err_destroy_tirs; + if (err) { + mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err); + goto err_destroy_inner_tirs; + } } + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + goto out; + + for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) { + memset(in, 0, inlen); + tir = &priv->inner_indir_tir[i]; + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + mlx5e_build_inner_indir_tir_ctx(priv, i, tirc); + err = mlx5e_create_tir(priv->mdev, tir, in, inlen); + if (err) { + mlx5_core_warn(priv->mdev, "create inner indirect tirs failed, %d\n", err); + goto err_destroy_inner_tirs; + } + } + +out: kvfree(in); return 0; -err_destroy_tirs: - mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err); +err_destroy_inner_tirs: + for (i--; i >= 0; i--) + mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]); + for (tt--; tt >= 0; tt--) mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]); @@ -2961,6 +3010,12 @@ void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]); + + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return; + + for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) + mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]); } void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv) @@ -3000,12 +3055,16 @@ static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) return 0; } -static int mlx5e_setup_tc(struct net_device *netdev, u8 tc) +static int mlx5e_setup_tc_mqprio(struct net_device *netdev, + struct tc_mqprio_qopt *mqprio) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_channels new_channels = {}; + u8 tc = mqprio->num_tc; int err = 0; + mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; + if (tc && tc != MLX5E_MAX_NUM_TC) return -EINVAL; @@ -3029,39 +3088,42 @@ out: return err; } -static int mlx5e_ndo_setup_tc(struct net_device *dev, u32 handle, - u32 chain_index, __be16 proto, - struct tc_to_netdev *tc) +#ifdef CONFIG_MLX5_ESWITCH +static int mlx5e_setup_tc_cls_flower(struct net_device *dev, + struct tc_cls_flower_offload *cls_flower) { struct mlx5e_priv *priv = netdev_priv(dev); - if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS)) - goto mqprio; + if (!is_classid_clsact_ingress(cls_flower->common.classid) || + cls_flower->common.chain_index) + return -EOPNOTSUPP; - if (chain_index) + switch (cls_flower->command) { + case TC_CLSFLOWER_REPLACE: + return mlx5e_configure_flower(priv, cls_flower); + case TC_CLSFLOWER_DESTROY: + return mlx5e_delete_flower(priv, cls_flower); + case TC_CLSFLOWER_STATS: + return mlx5e_stats_flower(priv, cls_flower); + default: return -EOPNOTSUPP; + } +} +#endif - switch (tc->type) { +static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + switch (type) { +#ifdef CONFIG_MLX5_ESWITCH case TC_SETUP_CLSFLOWER: - switch (tc->cls_flower->command) { - case TC_CLSFLOWER_REPLACE: - return mlx5e_configure_flower(priv, proto, tc->cls_flower); - case TC_CLSFLOWER_DESTROY: - return mlx5e_delete_flower(priv, tc->cls_flower); - case TC_CLSFLOWER_STATS: - return mlx5e_stats_flower(priv, tc->cls_flower); - } + return mlx5e_setup_tc_cls_flower(dev, type_data); +#endif + case TC_SETUP_MQPRIO: + return mlx5e_setup_tc_mqprio(dev, type_data); default: return -EOPNOTSUPP; } - -mqprio: - if (tc->type != TC_SETUP_MQPRIO) - return -EINVAL; - - tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; - - return mlx5e_setup_tc(dev, tc->mqprio->num_tc); } static void @@ -3358,6 +3420,7 @@ static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } } +#ifdef CONFIG_MLX5_ESWITCH static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -3460,6 +3523,7 @@ static int mlx5e_get_vf_stats(struct net_device *dev, return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1, vf_stats); } +#endif static void mlx5e_add_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti) @@ -3489,13 +3553,13 @@ static void mlx5e_del_vxlan_port(struct net_device *netdev, mlx5e_vxlan_queue_work(priv, ti->sa_family, be16_to_cpu(ti->port), 0); } -static netdev_features_t mlx5e_vxlan_features_check(struct mlx5e_priv *priv, - struct sk_buff *skb, - netdev_features_t features) +static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, + struct sk_buff *skb, + netdev_features_t features) { struct udphdr *udph; - u16 proto; - u16 port = 0; + u8 proto; + u16 port; switch (vlan_get_protocol(skb)) { case htons(ETH_P_IP): @@ -3508,14 +3572,17 @@ static netdev_features_t mlx5e_vxlan_features_check(struct mlx5e_priv *priv, goto out; } - if (proto == IPPROTO_UDP) { + switch (proto) { + case IPPROTO_GRE: + return features; + case IPPROTO_UDP: udph = udp_hdr(skb); port = be16_to_cpu(udph->dest); - } - /* Verify if UDP port is being offloaded by HW */ - if (port && mlx5e_vxlan_lookup_port(priv, port)) - return features; + /* Verify if UDP port is being offloaded by HW */ + if (mlx5e_vxlan_lookup_port(priv, port)) + return features; + } out: /* Disable CSUM and GSO if the udp dport is not offloaded by HW */ @@ -3539,7 +3606,7 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb, /* Validate if the tunneled packet is being offloaded by HW */ if (skb->encapsulation && (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK)) - return mlx5e_vxlan_features_check(priv, skb, features); + return mlx5e_tunnel_features_check(priv, skb, features); return features; } @@ -3636,7 +3703,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); /* napi_schedule in case we have missed anything */ - set_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags); napi_schedule(&c->napi); if (old_prog) @@ -3693,11 +3759,11 @@ static void mlx5e_netpoll(struct net_device *dev) } #endif -static const struct net_device_ops mlx5e_netdev_ops_basic = { +static const struct net_device_ops mlx5e_netdev_ops = { .ndo_open = mlx5e_open, .ndo_stop = mlx5e_close, .ndo_start_xmit = mlx5e_xmit, - .ndo_setup_tc = mlx5e_ndo_setup_tc, + .ndo_setup_tc = mlx5e_setup_tc, .ndo_select_queue = mlx5e_select_queue, .ndo_get_stats64 = mlx5e_get_stats, .ndo_set_rx_mode = mlx5e_set_rx_mode, @@ -3708,6 +3774,9 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_change_mtu = mlx5e_change_mtu, .ndo_do_ioctl = mlx5e_ioctl, .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate, + .ndo_udp_tunnel_add = mlx5e_add_vxlan_port, + .ndo_udp_tunnel_del = mlx5e_del_vxlan_port, + .ndo_features_check = mlx5e_features_check, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif @@ -3716,29 +3785,8 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = mlx5e_netpoll, #endif -}; - -static const struct net_device_ops mlx5e_netdev_ops_sriov = { - .ndo_open = mlx5e_open, - .ndo_stop = mlx5e_close, - .ndo_start_xmit = mlx5e_xmit, - .ndo_setup_tc = mlx5e_ndo_setup_tc, - .ndo_select_queue = mlx5e_select_queue, - .ndo_get_stats64 = mlx5e_get_stats, - .ndo_set_rx_mode = mlx5e_set_rx_mode, - .ndo_set_mac_address = mlx5e_set_mac, - .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid, - .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid, - .ndo_set_features = mlx5e_set_features, - .ndo_change_mtu = mlx5e_change_mtu, - .ndo_do_ioctl = mlx5e_ioctl, - .ndo_udp_tunnel_add = mlx5e_add_vxlan_port, - .ndo_udp_tunnel_del = mlx5e_del_vxlan_port, - .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate, - .ndo_features_check = mlx5e_features_check, -#ifdef CONFIG_RFS_ACCEL - .ndo_rx_flow_steer = mlx5e_rx_flow_steer, -#endif +#ifdef CONFIG_MLX5_ESWITCH + /* SRIOV E-Switch NDOs */ .ndo_set_vf_mac = mlx5e_set_vf_mac, .ndo_set_vf_vlan = mlx5e_set_vf_vlan, .ndo_set_vf_spoofchk = mlx5e_set_vf_spoofchk, @@ -3747,13 +3795,9 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = { .ndo_get_vf_config = mlx5e_get_vf_config, .ndo_set_vf_link_state = mlx5e_set_vf_link_state, .ndo_get_vf_stats = mlx5e_get_vf_stats, - .ndo_tx_timeout = mlx5e_tx_timeout, - .ndo_xdp = mlx5e_xdp, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = mlx5e_netpoll, -#endif .ndo_has_offload_stats = mlx5e_has_offload_stats, .ndo_get_offload_stats = mlx5e_get_offload_stats, +#endif }; static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) @@ -3790,8 +3834,7 @@ u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev) 2 /*sizeof(mlx5e_tx_wqe.inline_hdr_start)*/; } -void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, - u32 *indirection_rqt, int len, +void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, int num_channels) { int i; @@ -3934,7 +3977,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, /* RSS */ params->rss_hfunc = ETH_RSS_HASH_XOR; netdev_rss_key_fill(params->toeplitz_hash_key, sizeof(params->toeplitz_hash_key)); - mlx5e_build_default_indir_rqt(mdev, params->indirection_rqt, + mlx5e_build_default_indir_rqt(params->indirection_rqt, MLX5E_INDIR_RQT_SIZE, max_channels); } @@ -3973,9 +4016,11 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) } } +#if IS_ENABLED(CONFIG_NET_SWITCHDEV) && IS_ENABLED(CONFIG_MLX5_ESWITCH) static const struct switchdev_ops mlx5e_switchdev_ops = { .switchdev_port_attr_get = mlx5e_attr_get, }; +#endif static void mlx5e_build_nic_netdev(struct net_device *netdev) { @@ -3986,15 +4031,12 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) SET_NETDEV_DEV(netdev, &mdev->pdev->dev); - if (MLX5_CAP_GEN(mdev, vport_group_manager)) { - netdev->netdev_ops = &mlx5e_netdev_ops_sriov; + netdev->netdev_ops = &mlx5e_netdev_ops; + #ifdef CONFIG_MLX5_CORE_EN_DCB - if (MLX5_CAP_GEN(mdev, qos)) - netdev->dcbnl_ops = &mlx5e_dcbnl_ops; + if (MLX5_CAP_GEN(mdev, vport_group_manager) && MLX5_CAP_GEN(mdev, qos)) + netdev->dcbnl_ops = &mlx5e_dcbnl_ops; #endif - } else { - netdev->netdev_ops = &mlx5e_netdev_ops_basic; - } netdev->watchdog_timeo = 15 * HZ; @@ -4017,20 +4059,32 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; - if (mlx5e_vxlan_allowed(mdev)) { - netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM | - NETIF_F_GSO_PARTIAL; + if (mlx5e_vxlan_allowed(mdev) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { + netdev->hw_features |= NETIF_F_GSO_PARTIAL; netdev->hw_enc_features |= NETIF_F_IP_CSUM; netdev->hw_enc_features |= NETIF_F_IPV6_CSUM; netdev->hw_enc_features |= NETIF_F_TSO; netdev->hw_enc_features |= NETIF_F_TSO6; - netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL; - netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM | - NETIF_F_GSO_PARTIAL; + netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL; + } + + if (mlx5e_vxlan_allowed(mdev)) { + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; } + if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { + netdev->hw_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + netdev->hw_enc_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + netdev->gso_partial_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + } + mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled); if (fcs_supported) @@ -4066,8 +4120,8 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) mlx5e_set_netdev_dev_addr(netdev); -#ifdef CONFIG_NET_SWITCHDEV - if (MLX5_CAP_GEN(mdev, vport_group_manager)) +#if IS_ENABLED(CONFIG_NET_SWITCHDEV) && IS_ENABLED(CONFIG_MLX5_ESWITCH) + if (MLX5_VPORT_MANAGER(mdev)) netdev->switchdev_ops = &mlx5e_switchdev_ops; #endif @@ -4199,6 +4253,10 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5e_init_l2_addr(priv); + /* Marking the link as currently not needed by the Driver */ + if (!netif_running(netdev)) + mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN); + /* MTU range: 68 - hw-specific max */ netdev->min_mtu = ETH_MIN_MTU; mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1); @@ -4209,7 +4267,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5e_enable_async_events(priv); - if (MLX5_CAP_GEN(mdev, vport_group_manager)) + if (MLX5_VPORT_MANAGER(priv->mdev)) mlx5e_register_vport_reps(priv); if (netdev->reg_state != NETREG_REGISTERED) @@ -4243,7 +4301,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) queue_work(priv->wq, &priv->set_rx_mode_work); - if (MLX5_CAP_GEN(mdev, vport_group_manager)) + if (MLX5_VPORT_MANAGER(priv->mdev)) mlx5e_unregister_vport_reps(priv); mlx5e_disable_async_events(priv); @@ -4416,32 +4474,29 @@ static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv) static void *mlx5e_add(struct mlx5_core_dev *mdev) { - struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); - struct mlx5e_rep_priv *rpriv = NULL; + struct net_device *netdev; + void *rpriv = NULL; void *priv; - int vport; int err; - struct net_device *netdev; err = mlx5e_check_required_hca_cap(mdev); if (err) return NULL; - if (MLX5_CAP_GEN(mdev, vport_group_manager)) { - rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); +#ifdef CONFIG_MLX5_ESWITCH + if (MLX5_VPORT_MANAGER(mdev)) { + rpriv = mlx5e_alloc_nic_rep_priv(mdev); if (!rpriv) { - mlx5_core_warn(mdev, - "Not creating net device, Failed to alloc rep priv data\n"); + mlx5_core_warn(mdev, "Failed to alloc NIC rep priv data\n"); return NULL; } - rpriv->rep = &esw->offloads.vport_reps[0]; } +#endif netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, rpriv); if (!netdev) { mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); - goto err_unregister_reps; + goto err_free_rpriv; } priv = netdev_priv(netdev); @@ -4462,14 +4517,9 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) err_detach: mlx5e_detach(mdev, priv); - err_destroy_netdev: mlx5e_destroy_netdev(priv); - -err_unregister_reps: - for (vport = 1; vport < total_vfs; vport++) - mlx5_eswitch_unregister_vport_rep(esw, vport); - +err_free_rpriv: kfree(rpriv); return NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 45e60be9c277..45e03c427faf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -613,15 +613,18 @@ static int mlx5e_rep_open(struct net_device *dev) struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; int err; - err = mlx5e_open(dev); + mutex_lock(&priv->state_lock); + err = mlx5e_open_locked(dev); if (err) - return err; + goto unlock; - err = mlx5_eswitch_set_vport_state(esw, rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_UP); - if (!err) + if (!mlx5_eswitch_set_vport_state(esw, rep->vport, + MLX5_ESW_VPORT_ADMIN_STATE_UP)) netif_carrier_on(dev); - return 0; +unlock: + mutex_unlock(&priv->state_lock); + return err; } static int mlx5e_rep_close(struct net_device *dev) @@ -630,10 +633,13 @@ static int mlx5e_rep_close(struct net_device *dev) struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + int ret; + mutex_lock(&priv->state_lock); (void)mlx5_eswitch_set_vport_state(esw, rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_DOWN); - - return mlx5e_close(dev); + ret = mlx5e_close_locked(dev); + mutex_unlock(&priv->state_lock); + return ret; } static int mlx5e_rep_get_phys_port_name(struct net_device *dev, @@ -651,37 +657,42 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev, return 0; } -static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle, - u32 chain_index, __be16 proto, - struct tc_to_netdev *tc) +static int +mlx5e_rep_setup_tc_cls_flower(struct net_device *dev, + struct tc_cls_flower_offload *cls_flower) { struct mlx5e_priv *priv = netdev_priv(dev); - if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS)) + if (!is_classid_clsact_ingress(cls_flower->common.classid) || + cls_flower->common.chain_index) return -EOPNOTSUPP; - if (tc->egress_dev) { + if (cls_flower->egress_dev) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct net_device *uplink_dev = mlx5_eswitch_get_uplink_netdev(esw); - return uplink_dev->netdev_ops->ndo_setup_tc(uplink_dev, handle, - chain_index, - proto, tc); + dev = mlx5_eswitch_get_uplink_netdev(esw); + return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, + cls_flower); } - if (chain_index) + switch (cls_flower->command) { + case TC_CLSFLOWER_REPLACE: + return mlx5e_configure_flower(priv, cls_flower); + case TC_CLSFLOWER_DESTROY: + return mlx5e_delete_flower(priv, cls_flower); + case TC_CLSFLOWER_STATS: + return mlx5e_stats_flower(priv, cls_flower); + default: return -EOPNOTSUPP; + } +} - switch (tc->type) { +static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + switch (type) { case TC_SETUP_CLSFLOWER: - switch (tc->cls_flower->command) { - case TC_CLSFLOWER_REPLACE: - return mlx5e_configure_flower(priv, proto, tc->cls_flower); - case TC_CLSFLOWER_DESTROY: - return mlx5e_delete_flower(priv, tc->cls_flower); - case TC_CLSFLOWER_STATS: - return mlx5e_stats_flower(priv, tc->cls_flower); - } + return mlx5e_rep_setup_tc_cls_flower(dev, type_data); default: return -EOPNOTSUPP; } @@ -773,7 +784,7 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = { .ndo_stop = mlx5e_rep_close, .ndo_start_xmit = mlx5e_xmit, .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, - .ndo_setup_tc = mlx5e_rep_ndo_setup_tc, + .ndo_setup_tc = mlx5e_rep_setup_tc, .ndo_get_stats64 = mlx5e_rep_get_stats, .ndo_has_offload_stats = mlx5e_has_offload_stats, .ndo_get_offload_stats = mlx5e_get_offload_stats, @@ -913,7 +924,7 @@ static int mlx5e_get_rep_max_num_channels(struct mlx5_core_dev *mdev) return MLX5E_PORT_REPRESENTOR_NCH; } -static struct mlx5e_profile mlx5e_rep_profile = { +static const struct mlx5e_profile mlx5e_rep_profile = { .init = mlx5e_init_rep, .init_rx = mlx5e_init_rep_rx, .cleanup_rx = mlx5e_cleanup_rep_rx, @@ -1099,3 +1110,16 @@ void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv) mlx5e_rep_unregister_vf_vports(priv); /* VFs vports */ mlx5_eswitch_unregister_vport_rep(esw, 0); /* UPLINK PF*/ } + +void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv; + + rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); + if (!rpriv) + return NULL; + + rpriv->rep = &esw->offloads.vport_reps[0]; + return rpriv; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index a0a1a7a1d6c0..5659ed9f51e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -38,6 +38,7 @@ #include "eswitch.h" #include "en.h" +#ifdef CONFIG_MLX5_ESWITCH struct mlx5e_neigh_update_table { struct rhashtable neigh_ht; /* Save the neigh hash entries in a list in addition to the hash table @@ -123,6 +124,7 @@ struct mlx5e_encap_entry { int encap_size; }; +void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev); void mlx5e_register_vport_reps(struct mlx5e_priv *priv); void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv); bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv); @@ -141,5 +143,12 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e); void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv); +#else /* CONFIG_MLX5_ESWITCH */ +static inline void mlx5e_register_vport_reps(struct mlx5e_priv *priv) {} +static inline void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv) {} +static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; } +static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; } +static inline void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) {} +#endif #endif /* __MLX5E_REP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 7344433259fc..f1dd638384d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -163,7 +163,7 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, static inline bool mlx5e_page_is_reserved(struct page *page) { - return page_is_pfmemalloc(page) || page_to_nid(page) != numa_node_id(); + return page_is_pfmemalloc(page) || page_to_nid(page) != numa_mem_id(); } static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, @@ -177,8 +177,10 @@ static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, return false; } - if (unlikely(page_is_pfmemalloc(dma_info->page))) + if (unlikely(mlx5e_page_is_reserved(dma_info->page))) { + rq->stats.cache_waive++; return false; + } cache->page_cache[cache->tail] = *dma_info; cache->tail = tail_next; @@ -252,7 +254,7 @@ static inline bool mlx5e_page_reuse(struct mlx5e_rq *rq, !mlx5e_page_is_reserved(wi->di.page); } -int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { struct mlx5e_wqe_frag_info *wi = &rq->wqe.frag_info[ix]; @@ -263,8 +265,7 @@ int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) wi->offset = 0; } - wqe->data.addr = cpu_to_be64(wi->di.addr + wi->offset + - rq->rx_headroom); + wqe->data.addr = cpu_to_be64(wi->di.addr + wi->offset + rq->buff.headroom); return 0; } @@ -296,7 +297,7 @@ void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq) { - return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER; + return rq->mpwqe.num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER; } static inline void mlx5e_add_skb_frag_mpwqe(struct mlx5e_rq *rq, @@ -305,7 +306,7 @@ static inline void mlx5e_add_skb_frag_mpwqe(struct mlx5e_rq *rq, u32 page_idx, u32 frag_offset, u32 len) { - unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz); + unsigned int truesize = ALIGN(len, BIT(rq->mpwqe.log_stride_sz)); dma_sync_single_for_cpu(rq->pdev, wi->umr.dma_info[page_idx].addr + frag_offset, @@ -358,7 +359,6 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) /* fill sq edge with nops to avoid wqe wrap around */ while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; - sq->db.ico_wqe[pi].num_wqebbs = 1; mlx5e_post_nop(wq, sq->sqn, &sq->pc); } @@ -369,41 +369,35 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) MLX5_OPCODE_UMR); sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR; - sq->db.ico_wqe[pi].num_wqebbs = num_wqebbs; sq->pc += num_wqebbs; mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl); } static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_rx_wqe *wqe, u16 ix) { struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; - u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, ix) << PAGE_SHIFT; int pg_strides = mlx5e_mpwqe_strides_per_page(rq); + struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0]; int err; int i; - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; - + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) { err = mlx5e_page_alloc_mapped(rq, dma_info); if (unlikely(err)) goto err_unmap; wi->umr.mtt[i] = cpu_to_be64(dma_info->addr | MLX5_EN_WR); page_ref_add(dma_info->page, pg_strides); - wi->skbs_frags[i] = 0; } + memset(wi->skbs_frags, 0, sizeof(*wi->skbs_frags) * MLX5_MPWRQ_PAGES_PER_WQE); wi->consumed_strides = 0; - wqe->data.addr = cpu_to_be64(dma_offset); return 0; err_unmap: while (--i >= 0) { - struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; - + dma_info--; page_ref_sub(dma_info->page, pg_strides); mlx5e_page_release(rq, dma_info, true); } @@ -414,27 +408,21 @@ err_unmap: void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi) { int pg_strides = mlx5e_mpwqe_strides_per_page(rq); + struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0]; int i; - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; - + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) { page_ref_sub(dma_info->page, pg_strides - wi->skbs_frags[i]); mlx5e_page_release(rq, dma_info, true); } } -void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) +static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) { struct mlx5_wq_ll *wq = &rq->wq; struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); - clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); - - if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) { - mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]); - return; - } + rq->mpwqe.umr_in_progress = false; mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); @@ -444,16 +432,18 @@ void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) mlx5_wq_ll_update_db_record(wq); } -int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { int err; - err = mlx5e_alloc_rx_umr_mpwqe(rq, wqe, ix); - if (unlikely(err)) + err = mlx5e_alloc_rx_umr_mpwqe(rq, ix); + if (unlikely(err)) { + rq->stats.buff_alloc_err++; return err; - set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); + } + rq->mpwqe.umr_in_progress = true; mlx5e_post_umr_wqe(rq, ix); - return -EBUSY; + return 0; } void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) @@ -463,94 +453,150 @@ void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) mlx5e_free_rx_mpwqe(rq, wi); } -#define RQ_CANNOT_POST(rq) \ - (!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state) || \ - test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) - bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) { struct mlx5_wq_ll *wq = &rq->wq; + int err; - if (unlikely(RQ_CANNOT_POST(rq))) + if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED))) return false; - while (!mlx5_wq_ll_is_full(wq)) { + if (mlx5_wq_ll_is_full(wq)) + return false; + + do { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); - int err; - err = rq->alloc_wqe(rq, wqe, wq->head); - if (err == -EBUSY) - return true; + err = mlx5e_alloc_rx_wqe(rq, wqe, wq->head); if (unlikely(err)) { rq->stats.buff_alloc_err++; break; } mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); - } + } while (!mlx5_wq_ll_is_full(wq)); /* ensure wqes are visible to device before updating doorbell record */ dma_wmb(); mlx5_wq_ll_update_db_record(wq); - return !mlx5_wq_ll_is_full(wq); + return !!err; +} + +static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq, + struct mlx5e_icosq *sq, + struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; + struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; + + mlx5_cqwq_pop(&cq->wq); + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { + WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n", + cqe->op_own); + return; + } + + if (likely(icowi->opcode == MLX5_OPCODE_UMR)) { + mlx5e_post_rx_mpwqe(rq); + return; + } + + if (unlikely(icowi->opcode != MLX5_OPCODE_NOP)) + WARN_ONCE(true, + "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n", + icowi->opcode); +} + +static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) +{ + struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq); + struct mlx5_cqe64 *cqe; + + if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED))) + return; + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (likely(!cqe)) + return; + + /* by design, there's only a single cqe */ + mlx5e_poll_ico_single_cqe(cq, sq, rq, cqe); + + mlx5_cqwq_update_db_record(&cq->wq); +} + +bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) +{ + struct mlx5_wq_ll *wq = &rq->wq; + + if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED))) + return false; + + mlx5e_poll_ico_cq(&rq->channel->icosq.cq, rq); + + if (mlx5_wq_ll_is_full(wq)) + return false; + + if (!rq->mpwqe.umr_in_progress) + mlx5e_alloc_rx_mpwqe(rq, wq->head); + + return true; } static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, u32 cqe_bcnt) { struct ethhdr *eth = (struct ethhdr *)(skb->data); - struct iphdr *ipv4; - struct ipv6hdr *ipv6; struct tcphdr *tcp; int network_depth = 0; __be16 proto; u16 tot_len; + void *ip_p; u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe); - int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA == l4_hdr_type) || - (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type)); + u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) || + (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA); skb->mac_len = ETH_HLEN; proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth); - ipv4 = (struct iphdr *)(skb->data + network_depth); - ipv6 = (struct ipv6hdr *)(skb->data + network_depth); tot_len = cqe_bcnt - network_depth; + ip_p = skb->data + network_depth; if (proto == htons(ETH_P_IP)) { - tcp = (struct tcphdr *)(skb->data + network_depth + - sizeof(struct iphdr)); - ipv6 = NULL; - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; - } else { - tcp = (struct tcphdr *)(skb->data + network_depth + - sizeof(struct ipv6hdr)); - ipv4 = NULL; - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; - } - - if (get_cqe_lro_tcppsh(cqe)) - tcp->psh = 1; + struct iphdr *ipv4 = ip_p; - if (tcp_ack) { - tcp->ack = 1; - tcp->ack_seq = cqe->lro_ack_seq_num; - tcp->window = cqe->lro_tcp_win; - } + tcp = ip_p + sizeof(struct iphdr); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; - if (ipv4) { ipv4->ttl = cqe->lro_min_ttl; ipv4->tot_len = cpu_to_be16(tot_len); ipv4->check = 0; ipv4->check = ip_fast_csum((unsigned char *)ipv4, ipv4->ihl); } else { + struct ipv6hdr *ipv6 = ip_p; + + tcp = ip_p + sizeof(struct ipv6hdr); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + ipv6->hop_limit = cqe->lro_min_ttl; ipv6->payload_len = cpu_to_be16(tot_len - sizeof(struct ipv6hdr)); } + + tcp->psh = get_cqe_lro_tcppsh(cqe); + + if (tcp_ack) { + tcp->ack = 1; + tcp->ack_seq = cqe->lro_ack_seq_num; + tcp->window = cqe->lro_tcp_win; + } } static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe, @@ -776,9 +822,9 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt) { struct mlx5e_dma_info *di = &wi->di; + u16 rx_headroom = rq->buff.headroom; struct sk_buff *skb; void *va, *data; - u16 rx_headroom = rq->rx_headroom; bool consumed; u32 frag_size; @@ -857,6 +903,7 @@ wq_ll_pop: &wqe->next.next_wqe_index); } +#ifdef CONFIG_MLX5_ESWITCH void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { struct net_device *netdev = rq->netdev; @@ -901,6 +948,7 @@ wq_ll_pop: mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, &wqe->next.next_wqe_index); } +#endif static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, @@ -909,7 +957,7 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb) { u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); - u32 wqe_offset = stride_ix * rq->mpwqe_stride_sz; + u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz; u32 head_offset = wqe_offset & (PAGE_SIZE - 1); u32 page_idx = wqe_offset >> PAGE_SHIFT; u32 head_page_idx = page_idx; @@ -977,7 +1025,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) napi_gro_receive(rq->cq.napi, skb); mpwrq_cqe_out: - if (likely(wi->consumed_strides < rq->mpwqe_num_strides)) + if (likely(wi->consumed_strides < rq->mpwqe.num_strides)) return; mlx5e_free_rx_mpwqe(rq, wi); @@ -987,21 +1035,23 @@ mpwrq_cqe_out: int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); - struct mlx5e_xdpsq *xdpsq = &rq->xdpsq; + struct mlx5e_xdpsq *xdpsq; + struct mlx5_cqe64 *cqe; int work_done = 0; - if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) + if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED))) return 0; if (cq->decmprs_left) work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget); - for (; work_done < budget; work_done++) { - struct mlx5_cqe64 *cqe = mlx5_cqwq_get_cqe(&cq->wq); + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe) + return 0; - if (!cqe) - break; + xdpsq = &rq->xdpsq; + do { if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) { work_done += mlx5e_decompress_cqes_start(rq, cq, @@ -1012,7 +1062,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) mlx5_cqwq_pop(&cq->wq); rq->handle_rx_cqe(rq, cqe); - } + } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); if (xdpsq->db.doorbell) { mlx5e_xmit_xdp_doorbell(xdpsq); @@ -1030,13 +1080,18 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) { struct mlx5e_xdpsq *sq; + struct mlx5_cqe64 *cqe; struct mlx5e_rq *rq; u16 sqcc; int i; sq = container_of(cq, struct mlx5e_xdpsq, cq); - if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED))) + return false; + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe) return false; rq = container_of(sq, struct mlx5e_rq, xdpsq); @@ -1046,15 +1101,11 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) */ sqcc = sq->cc; - for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) { - struct mlx5_cqe64 *cqe; + i = 0; + do { u16 wqe_counter; bool last_wqe; - cqe = mlx5_cqwq_get_cqe(&cq->wq); - if (!cqe) - break; - mlx5_cqwq_pop(&cq->wq); wqe_counter = be16_to_cpu(cqe->wqe_counter); @@ -1072,7 +1123,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) /* Recycle RX page */ mlx5e_page_release(rq, di, true); } while (!last_wqe); - } + } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); mlx5_cqwq_update_db_record(&cq->wq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index e65517eafc58..6d199ffb1c0b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -47,7 +47,7 @@ struct counter_desc { char format[ETH_GSTRING_LEN]; - int offset; /* Byte offset */ + size_t offset; /* Byte offset */ }; struct mlx5e_sw_stats { @@ -84,6 +84,7 @@ struct mlx5e_sw_stats { u64 rx_cache_full; u64 rx_cache_empty; u64 rx_cache_busy; + u64 rx_cache_waive; /* Special handling counters */ u64 link_down_events_phy; @@ -123,6 +124,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) }, }; @@ -216,6 +218,12 @@ static const struct counter_desc vport_stats_desc[] = { MLX5_GET64(ppcnt_reg, pstats->per_prio_counters[prio], \ counter_set.eth_per_prio_grp_data_layout.c##_high) #define NUM_PPORT_PRIO 8 +#define PPORT_ETH_EXT_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_extended_cntrs_grp_data_layout.c##_high) +#define PPORT_ETH_EXT_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, (pstats)->eth_ext_counters, \ + counter_set.eth_extended_cntrs_grp_data_layout.c##_high) struct mlx5e_pport_stats { __be64 IEEE_802_3_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; @@ -224,6 +232,7 @@ struct mlx5e_pport_stats { __be64 per_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 phy_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 phy_statistical_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 eth_ext_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; }; static const struct counter_desc pport_802_3_stats_desc[] = { @@ -290,12 +299,22 @@ static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, }; +static const struct counter_desc pport_eth_ext_stats_desc[] = { + { "rx_buffer_passed_thres_phy", PPORT_ETH_EXT_OFF(rx_buffer_almost_full) }, +}; + #define PCIE_PERF_OFF(c) \ MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c) #define PCIE_PERF_GET(pcie_stats, c) \ MLX5_GET(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \ counter_set.pcie_perf_cntrs_grp_data_layout.c) +#define PCIE_PERF_OFF64(c) \ + MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c##_high) +#define PCIE_PERF_GET64(pcie_stats, c) \ + MLX5_GET64(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \ + counter_set.pcie_perf_cntrs_grp_data_layout.c##_high) + struct mlx5e_pcie_stats { __be64 pcie_perf_counters[MLX5_ST_SZ_QW(mpcnt_reg)]; }; @@ -305,6 +324,17 @@ static const struct counter_desc pcie_perf_stats_desc[] = { { "tx_pci_signal_integrity", PCIE_PERF_OFF(tx_errors) }, }; +static const struct counter_desc pcie_perf_stats_desc64[] = { + { "outbound_pci_buffer_overflow", PCIE_PERF_OFF64(tx_overflow_buffer_pkt) }, +}; + +static const struct counter_desc pcie_perf_stall_stats_desc[] = { + { "outbound_pci_stalled_rd", PCIE_PERF_OFF(outbound_stalled_reads) }, + { "outbound_pci_stalled_wr", PCIE_PERF_OFF(outbound_stalled_writes) }, + { "outbound_pci_stalled_rd_events", PCIE_PERF_OFF(outbound_stalled_reads_events) }, + { "outbound_pci_stalled_wr_events", PCIE_PERF_OFF(outbound_stalled_writes_events) }, +}; + struct mlx5e_rq_stats { u64 packets; u64 bytes; @@ -326,6 +356,7 @@ struct mlx5e_rq_stats { u64 cache_full; u64 cache_empty; u64 cache_busy; + u64 cache_waive; }; static const struct counter_desc rq_stats_desc[] = { @@ -349,6 +380,7 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) }, }; struct mlx5e_sq_stats { @@ -397,17 +429,29 @@ static const struct counter_desc sq_stats_desc[] = { #define NUM_PCIE_PERF_COUNTERS(priv) \ (ARRAY_SIZE(pcie_perf_stats_desc) * \ MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group)) +#define NUM_PCIE_PERF_COUNTERS64(priv) \ + (ARRAY_SIZE(pcie_perf_stats_desc64) * \ + MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt)) +#define NUM_PCIE_PERF_STALL_COUNTERS(priv) \ + (ARRAY_SIZE(pcie_perf_stall_stats_desc) * \ + MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled)) #define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS \ ARRAY_SIZE(pport_per_prio_traffic_stats_desc) #define NUM_PPORT_PER_PRIO_PFC_COUNTERS \ ARRAY_SIZE(pport_per_prio_pfc_stats_desc) +#define NUM_PPORT_ETH_EXT_COUNTERS(priv) \ + (ARRAY_SIZE(pport_eth_ext_stats_desc) * \ + MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) #define NUM_PPORT_COUNTERS(priv) (NUM_PPORT_802_3_COUNTERS + \ NUM_PPORT_2863_COUNTERS + \ NUM_PPORT_2819_COUNTERS + \ NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv) + \ NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * \ - NUM_PPORT_PRIO) -#define NUM_PCIE_COUNTERS(priv) NUM_PCIE_PERF_COUNTERS(priv) + NUM_PPORT_PRIO + \ + NUM_PPORT_ETH_EXT_COUNTERS(priv)) +#define NUM_PCIE_COUNTERS(priv) (NUM_PCIE_PERF_COUNTERS(priv) + \ + NUM_PCIE_PERF_COUNTERS64(priv) +\ + NUM_PCIE_PERF_STALL_COUNTERS(priv)) #define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) #define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 7f282e8f4e7f..da503e6411da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1326,7 +1326,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, LIST_HEAD(actions); int err; - if (tc_no_actions(exts)) + if (!tcf_exts_has_actions(exts)) return -EINVAL; attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; @@ -1837,7 +1837,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, bool encap = false; int err = 0; - if (tc_no_actions(exts)) + if (!tcf_exts_has_actions(exts)) return -EINVAL; memset(attr, 0, sizeof(*attr)); @@ -1937,7 +1937,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return err; } -int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, +int mlx5e_configure_flower(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index ecbe30d808ae..c14c263a739b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -33,12 +33,15 @@ #ifndef __MLX5_EN_TC_H__ #define __MLX5_EN_TC_H__ +#include <net/pkt_cls.h> + #define MLX5E_TC_FLOW_ID_MASK 0x0000ffff +#ifdef CONFIG_MLX5_ESWITCH int mlx5e_tc_init(struct mlx5e_priv *priv); void mlx5e_tc_cleanup(struct mlx5e_priv *priv); -int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, +int mlx5e_configure_flower(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f); int mlx5e_delete_flower(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f); @@ -60,4 +63,10 @@ static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) return atomic_read(&priv->fs.tc.ht.nelems); } +#else /* CONFIG_MLX5_ESWITCH */ +static inline int mlx5e_tc_init(struct mlx5e_priv *priv) { return 0; } +static inline void mlx5e_tc_cleanup(struct mlx5e_priv *priv) {} +static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) { return 0; } +#endif + #endif /* __MLX5_EN_TC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 31353e5c3c78..fee43e40fa16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -394,6 +394,7 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) { struct mlx5e_txqsq *sq; + struct mlx5_cqe64 *cqe; u32 dma_fifo_cc; u32 nbytes; u16 npkts; @@ -402,7 +403,11 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) sq = container_of(cq, struct mlx5e_txqsq, cq); - if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED))) + return false; + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe) return false; npkts = 0; @@ -416,15 +421,11 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) /* avoid dirtying sq cache line every cqe */ dma_fifo_cc = sq->dma_fifo_cc; - for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) { - struct mlx5_cqe64 *cqe; + i = 0; + do { u16 wqe_counter; bool last_wqe; - cqe = mlx5_cqwq_get_cqe(&cq->wq); - if (!cqe) - break; - mlx5_cqwq_pop(&cq->wq); wqe_counter = be16_to_cpu(cqe->wqe_counter); @@ -467,7 +468,8 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) sqcc += wi->num_wqebbs; napi_consume_skb(skb, napi_budget); } while (!last_wqe); - } + + } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); mlx5_cqwq_update_db_record(&cq->wq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 92db28a9ed43..e906b754415c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -30,66 +30,18 @@ * SOFTWARE. */ +#include <linux/irq.h> #include "en.h" -static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq, - struct mlx5e_icosq *sq, - struct mlx5_cqe64 *cqe, - u16 *sqcc) +static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) { - struct mlx5_wq_cyc *wq = &sq->wq; - u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; - struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; - struct mlx5e_rq *rq = &sq->channel->rq; - - prefetch(rq); - mlx5_cqwq_pop(&cq->wq); - *sqcc += icowi->num_wqebbs; - - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { - WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n", - cqe->op_own); - return; - } - - if (likely(icowi->opcode == MLX5_OPCODE_UMR)) { - mlx5e_post_rx_mpwqe(rq); - return; - } - - if (unlikely(icowi->opcode != MLX5_OPCODE_NOP)) - WARN_ONCE(true, - "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n", - icowi->opcode); -} - -static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) -{ - struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq); - struct mlx5_cqe64 *cqe; - u16 sqcc; - - if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) - return; - - cqe = mlx5_cqwq_get_cqe(&cq->wq); - if (likely(!cqe)) - return; + int current_cpu = smp_processor_id(); + const struct cpumask *aff; + struct irq_data *idata; - /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), - * otherwise a cq overrun may occur - */ - sqcc = sq->cc; - - /* by design, there's only a single cqe */ - mlx5e_poll_ico_single_cqe(cq, sq, cqe, &sqcc); - - mlx5_cqwq_update_db_record(&cq->wq); - - /* ensure cq space is freed before enabling more cqes */ - wmb(); - - sq->cc = sqcc; + idata = irq_desc_get_irq_data(c->irq_desc); + aff = irq_data_get_affinity_mask(idata); + return cpumask_test_cpu(current_cpu, aff); } int mlx5e_napi_poll(struct napi_struct *napi, int budget) @@ -100,8 +52,6 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) int work_done; int i; - clear_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags); - for (i = 0; i < c->num_tc; i++) busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget); @@ -111,25 +61,22 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); busy |= work_done == budget; - mlx5e_poll_ico_cq(&c->icosq.cq); + busy |= c->rq.post_wqes(&c->rq); - busy |= mlx5e_post_rx_wqes(&c->rq); - - if (busy) - return budget; - - napi_complete_done(napi, work_done); + if (busy) { + if (likely(mlx5e_channel_no_affinity_change(c))) + return budget; + if (work_done == budget) + work_done--; + } - /* avoid losing completion event during/after polling cqs */ - if (test_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags)) { - napi_schedule(napi); + if (unlikely(!napi_complete_done(napi, work_done))) return work_done; - } for (i = 0; i < c->num_tc; i++) mlx5e_cq_arm(&c->sq[i].cq); - if (test_bit(MLX5E_RQ_STATE_AM, &c->rq.state)) + if (MLX5E_TEST_BIT(c->rq.state, MLX5E_RQ_STATE_AM)) mlx5e_rx_am(&c->rq); mlx5e_cq_arm(&c->rq.cq); @@ -143,7 +90,6 @@ void mlx5e_completion_event(struct mlx5_core_cq *mcq) struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); cq->event_ctr++; - set_bit(MLX5E_CHANNEL_NAPI_SCHED, &cq->channel->flags); napi_schedule(cq->napi); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index edd11ebd392e..fc606bfd1d6e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -36,9 +36,7 @@ #include <linux/mlx5/cmd.h> #include "mlx5_core.h" #include "fpga/core.h" -#ifdef CONFIG_MLX5_CORE_EN #include "eswitch.h" -#endif enum { MLX5_EQE_SIZE = sizeof(struct mlx5_eqe), @@ -193,6 +191,7 @@ static void eq_update_ci(struct mlx5_eq *eq, int arm) { __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2); u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); + __raw_writel((__force u32)cpu_to_be32(val), addr); /* We still want ordering, just not swabbing, so add a barrier */ mb(); @@ -483,11 +482,9 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) } break; -#ifdef CONFIG_MLX5_CORE_EN case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: mlx5_eswitch_vport_event(dev->priv.eswitch, eqe); break; -#endif case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: mlx5_port_module_event(dev, eqe); @@ -702,9 +699,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; int err; - if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH && - MLX5_CAP_GEN(dev, vport_group_manager) && - mlx5_core_is_pf(dev)) + if (MLX5_VPORT_MANAGER(dev)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE); if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 5b41f692acad..c77f4c0c7769 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -46,19 +46,13 @@ enum { MLX5_ACTION_DEL = 2, }; -/* E-Switch UC L2 table hash node */ -struct esw_uc_addr { - struct l2addr_node node; - u32 table_index; - u32 vport; -}; - /* Vport UC/MC hash node */ struct vport_addr { struct l2addr_node node; u8 action; u32 vport; - struct mlx5_flow_handle *flow_rule; /* SRIOV only */ + struct mlx5_flow_handle *flow_rule; + bool mpfs; /* UC MAC was added to MPFs */ /* A flag indicating that mac was added due to mc promiscuous vport */ bool mc_promisc; }; @@ -154,81 +148,6 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, return modify_esw_vport_context_cmd(dev, vport, in, sizeof(in)); } -/* HW L2 Table (MPFS) management */ -static int set_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index, - u8 *mac, u8 vlan_valid, u16 vlan) -{ - u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)] = {0}; - u8 *in_mac_addr; - - MLX5_SET(set_l2_table_entry_in, in, opcode, - MLX5_CMD_OP_SET_L2_TABLE_ENTRY); - MLX5_SET(set_l2_table_entry_in, in, table_index, index); - MLX5_SET(set_l2_table_entry_in, in, vlan_valid, vlan_valid); - MLX5_SET(set_l2_table_entry_in, in, vlan, vlan); - - in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address); - ether_addr_copy(&in_mac_addr[2], mac); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - -static int del_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index) -{ - u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)] = {0}; - - MLX5_SET(delete_l2_table_entry_in, in, opcode, - MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); - MLX5_SET(delete_l2_table_entry_in, in, table_index, index); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - -static int alloc_l2_table_index(struct mlx5_l2_table *l2_table, u32 *ix) -{ - int err = 0; - - *ix = find_first_zero_bit(l2_table->bitmap, l2_table->size); - if (*ix >= l2_table->size) - err = -ENOSPC; - else - __set_bit(*ix, l2_table->bitmap); - - return err; -} - -static void free_l2_table_index(struct mlx5_l2_table *l2_table, u32 ix) -{ - __clear_bit(ix, l2_table->bitmap); -} - -static int set_l2_table_entry(struct mlx5_core_dev *dev, u8 *mac, - u8 vlan_valid, u16 vlan, - u32 *index) -{ - struct mlx5_l2_table *l2_table = &dev->priv.eswitch->l2_table; - int err; - - err = alloc_l2_table_index(l2_table, index); - if (err) - return err; - - err = set_l2_table_entry_cmd(dev, *index, mac, vlan_valid, vlan); - if (err) - free_l2_table_index(l2_table, *index); - - return err; -} - -static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index) -{ - struct mlx5_l2_table *l2_table = &dev->priv.eswitch->l2_table; - - del_l2_table_entry_cmd(dev, index); - free_l2_table_index(l2_table, index); -} - /* E-Switch FDB */ static struct mlx5_flow_handle * __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, @@ -455,65 +374,60 @@ typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) { - struct hlist_head *hash = esw->l2_table.l2_hash; - struct esw_uc_addr *esw_uc; u8 *mac = vaddr->node.addr; u32 vport = vaddr->vport; int err; - esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr); - if (esw_uc) { + /* Skip mlx5_mpfs_add_mac for PFs, + * it is already done by the PF netdev in mlx5e_execute_l2_action + */ + if (!vport) + goto fdb_add; + + err = mlx5_mpfs_add_mac(esw->dev, mac); + if (err) { esw_warn(esw->dev, - "Failed to set L2 mac(%pM) for vport(%d), mac is already in use by vport(%d)\n", - mac, vport, esw_uc->vport); - return -EEXIST; + "Failed to add L2 table mac(%pM) for vport(%d), err(%d)\n", + mac, vport, err); + return err; } + vaddr->mpfs = true; - esw_uc = l2addr_hash_add(hash, mac, struct esw_uc_addr, GFP_KERNEL); - if (!esw_uc) - return -ENOMEM; - esw_uc->vport = vport; - - err = set_l2_table_entry(esw->dev, mac, 0, 0, &esw_uc->table_index); - if (err) - goto abort; - +fdb_add: /* SRIOV is enabled: Forward UC MAC to vport */ if (esw->fdb_table.fdb && esw->mode == SRIOV_LEGACY) vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); - esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM index:%d fr(%p)\n", - vport, mac, esw_uc->table_index, vaddr->flow_rule); - return err; -abort: - l2addr_hash_del(esw_uc); - return err; + esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n", + vport, mac, vaddr->flow_rule); + + return 0; } static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) { - struct hlist_head *hash = esw->l2_table.l2_hash; - struct esw_uc_addr *esw_uc; u8 *mac = vaddr->node.addr; u32 vport = vaddr->vport; + int err = 0; - esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr); - if (!esw_uc || esw_uc->vport != vport) { - esw_debug(esw->dev, - "MAC(%pM) doesn't belong to vport (%d)\n", - mac, vport); - return -EINVAL; - } - esw_debug(esw->dev, "\tDELETE UC MAC: vport[%d] %pM index:%d fr(%p)\n", - vport, mac, esw_uc->table_index, vaddr->flow_rule); + /* Skip mlx5_mpfs_del_mac for PFs, + * it is already done by the PF netdev in mlx5e_execute_l2_action + */ + if (!vport || !vaddr->mpfs) + goto fdb_del; - del_l2_table_entry(esw->dev, esw_uc->table_index); + err = mlx5_mpfs_del_mac(esw->dev, mac); + if (err) + esw_warn(esw->dev, + "Failed to del L2 table mac(%pM) for vport(%d), err(%d)\n", + mac, vport, err); + vaddr->mpfs = false; +fdb_del: if (vaddr->flow_rule) mlx5_del_flow_rules(vaddr->flow_rule); vaddr->flow_rule = NULL; - l2addr_hash_del(esw_uc); return 0; } @@ -1611,13 +1525,14 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) } /* Public E-Switch API */ +#define ESW_ALLOWED(esw) ((esw) && MLX5_VPORT_MANAGER((esw)->dev)) + int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { int err; int i, enabled_events; - if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || - MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + if (!ESW_ALLOWED(esw)) return 0; if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) || @@ -1634,7 +1549,6 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); esw->mode = mode; - esw_disable_vport(esw, 0); if (mode == SRIOV_LEGACY) err = esw_create_legacy_fdb_table(esw, nvfs + 1); @@ -1647,7 +1561,11 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) if (err) esw_warn(esw->dev, "Failed to create eswitch TSAR"); - enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : UC_ADDR_CHANGE; + /* Don't enable vport events when in SRIOV_OFFLOADS mode, since: + * 1. L2 table (MPFS) is programmed by PF/VF representors netdevs set_rx_mode + * 2. FDB/Eswitch is programmed by user space tools + */ + enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : 0; for (i = 0; i <= nvfs; i++) esw_enable_vport(esw, i, enabled_events); @@ -1656,7 +1574,6 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) return 0; abort: - esw_enable_vport(esw, 0, UC_ADDR_CHANGE); esw->mode = SRIOV_NONE; return err; } @@ -1667,9 +1584,7 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) int nvports; int i; - if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || - MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH || - esw->mode == SRIOV_NONE) + if (!ESW_ALLOWED(esw) || esw->mode == SRIOV_NONE) return; esw_info(esw->dev, "disable SRIOV: active vports(%d) mode(%d)\n", @@ -1692,44 +1607,21 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw_offloads_cleanup(esw, nvports); esw->mode = SRIOV_NONE; - /* VPORT 0 (PF) must be enabled back with non-sriov configuration */ - esw_enable_vport(esw, 0, UC_ADDR_CHANGE); -} - -void mlx5_eswitch_attach(struct mlx5_eswitch *esw) -{ - if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || - MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) - return; - - esw_enable_vport(esw, 0, UC_ADDR_CHANGE); - /* VF Vports will be enabled when SRIOV is enabled */ -} - -void mlx5_eswitch_detach(struct mlx5_eswitch *esw) -{ - if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || - MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) - return; - - esw_disable_vport(esw, 0); } int mlx5_eswitch_init(struct mlx5_core_dev *dev) { - int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); int total_vports = MLX5_TOTAL_VPORTS(dev); struct mlx5_eswitch *esw; int vport_num; int err; - if (!MLX5_CAP_GEN(dev, vport_group_manager) || - MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + if (!MLX5_VPORT_MANAGER(dev)) return 0; esw_info(dev, - "Total vports %d, l2 table size(%d), per vport: max uc(%d) max mc(%d)\n", - total_vports, l2_table_size, + "Total vports %d, per vport: max uc(%d) max mc(%d)\n", + total_vports, MLX5_MAX_UC_PER_VPORT(dev), MLX5_MAX_MC_PER_VPORT(dev)); @@ -1739,14 +1631,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->dev = dev; - esw->l2_table.bitmap = kcalloc(BITS_TO_LONGS(l2_table_size), - sizeof(uintptr_t), GFP_KERNEL); - if (!esw->l2_table.bitmap) { - err = -ENOMEM; - goto abort; - } - esw->l2_table.size = l2_table_size; - esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); if (!esw->work_queue) { err = -ENOMEM; @@ -1797,7 +1681,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) abort: if (esw->work_queue) destroy_workqueue(esw->work_queue); - kfree(esw->l2_table.bitmap); kfree(esw->vports); kfree(esw->offloads.vport_reps); kfree(esw); @@ -1806,15 +1689,13 @@ abort: void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) { - if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || - MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + if (!esw || !MLX5_VPORT_MANAGER(esw->dev)) return; esw_info(esw->dev, "cleanup\n"); esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); - kfree(esw->l2_table.bitmap); kfree(esw->offloads.vport_reps); kfree(esw->vports); kfree(esw); @@ -1838,8 +1719,6 @@ void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) } /* Vport Administration */ -#define ESW_ALLOWED(esw) \ - (esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev)) #define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports) int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 834a33050969..565c8b7a399a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -37,6 +37,15 @@ #include <linux/if_link.h> #include <net/devlink.h> #include <linux/mlx5/device.h> +#include "lib/mpfs.h" + +enum { + SRIOV_NONE, + SRIOV_LEGACY, + SRIOV_OFFLOADS +}; + +#ifdef CONFIG_MLX5_ESWITCH #define MLX5_MAX_UC_PER_VPORT(dev) \ (1 << MLX5_CAP_GEN(dev, log_max_current_uc_list)) @@ -44,9 +53,6 @@ #define MLX5_MAX_MC_PER_VPORT(dev) \ (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list)) -#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE)) -#define MLX5_L2_ADDR_HASH(addr) (addr[5]) - #define FDB_UPLINK_VPORT 0xffff #define MLX5_MIN_BW_SHARE 1 @@ -54,48 +60,6 @@ #define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \ min_t(u32, max_t(u32, (rate) / (divider), MLX5_MIN_BW_SHARE), limit) -/* L2 -mac address based- hash helpers */ -struct l2addr_node { - struct hlist_node hlist; - u8 addr[ETH_ALEN]; -}; - -#define for_each_l2hash_node(hn, tmp, hash, i) \ - for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \ - hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) - -#define l2addr_hash_find(hash, mac, type) ({ \ - int ix = MLX5_L2_ADDR_HASH(mac); \ - bool found = false; \ - type *ptr = NULL; \ - \ - hlist_for_each_entry(ptr, &hash[ix], node.hlist) \ - if (ether_addr_equal(ptr->node.addr, mac)) {\ - found = true; \ - break; \ - } \ - if (!found) \ - ptr = NULL; \ - ptr; \ -}) - -#define l2addr_hash_add(hash, mac, type, gfp) ({ \ - int ix = MLX5_L2_ADDR_HASH(mac); \ - type *ptr = NULL; \ - \ - ptr = kzalloc(sizeof(type), gfp); \ - if (ptr) { \ - ether_addr_copy(ptr->node.addr, mac); \ - hlist_add_head(&ptr->node.hlist, &hash[ix]);\ - } \ - ptr; \ -}) - -#define l2addr_hash_del(ptr) ({ \ - hlist_del(&ptr->node.hlist); \ - kfree(ptr); \ -}) - struct vport_ingress { struct mlx5_flow_table *acl; struct mlx5_flow_group *allow_untagged_spoofchk_grp; @@ -150,12 +114,6 @@ struct mlx5_vport { u16 enabled_events; }; -struct mlx5_l2_table { - struct hlist_head l2_hash[MLX5_L2_ADDR_HASH_SIZE]; - u32 size; - unsigned long *bitmap; -}; - struct mlx5_eswitch_fdb { void *fdb; union { @@ -175,12 +133,6 @@ struct mlx5_eswitch_fdb { }; }; -enum { - SRIOV_NONE, - SRIOV_LEGACY, - SRIOV_OFFLOADS -}; - struct mlx5_esw_sq { struct mlx5_flow_handle *send_to_vport_rule; struct list_head list; @@ -222,7 +174,6 @@ struct esw_mc_addr { /* SRIOV only */ struct mlx5_eswitch { struct mlx5_core_dev *dev; - struct mlx5_l2_table l2_table; struct mlx5_eswitch_fdb fdb_table; struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; struct workqueue_struct *work_queue; @@ -250,8 +201,6 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); -void mlx5_eswitch_attach(struct mlx5_eswitch *esw); -void mlx5_eswitch_detach(struct mlx5_eswitch *esw); void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode); void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); @@ -345,4 +294,13 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, #define esw_debug(dev, format, ...) \ mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) +#else /* CONFIG_MLX5_ESWITCH */ +/* eswitch API stubs */ +static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } +static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} +static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) {} +static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; } +static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {} +#endif /* CONFIG_MLX5_ESWITCH */ + #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 5bc0593bd76e..d9fd8570b07c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -433,6 +433,8 @@ static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw) struct mlx5_flow_table *fdb = NULL; int esw_size, err = 0; u32 flags = 0; + u32 max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | + MLX5_CAP_GEN(dev, max_flow_counter_15_0); root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); if (!root_ns) { @@ -443,9 +445,9 @@ static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw) esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d)*groups(%d))\n", MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size), - MLX5_CAP_GEN(dev, max_flow_counter), ESW_OFFLOADS_NUM_GROUPS); + max_flow_counter, ESW_OFFLOADS_NUM_GROUPS); - esw_size = min_t(int, MLX5_CAP_GEN(dev, max_flow_counter) * ESW_OFFLOADS_NUM_GROUPS, + esw_size = min_t(int, max_flow_counter * ESW_OFFLOADS_NUM_GROUPS, 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index e750f07793b8..e0d0efd903bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -263,7 +263,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_context, in_flow_context, modify_header_id, fte->modify_id); in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context, match_value); - memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param)); + memcpy(in_match_value, &fte->val, sizeof(fte->val)); in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); if (fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { @@ -359,7 +359,7 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id) +int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id) { u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)] = {0}; u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0}; @@ -374,7 +374,7 @@ int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id) return err; } -int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id) +int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id) { u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)] = {0}; u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)] = {0}; @@ -385,7 +385,7 @@ int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id) return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id, +int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id, u64 *packets, u64 *bytes) { u32 out[MLX5_ST_SZ_BYTES(query_flow_counter_out) + @@ -409,14 +409,14 @@ int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id, } struct mlx5_cmd_fc_bulk { - u16 id; + u32 id; int num; int outlen; u32 out[0]; }; struct mlx5_cmd_fc_bulk * -mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num) +mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num) { struct mlx5_cmd_fc_bulk *b; int outlen = @@ -453,7 +453,7 @@ mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b) } void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, - struct mlx5_cmd_fc_bulk *b, u16 id, + struct mlx5_cmd_fc_bulk *b, u32 id, u64 *packets, u64 *bytes) { int index = id - b->id; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index 0f98a7cf4877..c6d7bdf255b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -74,20 +74,20 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, u32 underlay_qpn); -int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id); -int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id); -int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id, +int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id); +int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id); +int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id, u64 *packets, u64 *bytes); struct mlx5_cmd_fc_bulk; struct mlx5_cmd_fc_bulk * -mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num); +mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num); void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b); int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b); void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, - struct mlx5_cmd_fc_bulk *b, u16 id, + struct mlx5_cmd_fc_bulk *b, u32 id, u64 *packets, u64 *bytes); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index e8690fe46bf2..5a7bea688ec8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -36,6 +36,7 @@ #include "mlx5_core.h" #include "fs_core.h" #include "fs_cmd.h" +#include "diag/fs_tracepoint.h" #define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ sizeof(struct init_tree_node)) @@ -82,8 +83,8 @@ #define ETHTOOL_PRIO_NUM_LEVELS 1 #define ETHTOOL_NUM_PRIOS 11 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) -/* Vlan, mac, ttc, aRFS */ -#define KERNEL_NIC_PRIO_NUM_LEVELS 4 +/* Vlan, mac, ttc, inner ttc, aRFS */ +#define KERNEL_NIC_PRIO_NUM_LEVELS 5 #define KERNEL_NIC_NUM_PRIOS 1 /* One more level for tc */ #define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1) @@ -150,6 +151,23 @@ enum fs_i_mutex_lock_class { FS_MUTEX_CHILD }; +static const struct rhashtable_params rhash_fte = { + .key_len = FIELD_SIZEOF(struct fs_fte, val), + .key_offset = offsetof(struct fs_fte, val), + .head_offset = offsetof(struct fs_fte, hash), + .automatic_shrinking = true, + .min_size = 1, +}; + +static const struct rhashtable_params rhash_fg = { + .key_len = FIELD_SIZEOF(struct mlx5_flow_group, mask), + .key_offset = offsetof(struct mlx5_flow_group, mask), + .head_offset = offsetof(struct mlx5_flow_group, hash), + .automatic_shrinking = true, + .min_size = 1, + +}; + static void del_rule(struct fs_node *node); static void del_flow_table(struct fs_node *node); static void del_flow_group(struct fs_node *node); @@ -255,71 +273,77 @@ static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns, return NULL; } -static bool masked_memcmp(void *mask, void *val1, void *val2, size_t size) +static bool check_last_reserved(const u32 *match_criteria) { - unsigned int i; + char *match_criteria_reserved = + MLX5_ADDR_OF(fte_match_param, match_criteria, MLX5_FTE_MATCH_PARAM_RESERVED); - for (i = 0; i < size; i++, mask++, val1++, val2++) - if ((*((u8 *)val1) & (*(u8 *)mask)) != - ((*(u8 *)val2) & (*(u8 *)mask))) - return false; - - return true; + return !match_criteria_reserved[0] && + !memcmp(match_criteria_reserved, match_criteria_reserved + 1, + MLX5_FLD_SZ_BYTES(fte_match_param, + MLX5_FTE_MATCH_PARAM_RESERVED) - 1); } -static bool compare_match_value(struct mlx5_flow_group_mask *mask, - void *fte_param1, void *fte_param2) +static bool check_valid_mask(u8 match_criteria_enable, const u32 *match_criteria) { - if (mask->match_criteria_enable & - 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) { - void *fte_match1 = MLX5_ADDR_OF(fte_match_param, - fte_param1, outer_headers); - void *fte_match2 = MLX5_ADDR_OF(fte_match_param, - fte_param2, outer_headers); - void *fte_mask = MLX5_ADDR_OF(fte_match_param, - mask->match_criteria, outer_headers); + if (match_criteria_enable & ~( + (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) | + (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) | + (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS))) + return false; - if (!masked_memcmp(fte_mask, fte_match1, fte_match2, - MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4))) + if (!(match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS)) { + char *fg_type_mask = MLX5_ADDR_OF(fte_match_param, + match_criteria, outer_headers); + + if (fg_type_mask[0] || + memcmp(fg_type_mask, fg_type_mask + 1, + MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4) - 1)) return false; } - if (mask->match_criteria_enable & - 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) { - void *fte_match1 = MLX5_ADDR_OF(fte_match_param, - fte_param1, misc_parameters); - void *fte_match2 = MLX5_ADDR_OF(fte_match_param, - fte_param2, misc_parameters); - void *fte_mask = MLX5_ADDR_OF(fte_match_param, - mask->match_criteria, misc_parameters); + if (!(match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS)) { + char *fg_type_mask = MLX5_ADDR_OF(fte_match_param, + match_criteria, misc_parameters); - if (!masked_memcmp(fte_mask, fte_match1, fte_match2, - MLX5_ST_SZ_BYTES(fte_match_set_misc))) + if (fg_type_mask[0] || + memcmp(fg_type_mask, fg_type_mask + 1, + MLX5_ST_SZ_BYTES(fte_match_set_misc) - 1)) return false; } - if (mask->match_criteria_enable & - 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) { - void *fte_match1 = MLX5_ADDR_OF(fte_match_param, - fte_param1, inner_headers); - void *fte_match2 = MLX5_ADDR_OF(fte_match_param, - fte_param2, inner_headers); - void *fte_mask = MLX5_ADDR_OF(fte_match_param, - mask->match_criteria, inner_headers); + if (!(match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS)) { + char *fg_type_mask = MLX5_ADDR_OF(fte_match_param, + match_criteria, inner_headers); - if (!masked_memcmp(fte_mask, fte_match1, fte_match2, - MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4))) + if (fg_type_mask[0] || + memcmp(fg_type_mask, fg_type_mask + 1, + MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4) - 1)) return false; } - return true; + + return check_last_reserved(match_criteria); } -static bool compare_match_criteria(u8 match_criteria_enable1, - u8 match_criteria_enable2, - void *mask1, void *mask2) +static bool check_valid_spec(const struct mlx5_flow_spec *spec) { - return match_criteria_enable1 == match_criteria_enable2 && - !memcmp(mask1, mask2, MLX5_ST_SZ_BYTES(fte_match_param)); + int i; + + if (!check_valid_mask(spec->match_criteria_enable, spec->match_criteria)) { + pr_warn("mlx5_core: Match criteria given mismatches match_criteria_enable\n"); + return false; + } + + for (i = 0; i < MLX5_ST_SZ_DW_MATCH_PARAM; i++) + if (spec->match_value[i] & ~spec->match_criteria[i]) { + pr_warn("mlx5_core: match_value differs from match_criteria\n"); + return false; + } + + return check_last_reserved(spec->match_value); } static struct mlx5_flow_root_namespace *find_root(struct fs_node *node) @@ -360,6 +384,8 @@ static void del_flow_table(struct fs_node *node) err = mlx5_cmd_destroy_flow_table(dev, ft); if (err) mlx5_core_warn(dev, "flow steering can't destroy ft\n"); + ida_destroy(&ft->fte_allocator); + rhltable_destroy(&ft->fgs_hash); fs_get_obj(prio, ft->node.parent); prio->num_ft--; } @@ -370,22 +396,16 @@ static void del_rule(struct fs_node *node) struct mlx5_flow_table *ft; struct mlx5_flow_group *fg; struct fs_fte *fte; - u32 *match_value; int modify_mask; struct mlx5_core_dev *dev = get_dev(node); - int match_len = MLX5_ST_SZ_BYTES(fte_match_param); int err; bool update_fte = false; - match_value = kvzalloc(match_len, GFP_KERNEL); - if (!match_value) - return; - fs_get_obj(rule, node); fs_get_obj(fte, rule->node.parent); fs_get_obj(fg, fte->node.parent); - memcpy(match_value, fte->val, sizeof(fte->val)); fs_get_obj(ft, fg->node.parent); + trace_mlx5_fs_del_rule(rule); list_del(&rule->node.list); if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { mutex_lock(&rule->dest_attr.ft->lock); @@ -414,7 +434,18 @@ out: "%s can't del rule fg id=%d fte_index=%d\n", __func__, fg->id, fte->index); } - kvfree(match_value); +} + +static void destroy_fte(struct fs_fte *fte, struct mlx5_flow_group *fg) +{ + struct mlx5_flow_table *ft; + int ret; + + ret = rhashtable_remove_fast(&fg->ftes_hash, &fte->hash, rhash_fte); + WARN_ON(ret); + fte->status = 0; + fs_get_obj(ft, fg->node.parent); + ida_simple_remove(&ft->fte_allocator, fte->index); } static void del_fte(struct fs_node *node) @@ -428,6 +459,7 @@ static void del_fte(struct fs_node *node) fs_get_obj(fte, node); fs_get_obj(fg, fte->node.parent); fs_get_obj(ft, fg->node.parent); + trace_mlx5_fs_del_fte(fte); dev = get_dev(&ft->node); err = mlx5_cmd_delete_fte(dev, ft, @@ -437,8 +469,7 @@ static void del_fte(struct fs_node *node) "flow steering can't delete fte in index %d of flow group id %d\n", fte->index, fg->id); - fte->status = 0; - fg->num_ftes--; + destroy_fte(fte, fg); } static void del_flow_group(struct fs_node *node) @@ -446,14 +477,21 @@ static void del_flow_group(struct fs_node *node) struct mlx5_flow_group *fg; struct mlx5_flow_table *ft; struct mlx5_core_dev *dev; + int err; fs_get_obj(fg, node); fs_get_obj(ft, fg->node.parent); dev = get_dev(&ft->node); + trace_mlx5_fs_del_fg(fg); if (ft->autogroup.active) ft->autogroup.num_groups--; + rhashtable_destroy(&fg->ftes_hash); + err = rhltable_remove(&ft->fgs_hash, + &fg->hash, + rhash_fg); + WARN_ON(err); if (mlx5_cmd_destroy_flow_group(dev, ft, fg->id)) mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n", fg->id, ft->id); @@ -488,10 +526,17 @@ static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in) u8 match_criteria_enable = MLX5_GET(create_flow_group_in, create_fg_in, match_criteria_enable); + int ret; + fg = kzalloc(sizeof(*fg), GFP_KERNEL); if (!fg) return ERR_PTR(-ENOMEM); + ret = rhashtable_init(&fg->ftes_hash, &rhash_fte); + if (ret) { + kfree(fg); + return ERR_PTR(ret); + } fg->mask.match_criteria_enable = match_criteria_enable; memcpy(&fg->mask.match_criteria, match_criteria, sizeof(fg->mask.match_criteria)); @@ -509,10 +554,17 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft u32 flags) { struct mlx5_flow_table *ft; + int ret; ft = kzalloc(sizeof(*ft), GFP_KERNEL); if (!ft) - return NULL; + return ERR_PTR(-ENOMEM); + + ret = rhltable_init(&ft->fgs_hash, &rhash_fg); + if (ret) { + kfree(ft); + return ERR_PTR(ret); + } ft->level = level; ft->node.type = FS_TYPE_FLOW_TABLE; @@ -523,6 +575,7 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft ft->flags = flags; INIT_LIST_HEAD(&ft->fwd_rules); mutex_init(&ft->lock); + ida_init(&ft->fte_allocator); return ft; } @@ -812,8 +865,8 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa ft_attr->max_fte ? roundup_pow_of_two(ft_attr->max_fte) : 0, root->table_type, op_mod, ft_attr->flags); - if (!ft) { - err = -ENOMEM; + if (IS_ERR(ft)) { + err = PTR_ERR(ft); goto unlock_root; } @@ -839,6 +892,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa destroy_ft: mlx5_cmd_destroy_flow_table(root->dev, ft); free_ft: + ida_destroy(&ft->fte_allocator); kfree(ft); unlock_root: mutex_unlock(&root->chain_lock); @@ -924,11 +978,13 @@ static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table * if (IS_ERR(fg)) return fg; + err = rhltable_insert(&ft->fgs_hash, &fg->hash, rhash_fg); + if (err) + goto err_free_fg; + err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id); - if (err) { - kfree(fg); - return ERR_PTR(err); - } + if (err) + goto err_remove_fg; if (ft->autogroup.active) ft->autogroup.num_groups++; @@ -938,14 +994,33 @@ static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table * /* Add node to group list */ list_add(&fg->node.list, prev_fg); + trace_mlx5_fs_add_fg(fg); return fg; + +err_remove_fg: + WARN_ON(rhltable_remove(&ft->fgs_hash, + &fg->hash, + rhash_fg)); +err_free_fg: + rhashtable_destroy(&fg->ftes_hash); + kfree(fg); + + return ERR_PTR(err); } struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, u32 *fg_in) { + void *match_criteria = MLX5_ADDR_OF(create_flow_group_in, + fg_in, match_criteria); + u8 match_criteria_enable = MLX5_GET(create_flow_group_in, + fg_in, + match_criteria_enable); struct mlx5_flow_group *fg; + if (!check_valid_mask(match_criteria_enable, match_criteria)) + return ERR_PTR(-EINVAL); + if (ft->autogroup.active) return ERR_PTR(-EPERM); @@ -1102,43 +1177,38 @@ free_handle: return ERR_PTR(err); } -/* Assumed fg is locked */ -static unsigned int get_free_fte_index(struct mlx5_flow_group *fg, - struct list_head **prev) -{ - struct fs_fte *fte; - unsigned int start = fg->start_index; - - if (prev) - *prev = &fg->node.children; - - /* assumed list is sorted by index */ - fs_for_each_fte(fte, fg) { - if (fte->index != start) - return start; - start++; - if (prev) - *prev = &fte->node.list; - } - - return start; -} - -/* prev is output, prev->next = new_fte */ static struct fs_fte *create_fte(struct mlx5_flow_group *fg, u32 *match_value, - struct mlx5_flow_act *flow_act, - struct list_head **prev) + struct mlx5_flow_act *flow_act) { + struct mlx5_flow_table *ft; struct fs_fte *fte; int index; + int ret; + + fs_get_obj(ft, fg->node.parent); + index = ida_simple_get(&ft->fte_allocator, fg->start_index, + fg->start_index + fg->max_ftes, + GFP_KERNEL); + if (index < 0) + return ERR_PTR(index); - index = get_free_fte_index(fg, prev); fte = alloc_fte(flow_act, match_value, index); - if (IS_ERR(fte)) - return fte; + if (IS_ERR(fte)) { + ret = PTR_ERR(fte); + goto err_alloc; + } + ret = rhashtable_insert_fast(&fg->ftes_hash, &fte->hash, rhash_fte); + if (ret) + goto err_hash; return fte; + +err_hash: + kfree(fte); +err_alloc: + ida_simple_remove(&ft->fte_allocator, index); + return ERR_PTR(ret); } static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft, @@ -1226,79 +1296,104 @@ static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte, return NULL; } +static bool check_conflicting_actions(u32 action1, u32 action2) +{ + u32 xored_actions = action1 ^ action2; + + /* if one rule only wants to count, it's ok */ + if (action1 == MLX5_FLOW_CONTEXT_ACTION_COUNT || + action2 == MLX5_FLOW_CONTEXT_ACTION_COUNT) + return false; + + if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_ENCAP | + MLX5_FLOW_CONTEXT_ACTION_DECAP)) + return true; + + return false; +} + +static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act) +{ + if (check_conflicting_actions(flow_act->action, fte->action)) { + mlx5_core_warn(get_dev(&fte->node), + "Found two FTEs with conflicting actions\n"); + return -EEXIST; + } + + if (fte->flow_tag != flow_act->flow_tag) { + mlx5_core_warn(get_dev(&fte->node), + "FTE flow tag %u already exists with different flow tag %u\n", + fte->flow_tag, + flow_act->flow_tag); + return -EEXIST; + } + + return 0; +} + static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, u32 *match_value, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, - int dest_num) + int dest_num, + struct fs_fte *fte) { struct mlx5_flow_handle *handle; struct mlx5_flow_table *ft; - struct list_head *prev; - struct fs_fte *fte; int i; - nested_lock_ref_node(&fg->node, FS_MUTEX_PARENT); - fs_for_each_fte(fte, fg) { + if (fte) { + int old_action; + int ret; + nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); - if (compare_match_value(&fg->mask, match_value, &fte->val) && - (flow_act->action & fte->action)) { - int old_action = fte->action; - - if (fte->flow_tag != flow_act->flow_tag) { - mlx5_core_warn(get_dev(&fte->node), - "FTE flow tag %u already exists with different flow tag %u\n", - fte->flow_tag, - flow_act->flow_tag); - handle = ERR_PTR(-EEXIST); - goto unlock_fte; - } + ret = check_conflicting_ftes(fte, flow_act); + if (ret) { + handle = ERR_PTR(ret); + goto unlock_fte; + } - fte->action |= flow_act->action; - handle = add_rule_fte(fte, fg, dest, dest_num, - old_action != flow_act->action); - if (IS_ERR(handle)) { - fte->action = old_action; - goto unlock_fte; - } else { - goto add_rules; - } + old_action = fte->action; + fte->action |= flow_act->action; + handle = add_rule_fte(fte, fg, dest, dest_num, + old_action != flow_act->action); + if (IS_ERR(handle)) { + fte->action = old_action; + goto unlock_fte; + } else { + trace_mlx5_fs_set_fte(fte, false); + goto add_rules; } - unlock_ref_node(&fte->node); } fs_get_obj(ft, fg->node.parent); - if (fg->num_ftes >= fg->max_ftes) { - handle = ERR_PTR(-ENOSPC); - goto unlock_fg; - } - fte = create_fte(fg, match_value, flow_act, &prev); - if (IS_ERR(fte)) { - handle = (void *)fte; - goto unlock_fg; - } + fte = create_fte(fg, match_value, flow_act); + if (IS_ERR(fte)) + return (void *)fte; tree_init_node(&fte->node, 0, del_fte); nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); handle = add_rule_fte(fte, fg, dest, dest_num, false); if (IS_ERR(handle)) { unlock_ref_node(&fte->node); + destroy_fte(fte, fg); kfree(fte); - goto unlock_fg; + return handle; } - fg->num_ftes++; - tree_add_node(&fte->node, &fg->node); - list_add(&fte->node.list, prev); + /* fte list isn't sorted */ + list_add_tail(&fte->node.list, &fg->node.children); + trace_mlx5_fs_set_fte(fte, true); add_rules: for (i = 0; i < handle->num_rules; i++) { - if (atomic_read(&handle->rule[i]->node.refcount) == 1) + if (atomic_read(&handle->rule[i]->node.refcount) == 1) { tree_add_node(&handle->rule[i]->node, &fte->node); + trace_mlx5_fs_add_rule(handle->rule[i]); + } } unlock_fte: unlock_ref_node(&fte->node); -unlock_fg: - unlock_ref_node(&fg->node); return handle; } @@ -1347,6 +1442,96 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest, } static struct mlx5_flow_handle * +try_add_to_existing_fg(struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int dest_num) +{ + struct mlx5_flow_group *g; + struct mlx5_flow_handle *rule = ERR_PTR(-ENOENT); + struct rhlist_head *tmp, *list; + struct match_list { + struct list_head list; + struct mlx5_flow_group *g; + } match_list, *iter; + LIST_HEAD(match_head); + + rcu_read_lock(); + /* Collect all fgs which has a matching match_criteria */ + list = rhltable_lookup(&ft->fgs_hash, spec, rhash_fg); + rhl_for_each_entry_rcu(g, tmp, list, hash) { + struct match_list *curr_match; + + if (likely(list_empty(&match_head))) { + match_list.g = g; + list_add_tail(&match_list.list, &match_head); + continue; + } + curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC); + + if (!curr_match) { + rcu_read_unlock(); + rule = ERR_PTR(-ENOMEM); + goto free_list; + } + curr_match->g = g; + list_add_tail(&curr_match->list, &match_head); + } + rcu_read_unlock(); + + /* Try to find a fg that already contains a matching fte */ + list_for_each_entry(iter, &match_head, list) { + struct fs_fte *fte; + + g = iter->g; + nested_lock_ref_node(&g->node, FS_MUTEX_PARENT); + fte = rhashtable_lookup_fast(&g->ftes_hash, spec->match_value, + rhash_fte); + if (fte) { + rule = add_rule_fg(g, spec->match_value, + flow_act, dest, dest_num, fte); + unlock_ref_node(&g->node); + goto free_list; + } + unlock_ref_node(&g->node); + } + + /* No group with matching fte found. Try to add a new fte to any + * matching fg. + */ + list_for_each_entry(iter, &match_head, list) { + g = iter->g; + + nested_lock_ref_node(&g->node, FS_MUTEX_PARENT); + rule = add_rule_fg(g, spec->match_value, + flow_act, dest, dest_num, NULL); + if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) { + unlock_ref_node(&g->node); + goto free_list; + } + unlock_ref_node(&g->node); + } + +free_list: + if (!list_empty(&match_head)) { + struct match_list *match_tmp; + + /* The most common case is having one FG. Since we want to + * optimize this case, we save the first on the stack. + * Therefore, no need to free it. + */ + list_del(&list_first_entry(&match_head, typeof(*iter), list)->list); + list_for_each_entry_safe(iter, match_tmp, &match_head, list) { + list_del(&iter->list); + kfree(iter); + } + } + + return rule; +} + +static struct mlx5_flow_handle * _mlx5_add_flow_rules(struct mlx5_flow_table *ft, struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, @@ -1358,22 +1543,18 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, struct mlx5_flow_handle *rule; int i; + if (!check_valid_spec(spec)) + return ERR_PTR(-EINVAL); + for (i = 0; i < dest_num; i++) { if (!dest_is_valid(&dest[i], flow_act->action, ft)) return ERR_PTR(-EINVAL); } nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT); - fs_for_each_fg(g, ft) - if (compare_match_criteria(g->mask.match_criteria_enable, - spec->match_criteria_enable, - g->mask.match_criteria, - spec->match_criteria)) { - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num); - if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) - goto unlock; - } + rule = try_add_to_existing_fg(ft, spec, flow_act, dest, dest_num); + if (!IS_ERR(rule)) + goto unlock; g = create_autogroup(ft, spec->match_criteria_enable, spec->match_criteria); @@ -1382,7 +1563,8 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, goto unlock; } - rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num); + rule = add_rule_fg(g, spec->match_value, flow_act, dest, + dest_num, NULL); if (IS_ERR(rule)) { /* Remove assumes refcount > 0 and autogroup creates a group * with a refcount = 0. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 990acee6fb09..5509a752f98e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -34,6 +34,7 @@ #define _MLX5_FS_CORE_ #include <linux/mlx5/fs.h> +#include <linux/rhashtable.h> enum fs_node_type { FS_TYPE_NAMESPACE, @@ -118,6 +119,8 @@ struct mlx5_flow_table { /* FWD rules that point on this flow table */ struct list_head fwd_rules; u32 flags; + struct ida fte_allocator; + struct rhltable fgs_hash; }; struct mlx5_fc_cache { @@ -136,17 +139,29 @@ struct mlx5_fc { u64 lastpackets; u64 lastbytes; - u16 id; + u32 id; bool deleted; bool aging; struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; }; +#define MLX5_FTE_MATCH_PARAM_RESERVED reserved_at_600 +/* Calculate the fte_match_param length and without the reserved length. + * Make sure the reserved field is the last. + */ +#define MLX5_ST_SZ_DW_MATCH_PARAM \ + ((MLX5_BYTE_OFF(fte_match_param, MLX5_FTE_MATCH_PARAM_RESERVED) / sizeof(u32)) + \ + BUILD_BUG_ON_ZERO(MLX5_ST_SZ_BYTES(fte_match_param) != \ + MLX5_FLD_SZ_BYTES(fte_match_param, \ + MLX5_FTE_MATCH_PARAM_RESERVED) +\ + MLX5_BYTE_OFF(fte_match_param, \ + MLX5_FTE_MATCH_PARAM_RESERVED))) + /* Type of children is mlx5_flow_rule */ struct fs_fte { struct fs_node node; - u32 val[MLX5_ST_SZ_DW(fte_match_param)]; + u32 val[MLX5_ST_SZ_DW_MATCH_PARAM]; u32 dests_size; u32 flow_tag; u32 index; @@ -155,6 +170,7 @@ struct fs_fte { u32 modify_id; enum fs_fte_status status; struct mlx5_fc *counter; + struct rhash_head hash; }; /* Type of children is mlx5_flow_table/namespace */ @@ -174,7 +190,7 @@ struct mlx5_flow_namespace { struct mlx5_flow_group_mask { u8 match_criteria_enable; - u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)]; + u32 match_criteria[MLX5_ST_SZ_DW_MATCH_PARAM]; }; /* Type of children is fs_fte */ @@ -183,8 +199,9 @@ struct mlx5_flow_group { struct mlx5_flow_group_mask mask; u32 start_index; u32 max_ftes; - u32 num_ftes; u32 id; + struct rhashtable ftes_hash; + struct rhlist_head hash; }; struct mlx5_flow_root_namespace { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 6507d8acc54d..89d1f8650033 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -38,6 +38,8 @@ #include "fs_cmd.h" #define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000) +/* Max number of counters to query in bulk read is 32K */ +#define MLX5_SW_MAX_COUNTERS_BULK BIT(15) /* locking scheme: * @@ -90,16 +92,21 @@ static void mlx5_fc_stats_insert(struct rb_root *root, struct mlx5_fc *counter) rb_insert_color(&counter->node, root); } +/* The function returns the last node that was queried so the caller + * function can continue calling it till all counters are queried. + */ static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev, struct mlx5_fc *first, - u16 last_id) + u32 last_id) { struct mlx5_cmd_fc_bulk *b; struct rb_node *node = NULL; - u16 afirst_id; + u32 afirst_id; int num; int err; - int max_bulk = 1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk); + + int max_bulk = min_t(int, MLX5_SW_MAX_COUNTERS_BULK, + (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk))); /* first id must be aligned to 4 when using bulk query */ afirst_id = first->id & ~0x3; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index eb04e97d8765..43c126c63955 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -39,10 +39,11 @@ static void mlx5i_get_drvinfo(struct net_device *dev, struct mlx5e_priv *priv = mlx5i_epriv(dev); mlx5e_ethtool_get_drvinfo(priv, drvinfo); + strlcpy(drvinfo->driver, DRIVER_NAME "[ib_ipoib]", + sizeof(drvinfo->driver)); } -static void mlx5i_get_strings(struct net_device *dev, - uint32_t stringset, uint8_t *data) +static void mlx5i_get_strings(struct net_device *dev, u32 stringset, u8 *data) { struct mlx5e_priv *priv = mlx5i_epriv(dev); @@ -129,17 +130,123 @@ static int mlx5i_flash_device(struct net_device *netdev, return mlx5e_ethtool_flash_device(priv, flash); } +enum mlx5_ptys_width { + MLX5_PTYS_WIDTH_1X = 1 << 0, + MLX5_PTYS_WIDTH_2X = 1 << 1, + MLX5_PTYS_WIDTH_4X = 1 << 2, + MLX5_PTYS_WIDTH_8X = 1 << 3, + MLX5_PTYS_WIDTH_12X = 1 << 4, +}; + +static inline int mlx5_ptys_width_enum_to_int(enum mlx5_ptys_width width) +{ + switch (width) { + case MLX5_PTYS_WIDTH_1X: return 1; + case MLX5_PTYS_WIDTH_2X: return 2; + case MLX5_PTYS_WIDTH_4X: return 4; + case MLX5_PTYS_WIDTH_8X: return 8; + case MLX5_PTYS_WIDTH_12X: return 12; + default: return -1; + } +} + +enum mlx5_ptys_rate { + MLX5_PTYS_RATE_SDR = 1 << 0, + MLX5_PTYS_RATE_DDR = 1 << 1, + MLX5_PTYS_RATE_QDR = 1 << 2, + MLX5_PTYS_RATE_FDR10 = 1 << 3, + MLX5_PTYS_RATE_FDR = 1 << 4, + MLX5_PTYS_RATE_EDR = 1 << 5, + MLX5_PTYS_RATE_HDR = 1 << 6, +}; + +static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate) +{ + switch (rate) { + case MLX5_PTYS_RATE_SDR: return 2500; + case MLX5_PTYS_RATE_DDR: return 5000; + case MLX5_PTYS_RATE_QDR: + case MLX5_PTYS_RATE_FDR10: return 10000; + case MLX5_PTYS_RATE_FDR: return 14000; + case MLX5_PTYS_RATE_EDR: return 25000; + case MLX5_PTYS_RATE_HDR: return 50000; + default: return -1; + } +} + +static int mlx5i_get_port_settings(struct net_device *netdev, + u16 *ib_link_width_oper, u16 *ib_proto_oper) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0}; + int ret; + + ret = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_IB, 1); + if (ret) + return ret; + + *ib_link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper); + *ib_proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper); + + return 0; +} + +static int mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper) +{ + int rate, width; + + rate = mlx5_ptys_rate_enum_to_int(ib_proto_oper); + if (rate < 0) + return -EINVAL; + width = mlx5_ptys_width_enum_to_int(ib_link_width_oper); + if (width < 0) + return -EINVAL; + + return rate * width; +} + +static int mlx5i_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *link_ksettings) +{ + u16 ib_link_width_oper; + u16 ib_proto_oper; + int speed, ret; + + ret = mlx5i_get_port_settings(netdev, &ib_link_width_oper, &ib_proto_oper); + if (ret) + return ret; + + ethtool_link_ksettings_zero_link_mode(link_ksettings, supported); + ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); + + speed = mlx5i_get_speed_settings(ib_link_width_oper, ib_proto_oper); + if (speed < 0) + return -EINVAL; + + link_ksettings->base.duplex = DUPLEX_FULL; + link_ksettings->base.port = PORT_OTHER; + + link_ksettings->base.autoneg = AUTONEG_DISABLE; + + link_ksettings->base.speed = speed; + + return 0; +} + const struct ethtool_ops mlx5i_ethtool_ops = { - .get_drvinfo = mlx5i_get_drvinfo, - .get_strings = mlx5i_get_strings, - .get_sset_count = mlx5i_get_sset_count, - .get_ethtool_stats = mlx5i_get_ethtool_stats, - .get_ringparam = mlx5i_get_ringparam, - .set_ringparam = mlx5i_set_ringparam, - .flash_device = mlx5i_flash_device, - .get_channels = mlx5i_get_channels, - .set_channels = mlx5i_set_channels, - .get_coalesce = mlx5i_get_coalesce, - .set_coalesce = mlx5i_set_coalesce, - .get_ts_info = mlx5i_get_ts_info, + .get_drvinfo = mlx5i_get_drvinfo, + .get_strings = mlx5i_get_strings, + .get_sset_count = mlx5i_get_sset_count, + .get_ethtool_stats = mlx5i_get_ethtool_stats, + .get_ringparam = mlx5i_get_ringparam, + .set_ringparam = mlx5i_set_ringparam, + .flash_device = mlx5i_flash_device, + .get_channels = mlx5i_get_channels, + .set_channels = mlx5i_set_channels, + .get_coalesce = mlx5i_get_coalesce, + .set_coalesce = mlx5i_set_coalesce, + .get_ts_info = mlx5i_get_ts_info, + .get_link_ksettings = mlx5i_get_link_ksettings, + .get_link = ethtool_op_get_link, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c new file mode 100644 index 000000000000..7cb67122e8b5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include "mlx5_core.h" +#include "lib/mpfs.h" + +/* HW L2 Table (MPFS) management */ +static int set_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index, u8 *mac) +{ + u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)] = {0}; + u8 *in_mac_addr; + + MLX5_SET(set_l2_table_entry_in, in, opcode, MLX5_CMD_OP_SET_L2_TABLE_ENTRY); + MLX5_SET(set_l2_table_entry_in, in, table_index, index); + + in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address); + ether_addr_copy(&in_mac_addr[2], mac); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int del_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index) +{ + u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)] = {0}; + + MLX5_SET(delete_l2_table_entry_in, in, opcode, MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); + MLX5_SET(delete_l2_table_entry_in, in, table_index, index); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +/* UC L2 table hash node */ +struct l2table_node { + struct l2addr_node node; + u32 index; /* index in HW l2 table */ +}; + +struct mlx5_mpfs { + struct hlist_head hash[MLX5_L2_ADDR_HASH_SIZE]; + struct mutex lock; /* Synchronize l2 table access */ + u32 size; + unsigned long *bitmap; +}; + +static int alloc_l2table_index(struct mlx5_mpfs *l2table, u32 *ix) +{ + int err = 0; + + *ix = find_first_zero_bit(l2table->bitmap, l2table->size); + if (*ix >= l2table->size) + err = -ENOSPC; + else + __set_bit(*ix, l2table->bitmap); + + return err; +} + +static void free_l2table_index(struct mlx5_mpfs *l2table, u32 ix) +{ + __clear_bit(ix, l2table->bitmap); +} + +int mlx5_mpfs_init(struct mlx5_core_dev *dev) +{ + int l2table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); + struct mlx5_mpfs *mpfs; + + if (!MLX5_VPORT_MANAGER(dev)) + return 0; + + mpfs = kzalloc(sizeof(*mpfs), GFP_KERNEL); + if (!mpfs) + return -ENOMEM; + + mutex_init(&mpfs->lock); + mpfs->size = l2table_size; + mpfs->bitmap = kcalloc(BITS_TO_LONGS(l2table_size), + sizeof(uintptr_t), GFP_KERNEL); + if (!mpfs->bitmap) { + kfree(mpfs); + return -ENOMEM; + } + + dev->priv.mpfs = mpfs; + return 0; +} + +void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_mpfs *mpfs = dev->priv.mpfs; + + if (!MLX5_VPORT_MANAGER(dev)) + return; + + WARN_ON(!hlist_empty(mpfs->hash)); + kfree(mpfs->bitmap); + kfree(mpfs); +} + +int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) +{ + struct mlx5_mpfs *mpfs = dev->priv.mpfs; + struct l2table_node *l2addr; + u32 index; + int err; + + if (!MLX5_VPORT_MANAGER(dev)) + return 0; + + mutex_lock(&mpfs->lock); + + l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node); + if (l2addr) { + err = -EEXIST; + goto abort; + } + + err = alloc_l2table_index(mpfs, &index); + if (err) + goto abort; + + l2addr = l2addr_hash_add(mpfs->hash, mac, struct l2table_node, GFP_KERNEL); + if (!l2addr) { + free_l2table_index(mpfs, index); + err = -ENOMEM; + goto abort; + } + + l2addr->index = index; + err = set_l2table_entry_cmd(dev, index, mac); + if (err) { + l2addr_hash_del(l2addr); + free_l2table_index(mpfs, index); + } + + mlx5_core_dbg(dev, "MPFS mac added %pM, index (%d)\n", mac, index); +abort: + mutex_unlock(&mpfs->lock); + return err; +} + +int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) +{ + struct mlx5_mpfs *mpfs = dev->priv.mpfs; + struct l2table_node *l2addr; + int err = 0; + u32 index; + + if (!MLX5_VPORT_MANAGER(dev)) + return 0; + + mutex_lock(&mpfs->lock); + + l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node); + if (!l2addr) { + err = -ENOENT; + goto unlock; + } + + index = l2addr->index; + del_l2table_entry_cmd(dev, index); + l2addr_hash_del(l2addr); + free_l2table_index(mpfs, index); + mlx5_core_dbg(dev, "MPFS mac deleted %pM, index (%d)\n", mac, index); +unlock: + mutex_unlock(&mpfs->lock); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h new file mode 100644 index 000000000000..4a7b2c3203a7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_MPFS_H__ +#define __MLX5_MPFS_H__ + +#include <linux/if_ether.h> +#include <linux/mlx5/device.h> + +/* L2 -mac address based- hash helpers */ +#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE)) +#define MLX5_L2_ADDR_HASH(addr) (addr[5]) + +struct l2addr_node { + struct hlist_node hlist; + u8 addr[ETH_ALEN]; +}; + +#define for_each_l2hash_node(hn, tmp, hash, i) \ + for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \ + hlist_for_each_entry_safe(hn, tmp, &(hash)[i], hlist) + +#define l2addr_hash_find(hash, mac, type) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + bool found = false; \ + type *ptr = NULL; \ + \ + hlist_for_each_entry(ptr, &(hash)[ix], node.hlist) \ + if (ether_addr_equal(ptr->node.addr, mac)) {\ + found = true; \ + break; \ + } \ + if (!found) \ + ptr = NULL; \ + ptr; \ +}) + +#define l2addr_hash_add(hash, mac, type, gfp) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + type *ptr = NULL; \ + \ + ptr = kzalloc(sizeof(type), gfp); \ + if (ptr) { \ + ether_addr_copy(ptr->node.addr, mac); \ + hlist_add_head(&ptr->node.hlist, &(hash)[ix]);\ + } \ + ptr; \ +}) + +#define l2addr_hash_del(ptr) ({ \ + hlist_del(&(ptr)->node.hlist); \ + kfree(ptr); \ +}) + +#ifdef CONFIG_MLX5_MPFS +int mlx5_mpfs_init(struct mlx5_core_dev *dev); +void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev); +int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac); +int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac); +#else /* #ifndef CONFIG_MLX5_MPFS */ +static inline int mlx5_mpfs_init(struct mlx5_core_dev *dev) { return 0; } +static inline void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) {} +static inline int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; } +static inline int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; } +#endif +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 8c4b45ef539c..0d2c8dcd6eae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -54,9 +54,8 @@ #include <net/devlink.h> #include "mlx5_core.h" #include "fs_core.h" -#ifdef CONFIG_MLX5_CORE_EN +#include "lib/mpfs.h" #include "eswitch.h" -#endif #include "lib/mlx5.h" #include "fpga/core.h" #include "accel/ipsec.h" @@ -788,7 +787,6 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev) return -EOPNOTSUPP; } - static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) { struct pci_dev *pdev = dev->pdev; @@ -897,13 +895,17 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto err_tables_cleanup; } -#ifdef CONFIG_MLX5_CORE_EN + err = mlx5_mpfs_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init l2 table %d\n", err); + goto err_rl_cleanup; + } + err = mlx5_eswitch_init(dev); if (err) { dev_err(&pdev->dev, "Failed to init eswitch %d\n", err); - goto err_rl_cleanup; + goto err_mpfs_cleanup; } -#endif err = mlx5_sriov_init(dev); if (err) { @@ -922,13 +924,11 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) err_sriov_cleanup: mlx5_sriov_cleanup(dev); err_eswitch_cleanup: -#ifdef CONFIG_MLX5_CORE_EN mlx5_eswitch_cleanup(dev->priv.eswitch); - +err_mpfs_cleanup: + mlx5_mpfs_cleanup(dev); err_rl_cleanup: -#endif mlx5_cleanup_rl_table(dev); - err_tables_cleanup: mlx5_cleanup_mkey_table(dev); mlx5_cleanup_srq_table(dev); @@ -946,9 +946,8 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) { mlx5_fpga_cleanup(dev); mlx5_sriov_cleanup(dev); -#ifdef CONFIG_MLX5_CORE_EN mlx5_eswitch_cleanup(dev->priv.eswitch); -#endif + mlx5_mpfs_cleanup(dev); mlx5_cleanup_rl_table(dev); mlx5_cleanup_reserved_gids(dev); mlx5_cleanup_mkey_table(dev); @@ -1106,10 +1105,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_fs; } -#ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_attach(dev->priv.eswitch); -#endif - err = mlx5_sriov_attach(dev); if (err) { dev_err(&pdev->dev, "sriov init failed %d\n", err); @@ -1152,9 +1147,6 @@ err_fpga_start: mlx5_sriov_detach(dev); err_sriov: -#ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_detach(dev->priv.eswitch); -#endif mlx5_cleanup_fs(dev); err_fs: @@ -1225,9 +1217,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_fpga_device_stop(dev); mlx5_sriov_detach(dev); -#ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_detach(dev->priv.eswitch); -#endif mlx5_cleanup_fs(dev); free_comp_eqs(dev); mlx5_stop_eqs(dev); @@ -1258,7 +1247,7 @@ struct mlx5_core_event_handler { }; static const struct devlink_ops mlx5_devlink_ops = { -#ifdef CONFIG_MLX5_CORE_EN +#ifdef CONFIG_MLX5_ESWITCH .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, .eswitch_mode_get = mlx5_devlink_eswitch_mode_get, .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set, @@ -1298,6 +1287,9 @@ static int init_one(struct pci_dev *pdev, mutex_init(&dev->pci_status_mutex); mutex_init(&dev->intf_state_mutex); + INIT_LIST_HEAD(&priv->waiting_events_list); + priv->is_accum_events = false; + #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING err = init_srcu_struct(&priv->pfault_srcu); if (err) { @@ -1352,7 +1344,6 @@ clean_srcu: cleanup_srcu_struct(&priv->pfault_srcu); clean_dev: #endif - pci_set_drvdata(pdev, NULL); devlink_free(devlink); return err; @@ -1379,7 +1370,6 @@ static void remove_one(struct pci_dev *pdev) #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING cleanup_srcu_struct(&priv->pfault_srcu); #endif - pci_set_drvdata(pdev, NULL); devlink_free(devlink); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 01d637dac533..b7c2900b75f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -43,6 +43,10 @@ #define DRIVER_VERSION "5.0-0" #define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs(mdev->pdev)) +#define MLX5_VPORT_MANAGER(mdev) \ + (MLX5_CAP_GEN(mdev, vport_group_manager) && \ + (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \ + mlx5_core_is_pf(mdev)) extern uint mlx5_core_debug_mask; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 28d8472b36f1..6c48e9959b65 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -34,9 +34,7 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/vport.h> #include "mlx5_core.h" -#ifdef CONFIG_MLX5_CORE_EN #include "eswitch.h" -#endif bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) { @@ -90,14 +88,12 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) return -EBUSY; } -#ifdef CONFIG_MLX5_CORE_EN err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY); if (err) { mlx5_core_warn(dev, "failed to enable eswitch SRIOV (%d)\n", err); return err; } -#endif for (vf = 0; vf < num_vfs; vf++) { err = mlx5_core_enable_hca(dev, vf + 1); @@ -117,7 +113,6 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) } } mlx5_core_dbg(dev, "successfully enabled VF* %d\n", vf); - } return 0; @@ -130,11 +125,7 @@ static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev) int vf; if (!sriov->enabled_vfs) -#ifdef CONFIG_MLX5_CORE_EN - goto disable_sriov_resources; -#else - return; -#endif + goto out; for (vf = 0; vf < sriov->num_vfs; vf++) { if (!sriov->vfs_ctx[vf].enabled) @@ -148,10 +139,8 @@ static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev) sriov->enabled_vfs--; } -#ifdef CONFIG_MLX5_CORE_EN -disable_sriov_resources: +out: mlx5_eswitch_disable_sriov(dev->priv.eswitch); -#endif if (mlx5_wait_for_vf_pages(dev)) mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index 695adff89d71..d56eea310509 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -75,6 +75,7 @@ config MLXSW_SPECTRUM depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV && VLAN_8021Q depends on PSAMPLE || PSAMPLE=n depends on BRIDGE || BRIDGE=n + depends on IPV6 || IPV6=n select PARMAN select MLXFW default m diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 62fc42f396bb..891ff418bb5e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -16,8 +16,9 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_switchdev.o spectrum_router.o \ spectrum_kvdl.o spectrum_acl_tcam.o \ spectrum_acl.o spectrum_flower.o \ - spectrum_cnt.o spectrum_dpipe.o \ - spectrum_fid.o + spectrum_cnt.o spectrum_fid.o \ + spectrum_ipip.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o +mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o mlxsw_minimal-objs := minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index affe84eb4bff..9d5e7cf288be 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -667,7 +667,7 @@ static int mlxsw_emad_reg_access(struct mlxsw_core *mlxsw_core, int err; dev_dbg(mlxsw_core->bus_info->dev, "EMAD reg access (tid=%llx,reg_id=%x(%s),type=%s)\n", - trans->tid, reg->id, mlxsw_reg_id_str(reg->id), + tid, reg->id, mlxsw_reg_id_str(reg->id), mlxsw_core_reg_access_type_str(type)); skb = mlxsw_emad_alloc(mlxsw_core, reg->len); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h index 9807ef814e42..f6963b0b4a55 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h @@ -57,6 +57,9 @@ enum mlxsw_afk_element { MLXSW_AFK_ELEMENT_VID, MLXSW_AFK_ELEMENT_PCP, MLXSW_AFK_ELEMENT_TCP_FLAGS, + MLXSW_AFK_ELEMENT_IP_TTL_, + MLXSW_AFK_ELEMENT_IP_ECN, + MLXSW_AFK_ELEMENT_IP_DSCP, MLXSW_AFK_ELEMENT_MAX, }; @@ -104,6 +107,9 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = { MLXSW_AFK_ELEMENT_INFO_U32(VID, 0x10, 8, 12), MLXSW_AFK_ELEMENT_INFO_U32(PCP, 0x10, 20, 3), MLXSW_AFK_ELEMENT_INFO_U32(TCP_FLAGS, 0x10, 23, 9), + MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x14, 0, 8), + MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x14, 9, 2), + MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x14, 11, 6), MLXSW_AFK_ELEMENT_INFO_U32(SRC_IP4, 0x18, 0, 32), MLXSW_AFK_ELEMENT_INFO_U32(DST_IP4, 0x1C, 0, 32), MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_HI, 0x18, 8), diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 1bd34d9a7b9e..cc27c5de5a1d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -5,6 +5,7 @@ * Copyright (c) 2015 Elad Raz <eladr@mellanox.com> * Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com> * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com> + * Copyright (c) 2017 Petr Machata <petrm@mellanox.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -3679,15 +3680,17 @@ enum mlxsw_reg_htgt_trap_group { MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP, MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP, MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP, - MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4, + MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP, MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF, MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP, - MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS, + MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS, MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP, MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE, MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME, MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP, MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT, + MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD, + MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND, }; /* reg_htgt_trap_group @@ -3952,10 +3955,12 @@ MLXSW_ITEM32(reg, rgcr, pcp_rw, 0x18, 16, 2); */ MLXSW_ITEM32(reg, rgcr, activity_dis, 0x20, 0, 8); -static inline void mlxsw_reg_rgcr_pack(char *payload, bool ipv4_en) +static inline void mlxsw_reg_rgcr_pack(char *payload, bool ipv4_en, + bool ipv6_en) { MLXSW_REG_ZERO(rgcr, payload); mlxsw_reg_rgcr_ipv4_en_set(payload, ipv4_en); + mlxsw_reg_rgcr_ipv6_en_set(payload, ipv6_en); } /* RITR - Router Interface Table Register @@ -3988,16 +3993,18 @@ MLXSW_ITEM32(reg, ritr, ipv4, 0x00, 29, 1); MLXSW_ITEM32(reg, ritr, ipv6, 0x00, 28, 1); enum mlxsw_reg_ritr_if_type { + /* VLAN interface. */ MLXSW_REG_RITR_VLAN_IF, + /* FID interface. */ MLXSW_REG_RITR_FID_IF, + /* Sub-port interface. */ MLXSW_REG_RITR_SP_IF, + /* Loopback Interface. */ + MLXSW_REG_RITR_LOOPBACK_IF, }; /* reg_ritr_type - * Router interface type. - * 0 - VLAN interface. - * 1 - FID interface. - * 2 - Sub-port interface. + * Router interface type as per enum mlxsw_reg_ritr_if_type. * Access: RW */ MLXSW_ITEM32(reg, ritr, type, 0x00, 23, 3); @@ -4125,6 +4132,67 @@ MLXSW_ITEM32(reg, ritr, sp_if_system_port, 0x08, 0, 16); */ MLXSW_ITEM32(reg, ritr, sp_if_vid, 0x18, 0, 12); +/* Loopback Interface */ + +enum mlxsw_reg_ritr_loopback_protocol { + /* IPinIP IPv4 underlay Unicast */ + MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4, + /* IPinIP IPv6 underlay Unicast */ + MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV6, +}; + +/* reg_ritr_loopback_protocol + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, loopback_protocol, 0x08, 28, 4); + +enum mlxsw_reg_ritr_loopback_ipip_type { + /* Tunnel is IPinIP. */ + MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_IP, + /* Tunnel is GRE, no key. */ + MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP, + /* Tunnel is GRE, with a key. */ + MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP, +}; + +/* reg_ritr_loopback_ipip_type + * Encapsulation type. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, loopback_ipip_type, 0x10, 24, 4); + +enum mlxsw_reg_ritr_loopback_ipip_options { + /* The key is defined by gre_key. */ + MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET, +}; + +/* reg_ritr_loopback_ipip_options + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, loopback_ipip_options, 0x10, 20, 4); + +/* reg_ritr_loopback_ipip_uvr + * Underlay Virtual Router ID. + * Range is 0..cap_max_virtual_routers-1. + * Reserved for Spectrum-2. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, loopback_ipip_uvr, 0x10, 0, 16); + +/* reg_ritr_loopback_ipip_usip* + * Encapsulation Underlay source IP. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, ritr, loopback_ipip_usip6, 0x18, 16); +MLXSW_ITEM32(reg, ritr, loopback_ipip_usip4, 0x24, 0, 32); + +/* reg_ritr_loopback_ipip_gre_key + * GRE Key. + * Reserved when ipip_type is not IP_IN_GRE_KEY_IN_IP. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, loopback_ipip_gre_key, 0x28, 0, 32); + /* Shared between ingress/egress */ enum mlxsw_reg_ritr_counter_set_type { /* No Count. */ @@ -4195,24 +4263,54 @@ static inline void mlxsw_reg_ritr_sp_if_pack(char *payload, bool lag, static inline void mlxsw_reg_ritr_pack(char *payload, bool enable, enum mlxsw_reg_ritr_if_type type, - u16 rif, u16 vr_id, u16 mtu, - const char *mac) + u16 rif, u16 vr_id, u16 mtu) { bool op = enable ? MLXSW_REG_RITR_RIF_CREATE : MLXSW_REG_RITR_RIF_DEL; MLXSW_REG_ZERO(ritr, payload); mlxsw_reg_ritr_enable_set(payload, enable); mlxsw_reg_ritr_ipv4_set(payload, 1); + mlxsw_reg_ritr_ipv6_set(payload, 1); mlxsw_reg_ritr_type_set(payload, type); mlxsw_reg_ritr_op_set(payload, op); mlxsw_reg_ritr_rif_set(payload, rif); mlxsw_reg_ritr_ipv4_fe_set(payload, 1); + mlxsw_reg_ritr_ipv6_fe_set(payload, 1); mlxsw_reg_ritr_lb_en_set(payload, 1); mlxsw_reg_ritr_virtual_router_set(payload, vr_id); mlxsw_reg_ritr_mtu_set(payload, mtu); +} + +static inline void mlxsw_reg_ritr_mac_pack(char *payload, const char *mac) +{ mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac); } +static inline void +mlxsw_reg_ritr_loopback_ipip_common_pack(char *payload, + enum mlxsw_reg_ritr_loopback_ipip_type ipip_type, + enum mlxsw_reg_ritr_loopback_ipip_options options, + u16 uvr_id, u32 gre_key) +{ + mlxsw_reg_ritr_loopback_ipip_type_set(payload, ipip_type); + mlxsw_reg_ritr_loopback_ipip_options_set(payload, options); + mlxsw_reg_ritr_loopback_ipip_uvr_set(payload, uvr_id); + mlxsw_reg_ritr_loopback_ipip_gre_key_set(payload, gre_key); +} + +static inline void +mlxsw_reg_ritr_loopback_ipip4_pack(char *payload, + enum mlxsw_reg_ritr_loopback_ipip_type ipip_type, + enum mlxsw_reg_ritr_loopback_ipip_options options, + u16 uvr_id, u32 usip, u32 gre_key) +{ + mlxsw_reg_ritr_loopback_protocol_set(payload, + MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4); + mlxsw_reg_ritr_loopback_ipip_common_pack(payload, ipip_type, options, + uvr_id, gre_key); + mlxsw_reg_ritr_loopback_ipip_usip4_set(payload, usip); +} + /* RATR - Router Adjacency Table Register * -------------------------------------- * The RATR register is used to configure the Router Adjacency (next-hop) @@ -4268,6 +4366,38 @@ MLXSW_ITEM32(reg, ratr, v, 0x00, 24, 1); */ MLXSW_ITEM32(reg, ratr, a, 0x00, 16, 1); +enum mlxsw_reg_ratr_type { + /* Ethernet */ + MLXSW_REG_RATR_TYPE_ETHERNET, + /* IPoIB Unicast without GRH. + * Reserved for Spectrum. + */ + MLXSW_REG_RATR_TYPE_IPOIB_UC, + /* IPoIB Unicast with GRH. Supported only in table 0 (Ethernet unicast + * adjacency). + * Reserved for Spectrum. + */ + MLXSW_REG_RATR_TYPE_IPOIB_UC_W_GRH, + /* IPoIB Multicast. + * Reserved for Spectrum. + */ + MLXSW_REG_RATR_TYPE_IPOIB_MC, + /* MPLS. + * Reserved for SwitchX/-2. + */ + MLXSW_REG_RATR_TYPE_MPLS, + /* IPinIP Encap. + * Reserved for SwitchX/-2. + */ + MLXSW_REG_RATR_TYPE_IPIP, +}; + +/* reg_ratr_type + * Adjacency entry type. + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, type, 0x04, 28, 4); + /* reg_ratr_adjacency_index_low * Bits 15:0 of index into the adjacency table. * For SwitchX and SwitchX-2, the adjacency table is linear and @@ -4297,17 +4427,17 @@ enum mlxsw_reg_ratr_trap_action { */ MLXSW_ITEM32(reg, ratr, trap_action, 0x0C, 28, 4); -enum mlxsw_reg_ratr_trap_id { - MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS0 = 0, - MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS1 = 1, -}; - /* reg_ratr_adjacency_index_high * Bits 23:16 of the adjacency_index. * Access: Index */ MLXSW_ITEM32(reg, ratr, adjacency_index_high, 0x0C, 16, 8); +enum mlxsw_reg_ratr_trap_id { + MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS0, + MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS1, +}; + /* reg_ratr_trap_id * Trap ID to be reported to CPU. * Trap-ID is RTR_EGRESS0 or RTR_EGRESS1. @@ -4322,14 +4452,44 @@ MLXSW_ITEM32(reg, ratr, trap_id, 0x0C, 0, 8); */ MLXSW_ITEM_BUF(reg, ratr, eth_destination_mac, 0x12, 6); +enum mlxsw_reg_ratr_ipip_type { + /* IPv4, address set by mlxsw_reg_ratr_ipip_ipv4_udip. */ + MLXSW_REG_RATR_IPIP_TYPE_IPV4, + /* IPv6, address set by mlxsw_reg_ratr_ipip_ipv6_ptr. */ + MLXSW_REG_RATR_IPIP_TYPE_IPV6, +}; + +/* reg_ratr_ipip_type + * Underlay destination ip type. + * Note: the type field must match the protocol of the router interface. + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, ipip_type, 0x10, 16, 4); + +/* reg_ratr_ipip_ipv4_udip + * Underlay ipv4 dip. + * Reserved when ipip_type is IPv6. + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, ipip_ipv4_udip, 0x18, 0, 32); + +/* reg_ratr_ipip_ipv6_ptr + * Pointer to IPv6 underlay destination ip address. + * For Spectrum: Pointer to KVD linear space. + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, ipip_ipv6_ptr, 0x1C, 0, 24); + static inline void mlxsw_reg_ratr_pack(char *payload, enum mlxsw_reg_ratr_op op, bool valid, + enum mlxsw_reg_ratr_type type, u32 adjacency_index, u16 egress_rif) { MLXSW_REG_ZERO(ratr, payload); mlxsw_reg_ratr_op_set(payload, op); mlxsw_reg_ratr_v_set(payload, valid); + mlxsw_reg_ratr_type_set(payload, type); mlxsw_reg_ratr_adjacency_index_low_set(payload, adjacency_index); mlxsw_reg_ratr_adjacency_index_high_set(payload, adjacency_index >> 16); mlxsw_reg_ratr_egress_router_interface_set(payload, egress_rif); @@ -4341,6 +4501,12 @@ static inline void mlxsw_reg_ratr_eth_entry_pack(char *payload, mlxsw_reg_ratr_eth_destination_mac_memcpy_to(payload, dest_mac); } +static inline void mlxsw_reg_ratr_ipip4_entry_pack(char *payload, u32 ipv4_udip) +{ + mlxsw_reg_ratr_ipip_type_set(payload, MLXSW_REG_RATR_IPIP_TYPE_IPV4); + mlxsw_reg_ratr_ipip_ipv4_udip_set(payload, ipv4_udip); +} + /* RICNT - Router Interface Counter Register * ----------------------------------------- * The RICNT register retrieves per port performance counters @@ -4712,12 +4878,13 @@ MLXSW_ITEM32(reg, ralue, prefix_len, 0x08, 0, 8); /* reg_ralue_dip* * The prefix of the route or of the marker that the object of the LPM * is compared with. The most significant bits of the dip are the prefix. - * The list significant bits must be '0' if the prefix_len is smaller + * The least significant bits must be '0' if the prefix_len is smaller * than 128 for IPv6 or smaller than 32 for IPv4. * IPv4 address uses bits dip[31:0] and bits dip[127:32] are reserved. * Access: Index */ MLXSW_ITEM32(reg, ralue, dip4, 0x18, 0, 32); +MLXSW_ITEM_BUF(reg, ralue, dip6, 0x0C, 16); enum mlxsw_reg_ralue_entry_type { MLXSW_REG_RALUE_ENTRY_TYPE_MARKER_ENTRY = 1, @@ -4806,7 +4973,7 @@ MLXSW_ITEM32(reg, ralue, ecmp_size, 0x28, 0, 13); */ MLXSW_ITEM32(reg, ralue, local_erif, 0x24, 0, 16); -/* reg_ralue_v +/* reg_ralue_ip2me_v * Valid bit for the tunnel_ptr field. * If valid = 0 then trap to CPU as IP2ME trap ID. * If valid = 1 and the packet format allows NVE or IPinIP tunnel @@ -4816,15 +4983,15 @@ MLXSW_ITEM32(reg, ralue, local_erif, 0x24, 0, 16); * Only relevant in case of IP2ME action. * Access: RW */ -MLXSW_ITEM32(reg, ralue, v, 0x24, 31, 1); +MLXSW_ITEM32(reg, ralue, ip2me_v, 0x24, 31, 1); -/* reg_ralue_tunnel_ptr +/* reg_ralue_ip2me_tunnel_ptr * Tunnel Pointer for NVE or IPinIP tunnel decapsulation. * For Spectrum, pointer to KVD Linear. * Only relevant in case of IP2ME action. * Access: RW */ -MLXSW_ITEM32(reg, ralue, tunnel_ptr, 0x24, 0, 24); +MLXSW_ITEM32(reg, ralue, ip2me_tunnel_ptr, 0x24, 0, 24); static inline void mlxsw_reg_ralue_pack(char *payload, enum mlxsw_reg_ralxx_protocol protocol, @@ -4851,6 +5018,16 @@ static inline void mlxsw_reg_ralue_pack4(char *payload, mlxsw_reg_ralue_dip4_set(payload, dip); } +static inline void mlxsw_reg_ralue_pack6(char *payload, + enum mlxsw_reg_ralxx_protocol protocol, + enum mlxsw_reg_ralue_op op, + u16 virtual_router, u8 prefix_len, + const void *dip) +{ + mlxsw_reg_ralue_pack(payload, protocol, op, virtual_router, prefix_len); + mlxsw_reg_ralue_dip6_memcpy_to(payload, dip); +} + static inline void mlxsw_reg_ralue_act_remote_pack(char *payload, enum mlxsw_reg_ralue_trap_action trap_action, @@ -4883,6 +5060,15 @@ mlxsw_reg_ralue_act_ip2me_pack(char *payload) MLXSW_REG_RALUE_ACTION_TYPE_IP2ME); } +static inline void +mlxsw_reg_ralue_act_ip2me_tun_pack(char *payload, u32 tunnel_ptr) +{ + mlxsw_reg_ralue_action_type_set(payload, + MLXSW_REG_RALUE_ACTION_TYPE_IP2ME); + mlxsw_reg_ralue_ip2me_v_set(payload, 1); + mlxsw_reg_ralue_ip2me_tunnel_ptr_set(payload, tunnel_ptr); +} + /* RAUHT - Router Algorithmic LPM Unicast Host Table Register * ---------------------------------------------------------- * The RAUHT register is used to configure and query the Unicast Host table in @@ -4954,6 +5140,7 @@ MLXSW_ITEM32(reg, rauht, rif, 0x00, 0, 16); * Access: Index */ MLXSW_ITEM32(reg, rauht, dip4, 0x1C, 0x0, 32); +MLXSW_ITEM_BUF(reg, rauht, dip6, 0x10, 16); enum mlxsw_reg_rauht_trap_action { MLXSW_REG_RAUHT_TRAP_ACTION_NOP, @@ -4982,6 +5169,15 @@ enum mlxsw_reg_rauht_trap_id { */ MLXSW_ITEM32(reg, rauht, trap_id, 0x60, 0, 9); +enum mlxsw_reg_flow_counter_set_type { + /* No count */ + MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT = 0x00, + /* Count packets and bytes */ + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03, + /* Count only packets */ + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS = 0x05, +}; + /* reg_rauht_counter_set_type * Counter set type for flow counters * Access: RW @@ -5018,6 +5214,23 @@ static inline void mlxsw_reg_rauht_pack4(char *payload, mlxsw_reg_rauht_dip4_set(payload, dip); } +static inline void mlxsw_reg_rauht_pack6(char *payload, + enum mlxsw_reg_rauht_op op, u16 rif, + const char *mac, const char *dip) +{ + mlxsw_reg_rauht_pack(payload, op, rif, mac); + mlxsw_reg_rauht_type_set(payload, MLXSW_REG_RAUHT_TYPE_IPV6); + mlxsw_reg_rauht_dip6_memcpy_to(payload, dip); +} + +static inline void mlxsw_reg_rauht_pack_counter(char *payload, + u64 counter_index) +{ + mlxsw_reg_rauht_counter_index_set(payload, counter_index); + mlxsw_reg_rauht_counter_set_type_set(payload, + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES); +} + /* RALEU - Router Algorithmic LPM ECMP Update Register * --------------------------------------------------- * The register enables updating the ECMP section in the action for multiple @@ -5216,6 +5429,30 @@ MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0, MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN, 0, 32, MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x04, false); +#define MLXSW_REG_RAUHTD_IPV6_ENT_LEN 0x20 + +/* reg_rauhtd_ipv6_ent_a + * Activity. Set for new entries. Set if a packet lookup has hit on the + * specific entry. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv6_ent_a, MLXSW_REG_RAUHTD_BASE_LEN, 16, 1, + MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x00, false); + +/* reg_rauhtd_ipv6_ent_rif + * Router interface. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv6_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0, + 16, MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x00, false); + +/* reg_rauhtd_ipv6_ent_dip + * Destination IPv6 address. + * Access: RO + */ +MLXSW_ITEM_BUF_INDEXED(reg, rauhtd, ipv6_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN, + 16, MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x10); + static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload, int ent_index, u16 *p_rif, u32 *p_dip) @@ -5224,6 +5461,141 @@ static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload, *p_dip = mlxsw_reg_rauhtd_ipv4_ent_dip_get(payload, ent_index); } +static inline void mlxsw_reg_rauhtd_ent_ipv6_unpack(char *payload, + int rec_index, u16 *p_rif, + char *p_dip) +{ + *p_rif = mlxsw_reg_rauhtd_ipv6_ent_rif_get(payload, rec_index); + mlxsw_reg_rauhtd_ipv6_ent_dip_memcpy_from(payload, rec_index, p_dip); +} + +/* RTDP - Routing Tunnel Decap Properties Register + * ----------------------------------------------- + * The RTDP register is used for configuring the tunnel decap properties of NVE + * and IPinIP. + */ +#define MLXSW_REG_RTDP_ID 0x8020 +#define MLXSW_REG_RTDP_LEN 0x44 + +MLXSW_REG_DEFINE(rtdp, MLXSW_REG_RTDP_ID, MLXSW_REG_RTDP_LEN); + +enum mlxsw_reg_rtdp_type { + MLXSW_REG_RTDP_TYPE_NVE, + MLXSW_REG_RTDP_TYPE_IPIP, +}; + +/* reg_rtdp_type + * Type of the RTDP entry as per enum mlxsw_reg_rtdp_type. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, type, 0x00, 28, 4); + +/* reg_rtdp_tunnel_index + * Index to the Decap entry. + * For Spectrum, Index to KVD Linear. + * Access: Index + */ +MLXSW_ITEM32(reg, rtdp, tunnel_index, 0x00, 0, 24); + +/* IPinIP */ + +/* reg_rtdp_ipip_irif + * Ingress Router Interface for the overlay router + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_irif, 0x04, 16, 16); + +enum mlxsw_reg_rtdp_ipip_sip_check { + /* No sip checks. */ + MLXSW_REG_RTDP_IPIP_SIP_CHECK_NO, + /* Filter packet if underlay is not IPv4 or if underlay SIP does not + * equal ipv4_usip. + */ + MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV4, + /* Filter packet if underlay is not IPv6 or if underlay SIP does not + * equal ipv6_usip. + */ + MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV6 = 3, +}; + +/* reg_rtdp_ipip_sip_check + * SIP check to perform. If decapsulation failed due to these configurations + * then trap_id is IPIP_DECAP_ERROR. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_sip_check, 0x04, 0, 3); + +/* If set, allow decapsulation of IPinIP (without GRE). */ +#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_IPIP BIT(0) +/* If set, allow decapsulation of IPinGREinIP without a key. */ +#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE BIT(1) +/* If set, allow decapsulation of IPinGREinIP with a key. */ +#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY BIT(2) + +/* reg_rtdp_ipip_type_check + * Flags as per MLXSW_REG_RTDP_IPIP_TYPE_CHECK_*. If decapsulation failed due to + * these configurations then trap_id is IPIP_DECAP_ERROR. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_type_check, 0x08, 24, 3); + +/* reg_rtdp_ipip_gre_key_check + * Whether GRE key should be checked. When check is enabled: + * - A packet received as IPinIP (without GRE) will always pass. + * - A packet received as IPinGREinIP without a key will not pass the check. + * - A packet received as IPinGREinIP with a key will pass the check only if the + * key in the packet is equal to expected_gre_key. + * If decapsulation failed due to GRE key then trap_id is IPIP_DECAP_ERROR. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_gre_key_check, 0x08, 23, 1); + +/* reg_rtdp_ipip_ipv4_usip + * Underlay IPv4 address for ipv4 source address check. + * Reserved when sip_check is not '1'. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_ipv4_usip, 0x0C, 0, 32); + +/* reg_rtdp_ipip_ipv6_usip_ptr + * This field is valid when sip_check is "sipv6 check explicitly". This is a + * pointer to the IPv6 DIP which is configured by RIPS. For Spectrum, the index + * is to the KVD linear. + * Reserved when sip_check is not MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV6. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_ipv6_usip_ptr, 0x10, 0, 24); + +/* reg_rtdp_ipip_expected_gre_key + * GRE key for checking. + * Reserved when gre_key_check is '0'. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_expected_gre_key, 0x14, 0, 32); + +static inline void mlxsw_reg_rtdp_pack(char *payload, + enum mlxsw_reg_rtdp_type type, + u32 tunnel_index) +{ + MLXSW_REG_ZERO(rtdp, payload); + mlxsw_reg_rtdp_type_set(payload, type); + mlxsw_reg_rtdp_tunnel_index_set(payload, tunnel_index); +} + +static inline void +mlxsw_reg_rtdp_ipip4_pack(char *payload, u16 irif, + enum mlxsw_reg_rtdp_ipip_sip_check sip_check, + unsigned int type_check, bool gre_key_check, + u32 ipv4_usip, u32 expected_gre_key) +{ + mlxsw_reg_rtdp_ipip_irif_set(payload, irif); + mlxsw_reg_rtdp_ipip_sip_check_set(payload, sip_check); + mlxsw_reg_rtdp_ipip_type_check_set(payload, type_check); + mlxsw_reg_rtdp_ipip_gre_key_check_set(payload, gre_key_check); + mlxsw_reg_rtdp_ipip_ipv4_usip_set(payload, ipv4_usip); + mlxsw_reg_rtdp_ipip_expected_gre_key_set(payload, expected_gre_key); +} + /* MFCR - Management Fan Control Register * -------------------------------------- * This register controls the settings of the Fan Speed PWM mechanism. @@ -5982,15 +6354,6 @@ static inline void mlxsw_reg_mpsc_pack(char *payload, u8 local_port, bool e, MLXSW_REG_DEFINE(mgpc, MLXSW_REG_MGPC_ID, MLXSW_REG_MGPC_LEN); -enum mlxsw_reg_mgpc_counter_set_type { - /* No count */ - MLXSW_REG_MGPC_COUNTER_SET_TYPE_NO_COUT = 0x00, - /* Count packets and bytes */ - MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03, - /* Count only packets */ - MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS = 0x05, -}; - /* reg_mgpc_counter_set_type * Counter set type. * Access: OP @@ -6030,7 +6393,7 @@ MLXSW_ITEM64(reg, mgpc, packet_counter, 0x10, 0, 64); static inline void mlxsw_reg_mgpc_pack(char *payload, u32 counter_index, enum mlxsw_reg_mgpc_opcode opcode, - enum mlxsw_reg_mgpc_counter_set_type set_type) + enum mlxsw_reg_flow_counter_set_type set_type) { MLXSW_REG_ZERO(mgpc, payload); mlxsw_reg_mgpc_counter_index_set(payload, counter_index); @@ -6494,6 +6857,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(rgcr), MLXSW_REG(ritr), MLXSW_REG(ratr), + MLXSW_REG(rtdp), MLXSW_REG(ricnt), MLXSW_REG(ralta), MLXSW_REG(ralst), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index c6a3e61b53bd..ed7cd6c48019 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -58,6 +58,7 @@ #include <net/tc_act/tc_mirred.h> #include <net/netevent.h> #include <net/tc_act/tc_sample.h> +#include <net/addrconf.h> #include "spectrum.h" #include "pci.h" @@ -381,12 +382,14 @@ int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp, int err; mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, MLXSW_REG_MGPC_OPCODE_NOP, - MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS_BYTES); + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mgpc), mgpc_pl); if (err) return err; - *packets = mlxsw_reg_mgpc_packet_counter_get(mgpc_pl); - *bytes = mlxsw_reg_mgpc_byte_counter_get(mgpc_pl); + if (packets) + *packets = mlxsw_reg_mgpc_packet_counter_get(mgpc_pl); + if (bytes) + *bytes = mlxsw_reg_mgpc_byte_counter_get(mgpc_pl); return 0; } @@ -396,7 +399,7 @@ static int mlxsw_sp_flow_counter_clear(struct mlxsw_sp *mlxsw_sp, char mgpc_pl[MLXSW_REG_MGPC_LEN]; mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, MLXSW_REG_MGPC_OPCODE_CLEAR, - MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS_BYTES); + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mgpc), mgpc_pl); } @@ -1616,16 +1619,16 @@ mlxsw_sp_port_del_cls_matchall_sample(struct mlxsw_sp_port *mlxsw_sp_port) } static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, - __be16 protocol, - struct tc_cls_matchall_offload *cls, + struct tc_cls_matchall_offload *f, bool ingress) { struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry; + __be16 protocol = f->common.protocol; const struct tc_action *a; LIST_HEAD(actions); int err; - if (!tc_single_action(cls->exts)) { + if (!tcf_exts_has_one_action(f->exts)) { netdev_err(mlxsw_sp_port->dev, "only singular actions are supported\n"); return -EOPNOTSUPP; } @@ -1633,9 +1636,9 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL); if (!mall_tc_entry) return -ENOMEM; - mall_tc_entry->cookie = cls->cookie; + mall_tc_entry->cookie = f->cookie; - tcf_exts_to_list(cls->exts, &actions); + tcf_exts_to_list(f->exts, &actions); a = list_first_entry(&actions, struct tc_action, list); if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) { @@ -1647,7 +1650,7 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, mirror, a, ingress); } else if (is_tcf_sample(a) && protocol == htons(ETH_P_ALL)) { mall_tc_entry->type = MLXSW_SP_PORT_MALL_SAMPLE; - err = mlxsw_sp_port_add_cls_matchall_sample(mlxsw_sp_port, cls, + err = mlxsw_sp_port_add_cls_matchall_sample(mlxsw_sp_port, f, a, ingress); } else { err = -EOPNOTSUPP; @@ -1665,12 +1668,12 @@ err_add_action: } static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, - struct tc_cls_matchall_offload *cls) + struct tc_cls_matchall_offload *f) { struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry; mall_tc_entry = mlxsw_sp_port_mall_tc_entry_find(mlxsw_sp_port, - cls->cookie); + f->cookie); if (!mall_tc_entry) { netdev_dbg(mlxsw_sp_port->dev, "tc entry not found on port\n"); return; @@ -1692,49 +1695,72 @@ static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, kfree(mall_tc_entry); } -static int mlxsw_sp_setup_tc(struct net_device *dev, u32 handle, - u32 chain_index, __be16 proto, - struct tc_to_netdev *tc) +static int mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_cls_matchall_offload *f) { - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - bool ingress = TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS); + bool ingress; - if (chain_index) + if (is_classid_clsact_ingress(f->common.classid)) + ingress = true; + else if (is_classid_clsact_egress(f->common.classid)) + ingress = false; + else return -EOPNOTSUPP; - switch (tc->type) { - case TC_SETUP_MATCHALL: - switch (tc->cls_mall->command) { - case TC_CLSMATCHALL_REPLACE: - return mlxsw_sp_port_add_cls_matchall(mlxsw_sp_port, - proto, - tc->cls_mall, - ingress); - case TC_CLSMATCHALL_DESTROY: - mlxsw_sp_port_del_cls_matchall(mlxsw_sp_port, - tc->cls_mall); - return 0; - default: - return -EOPNOTSUPP; - } - case TC_SETUP_CLSFLOWER: - switch (tc->cls_flower->command) { - case TC_CLSFLOWER_REPLACE: - return mlxsw_sp_flower_replace(mlxsw_sp_port, ingress, - proto, tc->cls_flower); - case TC_CLSFLOWER_DESTROY: - mlxsw_sp_flower_destroy(mlxsw_sp_port, ingress, - tc->cls_flower); - return 0; - case TC_CLSFLOWER_STATS: - return mlxsw_sp_flower_stats(mlxsw_sp_port, ingress, - tc->cls_flower); - default: - return -EOPNOTSUPP; - } + if (f->common.chain_index) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_CLSMATCHALL_REPLACE: + return mlxsw_sp_port_add_cls_matchall(mlxsw_sp_port, f, + ingress); + case TC_CLSMATCHALL_DESTROY: + mlxsw_sp_port_del_cls_matchall(mlxsw_sp_port, f); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int +mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_cls_flower_offload *f) +{ + bool ingress; + + if (is_classid_clsact_ingress(f->common.classid)) + ingress = true; + else if (is_classid_clsact_egress(f->common.classid)) + ingress = false; + else + return -EOPNOTSUPP; + + switch (f->command) { + case TC_CLSFLOWER_REPLACE: + return mlxsw_sp_flower_replace(mlxsw_sp_port, ingress, f); + case TC_CLSFLOWER_DESTROY: + mlxsw_sp_flower_destroy(mlxsw_sp_port, ingress, f); + return 0; + case TC_CLSFLOWER_STATS: + return mlxsw_sp_flower_stats(mlxsw_sp_port, ingress, f); + default: + return -EOPNOTSUPP; } +} - return -EOPNOTSUPP; +static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + switch (type) { + case TC_SETUP_CLSMATCHALL: + return mlxsw_sp_setup_tc_cls_matchall(mlxsw_sp_port, type_data); + case TC_SETUP_CLSFLOWER: + return mlxsw_sp_setup_tc_cls_flower(mlxsw_sp_port, type_data); + default: + return -EOPNOTSUPP; + } } static const struct net_device_ops mlxsw_sp_port_netdev_ops = { @@ -3333,15 +3359,48 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { MLXSW_SP_RXL_MARK(ARPBC, MIRROR_TO_CPU, ARP, false), MLXSW_SP_RXL_MARK(ARPUC, MIRROR_TO_CPU, ARP, false), MLXSW_SP_RXL_NO_MARK(FID_MISS, TRAP_TO_CPU, IP2ME, false), + MLXSW_SP_RXL_MARK(IPV6_MLDV12_LISTENER_QUERY, MIRROR_TO_CPU, IPV6_MLD, + false), + MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_REPORT, TRAP_TO_CPU, IPV6_MLD, + false), + MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_DONE, TRAP_TO_CPU, IPV6_MLD, + false), + MLXSW_SP_RXL_NO_MARK(IPV6_MLDV2_LISTENER_REPORT, TRAP_TO_CPU, IPV6_MLD, + false), /* L3 traps */ - MLXSW_SP_RXL_NO_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_NO_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_NO_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_MARK(OSPF, TRAP_TO_CPU, OSPF, false), - MLXSW_SP_RXL_NO_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false), - MLXSW_SP_RXL_NO_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false), - MLXSW_SP_RXL_NO_MARK(HOST_MISS_IPV4, TRAP_TO_CPU, ARP_MISS, false), - MLXSW_SP_RXL_NO_MARK(BGP_IPV4, TRAP_TO_CPU, BGP_IPV4, false), + MLXSW_SP_RXL_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false), + MLXSW_SP_RXL_MARK(IPV6_UNSPECIFIED_ADDRESS, TRAP_TO_CPU, ROUTER_EXP, + false), + MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_SRC, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(IPV6_ALL_NODES_LINK, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(IPV6_ALL_ROUTERS_LINK, TRAP_TO_CPU, ROUTER_EXP, + false), + MLXSW_SP_RXL_MARK(IPV4_OSPF, TRAP_TO_CPU, OSPF, false), + MLXSW_SP_RXL_MARK(IPV6_OSPF, TRAP_TO_CPU, OSPF, false), + MLXSW_SP_RXL_MARK(IPV6_DHCP, TRAP_TO_CPU, DHCP, false), + MLXSW_SP_RXL_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false), + MLXSW_SP_RXL_MARK(IPV4_BGP, TRAP_TO_CPU, BGP, false), + MLXSW_SP_RXL_MARK(IPV6_BGP, TRAP_TO_CPU, BGP, false), + MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_SOLICITATION, TRAP_TO_CPU, IPV6_ND, + false), + MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_ADVERTISMENT, TRAP_TO_CPU, IPV6_ND, + false), + MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_SOLICITATION, TRAP_TO_CPU, IPV6_ND, + false), + MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_ADVERTISMENT, TRAP_TO_CPU, IPV6_ND, + false), + MLXSW_SP_RXL_MARK(L3_IPV6_REDIRECTION, TRAP_TO_CPU, IPV6_ND, false), + MLXSW_SP_RXL_MARK(IPV6_MC_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP, + false), + MLXSW_SP_RXL_MARK(HOST_MISS_IPV4, TRAP_TO_CPU, HOST_MISS, false), + MLXSW_SP_RXL_MARK(HOST_MISS_IPV6, TRAP_TO_CPU, HOST_MISS, false), + MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(IPIP_DECAP_ERROR, TRAP_TO_CPU, ROUTER_EXP, false), /* PKT Sample trap */ MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU, false, SP_IP2ME, DISCARD), @@ -3376,15 +3435,17 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) burst_size = 7; break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD: rate = 16 * 1024; burst_size = 10; break; - case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP: - case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS: case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND: rate = 1024; burst_size = 7; break; @@ -3433,21 +3494,23 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) priority = 5; tc = 5; break; - case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP: priority = 4; tc = 4; break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD: priority = 3; tc = 3; break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND: priority = 2; tc = 2; break; - case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS: case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE: priority = 1; @@ -3694,7 +3757,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_fids_fini(mlxsw_sp); } -static struct mlxsw_config_profile mlxsw_sp_config_profile = { +static const struct mlxsw_config_profile mlxsw_sp_config_profile = { .used_max_vepa_channels = 1, .max_vepa_channels = 0, .used_max_mid = 1, @@ -4363,6 +4426,10 @@ static struct notifier_block mlxsw_sp_inetaddr_nb __read_mostly = { .priority = 10, /* Must be called before FIB notifier block */ }; +static struct notifier_block mlxsw_sp_inet6addr_nb __read_mostly = { + .notifier_call = mlxsw_sp_inet6addr_event, +}; + static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = { .notifier_call = mlxsw_sp_router_netevent_event, }; @@ -4383,6 +4450,7 @@ static int __init mlxsw_sp_module_init(void) register_netdevice_notifier(&mlxsw_sp_netdevice_nb); register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); + register_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); register_netevent_notifier(&mlxsw_sp_router_netevent_nb); err = mlxsw_core_driver_register(&mlxsw_sp_driver); @@ -4399,6 +4467,7 @@ err_pci_driver_register: mlxsw_core_driver_unregister(&mlxsw_sp_driver); err_core_driver_register: unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); + unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); return err; @@ -4409,6 +4478,7 @@ static void __exit mlxsw_sp_module_exit(void) mlxsw_pci_driver_unregister(&mlxsw_sp_pci_driver); mlxsw_core_driver_unregister(&mlxsw_sp_driver); unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); + unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 5ef98d4d0ab6..84ce83acdc19 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -77,6 +77,7 @@ enum mlxsw_sp_rif_type { MLXSW_SP_RIF_TYPE_SUBPORT, MLXSW_SP_RIF_TYPE_VLAN, MLXSW_SP_RIF_TYPE_FID, + MLXSW_SP_RIF_TYPE_IPIP_LB, /* IP-in-IP loopback. */ MLXSW_SP_RIF_TYPE_MAX, }; @@ -384,6 +385,8 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused, int mlxsw_sp_netdevice_router_port_event(struct net_device *dev); int mlxsw_sp_inetaddr_event(struct notifier_block *unused, unsigned long event, void *ptr); +int mlxsw_sp_inet6addr_event(struct notifier_block *unused, + unsigned long event, void *ptr); int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, struct netdev_notifier_changeupper_info *info); void @@ -415,6 +418,7 @@ struct mlxsw_sp_acl_profile_ops { int (*ruleset_bind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, struct net_device *dev, bool ingress); void (*ruleset_unbind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv); + u16 (*ruleset_group_id)(void *ruleset_priv); size_t rule_priv_size; int (*rule_add)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, void *rule_priv, @@ -438,11 +442,16 @@ struct mlxsw_sp_acl_ruleset; /* spectrum_acl.c */ struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl); struct mlxsw_sp_acl_ruleset * -mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp, - struct net_device *dev, bool ingress, +mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp *mlxsw_sp, struct net_device *dev, + bool ingress, u32 chain_index, + enum mlxsw_sp_acl_profile profile); +struct mlxsw_sp_acl_ruleset * +mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp, struct net_device *dev, + bool ingress, u32 chain_index, enum mlxsw_sp_acl_profile profile); void mlxsw_sp_acl_ruleset_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_ruleset *ruleset); +u16 mlxsw_sp_acl_ruleset_group_id(struct mlxsw_sp_acl_ruleset *ruleset); struct mlxsw_sp_acl_rule_info * mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl); @@ -506,7 +515,7 @@ extern const struct mlxsw_sp_acl_ops mlxsw_sp_acl_tcam_ops; /* spectrum_flower.c */ int mlxsw_sp_flower_replace(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, - __be16 protocol, struct tc_cls_flower_offload *f); + struct tc_cls_flower_offload *f); void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, struct tc_cls_flower_offload *f); int mlxsw_sp_flower_stats(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 01a1501b56ca..4b2455e3e079 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -74,6 +74,7 @@ struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl) struct mlxsw_sp_acl_ruleset_ht_key { struct net_device *dev; /* dev this ruleset is bound to */ bool ingress; + u32 chain_index; const struct mlxsw_sp_acl_profile_ops *ops; }; @@ -163,7 +164,8 @@ static void mlxsw_sp_acl_ruleset_destroy(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_acl_ruleset_bind(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_ruleset *ruleset, - struct net_device *dev, bool ingress) + struct net_device *dev, bool ingress, + u32 chain_index) { const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; struct mlxsw_sp_acl *acl = mlxsw_sp->acl; @@ -171,13 +173,20 @@ static int mlxsw_sp_acl_ruleset_bind(struct mlxsw_sp *mlxsw_sp, ruleset->ht_key.dev = dev; ruleset->ht_key.ingress = ingress; + ruleset->ht_key.chain_index = chain_index; err = rhashtable_insert_fast(&acl->ruleset_ht, &ruleset->ht_node, mlxsw_sp_acl_ruleset_ht_params); if (err) return err; - err = ops->ruleset_bind(mlxsw_sp, ruleset->priv, dev, ingress); - if (err) - goto err_ops_ruleset_bind; + if (!ruleset->ht_key.chain_index) { + /* We only need ruleset with chain index 0, the implicit one, + * to be directly bound to device. The rest of the rulesets + * are bound by "Goto action set". + */ + err = ops->ruleset_bind(mlxsw_sp, ruleset->priv, dev, ingress); + if (err) + goto err_ops_ruleset_bind; + } return 0; err_ops_ruleset_bind: @@ -192,7 +201,8 @@ static void mlxsw_sp_acl_ruleset_unbind(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; struct mlxsw_sp_acl *acl = mlxsw_sp->acl; - ops->ruleset_unbind(mlxsw_sp, ruleset->priv); + if (!ruleset->ht_key.chain_index) + ops->ruleset_unbind(mlxsw_sp, ruleset->priv); rhashtable_remove_fast(&acl->ruleset_ht, &ruleset->ht_node, mlxsw_sp_acl_ruleset_ht_params); } @@ -211,14 +221,48 @@ static void mlxsw_sp_acl_ruleset_ref_dec(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_acl_ruleset_destroy(mlxsw_sp, ruleset); } +static struct mlxsw_sp_acl_ruleset * +__mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp_acl *acl, struct net_device *dev, + bool ingress, u32 chain_index, + const struct mlxsw_sp_acl_profile_ops *ops) +{ + struct mlxsw_sp_acl_ruleset_ht_key ht_key; + + memset(&ht_key, 0, sizeof(ht_key)); + ht_key.dev = dev; + ht_key.ingress = ingress; + ht_key.chain_index = chain_index; + ht_key.ops = ops; + return rhashtable_lookup_fast(&acl->ruleset_ht, &ht_key, + mlxsw_sp_acl_ruleset_ht_params); +} + struct mlxsw_sp_acl_ruleset * -mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp, - struct net_device *dev, bool ingress, +mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp *mlxsw_sp, struct net_device *dev, + bool ingress, u32 chain_index, + enum mlxsw_sp_acl_profile profile) +{ + const struct mlxsw_sp_acl_profile_ops *ops; + struct mlxsw_sp_acl *acl = mlxsw_sp->acl; + struct mlxsw_sp_acl_ruleset *ruleset; + + ops = acl->ops->profile_ops(mlxsw_sp, profile); + if (!ops) + return ERR_PTR(-EINVAL); + ruleset = __mlxsw_sp_acl_ruleset_lookup(acl, dev, ingress, + chain_index, ops); + if (!ruleset) + return ERR_PTR(-ENOENT); + return ruleset; +} + +struct mlxsw_sp_acl_ruleset * +mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp, struct net_device *dev, + bool ingress, u32 chain_index, enum mlxsw_sp_acl_profile profile) { const struct mlxsw_sp_acl_profile_ops *ops; struct mlxsw_sp_acl *acl = mlxsw_sp->acl; - struct mlxsw_sp_acl_ruleset_ht_key ht_key; struct mlxsw_sp_acl_ruleset *ruleset; int err; @@ -226,12 +270,8 @@ mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp, if (!ops) return ERR_PTR(-EINVAL); - memset(&ht_key, 0, sizeof(ht_key)); - ht_key.dev = dev; - ht_key.ingress = ingress; - ht_key.ops = ops; - ruleset = rhashtable_lookup_fast(&acl->ruleset_ht, &ht_key, - mlxsw_sp_acl_ruleset_ht_params); + ruleset = __mlxsw_sp_acl_ruleset_lookup(acl, dev, ingress, + chain_index, ops); if (ruleset) { mlxsw_sp_acl_ruleset_ref_inc(ruleset); return ruleset; @@ -239,7 +279,8 @@ mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp, ruleset = mlxsw_sp_acl_ruleset_create(mlxsw_sp, ops); if (IS_ERR(ruleset)) return ruleset; - err = mlxsw_sp_acl_ruleset_bind(mlxsw_sp, ruleset, dev, ingress); + err = mlxsw_sp_acl_ruleset_bind(mlxsw_sp, ruleset, dev, + ingress, chain_index); if (err) goto err_ruleset_bind; return ruleset; @@ -255,6 +296,13 @@ void mlxsw_sp_acl_ruleset_put(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_acl_ruleset_ref_dec(mlxsw_sp, ruleset); } +u16 mlxsw_sp_acl_ruleset_group_id(struct mlxsw_sp_acl_ruleset *ruleset) +{ + const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + + return ops->ruleset_group_id(ruleset->priv); +} + static int mlxsw_sp_acl_rulei_counter_alloc(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei) @@ -369,7 +417,7 @@ int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp, local_port = mlxsw_sp_port->local_port; in_port = false; } else { - /* If out_dev is NULL, the called wants to + /* If out_dev is NULL, the caller wants to * set forward to ingress port. */ local_port = 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h index 85d5001a5818..fb8031828454 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h @@ -70,6 +70,9 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_dip[] = { static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4[] = { MLXSW_AFK_ELEMENT_INST_U32(SRC_IP4, 0x00, 0, 32), + MLXSW_AFK_ELEMENT_INST_U32(IP_ECN, 0x04, 4, 2), + MLXSW_AFK_ELEMENT_INST_U32(IP_TTL_, 0x04, 24, 8), + MLXSW_AFK_ELEMENT_INST_U32(IP_DSCP, 0x08, 0, 6), MLXSW_AFK_ELEMENT_INST_U32(TCP_FLAGS, 0x08, 8, 9), /* TCP_CONTROL+TCP_ECN */ }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 61a10f166f97..50b40de1fb91 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -295,6 +295,12 @@ mlxsw_sp_acl_tcam_group_unbind(struct mlxsw_sp *mlxsw_sp, mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbt), ppbt_pl); } +static u16 +mlxsw_sp_acl_tcam_group_id(struct mlxsw_sp_acl_tcam_group *group) +{ + return group->id; +} + static unsigned int mlxsw_sp_acl_tcam_region_prio(struct mlxsw_sp_acl_tcam_region *region) { @@ -984,6 +990,9 @@ static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = { MLXSW_AFK_ELEMENT_VID, MLXSW_AFK_ELEMENT_PCP, MLXSW_AFK_ELEMENT_TCP_FLAGS, + MLXSW_AFK_ELEMENT_IP_TTL_, + MLXSW_AFK_ELEMENT_IP_ECN, + MLXSW_AFK_ELEMENT_IP_DSCP, }; static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv6[] = { @@ -1060,6 +1069,14 @@ mlxsw_sp_acl_tcam_flower_ruleset_unbind(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_acl_tcam_group_unbind(mlxsw_sp, &ruleset->group); } +static u16 +mlxsw_sp_acl_tcam_flower_ruleset_group_id(void *ruleset_priv) +{ + struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv; + + return mlxsw_sp_acl_tcam_group_id(&ruleset->group); +} + static int mlxsw_sp_acl_tcam_flower_rule_add(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, void *rule_priv, @@ -1096,6 +1113,7 @@ static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = { .ruleset_del = mlxsw_sp_acl_tcam_flower_ruleset_del, .ruleset_bind = mlxsw_sp_acl_tcam_flower_ruleset_bind, .ruleset_unbind = mlxsw_sp_acl_tcam_flower_ruleset_unbind, + .ruleset_group_id = mlxsw_sp_acl_tcam_flower_ruleset_group_id, .rule_priv_size = sizeof(struct mlxsw_sp_acl_tcam_flower_rule), .rule_add = mlxsw_sp_acl_tcam_flower_rule_add, .rule_del = mlxsw_sp_acl_tcam_flower_rule_del, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index af2c65a3fd9f..51e6846da72b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -74,6 +74,9 @@ static struct devlink_dpipe_header mlxsw_sp_dpipe_header_metadata = { static struct devlink_dpipe_header *mlxsw_dpipe_headers[] = { &mlxsw_sp_dpipe_header_metadata, + &devlink_dpipe_header_ethernet, + &devlink_dpipe_header_ipv4, + &devlink_dpipe_header_ipv6, }; static struct devlink_dpipe_headers mlxsw_sp_dpipe_headers = { @@ -114,26 +117,6 @@ static int mlxsw_sp_dpipe_table_erif_matches_dump(void *priv, return devlink_dpipe_match_put(skb, &match); } -static void mlxsw_sp_erif_entry_clear(struct devlink_dpipe_entry *entry) -{ - unsigned int value_count, value_index; - struct devlink_dpipe_value *value; - - value = entry->action_values; - value_count = entry->action_values_count; - for (value_index = 0; value_index < value_count; value_index++) { - kfree(value[value_index].value); - kfree(value[value_index].mask); - } - - value = entry->match_values; - value_count = entry->match_values_count; - for (value_index = 0; value_index < value_count; value_index++) { - kfree(value[value_index].value); - kfree(value[value_index].mask); - } -} - static void mlxsw_sp_erif_match_action_prepare(struct devlink_dpipe_match *match, struct devlink_dpipe_action *action) @@ -215,8 +198,8 @@ static int mlxsw_sp_erif_entry_get(struct mlxsw_sp *mlxsw_sp, } static int -mlxsw_sp_table_erif_entries_dump(void *priv, bool counters_enabled, - struct devlink_dpipe_dump_ctx *dump_ctx) +mlxsw_sp_dpipe_table_erif_entries_dump(void *priv, bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) { struct devlink_dpipe_value match_value, action_value; struct devlink_dpipe_action action = {0}; @@ -270,16 +253,16 @@ start_again: goto start_again; rtnl_unlock(); - mlxsw_sp_erif_entry_clear(&entry); + devlink_dpipe_entry_clear(&entry); return 0; err_entry_append: err_entry_get: rtnl_unlock(); - mlxsw_sp_erif_entry_clear(&entry); + devlink_dpipe_entry_clear(&entry); return err; } -static int mlxsw_sp_table_erif_counters_update(void *priv, bool enable) +static int mlxsw_sp_dpipe_table_erif_counters_update(void *priv, bool enable) { struct mlxsw_sp *mlxsw_sp = priv; int i; @@ -301,24 +284,29 @@ static int mlxsw_sp_table_erif_counters_update(void *priv, bool enable) return 0; } +static u64 mlxsw_sp_dpipe_table_erif_size_get(void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); +} + static struct devlink_dpipe_table_ops mlxsw_sp_erif_ops = { .matches_dump = mlxsw_sp_dpipe_table_erif_matches_dump, .actions_dump = mlxsw_sp_dpipe_table_erif_actions_dump, - .entries_dump = mlxsw_sp_table_erif_entries_dump, - .counters_set_update = mlxsw_sp_table_erif_counters_update, + .entries_dump = mlxsw_sp_dpipe_table_erif_entries_dump, + .counters_set_update = mlxsw_sp_dpipe_table_erif_counters_update, + .size_get = mlxsw_sp_dpipe_table_erif_size_get, }; static int mlxsw_sp_dpipe_erif_table_init(struct mlxsw_sp *mlxsw_sp) { struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); - u64 table_size; - table_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); return devlink_dpipe_table_register(devlink, MLXSW_SP_DPIPE_TABLE_NAME_ERIF, &mlxsw_sp_erif_ops, - mlxsw_sp, table_size, - false); + mlxsw_sp, false); } static void mlxsw_sp_dpipe_erif_table_fini(struct mlxsw_sp *mlxsw_sp) @@ -328,6 +316,516 @@ static void mlxsw_sp_dpipe_erif_table_fini(struct mlxsw_sp *mlxsw_sp) devlink_dpipe_table_unregister(devlink, MLXSW_SP_DPIPE_TABLE_NAME_ERIF); } +static int mlxsw_sp_dpipe_table_host_matches_dump(struct sk_buff *skb, int type) +{ + struct devlink_dpipe_match match = {0}; + int err; + + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &mlxsw_sp_dpipe_header_metadata; + match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT; + + err = devlink_dpipe_match_put(skb, &match); + if (err) + return err; + + switch (type) { + case AF_INET: + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &devlink_dpipe_header_ipv4; + match.field_id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP; + break; + case AF_INET6: + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &devlink_dpipe_header_ipv6; + match.field_id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP; + break; + default: + WARN_ON(1); + return -EINVAL; + } + + return devlink_dpipe_match_put(skb, &match); +} + +static int +mlxsw_sp_dpipe_table_host4_matches_dump(void *priv, struct sk_buff *skb) +{ + return mlxsw_sp_dpipe_table_host_matches_dump(skb, AF_INET); +} + +static int +mlxsw_sp_dpipe_table_host_actions_dump(void *priv, struct sk_buff *skb) +{ + struct devlink_dpipe_action action = {0}; + + action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action.header = &devlink_dpipe_header_ethernet; + action.field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC; + + return devlink_dpipe_action_put(skb, &action); +} + +enum mlxsw_sp_dpipe_table_host_match { + MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF, + MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP, + MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT, +}; + +static void +mlxsw_sp_dpipe_table_host_match_action_prepare(struct devlink_dpipe_match *matches, + struct devlink_dpipe_action *action, + int type) +{ + struct devlink_dpipe_match *match; + + match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF]; + match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match->header = &mlxsw_sp_dpipe_header_metadata; + match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT; + + match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP]; + match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + switch (type) { + case AF_INET: + match->header = &devlink_dpipe_header_ipv4; + match->field_id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP; + break; + case AF_INET6: + match->header = &devlink_dpipe_header_ipv6; + match->field_id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP; + break; + default: + WARN_ON(1); + return; + } + + action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action->header = &devlink_dpipe_header_ethernet; + action->field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC; +} + +static int +mlxsw_sp_dpipe_table_host_entry_prepare(struct devlink_dpipe_entry *entry, + struct devlink_dpipe_value *match_values, + struct devlink_dpipe_match *matches, + struct devlink_dpipe_value *action_value, + struct devlink_dpipe_action *action, + int type) +{ + struct devlink_dpipe_value *match_value; + struct devlink_dpipe_match *match; + + entry->match_values = match_values; + entry->match_values_count = MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT; + + entry->action_values = action_value; + entry->action_values_count = 1; + + match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF]; + match_value = &match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF]; + + match_value->match = match; + match_value->value_size = sizeof(u32); + match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); + if (!match_value->value) + return -ENOMEM; + + match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP]; + match_value = &match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP]; + + match_value->match = match; + switch (type) { + case AF_INET: + match_value->value_size = sizeof(u32); + break; + case AF_INET6: + match_value->value_size = sizeof(struct in6_addr); + break; + default: + WARN_ON(1); + return -EINVAL; + } + + match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); + if (!match_value->value) + return -ENOMEM; + + action_value->action = action; + action_value->value_size = sizeof(u64); + action_value->value = kmalloc(action_value->value_size, GFP_KERNEL); + if (!action_value->value) + return -ENOMEM; + + return 0; +} + +static void +__mlxsw_sp_dpipe_table_host_entry_fill(struct devlink_dpipe_entry *entry, + struct mlxsw_sp_rif *rif, + unsigned char *ha, void *dip) +{ + struct devlink_dpipe_value *value; + u32 *rif_value; + u8 *ha_value; + + /* Set Match RIF index */ + value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF]; + + rif_value = value->value; + *rif_value = mlxsw_sp_rif_index(rif); + value->mapping_value = mlxsw_sp_rif_dev_ifindex(rif); + value->mapping_valid = true; + + /* Set Match DIP */ + value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP]; + memcpy(value->value, dip, value->value_size); + + /* Set Action DMAC */ + value = entry->action_values; + ha_value = value->value; + ether_addr_copy(ha_value, ha); +} + +static void +mlxsw_sp_dpipe_table_host4_entry_fill(struct devlink_dpipe_entry *entry, + struct mlxsw_sp_neigh_entry *neigh_entry, + struct mlxsw_sp_rif *rif) +{ + unsigned char *ha; + u32 dip; + + ha = mlxsw_sp_neigh_entry_ha(neigh_entry); + dip = mlxsw_sp_neigh4_entry_dip(neigh_entry); + __mlxsw_sp_dpipe_table_host_entry_fill(entry, rif, ha, &dip); +} + +static void +mlxsw_sp_dpipe_table_host6_entry_fill(struct devlink_dpipe_entry *entry, + struct mlxsw_sp_neigh_entry *neigh_entry, + struct mlxsw_sp_rif *rif) +{ + struct in6_addr *dip; + unsigned char *ha; + + ha = mlxsw_sp_neigh_entry_ha(neigh_entry); + dip = mlxsw_sp_neigh6_entry_dip(neigh_entry); + + __mlxsw_sp_dpipe_table_host_entry_fill(entry, rif, ha, dip); +} + +static void +mlxsw_sp_dpipe_table_host_entry_fill(struct mlxsw_sp *mlxsw_sp, + struct devlink_dpipe_entry *entry, + struct mlxsw_sp_neigh_entry *neigh_entry, + struct mlxsw_sp_rif *rif, + int type) +{ + int err; + + switch (type) { + case AF_INET: + mlxsw_sp_dpipe_table_host4_entry_fill(entry, neigh_entry, rif); + break; + case AF_INET6: + mlxsw_sp_dpipe_table_host6_entry_fill(entry, neigh_entry, rif); + break; + default: + WARN_ON(1); + return; + } + + err = mlxsw_sp_neigh_counter_get(mlxsw_sp, neigh_entry, + &entry->counter); + if (!err) + entry->counter_valid = true; +} + +static int +mlxsw_sp_dpipe_table_host_entries_get(struct mlxsw_sp *mlxsw_sp, + struct devlink_dpipe_entry *entry, + bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx, + int type) +{ + int rif_neigh_count = 0; + int rif_neigh_skip = 0; + int neigh_count = 0; + int rif_count; + int i, j; + int err; + + rtnl_lock(); + i = 0; + rif_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); +start_again: + err = devlink_dpipe_entry_ctx_prepare(dump_ctx); + if (err) + goto err_ctx_prepare; + j = 0; + rif_neigh_skip = rif_neigh_count; + for (; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { + struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); + struct mlxsw_sp_neigh_entry *neigh_entry; + + if (!rif) + continue; + + rif_neigh_count = 0; + mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) { + int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry); + + if (neigh_type != type) + continue; + + if (neigh_type == AF_INET6 && + mlxsw_sp_neigh_ipv6_ignore(neigh_entry)) + continue; + + if (rif_neigh_count < rif_neigh_skip) + goto skip; + + mlxsw_sp_dpipe_table_host_entry_fill(mlxsw_sp, entry, + neigh_entry, rif, + type); + entry->index = neigh_count; + err = devlink_dpipe_entry_ctx_append(dump_ctx, entry); + if (err) { + if (err == -EMSGSIZE) { + if (!j) + goto err_entry_append; + else + goto out; + } + goto err_entry_append; + } + neigh_count++; + j++; +skip: + rif_neigh_count++; + } + rif_neigh_skip = 0; + } +out: + devlink_dpipe_entry_ctx_close(dump_ctx); + if (i != rif_count) + goto start_again; + + rtnl_unlock(); + return 0; + +err_ctx_prepare: +err_entry_append: + rtnl_unlock(); + return err; +} + +static int +mlxsw_sp_dpipe_table_host_entries_dump(struct mlxsw_sp *mlxsw_sp, + bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx, + int type) +{ + struct devlink_dpipe_value match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT]; + struct devlink_dpipe_match matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT]; + struct devlink_dpipe_value action_value; + struct devlink_dpipe_action action = {0}; + struct devlink_dpipe_entry entry = {0}; + int err; + + memset(matches, 0, MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT * + sizeof(matches[0])); + memset(match_values, 0, MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT * + sizeof(match_values[0])); + memset(&action_value, 0, sizeof(action_value)); + + mlxsw_sp_dpipe_table_host_match_action_prepare(matches, &action, type); + err = mlxsw_sp_dpipe_table_host_entry_prepare(&entry, match_values, + matches, &action_value, + &action, type); + if (err) + goto out; + + err = mlxsw_sp_dpipe_table_host_entries_get(mlxsw_sp, &entry, + counters_enabled, dump_ctx, + type); +out: + devlink_dpipe_entry_clear(&entry); + return err; +} + +static int +mlxsw_sp_dpipe_table_host4_entries_dump(void *priv, bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return mlxsw_sp_dpipe_table_host_entries_dump(mlxsw_sp, + counters_enabled, + dump_ctx, AF_INET); +} + +static void +mlxsw_sp_dpipe_table_host_counters_update(struct mlxsw_sp *mlxsw_sp, + bool enable, int type) +{ + int i; + + rtnl_lock(); + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { + struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); + struct mlxsw_sp_neigh_entry *neigh_entry; + + if (!rif) + continue; + mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) { + int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry); + + if (neigh_type != type) + continue; + + if (neigh_type == AF_INET6 && + mlxsw_sp_neigh_ipv6_ignore(neigh_entry)) + continue; + + mlxsw_sp_neigh_entry_counter_update(mlxsw_sp, + neigh_entry, + enable); + } + } + rtnl_unlock(); +} + +static int mlxsw_sp_dpipe_table_host4_counters_update(void *priv, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp_dpipe_table_host_counters_update(mlxsw_sp, enable, AF_INET); + return 0; +} + +static u64 +mlxsw_sp_dpipe_table_host_size_get(struct mlxsw_sp *mlxsw_sp, int type) +{ + u64 size = 0; + int i; + + rtnl_lock(); + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { + struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); + struct mlxsw_sp_neigh_entry *neigh_entry; + + if (!rif) + continue; + mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) { + int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry); + + if (neigh_type != type) + continue; + + if (neigh_type == AF_INET6 && + mlxsw_sp_neigh_ipv6_ignore(neigh_entry)) + continue; + + size++; + } + } + rtnl_unlock(); + + return size; +} + +static u64 mlxsw_sp_dpipe_table_host4_size_get(void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return mlxsw_sp_dpipe_table_host_size_get(mlxsw_sp, AF_INET); +} + +static struct devlink_dpipe_table_ops mlxsw_sp_host4_ops = { + .matches_dump = mlxsw_sp_dpipe_table_host4_matches_dump, + .actions_dump = mlxsw_sp_dpipe_table_host_actions_dump, + .entries_dump = mlxsw_sp_dpipe_table_host4_entries_dump, + .counters_set_update = mlxsw_sp_dpipe_table_host4_counters_update, + .size_get = mlxsw_sp_dpipe_table_host4_size_get, +}; + +static int mlxsw_sp_dpipe_host4_table_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + return devlink_dpipe_table_register(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_HOST4, + &mlxsw_sp_host4_ops, + mlxsw_sp, false); +} + +static void mlxsw_sp_dpipe_host4_table_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + devlink_dpipe_table_unregister(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_HOST4); +} + +static int +mlxsw_sp_dpipe_table_host6_matches_dump(void *priv, struct sk_buff *skb) +{ + return mlxsw_sp_dpipe_table_host_matches_dump(skb, AF_INET6); +} + +static int +mlxsw_sp_dpipe_table_host6_entries_dump(void *priv, bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return mlxsw_sp_dpipe_table_host_entries_dump(mlxsw_sp, + counters_enabled, + dump_ctx, AF_INET6); +} + +static int mlxsw_sp_dpipe_table_host6_counters_update(void *priv, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp_dpipe_table_host_counters_update(mlxsw_sp, enable, AF_INET6); + return 0; +} + +static u64 mlxsw_sp_dpipe_table_host6_size_get(void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return mlxsw_sp_dpipe_table_host_size_get(mlxsw_sp, AF_INET6); +} + +static struct devlink_dpipe_table_ops mlxsw_sp_host6_ops = { + .matches_dump = mlxsw_sp_dpipe_table_host6_matches_dump, + .actions_dump = mlxsw_sp_dpipe_table_host_actions_dump, + .entries_dump = mlxsw_sp_dpipe_table_host6_entries_dump, + .counters_set_update = mlxsw_sp_dpipe_table_host6_counters_update, + .size_get = mlxsw_sp_dpipe_table_host6_size_get, +}; + +static int mlxsw_sp_dpipe_host6_table_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + return devlink_dpipe_table_register(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_HOST6, + &mlxsw_sp_host6_ops, + mlxsw_sp, false); +} + +static void mlxsw_sp_dpipe_host6_table_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + devlink_dpipe_table_unregister(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_HOST6); +} + int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp) { struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); @@ -339,10 +837,22 @@ int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp) return err; err = mlxsw_sp_dpipe_erif_table_init(mlxsw_sp); if (err) - goto err_erif_register; + goto err_erif_table_init; + + err = mlxsw_sp_dpipe_host4_table_init(mlxsw_sp); + if (err) + goto err_host4_table_init; + + err = mlxsw_sp_dpipe_host6_table_init(mlxsw_sp); + if (err) + goto err_host6_table_init; return 0; -err_erif_register: +err_host6_table_init: + mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp); +err_host4_table_init: + mlxsw_sp_dpipe_erif_table_fini(mlxsw_sp); +err_erif_table_init: devlink_dpipe_headers_unregister(priv_to_devlink(mlxsw_sp->core)); return err; } @@ -351,6 +861,8 @@ void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp) { struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + mlxsw_sp_dpipe_host6_table_fini(mlxsw_sp); + mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp); mlxsw_sp_dpipe_erif_table_fini(mlxsw_sp); devlink_dpipe_headers_unregister(devlink); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h index d2089298cba3..283fde4e6783 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h @@ -35,9 +35,26 @@ #ifndef _MLXSW_PIPELINE_H_ #define _MLXSW_PIPELINE_H_ +#if IS_ENABLED(CONFIG_NET_DEVLINK) + int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp); +#else + +static inline int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp) +{ + return 0; +} + +static inline void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp) +{ +} + +#endif + #define MLXSW_SP_DPIPE_TABLE_NAME_ERIF "mlxsw_erif" +#define MLXSW_SP_DPIPE_TABLE_NAME_HOST4 "mlxsw_host4" +#define MLXSW_SP_DPIPE_TABLE_NAME_HOST6 "mlxsw_host6" #endif /* _MLXSW_PIPELINE_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c index 6afbe9ec64e2..bbd238e50f05 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c @@ -109,7 +109,6 @@ static const int mlxsw_sp_sfgc_uc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { static const int mlxsw_sp_sfgc_bc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { [MLXSW_REG_SFGC_TYPE_BROADCAST] = 1, - [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6] = 1, [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_NON_IP] = 1, [MLXSW_REG_SFGC_TYPE_IPV4_LINK_LOCAL] = 1, [MLXSW_REG_SFGC_TYPE_IPV6_ALL_HOST] = 1, @@ -117,6 +116,7 @@ static const int mlxsw_sp_sfgc_bc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { static const int mlxsw_sp_sfgc_mc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV4] = 1, + [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6] = 1, }; static const int *mlxsw_sp_packet_type_sfgc_types[] = { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 21bb2bf62d3e..8aace9a06a5d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -45,7 +45,7 @@ #include "core_acl_flex_keys.h" static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, - struct net_device *dev, + struct net_device *dev, bool ingress, struct mlxsw_sp_acl_rule_info *rulei, struct tcf_exts *exts) { @@ -53,7 +53,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, LIST_HEAD(actions); int err; - if (tc_no_actions(exts)) + if (!tcf_exts_has_actions(exts)) return 0; /* Count action is inserted first */ @@ -71,6 +71,20 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, err = mlxsw_sp_acl_rulei_act_trap(rulei); if (err) return err; + } else if (is_tcf_gact_goto_chain(a)) { + u32 chain_index = tcf_gact_goto_chain_index(a); + struct mlxsw_sp_acl_ruleset *ruleset; + u16 group_id; + + ruleset = mlxsw_sp_acl_ruleset_lookup(mlxsw_sp, dev, + ingress, + chain_index, + MLXSW_SP_ACL_PROFILE_FLOWER); + if (IS_ERR(ruleset)) + return PTR_ERR(ruleset); + + group_id = mlxsw_sp_acl_ruleset_group_id(ruleset); + mlxsw_sp_acl_rulei_act_jump(rulei, group_id); } else if (is_tcf_mirred_egress_redirect(a)) { int ifindex = tcf_mirred_ifindex(a); struct net_device *out_dev; @@ -212,11 +226,46 @@ static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp, return 0; } +static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct tc_cls_flower_offload *f, + u16 n_proto) +{ + struct flow_dissector_key_ip *key, *mask; + + if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) + return 0; + + if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6) { + dev_err(mlxsw_sp->bus_info->dev, "IP keys supported only for IPv4/6\n"); + return -EINVAL; + } + + key = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IP, + f->key); + mask = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IP, + f->mask); + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_TTL_, + key->ttl, mask->ttl); + + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_ECN, + key->tos & 0x3, mask->tos & 0x3); + + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_DSCP, + key->tos >> 6, mask->tos >> 6); + + return 0; +} + static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, - struct net_device *dev, + struct net_device *dev, bool ingress, struct mlxsw_sp_acl_rule_info *rulei, struct tc_cls_flower_offload *f) { + u16 n_proto_mask = 0; + u16 n_proto_key = 0; u16 addr_type = 0; u8 ip_proto = 0; int err; @@ -229,12 +278,13 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | BIT(FLOW_DISSECTOR_KEY_PORTS) | BIT(FLOW_DISSECTOR_KEY_TCP) | + BIT(FLOW_DISSECTOR_KEY_IP) | BIT(FLOW_DISSECTOR_KEY_VLAN))) { dev_err(mlxsw_sp->bus_info->dev, "Unsupported key\n"); return -EOPNOTSUPP; } - mlxsw_sp_acl_rulei_priority(rulei, f->prio); + mlxsw_sp_acl_rulei_priority(rulei, f->common.prio); if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { struct flow_dissector_key_control *key = @@ -253,8 +303,8 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, skb_flow_dissector_target(f->dissector, FLOW_DISSECTOR_KEY_BASIC, f->mask); - u16 n_proto_key = ntohs(key->n_proto); - u16 n_proto_mask = ntohs(mask->n_proto); + n_proto_key = ntohs(key->n_proto); + n_proto_mask = ntohs(mask->n_proto); if (n_proto_key == ETH_P_ALL) { n_proto_key = 0; @@ -324,11 +374,16 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, if (err) return err; - return mlxsw_sp_flower_parse_actions(mlxsw_sp, dev, rulei, f->exts); + err = mlxsw_sp_flower_parse_ip(mlxsw_sp, rulei, f, n_proto_key & n_proto_mask); + if (err) + return err; + + return mlxsw_sp_flower_parse_actions(mlxsw_sp, dev, ingress, + rulei, f->exts); } int mlxsw_sp_flower_replace(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, - __be16 protocol, struct tc_cls_flower_offload *f) + struct tc_cls_flower_offload *f) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct net_device *dev = mlxsw_sp_port->dev; @@ -338,6 +393,7 @@ int mlxsw_sp_flower_replace(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, int err; ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, dev, ingress, + f->common.chain_index, MLXSW_SP_ACL_PROFILE_FLOWER); if (IS_ERR(ruleset)) return PTR_ERR(ruleset); @@ -349,7 +405,7 @@ int mlxsw_sp_flower_replace(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, } rulei = mlxsw_sp_acl_rule_rulei(rule); - err = mlxsw_sp_flower_parse(mlxsw_sp, dev, rulei, f); + err = mlxsw_sp_flower_parse(mlxsw_sp, dev, ingress, rulei, f); if (err) goto err_flower_parse; @@ -381,7 +437,7 @@ void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, struct mlxsw_sp_acl_rule *rule; ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, mlxsw_sp_port->dev, - ingress, + ingress, f->common.chain_index, MLXSW_SP_ACL_PROFILE_FLOWER); if (IS_ERR(ruleset)) return; @@ -407,7 +463,7 @@ int mlxsw_sp_flower_stats(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, int err; ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, mlxsw_sp_port->dev, - ingress, + ingress, f->common.chain_index, MLXSW_SP_ACL_PROFILE_FLOWER); if (WARN_ON(IS_ERR(ruleset))) return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c new file mode 100644 index 000000000000..702fe945227c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -0,0 +1,214 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Petr Machata <petrm@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <net/ip_tunnels.h> + +#include "spectrum_ipip.h" + +static bool +mlxsw_sp_ipip_netdev_has_ikey(const struct net_device *ol_dev) +{ + struct ip_tunnel *tun = netdev_priv(ol_dev); + + return !!(tun->parms.i_flags & TUNNEL_KEY); +} + +static bool +mlxsw_sp_ipip_netdev_has_okey(const struct net_device *ol_dev) +{ + struct ip_tunnel *tun = netdev_priv(ol_dev); + + return !!(tun->parms.o_flags & TUNNEL_KEY); +} + +static u32 mlxsw_sp_ipip_netdev_ikey(const struct net_device *ol_dev) +{ + struct ip_tunnel *tun = netdev_priv(ol_dev); + + return mlxsw_sp_ipip_netdev_has_ikey(ol_dev) ? + be32_to_cpu(tun->parms.i_key) : 0; +} + +static u32 mlxsw_sp_ipip_netdev_okey(const struct net_device *ol_dev) +{ + struct ip_tunnel *tun = netdev_priv(ol_dev); + + return mlxsw_sp_ipip_netdev_has_okey(ol_dev) ? + be32_to_cpu(tun->parms.o_key) : 0; +} + +static int +mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); + __be32 daddr4 = mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev); + char ratr_pl[MLXSW_REG_RATR_LEN]; + + mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, + true, MLXSW_REG_RATR_TYPE_IPIP, + adj_index, rif_index); + mlxsw_reg_ratr_ipip4_entry_pack(ratr_pl, be32_to_cpu(daddr4)); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); +} + +static int +mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp, + u32 tunnel_index, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + bool has_ikey = mlxsw_sp_ipip_netdev_has_ikey(ipip_entry->ol_dev); + u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); + u32 ikey = mlxsw_sp_ipip_netdev_ikey(ipip_entry->ol_dev); + char rtdp_pl[MLXSW_REG_RTDP_LEN]; + unsigned int type_check; + u32 daddr4; + + mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index); + + type_check = has_ikey ? + MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY : + MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE; + + /* Linux demuxes tunnels based on packet SIP (which must match tunnel + * remote IP). Thus configure decap so that it filters out packets that + * are not IPv4 or have the wrong SIP. IPIP_DECAP_ERROR trap is + * generated for packets that fail this criterion. Linux then handles + * such packets in slow path and generates ICMP destination unreachable. + */ + daddr4 = be32_to_cpu(mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev)); + mlxsw_reg_rtdp_ipip4_pack(rtdp_pl, rif_index, + MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV4, + type_check, has_ikey, daddr4, ikey); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl); +} + +static int +mlxsw_sp_ipip_fib_entry_op_gre4_ralue(struct mlxsw_sp *mlxsw_sp, + u32 dip, u8 prefix_len, u16 ul_vr_id, + enum mlxsw_reg_ralue_op op, + u32 tunnel_index) +{ + char ralue_pl[MLXSW_REG_RALUE_LEN]; + + mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_REG_RALXX_PROTOCOL_IPV4, op, + ul_vr_id, prefix_len, dip); + mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl, tunnel_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + +static int mlxsw_sp_ipip_fib_entry_op_gre4(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + enum mlxsw_reg_ralue_op op, + u32 tunnel_index) +{ + u16 ul_vr_id = mlxsw_sp_ipip_lb_ul_vr_id(ipip_entry->ol_lb); + __be32 dip; + int err; + + err = mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(mlxsw_sp, tunnel_index, + ipip_entry); + if (err) + return err; + + dip = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4, + ipip_entry->ol_dev).addr4; + return mlxsw_sp_ipip_fib_entry_op_gre4_ralue(mlxsw_sp, be32_to_cpu(dip), + 32, ul_vr_id, op, + tunnel_index); +} + +static bool mlxsw_sp_ipip_tunnel_complete(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev) +{ + union mlxsw_sp_l3addr saddr = mlxsw_sp_ipip_netdev_saddr(proto, ol_dev); + union mlxsw_sp_l3addr daddr = mlxsw_sp_ipip_netdev_daddr(proto, ol_dev); + union mlxsw_sp_l3addr naddr = {0}; + + /* Tunnels with unset local or remote address are valid in Linux and + * used for lightweight tunnels (LWT) and Non-Broadcast Multi-Access + * (NBMA) tunnels. In principle these can be offloaded, but the driver + * currently doesn't support this. So punt. + */ + return memcmp(&saddr, &naddr, sizeof(naddr)) && + memcmp(&daddr, &naddr, sizeof(naddr)); +} + +static bool mlxsw_sp_ipip_can_offload_gre4(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev, + enum mlxsw_sp_l3proto ol_proto) +{ + struct ip_tunnel *tunnel = netdev_priv(ol_dev); + __be16 okflags = TUNNEL_KEY; /* We can't offload any other features. */ + bool inherit_ttl = tunnel->parms.iph.ttl == 0; + bool inherit_tos = tunnel->parms.iph.tos & 0x1; + + return (tunnel->parms.i_flags & ~okflags) == 0 && + (tunnel->parms.o_flags & ~okflags) == 0 && + inherit_ttl && inherit_tos && + mlxsw_sp_ipip_tunnel_complete(MLXSW_SP_L3_PROTO_IPV4, ol_dev); +} + +static struct mlxsw_sp_rif_ipip_lb_config +mlxsw_sp_ipip_ol_loopback_config_gre4(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev) +{ + enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt; + + lb_ipipt = mlxsw_sp_ipip_netdev_has_okey(ol_dev) ? + MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP : + MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP; + return (struct mlxsw_sp_rif_ipip_lb_config){ + .lb_ipipt = lb_ipipt, + .okey = mlxsw_sp_ipip_netdev_okey(ol_dev), + .ul_protocol = MLXSW_SP_L3_PROTO_IPV4, + .saddr = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4, + ol_dev), + }; +} + +static const struct mlxsw_sp_ipip_ops mlxsw_sp_ipip_gre4_ops = { + .dev_type = ARPHRD_IPGRE, + .ul_proto = MLXSW_SP_L3_PROTO_IPV4, + .nexthop_update = mlxsw_sp_ipip_nexthop_update_gre4, + .fib_entry_op = mlxsw_sp_ipip_fib_entry_op_gre4, + .can_offload = mlxsw_sp_ipip_can_offload_gre4, + .ol_loopback_config = mlxsw_sp_ipip_ol_loopback_config_gre4, +}; + +const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[] = { + [MLXSW_SP_IPIP_TYPE_GRE4] = &mlxsw_sp_ipip_gre4_ops, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h new file mode 100644 index 000000000000..1c2db831d83b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h @@ -0,0 +1,79 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Petr Machata <petrm@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_IPIP_H_ +#define _MLXSW_IPIP_H_ + +#include "spectrum_router.h" +#include <net/ip_fib.h> + +enum mlxsw_sp_ipip_type { + MLXSW_SP_IPIP_TYPE_GRE4, + MLXSW_SP_IPIP_TYPE_MAX, +}; + +struct mlxsw_sp_ipip_entry { + enum mlxsw_sp_ipip_type ipipt; + struct net_device *ol_dev; /* Overlay. */ + struct mlxsw_sp_rif_ipip_lb *ol_lb; + unsigned int ref_count; /* Number of next hops using the tunnel. */ + struct mlxsw_sp_fib_entry *decap_fib_entry; + struct list_head ipip_list_node; +}; + +struct mlxsw_sp_ipip_ops { + int dev_type; + enum mlxsw_sp_l3proto ul_proto; /* Underlay. */ + + int (*nexthop_update)(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_ipip_entry *ipip_entry); + + bool (*can_offload)(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev, + enum mlxsw_sp_l3proto ol_proto); + + /* Return a configuration for creating an overlay loopback RIF. */ + struct mlxsw_sp_rif_ipip_lb_config + (*ol_loopback_config)(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev); + + int (*fib_entry_op)(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + enum mlxsw_reg_ralue_op op, + u32 tunnel_index); +}; + +extern const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[]; + +#endif /* _MLXSW_IPIP_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 4b2e0fd7d51e..f0fb898533fb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1,9 +1,10 @@ /* * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c - * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016-2017 Mellanox Technologies. All rights reserved. * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com> * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com> + * Copyright (c) 2017 Petr Machata <petrm@mellanox.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -43,18 +44,27 @@ #include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/if_bridge.h> +#include <linux/socket.h> +#include <linux/route.h> #include <net/netevent.h> #include <net/neighbour.h> #include <net/arp.h> #include <net/ip_fib.h> +#include <net/ip6_fib.h> #include <net/fib_rules.h> +#include <net/ip_tunnels.h> #include <net/l3mdev.h> +#include <net/addrconf.h> +#include <net/ndisc.h> +#include <net/ipv6.h> +#include <net/fib_notifier.h> #include "spectrum.h" #include "core.h" #include "reg.h" #include "spectrum_cnt.h" #include "spectrum_dpipe.h" +#include "spectrum_ipip.h" #include "spectrum_router.h" struct mlxsw_sp_vr; @@ -79,9 +89,11 @@ struct mlxsw_sp_router { struct delayed_work nexthop_probe_dw; #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */ struct list_head nexthop_neighs_list; + struct list_head ipip_list; bool aborted; struct notifier_block fib_nb; const struct mlxsw_sp_rif_ops **rif_ops_arr; + const struct mlxsw_sp_ipip_ops **ipip_ops_arr; }; struct mlxsw_sp_rif { @@ -122,6 +134,17 @@ struct mlxsw_sp_rif_subport { bool lag; }; +struct mlxsw_sp_rif_ipip_lb { + struct mlxsw_sp_rif common; + struct mlxsw_sp_rif_ipip_lb_config lb_config; + u16 ul_vr_id; /* Reserved for Spectrum-2. */ +}; + +struct mlxsw_sp_rif_params_ipip_lb { + struct mlxsw_sp_rif_params common; + struct mlxsw_sp_rif_ipip_lb_config lb_config; +}; + struct mlxsw_sp_rif_ops { enum mlxsw_sp_rif_type type; size_t rif_size; @@ -304,7 +327,7 @@ static struct mlxsw_sp_rif * mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, const struct net_device *dev); -#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE) +#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1) struct mlxsw_sp_prefix_usage { DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT); @@ -314,19 +337,6 @@ struct mlxsw_sp_prefix_usage { for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT) static bool -mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1, - struct mlxsw_sp_prefix_usage *prefix_usage2) -{ - unsigned char prefix; - - mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) { - if (!test_bit(prefix, prefix_usage2->b)) - return false; - } - return true; -} - -static bool mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1, struct mlxsw_sp_prefix_usage *prefix_usage2) { @@ -371,6 +381,14 @@ enum mlxsw_sp_fib_entry_type { MLXSW_SP_FIB_ENTRY_TYPE_REMOTE, MLXSW_SP_FIB_ENTRY_TYPE_LOCAL, MLXSW_SP_FIB_ENTRY_TYPE_TRAP, + + /* This is a special case of local delivery, where a packet should be + * decapsulated on reception. Note that there is no corresponding ENCAP, + * because that's a type of next hop, not of FIB entry. (There can be + * several next hops in a REMOTE entry, and some of them may be + * encapsulating entries.) + */ + MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP, }; struct mlxsw_sp_nexthop_group; @@ -384,11 +402,9 @@ struct mlxsw_sp_fib_node { struct mlxsw_sp_fib_key key; }; -struct mlxsw_sp_fib_entry_params { - u32 tb_id; - u32 prio; - u8 tos; - u8 type; +struct mlxsw_sp_fib_entry_decap { + struct mlxsw_sp_ipip_entry *ipip_entry; + u32 tunnel_index; }; struct mlxsw_sp_fib_entry { @@ -397,13 +413,26 @@ struct mlxsw_sp_fib_entry { enum mlxsw_sp_fib_entry_type type; struct list_head nexthop_group_node; struct mlxsw_sp_nexthop_group *nh_group; - struct mlxsw_sp_fib_entry_params params; - bool offloaded; + struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */ +}; + +struct mlxsw_sp_fib4_entry { + struct mlxsw_sp_fib_entry common; + u32 tb_id; + u32 prio; + u8 tos; + u8 type; +}; + +struct mlxsw_sp_fib6_entry { + struct mlxsw_sp_fib_entry common; + struct list_head rt6_list; + unsigned int nrt6; }; -enum mlxsw_sp_l3proto { - MLXSW_SP_L3_PROTO_IPV4, - MLXSW_SP_L3_PROTO_IPV6, +struct mlxsw_sp_rt6 { + struct list_head list; + struct rt6_info *rt; }; struct mlxsw_sp_lpm_tree { @@ -428,6 +457,7 @@ struct mlxsw_sp_vr { u32 tb_id; /* kernel fib table id */ unsigned int rif_count; struct mlxsw_sp_fib *fib4; + struct mlxsw_sp_fib *fib6; }; static const struct rhashtable_params mlxsw_sp_fib_ht_params; @@ -487,15 +517,15 @@ static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); } -static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_lpm_tree *lpm_tree) +static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_lpm_tree *lpm_tree) { char ralta_pl[MLXSW_REG_RALTA_LEN]; mlxsw_reg_ralta_pack(ralta_pl, false, (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto, lpm_tree->id); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); } static int @@ -551,10 +581,10 @@ err_left_struct_set: return ERR_PTR(err); } -static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_lpm_tree *lpm_tree) +static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_lpm_tree *lpm_tree) { - return mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree); + mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree); } static struct mlxsw_sp_lpm_tree * @@ -571,24 +601,21 @@ mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp, lpm_tree->proto == proto && mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage, prefix_usage)) - goto inc_ref_count; + return lpm_tree; } - lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, - proto); - if (IS_ERR(lpm_tree)) - return lpm_tree; + return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto); +} -inc_ref_count: +static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree) +{ lpm_tree->ref_count++; - return lpm_tree; } -static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_lpm_tree *lpm_tree) +static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_lpm_tree *lpm_tree) { if (--lpm_tree->ref_count == 0) - return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree); - return 0; + mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree); } #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */ @@ -625,7 +652,7 @@ static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp) static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr) { - return !!vr->fib4; + return !!vr->fib4 || !!vr->fib6; } static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) @@ -642,13 +669,13 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) } static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_fib *fib) + const struct mlxsw_sp_fib *fib, u8 tree_id) { char raltb_pl[MLXSW_REG_RALTB_LEN]; mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id, (enum mlxsw_reg_ralxx_protocol) fib->proto, - fib->lpm_tree->id); + tree_id); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); } @@ -694,7 +721,7 @@ static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr, case MLXSW_SP_L3_PROTO_IPV4: return vr->fib4; case MLXSW_SP_L3_PROTO_IPV6: - BUG_ON(1); + return vr->fib6; } return NULL; } @@ -703,6 +730,7 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, u32 tb_id) { struct mlxsw_sp_vr *vr; + int err; vr = mlxsw_sp_vr_find_unused(mlxsw_sp); if (!vr) @@ -710,54 +738,26 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4); if (IS_ERR(vr->fib4)) return ERR_CAST(vr->fib4); + vr->fib6 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV6); + if (IS_ERR(vr->fib6)) { + err = PTR_ERR(vr->fib6); + goto err_fib6_create; + } vr->tb_id = tb_id; return vr; -} -static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr) -{ +err_fib6_create: mlxsw_sp_fib_destroy(vr->fib4); vr->fib4 = NULL; + return ERR_PTR(err); } -static int -mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib *fib, - struct mlxsw_sp_prefix_usage *req_prefix_usage) +static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr) { - struct mlxsw_sp_lpm_tree *lpm_tree = fib->lpm_tree; - struct mlxsw_sp_lpm_tree *new_tree; - int err; - - if (mlxsw_sp_prefix_usage_eq(req_prefix_usage, &lpm_tree->prefix_usage)) - return 0; - - new_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage, - fib->proto); - if (IS_ERR(new_tree)) { - /* We failed to get a tree according to the required - * prefix usage. However, the current tree might be still good - * for us if our requirement is subset of the prefixes used - * in the tree. - */ - if (mlxsw_sp_prefix_usage_subset(req_prefix_usage, - &lpm_tree->prefix_usage)) - return 0; - return PTR_ERR(new_tree); - } - - /* Prevent packet loss by overwriting existing binding */ - fib->lpm_tree = new_tree; - err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib); - if (err) - goto err_tree_bind; - mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); - - return 0; - -err_tree_bind: - fib->lpm_tree = lpm_tree; - mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree); - return err; + mlxsw_sp_fib_destroy(vr->fib6); + vr->fib6 = NULL; + mlxsw_sp_fib_destroy(vr->fib4); + vr->fib4 = NULL; } static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id) @@ -773,10 +773,105 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id) static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr) { - if (!vr->rif_count && list_empty(&vr->fib4->node_list)) + if (!vr->rif_count && list_empty(&vr->fib4->node_list) && + list_empty(&vr->fib6->node_list)) mlxsw_sp_vr_destroy(vr); } +static bool +mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr, + enum mlxsw_sp_l3proto proto, u8 tree_id) +{ + struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto); + + if (!mlxsw_sp_vr_is_used(vr)) + return false; + if (fib->lpm_tree && fib->lpm_tree->id == tree_id) + return true; + return false; +} + +static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib *fib, + struct mlxsw_sp_lpm_tree *new_tree) +{ + struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree; + int err; + + err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id); + if (err) + return err; + fib->lpm_tree = new_tree; + mlxsw_sp_lpm_tree_hold(new_tree); + mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree); + return 0; +} + +static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib *fib, + struct mlxsw_sp_lpm_tree *new_tree) +{ + struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree; + enum mlxsw_sp_l3proto proto = fib->proto; + u8 old_id, new_id = new_tree->id; + struct mlxsw_sp_vr *vr; + int i, err; + + if (!old_tree) + goto no_replace; + old_id = old_tree->id; + + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + vr = &mlxsw_sp->router->vrs[i]; + if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id)) + continue; + err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp, + mlxsw_sp_vr_fib(vr, proto), + new_tree); + if (err) + goto err_tree_replace; + } + + return 0; + +err_tree_replace: + for (i--; i >= 0; i--) { + if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id)) + continue; + mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp, + mlxsw_sp_vr_fib(vr, proto), + old_tree); + } + return err; + +no_replace: + err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id); + if (err) + return err; + fib->lpm_tree = new_tree; + mlxsw_sp_lpm_tree_hold(new_tree); + return 0; +} + +static void +mlxsw_sp_vrs_prefixes(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_l3proto proto, + struct mlxsw_sp_prefix_usage *req_prefix_usage) +{ + int i; + + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i]; + struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto); + unsigned char prefix; + + if (!mlxsw_sp_vr_is_used(vr)) + continue; + mlxsw_sp_prefix_usage_for_each(prefix, &fib->prefix_usage) + mlxsw_sp_prefix_usage_set(req_prefix_usage, prefix); + } +} + static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_sp_vr *vr; @@ -816,6 +911,374 @@ static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp) kfree(mlxsw_sp->router->vrs); } +static struct net_device * +__mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev) +{ + struct ip_tunnel *tun = netdev_priv(ol_dev); + struct net *net = dev_net(ol_dev); + + return __dev_get_by_index(net, tun->parms.link); +} + +static u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev) +{ + struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); + + if (d) + return l3mdev_fib_table(d) ? : RT_TABLE_MAIN; + else + return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN; +} + +static struct mlxsw_sp_rif * +mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_rif_params *params); + +static struct mlxsw_sp_rif_ipip_lb * +mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_ipip_type ipipt, + struct net_device *ol_dev) +{ + struct mlxsw_sp_rif_params_ipip_lb lb_params; + const struct mlxsw_sp_ipip_ops *ipip_ops; + struct mlxsw_sp_rif *rif; + + ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt]; + lb_params = (struct mlxsw_sp_rif_params_ipip_lb) { + .common.dev = ol_dev, + .common.lag = false, + .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev), + }; + + rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common); + if (IS_ERR(rif)) + return ERR_CAST(rif); + return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common); +} + +static struct mlxsw_sp_ipip_entry * +mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_ipip_type ipipt, + struct net_device *ol_dev) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + struct mlxsw_sp_ipip_entry *ret = NULL; + + ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL); + if (!ipip_entry) + return ERR_PTR(-ENOMEM); + + ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt, + ol_dev); + if (IS_ERR(ipip_entry->ol_lb)) { + ret = ERR_CAST(ipip_entry->ol_lb); + goto err_ol_ipip_lb_create; + } + + ipip_entry->ipipt = ipipt; + ipip_entry->ol_dev = ol_dev; + + return ipip_entry; + +err_ol_ipip_lb_create: + kfree(ipip_entry); + return ret; +} + +static void +mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp_ipip_entry *ipip_entry) +{ + WARN_ON(ipip_entry->ref_count > 0); + mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common); + kfree(ipip_entry); +} + +static __be32 +mlxsw_sp_ipip_netdev_saddr4(const struct net_device *ol_dev) +{ + struct ip_tunnel *tun = netdev_priv(ol_dev); + + return tun->parms.iph.saddr; +} + +union mlxsw_sp_l3addr +mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev) +{ + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return (union mlxsw_sp_l3addr) { + .addr4 = mlxsw_sp_ipip_netdev_saddr4(ol_dev), + }; + case MLXSW_SP_L3_PROTO_IPV6: + break; + }; + + WARN_ON(1); + return (union mlxsw_sp_l3addr) { + .addr4 = 0, + }; +} + +__be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev) +{ + struct ip_tunnel *tun = netdev_priv(ol_dev); + + return tun->parms.iph.daddr; +} + +union mlxsw_sp_l3addr +mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev) +{ + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return (union mlxsw_sp_l3addr) { + .addr4 = mlxsw_sp_ipip_netdev_daddr4(ol_dev), + }; + case MLXSW_SP_L3_PROTO_IPV6: + break; + }; + + WARN_ON(1); + return (union mlxsw_sp_l3addr) { + .addr4 = 0, + }; +} + +static bool mlxsw_sp_l3addr_eq(const union mlxsw_sp_l3addr *addr1, + const union mlxsw_sp_l3addr *addr2) +{ + return !memcmp(addr1, addr2, sizeof(*addr1)); +} + +static bool +mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp, + const enum mlxsw_sp_l3proto ul_proto, + union mlxsw_sp_l3addr saddr, + u32 ul_tb_id, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev); + enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt; + union mlxsw_sp_l3addr tun_saddr; + + if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto) + return false; + + tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev); + return tun_ul_tb_id == ul_tb_id && + mlxsw_sp_l3addr_eq(&tun_saddr, &saddr); +} + +static int +mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + u32 tunnel_index; + int err; + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &tunnel_index); + if (err) + return err; + + ipip_entry->decap_fib_entry = fib_entry; + fib_entry->decap.ipip_entry = ipip_entry; + fib_entry->decap.tunnel_index = tunnel_index; + return 0; +} + +static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + /* Unlink this node from the IPIP entry that it's the decap entry of. */ + fib_entry->decap.ipip_entry->decap_fib_entry = NULL; + fib_entry->decap.ipip_entry = NULL; + mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index); +} + +static struct mlxsw_sp_fib_node * +mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr, + size_t addr_len, unsigned char prefix_len); +static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry); + +static void +mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry; + + mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry); + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + + mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); +} + +static void +mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct mlxsw_sp_fib_entry *decap_fib_entry) +{ + if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry, + ipip_entry)) + return; + decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP; + + if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry)) + mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); +} + +/* Given an IPIP entry, find the corresponding decap route. */ +static struct mlxsw_sp_fib_entry * +mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + static struct mlxsw_sp_fib_node *fib_node; + const struct mlxsw_sp_ipip_ops *ipip_ops; + struct mlxsw_sp_fib_entry *fib_entry; + unsigned char saddr_prefix_len; + union mlxsw_sp_l3addr saddr; + struct mlxsw_sp_fib *ul_fib; + struct mlxsw_sp_vr *ul_vr; + const void *saddrp; + size_t saddr_len; + u32 ul_tb_id; + u32 saddr4; + + ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; + + ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev); + ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id); + if (!ul_vr) + return NULL; + + ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto); + saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto, + ipip_entry->ol_dev); + + switch (ipip_ops->ul_proto) { + case MLXSW_SP_L3_PROTO_IPV4: + saddr4 = be32_to_cpu(saddr.addr4); + saddrp = &saddr4; + saddr_len = 4; + saddr_prefix_len = 32; + break; + case MLXSW_SP_L3_PROTO_IPV6: + WARN_ON(1); + return NULL; + } + + fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len, + saddr_prefix_len); + if (!fib_node || list_empty(&fib_node->entry_list)) + return NULL; + + fib_entry = list_first_entry(&fib_node->entry_list, + struct mlxsw_sp_fib_entry, list); + if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP) + return NULL; + + return fib_entry; +} + +static struct mlxsw_sp_ipip_entry * +mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_ipip_type ipipt, + struct net_device *ol_dev) +{ + u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev); + struct mlxsw_sp_router *router = mlxsw_sp->router; + struct mlxsw_sp_fib_entry *decap_fib_entry; + struct mlxsw_sp_ipip_entry *ipip_entry; + enum mlxsw_sp_l3proto ul_proto; + union mlxsw_sp_l3addr saddr; + + list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list, + ipip_list_node) { + if (ipip_entry->ol_dev == ol_dev) + goto inc_ref_count; + + /* The configuration where several tunnels have the same local + * address in the same underlay table needs special treatment in + * the HW. That is currently not implemented in the driver. + */ + ul_proto = router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto; + saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev); + if (mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr, + ul_tb_id, ipip_entry)) + return ERR_PTR(-EEXIST); + } + + ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev); + if (IS_ERR(ipip_entry)) + return ipip_entry; + + decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry); + if (decap_fib_entry) + mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, + decap_fib_entry); + + list_add_tail(&ipip_entry->ipip_list_node, + &mlxsw_sp->router->ipip_list); + +inc_ref_count: + ++ipip_entry->ref_count; + return ipip_entry; +} + +static void +mlxsw_sp_ipip_entry_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + if (--ipip_entry->ref_count == 0) { + list_del(&ipip_entry->ipip_list_node); + if (ipip_entry->decap_fib_entry) + mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); + mlxsw_sp_ipip_entry_destroy(ipip_entry); + } +} + +static bool +mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ul_dev, + enum mlxsw_sp_l3proto ul_proto, + union mlxsw_sp_l3addr ul_dip, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN; + enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt; + struct net_device *ipip_ul_dev; + + if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto) + return false; + + ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev); + return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip, + ul_tb_id, ipip_entry) && + (!ipip_ul_dev || ipip_ul_dev == ul_dev); +} + +/* Given decap parameters, find the corresponding IPIP entry. */ +static struct mlxsw_sp_ipip_entry * +mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ul_dev, + enum mlxsw_sp_l3proto ul_proto, + union mlxsw_sp_l3addr ul_dip) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + + list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list, + ipip_list_node) + if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev, + ul_proto, ul_dip, + ipip_entry)) + return ipip_entry; + + return NULL; +} + struct mlxsw_sp_neigh_key { struct neighbour *n; }; @@ -831,6 +1294,8 @@ struct mlxsw_sp_neigh_entry { * this neigh entry */ struct list_head nexthop_neighs_list_node; + unsigned int counter_index; + bool counter_valid; }; static const struct rhashtable_params mlxsw_sp_neigh_ht_params = { @@ -839,6 +1304,62 @@ static const struct rhashtable_params mlxsw_sp_neigh_ht_params = { .key_len = sizeof(struct mlxsw_sp_neigh_key), }; +struct mlxsw_sp_neigh_entry * +mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + if (!neigh_entry) { + if (list_empty(&rif->neigh_list)) + return NULL; + else + return list_first_entry(&rif->neigh_list, + typeof(*neigh_entry), + rif_list_node); + } + if (neigh_entry->rif_list_node.next == &rif->neigh_list) + return NULL; + return list_next_entry(neigh_entry, rif_list_node); +} + +int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + return neigh_entry->key.n->tbl->family; +} + +unsigned char * +mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + return neigh_entry->ha; +} + +u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + struct neighbour *n; + + n = neigh_entry->key.n; + return ntohl(*((__be32 *) n->primary_key)); +} + +struct in6_addr * +mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + struct neighbour *n; + + n = neigh_entry->key.n; + return (struct in6_addr *) &n->primary_key; +} + +int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + u64 *p_counter) +{ + if (!neigh_entry->counter_valid) + return -EINVAL; + + return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index, + p_counter, NULL); +} + static struct mlxsw_sp_neigh_entry * mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n, u16 rif) @@ -879,6 +1400,53 @@ mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_neigh_ht_params); } +static bool +mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + struct devlink *devlink; + const char *table_name; + + switch (mlxsw_sp_neigh_entry_type(neigh_entry)) { + case AF_INET: + table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4; + break; + case AF_INET6: + table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6; + break; + default: + WARN_ON(1); + return false; + } + + devlink = priv_to_devlink(mlxsw_sp->core); + return devlink_dpipe_table_counter_enabled(devlink, table_name); +} + +static void +mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry)) + return; + + if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index)) + return; + + neigh_entry->counter_valid = true; +} + +static void +mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + if (!neigh_entry->counter_valid) + return; + mlxsw_sp_flow_counter_free(mlxsw_sp, + neigh_entry->counter_index); + neigh_entry->counter_valid = false; +} + static struct mlxsw_sp_neigh_entry * mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) { @@ -898,6 +1466,7 @@ mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) if (err) goto err_neigh_entry_insert; + mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry); list_add(&neigh_entry->rif_list_node, &rif->neigh_list); return neigh_entry; @@ -912,6 +1481,7 @@ mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry) { list_del(&neigh_entry->rif_list_node); + mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry); mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry); mlxsw_sp_neigh_entry_free(neigh_entry); } @@ -929,8 +1499,15 @@ mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) static void mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp) { - unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); + unsigned long interval; +#if IS_ENABLED(CONFIG_IPV6) + interval = min_t(unsigned long, + NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME), + NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME)); +#else + interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); +#endif mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval); } @@ -965,6 +1542,44 @@ static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp, neigh_release(n); } +#if IS_ENABLED(CONFIG_IPV6) +static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int rec_index) +{ + struct net_device *dev; + struct neighbour *n; + struct in6_addr dip; + u16 rif; + + mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif, + (char *) &dip); + + if (!mlxsw_sp->router->rifs[rif]) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n"); + return; + } + + dev = mlxsw_sp->router->rifs[rif]->dev; + n = neigh_lookup(&nd_tbl, &dip, dev); + if (!n) { + netdev_err(dev, "Failed to find matching neighbour for IP=%pI6c\n", + &dip); + return; + } + + netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip); + neigh_event_send(n, NULL); + neigh_release(n); +} +#else +static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int rec_index) +{ +} +#endif + static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp, char *rauhtd_pl, int rec_index) @@ -988,6 +1603,15 @@ static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp, } +static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int rec_index) +{ + /* One record contains one entry. */ + mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl, + rec_index); +} + static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp, char *rauhtd_pl, int rec_index) { @@ -997,7 +1621,8 @@ static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp, rec_index); break; case MLXSW_REG_RAUHTD_TYPE_IPV6: - WARN_ON_ONCE(1); + mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl, + rec_index); break; } } @@ -1022,22 +1647,20 @@ static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl) return false; } -static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) +static int +__mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + enum mlxsw_reg_rauhtd_type type) { - char *rauhtd_pl; - u8 num_rec; - int i, err; - - rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL); - if (!rauhtd_pl) - return -ENOMEM; + int i, num_rec; + int err; /* Make sure the neighbour's netdev isn't removed in the * process. */ rtnl_lock(); do { - mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4); + mlxsw_reg_rauhtd_pack(rauhtd_pl, type); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd), rauhtd_pl); if (err) { @@ -1051,6 +1674,27 @@ static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl)); rtnl_unlock(); + return err; +} + +static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) +{ + enum mlxsw_reg_rauhtd_type type; + char *rauhtd_pl; + int err; + + rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL); + if (!rauhtd_pl) + return -ENOMEM; + + type = MLXSW_REG_RAUHTD_TYPE_IPV4; + err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type); + if (err) + goto out; + + type = MLXSW_REG_RAUHTD_TYPE_IPV6; + err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type); +out: kfree(rauhtd_pl); return err; } @@ -1143,9 +1787,43 @@ mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp, mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha, dip); + if (neigh_entry->counter_valid) + mlxsw_reg_rauht_pack_counter(rauht_pl, + neigh_entry->counter_index); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); +} + +static void +mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + enum mlxsw_reg_rauht_op op) +{ + struct neighbour *n = neigh_entry->key.n; + char rauht_pl[MLXSW_REG_RAUHT_LEN]; + const char *dip = n->primary_key; + + mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha, + dip); + if (neigh_entry->counter_valid) + mlxsw_reg_rauht_pack_counter(rauht_pl, + neigh_entry->counter_index); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); } +bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + struct neighbour *n = neigh_entry->key.n; + + /* Packets with a link-local destination address are trapped + * after LPM lookup and never reach the neighbour table, so + * there is no need to program such neighbours to the device. + */ + if (ipv6_addr_type((struct in6_addr *) &n->primary_key) & + IPV6_ADDR_LINKLOCAL) + return true; + return false; +} + static void mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, @@ -1154,11 +1832,29 @@ mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp, if (!adding && !neigh_entry->connected) return; neigh_entry->connected = adding; - if (neigh_entry->key.n->tbl == &arp_tbl) + if (neigh_entry->key.n->tbl->family == AF_INET) { mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry, mlxsw_sp_rauht_op(adding)); - else + } else if (neigh_entry->key.n->tbl->family == AF_INET6) { + if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry)) + return; + mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry, + mlxsw_sp_rauht_op(adding)); + } else { WARN_ON_ONCE(1); + } +} + +void +mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + bool adding) +{ + if (adding) + mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry); + else + mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry); + mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true); } struct mlxsw_sp_neigh_event_work { @@ -1227,7 +1923,8 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused, p = ptr; /* We don't care about changes in the default table. */ - if (!p->dev || p->tbl != &arp_tbl) + if (!p->dev || (p->tbl->family != AF_INET && + p->tbl->family != AF_INET6)) return NOTIFY_DONE; /* We are in atomic context and can't take RTNL mutex, @@ -1246,7 +1943,7 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused, case NETEVENT_NEIGH_UPDATE: n = ptr; - if (n->tbl != &arp_tbl) + if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6) return NOTIFY_DONE; mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev); @@ -1307,27 +2004,23 @@ static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp) rhashtable_destroy(&mlxsw_sp->router->neigh_ht); } -static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_rif *rif) -{ - char rauht_pl[MLXSW_REG_RAUHT_LEN]; - - mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL, - rif->rif_index, rif->addr); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); -} - static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { struct mlxsw_sp_neigh_entry *neigh_entry, *tmp; - mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif); list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list, - rif_list_node) + rif_list_node) { + mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false); mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); + } } +enum mlxsw_sp_nexthop_type { + MLXSW_SP_NEXTHOP_TYPE_ETH, + MLXSW_SP_NEXTHOP_TYPE_IPIP, +}; + struct mlxsw_sp_nexthop_key { struct fib_nh *fib_nh; }; @@ -1340,6 +2033,8 @@ struct mlxsw_sp_nexthop { */ struct rhash_head ht_node; struct mlxsw_sp_nexthop_key key; + unsigned char gw_addr[sizeof(struct in6_addr)]; + int ifindex; struct mlxsw_sp_rif *rif; u8 should_offload:1, /* set indicates this neigh is connected and * should be put to KVD linear area of this group. @@ -1350,17 +2045,18 @@ struct mlxsw_sp_nexthop { update:1; /* set indicates that MAC of this neigh should be * updated in HW */ - struct mlxsw_sp_neigh_entry *neigh_entry; -}; - -struct mlxsw_sp_nexthop_group_key { - struct fib_info *fi; + enum mlxsw_sp_nexthop_type type; + union { + struct mlxsw_sp_neigh_entry *neigh_entry; + struct mlxsw_sp_ipip_entry *ipip_entry; + }; }; struct mlxsw_sp_nexthop_group { + void *priv; struct rhash_head ht_node; struct list_head fib_list; /* list of fib entries that use this group */ - struct mlxsw_sp_nexthop_group_key key; + struct neigh_table *neigh_tbl; u8 adj_index_valid:1, gateway:1; /* routes using the group use a gateway */ u32 adj_index; @@ -1370,15 +2066,154 @@ struct mlxsw_sp_nexthop_group { #define nh_rif nexthops[0].rif }; +static struct fib_info * +mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp) +{ + return nh_grp->priv; +} + +struct mlxsw_sp_nexthop_group_cmp_arg { + enum mlxsw_sp_l3proto proto; + union { + struct fib_info *fi; + struct mlxsw_sp_fib6_entry *fib6_entry; + }; +}; + +static bool +mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp, + const struct in6_addr *gw, int ifindex) +{ + int i; + + for (i = 0; i < nh_grp->count; i++) { + const struct mlxsw_sp_nexthop *nh; + + nh = &nh_grp->nexthops[i]; + if (nh->ifindex == ifindex && + ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr)) + return true; + } + + return false; +} + +static bool +mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp, + const struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + if (nh_grp->count != fib6_entry->nrt6) + return false; + + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + struct in6_addr *gw; + int ifindex; + + ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex; + gw = &mlxsw_sp_rt6->rt->rt6i_gateway; + if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex)) + return false; + } + + return true; +} + +static int +mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr) +{ + const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key; + const struct mlxsw_sp_nexthop_group *nh_grp = ptr; + + switch (cmp_arg->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp); + case MLXSW_SP_L3_PROTO_IPV6: + return !mlxsw_sp_nexthop6_group_cmp(nh_grp, + cmp_arg->fib6_entry); + default: + WARN_ON(1); + return 1; + } +} + +static int +mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp) +{ + return nh_grp->neigh_tbl->family; +} + +static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed) +{ + const struct mlxsw_sp_nexthop_group *nh_grp = data; + const struct mlxsw_sp_nexthop *nh; + struct fib_info *fi; + unsigned int val; + int i; + + switch (mlxsw_sp_nexthop_group_type(nh_grp)) { + case AF_INET: + fi = mlxsw_sp_nexthop4_group_fi(nh_grp); + return jhash(&fi, sizeof(fi), seed); + case AF_INET6: + val = nh_grp->count; + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + val ^= nh->ifindex; + } + return jhash(&val, sizeof(val), seed); + default: + WARN_ON(1); + return 0; + } +} + +static u32 +mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed) +{ + unsigned int val = fib6_entry->nrt6; + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + struct net_device *dev; + + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + dev = mlxsw_sp_rt6->rt->dst.dev; + val ^= dev->ifindex; + } + + return jhash(&val, sizeof(val), seed); +} + +static u32 +mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed) +{ + const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data; + + switch (cmp_arg->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed); + case MLXSW_SP_L3_PROTO_IPV6: + return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed); + default: + WARN_ON(1); + return 0; + } +} + static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = { - .key_offset = offsetof(struct mlxsw_sp_nexthop_group, key), .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node), - .key_len = sizeof(struct mlxsw_sp_nexthop_group_key), + .hashfn = mlxsw_sp_nexthop_group_hash, + .obj_hashfn = mlxsw_sp_nexthop_group_hash_obj, + .obj_cmpfn = mlxsw_sp_nexthop_group_cmp, }; static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { + if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 && + !nh_grp->gateway) + return 0; + return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht, &nh_grp->ht_node, mlxsw_sp_nexthop_group_ht_params); @@ -1387,16 +2222,38 @@ static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { + if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 && + !nh_grp->gateway) + return; + rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht, &nh_grp->ht_node, mlxsw_sp_nexthop_group_ht_params); } static struct mlxsw_sp_nexthop_group * -mlxsw_sp_nexthop_group_lookup(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop_group_key key) +mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp, + struct fib_info *fi) +{ + struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg; + + cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4; + cmp_arg.fi = fi; + return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht, + &cmp_arg, + mlxsw_sp_nexthop_group_ht_params); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) { - return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht, &key, + struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg; + + cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6; + cmp_arg.fib6_entry = fib6_entry; + return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht, + &cmp_arg, mlxsw_sp_nexthop_group_ht_params); } @@ -1473,15 +2330,26 @@ static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, char ratr_pl[MLXSW_REG_RATR_LEN]; mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, - true, adj_index, neigh_entry->rif); + true, MLXSW_REG_RATR_TYPE_ETHERNET, + adj_index, neigh_entry->rif); mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); } +static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, + u32 adj_index, + struct mlxsw_sp_nexthop *nh) +{ + const struct mlxsw_sp_ipip_ops *ipip_ops; + + ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt]; + return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry); +} + static int -mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop_group *nh_grp, - bool reallocate) +mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + bool reallocate) { u32 adj_index = nh_grp->adj_index; /* base */ struct mlxsw_sp_nexthop *nh; @@ -1497,8 +2365,16 @@ mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp, } if (nh->update || reallocate) { - err = mlxsw_sp_nexthop_mac_update(mlxsw_sp, - adj_index, nh); + switch (nh->type) { + case MLXSW_SP_NEXTHOP_TYPE_ETH: + err = mlxsw_sp_nexthop_mac_update + (mlxsw_sp, adj_index, nh); + break; + case MLXSW_SP_NEXTHOP_TYPE_IPIP: + err = mlxsw_sp_nexthop_ipip_update + (mlxsw_sp, adj_index, nh); + break; + } if (err) return err; nh->update = 0; @@ -1509,9 +2385,6 @@ mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp, return 0; } -static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry); - static bool mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node, const struct mlxsw_sp_fib_entry *fib_entry); @@ -1535,6 +2408,24 @@ mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp, } static void +mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op, int err); + +static void +mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp) +{ + enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE; + struct mlxsw_sp_fib_entry *fib_entry; + + list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) { + if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node, + fib_entry)) + continue; + mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0); + } +} + +static void mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { @@ -1556,7 +2447,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, for (i = 0; i < nh_grp->count; i++) { nh = &nh_grp->nexthops[i]; - if (nh->should_offload ^ nh->offloaded) { + if (nh->should_offload != nh->offloaded) { offload_change = true; if (nh->should_offload) nh->update = 1; @@ -1568,8 +2459,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, /* Nothing was added or removed, so no need to reallocate. Just * update MAC on existing adjacency indexes. */ - err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, - false); + err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); goto set_trap; @@ -1596,7 +2486,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, nh_grp->adj_index_valid = 1; nh_grp->adj_index = adj_index; nh_grp->ecmp_size = ecmp_size; - err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, true); + err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); goto set_trap; @@ -1621,6 +2511,10 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n"); goto set_trap; } + + /* Offload state within the group changed, so update the flags. */ + mlxsw_sp_nexthop_fib_entries_refresh(nh_grp); + return; set_trap: @@ -1640,9 +2534,9 @@ set_trap: static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh, bool removing) { - if (!removing && !nh->should_offload) + if (!removing) nh->should_offload = 1; - else if (removing && nh->offloaded) + else if (nh->offloaded) nh->should_offload = 0; nh->update = 1; } @@ -1684,7 +2578,6 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { struct mlxsw_sp_neigh_entry *neigh_entry; - struct fib_nh *fib_nh = nh->key.fib_nh; struct neighbour *n; u8 nud_state, dead; int err; @@ -1693,13 +2586,14 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, return 0; /* Take a reference of neigh here ensuring that neigh would - * not be detructed before the nexthop entry is finished. + * not be destructed before the nexthop entry is finished. * The reference is taken either in neigh_lookup() or * in neigh_create() in case n is not found. */ - n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev); + n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev); if (!n) { - n = neigh_create(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev); + n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr, + nh->rif->dev); if (IS_ERR(n)) return PTR_ERR(n); neigh_event_send(n, NULL); @@ -1761,18 +2655,131 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp, neigh_release(n); } -static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop_group *nh_grp, - struct mlxsw_sp_nexthop *nh, - struct fib_nh *fib_nh) +static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev, + enum mlxsw_sp_ipip_type *p_type) +{ + struct mlxsw_sp_router *router = mlxsw_sp->router; + const struct mlxsw_sp_ipip_ops *ipip_ops; + enum mlxsw_sp_ipip_type ipipt; + + for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) { + ipip_ops = router->ipip_ops_arr[ipipt]; + if (dev->type == ipip_ops->dev_type) { + if (p_type) + *p_type = ipipt; + return true; + } + } + return false; +} + +static int mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_ipip_type ipipt, + struct mlxsw_sp_nexthop *nh, + struct net_device *ol_dev) +{ + if (!nh->nh_grp->gateway || nh->ipip_entry) + return 0; + + nh->ipip_entry = mlxsw_sp_ipip_entry_get(mlxsw_sp, ipipt, ol_dev); + if (IS_ERR(nh->ipip_entry)) + return PTR_ERR(nh->ipip_entry); + + __mlxsw_sp_nexthop_neigh_update(nh, false); + return 0; +} + +static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry; + + if (!ipip_entry) + return; + + __mlxsw_sp_nexthop_neigh_update(nh, true); + mlxsw_sp_ipip_entry_put(mlxsw_sp, ipip_entry); + nh->ipip_entry = NULL; +} + +static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp, + const struct fib_nh *fib_nh, + enum mlxsw_sp_ipip_type *p_ipipt) { struct net_device *dev = fib_nh->nh_dev; - struct in_device *in_dev; + + return dev && + fib_nh->nh_parent->fib_type == RTN_UNICAST && + mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt); +} + +static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + switch (nh->type) { + case MLXSW_SP_NEXTHOP_TYPE_ETH: + mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop_rif_fini(nh); + break; + case MLXSW_SP_NEXTHOP_TYPE_IPIP: + mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh); + break; + } +} + +static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, + struct fib_nh *fib_nh) +{ + struct mlxsw_sp_router *router = mlxsw_sp->router; + struct net_device *dev = fib_nh->nh_dev; + enum mlxsw_sp_ipip_type ipipt; struct mlxsw_sp_rif *rif; int err; + if (mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fib_nh, &ipipt) && + router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev, + MLXSW_SP_L3_PROTO_IPV4)) { + nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; + return mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev); + } + + nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!rif) + return 0; + + mlxsw_sp_nexthop_rif_init(nh, rif); + err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); + if (err) + goto err_neigh_init; + + return 0; + +err_neigh_init: + mlxsw_sp_nexthop_rif_fini(nh); + return err; +} + +static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); +} + +static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop *nh, + struct fib_nh *fib_nh) +{ + struct net_device *dev = fib_nh->nh_dev; + struct in_device *in_dev; + int err; + nh->nh_grp = nh_grp; nh->key.fib_nh = fib_nh; + memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw)); err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh); if (err) return err; @@ -1785,37 +2792,29 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, fib_nh->nh_flags & RTNH_F_LINKDOWN) return 0; - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!rif) - return 0; - mlxsw_sp_nexthop_rif_init(nh, rif); - - err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); + err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh); if (err) goto err_nexthop_neigh_init; return 0; err_nexthop_neigh_init: - mlxsw_sp_nexthop_rif_fini(nh); mlxsw_sp_nexthop_remove(mlxsw_sp, nh); return err; } -static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh) +static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) { - mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); - mlxsw_sp_nexthop_rif_fini(nh); + mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh); mlxsw_sp_nexthop_remove(mlxsw_sp, nh); } -static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp, - unsigned long event, struct fib_nh *fib_nh) +static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp, + unsigned long event, struct fib_nh *fib_nh) { struct mlxsw_sp_nexthop_key key; struct mlxsw_sp_nexthop *nh; - struct mlxsw_sp_rif *rif; if (mlxsw_sp->router->aborted) return; @@ -1825,18 +2824,12 @@ static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp, if (WARN_ON_ONCE(!nh)) return; - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev); - if (!rif) - return; - switch (event) { case FIB_EVENT_NH_ADD: - mlxsw_sp_nexthop_rif_init(nh, rif); - mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); + mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh); break; case FIB_EVENT_NH_DEL: - mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); - mlxsw_sp_nexthop_rif_fini(nh); + mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh); break; } @@ -1849,14 +2842,20 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, *tmp; list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) { - mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); - mlxsw_sp_nexthop_rif_fini(nh); + mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); } } +static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp, + const struct fib_info *fi) +{ + return fi->fib_nh->nh_scope == RT_SCOPE_LINK || + mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL); +} + static struct mlxsw_sp_nexthop_group * -mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) +mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) { struct mlxsw_sp_nexthop_group *nh_grp; struct mlxsw_sp_nexthop *nh; @@ -1870,17 +2869,19 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) nh_grp = kzalloc(alloc_size, GFP_KERNEL); if (!nh_grp) return ERR_PTR(-ENOMEM); + nh_grp->priv = fi; INIT_LIST_HEAD(&nh_grp->fib_list); - nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK; + nh_grp->neigh_tbl = &arp_tbl; + + nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi); nh_grp->count = fi->fib_nhs; - nh_grp->key.fi = fi; fib_info_hold(fi); for (i = 0; i < nh_grp->count; i++) { nh = &nh_grp->nexthops[i]; fib_nh = &fi->fib_nh[i]; - err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh); + err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh); if (err) - goto err_nexthop_init; + goto err_nexthop4_init; } err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp); if (err) @@ -1889,19 +2890,19 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) return nh_grp; err_nexthop_group_insert: -err_nexthop_init: +err_nexthop4_init: for (i--; i >= 0; i--) { nh = &nh_grp->nexthops[i]; - mlxsw_sp_nexthop_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop4_fini(mlxsw_sp, nh); } - fib_info_put(nh_grp->key.fi); + fib_info_put(fi); kfree(nh_grp); return ERR_PTR(err); } static void -mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop_group *nh_grp) +mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) { struct mlxsw_sp_nexthop *nh; int i; @@ -1909,25 +2910,23 @@ mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp); for (i = 0; i < nh_grp->count; i++) { nh = &nh_grp->nexthops[i]; - mlxsw_sp_nexthop_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop4_fini(mlxsw_sp, nh); } mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); WARN_ON_ONCE(nh_grp->adj_index_valid); - fib_info_put(nh_grp->key.fi); + fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp)); kfree(nh_grp); } -static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - struct fib_info *fi) +static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + struct fib_info *fi) { - struct mlxsw_sp_nexthop_group_key key; struct mlxsw_sp_nexthop_group *nh_grp; - key.fi = fi; - nh_grp = mlxsw_sp_nexthop_group_lookup(mlxsw_sp, key); + nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi); if (!nh_grp) { - nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi); + nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi); if (IS_ERR(nh_grp)) return PTR_ERR(nh_grp); } @@ -1936,15 +2935,25 @@ static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp, return 0; } -static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) +static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; list_del(&fib_entry->nexthop_group_node); if (!list_empty(&nh_grp->fib_list)) return; - mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp); + mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp); +} + +static bool +mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_fib4_entry *fib4_entry; + + fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry, + common); + return !fib4_entry->tos; } static bool @@ -1952,29 +2961,133 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group; - if (fib_entry->params.tos) - return false; + switch (fib_entry->fib_node->fib->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + if (!mlxsw_sp_fib4_entry_should_offload(fib_entry)) + return false; + break; + case MLXSW_SP_L3_PROTO_IPV6: + break; + } switch (fib_entry->type) { case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE: return !!nh_group->adj_index_valid; case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: return !!nh_group->nh_rif; + case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: + return true; default: return false; } } -static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +static struct mlxsw_sp_nexthop * +mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, + const struct mlxsw_sp_rt6 *mlxsw_sp_rt6) +{ + int i; + + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + struct rt6_info *rt = mlxsw_sp_rt6->rt; + + if (nh->rif && nh->rif->dev == rt->dst.dev && + ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr, + &rt->rt6i_gateway)) + return nh; + continue; + } + + return NULL; +} + +static void +mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; + int i; + + if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL || + fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) { + nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD; + return; + } + + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + + if (nh->offloaded) + nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD; + else + nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD; + } +} + +static void +mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) { - fib_entry->offloaded = true; + struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; + int i; + + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + + nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD; + } +} + +static void +mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry, + common); + + if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) { + list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, + list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD; + return; + } + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; + struct mlxsw_sp_nexthop *nh; + + nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6); + if (nh && nh->offloaded) + mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD; + else + mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD; + } +} + +static void +mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry, + common); + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + struct rt6_info *rt = mlxsw_sp_rt6->rt; + + rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD; + } +} + +static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +{ switch (fib_entry->fib_node->fib->proto) { case MLXSW_SP_L3_PROTO_IPV4: - fib_info_offload_inc(fib_entry->nh_group->key.fi); + mlxsw_sp_fib4_entry_offload_set(fib_entry); break; case MLXSW_SP_L3_PROTO_IPV6: - WARN_ON_ONCE(1); + mlxsw_sp_fib6_entry_offload_set(fib_entry); + break; } } @@ -1983,13 +3096,12 @@ mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) { switch (fib_entry->fib_node->fib->proto) { case MLXSW_SP_L3_PROTO_IPV4: - fib_info_offload_dec(fib_entry->nh_group->key.fi); + mlxsw_sp_fib4_entry_offload_unset(fib_entry); break; case MLXSW_SP_L3_PROTO_IPV6: - WARN_ON_ONCE(1); + mlxsw_sp_fib6_entry_offload_unset(fib_entry); + break; } - - fib_entry->offloaded = false; } static void @@ -1998,17 +3110,13 @@ mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry, { switch (op) { case MLXSW_REG_RALUE_OP_WRITE_DELETE: - if (!fib_entry->offloaded) - return; return mlxsw_sp_fib_entry_offload_unset(fib_entry); case MLXSW_REG_RALUE_OP_WRITE_WRITE: if (err) return; - if (mlxsw_sp_fib_entry_should_offload(fib_entry) && - !fib_entry->offloaded) + if (mlxsw_sp_fib_entry_should_offload(fib_entry)) mlxsw_sp_fib_entry_offload_set(fib_entry); - else if (!mlxsw_sp_fib_entry_should_offload(fib_entry) && - fib_entry->offloaded) + else if (!mlxsw_sp_fib_entry_should_offload(fib_entry)) mlxsw_sp_fib_entry_offload_unset(fib_entry); return; default: @@ -2016,13 +3124,37 @@ mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry, } } -static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) +static void +mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl, + const struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) { - char ralue_pl[MLXSW_REG_RALUE_LEN]; struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; - u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr; + enum mlxsw_reg_ralxx_protocol proto; + u32 *p_dip; + + proto = (enum mlxsw_reg_ralxx_protocol) fib->proto; + + switch (fib->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + p_dip = (u32 *) fib_entry->fib_node->key.addr; + mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id, + fib_entry->fib_node->key.prefix_len, + *p_dip); + break; + case MLXSW_SP_L3_PROTO_IPV6: + mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id, + fib_entry->fib_node->key.prefix_len, + fib_entry->fib_node->key.addr); + break; + } +} + +static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + char ralue_pl[MLXSW_REG_RALUE_LEN]; enum mlxsw_reg_ralue_trap_action trap_action; u16 trap_id = 0; u32 adjacency_index = 0; @@ -2041,24 +3173,19 @@ static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; } - mlxsw_reg_ralue_pack4(ralue_pl, - (enum mlxsw_reg_ralxx_protocol) fib->proto, op, - fib->vr->id, fib_entry->fib_node->key.prefix_len, - *p_dip); + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id, adjacency_index, ecmp_size); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } -static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) +static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) { struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif; - struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; enum mlxsw_reg_ralue_trap_action trap_action; char ralue_pl[MLXSW_REG_RALUE_LEN]; - u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr; u16 trap_id = 0; u16 rif_index = 0; @@ -2070,42 +3197,53 @@ static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp, trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; } - mlxsw_reg_ralue_pack4(ralue_pl, - (enum mlxsw_reg_ralxx_protocol) fib->proto, op, - fib->vr->id, fib_entry->fib_node->key.prefix_len, - *p_dip); + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, rif_index); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } -static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) +static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) { - struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; char ralue_pl[MLXSW_REG_RALUE_LEN]; - u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr; - mlxsw_reg_ralue_pack4(ralue_pl, - (enum mlxsw_reg_ralxx_protocol) fib->proto, op, - fib->vr->id, fib_entry->fib_node->key.prefix_len, - *p_dip); + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } -static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) +static int +mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry; + const struct mlxsw_sp_ipip_ops *ipip_ops; + + if (WARN_ON(!ipip_entry)) + return -EINVAL; + + ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; + return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op, + fib_entry->decap.tunnel_index); +} + +static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) { switch (fib_entry->type) { case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE: - return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op); + return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: - return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op); + return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_TRAP: - return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op); + return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op); + case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: + return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp, + fib_entry, op); } return -EINVAL; } @@ -2114,16 +3252,10 @@ static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) { - int err = -EINVAL; + int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op); - switch (fib_entry->fib_node->fib->proto) { - case MLXSW_SP_L3_PROTO_IPV4: - err = mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op); - break; - case MLXSW_SP_L3_PROTO_IPV6: - return err; - } mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err); + return err; } @@ -2146,11 +3278,23 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, const struct fib_entry_notifier_info *fen_info, struct mlxsw_sp_fib_entry *fib_entry) { + union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) }; + struct net_device *dev = fen_info->fi->fib_dev; + struct mlxsw_sp_ipip_entry *ipip_entry; struct fib_info *fi = fen_info->fi; switch (fen_info->type) { - case RTN_BROADCAST: /* fall through */ case RTN_LOCAL: + ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev, + MLXSW_SP_L3_PROTO_IPV4, dip); + if (ipip_entry) { + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP; + return mlxsw_sp_fib_entry_decap_init(mlxsw_sp, + fib_entry, + ipip_entry); + } + /* fall through */ + case RTN_BROADCAST: fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; return 0; case RTN_UNREACHABLE: /* fall through */ @@ -2163,82 +3307,87 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; return 0; case RTN_UNICAST: - if (fi->fib_nh->nh_scope != RT_SCOPE_LINK) - fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; - else + if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi)) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; + else + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; return 0; default: return -EINVAL; } } -static struct mlxsw_sp_fib_entry * +static struct mlxsw_sp_fib4_entry * mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node, const struct fib_entry_notifier_info *fen_info) { + struct mlxsw_sp_fib4_entry *fib4_entry; struct mlxsw_sp_fib_entry *fib_entry; int err; - fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL); - if (!fib_entry) { - err = -ENOMEM; - goto err_fib_entry_alloc; - } + fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL); + if (!fib4_entry) + return ERR_PTR(-ENOMEM); + fib_entry = &fib4_entry->common; err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry); if (err) goto err_fib4_entry_type_set; - err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fen_info->fi); + err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi); if (err) - goto err_nexthop_group_get; + goto err_nexthop4_group_get; - fib_entry->params.prio = fen_info->fi->fib_priority; - fib_entry->params.tb_id = fen_info->tb_id; - fib_entry->params.type = fen_info->type; - fib_entry->params.tos = fen_info->tos; + fib4_entry->prio = fen_info->fi->fib_priority; + fib4_entry->tb_id = fen_info->tb_id; + fib4_entry->type = fen_info->type; + fib4_entry->tos = fen_info->tos; fib_entry->fib_node = fib_node; - return fib_entry; + return fib4_entry; -err_nexthop_group_get: +err_nexthop4_group_get: err_fib4_entry_type_set: - kfree(fib_entry); -err_fib_entry_alloc: + kfree(fib4_entry); return ERR_PTR(err); } static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) + struct mlxsw_sp_fib4_entry *fib4_entry) { - mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry); - kfree(fib_entry); + mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common); + kfree(fib4_entry); } -static struct mlxsw_sp_fib_node * -mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp, - const struct fib_entry_notifier_info *fen_info); - -static struct mlxsw_sp_fib_entry * +static struct mlxsw_sp_fib4_entry * mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp, const struct fib_entry_notifier_info *fen_info) { - struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib4_entry *fib4_entry; struct mlxsw_sp_fib_node *fib_node; + struct mlxsw_sp_fib *fib; + struct mlxsw_sp_vr *vr; - fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info); - if (IS_ERR(fib_node)) + vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id); + if (!vr) return NULL; + fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4); - list_for_each_entry(fib_entry, &fib_node->entry_list, list) { - if (fib_entry->params.tb_id == fen_info->tb_id && - fib_entry->params.tos == fen_info->tos && - fib_entry->params.type == fen_info->type && - fib_entry->nh_group->key.fi == fen_info->fi) { - return fib_entry; + fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst, + sizeof(fen_info->dst), + fen_info->dst_len); + if (!fib_node) + return NULL; + + list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) { + if (fib4_entry->tb_id == fen_info->tb_id && + fib4_entry->tos == fen_info->tos && + fib4_entry->type == fen_info->type && + mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) == + fen_info->fi) { + return fib4_entry; } } @@ -2311,6 +3460,67 @@ mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node, struct mlxsw_sp_fib_entry, list) == fib_entry; } +static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib *fib, + struct mlxsw_sp_fib_node *fib_node) +{ + struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } }; + struct mlxsw_sp_lpm_tree *lpm_tree; + int err; + + /* Since the tree is shared between all virtual routers we must + * make sure it contains all the required prefix lengths. This + * can be computed by either adding the new prefix length to the + * existing prefix usage of a bound tree, or by aggregating the + * prefix lengths across all virtual routers and adding the new + * one as well. + */ + if (fib->lpm_tree) + mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, + &fib->lpm_tree->prefix_usage); + else + mlxsw_sp_vrs_prefixes(mlxsw_sp, fib->proto, &req_prefix_usage); + mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len); + + lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, + fib->proto); + if (IS_ERR(lpm_tree)) + return PTR_ERR(lpm_tree); + + if (fib->lpm_tree && fib->lpm_tree->id == lpm_tree->id) + return 0; + + err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree); + if (err) + return err; + + return 0; +} + +static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib *fib) +{ + struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } }; + struct mlxsw_sp_lpm_tree *lpm_tree; + + /* Aggregate prefix lengths across all virtual routers to make + * sure we only have used prefix lengths in the LPM tree. + */ + mlxsw_sp_vrs_prefixes(mlxsw_sp, fib->proto, &req_prefix_usage); + lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, + fib->proto); + if (IS_ERR(lpm_tree)) + goto err_tree_get; + mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree); + +err_tree_get: + if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) + return; + mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib); + mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree); + fib->lpm_tree = NULL; +} + static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node) { unsigned char prefix_len = fib_node->key.prefix_len; @@ -2333,8 +3543,6 @@ static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node, struct mlxsw_sp_fib *fib) { - struct mlxsw_sp_prefix_usage req_prefix_usage; - struct mlxsw_sp_lpm_tree *lpm_tree; int err; err = mlxsw_sp_fib_node_insert(fib, fib_node); @@ -2342,33 +3550,15 @@ static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp, return err; fib_node->fib = fib; - mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &fib->prefix_usage); - mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len); - - if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) { - err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, fib, - &req_prefix_usage); - if (err) - goto err_tree_check; - } else { - lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, - fib->proto); - if (IS_ERR(lpm_tree)) - return PTR_ERR(lpm_tree); - fib->lpm_tree = lpm_tree; - err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib); - if (err) - goto err_tree_bind; - } + err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib, fib_node); + if (err) + goto err_fib_lpm_tree_link; mlxsw_sp_fib_node_prefix_inc(fib_node); return 0; -err_tree_bind: - fib->lpm_tree = NULL; - mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); -err_tree_check: +err_fib_lpm_tree_link: fib_node->fib = NULL; mlxsw_sp_fib_node_remove(fib, fib_node); return err; @@ -2377,46 +3567,34 @@ err_tree_check: static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { - struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree; struct mlxsw_sp_fib *fib = fib_node->fib; mlxsw_sp_fib_node_prefix_dec(fib_node); - - if (mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) { - mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib); - fib->lpm_tree = NULL; - mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); - } else { - mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, fib, &fib->prefix_usage); - } - + mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib); fib_node->fib = NULL; mlxsw_sp_fib_node_remove(fib, fib_node); } static struct mlxsw_sp_fib_node * -mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp, - const struct fib_entry_notifier_info *fen_info) +mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr, + size_t addr_len, unsigned char prefix_len, + enum mlxsw_sp_l3proto proto) { struct mlxsw_sp_fib_node *fib_node; struct mlxsw_sp_fib *fib; struct mlxsw_sp_vr *vr; int err; - vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->tb_id); + vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id); if (IS_ERR(vr)) return ERR_CAST(vr); - fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4); + fib = mlxsw_sp_vr_fib(vr, proto); - fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst, - sizeof(fen_info->dst), - fen_info->dst_len); + fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len); if (fib_node) return fib_node; - fib_node = mlxsw_sp_fib_node_create(fib, &fen_info->dst, - sizeof(fen_info->dst), - fen_info->dst_len); + fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len); if (!fib_node) { err = -ENOMEM; goto err_fib_node_create; @@ -2435,8 +3613,8 @@ err_fib_node_create: return ERR_PTR(err); } -static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_node *fib_node) +static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node) { struct mlxsw_sp_vr *vr = fib_node->fib->vr; @@ -2447,95 +3625,100 @@ static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_vr_put(vr); } -static struct mlxsw_sp_fib_entry * +static struct mlxsw_sp_fib4_entry * mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct mlxsw_sp_fib_entry_params *params) + const struct mlxsw_sp_fib4_entry *new4_entry) { - struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib4_entry *fib4_entry; - list_for_each_entry(fib_entry, &fib_node->entry_list, list) { - if (fib_entry->params.tb_id > params->tb_id) + list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) { + if (fib4_entry->tb_id > new4_entry->tb_id) continue; - if (fib_entry->params.tb_id != params->tb_id) + if (fib4_entry->tb_id != new4_entry->tb_id) break; - if (fib_entry->params.tos > params->tos) + if (fib4_entry->tos > new4_entry->tos) continue; - if (fib_entry->params.prio >= params->prio || - fib_entry->params.tos < params->tos) - return fib_entry; + if (fib4_entry->prio >= new4_entry->prio || + fib4_entry->tos < new4_entry->tos) + return fib4_entry; } return NULL; } -static int mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib_entry *fib_entry, - struct mlxsw_sp_fib_entry *new_entry) +static int +mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry, + struct mlxsw_sp_fib4_entry *new4_entry) { struct mlxsw_sp_fib_node *fib_node; - if (WARN_ON(!fib_entry)) + if (WARN_ON(!fib4_entry)) return -EINVAL; - fib_node = fib_entry->fib_node; - list_for_each_entry_from(fib_entry, &fib_node->entry_list, list) { - if (fib_entry->params.tb_id != new_entry->params.tb_id || - fib_entry->params.tos != new_entry->params.tos || - fib_entry->params.prio != new_entry->params.prio) + fib_node = fib4_entry->common.fib_node; + list_for_each_entry_from(fib4_entry, &fib_node->entry_list, + common.list) { + if (fib4_entry->tb_id != new4_entry->tb_id || + fib4_entry->tos != new4_entry->tos || + fib4_entry->prio != new4_entry->prio) break; } - list_add_tail(&new_entry->list, &fib_entry->list); + list_add_tail(&new4_entry->common.list, &fib4_entry->common.list); return 0; } static int -mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib_node *fib_node, - struct mlxsw_sp_fib_entry *new_entry, +mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry, bool replace, bool append) { - struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node; + struct mlxsw_sp_fib4_entry *fib4_entry; - fib_entry = mlxsw_sp_fib4_node_entry_find(fib_node, &new_entry->params); + fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry); if (append) - return mlxsw_sp_fib4_node_list_append(fib_entry, new_entry); - if (replace && WARN_ON(!fib_entry)) + return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry); + if (replace && WARN_ON(!fib4_entry)) return -EINVAL; /* Insert new entry before replaced one, so that we can later * remove the second. */ - if (fib_entry) { - list_add_tail(&new_entry->list, &fib_entry->list); + if (fib4_entry) { + list_add_tail(&new4_entry->common.list, + &fib4_entry->common.list); } else { - struct mlxsw_sp_fib_entry *last; + struct mlxsw_sp_fib4_entry *last; - list_for_each_entry(last, &fib_node->entry_list, list) { - if (new_entry->params.tb_id > last->params.tb_id) + list_for_each_entry(last, &fib_node->entry_list, common.list) { + if (new4_entry->tb_id > last->tb_id) break; - fib_entry = last; + fib4_entry = last; } - if (fib_entry) - list_add(&new_entry->list, &fib_entry->list); + if (fib4_entry) + list_add(&new4_entry->common.list, + &fib4_entry->common.list); else - list_add(&new_entry->list, &fib_node->entry_list); + list_add(&new4_entry->common.list, + &fib_node->entry_list); } return 0; } static void -mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry) { - list_del(&fib_entry->list); + list_del(&fib4_entry->common.list); } -static int -mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_fib_node *fib_node, - struct mlxsw_sp_fib_entry *fib_entry) +static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { + struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; + if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry)) return 0; @@ -2552,11 +3735,11 @@ mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); } -static void -mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_fib_node *fib_node, - struct mlxsw_sp_fib_entry *fib_entry) +static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { + struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; + if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry)) return; @@ -2574,54 +3757,53 @@ mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, + struct mlxsw_sp_fib4_entry *fib4_entry, bool replace, bool append) { - struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; int err; - err = mlxsw_sp_fib4_node_list_insert(fib_node, fib_entry, replace, - append); + err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append); if (err) return err; - err = mlxsw_sp_fib4_node_entry_add(mlxsw_sp, fib_node, fib_entry); + err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common); if (err) - goto err_fib4_node_entry_add; + goto err_fib_node_entry_add; return 0; -err_fib4_node_entry_add: - mlxsw_sp_fib4_node_list_remove(fib_entry); +err_fib_node_entry_add: + mlxsw_sp_fib4_node_list_remove(fib4_entry); return err; } static void mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) + struct mlxsw_sp_fib4_entry *fib4_entry) { - struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; + mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common); + mlxsw_sp_fib4_node_list_remove(fib4_entry); - mlxsw_sp_fib4_node_entry_del(mlxsw_sp, fib_node, fib_entry); - mlxsw_sp_fib4_node_list_remove(fib_entry); + if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) + mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common); } static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, + struct mlxsw_sp_fib4_entry *fib4_entry, bool replace) { - struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; - struct mlxsw_sp_fib_entry *replaced; + struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node; + struct mlxsw_sp_fib4_entry *replaced; if (!replace) return; /* We inserted the new entry before replaced one */ - replaced = list_next_entry(fib_entry, list); + replaced = list_next_entry(fib4_entry, common.list); mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced); mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced); - mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } static int @@ -2629,76 +3811,774 @@ mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp, const struct fib_entry_notifier_info *fen_info, bool replace, bool append) { - struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib4_entry *fib4_entry; struct mlxsw_sp_fib_node *fib_node; int err; if (mlxsw_sp->router->aborted) return 0; - fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info); + fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id, + &fen_info->dst, sizeof(fen_info->dst), + fen_info->dst_len, + MLXSW_SP_L3_PROTO_IPV4); if (IS_ERR(fib_node)) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n"); return PTR_ERR(fib_node); } - fib_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info); - if (IS_ERR(fib_entry)) { + fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info); + if (IS_ERR(fib4_entry)) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n"); - err = PTR_ERR(fib_entry); + err = PTR_ERR(fib4_entry); goto err_fib4_entry_create; } - err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib_entry, replace, + err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace, append); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n"); goto err_fib4_node_entry_link; } - mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib_entry, replace); + mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace); return 0; err_fib4_node_entry_link: - mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); err_fib4_entry_create: - mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); return err; } static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, struct fib_entry_notifier_info *fen_info) { + struct mlxsw_sp_fib4_entry *fib4_entry; + struct mlxsw_sp_fib_node *fib_node; + + if (mlxsw_sp->router->aborted) + return; + + fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info); + if (WARN_ON(!fib4_entry)) + return; + fib_node = fib4_entry->common.fib_node; + + mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); +} + +static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt) +{ + /* Packets with link-local destination IP arriving to the router + * are trapped to the CPU, so no need to program specific routes + * for them. + */ + if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL) + return true; + + /* Multicast routes aren't supported, so ignore them. Neighbour + * Discovery packets are specifically trapped. + */ + if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST) + return true; + + /* Cloned routes are irrelevant in the forwarding path. */ + if (rt->rt6i_flags & RTF_CACHE) + return true; + + return false; +} + +static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL); + if (!mlxsw_sp_rt6) + return ERR_PTR(-ENOMEM); + + /* In case of route replace, replaced route is deleted with + * no notification. Take reference to prevent accessing freed + * memory. + */ + mlxsw_sp_rt6->rt = rt; + rt6_hold(rt); + + return mlxsw_sp_rt6; +} + +#if IS_ENABLED(CONFIG_IPV6) +static void mlxsw_sp_rt6_release(struct rt6_info *rt) +{ + rt6_release(rt); +} +#else +static void mlxsw_sp_rt6_release(struct rt6_info *rt) +{ +} +#endif + +static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6) +{ + mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt); + kfree(mlxsw_sp_rt6); +} + +static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt) +{ + /* RTF_CACHE routes are ignored */ + return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY; +} + +static struct rt6_info * +mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry) +{ + return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, + list)->rt; +} + +static struct mlxsw_sp_fib6_entry * +mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node, + const struct rt6_info *nrt, bool replace) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + + if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace) + return NULL; + + list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { + struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + + /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same + * virtual router. + */ + if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id) + continue; + if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id) + break; + if (rt->rt6i_metric < nrt->rt6i_metric) + continue; + if (rt->rt6i_metric == nrt->rt6i_metric && + mlxsw_sp_fib6_rt_can_mp(rt)) + return fib6_entry; + if (rt->rt6i_metric > nrt->rt6i_metric) + break; + } + + return NULL; +} + +static struct mlxsw_sp_rt6 * +mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry, + const struct rt6_info *rt) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + if (mlxsw_sp_rt6->rt == rt) + return mlxsw_sp_rt6; + } + + return NULL; +} + +static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp, + const struct rt6_info *rt, + enum mlxsw_sp_ipip_type *ret) +{ + return rt->dst.dev && + mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->dst.dev, ret); +} + +static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop *nh, + const struct rt6_info *rt) +{ + struct mlxsw_sp_router *router = mlxsw_sp->router; + struct net_device *dev = rt->dst.dev; + enum mlxsw_sp_ipip_type ipipt; + struct mlxsw_sp_rif *rif; + int err; + + if (mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, &ipipt) && + router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev, + MLXSW_SP_L3_PROTO_IPV6)) { + nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; + return mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev); + } + + nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!rif) + return 0; + mlxsw_sp_nexthop_rif_init(nh, rif); + + err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); + if (err) + goto err_nexthop_neigh_init; + + return 0; + +err_nexthop_neigh_init: + mlxsw_sp_nexthop_rif_fini(nh); + return err; +} + +static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); +} + +static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop *nh, + const struct rt6_info *rt) +{ + struct net_device *dev = rt->dst.dev; + + nh->nh_grp = nh_grp; + memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr)); + + if (!dev) + return 0; + nh->ifindex = dev->ifindex; + + return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt); +} + +static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh); +} + +static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp, + const struct rt6_info *rt) +{ + return rt->rt6i_flags & RTF_GATEWAY || + mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + struct mlxsw_sp_nexthop *nh; + size_t alloc_size; + int i = 0; + int err; + + alloc_size = sizeof(*nh_grp) + + fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop); + nh_grp = kzalloc(alloc_size, GFP_KERNEL); + if (!nh_grp) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&nh_grp->fib_list); +#if IS_ENABLED(CONFIG_IPV6) + nh_grp->neigh_tbl = &nd_tbl; +#endif + mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list, + struct mlxsw_sp_rt6, list); + nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt); + nh_grp->count = fib6_entry->nrt6; + for (i = 0; i < nh_grp->count; i++) { + struct rt6_info *rt = mlxsw_sp_rt6->rt; + + nh = &nh_grp->nexthops[i]; + err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt); + if (err) + goto err_nexthop6_init; + mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list); + } + + err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp); + if (err) + goto err_nexthop_group_insert; + + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + return nh_grp; + +err_nexthop_group_insert: +err_nexthop6_init: + for (i--; i >= 0; i--) { + nh = &nh_grp->nexthops[i]; + mlxsw_sp_nexthop6_fini(mlxsw_sp, nh); + } + kfree(nh_grp); + return ERR_PTR(err); +} + +static void +mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_nexthop *nh; + int i = nh_grp->count; + + mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp); + for (i--; i >= 0; i--) { + nh = &nh_grp->nexthops[i]; + mlxsw_sp_nexthop6_fini(mlxsw_sp, nh); + } + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + WARN_ON(nh_grp->adj_index_valid); + kfree(nh_grp); +} + +static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + + nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry); + if (!nh_grp) { + nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry); + if (IS_ERR(nh_grp)) + return PTR_ERR(nh_grp); + } + + list_add_tail(&fib6_entry->common.nexthop_group_node, + &nh_grp->fib_list); + fib6_entry->common.nh_group = nh_grp; + + return 0; +} + +static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; + + list_del(&fib_entry->nexthop_group_node); + if (!list_empty(&nh_grp->fib_list)) + return; + mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp); +} + +static int +mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group; + int err; + + fib6_entry->common.nh_group = NULL; + list_del(&fib6_entry->common.nexthop_group_node); + + err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry); + if (err) + goto err_nexthop6_group_get; + + /* In case this entry is offloaded, then the adjacency index + * currently associated with it in the device's table is that + * of the old group. Start using the new one instead. + */ + err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common); + if (err) + goto err_fib_node_entry_add; + + if (list_empty(&old_nh_grp->fib_list)) + mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp); + + return 0; + +err_fib_node_entry_add: + mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common); +err_nexthop6_group_get: + list_add_tail(&fib6_entry->common.nexthop_group_node, + &old_nh_grp->fib_list); + fib6_entry->common.nh_group = old_nh_grp; + return err; +} + +static int +mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry, + struct rt6_info *rt) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + int err; + + mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt); + if (IS_ERR(mlxsw_sp_rt6)) + return PTR_ERR(mlxsw_sp_rt6); + + list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); + fib6_entry->nrt6++; + + err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); + if (err) + goto err_nexthop6_group_update; + + return 0; + +err_nexthop6_group_update: + fib6_entry->nrt6--; + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + return err; +} + +static void +mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry, + struct rt6_info *rt) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt); + if (WARN_ON(!mlxsw_sp_rt6)) + return; + + fib6_entry->nrt6--; + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); +} + +static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + const struct rt6_info *rt) +{ + /* Packets hitting RTF_REJECT routes need to be discarded by the + * stack. We can rely on their destination device not having a + * RIF (it's the loopback device) and can thus use action type + * local, which will cause them to be trapped with a lower + * priority than packets that need to be locally received. + */ + if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + else if (rt->rt6i_flags & RTF_REJECT) + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; + else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt)) + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; + else + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; +} + +static void +mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp; + + list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list, + list) { + fib6_entry->nrt6--; + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + } +} + +static struct mlxsw_sp_fib6_entry * +mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node, + struct rt6_info *rt) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + int err; + + fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL); + if (!fib6_entry) + return ERR_PTR(-ENOMEM); + fib_entry = &fib6_entry->common; + + mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt); + if (IS_ERR(mlxsw_sp_rt6)) { + err = PTR_ERR(mlxsw_sp_rt6); + goto err_rt6_create; + } + + mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt); + + INIT_LIST_HEAD(&fib6_entry->rt6_list); + list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); + fib6_entry->nrt6 = 1; + err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry); + if (err) + goto err_nexthop6_group_get; + + fib_entry->fib_node = fib_node; + + return fib6_entry; + +err_nexthop6_group_get: + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); +err_rt6_create: + kfree(fib6_entry); + return ERR_PTR(err); +} + +static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common); + mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry); + WARN_ON(fib6_entry->nrt6); + kfree(fib6_entry); +} + +static struct mlxsw_sp_fib6_entry * +mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, + const struct rt6_info *nrt, bool replace) +{ + struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL; + + list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { + struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + + if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id) + continue; + if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id) + break; + if (replace && rt->rt6i_metric == nrt->rt6i_metric) { + if (mlxsw_sp_fib6_rt_can_mp(rt) == + mlxsw_sp_fib6_rt_can_mp(nrt)) + return fib6_entry; + if (mlxsw_sp_fib6_rt_can_mp(nrt)) + fallback = fallback ?: fib6_entry; + } + if (rt->rt6i_metric > nrt->rt6i_metric) + return fallback ?: fib6_entry; + } + + return fallback; +} + +static int +mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry, + bool replace) +{ + struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node; + struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry); + struct mlxsw_sp_fib6_entry *fib6_entry; + + fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace); + + if (replace && WARN_ON(!fib6_entry)) + return -EINVAL; + + if (fib6_entry) { + list_add_tail(&new6_entry->common.list, + &fib6_entry->common.list); + } else { + struct mlxsw_sp_fib6_entry *last; + + list_for_each_entry(last, &fib_node->entry_list, common.list) { + struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last); + + if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id) + break; + fib6_entry = last; + } + + if (fib6_entry) + list_add(&new6_entry->common.list, + &fib6_entry->common.list); + else + list_add(&new6_entry->common.list, + &fib_node->entry_list); + } + + return 0; +} + +static void +mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry) +{ + list_del(&fib6_entry->common.list); +} + +static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry, + bool replace) +{ + int err; + + err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace); + if (err) + return err; + + err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common); + if (err) + goto err_fib_node_entry_add; + + return 0; + +err_fib_node_entry_add: + mlxsw_sp_fib6_node_list_remove(fib6_entry); + return err; +} + +static void +mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common); + mlxsw_sp_fib6_node_list_remove(fib6_entry); +} + +static struct mlxsw_sp_fib6_entry * +mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp, + const struct rt6_info *rt) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_fib_node *fib_node; + struct mlxsw_sp_fib *fib; + struct mlxsw_sp_vr *vr; + + vr = mlxsw_sp_vr_find(mlxsw_sp, rt->rt6i_table->tb6_id); + if (!vr) + return NULL; + fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6); + + fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->rt6i_dst.addr, + sizeof(rt->rt6i_dst.addr), + rt->rt6i_dst.plen); + if (!fib_node) + return NULL; + + list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { + struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + + if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id && + rt->rt6i_metric == iter_rt->rt6i_metric && + mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt)) + return fib6_entry; + } + + return NULL; +} + +static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry, + bool replace) +{ + struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node; + struct mlxsw_sp_fib6_entry *replaced; + + if (!replace) + return; + + replaced = list_next_entry(fib6_entry, common.list); + + mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced); + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); +} + +static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, + struct rt6_info *rt, bool replace) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_fib_node *fib_node; + int err; + + if (mlxsw_sp->router->aborted) + return 0; + + if (rt->rt6i_src.plen) + return -EINVAL; + + if (mlxsw_sp_fib6_rt_should_ignore(rt)) + return 0; + + fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->rt6i_table->tb6_id, + &rt->rt6i_dst.addr, + sizeof(rt->rt6i_dst.addr), + rt->rt6i_dst.plen, + MLXSW_SP_L3_PROTO_IPV6); + if (IS_ERR(fib_node)) + return PTR_ERR(fib_node); + + /* Before creating a new entry, try to append route to an existing + * multipath entry. + */ + fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace); + if (fib6_entry) { + err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt); + if (err) + goto err_fib6_entry_nexthop_add; + return 0; + } + + fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt); + if (IS_ERR(fib6_entry)) { + err = PTR_ERR(fib6_entry); + goto err_fib6_entry_create; + } + + err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace); + if (err) + goto err_fib6_node_entry_link; + + mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace); + + return 0; + +err_fib6_node_entry_link: + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); +err_fib6_entry_create: +err_fib6_entry_nexthop_add: + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return err; +} + +static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, + struct rt6_info *rt) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; if (mlxsw_sp->router->aborted) return; - fib_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info); - if (WARN_ON(!fib_entry)) + if (mlxsw_sp_fib6_rt_should_ignore(rt)) return; - fib_node = fib_entry->fib_node; - mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry); - mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry); - mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); + fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt); + if (WARN_ON(!fib6_entry)) + return; + + /* If route is part of a multipath entry, but not the last one + * removed, then only reduce its nexthop group. + */ + if (!list_is_singular(&fib6_entry->rt6_list)) { + mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt); + return; + } + + fib_node = fib6_entry->common.fib_node; + + mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry); + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } -static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) +static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_reg_ralxx_protocol proto, + u8 tree_id) { char ralta_pl[MLXSW_REG_RALTA_LEN]; char ralst_pl[MLXSW_REG_RALST_LEN]; int i, err; - mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4, - MLXSW_SP_LPM_TREE_MIN); + mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); if (err) return err; - mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN); + mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl); if (err) return err; @@ -2708,20 +4588,14 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) char raltb_pl[MLXSW_REG_RALTB_LEN]; char ralue_pl[MLXSW_REG_RALUE_LEN]; - if (!mlxsw_sp_vr_is_used(vr)) - continue; - - mlxsw_reg_raltb_pack(raltb_pl, vr->id, - MLXSW_REG_RALXX_PROTOCOL_IPV4, - MLXSW_SP_LPM_TREE_MIN); + mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); if (err) return err; - mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4, - MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0, - 0); + mlxsw_reg_ralue_pack(ralue_pl, proto, + MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0); mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); @@ -2732,17 +4606,33 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) return 0; } +static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) +{ + enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4; + int err; + + err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto, + MLXSW_SP_LPM_TREE_MIN); + if (err) + return err; + + proto = MLXSW_REG_RALXX_PROTOCOL_IPV6; + return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto, + MLXSW_SP_LPM_TREE_MIN + 1); +} + static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { - struct mlxsw_sp_fib_entry *fib_entry, *tmp; + struct mlxsw_sp_fib4_entry *fib4_entry, *tmp; - list_for_each_entry_safe(fib_entry, tmp, &fib_node->entry_list, list) { - bool do_break = &tmp->list == &fib_node->entry_list; + list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list, + common.list) { + bool do_break = &tmp->common.list == &fib_node->entry_list; - mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry); - mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry); - mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); + mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); /* Break when entry list is empty and node was freed. * Otherwise, we'll access freed memory in the next * iteration. @@ -2752,6 +4642,23 @@ static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp, } } +static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node) +{ + struct mlxsw_sp_fib6_entry *fib6_entry, *tmp; + + list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list, + common.list) { + bool do_break = &tmp->common.list == &fib_node->entry_list; + + mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry); + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + if (do_break) + break; + } +} + static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { @@ -2760,7 +4667,7 @@ static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node); break; case MLXSW_SP_L3_PROTO_IPV6: - WARN_ON_ONCE(1); + mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node); break; } } @@ -2791,10 +4698,17 @@ static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) if (!mlxsw_sp_vr_is_used(vr)) continue; mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4); + + /* If virtual router was only used for IPv4, then it's no + * longer used. + */ + if (!mlxsw_sp_vr_is_used(vr)) + continue; + mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6); } } -static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp) +static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp) { int err; @@ -2811,6 +4725,7 @@ static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp) struct mlxsw_sp_fib_event_work { struct work_struct work; union { + struct fib6_entry_notifier_info fen6_info; struct fib_entry_notifier_info fen_info; struct fib_rule_notifier_info fr_info; struct fib_nh_notifier_info fnh_info; @@ -2819,7 +4734,7 @@ struct mlxsw_sp_fib_event_work { unsigned long event; }; -static void mlxsw_sp_router_fib_event_work(struct work_struct *work) +static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) { struct mlxsw_sp_fib_event_work *fib_work = container_of(work, struct mlxsw_sp_fib_event_work, work); @@ -2839,7 +4754,7 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work) err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info, replace, append); if (err) - mlxsw_sp_router_fib4_abort(mlxsw_sp); + mlxsw_sp_router_fib_abort(mlxsw_sp); fib_info_put(fib_work->fen_info.fi); break; case FIB_EVENT_ENTRY_DEL: @@ -2850,13 +4765,13 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work) case FIB_EVENT_RULE_DEL: rule = fib_work->fr_info.rule; if (!fib4_rule_default(rule) && !rule->l3mdev) - mlxsw_sp_router_fib4_abort(mlxsw_sp); + mlxsw_sp_router_fib_abort(mlxsw_sp); fib_rule_put(rule); break; case FIB_EVENT_NH_ADD: /* fall through */ case FIB_EVENT_NH_DEL: - mlxsw_sp_nexthop_event(mlxsw_sp, fib_work->event, - fib_work->fnh_info.fib_nh); + mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event, + fib_work->fnh_info.fib_nh); fib_info_put(fib_work->fnh_info.fib_nh->nh_parent); break; } @@ -2864,6 +4779,87 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work) kfree(fib_work); } +static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) +{ + struct mlxsw_sp_fib_event_work *fib_work = + container_of(work, struct mlxsw_sp_fib_event_work, work); + struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; + struct fib_rule *rule; + bool replace; + int err; + + rtnl_lock(); + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ + case FIB_EVENT_ENTRY_ADD: + replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; + err = mlxsw_sp_router_fib6_add(mlxsw_sp, + fib_work->fen6_info.rt, replace); + if (err) + mlxsw_sp_router_fib_abort(mlxsw_sp); + mlxsw_sp_rt6_release(fib_work->fen6_info.rt); + break; + case FIB_EVENT_ENTRY_DEL: + mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt); + mlxsw_sp_rt6_release(fib_work->fen6_info.rt); + break; + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + rule = fib_work->fr_info.rule; + if (!fib6_rule_default(rule) && !rule->l3mdev) + mlxsw_sp_router_fib_abort(mlxsw_sp); + fib_rule_put(rule); + break; + } + rtnl_unlock(); + kfree(fib_work); +} + +static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, + struct fib_notifier_info *info) +{ + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ + case FIB_EVENT_ENTRY_APPEND: /* fall through */ + case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_DEL: + memcpy(&fib_work->fen_info, info, sizeof(fib_work->fen_info)); + /* Take referece on fib_info to prevent it from being + * freed while work is queued. Release it afterwards. + */ + fib_info_hold(fib_work->fen_info.fi); + break; + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info)); + fib_rule_get(fib_work->fr_info.rule); + break; + case FIB_EVENT_NH_ADD: /* fall through */ + case FIB_EVENT_NH_DEL: + memcpy(&fib_work->fnh_info, info, sizeof(fib_work->fnh_info)); + fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent); + break; + } +} + +static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, + struct fib_notifier_info *info) +{ + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ + case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_DEL: + memcpy(&fib_work->fen6_info, info, sizeof(fib_work->fen6_info)); + rt6_hold(fib_work->fen6_info.rt); + break; + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info)); + fib_rule_get(fib_work->fr_info.rule); + break; + } +} + /* Called with rcu_read_lock() */ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, unsigned long event, void *ptr) @@ -2879,31 +4875,18 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, if (WARN_ON(!fib_work)) return NOTIFY_BAD; - INIT_WORK(&fib_work->work, mlxsw_sp_router_fib_event_work); router = container_of(nb, struct mlxsw_sp_router, fib_nb); fib_work->mlxsw_sp = router->mlxsw_sp; fib_work->event = event; - switch (event) { - case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: /* fall through */ - case FIB_EVENT_ENTRY_DEL: - memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info)); - /* Take referece on fib_info to prevent it from being - * freed while work is queued. Release it afterwards. - */ - fib_info_hold(fib_work->fen_info.fi); + switch (info->family) { + case AF_INET: + INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work); + mlxsw_sp_router_fib4_event(fib_work, info); break; - case FIB_EVENT_RULE_ADD: /* fall through */ - case FIB_EVENT_RULE_DEL: - memcpy(&fib_work->fr_info, ptr, sizeof(fib_work->fr_info)); - fib_rule_get(fib_work->fr_info.rule); - break; - case FIB_EVENT_NH_ADD: /* fall through */ - case FIB_EVENT_NH_DEL: - memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info)); - fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent); + case AF_INET6: + INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work); + mlxsw_sp_router_fib6_event(fib_work, info); break; } @@ -2948,17 +4931,28 @@ static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif); } -static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, - const struct in_device *in_dev, - unsigned long event) +static bool +mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev, + unsigned long event) { + struct inet6_dev *inet6_dev; + bool addr_list_empty = true; + struct in_device *idev; + switch (event) { case NETDEV_UP: - if (!rif) - return true; - return false; + return rif == NULL; case NETDEV_DOWN: - if (rif && !in_dev->ifa_list && + idev = __in_dev_get_rtnl(dev); + if (idev && idev->ifa_list) + addr_list_empty = false; + + inet6_dev = __in6_dev_get(dev); + if (addr_list_empty && inet6_dev && + !list_empty(&inet6_dev->addr_list)) + addr_list_empty = false; + + if (rif && addr_list_empty && !netif_is_l3_slave(rif->dev)) return true; /* It is possible we already removed the RIF ourselves @@ -2977,7 +4971,10 @@ mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp, { enum mlxsw_sp_fid_type type; - /* RIF type is derived from the type of the underlying FID */ + if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL)) + return MLXSW_SP_RIF_TYPE_IPIP_LB; + + /* Otherwise RIF type is derived from the type of the underlying FID. */ if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev))) type = MLXSW_SP_FID_TYPE_8021Q; else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev)) @@ -3036,6 +5033,16 @@ u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif) return rif->rif_index; } +u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif) +{ + return lb_rif->common.rif_index; +} + +u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) +{ + return lb_rif->ul_vr_id; +} + int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif) { return rif->dev->ifindex; @@ -3047,9 +5054,9 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, { u32 tb_id = l3mdev_fib_table(params->dev); const struct mlxsw_sp_rif_ops *ops; + struct mlxsw_sp_fid *fid = NULL; enum mlxsw_sp_rif_type type; struct mlxsw_sp_rif *rif; - struct mlxsw_sp_fid *fid; struct mlxsw_sp_vr *vr; u16 rif_index; int err; @@ -3073,12 +5080,14 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, rif->mlxsw_sp = mlxsw_sp; rif->ops = ops; - fid = ops->fid_get(rif); - if (IS_ERR(fid)) { - err = PTR_ERR(fid); - goto err_fid_get; + if (ops->fid_get) { + fid = ops->fid_get(rif); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + goto err_fid_get; + } + rif->fid = fid; } - rif->fid = fid; if (ops->setup) ops->setup(rif, params); @@ -3087,22 +5096,15 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, if (err) goto err_configure; - err = mlxsw_sp_rif_fdb_op(mlxsw_sp, params->dev->dev_addr, - mlxsw_sp_fid_index(fid), true); - if (err) - goto err_rif_fdb_op; - mlxsw_sp_rif_counters_alloc(rif); - mlxsw_sp_fid_rif_set(fid, rif); mlxsw_sp->router->rifs[rif_index] = rif; vr->rif_count++; return rif; -err_rif_fdb_op: - ops->deconfigure(rif); err_configure: - mlxsw_sp_fid_put(fid); + if (fid) + mlxsw_sp_fid_put(fid); err_fid_get: kfree(rif); err_rif_alloc: @@ -3123,12 +5125,11 @@ void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) vr->rif_count--; mlxsw_sp->router->rifs[rif->rif_index] = NULL; - mlxsw_sp_fid_rif_set(fid, NULL); mlxsw_sp_rif_counters_free(rif); - mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->dev->dev_addr, - mlxsw_sp_fid_index(fid), false); ops->deconfigure(rif); - mlxsw_sp_fid_put(fid); + if (fid) + /* Loopback RIFs are not associated with a FID. */ + mlxsw_sp_fid_put(fid); kfree(rif); mlxsw_sp_vr_put(vr); } @@ -3356,7 +5357,7 @@ int mlxsw_sp_inetaddr_event(struct notifier_block *unused, goto out; rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!mlxsw_sp_rif_should_config(rif, ifa->ifa_dev, event)) + if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; err = __mlxsw_sp_inetaddr_event(dev, event); @@ -3364,6 +5365,61 @@ out: return notifier_from_errno(err); } +struct mlxsw_sp_inet6addr_event_work { + struct work_struct work; + struct net_device *dev; + unsigned long event; +}; + +static void mlxsw_sp_inet6addr_event_work(struct work_struct *work) +{ + struct mlxsw_sp_inet6addr_event_work *inet6addr_work = + container_of(work, struct mlxsw_sp_inet6addr_event_work, work); + struct net_device *dev = inet6addr_work->dev; + unsigned long event = inet6addr_work->event; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif *rif; + + rtnl_lock(); + mlxsw_sp = mlxsw_sp_lower_get(dev); + if (!mlxsw_sp) + goto out; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!mlxsw_sp_rif_should_config(rif, dev, event)) + goto out; + + __mlxsw_sp_inetaddr_event(dev, event); +out: + rtnl_unlock(); + dev_put(dev); + kfree(inet6addr_work); +} + +/* Called with rcu_read_lock() */ +int mlxsw_sp_inet6addr_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr; + struct mlxsw_sp_inet6addr_event_work *inet6addr_work; + struct net_device *dev = if6->idev->dev; + + if (!mlxsw_sp_port_dev_lower_find_rcu(dev)) + return NOTIFY_DONE; + + inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC); + if (!inet6addr_work) + return NOTIFY_BAD; + + INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work); + inet6addr_work->dev = dev; + inet6addr_work->event = event; + dev_hold(dev); + mlxsw_core_schedule_work(&inet6addr_work->work); + + return NOTIFY_DONE; +} + static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index, const char *mac, int mtu) { @@ -3501,8 +5557,8 @@ static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable) rif_subport = mlxsw_sp_rif_subport_rif(rif); mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF, - rif->rif_index, rif->vr_id, rif->dev->mtu, - rif->dev->dev_addr); + rif->rif_index, rif->vr_id, rif->dev->mtu); + mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr); mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag, rif_subport->lag ? rif_subport->lag_id : rif_subport->system_port, @@ -3513,11 +5569,32 @@ static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable) static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif) { - return mlxsw_sp_rif_subport_op(rif, true); + int err; + + err = mlxsw_sp_rif_subport_op(rif, true); + if (err) + return err; + + err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(rif->fid), true); + if (err) + goto err_rif_fdb_op; + + mlxsw_sp_fid_rif_set(rif->fid, rif); + return 0; + +err_rif_fdb_op: + mlxsw_sp_rif_subport_op(rif, false); + return err; } static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif) { + struct mlxsw_sp_fid *fid = rif->fid; + + mlxsw_sp_fid_rif_set(fid, NULL); + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(fid), false); mlxsw_sp_rif_subport_op(rif, false); } @@ -3544,7 +5621,8 @@ static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif, char ritr_pl[MLXSW_REG_RITR_LEN]; mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id, - rif->dev->mtu, rif->dev->dev_addr); + rif->dev->mtu); + mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr); mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); @@ -3565,25 +5643,48 @@ static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif) if (err) return err; + err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), true); + if (err) + goto err_fid_mc_flood_set; + err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, mlxsw_sp_router_port(mlxsw_sp), true); if (err) goto err_fid_bc_flood_set; + err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(rif->fid), true); + if (err) + goto err_rif_fdb_op; + + mlxsw_sp_fid_rif_set(rif->fid, rif); return 0; +err_rif_fdb_op: + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, + mlxsw_sp_router_port(mlxsw_sp), false); err_fid_bc_flood_set: + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), false); +err_fid_mc_flood_set: mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false); return err; } static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif) { - struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid); + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_fid *fid = rif->fid; + mlxsw_sp_fid_rif_set(fid, NULL); + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(fid), false); mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, mlxsw_sp_router_port(mlxsw_sp), false); + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), false); mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false); } @@ -3614,25 +5715,48 @@ static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif) if (err) return err; + err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), true); + if (err) + goto err_fid_mc_flood_set; + err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, mlxsw_sp_router_port(mlxsw_sp), true); if (err) goto err_fid_bc_flood_set; + err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(rif->fid), true); + if (err) + goto err_rif_fdb_op; + + mlxsw_sp_fid_rif_set(rif->fid, rif); return 0; +err_rif_fdb_op: + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, + mlxsw_sp_router_port(mlxsw_sp), false); err_fid_bc_flood_set: + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), false); +err_fid_mc_flood_set: mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false); return err; } static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif) { - struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; u16 fid_index = mlxsw_sp_fid_index(rif->fid); + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_fid *fid = rif->fid; + mlxsw_sp_fid_rif_set(fid, NULL); + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(fid), false); mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, mlxsw_sp_router_port(mlxsw_sp), false); + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), false); mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false); } @@ -3650,10 +5774,104 @@ static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = { .fid_get = mlxsw_sp_rif_fid_fid_get, }; +static struct mlxsw_sp_rif_ipip_lb * +mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif) +{ + return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common); +} + +static void +mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif, + const struct mlxsw_sp_rif_params *params) +{ + struct mlxsw_sp_rif_params_ipip_lb *params_lb; + struct mlxsw_sp_rif_ipip_lb *rif_lb; + + params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb, + common); + rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif); + rif_lb->lb_config = params_lb->lb_config; +} + +static int +mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, + struct mlxsw_sp_vr *ul_vr, bool enable) +{ + struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config; + struct mlxsw_sp_rif *rif = &lb_rif->common; + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + char ritr_pl[MLXSW_REG_RITR_LEN]; + u32 saddr4; + + switch (lb_cf.ul_protocol) { + case MLXSW_SP_L3_PROTO_IPV4: + saddr4 = be32_to_cpu(lb_cf.saddr.addr4); + mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF, + rif->rif_index, rif->vr_id, rif->dev->mtu); + mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt, + MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET, + ul_vr->id, saddr4, lb_cf.okey); + break; + + case MLXSW_SP_L3_PROTO_IPV6: + return -EAFNOSUPPORT; + } + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static int +mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); + u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev); + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_vr *ul_vr; + int err; + + ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id); + if (IS_ERR(ul_vr)) + return PTR_ERR(ul_vr); + + err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true); + if (err) + goto err_loopback_op; + + lb_rif->ul_vr_id = ul_vr->id; + ++ul_vr->rif_count; + return 0; + +err_loopback_op: + mlxsw_sp_vr_put(ul_vr); + return err; +} + +static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_vr *ul_vr; + + ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id]; + mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false); + + --ul_vr->rif_count; + mlxsw_sp_vr_put(ul_vr); +} + +static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = { + .type = MLXSW_SP_RIF_TYPE_IPIP_LB, + .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb), + .setup = mlxsw_sp_rif_ipip_lb_setup, + .configure = mlxsw_sp_rif_ipip_lb_configure, + .deconfigure = mlxsw_sp_rif_ipip_lb_deconfigure, +}; + static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = { [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops, [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_ops, [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops, + [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp_rif_ipip_lb_ops, }; static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp) @@ -3681,6 +5899,18 @@ static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp) kfree(mlxsw_sp->router->rifs); } +static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr; + INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list); + return 0; +} + +static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp) +{ + WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list)); +} + static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb) { struct mlxsw_sp_router *router; @@ -3704,7 +5934,7 @@ static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) return -EIO; max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); - mlxsw_reg_rgcr_pack(rgcr_pl, true); + mlxsw_reg_rgcr_pack(rgcr_pl, true, true); mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); if (err) @@ -3716,7 +5946,7 @@ static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) { char rgcr_pl[MLXSW_REG_RGCR_LEN]; - mlxsw_reg_rgcr_pack(rgcr_pl, false); + mlxsw_reg_rgcr_pack(rgcr_pl, false, false); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); } @@ -3740,6 +5970,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_rifs_init; + err = mlxsw_sp_ipips_init(mlxsw_sp); + if (err) + goto err_ipips_init; + err = rhashtable_init(&mlxsw_sp->router->nexthop_ht, &mlxsw_sp_nexthop_ht_params); if (err) @@ -3781,6 +6015,8 @@ err_lpm_init: err_nexthop_group_ht_init: rhashtable_destroy(&mlxsw_sp->router->nexthop_ht); err_nexthop_ht_init: + mlxsw_sp_ipips_fini(mlxsw_sp); +err_ipips_init: mlxsw_sp_rifs_fini(mlxsw_sp); err_rifs_init: __mlxsw_sp_router_fini(mlxsw_sp); @@ -3797,6 +6033,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_lpm_fini(mlxsw_sp); rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht); rhashtable_destroy(&mlxsw_sp->router->nexthop_ht); + mlxsw_sp_ipips_fini(mlxsw_sp); mlxsw_sp_rifs_fini(mlxsw_sp); __mlxsw_sp_router_fini(mlxsw_sp); kfree(mlxsw_sp->router); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index a3e8d2b25148..345fcc4f38e9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -36,15 +36,38 @@ #define _MLXSW_ROUTER_H_ #include "spectrum.h" +#include "reg.h" + +enum mlxsw_sp_l3proto { + MLXSW_SP_L3_PROTO_IPV4, + MLXSW_SP_L3_PROTO_IPV6, +}; + +union mlxsw_sp_l3addr { + __be32 addr4; + struct in6_addr addr6; +}; + +struct mlxsw_sp_rif_ipip_lb; +struct mlxsw_sp_rif_ipip_lb_config { + enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt; + u32 okey; + enum mlxsw_sp_l3proto ul_protocol; /* Underlay. */ + union mlxsw_sp_l3addr saddr; +}; enum mlxsw_sp_rif_counter_dir { MLXSW_SP_RIF_COUNTER_INGRESS, MLXSW_SP_RIF_COUNTER_EGRESS, }; +struct mlxsw_sp_neigh_entry; + struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp, u16 rif_index); u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif); +u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif); +u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif); int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif); int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif, @@ -56,5 +79,33 @@ void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp, int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif, enum mlxsw_sp_rif_counter_dir dir); +struct mlxsw_sp_neigh_entry * +mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif, + struct mlxsw_sp_neigh_entry *neigh_entry); +int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry); +unsigned char * +mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry); +u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry); +struct in6_addr * +mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry); + +#define mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) \ + for (neigh_entry = mlxsw_sp_rif_neigh_next(rif, NULL); neigh_entry; \ + neigh_entry = mlxsw_sp_rif_neigh_next(rif, neigh_entry)) +int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + u64 *p_counter); +void +mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + bool adding); +bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry); +union mlxsw_sp_l3addr +mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev); +union mlxsw_sp_l3addr +mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev); +__be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev); #endif /* _MLXSW_ROUTER_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchib.c b/drivers/net/ethernet/mellanox/mlxsw/switchib.c index 74341fe0eb25..ab7a29846bfa 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchib.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchib.c @@ -497,7 +497,7 @@ static void mlxsw_sib_fini(struct mlxsw_core *mlxsw_core) mlxsw_sib_ports_remove(mlxsw_sib); } -static struct mlxsw_config_profile mlxsw_sib_config_profile = { +static const struct mlxsw_config_profile mlxsw_sib_config_profile = { .used_max_system_port = 1, .max_system_port = 48000, .used_max_ib_mc = 1, diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 3b0f72455681..f3c29bbf07e2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -1674,7 +1674,7 @@ static void mlxsw_sx_fini(struct mlxsw_core *mlxsw_core) mlxsw_sx_ports_remove(mlxsw_sx); } -static struct mlxsw_config_profile mlxsw_sx_config_profile = { +static const struct mlxsw_config_profile mlxsw_sx_config_profile = { .used_max_vepa_channels = 1, .max_vepa_channels = 0, .used_max_mid = 1, diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 12b5ed58f3eb..f396a1fef633 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -61,11 +61,33 @@ enum { MLXSW_TRAP_ID_MTUERROR = 0x52, MLXSW_TRAP_ID_TTLERROR = 0x53, MLXSW_TRAP_ID_LBERROR = 0x54, - MLXSW_TRAP_ID_OSPF = 0x55, + MLXSW_TRAP_ID_IPV4_OSPF = 0x55, MLXSW_TRAP_ID_IP2ME = 0x5F, + MLXSW_TRAP_ID_IPV6_UNSPECIFIED_ADDRESS = 0x60, + MLXSW_TRAP_ID_IPV6_LINK_LOCAL_DEST = 0x61, + MLXSW_TRAP_ID_IPV6_LINK_LOCAL_SRC = 0x62, + MLXSW_TRAP_ID_IPV6_ALL_NODES_LINK = 0x63, + MLXSW_TRAP_ID_IPV6_OSPF = 0x64, + MLXSW_TRAP_ID_IPV6_MLDV12_LISTENER_QUERY = 0x65, + MLXSW_TRAP_ID_IPV6_MLDV1_LISTENER_REPORT = 0x66, + MLXSW_TRAP_ID_IPV6_MLDV1_LISTENER_DONE = 0x67, + MLXSW_TRAP_ID_IPV6_MLDV2_LISTENER_REPORT = 0x68, + MLXSW_TRAP_ID_IPV6_DHCP = 0x69, + MLXSW_TRAP_ID_IPV6_ALL_ROUTERS_LINK = 0x6F, MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70, - MLXSW_TRAP_ID_BGP_IPV4 = 0x88, + MLXSW_TRAP_ID_IPV4_BGP = 0x88, + MLXSW_TRAP_ID_IPV6_BGP = 0x89, + MLXSW_TRAP_ID_L3_IPV6_ROUTER_SOLICITATION = 0x8A, + MLXSW_TRAP_ID_L3_IPV6_ROUTER_ADVERTISMENT = 0x8B, + MLXSW_TRAP_ID_L3_IPV6_NEIGHBOR_SOLICITATION = 0x8C, + MLXSW_TRAP_ID_L3_IPV6_NEIGHBOR_ADVERTISMENT = 0x8D, + MLXSW_TRAP_ID_L3_IPV6_REDIRECTION = 0x8E, MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90, + MLXSW_TRAP_ID_IPV6_MC_LINK_LOCAL_DEST = 0x91, + MLXSW_TRAP_ID_HOST_MISS_IPV6 = 0x92, + MLXSW_TRAP_ID_IPIP_DECAP_ERROR = 0xB1, + MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6, + MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7, MLXSW_TRAP_ID_ACL0 = 0x1C0, MLXSW_TRAP_ID_MAX = 0x1FF |