diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx4/en_tx.c')
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/en_tx.c | 311 |
1 files changed, 149 insertions, 162 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 6ffd1849a604..4f3a9b27ce4a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -234,23 +234,24 @@ static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, u8 owner) { __be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT)); - struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; + struct mlx4_en_tx_desc *tx_desc = ring->buf + (index << LOG_TXBB_SIZE); struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; void *end = ring->buf + ring->buf_size; __be32 *ptr = (__be32 *)tx_desc; int i; /* Optimize the common case when there are no wraparounds */ - if (likely((void *)tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) { + if (likely((void *)tx_desc + + (tx_info->nr_txbb << LOG_TXBB_SIZE) <= end)) { /* Stamp the freed descriptor */ - for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; + for (i = 0; i < tx_info->nr_txbb << LOG_TXBB_SIZE; i += STAMP_STRIDE) { *ptr = stamp; ptr += STAMP_DWORDS; } } else { /* Stamp the freed descriptor */ - for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; + for (i = 0; i < tx_info->nr_txbb << LOG_TXBB_SIZE; i += STAMP_STRIDE) { *ptr = stamp; ptr += STAMP_DWORDS; @@ -265,11 +266,11 @@ static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, - int index, u8 owner, u64 timestamp, + int index, u64 timestamp, int napi_mode) { struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; - struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; + struct mlx4_en_tx_desc *tx_desc = ring->buf + (index << LOG_TXBB_SIZE); struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset; void *end = ring->buf + ring->buf_size; struct sk_buff *skb = tx_info->skb; @@ -288,19 +289,20 @@ u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, skb_tstamp_tx(skb, &hwts); } - /* Optimize the common case when there are no wraparounds */ - if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) { - if (!tx_info->inl) { - if (tx_info->linear) - dma_unmap_single(priv->ddev, - tx_info->map0_dma, - tx_info->map0_byte_count, - PCI_DMA_TODEVICE); - else - dma_unmap_page(priv->ddev, - tx_info->map0_dma, - tx_info->map0_byte_count, - PCI_DMA_TODEVICE); + if (!tx_info->inl) { + if (tx_info->linear) + dma_unmap_single(priv->ddev, + tx_info->map0_dma, + tx_info->map0_byte_count, + PCI_DMA_TODEVICE); + else + dma_unmap_page(priv->ddev, + tx_info->map0_dma, + tx_info->map0_byte_count, + PCI_DMA_TODEVICE); + /* Optimize the common case when there are no wraparounds */ + if (likely((void *)tx_desc + + (tx_info->nr_txbb << LOG_TXBB_SIZE) <= end)) { for (i = 1; i < nr_maps; i++) { data++; dma_unmap_page(priv->ddev, @@ -308,23 +310,10 @@ u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, be32_to_cpu(data->byte_count), PCI_DMA_TODEVICE); } - } - } else { - if (!tx_info->inl) { - if ((void *) data >= end) { + } else { + if ((void *)data >= end) data = ring->buf + ((void *)data - end); - } - if (tx_info->linear) - dma_unmap_single(priv->ddev, - tx_info->map0_dma, - tx_info->map0_byte_count, - PCI_DMA_TODEVICE); - else - dma_unmap_page(priv->ddev, - tx_info->map0_dma, - tx_info->map0_byte_count, - PCI_DMA_TODEVICE); for (i = 1; i < nr_maps; i++) { data++; /* Check for wraparound before unmapping */ @@ -344,7 +333,7 @@ u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, - int index, u8 owner, u64 timestamp, + int index, u64 timestamp, int napi_mode) { struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; @@ -381,8 +370,7 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) while (ring->cons != ring->prod) { ring->last_nr_txbb = ring->free_tx_desc(priv, ring, ring->cons & ring->size_mask, - !!(ring->cons & ring->size), 0, - 0 /* Non-NAPI caller */); + 0, 0 /* Non-NAPI caller */); ring->cons += ring->last_nr_txbb; cnt++; } @@ -396,15 +384,14 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) return cnt; } -static bool mlx4_en_process_tx_cq(struct net_device *dev, - struct mlx4_en_cq *cq, int napi_budget) +bool mlx4_en_process_tx_cq(struct net_device *dev, + struct mlx4_en_cq *cq, int napi_budget) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cq *mcq = &cq->mcq; struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->type][cq->ring]; struct mlx4_cqe *cqe; - u16 index; - u16 new_index, ring_index, stamp_index; + u16 index, ring_index, stamp_index; u32 txbbs_skipped = 0; u32 txbbs_stamp = 0; u32 cons_index = mcq->cons_index; @@ -419,7 +406,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, u32 last_nr_txbb; u32 ring_cons; - if (!priv->port_up) + if (unlikely(!priv->port_up)) return true; netdev_txq_bql_complete_prefetchw(ring->tx_queue); @@ -434,6 +421,8 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, cons_index & size) && (done < budget)) { + u16 new_index; + /* * make sure we read the CQE after we read the * ownership bit @@ -464,8 +453,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, /* free next descriptor */ last_nr_txbb = ring->free_tx_desc( priv, ring, ring_index, - !!((ring_cons + txbbs_skipped) & - ring->size), timestamp, napi_budget); + timestamp, napi_budget); mlx4_en_stamp_wqe(priv, ring, stamp_index, !!((ring_cons + txbbs_stamp) & @@ -481,7 +469,6 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor; } - /* * To prevent CQ overflow we first update CQ consumer and only then * the ring consumer. @@ -494,7 +481,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, ACCESS_ONCE(ring->last_nr_txbb) = last_nr_txbb; ACCESS_ONCE(ring->cons) = ring_cons + txbbs_skipped; - if (ring->free_tx_desc == mlx4_en_recycle_tx_desc) + if (cq->type == TX_XDP) return done < budget; netdev_tx_completed_queue(ring->tx_queue, packets, bytes); @@ -506,6 +493,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, netif_tx_wake_queue(ring->tx_queue); ring->wake_queue++; } + return done < budget; } @@ -526,7 +514,7 @@ int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget) struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi); struct net_device *dev = cq->dev; struct mlx4_en_priv *priv = netdev_priv(dev); - int clean_complete; + bool clean_complete; clean_complete = mlx4_en_process_tx_cq(dev, cq, budget); if (!clean_complete) @@ -543,7 +531,7 @@ static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv, u32 index, unsigned int desc_size) { - u32 copy = (ring->size - index) * TXBB_SIZE; + u32 copy = (ring->size - index) << LOG_TXBB_SIZE; int i; for (i = desc_size - copy - 4; i >= 0; i -= 4) { @@ -558,12 +546,12 @@ static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv, if ((i & (TXBB_SIZE - 1)) == 0) wmb(); - *((u32 *) (ring->buf + index * TXBB_SIZE + i)) = + *((u32 *)(ring->buf + (index << LOG_TXBB_SIZE) + i)) = *((u32 *) (ring->bounce_buf + i)); } /* Return real descriptor location */ - return ring->buf + index * TXBB_SIZE; + return ring->buf + (index << LOG_TXBB_SIZE); } /* Decide if skb can be inlined in tx descriptor to avoid dma mapping @@ -703,15 +691,11 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, { struct mlx4_en_priv *priv = netdev_priv(dev); u16 rings_p_up = priv->num_tx_rings_p_up; - u8 up = 0; if (netdev_get_num_tc(dev)) return skb_tx_hash(dev, skb); - if (skb_vlan_tag_present(skb)) - up = skb_vlan_tag_get(skb) >> VLAN_PRIO_SHIFT; - - return fallback(dev, skb) % rings_p_up + up * rings_p_up; + return fallback(dev, skb) % rings_p_up; } static void mlx4_bf_copy(void __iomem *dst, const void *src, @@ -775,37 +759,101 @@ static void mlx4_en_tx_write_desc(struct mlx4_en_tx_ring *ring, } } +static bool mlx4_en_build_dma_wqe(struct mlx4_en_priv *priv, + struct skb_shared_info *shinfo, + struct mlx4_wqe_data_seg *data, + struct sk_buff *skb, + int lso_header_size, + __be32 mr_key, + struct mlx4_en_tx_info *tx_info) +{ + struct device *ddev = priv->ddev; + dma_addr_t dma = 0; + u32 byte_count = 0; + int i_frag; + + /* Map fragments if any */ + for (i_frag = shinfo->nr_frags - 1; i_frag >= 0; i_frag--) { + const struct skb_frag_struct *frag; + + frag = &shinfo->frags[i_frag]; + byte_count = skb_frag_size(frag); + dma = skb_frag_dma_map(ddev, frag, + 0, byte_count, + DMA_TO_DEVICE); + if (dma_mapping_error(ddev, dma)) + goto tx_drop_unmap; + + data->addr = cpu_to_be64(dma); + data->lkey = mr_key; + dma_wmb(); + data->byte_count = cpu_to_be32(byte_count); + --data; + } + + /* Map linear part if needed */ + if (tx_info->linear) { + byte_count = skb_headlen(skb) - lso_header_size; + + dma = dma_map_single(ddev, skb->data + + lso_header_size, byte_count, + PCI_DMA_TODEVICE); + if (dma_mapping_error(ddev, dma)) + goto tx_drop_unmap; + + data->addr = cpu_to_be64(dma); + data->lkey = mr_key; + dma_wmb(); + data->byte_count = cpu_to_be32(byte_count); + } + /* tx completion can avoid cache line miss for common cases */ + tx_info->map0_dma = dma; + tx_info->map0_byte_count = byte_count; + + return true; + +tx_drop_unmap: + en_err(priv, "DMA mapping error\n"); + + while (++i_frag < shinfo->nr_frags) { + ++data; + dma_unmap_page(ddev, (dma_addr_t)be64_to_cpu(data->addr), + be32_to_cpu(data->byte_count), + PCI_DMA_TODEVICE); + } + + return false; +} + netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) { struct skb_shared_info *shinfo = skb_shinfo(skb); struct mlx4_en_priv *priv = netdev_priv(dev); union mlx4_wqe_qpn_vlan qpn_vlan = {}; - struct device *ddev = priv->ddev; struct mlx4_en_tx_ring *ring; struct mlx4_en_tx_desc *tx_desc; struct mlx4_wqe_data_seg *data; struct mlx4_en_tx_info *tx_info; - int tx_ind = 0; + int tx_ind; int nr_txbb; int desc_size; int real_size; u32 index, bf_index; __be32 op_own; - u16 vlan_proto = 0; - int i_frag; int lso_header_size; void *fragptr = NULL; bool bounce = false; bool send_doorbell; bool stop_queue; bool inline_ok; + u8 data_offset; u32 ring_cons; bool bf_ok; tx_ind = skb_get_queue_mapping(skb); ring = priv->tx_ring[TX][tx_ind]; - if (!priv->port_up) + if (unlikely(!priv->port_up)) goto tx_drop; /* fetch ring->cons far ahead before needing it to avoid stall */ @@ -818,7 +866,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) /* Align descriptor to TXBB size */ desc_size = ALIGN(real_size, TXBB_SIZE); - nr_txbb = desc_size / TXBB_SIZE; + nr_txbb = desc_size >> LOG_TXBB_SIZE; if (unlikely(nr_txbb > MAX_DESC_TXBBS)) { if (netif_msg_tx_err(priv)) en_warn(priv, "Oversized header or SG list\n"); @@ -827,6 +875,8 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) bf_ok = ring->bf_enabled; if (skb_vlan_tag_present(skb)) { + u16 vlan_proto; + qpn_vlan.vlan_tag = cpu_to_be16(skb_vlan_tag_get(skb)); vlan_proto = be16_to_cpu(skb->vlan_proto); if (vlan_proto == ETH_P_8021AD) @@ -851,7 +901,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) /* See if we have enough space for whole descriptor TXBB for setting * SW ownership on next descriptor; if not, use a bounce buffer. */ if (likely(index + nr_txbb <= ring->size)) - tx_desc = ring->buf + index * TXBB_SIZE; + tx_desc = ring->buf + (index << LOG_TXBB_SIZE); else { tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf; bounce = true; @@ -863,64 +913,31 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) tx_info->skb = skb; tx_info->nr_txbb = nr_txbb; - data = &tx_desc->data; - if (lso_header_size) - data = ((void *)&tx_desc->lso + ALIGN(lso_header_size + 4, - DS_SIZE)); + if (!lso_header_size) { + data = &tx_desc->data; + data_offset = offsetof(struct mlx4_en_tx_desc, data); + } else { + int lso_align = ALIGN(lso_header_size + 4, DS_SIZE); + + data = (void *)&tx_desc->lso + lso_align; + data_offset = offsetof(struct mlx4_en_tx_desc, lso) + lso_align; + } /* valid only for none inline segments */ - tx_info->data_offset = (void *)data - (void *)tx_desc; + tx_info->data_offset = data_offset; tx_info->inl = inline_ok; - tx_info->linear = (lso_header_size < skb_headlen(skb) && - !inline_ok) ? 1 : 0; + tx_info->linear = lso_header_size < skb_headlen(skb) && !inline_ok; tx_info->nr_maps = shinfo->nr_frags + tx_info->linear; data += tx_info->nr_maps - 1; - if (!tx_info->inl) { - dma_addr_t dma = 0; - u32 byte_count = 0; - - /* Map fragments if any */ - for (i_frag = shinfo->nr_frags - 1; i_frag >= 0; i_frag--) { - const struct skb_frag_struct *frag; - - frag = &shinfo->frags[i_frag]; - byte_count = skb_frag_size(frag); - dma = skb_frag_dma_map(ddev, frag, - 0, byte_count, - DMA_TO_DEVICE); - if (dma_mapping_error(ddev, dma)) - goto tx_drop_unmap; - - data->addr = cpu_to_be64(dma); - data->lkey = ring->mr_key; - dma_wmb(); - data->byte_count = cpu_to_be32(byte_count); - --data; - } - - /* Map linear part if needed */ - if (tx_info->linear) { - byte_count = skb_headlen(skb) - lso_header_size; - - dma = dma_map_single(ddev, skb->data + - lso_header_size, byte_count, - PCI_DMA_TODEVICE); - if (dma_mapping_error(ddev, dma)) - goto tx_drop_unmap; - - data->addr = cpu_to_be64(dma); - data->lkey = ring->mr_key; - dma_wmb(); - data->byte_count = cpu_to_be32(byte_count); - } - /* tx completion can avoid cache line miss for common cases */ - tx_info->map0_dma = dma; - tx_info->map0_byte_count = byte_count; - } + if (!tx_info->inl) + if (!mlx4_en_build_dma_wqe(priv, shinfo, data, skb, + lso_header_size, ring->mr_key, + tx_info)) + goto tx_drop_count; /* * For timestamping add flag to skb_shinfo and @@ -1056,16 +1073,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) } return NETDEV_TX_OK; -tx_drop_unmap: - en_err(priv, "DMA mapping error\n"); - - while (++i_frag < shinfo->nr_frags) { - ++data; - dma_unmap_page(ddev, (dma_addr_t) be64_to_cpu(data->addr), - be32_to_cpu(data->byte_count), - PCI_DMA_TODEVICE); - } - tx_drop_count: ring->tx_dropped++; tx_drop: @@ -1073,52 +1080,41 @@ tx_drop: return NETDEV_TX_OK; } +#define MLX4_EN_XDP_TX_NRTXBB 1 +#define MLX4_EN_XDP_TX_REAL_SZ (((CTRL_SIZE + MLX4_EN_XDP_TX_NRTXBB * DS_SIZE) \ + / 16) & 0x3f) + netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, struct mlx4_en_rx_alloc *frame, struct net_device *dev, unsigned int length, - int tx_ind, int *doorbell_pending) + int tx_ind, bool *doorbell_pending) { struct mlx4_en_priv *priv = netdev_priv(dev); union mlx4_wqe_qpn_vlan qpn_vlan = {}; - struct mlx4_en_tx_ring *ring; struct mlx4_en_tx_desc *tx_desc; - struct mlx4_wqe_data_seg *data; struct mlx4_en_tx_info *tx_info; - int index, bf_index; - bool send_doorbell; - int nr_txbb = 1; - bool stop_queue; + struct mlx4_wqe_data_seg *data; + struct mlx4_en_tx_ring *ring; dma_addr_t dma; - int real_size; __be32 op_own; - u32 ring_cons; - bool bf_ok; + int index; - BUILD_BUG_ON_MSG(ALIGN(CTRL_SIZE + DS_SIZE, TXBB_SIZE) != TXBB_SIZE, - "mlx4_en_xmit_frame requires minimum size tx desc"); + if (unlikely(!priv->port_up)) + goto tx_drop; ring = priv->tx_ring[TX_XDP][tx_ind]; - if (!priv->port_up) - goto tx_drop; - - if (mlx4_en_is_tx_ring_full(ring)) + if (unlikely(mlx4_en_is_tx_ring_full(ring))) goto tx_drop_count; - /* fetch ring->cons far ahead before needing it to avoid stall */ - ring_cons = READ_ONCE(ring->cons); - index = ring->prod & ring->size_mask; tx_info = &ring->tx_info[index]; - bf_ok = ring->bf_enabled; - /* Track current inflight packets for performance analysis */ AVG_PERF_COUNTER(priv->pstats.inflight_avg, - (u32)(ring->prod - ring_cons - 1)); + (u32)(ring->prod - READ_ONCE(ring->cons) - 1)); - bf_index = ring->prod; - tx_desc = ring->buf + index * TXBB_SIZE; + tx_desc = ring->buf + (index << LOG_TXBB_SIZE); data = &tx_desc->data; dma = frame->dma; @@ -1127,9 +1123,9 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, frame->page = NULL; tx_info->map0_dma = dma; tx_info->map0_byte_count = PAGE_SIZE; - tx_info->nr_txbb = nr_txbb; + tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB; tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN); - tx_info->data_offset = (void *)data - (void *)tx_desc; + tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data); tx_info->ts_requested = 0; tx_info->nr_maps = 1; tx_info->linear = 1; @@ -1153,28 +1149,19 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, rx_ring->xdp_tx++; AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, length); - ring->prod += nr_txbb; - - stop_queue = mlx4_en_is_tx_ring_full(ring); - send_doorbell = stop_queue || - *doorbell_pending > MLX4_EN_DOORBELL_BUDGET; - bf_ok &= send_doorbell; + ring->prod += MLX4_EN_XDP_TX_NRTXBB; - real_size = ((CTRL_SIZE + nr_txbb * DS_SIZE) / 16) & 0x3f; + qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ; - if (bf_ok) - qpn_vlan.bf_qpn = ring->doorbell_qpn | cpu_to_be32(real_size); - else - qpn_vlan.fence_size = real_size; - - mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, bf_index, - op_own, bf_ok, send_doorbell); - *doorbell_pending = send_doorbell ? 0 : *doorbell_pending + 1; + mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, 0, + op_own, false, false); + *doorbell_pending = true; return NETDEV_TX_OK; tx_drop_count: rx_ring->xdp_tx_full++; + *doorbell_pending = true; tx_drop: return NETDEV_TX_BUSY; } |