diff options
Diffstat (limited to 'drivers/net/ethernet/intel/i40e/i40e_txrx.c')
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_txrx.c | 272 |
1 files changed, 232 insertions, 40 deletions
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 77115c25d96f..b936febc315a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -26,6 +26,7 @@ #include <linux/prefetch.h> #include <net/busy_poll.h> +#include <linux/bpf_trace.h> #include "i40e.h" #include "i40e_trace.h" #include "i40e_prototype.h" @@ -629,6 +630,8 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, if (tx_buffer->skb) { if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) kfree(tx_buffer->raw_buf); + else if (ring_is_xdp(ring)) + page_frag_free(tx_buffer->raw_buf); else dev_kfree_skb_any(tx_buffer->skb); if (dma_unmap_len(tx_buffer, len)) @@ -770,8 +773,11 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, total_bytes += tx_buf->bytecount; total_packets += tx_buf->gso_segs; - /* free the skb */ - napi_consume_skb(tx_buf->skb, napi_budget); + /* free the skb/XDP data */ + if (ring_is_xdp(tx_ring)) + page_frag_free(tx_buf->raw_buf); + else + napi_consume_skb(tx_buf->skb, napi_budget); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -847,6 +853,9 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, tx_ring->arm_wb = true; } + if (ring_is_xdp(tx_ring)) + return !!budget; + /* notify netdev of completed buffers */ netdev_tx_completed_queue(txring_txq(tx_ring), total_packets, total_bytes); @@ -1195,6 +1204,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring) void i40e_free_rx_resources(struct i40e_ring *rx_ring) { i40e_clean_rx_ring(rx_ring); + rx_ring->xdp_prog = NULL; kfree(rx_ring->rx_bi); rx_ring->rx_bi = NULL; @@ -1241,6 +1251,8 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring) rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; + rx_ring->xdp_prog = rx_ring->vsi->xdp_prog; + return 0; err: kfree(rx_ring->rx_bi); @@ -1593,6 +1605,7 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring, * i40e_cleanup_headers - Correct empty headers * @rx_ring: rx descriptor ring packet is being transacted on * @skb: pointer to current skb being fixed + * @rx_desc: pointer to the EOP Rx descriptor * * Also address the case where we are pulling data in on pages only * and as such no data is present in the skb header. @@ -1602,8 +1615,25 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring, * * Returns true if an error was encountered and skb was freed. **/ -static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb) +static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb, + union i40e_rx_desc *rx_desc) + { + /* XDP packets use error pointer so abort at this point */ + if (IS_ERR(skb)) + return true; + + /* ERR_MASK will only have valid bits if EOP set, and + * what we are doing here is actually checking + * I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in + * the error field + */ + if (unlikely(i40e_test_staterr(rx_desc, + BIT(I40E_RXD_QW1_ERROR_SHIFT)))) { + dev_kfree_skb_any(skb); + return true; + } + /* if eth_skb_pad returns an error the skb was freed */ if (eth_skb_pad(skb)) return true; @@ -1776,7 +1806,7 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring, * i40e_construct_skb - Allocate skb and populate it * @rx_ring: rx descriptor ring to transact packets on * @rx_buffer: rx buffer to pull data from - * @size: size of buffer to add to skb + * @xdp: xdp_buff pointing to the data * * This function allocates an skb. It then populates it with the page * data from the current receive descriptor, taking care to set up the @@ -1784,9 +1814,9 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring, */ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer, - unsigned int size) + struct xdp_buff *xdp) { - void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; + unsigned int size = xdp->data_end - xdp->data; #if (PAGE_SIZE < 8192) unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; #else @@ -1796,9 +1826,9 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, struct sk_buff *skb; /* prefetch first cache line of first page */ - prefetch(va); + prefetch(xdp->data); #if L1_CACHE_BYTES < 128 - prefetch(va + L1_CACHE_BYTES); + prefetch(xdp->data + L1_CACHE_BYTES); #endif /* allocate a skb to store the frags */ @@ -1811,10 +1841,11 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, /* Determine available headroom for copy */ headlen = size; if (headlen > I40E_RX_HDR_SIZE) - headlen = eth_get_headlen(va, I40E_RX_HDR_SIZE); + headlen = eth_get_headlen(xdp->data, I40E_RX_HDR_SIZE); /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); + memcpy(__skb_put(skb, headlen), xdp->data, + ALIGN(headlen, sizeof(long))); /* update all of the pointers */ size -= headlen; @@ -1841,16 +1872,16 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, * i40e_build_skb - Build skb around an existing buffer * @rx_ring: Rx descriptor ring to transact packets on * @rx_buffer: Rx buffer to pull data from - * @size: size of buffer to add to skb + * @xdp: xdp_buff pointing to the data * * This function builds an skb around an existing Rx buffer, taking care * to set up the skb correctly and avoid any memcpy overhead. */ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer, - unsigned int size) + struct xdp_buff *xdp) { - void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; + unsigned int size = xdp->data_end - xdp->data; #if (PAGE_SIZE < 8192) unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; #else @@ -1860,12 +1891,12 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, struct sk_buff *skb; /* prefetch first cache line of first page */ - prefetch(va); + prefetch(xdp->data); #if L1_CACHE_BYTES < 128 - prefetch(va + L1_CACHE_BYTES); + prefetch(xdp->data + L1_CACHE_BYTES); #endif /* build an skb around the page buffer */ - skb = build_skb(va - I40E_SKB_PAD, truesize); + skb = build_skb(xdp->data_hard_start, truesize); if (unlikely(!skb)) return NULL; @@ -1944,6 +1975,75 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring, return true; } +#define I40E_XDP_PASS 0 +#define I40E_XDP_CONSUMED 1 +#define I40E_XDP_TX 2 + +static int i40e_xmit_xdp_ring(struct xdp_buff *xdp, + struct i40e_ring *xdp_ring); + +/** + * i40e_run_xdp - run an XDP program + * @rx_ring: Rx ring being processed + * @xdp: XDP buffer containing the frame + **/ +static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring, + struct xdp_buff *xdp) +{ + int result = I40E_XDP_PASS; + struct i40e_ring *xdp_ring; + struct bpf_prog *xdp_prog; + u32 act; + + rcu_read_lock(); + xdp_prog = READ_ONCE(rx_ring->xdp_prog); + + if (!xdp_prog) + goto xdp_out; + + act = bpf_prog_run_xdp(xdp_prog, xdp); + switch (act) { + case XDP_PASS: + break; + case XDP_TX: + xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index]; + result = i40e_xmit_xdp_ring(xdp, xdp_ring); + break; + default: + bpf_warn_invalid_xdp_action(act); + case XDP_ABORTED: + trace_xdp_exception(rx_ring->netdev, xdp_prog, act); + /* fallthrough -- handle aborts by dropping packet */ + case XDP_DROP: + result = I40E_XDP_CONSUMED; + break; + } +xdp_out: + rcu_read_unlock(); + return ERR_PTR(-result); +} + +/** + * i40e_rx_buffer_flip - adjusted rx_buffer to point to an unused region + * @rx_ring: Rx ring + * @rx_buffer: Rx buffer to adjust + * @size: Size of adjustment + **/ +static void i40e_rx_buffer_flip(struct i40e_ring *rx_ring, + struct i40e_rx_buffer *rx_buffer, + unsigned int size) +{ +#if (PAGE_SIZE < 8192) + unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; + + rx_buffer->page_offset ^= truesize; +#else + unsigned int truesize = SKB_DATA_ALIGN(i40e_rx_offset(rx_ring) + size); + + rx_buffer->page_offset += truesize; +#endif +} + /** * i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf * @rx_ring: rx descriptor ring to transact packets on @@ -1961,11 +2061,12 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) unsigned int total_rx_bytes = 0, total_rx_packets = 0; struct sk_buff *skb = rx_ring->skb; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); - bool failure = false; + bool failure = false, xdp_xmit = false; while (likely(total_rx_packets < budget)) { struct i40e_rx_buffer *rx_buffer; union i40e_rx_desc *rx_desc; + struct xdp_buff xdp; unsigned int size; u16 vlan_tag; u8 rx_ptype; @@ -2006,12 +2107,32 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) rx_buffer = i40e_get_rx_buffer(rx_ring, size); /* retrieve a buffer from the ring */ - if (skb) + if (!skb) { + xdp.data = page_address(rx_buffer->page) + + rx_buffer->page_offset; + xdp.data_hard_start = xdp.data - + i40e_rx_offset(rx_ring); + xdp.data_end = xdp.data + size; + + skb = i40e_run_xdp(rx_ring, &xdp); + } + + if (IS_ERR(skb)) { + if (PTR_ERR(skb) == -I40E_XDP_TX) { + xdp_xmit = true; + i40e_rx_buffer_flip(rx_ring, rx_buffer, size); + } else { + rx_buffer->pagecnt_bias++; + } + total_rx_bytes += size; + total_rx_packets++; + } else if (skb) { i40e_add_rx_frag(rx_ring, rx_buffer, skb, size); - else if (ring_uses_build_skb(rx_ring)) - skb = i40e_build_skb(rx_ring, rx_buffer, size); - else - skb = i40e_construct_skb(rx_ring, rx_buffer, size); + } else if (ring_uses_build_skb(rx_ring)) { + skb = i40e_build_skb(rx_ring, rx_buffer, &xdp); + } else { + skb = i40e_construct_skb(rx_ring, rx_buffer, &xdp); + } /* exit if we failed to retrieve a buffer */ if (!skb) { @@ -2026,18 +2147,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) if (i40e_is_non_eop(rx_ring, rx_desc, skb)) continue; - /* ERR_MASK will only have valid bits if EOP set, and - * what we are doing here is actually checking - * I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in - * the error field - */ - if (unlikely(i40e_test_staterr(rx_desc, BIT(I40E_RXD_QW1_ERROR_SHIFT)))) { - dev_kfree_skb_any(skb); - skb = NULL; - continue; - } - - if (i40e_cleanup_headers(rx_ring, skb)) { + if (i40e_cleanup_headers(rx_ring, skb, rx_desc)) { skb = NULL; continue; } @@ -2063,6 +2173,19 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) total_rx_packets++; } + if (xdp_xmit) { + struct i40e_ring *xdp_ring; + + xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index]; + + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. + */ + wmb(); + + writel(xdp_ring->next_to_use, xdp_ring->tail); + } + rx_ring->skb = skb; u64_stats_update_begin(&rx_ring->syncp); @@ -2629,8 +2752,10 @@ static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb, if (pf->ptp_tx && !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, pf->state)) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + pf->ptp_tx_start = jiffies; pf->ptp_tx_skb = skb_get(skb); } else { + pf->tx_hwtstamp_skipped++; return 0; } @@ -2933,10 +3058,12 @@ bool __i40e_chk_linearize(struct sk_buff *skb) * @hdr_len: size of the packet header * @td_cmd: the command field in the descriptor * @td_offset: offset for checksum or crc + * + * Returns 0 on success, -1 on failure to DMA **/ -static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, - struct i40e_tx_buffer *first, u32 tx_flags, - const u8 hdr_len, u32 td_cmd, u32 td_offset) +static inline int i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, + struct i40e_tx_buffer *first, u32 tx_flags, + const u8 hdr_len, u32 td_cmd, u32 td_offset) { unsigned int data_len = skb->data_len; unsigned int size = skb_headlen(skb); @@ -3094,7 +3221,7 @@ do_rs: mmiowb(); } - return; + return 0; dma_error: dev_info(tx_ring->dev, "TX DMA map failed\n"); @@ -3111,6 +3238,61 @@ dma_error: } tx_ring->next_to_use = i; + + return -1; +} + +/** + * i40e_xmit_xdp_ring - transmits an XDP buffer to an XDP Tx ring + * @xdp: data to transmit + * @xdp_ring: XDP Tx ring + **/ +static int i40e_xmit_xdp_ring(struct xdp_buff *xdp, + struct i40e_ring *xdp_ring) +{ + u32 size = xdp->data_end - xdp->data; + u16 i = xdp_ring->next_to_use; + struct i40e_tx_buffer *tx_bi; + struct i40e_tx_desc *tx_desc; + dma_addr_t dma; + + if (!unlikely(I40E_DESC_UNUSED(xdp_ring))) { + xdp_ring->tx_stats.tx_busy++; + return I40E_XDP_CONSUMED; + } + + dma = dma_map_single(xdp_ring->dev, xdp->data, size, DMA_TO_DEVICE); + if (dma_mapping_error(xdp_ring->dev, dma)) + return I40E_XDP_CONSUMED; + + tx_bi = &xdp_ring->tx_bi[i]; + tx_bi->bytecount = size; + tx_bi->gso_segs = 1; + tx_bi->raw_buf = xdp->data; + + /* record length, and DMA address */ + dma_unmap_len_set(tx_bi, len, size); + dma_unmap_addr_set(tx_bi, dma, dma); + + tx_desc = I40E_TX_DESC(xdp_ring, i); + tx_desc->buffer_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = build_ctob(I40E_TX_DESC_CMD_ICRC + | I40E_TXD_CMD, + 0, size, 0); + + /* Make certain all of the status bits have been updated + * before next_to_watch is written. + */ + smp_wmb(); + + i++; + if (i == xdp_ring->count) + i = 0; + + tx_bi->next_to_watch = tx_desc; + xdp_ring->next_to_use = i; + + return I40E_XDP_TX; } /** @@ -3211,8 +3393,9 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, */ i40e_atr(tx_ring, skb, tx_flags); - i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len, - td_cmd, td_offset); + if (i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len, + td_cmd, td_offset)) + goto cleanup_tx_tstamp; return NETDEV_TX_OK; @@ -3220,6 +3403,15 @@ out_drop: i40e_trace(xmit_frame_ring_drop, first->skb, tx_ring); dev_kfree_skb_any(first->skb); first->skb = NULL; +cleanup_tx_tstamp: + if (unlikely(tx_flags & I40E_TX_FLAGS_TSYN)) { + struct i40e_pf *pf = i40e_netdev_to_pf(tx_ring->netdev); + + dev_kfree_skb_any(pf->ptp_tx_skb); + pf->ptp_tx_skb = NULL; + clear_bit_unlock(__I40E_PTP_TX_IN_PROGRESS, pf->state); + } + return NETDEV_TX_OK; } |