Diffstat (limited to 'drivers/net/ethernet/sfc/tx.c')
-rw-r--r-- | drivers/net/ethernet/sfc/tx.c | 627
1 file changed, 251 insertions(+), 376 deletions(-)
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 18713436b443..5e090e54298e 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -22,14 +22,6 @@
 #include "nic.h"
 #include "workarounds.h"
 
-/*
- * TX descriptor ring full threshold
- *
- * The tx_queue descriptor ring fill-level must fall below this value
- * before we restart the netif queue
- */
-#define EFX_TXQ_THRESHOLD(_efx) ((_efx)->txq_entries / 2u)
-
 static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
			       struct efx_tx_buffer *buffer,
			       unsigned int *pkts_compl,
@@ -39,67 +31,32 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
 		struct device *dma_dev = &tx_queue->efx->pci_dev->dev;
 		dma_addr_t unmap_addr = (buffer->dma_addr + buffer->len -
					 buffer->unmap_len);
-		if (buffer->unmap_single)
+		if (buffer->flags & EFX_TX_BUF_MAP_SINGLE)
 			dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len,
					 DMA_TO_DEVICE);
 		else
 			dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len,
				       DMA_TO_DEVICE);
 		buffer->unmap_len = 0;
-		buffer->unmap_single = false;
 	}
 
-	if (buffer->skb) {
+	if (buffer->flags & EFX_TX_BUF_SKB) {
 		(*pkts_compl)++;
 		(*bytes_compl) += buffer->skb->len;
 		dev_kfree_skb_any((struct sk_buff *) buffer->skb);
-		buffer->skb = NULL;
 		netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
			   "TX queue %d transmission id %x complete\n",
			   tx_queue->queue, tx_queue->read_count);
+	} else if (buffer->flags & EFX_TX_BUF_HEAP) {
+		kfree(buffer->heap_buf);
 	}
-}
 
-/**
- * struct efx_tso_header - a DMA mapped buffer for packet headers
- * @next: Linked list of free ones.
- *	The list is protected by the TX queue lock.
- * @dma_unmap_len: Length to unmap for an oversize buffer, or 0.
- * @dma_addr: The DMA address of the header below.
- *
- * This controls the memory used for a TSO header.  Use TSOH_DATA()
- * to find the packet header data.  Use TSOH_SIZE() to calculate the
- * total size required for a given packet header length.  TSO headers
- * in the free list are exactly %TSOH_STD_SIZE bytes in size.
- */
-struct efx_tso_header {
-	union {
-		struct efx_tso_header *next;
-		size_t unmap_len;
-	};
-	dma_addr_t dma_addr;
-};
+	buffer->len = 0;
+	buffer->flags = 0;
+}
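The single flags word is what lets efx_dequeue_buffer() above replace four independent per-buffer fields (unmap_single, skb, continuation, tsoh) with one test per resource. A minimal, self-contained sketch of the idea; the flag values are assumptions for illustration only (the real definitions live in the driver's net_driver.h):

    /* Sketch of flag-based TX buffer state; values are illustrative. */
    #include <stdio.h>

    #define EFX_TX_BUF_CONT        0x1  /* not the last descriptor of a packet */
    #define EFX_TX_BUF_SKB         0x2  /* buffer ends an skb we must free */
    #define EFX_TX_BUF_HEAP        0x4  /* header was kmalloc()ed, needs kfree() */
    #define EFX_TX_BUF_MAP_SINGLE  0x8  /* unmap with dma_unmap_single() */

    int main(void)
    {
        /* A TSO heap header that was DMA-mapped at submit time: */
        unsigned short flags = EFX_TX_BUF_CONT | EFX_TX_BUF_HEAP |
                               EFX_TX_BUF_MAP_SINGLE;

        /* The completion path asks one question per resource: */
        if (flags & EFX_TX_BUF_MAP_SINGLE)
            printf("dma_unmap_single()\n");
        if (flags & EFX_TX_BUF_SKB)
            printf("dev_kfree_skb_any()\n");
        else if (flags & EFX_TX_BUF_HEAP)
            printf("kfree(heap_buf)\n");

        flags = 0;  /* one store resets the descriptor for reuse */
        return 0;
    }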
 
 static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
			       struct sk_buff *skb);
-static void efx_fini_tso(struct efx_tx_queue *tx_queue);
-static void efx_tsoh_heap_free(struct efx_tx_queue *tx_queue,
-			       struct efx_tso_header *tsoh);
-
-static void efx_tsoh_free(struct efx_tx_queue *tx_queue,
-			  struct efx_tx_buffer *buffer)
-{
-	if (buffer->tsoh) {
-		if (likely(!buffer->tsoh->unmap_len)) {
-			buffer->tsoh->next = tx_queue->tso_headers_free;
-			tx_queue->tso_headers_free = buffer->tsoh;
-		} else {
-			efx_tsoh_heap_free(tx_queue, buffer->tsoh);
-		}
-		buffer->tsoh = NULL;
-	}
-}
-
 
 static inline unsigned
 efx_max_tx_len(struct efx_nic *efx, dma_addr_t dma_addr)
@@ -138,6 +95,56 @@ unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
 	return max_descs;
 }
 
+/* Get partner of a TX queue, seen as part of the same net core queue */
+static struct efx_tx_queue *efx_tx_queue_partner(struct efx_tx_queue *tx_queue)
+{
+	if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD)
+		return tx_queue - EFX_TXQ_TYPE_OFFLOAD;
+	else
+		return tx_queue + EFX_TXQ_TYPE_OFFLOAD;
+}
+
+static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1)
+{
+	/* We need to consider both queues that the net core sees as one */
+	struct efx_tx_queue *txq2 = efx_tx_queue_partner(txq1);
+	struct efx_nic *efx = txq1->efx;
+	unsigned int fill_level;
+
+	fill_level = max(txq1->insert_count - txq1->old_read_count,
+			 txq2->insert_count - txq2->old_read_count);
+	if (likely(fill_level < efx->txq_stop_thresh))
+		return;
+
+	/* We used the stale old_read_count above, which gives us a
+	 * pessimistic estimate of the fill level (which may even
+	 * validly be >= efx->txq_entries).  Now try again using
+	 * read_count (more likely to be a cache miss).
+	 *
+	 * If we read read_count and then conditionally stop the
+	 * queue, it is possible for the completion path to race with
+	 * us and complete all outstanding descriptors in the middle,
+	 * after which there will be no more completions to wake it.
+	 * Therefore we stop the queue first, then read read_count
+	 * (with a memory barrier to ensure the ordering), then
+	 * restart the queue if the fill level turns out to be low
+	 * enough.
+	 */
+	netif_tx_stop_queue(txq1->core_txq);
+	smp_mb();
+	txq1->old_read_count = ACCESS_ONCE(txq1->read_count);
+	txq2->old_read_count = ACCESS_ONCE(txq2->read_count);
+
+	fill_level = max(txq1->insert_count - txq1->old_read_count,
+			 txq2->insert_count - txq2->old_read_count);
+	EFX_BUG_ON_PARANOID(fill_level >= efx->txq_entries);
+	if (likely(fill_level < efx->txq_stop_thresh)) {
+		smp_mb();
+		if (likely(!efx->loopback_selftest))
+			netif_tx_start_queue(txq1->core_txq);
+	}
+}
+
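efx_tx_maybe_stop_queue() encodes a lost-wakeup avoidance pattern: stop the queue first, issue a full barrier, then re-read the consumer counter, and restart if the ring turned out to have drained in the meantime. A user-space sketch of the same ordering, using C11 atomics in place of netif_tx_stop_queue()/smp_mb(); all names here are hypothetical:

    /* Sketch of the stop-then-recheck idiom with C11 atomics. */
    #include <stdatomic.h>
    #include <stdbool.h>

    struct ring {
        unsigned int insert_count;           /* touched by producer only */
        _Atomic unsigned int read_count;     /* advanced by the consumer */
        atomic_bool stopped;                 /* the "netif queue" state */
        unsigned int stop_thresh;
    };

    static void maybe_stop(struct ring *r)
    {
        unsigned int fill = r->insert_count -
            atomic_load_explicit(&r->read_count, memory_order_relaxed);

        if (fill < r->stop_thresh)
            return;

        /* Stop FIRST, then re-read read_count.  If we checked first
         * and stopped second, the consumer could drain the ring in
         * between and never see the stopped flag: a lost wakeup. */
        atomic_store(&r->stopped, true);
        atomic_thread_fence(memory_order_seq_cst);   /* the smp_mb() */
        fill = r->insert_count - atomic_load(&r->read_count);
        if (fill < r->stop_thresh)
            atomic_store(&r->stopped, false);        /* restart */
    }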
 /*
  * Add a socket buffer to a TX queue
  *
@@ -151,7 +158,7 @@ unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
  * This function is split out from efx_hard_start_xmit to allow the
  * loopback test to direct packets via specific TX queues.
  *
- * Returns NETDEV_TX_OK or NETDEV_TX_BUSY
+ * Returns NETDEV_TX_OK.
  * You must hold netif_tx_lock() to call this function.
  */
 netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
@@ -160,12 +167,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 	struct device *dma_dev = &efx->pci_dev->dev;
 	struct efx_tx_buffer *buffer;
 	skb_frag_t *fragment;
-	unsigned int len, unmap_len = 0, fill_level, insert_ptr;
+	unsigned int len, unmap_len = 0, insert_ptr;
 	dma_addr_t dma_addr, unmap_addr = 0;
 	unsigned int dma_len;
-	bool unmap_single;
-	int q_space, i = 0;
-	netdev_tx_t rc = NETDEV_TX_OK;
+	unsigned short dma_flags;
+	int i = 0;
 
 	EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);
 
@@ -183,14 +189,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 		return NETDEV_TX_OK;
 	}
 
-	fill_level = tx_queue->insert_count - tx_queue->old_read_count;
-	q_space = efx->txq_entries - 1 - fill_level;
-
 	/* Map for DMA.  Use dma_map_single rather than dma_map_page
 	 * since this is more efficient on machines with sparse
 	 * memory.
 	 */
-	unmap_single = true;
+	dma_flags = EFX_TX_BUF_MAP_SINGLE;
 	dma_addr = dma_map_single(dma_dev, skb->data, len, PCI_DMA_TODEVICE);
 
 	/* Process all fragments */
@@ -205,39 +208,10 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 
 		/* Add to TX queue, splitting across DMA boundaries */
 		do {
-			if (unlikely(q_space-- <= 0)) {
-				/* It might be that completions have
-				 * happened since the xmit path last
-				 * checked.  Update the xmit path's
-				 * copy of read_count.
-				 */
-				netif_tx_stop_queue(tx_queue->core_txq);
-				/* This memory barrier protects the
-				 * change of queue state from the access
-				 * of read_count. */
-				smp_mb();
-				tx_queue->old_read_count =
-					ACCESS_ONCE(tx_queue->read_count);
-				fill_level = (tx_queue->insert_count
-					      - tx_queue->old_read_count);
-				q_space = efx->txq_entries - 1 - fill_level;
-				if (unlikely(q_space-- <= 0)) {
-					rc = NETDEV_TX_BUSY;
-					goto unwind;
-				}
-				smp_mb();
-				if (likely(!efx->loopback_selftest))
-					netif_tx_start_queue(
-						tx_queue->core_txq);
-			}
-
 			insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
 			buffer = &tx_queue->buffer[insert_ptr];
-			efx_tsoh_free(tx_queue, buffer);
-			EFX_BUG_ON_PARANOID(buffer->tsoh);
-			EFX_BUG_ON_PARANOID(buffer->skb);
+			EFX_BUG_ON_PARANOID(buffer->flags);
 			EFX_BUG_ON_PARANOID(buffer->len);
-			EFX_BUG_ON_PARANOID(!buffer->continuation);
 			EFX_BUG_ON_PARANOID(buffer->unmap_len);
 
 			dma_len = efx_max_tx_len(efx, dma_addr);
@@ -247,13 +221,14 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 			/* Fill out per descriptor fields */
 			buffer->len = dma_len;
 			buffer->dma_addr = dma_addr;
+			buffer->flags = EFX_TX_BUF_CONT;
 			len -= dma_len;
 			dma_addr += dma_len;
 			++tx_queue->insert_count;
 		} while (len);
 
 		/* Transfer ownership of the unmapping to the final buffer */
-		buffer->unmap_single = unmap_single;
+		buffer->flags = EFX_TX_BUF_CONT | dma_flags;
 		buffer->unmap_len = unmap_len;
 		unmap_len = 0;
 
@@ -264,20 +239,22 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 		len = skb_frag_size(fragment);
 		i++;
 		/* Map for DMA */
-		unmap_single = false;
+		dma_flags = 0;
 		dma_addr = skb_frag_dma_map(dma_dev, fragment, 0, len,
					    DMA_TO_DEVICE);
 	}
 
 	/* Transfer ownership of the skb to the final buffer */
 	buffer->skb = skb;
-	buffer->continuation = false;
+	buffer->flags = EFX_TX_BUF_SKB | dma_flags;
 
 	netdev_tx_sent_queue(tx_queue->core_txq, skb->len);
 
 	/* Pass off to hardware */
 	efx_nic_push_buffers(tx_queue);
 
+	efx_tx_maybe_stop_queue(tx_queue);
+
 	return NETDEV_TX_OK;
 
  dma_err:
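The fill-level arithmetic in this path relies on insert_count and old_read_count being free-running unsigned counters: the subtraction stays correct across 32-bit wrap-around, and the ring mask is applied only when a slot is actually addressed. A small demonstration:

    /* Wrap-safe fill level from free-running unsigned counters. */
    #include <assert.h>

    int main(void)
    {
        unsigned int insert_count = 0xFFFFFFF0u;    /* about to wrap */
        unsigned int old_read_count = 0xFFFFFFB0u;  /* 0x40 behind */
        unsigned int ptr_mask = 1024 - 1;           /* ring of 1024 slots */

        insert_count += 0x30;   /* wraps past zero to 0x20 */

        /* Still the true number of outstanding descriptors: */
        assert(insert_count - old_read_count == 0x70);

        /* The mask is applied only to address a slot: */
        assert((insert_count & ptr_mask) == 0x20);
        return 0;
    }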
@@ -289,7 +266,6 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 	/* Mark the packet as transmitted, and free the SKB ourselves */
 	dev_kfree_skb_any(skb);
 
- unwind:
 	/* Work backwards until we hit the original insert pointer value */
 	while (tx_queue->insert_count != tx_queue->write_count) {
 		unsigned int pkts_compl = 0, bytes_compl = 0;
@@ -297,12 +273,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 		insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
 		buffer = &tx_queue->buffer[insert_ptr];
 		efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
-		buffer->len = 0;
 	}
 
 	/* Free the fragment we were mid-way through pushing */
 	if (unmap_len) {
-		if (unmap_single)
+		if (dma_flags & EFX_TX_BUF_MAP_SINGLE)
 			dma_unmap_single(dma_dev, unmap_addr, unmap_len,
					 DMA_TO_DEVICE);
 		else
@@ -310,7 +285,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
				       DMA_TO_DEVICE);
 	}
 
-	return rc;
+	return NETDEV_TX_OK;
 }
 
 /* Remove packets from the TX queue
@@ -340,8 +315,6 @@ static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
 		}
 
 		efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl);
-		buffer->continuation = true;
-		buffer->len = 0;
 
 		++tx_queue->read_count;
 		read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
@@ -366,6 +339,12 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
 
 	EFX_WARN_ON_PARANOID(!netif_device_present(net_dev));
 
+	/* PTP "event" packet */
+	if (unlikely(efx_xmit_with_hwtstamp(skb)) &&
+	    unlikely(efx_ptp_is_ptp_tx(efx, skb))) {
+		return efx_ptp_tx(efx, skb);
+	}
+
 	index = skb_get_queue_mapping(skb);
 	type = skb->ip_summed == CHECKSUM_PARTIAL ? EFX_TXQ_TYPE_OFFLOAD : 0;
 	if (index >= efx->n_tx_channels) {
@@ -450,6 +429,7 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
 {
 	unsigned fill_level;
 	struct efx_nic *efx = tx_queue->efx;
+	struct efx_tx_queue *txq2;
 	unsigned int pkts_compl = 0, bytes_compl = 0;
 
 	EFX_BUG_ON_PARANOID(index > tx_queue->ptr_mask);
@@ -457,15 +437,18 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
 	efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl);
 	netdev_tx_completed_queue(tx_queue->core_txq, pkts_compl, bytes_compl);
 
-	/* See if we need to restart the netif queue.  This barrier
-	 * separates the update of read_count from the test of the
-	 * queue state. */
+	/* See if we need to restart the netif queue.  This memory
+	 * barrier ensures that we write read_count (inside
+	 * efx_dequeue_buffers()) before reading the queue status.
+	 */
 	smp_mb();
 	if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) &&
	    likely(efx->port_enabled) &&
	    likely(netif_device_present(efx->net_dev))) {
-		fill_level = tx_queue->insert_count - tx_queue->read_count;
-		if (fill_level < EFX_TXQ_THRESHOLD(efx))
+		txq2 = efx_tx_queue_partner(tx_queue);
+		fill_level = max(tx_queue->insert_count - tx_queue->read_count,
+				 txq2->insert_count - txq2->read_count);
+		if (fill_level <= efx->txq_wake_thresh)
 			netif_tx_wake_queue(tx_queue->core_txq);
 	}
 
@@ -480,11 +463,26 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
 	}
 }
 
+/* Size of page-based TSO header buffers.  Larger blocks must be
+ * allocated from the heap.
+ */
+#define TSOH_STD_SIZE	128
+#define TSOH_PER_PAGE	(PAGE_SIZE / TSOH_STD_SIZE)
+
+/* At most half the descriptors in the queue at any time will refer to
+ * a TSO header buffer, since they must always be followed by a
+ * payload descriptor referring to an skb.
+ */
+static unsigned int efx_tsoh_page_count(struct efx_tx_queue *tx_queue)
+{
+	return DIV_ROUND_UP(tx_queue->ptr_mask + 1, 2 * TSOH_PER_PAGE);
+}
+
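The page-count arithmetic follows from the invariant in the comment above: a header descriptor is always followed by at least one payload descriptor, so at most half the ring can hold headers at once. A worked check, assuming 4096-byte pages (PAGE_SIZE is architecture-dependent):

    /* Worked check of the TSO header page arithmetic. */
    #include <assert.h>

    #define PAGE_SIZE       4096    /* assumed for this example */
    #define TSOH_STD_SIZE   128
    #define TSOH_PER_PAGE   (PAGE_SIZE / TSOH_STD_SIZE)  /* 32 headers/page */
    #define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))

    int main(void)
    {
        unsigned int entries = 1024;    /* descriptors in the TX ring */

        /* At most entries/2 headers can be outstanding, so: */
        unsigned int pages = DIV_ROUND_UP(entries, 2 * TSOH_PER_PAGE);

        assert(TSOH_PER_PAGE == 32);
        assert(pages == 16);            /* 512 headers / 32 per page */
        return 0;
    }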
 int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
 {
 	struct efx_nic *efx = tx_queue->efx;
 	unsigned int entries;
-	int i, rc;
+	int rc;
 
 	/* Create the smallest power-of-two aligned ring */
 	entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE);
@@ -500,17 +498,28 @@ int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
				   GFP_KERNEL);
 	if (!tx_queue->buffer)
 		return -ENOMEM;
-	for (i = 0; i <= tx_queue->ptr_mask; ++i)
-		tx_queue->buffer[i].continuation = true;
+
+	if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD) {
+		tx_queue->tsoh_page =
+			kcalloc(efx_tsoh_page_count(tx_queue),
+				sizeof(tx_queue->tsoh_page[0]), GFP_KERNEL);
+		if (!tx_queue->tsoh_page) {
+			rc = -ENOMEM;
+			goto fail1;
+		}
+	}
 
 	/* Allocate hardware ring */
 	rc = efx_nic_probe_tx(tx_queue);
 	if (rc)
-		goto fail;
+		goto fail2;
 
 	return 0;
 
- fail:
+fail2:
+	kfree(tx_queue->tsoh_page);
+	tx_queue->tsoh_page = NULL;
+fail1:
 	kfree(tx_queue->buffer);
 	tx_queue->buffer = NULL;
 	return rc;
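The fail1/fail2 labels above are the kernel's usual numbered-label unwind: a failure at step N jumps to the label that releases steps N-1..1 in reverse order, so each allocation is freed exactly once. A compact user-space sketch of the shape (all names hypothetical):

    /* Sketch of numbered-label goto unwind, as in efx_probe_tx_queue(). */
    #include <stdlib.h>

    static int hw_ring_alloc(void) { return -1; }   /* stand-in, fails here */

    static int probe(void **buffer, void **tsoh_page)
    {
        int rc;

        *buffer = calloc(1024, 8);          /* step 1 */
        if (!*buffer)
            return -1;

        *tsoh_page = calloc(16, 8);         /* step 2 */
        if (!*tsoh_page) {
            rc = -1;
            goto fail1;                     /* only step 1 to undo */
        }

        rc = hw_ring_alloc();               /* step 3 */
        if (rc)
            goto fail2;                     /* undo steps 2, then 1 */

        return 0;

    fail2:
        free(*tsoh_page);
        *tsoh_page = NULL;
    fail1:
        free(*buffer);
        *buffer = NULL;
        return rc;
    }

    int main(void)
    {
        void *buf = NULL, *tsoh = NULL;
        (void)probe(&buf, &tsoh);   /* exercises the fail2 -> fail1 unwind */
        return 0;
    }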
@@ -546,8 +555,6 @@ void efx_release_tx_buffers(struct efx_tx_queue *tx_queue)
 		unsigned int pkts_compl = 0, bytes_compl = 0;
 		buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask];
 		efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
-		buffer->continuation = true;
-		buffer->len = 0;
 
 		++tx_queue->read_count;
 	}
@@ -568,13 +575,12 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
 
 	efx_nic_fini_tx(tx_queue);
 	efx_release_tx_buffers(tx_queue);
-
-	/* Free up TSO header cache */
-	efx_fini_tso(tx_queue);
 }
 
 void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
 {
+	int i;
+
 	if (!tx_queue->buffer)
 		return;
 
@@ -582,6 +588,14 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
		  "destroying TX queue %d\n", tx_queue->queue);
 	efx_nic_remove_tx(tx_queue);
 
+	if (tx_queue->tsoh_page) {
+		for (i = 0; i < efx_tsoh_page_count(tx_queue); i++)
+			efx_nic_free_buffer(tx_queue->efx,
+					    &tx_queue->tsoh_page[i]);
+		kfree(tx_queue->tsoh_page);
+		tx_queue->tsoh_page = NULL;
+	}
+
 	kfree(tx_queue->buffer);
 	tx_queue->buffer = NULL;
 }
@@ -604,22 +618,7 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
 #define TSOH_OFFSET	NET_IP_ALIGN
 #endif
 
-#define TSOH_BUFFER(tsoh)	((u8 *)(tsoh + 1) + TSOH_OFFSET)
-
-/* Total size of struct efx_tso_header, buffer and padding */
-#define TSOH_SIZE(hdr_len)					\
-	(sizeof(struct efx_tso_header) + TSOH_OFFSET + hdr_len)
-
-/* Size of blocks on free list.  Larger blocks must be allocated from
- * the heap.
- */
-#define TSOH_STD_SIZE		128
-
 #define PTR_DIFF(p1, p2)  ((u8 *)(p1) - (u8 *)(p2))
-#define ETH_HDR_LEN(skb)  (skb_network_header(skb) - (skb)->data)
-#define SKB_TCP_OFF(skb)  PTR_DIFF(tcp_hdr(skb), (skb)->data)
-#define SKB_IPV4_OFF(skb) PTR_DIFF(ip_hdr(skb), (skb)->data)
-#define SKB_IPV6_OFF(skb) PTR_DIFF(ipv6_hdr(skb), (skb)->data)
 
 /**
  * struct tso_state - TSO state for an SKB
@@ -631,10 +630,12 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
  * @in_len: Remaining length in current SKB fragment
  * @unmap_len: Length of SKB fragment
  * @unmap_addr: DMA address of SKB fragment
- * @unmap_single: DMA single vs page mapping flag
+ * @dma_flags: TX buffer flags for DMA mapping - %EFX_TX_BUF_MAP_SINGLE or 0
  * @protocol: Network protocol (after any VLAN header)
+ * @ip_off: Offset of IP header
+ * @tcp_off: Offset of TCP header
  * @header_len: Number of bytes of header
- * @full_packet_size: Number of bytes to put in each outgoing segment
+ * @ip_base_len: IPv4 tot_len or IPv6 payload_len, before TCP payload
  *
  * The state used during segmentation.  It is put into this data structure
  * just to make it easy to pass into inline functions.
@@ -651,11 +652,13 @@ struct tso_state {
 	unsigned in_len;
 	unsigned unmap_len;
 	dma_addr_t unmap_addr;
-	bool unmap_single;
+	unsigned short dma_flags;
 
 	__be16 protocol;
+	unsigned int ip_off;
+	unsigned int tcp_off;
 	unsigned header_len;
-	int full_packet_size;
+	unsigned int ip_base_len;
 };
 
 
@@ -687,91 +690,43 @@ static __be16 efx_tso_check_protocol(struct sk_buff *skb)
 	return protocol;
 }
 
-
-/*
- * Allocate a page worth of efx_tso_header structures, and string them
- * into the tx_queue->tso_headers_free linked list. Return 0 or -ENOMEM.
- */
-static int efx_tsoh_block_alloc(struct efx_tx_queue *tx_queue)
+static u8 *efx_tsoh_get_buffer(struct efx_tx_queue *tx_queue,
+			       struct efx_tx_buffer *buffer, unsigned int len)
 {
-	struct device *dma_dev = &tx_queue->efx->pci_dev->dev;
-	struct efx_tso_header *tsoh;
-	dma_addr_t dma_addr;
-	u8 *base_kva, *kva;
+	u8 *result;
 
-	base_kva = dma_alloc_coherent(dma_dev, PAGE_SIZE, &dma_addr, GFP_ATOMIC);
-	if (base_kva == NULL) {
-		netif_err(tx_queue->efx, tx_err, tx_queue->efx->net_dev,
-			  "Unable to allocate page for TSO headers\n");
-		return -ENOMEM;
-	}
-
-	/* dma_alloc_coherent() allocates pages. */
-	EFX_BUG_ON_PARANOID(dma_addr & (PAGE_SIZE - 1u));
-
-	for (kva = base_kva; kva < base_kva + PAGE_SIZE; kva += TSOH_STD_SIZE) {
-		tsoh = (struct efx_tso_header *)kva;
-		tsoh->dma_addr = dma_addr + (TSOH_BUFFER(tsoh) - base_kva);
-		tsoh->next = tx_queue->tso_headers_free;
-		tx_queue->tso_headers_free = tsoh;
-	}
-
-	return 0;
-}
-
-
-/* Free up a TSO header, and all others in the same page.
- */
-static void efx_tsoh_block_free(struct efx_tx_queue *tx_queue,
-				struct efx_tso_header *tsoh,
-				struct device *dma_dev)
-{
-	struct efx_tso_header **p;
-	unsigned long base_kva;
-	dma_addr_t base_dma;
-
-	base_kva = (unsigned long)tsoh & PAGE_MASK;
-	base_dma = tsoh->dma_addr & PAGE_MASK;
-
-	p = &tx_queue->tso_headers_free;
-	while (*p != NULL) {
-		if (((unsigned long)*p & PAGE_MASK) == base_kva)
-			*p = (*p)->next;
-		else
-			p = &(*p)->next;
-	}
+	EFX_BUG_ON_PARANOID(buffer->len);
+	EFX_BUG_ON_PARANOID(buffer->flags);
+	EFX_BUG_ON_PARANOID(buffer->unmap_len);
 
-	dma_free_coherent(dma_dev, PAGE_SIZE, (void *)base_kva, base_dma);
-}
+	if (likely(len <= TSOH_STD_SIZE - TSOH_OFFSET)) {
+		unsigned index =
+			(tx_queue->insert_count & tx_queue->ptr_mask) / 2;
+		struct efx_buffer *page_buf =
+			&tx_queue->tsoh_page[index / TSOH_PER_PAGE];
+		unsigned offset =
+			TSOH_STD_SIZE * (index % TSOH_PER_PAGE) + TSOH_OFFSET;
+
+		if (unlikely(!page_buf->addr) &&
+		    efx_nic_alloc_buffer(tx_queue->efx, page_buf, PAGE_SIZE))
+			return NULL;
+
+		result = (u8 *)page_buf->addr + offset;
+		buffer->dma_addr = page_buf->dma_addr + offset;
+		buffer->flags = EFX_TX_BUF_CONT;
+	} else {
+		tx_queue->tso_long_headers++;
 
-static struct efx_tso_header *
-efx_tsoh_heap_alloc(struct efx_tx_queue *tx_queue, size_t header_len)
-{
-	struct efx_tso_header *tsoh;
-
-	tsoh = kmalloc(TSOH_SIZE(header_len), GFP_ATOMIC | GFP_DMA);
-	if (unlikely(!tsoh))
-		return NULL;
-
-	tsoh->dma_addr = dma_map_single(&tx_queue->efx->pci_dev->dev,
-					TSOH_BUFFER(tsoh), header_len,
-					DMA_TO_DEVICE);
-	if (unlikely(dma_mapping_error(&tx_queue->efx->pci_dev->dev,
-				       tsoh->dma_addr))) {
-		kfree(tsoh);
-		return NULL;
+		buffer->heap_buf = kmalloc(TSOH_OFFSET + len, GFP_ATOMIC);
+		if (unlikely(!buffer->heap_buf))
+			return NULL;
+		result = (u8 *)buffer->heap_buf + TSOH_OFFSET;
+		buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_HEAP;
 	}
 
-	tsoh->unmap_len = header_len;
-	return tsoh;
-}
+	buffer->len = len;
 
-static void
-efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh)
-{
-	dma_unmap_single(&tx_queue->efx->pci_dev->dev,
-			 tsoh->dma_addr, tsoh->unmap_len,
-			 DMA_TO_DEVICE);
-	kfree(tsoh);
+	return result;
 }
 
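efx_tsoh_get_buffer() turns the current ring position into a stable header slot: (insert_count & ptr_mask) / 2 is unique per outstanding header, because headers can occupy at most every second descriptor, and the slot then splits into a page index and a byte offset. A worked example, assuming a 4096-byte PAGE_SIZE and a TSOH_OFFSET (NET_IP_ALIGN) of 2:

    /* Worked example: descriptor slot -> TSO header page and offset. */
    #include <stdio.h>

    #define PAGE_SIZE       4096    /* assumed */
    #define TSOH_STD_SIZE   128
    #define TSOH_PER_PAGE   (PAGE_SIZE / TSOH_STD_SIZE)
    #define TSOH_OFFSET     2       /* NET_IP_ALIGN, assumed */

    int main(void)
    {
        unsigned int ptr_mask = 1024 - 1;
        unsigned int insert_count = 2051;   /* free-running counter */

        /* Headers occupy at most every second slot, so /2 is unique: */
        unsigned int index = (insert_count & ptr_mask) / 2;
        unsigned int page = index / TSOH_PER_PAGE;
        unsigned int offset = TSOH_STD_SIZE * (index % TSOH_PER_PAGE)
                              + TSOH_OFFSET;

        printf("slot %u -> header %u: page %u, offset %u\n",
               insert_count & ptr_mask, index, page, offset);
        /* prints: slot 3 -> header 1: page 0, offset 130 */
        return 0;
    }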
 /**
@@ -781,47 +736,19 @@ efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh)
  * @len: Length of fragment
  * @final_buffer: The final buffer inserted into the queue
  *
- * Push descriptors onto the TX queue.  Return 0 on success or 1 if
- * @tx_queue full.
+ * Push descriptors onto the TX queue.
  */
-static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
-			       dma_addr_t dma_addr, unsigned len,
-			       struct efx_tx_buffer **final_buffer)
+static void efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
+				dma_addr_t dma_addr, unsigned len,
+				struct efx_tx_buffer **final_buffer)
 {
 	struct efx_tx_buffer *buffer;
 	struct efx_nic *efx = tx_queue->efx;
-	unsigned dma_len, fill_level, insert_ptr;
-	int q_space;
+	unsigned dma_len, insert_ptr;
 
 	EFX_BUG_ON_PARANOID(len <= 0);
 
-	fill_level = tx_queue->insert_count - tx_queue->old_read_count;
-	/* -1 as there is no way to represent all descriptors used */
-	q_space = efx->txq_entries - 1 - fill_level;
-
 	while (1) {
-		if (unlikely(q_space-- <= 0)) {
-			/* It might be that completions have happened
-			 * since the xmit path last checked.  Update
-			 * the xmit path's copy of read_count.
-			 */
-			netif_tx_stop_queue(tx_queue->core_txq);
-			/* This memory barrier protects the change of
-			 * queue state from the access of read_count. */
-			smp_mb();
-			tx_queue->old_read_count =
-				ACCESS_ONCE(tx_queue->read_count);
-			fill_level = (tx_queue->insert_count
-				      - tx_queue->old_read_count);
-			q_space = efx->txq_entries - 1 - fill_level;
-			if (unlikely(q_space-- <= 0)) {
-				*final_buffer = NULL;
-				return 1;
-			}
-			smp_mb();
-			netif_tx_start_queue(tx_queue->core_txq);
-		}
-
 		insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
 		buffer = &tx_queue->buffer[insert_ptr];
 		++tx_queue->insert_count;
@@ -830,12 +757,9 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
				    tx_queue->read_count >=
				    efx->txq_entries);
 
-		efx_tsoh_free(tx_queue, buffer);
 		EFX_BUG_ON_PARANOID(buffer->len);
 		EFX_BUG_ON_PARANOID(buffer->unmap_len);
-		EFX_BUG_ON_PARANOID(buffer->skb);
-		EFX_BUG_ON_PARANOID(!buffer->continuation);
-		EFX_BUG_ON_PARANOID(buffer->tsoh);
+		EFX_BUG_ON_PARANOID(buffer->flags);
 
 		buffer->dma_addr = dma_addr;
 
@@ -845,7 +769,8 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
 		if (dma_len >= len)
 			break;
 
-		buffer->len = dma_len; /* Don't set the other members */
+		buffer->len = dma_len;
+		buffer->flags = EFX_TX_BUF_CONT;
 		dma_addr += dma_len;
 		len -= dma_len;
 	}
@@ -853,7 +778,6 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
 	EFX_BUG_ON_PARANOID(!len);
 	buffer->len = len;
 	*final_buffer = buffer;
-	return 0;
 }
 
 
@@ -864,54 +788,42 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
  * a single fragment, and we know it doesn't cross a page boundary.  It
  * also allows us to not worry about end-of-packet etc.
  */
-static void efx_tso_put_header(struct efx_tx_queue *tx_queue,
-			       struct efx_tso_header *tsoh, unsigned len)
+static int efx_tso_put_header(struct efx_tx_queue *tx_queue,
+			      struct efx_tx_buffer *buffer, u8 *header)
 {
-	struct efx_tx_buffer *buffer;
-
-	buffer = &tx_queue->buffer[tx_queue->insert_count & tx_queue->ptr_mask];
-	efx_tsoh_free(tx_queue, buffer);
-	EFX_BUG_ON_PARANOID(buffer->len);
-	EFX_BUG_ON_PARANOID(buffer->unmap_len);
-	EFX_BUG_ON_PARANOID(buffer->skb);
-	EFX_BUG_ON_PARANOID(!buffer->continuation);
-	EFX_BUG_ON_PARANOID(buffer->tsoh);
-	buffer->len = len;
-	buffer->dma_addr = tsoh->dma_addr;
-	buffer->tsoh = tsoh;
+	if (unlikely(buffer->flags & EFX_TX_BUF_HEAP)) {
+		buffer->dma_addr = dma_map_single(&tx_queue->efx->pci_dev->dev,
+						  header, buffer->len,
+						  DMA_TO_DEVICE);
+		if (unlikely(dma_mapping_error(&tx_queue->efx->pci_dev->dev,
+					       buffer->dma_addr))) {
+			kfree(buffer->heap_buf);
+			buffer->len = 0;
+			buffer->flags = 0;
+			return -ENOMEM;
+		}
+		buffer->unmap_len = buffer->len;
+		buffer->flags |= EFX_TX_BUF_MAP_SINGLE;
+	}
 
 	++tx_queue->insert_count;
+	return 0;
 }
 
-/* Remove descriptors put into a tx_queue. */
+/* Remove buffers put into a tx_queue.  None of the buffers must have
+ * an skb attached.
+ */
 static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
 {
 	struct efx_tx_buffer *buffer;
-	dma_addr_t unmap_addr;
 
 	/* Work backwards until we hit the original insert pointer value */
 	while (tx_queue->insert_count != tx_queue->write_count) {
 		--tx_queue->insert_count;
 		buffer = &tx_queue->buffer[tx_queue->insert_count &
					   tx_queue->ptr_mask];
-		efx_tsoh_free(tx_queue, buffer);
-		EFX_BUG_ON_PARANOID(buffer->skb);
-		if (buffer->unmap_len) {
-			unmap_addr = (buffer->dma_addr + buffer->len -
-				      buffer->unmap_len);
-			if (buffer->unmap_single)
-				dma_unmap_single(&tx_queue->efx->pci_dev->dev,
-						 unmap_addr, buffer->unmap_len,
-						 DMA_TO_DEVICE);
-			else
-				dma_unmap_page(&tx_queue->efx->pci_dev->dev,
-					       unmap_addr, buffer->unmap_len,
-					       DMA_TO_DEVICE);
-			buffer->unmap_len = 0;
-		}
-		buffer->len = 0;
-		buffer->continuation = true;
+		efx_dequeue_buffer(tx_queue, buffer, NULL, NULL);
 	}
 }
 
@@ -919,17 +831,16 @@
 /* Parse the SKB header and initialise state. */
 static void tso_start(struct tso_state *st, const struct sk_buff *skb)
 {
-	/* All ethernet/IP/TCP headers combined size is TCP header size
-	 * plus offset of TCP header relative to start of packet.
-	 */
-	st->header_len = ((tcp_hdr(skb)->doff << 2u)
-			  + PTR_DIFF(tcp_hdr(skb), skb->data));
-	st->full_packet_size = st->header_len + skb_shinfo(skb)->gso_size;
-
-	if (st->protocol == htons(ETH_P_IP))
+	st->ip_off = skb_network_header(skb) - skb->data;
+	st->tcp_off = skb_transport_header(skb) - skb->data;
+	st->header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u);
+	if (st->protocol == htons(ETH_P_IP)) {
+		st->ip_base_len = st->header_len - st->ip_off;
 		st->ipv4_id = ntohs(ip_hdr(skb)->id);
-	else
+	} else {
+		st->ip_base_len = st->header_len - st->tcp_off;
 		st->ipv4_id = 0;
+	}
 	st->seqnum = ntohl(tcp_hdr(skb)->seq);
 
 	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg);
@@ -938,7 +849,7 @@ static void tso_start(struct tso_state *st, const struct sk_buff *skb)
 
 	st->out_len = skb->len - st->header_len;
 	st->unmap_len = 0;
-	st->unmap_single = false;
+	st->dma_flags = 0;
 }
 
 static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
@@ -947,7 +858,7 @@ static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
 	st->unmap_addr = skb_frag_dma_map(&efx->pci_dev->dev, frag, 0,
					  skb_frag_size(frag), DMA_TO_DEVICE);
 	if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) {
-		st->unmap_single = false;
+		st->dma_flags = 0;
 		st->unmap_len = skb_frag_size(frag);
 		st->in_len = skb_frag_size(frag);
 		st->dma_addr = st->unmap_addr;
@@ -965,7 +876,7 @@ static int tso_get_head_fragment(struct tso_state *st, struct efx_nic *efx,
 	st->unmap_addr = dma_map_single(&efx->pci_dev->dev, skb->data + hl,
					len, DMA_TO_DEVICE);
 	if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) {
-		st->unmap_single = true;
+		st->dma_flags = EFX_TX_BUF_MAP_SINGLE;
 		st->unmap_len = len;
 		st->in_len = len;
 		st->dma_addr = st->unmap_addr;
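tso_start() precomputes ip_base_len so each segment's length field later becomes a single addition: for IPv4, tot_len covers the IP and TCP headers plus payload, while for IPv6, payload_len excludes the IPv6 header, so the base is measured from the TCP header instead. A worked example for a plain TCP/IPv4 frame:

    /* Worked example of the per-segment length math in tso_start(). */
    #include <assert.h>

    int main(void)
    {
        unsigned int ip_off = 14;               /* after Ethernet header */
        unsigned int tcp_off = 14 + 20;         /* 20-byte IPv4 header */
        unsigned int header_len = tcp_off + 20; /* 20-byte TCP header */
        unsigned int gso_size = 1460;           /* payload per full segment */

        /* IPv4: tot_len = IP header + TCP header + payload */
        unsigned int ip_base_len = header_len - ip_off;         /* 40 */
        assert(ip_base_len + gso_size == 1500);  /* a full-MTU segment */

        /* IPv6: payload_len excludes the IPv6 header, so the base
         * is measured from the TCP header instead */
        unsigned int ip6_base_len = header_len - tcp_off;       /* 20 */
        assert(ip6_base_len + gso_size == 1480);
        return 0;
    }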
@@ -982,20 +893,19 @@
  * @st:			TSO state
  *
  * Form descriptors for the current fragment, until we reach the end
- * of fragment or end-of-packet.  Return 0 on success, 1 if not enough
- * space in @tx_queue.
+ * of fragment or end-of-packet.
  */
-static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
-					 const struct sk_buff *skb,
-					 struct tso_state *st)
+static void tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
+					  const struct sk_buff *skb,
+					  struct tso_state *st)
 {
 	struct efx_tx_buffer *buffer;
-	int n, end_of_packet, rc;
+	int n;
 
 	if (st->in_len == 0)
-		return 0;
+		return;
 	if (st->packet_space == 0)
-		return 0;
+		return;
 
 	EFX_BUG_ON_PARANOID(st->in_len <= 0);
 	EFX_BUG_ON_PARANOID(st->packet_space <= 0);
@@ -1006,25 +916,24 @@ static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
 	st->out_len -= n;
 	st->in_len -= n;
 
-	rc = efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer);
-	if (likely(rc == 0)) {
-		if (st->out_len == 0)
-			/* Transfer ownership of the skb */
-			buffer->skb = skb;
+	efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer);
 
-		end_of_packet = st->out_len == 0 || st->packet_space == 0;
-		buffer->continuation = !end_of_packet;
+	if (st->out_len == 0) {
+		/* Transfer ownership of the skb */
+		buffer->skb = skb;
+		buffer->flags = EFX_TX_BUF_SKB;
+	} else if (st->packet_space != 0) {
+		buffer->flags = EFX_TX_BUF_CONT;
+	}
 
-		if (st->in_len == 0) {
-			/* Transfer ownership of the DMA mapping */
-			buffer->unmap_len = st->unmap_len;
-			buffer->unmap_single = st->unmap_single;
-			st->unmap_len = 0;
-		}
+	if (st->in_len == 0) {
+		/* Transfer ownership of the DMA mapping */
+		buffer->unmap_len = st->unmap_len;
+		buffer->flags |= st->dma_flags;
+		st->unmap_len = 0;
 	}
 
 	st->dma_addr += n;
-	return rc;
 }
 
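The segmentation loop is driven by three counters: out_len (payload still to send from the skb), in_len (bytes left in the current source fragment), and packet_space (room left in the current output segment). A compressed model of how they advance, with illustrative numbers:

    /* Tiny model of the TSO fill loop's counters. */
    #include <stdio.h>

    int main(void)
    {
        unsigned int out_len = 3000;    /* total TCP payload to segment */
        unsigned int gso_size = 1460;
        unsigned int in_len = 3000;     /* one source fragment here */
        unsigned int packet_space = 0;
        unsigned int segments = 0;

        while (out_len) {
            if (packet_space == 0) {    /* tso_start_new_packet() */
                packet_space = gso_size < out_len ? gso_size : out_len;
                segments++;
            }
            /* tso_fill_packet_with_fragment(): */
            unsigned int n = in_len < packet_space ? in_len : packet_space;
            packet_space -= n;
            out_len -= n;
            in_len -= n;
        }
        printf("%u segments\n", segments);  /* 3: 1460 + 1460 + 80 */
        return 0;
    }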
@@ -1035,36 +944,25 @@ static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
  * @st:		      TSO state
  *
  * Generate a new header and prepare for the new packet.  Return 0 on
- * success, or -1 if failed to alloc header.
+ * success, or -%ENOMEM if failed to alloc header.
  */
 static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
				const struct sk_buff *skb,
				struct tso_state *st)
 {
-	struct efx_tso_header *tsoh;
+	struct efx_tx_buffer *buffer =
+		&tx_queue->buffer[tx_queue->insert_count & tx_queue->ptr_mask];
 	struct tcphdr *tsoh_th;
 	unsigned ip_length;
 	u8 *header;
+	int rc;
 
-	/* Allocate a DMA-mapped header buffer. */
-	if (likely(TSOH_SIZE(st->header_len) <= TSOH_STD_SIZE)) {
-		if (tx_queue->tso_headers_free == NULL) {
-			if (efx_tsoh_block_alloc(tx_queue))
-				return -1;
-		}
-		EFX_BUG_ON_PARANOID(!tx_queue->tso_headers_free);
-		tsoh = tx_queue->tso_headers_free;
-		tx_queue->tso_headers_free = tsoh->next;
-		tsoh->unmap_len = 0;
-	} else {
-		tx_queue->tso_long_headers++;
-		tsoh = efx_tsoh_heap_alloc(tx_queue, st->header_len);
-		if (unlikely(!tsoh))
-			return -1;
-	}
+	/* Allocate and insert a DMA-mapped header buffer. */
+	header = efx_tsoh_get_buffer(tx_queue, buffer, st->header_len);
+	if (!header)
+		return -ENOMEM;
 
-	header = TSOH_BUFFER(tsoh);
-	tsoh_th = (struct tcphdr *)(header + SKB_TCP_OFF(skb));
+	tsoh_th = (struct tcphdr *)(header + st->tcp_off);
 
 	/* Copy and update the headers. */
 	memcpy(header, skb->data, st->header_len);
@@ -1073,19 +971,19 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
 	st->seqnum += skb_shinfo(skb)->gso_size;
 	if (st->out_len > skb_shinfo(skb)->gso_size) {
 		/* This packet will not finish the TSO burst. */
-		ip_length = st->full_packet_size - ETH_HDR_LEN(skb);
+		st->packet_space = skb_shinfo(skb)->gso_size;
 		tsoh_th->fin = 0;
 		tsoh_th->psh = 0;
 	} else {
 		/* This packet will be the last in the TSO burst. */
-		ip_length = st->header_len - ETH_HDR_LEN(skb) + st->out_len;
+		st->packet_space = st->out_len;
 		tsoh_th->fin = tcp_hdr(skb)->fin;
 		tsoh_th->psh = tcp_hdr(skb)->psh;
 	}
+	ip_length = st->ip_base_len + st->packet_space;
 
 	if (st->protocol == htons(ETH_P_IP)) {
-		struct iphdr *tsoh_iph =
-			(struct iphdr *)(header + SKB_IPV4_OFF(skb));
+		struct iphdr *tsoh_iph = (struct iphdr *)(header + st->ip_off);
 
 		tsoh_iph->tot_len = htons(ip_length);
 
@@ -1094,16 +992,16 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
 		st->ipv4_id++;
 	} else {
 		struct ipv6hdr *tsoh_iph =
-			(struct ipv6hdr *)(header + SKB_IPV6_OFF(skb));
+			(struct ipv6hdr *)(header + st->ip_off);
 
-		tsoh_iph->payload_len = htons(ip_length - sizeof(*tsoh_iph));
+		tsoh_iph->payload_len = htons(ip_length);
 	}
 
-	st->packet_space = skb_shinfo(skb)->gso_size;
-	++tx_queue->tso_packets;
+	rc = efx_tso_put_header(tx_queue, buffer, header);
+	if (unlikely(rc))
+		return rc;
 
-	/* Form a descriptor for this header. */
-	efx_tso_put_header(tx_queue, tsoh, st->header_len);
+	++tx_queue->tso_packets;
 
 	return 0;
 }
@@ -1118,13 +1016,13 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
  *
  * Add socket buffer @skb to @tx_queue, doing TSO or return != 0 if
  * @skb was not enqueued.  In all cases @skb is consumed.  Return
- * %NETDEV_TX_OK or %NETDEV_TX_BUSY.
+ * %NETDEV_TX_OK.
  */
 static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
			       struct sk_buff *skb)
 {
 	struct efx_nic *efx = tx_queue->efx;
-	int frag_i, rc, rc2 = NETDEV_TX_OK;
+	int frag_i, rc;
 	struct tso_state state;
 
 	/* Find the packet protocol and sanity-check it */
@@ -1156,11 +1054,7 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 		goto mem_err;
 
 	while (1) {
-		rc = tso_fill_packet_with_fragment(tx_queue, skb, &state);
-		if (unlikely(rc)) {
-			rc2 = NETDEV_TX_BUSY;
-			goto unwind;
-		}
+		tso_fill_packet_with_fragment(tx_queue, skb, &state);
 
 		/* Move onto the next fragment? */
 		if (state.in_len == 0) {
@@ -1184,6 +1078,8 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 	/* Pass off to hardware */
 	efx_nic_push_buffers(tx_queue);
 
+	efx_tx_maybe_stop_queue(tx_queue);
+
 	tx_queue->tso_bursts++;
 	return NETDEV_TX_OK;
 
@@ -1192,10 +1088,9 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
		  "Out of memory for TSO headers, or DMA mapping error\n");
 	dev_kfree_skb_any(skb);
 
- unwind:
 	/* Free the DMA mapping we were in the process of writing out */
 	if (state.unmap_len) {
-		if (state.unmap_single)
+		if (state.dma_flags & EFX_TX_BUF_MAP_SINGLE)
 			dma_unmap_single(&efx->pci_dev->dev, state.unmap_addr,
					 state.unmap_len, DMA_TO_DEVICE);
 		else
@@ -1204,25 +1099,5 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 	}
 
 	efx_enqueue_unwind(tx_queue);
-	return rc2;
-}
-
-
-/*
- * Free up all TSO datastructures associated with tx_queue.  This
- * routine should be called only once the tx_queue is both empty and
- * will no longer be used.
- */
-static void efx_fini_tso(struct efx_tx_queue *tx_queue)
-{
-	unsigned i;
-
-	if (tx_queue->buffer) {
-		for (i = 0; i <= tx_queue->ptr_mask; ++i)
-			efx_tsoh_free(tx_queue, &tx_queue->buffer[i]);
-	}
-
-	while (tx_queue->tso_headers_free != NULL)
-		efx_tsoh_block_free(tx_queue, tx_queue->tso_headers_free,
-				    &tx_queue->efx->pci_dev->dev);
+	return NETDEV_TX_OK;
 }