| field | value | date |
|---|---|---|
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-11-25 20:02:57 -0800 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-11-25 20:02:57 -0800 |
| commit | 386403a115f95997c2715691226e11a7b5cffcfd | |
| tree | a685df70bd3d5b295683713818ddf0752c3d75b6 /drivers/net/ethernet/sfc/efx.c | |
| parent | 642356cb5f4a8c82b5ca5ebac288c327d10df236 | |
| parent | 622dc5ad8052f4f0c6b7a12787696a5caa3c6a58 | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from David Miller:
"Another merge window, another pull full of stuff:
1) Support alternative names for network devices, from Jiri Pirko.
2) Introduce per-netns netdev notifiers, also from Jiri Pirko.
3) Support MSG_PEEK in vsock/virtio, from Matias Ezequiel Vara
Larsen.
4) Allow compiling out the TLS TOE code, from Jakub Kicinski.
5) Add several new tracepoints to the kTLS code, also from Jakub.
6) Support set channels ethtool callback in ena driver, from Sameeh
Jubran.
7) New SCTP events SCTP_ADDR_ADDED, SCTP_ADDR_REMOVED,
SCTP_ADDR_MADE_PRIM, and SCTP_SEND_FAILED_EVENT. From Xin Long.
8) Add XDP support to mvneta driver, from Lorenzo Bianconi.
9) Lots of netfilter hw offload fixes, cleanups and enhancements,
from Pablo Neira Ayuso.
10) PTP support for aquantia chips, from Egor Pomozov.
11) Add UDP segmentation offload support to igb, ixgbe, and i40e. From
Josh Hunt.
12) Add smart nagle to tipc, from Jon Maloy.
13) Support L2 field rewrite by TC offloads in bnxt_en, from Venkat
Duvvuru.
14) Add a flow mask cache to OVS, from Tonghao Zhang.
15) Add XDP support to ice driver, from Maciej Fijalkowski.
16) Add AF_XDP support to ice driver, from Krzysztof Kazimierczak.
17) Support UDP GSO offload in atlantic driver, from Igor Russkikh.
18) Support it in stmmac driver too, from Jose Abreu.
19) Support TIPC encryption and auth, from Tuong Lien.
20) Introduce BPF trampolines, from Alexei Starovoitov.
21) Make page_pool API more numa friendly, from Saeed Mahameed.
22) Introduce route hints to ipv4 and ipv6, from Paolo Abeni.
23) Add UDP segmentation offload to cxgb4, from Rahul Lakkireddy"
* git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1857 commits)
libbpf: Fix usage of u32 in userspace code
mm: Implement no-MMU variant of vmalloc_user_node_flags
slip: Fix use-after-free Read in slip_open
net: dsa: sja1105: fix sja1105_parse_rgmii_delays()
macvlan: schedule bc_work even if error
enetc: add support Credit Based Shaper(CBS) for hardware offload
net: phy: add helpers phy_(un)lock_mdio_bus
mdio_bus: don't use managed reset-controller
ax88179_178a: add ethtool_op_get_ts_info()
mlxsw: spectrum_router: Fix use of uninitialized adjacency index
mlxsw: spectrum_router: After underlay moves, demote conflicting tunnels
bpf: Simplify __bpf_arch_text_poke poke type handling
bpf: Introduce BPF_TRACE_x helper for the tracing tests
bpf: Add bpf_jit_blinding_enabled for !CONFIG_BPF_JIT
bpf, testing: Add various tail call test cases
bpf, x86: Emit patchable direct jump as tail call
bpf: Constant map key tracking for prog array pokes
bpf: Add poke dependency tracking for prog array maps
bpf: Add initial poke descriptor table for jit images
bpf: Move owner type, jited info into array auxiliary data
...
Diffstat (limited to 'drivers/net/ethernet/sfc/efx.c')
| mode | path | changed lines |
|---|---|---|
| -rw-r--r-- | drivers/net/ethernet/sfc/efx.c | 283 |

1 file changed, 244 insertions(+), 39 deletions(-)
```diff
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 2fef7402233e..992c773620ec 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -226,6 +226,10 @@ static void efx_fini_napi_channel(struct efx_channel *channel);
 static void efx_fini_struct(struct efx_nic *efx);
 static void efx_start_all(struct efx_nic *efx);
 static void efx_stop_all(struct efx_nic *efx);
+static int efx_xdp_setup_prog(struct efx_nic *efx, struct bpf_prog *prog);
+static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp);
+static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs,
+			u32 flags);
 
 #define EFX_ASSERT_RESET_SERIALISED(efx)	\
 	do {					\
@@ -340,6 +344,8 @@ static int efx_poll(struct napi_struct *napi, int budget)
 
 	spent = efx_process_channel(channel, budget);
 
+	xdp_do_flush_map();
+
 	if (spent < budget) {
 		if (efx_channel_has_rx_queue(channel) &&
 		    efx->irq_rx_adaptive &&
@@ -349,7 +355,7 @@ static int efx_poll(struct napi_struct *napi, int budget)
 
 #ifdef CONFIG_RFS_ACCEL
 		/* Perhaps expire some ARFS filters */
-		schedule_work(&channel->filter_work);
+		mod_delayed_work(system_wq, &channel->filter_work, 0);
 #endif
 
 		/* There is no race here; although napi_disable() will
@@ -481,7 +487,7 @@ efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel)
 	}
 
 #ifdef CONFIG_RFS_ACCEL
-	INIT_WORK(&channel->filter_work, efx_filter_rfs_expire);
+	INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
 #endif
 
 	rx_queue = &channel->rx_queue;
@@ -527,7 +533,7 @@ efx_copy_channel(const struct efx_channel *old_channel)
 	memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
 	timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
 #ifdef CONFIG_RFS_ACCEL
-	INIT_WORK(&channel->filter_work, efx_filter_rfs_expire);
+	INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
 #endif
 
 	return channel;
@@ -579,9 +585,14 @@ efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len)
 	int number;
 
 	number = channel->channel;
-	if (efx->tx_channel_offset == 0) {
+
+	if (number >= efx->xdp_channel_offset &&
+	    !WARN_ON_ONCE(!efx->n_xdp_channels)) {
+		type = "-xdp";
+		number -= efx->xdp_channel_offset;
+	} else if (efx->tx_channel_offset == 0) {
 		type = "";
-	} else if (channel->channel < efx->tx_channel_offset) {
+	} else if (number < efx->tx_channel_offset) {
 		type = "-rx";
 	} else {
 		type = "-tx";
@@ -651,7 +662,7 @@ static void efx_start_datapath(struct efx_nic *efx)
 	efx->rx_dma_len = (efx->rx_prefix_size +
 			   EFX_MAX_FRAME_LEN(efx->net_dev->mtu) +
 			   efx->type->rx_buffer_padding);
-	rx_buf_len = (sizeof(struct efx_rx_page_state) +
+	rx_buf_len = (sizeof(struct efx_rx_page_state) + XDP_PACKET_HEADROOM +
 		      efx->rx_ip_align + efx->rx_dma_len);
 	if (rx_buf_len <= PAGE_SIZE) {
 		efx->rx_scatter = efx->type->always_rx_scatter;
@@ -774,6 +785,7 @@ static void efx_stop_datapath(struct efx_nic *efx)
 		efx_for_each_possible_channel_tx_queue(tx_queue, channel)
 			efx_fini_tx_queue(tx_queue);
 	}
+	efx->xdp_rxq_info_failed = false;
 }
 
 static void efx_remove_channel(struct efx_channel *channel)
@@ -798,6 +810,8 @@ static void efx_remove_channels(struct efx_nic *efx)
 
 	efx_for_each_channel(channel, efx)
 		efx_remove_channel(channel);
+
+	kfree(efx->xdp_tx_queues);
 }
 
 int
```
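The ARFS hunks above convert the filter-expiry work item to delayed work: `mod_delayed_work()` lets the NAPI poll loop kick expiry immediately without double-queueing, and `efx_remove_filters()` (later in this diff) can cancel it synchronously on teardown. Below is a minimal, self-contained module sketch of that pattern; `demo_init`, `demo_exit` and `expire_fn` are illustrative names, not driver code.

```c
#include <linux/module.h>
#include <linux/workqueue.h>

static struct delayed_work expire_work;

/* Stand-in for efx_filter_rfs_expire(): scan a quota of stale filters. */
static void expire_fn(struct work_struct *work)
{
	pr_info("expiring a batch of stale filters\n");
}

static int __init demo_init(void)
{
	INIT_DELAYED_WORK(&expire_work, expire_fn);
	/* Delay 0 queues the work immediately; unlike schedule_work(),
	 * a later mod_delayed_work() can push the same item out again
	 * without ever queueing a second instance. */
	mod_delayed_work(system_wq, &expire_work, 0);
	return 0;
}

static void __exit demo_exit(void)
{
	/* Matches the cancel_delayed_work_sync() added to
	 * efx_remove_filters() further down in this diff. */
	cancel_delayed_work_sync(&expire_work);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
```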
```diff
@@ -1435,6 +1449,101 @@ static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
 	return count;
 }
 
+static int efx_allocate_msix_channels(struct efx_nic *efx,
+				      unsigned int max_channels,
+				      unsigned int extra_channels,
+				      unsigned int parallelism)
+{
+	unsigned int n_channels = parallelism;
+	int vec_count;
+	int n_xdp_tx;
+	int n_xdp_ev;
+
+	if (efx_separate_tx_channels)
+		n_channels *= 2;
+	n_channels += extra_channels;
+
+	/* To allow XDP transmit to happen from arbitrary NAPI contexts
+	 * we allocate a TX queue per CPU. We share event queues across
+	 * multiple tx queues, assuming tx and ev queues are both
+	 * maximum size.
+	 */
+
+	n_xdp_tx = num_possible_cpus();
+	n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_TXQ_TYPES);
+
+	/* Check resources.
+	 * We need a channel per event queue, plus a VI per tx queue.
+	 * This may be more pessimistic than it needs to be.
+	 */
+	if (n_channels + n_xdp_ev > max_channels) {
+		netif_err(efx, drv, efx->net_dev,
+			  "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n",
+			  n_xdp_ev, n_channels, max_channels);
+		efx->n_xdp_channels = 0;
+		efx->xdp_tx_per_channel = 0;
+		efx->xdp_tx_queue_count = 0;
+	} else {
+		efx->n_xdp_channels = n_xdp_ev;
+		efx->xdp_tx_per_channel = EFX_TXQ_TYPES;
+		efx->xdp_tx_queue_count = n_xdp_tx;
+		n_channels += n_xdp_ev;
+		netif_dbg(efx, drv, efx->net_dev,
+			  "Allocating %d TX and %d event queues for XDP\n",
+			  n_xdp_tx, n_xdp_ev);
+	}
+
+	n_channels = min(n_channels, max_channels);
+
+	vec_count = pci_msix_vec_count(efx->pci_dev);
+	if (vec_count < 0)
+		return vec_count;
+	if (vec_count < n_channels) {
+		netif_err(efx, drv, efx->net_dev,
+			  "WARNING: Insufficient MSI-X vectors available (%d < %u).\n",
+			  vec_count, n_channels);
+		netif_err(efx, drv, efx->net_dev,
+			  "WARNING: Performance may be reduced.\n");
+		n_channels = vec_count;
+	}
+
+	efx->n_channels = n_channels;
+
+	/* Do not create the PTP TX queue(s) if PTP uses the MC directly. */
+	if (extra_channels && !efx_ptp_use_mac_tx_timestamps(efx))
+		n_channels--;
+
+	/* Ignore XDP tx channels when creating rx channels. */
+	n_channels -= efx->n_xdp_channels;
+
+	if (efx_separate_tx_channels) {
+		efx->n_tx_channels =
+			min(max(n_channels / 2, 1U),
+			    efx->max_tx_channels);
+		efx->tx_channel_offset =
+			n_channels - efx->n_tx_channels;
+		efx->n_rx_channels =
+			max(n_channels -
+			    efx->n_tx_channels, 1U);
+	} else {
+		efx->n_tx_channels = min(n_channels, efx->max_tx_channels);
+		efx->tx_channel_offset = 0;
+		efx->n_rx_channels = n_channels;
+	}
+
+	if (efx->n_xdp_channels)
+		efx->xdp_channel_offset = efx->tx_channel_offset +
+					  efx->n_tx_channels;
+	else
+		efx->xdp_channel_offset = efx->n_channels;
+
+	netif_dbg(efx, drv, efx->net_dev,
+		  "Allocating %u RX channels\n",
+		  efx->n_rx_channels);
+
+	return efx->n_channels;
+}
+
 /* Probe the number and type of interrupts we are able to obtain, and
  * the resulting numbers of channels and RX queues.
  */
@@ -1449,19 +1558,19 @@ static int efx_probe_interrupts(struct efx_nic *efx)
 		++extra_channels;
 
 	if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
+		unsigned int parallelism = efx_wanted_parallelism(efx);
 		struct msix_entry xentries[EFX_MAX_CHANNELS];
 		unsigned int n_channels;
 
-		n_channels = efx_wanted_parallelism(efx);
-		if (efx_separate_tx_channels)
-			n_channels *= 2;
-		n_channels += extra_channels;
-		n_channels = min(n_channels, efx->max_channels);
-
-		for (i = 0; i < n_channels; i++)
-			xentries[i].entry = i;
-		rc = pci_enable_msix_range(efx->pci_dev,
-					   xentries, 1, n_channels);
+		rc = efx_allocate_msix_channels(efx, efx->max_channels,
+						extra_channels, parallelism);
+		if (rc >= 0) {
+			n_channels = rc;
+			for (i = 0; i < n_channels; i++)
+				xentries[i].entry = i;
+			rc = pci_enable_msix_range(efx->pci_dev, xentries, 1,
+						   n_channels);
+		}
 		if (rc < 0) {
 			/* Fall back to single channel MSI */
 			netif_err(efx, drv, efx->net_dev,
@@ -1480,21 +1589,6 @@ static int efx_probe_interrupts(struct efx_nic *efx)
 		}
 
 		if (rc > 0) {
-			efx->n_channels = n_channels;
-			if (n_channels > extra_channels)
-				n_channels -= extra_channels;
-			if (efx_separate_tx_channels) {
-				efx->n_tx_channels = min(max(n_channels / 2,
-							     1U),
-							 efx->max_tx_channels);
-				efx->n_rx_channels = max(n_channels -
-							 efx->n_tx_channels,
-							 1U);
-			} else {
-				efx->n_tx_channels = min(n_channels,
-							 efx->max_tx_channels);
-				efx->n_rx_channels = n_channels;
-			}
 			for (i = 0; i < efx->n_channels; i++)
 				efx_get_channel(efx, i)->irq =
 					xentries[i].vector;
@@ -1506,6 +1600,8 @@
 		efx->n_channels = 1;
 		efx->n_rx_channels = 1;
 		efx->n_tx_channels = 1;
+		efx->n_xdp_channels = 0;
+		efx->xdp_channel_offset = efx->n_channels;
 		rc = pci_enable_msi(efx->pci_dev);
 		if (rc == 0) {
 			efx_get_channel(efx, 0)->irq = efx->pci_dev->irq;
@@ -1524,12 +1620,14 @@ static int efx_probe_interrupts(struct efx_nic *efx)
 		efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0);
 		efx->n_rx_channels = 1;
 		efx->n_tx_channels = 1;
+		efx->n_xdp_channels = 0;
+		efx->xdp_channel_offset = efx->n_channels;
 		efx->legacy_irq = efx->pci_dev->irq;
 	}
 
-	/* Assign extra channels if possible */
+	/* Assign extra channels if possible, before XDP channels */
 	efx->n_extra_tx_channels = 0;
-	j = efx->n_channels;
+	j = efx->xdp_channel_offset;
 	for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) {
 		if (!efx->extra_channel_type[i])
 			continue;
```
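The new `efx_allocate_msix_channels()` above budgets one XDP TX queue per possible CPU and packs several TX queues behind each shared event queue before asking PCI for MSI-X vectors. A userspace sketch of the same arithmetic, assuming `EFX_TXQ_TYPES` is 4 (its value in the sfc driver of this era) and using made-up CPU/channel counts:

```c
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
#define EFX_TXQ_TYPES 4	/* assumption: TX queues sharing one event queue */

int main(void)
{
	unsigned int parallelism = 16;	/* stand-in for efx_wanted_parallelism() */
	unsigned int extra_channels = 1;	/* e.g. a PTP channel */
	unsigned int max_channels = 64;
	unsigned int n_cpus = 32;	/* stand-in for num_possible_cpus() */

	unsigned int n_channels = parallelism + extra_channels;
	unsigned int n_xdp_tx = n_cpus;	/* one XDP TX queue per CPU */
	unsigned int n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_TXQ_TYPES);

	if (n_channels + n_xdp_ev > max_channels)
		printf("XDP TX disabled: %u + %u XDP event queues > max %u\n",
		       n_channels, n_xdp_ev, max_channels);
	else
		printf("%u normal + %u XDP channels -> request %u MSI-X vectors\n",
		       n_channels, n_xdp_ev, n_channels + n_xdp_ev);
	return 0;
}
```

With these numbers the driver would request 25 vectors: 32 per-CPU XDP TX queues packed four to an event queue cost only 8 extra channels.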
```diff
@@ -1724,29 +1822,50 @@ static void efx_remove_interrupts(struct efx_nic *efx)
 	efx->legacy_irq = 0;
 }
 
-static void efx_set_channels(struct efx_nic *efx)
+static int efx_set_channels(struct efx_nic *efx)
 {
 	struct efx_channel *channel;
 	struct efx_tx_queue *tx_queue;
+	int xdp_queue_number;
 
 	efx->tx_channel_offset =
 		efx_separate_tx_channels ?
 		efx->n_channels - efx->n_tx_channels : 0;
 
+	if (efx->xdp_tx_queue_count) {
+		EFX_WARN_ON_PARANOID(efx->xdp_tx_queues);
+
+		/* Allocate array for XDP TX queue lookup. */
+		efx->xdp_tx_queues = kcalloc(efx->xdp_tx_queue_count,
+					     sizeof(*efx->xdp_tx_queues),
+					     GFP_KERNEL);
+		if (!efx->xdp_tx_queues)
+			return -ENOMEM;
+	}
+
 	/* We need to mark which channels really have RX and TX
 	 * queues, and adjust the TX queue numbers if we have separate
 	 * RX-only and TX-only channels.
 	 */
+	xdp_queue_number = 0;
 	efx_for_each_channel(channel, efx) {
 		if (channel->channel < efx->n_rx_channels)
 			channel->rx_queue.core_index = channel->channel;
 		else
 			channel->rx_queue.core_index = -1;
 
-		efx_for_each_channel_tx_queue(tx_queue, channel)
+		efx_for_each_channel_tx_queue(tx_queue, channel) {
 			tx_queue->queue -= (efx->tx_channel_offset *
 					    EFX_TXQ_TYPES);
+
+			if (efx_channel_is_xdp_tx(channel) &&
+			    xdp_queue_number < efx->xdp_tx_queue_count) {
+				efx->xdp_tx_queues[xdp_queue_number] = tx_queue;
+				xdp_queue_number++;
+			}
+		}
 	}
+	return 0;
 }
 
 static int efx_probe_nic(struct efx_nic *efx)
@@ -1776,7 +1895,9 @@ static int efx_probe_nic(struct efx_nic *efx)
 	if (rc)
 		goto fail1;
 
-	efx_set_channels(efx);
+	rc = efx_set_channels(efx);
+	if (rc)
+		goto fail1;
 
 	/* dimension_resources can fail with EAGAIN */
 	rc = efx->type->dimension_resources(efx);
@@ -1848,6 +1969,8 @@ static int efx_probe_filters(struct efx_nic *efx)
 			     ++i)
 				channel->rps_flow_id[i] =
 					RPS_FLOW_ID_INVALID;
+			channel->rfs_expire_index = 0;
+			channel->rfs_filter_count = 0;
 		}
 
 		if (!success) {
@@ -1857,8 +1980,6 @@
 			rc = -ENOMEM;
 			goto out_unlock;
 		}
-
-		efx->rps_expire_index = efx->rps_expire_channel = 0;
 	}
 #endif
 out_unlock:
@@ -1872,8 +1993,10 @@ static void efx_remove_filters(struct efx_nic *efx)
 #ifdef CONFIG_RFS_ACCEL
 	struct efx_channel *channel;
 
-	efx_for_each_channel(channel, efx)
+	efx_for_each_channel(channel, efx) {
+		cancel_delayed_work_sync(&channel->filter_work);
 		kfree(channel->rps_flow_id);
+	}
 #endif
 	down_write(&efx->filter_sem);
 	efx->type->filter_table_remove(efx);
@@ -2022,6 +2145,10 @@ static void efx_stop_all(struct efx_nic *efx)
 
 static void efx_remove_all(struct efx_nic *efx)
 {
+	rtnl_lock();
+	efx_xdp_setup_prog(efx, NULL);
+	rtnl_unlock();
+
 	efx_remove_channels(efx);
 	efx_remove_filters(efx);
 #ifdef CONFIG_SFC_SRIOV
@@ -2082,6 +2209,8 @@ int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs,
 			channel->irq_moderation_us = rx_usecs;
 		else if (efx_channel_has_tx_queues(channel))
 			channel->irq_moderation_us = tx_usecs;
+		else if (efx_channel_is_xdp_tx(channel))
+			channel->irq_moderation_us = tx_usecs;
 	}
 
 	return 0;
```
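`efx_set_channels()` now also fills `efx->xdp_tx_queues`, a flat array that lets the XDP transmit path go from a CPU number straight to a TX queue without walking channels. A hypothetical userspace sketch of building and consuming such a lookup table (`struct tx_queue`, the channel numbering and the four-queues-per-channel split are stand-ins, not driver definitions):

```c
#include <stdio.h>
#include <stdlib.h>

struct tx_queue {	/* illustrative stand-in for struct efx_tx_queue */
	int channel;
	int label;
};

int main(void)
{
	unsigned int queue_count = 8;	/* one per possible CPU, as in the diff */
	struct tx_queue **xdp_tx_queues = calloc(queue_count, sizeof(*xdp_tx_queues));
	unsigned int i, cpu = 5;	/* cpu: stand-in for smp_processor_id() */

	if (!xdp_tx_queues)
		return 1;

	/* Fill the table in channel order, as the efx_for_each_channel()
	 * loop above does for XDP TX channels. */
	for (i = 0; i < queue_count; i++) {
		xdp_tx_queues[i] = malloc(sizeof(**xdp_tx_queues));
		if (!xdp_tx_queues[i])
			return 1;
		xdp_tx_queues[i]->channel = 16 + i / 4;	/* 4 queues per channel */
		xdp_tx_queues[i]->label = i % 4;
	}

	/* O(1) lookup on the transmit path. */
	printf("CPU %u -> channel %d, queue %d\n", cpu,
	       xdp_tx_queues[cpu]->channel, xdp_tx_queues[cpu]->label);

	for (i = 0; i < queue_count; i++)
		free(xdp_tx_queues[i]);
	free(xdp_tx_queues);
	return 0;
}
```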
```diff
@@ -2277,6 +2406,17 @@ static void efx_watchdog(struct net_device *net_dev)
 	efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG);
 }
 
+static unsigned int efx_xdp_max_mtu(struct efx_nic *efx)
+{
+	/* The maximum MTU that we can fit in a single page, allowing for
+	 * framing, overhead and XDP headroom.
+	 */
+	int overhead = EFX_MAX_FRAME_LEN(0) + sizeof(struct efx_rx_page_state) +
+		       efx->rx_prefix_size + efx->type->rx_buffer_padding +
+		       efx->rx_ip_align + XDP_PACKET_HEADROOM;
+
+	return PAGE_SIZE - overhead;
+}
 
 /* Context: process, rtnl_lock() held. */
 static int efx_change_mtu(struct net_device *net_dev, int new_mtu)
@@ -2288,6 +2428,14 @@ static int efx_change_mtu(struct net_device *net_dev, int new_mtu)
 	if (rc)
 		return rc;
 
+	if (rtnl_dereference(efx->xdp_prog) &&
+	    new_mtu > efx_xdp_max_mtu(efx)) {
+		netif_err(efx, drv, efx->net_dev,
+			  "Requested MTU of %d too big for XDP (max: %d)\n",
+			  new_mtu, efx_xdp_max_mtu(efx));
+		return -EINVAL;
+	}
+
 	netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu);
 
 	efx_device_detach_sync(efx);
```
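`efx_xdp_max_mtu()` works out the largest MTU XDP can handle by subtracting every per-packet overhead from one page, and `efx_change_mtu()` rejects anything bigger while a program is bound. A worked version of that subtraction follows; every constant below is an illustrative assumption (the real values come from the driver, NIC type and architecture), only the shape of the calculation matches:

```c
#include <stdio.h>

int main(void)
{
	int page_size = 4096;	/* PAGE_SIZE on a 4 KiB-page system */
	int xdp_headroom = 256;	/* XDP_PACKET_HEADROOM */
	int frame_overhead = 40;	/* EFX_MAX_FRAME_LEN(0): headers, FCS, rounding */
	int page_state = 16;	/* sizeof(struct efx_rx_page_state) */
	int rx_prefix = 14;	/* efx->rx_prefix_size */
	int buf_padding = 0;	/* efx->type->rx_buffer_padding */
	int ip_align = 2;	/* efx->rx_ip_align */

	int overhead = frame_overhead + page_state + rx_prefix +
		       buf_padding + ip_align + xdp_headroom;

	/* Mirrors efx_xdp_max_mtu(): one page minus all overhead. */
	printf("max XDP MTU = %d - %d = %d\n",
	       page_size, overhead, page_size - overhead);
	return 0;
}
```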
```diff
@@ -2489,8 +2637,65 @@ static const struct net_device_ops efx_netdev_ops = {
 #endif
 	.ndo_udp_tunnel_add	= efx_udp_tunnel_add,
 	.ndo_udp_tunnel_del	= efx_udp_tunnel_del,
+	.ndo_xdp_xmit		= efx_xdp_xmit,
+	.ndo_bpf		= efx_xdp
 };
 
+static int efx_xdp_setup_prog(struct efx_nic *efx, struct bpf_prog *prog)
+{
+	struct bpf_prog *old_prog;
+
+	if (efx->xdp_rxq_info_failed) {
+		netif_err(efx, drv, efx->net_dev,
+			  "Unable to bind XDP program due to previous failure of rxq_info\n");
+		return -EINVAL;
+	}
+
+	if (prog && efx->net_dev->mtu > efx_xdp_max_mtu(efx)) {
+		netif_err(efx, drv, efx->net_dev,
+			  "Unable to configure XDP with MTU of %d (max: %d)\n",
+			  efx->net_dev->mtu, efx_xdp_max_mtu(efx));
+		return -EINVAL;
+	}
+
+	old_prog = rtnl_dereference(efx->xdp_prog);
+	rcu_assign_pointer(efx->xdp_prog, prog);
+	/* Release the reference that was originally passed by the caller. */
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+	return 0;
+}
+
+/* Context: process, rtnl_lock() held. */
+static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+	struct efx_nic *efx = netdev_priv(dev);
+	struct bpf_prog *xdp_prog;
+
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return efx_xdp_setup_prog(efx, xdp->prog);
+	case XDP_QUERY_PROG:
+		xdp_prog = rtnl_dereference(efx->xdp_prog);
+		xdp->prog_id = xdp_prog ? xdp_prog->aux->id : 0;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs,
+			u32 flags)
+{
+	struct efx_nic *efx = netdev_priv(dev);
+
+	if (!netif_running(dev))
+		return -EINVAL;
+
+	return efx_xdp_tx_buffers(efx, n, xdpfs, flags & XDP_XMIT_FLUSH);
+}
+
 static void efx_update_name(struct efx_nic *efx)
 {
 	strcpy(efx->name, efx->net_dev->name);
```
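The `.ndo_bpf` hook wired up above is what the kernel calls when userspace attaches an XDP program to the interface. As a usage sketch, here is a hedged userspace loader built on the libbpf API of this kernel's era (`bpf_prog_load()` and `bpf_set_link_xdp_fd()`); the object file `xdp_pass.o` and interface name `ens1f0` are placeholders:

```c
#include <stdio.h>
#include <net/if.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

int main(void)
{
	struct bpf_object *obj;
	int prog_fd, ifindex;

	ifindex = if_nametoindex("ens1f0");	/* placeholder interface */
	if (!ifindex)
		return 1;

	/* Load a compiled XDP object; prog_fd refers to the program. */
	if (bpf_prog_load("xdp_pass.o", BPF_PROG_TYPE_XDP, &obj, &prog_fd))
		return 1;

	/* XDP_SETUP_PROG path into efx_xdp_setup_prog(); the driver
	 * refuses if the MTU exceeds efx_xdp_max_mtu() or if rxq_info
	 * registration failed earlier. */
	if (bpf_set_link_xdp_fd(ifindex, prog_fd, 0) < 0) {
		fprintf(stderr, "attach failed\n");
		return 1;
	}

	getchar();	/* program stays attached until Enter */

	/* An fd of -1 detaches, analogous to efx_xdp_setup_prog(efx, NULL)
	 * in efx_remove_all(). */
	bpf_set_link_xdp_fd(ifindex, -1, 0);
	return 0;
}
```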

