diff options
Diffstat (limited to 'drivers/net/hyperv')
-rw-r--r-- | drivers/net/hyperv/Makefile | 2 | ||||
-rw-r--r-- | drivers/net/hyperv/hyperv_net.h | 34 | ||||
-rw-r--r-- | drivers/net/hyperv/netvsc.c | 69 | ||||
-rw-r--r-- | drivers/net/hyperv/netvsc_bpf.c | 218 | ||||
-rw-r--r-- | drivers/net/hyperv/netvsc_drv.c | 293 | ||||
-rw-r--r-- | drivers/net/hyperv/rndis_filter.c | 30 |
6 files changed, 576 insertions, 70 deletions
diff --git a/drivers/net/hyperv/Makefile b/drivers/net/hyperv/Makefile index 3a2aa0708166..0db7ccaec4a4 100644 --- a/drivers/net/hyperv/Makefile +++ b/drivers/net/hyperv/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_HYPERV_NET) += hv_netvsc.o -hv_netvsc-y := netvsc_drv.o netvsc.o rndis_filter.o netvsc_trace.o +hv_netvsc-y := netvsc_drv.o netvsc.o rndis_filter.o netvsc_trace.o netvsc_bpf.o diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index ecc9af050387..abda736e7c7d 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -142,6 +142,8 @@ struct netvsc_device_info { u32 send_section_size; u32 recv_section_size; + struct bpf_prog *bprog; + u8 rss_key[NETVSC_HASH_KEYLEN]; }; @@ -169,7 +171,6 @@ struct rndis_device { u8 hw_mac_adr[ETH_ALEN]; u8 rss_key[NETVSC_HASH_KEYLEN]; - u16 rx_table[ITAB_NUM]; }; @@ -190,7 +191,8 @@ int netvsc_send(struct net_device *net, struct hv_netvsc_packet *packet, struct rndis_message *rndis_msg, struct hv_page_buffer *page_buffer, - struct sk_buff *skb); + struct sk_buff *skb, + bool xdp_tx); void netvsc_linkstatus_callback(struct net_device *net, struct rndis_message *resp); int netvsc_recv_callback(struct net_device *net, @@ -199,6 +201,16 @@ int netvsc_recv_callback(struct net_device *net, void netvsc_channel_cb(void *context); int netvsc_poll(struct napi_struct *napi, int budget); +u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan, + struct xdp_buff *xdp); +unsigned int netvsc_xdp_fraglen(unsigned int len); +struct bpf_prog *netvsc_xdp_get(struct netvsc_device *nvdev); +int netvsc_xdp_set(struct net_device *dev, struct bpf_prog *prog, + struct netlink_ext_ack *extack, + struct netvsc_device *nvdev); +int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog); +int netvsc_bpf(struct net_device *dev, struct netdev_bpf *bpf); + int rndis_set_subchannel(struct net_device *ndev, struct netvsc_device *nvdev, struct netvsc_device_info *dev_info); @@ -609,7 +621,8 @@ struct nvsp_5_send_indirect_table { /* The number of entries in the send indirection table */ u32 count; - /* The offset of the send indirection table from top of this struct. + /* The offset of the send indirection table from the beginning of + * struct nvsp_message. * The send indirection table tells which channel to put the send * traffic on. Each entry is a channel number. */ @@ -822,7 +835,8 @@ struct nvsp_message { #define NETVSC_SUPPORTED_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | \ NETIF_F_TSO | NETIF_F_IPV6_CSUM | \ - NETIF_F_TSO6 | NETIF_F_LRO) + NETIF_F_TSO6 | NETIF_F_LRO | \ + NETIF_F_SG | NETIF_F_RXHASH) #define VRSS_SEND_TAB_SIZE 16 /* must be power of 2 */ #define VRSS_CHANNEL_MAX 64 @@ -831,6 +845,8 @@ struct nvsp_message { #define RNDIS_MAX_PKT_DEFAULT 8 #define RNDIS_PKT_ALIGN_DEFAULT 8 +#define NETVSC_XDP_HDRM 256 + struct multi_send_data { struct sk_buff *skb; /* skb containing the pkt */ struct hv_netvsc_packet *pkt; /* netvsc pkt pending */ @@ -853,6 +869,7 @@ struct multi_recv_comp { struct nvsc_rsc { const struct ndis_pkt_8021q_info *vlan; const struct ndis_tcp_ip_checksum_info *csum_info; + const u32 *hash_info; u8 is_last; /* last RNDIS msg in a vmtransfer_page */ u32 cnt; /* #fragments in an RSC packet */ u32 pktlen; /* Full packet length */ @@ -865,6 +882,7 @@ struct netvsc_stats { u64 bytes; u64 broadcast; u64 multicast; + u64 xdp_drop; struct u64_stats_sync syncp; }; @@ -937,6 +955,8 @@ struct net_device_context { u32 tx_table[VRSS_SEND_TAB_SIZE]; + u16 rx_table[ITAB_NUM]; + /* Ethtool settings */ u8 duplex; u32 speed; @@ -952,6 +972,9 @@ struct net_device_context { u32 vf_alloc; /* Serial number of the VF to team with */ u32 vf_serial; + + /* Used to temporarily save the config info across hibernation */ + struct netvsc_device_info *saved_netvsc_dev_info; }; /* Per channel data */ @@ -965,6 +988,9 @@ struct netvsc_channel { atomic_t queue_sends; struct nvsc_rsc rsc; + struct bpf_prog __rcu *bpf_prog; + struct xdp_rxq_info xdp_rxq; + struct netvsc_stats tx_stats; struct netvsc_stats rx_stats; }; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index d22a36fc7a7c..ae3f3084c2ed 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -122,8 +122,10 @@ static void free_netvsc_device(struct rcu_head *head) vfree(nvdev->send_buf); kfree(nvdev->send_section_map); - for (i = 0; i < VRSS_CHANNEL_MAX; i++) + for (i = 0; i < VRSS_CHANNEL_MAX; i++) { + xdp_rxq_info_unreg(&nvdev->chan_table[i].xdp_rxq); vfree(nvdev->chan_table[i].mrc.slots); + } kfree(nvdev); } @@ -900,7 +902,8 @@ int netvsc_send(struct net_device *ndev, struct hv_netvsc_packet *packet, struct rndis_message *rndis_msg, struct hv_page_buffer *pb, - struct sk_buff *skb) + struct sk_buff *skb, + bool xdp_tx) { struct net_device_context *ndev_ctx = netdev_priv(ndev); struct netvsc_device *net_device @@ -923,10 +926,11 @@ int netvsc_send(struct net_device *ndev, packet->send_buf_index = NETVSC_INVALID_INDEX; packet->cp_partial = false; - /* Send control message directly without accessing msd (Multi-Send - * Data) field which may be changed during data packet processing. + /* Send a control message or XDP packet directly without accessing + * msd (Multi-Send Data) field which may be changed during data packet + * processing. */ - if (!skb) + if (!skb || xdp_tx) return netvsc_send_pkt(device, packet, net_device, pb, skb); /* batch packets in send buffer if possible */ @@ -1178,20 +1182,39 @@ static int netvsc_receive(struct net_device *ndev, } static void netvsc_send_table(struct net_device *ndev, - const struct nvsp_message *nvmsg) + struct netvsc_device *nvscdev, + const struct nvsp_message *nvmsg, + u32 msglen) { struct net_device_context *net_device_ctx = netdev_priv(ndev); - u32 count, *tab; + u32 count, offset, *tab; int i; count = nvmsg->msg.v5_msg.send_table.count; + offset = nvmsg->msg.v5_msg.send_table.offset; + if (count != VRSS_SEND_TAB_SIZE) { netdev_err(ndev, "Received wrong send-table size:%u\n", count); return; } - tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table + - nvmsg->msg.v5_msg.send_table.offset); + /* If negotiated version <= NVSP_PROTOCOL_VERSION_6, the offset may be + * wrong due to a host bug. So fix the offset here. + */ + if (nvscdev->nvsp_version <= NVSP_PROTOCOL_VERSION_6 && + msglen >= sizeof(struct nvsp_message_header) + + sizeof(union nvsp_6_message_uber) + count * sizeof(u32)) + offset = sizeof(struct nvsp_message_header) + + sizeof(union nvsp_6_message_uber); + + /* Boundary check for all versions */ + if (offset > msglen - count * sizeof(u32)) { + netdev_err(ndev, "Received send-table offset too big:%u\n", + offset); + return; + } + + tab = (void *)nvmsg + offset; for (i = 0; i < count; i++) net_device_ctx->tx_table[i] = tab[i]; @@ -1209,12 +1232,14 @@ static void netvsc_send_vf(struct net_device *ndev, net_device_ctx->vf_alloc ? "added" : "removed"); } -static void netvsc_receive_inband(struct net_device *ndev, - const struct nvsp_message *nvmsg) +static void netvsc_receive_inband(struct net_device *ndev, + struct netvsc_device *nvscdev, + const struct nvsp_message *nvmsg, + u32 msglen) { switch (nvmsg->hdr.msg_type) { case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE: - netvsc_send_table(ndev, nvmsg); + netvsc_send_table(ndev, nvscdev, nvmsg, msglen); break; case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION: @@ -1232,6 +1257,7 @@ static int netvsc_process_raw_pkt(struct hv_device *device, { struct vmbus_channel *channel = nvchan->channel; const struct nvsp_message *nvmsg = hv_pkt_data(desc); + u32 msglen = hv_pkt_datalen(desc); trace_nvsp_recv(ndev, channel, nvmsg); @@ -1247,7 +1273,7 @@ static int netvsc_process_raw_pkt(struct hv_device *device, break; case VM_PKT_DATA_INBAND: - netvsc_receive_inband(ndev, nvmsg); + netvsc_receive_inband(ndev, net_device, nvmsg, msglen); break; default: @@ -1370,6 +1396,21 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, nvchan->net_device = net_device; u64_stats_init(&nvchan->tx_stats.syncp); u64_stats_init(&nvchan->rx_stats.syncp); + + ret = xdp_rxq_info_reg(&nvchan->xdp_rxq, ndev, i); + + if (ret) { + netdev_err(ndev, "xdp_rxq_info_reg fail: %d\n", ret); + goto cleanup2; + } + + ret = xdp_rxq_info_reg_mem_model(&nvchan->xdp_rxq, + MEM_TYPE_PAGE_SHARED, NULL); + + if (ret) { + netdev_err(ndev, "xdp reg_mem_model fail: %d\n", ret); + goto cleanup2; + } } /* Enable NAPI handler before init callbacks */ @@ -1415,6 +1456,8 @@ close: cleanup: netif_napi_del(&net_device->chan_table[0].napi); + +cleanup2: free_netvsc_device(&net_device->rcu); return ERR_PTR(ret); diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c new file mode 100644 index 000000000000..b86611041db6 --- /dev/null +++ b/drivers/net/hyperv/netvsc_bpf.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2019, Microsoft Corporation. + * + * Author: + * Haiyang Zhang <haiyangz@microsoft.com> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/bpf.h> +#include <linux/bpf_trace.h> +#include <linux/kernel.h> +#include <net/xdp.h> + +#include <linux/mutex.h> +#include <linux/rtnetlink.h> + +#include "hyperv_net.h" + +u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan, + struct xdp_buff *xdp) +{ + void *data = nvchan->rsc.data[0]; + u32 len = nvchan->rsc.len[0]; + struct page *page = NULL; + struct bpf_prog *prog; + u32 act = XDP_PASS; + + xdp->data_hard_start = NULL; + + rcu_read_lock(); + prog = rcu_dereference(nvchan->bpf_prog); + + if (!prog) + goto out; + + /* allocate page buffer for data */ + page = alloc_page(GFP_ATOMIC); + if (!page) { + act = XDP_DROP; + goto out; + } + + xdp->data_hard_start = page_address(page); + xdp->data = xdp->data_hard_start + NETVSC_XDP_HDRM; + xdp_set_data_meta_invalid(xdp); + xdp->data_end = xdp->data + len; + xdp->rxq = &nvchan->xdp_rxq; + xdp->handle = 0; + + memcpy(xdp->data, data, len); + + act = bpf_prog_run_xdp(prog, xdp); + + switch (act) { + case XDP_PASS: + case XDP_TX: + case XDP_DROP: + break; + + case XDP_ABORTED: + trace_xdp_exception(ndev, prog, act); + break; + + default: + bpf_warn_invalid_xdp_action(act); + } + +out: + rcu_read_unlock(); + + if (page && act != XDP_PASS && act != XDP_TX) { + __free_page(page); + xdp->data_hard_start = NULL; + } + + return act; +} + +unsigned int netvsc_xdp_fraglen(unsigned int len) +{ + return SKB_DATA_ALIGN(len) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); +} + +struct bpf_prog *netvsc_xdp_get(struct netvsc_device *nvdev) +{ + return rtnl_dereference(nvdev->chan_table[0].bpf_prog); +} + +int netvsc_xdp_set(struct net_device *dev, struct bpf_prog *prog, + struct netlink_ext_ack *extack, + struct netvsc_device *nvdev) +{ + struct bpf_prog *old_prog; + int buf_max, i; + + old_prog = netvsc_xdp_get(nvdev); + + if (!old_prog && !prog) + return 0; + + buf_max = NETVSC_XDP_HDRM + netvsc_xdp_fraglen(dev->mtu + ETH_HLEN); + if (prog && buf_max > PAGE_SIZE) { + netdev_err(dev, "XDP: mtu:%u too large, buf_max:%u\n", + dev->mtu, buf_max); + NL_SET_ERR_MSG_MOD(extack, "XDP: mtu too large"); + + return -EOPNOTSUPP; + } + + if (prog && (dev->features & NETIF_F_LRO)) { + netdev_err(dev, "XDP: not support LRO\n"); + NL_SET_ERR_MSG_MOD(extack, "XDP: not support LRO"); + + return -EOPNOTSUPP; + } + + if (prog) + bpf_prog_add(prog, nvdev->num_chn - 1); + + for (i = 0; i < nvdev->num_chn; i++) + rcu_assign_pointer(nvdev->chan_table[i].bpf_prog, prog); + + if (old_prog) + for (i = 0; i < nvdev->num_chn; i++) + bpf_prog_put(old_prog); + + return 0; +} + +int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog) +{ + struct netdev_bpf xdp; + bpf_op_t ndo_bpf; + int ret; + + ASSERT_RTNL(); + + if (!vf_netdev) + return 0; + + ndo_bpf = vf_netdev->netdev_ops->ndo_bpf; + if (!ndo_bpf) + return 0; + + memset(&xdp, 0, sizeof(xdp)); + + if (prog) + bpf_prog_inc(prog); + + xdp.command = XDP_SETUP_PROG; + xdp.prog = prog; + + ret = ndo_bpf(vf_netdev, &xdp); + + if (ret && prog) + bpf_prog_put(prog); + + return ret; +} + +static u32 netvsc_xdp_query(struct netvsc_device *nvdev) +{ + struct bpf_prog *prog = netvsc_xdp_get(nvdev); + + if (prog) + return prog->aux->id; + + return 0; +} + +int netvsc_bpf(struct net_device *dev, struct netdev_bpf *bpf) +{ + struct net_device_context *ndevctx = netdev_priv(dev); + struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); + struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev); + struct netlink_ext_ack *extack = bpf->extack; + int ret; + + if (!nvdev || nvdev->destroy) { + if (bpf->command == XDP_QUERY_PROG) { + bpf->prog_id = 0; + return 0; /* Query must always succeed */ + } else { + return -ENODEV; + } + } + + switch (bpf->command) { + case XDP_SETUP_PROG: + ret = netvsc_xdp_set(dev, bpf->prog, extack, nvdev); + + if (ret) + return ret; + + ret = netvsc_vf_setxdp(vf_netdev, bpf->prog); + + if (ret) { + netdev_err(dev, "vf_setxdp failed:%d\n", ret); + NL_SET_ERR_MSG_MOD(extack, "vf_setxdp failed"); + + netvsc_xdp_set(dev, NULL, extack, nvdev); + } + + return ret; + + case XDP_QUERY_PROG: + bpf->prog_id = netvsc_xdp_query(nvdev); + return 0; + + default: + return -EINVAL; + } +} diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index e8fce6d715ef..65e12cb07f45 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -25,6 +25,7 @@ #include <linux/slab.h> #include <linux/rtnetlink.h> #include <linux/netpoll.h> +#include <linux/bpf.h> #include <net/arp.h> #include <net/route.h> @@ -285,9 +286,9 @@ static inline u32 netvsc_get_hash( else if (flow.basic.n_proto == htons(ETH_P_IPV6)) hash = jhash2((u32 *)&flow.addrs.v6addrs, 8, hashrnd); else - hash = 0; + return 0; - skb_set_hash(skb, hash, PKT_HASH_TYPE_L3); + __skb_set_sw_hash(skb, hash, false); } return hash; @@ -435,7 +436,7 @@ static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, skb_frag_t *frag = skb_shinfo(skb)->frags + i; slots_used += fill_pg_buf(skb_frag_page(frag), - frag->page_offset, + skb_frag_off(frag), skb_frag_size(frag), &pb[slots_used]); } return slots_used; @@ -449,7 +450,7 @@ static int count_skb_frag_slots(struct sk_buff *skb) for (i = 0; i < frags; i++) { skb_frag_t *frag = skb_shinfo(skb)->frags + i; unsigned long size = skb_frag_size(frag); - unsigned long offset = frag->page_offset; + unsigned long offset = skb_frag_off(frag); /* Skip unused frames from start of page */ offset &= ~PAGE_MASK; @@ -519,7 +520,7 @@ static int netvsc_vf_xmit(struct net_device *net, struct net_device *vf_netdev, return rc; } -static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) +static int netvsc_xmit(struct sk_buff *skb, struct net_device *net, bool xdp_tx) { struct net_device_context *net_device_ctx = netdev_priv(net); struct hv_netvsc_packet *packet = NULL; @@ -571,7 +572,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) /* Use the skb control buffer for building up the packet */ BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) > - FIELD_SIZEOF(struct sk_buff, cb)); + sizeof_field(struct sk_buff, cb)); packet = (struct hv_netvsc_packet *)skb->cb; packet->q_idx = skb_get_queue_mapping(skb); @@ -686,7 +687,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) /* timestamp packet in software */ skb_tx_timestamp(skb); - ret = netvsc_send(net, packet, rndis_msg, pb, skb); + ret = netvsc_send(net, packet, rndis_msg, pb, skb, xdp_tx); if (likely(ret == 0)) return NETDEV_TX_OK; @@ -709,6 +710,11 @@ no_memory: goto drop; } +static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *ndev) +{ + return netvsc_xmit(skb, ndev, false); +} + /* * netvsc_linkstatus_callback - Link up/down notification */ @@ -751,6 +757,22 @@ void netvsc_linkstatus_callback(struct net_device *net, schedule_delayed_work(&ndev_ctx->dwork, 0); } +static void netvsc_xdp_xmit(struct sk_buff *skb, struct net_device *ndev) +{ + int rc; + + skb->queue_mapping = skb_get_rx_queue(skb); + __skb_push(skb, ETH_HLEN); + + rc = netvsc_xmit(skb, ndev, true); + + if (dev_xmit_complete(rc)) + return; + + dev_kfree_skb_any(skb); + ndev->stats.tx_dropped++; +} + static void netvsc_comp_ipcsum(struct sk_buff *skb) { struct iphdr *iph = (struct iphdr *)skb->data; @@ -760,25 +782,46 @@ static void netvsc_comp_ipcsum(struct sk_buff *skb) } static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, - struct netvsc_channel *nvchan) + struct netvsc_channel *nvchan, + struct xdp_buff *xdp) { struct napi_struct *napi = &nvchan->napi; const struct ndis_pkt_8021q_info *vlan = nvchan->rsc.vlan; const struct ndis_tcp_ip_checksum_info *csum_info = nvchan->rsc.csum_info; + const u32 *hash_info = nvchan->rsc.hash_info; struct sk_buff *skb; + void *xbuf = xdp->data_hard_start; int i; - skb = napi_alloc_skb(napi, nvchan->rsc.pktlen); - if (!skb) - return skb; + if (xbuf) { + unsigned int hdroom = xdp->data - xdp->data_hard_start; + unsigned int xlen = xdp->data_end - xdp->data; + unsigned int frag_size = netvsc_xdp_fraglen(hdroom + xlen); - /* - * Copy to skb. This copy is needed here since the memory pointed by - * hv_netvsc_packet cannot be deallocated - */ - for (i = 0; i < nvchan->rsc.cnt; i++) - skb_put_data(skb, nvchan->rsc.data[i], nvchan->rsc.len[i]); + skb = build_skb(xbuf, frag_size); + + if (!skb) { + __free_page(virt_to_page(xbuf)); + return NULL; + } + + skb_reserve(skb, hdroom); + skb_put(skb, xlen); + skb->dev = napi->dev; + } else { + skb = napi_alloc_skb(napi, nvchan->rsc.pktlen); + + if (!skb) + return NULL; + + /* Copy to skb. This copy is needed here since the memory + * pointed by hv_netvsc_packet cannot be deallocated. + */ + for (i = 0; i < nvchan->rsc.cnt; i++) + skb_put_data(skb, nvchan->rsc.data[i], + nvchan->rsc.len[i]); + } skb->protocol = eth_type_trans(skb, net); @@ -795,14 +838,16 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, skb->protocol == htons(ETH_P_IP)) netvsc_comp_ipcsum(skb); - /* Do L4 checksum offload if enabled and present. - */ + /* Do L4 checksum offload if enabled and present. */ if (csum_info && (net->features & NETIF_F_RXCSUM)) { if (csum_info->receive.tcp_checksum_succeeded || csum_info->receive.udp_checksum_succeeded) skb->ip_summed = CHECKSUM_UNNECESSARY; } + if (hash_info && (net->features & NETIF_F_RXHASH)) + skb_set_hash(skb, *hash_info, PKT_HASH_TYPE_L4); + if (vlan) { u16 vlan_tci = vlan->vlanid | (vlan->pri << VLAN_PRIO_SHIFT) | (vlan->cfi ? VLAN_CFI_MASK : 0); @@ -826,13 +871,25 @@ int netvsc_recv_callback(struct net_device *net, struct vmbus_channel *channel = nvchan->channel; u16 q_idx = channel->offermsg.offer.sub_channel_index; struct sk_buff *skb; - struct netvsc_stats *rx_stats; + struct netvsc_stats *rx_stats = &nvchan->rx_stats; + struct xdp_buff xdp; + u32 act; if (net->reg_state != NETREG_REGISTERED) return NVSP_STAT_FAIL; + act = netvsc_run_xdp(net, nvchan, &xdp); + + if (act != XDP_PASS && act != XDP_TX) { + u64_stats_update_begin(&rx_stats->syncp); + rx_stats->xdp_drop++; + u64_stats_update_end(&rx_stats->syncp); + + return NVSP_STAT_SUCCESS; /* consumed by XDP */ + } + /* Allocate a skb - TODO direct I/O to pages? */ - skb = netvsc_alloc_recv_skb(net, nvchan); + skb = netvsc_alloc_recv_skb(net, nvchan, &xdp); if (unlikely(!skb)) { ++net_device_ctx->eth_stats.rx_no_memory; @@ -846,7 +903,6 @@ int netvsc_recv_callback(struct net_device *net, * on the synthetic device because modifying the VF device * statistics will not work correctly. */ - rx_stats = &nvchan->rx_stats; u64_stats_update_begin(&rx_stats->syncp); rx_stats->packets++; rx_stats->bytes += nvchan->rsc.pktlen; @@ -857,6 +913,11 @@ int netvsc_recv_callback(struct net_device *net, ++rx_stats->multicast; u64_stats_update_end(&rx_stats->syncp); + if (act == XDP_TX) { + netvsc_xdp_xmit(skb, net); + return NVSP_STAT_SUCCESS; + } + napi_gro_receive(&nvchan->napi, skb); return NVSP_STAT_SUCCESS; } @@ -883,10 +944,11 @@ static void netvsc_get_channels(struct net_device *net, /* Alloc struct netvsc_device_info, and initialize it from either existing * struct netvsc_device, or from default values. */ -static struct netvsc_device_info *netvsc_devinfo_get - (struct netvsc_device *nvdev) +static +struct netvsc_device_info *netvsc_devinfo_get(struct netvsc_device *nvdev) { struct netvsc_device_info *dev_info; + struct bpf_prog *prog; dev_info = kzalloc(sizeof(*dev_info), GFP_ATOMIC); @@ -894,6 +956,8 @@ static struct netvsc_device_info *netvsc_devinfo_get return NULL; if (nvdev) { + ASSERT_RTNL(); + dev_info->num_chn = nvdev->num_chn; dev_info->send_sections = nvdev->send_section_cnt; dev_info->send_section_size = nvdev->send_section_size; @@ -902,6 +966,12 @@ static struct netvsc_device_info *netvsc_devinfo_get memcpy(dev_info->rss_key, nvdev->extension->rss_key, NETVSC_HASH_KEYLEN); + + prog = netvsc_xdp_get(nvdev); + if (prog) { + bpf_prog_inc(prog); + dev_info->bprog = prog; + } } else { dev_info->num_chn = VRSS_CHANNEL_DEFAULT; dev_info->send_sections = NETVSC_DEFAULT_TX; @@ -913,6 +983,17 @@ static struct netvsc_device_info *netvsc_devinfo_get return dev_info; } +/* Free struct netvsc_device_info */ +static void netvsc_devinfo_put(struct netvsc_device_info *dev_info) +{ + if (dev_info->bprog) { + ASSERT_RTNL(); + bpf_prog_put(dev_info->bprog); + } + + kfree(dev_info); +} + static int netvsc_detach(struct net_device *ndev, struct netvsc_device *nvdev) { @@ -924,6 +1005,8 @@ static int netvsc_detach(struct net_device *ndev, if (cancel_work_sync(&nvdev->subchan_work)) nvdev->num_chn = 1; + netvsc_xdp_set(ndev, NULL, NULL, nvdev); + /* If device was up (receiving) then shutdown */ if (netif_running(ndev)) { netvsc_tx_disable(nvdev, ndev); @@ -957,7 +1040,8 @@ static int netvsc_attach(struct net_device *ndev, struct hv_device *hdev = ndev_ctx->device_ctx; struct netvsc_device *nvdev; struct rndis_device *rdev; - int ret; + struct bpf_prog *prog; + int ret = 0; nvdev = rndis_filter_device_add(hdev, dev_info); if (IS_ERR(nvdev)) @@ -973,6 +1057,16 @@ static int netvsc_attach(struct net_device *ndev, } } + prog = dev_info->bprog; + if (prog) { + bpf_prog_inc(prog); + ret = netvsc_xdp_set(ndev, prog, NULL, nvdev); + if (ret) { + bpf_prog_put(prog); + goto err1; + } + } + /* In any case device is now ready */ netif_device_attach(ndev); @@ -982,7 +1076,7 @@ static int netvsc_attach(struct net_device *ndev, if (netif_running(ndev)) { ret = rndis_filter_open(nvdev); if (ret) - return ret; + goto err2; rdev = nvdev->extension; if (!rdev->link_state) @@ -990,6 +1084,14 @@ static int netvsc_attach(struct net_device *ndev, } return 0; + +err2: + netif_device_detach(ndev); + +err1: + rndis_filter_device_remove(hdev, nvdev); + + return ret; } static int netvsc_set_channels(struct net_device *net, @@ -1036,7 +1138,7 @@ static int netvsc_set_channels(struct net_device *net, } out: - kfree(device_info); + netvsc_devinfo_put(device_info); return ret; } @@ -1143,7 +1245,7 @@ rollback_vf: dev_set_mtu(vf_netdev, orig_mtu); out: - kfree(device_info); + netvsc_devinfo_put(device_info); return ret; } @@ -1368,8 +1470,8 @@ static const struct { /* statistics per queue (rx/tx packets/bytes) */ #define NETVSC_PCPU_STATS_LEN (num_present_cpus() * ARRAY_SIZE(pcpu_stats)) -/* 4 statistics per queue (rx/tx packets/bytes) */ -#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4) +/* 5 statistics per queue (rx/tx packets/bytes, rx xdp_drop) */ +#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 5) static int netvsc_get_sset_count(struct net_device *dev, int string_set) { @@ -1401,6 +1503,7 @@ static void netvsc_get_ethtool_stats(struct net_device *dev, struct netvsc_ethtool_pcpu_stats *pcpu_sum; unsigned int start; u64 packets, bytes; + u64 xdp_drop; int i, j, cpu; if (!nvdev) @@ -1429,9 +1532,11 @@ static void netvsc_get_ethtool_stats(struct net_device *dev, start = u64_stats_fetch_begin_irq(&qstats->syncp); packets = qstats->packets; bytes = qstats->bytes; + xdp_drop = qstats->xdp_drop; } while (u64_stats_fetch_retry_irq(&qstats->syncp, start)); data[i++] = packets; data[i++] = bytes; + data[i++] = xdp_drop; } pcpu_sum = kvmalloc_array(num_possible_cpus(), @@ -1479,6 +1584,8 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data) p += ETH_GSTRING_LEN; sprintf(p, "rx_queue_%u_bytes", i); p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_xdp_drop", i); + p += ETH_GSTRING_LEN; } for_each_present_cpu(cpu) { @@ -1652,7 +1759,7 @@ static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, rndis_dev = ndev->extension; if (indir) { for (i = 0; i < ITAB_NUM; i++) - indir[i] = rndis_dev->rx_table[i]; + indir[i] = ndc->rx_table[i]; } if (key) @@ -1682,7 +1789,7 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir, return -EINVAL; for (i = 0; i < ITAB_NUM; i++) - rndis_dev->rx_table[i] = indir[i]; + ndc->rx_table[i] = indir[i]; } if (!key) { @@ -1775,23 +1882,42 @@ static int netvsc_set_ringparam(struct net_device *ndev, } out: - kfree(device_info); + netvsc_devinfo_put(device_info); return ret; } +static netdev_features_t netvsc_fix_features(struct net_device *ndev, + netdev_features_t features) +{ + struct net_device_context *ndevctx = netdev_priv(ndev); + struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); + + if (!nvdev || nvdev->destroy) + return features; + + if ((features & NETIF_F_LRO) && netvsc_xdp_get(nvdev)) { + features ^= NETIF_F_LRO; + netdev_info(ndev, "Skip LRO - unsupported with XDP\n"); + } + + return features; +} + static int netvsc_set_features(struct net_device *ndev, netdev_features_t features) { netdev_features_t change = features ^ ndev->features; struct net_device_context *ndevctx = netdev_priv(ndev); struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); + struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev); struct ndis_offload_params offloads; + int ret = 0; if (!nvdev || nvdev->destroy) return -ENODEV; if (!(change & NETIF_F_LRO)) - return 0; + goto syncvf; memset(&offloads, 0, sizeof(struct ndis_offload_params)); @@ -1803,7 +1929,21 @@ static int netvsc_set_features(struct net_device *ndev, offloads.rsc_ip_v6 = NDIS_OFFLOAD_PARAMETERS_RSC_DISABLED; } - return rndis_filter_set_offload_params(ndev, nvdev, &offloads); + ret = rndis_filter_set_offload_params(ndev, nvdev, &offloads); + + if (ret) { + features ^= NETIF_F_LRO; + ndev->features = features; + } + +syncvf: + if (!vf_netdev) + return ret; + + vf_netdev->wanted_features = features; + netdev_update_features(vf_netdev); + + return ret; } static u32 netvsc_get_msglevel(struct net_device *ndev) @@ -1849,12 +1989,14 @@ static const struct net_device_ops device_ops = { .ndo_start_xmit = netvsc_start_xmit, .ndo_change_rx_flags = netvsc_change_rx_flags, .ndo_set_rx_mode = netvsc_set_rx_mode, + .ndo_fix_features = netvsc_fix_features, .ndo_set_features = netvsc_set_features, .ndo_change_mtu = netvsc_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = netvsc_set_mac_addr, .ndo_select_queue = netvsc_select_queue, .ndo_get_stats64 = netvsc_get_stats64, + .ndo_bpf = netvsc_bpf, }; /* @@ -2141,6 +2283,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev) { struct net_device_context *net_device_ctx; struct netvsc_device *netvsc_dev; + struct bpf_prog *prog; struct net_device *ndev; int ret; @@ -2181,6 +2324,13 @@ static int netvsc_register_vf(struct net_device *vf_netdev) dev_hold(vf_netdev); rcu_assign_pointer(net_device_ctx->vf_netdev, vf_netdev); + + vf_netdev->wanted_features = ndev->features; + netdev_update_features(vf_netdev); + + prog = netvsc_xdp_get(netvsc_dev); + netvsc_vf_setxdp(vf_netdev, prog); + return NOTIFY_OK; } @@ -2222,6 +2372,8 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); + netvsc_vf_setxdp(vf_netdev, NULL); + netdev_rx_handler_unregister(vf_netdev); netdev_upper_dev_unlink(vf_netdev, ndev); RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL); @@ -2313,12 +2465,10 @@ static int netvsc_probe(struct hv_device *dev, /* hw_features computed in rndis_netdev_set_hwcaps() */ net->features = net->hw_features | - NETIF_F_HIGHDMA | NETIF_F_SG | - NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; + NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; net->vlan_features = net->features; - netdev_lockdep_set_classes(net); - /* MTU range: 68 - 1500 or 65521 */ net->min_mtu = NETVSC_MTU_MIN; if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2) @@ -2335,14 +2485,14 @@ static int netvsc_probe(struct hv_device *dev, list_add(&net_device_ctx->list, &netvsc_dev_list); rtnl_unlock(); - kfree(device_info); + netvsc_devinfo_put(device_info); return 0; register_failed: rtnl_unlock(); rndis_filter_device_remove(dev, nvdev); rndis_failed: - kfree(device_info); + netvsc_devinfo_put(device_info); devinfo_failed: free_percpu(net_device_ctx->vf_stats); no_stats: @@ -2370,8 +2520,10 @@ static int netvsc_remove(struct hv_device *dev) rtnl_lock(); nvdev = rtnl_dereference(ndev_ctx->nvdev); - if (nvdev) + if (nvdev) { cancel_work_sync(&nvdev->subchan_work); + netvsc_xdp_set(net, NULL, NULL, nvdev); + } /* * Call to the vsc driver to let it know that the device is being @@ -2396,6 +2548,61 @@ static int netvsc_remove(struct hv_device *dev) return 0; } +static int netvsc_suspend(struct hv_device *dev) +{ + struct net_device_context *ndev_ctx; + struct net_device *vf_netdev, *net; + struct netvsc_device *nvdev; + int ret; + + net = hv_get_drvdata(dev); + + ndev_ctx = netdev_priv(net); + cancel_delayed_work_sync(&ndev_ctx->dwork); + + rtnl_lock(); + + nvdev = rtnl_dereference(ndev_ctx->nvdev); + if (nvdev == NULL) { + ret = -ENODEV; + goto out; + } + + vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev); + if (vf_netdev) + netvsc_unregister_vf(vf_netdev); + + /* Save the current config info */ + ndev_ctx->saved_netvsc_dev_info = netvsc_devinfo_get(nvdev); + + ret = netvsc_detach(net, nvdev); +out: + rtnl_unlock(); + + return ret; +} + +static int netvsc_resume(struct hv_device *dev) +{ + struct net_device *net = hv_get_drvdata(dev); + struct net_device_context *net_device_ctx; + struct netvsc_device_info *device_info; + int ret; + + rtnl_lock(); + + net_device_ctx = netdev_priv(net); + device_info = net_device_ctx->saved_netvsc_dev_info; + + ret = netvsc_attach(net, device_info); + + netvsc_devinfo_put(device_info); + net_device_ctx->saved_netvsc_dev_info = NULL; + + rtnl_unlock(); + + return ret; +} static const struct hv_vmbus_device_id id_table[] = { /* Network guid */ { HV_NIC_GUID, }, @@ -2410,6 +2617,8 @@ static struct hv_driver netvsc_drv = { .id_table = id_table, .probe = netvsc_probe, .remove = netvsc_remove, + .suspend = netvsc_suspend, + .resume = netvsc_resume, .driver = { .probe_type = PROBE_FORCE_SYNCHRONOUS, }, diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 317dbe9356c2..b81ceba38218 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -235,7 +235,7 @@ static int rndis_filter_send_request(struct rndis_device *dev, trace_rndis_send(dev->ndev, 0, &req->request_msg); rcu_read_lock_bh(); - ret = netvsc_send(dev->ndev, packet, NULL, pb, NULL); + ret = netvsc_send(dev->ndev, packet, NULL, pb, NULL, false); rcu_read_unlock_bh(); return ret; @@ -358,6 +358,7 @@ static inline void rsc_add_data(struct netvsc_channel *nvchan, const struct ndis_pkt_8021q_info *vlan, const struct ndis_tcp_ip_checksum_info *csum_info, + const u32 *hash_info, void *data, u32 len) { u32 cnt = nvchan->rsc.cnt; @@ -368,6 +369,7 @@ void rsc_add_data(struct netvsc_channel *nvchan, nvchan->rsc.vlan = vlan; nvchan->rsc.csum_info = csum_info; nvchan->rsc.pktlen = len; + nvchan->rsc.hash_info = hash_info; } nvchan->rsc.data[cnt] = data; @@ -385,6 +387,7 @@ static int rndis_filter_receive_data(struct net_device *ndev, const struct ndis_tcp_ip_checksum_info *csum_info; const struct ndis_pkt_8021q_info *vlan; const struct rndis_pktinfo_id *pktinfo_id; + const u32 *hash_info; u32 data_offset; void *data; bool rsc_more = false; @@ -411,6 +414,8 @@ static int rndis_filter_receive_data(struct net_device *ndev, csum_info = rndis_get_ppi(rndis_pkt, TCPIP_CHKSUM_PKTINFO, 0); + hash_info = rndis_get_ppi(rndis_pkt, NBL_HASH_VALUE, 0); + pktinfo_id = rndis_get_ppi(rndis_pkt, RNDIS_PKTINFO_ID, 1); data = (void *)msg + data_offset; @@ -441,7 +446,8 @@ static int rndis_filter_receive_data(struct net_device *ndev, * rndis_pkt->data_len tell us the real data length, we only copy * the data packet to the stack, without the rndis trailer padding */ - rsc_add_data(nvchan, vlan, csum_info, data, rndis_pkt->data_len); + rsc_add_data(nvchan, vlan, csum_info, hash_info, + data, rndis_pkt->data_len); if (rsc_more) return NVSP_STAT_SUCCESS; @@ -767,6 +773,7 @@ static int rndis_set_rss_param_msg(struct rndis_device *rdev, const u8 *rss_key, u16 flag) { struct net_device *ndev = rdev->ndev; + struct net_device_context *ndc = netdev_priv(ndev); struct rndis_request *request; struct rndis_set_request *set; struct rndis_set_complete *set_complete; @@ -806,7 +813,7 @@ static int rndis_set_rss_param_msg(struct rndis_device *rdev, /* Set indirection table entries */ itab = (u32 *)(rssp + 1); for (i = 0; i < ITAB_NUM; i++) - itab[i] = rdev->rx_table[i]; + itab[i] = ndc->rx_table[i]; /* Set hask key values */ keyp = (u8 *)((unsigned long)rssp + rssp->hashkey_offset); @@ -1165,6 +1172,9 @@ int rndis_set_subchannel(struct net_device *ndev, wait_event(nvdev->subchan_open, atomic_read(&nvdev->open_chn) == nvdev->num_chn); + for (i = 0; i < VRSS_SEND_TAB_SIZE; i++) + ndev_ctx->tx_table[i] = i % nvdev->num_chn; + /* ignore failures from setting rss parameters, still have channels */ if (dev_info) rndis_filter_set_rss_param(rdev, dev_info->rss_key); @@ -1174,9 +1184,6 @@ int rndis_set_subchannel(struct net_device *ndev, netif_set_real_num_tx_queues(ndev, nvdev->num_chn); netif_set_real_num_rx_queues(ndev, nvdev->num_chn); - for (i = 0; i < VRSS_SEND_TAB_SIZE; i++) - ndev_ctx->tx_table[i] = i % nvdev->num_chn; - return 0; } @@ -1207,6 +1214,8 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, /* Compute tx offload settings based on hw capabilities */ net->hw_features |= NETIF_F_RXCSUM; + net->hw_features |= NETIF_F_SG; + net->hw_features |= NETIF_F_RXHASH; if ((hwcaps.csum.ip4_txcsum & NDIS_TXCSUM_ALL_TCP4) == NDIS_TXCSUM_ALL_TCP4) { /* Can checksum TCP */ @@ -1304,6 +1313,7 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, struct netvsc_device_info *device_info) { struct net_device *net = hv_get_drvdata(dev); + struct net_device_context *ndc = netdev_priv(net); struct netvsc_device *net_device; struct rndis_device *rndis_device; struct ndis_recv_scale_cap rsscap; @@ -1390,9 +1400,11 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, /* We will use the given number of channels if available. */ net_device->num_chn = min(net_device->max_chn, device_info->num_chn); - for (i = 0; i < ITAB_NUM; i++) - rndis_device->rx_table[i] = ethtool_rxfh_indir_default( + if (!netif_is_rxfh_configured(net)) { + for (i = 0; i < ITAB_NUM; i++) + ndc->rx_table[i] = ethtool_rxfh_indir_default( i, net_device->num_chn); + } atomic_set(&net_device->open_chn, 1); vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open); @@ -1431,8 +1443,6 @@ void rndis_filter_device_remove(struct hv_device *dev, /* Halt and release the rndis device */ rndis_filter_halt_device(net_dev, rndis_dev); - net_dev->extension = NULL; - netvsc_device_remove(dev); } |