diff options
Diffstat (limited to 'drivers/net/hyperv')
-rw-r--r-- | drivers/net/hyperv/Kconfig | 1 | ||||
-rw-r--r-- | drivers/net/hyperv/hyperv_net.h | 33 | ||||
-rw-r--r-- | drivers/net/hyperv/netvsc.c | 55 | ||||
-rw-r--r-- | drivers/net/hyperv/netvsc_drv.c | 266 | ||||
-rw-r--r-- | drivers/net/hyperv/rndis_filter.c | 61 |
5 files changed, 268 insertions, 148 deletions
diff --git a/drivers/net/hyperv/Kconfig b/drivers/net/hyperv/Kconfig index 23a2d145813a..0765d5f61714 100644 --- a/drivers/net/hyperv/Kconfig +++ b/drivers/net/hyperv/Kconfig @@ -2,6 +2,5 @@ config HYPERV_NET tristate "Microsoft Hyper-V virtual network driver" depends on HYPERV select UCS2_STRING - select FAILOVER help Select this option to enable the Hyper-V virtual network driver. diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 99d8e7398a5b..4b6e308199d2 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -189,7 +189,6 @@ struct netvsc_device; struct net_device_context; extern u32 netvsc_ring_bytes; -extern struct reciprocal_value netvsc_ring_reciprocal; struct netvsc_device *netvsc_device_add(struct hv_device *device, const struct netvsc_device_info *info); @@ -211,7 +210,7 @@ int netvsc_recv_callback(struct net_device *net, void netvsc_channel_cb(void *context); int netvsc_poll(struct napi_struct *napi, int budget); -void rndis_set_subchannel(struct work_struct *w); +int rndis_set_subchannel(struct net_device *ndev, struct netvsc_device *nvdev); int rndis_filter_open(struct netvsc_device *nvdev); int rndis_filter_close(struct netvsc_device *nvdev); struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, @@ -902,6 +901,8 @@ struct net_device_context { struct hv_device *device_ctx; /* netvsc_device */ struct netvsc_device __rcu *nvdev; + /* list of netvsc net_devices */ + struct list_head list; /* reconfigure work */ struct delayed_work dwork; /* last reconfig time */ @@ -932,8 +933,6 @@ struct net_device_context { u32 vf_alloc; /* Serial number of the VF to team with */ u32 vf_serial; - - struct failover *failover; }; /* Per channel data */ @@ -1278,17 +1277,17 @@ struct ndis_lsov2_offload { struct ndis_ipsecv2_offload { u32 encap; - u16 ip6; - u16 ip4opt; - u16 ip6ext; - u16 ah; - u16 esp; - u16 ah_esp; - u16 xport; - u16 tun; - u16 xport_tun; - u16 lso; - u16 extseq; + u8 ip6; + u8 ip4opt; + u8 ip6ext; + u8 ah; + u8 esp; + u8 ah_esp; + u8 xport; + u8 tun; + u8 xport_tun; + u8 lso; + u8 extseq; u32 udp_esp; u32 auth; u32 crypto; @@ -1296,8 +1295,8 @@ struct ndis_ipsecv2_offload { }; struct ndis_rsc_offload { - u16 ip4; - u16 ip6; + u8 ip4; + u8 ip6; }; struct ndis_encap_offload { diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index d2ee66c259a7..8e9d0ee1572b 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -31,7 +31,6 @@ #include <linux/vmalloc.h> #include <linux/rtnetlink.h> #include <linux/prefetch.h> -#include <linux/reciprocal_div.h> #include <asm/sync_bitops.h> @@ -66,6 +65,41 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf) VM_PKT_DATA_INBAND, 0); } +/* Worker to setup sub channels on initial setup + * Initial hotplug event occurs in softirq context + * and can't wait for channels. + */ +static void netvsc_subchan_work(struct work_struct *w) +{ + struct netvsc_device *nvdev = + container_of(w, struct netvsc_device, subchan_work); + struct rndis_device *rdev; + int i, ret; + + /* Avoid deadlock with device removal already under RTNL */ + if (!rtnl_trylock()) { + schedule_work(w); + return; + } + + rdev = nvdev->extension; + if (rdev) { + ret = rndis_set_subchannel(rdev->ndev, nvdev); + if (ret == 0) { + netif_device_attach(rdev->ndev); + } else { + /* fallback to only primary channel */ + for (i = 1; i < nvdev->num_chn; i++) + netif_napi_del(&nvdev->chan_table[i].napi); + + nvdev->max_chn = 1; + nvdev->num_chn = 1; + } + } + + rtnl_unlock(); +} + static struct netvsc_device *alloc_net_device(void) { struct netvsc_device *net_device; @@ -82,7 +116,7 @@ static struct netvsc_device *alloc_net_device(void) init_completion(&net_device->channel_init_wait); init_waitqueue_head(&net_device->subchan_open); - INIT_WORK(&net_device->subchan_work, rndis_set_subchannel); + INIT_WORK(&net_device->subchan_work, netvsc_subchan_work); return net_device; } @@ -635,17 +669,6 @@ void netvsc_device_remove(struct hv_device *device) #define RING_AVAIL_PERCENT_HIWATER 20 #define RING_AVAIL_PERCENT_LOWATER 10 -/* - * Get the percentage of available bytes to write in the ring. - * The return value is in range from 0 to 100. - */ -static u32 hv_ringbuf_avail_percent(const struct hv_ring_buffer_info *ring_info) -{ - u32 avail_write = hv_get_bytes_to_write(ring_info); - - return reciprocal_divide(avail_write * 100, netvsc_ring_reciprocal); -} - static inline void netvsc_free_send_slot(struct netvsc_device *net_device, u32 index) { @@ -694,8 +717,8 @@ static void netvsc_send_tx_complete(struct net_device *ndev, struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx); if (netif_tx_queue_stopped(txq) && - (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER || - queue_sends < 1)) { + (hv_get_avail_to_write_percent(&channel->outbound) > + RING_AVAIL_PERCENT_HIWATER || queue_sends < 1)) { netif_tx_wake_queue(txq); ndev_ctx->eth_stats.wake_queue++; } @@ -802,7 +825,7 @@ static inline int netvsc_send_pkt( struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx); u64 req_id; int ret; - u32 ring_avail = hv_ringbuf_avail_percent(&out_channel->outbound); + u32 ring_avail = hv_get_avail_to_write_percent(&out_channel->outbound); nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT; if (skb) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index bef4d55a108c..dd1d6e115145 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -35,7 +35,6 @@ #include <linux/slab.h> #include <linux/rtnetlink.h> #include <linux/netpoll.h> -#include <linux/reciprocal_div.h> #include <net/arp.h> #include <net/route.h> @@ -43,7 +42,6 @@ #include <net/pkt_sched.h> #include <net/checksum.h> #include <net/ip6_checksum.h> -#include <net/failover.h> #include "hyperv_net.h" @@ -59,7 +57,6 @@ static unsigned int ring_size __ro_after_init = 128; module_param(ring_size, uint, 0444); MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); unsigned int netvsc_ring_bytes __ro_after_init; -struct reciprocal_value netvsc_ring_reciprocal __ro_after_init; static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | NETIF_MSG_IFUP | @@ -70,6 +67,8 @@ static int debug = -1; module_param(debug, int, 0444); MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); +static LIST_HEAD(netvsc_dev_list); + static void netvsc_change_rx_flags(struct net_device *net, int change) { struct net_device_context *ndev_ctx = netdev_priv(net); @@ -127,8 +126,10 @@ static int netvsc_open(struct net_device *net) } rdev = nvdev->extension; - if (!rdev->link_state) + if (!rdev->link_state) { netif_carrier_on(net); + netif_tx_wake_all_queues(net); + } if (vf_netdev) { /* Setting synthetic device up transparently sets @@ -904,8 +905,20 @@ static int netvsc_attach(struct net_device *ndev, if (IS_ERR(nvdev)) return PTR_ERR(nvdev); - /* Note: enable and attach happen when sub-channels setup */ + if (nvdev->num_chn > 1) { + ret = rndis_set_subchannel(ndev, nvdev); + + /* if unavailable, just proceed with one queue */ + if (ret) { + nvdev->max_chn = 1; + nvdev->num_chn = 1; + } + } + /* In any case device is now ready */ + netif_device_attach(ndev); + + /* Note: enable and attach happen when sub-channels setup */ netif_carrier_off(ndev); if (netif_running(ndev)) { @@ -1780,6 +1793,36 @@ out_unlock: rtnl_unlock(); } +static struct net_device *get_netvsc_bymac(const u8 *mac) +{ + struct net_device_context *ndev_ctx; + + list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) { + struct net_device *dev = hv_get_drvdata(ndev_ctx->device_ctx); + + if (ether_addr_equal(mac, dev->perm_addr)) + return dev; + } + + return NULL; +} + +static struct net_device *get_netvsc_byref(struct net_device *vf_netdev) +{ + struct net_device_context *net_device_ctx; + struct net_device *dev; + + dev = netdev_master_upper_dev_get(vf_netdev); + if (!dev || dev->netdev_ops != &device_ops) + return NULL; /* not a netvsc device */ + + net_device_ctx = netdev_priv(dev); + if (!rtnl_dereference(net_device_ctx->nvdev)) + return NULL; /* device is removed */ + + return dev; +} + /* Called when VF is injecting data into network stack. * Change the associated network device from VF to netvsc. * note: already called with rcu_read_lock @@ -1802,6 +1845,46 @@ static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb) return RX_HANDLER_ANOTHER; } +static int netvsc_vf_join(struct net_device *vf_netdev, + struct net_device *ndev) +{ + struct net_device_context *ndev_ctx = netdev_priv(ndev); + int ret; + + ret = netdev_rx_handler_register(vf_netdev, + netvsc_vf_handle_frame, ndev); + if (ret != 0) { + netdev_err(vf_netdev, + "can not register netvsc VF receive handler (err = %d)\n", + ret); + goto rx_handler_failed; + } + + ret = netdev_master_upper_dev_link(vf_netdev, ndev, + NULL, NULL, NULL); + if (ret != 0) { + netdev_err(vf_netdev, + "can not set master device %s (err = %d)\n", + ndev->name, ret); + goto upper_link_failed; + } + + /* set slave flag before open to prevent IPv6 addrconf */ + vf_netdev->flags |= IFF_SLAVE; + + schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT); + + call_netdevice_notifiers(NETDEV_JOIN, vf_netdev); + + netdev_info(vf_netdev, "joined to %s\n", ndev->name); + return 0; + +upper_link_failed: + netdev_rx_handler_unregister(vf_netdev); +rx_handler_failed: + return ret; +} + static void __netvsc_vf_setup(struct net_device *ndev, struct net_device *vf_netdev) { @@ -1852,95 +1935,104 @@ static void netvsc_vf_setup(struct work_struct *w) rtnl_unlock(); } -static int netvsc_pre_register_vf(struct net_device *vf_netdev, - struct net_device *ndev) +static int netvsc_register_vf(struct net_device *vf_netdev) { + struct net_device *ndev; struct net_device_context *net_device_ctx; struct netvsc_device *netvsc_dev; + int ret; + + if (vf_netdev->addr_len != ETH_ALEN) + return NOTIFY_DONE; + + /* + * We will use the MAC address to locate the synthetic interface to + * associate with the VF interface. If we don't find a matching + * synthetic interface, move on. + */ + ndev = get_netvsc_bymac(vf_netdev->perm_addr); + if (!ndev) + return NOTIFY_DONE; net_device_ctx = netdev_priv(ndev); netvsc_dev = rtnl_dereference(net_device_ctx->nvdev); if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev)) - return -ENODEV; + return NOTIFY_DONE; - return 0; -} - -static int netvsc_register_vf(struct net_device *vf_netdev, - struct net_device *ndev) -{ - struct net_device_context *ndev_ctx = netdev_priv(ndev); - - /* set slave flag before open to prevent IPv6 addrconf */ - vf_netdev->flags |= IFF_SLAVE; + /* if syntihetic interface is a different namespace, + * then move the VF to that namespace; join will be + * done again in that context. + */ + if (!net_eq(dev_net(ndev), dev_net(vf_netdev))) { + ret = dev_change_net_namespace(vf_netdev, + dev_net(ndev), "eth%d"); + if (ret) + netdev_err(vf_netdev, + "could not move to same namespace as %s: %d\n", + ndev->name, ret); + else + netdev_info(vf_netdev, + "VF moved to namespace with: %s\n", + ndev->name); + return NOTIFY_DONE; + } - schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT); + netdev_info(ndev, "VF registering: %s\n", vf_netdev->name); - call_netdevice_notifiers(NETDEV_JOIN, vf_netdev); - - netdev_info(vf_netdev, "joined to %s\n", ndev->name); + if (netvsc_vf_join(vf_netdev, ndev) != 0) + return NOTIFY_DONE; dev_hold(vf_netdev); - rcu_assign_pointer(ndev_ctx->vf_netdev, vf_netdev); - - return 0; + rcu_assign_pointer(net_device_ctx->vf_netdev, vf_netdev); + return NOTIFY_OK; } /* VF up/down change detected, schedule to change data path */ -static int netvsc_vf_changed(struct net_device *vf_netdev, - struct net_device *ndev) +static int netvsc_vf_changed(struct net_device *vf_netdev) { struct net_device_context *net_device_ctx; struct netvsc_device *netvsc_dev; + struct net_device *ndev; bool vf_is_up = netif_running(vf_netdev); + ndev = get_netvsc_byref(vf_netdev); + if (!ndev) + return NOTIFY_DONE; + net_device_ctx = netdev_priv(ndev); netvsc_dev = rtnl_dereference(net_device_ctx->nvdev); if (!netvsc_dev) - return -ENODEV; + return NOTIFY_DONE; netvsc_switch_datapath(ndev, vf_is_up); netdev_info(ndev, "Data path switched %s VF: %s\n", vf_is_up ? "to" : "from", vf_netdev->name); - return 0; + return NOTIFY_OK; } -static int netvsc_pre_unregister_vf(struct net_device *vf_netdev, - struct net_device *ndev) +static int netvsc_unregister_vf(struct net_device *vf_netdev) { + struct net_device *ndev; struct net_device_context *net_device_ctx; - net_device_ctx = netdev_priv(ndev); - cancel_delayed_work_sync(&net_device_ctx->vf_takeover); - - return 0; -} - -static int netvsc_unregister_vf(struct net_device *vf_netdev, - struct net_device *ndev) -{ - struct net_device_context *net_device_ctx; + ndev = get_netvsc_byref(vf_netdev); + if (!ndev) + return NOTIFY_DONE; net_device_ctx = netdev_priv(ndev); + cancel_delayed_work_sync(&net_device_ctx->vf_takeover); netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); + netdev_rx_handler_unregister(vf_netdev); + netdev_upper_dev_unlink(vf_netdev, ndev); RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL); dev_put(vf_netdev); - return 0; + return NOTIFY_OK; } -static struct failover_ops netvsc_failover_ops = { - .slave_pre_register = netvsc_pre_register_vf, - .slave_register = netvsc_register_vf, - .slave_pre_unregister = netvsc_pre_unregister_vf, - .slave_unregister = netvsc_unregister_vf, - .slave_link_change = netvsc_vf_changed, - .slave_handle_frame = netvsc_vf_handle_frame, -}; - static int netvsc_probe(struct hv_device *dev, const struct hv_vmbus_device_id *dev_id) { @@ -2009,6 +2101,9 @@ static int netvsc_probe(struct hv_device *dev, memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN); + if (nvdev->num_chn > 1) + schedule_work(&nvdev->subchan_work); + /* hw_features computed in rndis_netdev_set_hwcaps() */ net->features = net->hw_features | NETIF_F_HIGHDMA | NETIF_F_SG | @@ -2024,23 +2119,19 @@ static int netvsc_probe(struct hv_device *dev, else net->max_mtu = ETH_DATA_LEN; - ret = register_netdev(net); + rtnl_lock(); + ret = register_netdevice(net); if (ret != 0) { pr_err("Unable to register netdev.\n"); goto register_failed; } - net_device_ctx->failover = failover_register(net, &netvsc_failover_ops); - if (IS_ERR(net_device_ctx->failover)) { - ret = PTR_ERR(net_device_ctx->failover); - goto err_failover; - } - - return ret; + list_add(&net_device_ctx->list, &netvsc_dev_list); + rtnl_unlock(); + return 0; -err_failover: - unregister_netdev(net); register_failed: + rtnl_unlock(); rndis_filter_device_remove(dev, nvdev); rndis_failed: free_percpu(net_device_ctx->vf_stats); @@ -2080,14 +2171,13 @@ static int netvsc_remove(struct hv_device *dev) rtnl_lock(); vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev); if (vf_netdev) - failover_slave_unregister(vf_netdev); + netvsc_unregister_vf(vf_netdev); if (nvdev) rndis_filter_device_remove(dev, nvdev); unregister_netdevice(net); - - failover_unregister(ndev_ctx->failover); + list_del(&ndev_ctx->list); rtnl_unlock(); rcu_read_unlock(); @@ -2115,8 +2205,54 @@ static struct hv_driver netvsc_drv = { .remove = netvsc_remove, }; +/* + * On Hyper-V, every VF interface is matched with a corresponding + * synthetic interface. The synthetic interface is presented first + * to the guest. When the corresponding VF instance is registered, + * we will take care of switching the data path. + */ +static int netvsc_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + + /* Skip our own events */ + if (event_dev->netdev_ops == &device_ops) + return NOTIFY_DONE; + + /* Avoid non-Ethernet type devices */ + if (event_dev->type != ARPHRD_ETHER) + return NOTIFY_DONE; + + /* Avoid Vlan dev with same MAC registering as VF */ + if (is_vlan_dev(event_dev)) + return NOTIFY_DONE; + + /* Avoid Bonding master dev with same MAC registering as VF */ + if ((event_dev->priv_flags & IFF_BONDING) && + (event_dev->flags & IFF_MASTER)) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_REGISTER: + return netvsc_register_vf(event_dev); + case NETDEV_UNREGISTER: + return netvsc_unregister_vf(event_dev); + case NETDEV_UP: + case NETDEV_DOWN: + return netvsc_vf_changed(event_dev); + default: + return NOTIFY_DONE; + } +} + +static struct notifier_block netvsc_netdev_notifier = { + .notifier_call = netvsc_netdev_event, +}; + static void __exit netvsc_drv_exit(void) { + unregister_netdevice_notifier(&netvsc_netdev_notifier); vmbus_driver_unregister(&netvsc_drv); } @@ -2130,12 +2266,12 @@ static int __init netvsc_drv_init(void) ring_size); } netvsc_ring_bytes = ring_size * PAGE_SIZE; - netvsc_ring_reciprocal = reciprocal_value(netvsc_ring_bytes); ret = vmbus_driver_register(&netvsc_drv); if (ret) return ret; + register_netdevice_notifier(&netvsc_netdev_notifier); return 0; } diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 5428bb261102..9b4e3c3787e5 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1062,29 +1062,15 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) * This breaks overlap of processing the host message for the * new primary channel with the initialization of sub-channels. */ -void rndis_set_subchannel(struct work_struct *w) +int rndis_set_subchannel(struct net_device *ndev, struct netvsc_device *nvdev) { - struct netvsc_device *nvdev - = container_of(w, struct netvsc_device, subchan_work); struct nvsp_message *init_packet = &nvdev->channel_init_pkt; - struct net_device_context *ndev_ctx; - struct rndis_device *rdev; - struct net_device *ndev; - struct hv_device *hv_dev; + struct net_device_context *ndev_ctx = netdev_priv(ndev); + struct hv_device *hv_dev = ndev_ctx->device_ctx; + struct rndis_device *rdev = nvdev->extension; int i, ret; - if (!rtnl_trylock()) { - schedule_work(w); - return; - } - - rdev = nvdev->extension; - if (!rdev) - goto unlock; /* device was removed */ - - ndev = rdev->ndev; - ndev_ctx = netdev_priv(ndev); - hv_dev = ndev_ctx->device_ctx; + ASSERT_RTNL(); memset(init_packet, 0, sizeof(struct nvsp_message)); init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL; @@ -1100,13 +1086,13 @@ void rndis_set_subchannel(struct work_struct *w) VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret) { netdev_err(ndev, "sub channel allocate send failed: %d\n", ret); - goto failed; + return ret; } wait_for_completion(&nvdev->channel_init_wait); if (init_packet->msg.v5_msg.subchn_comp.status != NVSP_STAT_SUCCESS) { netdev_err(ndev, "sub channel request failed\n"); - goto failed; + return -EIO; } nvdev->num_chn = 1 + @@ -1125,21 +1111,7 @@ void rndis_set_subchannel(struct work_struct *w) for (i = 0; i < VRSS_SEND_TAB_SIZE; i++) ndev_ctx->tx_table[i] = i % nvdev->num_chn; - netif_device_attach(ndev); - rtnl_unlock(); - return; - -failed: - /* fallback to only primary channel */ - for (i = 1; i < nvdev->num_chn; i++) - netif_napi_del(&nvdev->chan_table[i].napi); - - nvdev->max_chn = 1; - nvdev->num_chn = 1; - - netif_device_attach(ndev); -unlock: - rtnl_unlock(); + return 0; } static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, @@ -1360,21 +1332,12 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, netif_napi_add(net, &net_device->chan_table[i].napi, netvsc_poll, NAPI_POLL_WEIGHT); - if (net_device->num_chn > 1) - schedule_work(&net_device->subchan_work); + return net_device; out: - /* if unavailable, just proceed with one queue */ - if (ret) { - net_device->max_chn = 1; - net_device->num_chn = 1; - } - - /* No sub channels, device is ready */ - if (net_device->num_chn == 1) - netif_device_attach(net); - - return net_device; + /* setting up multiple channels failed */ + net_device->max_chn = 1; + net_device->num_chn = 1; err_dev_remv: rndis_filter_device_remove(dev, net_device); |