diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-01-28 11:41:37 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-01-28 11:41:37 -0800 |
commit | 22f837981514e157f8f9737b25ac6d7d90a14006 (patch) | |
tree | 5537a70dcd9225023335b1bd1cd0e9a9c0e95cb9 /drivers/net/virtio_net.c | |
parent | 949db153b6466c6f7cad5a427ecea94985927311 (diff) | |
parent | 6642f91c92da07369cf1e582503ea3ccb4a7f1a9 (diff) | |
download | talos-op-linux-22f837981514e157f8f9737b25ac6d7d90a14006.tar.gz talos-op-linux-22f837981514e157f8f9737b25ac6d7d90a14006.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking updates from David Miller:
"Much more accumulated than I would have liked due to an unexpected
bout with a nasty flu:
1) AH and ESP input don't set ECN field correctly because the
transport head of the SKB isn't set correctly, fix from Li
RongQing.
2) If netfilter conntrack zones are disabled, we can return an
uninitialized variable instead of the proper error code. Fix from
Borislav Petkov.
3) Fix double SKB free in ath9k driver beacon handling, from Felix
Feitkau.
4) Remove bogus assumption about netns cleanup ordering in
nf_conntrack, from Pablo Neira Ayuso.
5) Remove a bogus BUG_ON in the new TCP fastopen code, from Eric
Dumazet. It uses spin_is_locked() in it's test and is therefore
unsuitable for UP.
6) Fix SELINUX labelling regressions added by the tuntap multiqueue
changes, from Paul Moore.
7) Fix CRC errors with jumbo frame receive in tg3 driver, from Nithin
Nayak Sujir.
8) CXGB4 driver sets interrupt coalescing parameters only on first
queue, rather than all of them. Fix from Thadeu Lima de Souza
Cascardo.
9) Fix regression in the dispatch of read/write registers in dm9601
driver, from Tushar Behera.
10) ipv6_append_data miscalculates header length, from Romain KUNTZ.
11) Fix PMTU handling regressions on ipv4 routes, from Steffen
Klassert, Timo Teräs, and Julian Anastasov.
12) In 3c574_cs driver, add necessary parenthesis to "x << y & z"
expression. From Nickolai Zeldovich.
13) macvlan_get_size() causes underallocation netlink message space,
fix from Eric Dumazet.
14) Avoid division by zero in xfrm_replay_advance_bmp(), from Nickolai
Zeldovich. Amusingly the zero check was already there, we were
just performing it after the modulus :-)
15) Some more splice bug fixes from Eric Dumazet, which fix things
mostly eminating from how we now more aggressively use high-order
pages in SKBs.
16) Fix size calculation bug when freeing hash tables in the IPSEC
xfrm code, from Michal Kubecek.
17) Fix PMTU event propagation into socket cached routes, from Steffen
Klassert.
18) Fix off by one in TX buffer release in netxen driver, from Eric
Dumazet.
19) Fix rediculous memory allocation requirements introduced by the
tuntap multiqueue changes, from Jason Wang.
20) Remove bogus AMD platform workaround in r8169 driver that causes
major problems in normal operation, from Timo Teräs.
21) virtio-net set affinity and select queue don't handle
discontiguous cpu numbers properly, fix from Wanlong Gao.
22) Fix a route refcounting issue in loopback driver, from Eric
Dumazet. There's a similar fix coming that we might add to the
macvlan driver as well.
23) Fix SKB leaks in batman-adv's distributed arp table code, from
Matthias Schiffer.
24) r8169 driver gives descriptor ownership back the hardware before
we're done reading the VLAN tag out of it, fix from Francois
Romieu.
25) Checksums not calculated properly in GRE tunnel driver fix from
Pravin B Shelar.
26) Fix SCTP memory leak on namespace exit."
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (101 commits)
dm9601: support dm9620 variant
SCTP: Free the per-net sysctl table on net exit. v2
net: phy: icplus: fix broken INTR pin settings
net: phy: icplus: Use the RGMII interface mode to configure clock delays
IP_GRE: Fix kernel panic in IP_GRE with GRE csum.
sctp: set association state to established in dupcook_a handler
ip6mr: limit IPv6 MRT_TABLE identifiers
r8169: fix vlan tag read ordering.
net: cdc_ncm: use IAD provided by the USB core
batman-adv: filter ARP packets with invalid MAC addresses in DAT
batman-adv: check for more types of invalid IP addresses in DAT
batman-adv: fix skb leak in batadv_dat_snoop_incoming_arp_reply()
net: loopback: fix a dst refcounting issue
virtio-net: reset virtqueue affinity when doing cpu hotplug
virtio-net: split out clean affinity function
virtio-net: fix the set affinity bug when CPU IDs are not consecutive
can: pch_can: fix invalid error codes
can: ti_hecc: fix invalid error codes
can: c_can: fix invalid error codes
r8169: remove the obsolete and incorrect AMD workaround
...
Diffstat (limited to 'drivers/net/virtio_net.c')
-rw-r--r-- | drivers/net/virtio_net.c | 118 |
1 files changed, 98 insertions, 20 deletions
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index a6fcf15adc4f..35c00c5ea02a 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -26,6 +26,7 @@ #include <linux/scatterlist.h> #include <linux/if_vlan.h> #include <linux/slab.h> +#include <linux/cpu.h> static int napi_weight = 128; module_param(napi_weight, int, 0444); @@ -123,6 +124,12 @@ struct virtnet_info { /* Does the affinity hint is set for virtqueues? */ bool affinity_hint_set; + + /* Per-cpu variable to show the mapping from CPU to virtqueue */ + int __percpu *vq_index; + + /* CPU hot plug notifier */ + struct notifier_block nb; }; struct skb_vnet_hdr { @@ -1013,32 +1020,75 @@ static int virtnet_vlan_rx_kill_vid(struct net_device *dev, u16 vid) return 0; } -static void virtnet_set_affinity(struct virtnet_info *vi, bool set) +static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu) { int i; + int cpu; + + if (vi->affinity_hint_set) { + for (i = 0; i < vi->max_queue_pairs; i++) { + virtqueue_set_affinity(vi->rq[i].vq, -1); + virtqueue_set_affinity(vi->sq[i].vq, -1); + } + + vi->affinity_hint_set = false; + } + + i = 0; + for_each_online_cpu(cpu) { + if (cpu == hcpu) { + *per_cpu_ptr(vi->vq_index, cpu) = -1; + } else { + *per_cpu_ptr(vi->vq_index, cpu) = + ++i % vi->curr_queue_pairs; + } + } +} + +static void virtnet_set_affinity(struct virtnet_info *vi) +{ + int i; + int cpu; /* In multiqueue mode, when the number of cpu is equal to the number of * queue pairs, we let the queue pairs to be private to one cpu by * setting the affinity hint to eliminate the contention. */ - if ((vi->curr_queue_pairs == 1 || - vi->max_queue_pairs != num_online_cpus()) && set) { - if (vi->affinity_hint_set) - set = false; - else - return; + if (vi->curr_queue_pairs == 1 || + vi->max_queue_pairs != num_online_cpus()) { + virtnet_clean_affinity(vi, -1); + return; } - for (i = 0; i < vi->max_queue_pairs; i++) { - int cpu = set ? i : -1; + i = 0; + for_each_online_cpu(cpu) { virtqueue_set_affinity(vi->rq[i].vq, cpu); virtqueue_set_affinity(vi->sq[i].vq, cpu); + *per_cpu_ptr(vi->vq_index, cpu) = i; + i++; } - if (set) - vi->affinity_hint_set = true; - else - vi->affinity_hint_set = false; + vi->affinity_hint_set = true; +} + +static int virtnet_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb); + + switch(action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + case CPU_DOWN_FAILED: + case CPU_DEAD: + virtnet_set_affinity(vi); + break; + case CPU_DOWN_PREPARE: + virtnet_clean_affinity(vi, (long)hcpu); + break; + default: + break; + } + return NOTIFY_OK; } static void virtnet_get_ringparam(struct net_device *dev, @@ -1082,13 +1132,15 @@ static int virtnet_set_channels(struct net_device *dev, if (queue_pairs > vi->max_queue_pairs) return -EINVAL; + get_online_cpus(); err = virtnet_set_queues(vi, queue_pairs); if (!err) { netif_set_real_num_tx_queues(dev, queue_pairs); netif_set_real_num_rx_queues(dev, queue_pairs); - virtnet_set_affinity(vi, true); + virtnet_set_affinity(vi); } + put_online_cpus(); return err; } @@ -1127,12 +1179,19 @@ static int virtnet_change_mtu(struct net_device *dev, int new_mtu) /* To avoid contending a lock hold by a vcpu who would exit to host, select the * txq based on the processor id. - * TODO: handle cpu hotplug. */ static u16 virtnet_select_queue(struct net_device *dev, struct sk_buff *skb) { - int txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : - smp_processor_id(); + int txq; + struct virtnet_info *vi = netdev_priv(dev); + + if (skb_rx_queue_recorded(skb)) { + txq = skb_get_rx_queue(skb); + } else { + txq = *__this_cpu_ptr(vi->vq_index); + if (txq == -1) + txq = 0; + } while (unlikely(txq >= dev->real_num_tx_queues)) txq -= dev->real_num_tx_queues; @@ -1248,7 +1307,7 @@ static void virtnet_del_vqs(struct virtnet_info *vi) { struct virtio_device *vdev = vi->vdev; - virtnet_set_affinity(vi, false); + virtnet_clean_affinity(vi, -1); vdev->config->del_vqs(vdev); @@ -1371,7 +1430,10 @@ static int init_vqs(struct virtnet_info *vi) if (ret) goto err_free; - virtnet_set_affinity(vi, true); + get_online_cpus(); + virtnet_set_affinity(vi); + put_online_cpus(); + return 0; err_free: @@ -1453,6 +1515,10 @@ static int virtnet_probe(struct virtio_device *vdev) if (vi->stats == NULL) goto free; + vi->vq_index = alloc_percpu(int); + if (vi->vq_index == NULL) + goto free_stats; + mutex_init(&vi->config_lock); vi->config_enable = true; INIT_WORK(&vi->config_work, virtnet_config_changed_work); @@ -1476,7 +1542,7 @@ static int virtnet_probe(struct virtio_device *vdev) /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ err = init_vqs(vi); if (err) - goto free_stats; + goto free_index; netif_set_real_num_tx_queues(dev, 1); netif_set_real_num_rx_queues(dev, 1); @@ -1499,6 +1565,13 @@ static int virtnet_probe(struct virtio_device *vdev) } } + vi->nb.notifier_call = &virtnet_cpu_callback; + err = register_hotcpu_notifier(&vi->nb); + if (err) { + pr_debug("virtio_net: registering cpu notifier failed\n"); + goto free_recv_bufs; + } + /* Assume link up if device can't report link status, otherwise get link status from config. */ if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { @@ -1520,6 +1593,8 @@ free_recv_bufs: free_vqs: cancel_delayed_work_sync(&vi->refill); virtnet_del_vqs(vi); +free_index: + free_percpu(vi->vq_index); free_stats: free_percpu(vi->stats); free: @@ -1543,6 +1618,8 @@ static void virtnet_remove(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; + unregister_hotcpu_notifier(&vi->nb); + /* Prevent config work handler from accessing the device. */ mutex_lock(&vi->config_lock); vi->config_enable = false; @@ -1554,6 +1631,7 @@ static void virtnet_remove(struct virtio_device *vdev) flush_work(&vi->config_work); + free_percpu(vi->vq_index); free_percpu(vi->stats); free_netdev(vi->dev); } |