From f7b50c4e7ced702d80d3b873d81a2cdafb580f13 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Mon, 23 Jun 2014 10:50:17 +0100 Subject: xen-netback: bookkeep number of active queues in our own module The original code uses netdev->real_num_tx_queues to bookkeep number of queues and invokes netif_set_real_num_tx_queues to set the number of queues. However, netif_set_real_num_tx_queues doesn't allow real_num_tx_queues to be smaller than 1, which means setting the number to 0 will not work and real_num_tx_queues is untouched. This is bogus when xenvif_free is invoked before any number of queues is allocated. That function needs to iterate through all queues to free resources. Using the wrong number of queues results in NULL pointer dereference. So we bookkeep the number of queues in xen-netback to solve this problem. This fixes a regression introduced by multiqueue patchset in 3.16-rc1. There's another bug in original code that the real number of RX queues is never set. In current Xen multiqueue design, the number of TX queues and RX queues are in fact the same. We need to set the numbers of TX and RX queues to the same value. Also remove xenvif_select_queue and leave queue selection to core driver, as suggested by David Miller. Reported-by: Boris Ostrovsky Signed-off-by: Wei Liu CC: Ian Campbell CC: Paul Durrant Signed-off-by: David S. Miller --- drivers/net/xen-netback/common.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/xen-netback/common.h') diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 4dd7c4a1923b..2532ce85d718 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -222,6 +222,7 @@ struct xenvif { /* Queues */ struct xenvif_queue *queues; + unsigned int num_queues; /* active queues, resource allocated */ /* Miscellaneous private stuff. */ struct net_device *dev; -- cgit v1.2.3 From f51de24356e49e4dcb5095e87717065580912120 Mon Sep 17 00:00:00 2001 From: Zoltan Kiss Date: Tue, 8 Jul 2014 19:49:14 +0100 Subject: xen-netback: Adding debugfs "io_ring_qX" files This patch adds debugfs capabilities to netback. There used to be a similar patch floating around for classic kernel, but it used procfs. It is based on a very similar blkback patch. It creates xen-netback/[vifname]/io_ring_q[queueno] files, reading them output various ring variables etc. Writing "kick" into it imitates an interrupt happened, it can be useful to check whether the ring is just stalled due to a missed interrupt. Signed-off-by: Zoltan Kiss Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: xen-devel@lists.xenproject.org Signed-off-by: David S. Miller --- drivers/net/xen-netback/common.h | 11 +++ drivers/net/xen-netback/interface.c | 2 +- drivers/net/xen-netback/netback.c | 11 +++ drivers/net/xen-netback/xenbus.c | 178 +++++++++++++++++++++++++++++++++++- 4 files changed, 200 insertions(+), 2 deletions(-) (limited to 'drivers/net/xen-netback/common.h') diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 2532ce85d718..28c98229e95f 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -44,6 +44,7 @@ #include #include #include +#include typedef unsigned int pending_ring_idx_t; #define INVALID_PENDING_RING_IDX (~0U) @@ -224,6 +225,10 @@ struct xenvif { struct xenvif_queue *queues; unsigned int num_queues; /* active queues, resource allocated */ +#ifdef CONFIG_DEBUG_FS + struct dentry *xenvif_dbg_root; +#endif + /* Miscellaneous private stuff. */ struct net_device *dev; }; @@ -297,10 +302,16 @@ static inline pending_ring_idx_t nr_pending_reqs(struct xenvif_queue *queue) /* Callback from stack when TX packet can be released */ void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success); +irqreturn_t xenvif_interrupt(int irq, void *dev_id); + extern bool separate_tx_rx_irq; extern unsigned int rx_drain_timeout_msecs; extern unsigned int rx_drain_timeout_jiffies; extern unsigned int xenvif_max_queues; +#ifdef CONFIG_DEBUG_FS +extern struct dentry *xen_netback_dbg_root; +#endif + #endif /* __XEN_NETBACK__COMMON_H__ */ diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 9e97c7ca0ddd..ef75b45e5085 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -102,7 +102,7 @@ static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static irqreturn_t xenvif_interrupt(int irq, void *dev_id) +irqreturn_t xenvif_interrupt(int irq, void *dev_id) { xenvif_tx_interrupt(irq, dev_id); xenvif_rx_interrupt(irq, dev_id); diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 1844a47636b6..77127ca08ca4 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1987,6 +1987,13 @@ static int __init netback_init(void) rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs); +#ifdef CONFIG_DEBUG_FS + xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL); + if (IS_ERR_OR_NULL(xen_netback_dbg_root)) + pr_warn("Init of debugfs returned %ld!\n", + PTR_ERR(xen_netback_dbg_root)); +#endif /* CONFIG_DEBUG_FS */ + return 0; failed_init: @@ -1997,6 +2004,10 @@ module_init(netback_init); static void __exit netback_fini(void) { +#ifdef CONFIG_DEBUG_FS + if (!IS_ERR_OR_NULL(xen_netback_dbg_root)) + debugfs_remove_recursive(xen_netback_dbg_root); +#endif /* CONFIG_DEBUG_FS */ xenvif_xenbus_fini(); } module_exit(netback_fini); diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 3d85acd84bad..580517d857bf 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -44,6 +44,175 @@ static void unregister_hotplug_status_watch(struct backend_info *be); static void set_backend_state(struct backend_info *be, enum xenbus_state state); +#ifdef CONFIG_DEBUG_FS +struct dentry *xen_netback_dbg_root = NULL; + +static int xenvif_read_io_ring(struct seq_file *m, void *v) +{ + struct xenvif_queue *queue = m->private; + struct xen_netif_tx_back_ring *tx_ring = &queue->tx; + struct xen_netif_rx_back_ring *rx_ring = &queue->rx; + + if (tx_ring->sring) { + struct xen_netif_tx_sring *sring = tx_ring->sring; + + seq_printf(m, "Queue %d\nTX: nr_ents %u\n", queue->id, + tx_ring->nr_ents); + seq_printf(m, "req prod %u (%d) cons %u (%d) event %u (%d)\n", + sring->req_prod, + sring->req_prod - sring->rsp_prod, + tx_ring->req_cons, + tx_ring->req_cons - sring->rsp_prod, + sring->req_event, + sring->req_event - sring->rsp_prod); + seq_printf(m, "rsp prod %u (base) pvt %u (%d) event %u (%d)\n", + sring->rsp_prod, + tx_ring->rsp_prod_pvt, + tx_ring->rsp_prod_pvt - sring->rsp_prod, + sring->rsp_event, + sring->rsp_event - sring->rsp_prod); + seq_printf(m, "pending prod %u pending cons %u nr_pending_reqs %u\n", + queue->pending_prod, + queue->pending_cons, + nr_pending_reqs(queue)); + seq_printf(m, "dealloc prod %u dealloc cons %u dealloc_queue %u\n\n", + queue->dealloc_prod, + queue->dealloc_cons, + queue->dealloc_prod - queue->dealloc_cons); + } + + if (rx_ring->sring) { + struct xen_netif_rx_sring *sring = rx_ring->sring; + + seq_printf(m, "RX: nr_ents %u\n", rx_ring->nr_ents); + seq_printf(m, "req prod %u (%d) cons %u (%d) event %u (%d)\n", + sring->req_prod, + sring->req_prod - sring->rsp_prod, + rx_ring->req_cons, + rx_ring->req_cons - sring->rsp_prod, + sring->req_event, + sring->req_event - sring->rsp_prod); + seq_printf(m, "rsp prod %u (base) pvt %u (%d) event %u (%d)\n\n", + sring->rsp_prod, + rx_ring->rsp_prod_pvt, + rx_ring->rsp_prod_pvt - sring->rsp_prod, + sring->rsp_event, + sring->rsp_event - sring->rsp_prod); + } + + seq_printf(m, "NAPI state: %lx NAPI weight: %d TX queue len %u\n" + "Credit timer_pending: %d, credit: %lu, usec: %lu\n" + "remaining: %lu, expires: %lu, now: %lu\n", + queue->napi.state, queue->napi.weight, + skb_queue_len(&queue->tx_queue), + timer_pending(&queue->credit_timeout), + queue->credit_bytes, + queue->credit_usec, + queue->remaining_credit, + queue->credit_timeout.expires, + jiffies); + + return 0; +} + +#define XENVIF_KICK_STR "kick" + +static ssize_t +xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count, + loff_t *ppos) +{ + struct xenvif_queue *queue = + ((struct seq_file *)filp->private_data)->private; + int len; + char write[sizeof(XENVIF_KICK_STR)]; + + /* don't allow partial writes and check the length */ + if (*ppos != 0) + return 0; + if (count < sizeof(XENVIF_KICK_STR) - 1) + return -ENOSPC; + + len = simple_write_to_buffer(write, + sizeof(write), + ppos, + buf, + count); + if (len < 0) + return len; + + if (!strncmp(write, XENVIF_KICK_STR, sizeof(XENVIF_KICK_STR) - 1)) + xenvif_interrupt(0, (void *)queue); + else { + pr_warn("Unknown command to io_ring_q%d. Available: kick\n", + queue->id); + count = -EINVAL; + } + return count; +} + +static int xenvif_dump_open(struct inode *inode, struct file *filp) +{ + int ret; + void *queue = NULL; + + if (inode->i_private) + queue = inode->i_private; + ret = single_open(filp, xenvif_read_io_ring, queue); + filp->f_mode |= FMODE_PWRITE; + return ret; +} + +static const struct file_operations xenvif_dbg_io_ring_ops_fops = { + .owner = THIS_MODULE, + .open = xenvif_dump_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = xenvif_write_io_ring, +}; + +static void xenvif_debugfs_addif(struct xenvif_queue *queue) +{ + struct dentry *pfile; + struct xenvif *vif = queue->vif; + int i; + + if (IS_ERR_OR_NULL(xen_netback_dbg_root)) + return; + + vif->xenvif_dbg_root = debugfs_create_dir(vif->dev->name, + xen_netback_dbg_root); + if (!IS_ERR_OR_NULL(vif->xenvif_dbg_root)) { + for (i = 0; i < vif->num_queues; ++i) { + char filename[sizeof("io_ring_q") + 4]; + + snprintf(filename, sizeof(filename), "io_ring_q%d", i); + pfile = debugfs_create_file(filename, + S_IRUSR | S_IWUSR, + vif->xenvif_dbg_root, + &vif->queues[i], + &xenvif_dbg_io_ring_ops_fops); + if (IS_ERR_OR_NULL(pfile)) + pr_warn("Creation of io_ring file returned %ld!\n", + PTR_ERR(pfile)); + } + } else + netdev_warn(vif->dev, + "Creation of vif debugfs dir returned %ld!\n", + PTR_ERR(vif->xenvif_dbg_root)); +} + +static void xenvif_debugfs_delif(struct xenvif *vif) +{ + if (IS_ERR_OR_NULL(xen_netback_dbg_root)) + return; + + if (!IS_ERR_OR_NULL(vif->xenvif_dbg_root)) + debugfs_remove_recursive(vif->xenvif_dbg_root); + vif->xenvif_dbg_root = NULL; +} +#endif /* CONFIG_DEBUG_FS */ + static int netback_remove(struct xenbus_device *dev) { struct backend_info *be = dev_get_drvdata(&dev->dev); @@ -246,8 +415,12 @@ static void backend_create_xenvif(struct backend_info *be) static void backend_disconnect(struct backend_info *be) { - if (be->vif) + if (be->vif) { +#ifdef CONFIG_DEBUG_FS + xenvif_debugfs_delif(be->vif); +#endif /* CONFIG_DEBUG_FS */ xenvif_disconnect(be->vif); + } } static void backend_connect(struct backend_info *be) @@ -560,6 +733,9 @@ static void connect(struct backend_info *be) be->vif->num_queues = queue_index; goto err; } +#ifdef CONFIG_DEBUG_FS + xenvif_debugfs_addif(queue); +#endif /* CONFIG_DEBUG_FS */ } /* Initialisation completed, tell core driver the number of -- cgit v1.2.3 From 3d1af1df9762e56e563e8fd088a1b4ce2bcfaf8b Mon Sep 17 00:00:00 2001 From: Zoltan Kiss Date: Mon, 4 Aug 2014 16:20:57 +0100 Subject: xen-netback: Using a new state bit instead of carrier This patch introduces a new state bit VIF_STATUS_CONNECTED to track whether the vif is in a connected state. Using carrier will not work with the next patch in this series, which aims to turn the carrier temporarily off if the guest doesn't seem to be able to receive packets. Signed-off-by: Zoltan Kiss Signed-off-by: David Vrabel Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: xen-devel@lists.xenproject.org v2: - rename the bitshift type to "enum state_bit_shift" here, not in the next patch Signed-off-by: David S. Miller --- drivers/net/xen-netback/common.h | 6 ++++++ drivers/net/xen-netback/interface.c | 19 +++++++++++-------- drivers/net/xen-netback/netback.c | 2 +- 3 files changed, 18 insertions(+), 9 deletions(-) (limited to 'drivers/net/xen-netback/common.h') diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 28c98229e95f..4a92fc19f410 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -198,6 +198,11 @@ struct xenvif_queue { /* Per-queue data for xenvif */ struct xenvif_stats stats; }; +enum state_bit_shift { + /* This bit marks that the vif is connected */ + VIF_STATUS_CONNECTED +}; + struct xenvif { /* Unique identifier for this interface. */ domid_t domid; @@ -220,6 +225,7 @@ struct xenvif { * frontend is rogue. */ bool disabled; + unsigned long status; /* Queues */ struct xenvif_queue *queues; diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index bd59d9dbf27b..fbdadb3d8220 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -55,7 +55,8 @@ static inline void xenvif_stop_queue(struct xenvif_queue *queue) int xenvif_schedulable(struct xenvif *vif) { - return netif_running(vif->dev) && netif_carrier_ok(vif->dev); + return netif_running(vif->dev) && + test_bit(VIF_STATUS_CONNECTED, &vif->status); } static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id) @@ -267,7 +268,7 @@ static void xenvif_down(struct xenvif *vif) static int xenvif_open(struct net_device *dev) { struct xenvif *vif = netdev_priv(dev); - if (netif_carrier_ok(dev)) + if (test_bit(VIF_STATUS_CONNECTED, &vif->status)) xenvif_up(vif); netif_tx_start_all_queues(dev); return 0; @@ -276,7 +277,7 @@ static int xenvif_open(struct net_device *dev) static int xenvif_close(struct net_device *dev) { struct xenvif *vif = netdev_priv(dev); - if (netif_carrier_ok(dev)) + if (test_bit(VIF_STATUS_CONNECTED, &vif->status)) xenvif_down(vif); netif_tx_stop_all_queues(dev); return 0; @@ -528,6 +529,7 @@ void xenvif_carrier_on(struct xenvif *vif) if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN) dev_set_mtu(vif->dev, ETH_DATA_LEN); netdev_update_features(vif->dev); + set_bit(VIF_STATUS_CONNECTED, &vif->status); netif_carrier_on(vif->dev); if (netif_running(vif->dev)) xenvif_up(vif); @@ -625,9 +627,11 @@ void xenvif_carrier_off(struct xenvif *vif) struct net_device *dev = vif->dev; rtnl_lock(); - netif_carrier_off(dev); /* discard queued packets */ - if (netif_running(dev)) - xenvif_down(vif); + if (test_and_clear_bit(VIF_STATUS_CONNECTED, &vif->status)) { + netif_carrier_off(dev); /* discard queued packets */ + if (netif_running(dev)) + xenvif_down(vif); + } rtnl_unlock(); } @@ -656,8 +660,7 @@ void xenvif_disconnect(struct xenvif *vif) unsigned int num_queues = vif->num_queues; unsigned int queue_index; - if (netif_carrier_ok(vif->dev)) - xenvif_carrier_off(vif); + xenvif_carrier_off(vif); for (queue_index = 0; queue_index < num_queues; ++queue_index) { queue = &vif->queues[queue_index]; diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 769e553d3f45..6c4cc0f44da5 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1953,7 +1953,7 @@ int xenvif_kthread_guest_rx(void *data) * context so we defer it here, if this thread is * associated with queue 0. */ - if (unlikely(queue->vif->disabled && netif_carrier_ok(queue->vif->dev) && queue->id == 0)) + if (unlikely(queue->vif->disabled && queue->id == 0)) xenvif_carrier_off(queue->vif); if (kthread_should_stop()) -- cgit v1.2.3 From f34a4cf9c9b4fd35ba7f9a596cedb011879a1a4d Mon Sep 17 00:00:00 2001 From: Zoltan Kiss Date: Mon, 4 Aug 2014 16:20:58 +0100 Subject: xen-netback: Turn off the carrier if the guest is not able to receive Currently when the guest is not able to receive more packets, qdisc layer starts a timer, and when it goes off, qdisc is started again to deliver a packet again. This is a very slow way to drain the queues, consumes unnecessary resources and slows down other guests shutdown. This patch change the behaviour by turning the carrier off when that timer fires, so all the packets are freed up which were stucked waiting for that vif. Instead of the rx_queue_purge bool it uses the VIF_STATUS_RX_PURGE_EVENT bit to signal the thread that either the timeout happened or an RX interrupt arrived, so the thread can check what it should do. It also disables NAPI, so the guest can't transmit, but leaves the interrupts on, so it can resurrect. Only the queues which brought down the interface can enable it again, the bit QUEUE_STATUS_RX_STALLED makes sure of that. Signed-off-by: Zoltan Kiss Signed-off-by: David Vrabel Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: xen-devel@lists.xenproject.org Signed-off-by: David S. Miller --- drivers/net/xen-netback/common.h | 15 ++++-- drivers/net/xen-netback/interface.c | 49 +++++++++++-------- drivers/net/xen-netback/netback.c | 97 +++++++++++++++++++++++++++++++------ 3 files changed, 123 insertions(+), 38 deletions(-) (limited to 'drivers/net/xen-netback/common.h') diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 4a92fc19f410..ef3026f46a37 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -176,9 +176,9 @@ struct xenvif_queue { /* Per-queue data for xenvif */ struct xen_netif_rx_back_ring rx; struct sk_buff_head rx_queue; RING_IDX rx_last_skb_slots; - bool rx_queue_purge; + unsigned long status; - struct timer_list wake_queue; + struct timer_list rx_stalled; struct gnttab_copy grant_copy_op[MAX_GRANT_COPY_OPS]; @@ -200,7 +200,16 @@ struct xenvif_queue { /* Per-queue data for xenvif */ enum state_bit_shift { /* This bit marks that the vif is connected */ - VIF_STATUS_CONNECTED + VIF_STATUS_CONNECTED, + /* This bit signals the RX thread that queuing was stopped (in + * start_xmit), and either the timer fired or an RX interrupt came + */ + QUEUE_STATUS_RX_PURGE_EVENT, + /* This bit tells the interrupt handler that this queue was the reason + * for the carrier off, so it should kick the thread. Only queues which + * brought it down can turn on the carrier. + */ + QUEUE_STATUS_RX_STALLED }; struct xenvif { diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index fbdadb3d8220..48a55cda979b 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -78,8 +78,12 @@ int xenvif_poll(struct napi_struct *napi, int budget) /* This vif is rogue, we pretend we've there is nothing to do * for this vif to deschedule it from NAPI. But this interface * will be turned off in thread context later. + * Also, if a guest doesn't post enough slots to receive data on one of + * its queues, the carrier goes down and NAPI is descheduled here so + * the guest can't send more packets until it's ready to receive. */ - if (unlikely(queue->vif->disabled)) { + if (unlikely(queue->vif->disabled || + !netif_carrier_ok(queue->vif->dev))) { napi_complete(napi); return 0; } @@ -97,7 +101,16 @@ int xenvif_poll(struct napi_struct *napi, int budget) static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id) { struct xenvif_queue *queue = dev_id; + struct netdev_queue *net_queue = + netdev_get_tx_queue(queue->vif->dev, queue->id); + /* QUEUE_STATUS_RX_PURGE_EVENT is only set if either QDisc was off OR + * the carrier went down and this queue was previously blocked + */ + if (unlikely(netif_tx_queue_stopped(net_queue) || + (!netif_carrier_ok(queue->vif->dev) && + test_bit(QUEUE_STATUS_RX_STALLED, &queue->status)))) + set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status); xenvif_kick_thread(queue); return IRQ_HANDLED; @@ -125,16 +138,14 @@ void xenvif_wake_queue(struct xenvif_queue *queue) netif_tx_wake_queue(netdev_get_tx_queue(dev, id)); } -/* Callback to wake the queue and drain it on timeout */ -static void xenvif_wake_queue_callback(unsigned long data) +/* Callback to wake the queue's thread and turn the carrier off on timeout */ +static void xenvif_rx_stalled(unsigned long data) { struct xenvif_queue *queue = (struct xenvif_queue *)data; if (xenvif_queue_stopped(queue)) { - netdev_err(queue->vif->dev, "draining TX queue\n"); - queue->rx_queue_purge = true; + set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status); xenvif_kick_thread(queue); - xenvif_wake_queue(queue); } } @@ -183,11 +194,11 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) * drain. */ if (!xenvif_rx_ring_slots_available(queue, min_slots_needed)) { - queue->wake_queue.function = xenvif_wake_queue_callback; - queue->wake_queue.data = (unsigned long)queue; + queue->rx_stalled.function = xenvif_rx_stalled; + queue->rx_stalled.data = (unsigned long)queue; xenvif_stop_queue(queue); - mod_timer(&queue->wake_queue, - jiffies + rx_drain_timeout_jiffies); + mod_timer(&queue->rx_stalled, + jiffies + rx_drain_timeout_jiffies); } skb_queue_tail(&queue->rx_queue, skb); @@ -515,7 +526,7 @@ int xenvif_init_queue(struct xenvif_queue *queue) queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE; } - init_timer(&queue->wake_queue); + init_timer(&queue->rx_stalled); netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll, XENVIF_NAPI_WEIGHT); @@ -666,7 +677,7 @@ void xenvif_disconnect(struct xenvif *vif) queue = &vif->queues[queue_index]; if (queue->task) { - del_timer_sync(&queue->wake_queue); + del_timer_sync(&queue->rx_stalled); kthread_stop(queue->task); queue->task = NULL; } @@ -708,16 +719,12 @@ void xenvif_free(struct xenvif *vif) /* Here we want to avoid timeout messages if an skb can be legitimately * stuck somewhere else. Realistically this could be an another vif's * internal or QDisc queue. That another vif also has this - * rx_drain_timeout_msecs timeout, but the timer only ditches the - * internal queue. After that, the QDisc queue can put in worst case - * XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS skbs into that another vif's - * internal queue, so we need several rounds of such timeouts until we - * can be sure that no another vif should have skb's from us. We are - * not sending more skb's, so newly stuck packets are not interesting - * for us here. + * rx_drain_timeout_msecs timeout, so give it time to drain out. + * Although if that other guest wakes up just before its timeout happens + * and takes only one skb from QDisc, it can hold onto other skbs for a + * longer period. */ - unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000) * - DIV_ROUND_UP(XENVIF_QUEUE_LENGTH, (XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS)); + unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000); unregister_netdev(vif->dev); diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 6c4cc0f44da5..aa2093325be1 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1869,8 +1869,7 @@ void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) static inline int rx_work_todo(struct xenvif_queue *queue) { return (!skb_queue_empty(&queue->rx_queue) && - xenvif_rx_ring_slots_available(queue, queue->rx_last_skb_slots)) || - queue->rx_queue_purge; + xenvif_rx_ring_slots_available(queue, queue->rx_last_skb_slots)); } static inline int tx_work_todo(struct xenvif_queue *queue) @@ -1935,6 +1934,75 @@ static void xenvif_start_queue(struct xenvif_queue *queue) xenvif_wake_queue(queue); } +/* Only called from the queue's thread, it handles the situation when the guest + * doesn't post enough requests on the receiving ring. + * First xenvif_start_xmit disables QDisc and start a timer, and then either the + * timer fires, or the guest send an interrupt after posting new request. If it + * is the timer, the carrier is turned off here. + * */ +static void xenvif_rx_purge_event(struct xenvif_queue *queue) +{ + /* Either the last unsuccesful skb or at least 1 slot should fit */ + int needed = queue->rx_last_skb_slots ? + queue->rx_last_skb_slots : 1; + + /* It is assumed that if the guest post new slots after this, the RX + * interrupt will set the QUEUE_STATUS_RX_PURGE_EVENT bit and wake up + * the thread again + */ + set_bit(QUEUE_STATUS_RX_STALLED, &queue->status); + if (!xenvif_rx_ring_slots_available(queue, needed)) { + rtnl_lock(); + if (netif_carrier_ok(queue->vif->dev)) { + /* Timer fired and there are still no slots. Turn off + * everything except the interrupts + */ + netif_carrier_off(queue->vif->dev); + skb_queue_purge(&queue->rx_queue); + queue->rx_last_skb_slots = 0; + if (net_ratelimit()) + netdev_err(queue->vif->dev, "Carrier off due to lack of guest response on queue %d\n", queue->id); + } else { + /* Probably an another queue already turned the carrier + * off, make sure nothing is stucked in the internal + * queue of this queue + */ + skb_queue_purge(&queue->rx_queue); + queue->rx_last_skb_slots = 0; + } + rtnl_unlock(); + } else if (!netif_carrier_ok(queue->vif->dev)) { + unsigned int num_queues = queue->vif->num_queues; + unsigned int i; + /* The carrier was down, but an interrupt kicked + * the thread again after new requests were + * posted + */ + clear_bit(QUEUE_STATUS_RX_STALLED, + &queue->status); + rtnl_lock(); + netif_carrier_on(queue->vif->dev); + netif_tx_wake_all_queues(queue->vif->dev); + rtnl_unlock(); + + for (i = 0; i < num_queues; i++) { + struct xenvif_queue *temp = &queue->vif->queues[i]; + + xenvif_napi_schedule_or_enable_events(temp); + } + if (net_ratelimit()) + netdev_err(queue->vif->dev, "Carrier on again\n"); + } else { + /* Queuing were stopped, but the guest posted + * new requests and sent an interrupt + */ + clear_bit(QUEUE_STATUS_RX_STALLED, + &queue->status); + del_timer_sync(&queue->rx_stalled); + xenvif_start_queue(queue); + } +} + int xenvif_kthread_guest_rx(void *data) { struct xenvif_queue *queue = data; @@ -1944,8 +2012,12 @@ int xenvif_kthread_guest_rx(void *data) wait_event_interruptible(queue->wq, rx_work_todo(queue) || queue->vif->disabled || + test_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status) || kthread_should_stop()); + if (kthread_should_stop()) + break; + /* This frontend is found to be rogue, disable it in * kthread context. Currently this is only set when * netback finds out frontend sends malformed packet, @@ -1955,24 +2027,21 @@ int xenvif_kthread_guest_rx(void *data) */ if (unlikely(queue->vif->disabled && queue->id == 0)) xenvif_carrier_off(queue->vif); - - if (kthread_should_stop()) - break; - - if (queue->rx_queue_purge) { + else if (unlikely(test_and_clear_bit(QUEUE_STATUS_RX_PURGE_EVENT, + &queue->status))) { + xenvif_rx_purge_event(queue); + } else if (!netif_carrier_ok(queue->vif->dev)) { + /* Another queue stalled and turned the carrier off, so + * purge the internal queue of queues which were not + * blocked + */ skb_queue_purge(&queue->rx_queue); - queue->rx_queue_purge = false; + queue->rx_last_skb_slots = 0; } if (!skb_queue_empty(&queue->rx_queue)) xenvif_rx_action(queue); - if (skb_queue_empty(&queue->rx_queue) && - xenvif_queue_stopped(queue)) { - del_timer_sync(&queue->wake_queue); - xenvif_start_queue(queue); - } - cond_resched(); } -- cgit v1.2.3 From a64bd934528e26e8956112e43a279fba2ee0634e Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Tue, 12 Aug 2014 11:48:07 +0100 Subject: xen-netback: don't stop dealloc kthread too early Reference count the number of packets in host stack, so that we don't stop the deallocation thread too early. If not, we can end up with xenvif_free permanently waiting for deallocation thread to unmap grefs. Reported-by: Thomas Leonard Signed-off-by: Wei Liu Cc: Ian Campbell Cc: Zoltan Kiss Signed-off-by: David S. Miller --- drivers/net/xen-netback/common.h | 5 +++++ drivers/net/xen-netback/interface.c | 18 ++++++++++++++++++ drivers/net/xen-netback/netback.c | 26 +++++++++++++++++++------- 3 files changed, 42 insertions(+), 7 deletions(-) (limited to 'drivers/net/xen-netback/common.h') diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index ef3026f46a37..d4eb8d2e9cb7 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -165,6 +165,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */ u16 dealloc_ring[MAX_PENDING_REQS]; struct task_struct *dealloc_task; wait_queue_head_t dealloc_wq; + atomic_t inflight_packets; /* Use kthread for guest RX */ struct task_struct *task; @@ -329,4 +330,8 @@ extern unsigned int xenvif_max_queues; extern struct dentry *xen_netback_dbg_root; #endif +void xenvif_skb_zerocopy_prepare(struct xenvif_queue *queue, + struct sk_buff *skb); +void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue); + #endif /* __XEN_NETBACK__COMMON_H__ */ diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 5f3d6c06fcf7..0aaca902699a 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -43,6 +43,23 @@ #define XENVIF_QUEUE_LENGTH 32 #define XENVIF_NAPI_WEIGHT 64 +/* This function is used to set SKBTX_DEV_ZEROCOPY as well as + * increasing the inflight counter. We need to increase the inflight + * counter because core driver calls into xenvif_zerocopy_callback + * which calls xenvif_skb_zerocopy_complete. + */ +void xenvif_skb_zerocopy_prepare(struct xenvif_queue *queue, + struct sk_buff *skb) +{ + skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; + atomic_inc(&queue->inflight_packets); +} + +void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue) +{ + atomic_dec(&queue->inflight_packets); +} + static inline void xenvif_stop_queue(struct xenvif_queue *queue) { struct net_device *dev = queue->vif->dev; @@ -557,6 +574,7 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref, init_waitqueue_head(&queue->wq); init_waitqueue_head(&queue->dealloc_wq); + atomic_set(&queue->inflight_packets, 0); if (tx_evtchn == rx_evtchn) { /* feature-split-event-channels == 0 */ diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 4734472aa620..08f65996534c 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1525,10 +1525,12 @@ static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *s /* remove traces of mapped pages and frag_list */ skb_frag_list_init(skb); uarg = skb_shinfo(skb)->destructor_arg; + /* increase inflight counter to offset decrement in callback */ + atomic_inc(&queue->inflight_packets); uarg->callback(uarg, true); skb_shinfo(skb)->destructor_arg = NULL; - skb_shinfo(nskb)->tx_flags |= SKBTX_DEV_ZEROCOPY; + xenvif_skb_zerocopy_prepare(queue, nskb); kfree_skb(nskb); return 0; @@ -1589,7 +1591,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) if (net_ratelimit()) netdev_err(queue->vif->dev, "Not enough memory to consolidate frag_list!\n"); - skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; + xenvif_skb_zerocopy_prepare(queue, skb); kfree_skb(skb); continue; } @@ -1609,7 +1611,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) "Can't setup checksum in net_tx_action\n"); /* We have to set this flag to trigger the callback */ if (skb_shinfo(skb)->destructor_arg) - skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; + xenvif_skb_zerocopy_prepare(queue, skb); kfree_skb(skb); continue; } @@ -1641,7 +1643,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) * skb. E.g. the __pskb_pull_tail earlier can do such thing. */ if (skb_shinfo(skb)->destructor_arg) { - skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; + xenvif_skb_zerocopy_prepare(queue, skb); queue->stats.tx_zerocopy_sent++; } @@ -1681,6 +1683,7 @@ void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success) queue->stats.tx_zerocopy_success++; else queue->stats.tx_zerocopy_fail++; + xenvif_skb_zerocopy_complete(queue); } static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue) @@ -2058,15 +2061,24 @@ int xenvif_kthread_guest_rx(void *data) return 0; } +static bool xenvif_dealloc_kthread_should_stop(struct xenvif_queue *queue) +{ + /* Dealloc thread must remain running until all inflight + * packets complete. + */ + return kthread_should_stop() && + !atomic_read(&queue->inflight_packets); +} + int xenvif_dealloc_kthread(void *data) { struct xenvif_queue *queue = data; - while (!kthread_should_stop()) { + for (;;) { wait_event_interruptible(queue->dealloc_wq, tx_dealloc_work_todo(queue) || - kthread_should_stop()); - if (kthread_should_stop()) + xenvif_dealloc_kthread_should_stop(queue)); + if (xenvif_dealloc_kthread_should_stop(queue)) break; xenvif_tx_dealloc_action(queue); -- cgit v1.2.3