From 23cc5a991c7a9fb7e6d6550e65cee4f4173111c5 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 23 Jan 2013 21:46:47 +0100 Subject: vhost-net: extend device allocation to vmalloc Michael Mueller provided a patch to reduce the size of vhost-net structure as some allocations could fail under memory pressure/fragmentation. We are still left with high order allocations though. This patch is handling the problem at the core level, allowing vhost structures to use vmalloc() if kmalloc() failed. As vmalloc() adds overhead on a critical network path, add __GFP_REPEAT to kzalloc() flags to do this fallback only when really needed. People are still looking at cleaner ways to handle the problem at the API level, probably passing in multiple iovecs. This hack seems consistent with approaches taken since then by drivers/vhost/scsi.c and net/core/dev.c Based on patch by Romain Francoise. Cc: Michael Mueller Signed-off-by: Romain Francoise Acked-by: Michael S. Tsirkin --- drivers/vhost/net.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index be414d2b2b22..e489161d0feb 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -699,18 +700,30 @@ static void handle_rx_net(struct vhost_work *work) handle_rx(net); } +static void vhost_net_free(void *addr) +{ + if (is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); +} + static int vhost_net_open(struct inode *inode, struct file *f) { - struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL); + struct vhost_net *n; struct vhost_dev *dev; struct vhost_virtqueue **vqs; int i; - if (!n) - return -ENOMEM; + n = kmalloc(sizeof *n, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + if (!n) { + n = vmalloc(sizeof *n); + if (!n) + return -ENOMEM; + } vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL); if (!vqs) { - kfree(n); + vhost_net_free(n); return -ENOMEM; } @@ -827,7 +840,7 @@ static int vhost_net_release(struct inode *inode, struct file *f) * since jobs can re-queue themselves. */ vhost_net_flush(n); kfree(n->dev.vqs); - kfree(n); + vhost_net_free(n); return 0; } -- cgit v1.2.1 From 98f9ca0a3faa99b7388578d55eccecf272be4038 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 28 May 2014 17:07:02 +0300 Subject: vhost: replace rcu with mutex All memory accesses are done under some VQ mutex. So lock/unlock all VQs is a faster equivalent of synchronize_rcu() for memory access changes. Some guests cause a lot of these changes, so it's helpful to make them faster. Reported-by: "Gonglei (Arei)" Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 78987e481bc6..1c05e6030d42 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -593,6 +593,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) { struct vhost_memory mem, *newmem, *oldmem; unsigned long size = offsetof(struct vhost_memory, regions); + int i; if (copy_from_user(&mem, m, size)) return -EFAULT; @@ -619,7 +620,14 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) oldmem = rcu_dereference_protected(d->memory, lockdep_is_held(&d->mutex)); rcu_assign_pointer(d->memory, newmem); - synchronize_rcu(); + + /* All memory accesses are done under some VQ mutex. + * So below is a faster equivalent of synchronize_rcu() + */ + for (i = 0; i < d->nvqs; ++i) { + mutex_lock(&d->vqs[i]->mutex); + mutex_unlock(&d->vqs[i]->mutex); + } kfree(oldmem); return 0; } -- cgit v1.2.1 From ea16c51433510f7f758382dec5b933fc0797f244 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 5 Jun 2014 15:20:23 +0300 Subject: vhost: move acked_features to VQs Refactor code to make sure features are only accessed under VQ mutex. This makes everything simpler, no need for RCU here anymore. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 8 +++----- drivers/vhost/scsi.c | 22 +++++++++++++--------- drivers/vhost/test.c | 9 ++++++--- drivers/vhost/vhost.c | 36 +++++++++++++++++++----------------- drivers/vhost/vhost.h | 11 +++-------- 5 files changed, 44 insertions(+), 42 deletions(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index e489161d0feb..2bc8f298a4e7 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -585,9 +585,9 @@ static void handle_rx(struct vhost_net *net) vhost_hlen = nvq->vhost_hlen; sock_hlen = nvq->sock_hlen; - vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? + vq_log = unlikely(vhost_has_feature(vq, VHOST_F_LOG_ALL)) ? vq->log : NULL; - mergeable = vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF); + mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF); while ((sock_len = peek_head_len(sock->sk))) { sock_len += sock_hlen; @@ -1051,15 +1051,13 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features) mutex_unlock(&n->dev.mutex); return -EFAULT; } - n->dev.acked_features = features; - smp_wmb(); for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { mutex_lock(&n->vqs[i].vq.mutex); + n->vqs[i].vq.acked_features = features; n->vqs[i].vhost_hlen = vhost_hlen; n->vqs[i].sock_hlen = sock_hlen; mutex_unlock(&n->vqs[i].vq.mutex); } - vhost_net_flush(n); mutex_unlock(&n->dev.mutex); return 0; } diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index cf50ce93975b..f1f284fe30fd 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1373,6 +1373,9 @@ err_dev: static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features) { + struct vhost_virtqueue *vq; + int i; + if (features & ~VHOST_SCSI_FEATURES) return -EOPNOTSUPP; @@ -1382,9 +1385,13 @@ static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features) mutex_unlock(&vs->dev.mutex); return -EFAULT; } - vs->dev.acked_features = features; - smp_wmb(); - vhost_scsi_flush(vs); + + for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { + vq = &vs->vqs[i].vq; + mutex_lock(&vq->mutex); + vq->acked_features = features; + mutex_unlock(&vq->mutex); + } mutex_unlock(&vs->dev.mutex); return 0; } @@ -1591,10 +1598,6 @@ tcm_vhost_do_plug(struct tcm_vhost_tpg *tpg, return; mutex_lock(&vs->dev.mutex); - if (!vhost_has_feature(&vs->dev, VIRTIO_SCSI_F_HOTPLUG)) { - mutex_unlock(&vs->dev.mutex); - return; - } if (plug) reason = VIRTIO_SCSI_EVT_RESET_RESCAN; @@ -1603,8 +1606,9 @@ tcm_vhost_do_plug(struct tcm_vhost_tpg *tpg, vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq; mutex_lock(&vq->mutex); - tcm_vhost_send_evt(vs, tpg, lun, - VIRTIO_SCSI_T_TRANSPORT_RESET, reason); + if (vhost_has_feature(vq, VIRTIO_SCSI_F_HOTPLUG)) + tcm_vhost_send_evt(vs, tpg, lun, + VIRTIO_SCSI_T_TRANSPORT_RESET, reason); mutex_unlock(&vq->mutex); mutex_unlock(&vs->dev.mutex); } diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index c2a54fbf7f99..6fa3bf8bdec7 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -241,15 +241,18 @@ done: static int vhost_test_set_features(struct vhost_test *n, u64 features) { + struct vhost_virtqueue *vq; + mutex_lock(&n->dev.mutex); if ((features & (1 << VHOST_F_LOG_ALL)) && !vhost_log_access_ok(&n->dev)) { mutex_unlock(&n->dev.mutex); return -EFAULT; } - n->dev.acked_features = features; - smp_wmb(); - vhost_test_flush(n); + vq = &n->vqs[VHOST_TEST_VQ]; + mutex_lock(&vq->mutex); + vq->acked_features = features; + mutex_unlock(&vq->mutex); mutex_unlock(&n->dev.mutex); return 0; } diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 1c05e6030d42..a23870cbbf91 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -191,6 +191,7 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->log_used = false; vq->log_addr = -1ull; vq->private_data = NULL; + vq->acked_features = 0; vq->log_base = NULL; vq->error_ctx = NULL; vq->error = NULL; @@ -524,11 +525,13 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, for (i = 0; i < d->nvqs; ++i) { int ok; + bool log; + mutex_lock(&d->vqs[i]->mutex); + log = log_all || vhost_has_feature(d->vqs[i], VHOST_F_LOG_ALL); /* If ring is inactive, will check when it's enabled. */ if (d->vqs[i]->private_data) - ok = vq_memory_access_ok(d->vqs[i]->log_base, mem, - log_all); + ok = vq_memory_access_ok(d->vqs[i]->log_base, mem, log); else ok = 1; mutex_unlock(&d->vqs[i]->mutex); @@ -538,12 +541,12 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, return 1; } -static int vq_access_ok(struct vhost_dev *d, unsigned int num, +static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, struct vring_desc __user *desc, struct vring_avail __user *avail, struct vring_used __user *used) { - size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; + size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; return access_ok(VERIFY_READ, desc, num * sizeof *desc) && access_ok(VERIFY_READ, avail, sizeof *avail + num * sizeof *avail->ring + s) && @@ -565,16 +568,16 @@ EXPORT_SYMBOL_GPL(vhost_log_access_ok); /* Verify access for write logging. */ /* Caller should have vq mutex and device mutex */ -static int vq_log_access_ok(struct vhost_dev *d, struct vhost_virtqueue *vq, +static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base) { struct vhost_memory *mp; - size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; + size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; mp = rcu_dereference_protected(vq->dev->memory, lockdep_is_held(&vq->mutex)); return vq_memory_access_ok(log_base, mp, - vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) && + vhost_has_feature(vq, VHOST_F_LOG_ALL)) && (!vq->log_used || log_access_ok(log_base, vq->log_addr, sizeof *vq->used + vq->num * sizeof *vq->used->ring + s)); @@ -584,8 +587,8 @@ static int vq_log_access_ok(struct vhost_dev *d, struct vhost_virtqueue *vq, /* Caller should have vq mutex and device mutex */ int vhost_vq_access_ok(struct vhost_virtqueue *vq) { - return vq_access_ok(vq->dev, vq->num, vq->desc, vq->avail, vq->used) && - vq_log_access_ok(vq->dev, vq, vq->log_base); + return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used) && + vq_log_access_ok(vq, vq->log_base); } EXPORT_SYMBOL_GPL(vhost_vq_access_ok); @@ -612,8 +615,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) return -EFAULT; } - if (!memory_access_ok(d, newmem, - vhost_has_feature(d, VHOST_F_LOG_ALL))) { + if (!memory_access_ok(d, newmem, 0)) { kfree(newmem); return -EFAULT; } @@ -726,7 +728,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) * If it is not, we don't as size might not have been setup. * We will verify when backend is configured. */ if (vq->private_data) { - if (!vq_access_ok(d, vq->num, + if (!vq_access_ok(vq, vq->num, (void __user *)(unsigned long)a.desc_user_addr, (void __user *)(unsigned long)a.avail_user_addr, (void __user *)(unsigned long)a.used_user_addr)) { @@ -866,7 +868,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) vq = d->vqs[i]; mutex_lock(&vq->mutex); /* If ring is inactive, will check when it's enabled. */ - if (vq->private_data && !vq_log_access_ok(d, vq, base)) + if (vq->private_data && !vq_log_access_ok(vq, base)) r = -EFAULT; else vq->log_base = base; @@ -1434,11 +1436,11 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) * interrupts. */ smp_mb(); - if (vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) && + if (vhost_has_feature(vq, VIRTIO_F_NOTIFY_ON_EMPTY) && unlikely(vq->avail_idx == vq->last_avail_idx)) return true; - if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { + if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { __u16 flags; if (__get_user(flags, &vq->avail->flags)) { vq_err(vq, "Failed to get flags"); @@ -1499,7 +1501,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) return false; vq->used_flags &= ~VRING_USED_F_NO_NOTIFY; - if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { + if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { r = vhost_update_used_flags(vq); if (r) { vq_err(vq, "Failed to enable notification at %p: %d\n", @@ -1536,7 +1538,7 @@ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) if (vq->used_flags & VRING_USED_F_NO_NOTIFY) return; vq->used_flags |= VRING_USED_F_NO_NOTIFY; - if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { + if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { r = vhost_update_used_flags(vq); if (r) vq_err(vq, "Failed to enable notification at %p: %d\n", diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 35eeb2a1bada..ff454a0ec6f5 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -105,6 +105,7 @@ struct vhost_virtqueue { struct vring_used_elem *heads; /* Protected by virtqueue mutex. */ void *private_data; + unsigned acked_features; /* Log write descriptors */ void __user *log_base; struct vhost_log *log; @@ -117,7 +118,6 @@ struct vhost_dev { struct vhost_memory __rcu *memory; struct mm_struct *mm; struct mutex mutex; - unsigned acked_features; struct vhost_virtqueue **vqs; int nvqs; struct file *log_file; @@ -174,13 +174,8 @@ enum { (1ULL << VHOST_F_LOG_ALL), }; -static inline int vhost_has_feature(struct vhost_dev *dev, int bit) +static inline int vhost_has_feature(struct vhost_virtqueue *vq, int bit) { - unsigned acked_features; - - /* TODO: check that we are running from vhost_worker or dev mutex is - * held? */ - acked_features = rcu_dereference_index_check(dev->acked_features, 1); - return acked_features & (1 << bit); + return vq->acked_features & (1 << bit); } #endif -- cgit v1.2.1 From 47283bef7ed356629467d1fac61687756e48f254 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 5 Jun 2014 15:20:27 +0300 Subject: vhost: move memory pointer to VQs commit 2ae76693b8bcabf370b981cd00c36cd41d33fabc vhost: replace rcu with mutex replaced rcu sync for memory accesses with VQ mutex locl/unlock. This is correct since all accesses are under VQ mutex, but incomplete: we still do useless rcu lock/unlock operations, someone might copy this code into some other context where this won't be right. This use of RCU is also non standard and hard to understand. Let's copy the pointer to each VQ structure, this way the access rules become straight-forward, and there's no need for RCU anymore. Reported-by: Eric Dumazet Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 4 ++-- drivers/vhost/scsi.c | 4 ++-- drivers/vhost/test.c | 2 +- drivers/vhost/vhost.c | 57 ++++++++++++++++++++++----------------------------- drivers/vhost/vhost.h | 8 +++----- 5 files changed, 33 insertions(+), 42 deletions(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 2bc8f298a4e7..971a760af4a1 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -374,7 +374,7 @@ static void handle_tx(struct vhost_net *net) % UIO_MAXIOV == nvq->done_idx)) break; - head = vhost_get_vq_desc(&net->dev, vq, vq->iov, + head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), &out, &in, NULL, NULL); @@ -506,7 +506,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, r = -ENOBUFS; goto err; } - r = vhost_get_vq_desc(vq->dev, vq, vq->iov + seg, + r = vhost_get_vq_desc(vq, vq->iov + seg, ARRAY_SIZE(vq->iov) - seg, &out, &in, log, log_num); if (unlikely(r < 0)) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index f1f284fe30fd..83b834b357d9 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -606,7 +606,7 @@ tcm_vhost_do_evt_work(struct vhost_scsi *vs, struct tcm_vhost_evt *evt) again: vhost_disable_notify(&vs->dev, vq); - head = vhost_get_vq_desc(&vs->dev, vq, vq->iov, + head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), &out, &in, NULL, NULL); if (head < 0) { @@ -945,7 +945,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) vhost_disable_notify(&vs->dev, vq); for (;;) { - head = vhost_get_vq_desc(&vs->dev, vq, vq->iov, + head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), &out, &in, NULL, NULL); pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n", diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 6fa3bf8bdec7..d9c501eaa6c3 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -53,7 +53,7 @@ static void handle_vq(struct vhost_test *n) vhost_disable_notify(&n->dev, vq); for (;;) { - head = vhost_get_vq_desc(&n->dev, vq, vq->iov, + head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), &out, &in, NULL, NULL); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index a23870cbbf91..c90f4374442a 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -199,6 +198,7 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->call_ctx = NULL; vq->call = NULL; vq->log_ctx = NULL; + vq->memory = NULL; } static int vhost_worker(void *data) @@ -416,11 +416,18 @@ EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare); /* Caller should have device mutex */ void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_memory *memory) { + int i; + vhost_dev_cleanup(dev, true); /* Restore memory to default empty mapping. */ memory->nregions = 0; - RCU_INIT_POINTER(dev->memory, memory); + dev->memory = memory; + /* We don't need VQ locks below since vhost_dev_cleanup makes sure + * VQs aren't running. + */ + for (i = 0; i < dev->nvqs; ++i) + dev->vqs[i]->memory = memory; } EXPORT_SYMBOL_GPL(vhost_dev_reset_owner); @@ -463,10 +470,8 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked) fput(dev->log_file); dev->log_file = NULL; /* No one will access memory at this point */ - kfree(rcu_dereference_protected(dev->memory, - locked == - lockdep_is_held(&dev->mutex))); - RCU_INIT_POINTER(dev->memory, NULL); + kfree(dev->memory); + dev->memory = NULL; WARN_ON(!list_empty(&dev->work_list)); if (dev->worker) { kthread_stop(dev->worker); @@ -558,11 +563,7 @@ static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, /* Caller should have device mutex but not vq mutex */ int vhost_log_access_ok(struct vhost_dev *dev) { - struct vhost_memory *mp; - - mp = rcu_dereference_protected(dev->memory, - lockdep_is_held(&dev->mutex)); - return memory_access_ok(dev, mp, 1); + return memory_access_ok(dev, dev->memory, 1); } EXPORT_SYMBOL_GPL(vhost_log_access_ok); @@ -571,12 +572,9 @@ EXPORT_SYMBOL_GPL(vhost_log_access_ok); static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base) { - struct vhost_memory *mp; size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; - mp = rcu_dereference_protected(vq->dev->memory, - lockdep_is_held(&vq->mutex)); - return vq_memory_access_ok(log_base, mp, + return vq_memory_access_ok(log_base, vq->memory, vhost_has_feature(vq, VHOST_F_LOG_ALL)) && (!vq->log_used || log_access_ok(log_base, vq->log_addr, sizeof *vq->used + @@ -619,15 +617,13 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) kfree(newmem); return -EFAULT; } - oldmem = rcu_dereference_protected(d->memory, - lockdep_is_held(&d->mutex)); - rcu_assign_pointer(d->memory, newmem); + oldmem = d->memory; + d->memory = newmem; - /* All memory accesses are done under some VQ mutex. - * So below is a faster equivalent of synchronize_rcu() - */ + /* All memory accesses are done under some VQ mutex. */ for (i = 0; i < d->nvqs; ++i) { mutex_lock(&d->vqs[i]->mutex); + d->vqs[i]->memory = newmem; mutex_unlock(&d->vqs[i]->mutex); } kfree(oldmem); @@ -1054,7 +1050,7 @@ int vhost_init_used(struct vhost_virtqueue *vq) } EXPORT_SYMBOL_GPL(vhost_init_used); -static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len, +static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, struct iovec iov[], int iov_size) { const struct vhost_memory_region *reg; @@ -1063,9 +1059,7 @@ static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len, u64 s = 0; int ret = 0; - rcu_read_lock(); - - mem = rcu_dereference(dev->memory); + mem = vq->memory; while ((u64)len > s) { u64 size; if (unlikely(ret >= iov_size)) { @@ -1087,7 +1081,6 @@ static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len, ++ret; } - rcu_read_unlock(); return ret; } @@ -1112,7 +1105,7 @@ static unsigned next_desc(struct vring_desc *desc) return next; } -static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq, +static int get_indirect(struct vhost_virtqueue *vq, struct iovec iov[], unsigned int iov_size, unsigned int *out_num, unsigned int *in_num, struct vhost_log *log, unsigned int *log_num, @@ -1131,7 +1124,7 @@ static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq, return -EINVAL; } - ret = translate_desc(dev, indirect->addr, indirect->len, vq->indirect, + ret = translate_desc(vq, indirect->addr, indirect->len, vq->indirect, UIO_MAXIOV); if (unlikely(ret < 0)) { vq_err(vq, "Translation failure %d in indirect.\n", ret); @@ -1171,7 +1164,7 @@ static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq, return -EINVAL; } - ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count, + ret = translate_desc(vq, desc.addr, desc.len, iov + iov_count, iov_size - iov_count); if (unlikely(ret < 0)) { vq_err(vq, "Translation failure %d indirect idx %d\n", @@ -1208,7 +1201,7 @@ static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq, * This function returns the descriptor number found, or vq->num (which is * never a valid descriptor number) if none was found. A negative code is * returned on error. */ -int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, +int vhost_get_vq_desc(struct vhost_virtqueue *vq, struct iovec iov[], unsigned int iov_size, unsigned int *out_num, unsigned int *in_num, struct vhost_log *log, unsigned int *log_num) @@ -1282,7 +1275,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, return -EFAULT; } if (desc.flags & VRING_DESC_F_INDIRECT) { - ret = get_indirect(dev, vq, iov, iov_size, + ret = get_indirect(vq, iov, iov_size, out_num, in_num, log, log_num, &desc); if (unlikely(ret < 0)) { @@ -1293,7 +1286,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, continue; } - ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count, + ret = translate_desc(vq, desc.addr, desc.len, iov + iov_count, iov_size - iov_count); if (unlikely(ret < 0)) { vq_err(vq, "Translation failure %d descriptor idx %d\n", diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index ff454a0ec6f5..3eda654b8f5a 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -104,6 +104,7 @@ struct vhost_virtqueue { struct iovec *indirect; struct vring_used_elem *heads; /* Protected by virtqueue mutex. */ + struct vhost_memory *memory; void *private_data; unsigned acked_features; /* Log write descriptors */ @@ -112,10 +113,7 @@ struct vhost_virtqueue { }; struct vhost_dev { - /* Readers use RCU to access memory table pointer - * log base pointer and features. - * Writers use mutex below.*/ - struct vhost_memory __rcu *memory; + struct vhost_memory *memory; struct mm_struct *mm; struct mutex mutex; struct vhost_virtqueue **vqs; @@ -140,7 +138,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp); int vhost_vq_access_ok(struct vhost_virtqueue *vq); int vhost_log_access_ok(struct vhost_dev *); -int vhost_get_vq_desc(struct vhost_dev *, struct vhost_virtqueue *, +int vhost_get_vq_desc(struct vhost_virtqueue *, struct iovec iov[], unsigned int iov_count, unsigned int *out_num, unsigned int *in_num, struct vhost_log *log, unsigned int *log_num); -- cgit v1.2.1