summaryrefslogtreecommitdiffstats
path: root/drivers/vhost
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vhost')
-rw-r--r--drivers/vhost/net.c12
-rw-r--r--drivers/vhost/scsi.c12
-rw-r--r--drivers/vhost/test.c27
-rw-r--r--drivers/vhost/vhost.c530
-rw-r--r--drivers/vhost/vhost.h42
-rw-r--r--drivers/vhost/vringh.c8
-rw-r--r--drivers/vhost/vsock.c186
7 files changed, 142 insertions, 675 deletions
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 1a2dd53caade..e158159671fa 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -1751,14 +1751,6 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
}
}
-#ifdef CONFIG_COMPAT
-static long vhost_net_compat_ioctl(struct file *f, unsigned int ioctl,
- unsigned long arg)
-{
- return vhost_net_ioctl(f, ioctl, (unsigned long)compat_ptr(arg));
-}
-#endif
-
static ssize_t vhost_net_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *file = iocb->ki_filp;
@@ -1794,9 +1786,7 @@ static const struct file_operations vhost_net_fops = {
.write_iter = vhost_net_chr_write_iter,
.poll = vhost_net_chr_poll,
.unlocked_ioctl = vhost_net_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = vhost_net_compat_ioctl,
-#endif
+ .compat_ioctl = compat_ptr_ioctl,
.open = vhost_net_open,
.llseek = noop_llseek,
};
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index a9caf1bc3c3e..0b949a14bce3 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -1727,21 +1727,11 @@ vhost_scsi_ioctl(struct file *f,
}
}
-#ifdef CONFIG_COMPAT
-static long vhost_scsi_compat_ioctl(struct file *f, unsigned int ioctl,
- unsigned long arg)
-{
- return vhost_scsi_ioctl(f, ioctl, (unsigned long)compat_ptr(arg));
-}
-#endif
-
static const struct file_operations vhost_scsi_fops = {
.owner = THIS_MODULE,
.release = vhost_scsi_release,
.unlocked_ioctl = vhost_scsi_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = vhost_scsi_compat_ioctl,
-#endif
+ .compat_ioctl = compat_ptr_ioctl,
.open = vhost_scsi_open,
.llseek = noop_llseek,
};
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index 9e90e969af55..e37c92d4d7ad 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -22,6 +22,12 @@
* Using this limit prevents one virtqueue from starving others. */
#define VHOST_TEST_WEIGHT 0x80000
+/* Max number of packets transferred before requeueing the job.
+ * Using this limit prevents one virtqueue from starving others with
+ * pkts.
+ */
+#define VHOST_TEST_PKT_WEIGHT 256
+
enum {
VHOST_TEST_VQ = 0,
VHOST_TEST_VQ_MAX = 1,
@@ -80,10 +86,8 @@ static void handle_vq(struct vhost_test *n)
}
vhost_add_used_and_signal(&n->dev, vq, head, 0);
total_len += len;
- if (unlikely(total_len >= VHOST_TEST_WEIGHT)) {
- vhost_poll_queue(&vq->poll);
+ if (unlikely(vhost_exceeds_weight(vq, 0, total_len)))
break;
- }
}
mutex_unlock(&vq->mutex);
@@ -115,7 +119,8 @@ static int vhost_test_open(struct inode *inode, struct file *f)
dev = &n->dev;
vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ];
n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick;
- vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX);
+ vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV,
+ VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT);
f->private_data = n;
@@ -156,6 +161,7 @@ static int vhost_test_release(struct inode *inode, struct file *f)
vhost_test_stop(n, &private);
vhost_test_flush(n);
+ vhost_dev_stop(&n->dev);
vhost_dev_cleanup(&n->dev);
/* We do an extra flush before freeing memory,
* since jobs can re-queue themselves. */
@@ -232,6 +238,7 @@ static long vhost_test_reset_owner(struct vhost_test *n)
}
vhost_test_stop(n, &priv);
vhost_test_flush(n);
+ vhost_dev_stop(&n->dev);
vhost_dev_reset_owner(&n->dev, umem);
done:
mutex_unlock(&n->dev.mutex);
@@ -297,21 +304,11 @@ static long vhost_test_ioctl(struct file *f, unsigned int ioctl,
}
}
-#ifdef CONFIG_COMPAT
-static long vhost_test_compat_ioctl(struct file *f, unsigned int ioctl,
- unsigned long arg)
-{
- return vhost_test_ioctl(f, ioctl, (unsigned long)compat_ptr(arg));
-}
-#endif
-
static const struct file_operations vhost_test_fops = {
.owner = THIS_MODULE,
.release = vhost_test_release,
.unlocked_ioctl = vhost_test_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = vhost_test_compat_ioctl,
-#endif
+ .compat_ioctl = compat_ptr_ioctl,
.open = vhost_test_open,
.llseek = noop_llseek,
};
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 0536f8526359..f44340b41494 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -30,6 +30,7 @@
#include <linux/sched/signal.h>
#include <linux/interval_tree_generic.h>
#include <linux/nospec.h>
+#include <linux/kcov.h>
#include "vhost.h"
@@ -203,7 +204,6 @@ EXPORT_SYMBOL_GPL(vhost_poll_init);
int vhost_poll_start(struct vhost_poll *poll, struct file *file)
{
__poll_t mask;
- int ret = 0;
if (poll->wqh)
return 0;
@@ -213,10 +213,10 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file)
vhost_poll_wakeup(&poll->wait, 0, 0, poll_to_key(mask));
if (mask & EPOLLERR) {
vhost_poll_stop(poll);
- ret = -EINVAL;
+ return -EINVAL;
}
- return ret;
+ return 0;
}
EXPORT_SYMBOL_GPL(vhost_poll_start);
@@ -298,160 +298,6 @@ static void vhost_vq_meta_reset(struct vhost_dev *d)
__vhost_vq_meta_reset(d->vqs[i]);
}
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-static void vhost_map_unprefetch(struct vhost_map *map)
-{
- kfree(map->pages);
- map->pages = NULL;
- map->npages = 0;
- map->addr = NULL;
-}
-
-static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
-{
- struct vhost_map *map[VHOST_NUM_ADDRS];
- int i;
-
- spin_lock(&vq->mmu_lock);
- for (i = 0; i < VHOST_NUM_ADDRS; i++) {
- map[i] = rcu_dereference_protected(vq->maps[i],
- lockdep_is_held(&vq->mmu_lock));
- if (map[i])
- rcu_assign_pointer(vq->maps[i], NULL);
- }
- spin_unlock(&vq->mmu_lock);
-
- synchronize_rcu();
-
- for (i = 0; i < VHOST_NUM_ADDRS; i++)
- if (map[i])
- vhost_map_unprefetch(map[i]);
-
-}
-
-static void vhost_reset_vq_maps(struct vhost_virtqueue *vq)
-{
- int i;
-
- vhost_uninit_vq_maps(vq);
- for (i = 0; i < VHOST_NUM_ADDRS; i++)
- vq->uaddrs[i].size = 0;
-}
-
-static bool vhost_map_range_overlap(struct vhost_uaddr *uaddr,
- unsigned long start,
- unsigned long end)
-{
- if (unlikely(!uaddr->size))
- return false;
-
- return !(end < uaddr->uaddr || start > uaddr->uaddr - 1 + uaddr->size);
-}
-
-static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
- int index,
- unsigned long start,
- unsigned long end)
-{
- struct vhost_uaddr *uaddr = &vq->uaddrs[index];
- struct vhost_map *map;
- int i;
-
- if (!vhost_map_range_overlap(uaddr, start, end))
- return;
-
- spin_lock(&vq->mmu_lock);
- ++vq->invalidate_count;
-
- map = rcu_dereference_protected(vq->maps[index],
- lockdep_is_held(&vq->mmu_lock));
- if (map) {
- if (uaddr->write) {
- for (i = 0; i < map->npages; i++)
- set_page_dirty(map->pages[i]);
- }
- rcu_assign_pointer(vq->maps[index], NULL);
- }
- spin_unlock(&vq->mmu_lock);
-
- if (map) {
- synchronize_rcu();
- vhost_map_unprefetch(map);
- }
-}
-
-static void vhost_invalidate_vq_end(struct vhost_virtqueue *vq,
- int index,
- unsigned long start,
- unsigned long end)
-{
- if (!vhost_map_range_overlap(&vq->uaddrs[index], start, end))
- return;
-
- spin_lock(&vq->mmu_lock);
- --vq->invalidate_count;
- spin_unlock(&vq->mmu_lock);
-}
-
-static int vhost_invalidate_range_start(struct mmu_notifier *mn,
- const struct mmu_notifier_range *range)
-{
- struct vhost_dev *dev = container_of(mn, struct vhost_dev,
- mmu_notifier);
- int i, j;
-
- if (!mmu_notifier_range_blockable(range))
- return -EAGAIN;
-
- for (i = 0; i < dev->nvqs; i++) {
- struct vhost_virtqueue *vq = dev->vqs[i];
-
- for (j = 0; j < VHOST_NUM_ADDRS; j++)
- vhost_invalidate_vq_start(vq, j,
- range->start,
- range->end);
- }
-
- return 0;
-}
-
-static void vhost_invalidate_range_end(struct mmu_notifier *mn,
- const struct mmu_notifier_range *range)
-{
- struct vhost_dev *dev = container_of(mn, struct vhost_dev,
- mmu_notifier);
- int i, j;
-
- for (i = 0; i < dev->nvqs; i++) {
- struct vhost_virtqueue *vq = dev->vqs[i];
-
- for (j = 0; j < VHOST_NUM_ADDRS; j++)
- vhost_invalidate_vq_end(vq, j,
- range->start,
- range->end);
- }
-}
-
-static const struct mmu_notifier_ops vhost_mmu_notifier_ops = {
- .invalidate_range_start = vhost_invalidate_range_start,
- .invalidate_range_end = vhost_invalidate_range_end,
-};
-
-static void vhost_init_maps(struct vhost_dev *dev)
-{
- struct vhost_virtqueue *vq;
- int i, j;
-
- dev->mmu_notifier.ops = &vhost_mmu_notifier_ops;
-
- for (i = 0; i < dev->nvqs; ++i) {
- vq = dev->vqs[i];
- for (j = 0; j < VHOST_NUM_ADDRS; j++)
- RCU_INIT_POINTER(vq->maps[j], NULL);
- }
-}
-#endif
-
static void vhost_vq_reset(struct vhost_dev *dev,
struct vhost_virtqueue *vq)
{
@@ -480,11 +326,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vq->busyloop_timeout = 0;
vq->umem = NULL;
vq->iotlb = NULL;
- vq->invalidate_count = 0;
__vhost_vq_meta_reset(vq);
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
- vhost_reset_vq_maps(vq);
-#endif
}
static int vhost_worker(void *data)
@@ -516,7 +358,9 @@ static int vhost_worker(void *data)
llist_for_each_entry_safe(work, work_next, node, node) {
clear_bit(VHOST_WORK_QUEUED, &work->flags);
__set_current_state(TASK_RUNNING);
+ kcov_remote_start_common(dev->kcov_handle);
work->fn(work);
+ kcov_remote_stop();
if (need_resched())
schedule();
}
@@ -634,9 +478,7 @@ void vhost_dev_init(struct vhost_dev *dev,
INIT_LIST_HEAD(&dev->read_list);
INIT_LIST_HEAD(&dev->pending_list);
spin_lock_init(&dev->iotlb_lock);
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
- vhost_init_maps(dev);
-#endif
+
for (i = 0; i < dev->nvqs; ++i) {
vq = dev->vqs[i];
@@ -645,7 +487,6 @@ void vhost_dev_init(struct vhost_dev *dev,
vq->heads = NULL;
vq->dev = dev;
mutex_init(&vq->mutex);
- spin_lock_init(&vq->mmu_lock);
vhost_vq_reset(dev, vq);
if (vq->handle_kick)
vhost_poll_init(&vq->poll, vq->handle_kick,
@@ -708,6 +549,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
/* No owner, become one */
dev->mm = get_task_mm(current);
+ dev->kcov_handle = kcov_common_handle();
worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid);
if (IS_ERR(worker)) {
err = PTR_ERR(worker);
@@ -725,18 +567,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
if (err)
goto err_cgroup;
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
- err = mmu_notifier_register(&dev->mmu_notifier, dev->mm);
- if (err)
- goto err_mmu_notifier;
-#endif
-
return 0;
-
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-err_mmu_notifier:
- vhost_dev_free_iovecs(dev);
-#endif
err_cgroup:
kthread_stop(worker);
dev->worker = NULL;
@@ -744,6 +575,7 @@ err_worker:
if (dev->mm)
mmput(dev->mm);
dev->mm = NULL;
+ dev->kcov_handle = 0;
err_mm:
return err;
}
@@ -827,107 +659,6 @@ static void vhost_clear_msg(struct vhost_dev *dev)
spin_unlock(&dev->iotlb_lock);
}
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-static void vhost_setup_uaddr(struct vhost_virtqueue *vq,
- int index, unsigned long uaddr,
- size_t size, bool write)
-{
- struct vhost_uaddr *addr = &vq->uaddrs[index];
-
- addr->uaddr = uaddr;
- addr->size = size;
- addr->write = write;
-}
-
-static void vhost_setup_vq_uaddr(struct vhost_virtqueue *vq)
-{
- vhost_setup_uaddr(vq, VHOST_ADDR_DESC,
- (unsigned long)vq->desc,
- vhost_get_desc_size(vq, vq->num),
- false);
- vhost_setup_uaddr(vq, VHOST_ADDR_AVAIL,
- (unsigned long)vq->avail,
- vhost_get_avail_size(vq, vq->num),
- false);
- vhost_setup_uaddr(vq, VHOST_ADDR_USED,
- (unsigned long)vq->used,
- vhost_get_used_size(vq, vq->num),
- true);
-}
-
-static int vhost_map_prefetch(struct vhost_virtqueue *vq,
- int index)
-{
- struct vhost_map *map;
- struct vhost_uaddr *uaddr = &vq->uaddrs[index];
- struct page **pages;
- int npages = DIV_ROUND_UP(uaddr->size, PAGE_SIZE);
- int npinned;
- void *vaddr, *v;
- int err;
- int i;
-
- spin_lock(&vq->mmu_lock);
-
- err = -EFAULT;
- if (vq->invalidate_count)
- goto err;
-
- err = -ENOMEM;
- map = kmalloc(sizeof(*map), GFP_ATOMIC);
- if (!map)
- goto err;
-
- pages = kmalloc_array(npages, sizeof(struct page *), GFP_ATOMIC);
- if (!pages)
- goto err_pages;
-
- err = EFAULT;
- npinned = __get_user_pages_fast(uaddr->uaddr, npages,
- uaddr->write, pages);
- if (npinned > 0)
- release_pages(pages, npinned);
- if (npinned != npages)
- goto err_gup;
-
- for (i = 0; i < npinned; i++)
- if (PageHighMem(pages[i]))
- goto err_gup;
-
- vaddr = v = page_address(pages[0]);
-
- /* For simplicity, fallback to userspace address if VA is not
- * contigious.
- */
- for (i = 1; i < npinned; i++) {
- v += PAGE_SIZE;
- if (v != page_address(pages[i]))
- goto err_gup;
- }
-
- map->addr = vaddr + (uaddr->uaddr & (PAGE_SIZE - 1));
- map->npages = npages;
- map->pages = pages;
-
- rcu_assign_pointer(vq->maps[index], map);
- /* No need for a synchronize_rcu(). This function should be
- * called by dev->worker so we are serialized with all
- * readers.
- */
- spin_unlock(&vq->mmu_lock);
-
- return 0;
-
-err_gup:
- kfree(pages);
-err_pages:
- kfree(map);
-err:
- spin_unlock(&vq->mmu_lock);
- return err;
-}
-#endif
-
void vhost_dev_cleanup(struct vhost_dev *dev)
{
int i;
@@ -956,17 +687,10 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
if (dev->worker) {
kthread_stop(dev->worker);
dev->worker = NULL;
+ dev->kcov_handle = 0;
}
- if (dev->mm) {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
- mmu_notifier_unregister(&dev->mmu_notifier, dev->mm);
-#endif
+ if (dev->mm)
mmput(dev->mm);
- }
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
- for (i = 0; i < dev->nvqs; i++)
- vhost_uninit_vq_maps(dev->vqs[i]);
-#endif
dev->mm = NULL;
}
EXPORT_SYMBOL_GPL(vhost_dev_cleanup);
@@ -1195,26 +919,6 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
static inline int vhost_put_avail_event(struct vhost_virtqueue *vq)
{
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
- struct vhost_map *map;
- struct vring_used *used;
-
- if (!vq->iotlb) {
- rcu_read_lock();
-
- map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
- if (likely(map)) {
- used = map->addr;
- *((__virtio16 *)&used->ring[vq->num]) =
- cpu_to_vhost16(vq, vq->avail_idx);
- rcu_read_unlock();
- return 0;
- }
-
- rcu_read_unlock();
- }
-#endif
-
return vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx),
vhost_avail_event(vq));
}
@@ -1223,27 +927,6 @@ static inline int vhost_put_used(struct vhost_virtqueue *vq,
struct vring_used_elem *head, int idx,
int count)
{
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
- struct vhost_map *map;
- struct vring_used *used;
- size_t size;
-
- if (!vq->iotlb) {
- rcu_read_lock();
-
- map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
- if (likely(map)) {
- used = map->addr;
- size = count * sizeof(*head);
- memcpy(used->ring + idx, head, size);
- rcu_read_unlock();
- return 0;
- }
-
- rcu_read_unlock();
- }
-#endif
-
return vhost_copy_to_user(vq, vq->used->ring + idx, head,
count * sizeof(*head));
}
@@ -1251,25 +934,6 @@ static inline int vhost_put_used(struct vhost_virtqueue *vq,
static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)
{
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
- struct vhost_map *map;
- struct vring_used *used;
-
- if (!vq->iotlb) {
- rcu_read_lock();
-
- map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
- if (likely(map)) {
- used = map->addr;
- used->flags = cpu_to_vhost16(vq, vq->used_flags);
- rcu_read_unlock();
- return 0;
- }
-
- rcu_read_unlock();
- }
-#endif
-
return vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags),
&vq->used->flags);
}
@@ -1277,25 +941,6 @@ static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)
static inline int vhost_put_used_idx(struct vhost_virtqueue *vq)
{
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
- struct vhost_map *map;
- struct vring_used *used;
-
- if (!vq->iotlb) {
- rcu_read_lock();
-
- map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
- if (likely(map)) {
- used = map->addr;
- used->idx = cpu_to_vhost16(vq, vq->last_used_idx);
- rcu_read_unlock();
- return 0;
- }
-
- rcu_read_unlock();
- }
-#endif
-
return vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx),
&vq->used->idx);
}
@@ -1341,50 +986,12 @@ static void vhost_dev_unlock_vqs(struct vhost_dev *d)
static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq,
__virtio16 *idx)
{
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
- struct vhost_map *map;
- struct vring_avail *avail;
-
- if (!vq->iotlb) {
- rcu_read_lock();
-
- map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
- if (likely(map)) {
- avail = map->addr;
- *idx = avail->idx;
- rcu_read_unlock();
- return 0;
- }
-
- rcu_read_unlock();
- }
-#endif
-
return vhost_get_avail(vq, *idx, &vq->avail->idx);
}
static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
__virtio16 *head, int idx)
{
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
- struct vhost_map *map;
- struct vring_avail *avail;
-
- if (!vq->iotlb) {
- rcu_read_lock();
-
- map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
- if (likely(map)) {
- avail = map->addr;
- *head = avail->ring[idx & (vq->num - 1)];
- rcu_read_unlock();
- return 0;
- }
-
- rcu_read_unlock();
- }
-#endif
-
return vhost_get_avail(vq, *head,
&vq->avail->ring[idx & (vq->num - 1)]);
}
@@ -1392,98 +999,24 @@ static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
static inline int vhost_get_avail_flags(struct vhost_virtqueue *vq,
__virtio16 *flags)
{
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
- struct vhost_map *map;
- struct vring_avail *avail;
-
- if (!vq->iotlb) {
- rcu_read_lock();
-
- map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
- if (likely(map)) {
- avail = map->addr;
- *flags = avail->flags;
- rcu_read_unlock();
- return 0;
- }
-
- rcu_read_unlock();
- }
-#endif
-
return vhost_get_avail(vq, *flags, &vq->avail->flags);
}
static inline int vhost_get_used_event(struct vhost_virtqueue *vq,
__virtio16 *event)
{
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
- struct vhost_map *map;
- struct vring_avail *avail;
-
- if (!vq->iotlb) {
- rcu_read_lock();
- map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
- if (likely(map)) {
- avail = map->addr;
- *event = (__virtio16)avail->ring[vq->num];
- rcu_read_unlock();
- return 0;
- }
- rcu_read_unlock();
- }
-#endif
-
return vhost_get_avail(vq, *event, vhost_used_event(vq));
}
static inline int vhost_get_used_idx(struct vhost_virtqueue *vq,
__virtio16 *idx)
{
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
- struct vhost_map *map;
- struct vring_used *used;
-
- if (!vq->iotlb) {
- rcu_read_lock();
-
- map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
- if (likely(map)) {
- used = map->addr;
- *idx = used->idx;
- rcu_read_unlock();
- return 0;
- }
-
- rcu_read_unlock();
- }
-#endif
-
return vhost_get_used(vq, *idx, &vq->used->idx);
}
static inline int vhost_get_desc(struct vhost_virtqueue *vq,
struct vring_desc *desc, int idx)
{
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
- struct vhost_map *map;
- struct vring_desc *d;
-
- if (!vq->iotlb) {
- rcu_read_lock();
-
- map = rcu_dereference(vq->maps[VHOST_ADDR_DESC]);
- if (likely(map)) {
- d = map->addr;
- *desc = *(d + idx);
- rcu_read_unlock();
- return 0;
- }
-
- rcu_read_unlock();
- }
-#endif
-
return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc));
}
@@ -1824,32 +1357,12 @@ static bool iotlb_access_ok(struct vhost_virtqueue *vq,
return true;
}
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-static void vhost_vq_map_prefetch(struct vhost_virtqueue *vq)
-{
- struct vhost_map __rcu *map;
- int i;
-
- for (i = 0; i < VHOST_NUM_ADDRS; i++) {
- rcu_read_lock();
- map = rcu_dereference(vq->maps[i]);
- rcu_read_unlock();
- if (unlikely(!map))
- vhost_map_prefetch(vq, i);
- }
-}
-#endif
-
int vq_meta_prefetch(struct vhost_virtqueue *vq)
{
unsigned int num = vq->num;
- if (!vq->iotlb) {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
- vhost_vq_map_prefetch(vq);
-#endif
+ if (!vq->iotlb)
return 1;
- }
return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc,
vhost_get_desc_size(vq, num), VHOST_ADDR_DESC) &&
@@ -2060,16 +1573,6 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d,
mutex_lock(&vq->mutex);
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
- /* Unregister MMU notifer to allow invalidation callback
- * can access vq->uaddrs[] without holding a lock.
- */
- if (d->mm)
- mmu_notifier_unregister(&d->mmu_notifier, d->mm);
-
- vhost_uninit_vq_maps(vq);
-#endif
-
switch (ioctl) {
case VHOST_SET_VRING_NUM:
r = vhost_vring_set_num(d, vq, argp);
@@ -2081,13 +1584,6 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d,
BUG();
}
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
- vhost_setup_vq_uaddr(vq);
-
- if (d->mm)
- mmu_notifier_register(&d->mmu_notifier, d->mm);
-#endif
-
mutex_unlock(&vq->mutex);
return r;
@@ -2688,7 +2184,7 @@ static int get_indirect(struct vhost_virtqueue *vq,
/* If this is an input descriptor, increment that count. */
if (access == VHOST_ACCESS_WO) {
*in_num += ret;
- if (unlikely(log)) {
+ if (unlikely(log && ret)) {
log[*log_num].addr = vhost64_to_cpu(vq, desc.addr);
log[*log_num].len = vhost32_to_cpu(vq, desc.len);
++*log_num;
@@ -2829,7 +2325,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
/* If this is an input descriptor,
* increment that count. */
*in_num += ret;
- if (unlikely(log)) {
+ if (unlikely(log && ret)) {
log[*log_num].addr = vhost64_to_cpu(vq, desc.addr);
log[*log_num].len = vhost32_to_cpu(vq, desc.len);
++*log_num;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 42a8c2a13ab1..a123fd70847e 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -12,9 +12,6 @@
#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>
#include <linux/atomic.h>
-#include <linux/pagemap.h>
-#include <linux/mmu_notifier.h>
-#include <asm/cacheflush.h>
struct vhost_work;
typedef void (*vhost_work_fn_t)(struct vhost_work *work);
@@ -83,24 +80,6 @@ enum vhost_uaddr_type {
VHOST_NUM_ADDRS = 3,
};
-struct vhost_map {
- int npages;
- void *addr;
- struct page **pages;
-};
-
-struct vhost_uaddr {
- unsigned long uaddr;
- size_t size;
- bool write;
-};
-
-#if defined(CONFIG_MMU_NOTIFIER) && ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 0
-#define VHOST_ARCH_CAN_ACCEL_UACCESS 0
-#else
-#define VHOST_ARCH_CAN_ACCEL_UACCESS 0
-#endif
-
/* The virtqueue structure describes a queue attached to a device. */
struct vhost_virtqueue {
struct vhost_dev *dev;
@@ -111,22 +90,7 @@ struct vhost_virtqueue {
struct vring_desc __user *desc;
struct vring_avail __user *avail;
struct vring_used __user *used;
-
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
- /* Read by memory accessors, modified by meta data
- * prefetching, MMU notifier and vring ioctl().
- * Synchonrized through mmu_lock (writers) and RCU (writers
- * and readers).
- */
- struct vhost_map __rcu *maps[VHOST_NUM_ADDRS];
- /* Read by MMU notifier, modified by vring ioctl(),
- * synchronized through MMU notifier
- * registering/unregistering.
- */
- struct vhost_uaddr uaddrs[VHOST_NUM_ADDRS];
-#endif
const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
-
struct file *kick;
struct eventfd_ctx *call_ctx;
struct eventfd_ctx *error_ctx;
@@ -181,8 +145,6 @@ struct vhost_virtqueue {
bool user_be;
#endif
u32 busyloop_timeout;
- spinlock_t mmu_lock;
- int invalidate_count;
};
struct vhost_msg_node {
@@ -196,9 +158,6 @@ struct vhost_msg_node {
struct vhost_dev {
struct mm_struct *mm;
-#ifdef CONFIG_MMU_NOTIFIER
- struct mmu_notifier mmu_notifier;
-#endif
struct mutex mutex;
struct vhost_virtqueue **vqs;
int nvqs;
@@ -214,6 +173,7 @@ struct vhost_dev {
int iov_limit;
int weight;
int byte_weight;
+ u64 kcov_handle;
};
bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len);
diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index 08ad0d1f0476..a0a2d74967ef 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -852,6 +852,12 @@ static inline int xfer_kern(void *src, void *dst, size_t len)
return 0;
}
+static inline int kern_xfer(void *dst, void *src, size_t len)
+{
+ memcpy(dst, src, len);
+ return 0;
+}
+
/**
* vringh_init_kern - initialize a vringh for a kernelspace vring.
* @vrh: the vringh to initialize.
@@ -958,7 +964,7 @@ EXPORT_SYMBOL(vringh_iov_pull_kern);
ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov,
const void *src, size_t len)
{
- return vringh_iov_xfer(wiov, (void *)src, len, xfer_kern);
+ return vringh_iov_xfer(wiov, (void *)src, len, kern_xfer);
}
EXPORT_SYMBOL(vringh_iov_push_kern);
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 6a50e1d0529c..c2d7d57e98cf 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -102,7 +102,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
struct iov_iter iov_iter;
unsigned out, in;
size_t nbytes;
- size_t len;
+ size_t iov_len, payload_len;
int head;
spin_lock_bh(&vsock->send_pkt_list_lock);
@@ -147,8 +147,24 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
break;
}
- len = iov_length(&vq->iov[out], in);
- iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len);
+ iov_len = iov_length(&vq->iov[out], in);
+ if (iov_len < sizeof(pkt->hdr)) {
+ virtio_transport_free_pkt(pkt);
+ vq_err(vq, "Buffer len [%zu] too small\n", iov_len);
+ break;
+ }
+
+ iov_iter_init(&iov_iter, READ, &vq->iov[out], in, iov_len);
+ payload_len = pkt->len - pkt->off;
+
+ /* If the packet is greater than the space available in the
+ * buffer, we split it using multiple buffers.
+ */
+ if (payload_len > iov_len - sizeof(pkt->hdr))
+ payload_len = iov_len - sizeof(pkt->hdr);
+
+ /* Set the correct length in the header */
+ pkt->hdr.len = cpu_to_le32(payload_len);
nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
if (nbytes != sizeof(pkt->hdr)) {
@@ -157,33 +173,47 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
break;
}
- nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter);
- if (nbytes != pkt->len) {
+ nbytes = copy_to_iter(pkt->buf + pkt->off, payload_len,
+ &iov_iter);
+ if (nbytes != payload_len) {
virtio_transport_free_pkt(pkt);
vq_err(vq, "Faulted on copying pkt buf\n");
break;
}
- vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
+ vhost_add_used(vq, head, sizeof(pkt->hdr) + payload_len);
added = true;
- if (pkt->reply) {
- int val;
-
- val = atomic_dec_return(&vsock->queued_replies);
-
- /* Do we have resources to resume tx processing? */
- if (val + 1 == tx_vq->num)
- restart_tx = true;
- }
-
/* Deliver to monitoring devices all correctly transmitted
* packets.
*/
virtio_transport_deliver_tap_pkt(pkt);
- total_len += pkt->len;
- virtio_transport_free_pkt(pkt);
+ pkt->off += payload_len;
+ total_len += payload_len;
+
+ /* If we didn't send all the payload we can requeue the packet
+ * to send it with the next available buffer.
+ */
+ if (pkt->off < pkt->len) {
+ spin_lock_bh(&vsock->send_pkt_list_lock);
+ list_add(&pkt->list, &vsock->send_pkt_list);
+ spin_unlock_bh(&vsock->send_pkt_list_lock);
+ } else {
+ if (pkt->reply) {
+ int val;
+
+ val = atomic_dec_return(&vsock->queued_replies);
+
+ /* Do we have resources to resume tx
+ * processing?
+ */
+ if (val + 1 == tx_vq->num)
+ restart_tx = true;
+ }
+
+ virtio_transport_free_pkt(pkt);
+ }
} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
if (added)
vhost_signal(&vsock->dev, vq);
@@ -329,6 +359,8 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
return NULL;
}
+ pkt->buf_len = pkt->len;
+
nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
if (nbytes != pkt->len) {
vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
@@ -352,6 +384,49 @@ static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
return val < vq->num;
}
+static struct virtio_transport vhost_transport = {
+ .transport = {
+ .module = THIS_MODULE,
+
+ .get_local_cid = vhost_transport_get_local_cid,
+
+ .init = virtio_transport_do_socket_init,
+ .destruct = virtio_transport_destruct,
+ .release = virtio_transport_release,
+ .connect = virtio_transport_connect,
+ .shutdown = virtio_transport_shutdown,
+ .cancel_pkt = vhost_transport_cancel_pkt,
+
+ .dgram_enqueue = virtio_transport_dgram_enqueue,
+ .dgram_dequeue = virtio_transport_dgram_dequeue,
+ .dgram_bind = virtio_transport_dgram_bind,
+ .dgram_allow = virtio_transport_dgram_allow,
+
+ .stream_enqueue = virtio_transport_stream_enqueue,
+ .stream_dequeue = virtio_transport_stream_dequeue,
+ .stream_has_data = virtio_transport_stream_has_data,
+ .stream_has_space = virtio_transport_stream_has_space,
+ .stream_rcvhiwat = virtio_transport_stream_rcvhiwat,
+ .stream_is_active = virtio_transport_stream_is_active,
+ .stream_allow = virtio_transport_stream_allow,
+
+ .notify_poll_in = virtio_transport_notify_poll_in,
+ .notify_poll_out = virtio_transport_notify_poll_out,
+ .notify_recv_init = virtio_transport_notify_recv_init,
+ .notify_recv_pre_block = virtio_transport_notify_recv_pre_block,
+ .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue,
+ .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
+ .notify_send_init = virtio_transport_notify_send_init,
+ .notify_send_pre_block = virtio_transport_notify_send_pre_block,
+ .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
+ .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
+ .notify_buffer_size = virtio_transport_notify_buffer_size,
+
+ },
+
+ .send_pkt = vhost_transport_send_pkt,
+};
+
static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
{
struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
@@ -405,8 +480,10 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
virtio_transport_deliver_tap_pkt(pkt);
/* Only accept correctly addressed packets */
- if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid)
- virtio_transport_recv_pkt(pkt);
+ if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid &&
+ le64_to_cpu(pkt->hdr.dst_cid) ==
+ vhost_transport_get_local_cid())
+ virtio_transport_recv_pkt(&vhost_transport, pkt);
else
virtio_transport_free_pkt(pkt);
@@ -643,6 +720,12 @@ static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
if (guest_cid > U32_MAX)
return -EINVAL;
+ /* Refuse if CID is assigned to the guest->host transport (i.e. nested
+ * VM), to make the loopback work.
+ */
+ if (vsock_find_cid(guest_cid))
+ return -EADDRINUSE;
+
/* Refuse if CID is already in use */
mutex_lock(&vhost_vsock_mutex);
other = vhost_vsock_get(guest_cid);
@@ -729,23 +812,13 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
}
}
-#ifdef CONFIG_COMPAT
-static long vhost_vsock_dev_compat_ioctl(struct file *f, unsigned int ioctl,
- unsigned long arg)
-{
- return vhost_vsock_dev_ioctl(f, ioctl, (unsigned long)compat_ptr(arg));
-}
-#endif
-
static const struct file_operations vhost_vsock_fops = {
.owner = THIS_MODULE,
.open = vhost_vsock_dev_open,
.release = vhost_vsock_dev_release,
.llseek = noop_llseek,
.unlocked_ioctl = vhost_vsock_dev_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = vhost_vsock_dev_compat_ioctl,
-#endif
+ .compat_ioctl = compat_ptr_ioctl,
};
static struct miscdevice vhost_vsock_misc = {
@@ -754,57 +827,12 @@ static struct miscdevice vhost_vsock_misc = {
.fops = &vhost_vsock_fops,
};
-static struct virtio_transport vhost_transport = {
- .transport = {
- .get_local_cid = vhost_transport_get_local_cid,
-
- .init = virtio_transport_do_socket_init,
- .destruct = virtio_transport_destruct,
- .release = virtio_transport_release,
- .connect = virtio_transport_connect,
- .shutdown = virtio_transport_shutdown,
- .cancel_pkt = vhost_transport_cancel_pkt,
-
- .dgram_enqueue = virtio_transport_dgram_enqueue,
- .dgram_dequeue = virtio_transport_dgram_dequeue,
- .dgram_bind = virtio_transport_dgram_bind,
- .dgram_allow = virtio_transport_dgram_allow,
-
- .stream_enqueue = virtio_transport_stream_enqueue,
- .stream_dequeue = virtio_transport_stream_dequeue,
- .stream_has_data = virtio_transport_stream_has_data,
- .stream_has_space = virtio_transport_stream_has_space,
- .stream_rcvhiwat = virtio_transport_stream_rcvhiwat,
- .stream_is_active = virtio_transport_stream_is_active,
- .stream_allow = virtio_transport_stream_allow,
-
- .notify_poll_in = virtio_transport_notify_poll_in,
- .notify_poll_out = virtio_transport_notify_poll_out,
- .notify_recv_init = virtio_transport_notify_recv_init,
- .notify_recv_pre_block = virtio_transport_notify_recv_pre_block,
- .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue,
- .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
- .notify_send_init = virtio_transport_notify_send_init,
- .notify_send_pre_block = virtio_transport_notify_send_pre_block,
- .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
- .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
-
- .set_buffer_size = virtio_transport_set_buffer_size,
- .set_min_buffer_size = virtio_transport_set_min_buffer_size,
- .set_max_buffer_size = virtio_transport_set_max_buffer_size,
- .get_buffer_size = virtio_transport_get_buffer_size,
- .get_min_buffer_size = virtio_transport_get_min_buffer_size,
- .get_max_buffer_size = virtio_transport_get_max_buffer_size,
- },
-
- .send_pkt = vhost_transport_send_pkt,
-};
-
static int __init vhost_vsock_init(void)
{
int ret;
- ret = vsock_core_init(&vhost_transport.transport);
+ ret = vsock_core_register(&vhost_transport.transport,
+ VSOCK_TRANSPORT_F_H2G);
if (ret < 0)
return ret;
return misc_register(&vhost_vsock_misc);
@@ -813,7 +841,7 @@ static int __init vhost_vsock_init(void)
static void __exit vhost_vsock_exit(void)
{
misc_deregister(&vhost_vsock_misc);
- vsock_core_exit();
+ vsock_core_unregister(&vhost_transport.transport);
};
module_init(vhost_vsock_init);
OpenPOWER on IntegriCloud