Diffstat (limited to 'virt')
-rw-r--r--  virt/kvm/eventfd.c     5
-rw-r--r--  virt/kvm/kvm_main.c  139
2 files changed, 100 insertions, 44 deletions
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 2ca4535f4fb7..3656849f78a0 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -313,8 +313,9 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) {
/*
* This rcu_assign_pointer is needed for when
- * another thread calls kvm_irqfd_update before
- * we flush workqueue below.
+ * another thread calls kvm_irq_routing_update before
+ * we flush workqueue below (we synchronize with
+ * kvm_irq_routing_update using irqfds.lock).
* It is paired with synchronize_rcu done by caller
* of that function.
*/
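
[Editor's note: the comment above relies on the standard RCU publish/retire pattern. A minimal, generic sketch of the rcu_assign_pointer()/synchronize_rcu() pairing follows; the `cfg` object and `cfg_replace()` helper are hypothetical and are not KVM code.]

/* Illustrative only: generic RCU pointer replacement, not taken from this patch. */
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct cfg { int val; };                /* hypothetical payload */
static struct cfg __rcu *live_cfg;      /* readers use rcu_dereference() under rcu_read_lock() */

/* Update side; the caller is assumed to hold the update-side lock. */
static void cfg_replace(struct cfg *newc)
{
	struct cfg *old = rcu_dereference_protected(live_cfg, 1);

	rcu_assign_pointer(live_cfg, newc);  /* publish the new pointer to readers */
	synchronize_rcu();                   /* wait out all pre-existing readers */
	kfree(old);                          /* the old copy can no longer be referenced */
}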
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f29abeb6a912..1fa0d292119a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -69,7 +69,7 @@ MODULE_LICENSE("GPL");
* kvm->lock --> kvm->slots_lock --> kvm->irq_lock
*/
-DEFINE_SPINLOCK(kvm_lock);
+DEFINE_RAW_SPINLOCK(kvm_lock);
LIST_HEAD(vm_list);
static cpumask_var_t cpus_hardware_enabled;
@@ -137,6 +137,14 @@ void vcpu_load(struct kvm_vcpu *vcpu)
int cpu;
mutex_lock(&vcpu->mutex);
+ if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
+ /* The thread running this VCPU changed. */
+ struct pid *oldpid = vcpu->pid;
+ struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
+ rcu_assign_pointer(vcpu->pid, newpid);
+ synchronize_rcu();
+ put_pid(oldpid);
+ }
cpu = get_cpu();
preempt_notifier_register(&vcpu->preempt_notifier);
kvm_arch_vcpu_load(vcpu, cpu);
@@ -165,13 +173,16 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
zalloc_cpumask_var(&cpus, GFP_ATOMIC);
- raw_spin_lock(&kvm->requests_lock);
- me = smp_processor_id();
+ me = get_cpu();
kvm_for_each_vcpu(i, vcpu, kvm) {
- if (kvm_make_check_request(req, vcpu))
- continue;
+ kvm_make_request(req, vcpu);
cpu = vcpu->cpu;
- if (cpus != NULL && cpu != -1 && cpu != me)
+
+ /* Set ->requests bit before we read ->mode */
+ smp_mb();
+
+ if (cpus != NULL && cpu != -1 && cpu != me &&
+ kvm_vcpu_exiting_guest_mode(vcpu) != OUTSIDE_GUEST_MODE)
cpumask_set_cpu(cpu, cpus);
}
if (unlikely(cpus == NULL))
@@ -180,7 +191,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
smp_call_function_many(cpus, ack_flush, NULL, 1);
else
called = false;
- raw_spin_unlock(&kvm->requests_lock);
+ put_cpu();
free_cpumask_var(cpus);
return called;
}
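
[Editor's note: the smp_mb() added above orders "store the request bit" before "load vcpu->mode"; it presumably pairs with a barrier on the guest-entry path, which is not part of this diff. A stand-alone C11 analogue of that store/fence/load pairing is sketched below; all names are hypothetical and this is not kernel code.]

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int requests;   /* stands in for vcpu->requests */
static atomic_int mode;       /* 0 = outside guest mode, 1 = in guest mode */

/* Requester side, mirroring make_all_cpus_request(): set the request, then check mode. */
static bool requester_needs_ipi(void)
{
	atomic_store_explicit(&requests, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);   /* "set ->requests bit before we read ->mode" */
	return atomic_load_explicit(&mode, memory_order_relaxed) == 1;
}

/* Entry side: mark guest mode, then check for pending requests. */
static bool entry_may_run_guest(void)
{
	atomic_store_explicit(&mode, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);   /* pairs with the fence above */
	return atomic_load_explicit(&requests, memory_order_relaxed) == 0;
}

With both fences in place at least one side observes the other's store, so a request can never be both missed by the requester (no IPI sent) and missed by the entry path.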
@@ -209,6 +220,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
vcpu->cpu = -1;
vcpu->kvm = kvm;
vcpu->vcpu_id = id;
+ vcpu->pid = NULL;
init_waitqueue_head(&vcpu->wq);
kvm_async_pf_vcpu_init(vcpu);
@@ -233,6 +245,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_init);
void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
{
+ put_pid(vcpu->pid);
kvm_arch_vcpu_uninit(vcpu);
free_page((unsigned long)vcpu->run);
}
@@ -463,15 +476,14 @@ static struct kvm *kvm_create_vm(void)
kvm->mm = current->mm;
atomic_inc(&kvm->mm->mm_count);
spin_lock_init(&kvm->mmu_lock);
- raw_spin_lock_init(&kvm->requests_lock);
kvm_eventfd_init(kvm);
mutex_init(&kvm->lock);
mutex_init(&kvm->irq_lock);
mutex_init(&kvm->slots_lock);
atomic_set(&kvm->users_count, 1);
- spin_lock(&kvm_lock);
+ raw_spin_lock(&kvm_lock);
list_add(&kvm->vm_list, &vm_list);
- spin_unlock(&kvm_lock);
+ raw_spin_unlock(&kvm_lock);
return kvm;
@@ -544,9 +556,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
struct mm_struct *mm = kvm->mm;
kvm_arch_sync_events(kvm);
- spin_lock(&kvm_lock);
+ raw_spin_lock(&kvm_lock);
list_del(&kvm->vm_list);
- spin_unlock(&kvm_lock);
+ raw_spin_unlock(&kvm_lock);
kvm_free_irq_routing(kvm);
for (i = 0; i < KVM_NR_BUSES; i++)
kvm_io_bus_destroy(kvm->buses[i]);
@@ -588,6 +600,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
return 0;
}
+#ifndef CONFIG_S390
/*
* Allocation size is twice as large as the actual dirty bitmap size.
* This makes it possible to do double buffering: see x86's
@@ -608,6 +621,7 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
memslot->dirty_bitmap_head = memslot->dirty_bitmap;
return 0;
}
+#endif /* !CONFIG_S390 */
/*
* Allocate some memory and give it an address in the guest physical address
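
[Editor's note: the dirty-bitmap hunk above mentions that the allocation is twice the bitmap size to allow double buffering, as done in x86's kvm_vm_ioctl_get_dirty_log(). The sketch below illustrates the general double-buffering idea only; the `dbitmap` type and its helpers are hypothetical and do not reproduce KVM's actual bookkeeping.]

#include <stdlib.h>
#include <string.h>

struct dbitmap {
	unsigned long *head;    /* start of the 2x-sized allocation */
	unsigned long *active;  /* half currently receiving dirty bits */
	size_t words;           /* size of one bitmap, in longs */
};

static int dbitmap_init(struct dbitmap *d, size_t words)
{
	d->head = calloc(2 * words, sizeof(unsigned long));
	if (!d->head)
		return -1;
	d->active = d->head;
	d->words = words;
	return 0;
}

/* Hand back the half that was being written and switch writers to the other half. */
static unsigned long *dbitmap_fetch(struct dbitmap *d)
{
	unsigned long *snapshot = d->active;

	d->active = (d->active == d->head) ? d->head + d->words : d->head;
	memset(d->active, 0, d->words * sizeof(unsigned long));
	return snapshot;
}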
@@ -621,7 +635,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
int user_alloc)
{
- int r, flush_shadow = 0;
+ int r;
gfn_t base_gfn;
unsigned long npages;
unsigned long i;
@@ -741,8 +755,6 @@ skip_lpage:
if (kvm_create_dirty_bitmap(&new) < 0)
goto out_free;
/* destroy any largepage mappings for dirty tracking */
- if (old.npages)
- flush_shadow = 1;
}
#else /* not defined CONFIG_S390 */
new.user_alloc = user_alloc;
@@ -813,9 +825,6 @@ skip_lpage:
kvm_free_physmem_slot(&old, &new);
kfree(old_memslots);
- if (flush_shadow)
- kvm_arch_flush_shadow(kvm);
-
return 0;
out_free:
@@ -1029,6 +1038,15 @@ static pfn_t get_fault_pfn(void)
return fault_pfn;
}
+static inline int check_user_page_hwpoison(unsigned long addr)
+{
+ int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE;
+
+ rc = __get_user_pages(current, current->mm, addr, 1,
+ flags, NULL, NULL, NULL);
+ return rc == -EHWPOISON;
+}
+
static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
bool *async, bool write_fault, bool *writable)
{
@@ -1076,7 +1094,7 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
return get_fault_pfn();
down_read(&current->mm->mmap_sem);
- if (is_hwpoison_address(addr)) {
+ if (check_user_page_hwpoison(addr)) {
up_read(&current->mm->mmap_sem);
get_page(hwpoison_page);
return page_to_pfn(hwpoison_page);
@@ -1466,18 +1484,55 @@ void kvm_resched(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_resched);
-void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu)
+void kvm_vcpu_on_spin(struct kvm_vcpu *me)
{
- ktime_t expires;
- DEFINE_WAIT(wait);
-
- prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
-
- /* Sleep for 100 us, and hope lock-holder got scheduled */
- expires = ktime_add_ns(ktime_get(), 100000UL);
- schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
+ struct kvm *kvm = me->kvm;
+ struct kvm_vcpu *vcpu;
+ int last_boosted_vcpu = me->kvm->last_boosted_vcpu;
+ int yielded = 0;
+ int pass;
+ int i;
- finish_wait(&vcpu->wq, &wait);
+ /*
+ * We boost the priority of a VCPU that is runnable but not
+ * currently running, because it got preempted by something
+ * else and called schedule in __vcpu_run. Hopefully that
+ * VCPU is holding the lock that we need and will release it.
+ * We approximate round-robin by starting at the last boosted VCPU.
+ */
+ for (pass = 0; pass < 2 && !yielded; pass++) {
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ struct task_struct *task = NULL;
+ struct pid *pid;
+ if (!pass && i < last_boosted_vcpu) {
+ i = last_boosted_vcpu;
+ continue;
+ } else if (pass && i > last_boosted_vcpu)
+ break;
+ if (vcpu == me)
+ continue;
+ if (waitqueue_active(&vcpu->wq))
+ continue;
+ rcu_read_lock();
+ pid = rcu_dereference(vcpu->pid);
+ if (pid)
+ task = get_pid_task(vcpu->pid, PIDTYPE_PID);
+ rcu_read_unlock();
+ if (!task)
+ continue;
+ if (task->flags & PF_VCPU) {
+ put_task_struct(task);
+ continue;
+ }
+ if (yield_to(task, 1)) {
+ put_task_struct(task);
+ kvm->last_boosted_vcpu = i;
+ yielded = 1;
+ break;
+ }
+ put_task_struct(task);
+ }
+ }
}
EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
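
[Editor's note: the two-pass loop in kvm_vcpu_on_spin() above approximates round-robin by scanning from just past the last boosted VCPU to the end, then wrapping around. The sketch below isolates that scan order only; the function, its parameters, and the try_boost() callback are hypothetical and omit all the task/PID handling shown in the patch.]

/* Illustration of the two-pass, "start after the last boosted index" scan. */
static int pick_candidate(int nr_vcpus, int last_boosted, int self,
			  int (*try_boost)(int idx))
{
	int pass, i;

	for (pass = 0; pass < 2; pass++) {
		for (i = 0; i < nr_vcpus; i++) {
			if (!pass && i <= last_boosted)
				continue;       /* first pass: indices after last_boosted */
			if (pass && i > last_boosted)
				break;          /* second pass: wrap around up to last_boosted */
			if (i == self)
				continue;       /* never boost ourselves */
			if (try_boost(i))
				return i;       /* becomes the new last_boosted */
		}
	}
	return -1;
}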
@@ -2122,9 +2177,9 @@ static void hardware_enable_nolock(void *junk)
static void hardware_enable(void *junk)
{
- spin_lock(&kvm_lock);
+ raw_spin_lock(&kvm_lock);
hardware_enable_nolock(junk);
- spin_unlock(&kvm_lock);
+ raw_spin_unlock(&kvm_lock);
}
static void hardware_disable_nolock(void *junk)
@@ -2139,9 +2194,9 @@ static void hardware_disable_nolock(void *junk)
static void hardware_disable(void *junk)
{
- spin_lock(&kvm_lock);
+ raw_spin_lock(&kvm_lock);
hardware_disable_nolock(junk);
- spin_unlock(&kvm_lock);
+ raw_spin_unlock(&kvm_lock);
}
static void hardware_disable_all_nolock(void)
@@ -2155,16 +2210,16 @@ static void hardware_disable_all_nolock(void)
static void hardware_disable_all(void)
{
- spin_lock(&kvm_lock);
+ raw_spin_lock(&kvm_lock);
hardware_disable_all_nolock();
- spin_unlock(&kvm_lock);
+ raw_spin_unlock(&kvm_lock);
}
static int hardware_enable_all(void)
{
int r = 0;
- spin_lock(&kvm_lock);
+ raw_spin_lock(&kvm_lock);
kvm_usage_count++;
if (kvm_usage_count == 1) {
@@ -2177,7 +2232,7 @@ static int hardware_enable_all(void)
}
}
- spin_unlock(&kvm_lock);
+ raw_spin_unlock(&kvm_lock);
return r;
}
@@ -2339,10 +2394,10 @@ static int vm_stat_get(void *_offset, u64 *val)
struct kvm *kvm;
*val = 0;
- spin_lock(&kvm_lock);
+ raw_spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
*val += *(u32 *)((void *)kvm + offset);
- spin_unlock(&kvm_lock);
+ raw_spin_unlock(&kvm_lock);
return 0;
}
@@ -2356,12 +2411,12 @@ static int vcpu_stat_get(void *_offset, u64 *val)
int i;
*val = 0;
- spin_lock(&kvm_lock);
+ raw_spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
kvm_for_each_vcpu(i, vcpu, kvm)
*val += *(u32 *)((void *)vcpu + offset);
- spin_unlock(&kvm_lock);
+ raw_spin_unlock(&kvm_lock);
return 0;
}
@@ -2402,7 +2457,7 @@ static int kvm_suspend(struct sys_device *dev, pm_message_t state)
static int kvm_resume(struct sys_device *dev)
{
if (kvm_usage_count) {
- WARN_ON(spin_is_locked(&kvm_lock));
+ WARN_ON(raw_spin_is_locked(&kvm_lock));
hardware_enable_nolock(NULL);
}
return 0;