Diffstat (limited to 'virt')
-rw-r--r--	virt/kvm/eventfd.c	  5
-rw-r--r--	virt/kvm/kvm_main.c	139
2 files changed, 100 insertions, 44 deletions
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 2ca4535f4fb7..3656849f78a0 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -313,8 +313,9 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
 		if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) {
 			/*
 			 * This rcu_assign_pointer is needed for when
-			 * another thread calls kvm_irqfd_update before
-			 * we flush workqueue below.
+			 * another thread calls kvm_irq_routing_update before
+			 * we flush workqueue below (we synchronize with
+			 * kvm_irq_routing_update using irqfds.lock).
 			 * It is paired with synchronize_rcu done by caller
 			 * of that function.
 			 */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f29abeb6a912..1fa0d292119a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -69,7 +69,7 @@ MODULE_LICENSE("GPL");
  * 		kvm->lock --> kvm->slots_lock --> kvm->irq_lock
  */
 
-DEFINE_SPINLOCK(kvm_lock);
+DEFINE_RAW_SPINLOCK(kvm_lock);
 LIST_HEAD(vm_list);
 
 static cpumask_var_t cpus_hardware_enabled;
@@ -137,6 +137,14 @@ void vcpu_load(struct kvm_vcpu *vcpu)
 	int cpu;
 
 	mutex_lock(&vcpu->mutex);
+	if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
+		/* The thread running this VCPU changed. */
+		struct pid *oldpid = vcpu->pid;
+		struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
+		rcu_assign_pointer(vcpu->pid, newpid);
+		synchronize_rcu();
+		put_pid(oldpid);
+	}
 	cpu = get_cpu();
 	preempt_notifier_register(&vcpu->preempt_notifier);
 	kvm_arch_vcpu_load(vcpu, cpu);
@@ -165,13 +173,16 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 
 	zalloc_cpumask_var(&cpus, GFP_ATOMIC);
 
-	raw_spin_lock(&kvm->requests_lock);
-	me = smp_processor_id();
+	me = get_cpu();
 	kvm_for_each_vcpu(i, vcpu, kvm) {
-		if (kvm_make_check_request(req, vcpu))
-			continue;
+		kvm_make_request(req, vcpu);
 		cpu = vcpu->cpu;
-		if (cpus != NULL && cpu != -1 && cpu != me)
+
+		/* Set ->requests bit before we read ->mode */
+		smp_mb();
+
+		if (cpus != NULL && cpu != -1 && cpu != me &&
+		      kvm_vcpu_exiting_guest_mode(vcpu) != OUTSIDE_GUEST_MODE)
 			cpumask_set_cpu(cpu, cpus);
 	}
 	if (unlikely(cpus == NULL))
@@ -180,7 +191,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 		smp_call_function_many(cpus, ack_flush, NULL, 1);
 	else
 		called = false;
-	raw_spin_unlock(&kvm->requests_lock);
+	put_cpu();
 	free_cpumask_var(cpus);
 	return called;
 }
@@ -209,6 +220,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	vcpu->cpu = -1;
 	vcpu->kvm = kvm;
 	vcpu->vcpu_id = id;
+	vcpu->pid = NULL;
 	init_waitqueue_head(&vcpu->wq);
 	kvm_async_pf_vcpu_init(vcpu);
 
@@ -233,6 +245,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_init);
 
 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
+	put_pid(vcpu->pid);
 	kvm_arch_vcpu_uninit(vcpu);
 	free_page((unsigned long)vcpu->run);
 }
@@ -463,15 +476,14 @@ static struct kvm *kvm_create_vm(void)
 	kvm->mm = current->mm;
 	atomic_inc(&kvm->mm->mm_count);
 	spin_lock_init(&kvm->mmu_lock);
-	raw_spin_lock_init(&kvm->requests_lock);
 	kvm_eventfd_init(kvm);
 	mutex_init(&kvm->lock);
 	mutex_init(&kvm->irq_lock);
 	mutex_init(&kvm->slots_lock);
 	atomic_set(&kvm->users_count, 1);
-	spin_lock(&kvm_lock);
+	raw_spin_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);
-	spin_unlock(&kvm_lock);
+	raw_spin_unlock(&kvm_lock);
 
 	return kvm;
 
@@ -544,9 +556,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	struct mm_struct *mm = kvm->mm;
 
 	kvm_arch_sync_events(kvm);
-	spin_lock(&kvm_lock);
+	raw_spin_lock(&kvm_lock);
 	list_del(&kvm->vm_list);
-	spin_unlock(&kvm_lock);
+	raw_spin_unlock(&kvm_lock);
 	kvm_free_irq_routing(kvm);
 	for (i = 0; i < KVM_NR_BUSES; i++)
 		kvm_io_bus_destroy(kvm->buses[i]);
@@ -588,6 +600,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+#ifndef CONFIG_S390
 /*
  * Allocation size is twice as large as the actual dirty bitmap size.
  * This makes it possible to do double buffering: see x86's
@@ -608,6 +621,7 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
 	memslot->dirty_bitmap_head = memslot->dirty_bitmap;
 	return 0;
 }
+#endif /* !CONFIG_S390 */
 
 /*
  * Allocate some memory and give it an address in the guest physical address
@@ -621,7 +635,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 			    struct kvm_userspace_memory_region *mem,
 			    int user_alloc)
 {
-	int r, flush_shadow = 0;
+	int r;
 	gfn_t base_gfn;
 	unsigned long npages;
 	unsigned long i;
@@ -741,8 +755,6 @@ skip_lpage:
 		if (kvm_create_dirty_bitmap(&new) < 0)
 			goto out_free;
 		/* destroy any largepage mappings for dirty tracking */
-		if (old.npages)
-			flush_shadow = 1;
 	}
 #else /* not defined CONFIG_S390 */
 	new.user_alloc = user_alloc;
@@ -813,9 +825,6 @@ skip_lpage:
 	kvm_free_physmem_slot(&old, &new);
 	kfree(old_memslots);
 
-	if (flush_shadow)
-		kvm_arch_flush_shadow(kvm);
-
 	return 0;
 
 out_free:
@@ -1029,6 +1038,15 @@ static pfn_t get_fault_pfn(void)
 	return fault_pfn;
 }
 
+static inline int check_user_page_hwpoison(unsigned long addr)
+{
+	int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE;
+
+	rc = __get_user_pages(current, current->mm, addr, 1,
+			      flags, NULL, NULL, NULL);
+	return rc == -EHWPOISON;
+}
+
 static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
 			bool *async, bool write_fault, bool *writable)
 {
@@ -1076,7 +1094,7 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
 			return get_fault_pfn();
 
 		down_read(&current->mm->mmap_sem);
-		if (is_hwpoison_address(addr)) {
+		if (check_user_page_hwpoison(addr)) {
 			up_read(&current->mm->mmap_sem);
 			get_page(hwpoison_page);
 			return page_to_pfn(hwpoison_page);
@@ -1466,18 +1484,55 @@ void kvm_resched(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_resched);
 
-void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu)
+void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 {
-	ktime_t expires;
-	DEFINE_WAIT(wait);
-
-	prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
-
-	/* Sleep for 100 us, and hope lock-holder got scheduled */
-	expires = ktime_add_ns(ktime_get(), 100000UL);
-	schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
+	struct kvm *kvm = me->kvm;
+	struct kvm_vcpu *vcpu;
+	int last_boosted_vcpu = me->kvm->last_boosted_vcpu;
+	int yielded = 0;
+	int pass;
+	int i;
 
-	finish_wait(&vcpu->wq, &wait);
+	/*
+	 * We boost the priority of a VCPU that is runnable but not
+	 * currently running, because it got preempted by something
+	 * else and called schedule in __vcpu_run. Hopefully that
+	 * VCPU is holding the lock that we need and will release it.
+	 * We approximate round-robin by starting at the last boosted VCPU.
+	 */
+	for (pass = 0; pass < 2 && !yielded; pass++) {
+		kvm_for_each_vcpu(i, vcpu, kvm) {
+			struct task_struct *task = NULL;
+			struct pid *pid;
+			if (!pass && i < last_boosted_vcpu) {
+				i = last_boosted_vcpu;
+				continue;
+			} else if (pass && i > last_boosted_vcpu)
+				break;
+			if (vcpu == me)
+				continue;
+			if (waitqueue_active(&vcpu->wq))
+				continue;
+			rcu_read_lock();
+			pid = rcu_dereference(vcpu->pid);
+			if (pid)
+				task = get_pid_task(vcpu->pid, PIDTYPE_PID);
+			rcu_read_unlock();
+			if (!task)
+				continue;
+			if (task->flags & PF_VCPU) {
+				put_task_struct(task);
+				continue;
+			}
+			if (yield_to(task, 1)) {
+				put_task_struct(task);
+				kvm->last_boosted_vcpu = i;
+				yielded = 1;
+				break;
+			}
+			put_task_struct(task);
+		}
+	}
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
 
@@ -2122,9 +2177,9 @@ static void hardware_enable_nolock(void *junk)
 
 static void hardware_enable(void *junk)
 {
-	spin_lock(&kvm_lock);
+	raw_spin_lock(&kvm_lock);
 	hardware_enable_nolock(junk);
-	spin_unlock(&kvm_lock);
+	raw_spin_unlock(&kvm_lock);
 }
 
 static void hardware_disable_nolock(void *junk)
@@ -2139,9 +2194,9 @@ static void hardware_disable_nolock(void *junk)
 
 static void hardware_disable(void *junk)
 {
-	spin_lock(&kvm_lock);
+	raw_spin_lock(&kvm_lock);
 	hardware_disable_nolock(junk);
-	spin_unlock(&kvm_lock);
+	raw_spin_unlock(&kvm_lock);
 }
 
 static void hardware_disable_all_nolock(void)
@@ -2155,16 +2210,16 @@ static void hardware_disable_all_nolock(void)
 
 static void hardware_disable_all(void)
 {
-	spin_lock(&kvm_lock);
+	raw_spin_lock(&kvm_lock);
 	hardware_disable_all_nolock();
-	spin_unlock(&kvm_lock);
+	raw_spin_unlock(&kvm_lock);
 }
 
 static int hardware_enable_all(void)
 {
 	int r = 0;
 
-	spin_lock(&kvm_lock);
+	raw_spin_lock(&kvm_lock);
 
 	kvm_usage_count++;
 	if (kvm_usage_count == 1) {
@@ -2177,7 +2232,7 @@ static int hardware_enable_all(void)
 		}
 	}
 
-	spin_unlock(&kvm_lock);
+	raw_spin_unlock(&kvm_lock);
 
 	return r;
 }
@@ -2339,10 +2394,10 @@ static int vm_stat_get(void *_offset, u64 *val)
 	struct kvm *kvm;
 
 	*val = 0;
-	spin_lock(&kvm_lock);
+	raw_spin_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list)
 		*val += *(u32 *)((void *)kvm + offset);
-	spin_unlock(&kvm_lock);
+	raw_spin_unlock(&kvm_lock);
 	return 0;
 }
 
@@ -2356,12 +2411,12 @@ static int vcpu_stat_get(void *_offset, u64 *val)
 	int i;
 
 	*val = 0;
-	spin_lock(&kvm_lock);
+	raw_spin_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list)
 		kvm_for_each_vcpu(i, vcpu, kvm)
 			*val += *(u32 *)((void *)vcpu + offset);
-	spin_unlock(&kvm_lock);
+	raw_spin_unlock(&kvm_lock);
 	return 0;
 }
 
@@ -2402,7 +2457,7 @@ static int kvm_suspend(struct sys_device *dev, pm_message_t state)
 static int kvm_resume(struct sys_device *dev)
 {
 	if (kvm_usage_count) {
-		WARN_ON(spin_is_locked(&kvm_lock));
+		WARN_ON(raw_spin_is_locked(&kvm_lock));
 		hardware_enable_nolock(NULL);
 	}
 	return 0;