Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r--  virt/kvm/kvm_main.c | 245
1 file changed, 182 insertions(+), 63 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 37af23052470..81dfc73d3df3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -148,6 +148,7 @@ int vcpu_load(struct kvm_vcpu *vcpu)
         put_cpu();
         return 0;
 }
+EXPORT_SYMBOL_GPL(vcpu_load);
 
 void vcpu_put(struct kvm_vcpu *vcpu)
 {
@@ -157,6 +158,7 @@ void vcpu_put(struct kvm_vcpu *vcpu)
         preempt_enable();
         mutex_unlock(&vcpu->mutex);
 }
+EXPORT_SYMBOL_GPL(vcpu_put);
 
 static void ack_flush(void *_completed)
 {
@@ -557,9 +559,11 @@ static void kvm_destroy_vm_debugfs(struct kvm *kvm)
 
         debugfs_remove_recursive(kvm->debugfs_dentry);
 
-        for (i = 0; i < kvm_debugfs_num_entries; i++)
-                kfree(kvm->debugfs_stat_data[i]);
-        kfree(kvm->debugfs_stat_data);
+        if (kvm->debugfs_stat_data) {
+                for (i = 0; i < kvm_debugfs_num_entries; i++)
+                        kfree(kvm->debugfs_stat_data[i]);
+                kfree(kvm->debugfs_stat_data);
+        }
 }
 
 static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
@@ -694,6 +698,11 @@ static void kvm_destroy_devices(struct kvm *kvm)
 {
         struct kvm_device *dev, *tmp;
 
+        /*
+         * We do not need to take the kvm->lock here, because nobody else
+         * has a reference to the struct kvm at this point and therefore
+         * cannot access the devices list anyhow.
+         */
         list_for_each_entry_safe(dev, tmp, &kvm->devices, vm_node) {
                 list_del(&dev->vm_node);
                 dev->ops->destroy(dev);
@@ -1442,6 +1451,52 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)
         return true;
 }
 
+static int hva_to_pfn_remapped(struct vm_area_struct *vma,
+                               unsigned long addr, bool *async,
+                               bool write_fault, kvm_pfn_t *p_pfn)
+{
+        unsigned long pfn;
+        int r;
+
+        r = follow_pfn(vma, addr, &pfn);
+        if (r) {
+                /*
+                 * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does
+                 * not call the fault handler, so do it here.
+                 */
+                bool unlocked = false;
+                r = fixup_user_fault(current, current->mm, addr,
+                                     (write_fault ? FAULT_FLAG_WRITE : 0),
+                                     &unlocked);
+                if (unlocked)
+                        return -EAGAIN;
+                if (r)
+                        return r;
+
+                r = follow_pfn(vma, addr, &pfn);
+                if (r)
+                        return r;
+
+        }
+
+
+        /*
+         * Get a reference here because callers of *hva_to_pfn* and
+         * *gfn_to_pfn* ultimately call kvm_release_pfn_clean on the
+         * returned pfn.  This is only needed if the VMA has VM_MIXEDMAP
+         * set, but the kvm_get_pfn/kvm_release_pfn_clean pair will
+         * simply do nothing for reserved pfns.
+         *
+         * Whoever called remap_pfn_range is also going to call e.g.
+         * unmap_mapping_range before the underlying pages are freed,
+         * causing a call to our MMU notifier.
+         */
+        kvm_get_pfn(pfn);
+
+        *p_pfn = pfn;
+        return 0;
+}
+
 /*
  * Pin guest page in memory and return its pfn.
  * @addr: host virtual address which maps memory to the guest
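
The hunk above adds hva_to_pfn_remapped() for VMAs that get_user_pages() refuses to handle: anything mapped with remap_pfn_range() carries VM_IO | VM_PFNMAP, so the pfn must be resolved with follow_pfn(), and a missing mapping has to be faulted in by hand with fixup_user_fault(). The -EAGAIN return encodes fixup_user_fault()'s contract: when *unlocked comes back true, mmap_sem was dropped and retaken, so the cached vma pointer is stale and the caller must redo the VMA lookup, which is what the retry label in the following hunk does. For context, a minimal sketch of the kind of driver mmap handler that produces such a VMA; the driver and MYDEV_MMIO_BASE are hypothetical, not part of this patch:

    #include <linux/mm.h>

    /* Illustrative only: remap_pfn_range() sets VM_IO | VM_PFNMAP on the
     * vma, so get_user_pages() fails on it and KVM must go through
     * follow_pfn() as above.
     */
    static int mydev_mmap(struct file *file, struct vm_area_struct *vma)
    {
            unsigned long size = vma->vm_end - vma->vm_start;

            /* MYDEV_MMIO_BASE: an assumed physical MMIO base address. */
            return remap_pfn_range(vma, vma->vm_start,
                                   MYDEV_MMIO_BASE >> PAGE_SHIFT,
                                   size, vma->vm_page_prot);
    }
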
@@ -1461,7 +1516,7 @@ static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
 {
         struct vm_area_struct *vma;
         kvm_pfn_t pfn = 0;
-        int npages;
+        int npages, r;
 
         /* we can do it either atomically or asynchronously, not both */
         BUG_ON(atomic && async);
@@ -1483,14 +1538,17 @@ static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
                 goto exit;
         }
 
+retry:
         vma = find_vma_intersection(current->mm, addr, addr + 1);
 
         if (vma == NULL)
                 pfn = KVM_PFN_ERR_FAULT;
-        else if ((vma->vm_flags & VM_PFNMAP)) {
-                pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) +
-                        vma->vm_pgoff;
-                BUG_ON(!kvm_is_reserved_pfn(pfn));
+        else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) {
+                r = hva_to_pfn_remapped(vma, addr, async, write_fault, &pfn);
+                if (r == -EAGAIN)
+                        goto retry;
+                if (r < 0)
+                        pfn = KVM_PFN_ERR_FAULT;
         } else {
                 if (async && vma_is_valid(vma, write_fault))
                         *async = true;
@@ -2313,6 +2371,7 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp)
 {
         struct kvm_vcpu *vcpu = filp->private_data;
 
+        debugfs_remove_recursive(vcpu->debugfs_dentry);
         kvm_put_kvm(vcpu->kvm);
         return 0;
 }
@@ -2335,6 +2394,32 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu)
         return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC);
 }
 
+static int kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
+{
+        char dir_name[ITOA_MAX_LEN * 2];
+        int ret;
+
+        if (!kvm_arch_has_vcpu_debugfs())
+                return 0;
+
+        if (!debugfs_initialized())
+                return 0;
+
+        snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id);
+        vcpu->debugfs_dentry = debugfs_create_dir(dir_name,
+                                                  vcpu->kvm->debugfs_dentry);
+        if (!vcpu->debugfs_dentry)
+                return -ENOMEM;
+
+        ret = kvm_arch_create_vcpu_debugfs(vcpu);
+        if (ret < 0) {
+                debugfs_remove_recursive(vcpu->debugfs_dentry);
+                return ret;
+        }
+
+        return 0;
+}
+
 /*
  * Creates some virtual cpus.  Good luck creating more than one.
 */
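
The new kvm_create_vcpu_debugfs() gives every vCPU its own "vcpu<N>" directory underneath the VM's debugfs directory and lets the architecture populate it through the kvm_arch_has_vcpu_debugfs() / kvm_arch_create_vcpu_debugfs() hooks; the whole step is skipped when the architecture does not opt in or debugfs is not initialized. A hedged sketch of what an architecture-side hook might look like, with a made-up "example-stat" file (the real arch implementations are outside this file and not shown in this diff):

    #include <linux/debugfs.h>
    #include <linux/kvm_host.h>

    static int vcpu_get_example_stat(void *data, u64 *val)
    {
            struct kvm_vcpu *vcpu = data;

            *val = vcpu->vcpu_id;   /* placeholder value for illustration */
            return 0;
    }
    DEFINE_SIMPLE_ATTRIBUTE(vcpu_example_stat_fops, vcpu_get_example_stat,
                            NULL, "%llu\n");

    int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
    {
            if (!debugfs_create_file("example-stat", 0444,
                                     vcpu->debugfs_dentry, vcpu,
                                     &vcpu_example_stat_fops))
                    return -ENOMEM;
            return 0;
    }
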
@@ -2346,9 +2431,20 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
         if (id >= KVM_MAX_VCPU_ID)
                 return -EINVAL;
 
+        mutex_lock(&kvm->lock);
+        if (kvm->created_vcpus == KVM_MAX_VCPUS) {
+                mutex_unlock(&kvm->lock);
+                return -EINVAL;
+        }
+
+        kvm->created_vcpus++;
+        mutex_unlock(&kvm->lock);
+
         vcpu = kvm_arch_vcpu_create(kvm, id);
-        if (IS_ERR(vcpu))
-                return PTR_ERR(vcpu);
+        if (IS_ERR(vcpu)) {
+                r = PTR_ERR(vcpu);
+                goto vcpu_decrement;
+        }
 
         preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
 
@@ -2356,15 +2452,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
         if (r)
                 goto vcpu_destroy;
 
+        r = kvm_create_vcpu_debugfs(vcpu);
+        if (r)
+                goto vcpu_destroy;
+
         mutex_lock(&kvm->lock);
-        if (!kvm_vcpu_compatible(vcpu)) {
-                r = -EINVAL;
-                goto unlock_vcpu_destroy;
-        }
-        if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) {
-                r = -EINVAL;
-                goto unlock_vcpu_destroy;
-        }
         if (kvm_get_vcpu_by_id(kvm, id)) {
                 r = -EEXIST;
                 goto unlock_vcpu_destroy;
@@ -2395,8 +2487,13 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 
 unlock_vcpu_destroy:
         mutex_unlock(&kvm->lock);
+        debugfs_remove_recursive(vcpu->debugfs_dentry);
 vcpu_destroy:
         kvm_arch_vcpu_destroy(vcpu);
+vcpu_decrement:
+        mutex_lock(&kvm->lock);
+        kvm->created_vcpus--;
+        mutex_unlock(&kvm->lock);
         return r;
 }
 
@@ -2774,19 +2871,28 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
         dev->ops = ops;
         dev->kvm = kvm;
 
+        mutex_lock(&kvm->lock);
         ret = ops->create(dev, cd->type);
         if (ret < 0) {
+                mutex_unlock(&kvm->lock);
                 kfree(dev);
                 return ret;
         }
+        list_add(&dev->vm_node, &kvm->devices);
+        mutex_unlock(&kvm->lock);
+
+        if (ops->init)
+                ops->init(dev);
 
         ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC);
         if (ret < 0) {
                 ops->destroy(dev);
+                mutex_lock(&kvm->lock);
+                list_del(&dev->vm_node);
+                mutex_unlock(&kvm->lock);
                 return ret;
         }
 
-        list_add(&dev->vm_node, &kvm->devices);
         kvm_get_kvm(kvm);
         cd->fd = ret;
         return 0;
@@ -2935,25 +3041,27 @@ static long kvm_vm_ioctl(struct file *filp,
         case KVM_SET_GSI_ROUTING: {
                 struct kvm_irq_routing routing;
                 struct kvm_irq_routing __user *urouting;
-                struct kvm_irq_routing_entry *entries;
+                struct kvm_irq_routing_entry *entries = NULL;
 
                 r = -EFAULT;
                 if (copy_from_user(&routing, argp, sizeof(routing)))
                         goto out;
                 r = -EINVAL;
-                if (routing.nr >= KVM_MAX_IRQ_ROUTES)
+                if (routing.nr > KVM_MAX_IRQ_ROUTES)
                         goto out;
                 if (routing.flags)
                         goto out;
-                r = -ENOMEM;
-                entries = vmalloc(routing.nr * sizeof(*entries));
-                if (!entries)
-                        goto out;
-                r = -EFAULT;
-                urouting = argp;
-                if (copy_from_user(entries, urouting->entries,
-                                   routing.nr * sizeof(*entries)))
-                        goto out_free_irq_routing;
+                if (routing.nr) {
+                        r = -ENOMEM;
+                        entries = vmalloc(routing.nr * sizeof(*entries));
+                        if (!entries)
+                                goto out;
+                        r = -EFAULT;
+                        urouting = argp;
+                        if (copy_from_user(entries, urouting->entries,
+                                           routing.nr * sizeof(*entries)))
+                                goto out_free_irq_routing;
+                }
                 r = kvm_set_irq_routing(kvm, entries, routing.nr,
                                         routing.flags);
 out_free_irq_routing:
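
Two behavioural changes hide in the KVM_SET_GSI_ROUTING hunk above: the comparison fix (> instead of >=) makes exactly KVM_MAX_IRQ_ROUTES entries usable, and routing.nr == 0 is now accepted, skipping the vmalloc()/copy_from_user() step so that kvm_set_irq_routing() is called with a NULL entries pointer and installs an empty routing table. An illustrative userspace call exercising the nr == 0 path (the helper name is made up):

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Clear all GSI routes on a VM fd by passing nr == 0. */
    int clear_gsi_routes(int vm_fd)
    {
            struct kvm_irq_routing routing;

            memset(&routing, 0, sizeof(routing));   /* nr = 0, flags = 0 */
            return ioctl(vm_fd, KVM_SET_GSI_ROUTING, &routing);
    }
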
@@ -3046,6 +3154,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
 {
         int r;
         struct kvm *kvm;
+        struct file *file;
 
         kvm = kvm_create_vm(type);
         if (IS_ERR(kvm))
@@ -3057,17 +3166,25 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
                 return r;
         }
 #endif
-        r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR | O_CLOEXEC);
+        r = get_unused_fd_flags(O_CLOEXEC);
         if (r < 0) {
                 kvm_put_kvm(kvm);
                 return r;
         }
+        file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
+        if (IS_ERR(file)) {
+                put_unused_fd(r);
+                kvm_put_kvm(kvm);
+                return PTR_ERR(file);
+        }
 
         if (kvm_create_vm_debugfs(kvm, r) < 0) {
-                kvm_put_kvm(kvm);
+                put_unused_fd(r);
+                fput(file);
                 return -ENOMEM;
         }
 
+        fd_install(r, file);
         return r;
 }
 
@@ -3142,12 +3259,13 @@ static void hardware_enable_nolock(void *junk)
         }
 }
 
-static void hardware_enable(void)
+static int kvm_starting_cpu(unsigned int cpu)
 {
         raw_spin_lock(&kvm_count_lock);
         if (kvm_usage_count)
                 hardware_enable_nolock(NULL);
         raw_spin_unlock(&kvm_count_lock);
+        return 0;
 }
 
 static void hardware_disable_nolock(void *junk)
@@ -3160,12 +3278,13 @@ static void hardware_disable_nolock(void *junk)
         kvm_arch_hardware_disable();
 }
 
-static void hardware_disable(void)
+static int kvm_dying_cpu(unsigned int cpu)
 {
         raw_spin_lock(&kvm_count_lock);
         if (kvm_usage_count)
                 hardware_disable_nolock(NULL);
         raw_spin_unlock(&kvm_count_lock);
+        return 0;
 }
 
 static void hardware_disable_all_nolock(void)
@@ -3206,21 +3325,6 @@ static int hardware_enable_all(void)
         return r;
 }
 
-static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
-                           void *v)
-{
-        val &= ~CPU_TASKS_FROZEN;
-        switch (val) {
-        case CPU_DYING:
-                hardware_disable();
-                break;
-        case CPU_STARTING:
-                hardware_enable();
-                break;
-        }
-        return NOTIFY_OK;
-}
-
 static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
                       void *v)
 {
@@ -3487,9 +3591,29 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
         return r;
 }
 
-static struct notifier_block kvm_cpu_notifier = {
-        .notifier_call = kvm_cpu_hotplug,
-};
+struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+                                         gpa_t addr)
+{
+        struct kvm_io_bus *bus;
+        int dev_idx, srcu_idx;
+        struct kvm_io_device *iodev = NULL;
+
+        srcu_idx = srcu_read_lock(&kvm->srcu);
+
+        bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
+
+        dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1);
+        if (dev_idx < 0)
+                goto out_unlock;
+
+        iodev = bus->range[dev_idx].dev;
+
+out_unlock:
+        srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+        return iodev;
+}
+EXPORT_SYMBOL_GPL(kvm_io_bus_get_dev);
 
 static int kvm_debugfs_open(struct inode *inode, struct file *file,
                            int (*get)(void *, u64 *), int (*set)(void *, u64),
@@ -3529,7 +3653,7 @@ static int vm_stat_get_per_vm(void *data, u64 *val)
 {
         struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
 
-        *val = *(u32 *)((void *)stat_data->kvm + stat_data->offset);
+        *val = *(ulong *)((void *)stat_data->kvm + stat_data->offset);
 
         return 0;
 }
@@ -3559,7 +3683,7 @@ static int vcpu_stat_get_per_vm(void *data, u64 *val)
 
         *val = 0;
         kvm_for_each_vcpu(i, vcpu, stat_data->kvm)
-                *val += *(u32 *)((void *)vcpu + stat_data->offset);
+                *val += *(u64 *)((void *)vcpu + stat_data->offset);
 
         return 0;
 }
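
The two stat hunks above widen the per-VM debugfs readers to match the stat field types (VM stats as ulong, per-vCPU stats as u64; the struct changes themselves live outside this file). Reading those counters through a u32 pointer, as before, silently truncates once a counter passes 4G, and on a big-endian machine it reads the wrong half entirely. A small standalone demo of the hazard (illustrative userspace C, mirroring the old pointer cast):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t stat = 0x123456789ULL;         /* counter past 4G */

            /* A u32-sized read drops the high bits; on big-endian it
             * would pick up the high half (printing 1 here) instead of
             * the low one. */
            printf("full   : %llu\n", (unsigned long long)stat);
            printf("via u32: %u\n", *(uint32_t *)&stat);
            return 0;
    }
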
@@ -3717,12 +3841,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
          * kvm_arch_init makes sure there's at most one caller
          * for architectures that support multiple implementations,
          * like intel and amd on x86.
-         * kvm_arch_init must be called before kvm_irqfd_init to avoid creating
-         * conflicts in case kvm is already setup for another implementation.
          */
-        r = kvm_irqfd_init();
-        if (r)
-                goto out_irqfd;
 
         if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
                 r = -ENOMEM;
@@ -3741,7 +3860,8 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
                 goto out_free_1;
         }
 
-        r = register_cpu_notifier(&kvm_cpu_notifier);
+        r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "AP_KVM_STARTING",
+                                      kvm_starting_cpu, kvm_dying_cpu);
         if (r)
                 goto out_free_2;
         register_reboot_notifier(&kvm_reboot_notifier);
@@ -3795,7 +3915,7 @@ out_free:
         kmem_cache_destroy(kvm_vcpu_cache);
 out_free_3:
         unregister_reboot_notifier(&kvm_reboot_notifier);
-        unregister_cpu_notifier(&kvm_cpu_notifier);
+        cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING);
 out_free_2:
 out_free_1:
         kvm_arch_hardware_unsetup();
@@ -3803,7 +3923,6 @@ out_free_0a:
         free_cpumask_var(cpus_hardware_enabled);
 out_free_0:
         kvm_irqfd_exit();
-out_irqfd:
         kvm_arch_exit();
 out_fail:
         return r;
@@ -3818,7 +3937,7 @@ void kvm_exit(void)
         kvm_async_pf_deinit();
         unregister_syscore_ops(&kvm_syscore_ops);
         unregister_reboot_notifier(&kvm_reboot_notifier);
-        unregister_cpu_notifier(&kvm_cpu_notifier);
+        cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING);
         on_each_cpu(hardware_disable_nolock, NULL, 1);
         kvm_arch_hardware_unsetup();
         kvm_arch_exit();
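
The hotplug conversion running through the tail of this diff replaces the old notifier (kvm_cpu_hotplug() switching on CPU_STARTING/CPU_DYING) with the cpuhp state machine: the CPUHP_AP_KVM_STARTING state invokes kvm_starting_cpu() and kvm_dying_cpu() on the CPU that is coming up or going down, and the _nocalls variants register and unregister without invoking the callbacks on already-online CPUs (hardware enabling still happens through hardware_enable_all() when the first VM is created). A hedged sketch of the same API from a module using a dynamically allocated state; all "mymod" names are invented:

    #include <linux/cpuhotplug.h>
    #include <linux/module.h>

    static enum cpuhp_state mymod_hp_state;

    static int mymod_cpu_online(unsigned int cpu)
    {
            pr_info("mymod: cpu %u coming online\n", cpu);
            return 0;
    }

    static int mymod_cpu_offline(unsigned int cpu)
    {
            pr_info("mymod: cpu %u going down\n", cpu);
            return 0;
    }

    static int __init mymod_init(void)
    {
            int ret;

            /* With CPUHP_AP_ONLINE_DYN, a successful setup returns the
             * dynamically allocated state number. */
            ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mymod:online",
                                            mymod_cpu_online, mymod_cpu_offline);
            if (ret < 0)
                    return ret;
            mymod_hp_state = ret;
            return 0;
    }
    module_init(mymod_init);
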