diff options
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r-- | arch/x86/kvm/x86.c | 345 |
1 files changed, 275 insertions, 70 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4a6eff166fc6..4244c2baf57d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -93,10 +93,10 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu); static void process_nmi(struct kvm_vcpu *vcpu); static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); -struct kvm_x86_ops *kvm_x86_ops; +struct kvm_x86_ops *kvm_x86_ops __read_mostly; EXPORT_SYMBOL_GPL(kvm_x86_ops); -static bool ignore_msrs = 0; +static bool __read_mostly ignore_msrs = 0; module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); unsigned int min_timer_period_us = 500; @@ -105,20 +105,25 @@ module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); static bool __read_mostly kvmclock_periodic_sync = true; module_param(kvmclock_periodic_sync, bool, S_IRUGO); -bool kvm_has_tsc_control; +bool __read_mostly kvm_has_tsc_control; EXPORT_SYMBOL_GPL(kvm_has_tsc_control); -u32 kvm_max_guest_tsc_khz; +u32 __read_mostly kvm_max_guest_tsc_khz; EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); +u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits; +EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits); +u64 __read_mostly kvm_max_tsc_scaling_ratio; +EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio); +static u64 __read_mostly kvm_default_tsc_scaling_ratio; /* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */ -static u32 tsc_tolerance_ppm = 250; +static u32 __read_mostly tsc_tolerance_ppm = 250; module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); /* lapic timer advance (tscdeadline mode only) in nanoseconds */ -unsigned int lapic_timer_advance_ns = 0; +unsigned int __read_mostly lapic_timer_advance_ns = 0; module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); -static bool backwards_tsc_observed = false; +static bool __read_mostly backwards_tsc_observed = false; #define KVM_NR_SHARED_MSRS 16 @@ -946,7 +951,7 @@ static u32 msrs_to_save[] = { MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, - MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS + MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, }; static unsigned num_msrs_to_save; @@ -961,6 +966,8 @@ static u32 emulated_msrs[] = { HV_X64_MSR_RESET, HV_X64_MSR_VP_INDEX, HV_X64_MSR_VP_RUNTIME, + HV_X64_MSR_SCONTROL, + HV_X64_MSR_STIMER0_CONFIG, HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, MSR_KVM_PV_EOI_EN, @@ -1162,7 +1169,8 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) ++version; - kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); + if (kvm_write_guest(kvm, wall_clock, &version, sizeof(version))) + return; /* * The guest calculates current wall clock time by adding @@ -1249,14 +1257,53 @@ static u32 adjust_tsc_khz(u32 khz, s32 ppm) return v; } -static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) +static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) +{ + u64 ratio; + + /* Guest TSC same frequency as host TSC? */ + if (!scale) { + vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio; + return 0; + } + + /* TSC scaling supported? */ + if (!kvm_has_tsc_control) { + if (user_tsc_khz > tsc_khz) { + vcpu->arch.tsc_catchup = 1; + vcpu->arch.tsc_always_catchup = 1; + return 0; + } else { + WARN(1, "user requested TSC rate below hardware speed\n"); + return -1; + } + } + + /* TSC scaling required - calculate ratio */ + ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits, + user_tsc_khz, tsc_khz); + + if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) { + WARN_ONCE(1, "Invalid TSC scaling ratio - virtual-tsc-khz=%u\n", + user_tsc_khz); + return -1; + } + + vcpu->arch.tsc_scaling_ratio = ratio; + return 0; +} + +static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) { u32 thresh_lo, thresh_hi; int use_scaling = 0; /* tsc_khz can be zero if TSC calibration fails */ - if (this_tsc_khz == 0) - return; + if (this_tsc_khz == 0) { + /* set tsc_scaling_ratio to a safe value */ + vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio; + return -1; + } /* Compute a scale to convert nanoseconds in TSC cycles */ kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, @@ -1276,7 +1323,7 @@ static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi); use_scaling = 1; } - kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling); + return set_tsc_khz(vcpu, this_tsc_khz, use_scaling); } static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) @@ -1322,6 +1369,48 @@ static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset) vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset; } +/* + * Multiply tsc by a fixed point number represented by ratio. + * + * The most significant 64-N bits (mult) of ratio represent the + * integral part of the fixed point number; the remaining N bits + * (frac) represent the fractional part, ie. ratio represents a fixed + * point number (mult + frac * 2^(-N)). + * + * N equals to kvm_tsc_scaling_ratio_frac_bits. + */ +static inline u64 __scale_tsc(u64 ratio, u64 tsc) +{ + return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits); +} + +u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) +{ + u64 _tsc = tsc; + u64 ratio = vcpu->arch.tsc_scaling_ratio; + + if (ratio != kvm_default_tsc_scaling_ratio) + _tsc = __scale_tsc(ratio, tsc); + + return _tsc; +} +EXPORT_SYMBOL_GPL(kvm_scale_tsc); + +static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) +{ + u64 tsc; + + tsc = kvm_scale_tsc(vcpu, rdtsc()); + + return target_tsc - tsc; +} + +u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) +{ + return kvm_x86_ops->read_l1_tsc(vcpu, kvm_scale_tsc(vcpu, host_tsc)); +} +EXPORT_SYMBOL_GPL(kvm_read_l1_tsc); + void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) { struct kvm *kvm = vcpu->kvm; @@ -1333,7 +1422,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) u64 data = msr->data; raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); - offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); + offset = kvm_compute_tsc_offset(vcpu, data); ns = get_kernel_ns(); elapsed = ns - kvm->arch.last_tsc_nsec; @@ -1390,7 +1479,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) } else { u64 delta = nsec_to_cycles(vcpu, elapsed); data += delta; - offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); + offset = kvm_compute_tsc_offset(vcpu, data); pr_debug("kvm: adjusted tsc offset by %llu\n", delta); } matched = true; @@ -1447,6 +1536,20 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) EXPORT_SYMBOL_GPL(kvm_write_tsc); +static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, + s64 adjustment) +{ + kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment); +} + +static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) +{ + if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio) + WARN_ON(adjustment < 0); + adjustment = kvm_scale_tsc(vcpu, (u64) adjustment); + kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment); +} + #ifdef CONFIG_X86_64 static cycle_t read_tsc(void) @@ -1583,6 +1686,11 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm) #endif } +void kvm_make_mclock_inprogress_request(struct kvm *kvm) +{ + kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); +} + static void kvm_gen_update_masterclock(struct kvm *kvm) { #ifdef CONFIG_X86_64 @@ -1608,7 +1716,7 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) static int kvm_guest_time_update(struct kvm_vcpu *v) { - unsigned long flags, this_tsc_khz; + unsigned long flags, this_tsc_khz, tgt_tsc_khz; struct kvm_vcpu_arch *vcpu = &v->arch; struct kvm_arch *ka = &v->kvm->arch; s64 kernel_ns; @@ -1645,7 +1753,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) kernel_ns = get_kernel_ns(); } - tsc_timestamp = kvm_x86_ops->read_l1_tsc(v, host_tsc); + tsc_timestamp = kvm_read_l1_tsc(v, host_tsc); /* * We may have to catch up the TSC to match elapsed wall clock @@ -1671,7 +1779,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) return 0; if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { - kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz, + tgt_tsc_khz = kvm_has_tsc_control ? + vcpu->virtual_tsc_khz : this_tsc_khz; + kvm_get_time_scale(NSEC_PER_SEC / 1000, tgt_tsc_khz, &vcpu->hv_clock.tsc_shift, &vcpu->hv_clock.tsc_to_system_mul); vcpu->hw_tsc_khz = this_tsc_khz; @@ -2096,6 +2206,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: case HV_X64_MSR_CRASH_CTL: + case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT: return kvm_hv_set_msr_common(vcpu, msr, data, msr_info->host_initiated); case MSR_IA32_BBL_CR_CTL3: @@ -2300,6 +2411,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: case HV_X64_MSR_CRASH_CTL: + case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT: return kvm_hv_get_msr_common(vcpu, msr_info->index, &msr_info->data); break; @@ -2439,6 +2551,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_HYPERV: case KVM_CAP_HYPERV_VAPIC: case KVM_CAP_HYPERV_SPIN: + case KVM_CAP_HYPERV_SYNIC: case KVM_CAP_PCI_SEGMENT: case KVM_CAP_DEBUGREGS: case KVM_CAP_X86_ROBUST_SINGLESTEP: @@ -2591,6 +2704,11 @@ static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) return kvm_arch_has_noncoherent_dma(vcpu->kvm); } +static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu) +{ + set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests); +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { /* Address WBINVD may be executed by guest */ @@ -2617,7 +2735,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (tsc_delta < 0) mark_tsc_unstable("KVM discovered backwards TSC"); if (check_tsc_unstable()) { - u64 offset = kvm_x86_ops->compute_tsc_offset(vcpu, + u64 offset = kvm_compute_tsc_offset(vcpu, vcpu->arch.last_guest_tsc); kvm_x86_ops->write_tsc_offset(vcpu, offset); vcpu->arch.tsc_catchup = 1; @@ -2646,7 +2764,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { - kvm_x86_ops->sync_pir_to_irr(vcpu); + if (vcpu->arch.apicv_active) + kvm_x86_ops->sync_pir_to_irr(vcpu); + memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); return 0; @@ -2661,6 +2781,26 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, return 0; } +static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu) +{ + return (!lapic_in_kernel(vcpu) || + kvm_apic_accept_pic_intr(vcpu)); +} + +/* + * if userspace requested an interrupt window, check that the + * interrupt window is open. + * + * No need to exit to userspace if we already have an interrupt queued. + */ +static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu) +{ + return kvm_arch_interrupt_allowed(vcpu) && + !kvm_cpu_has_interrupt(vcpu) && + !kvm_event_needs_reinjection(vcpu) && + kvm_cpu_accept_dm_intr(vcpu); +} + static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { @@ -2684,6 +2824,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, return -EEXIST; vcpu->arch.pending_external_vector = irq->irq; + kvm_make_request(KVM_REQ_EVENT, vcpu); return 0; } @@ -3068,6 +3209,20 @@ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu) return 0; } +static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, + struct kvm_enable_cap *cap) +{ + if (cap->flags) + return -EINVAL; + + switch (cap->cap) { + case KVM_CAP_HYPERV_SYNIC: + return kvm_hv_activate_synic(vcpu); + default: + return -EINVAL; + } +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -3319,9 +3474,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (user_tsc_khz == 0) user_tsc_khz = tsc_khz; - kvm_set_tsc_khz(vcpu, user_tsc_khz); + if (!kvm_set_tsc_khz(vcpu, user_tsc_khz)) + r = 0; - r = 0; goto out; } case KVM_GET_TSC_KHZ: { @@ -3332,6 +3487,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_set_guest_paused(vcpu); goto out; } + case KVM_ENABLE_CAP: { + struct kvm_enable_cap cap; + + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + goto out; + r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); + break; + } default: r = -EINVAL; } @@ -3449,9 +3613,11 @@ static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps) static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps) { + int i; mutex_lock(&kvm->arch.vpit->pit_state.lock); memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state)); - kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0); + for (i = 0; i < 3; i++) + kvm_pit_load_count(kvm, i, ps->channels[i].count, 0); mutex_unlock(&kvm->arch.vpit->pit_state.lock); return 0; } @@ -3470,6 +3636,7 @@ static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) { int start = 0; + int i; u32 prev_legacy, cur_legacy; mutex_lock(&kvm->arch.vpit->pit_state.lock); prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY; @@ -3479,7 +3646,9 @@ static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels, sizeof(kvm->arch.vpit->pit_state.channels)); kvm->arch.vpit->pit_state.flags = ps->flags; - kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start); + for (i = 0; i < 3; i++) + kvm_pit_load_count(kvm, i, kvm->arch.vpit->pit_state.channels[i].count, + start && i == 0); mutex_unlock(&kvm->arch.vpit->pit_state.lock); return 0; } @@ -3878,16 +4047,17 @@ static void kvm_init_msr_list(void) /* * Even MSRs that are valid in the host may not be exposed - * to the guests in some cases. We could work around this - * in VMX with the generic MSR save/load machinery, but it - * is not really worthwhile since it will really only - * happen with nested virtualization. + * to the guests in some cases. */ switch (msrs_to_save[i]) { case MSR_IA32_BNDCFGS: if (!kvm_x86_ops->mpx_supported()) continue; break; + case MSR_TSC_AUX: + if (!kvm_x86_ops->rdtscp_supported()) + continue; + break; default: break; } @@ -4978,7 +5148,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, int emulation_type) { gpa_t gpa = cr2; - pfn_t pfn; + kvm_pfn_t pfn; if (emulation_type & EMULTYPE_NO_REEXECUTE) return false; @@ -5744,6 +5914,12 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); } +void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu) +{ + vcpu->arch.apicv_active = false; + kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu); +} + int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) { unsigned long nr, a0, a1, a2, a3, ret; @@ -5808,23 +5984,10 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) return emulator_write_emulated(ctxt, rip, instruction, 3, NULL); } -/* - * Check if userspace requested an interrupt window, and that the - * interrupt window is open. - * - * No need to exit to userspace if we already have an interrupt queued. - */ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) { - if (!vcpu->run->request_interrupt_window || pic_in_kernel(vcpu->kvm)) - return false; - - if (kvm_cpu_has_interrupt(vcpu)) - return false; - - return (irqchip_split(vcpu->kvm) - ? kvm_apic_accept_pic_intr(vcpu) - : kvm_arch_interrupt_allowed(vcpu)); + return vcpu->run->request_interrupt_window && + likely(!pic_in_kernel(vcpu->kvm)); } static void post_kvm_run_save(struct kvm_vcpu *vcpu) @@ -5835,17 +5998,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0; kvm_run->cr8 = kvm_get_cr8(vcpu); kvm_run->apic_base = kvm_get_apic_base(vcpu); - if (!irqchip_in_kernel(vcpu->kvm)) - kvm_run->ready_for_interrupt_injection = - kvm_arch_interrupt_allowed(vcpu) && - !kvm_cpu_has_interrupt(vcpu) && - !kvm_event_needs_reinjection(vcpu); - else if (!pic_in_kernel(vcpu->kvm)) - kvm_run->ready_for_interrupt_injection = - kvm_apic_accept_pic_intr(vcpu) && - !kvm_cpu_has_interrupt(vcpu); - else - kvm_run->ready_for_interrupt_injection = 1; + kvm_run->ready_for_interrupt_injection = + pic_in_kernel(vcpu->kvm) || + kvm_vcpu_ready_for_interrupt_injection(vcpu); } static void update_cr8_intercept(struct kvm_vcpu *vcpu) @@ -5858,6 +6013,9 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) if (!vcpu->arch.apic) return; + if (vcpu->arch.apicv_active) + return; + if (!vcpu->arch.apic->vapic_addr) max_irr = kvm_lapic_find_highest_irr(vcpu); else @@ -6194,20 +6352,30 @@ static void process_smi(struct kvm_vcpu *vcpu) kvm_mmu_reset_context(vcpu); } +void kvm_make_scan_ioapic_request(struct kvm *kvm) +{ + kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); +} + static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) { + u64 eoi_exit_bitmap[4]; + if (!kvm_apic_hw_enabled(vcpu->arch.apic)) return; - memset(vcpu->arch.eoi_exit_bitmap, 0, 256 / 8); + bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256); if (irqchip_split(vcpu->kvm)) - kvm_scan_ioapic_routes(vcpu, vcpu->arch.eoi_exit_bitmap); + kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors); else { - kvm_x86_ops->sync_pir_to_irr(vcpu); - kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap); + if (vcpu->arch.apicv_active) + kvm_x86_ops->sync_pir_to_irr(vcpu); + kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); } - kvm_x86_ops->load_eoi_exitmap(vcpu); + bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors, + vcpu_to_synic(vcpu)->vec_bitmap, 256); + kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); } static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) @@ -6258,8 +6426,10 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, static int vcpu_enter_guest(struct kvm_vcpu *vcpu) { int r; - bool req_int_win = !lapic_in_kernel(vcpu) && - vcpu->run->request_interrupt_window; + bool req_int_win = + dm_request_for_irq_injection(vcpu) && + kvm_cpu_accept_dm_intr(vcpu); + bool req_immediate_exit = false; if (vcpu->requests) { @@ -6313,7 +6483,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) { BUG_ON(vcpu->arch.pending_ioapic_eoi > 255); if (test_bit(vcpu->arch.pending_ioapic_eoi, - (void *) vcpu->arch.eoi_exit_bitmap)) { + vcpu->arch.ioapic_handled_vectors)) { vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI; vcpu->run->eoi.vector = vcpu->arch.pending_ioapic_eoi; @@ -6337,6 +6507,20 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 0; goto out; } + if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) { + vcpu->run->exit_reason = KVM_EXIT_HYPERV; + vcpu->run->hyperv = vcpu->arch.hyperv.exit; + r = 0; + goto out; + } + + /* + * KVM_REQ_HV_STIMER has to be processed after + * KVM_REQ_CLOCK_UPDATE, because Hyper-V SynIC timers + * depend on the guest clock being up-to-date + */ + if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu)) + kvm_hv_process_stimers(vcpu); } /* @@ -6348,7 +6532,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) * Update architecture specific hints for APIC * virtual interrupt delivery. */ - if (kvm_x86_ops->hwapic_irr_update) + if (vcpu->arch.apicv_active) kvm_x86_ops->hwapic_irr_update(vcpu, kvm_lapic_find_highest_irr(vcpu)); } @@ -6411,6 +6595,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (req_immediate_exit) smp_send_reschedule(vcpu->cpu); + trace_kvm_entry(vcpu->vcpu_id); + wait_lapic_expire(vcpu); __kvm_guest_enter(); if (unlikely(vcpu->arch.switch_db_regs)) { @@ -6423,8 +6609,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; } - trace_kvm_entry(vcpu->vcpu_id); - wait_lapic_expire(vcpu); kvm_x86_ops->run(vcpu); /* @@ -6452,8 +6636,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (hw_breakpoint_active()) hw_breakpoint_restore(); - vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, - rdtsc()); + vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); @@ -6562,7 +6745,8 @@ static int vcpu_run(struct kvm_vcpu *vcpu) if (kvm_cpu_has_pending_timer(vcpu)) kvm_inject_pending_timer_irqs(vcpu); - if (dm_request_for_irq_injection(vcpu)) { + if (dm_request_for_irq_injection(vcpu) && + kvm_vcpu_ready_for_interrupt_injection(vcpu)) { r = 0; vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; ++vcpu->stat.request_irq_exits; @@ -7015,7 +7199,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, */ kvm_set_rflags(vcpu, rflags); - kvm_x86_ops->update_db_bp_intercept(vcpu); + kvm_x86_ops->update_bp_intercept(vcpu); r = 0; @@ -7364,6 +7548,20 @@ int kvm_arch_hardware_setup(void) if (r != 0) return r; + if (kvm_has_tsc_control) { + /* + * Make sure the user can only configure tsc_khz values that + * fit into a signed integer. + * A min value is not calculated needed because it will always + * be 1 on all machines. + */ + u64 max = min(0x7fffffffULL, + __scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz)); + kvm_max_guest_tsc_khz = max; + + kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits; + } + kvm_init_msr_list(); return 0; } @@ -7405,6 +7603,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) BUG_ON(vcpu->kvm == NULL); kvm = vcpu->kvm; + vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(); vcpu->arch.pv.pv_unhalted = false; vcpu->arch.emulate_ctxt.ops = &emulate_ops; if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu)) @@ -7462,6 +7661,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->arch.pending_external_vector = -1; + kvm_hv_vcpu_init(vcpu); + return 0; fail_free_mce_banks: @@ -7480,6 +7681,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { int idx; + kvm_hv_vcpu_uninit(vcpu); kvm_pmu_destroy(vcpu); kfree(vcpu->arch.mce_banks); kvm_free_lapic(vcpu); @@ -7874,6 +8076,9 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) kvm_cpu_has_interrupt(vcpu)) return true; + if (kvm_hv_has_stimer_pending(vcpu)) + return true; + return false; } |