diff options
Diffstat (limited to 'arch/x86/kvm/lapic.c')
-rw-r--r-- | arch/x86/kvm/lapic.c | 291 |
1 files changed, 179 insertions, 112 deletions
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index d52dcf0776ea..d67206a7b99a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -33,6 +33,7 @@ #include <asm/page.h> #include <asm/current.h> #include <asm/apicdef.h> +#include <asm/delay.h> #include <linux/atomic.h> #include <linux/jump_label.h> #include "kvm_cache_regs.h" @@ -132,6 +133,28 @@ static inline int kvm_apic_id(struct kvm_lapic *apic) return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; } +/* The logical map is definitely wrong if we have multiple + * modes at the same time. (Physical map is always right.) + */ +static inline bool kvm_apic_logical_map_valid(struct kvm_apic_map *map) +{ + return !(map->mode & (map->mode - 1)); +} + +static inline void +apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid) +{ + unsigned lid_bits; + + BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_CLUSTER != 4); + BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_FLAT != 8); + BUILD_BUG_ON(KVM_APIC_MODE_X2APIC != 16); + lid_bits = map->mode; + + *cid = dest_id >> lid_bits; + *lid = dest_id & ((1 << lid_bits) - 1); +} + static void recalculate_apic_map(struct kvm *kvm) { struct kvm_apic_map *new, *old = NULL; @@ -145,48 +168,6 @@ static void recalculate_apic_map(struct kvm *kvm) if (!new) goto out; - new->ldr_bits = 8; - /* flat mode is default */ - new->cid_shift = 8; - new->cid_mask = 0; - new->lid_mask = 0xff; - new->broadcast = APIC_BROADCAST; - - kvm_for_each_vcpu(i, vcpu, kvm) { - struct kvm_lapic *apic = vcpu->arch.apic; - - if (!kvm_apic_present(vcpu)) - continue; - - if (apic_x2apic_mode(apic)) { - new->ldr_bits = 32; - new->cid_shift = 16; - new->cid_mask = new->lid_mask = 0xffff; - new->broadcast = X2APIC_BROADCAST; - } else if (kvm_apic_get_reg(apic, APIC_LDR)) { - if (kvm_apic_get_reg(apic, APIC_DFR) == - APIC_DFR_CLUSTER) { - new->cid_shift = 4; - new->cid_mask = 0xf; - new->lid_mask = 0xf; - } else { - new->cid_shift = 8; - new->cid_mask = 0; - new->lid_mask = 0xff; - } - } - - /* - * All APICs have to be configured in the same mode by an OS. - * We take advatage of this while building logical id loockup - * table. After reset APICs are in software disabled mode, so if - * we find apic with different setting we assume this is the mode - * OS wants all apics to be in; build lookup table accordingly. - */ - if (kvm_apic_sw_enabled(apic)) - break; - } - kvm_for_each_vcpu(i, vcpu, kvm) { struct kvm_lapic *apic = vcpu->arch.apic; u16 cid, lid; @@ -197,11 +178,25 @@ static void recalculate_apic_map(struct kvm *kvm) aid = kvm_apic_id(apic); ldr = kvm_apic_get_reg(apic, APIC_LDR); - cid = apic_cluster_id(new, ldr); - lid = apic_logical_id(new, ldr); if (aid < ARRAY_SIZE(new->phys_map)) new->phys_map[aid] = apic; + + if (apic_x2apic_mode(apic)) { + new->mode |= KVM_APIC_MODE_X2APIC; + } else if (ldr) { + ldr = GET_APIC_LOGICAL_ID(ldr); + if (kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT) + new->mode |= KVM_APIC_MODE_XAPIC_FLAT; + else + new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER; + } + + if (!kvm_apic_logical_map_valid(new)) + continue; + + apic_logical_id(new, ldr, &cid, &lid); + if (lid && cid < ARRAY_SIZE(new->logical_map)) new->logical_map[cid][ffs(lid) - 1] = apic; } @@ -327,17 +322,24 @@ static u8 count_vectors(void *bitmap) return count; } -void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) +void __kvm_apic_update_irr(u32 *pir, void *regs) { u32 i, pir_val; - struct kvm_lapic *apic = vcpu->arch.apic; for (i = 0; i <= 7; i++) { pir_val = xchg(&pir[i], 0); if (pir_val) - *((u32 *)(apic->regs + APIC_IRR + i * 0x10)) |= pir_val; + *((u32 *)(regs + APIC_IRR + i * 0x10)) |= pir_val; } } +EXPORT_SYMBOL_GPL(__kvm_apic_update_irr); + +void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + + __kvm_apic_update_irr(pir, apic->regs); +} EXPORT_SYMBOL_GPL(kvm_apic_update_irr); static inline void apic_set_irr(int vec, struct kvm_lapic *apic) @@ -405,7 +407,7 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic) * because the processor can modify ISR under the hood. Instead * just set SVI. */ - if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) + if (unlikely(kvm_x86_ops->hwapic_isr_update)) kvm_x86_ops->hwapic_isr_update(vcpu->kvm, vec); else { ++apic->isr_count; @@ -453,7 +455,7 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) * on the other hand isr_count and highest_isr_cache are unused * and must be left alone. */ - if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) + if (unlikely(kvm_x86_ops->hwapic_isr_update)) kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); else { @@ -580,56 +582,75 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) apic_update_ppr(apic); } -static int kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest) +static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda) { - return dest == (apic_x2apic_mode(apic) ? - X2APIC_BROADCAST : APIC_BROADCAST); + if (apic_x2apic_mode(apic)) + return mda == X2APIC_BROADCAST; + + return GET_APIC_DEST_FIELD(mda) == APIC_BROADCAST; } -int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest) +static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda) { - return kvm_apic_id(apic) == dest || kvm_apic_broadcast(apic, dest); + if (kvm_apic_broadcast(apic, mda)) + return true; + + if (apic_x2apic_mode(apic)) + return mda == kvm_apic_id(apic); + + return mda == SET_APIC_DEST_FIELD(kvm_apic_id(apic)); } -int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) +static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) { - int result = 0; u32 logical_id; if (kvm_apic_broadcast(apic, mda)) - return 1; + return true; - if (apic_x2apic_mode(apic)) { - logical_id = kvm_apic_get_reg(apic, APIC_LDR); - return logical_id & mda; - } + logical_id = kvm_apic_get_reg(apic, APIC_LDR); - logical_id = GET_APIC_LOGICAL_ID(kvm_apic_get_reg(apic, APIC_LDR)); + if (apic_x2apic_mode(apic)) + return ((logical_id >> 16) == (mda >> 16)) + && (logical_id & mda & 0xffff) != 0; + + logical_id = GET_APIC_LOGICAL_ID(logical_id); + mda = GET_APIC_DEST_FIELD(mda); switch (kvm_apic_get_reg(apic, APIC_DFR)) { case APIC_DFR_FLAT: - if (logical_id & mda) - result = 1; - break; + return (logical_id & mda) != 0; case APIC_DFR_CLUSTER: - if (((logical_id >> 4) == (mda >> 0x4)) - && (logical_id & mda & 0xf)) - result = 1; - break; + return ((logical_id >> 4) == (mda >> 4)) + && (logical_id & mda & 0xf) != 0; default: apic_debug("Bad DFR vcpu %d: %08x\n", apic->vcpu->vcpu_id, kvm_apic_get_reg(apic, APIC_DFR)); - break; + return false; } +} - return result; +/* KVM APIC implementation has two quirks + * - dest always begins at 0 while xAPIC MDA has offset 24, + * - IOxAPIC messages have to be delivered (directly) to x2APIC. + */ +static u32 kvm_apic_mda(unsigned int dest_id, struct kvm_lapic *source, + struct kvm_lapic *target) +{ + bool ipi = source != NULL; + bool x2apic_mda = apic_x2apic_mode(ipi ? source : target); + + if (!ipi && dest_id == APIC_BROADCAST && x2apic_mda) + return X2APIC_BROADCAST; + + return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id); } -int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, +bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, int short_hand, unsigned int dest, int dest_mode) { - int result = 0; struct kvm_lapic *target = vcpu->arch.apic; + u32 mda = kvm_apic_mda(dest, source, target); apic_debug("target %p, source %p, dest 0x%x, " "dest_mode 0x%x, short_hand 0x%x\n", @@ -638,29 +659,21 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, ASSERT(target); switch (short_hand) { case APIC_DEST_NOSHORT: - if (dest_mode == 0) - /* Physical mode. */ - result = kvm_apic_match_physical_addr(target, dest); + if (dest_mode == APIC_DEST_PHYSICAL) + return kvm_apic_match_physical_addr(target, mda); else - /* Logical mode. */ - result = kvm_apic_match_logical_addr(target, dest); - break; + return kvm_apic_match_logical_addr(target, mda); case APIC_DEST_SELF: - result = (target == source); - break; + return target == source; case APIC_DEST_ALLINC: - result = 1; - break; + return true; case APIC_DEST_ALLBUT: - result = (target != source); - break; + return target != source; default: apic_debug("kvm: apic: Bad dest shorthand value %x\n", short_hand); - break; + return false; } - - return result; } bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, @@ -671,6 +684,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic **dst; int i; bool ret = false; + bool x2apic_ipi = src && apic_x2apic_mode(src); *r = -1; @@ -682,33 +696,37 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, if (irq->shorthand) return false; + if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST)) + return false; + rcu_read_lock(); map = rcu_dereference(kvm->arch.apic_map); if (!map) goto out; - if (irq->dest_id == map->broadcast) - goto out; - ret = true; - if (irq->dest_mode == 0) { /* physical mode */ + if (irq->dest_mode == APIC_DEST_PHYSICAL) { if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) goto out; dst = &map->phys_map[irq->dest_id]; } else { - u32 mda = irq->dest_id << (32 - map->ldr_bits); - u16 cid = apic_cluster_id(map, mda); + u16 cid; + + if (!kvm_apic_logical_map_valid(map)) { + ret = false; + goto out; + } + + apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap); if (cid >= ARRAY_SIZE(map->logical_map)) goto out; dst = map->logical_map[cid]; - bitmap = apic_logical_id(map, mda); - if (irq->delivery_mode == APIC_DM_LOWEST) { int l = -1; for_each_set_bit(i, &bitmap, 16) { @@ -840,8 +858,7 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) { - if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && - kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { + if (kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { int trigger_mode; if (apic_test_vector(vector, apic->regs + APIC_TMR)) trigger_mode = IOAPIC_LEVEL_TRIG; @@ -1045,7 +1062,7 @@ static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr) addr < apic->base_address + LAPIC_MMIO_LENGTH; } -static int apic_mmio_read(struct kvm_io_device *this, +static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t address, int len, void *data) { struct kvm_lapic *apic = to_lapic(this); @@ -1076,25 +1093,72 @@ static void apic_timer_expired(struct kvm_lapic *apic) { struct kvm_vcpu *vcpu = apic->vcpu; wait_queue_head_t *q = &vcpu->wq; + struct kvm_timer *ktimer = &apic->lapic_timer; - /* - * Note: KVM_REQ_PENDING_TIMER is implicitly checked in - * vcpu_enter_guest. - */ if (atomic_read(&apic->lapic_timer.pending)) return; atomic_inc(&apic->lapic_timer.pending); - /* FIXME: this code should not know anything about vcpus */ - kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); + kvm_set_pending_timer(vcpu); if (waitqueue_active(q)) wake_up_interruptible(q); + + if (apic_lvtt_tscdeadline(apic)) + ktimer->expired_tscdeadline = ktimer->tscdeadline; +} + +/* + * On APICv, this test will cause a busy wait + * during a higher-priority task. + */ + +static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + u32 reg = kvm_apic_get_reg(apic, APIC_LVTT); + + if (kvm_apic_hw_enabled(apic)) { + int vec = reg & APIC_VECTOR_MASK; + void *bitmap = apic->regs + APIC_ISR; + + if (kvm_x86_ops->deliver_posted_interrupt) + bitmap = apic->regs + APIC_IRR; + + if (apic_test_vector(vec, bitmap)) + return true; + } + return false; +} + +void wait_lapic_expire(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + u64 guest_tsc, tsc_deadline; + + if (!kvm_vcpu_has_lapic(vcpu)) + return; + + if (apic->lapic_timer.expired_tscdeadline == 0) + return; + + if (!lapic_timer_int_injected(vcpu)) + return; + + tsc_deadline = apic->lapic_timer.expired_tscdeadline; + apic->lapic_timer.expired_tscdeadline = 0; + guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc()); + trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); + + /* __delay is delay_tsc whenever the hardware has TSC, thus always. */ + if (guest_tsc < tsc_deadline) + __delay(tsc_deadline - guest_tsc); } static void start_apic_timer(struct kvm_lapic *apic) { ktime_t now; + atomic_set(&apic->lapic_timer.pending, 0); if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { @@ -1140,6 +1204,7 @@ static void start_apic_timer(struct kvm_lapic *apic) /* lapic timer in tsc deadline mode */ u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline; u64 ns = 0; + ktime_t expire; struct kvm_vcpu *vcpu = apic->vcpu; unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz; unsigned long flags; @@ -1154,8 +1219,10 @@ static void start_apic_timer(struct kvm_lapic *apic) if (likely(tscdeadline > guest_tsc)) { ns = (tscdeadline - guest_tsc) * 1000000ULL; do_div(ns, this_tsc_khz); + expire = ktime_add_ns(now, ns); + expire = ktime_sub_ns(expire, lapic_timer_advance_ns); hrtimer_start(&apic->lapic_timer.timer, - ktime_add_ns(now, ns), HRTIMER_MODE_ABS); + expire, HRTIMER_MODE_ABS); } else apic_timer_expired(apic); @@ -1315,7 +1382,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) return ret; } -static int apic_mmio_write(struct kvm_io_device *this, +static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t address, int len, const void *data) { struct kvm_lapic *apic = to_lapic(this); @@ -1455,8 +1522,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) return; } - if (!kvm_vcpu_is_bsp(apic->vcpu)) - value &= ~MSR_IA32_APICBASE_BSP; vcpu->arch.apic_base = value; /* update jump label if enable bit changes */ @@ -1529,7 +1594,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); } apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm); - apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm); + apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1 : 0; apic->highest_isr_cache = -1; update_divide_count(apic); atomic_set(&apic->lapic_timer.pending, 0); @@ -1739,13 +1804,15 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, update_divide_count(apic); start_apic_timer(apic); apic->irr_pending = true; - apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ? + apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1 : count_vectors(apic->regs + APIC_ISR); apic->highest_isr_cache = -1; if (kvm_x86_ops->hwapic_irr_update) kvm_x86_ops->hwapic_irr_update(vcpu, apic_find_highest_irr(apic)); - kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); + if (unlikely(kvm_x86_ops->hwapic_isr_update)) + kvm_x86_ops->hwapic_isr_update(vcpu->kvm, + apic_find_highest_isr(apic)); kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_rtc_eoi_tracking_restore_one(vcpu); } |