From 394457a928e0f7ff121c375966f5ec1980dabc09 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Fri, 3 Oct 2014 00:30:52 +0300 Subject: KVM: x86: some apic broadcast modes does not work MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KVM does not deliver x2APIC broadcast messages with physical mode. Intel SDM (10.12.9 ICR Operation in x2APIC Mode) states: "A destination ID value of FFFF_FFFFH is used for broadcast of interrupts in both logical destination and physical destination modes." In addition, the local-apic enables cluster mode broadcast. As Intel SDM 10.6.2.2 says: "Broadcast to all local APICs is achieved by setting all destination bits to one." This patch enables cluster mode broadcast. The fix tries to combine broadcast in different modes through a unified code. One rare case occurs when the source of IPI has its APIC disabled. In such case, the source can still issue IPIs, but since the source is not obliged to have the same LAPIC mode as the enabled ones, we cannot rely on it. Since it is a rare case, it is unoptimized and done on the slow-path. Signed-off-by: Nadav Amit Reviewed-by: Radim Krčmář Reviewed-by: Wanpeng Li [As per Radim's review, use unsigned int for X2APIC_BROADCAST, return bool from kvm_apic_broadcast. - Paolo] Signed-off-by: Paolo Bonzini --- virt/kvm/ioapic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index e23b70634f1e..31725a3a93b8 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -83,7 +83,7 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, - int short_hand, int dest, int dest_mode); + int short_hand, unsigned int dest, int dest_mode); int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode); -- cgit v1.2.1 From 02d5d55b7e39b63feb71bc49a75f58d342527d16 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Mon, 27 Oct 2014 16:22:56 +0100 Subject: KVM: trivial fix comment regarding __kvm_set_memory_region commit 72dc67a69690 ("KVM: remove the usage of the mmap_sem for the protection of the memory slots.") changed the lock which will be taken. This should be reflected in the function commentary. Signed-off-by: Dominik Dingel Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 25ffac9e947d..3a31ec6e396b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -760,7 +760,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, * * Discontiguous memory is allowed, mostly for framebuffers. * - * Must be called holding mmap_sem for write. + * Must be called holding kvm->slots_lock for write. */ int __kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem) -- cgit v1.2.1 From 063584d44377ebde5ebc6e99cedc1bc6561939d7 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Thu, 13 Nov 2014 23:00:13 +0000 Subject: kvm: memslots: replace heap sort with an insertion sort pass memslots is a sorted array. When a slot is changed, heapsort (lib/sort.c) would take O(n log n) time to update it; an optimized insertion sort will only cost O(n) on an array with just one item out of order. Replace sort() with a custom sort that takes advantage of memslots usage pattern and the known position of the changed slot. performance change of 128 memslots insertions with gradually increasing size (the worst case): heap sort custom sort max: 249747 2500 cycles with custom sort alg taking ~98% less then original update time. Signed-off-by: Igor Mammedov Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 56 ++++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3a31ec6e396b..c0c2202e6c4f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -668,31 +668,37 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) return 0; } -static int cmp_memslot(const void *slot1, const void *slot2) -{ - struct kvm_memory_slot *s1, *s2; - - s1 = (struct kvm_memory_slot *)slot1; - s2 = (struct kvm_memory_slot *)slot2; - - if (s1->npages < s2->npages) - return 1; - if (s1->npages > s2->npages) - return -1; - - return 0; -} - /* - * Sort the memslots base on its size, so the larger slots - * will get better fit. + * Insert memslot and re-sort memslots based on their size, + * so the larger slots will get better fit. Sorting algorithm + * takes advantage of having initially sorted array and + * known changed memslot position. */ -static void sort_memslots(struct kvm_memslots *slots) +static void insert_memslot(struct kvm_memslots *slots, + struct kvm_memory_slot *new) { - int i; + int i = slots->id_to_index[new->id]; + struct kvm_memory_slot *old = id_to_memslot(slots, new->id); + struct kvm_memory_slot *mslots = slots->memslots; - sort(slots->memslots, KVM_MEM_SLOTS_NUM, - sizeof(struct kvm_memory_slot), cmp_memslot, NULL); + if (new->npages == old->npages) { + *old = *new; + return; + } + + while (1) { + if (i < (KVM_MEM_SLOTS_NUM - 1) && + new->npages < mslots[i + 1].npages) { + mslots[i] = mslots[i + 1]; + i++; + } else if (i > 0 && new->npages > mslots[i - 1].npages) { + mslots[i] = mslots[i - 1]; + i--; + } else { + mslots[i] = *new; + break; + } + } for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) slots->id_to_index[slots->memslots[i].id] = i; @@ -702,13 +708,7 @@ static void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new) { if (new) { - int id = new->id; - struct kvm_memory_slot *old = id_to_memslot(slots, id); - unsigned long npages = old->npages; - - *old = *new; - if (new->npages != npages) - sort_memslots(slots); + insert_memslot(slots, new); } } -- cgit v1.2.1 From 8593176c677226ead5b4effbd667dd19d5cba5ea Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 14 Nov 2014 10:22:07 +0100 Subject: kvm: memslots: track id_to_index changes during the insertion sort This completes the optimization from the previous patch, by removing the KVM_MEM_SLOTS_NUM-iteration loop from insert_memslot. Reviewed-by: Igor Mammedov Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c0c2202e6c4f..eae07f97ecf7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -677,31 +677,31 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) static void insert_memslot(struct kvm_memslots *slots, struct kvm_memory_slot *new) { - int i = slots->id_to_index[new->id]; - struct kvm_memory_slot *old = id_to_memslot(slots, new->id); + int id = new->id; + int i = slots->id_to_index[id]; struct kvm_memory_slot *mslots = slots->memslots; - if (new->npages == old->npages) { - *old = *new; - return; - } - - while (1) { - if (i < (KVM_MEM_SLOTS_NUM - 1) && - new->npages < mslots[i + 1].npages) { - mslots[i] = mslots[i + 1]; - i++; - } else if (i > 0 && new->npages > mslots[i - 1].npages) { - mslots[i] = mslots[i - 1]; - i--; + WARN_ON(mslots[i].id != id); + if (new->npages != mslots[i].npages) { + if (new->npages < mslots[i].npages) { + while (i < KVM_MEM_SLOTS_NUM - 1 && + new->npages < mslots[i + 1].npages) { + mslots[i] = mslots[i + 1]; + slots->id_to_index[mslots[i].id] = i; + i++; + } } else { - mslots[i] = *new; - break; + while (i > 0 && + new->npages > mslots[i - 1].npages) { + mslots[i] = mslots[i - 1]; + slots->id_to_index[mslots[i].id] = i; + i--; + } } } - for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) - slots->id_to_index[slots->memslots[i].id] = i; + mslots[i] = *new; + slots->id_to_index[mslots[i].id] = i; } static void update_memslots(struct kvm_memslots *slots, -- cgit v1.2.1 From f2a81036516e2b97c07c49dd6d51d36bfa43593d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 14 Nov 2014 10:46:45 +0100 Subject: kvm: commonize allocation of the new memory slots The two kmemdup invocations can be unified. I find that the new placement of the comment makes it easier to see what happens. Reviewed-by: Igor Mammedov Reviewed-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index eae07f97ecf7..7fc77c0f98fb 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -866,11 +866,12 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out_free; } + slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), + GFP_KERNEL); + if (!slots) + goto out_free; + if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { - slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), - GFP_KERNEL); - if (!slots) - goto out_free; slot = id_to_memslot(slots, mem->slot); slot->flags |= KVM_MEMSLOT_INVALID; @@ -886,6 +887,12 @@ int __kvm_set_memory_region(struct kvm *kvm, * - kvm_is_visible_gfn (mmu_check_roots) */ kvm_arch_flush_shadow_memslot(kvm, slot); + + /* + * We can re-use the old_memslots from above, the only difference + * from the currently installed memslots is the invalid flag. This + * will get overwritten by update_memslots anyway. + */ slots = old_memslots; } @@ -893,19 +900,6 @@ int __kvm_set_memory_region(struct kvm *kvm, if (r) goto out_slots; - r = -ENOMEM; - /* - * We can re-use the old_memslots from above, the only difference - * from the currently installed memslots is the invalid flag. This - * will get overwritten by update_memslots anyway. - */ - if (!slots) { - slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), - GFP_KERNEL); - if (!slots) - goto out_free; - } - /* actual memory is freed via old in kvm_free_physmem_slot below */ if (change == KVM_MR_DELETE) { new.dirty_bitmap = NULL; -- cgit v1.2.1 From 5cc150279936a618187c42966a8a2f09177ac80a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 14 Nov 2014 10:55:31 +0100 Subject: kvm: simplify update_memslots invocation The update_memslots invocation is only needed in one case. Make the code clearer by moving it to __kvm_set_memory_region, and removing the wrapper around insert_memslot. Reviewed-by: Igor Mammedov Reviewed-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7fc77c0f98fb..751ece6a595c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -674,8 +674,8 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) * takes advantage of having initially sorted array and * known changed memslot position. */ -static void insert_memslot(struct kvm_memslots *slots, - struct kvm_memory_slot *new) +static void update_memslots(struct kvm_memslots *slots, + struct kvm_memory_slot *new) { int id = new->id; int i = slots->id_to_index[id]; @@ -704,14 +704,6 @@ static void insert_memslot(struct kvm_memslots *slots, slots->id_to_index[mslots[i].id] = i; } -static void update_memslots(struct kvm_memslots *slots, - struct kvm_memory_slot *new) -{ - if (new) { - insert_memslot(slots, new); - } -} - static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) { u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; @@ -727,7 +719,7 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) } static struct kvm_memslots *install_new_memslots(struct kvm *kvm, - struct kvm_memslots *slots, struct kvm_memory_slot *new) + struct kvm_memslots *slots) { struct kvm_memslots *old_memslots = kvm->memslots; @@ -738,7 +730,6 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, WARN_ON(old_memslots->generation & 1); slots->generation = old_memslots->generation + 1; - update_memslots(slots, new); rcu_assign_pointer(kvm->memslots, slots); synchronize_srcu_expedited(&kvm->srcu); @@ -875,7 +866,7 @@ int __kvm_set_memory_region(struct kvm *kvm, slot = id_to_memslot(slots, mem->slot); slot->flags |= KVM_MEMSLOT_INVALID; - old_memslots = install_new_memslots(kvm, slots, NULL); + old_memslots = install_new_memslots(kvm, slots); /* slot was deleted or moved, clear iommu mapping */ kvm_iommu_unmap_pages(kvm, &old); @@ -906,7 +897,8 @@ int __kvm_set_memory_region(struct kvm *kvm, memset(&new.arch, 0, sizeof(new.arch)); } - old_memslots = install_new_memslots(kvm, slots, &new); + update_memslots(slots, &new); + old_memslots = install_new_memslots(kvm, slots); kvm_arch_commit_memory_region(kvm, mem, &old, change); -- cgit v1.2.1 From 003f7de6258900e17f6206e8e417d76c75ca549f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 19 Nov 2014 16:22:52 +0100 Subject: KVM: ia64: remove KVM for ia64 has been marked as broken not just once, but twice even, and the last patch from the maintainer is now roughly 5 years old. Time for it to rest in peace. Acked-by: Gleb Natapov Signed-off-by: Paolo Bonzini --- virt/kvm/ioapic.c | 5 ----- virt/kvm/ioapic.h | 1 - virt/kvm/irq_comm.c | 22 ---------------------- 3 files changed, 28 deletions(-) (limited to 'virt') diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 0ba4057d271b..f0f7ef82b7a6 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -586,11 +586,6 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, case IOAPIC_REG_WINDOW: ioapic_write_indirect(ioapic, data); break; -#ifdef CONFIG_IA64 - case IOAPIC_REG_EOI: - __kvm_ioapic_update_eoi(NULL, ioapic, data, IOAPIC_LEVEL_TRIG); - break; -#endif default: break; diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 31725a3a93b8..dc3baa3a538f 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -19,7 +19,6 @@ struct kvm_vcpu; /* Direct registers. */ #define IOAPIC_REG_SELECT 0x00 #define IOAPIC_REG_WINDOW 0x10 -#define IOAPIC_REG_EOI 0x40 /* IA64 IOSAPIC only */ /* Indirect registers. */ #define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */ diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 963b8995a9e8..1345bde064f5 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -26,9 +26,6 @@ #include #include -#ifdef CONFIG_IA64 -#include -#endif #include "irq.h" @@ -57,12 +54,7 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) { -#ifdef CONFIG_IA64 - return irq->delivery_mode == - (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT); -#else return irq->delivery_mode == APIC_DM_LOWEST; -#endif } int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, @@ -346,20 +338,6 @@ static const struct kvm_irq_routing_entry default_routing[] = { ROUTING_ENTRY1(18), ROUTING_ENTRY1(19), ROUTING_ENTRY1(20), ROUTING_ENTRY1(21), ROUTING_ENTRY1(22), ROUTING_ENTRY1(23), -#ifdef CONFIG_IA64 - ROUTING_ENTRY1(24), ROUTING_ENTRY1(25), - ROUTING_ENTRY1(26), ROUTING_ENTRY1(27), - ROUTING_ENTRY1(28), ROUTING_ENTRY1(29), - ROUTING_ENTRY1(30), ROUTING_ENTRY1(31), - ROUTING_ENTRY1(32), ROUTING_ENTRY1(33), - ROUTING_ENTRY1(34), ROUTING_ENTRY1(35), - ROUTING_ENTRY1(36), ROUTING_ENTRY1(37), - ROUTING_ENTRY1(38), ROUTING_ENTRY1(39), - ROUTING_ENTRY1(40), ROUTING_ENTRY1(41), - ROUTING_ENTRY1(42), ROUTING_ENTRY1(43), - ROUTING_ENTRY1(44), ROUTING_ENTRY1(45), - ROUTING_ENTRY1(46), ROUTING_ENTRY1(47), -#endif }; int kvm_setup_default_irq_routing(struct kvm *kvm) -- cgit v1.2.1 From 6ef768fac9dfe3404d3fdc09909ea203a88f2f38 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 20 Nov 2014 13:45:31 +0100 Subject: kvm: x86: move ioapic.c and irq_comm.c back to arch/x86/ ia64 does not need them anymore. Ack notifiers become x86-specific too. Suggested-by: Gleb Natapov Reviewed-by: Radim Krcmar Signed-off-by: Paolo Bonzini --- virt/kvm/eventfd.c | 7 - virt/kvm/ioapic.c | 682 ---------------------------------------------------- virt/kvm/ioapic.h | 103 -------- virt/kvm/irq_comm.c | 347 -------------------------- virt/kvm/kvm_main.c | 3 - 5 files changed, 1142 deletions(-) delete mode 100644 virt/kvm/ioapic.c delete mode 100644 virt/kvm/ioapic.h delete mode 100644 virt/kvm/irq_comm.c (limited to 'virt') diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index b0fb390943c6..148b2392c762 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -36,9 +36,6 @@ #include #include -#ifdef __KVM_HAVE_IOAPIC -#include "ioapic.h" -#endif #include "iodev.h" #ifdef CONFIG_HAVE_KVM_IRQFD @@ -492,9 +489,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm, mutex_lock(&kvm->irq_lock); hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); mutex_unlock(&kvm->irq_lock); -#ifdef __KVM_HAVE_IOAPIC kvm_vcpu_request_scan_ioapic(kvm); -#endif } void kvm_unregister_irq_ack_notifier(struct kvm *kvm, @@ -504,9 +499,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, hlist_del_init_rcu(&kian->link); mutex_unlock(&kvm->irq_lock); synchronize_srcu(&kvm->irq_srcu); -#ifdef __KVM_HAVE_IOAPIC kvm_vcpu_request_scan_ioapic(kvm); -#endif } #endif diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c deleted file mode 100644 index f0f7ef82b7a6..000000000000 --- a/virt/kvm/ioapic.c +++ /dev/null @@ -1,682 +0,0 @@ -/* - * Copyright (C) 2001 MandrakeSoft S.A. - * Copyright 2010 Red Hat, Inc. and/or its affiliates. - * - * MandrakeSoft S.A. - * 43, rue d'Aboukir - * 75002 Paris - France - * http://www.linux-mandrake.com/ - * http://www.mandrakesoft.com/ - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Yunhong Jiang - * Yaozu (Eddie) Dong - * Based on Xen 3.1 code. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ioapic.h" -#include "lapic.h" -#include "irq.h" - -#if 0 -#define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) -#else -#define ioapic_debug(fmt, arg...) -#endif -static int ioapic_service(struct kvm_ioapic *vioapic, int irq, - bool line_status); - -static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, - unsigned long addr, - unsigned long length) -{ - unsigned long result = 0; - - switch (ioapic->ioregsel) { - case IOAPIC_REG_VERSION: - result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16) - | (IOAPIC_VERSION_ID & 0xff)); - break; - - case IOAPIC_REG_APIC_ID: - case IOAPIC_REG_ARB_ID: - result = ((ioapic->id & 0xf) << 24); - break; - - default: - { - u32 redir_index = (ioapic->ioregsel - 0x10) >> 1; - u64 redir_content; - - if (redir_index < IOAPIC_NUM_PINS) - redir_content = - ioapic->redirtbl[redir_index].bits; - else - redir_content = ~0ULL; - - result = (ioapic->ioregsel & 0x1) ? - (redir_content >> 32) & 0xffffffff : - redir_content & 0xffffffff; - break; - } - } - - return result; -} - -static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic) -{ - ioapic->rtc_status.pending_eoi = 0; - bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS); -} - -static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic); - -static void rtc_status_pending_eoi_check_valid(struct kvm_ioapic *ioapic) -{ - if (WARN_ON(ioapic->rtc_status.pending_eoi < 0)) - kvm_rtc_eoi_tracking_restore_all(ioapic); -} - -static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) -{ - bool new_val, old_val; - struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; - union kvm_ioapic_redirect_entry *e; - - e = &ioapic->redirtbl[RTC_GSI]; - if (!kvm_apic_match_dest(vcpu, NULL, 0, e->fields.dest_id, - e->fields.dest_mode)) - return; - - new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector); - old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); - - if (new_val == old_val) - return; - - if (new_val) { - __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); - ioapic->rtc_status.pending_eoi++; - } else { - __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); - ioapic->rtc_status.pending_eoi--; - rtc_status_pending_eoi_check_valid(ioapic); - } -} - -void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) -{ - struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; - - spin_lock(&ioapic->lock); - __rtc_irq_eoi_tracking_restore_one(vcpu); - spin_unlock(&ioapic->lock); -} - -static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic) -{ - struct kvm_vcpu *vcpu; - int i; - - if (RTC_GSI >= IOAPIC_NUM_PINS) - return; - - rtc_irq_eoi_tracking_reset(ioapic); - kvm_for_each_vcpu(i, vcpu, ioapic->kvm) - __rtc_irq_eoi_tracking_restore_one(vcpu); -} - -static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu) -{ - if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map)) { - --ioapic->rtc_status.pending_eoi; - rtc_status_pending_eoi_check_valid(ioapic); - } -} - -static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic) -{ - if (ioapic->rtc_status.pending_eoi > 0) - return true; /* coalesced */ - - return false; -} - -static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq, - int irq_level, bool line_status) -{ - union kvm_ioapic_redirect_entry entry; - u32 mask = 1 << irq; - u32 old_irr; - int edge, ret; - - entry = ioapic->redirtbl[irq]; - edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); - - if (!irq_level) { - ioapic->irr &= ~mask; - ret = 1; - goto out; - } - - /* - * Return 0 for coalesced interrupts; for edge-triggered interrupts, - * this only happens if a previous edge has not been delivered due - * do masking. For level interrupts, the remote_irr field tells - * us if the interrupt is waiting for an EOI. - * - * RTC is special: it is edge-triggered, but userspace likes to know - * if it has been already ack-ed via EOI because coalesced RTC - * interrupts lead to time drift in Windows guests. So we track - * EOI manually for the RTC interrupt. - */ - if (irq == RTC_GSI && line_status && - rtc_irq_check_coalesced(ioapic)) { - ret = 0; - goto out; - } - - old_irr = ioapic->irr; - ioapic->irr |= mask; - if ((edge && old_irr == ioapic->irr) || - (!edge && entry.fields.remote_irr)) { - ret = 0; - goto out; - } - - ret = ioapic_service(ioapic, irq, line_status); - -out: - trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); - return ret; -} - -static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr) -{ - u32 idx; - - rtc_irq_eoi_tracking_reset(ioapic); - for_each_set_bit(idx, &irr, IOAPIC_NUM_PINS) - ioapic_set_irq(ioapic, idx, 1, true); - - kvm_rtc_eoi_tracking_restore_all(ioapic); -} - - -static void update_handled_vectors(struct kvm_ioapic *ioapic) -{ - DECLARE_BITMAP(handled_vectors, 256); - int i; - - memset(handled_vectors, 0, sizeof(handled_vectors)); - for (i = 0; i < IOAPIC_NUM_PINS; ++i) - __set_bit(ioapic->redirtbl[i].fields.vector, handled_vectors); - memcpy(ioapic->handled_vectors, handled_vectors, - sizeof(handled_vectors)); - smp_wmb(); -} - -void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, - u32 *tmr) -{ - struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; - union kvm_ioapic_redirect_entry *e; - int index; - - spin_lock(&ioapic->lock); - for (index = 0; index < IOAPIC_NUM_PINS; index++) { - e = &ioapic->redirtbl[index]; - if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG || - kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index) || - index == RTC_GSI) { - if (kvm_apic_match_dest(vcpu, NULL, 0, - e->fields.dest_id, e->fields.dest_mode)) { - __set_bit(e->fields.vector, - (unsigned long *)eoi_exit_bitmap); - if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG) - __set_bit(e->fields.vector, - (unsigned long *)tmr); - } - } - } - spin_unlock(&ioapic->lock); -} - -#ifdef CONFIG_X86 -void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) -{ - struct kvm_ioapic *ioapic = kvm->arch.vioapic; - - if (!ioapic) - return; - kvm_make_scan_ioapic_request(kvm); -} -#else -void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) -{ - return; -} -#endif - -static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) -{ - unsigned index; - bool mask_before, mask_after; - union kvm_ioapic_redirect_entry *e; - - switch (ioapic->ioregsel) { - case IOAPIC_REG_VERSION: - /* Writes are ignored. */ - break; - - case IOAPIC_REG_APIC_ID: - ioapic->id = (val >> 24) & 0xf; - break; - - case IOAPIC_REG_ARB_ID: - break; - - default: - index = (ioapic->ioregsel - 0x10) >> 1; - - ioapic_debug("change redir index %x val %x\n", index, val); - if (index >= IOAPIC_NUM_PINS) - return; - e = &ioapic->redirtbl[index]; - mask_before = e->fields.mask; - if (ioapic->ioregsel & 1) { - e->bits &= 0xffffffff; - e->bits |= (u64) val << 32; - } else { - e->bits &= ~0xffffffffULL; - e->bits |= (u32) val; - e->fields.remote_irr = 0; - } - update_handled_vectors(ioapic); - mask_after = e->fields.mask; - if (mask_before != mask_after) - kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); - if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG - && ioapic->irr & (1 << index)) - ioapic_service(ioapic, index, false); - kvm_vcpu_request_scan_ioapic(ioapic->kvm); - break; - } -} - -static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) -{ - union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq]; - struct kvm_lapic_irq irqe; - int ret; - - if (entry->fields.mask) - return -1; - - ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " - "vector=%x trig_mode=%x\n", - entry->fields.dest_id, entry->fields.dest_mode, - entry->fields.delivery_mode, entry->fields.vector, - entry->fields.trig_mode); - - irqe.dest_id = entry->fields.dest_id; - irqe.vector = entry->fields.vector; - irqe.dest_mode = entry->fields.dest_mode; - irqe.trig_mode = entry->fields.trig_mode; - irqe.delivery_mode = entry->fields.delivery_mode << 8; - irqe.level = 1; - irqe.shorthand = 0; - - if (irqe.trig_mode == IOAPIC_EDGE_TRIG) - ioapic->irr &= ~(1 << irq); - - if (irq == RTC_GSI && line_status) { - /* - * pending_eoi cannot ever become negative (see - * rtc_status_pending_eoi_check_valid) and the caller - * ensures that it is only called if it is >= zero, namely - * if rtc_irq_check_coalesced returns false). - */ - BUG_ON(ioapic->rtc_status.pending_eoi != 0); - ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, - ioapic->rtc_status.dest_map); - ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret); - } else - ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL); - - if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG) - entry->fields.remote_irr = 1; - - return ret; -} - -int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, - int level, bool line_status) -{ - int ret, irq_level; - - BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS); - - spin_lock(&ioapic->lock); - irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq], - irq_source_id, level); - ret = ioapic_set_irq(ioapic, irq, irq_level, line_status); - - spin_unlock(&ioapic->lock); - - return ret; -} - -void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id) -{ - int i; - - spin_lock(&ioapic->lock); - for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) - __clear_bit(irq_source_id, &ioapic->irq_states[i]); - spin_unlock(&ioapic->lock); -} - -static void kvm_ioapic_eoi_inject_work(struct work_struct *work) -{ - int i; - struct kvm_ioapic *ioapic = container_of(work, struct kvm_ioapic, - eoi_inject.work); - spin_lock(&ioapic->lock); - for (i = 0; i < IOAPIC_NUM_PINS; i++) { - union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; - - if (ent->fields.trig_mode != IOAPIC_LEVEL_TRIG) - continue; - - if (ioapic->irr & (1 << i) && !ent->fields.remote_irr) - ioapic_service(ioapic, i, false); - } - spin_unlock(&ioapic->lock); -} - -#define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000 - -static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, - struct kvm_ioapic *ioapic, int vector, int trigger_mode) -{ - int i; - - for (i = 0; i < IOAPIC_NUM_PINS; i++) { - union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; - - if (ent->fields.vector != vector) - continue; - - if (i == RTC_GSI) - rtc_irq_eoi(ioapic, vcpu); - /* - * We are dropping lock while calling ack notifiers because ack - * notifier callbacks for assigned devices call into IOAPIC - * recursively. Since remote_irr is cleared only after call - * to notifiers if the same vector will be delivered while lock - * is dropped it will be put into irr and will be delivered - * after ack notifier returns. - */ - spin_unlock(&ioapic->lock); - kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i); - spin_lock(&ioapic->lock); - - if (trigger_mode != IOAPIC_LEVEL_TRIG) - continue; - - ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); - ent->fields.remote_irr = 0; - if (!ent->fields.mask && (ioapic->irr & (1 << i))) { - ++ioapic->irq_eoi[i]; - if (ioapic->irq_eoi[i] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) { - /* - * Real hardware does not deliver the interrupt - * immediately during eoi broadcast, and this - * lets a buggy guest make slow progress - * even if it does not correctly handle a - * level-triggered interrupt. Emulate this - * behavior if we detect an interrupt storm. - */ - schedule_delayed_work(&ioapic->eoi_inject, HZ / 100); - ioapic->irq_eoi[i] = 0; - trace_kvm_ioapic_delayed_eoi_inj(ent->bits); - } else { - ioapic_service(ioapic, i, false); - } - } else { - ioapic->irq_eoi[i] = 0; - } - } -} - -bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector) -{ - struct kvm_ioapic *ioapic = kvm->arch.vioapic; - smp_rmb(); - return test_bit(vector, ioapic->handled_vectors); -} - -void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode) -{ - struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; - - spin_lock(&ioapic->lock); - __kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode); - spin_unlock(&ioapic->lock); -} - -static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev) -{ - return container_of(dev, struct kvm_ioapic, dev); -} - -static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr) -{ - return ((addr >= ioapic->base_address && - (addr < ioapic->base_address + IOAPIC_MEM_LENGTH))); -} - -static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, - void *val) -{ - struct kvm_ioapic *ioapic = to_ioapic(this); - u32 result; - if (!ioapic_in_range(ioapic, addr)) - return -EOPNOTSUPP; - - ioapic_debug("addr %lx\n", (unsigned long)addr); - ASSERT(!(addr & 0xf)); /* check alignment */ - - addr &= 0xff; - spin_lock(&ioapic->lock); - switch (addr) { - case IOAPIC_REG_SELECT: - result = ioapic->ioregsel; - break; - - case IOAPIC_REG_WINDOW: - result = ioapic_read_indirect(ioapic, addr, len); - break; - - default: - result = 0; - break; - } - spin_unlock(&ioapic->lock); - - switch (len) { - case 8: - *(u64 *) val = result; - break; - case 1: - case 2: - case 4: - memcpy(val, (char *)&result, len); - break; - default: - printk(KERN_WARNING "ioapic: wrong length %d\n", len); - } - return 0; -} - -static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, - const void *val) -{ - struct kvm_ioapic *ioapic = to_ioapic(this); - u32 data; - if (!ioapic_in_range(ioapic, addr)) - return -EOPNOTSUPP; - - ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n", - (void*)addr, len, val); - ASSERT(!(addr & 0xf)); /* check alignment */ - - switch (len) { - case 8: - case 4: - data = *(u32 *) val; - break; - case 2: - data = *(u16 *) val; - break; - case 1: - data = *(u8 *) val; - break; - default: - printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); - return 0; - } - - addr &= 0xff; - spin_lock(&ioapic->lock); - switch (addr) { - case IOAPIC_REG_SELECT: - ioapic->ioregsel = data & 0xFF; /* 8-bit register */ - break; - - case IOAPIC_REG_WINDOW: - ioapic_write_indirect(ioapic, data); - break; - - default: - break; - } - spin_unlock(&ioapic->lock); - return 0; -} - -static void kvm_ioapic_reset(struct kvm_ioapic *ioapic) -{ - int i; - - cancel_delayed_work_sync(&ioapic->eoi_inject); - for (i = 0; i < IOAPIC_NUM_PINS; i++) - ioapic->redirtbl[i].fields.mask = 1; - ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS; - ioapic->ioregsel = 0; - ioapic->irr = 0; - ioapic->id = 0; - memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS); - rtc_irq_eoi_tracking_reset(ioapic); - update_handled_vectors(ioapic); -} - -static const struct kvm_io_device_ops ioapic_mmio_ops = { - .read = ioapic_mmio_read, - .write = ioapic_mmio_write, -}; - -int kvm_ioapic_init(struct kvm *kvm) -{ - struct kvm_ioapic *ioapic; - int ret; - - ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); - if (!ioapic) - return -ENOMEM; - spin_lock_init(&ioapic->lock); - INIT_DELAYED_WORK(&ioapic->eoi_inject, kvm_ioapic_eoi_inject_work); - kvm->arch.vioapic = ioapic; - kvm_ioapic_reset(ioapic); - kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); - ioapic->kvm = kvm; - mutex_lock(&kvm->slots_lock); - ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ioapic->base_address, - IOAPIC_MEM_LENGTH, &ioapic->dev); - mutex_unlock(&kvm->slots_lock); - if (ret < 0) { - kvm->arch.vioapic = NULL; - kfree(ioapic); - } - - return ret; -} - -void kvm_ioapic_destroy(struct kvm *kvm) -{ - struct kvm_ioapic *ioapic = kvm->arch.vioapic; - - cancel_delayed_work_sync(&ioapic->eoi_inject); - if (ioapic) { - kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); - kvm->arch.vioapic = NULL; - kfree(ioapic); - } -} - -int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) -{ - struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); - if (!ioapic) - return -EINVAL; - - spin_lock(&ioapic->lock); - memcpy(state, ioapic, sizeof(struct kvm_ioapic_state)); - spin_unlock(&ioapic->lock); - return 0; -} - -int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) -{ - struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); - if (!ioapic) - return -EINVAL; - - spin_lock(&ioapic->lock); - memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); - ioapic->irr = 0; - update_handled_vectors(ioapic); - kvm_vcpu_request_scan_ioapic(kvm); - kvm_ioapic_inject_all(ioapic, state->irr); - spin_unlock(&ioapic->lock); - return 0; -} diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h deleted file mode 100644 index dc3baa3a538f..000000000000 --- a/virt/kvm/ioapic.h +++ /dev/null @@ -1,103 +0,0 @@ -#ifndef __KVM_IO_APIC_H -#define __KVM_IO_APIC_H - -#include - -#include "iodev.h" - -struct kvm; -struct kvm_vcpu; - -#define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS -#define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */ -#define IOAPIC_EDGE_TRIG 0 -#define IOAPIC_LEVEL_TRIG 1 - -#define IOAPIC_DEFAULT_BASE_ADDRESS 0xfec00000 -#define IOAPIC_MEM_LENGTH 0x100 - -/* Direct registers. */ -#define IOAPIC_REG_SELECT 0x00 -#define IOAPIC_REG_WINDOW 0x10 - -/* Indirect registers. */ -#define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */ -#define IOAPIC_REG_VERSION 0x01 -#define IOAPIC_REG_ARB_ID 0x02 /* x86 IOAPIC only */ - -/*ioapic delivery mode*/ -#define IOAPIC_FIXED 0x0 -#define IOAPIC_LOWEST_PRIORITY 0x1 -#define IOAPIC_PMI 0x2 -#define IOAPIC_NMI 0x4 -#define IOAPIC_INIT 0x5 -#define IOAPIC_EXTINT 0x7 - -#ifdef CONFIG_X86 -#define RTC_GSI 8 -#else -#define RTC_GSI -1U -#endif - -struct rtc_status { - int pending_eoi; - DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS); -}; - -struct kvm_ioapic { - u64 base_address; - u32 ioregsel; - u32 id; - u32 irr; - u32 pad; - union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS]; - unsigned long irq_states[IOAPIC_NUM_PINS]; - struct kvm_io_device dev; - struct kvm *kvm; - void (*ack_notifier)(void *opaque, int irq); - spinlock_t lock; - DECLARE_BITMAP(handled_vectors, 256); - struct rtc_status rtc_status; - struct delayed_work eoi_inject; - u32 irq_eoi[IOAPIC_NUM_PINS]; -}; - -#ifdef DEBUG -#define ASSERT(x) \ -do { \ - if (!(x)) { \ - printk(KERN_EMERG "assertion failed %s: %d: %s\n", \ - __FILE__, __LINE__, #x); \ - BUG(); \ - } \ -} while (0) -#else -#define ASSERT(x) do { } while (0) -#endif - -static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) -{ - return kvm->arch.vioapic; -} - -void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); -int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, - int short_hand, unsigned int dest, int dest_mode); -int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); -void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, - int trigger_mode); -bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector); -int kvm_ioapic_init(struct kvm *kvm); -void kvm_ioapic_destroy(struct kvm *kvm); -int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, - int level, bool line_status); -void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id); -int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq, unsigned long *dest_map); -int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); -int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); -void kvm_vcpu_request_scan_ioapic(struct kvm *kvm); -void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, - u32 *tmr); - -#endif diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c deleted file mode 100644 index 1345bde064f5..000000000000 --- a/virt/kvm/irq_comm.c +++ /dev/null @@ -1,347 +0,0 @@ -/* - * irq_comm.c: Common API for in kernel interrupt controller - * Copyright (c) 2007, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * Authors: - * Yaozu (Eddie) Dong - * - * Copyright 2010 Red Hat, Inc. and/or its affiliates. - */ - -#include -#include -#include -#include - -#include - -#include "irq.h" - -#include "ioapic.h" - -static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int irq_source_id, int level, - bool line_status) -{ -#ifdef CONFIG_X86 - struct kvm_pic *pic = pic_irqchip(kvm); - return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level); -#else - return -1; -#endif -} - -static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int irq_source_id, int level, - bool line_status) -{ - struct kvm_ioapic *ioapic = kvm->arch.vioapic; - return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level, - line_status); -} - -inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) -{ - return irq->delivery_mode == APIC_DM_LOWEST; -} - -int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq, unsigned long *dest_map) -{ - int i, r = -1; - struct kvm_vcpu *vcpu, *lowest = NULL; - - if (irq->dest_mode == 0 && irq->dest_id == 0xff && - kvm_is_dm_lowest_prio(irq)) { - printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); - irq->delivery_mode = APIC_DM_FIXED; - } - - if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map)) - return r; - - kvm_for_each_vcpu(i, vcpu, kvm) { - if (!kvm_apic_present(vcpu)) - continue; - - if (!kvm_apic_match_dest(vcpu, src, irq->shorthand, - irq->dest_id, irq->dest_mode)) - continue; - - if (!kvm_is_dm_lowest_prio(irq)) { - if (r < 0) - r = 0; - r += kvm_apic_set_irq(vcpu, irq, dest_map); - } else if (kvm_lapic_enabled(vcpu)) { - if (!lowest) - lowest = vcpu; - else if (kvm_apic_compare_prio(vcpu, lowest) < 0) - lowest = vcpu; - } - } - - if (lowest) - r = kvm_apic_set_irq(lowest, irq, dest_map); - - return r; -} - -static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, - struct kvm_lapic_irq *irq) -{ - trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); - - irq->dest_id = (e->msi.address_lo & - MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; - irq->vector = (e->msi.data & - MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; - irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo; - irq->trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data; - irq->delivery_mode = e->msi.data & 0x700; - irq->level = 1; - irq->shorthand = 0; - /* TODO Deal with RH bit of MSI message address */ -} - -int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int irq_source_id, int level, bool line_status) -{ - struct kvm_lapic_irq irq; - - if (!level) - return -1; - - kvm_set_msi_irq(e, &irq); - - return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL); -} - - -static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm) -{ - struct kvm_lapic_irq irq; - int r; - - kvm_set_msi_irq(e, &irq); - - if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) - return r; - else - return -EWOULDBLOCK; -} - -/* - * Deliver an IRQ in an atomic context if we can, or return a failure, - * user can retry in a process context. - * Return value: - * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context. - * Other values - No need to retry. - */ -int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) -{ - struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; - struct kvm_kernel_irq_routing_entry *e; - int ret = -EINVAL; - int idx; - - trace_kvm_set_irq(irq, level, irq_source_id); - - /* - * Injection into either PIC or IOAPIC might need to scan all CPUs, - * which would need to be retried from thread context; when same GSI - * is connected to both PIC and IOAPIC, we'd have to report a - * partial failure here. - * Since there's no easy way to do this, we only support injecting MSI - * which is limited to 1:1 GSI mapping. - */ - idx = srcu_read_lock(&kvm->irq_srcu); - if (kvm_irq_map_gsi(kvm, entries, irq) > 0) { - e = &entries[0]; - if (likely(e->type == KVM_IRQ_ROUTING_MSI)) - ret = kvm_set_msi_inatomic(e, kvm); - else - ret = -EWOULDBLOCK; - } - srcu_read_unlock(&kvm->irq_srcu, idx); - return ret; -} - -int kvm_request_irq_source_id(struct kvm *kvm) -{ - unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; - int irq_source_id; - - mutex_lock(&kvm->irq_lock); - irq_source_id = find_first_zero_bit(bitmap, BITS_PER_LONG); - - if (irq_source_id >= BITS_PER_LONG) { - printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n"); - irq_source_id = -EFAULT; - goto unlock; - } - - ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); -#ifdef CONFIG_X86 - ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID); -#endif - set_bit(irq_source_id, bitmap); -unlock: - mutex_unlock(&kvm->irq_lock); - - return irq_source_id; -} - -void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) -{ - ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); -#ifdef CONFIG_X86 - ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID); -#endif - - mutex_lock(&kvm->irq_lock); - if (irq_source_id < 0 || - irq_source_id >= BITS_PER_LONG) { - printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); - goto unlock; - } - clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); - if (!irqchip_in_kernel(kvm)) - goto unlock; - - kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id); -#ifdef CONFIG_X86 - kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id); -#endif -unlock: - mutex_unlock(&kvm->irq_lock); -} - -void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, - struct kvm_irq_mask_notifier *kimn) -{ - mutex_lock(&kvm->irq_lock); - kimn->irq = irq; - hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list); - mutex_unlock(&kvm->irq_lock); -} - -void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, - struct kvm_irq_mask_notifier *kimn) -{ - mutex_lock(&kvm->irq_lock); - hlist_del_rcu(&kimn->link); - mutex_unlock(&kvm->irq_lock); - synchronize_srcu(&kvm->irq_srcu); -} - -void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, - bool mask) -{ - struct kvm_irq_mask_notifier *kimn; - int idx, gsi; - - idx = srcu_read_lock(&kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); - if (gsi != -1) - hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link) - if (kimn->irq == gsi) - kimn->func(kimn, mask); - srcu_read_unlock(&kvm->irq_srcu, idx); -} - -int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, - const struct kvm_irq_routing_entry *ue) -{ - int r = -EINVAL; - int delta; - unsigned max_pin; - - switch (ue->type) { - case KVM_IRQ_ROUTING_IRQCHIP: - delta = 0; - switch (ue->u.irqchip.irqchip) { - case KVM_IRQCHIP_PIC_MASTER: - e->set = kvm_set_pic_irq; - max_pin = PIC_NUM_PINS; - break; - case KVM_IRQCHIP_PIC_SLAVE: - e->set = kvm_set_pic_irq; - max_pin = PIC_NUM_PINS; - delta = 8; - break; - case KVM_IRQCHIP_IOAPIC: - max_pin = KVM_IOAPIC_NUM_PINS; - e->set = kvm_set_ioapic_irq; - break; - default: - goto out; - } - e->irqchip.irqchip = ue->u.irqchip.irqchip; - e->irqchip.pin = ue->u.irqchip.pin + delta; - if (e->irqchip.pin >= max_pin) - goto out; - break; - case KVM_IRQ_ROUTING_MSI: - e->set = kvm_set_msi; - e->msi.address_lo = ue->u.msi.address_lo; - e->msi.address_hi = ue->u.msi.address_hi; - e->msi.data = ue->u.msi.data; - break; - default: - goto out; - } - - r = 0; -out: - return r; -} - -#define IOAPIC_ROUTING_ENTRY(irq) \ - { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ - .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } } -#define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq) - -#ifdef CONFIG_X86 -# define PIC_ROUTING_ENTRY(irq) \ - { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ - .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } } -# define ROUTING_ENTRY2(irq) \ - IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq) -#else -# define ROUTING_ENTRY2(irq) \ - IOAPIC_ROUTING_ENTRY(irq) -#endif - -static const struct kvm_irq_routing_entry default_routing[] = { - ROUTING_ENTRY2(0), ROUTING_ENTRY2(1), - ROUTING_ENTRY2(2), ROUTING_ENTRY2(3), - ROUTING_ENTRY2(4), ROUTING_ENTRY2(5), - ROUTING_ENTRY2(6), ROUTING_ENTRY2(7), - ROUTING_ENTRY2(8), ROUTING_ENTRY2(9), - ROUTING_ENTRY2(10), ROUTING_ENTRY2(11), - ROUTING_ENTRY2(12), ROUTING_ENTRY2(13), - ROUTING_ENTRY2(14), ROUTING_ENTRY2(15), - ROUTING_ENTRY1(16), ROUTING_ENTRY1(17), - ROUTING_ENTRY1(18), ROUTING_ENTRY1(19), - ROUTING_ENTRY1(20), ROUTING_ENTRY1(21), - ROUTING_ENTRY1(22), ROUTING_ENTRY1(23), -}; - -int kvm_setup_default_irq_routing(struct kvm *kvm) -{ - return kvm_set_irq_routing(kvm, default_routing, - ARRAY_SIZE(default_routing), 0); -} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 751ece6a595c..3be43424818b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -468,9 +468,6 @@ static struct kvm *kvm_create_vm(unsigned long type) if (r) goto out_err_no_disable; -#ifdef CONFIG_HAVE_KVM_IRQCHIP - INIT_HLIST_HEAD(&kvm->mask_notifier_list); -#endif #ifdef CONFIG_HAVE_KVM_IRQFD INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); #endif -- cgit v1.2.1 From c274e03af70544506cd7214fcc2d4c4376c2c6f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Fri, 21 Nov 2014 22:21:50 +0100 Subject: kvm: x86: move assigned-dev.c and iommu.c to arch/x86/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that ia64 is gone, we can hide deprecated device assignment in x86. Notable changes: - kvm_vm_ioctl_assigned_device() was moved to x86/kvm_arch_vm_ioctl() The easy parts were removed from generic kvm code, remaining - kvm_iommu_(un)map_pages() would require new code to be moved - struct kvm_assigned_dev_kernel depends on struct kvm_irq_ack_notifier Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- virt/kvm/assigned-dev.c | 1026 ----------------------------------------------- virt/kvm/iommu.c | 358 ----------------- virt/kvm/kvm_main.c | 2 - 3 files changed, 1386 deletions(-) delete mode 100644 virt/kvm/assigned-dev.c delete mode 100644 virt/kvm/iommu.c (limited to 'virt') diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c deleted file mode 100644 index e05000e200d2..000000000000 --- a/virt/kvm/assigned-dev.c +++ /dev/null @@ -1,1026 +0,0 @@ -/* - * Kernel-based Virtual Machine - device assignment support - * - * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates. - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "irq.h" - -static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, - int assigned_dev_id) -{ - struct list_head *ptr; - struct kvm_assigned_dev_kernel *match; - - list_for_each(ptr, head) { - match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); - if (match->assigned_dev_id == assigned_dev_id) - return match; - } - return NULL; -} - -static int find_index_from_host_irq(struct kvm_assigned_dev_kernel - *assigned_dev, int irq) -{ - int i, index; - struct msix_entry *host_msix_entries; - - host_msix_entries = assigned_dev->host_msix_entries; - - index = -1; - for (i = 0; i < assigned_dev->entries_nr; i++) - if (irq == host_msix_entries[i].vector) { - index = i; - break; - } - if (index < 0) - printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); - - return index; -} - -static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id) -{ - struct kvm_assigned_dev_kernel *assigned_dev = dev_id; - int ret; - - spin_lock(&assigned_dev->intx_lock); - if (pci_check_and_mask_intx(assigned_dev->dev)) { - assigned_dev->host_irq_disabled = true; - ret = IRQ_WAKE_THREAD; - } else - ret = IRQ_NONE; - spin_unlock(&assigned_dev->intx_lock); - - return ret; -} - -static void -kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev, - int vector) -{ - if (unlikely(assigned_dev->irq_requested_type & - KVM_DEV_IRQ_GUEST_INTX)) { - spin_lock(&assigned_dev->intx_mask_lock); - if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) - kvm_set_irq(assigned_dev->kvm, - assigned_dev->irq_source_id, vector, 1, - false); - spin_unlock(&assigned_dev->intx_mask_lock); - } else - kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, - vector, 1, false); -} - -static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id) -{ - struct kvm_assigned_dev_kernel *assigned_dev = dev_id; - - if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { - spin_lock_irq(&assigned_dev->intx_lock); - disable_irq_nosync(irq); - assigned_dev->host_irq_disabled = true; - spin_unlock_irq(&assigned_dev->intx_lock); - } - - kvm_assigned_dev_raise_guest_irq(assigned_dev, - assigned_dev->guest_irq); - - return IRQ_HANDLED; -} - -#ifdef __KVM_HAVE_MSI -static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id) -{ - struct kvm_assigned_dev_kernel *assigned_dev = dev_id; - int ret = kvm_set_irq_inatomic(assigned_dev->kvm, - assigned_dev->irq_source_id, - assigned_dev->guest_irq, 1); - return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED; -} - -static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id) -{ - struct kvm_assigned_dev_kernel *assigned_dev = dev_id; - - kvm_assigned_dev_raise_guest_irq(assigned_dev, - assigned_dev->guest_irq); - - return IRQ_HANDLED; -} -#endif - -#ifdef __KVM_HAVE_MSIX -static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id) -{ - struct kvm_assigned_dev_kernel *assigned_dev = dev_id; - int index = find_index_from_host_irq(assigned_dev, irq); - u32 vector; - int ret = 0; - - if (index >= 0) { - vector = assigned_dev->guest_msix_entries[index].vector; - ret = kvm_set_irq_inatomic(assigned_dev->kvm, - assigned_dev->irq_source_id, - vector, 1); - } - - return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED; -} - -static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id) -{ - struct kvm_assigned_dev_kernel *assigned_dev = dev_id; - int index = find_index_from_host_irq(assigned_dev, irq); - u32 vector; - - if (index >= 0) { - vector = assigned_dev->guest_msix_entries[index].vector; - kvm_assigned_dev_raise_guest_irq(assigned_dev, vector); - } - - return IRQ_HANDLED; -} -#endif - -/* Ack the irq line for an assigned device */ -static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) -{ - struct kvm_assigned_dev_kernel *dev = - container_of(kian, struct kvm_assigned_dev_kernel, - ack_notifier); - - kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false); - - spin_lock(&dev->intx_mask_lock); - - if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) { - bool reassert = false; - - spin_lock_irq(&dev->intx_lock); - /* - * The guest IRQ may be shared so this ack can come from an - * IRQ for another guest device. - */ - if (dev->host_irq_disabled) { - if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) - enable_irq(dev->host_irq); - else if (!pci_check_and_unmask_intx(dev->dev)) - reassert = true; - dev->host_irq_disabled = reassert; - } - spin_unlock_irq(&dev->intx_lock); - - if (reassert) - kvm_set_irq(dev->kvm, dev->irq_source_id, - dev->guest_irq, 1, false); - } - - spin_unlock(&dev->intx_mask_lock); -} - -static void deassign_guest_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) -{ - if (assigned_dev->ack_notifier.gsi != -1) - kvm_unregister_irq_ack_notifier(kvm, - &assigned_dev->ack_notifier); - - kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, - assigned_dev->guest_irq, 0, false); - - if (assigned_dev->irq_source_id != -1) - kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); - assigned_dev->irq_source_id = -1; - assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); -} - -/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ -static void deassign_host_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) -{ - /* - * We disable irq here to prevent further events. - * - * Notice this maybe result in nested disable if the interrupt type is - * INTx, but it's OK for we are going to free it. - * - * If this function is a part of VM destroy, please ensure that till - * now, the kvm state is still legal for probably we also have to wait - * on a currently running IRQ handler. - */ - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { - int i; - for (i = 0; i < assigned_dev->entries_nr; i++) - disable_irq(assigned_dev->host_msix_entries[i].vector); - - for (i = 0; i < assigned_dev->entries_nr; i++) - free_irq(assigned_dev->host_msix_entries[i].vector, - assigned_dev); - - assigned_dev->entries_nr = 0; - kfree(assigned_dev->host_msix_entries); - kfree(assigned_dev->guest_msix_entries); - pci_disable_msix(assigned_dev->dev); - } else { - /* Deal with MSI and INTx */ - if ((assigned_dev->irq_requested_type & - KVM_DEV_IRQ_HOST_INTX) && - (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { - spin_lock_irq(&assigned_dev->intx_lock); - pci_intx(assigned_dev->dev, false); - spin_unlock_irq(&assigned_dev->intx_lock); - synchronize_irq(assigned_dev->host_irq); - } else - disable_irq(assigned_dev->host_irq); - - free_irq(assigned_dev->host_irq, assigned_dev); - - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) - pci_disable_msi(assigned_dev->dev); - } - - assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); -} - -static int kvm_deassign_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev, - unsigned long irq_requested_type) -{ - unsigned long guest_irq_type, host_irq_type; - - if (!irqchip_in_kernel(kvm)) - return -EINVAL; - /* no irq assignment to deassign */ - if (!assigned_dev->irq_requested_type) - return -ENXIO; - - host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; - guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; - - if (host_irq_type) - deassign_host_irq(kvm, assigned_dev); - if (guest_irq_type) - deassign_guest_irq(kvm, assigned_dev); - - return 0; -} - -static void kvm_free_assigned_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) -{ - kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); -} - -static void kvm_free_assigned_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel - *assigned_dev) -{ - kvm_free_assigned_irq(kvm, assigned_dev); - - pci_reset_function(assigned_dev->dev); - if (pci_load_and_free_saved_state(assigned_dev->dev, - &assigned_dev->pci_saved_state)) - printk(KERN_INFO "%s: Couldn't reload %s saved state\n", - __func__, dev_name(&assigned_dev->dev->dev)); - else - pci_restore_state(assigned_dev->dev); - - pci_clear_dev_assigned(assigned_dev->dev); - - pci_release_regions(assigned_dev->dev); - pci_disable_device(assigned_dev->dev); - pci_dev_put(assigned_dev->dev); - - list_del(&assigned_dev->list); - kfree(assigned_dev); -} - -void kvm_free_all_assigned_devices(struct kvm *kvm) -{ - struct list_head *ptr, *ptr2; - struct kvm_assigned_dev_kernel *assigned_dev; - - list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { - assigned_dev = list_entry(ptr, - struct kvm_assigned_dev_kernel, - list); - - kvm_free_assigned_device(kvm, assigned_dev); - } -} - -static int assigned_device_enable_host_intx(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev) -{ - irq_handler_t irq_handler; - unsigned long flags; - - dev->host_irq = dev->dev->irq; - - /* - * We can only share the IRQ line with other host devices if we are - * able to disable the IRQ source at device-level - independently of - * the guest driver. Otherwise host devices may suffer from unbounded - * IRQ latencies when the guest keeps the line asserted. - */ - if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) { - irq_handler = kvm_assigned_dev_intx; - flags = IRQF_SHARED; - } else { - irq_handler = NULL; - flags = IRQF_ONESHOT; - } - if (request_threaded_irq(dev->host_irq, irq_handler, - kvm_assigned_dev_thread_intx, flags, - dev->irq_name, dev)) - return -EIO; - - if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) { - spin_lock_irq(&dev->intx_lock); - pci_intx(dev->dev, true); - spin_unlock_irq(&dev->intx_lock); - } - return 0; -} - -#ifdef __KVM_HAVE_MSI -static int assigned_device_enable_host_msi(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev) -{ - int r; - - if (!dev->dev->msi_enabled) { - r = pci_enable_msi(dev->dev); - if (r) - return r; - } - - dev->host_irq = dev->dev->irq; - if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi, - kvm_assigned_dev_thread_msi, 0, - dev->irq_name, dev)) { - pci_disable_msi(dev->dev); - return -EIO; - } - - return 0; -} -#endif - -#ifdef __KVM_HAVE_MSIX -static int assigned_device_enable_host_msix(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev) -{ - int i, r = -EINVAL; - - /* host_msix_entries and guest_msix_entries should have been - * initialized */ - if (dev->entries_nr == 0) - return r; - - r = pci_enable_msix_exact(dev->dev, - dev->host_msix_entries, dev->entries_nr); - if (r) - return r; - - for (i = 0; i < dev->entries_nr; i++) { - r = request_threaded_irq(dev->host_msix_entries[i].vector, - kvm_assigned_dev_msix, - kvm_assigned_dev_thread_msix, - 0, dev->irq_name, dev); - if (r) - goto err; - } - - return 0; -err: - for (i -= 1; i >= 0; i--) - free_irq(dev->host_msix_entries[i].vector, dev); - pci_disable_msix(dev->dev); - return r; -} - -#endif - -static int assigned_device_enable_guest_intx(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev, - struct kvm_assigned_irq *irq) -{ - dev->guest_irq = irq->guest_irq; - dev->ack_notifier.gsi = irq->guest_irq; - return 0; -} - -#ifdef __KVM_HAVE_MSI -static int assigned_device_enable_guest_msi(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev, - struct kvm_assigned_irq *irq) -{ - dev->guest_irq = irq->guest_irq; - dev->ack_notifier.gsi = -1; - return 0; -} -#endif - -#ifdef __KVM_HAVE_MSIX -static int assigned_device_enable_guest_msix(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev, - struct kvm_assigned_irq *irq) -{ - dev->guest_irq = irq->guest_irq; - dev->ack_notifier.gsi = -1; - return 0; -} -#endif - -static int assign_host_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev, - __u32 host_irq_type) -{ - int r = -EEXIST; - - if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) - return r; - - snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s", - pci_name(dev->dev)); - - switch (host_irq_type) { - case KVM_DEV_IRQ_HOST_INTX: - r = assigned_device_enable_host_intx(kvm, dev); - break; -#ifdef __KVM_HAVE_MSI - case KVM_DEV_IRQ_HOST_MSI: - r = assigned_device_enable_host_msi(kvm, dev); - break; -#endif -#ifdef __KVM_HAVE_MSIX - case KVM_DEV_IRQ_HOST_MSIX: - r = assigned_device_enable_host_msix(kvm, dev); - break; -#endif - default: - r = -EINVAL; - } - dev->host_irq_disabled = false; - - if (!r) - dev->irq_requested_type |= host_irq_type; - - return r; -} - -static int assign_guest_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev, - struct kvm_assigned_irq *irq, - unsigned long guest_irq_type) -{ - int id; - int r = -EEXIST; - - if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) - return r; - - id = kvm_request_irq_source_id(kvm); - if (id < 0) - return id; - - dev->irq_source_id = id; - - switch (guest_irq_type) { - case KVM_DEV_IRQ_GUEST_INTX: - r = assigned_device_enable_guest_intx(kvm, dev, irq); - break; -#ifdef __KVM_HAVE_MSI - case KVM_DEV_IRQ_GUEST_MSI: - r = assigned_device_enable_guest_msi(kvm, dev, irq); - break; -#endif -#ifdef __KVM_HAVE_MSIX - case KVM_DEV_IRQ_GUEST_MSIX: - r = assigned_device_enable_guest_msix(kvm, dev, irq); - break; -#endif - default: - r = -EINVAL; - } - - if (!r) { - dev->irq_requested_type |= guest_irq_type; - if (dev->ack_notifier.gsi != -1) - kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); - } else { - kvm_free_irq_source_id(kvm, dev->irq_source_id); - dev->irq_source_id = -1; - } - - return r; -} - -/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ -static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, - struct kvm_assigned_irq *assigned_irq) -{ - int r = -EINVAL; - struct kvm_assigned_dev_kernel *match; - unsigned long host_irq_type, guest_irq_type; - - if (!irqchip_in_kernel(kvm)) - return r; - - mutex_lock(&kvm->lock); - r = -ENODEV; - match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - assigned_irq->assigned_dev_id); - if (!match) - goto out; - - host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); - guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK); - - r = -EINVAL; - /* can only assign one type at a time */ - if (hweight_long(host_irq_type) > 1) - goto out; - if (hweight_long(guest_irq_type) > 1) - goto out; - if (host_irq_type == 0 && guest_irq_type == 0) - goto out; - - r = 0; - if (host_irq_type) - r = assign_host_irq(kvm, match, host_irq_type); - if (r) - goto out; - - if (guest_irq_type) - r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type); -out: - mutex_unlock(&kvm->lock); - return r; -} - -static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, - struct kvm_assigned_irq - *assigned_irq) -{ - int r = -ENODEV; - struct kvm_assigned_dev_kernel *match; - unsigned long irq_type; - - mutex_lock(&kvm->lock); - - match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - assigned_irq->assigned_dev_id); - if (!match) - goto out; - - irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK | - KVM_DEV_IRQ_GUEST_MASK); - r = kvm_deassign_irq(kvm, match, irq_type); -out: - mutex_unlock(&kvm->lock); - return r; -} - -/* - * We want to test whether the caller has been granted permissions to - * use this device. To be able to configure and control the device, - * the user needs access to PCI configuration space and BAR resources. - * These are accessed through PCI sysfs. PCI config space is often - * passed to the process calling this ioctl via file descriptor, so we - * can't rely on access to that file. We can check for permissions - * on each of the BAR resource files, which is a pretty clear - * indicator that the user has been granted access to the device. - */ -static int probe_sysfs_permissions(struct pci_dev *dev) -{ -#ifdef CONFIG_SYSFS - int i; - bool bar_found = false; - - for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) { - char *kpath, *syspath; - struct path path; - struct inode *inode; - int r; - - if (!pci_resource_len(dev, i)) - continue; - - kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL); - if (!kpath) - return -ENOMEM; - - /* Per sysfs-rules, sysfs is always at /sys */ - syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i); - kfree(kpath); - if (!syspath) - return -ENOMEM; - - r = kern_path(syspath, LOOKUP_FOLLOW, &path); - kfree(syspath); - if (r) - return r; - - inode = path.dentry->d_inode; - - r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS); - path_put(&path); - if (r) - return r; - - bar_found = true; - } - - /* If no resources, probably something special */ - if (!bar_found) - return -EPERM; - - return 0; -#else - return -EINVAL; /* No way to control the device without sysfs */ -#endif -} - -static int kvm_vm_ioctl_assign_device(struct kvm *kvm, - struct kvm_assigned_pci_dev *assigned_dev) -{ - int r = 0, idx; - struct kvm_assigned_dev_kernel *match; - struct pci_dev *dev; - - if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)) - return -EINVAL; - - mutex_lock(&kvm->lock); - idx = srcu_read_lock(&kvm->srcu); - - match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - assigned_dev->assigned_dev_id); - if (match) { - /* device already assigned */ - r = -EEXIST; - goto out; - } - - match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); - if (match == NULL) { - printk(KERN_INFO "%s: Couldn't allocate memory\n", - __func__); - r = -ENOMEM; - goto out; - } - dev = pci_get_domain_bus_and_slot(assigned_dev->segnr, - assigned_dev->busnr, - assigned_dev->devfn); - if (!dev) { - printk(KERN_INFO "%s: host device not found\n", __func__); - r = -EINVAL; - goto out_free; - } - - /* Don't allow bridges to be assigned */ - if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) { - r = -EPERM; - goto out_put; - } - - r = probe_sysfs_permissions(dev); - if (r) - goto out_put; - - if (pci_enable_device(dev)) { - printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); - r = -EBUSY; - goto out_put; - } - r = pci_request_regions(dev, "kvm_assigned_device"); - if (r) { - printk(KERN_INFO "%s: Could not get access to device regions\n", - __func__); - goto out_disable; - } - - pci_reset_function(dev); - pci_save_state(dev); - match->pci_saved_state = pci_store_saved_state(dev); - if (!match->pci_saved_state) - printk(KERN_DEBUG "%s: Couldn't store %s saved state\n", - __func__, dev_name(&dev->dev)); - - if (!pci_intx_mask_supported(dev)) - assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3; - - match->assigned_dev_id = assigned_dev->assigned_dev_id; - match->host_segnr = assigned_dev->segnr; - match->host_busnr = assigned_dev->busnr; - match->host_devfn = assigned_dev->devfn; - match->flags = assigned_dev->flags; - match->dev = dev; - spin_lock_init(&match->intx_lock); - spin_lock_init(&match->intx_mask_lock); - match->irq_source_id = -1; - match->kvm = kvm; - match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; - - list_add(&match->list, &kvm->arch.assigned_dev_head); - - if (!kvm->arch.iommu_domain) { - r = kvm_iommu_map_guest(kvm); - if (r) - goto out_list_del; - } - r = kvm_assign_device(kvm, match); - if (r) - goto out_list_del; - -out: - srcu_read_unlock(&kvm->srcu, idx); - mutex_unlock(&kvm->lock); - return r; -out_list_del: - if (pci_load_and_free_saved_state(dev, &match->pci_saved_state)) - printk(KERN_INFO "%s: Couldn't reload %s saved state\n", - __func__, dev_name(&dev->dev)); - list_del(&match->list); - pci_release_regions(dev); -out_disable: - pci_disable_device(dev); -out_put: - pci_dev_put(dev); -out_free: - kfree(match); - srcu_read_unlock(&kvm->srcu, idx); - mutex_unlock(&kvm->lock); - return r; -} - -static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, - struct kvm_assigned_pci_dev *assigned_dev) -{ - int r = 0; - struct kvm_assigned_dev_kernel *match; - - mutex_lock(&kvm->lock); - - match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - assigned_dev->assigned_dev_id); - if (!match) { - printk(KERN_INFO "%s: device hasn't been assigned before, " - "so cannot be deassigned\n", __func__); - r = -EINVAL; - goto out; - } - - kvm_deassign_device(kvm, match); - - kvm_free_assigned_device(kvm, match); - -out: - mutex_unlock(&kvm->lock); - return r; -} - - -#ifdef __KVM_HAVE_MSIX -static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, - struct kvm_assigned_msix_nr *entry_nr) -{ - int r = 0; - struct kvm_assigned_dev_kernel *adev; - - mutex_lock(&kvm->lock); - - adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - entry_nr->assigned_dev_id); - if (!adev) { - r = -EINVAL; - goto msix_nr_out; - } - - if (adev->entries_nr == 0) { - adev->entries_nr = entry_nr->entry_nr; - if (adev->entries_nr == 0 || - adev->entries_nr > KVM_MAX_MSIX_PER_DEV) { - r = -EINVAL; - goto msix_nr_out; - } - - adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) * - entry_nr->entry_nr, - GFP_KERNEL); - if (!adev->host_msix_entries) { - r = -ENOMEM; - goto msix_nr_out; - } - adev->guest_msix_entries = - kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr, - GFP_KERNEL); - if (!adev->guest_msix_entries) { - kfree(adev->host_msix_entries); - r = -ENOMEM; - goto msix_nr_out; - } - } else /* Not allowed set MSI-X number twice */ - r = -EINVAL; -msix_nr_out: - mutex_unlock(&kvm->lock); - return r; -} - -static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm, - struct kvm_assigned_msix_entry *entry) -{ - int r = 0, i; - struct kvm_assigned_dev_kernel *adev; - - mutex_lock(&kvm->lock); - - adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - entry->assigned_dev_id); - - if (!adev) { - r = -EINVAL; - goto msix_entry_out; - } - - for (i = 0; i < adev->entries_nr; i++) - if (adev->guest_msix_entries[i].vector == 0 || - adev->guest_msix_entries[i].entry == entry->entry) { - adev->guest_msix_entries[i].entry = entry->entry; - adev->guest_msix_entries[i].vector = entry->gsi; - adev->host_msix_entries[i].entry = entry->entry; - break; - } - if (i == adev->entries_nr) { - r = -ENOSPC; - goto msix_entry_out; - } - -msix_entry_out: - mutex_unlock(&kvm->lock); - - return r; -} -#endif - -static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm, - struct kvm_assigned_pci_dev *assigned_dev) -{ - int r = 0; - struct kvm_assigned_dev_kernel *match; - - mutex_lock(&kvm->lock); - - match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - assigned_dev->assigned_dev_id); - if (!match) { - r = -ENODEV; - goto out; - } - - spin_lock(&match->intx_mask_lock); - - match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX; - match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX; - - if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { - if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) { - kvm_set_irq(match->kvm, match->irq_source_id, - match->guest_irq, 0, false); - /* - * Masking at hardware-level is performed on demand, - * i.e. when an IRQ actually arrives at the host. - */ - } else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { - /* - * Unmask the IRQ line if required. Unmasking at - * device level will be performed by user space. - */ - spin_lock_irq(&match->intx_lock); - if (match->host_irq_disabled) { - enable_irq(match->host_irq); - match->host_irq_disabled = false; - } - spin_unlock_irq(&match->intx_lock); - } - } - - spin_unlock(&match->intx_mask_lock); - -out: - mutex_unlock(&kvm->lock); - return r; -} - -long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, - unsigned long arg) -{ - void __user *argp = (void __user *)arg; - int r; - - switch (ioctl) { - case KVM_ASSIGN_PCI_DEVICE: { - struct kvm_assigned_pci_dev assigned_dev; - - r = -EFAULT; - if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) - goto out; - r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); - if (r) - goto out; - break; - } - case KVM_ASSIGN_IRQ: { - r = -EOPNOTSUPP; - break; - } - case KVM_ASSIGN_DEV_IRQ: { - struct kvm_assigned_irq assigned_irq; - - r = -EFAULT; - if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) - goto out; - r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); - if (r) - goto out; - break; - } - case KVM_DEASSIGN_DEV_IRQ: { - struct kvm_assigned_irq assigned_irq; - - r = -EFAULT; - if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) - goto out; - r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq); - if (r) - goto out; - break; - } - case KVM_DEASSIGN_PCI_DEVICE: { - struct kvm_assigned_pci_dev assigned_dev; - - r = -EFAULT; - if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) - goto out; - r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev); - if (r) - goto out; - break; - } -#ifdef __KVM_HAVE_MSIX - case KVM_ASSIGN_SET_MSIX_NR: { - struct kvm_assigned_msix_nr entry_nr; - r = -EFAULT; - if (copy_from_user(&entry_nr, argp, sizeof entry_nr)) - goto out; - r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr); - if (r) - goto out; - break; - } - case KVM_ASSIGN_SET_MSIX_ENTRY: { - struct kvm_assigned_msix_entry entry; - r = -EFAULT; - if (copy_from_user(&entry, argp, sizeof entry)) - goto out; - r = kvm_vm_ioctl_set_msix_entry(kvm, &entry); - if (r) - goto out; - break; - } -#endif - case KVM_ASSIGN_SET_INTX_MASK: { - struct kvm_assigned_pci_dev assigned_dev; - - r = -EFAULT; - if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) - goto out; - r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev); - break; - } - default: - r = -ENOTTY; - break; - } -out: - return r; -} diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c deleted file mode 100644 index c1e6ae989a43..000000000000 --- a/virt/kvm/iommu.c +++ /dev/null @@ -1,358 +0,0 @@ -/* - * Copyright (c) 2006, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Copyright (C) 2006-2008 Intel Corporation - * Copyright IBM Corporation, 2008 - * Copyright 2010 Red Hat, Inc. and/or its affiliates. - * - * Author: Allen M. Kay - * Author: Weidong Han - * Author: Ben-Ami Yassour - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -static bool allow_unsafe_assigned_interrupts; -module_param_named(allow_unsafe_assigned_interrupts, - allow_unsafe_assigned_interrupts, bool, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(allow_unsafe_assigned_interrupts, - "Enable device assignment on platforms without interrupt remapping support."); - -static int kvm_iommu_unmap_memslots(struct kvm *kvm); -static void kvm_iommu_put_pages(struct kvm *kvm, - gfn_t base_gfn, unsigned long npages); - -static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn, - unsigned long npages) -{ - gfn_t end_gfn; - pfn_t pfn; - - pfn = gfn_to_pfn_memslot(slot, gfn); - end_gfn = gfn + npages; - gfn += 1; - - if (is_error_noslot_pfn(pfn)) - return pfn; - - while (gfn < end_gfn) - gfn_to_pfn_memslot(slot, gfn++); - - return pfn; -} - -static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages) -{ - unsigned long i; - - for (i = 0; i < npages; ++i) - kvm_release_pfn_clean(pfn + i); -} - -int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) -{ - gfn_t gfn, end_gfn; - pfn_t pfn; - int r = 0; - struct iommu_domain *domain = kvm->arch.iommu_domain; - int flags; - - /* check if iommu exists and in use */ - if (!domain) - return 0; - - gfn = slot->base_gfn; - end_gfn = gfn + slot->npages; - - flags = IOMMU_READ; - if (!(slot->flags & KVM_MEM_READONLY)) - flags |= IOMMU_WRITE; - if (!kvm->arch.iommu_noncoherent) - flags |= IOMMU_CACHE; - - - while (gfn < end_gfn) { - unsigned long page_size; - - /* Check if already mapped */ - if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) { - gfn += 1; - continue; - } - - /* Get the page size we could use to map */ - page_size = kvm_host_page_size(kvm, gfn); - - /* Make sure the page_size does not exceed the memslot */ - while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn) - page_size >>= 1; - - /* Make sure gfn is aligned to the page size we want to map */ - while ((gfn << PAGE_SHIFT) & (page_size - 1)) - page_size >>= 1; - - /* Make sure hva is aligned to the page size we want to map */ - while (__gfn_to_hva_memslot(slot, gfn) & (page_size - 1)) - page_size >>= 1; - - /* - * Pin all pages we are about to map in memory. This is - * important because we unmap and unpin in 4kb steps later. - */ - pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT); - if (is_error_noslot_pfn(pfn)) { - gfn += 1; - continue; - } - - /* Map into IO address space */ - r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn), - page_size, flags); - if (r) { - printk(KERN_ERR "kvm_iommu_map_address:" - "iommu failed to map pfn=%llx\n", pfn); - kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT); - goto unmap_pages; - } - - gfn += page_size >> PAGE_SHIFT; - - - } - - return 0; - -unmap_pages: - kvm_iommu_put_pages(kvm, slot->base_gfn, gfn - slot->base_gfn); - return r; -} - -static int kvm_iommu_map_memslots(struct kvm *kvm) -{ - int idx, r = 0; - struct kvm_memslots *slots; - struct kvm_memory_slot *memslot; - - if (kvm->arch.iommu_noncoherent) - kvm_arch_register_noncoherent_dma(kvm); - - idx = srcu_read_lock(&kvm->srcu); - slots = kvm_memslots(kvm); - - kvm_for_each_memslot(memslot, slots) { - r = kvm_iommu_map_pages(kvm, memslot); - if (r) - break; - } - srcu_read_unlock(&kvm->srcu, idx); - - return r; -} - -int kvm_assign_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) -{ - struct pci_dev *pdev = NULL; - struct iommu_domain *domain = kvm->arch.iommu_domain; - int r; - bool noncoherent; - - /* check if iommu exists and in use */ - if (!domain) - return 0; - - pdev = assigned_dev->dev; - if (pdev == NULL) - return -ENODEV; - - r = iommu_attach_device(domain, &pdev->dev); - if (r) { - dev_err(&pdev->dev, "kvm assign device failed ret %d", r); - return r; - } - - noncoherent = !iommu_capable(&pci_bus_type, IOMMU_CAP_CACHE_COHERENCY); - - /* Check if need to update IOMMU page table for guest memory */ - if (noncoherent != kvm->arch.iommu_noncoherent) { - kvm_iommu_unmap_memslots(kvm); - kvm->arch.iommu_noncoherent = noncoherent; - r = kvm_iommu_map_memslots(kvm); - if (r) - goto out_unmap; - } - - pci_set_dev_assigned(pdev); - - dev_info(&pdev->dev, "kvm assign device\n"); - - return 0; -out_unmap: - kvm_iommu_unmap_memslots(kvm); - return r; -} - -int kvm_deassign_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) -{ - struct iommu_domain *domain = kvm->arch.iommu_domain; - struct pci_dev *pdev = NULL; - - /* check if iommu exists and in use */ - if (!domain) - return 0; - - pdev = assigned_dev->dev; - if (pdev == NULL) - return -ENODEV; - - iommu_detach_device(domain, &pdev->dev); - - pci_clear_dev_assigned(pdev); - - dev_info(&pdev->dev, "kvm deassign device\n"); - - return 0; -} - -int kvm_iommu_map_guest(struct kvm *kvm) -{ - int r; - - if (!iommu_present(&pci_bus_type)) { - printk(KERN_ERR "%s: iommu not found\n", __func__); - return -ENODEV; - } - - mutex_lock(&kvm->slots_lock); - - kvm->arch.iommu_domain = iommu_domain_alloc(&pci_bus_type); - if (!kvm->arch.iommu_domain) { - r = -ENOMEM; - goto out_unlock; - } - - if (!allow_unsafe_assigned_interrupts && - !iommu_capable(&pci_bus_type, IOMMU_CAP_INTR_REMAP)) { - printk(KERN_WARNING "%s: No interrupt remapping support," - " disallowing device assignment." - " Re-enble with \"allow_unsafe_assigned_interrupts=1\"" - " module option.\n", __func__); - iommu_domain_free(kvm->arch.iommu_domain); - kvm->arch.iommu_domain = NULL; - r = -EPERM; - goto out_unlock; - } - - r = kvm_iommu_map_memslots(kvm); - if (r) - kvm_iommu_unmap_memslots(kvm); - -out_unlock: - mutex_unlock(&kvm->slots_lock); - return r; -} - -static void kvm_iommu_put_pages(struct kvm *kvm, - gfn_t base_gfn, unsigned long npages) -{ - struct iommu_domain *domain; - gfn_t end_gfn, gfn; - pfn_t pfn; - u64 phys; - - domain = kvm->arch.iommu_domain; - end_gfn = base_gfn + npages; - gfn = base_gfn; - - /* check if iommu exists and in use */ - if (!domain) - return; - - while (gfn < end_gfn) { - unsigned long unmap_pages; - size_t size; - - /* Get physical address */ - phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn)); - - if (!phys) { - gfn++; - continue; - } - - pfn = phys >> PAGE_SHIFT; - - /* Unmap address from IO address space */ - size = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE); - unmap_pages = 1ULL << get_order(size); - - /* Unpin all pages we just unmapped to not leak any memory */ - kvm_unpin_pages(kvm, pfn, unmap_pages); - - gfn += unmap_pages; - } -} - -void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot) -{ - kvm_iommu_put_pages(kvm, slot->base_gfn, slot->npages); -} - -static int kvm_iommu_unmap_memslots(struct kvm *kvm) -{ - int idx; - struct kvm_memslots *slots; - struct kvm_memory_slot *memslot; - - idx = srcu_read_lock(&kvm->srcu); - slots = kvm_memslots(kvm); - - kvm_for_each_memslot(memslot, slots) - kvm_iommu_unmap_pages(kvm, memslot); - - srcu_read_unlock(&kvm->srcu, idx); - - if (kvm->arch.iommu_noncoherent) - kvm_arch_unregister_noncoherent_dma(kvm); - - return 0; -} - -int kvm_iommu_unmap_guest(struct kvm *kvm) -{ - struct iommu_domain *domain = kvm->arch.iommu_domain; - - /* check if iommu exists and in use */ - if (!domain) - return 0; - - mutex_lock(&kvm->slots_lock); - kvm_iommu_unmap_memslots(kvm); - kvm->arch.iommu_domain = NULL; - kvm->arch.iommu_noncoherent = false; - mutex_unlock(&kvm->slots_lock); - - iommu_domain_free(domain); - return 0; -} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3be43424818b..5b4533079eaa 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2582,8 +2582,6 @@ static long kvm_vm_ioctl(struct file *filp, break; default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); - if (r == -ENOTTY) - r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); } out: return r; -- cgit v1.2.1 From 7d39f9e32c761abaa84bef7c4a4d9349ba8638e0 Mon Sep 17 00:00:00 2001 From: wanghaibin Date: Mon, 17 Nov 2014 09:27:37 +0000 Subject: KVM: ARM: VGIC: Optimize the vGIC vgic_update_irq_pending function. When vgic_update_irq_pending with level-sensitive false, it is need to deactivates an interrupt, and, it can go to out directly. Here return a false value, because it will be not need to kick. Signed-off-by: wanghaibin Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 3aaca49de325..5acf2c93f616 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1646,6 +1646,9 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, } else { vgic_dist_irq_clear_pending(vcpu, irq_num); } + + ret = false; + goto out; } enabled = vgic_irq_is_enabled(vcpu, irq_num); -- cgit v1.2.1 From bf4bea8e9a9058319a19f8c2710a6f0ef2459983 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 10 Nov 2014 08:33:56 +0000 Subject: kvm: fix kvm_is_mmio_pfn() and rename to kvm_is_reserved_pfn() This reverts commit 85c8555ff0 ("KVM: check for !is_zero_pfn() in kvm_is_mmio_pfn()") and renames the function to kvm_is_reserved_pfn. The problem being addressed by the patch above was that some ARM code based the memory mapping attributes of a pfn on the return value of kvm_is_mmio_pfn(), whose name indeed suggests that such pfns should be mapped as device memory. However, kvm_is_mmio_pfn() doesn't do quite what it says on the tin, and the existing non-ARM users were already using it in a way which suggests that its name should probably have been 'kvm_is_reserved_pfn' from the beginning, e.g., whether or not to call get_page/put_page on it etc. This means that returning false for the zero page is a mistake and the patch above should be reverted. Signed-off-by: Ard Biesheuvel Signed-off-by: Marc Zyngier --- virt/kvm/kvm_main.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 25ffac9e947d..3cee7b167052 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -107,10 +107,10 @@ EXPORT_SYMBOL_GPL(kvm_rebooting); static bool largepages_enabled = true; -bool kvm_is_mmio_pfn(pfn_t pfn) +bool kvm_is_reserved_pfn(pfn_t pfn) { if (pfn_valid(pfn)) - return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn)); + return PageReserved(pfn_to_page(pfn)); return true; } @@ -1321,7 +1321,7 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, else if ((vma->vm_flags & VM_PFNMAP)) { pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - BUG_ON(!kvm_is_mmio_pfn(pfn)); + BUG_ON(!kvm_is_reserved_pfn(pfn)); } else { if (async && vma_is_valid(vma, write_fault)) *async = true; @@ -1427,7 +1427,7 @@ static struct page *kvm_pfn_to_page(pfn_t pfn) if (is_error_noslot_pfn(pfn)) return KVM_ERR_PTR_BAD_PAGE; - if (kvm_is_mmio_pfn(pfn)) { + if (kvm_is_reserved_pfn(pfn)) { WARN_ON(1); return KVM_ERR_PTR_BAD_PAGE; } @@ -1456,7 +1456,7 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean); void kvm_release_pfn_clean(pfn_t pfn) { - if (!is_error_noslot_pfn(pfn) && !kvm_is_mmio_pfn(pfn)) + if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn)) put_page(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); @@ -1477,7 +1477,7 @@ static void kvm_release_pfn_dirty(pfn_t pfn) void kvm_set_pfn_dirty(pfn_t pfn) { - if (!kvm_is_mmio_pfn(pfn)) { + if (!kvm_is_reserved_pfn(pfn)) { struct page *page = pfn_to_page(pfn); if (!PageReserved(page)) SetPageDirty(page); @@ -1487,14 +1487,14 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); void kvm_set_pfn_accessed(pfn_t pfn) { - if (!kvm_is_mmio_pfn(pfn)) + if (!kvm_is_reserved_pfn(pfn)) mark_page_accessed(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); void kvm_get_pfn(pfn_t pfn) { - if (!kvm_is_mmio_pfn(pfn)) + if (!kvm_is_reserved_pfn(pfn)) get_page(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_get_pfn); -- cgit v1.2.1 From b1e952b4e484ebc9ffad674c361d261a1af02a13 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 24 Nov 2014 10:41:56 +0100 Subject: arm/arm64: vgic: Remove unreachable irq_clear_pending When 'injecting' an edge-triggered interrupt with a falling edge we shouldn't clear the pending state on the distributor. In fact, we don't, because the check in vgic_validate_injection would prevent us from ever reaching this bit of code. Remove the unreachable snippet. Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 5acf2c93f616..631a472e41cf 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1643,8 +1643,6 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, vgic_dist_irq_clear_level(vcpu, irq_num); if (!vgic_dist_irq_soft_pend(vcpu, irq_num)) vgic_dist_irq_clear_pending(vcpu, irq_num); - } else { - vgic_dist_irq_clear_pending(vcpu, irq_num); } ret = false; -- cgit v1.2.1 From 016ed39c54b8a3db680e5c6a43419f806133caf2 Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Wed, 19 Nov 2014 10:11:25 +0000 Subject: arm/arm64: KVM: vgic: kick the specific vcpu instead of iterating through all When call kvm_vgic_inject_irq to inject interrupt, we can known which vcpu the interrupt for by the irq_num and the cpuid. So we should just kick this vcpu to avoid iterating through all. Reviewed-by: Christoffer Dall Signed-off-by: Shannon Zhao Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 631a472e41cf..21e035cc5460 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1607,7 +1607,7 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) } } -static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, +static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, unsigned int irq_num, bool level) { struct vgic_dist *dist = &kvm->arch.vgic; @@ -1673,7 +1673,7 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, out: spin_unlock(&dist->lock); - return ret; + return ret ? cpuid : -EINVAL; } /** @@ -1693,9 +1693,14 @@ out: int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, bool level) { - if (likely(vgic_initialized(kvm)) && - vgic_update_irq_pending(kvm, cpuid, irq_num, level)) - vgic_kick_vcpus(kvm); + int vcpu_id; + + if (likely(vgic_initialized(kvm))) { + vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level); + if (vcpu_id >= 0) + /* kick the specified vcpu */ + kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id)); + } return 0; } -- cgit v1.2.1 From 5a38b6e6b41b4819516e13770ec02b7f0e622a57 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 1 Dec 2014 17:29:23 +0000 Subject: kvm: update_memslots: drop not needed check for the same number of pages if number of pages haven't changed sorting algorithm will do nothing, so there is no need to do extra check to avoid entering sorting logic. Signed-off-by: Igor Mammedov Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5b4533079eaa..407277b33dc8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -679,21 +679,19 @@ static void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *mslots = slots->memslots; WARN_ON(mslots[i].id != id); - if (new->npages != mslots[i].npages) { - if (new->npages < mslots[i].npages) { - while (i < KVM_MEM_SLOTS_NUM - 1 && - new->npages < mslots[i + 1].npages) { - mslots[i] = mslots[i + 1]; - slots->id_to_index[mslots[i].id] = i; - i++; - } - } else { - while (i > 0 && - new->npages > mslots[i - 1].npages) { - mslots[i] = mslots[i - 1]; - slots->id_to_index[mslots[i].id] = i; - i--; - } + if (new->npages < mslots[i].npages) { + while (i < KVM_MEM_SLOTS_NUM - 1 && + new->npages < mslots[i + 1].npages) { + mslots[i] = mslots[i + 1]; + slots->id_to_index[mslots[i].id] = i; + i++; + } + } else { + while (i > 0 && + new->npages > mslots[i - 1].npages) { + mslots[i] = mslots[i - 1]; + slots->id_to_index[mslots[i].id] = i; + i--; } } -- cgit v1.2.1 From 7f379cff11fb9e00e0ff9eff8fbc39ddfd4b1bec Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 1 Dec 2014 17:29:24 +0000 Subject: kvm: update_memslots: drop not needed check for the same slot UP/DOWN shift loops will shift array in needed direction and stop at place where new slot should be placed regardless of old slot size. Signed-off-by: Igor Mammedov Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 407277b33dc8..278ed65cc9c5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -679,20 +679,17 @@ static void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *mslots = slots->memslots; WARN_ON(mslots[i].id != id); - if (new->npages < mslots[i].npages) { - while (i < KVM_MEM_SLOTS_NUM - 1 && - new->npages < mslots[i + 1].npages) { - mslots[i] = mslots[i + 1]; - slots->id_to_index[mslots[i].id] = i; - i++; - } - } else { - while (i > 0 && - new->npages > mslots[i - 1].npages) { - mslots[i] = mslots[i - 1]; - slots->id_to_index[mslots[i].id] = i; - i--; - } + while (i < KVM_MEM_SLOTS_NUM - 1 && + new->npages < mslots[i + 1].npages) { + mslots[i] = mslots[i + 1]; + slots->id_to_index[mslots[i].id] = i; + i++; + } + while (i > 0 && + new->npages > mslots[i - 1].npages) { + mslots[i] = mslots[i - 1]; + slots->id_to_index[mslots[i].id] = i; + i--; } mslots[i] = *new; -- cgit v1.2.1 From 0e60b0799fedc495a5c57dbd669de3c10d72edd2 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 1 Dec 2014 17:29:26 +0000 Subject: kvm: change memslot sorting rule from size to GFN it will allow to use binary search for GFN -> memslot lookups, reducing lookup cost with large slots amount. Signed-off-by: Igor Mammedov Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 278ed65cc9c5..162817f853ec 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -666,10 +666,10 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) } /* - * Insert memslot and re-sort memslots based on their size, - * so the larger slots will get better fit. Sorting algorithm - * takes advantage of having initially sorted array and - * known changed memslot position. + * Insert memslot and re-sort memslots based on their GFN, + * so binary search could be used to lookup GFN. + * Sorting algorithm takes advantage of having initially + * sorted array and known changed memslot position. */ static void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new) @@ -679,14 +679,19 @@ static void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *mslots = slots->memslots; WARN_ON(mslots[i].id != id); + if (!new->npages) + new->base_gfn = 0; + while (i < KVM_MEM_SLOTS_NUM - 1 && - new->npages < mslots[i + 1].npages) { + new->base_gfn <= mslots[i + 1].base_gfn) { + if (!mslots[i + 1].npages) + break; mslots[i] = mslots[i + 1]; slots->id_to_index[mslots[i].id] = i; i++; } while (i > 0 && - new->npages > mslots[i - 1].npages) { + new->base_gfn > mslots[i - 1].base_gfn) { mslots[i] = mslots[i - 1]; slots->id_to_index[mslots[i].id] = i; i--; -- cgit v1.2.1 From 9c1a5d38780e652275aa55362dbee0d7e827e069 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 1 Dec 2014 17:29:27 +0000 Subject: kvm: optimize GFN to memslot lookup with large slots amount Current linear search doesn't scale well when large amount of memslots is used and looked up slot is not in the beginning memslots array. Taking in account that memslots don't overlap, it's possible to switch sorting order of memslots array from 'npages' to 'base_gfn' and use binary search for memslot lookup by GFN. As result of switching to binary search lookup times are reduced with large amount of memslots. Following is a table of search_memslot() cycles during WS2008R2 guest boot. boot, boot + ~10 min mostly same of using it, slot lookup randomized lookup max average average cycles cycles cycles 13 slots : 1450 28 30 13 slots : 1400 30 40 binary search 117 slots : 13000 30 460 117 slots : 2000 35 180 binary search Signed-off-by: Igor Mammedov Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 162817f853ec..759af6596a07 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -679,8 +679,14 @@ static void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *mslots = slots->memslots; WARN_ON(mslots[i].id != id); - if (!new->npages) + if (!new->npages) { new->base_gfn = 0; + if (mslots[i].npages) + slots->used_slots--; + } else { + if (!mslots[i].npages) + slots->used_slots++; + } while (i < KVM_MEM_SLOTS_NUM - 1 && new->base_gfn <= mslots[i + 1].base_gfn) { -- cgit v1.2.1 From eed6e79d7362335967def243cc891179a9c59892 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 25 Nov 2014 17:04:08 +0100 Subject: KVM: don't check for PF_VCPU when yielding kvm_enter_guest() has to be called with preemption disabled and will set PF_VCPU. Current code takes PF_VCPU as a hint that the VCPU thread is running and therefore needs no yield. However, the check on PF_VCPU is wrong on s390, where preemption has to stay enabled in order to correctly process page faults. Thus, s390 reenables preemption and starts to execute the guest. The thread might be scheduled out between kvm_enter_guest() and kvm_exit_guest(), resulting in PF_VCPU being set but not being run. When this happens, the opportunity for directed yield is missed. However, this check is done already in kvm_vcpu_on_spin before calling kvm_vcpu_yield_loop: if (!ACCESS_ONCE(vcpu->preempted)) continue; so the check on PF_VCPU is superfluous in general, and this patch removes it. Signed-off-by: David Hildenbrand Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 759af6596a07..2ffee3018a3d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1788,10 +1788,6 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target) rcu_read_unlock(); if (!task) return ret; - if (task->flags & PF_VCPU) { - put_task_struct(task); - return ret; - } ret = yield_to(task, 1); put_task_struct(task); -- cgit v1.2.1 From 7a72f7a140bfd3a5dae73088947010bfdbcf6a40 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 5 Aug 2014 16:44:14 +0200 Subject: KVM: track pid for VCPU only on KVM_RUN ioctl We currently track the pid of the task that runs the VCPU in vcpu_load. If a yield to that VCPU is triggered while the PID of the wrong thread is active, the wrong thread might receive a yield, but this will most likely not help the executing thread at all. Instead, if we only track the pid on the KVM_RUN ioctl, there are two possibilities: 1) the thread that did a non-KVM_RUN ioctl is holding a mutex that the VCPU thread is waiting for. In this case, the VCPU thread is not runnable, but we also do not do a wrong yield. 2) the thread that did a non-KVM_RUN ioctl is sleeping, or doing something that does not block the VCPU thread. In this case, the VCPU thread can receive the directed yield correctly. Signed-off-by: Christian Borntraeger CC: Rik van Riel CC: Raghavendra K T CC: Michael Mueller Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2ffee3018a3d..c5c186af823b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -124,15 +124,6 @@ int vcpu_load(struct kvm_vcpu *vcpu) if (mutex_lock_killable(&vcpu->mutex)) return -EINTR; - if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) { - /* The thread running this VCPU changed. */ - struct pid *oldpid = vcpu->pid; - struct pid *newpid = get_task_pid(current, PIDTYPE_PID); - rcu_assign_pointer(vcpu->pid, newpid); - if (oldpid) - synchronize_rcu(); - put_pid(oldpid); - } cpu = get_cpu(); preempt_notifier_register(&vcpu->preempt_notifier); kvm_arch_vcpu_load(vcpu, cpu); @@ -2050,6 +2041,15 @@ static long kvm_vcpu_ioctl(struct file *filp, r = -EINVAL; if (arg) goto out; + if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) { + /* The thread running this VCPU changed. */ + struct pid *oldpid = vcpu->pid; + struct pid *newpid = get_task_pid(current, PIDTYPE_PID); + rcu_assign_pointer(vcpu->pid, newpid); + if (oldpid) + synchronize_rcu(); + put_pid(oldpid); + } r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); trace_kvm_userspace_exit(vcpu->run->exit_reason, r); break; -- cgit v1.2.1 From 6d3cfbe21bef5b66530b50ad16c88fdc71a04c35 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 4 Dec 2014 15:02:24 +0000 Subject: arm/arm64: KVM: vgic: move reset initialization into vgic_init_maps() VGIC initialization currently happens in three phases: (1) kvm_vgic_create() (triggered by userspace GIC creation) (2) vgic_init_maps() (triggered by userspace GIC register read/write requests, or from kvm_vgic_init() if not already run) (3) kvm_vgic_init() (triggered by first VM run) We were doing initialization of some state to correspond with the state of a freshly-reset GIC in kvm_vgic_init(); this is too late, since it will overwrite changes made by userspace using the register access APIs before the VM is run. Move this initialization earlier, into the vgic_init_maps() phase. This fixes a bug where QEMU could successfully restore a saved VM state snapshot into a VM that had already been run, but could not restore it "from cold" using the -loadvm command line option (the symptoms being that the restored VM would run but interrupts were ignored). Finally rename vgic_init_maps to vgic_init and renamed kvm_vgic_init to kvm_vgic_map_resources. [ This patch is originally written by Peter Maydell, but I have modified it somewhat heavily, renaming various bits and moving code around. If something is broken, I am to be blamed. - Christoffer ] Acked-by: Marc Zyngier Reviewed-by: Eric Auger Signed-off-by: Peter Maydell Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 77 ++++++++++++++++++++++------------------------------- 1 file changed, 32 insertions(+), 45 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 21e035cc5460..1ce4e364c1e0 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -91,6 +91,7 @@ #define ACCESS_WRITE_VALUE (3 << 1) #define ACCESS_WRITE_MASK(x) ((x) & (3 << 1)) +static int vgic_init(struct kvm *kvm); static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); static void vgic_update_state(struct kvm *kvm); @@ -1732,39 +1733,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8; vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); - vgic_cpu->vgic_irq_lr_map = kzalloc(nr_irqs, GFP_KERNEL); + vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL); if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) { kvm_vgic_vcpu_destroy(vcpu); return -ENOMEM; } - return 0; -} - -/** - * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state - * @vcpu: pointer to the vcpu struct - * - * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to - * this vcpu and enable the VGIC for this VCPU - */ -static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - int i; - - for (i = 0; i < dist->nr_irqs; i++) { - if (i < VGIC_NR_PPIS) - vgic_bitmap_set_irq_val(&dist->irq_enabled, - vcpu->vcpu_id, i, 1); - if (i < VGIC_NR_PRIVATE_IRQS) - vgic_bitmap_set_irq_val(&dist->irq_cfg, - vcpu->vcpu_id, i, VGIC_CFG_EDGE); - - vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY; - } + memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs); /* * Store the number of LRs per vcpu, so we don't have to go @@ -1773,7 +1749,7 @@ static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) */ vgic_cpu->nr_lr = vgic->nr_lr; - vgic_enable(vcpu); + return 0; } void kvm_vgic_destroy(struct kvm *kvm) @@ -1810,12 +1786,12 @@ void kvm_vgic_destroy(struct kvm *kvm) * Allocate and initialize the various data structures. Must be called * with kvm->lock held! */ -static int vgic_init_maps(struct kvm *kvm) +static int vgic_init(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; struct kvm_vcpu *vcpu; int nr_cpus, nr_irqs; - int ret, i; + int ret, i, vcpu_id; if (dist->nr_cpus) /* Already allocated */ return 0; @@ -1865,16 +1841,28 @@ static int vgic_init_maps(struct kvm *kvm) if (ret) goto out; - kvm_for_each_vcpu(i, vcpu, kvm) { + for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4) + vgic_set_target_reg(kvm, 0, i); + + kvm_for_each_vcpu(vcpu_id, vcpu, kvm) { ret = vgic_vcpu_init_maps(vcpu, nr_irqs); if (ret) { kvm_err("VGIC: Failed to allocate vcpu memory\n"); break; } - } - for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4) - vgic_set_target_reg(kvm, 0, i); + for (i = 0; i < dist->nr_irqs; i++) { + if (i < VGIC_NR_PPIS) + vgic_bitmap_set_irq_val(&dist->irq_enabled, + vcpu->vcpu_id, i, 1); + if (i < VGIC_NR_PRIVATE_IRQS) + vgic_bitmap_set_irq_val(&dist->irq_cfg, + vcpu->vcpu_id, i, + VGIC_CFG_EDGE); + } + + vgic_enable(vcpu); + } out: if (ret) @@ -1884,18 +1872,16 @@ out: } /** - * kvm_vgic_init - Initialize global VGIC state before running any VCPUs + * kvm_vgic_map_resources - Configure global VGIC state before running any VCPUs * @kvm: pointer to the kvm struct * * Map the virtual CPU interface into the VM before running any VCPUs. We * can't do this at creation time, because user space must first set the - * virtual CPU interface address in the guest physical address space. Also - * initialize the ITARGETSRn regs to 0 on the emulated distributor. + * virtual CPU interface address in the guest physical address space. */ -int kvm_vgic_init(struct kvm *kvm) +int kvm_vgic_map_resources(struct kvm *kvm) { - struct kvm_vcpu *vcpu; - int ret = 0, i; + int ret = 0; if (!irqchip_in_kernel(kvm)) return 0; @@ -1912,7 +1898,11 @@ int kvm_vgic_init(struct kvm *kvm) goto out; } - ret = vgic_init_maps(kvm); + /* + * Initialize the vgic if this hasn't already been done on demand by + * accessing the vgic state from userspace. + */ + ret = vgic_init(kvm); if (ret) { kvm_err("Unable to allocate maps\n"); goto out; @@ -1926,9 +1916,6 @@ int kvm_vgic_init(struct kvm *kvm) goto out; } - kvm_for_each_vcpu(i, vcpu, kvm) - kvm_vgic_vcpu_init(vcpu); - kvm->arch.vgic.ready = true; out: if (ret) @@ -2173,7 +2160,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev, mutex_lock(&dev->kvm->lock); - ret = vgic_init_maps(dev->kvm); + ret = vgic_init(dev->kvm); if (ret) goto out; -- cgit v1.2.1 From c52edf5f8caff878afc93c1b1e9a3d9490a9932f Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 9 Dec 2014 14:28:09 +0100 Subject: arm/arm64: KVM: Rename vgic_initialized to vgic_ready The vgic_initialized() macro currently returns the state of the vgic->ready flag, which indicates if the vgic is ready to be used when running a VM, not specifically if its internal state has been initialized. Rename the macro accordingly in preparation for a more nuanced initialization flow. Acked-by: Marc Zyngier Reviewed-by: Eric Auger Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 1ce4e364c1e0..4edb2572ea9a 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1696,7 +1696,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, { int vcpu_id; - if (likely(vgic_initialized(kvm))) { + if (likely(vgic_ready(kvm))) { vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level); if (vcpu_id >= 0) /* kick the specified vcpu */ @@ -1888,7 +1888,7 @@ int kvm_vgic_map_resources(struct kvm *kvm) mutex_lock(&kvm->lock); - if (vgic_initialized(kvm)) + if (vgic_ready(kvm)) goto out; if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) || @@ -2282,7 +2282,7 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) mutex_lock(&dev->kvm->lock); - if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs) + if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_irqs) ret = -EBUSY; else dev->kvm->arch.vgic.nr_irqs = val; -- cgit v1.2.1 From 1f57be289571d514b9412da2af25a64a81b8dd89 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 9 Dec 2014 14:30:36 +0100 Subject: arm/arm64: KVM: Add (new) vgic_initialized macro Some code paths will need to check to see if the internal state of the vgic has been initialized (such as when creating new VCPUs), so introduce such a macro that checks the nr_cpus field which is set when the vgic has been initialized. Also set nr_cpus = 0 in kvm_vgic_destroy, because the error path in vgic_init() will call this function, and code should never errornously assume the vgic to be properly initialized after an error. Acked-by: Marc Zyngier Reviewed-by: Eric Auger Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 4edb2572ea9a..d862ea502167 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1780,6 +1780,7 @@ void kvm_vgic_destroy(struct kvm *kvm) dist->irq_spi_cpu = NULL; dist->irq_spi_target = NULL; dist->irq_pending_on_cpu = NULL; + dist->nr_cpus = 0; } /* @@ -1793,7 +1794,7 @@ static int vgic_init(struct kvm *kvm) int nr_cpus, nr_irqs; int ret, i, vcpu_id; - if (dist->nr_cpus) /* Already allocated */ + if (vgic_initialized(kvm)) return 0; nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus); -- cgit v1.2.1 From ca7d9c829d419c06e450afa5f785d58198c37caa Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 9 Dec 2014 14:35:33 +0100 Subject: arm/arm64: KVM: Initialize the vgic on-demand when injecting IRQs Userspace assumes that it can wire up IRQ injections after having created all VCPUs and after having created the VGIC, but potentially before starting the first VCPU. This can currently lead to lost IRQs because the state of that IRQ injection is not stored anywhere and we don't return an error to userspace. We haven't seen this problem manifest itself yet, presumably because guests reset the devices on boot, but this could cause issues with migration and other non-standard startup configurations. Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index d862ea502167..e373b76c5420 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1694,16 +1694,26 @@ out: int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, bool level) { + int ret = 0; int vcpu_id; - if (likely(vgic_ready(kvm))) { - vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level); - if (vcpu_id >= 0) - /* kick the specified vcpu */ - kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id)); + if (unlikely(!vgic_initialized(kvm))) { + mutex_lock(&kvm->lock); + ret = vgic_init(kvm); + mutex_unlock(&kvm->lock); + + if (ret) + goto out; } - return 0; + vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level); + if (vcpu_id >= 0) { + /* kick the specified vcpu */ + kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id)); + } + +out: + return ret; } static irqreturn_t vgic_maintenance_handler(int irq, void *data) -- cgit v1.2.1 From 05971120fca43e0357789a14b3386bb56eef2201 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 12 Dec 2014 21:19:23 +0100 Subject: arm/arm64: KVM: Require in-kernel vgic for the arch timers It is curently possible to run a VM with architected timers support without creating an in-kernel VGIC, which will result in interrupts from the virtual timer going nowhere. To address this issue, move the architected timers initialization to the time when we run a VCPU for the first time, and then only initialize (and enable) the architected timers if we have a properly created and initialized in-kernel VGIC. When injecting interrupts from the virtual timer to the vgic, the current setup should ensure that this never calls an on-demand init of the VGIC, which is the only call path that could return an error from kvm_vgic_inject_irq(), so capture the return value and raise a warning if there's an error there. We also change the kvm_timer_init() function from returning an int to be a void function, since the function always succeeds. Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 22fa819a9b6a..1c0772b340d8 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -61,12 +61,14 @@ static void timer_disarm(struct arch_timer_cpu *timer) static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) { + int ret; struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK; - kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - timer->irq->irq, - timer->irq->level); + ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, + timer->irq->irq, + timer->irq->level); + WARN_ON(ret); } static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) @@ -307,12 +309,24 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) timer_disarm(timer); } -int kvm_timer_init(struct kvm *kvm) +void kvm_timer_enable(struct kvm *kvm) { - if (timecounter && wqueue) { - kvm->arch.timer.cntvoff = kvm_phys_timer_read(); + if (kvm->arch.timer.enabled) + return; + + /* + * There is a potential race here between VCPUs starting for the first + * time, which may be enabling the timer multiple times. That doesn't + * hurt though, because we're just setting a variable to the same + * variable that it already was. The important thing is that all + * VCPUs have the enabled variable set, before entering the guest, if + * the arch timers are enabled. + */ + if (timecounter && wqueue) kvm->arch.timer.enabled = 1; - } +} - return 0; +void kvm_timer_init(struct kvm *kvm) +{ + kvm->arch.timer.cntvoff = kvm_phys_timer_read(); } -- cgit v1.2.1