From b3ae2096974b12c3af2ad1a4e7716b084949867f Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Mon, 2 Jul 2012 17:56:33 +0900 Subject: KVM: Introduce kvm_unmap_hva_range() for kvm_mmu_notifier_invalidate_range_start() When we tested KVM under memory pressure, with THP enabled on the host, we noticed that MMU notifier took a long time to invalidate huge pages. Since the invalidation was done with mmu_lock held, it not only wasted the CPU but also made the host harder to respond. This patch mitigates this by using kvm_handle_hva_range(). Signed-off-by: Takuya Yoshikawa Cc: Alexander Graf Cc: Paul Mackerras Signed-off-by: Marcelo Tosatti --- virt/kvm/kvm_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b3ce91c623e2..e2b1a159e5df 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -332,8 +332,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, * count is also read inside the mmu_lock critical section. */ kvm->mmu_notifier_count++; - for (; start < end; start += PAGE_SIZE) - need_tlb_flush |= kvm_unmap_hva(kvm, start); + need_tlb_flush = kvm_unmap_hva_range(kvm, start, end); need_tlb_flush |= kvm->tlbs_dirty; /* we've to flush the tlb before the pages can be freed */ if (need_tlb_flush) -- cgit v1.2.1 From 903816fa4d016e20ec71a1a97700cfcdda115580 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 17 Jul 2012 21:54:11 +0800 Subject: KVM: using get_fault_pfn to get the fault pfn Using get_fault_pfn to cleanup the code Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- virt/kvm/kvm_main.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e2b1a159e5df..0fbbf2d21603 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -103,8 +103,8 @@ static bool largepages_enabled = true; static struct page *hwpoison_page; static pfn_t hwpoison_pfn; -struct page *fault_page; -pfn_t fault_pfn; +static struct page *fault_page; +static pfn_t fault_pfn; inline int kvm_is_mmio_pfn(pfn_t pfn) { @@ -949,12 +949,6 @@ int is_hwpoison_pfn(pfn_t pfn) } EXPORT_SYMBOL_GPL(is_hwpoison_pfn); -int is_fault_pfn(pfn_t pfn) -{ - return pfn == fault_pfn; -} -EXPORT_SYMBOL_GPL(is_fault_pfn); - int is_noslot_pfn(pfn_t pfn) { return pfn == bad_pfn; @@ -1038,11 +1032,12 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(gfn_to_hva); -static pfn_t get_fault_pfn(void) +pfn_t get_fault_pfn(void) { get_page(fault_page); return fault_pfn; } +EXPORT_SYMBOL_GPL(get_fault_pfn); int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int write, struct page **page) -- cgit v1.2.1 From ca0565f5736e67af3172d188577b57e303dd082a Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 17 Jul 2012 21:54:52 +0800 Subject: KVM: make bad_pfn static to kvm_main.c bad_pfn is not used out of kvm_main.c, so mark it static, also move it near hwpoison_pfn and fault_pfn Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- virt/kvm/kvm_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0fbbf2d21603..f955eee92aa9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -100,6 +100,9 @@ EXPORT_SYMBOL_GPL(kvm_rebooting); static bool largepages_enabled = true; +struct page *bad_page; +static pfn_t bad_pfn; + static struct page *hwpoison_page; static pfn_t hwpoison_pfn; @@ -2691,9 +2694,6 @@ static struct syscore_ops kvm_syscore_ops = { .resume = kvm_resume, }; -struct page *bad_page; -pfn_t bad_pfn; - static inline struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) { -- cgit v1.2.1 From d566104853361cc377c61f70e41c1ad3d44b86c6 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 17 Jul 2012 21:56:16 +0800 Subject: KVM: remove the unused parameter of gfn_to_pfn_memslot The parameter, 'kvm', is not used in gfn_to_pfn_memslot, we can happily remove it Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- virt/kvm/kvm_main.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f955eee92aa9..68dda513cd72 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1062,8 +1062,8 @@ static inline int check_user_page_hwpoison(unsigned long addr) return rc == -EHWPOISON; } -static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic, - bool *async, bool write_fault, bool *writable) +static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, + bool write_fault, bool *writable) { struct page *page[1]; int npages = 0; @@ -1143,9 +1143,9 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic, return pfn; } -pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr) +pfn_t hva_to_pfn_atomic(unsigned long addr) { - return hva_to_pfn(kvm, addr, true, NULL, true, NULL); + return hva_to_pfn(addr, true, NULL, true, NULL); } EXPORT_SYMBOL_GPL(hva_to_pfn_atomic); @@ -1163,7 +1163,7 @@ static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async, return page_to_pfn(bad_page); } - return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable); + return hva_to_pfn(addr, atomic, async, write_fault, writable); } pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) @@ -1192,11 +1192,10 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, } EXPORT_SYMBOL_GPL(gfn_to_pfn_prot); -pfn_t gfn_to_pfn_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot, gfn_t gfn) +pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn) { unsigned long addr = gfn_to_hva_memslot(slot, gfn); - return hva_to_pfn(kvm, addr, false, NULL, true, NULL); + return hva_to_pfn(addr, false, NULL, true, NULL); } int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, -- cgit v1.2.1 From 4c088493c8d07e4e27bad53a99dcfdc14cdf45f8 Mon Sep 17 00:00:00 2001 From: Raghavendra K T Date: Wed, 18 Jul 2012 19:07:46 +0530 Subject: KVM: Note down when cpu relax intercepted or pause loop exited Noting pause loop exited vcpu or cpu relax intercepted helps in filtering right candidate to yield. Wrong selection of vcpu; i.e., a vcpu that just did a pl-exit or cpu relax intercepted may contribute to performance degradation. Signed-off-by: Raghavendra K T Reviewed-by: Marcelo Tosatti Reviewed-by: Rik van Riel Tested-by: Christian Borntraeger # on s390x Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 68dda513cd72..0892b75eeedd 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -239,6 +239,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) } vcpu->run = page_address(page); + kvm_vcpu_set_in_spin_loop(vcpu, false); + kvm_vcpu_set_dy_eligible(vcpu, false); + r = kvm_arch_vcpu_init(vcpu); if (r < 0) goto fail_free_run; @@ -1585,6 +1588,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) int pass; int i; + kvm_vcpu_set_in_spin_loop(me, true); /* * We boost the priority of a VCPU that is runnable but not * currently running, because it got preempted by something @@ -1610,6 +1614,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) } } } + kvm_vcpu_set_in_spin_loop(me, false); } EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); -- cgit v1.2.1 From 06e48c510aa37f6e791602e6420422ea7071fe94 Mon Sep 17 00:00:00 2001 From: Raghavendra K T Date: Thu, 19 Jul 2012 15:17:52 +0530 Subject: KVM: Choose better candidate for directed yield Currently, on a large vcpu guests, there is a high probability of yielding to the same vcpu who had recently done a pause-loop exit or cpu relax intercepted. Such a yield can lead to the vcpu spinning again and hence degrade the performance. The patchset keeps track of the pause loop exit/cpu relax interception and gives chance to a vcpu which: (a) Has not done pause loop exit or cpu relax intercepted at all (probably he is preempted lock-holder) (b) Was skipped in last iteration because it did pause loop exit or cpu relax intercepted, and probably has become eligible now (next eligible lock holder) Signed-off-by: Raghavendra K T Reviewed-by: Marcelo Tosatti Reviewed-by: Rik van Riel Tested-by: Christian Borntraeger # on s390x Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0892b75eeedd..1e10ebe1a370 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1579,6 +1579,43 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target) } EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); +#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT +/* + * Helper that checks whether a VCPU is eligible for directed yield. + * Most eligible candidate to yield is decided by following heuristics: + * + * (a) VCPU which has not done pl-exit or cpu relax intercepted recently + * (preempted lock holder), indicated by @in_spin_loop. + * Set at the beiginning and cleared at the end of interception/PLE handler. + * + * (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get + * chance last time (mostly it has become eligible now since we have probably + * yielded to lockholder in last iteration. This is done by toggling + * @dy_eligible each time a VCPU checked for eligibility.) + * + * Yielding to a recently pl-exited/cpu relax intercepted VCPU before yielding + * to preempted lock-holder could result in wrong VCPU selection and CPU + * burning. Giving priority for a potential lock-holder increases lock + * progress. + * + * Since algorithm is based on heuristics, accessing another VCPU data without + * locking does not harm. It may result in trying to yield to same VCPU, fail + * and continue with next VCPU and so on. + */ +bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) +{ + bool eligible; + + eligible = !vcpu->spin_loop.in_spin_loop || + (vcpu->spin_loop.in_spin_loop && + vcpu->spin_loop.dy_eligible); + + if (vcpu->spin_loop.in_spin_loop) + kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); + + return eligible; +} +#endif void kvm_vcpu_on_spin(struct kvm_vcpu *me) { struct kvm *kvm = me->kvm; @@ -1607,6 +1644,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) continue; if (waitqueue_active(&vcpu->wq)) continue; + if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) + continue; if (kvm_vcpu_yield_to(vcpu)) { kvm->last_boosted_vcpu = i; yielded = 1; @@ -1615,6 +1654,9 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) } } kvm_vcpu_set_in_spin_loop(me, false); + + /* Ensure vcpu is not eligible during next spinloop */ + kvm_vcpu_set_dy_eligible(me, false); } EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); -- cgit v1.2.1