From 2e0908afaf03675d22e40ce45a66b8d2070214ac Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Thu, 29 Jul 2010 15:04:17 +0200
Subject: KVM: PPC: RCU'ify the Book3s MMU

So far we've been running all code without locking of any sort. This
wasn't really an issue because I didn't see any parallel access to the
shadow MMU code coming. But then I started to implement dirty bitmapping
for MOL, which has the video code in its own thread, so suddenly we had
the dirty bitmap code running in parallel with the shadow MMU code. And
with that came trouble.

So I went ahead and made the MMU-modifying functions as parallelizable
as I could think of. I hope I didn't screw up too much RCU logic :-).
If you know your way around RCU and locking and what needs to be done
when, please take a look at this patch.

Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kvm/book3s_mmu_hpte.c | 78 ++++++++++++++++++++++++++++----------
 1 file changed, 59 insertions(+), 19 deletions(-)

(limited to 'arch/powerpc/kvm/book3s_mmu_hpte.c')

diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
index 4868d4a7ebc5..b64389362446 100644
--- a/arch/powerpc/kvm/book3s_mmu_hpte.c
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -60,68 +60,94 @@ void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 {
 	u64 index;
 
+	spin_lock(&vcpu->arch.mmu_lock);
+
 	/* Add to ePTE list */
 	index = kvmppc_mmu_hash_pte(pte->pte.eaddr);
-	hlist_add_head(&pte->list_pte, &vcpu->arch.hpte_hash_pte[index]);
+	hlist_add_head_rcu(&pte->list_pte, &vcpu->arch.hpte_hash_pte[index]);
 
 	/* Add to vPTE list */
 	index = kvmppc_mmu_hash_vpte(pte->pte.vpage);
-	hlist_add_head(&pte->list_vpte, &vcpu->arch.hpte_hash_vpte[index]);
+	hlist_add_head_rcu(&pte->list_vpte, &vcpu->arch.hpte_hash_vpte[index]);
 
 	/* Add to vPTE_long list */
 	index = kvmppc_mmu_hash_vpte_long(pte->pte.vpage);
-	hlist_add_head(&pte->list_vpte_long,
-		       &vcpu->arch.hpte_hash_vpte_long[index]);
+	hlist_add_head_rcu(&pte->list_vpte_long,
+			   &vcpu->arch.hpte_hash_vpte_long[index]);
+
+	spin_unlock(&vcpu->arch.mmu_lock);
+}
+
+static void free_pte_rcu(struct rcu_head *head)
+{
+	struct hpte_cache *pte = container_of(head, struct hpte_cache, rcu_head);
+	kmem_cache_free(hpte_cache, pte);
 }
 
 static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 {
+	/* pte already invalidated? */
+	if (hlist_unhashed(&pte->list_pte))
+		return;
+
 	dprintk_mmu("KVM: Flushing SPT: 0x%lx (0x%llx) -> 0x%llx\n",
 		    pte->pte.eaddr, pte->pte.vpage, pte->host_va);
 
 	/* Different for 32 and 64 bit */
 	kvmppc_mmu_invalidate_pte(vcpu, pte);
 
+	spin_lock(&vcpu->arch.mmu_lock);
+
+	hlist_del_init_rcu(&pte->list_pte);
+	hlist_del_init_rcu(&pte->list_vpte);
+	hlist_del_init_rcu(&pte->list_vpte_long);
+
+	spin_unlock(&vcpu->arch.mmu_lock);
+
 	if (pte->pte.may_write)
 		kvm_release_pfn_dirty(pte->pfn);
 	else
 		kvm_release_pfn_clean(pte->pfn);
 
-	hlist_del(&pte->list_pte);
-	hlist_del(&pte->list_vpte);
-	hlist_del(&pte->list_vpte_long);
-
 	vcpu->arch.hpte_cache_count--;
-	kmem_cache_free(hpte_cache, pte);
+	call_rcu(&pte->rcu_head, free_pte_rcu);
 }
 
 static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu)
 {
 	struct hpte_cache *pte;
-	struct hlist_node *node, *tmp;
+	struct hlist_node *node;
 	int i;
 
+	rcu_read_lock();
+
 	for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) {
 		struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i];
 
-		hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long)
+		hlist_for_each_entry_rcu(pte, node, list, list_vpte_long)
 			invalidate_pte(vcpu, pte);
 	}
+
+	rcu_read_unlock();
 }
 
 static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea)
 {
 	struct hlist_head *list;
-	struct hlist_node *node, *tmp;
+	struct hlist_node *node;
 	struct hpte_cache *pte;
 
 	/* Find the list of entries in the map */
 	list = &vcpu->arch.hpte_hash_pte[kvmppc_mmu_hash_pte(guest_ea)];
 
+	rcu_read_lock();
+
 	/* Check the list for matching entries and invalidate */
-	hlist_for_each_entry_safe(pte, node, tmp, list, list_pte)
+	hlist_for_each_entry_rcu(pte, node, list, list_pte)
 		if ((pte->pte.eaddr & ~0xfffUL) == guest_ea)
 			invalidate_pte(vcpu, pte);
+
+	rcu_read_unlock();
 }
 
 void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask)
@@ -156,33 +182,41 @@ void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask)
 static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp)
 {
 	struct hlist_head *list;
-	struct hlist_node *node, *tmp;
+	struct hlist_node *node;
 	struct hpte_cache *pte;
 	u64 vp_mask = 0xfffffffffULL;
 
 	list = &vcpu->arch.hpte_hash_vpte[kvmppc_mmu_hash_vpte(guest_vp)];
 
+	rcu_read_lock();
+
 	/* Check the list for matching entries and invalidate */
-	hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte)
+	hlist_for_each_entry_rcu(pte, node, list, list_vpte)
 		if ((pte->pte.vpage & vp_mask) == guest_vp)
 			invalidate_pte(vcpu, pte);
+
+	rcu_read_unlock();
 }
 
 /* Flush with mask 0xffffff000 */
 static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp)
 {
 	struct hlist_head *list;
-	struct hlist_node *node, *tmp;
+	struct hlist_node *node;
 	struct hpte_cache *pte;
 	u64 vp_mask = 0xffffff000ULL;
 
 	list = &vcpu->arch.hpte_hash_vpte_long[
 		kvmppc_mmu_hash_vpte_long(guest_vp)];
 
+	rcu_read_lock();
+
 	/* Check the list for matching entries and invalidate */
-	hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long)
+	hlist_for_each_entry_rcu(pte, node, list, list_vpte_long)
 		if ((pte->pte.vpage & vp_mask) == guest_vp)
 			invalidate_pte(vcpu, pte);
+
+	rcu_read_unlock();
 }
 
 void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
@@ -206,21 +240,25 @@ void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
 
 void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
 {
-	struct hlist_node *node, *tmp;
+	struct hlist_node *node;
 	struct hpte_cache *pte;
 	int i;
 
 	dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%lx - 0x%lx\n",
 		    vcpu->arch.hpte_cache_count, pa_start, pa_end);
 
+	rcu_read_lock();
+
 	for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) {
 		struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i];
 
-		hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long)
+		hlist_for_each_entry_rcu(pte, node, list, list_vpte_long)
 			if ((pte->pte.raddr >= pa_start) &&
 			    (pte->pte.raddr < pa_end))
 				invalidate_pte(vcpu, pte);
 	}
+
+	rcu_read_unlock();
 }
 
 struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu)
@@ -259,6 +297,8 @@ int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu)
 	kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte_long,
 				  ARRAY_SIZE(vcpu->arch.hpte_hash_vpte_long));
 
+	spin_lock_init(&vcpu->arch.mmu_lock);
+
 	return 0;
 }
--
cgit v1.2.1
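The patch above establishes the classic RCU discipline: writers serialize
against each other with a spinlock, readers walk the hash chains lock-free
under rcu_read_lock(), and freeing of unlinked entries is deferred past the
grace period with call_rcu(). Below is a minimal stand-alone sketch of that
discipline, not the kernel code itself: it is simplified to a single chain,
uses hypothetical names (entry, write_lock) and kfree() in place of the
kmem_cache, and uses the four-argument hlist_for_each_entry_rcu() of kernels
from this era, matching the patch.

#include <linux/kernel.h>
#include <linux/rculist.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct entry {
	struct hlist_node link;
	struct rcu_head rcu_head;
};

static DEFINE_SPINLOCK(write_lock);	/* serializes writers only */
static struct hlist_head chain;

static void entry_add(struct entry *e)
{
	spin_lock(&write_lock);
	hlist_add_head_rcu(&e->link, &chain);	/* publish to readers */
	spin_unlock(&write_lock);
}

static void entry_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct entry, rcu_head));
}

static void entry_del(struct entry *e)
{
	spin_lock(&write_lock);
	hlist_del_init_rcu(&e->link);	/* readers may still see the entry */
	spin_unlock(&write_lock);

	/* free only once every current reader has finished */
	call_rcu(&e->rcu_head, entry_free_rcu);
}

static void entry_visit_all(void (*fn)(struct entry *))
{
	struct entry *e;
	struct hlist_node *node;

	rcu_read_lock();	/* readers take no spinlock at all */
	hlist_for_each_entry_rcu(e, node, &chain, link)
		fn(e);
	rcu_read_unlock();
}

This is also why invalidate_pte() may be called from inside an
rcu_read_lock() section, as the flush paths above do: taking a spinlock is
legal in an RCU read-side critical section, while sleeping is not.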
From 2d27fc5eac0205588cb59ae138062e5e96695276 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Thu, 29 Jul 2010 15:04:19 +0200
Subject: KVM: PPC: Add book3s_32 tlbie flush acceleration

On Book3s_32 the tlbie instruction flushes effective addresses using the
mask 0x0ffff000. This is pretty hard to serve with a hash keyed on ~0xfff,
so to speed up that case we should also keep a special hash around for it.

Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kvm/book3s_mmu_hpte.c | 40 +++++++++++++++++++++++++++++-----
 1 file changed, 35 insertions(+), 5 deletions(-)

(limited to 'arch/powerpc/kvm/book3s_mmu_hpte.c')

diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
index b64389362446..02c64ab99c97 100644
--- a/arch/powerpc/kvm/book3s_mmu_hpte.c
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -45,6 +45,12 @@ static inline u64 kvmppc_mmu_hash_pte(u64 eaddr)
 	return hash_64(eaddr >> PTE_SIZE, HPTEG_HASH_BITS_PTE);
 }
 
+static inline u64 kvmppc_mmu_hash_pte_long(u64 eaddr)
+{
+	return hash_64((eaddr & 0x0ffff000) >> PTE_SIZE,
+		       HPTEG_HASH_BITS_PTE_LONG);
+}
+
 static inline u64 kvmppc_mmu_hash_vpte(u64 vpage)
 {
 	return hash_64(vpage & 0xfffffffffULL, HPTEG_HASH_BITS_VPTE);
@@ -66,6 +72,11 @@ void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 	index = kvmppc_mmu_hash_pte(pte->pte.eaddr);
 	hlist_add_head_rcu(&pte->list_pte, &vcpu->arch.hpte_hash_pte[index]);
 
+	/* Add to ePTE_long list */
+	index = kvmppc_mmu_hash_pte_long(pte->pte.eaddr);
+	hlist_add_head_rcu(&pte->list_pte_long,
+			   &vcpu->arch.hpte_hash_pte_long[index]);
+
 	/* Add to vPTE list */
 	index = kvmppc_mmu_hash_vpte(pte->pte.vpage);
 	hlist_add_head_rcu(&pte->list_vpte, &vcpu->arch.hpte_hash_vpte[index]);
@@ -99,6 +110,7 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 	spin_lock(&vcpu->arch.mmu_lock);
 
 	hlist_del_init_rcu(&pte->list_pte);
+	hlist_del_init_rcu(&pte->list_pte_long);
 	hlist_del_init_rcu(&pte->list_vpte);
 	hlist_del_init_rcu(&pte->list_vpte_long);
 
@@ -150,10 +162,28 @@ static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea)
 	rcu_read_unlock();
 }
 
-void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask)
+static void kvmppc_mmu_pte_flush_long(struct kvm_vcpu *vcpu, ulong guest_ea)
 {
-	u64 i;
+	struct hlist_head *list;
+	struct hlist_node *node;
+	struct hpte_cache *pte;
+
+	/* Find the list of entries in the map */
+	list = &vcpu->arch.hpte_hash_pte_long[
+			kvmppc_mmu_hash_pte_long(guest_ea)];
+	rcu_read_lock();
+
+	/* Check the list for matching entries and invalidate */
+	hlist_for_each_entry_rcu(pte, node, list, list_pte_long)
+		if ((pte->pte.eaddr & 0x0ffff000UL) == guest_ea)
+			invalidate_pte(vcpu, pte);
+
+	rcu_read_unlock();
+}
+
+void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask)
+{
 	dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%lx & 0x%lx\n",
 		    vcpu->arch.hpte_cache_count, guest_ea, ea_mask);
@@ -164,9 +194,7 @@ void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask)
 		kvmppc_mmu_pte_flush_page(vcpu, guest_ea);
 		break;
 	case 0x0ffff000:
-		/* 32-bit flush w/o segment, go through all possible segments */
-		for (i = 0; i < 0x100000000ULL; i += 0x10000000ULL)
-			kvmppc_mmu_pte_flush(vcpu, guest_ea | i, ~0xfffUL);
+		kvmppc_mmu_pte_flush_long(vcpu, guest_ea);
 		break;
 	case 0:
 		/* Doing a complete flush -> start from scratch */
@@ -292,6 +320,8 @@ int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu)
 	/* init hpte lookup hashes */
 	kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_pte,
 				  ARRAY_SIZE(vcpu->arch.hpte_hash_pte));
+	kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_pte_long,
+				  ARRAY_SIZE(vcpu->arch.hpte_hash_pte_long));
 	kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte,
 				  ARRAY_SIZE(vcpu->arch.hpte_hash_vpte));
 	kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte_long,
--
cgit v1.2.1
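The reason the extra hash pays off: kvmppc_mmu_hash_pte() keys on the full
effective address, so two shadow entries that differ only in their segment
bits (the top nibble of a 32-bit EA) generally land in different buckets,
which is why the old code had to re-probe the table once per possible
segment for a 0x0ffff000-masked flush. Keying on exactly the bits tlbie
compares puts every such alias in one bucket. A hypothetical illustration
follows; hash_pte_long() mirrors the patch, but the HPTEG_HASH_BITS_PTE_LONG
value is an assumption (the real one lives in a header not shown here), and
example() is not kernel code:

#include <linux/bug.h>
#include <linux/hash.h>

#define PTE_SIZE                 12	/* 4k pages: low 12 bits are offset */
#define HPTEG_HASH_BITS_PTE_LONG 12	/* assumed log2 of the bucket count */

static inline u64 hash_pte_long(u64 eaddr)
{
	/* key on exactly the bits a 32-bit tlbie compares: 0x0ffff000 */
	return hash_64((eaddr & 0x0ffff000) >> PTE_SIZE,
		       HPTEG_HASH_BITS_PTE_LONG);
}

static void example(void)
{
	u64 ea1 = 0x23456000ULL;	/* segment 2, page index 0x3456 */
	u64 ea2 = 0x83456000ULL;	/* segment 8, same page index   */

	/*
	 * A tlbie flush of guest_ea 0x03456000 must catch both entries.
	 * Under the masked hash they always share a bucket, so a single
	 * chain walk replaces the 16 per-segment flush attempts that the
	 * removed loop performed.
	 */
	BUG_ON(hash_pte_long(ea1) != hash_pte_long(ea2));
}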
From 4c4eea7769d0099ea09f9bdb7aed1cc61d57c9d6 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 2 Aug 2010 12:51:07 +0200
Subject: KVM: PPC: Add tracepoint for generic mmu map

This patch moves the generic mmu map debugging over to tracepoints.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/book3s_mmu_hpte.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/powerpc/kvm/book3s_mmu_hpte.c')

diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
index 02c64ab99c97..ac94bd992564 100644
--- a/arch/powerpc/kvm/book3s_mmu_hpte.c
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -21,6 +21,7 @@
 #include <linux/kvm_host.h>
 #include <linux/hash.h>
 #include <linux/slab.h>
+#include "trace.h"
 
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
@@ -66,6 +67,8 @@ void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 {
 	u64 index;
 
+	trace_kvm_book3s_mmu_map(pte);
+
 	spin_lock(&vcpu->arch.mmu_lock);
 
 	/* Add to ePTE list */
--
cgit v1.2.1
From 8696ee431233171b3c1cc82bae0193efc4fef2ac Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 2 Aug 2010 12:55:19 +0200
Subject: KVM: PPC: Move pte invalidate debug code to tracepoint

This patch moves the SPTE flush debug printk over to tracepoints.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/book3s_mmu_hpte.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/powerpc/kvm/book3s_mmu_hpte.c')

diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
index ac94bd992564..3397152a2b26 100644
--- a/arch/powerpc/kvm/book3s_mmu_hpte.c
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -104,8 +104,7 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 	if (hlist_unhashed(&pte->list_pte))
 		return;
 
-	dprintk_mmu("KVM: Flushing SPT: 0x%lx (0x%llx) -> 0x%llx\n",
-		    pte->pte.eaddr, pte->pte.vpage, pte->host_va);
+	trace_kvm_book3s_mmu_invalidate(pte);
 
 	/* Different for 32 and 64 bit */
 	kvmppc_mmu_invalidate_pte(vcpu, pte);
--
cgit v1.2.1
From c60b4cf70127941e2f944a7971a7f6b3ecb367ac Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 2 Aug 2010 13:40:30 +0200
Subject: KVM: PPC: Add tracepoints for generic spte flushes

The different ways of flushing shadow PTEs have their own debug prints
which use stupid old printk. Let's move them to tracepoints, making them
more easily available, faster, and possible to enable on demand.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/book3s_mmu_hpte.c | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

(limited to 'arch/powerpc/kvm/book3s_mmu_hpte.c')

diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
index 3397152a2b26..bd6a7676d0c8 100644
--- a/arch/powerpc/kvm/book3s_mmu_hpte.c
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -31,14 +31,6 @@
 
 #define PTE_SIZE 12
 
-/* #define DEBUG_MMU */
-
-#ifdef DEBUG_MMU
-#define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__)
-#else
-#define dprintk_mmu(a, ...) do { } while(0)
-#endif
-
 static struct kmem_cache *hpte_cache;
 
@@ -186,9 +178,7 @@ static void kvmppc_mmu_pte_flush_long(struct kvm_vcpu *vcpu, ulong guest_ea)
 
 void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask)
 {
-	dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%lx & 0x%lx\n",
-		    vcpu->arch.hpte_cache_count, guest_ea, ea_mask);
-
+	trace_kvm_book3s_mmu_flush("", vcpu, guest_ea, ea_mask);
 	guest_ea &= ea_mask;
 
 	switch (ea_mask) {
@@ -251,8 +241,7 @@ static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp)
 
 void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
 {
-	dprintk_mmu("KVM: Flushing %d Shadow vPTEs: 0x%llx & 0x%llx\n",
-		    vcpu->arch.hpte_cache_count, guest_vp, vp_mask);
+	trace_kvm_book3s_mmu_flush("v", vcpu, guest_vp, vp_mask);
 	guest_vp &= vp_mask;
 
 	switch(vp_mask) {
@@ -274,8 +263,7 @@ void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
 	struct hpte_cache *pte;
 	int i;
 
-	dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%lx - 0x%lx\n",
-		    vcpu->arch.hpte_cache_count, pa_start, pa_end);
+	trace_kvm_book3s_mmu_flush("p", vcpu, pa_start, pa_end);
 
 	rcu_read_lock();
--
cgit v1.2.1
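The trace_kvm_book3s_mmu_flush() calls above imply a matching TRACE_EVENT in
arch/powerpc/kvm/trace.h, which is not part of this diff. The sketch below
shows the shape such a definition would take, inferred from the call sites
alone; the field layout and format string are assumptions, not the real
header:

/* belongs in a trace header pulled in via <trace/define_trace.h> */
#include <linux/tracepoint.h>

#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm

TRACE_EVENT(kvm_book3s_mmu_flush,
	TP_PROTO(const char *type, struct kvm_vcpu *vcpu,
		 unsigned long long p1, unsigned long long p2),
	TP_ARGS(type, vcpu, p1, p2),

	TP_STRUCT__entry(
		__field(int,			count)
		__field(unsigned long long,	p1)
		__field(unsigned long long,	p2)
		__field(const char *,		type)
	),

	TP_fast_assign(
		__entry->count	= vcpu->arch.hpte_cache_count;
		__entry->p1	= p1;
		__entry->p2	= p2;
		__entry->type	= type;
	),

	TP_printk("Flush %d %sPTEs: 0x%llx & 0x%llx",
		  __entry->count, __entry->type, __entry->p1, __entry->p2)
);

Unlike dprintk_mmu(), a disabled tracepoint costs little more than a
patched-out branch, and it can be enabled at runtime; assuming the usual
kvm trace system, via something like
"echo 1 > /sys/kernel/debug/tracing/events/kvm/kvm_book3s_mmu_flush/enable".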
From e7c1d14e3bf40b87e6a3f68964b36dbb2c875c0f Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 2 Aug 2010 21:24:48 +0200
Subject: KVM: PPC: Make invalidation code more reliable

There is a race condition in the pte invalidation code path where we can't
be sure if a pte was invalidated already. So let's move the spin lock around
to get rid of the race.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/book3s_mmu_hpte.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'arch/powerpc/kvm/book3s_mmu_hpte.c')

diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
index bd6a7676d0c8..79751d8dd131 100644
--- a/arch/powerpc/kvm/book3s_mmu_hpte.c
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -92,10 +92,6 @@ static void free_pte_rcu(struct rcu_head *head)
 
 static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 {
-	/* pte already invalidated? */
-	if (hlist_unhashed(&pte->list_pte))
-		return;
-
 	trace_kvm_book3s_mmu_invalidate(pte);
 
 	/* Different for 32 and 64 bit */
@@ -103,18 +99,24 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 
 	spin_lock(&vcpu->arch.mmu_lock);
 
+	/* pte already invalidated in between? */
+	if (hlist_unhashed(&pte->list_pte)) {
+		spin_unlock(&vcpu->arch.mmu_lock);
+		return;
+	}
+
 	hlist_del_init_rcu(&pte->list_pte);
 	hlist_del_init_rcu(&pte->list_pte_long);
 	hlist_del_init_rcu(&pte->list_vpte);
 	hlist_del_init_rcu(&pte->list_vpte_long);
 
-	spin_unlock(&vcpu->arch.mmu_lock);
-
 	if (pte->pte.may_write)
 		kvm_release_pfn_dirty(pte->pfn);
 	else
 		kvm_release_pfn_clean(pte->pfn);
 
+	spin_unlock(&vcpu->arch.mmu_lock);
+
 	vcpu->arch.hpte_cache_count--;
 	call_rcu(&pte->rcu_head, free_pte_rcu);
 }
--
cgit v1.2.1
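To spell out the race this closes: the RCU walks take no lock, so two flush
paths can reach invalidate_pte() for the same pte at once. With the
hlist_unhashed() check performed before taking mmu_lock, as in the previous
version, both callers could pass the check; the loser would then release the
pfn a second time, decrement the counter twice, and queue the same rcu_head
to call_rcu() twice, corrupting the RCU callback list. Doing the check under
the lock makes test-and-teardown atomic. Continuing the stand-alone sketch
from after the first patch (hypothetical names, not the kernel code):

/*
 * Two concurrent deleters, one entry:
 *
 *   CPU0                                  CPU1
 *   entry_del_once(e)                     entry_del_once(e)
 *   lock; unhashed? no -> unlink; unlock
 *                                         lock; unhashed? yes -> bail
 */
static void entry_del_once(struct entry *e)
{
	spin_lock(&write_lock);
	if (hlist_unhashed(&e->link)) {		/* lost the race: done */
		spin_unlock(&write_lock);
		return;
	}
	/*
	 * hlist_del_init_rcu() leaves the node looking unhashed, which
	 * is exactly what makes the check above work.
	 */
	hlist_del_init_rcu(&e->link);
	spin_unlock(&write_lock);

	call_rcu(&e->rcu_head, entry_free_rcu);	/* queued exactly once */
}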