From 120f0779c3ed89c25ef1db943feac8ed73a0d7f9 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 1 Mar 2016 10:03:06 +0000 Subject: kvm arm: Move fake PGD handling to arch specific files Rearrange the code for fake pgd handling, which is applicable only for arm64. This will later be removed once we introduce the stage2 page table walker macros. Reviewed-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Suzuki K Poulose --- arch/arm/kvm/mmu.c | 47 +++++++---------------------------------------- 1 file changed, 7 insertions(+), 40 deletions(-) (limited to 'arch/arm/kvm/mmu.c') diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 58dbd5c439df..774d00b8066b 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -684,47 +684,16 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) if (!hwpgd) return -ENOMEM; - /* When the kernel uses more levels of page tables than the + /* + * When the kernel uses more levels of page tables than the * guest, we allocate a fake PGD and pre-populate it to point * to the next-level page table, which will be the real * initial page table pointed to by the VTTBR. - * - * When KVM_PREALLOC_LEVEL==2, we allocate a single page for - * the PMD and the kernel will use folded pud. - * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD - * pages. */ - if (KVM_PREALLOC_LEVEL > 0) { - int i; - - /* - * Allocate fake pgd for the page table manipulation macros to - * work. This is not used by the hardware and we have no - * alignment requirement for this allocation. - */ - pgd = kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t), - GFP_KERNEL | __GFP_ZERO); - - if (!pgd) { - kvm_free_hwpgd(hwpgd); - return -ENOMEM; - } - - /* Plug the HW PGD into the fake one. 
*/ - for (i = 0; i < PTRS_PER_S2_PGD; i++) { - if (KVM_PREALLOC_LEVEL == 1) - pgd_populate(NULL, pgd + i, - (pud_t *)hwpgd + i * PTRS_PER_PUD); - else if (KVM_PREALLOC_LEVEL == 2) - pud_populate(NULL, pud_offset(pgd, 0) + i, - (pmd_t *)hwpgd + i * PTRS_PER_PMD); - } - } else { - /* - * Allocate actual first-level Stage-2 page table used by the - * hardware for Stage-2 page table walks. - */ - pgd = (pgd_t *)hwpgd; + pgd = kvm_setup_fake_pgd(hwpgd); + if (IS_ERR(pgd)) { + kvm_free_hwpgd(hwpgd); + return PTR_ERR(pgd); } kvm_clean_pgd(pgd); @@ -831,9 +800,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm) unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); kvm_free_hwpgd(kvm_get_hwpgd(kvm)); - if (KVM_PREALLOC_LEVEL > 0) - kfree(kvm->arch.pgd); - + kvm_free_fake_pgd(kvm->arch.pgd); kvm->arch.pgd = NULL; } -- cgit v1.2.3 From bbb3b6b35087539e75792b46e07b7ce5282d0979 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 1 Mar 2016 12:00:39 +0000 Subject: kvm-arm: Replace kvm_pmd_huge with pmd_thp_or_huge Both arm and arm64 now provide a helper, pmd_thp_or_huge(), to check if the given pmd represents a huge page. Use that instead of our own custom check. 
Suggested-by: Mark Rutland Cc: Marc Zyngier Acked-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Suzuki K Poulose --- arch/arm/kvm/mmu.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'arch/arm/kvm/mmu.c') diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 774d00b8066b..7837f0afa5a4 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -45,7 +45,6 @@ static phys_addr_t hyp_idmap_vector; #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) -#define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x)) #define kvm_pud_huge(_x) pud_huge(_x) #define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0) @@ -115,7 +114,7 @@ static bool kvm_is_device_pfn(unsigned long pfn) */ static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd) { - if (!kvm_pmd_huge(*pmd)) + if (!pmd_thp_or_huge(*pmd)) return; pmd_clear(pmd); @@ -177,7 +176,7 @@ static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) { pte_t *pte_table = pte_offset_kernel(pmd, 0); - VM_BUG_ON(kvm_pmd_huge(*pmd)); + VM_BUG_ON(pmd_thp_or_huge(*pmd)); pmd_clear(pmd); kvm_tlb_flush_vmid_ipa(kvm, addr); pte_free_kernel(NULL, pte_table); @@ -240,7 +239,7 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud, do { next = kvm_pmd_addr_end(addr, end); if (!pmd_none(*pmd)) { - if (kvm_pmd_huge(*pmd)) { + if (pmd_thp_or_huge(*pmd)) { pmd_t old_pmd = *pmd; pmd_clear(pmd); @@ -326,7 +325,7 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, do { next = kvm_pmd_addr_end(addr, end); if (!pmd_none(*pmd)) { - if (kvm_pmd_huge(*pmd)) + if (pmd_thp_or_huge(*pmd)) kvm_flush_dcache_pmd(*pmd); else stage2_flush_ptes(kvm, pmd, addr, next); @@ -1050,7 +1049,7 @@ static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end) do { next = kvm_pmd_addr_end(addr, end); if (!pmd_none(*pmd)) { - if (kvm_pmd_huge(*pmd)) { + if (pmd_thp_or_huge(*pmd)) { if 
(!kvm_s2pmd_readonly(pmd)) kvm_set_s2pmd_readonly(pmd); } else { @@ -1331,7 +1330,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) if (!pmd || pmd_none(*pmd)) /* Nothing there */ goto out; - if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */ + if (pmd_thp_or_huge(*pmd)) { /* THP, HugeTLB */ *pmd = pmd_mkyoung(*pmd); pfn = pmd_pfn(*pmd); pfn_valid = true; @@ -1555,7 +1554,7 @@ static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) if (!pmd || pmd_none(*pmd)) /* Nothing there */ return 0; - if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */ + if (pmd_thp_or_huge(*pmd)) { /* THP, HugeTLB */ if (pmd_young(*pmd)) { *pmd = pmd_mkold(*pmd); return 1; @@ -1585,7 +1584,7 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) if (!pmd || pmd_none(*pmd)) /* Nothing there */ return 0; - if (kvm_pmd_huge(*pmd)) /* THP, HugeTLB */ + if (pmd_thp_or_huge(*pmd)) /* THP, HugeTLB */ return pmd_young(*pmd); pte = pte_offset_kernel(pmd, gpa); -- cgit v1.2.3 From 77b5665141a9a7e69d2f685ee2f2a3698fd27397 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 22 Mar 2016 14:06:47 +0000 Subject: kvm-arm: Remove kvm_pud_huge() Get rid of kvm_pud_huge() which falls back to pud_huge. Use pud_huge instead. 
Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Suzuki K Poulose --- arch/arm/kvm/mmu.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/arm/kvm/mmu.c') diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 7837f0afa5a4..d0c0ee92c378 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -45,8 +45,6 @@ static phys_addr_t hyp_idmap_vector; #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) -#define kvm_pud_huge(_x) pud_huge(_x) - #define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0) #define KVM_S2_FLAG_LOGGING_ACTIVE (1UL << 1) @@ -1077,7 +1075,7 @@ static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end) next = kvm_pud_addr_end(addr, end); if (!pud_none(*pud)) { /* TODO:PUD not supported, revisit later if supported */ - BUG_ON(kvm_pud_huge(*pud)); + BUG_ON(pud_huge(*pud)); stage2_wp_pmds(pud, addr, next); } } while (pud++, addr = next, addr != end); -- cgit v1.2.3 From 70fd19068573e449d47eb2daa69cf5db541ef4f5 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 22 Mar 2016 18:33:45 +0000 Subject: kvm-arm: Use explicit stage2 helper routines We have stage2 page table helpers for both arm and arm64. Switch to the stage2 helpers for routines that only deal with stage2 page table. 
Cc: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Suzuki K Poulose --- arch/arm/kvm/mmu.c | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'arch/arm/kvm/mmu.c') diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index d0c0ee92c378..f93f717b5d8b 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -319,9 +319,9 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, pmd_t *pmd; phys_addr_t next; - pmd = pmd_offset(pud, addr); + pmd = stage2_pmd_offset(pud, addr); do { - next = kvm_pmd_addr_end(addr, end); + next = stage2_pmd_addr_end(addr, end); if (!pmd_none(*pmd)) { if (pmd_thp_or_huge(*pmd)) kvm_flush_dcache_pmd(*pmd); @@ -337,11 +337,11 @@ static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd, pud_t *pud; phys_addr_t next; - pud = pud_offset(pgd, addr); + pud = stage2_pud_offset(pgd, addr); do { - next = kvm_pud_addr_end(addr, end); - if (!pud_none(*pud)) { - if (pud_huge(*pud)) + next = stage2_pud_addr_end(addr, end); + if (!stage2_pud_none(*pud)) { + if (stage2_pud_huge(*pud)) kvm_flush_dcache_pud(*pud); else stage2_flush_pmds(kvm, pud, addr, next); @@ -357,9 +357,9 @@ static void stage2_flush_memslot(struct kvm *kvm, phys_addr_t next; pgd_t *pgd; - pgd = kvm->arch.pgd + kvm_pgd_index(addr); + pgd = kvm->arch.pgd + stage2_pgd_index(addr); do { - next = kvm_pgd_addr_end(addr, end); + next = stage2_pgd_addr_end(addr, end); stage2_flush_puds(kvm, pgd, addr, next); } while (pgd++, addr = next, addr != end); } @@ -807,16 +807,16 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache pgd_t *pgd; pud_t *pud; - pgd = kvm->arch.pgd + kvm_pgd_index(addr); - if (WARN_ON(pgd_none(*pgd))) { + pgd = kvm->arch.pgd + stage2_pgd_index(addr); + if (WARN_ON(stage2_pgd_none(*pgd))) { if (!cache) return NULL; pud = mmu_memory_cache_alloc(cache); - pgd_populate(NULL, pgd, pud); + stage2_pgd_populate(pgd, pud); get_page(virt_to_page(pgd)); } - return 
pud_offset(pgd, addr); + return stage2_pud_offset(pgd, addr); } static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, @@ -826,15 +826,15 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache pmd_t *pmd; pud = stage2_get_pud(kvm, cache, addr); - if (pud_none(*pud)) { + if (stage2_pud_none(*pud)) { if (!cache) return NULL; pmd = mmu_memory_cache_alloc(cache); - pud_populate(NULL, pud, pmd); + stage2_pud_populate(pud, pmd); get_page(virt_to_page(pud)); } - return pmd_offset(pud, addr); + return stage2_pmd_offset(pud, addr); } static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache @@ -1042,10 +1042,10 @@ static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end) pmd_t *pmd; phys_addr_t next; - pmd = pmd_offset(pud, addr); + pmd = stage2_pmd_offset(pud, addr); do { - next = kvm_pmd_addr_end(addr, end); + next = stage2_pmd_addr_end(addr, end); if (!pmd_none(*pmd)) { if (pmd_thp_or_huge(*pmd)) { if (!kvm_s2pmd_readonly(pmd)) @@ -1070,12 +1070,12 @@ static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end) pud_t *pud; phys_addr_t next; - pud = pud_offset(pgd, addr); + pud = stage2_pud_offset(pgd, addr); do { - next = kvm_pud_addr_end(addr, end); - if (!pud_none(*pud)) { + next = stage2_pud_addr_end(addr, end); + if (!stage2_pud_none(*pud)) { /* TODO:PUD not supported, revisit later if supported */ - BUG_ON(pud_huge(*pud)); + BUG_ON(stage2_pud_huge(*pud)); stage2_wp_pmds(pud, addr, next); } } while (pud++, addr = next, addr != end); @@ -1092,7 +1092,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) pgd_t *pgd; phys_addr_t next; - pgd = kvm->arch.pgd + kvm_pgd_index(addr); + pgd = kvm->arch.pgd + stage2_pgd_index(addr); do { /* * Release kvm_mmu_lock periodically if the memory region is @@ -1104,8 +1104,8 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) if (need_resched() || 
spin_needbreak(&kvm->mmu_lock)) cond_resched_lock(&kvm->mmu_lock); - next = kvm_pgd_addr_end(addr, end); - if (pgd_present(*pgd)) + next = stage2_pgd_addr_end(addr, end); + if (stage2_pgd_present(*pgd)) stage2_wp_puds(pgd, addr, next); } while (pgd++, addr = next, addr != end); } -- cgit v1.2.3 From 64f324979210d4064adf64f19da40c125c9dd137 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 22 Mar 2016 18:56:21 +0000 Subject: kvm-arm: Add explicit hyp page table modifiers We have common routines to modify hyp and stage2 page tables based on the 'kvm' parameter. For a smoother transition to using separate routines for each, duplicate the routines and modify the copy to work on hyp. Marks the forked routines with _hyp_ and gets rid of the kvm parameter which is no longer needed and is NULL for hyp. Also, gets rid of the kvm_tlb_flush_vmid_ipa() calls from the hyp versions. Uses explicit host page table accessors instead of the kvm_* page table helpers. Suggested-by: Christoffer Dall Cc: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Suzuki K Poulose --- arch/arm/kvm/mmu.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 99 insertions(+), 5 deletions(-) (limited to 'arch/arm/kvm/mmu.c') diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index f93f717b5d8b..af526f67022c 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -388,6 +388,100 @@ static void stage2_flush_vm(struct kvm *kvm) srcu_read_unlock(&kvm->srcu, idx); } +static void clear_hyp_pgd_entry(pgd_t *pgd) +{ + pud_t *pud_table __maybe_unused = pud_offset(pgd, 0UL); + pgd_clear(pgd); + pud_free(NULL, pud_table); + put_page(virt_to_page(pgd)); +} + +static void clear_hyp_pud_entry(pud_t *pud) +{ + pmd_t *pmd_table __maybe_unused = pmd_offset(pud, 0); + VM_BUG_ON(pud_huge(*pud)); + pud_clear(pud); + pmd_free(NULL, pmd_table); + put_page(virt_to_page(pud)); +} + +static void clear_hyp_pmd_entry(pmd_t *pmd) +{ + pte_t *pte_table = pte_offset_kernel(pmd, 
0); + VM_BUG_ON(pmd_thp_or_huge(*pmd)); + pmd_clear(pmd); + pte_free_kernel(NULL, pte_table); + put_page(virt_to_page(pmd)); +} + +static void unmap_hyp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end) +{ + pte_t *pte, *start_pte; + + start_pte = pte = pte_offset_kernel(pmd, addr); + do { + if (!pte_none(*pte)) { + kvm_set_pte(pte, __pte(0)); + put_page(virt_to_page(pte)); + } + } while (pte++, addr += PAGE_SIZE, addr != end); + + if (hyp_pte_table_empty(start_pte)) + clear_hyp_pmd_entry(pmd); +} + +static void unmap_hyp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t next; + pmd_t *pmd, *start_pmd; + + start_pmd = pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + /* Hyp doesn't use huge pmds */ + if (!pmd_none(*pmd)) + unmap_hyp_ptes(pmd, addr, next); + } while (pmd++, addr = next, addr != end); + + if (hyp_pmd_table_empty(start_pmd)) + clear_hyp_pud_entry(pud); +} + +static void unmap_hyp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t next; + pud_t *pud, *start_pud; + + start_pud = pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + /* Hyp doesn't use huge puds */ + if (!pud_none(*pud)) + unmap_hyp_pmds(pud, addr, next); + } while (pud++, addr = next, addr != end); + + if (hyp_pud_table_empty(start_pud)) + clear_hyp_pgd_entry(pgd); +} + +static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) +{ + pgd_t *pgd; + phys_addr_t addr = start, end = start + size; + phys_addr_t next; + + /* + * We don't unmap anything from HYP, except at the hyp tear down. + * Hence, we don't have to invalidate the TLBs here. 
+ */ + pgd = pgdp + pgd_index(addr); + do { + next = pgd_addr_end(addr, end); + if (!pgd_none(*pgd)) + unmap_hyp_puds(pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + /** * free_boot_hyp_pgd - free HYP boot page tables * @@ -398,14 +492,14 @@ void free_boot_hyp_pgd(void) mutex_lock(&kvm_hyp_pgd_mutex); if (boot_hyp_pgd) { - unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); - unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); + unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); + unmap_hyp_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order); boot_hyp_pgd = NULL; } if (hyp_pgd) - unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); + unmap_hyp_range(hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); mutex_unlock(&kvm_hyp_pgd_mutex); } @@ -430,9 +524,9 @@ void free_hyp_pgds(void) if (hyp_pgd) { for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE) - unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); + unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) - unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); + unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); free_pages((unsigned long)hyp_pgd, hyp_pgd_order); hyp_pgd = NULL; -- cgit v1.2.3 From 7a1c831ee8553b8199f21183942a46adf808f174 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 23 Mar 2016 12:08:02 +0000 Subject: kvm-arm: Add stage2 page table modifiers Now that the hyp page table is handled by a different set of routines, rename the original shared routines to stage2 handlers. Also make explicit use of the stage2 page table helpers. unmap_range has been merged into the existing unmap_stage2_range. 
Cc: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Suzuki K Poulose --- arch/arm/kvm/mmu.c | 97 +++++++++++++++++++++++++----------------------------- 1 file changed, 44 insertions(+), 53 deletions(-) (limited to 'arch/arm/kvm/mmu.c') diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index af526f67022c..f2a6d9b8ca2d 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -152,26 +152,26 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) return p; } -static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) +static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) { - pud_t *pud_table __maybe_unused = pud_offset(pgd, 0); - pgd_clear(pgd); + pud_t *pud_table __maybe_unused = stage2_pud_offset(pgd, 0UL); + stage2_pgd_clear(pgd); kvm_tlb_flush_vmid_ipa(kvm, addr); - pud_free(NULL, pud_table); + stage2_pud_free(pud_table); put_page(virt_to_page(pgd)); } -static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) +static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) { - pmd_t *pmd_table = pmd_offset(pud, 0); - VM_BUG_ON(pud_huge(*pud)); - pud_clear(pud); + pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(pud, 0); + VM_BUG_ON(stage2_pud_huge(*pud)); + stage2_pud_clear(pud); kvm_tlb_flush_vmid_ipa(kvm, addr); - pmd_free(NULL, pmd_table); + stage2_pmd_free(pmd_table); put_page(virt_to_page(pud)); } -static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) +static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) { pte_t *pte_table = pte_offset_kernel(pmd, 0); VM_BUG_ON(pmd_thp_or_huge(*pmd)); @@ -201,7 +201,7 @@ static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure * the IO subsystem will never hit in the cache. 
*/ -static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, +static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr, phys_addr_t end) { phys_addr_t start_addr = addr; @@ -223,19 +223,19 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, } } while (pte++, addr += PAGE_SIZE, addr != end); - if (kvm_pte_table_empty(kvm, start_pte)) - clear_pmd_entry(kvm, pmd, start_addr); + if (stage2_pte_table_empty(start_pte)) + clear_stage2_pmd_entry(kvm, pmd, start_addr); } -static void unmap_pmds(struct kvm *kvm, pud_t *pud, +static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud, phys_addr_t addr, phys_addr_t end) { phys_addr_t next, start_addr = addr; pmd_t *pmd, *start_pmd; - start_pmd = pmd = pmd_offset(pud, addr); + start_pmd = pmd = stage2_pmd_offset(pud, addr); do { - next = kvm_pmd_addr_end(addr, end); + next = stage2_pmd_addr_end(addr, end); if (!pmd_none(*pmd)) { if (pmd_thp_or_huge(*pmd)) { pmd_t old_pmd = *pmd; @@ -247,57 +247,64 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud, put_page(virt_to_page(pmd)); } else { - unmap_ptes(kvm, pmd, addr, next); + unmap_stage2_ptes(kvm, pmd, addr, next); } } } while (pmd++, addr = next, addr != end); - if (kvm_pmd_table_empty(kvm, start_pmd)) - clear_pud_entry(kvm, pud, start_addr); + if (stage2_pmd_table_empty(start_pmd)) + clear_stage2_pud_entry(kvm, pud, start_addr); } -static void unmap_puds(struct kvm *kvm, pgd_t *pgd, +static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr, phys_addr_t end) { phys_addr_t next, start_addr = addr; pud_t *pud, *start_pud; - start_pud = pud = pud_offset(pgd, addr); + start_pud = pud = stage2_pud_offset(pgd, addr); do { - next = kvm_pud_addr_end(addr, end); - if (!pud_none(*pud)) { - if (pud_huge(*pud)) { + next = stage2_pud_addr_end(addr, end); + if (!stage2_pud_none(*pud)) { + if (stage2_pud_huge(*pud)) { pud_t old_pud = *pud; - pud_clear(pud); + stage2_pud_clear(pud); kvm_tlb_flush_vmid_ipa(kvm, addr); - kvm_flush_dcache_pud(old_pud); - 
put_page(virt_to_page(pud)); } else { - unmap_pmds(kvm, pud, addr, next); + unmap_stage2_pmds(kvm, pud, addr, next); } } } while (pud++, addr = next, addr != end); - if (kvm_pud_table_empty(kvm, start_pud)) - clear_pgd_entry(kvm, pgd, start_addr); + if (stage2_pud_table_empty(start_pud)) + clear_stage2_pgd_entry(kvm, pgd, start_addr); } - -static void unmap_range(struct kvm *kvm, pgd_t *pgdp, - phys_addr_t start, u64 size) +/** + * unmap_stage2_range -- Clear stage2 page table entries to unmap a range + * @kvm: The VM pointer + * @start: The intermediate physical base address of the range to unmap + * @size: The size of the area to unmap + * + * Clear a range of stage-2 mappings, lowering the various ref-counts. Must + * be called while holding mmu_lock (unless for freeing the stage2 pgd before + * destroying the VM), otherwise another faulting VCPU may come in and mess + * with things behind our backs. + */ +static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) { pgd_t *pgd; phys_addr_t addr = start, end = start + size; phys_addr_t next; - pgd = pgdp + kvm_pgd_index(addr); + pgd = kvm->arch.pgd + stage2_pgd_index(addr); do { - next = kvm_pgd_addr_end(addr, end); - if (!pgd_none(*pgd)) - unmap_puds(kvm, pgd, addr, next); + next = stage2_pgd_addr_end(addr, end); + if (!stage2_pgd_none(*pgd)) + unmap_stage2_puds(kvm, pgd, addr, next); } while (pgd++, addr = next, addr != end); } @@ -792,22 +799,6 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) return 0; } -/** - * unmap_stage2_range -- Clear stage2 page table entries to unmap a range - * @kvm: The VM pointer - * @start: The intermediate physical base address of the range to unmap - * @size: The size of the area to unmap - * - * Clear a range of stage-2 mappings, lowering the various ref-counts. Must - * be called while holding mmu_lock (unless for freeing the stage2 pgd before - * destroying the VM), otherwise another faulting VCPU may come in and mess - * with things behind our backs. 
- */ -static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) -{ - unmap_range(kvm, kvm->arch.pgd, start, size); -} - static void stage2_unmap_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) { -- cgit v1.2.3 From 8684e701df5a3f52e3ff580128cbd5d71fcd5f5c Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 22 Mar 2016 17:14:25 +0000 Subject: kvm-arm: Cleanup kvm_* wrappers Now that we have switched to explicit page table routines, get rid of the obsolete kvm_* wrappers. Also, kvm_tlb_flush_vmid_ipa is now called only on stage2 page tables, hence get rid of the redundant check. Cc: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Suzuki K Poulose --- arch/arm/include/asm/kvm_mmu.h | 16 ---------------- arch/arm/kvm/mmu.c | 9 +-------- arch/arm64/include/asm/kvm_mmu.h | 24 ------------------------ 3 files changed, 1 insertion(+), 48 deletions(-) (limited to 'arch/arm/kvm/mmu.c') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 5522cdd9dedf..a7736d53a408 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -136,22 +136,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd) return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY; } - -/* Open coded p*d_addr_end that can deal with 64bit addresses */ -#define kvm_pgd_addr_end(addr, end) \ -({ u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \ - (__boundary - 1 < (end) - 1)? 
__boundary: (end); \ -}) - -#define kvm_pgd_index(addr) pgd_index(addr) - static inline bool kvm_page_empty(void *ptr) { struct page *ptr_page = virt_to_page(ptr); diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index f2a6d9b8ca2d..d3fa96e0f709 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -66,14 +66,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { - /* - * This function also gets called when dealing with HYP page - * tables. As HYP doesn't have an associated struct kvm (and - * the HYP page tables are fairly static), we don't do - * anything there. - */ - if (kvm) - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); } /* diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index edf3c62c660e..a3c0d05311ef 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -153,13 +153,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd) return (pmd_val(*pmd) & PMD_S2_RDWR) == PMD_S2_RDONLY; } - -#define kvm_pgd_addr_end(addr, end) pgd_addr_end(addr, end) -#define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end) -#define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end) - -#define kvm_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1)) - static inline void *kvm_get_hwpgd(struct kvm *kvm) { pgd_t *pgd = kvm->arch.pgd; @@ -232,23 +225,6 @@ static inline bool kvm_page_empty(void *ptr) return page_count(ptr_page) == 1; } -#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep) - -#ifdef __PAGETABLE_PMD_FOLDED -#define kvm_pmd_table_empty(kvm, pmdp) (0) -#else -#define kvm_pmd_table_empty(kvm, pmdp) \ - (kvm_page_empty(pmdp) && (!(kvm) || KVM_PREALLOC_LEVEL < 2)) -#endif - -#ifdef __PAGETABLE_PUD_FOLDED -#define kvm_pud_table_empty(kvm, pudp) (0) -#else -#define kvm_pud_table_empty(kvm, pudp) \ - (kvm_page_empty(pudp) && (!(kvm) || KVM_PREALLOC_LEVEL < 1)) 
-#endif - - #define hyp_pte_table_empty(ptep) kvm_page_empty(ptep) #ifdef __PAGETABLE_PMD_FOLDED -- cgit v1.2.3 From 9163ee23e72333e4712f7edd1a49aef06eae6304 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 22 Mar 2016 17:01:21 +0000 Subject: kvm-arm: Cleanup stage2 pgd handling Now that we don't have any fake page table levels for arm64, cleanup the common code to get rid of the dead code. Cc: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Suzuki K Poulose --- arch/arm/include/asm/kvm_mmu.h | 19 ------------------- arch/arm/kvm/arm.c | 2 +- arch/arm/kvm/mmu.c | 37 ++++++------------------------------- arch/arm64/include/asm/kvm_mmu.h | 18 ------------------ 4 files changed, 7 insertions(+), 69 deletions(-) (limited to 'arch/arm/kvm/mmu.c') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index a7736d53a408..6344ea0ad624 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -150,25 +150,6 @@ static inline bool kvm_page_empty(void *ptr) #define hyp_pmd_table_empty(pmdp) kvm_page_empty(pmdp) #define hyp_pud_table_empty(pudp) false -static inline void *kvm_get_hwpgd(struct kvm *kvm) -{ - return kvm->arch.pgd; -} - -static inline unsigned int kvm_get_hwpgd_size(void) -{ - return PTRS_PER_S2_PGD * sizeof(pgd_t); -} - -static inline pgd_t *kvm_setup_fake_pgd(pgd_t *hwpgd) -{ - return hwpgd; -} - -static inline void kvm_free_fake_pgd(pgd_t *pgd) -{ -} - struct kvm; #define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l)) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index dded1b763c16..be4b6394a062 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -448,7 +448,7 @@ static void update_vttbr(struct kvm *kvm) kvm_next_vmid &= (1 << kvm_vmid_bits) - 1; /* update vttbr to be used with the new vmid */ - pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm)); + pgd_phys = virt_to_phys(kvm->arch.pgd); BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); vmid = ((u64)(kvm->arch.vmid) << 
VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); kvm->arch.vttbr = pgd_phys | vmid; diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index d3fa96e0f709..42eefab3e8e1 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -43,6 +43,7 @@ static unsigned long hyp_idmap_start; static unsigned long hyp_idmap_end; static phys_addr_t hyp_idmap_vector; +#define S2_PGD_SIZE (PTRS_PER_S2_PGD * sizeof(pgd_t)) #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) #define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0) @@ -736,20 +737,6 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); } -/* Free the HW pgd, one page at a time */ -static void kvm_free_hwpgd(void *hwpgd) -{ - free_pages_exact(hwpgd, kvm_get_hwpgd_size()); -} - -/* Allocate the HW PGD, making sure that each page gets its own refcount */ -static void *kvm_alloc_hwpgd(void) -{ - unsigned int size = kvm_get_hwpgd_size(); - - return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO); -} - /** * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. * @kvm: The KVM struct pointer for the VM. @@ -764,29 +751,17 @@ static void *kvm_alloc_hwpgd(void) int kvm_alloc_stage2_pgd(struct kvm *kvm) { pgd_t *pgd; - void *hwpgd; if (kvm->arch.pgd != NULL) { kvm_err("kvm_arch already initialized?\n"); return -EINVAL; } - hwpgd = kvm_alloc_hwpgd(); - if (!hwpgd) + /* Allocate the HW PGD, making sure that each page gets its own refcount */ + pgd = alloc_pages_exact(S2_PGD_SIZE, GFP_KERNEL | __GFP_ZERO); + if (!pgd) return -ENOMEM; - /* - * When the kernel uses more levels of page tables than the - * guest, we allocate a fake PGD and pre-populate it to point - * to the next-level page table, which will be the real - * initial page table pointed to by the VTTBR. 
- */ - pgd = kvm_setup_fake_pgd(hwpgd); - if (IS_ERR(pgd)) { - kvm_free_hwpgd(hwpgd); - return PTR_ERR(pgd); - } - kvm_clean_pgd(pgd); kvm->arch.pgd = pgd; return 0; @@ -874,8 +849,8 @@ void kvm_free_stage2_pgd(struct kvm *kvm) return; unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); - kvm_free_hwpgd(kvm_get_hwpgd(kvm)); - kvm_free_fake_pgd(kvm->arch.pgd); + /* Free the HW pgd, one page at a time */ + free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE); kvm->arch.pgd = NULL; } diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index e3fee0acd1a2..249c4fc9c5f6 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -141,24 +141,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd) return (pmd_val(*pmd) & PMD_S2_RDWR) == PMD_S2_RDONLY; } -static inline void *kvm_get_hwpgd(struct kvm *kvm) -{ - return kvm->arch.pgd; -} - -static inline unsigned int kvm_get_hwpgd_size(void) -{ - return PTRS_PER_S2_PGD * sizeof(pgd_t); -} - -static inline pgd_t *kvm_setup_fake_pgd(pgd_t *hwpgd) -{ - return hwpgd; -} - -static inline void kvm_free_fake_pgd(pgd_t *pgd) -{ -} static inline bool kvm_page_empty(void *ptr) { struct page *ptr_page = virt_to_page(ptr); -- cgit v1.2.3 From d4b9e0790aa764c0b01e18d4e8d33e93ba36d51f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 28 Apr 2016 16:16:31 +0100 Subject: arm/arm64: KVM: Enforce Break-Before-Make on Stage-2 page tables The ARM architecture mandates that when changing a page table entry from a valid entry to another valid entry, an invalid entry is first written, TLB invalidated, and only then the new entry being written. The current code doesn't respect this, directly writing the new entry and only then invalidating TLBs. Let's fix it up. 
Cc: Reported-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'arch/arm/kvm/mmu.c') diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 42eefab3e8e1..74b5d199f6b7 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -910,11 +910,14 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd)); old_pmd = *pmd; - kvm_set_pmd(pmd, *new_pmd); - if (pmd_present(old_pmd)) + if (pmd_present(old_pmd)) { + pmd_clear(pmd); kvm_tlb_flush_vmid_ipa(kvm, addr); - else + } else { get_page(virt_to_page(pmd)); + } + + kvm_set_pmd(pmd, *new_pmd); return 0; } @@ -963,12 +966,14 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, /* Create 2nd stage page table mapping - Level 3 */ old_pte = *pte; - kvm_set_pte(pte, *new_pte); - if (pte_present(old_pte)) + if (pte_present(old_pte)) { + kvm_set_pte(pte, __pte(0)); kvm_tlb_flush_vmid_ipa(kvm, addr); - else + } else { get_page(virt_to_page(pte)); + } + kvm_set_pte(pte, *new_pte); return 0; } -- cgit v1.2.3 From 06485053244480f5f403d8f89b8617bd7d549113 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 13 Apr 2016 17:57:37 +0100 Subject: kvm: arm64: Enable hardware updates of the Access Flag for Stage 2 page tables The ARMv8.1 architecture extensions introduce support for hardware updates of the access and dirty information in page table entries. With VTCR_EL2.HA enabled (bit 21), when the CPU accesses an IPA with the PTE_AF bit cleared in the stage 2 page table, instead of raising an Access Flag fault to EL2 the CPU sets the actual page table entry bit (10). To ensure that kernel modifications to the page table do not inadvertently revert a bit set by hardware updates, certain Stage 2 software pte/pmd operations must be performed atomically. 
The main user of the AF bit is the kvm_age_hva() mechanism. The kvm_age_hva_handler() function performs a "test and clear young" action on the pte/pmd. This needs to be atomic with respect to automatic hardware updates of the AF bit. Since the AF bit is in the same position for both Stage 1 and Stage 2, the patch reuses the existing ptep_test_and_clear_young() functionality if __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG is defined. Otherwise, the existing pte_young/pte_mkold mechanism is preserved. kvm_set_s2pte_readonly() and the corresponding pmd equivalent have to perform atomic modifications in order to avoid a race with updates of the AF bit. The arm64 implementation has been re-written using exclusives. Currently, kvm_set_s2pte_writable() and its pmd equivalent take a pointer argument and modify the pte/pmd in place. However, these functions are only used on local variables rather than actual page table entries, so it makes more sense to follow the pte_mkwrite() approach used for stage 1 attributes. The change to kvm_s2pte_mkwrite() makes it clear that these functions do not modify the actual page table entries. The uses of (pte|pmd)_mkyoung() on Stage 2 entries (setting the AF bit explicitly) do not need to be modified since hardware updates of the dirty status are not supported by KVM, so there is no possibility of losing such information.
Signed-off-by: Catalin Marinas Cc: Paolo Bonzini Acked-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 10 +++++---- arch/arm/kvm/mmu.c | 46 +++++++++++++++++++++++++--------------- arch/arm64/include/asm/kvm_arm.h | 2 ++ arch/arm64/include/asm/kvm_mmu.h | 27 +++++++++++++++++------ arch/arm64/include/asm/pgtable.h | 13 ++++++++---- arch/arm64/kvm/hyp/s2-setup.c | 8 +++++++ 6 files changed, 74 insertions(+), 32 deletions(-) (limited to 'arch/arm/kvm/mmu.c') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 6344ea0ad624..ef0b276d97fc 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -106,14 +106,16 @@ static inline void kvm_clean_pte(pte_t *pte) clean_pte_table(pte); } -static inline void kvm_set_s2pte_writable(pte_t *pte) +static inline pte_t kvm_s2pte_mkwrite(pte_t pte) { - pte_val(*pte) |= L_PTE_S2_RDWR; + pte_val(pte) |= L_PTE_S2_RDWR; + return pte; } -static inline void kvm_set_s2pmd_writable(pmd_t *pmd) +static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd) { - pmd_val(*pmd) |= L_PMD_S2_RDWR; + pmd_val(pmd) |= L_PMD_S2_RDWR; + return pmd; } static inline void kvm_set_s2pte_readonly(pte_t *pte) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 74b5d199f6b7..783e5ff0b32e 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -977,6 +977,27 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, return 0; } +#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +static int stage2_ptep_test_and_clear_young(pte_t *pte) +{ + if (pte_young(*pte)) { + *pte = pte_mkold(*pte); + return 1; + } + return 0; +} +#else +static int stage2_ptep_test_and_clear_young(pte_t *pte) +{ + return __ptep_test_and_clear_young(pte); +} +#endif + +static int stage2_pmdp_test_and_clear_young(pmd_t *pmd) +{ + return stage2_ptep_test_and_clear_young((pte_t *)pmd); +} + /** * kvm_phys_addr_ioremap - map a device range to guest IPA * 
@@ -1000,7 +1021,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE); if (writable) - kvm_set_s2pte_writable(&pte); + pte = kvm_s2pte_mkwrite(pte); ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES, KVM_NR_MEM_OBJS); @@ -1342,7 +1363,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, pmd_t new_pmd = pfn_pmd(pfn, mem_type); new_pmd = pmd_mkhuge(new_pmd); if (writable) { - kvm_set_s2pmd_writable(&new_pmd); + new_pmd = kvm_s2pmd_mkwrite(new_pmd); kvm_set_pfn_dirty(pfn); } coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached); @@ -1351,7 +1372,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, pte_t new_pte = pfn_pte(pfn, mem_type); if (writable) { - kvm_set_s2pte_writable(&new_pte); + new_pte = kvm_s2pte_mkwrite(new_pte); kvm_set_pfn_dirty(pfn); mark_page_dirty(kvm, gfn); } @@ -1370,6 +1391,8 @@ out_unlock: * Resolve the access fault by making the page young again. * Note that because the faulting entry is guaranteed not to be * cached in the TLB, we don't need to invalidate anything. + * Only the HW Access Flag updates are supported for Stage 2 (no DBM), + * so there is no need for atomic (pte|pmd)_mkyoung operations. */ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) { @@ -1610,25 +1633,14 @@ static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) if (!pmd || pmd_none(*pmd)) /* Nothing there */ return 0; - if (pmd_thp_or_huge(*pmd)) { /* THP, HugeTLB */ - if (pmd_young(*pmd)) { - *pmd = pmd_mkold(*pmd); - return 1; - } - - return 0; - } + if (pmd_thp_or_huge(*pmd)) /* THP, HugeTLB */ + return stage2_pmdp_test_and_clear_young(pmd); pte = pte_offset_kernel(pmd, gpa); if (pte_none(*pte)) return 0; - if (pte_young(*pte)) { - *pte = pte_mkold(*pte); /* Just a page... 
*/ - return 1; - } - - return 0; + return stage2_ptep_test_and_clear_young(pte); } static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index c6cbb361bbcf..ffde15fed3e1 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -111,6 +111,8 @@ /* VTCR_EL2 Registers bits */ #define VTCR_EL2_RES1 (1 << 31) +#define VTCR_EL2_HD (1 << 22) +#define VTCR_EL2_HA (1 << 21) #define VTCR_EL2_PS_MASK TCR_EL2_PS_MASK #define VTCR_EL2_TG0_MASK TCR_TG0_MASK #define VTCR_EL2_TG0_4K TCR_TG0_4K diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 249c4fc9c5f6..844fe5d5ff44 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -111,19 +111,32 @@ static inline void kvm_clean_pmd_entry(pmd_t *pmd) {} static inline void kvm_clean_pte(pte_t *pte) {} static inline void kvm_clean_pte_entry(pte_t *pte) {} -static inline void kvm_set_s2pte_writable(pte_t *pte) +static inline pte_t kvm_s2pte_mkwrite(pte_t pte) { - pte_val(*pte) |= PTE_S2_RDWR; + pte_val(pte) |= PTE_S2_RDWR; + return pte; } -static inline void kvm_set_s2pmd_writable(pmd_t *pmd) +static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd) { - pmd_val(*pmd) |= PMD_S2_RDWR; + pmd_val(pmd) |= PMD_S2_RDWR; + return pmd; } static inline void kvm_set_s2pte_readonly(pte_t *pte) { - pte_val(*pte) = (pte_val(*pte) & ~PTE_S2_RDWR) | PTE_S2_RDONLY; + pteval_t pteval; + unsigned long tmp; + + asm volatile("// kvm_set_s2pte_readonly\n" + " prfm pstl1strm, %2\n" + "1: ldxr %0, %2\n" + " and %0, %0, %3 // clear PTE_S2_RDWR\n" + " orr %0, %0, %4 // set PTE_S2_RDONLY\n" + " stxr %w1, %0, %2\n" + " cbnz %w1, 1b\n" + : "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*pte)) + : "L" (~PTE_S2_RDWR), "L" (PTE_S2_RDONLY)); } static inline bool kvm_s2pte_readonly(pte_t *pte) @@ -133,12 +146,12 @@ static inline bool kvm_s2pte_readonly(pte_t *pte) static inline void 
kvm_set_s2pmd_readonly(pmd_t *pmd) { - pmd_val(*pmd) = (pmd_val(*pmd) & ~PMD_S2_RDWR) | PMD_S2_RDONLY; + kvm_set_s2pte_readonly((pte_t *)pmd); } static inline bool kvm_s2pmd_readonly(pmd_t *pmd) { - return (pmd_val(*pmd) & PMD_S2_RDWR) == PMD_S2_RDONLY; + return kvm_s2pte_readonly((pte_t *)pmd); } static inline bool kvm_page_empty(void *ptr) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index dda4aa9ba3f8..f1d5afdb12db 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -532,14 +532,12 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) * Atomic pte/pmd modifications. */ #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, - unsigned long address, - pte_t *ptep) +static inline int __ptep_test_and_clear_young(pte_t *ptep) { pteval_t pteval; unsigned int tmp, res; - asm volatile("// ptep_test_and_clear_young\n" + asm volatile("// __ptep_test_and_clear_young\n" " prfm pstl1strm, %2\n" "1: ldxr %0, %2\n" " ubfx %w3, %w0, %5, #1 // extract PTE_AF (young)\n" @@ -552,6 +550,13 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, return res; } +static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long address, + pte_t *ptep) +{ + return __ptep_test_and_clear_young(ptep); +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c index bcbe761a5a3d..b81f4091c909 100644 --- a/arch/arm64/kvm/hyp/s2-setup.c +++ b/arch/arm64/kvm/hyp/s2-setup.c @@ -65,6 +65,14 @@ u32 __hyp_text __init_stage2_translation(void) */ val |= 64 - (parange > 40 ? 40 : parange); + /* + * Check the availability of Hardware Access Flag / Dirty Bit + * Management in ID_AA64MMFR1_EL1 and enable the feature in VTCR_EL2. 
+ */ + tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_HADBS_SHIFT) & 0xf; + if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && tmp) + val |= VTCR_EL2_HA; + /* * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS * bit in VTCR_EL2. -- cgit v1.2.3