summaryrefslogtreecommitdiffstats
path: root/arch/arm/kvm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-10-18 14:32:31 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2014-10-18 14:32:31 -0700
commit8a5de18239e418fe7b1f36504834689f754d8ccc (patch)
tree8d05ae77da1d4a8512b6052e2ba23571543666c7 /arch/arm/kvm
parent857b50f5d0eed113428c864e927289d8f5f2b864 (diff)
parent2df36a5dd6792870bef48f63bfca42055ea5b79c (diff)
downloadtalos-op-linux-8a5de18239e418fe7b1f36504834689f754d8ccc.tar.gz
talos-op-linux-8a5de18239e418fe7b1f36504834689f754d8ccc.zip
Merge tag 'kvm-arm-for-3.18-take-2' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm
Pull second batch of changes for KVM/{arm,arm64} from Marc Zyngier: "The most obvious thing is the sizeable MMU changes to support 48bit VAs on arm64. Summary: - support for 48bit IPA and VA (EL2) - a number of fixes for devices mapped into guests - yet another VGIC fix for BE - a fix for CPU hotplug - a few compile fixes (disabled VGIC, strict mm checks)" [ I'm pulling directly from Marc at the request of Paolo Bonzini, whose backpack was stolen at Düsseldorf airport and will do new keys and rebuild his web of trust. - Linus ] * tag 'kvm-arm-for-3.18-take-2' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm: arm/arm64: KVM: Fix BE accesses to GICv2 EISR and ELRSR regs arm: kvm: STRICT_MM_TYPECHECKS fix for user_mem_abort arm/arm64: KVM: Ensure memslots are within KVM_PHYS_SIZE arm64: KVM: Implement 48 VA support for KVM EL2 and Stage-2 arm/arm64: KVM: map MMIO regions at creation time arm64: kvm: define PAGE_S2_DEVICE as read-only by default ARM: kvm: define PAGE_S2_DEVICE as read-only by default arm/arm64: KVM: add 'writable' parameter to kvm_phys_addr_ioremap arm/arm64: KVM: fix potential NULL dereference in user_mem_abort() arm/arm64: KVM: use __GFP_ZERO not memset() to get zeroed pages ARM: KVM: fix vgic-disabled build arm: kvm: fix CPU hotplug
Diffstat (limited to 'arch/arm/kvm')
-rw-r--r--arch/arm/kvm/arm.c5
-rw-r--r--arch/arm/kvm/interrupts_head.S7
-rw-r--r--arch/arm/kvm/mmu.c235
3 files changed, 207 insertions, 40 deletions
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 779605122f32..9e193c8a959e 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -409,7 +409,7 @@ static void update_vttbr(struct kvm *kvm)
kvm_next_vmid++;
/* update vttbr to be used with the new vmid */
- pgd_phys = virt_to_phys(kvm->arch.pgd);
+ pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm));
BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
kvm->arch.vttbr = pgd_phys | vmid;
@@ -808,7 +808,8 @@ static int hyp_init_cpu_notify(struct notifier_block *self,
switch (action) {
case CPU_STARTING:
case CPU_STARTING_FROZEN:
- cpu_init_hyp_mode(NULL);
+ if (__hyp_get_vectors() == hyp_default_vectors)
+ cpu_init_hyp_mode(NULL);
break;
}
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 98c8c5b9a87f..14d488388480 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -433,10 +433,17 @@ ARM_BE8(rev r10, r10 )
str r3, [r11, #VGIC_V2_CPU_HCR]
str r4, [r11, #VGIC_V2_CPU_VMCR]
str r5, [r11, #VGIC_V2_CPU_MISR]
+#ifdef CONFIG_CPU_ENDIAN_BE8
+ str r6, [r11, #(VGIC_V2_CPU_EISR + 4)]
+ str r7, [r11, #VGIC_V2_CPU_EISR]
+ str r8, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
+ str r9, [r11, #VGIC_V2_CPU_ELRSR]
+#else
str r6, [r11, #VGIC_V2_CPU_EISR]
str r7, [r11, #(VGIC_V2_CPU_EISR + 4)]
str r8, [r11, #VGIC_V2_CPU_ELRSR]
str r9, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
+#endif
str r10, [r11, #VGIC_V2_CPU_APR]
/* Clear GICH_HCR */
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index eea03069161b..57a403a5c22b 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -42,7 +42,7 @@ static unsigned long hyp_idmap_start;
static unsigned long hyp_idmap_end;
static phys_addr_t hyp_idmap_vector;
-#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
+#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
#define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x))
@@ -134,7 +134,7 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
}
} while (pte++, addr += PAGE_SIZE, addr != end);
- if (kvm_pte_table_empty(start_pte))
+ if (kvm_pte_table_empty(kvm, start_pte))
clear_pmd_entry(kvm, pmd, start_addr);
}
@@ -158,7 +158,7 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud,
}
} while (pmd++, addr = next, addr != end);
- if (kvm_pmd_table_empty(start_pmd))
+ if (kvm_pmd_table_empty(kvm, start_pmd))
clear_pud_entry(kvm, pud, start_addr);
}
@@ -182,7 +182,7 @@ static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
}
} while (pud++, addr = next, addr != end);
- if (kvm_pud_table_empty(start_pud))
+ if (kvm_pud_table_empty(kvm, start_pud))
clear_pgd_entry(kvm, pgd, start_addr);
}
@@ -306,7 +306,7 @@ void free_boot_hyp_pgd(void)
if (boot_hyp_pgd) {
unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
- free_pages((unsigned long)boot_hyp_pgd, pgd_order);
+ free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
boot_hyp_pgd = NULL;
}
@@ -343,7 +343,7 @@ void free_hyp_pgds(void)
for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
- free_pages((unsigned long)hyp_pgd, pgd_order);
+ free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
hyp_pgd = NULL;
}
@@ -401,13 +401,46 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
return 0;
}
+static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
+ unsigned long end, unsigned long pfn,
+ pgprot_t prot)
+{
+ pud_t *pud;
+ pmd_t *pmd;
+ unsigned long addr, next;
+ int ret;
+
+ addr = start;
+ do {
+ pud = pud_offset(pgd, addr);
+
+ if (pud_none_or_clear_bad(pud)) {
+ pmd = pmd_alloc_one(NULL, addr);
+ if (!pmd) {
+ kvm_err("Cannot allocate Hyp pmd\n");
+ return -ENOMEM;
+ }
+ pud_populate(NULL, pud, pmd);
+ get_page(virt_to_page(pud));
+ kvm_flush_dcache_to_poc(pud, sizeof(*pud));
+ }
+
+ next = pud_addr_end(addr, end);
+ ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
+ if (ret)
+ return ret;
+ pfn += (next - addr) >> PAGE_SHIFT;
+ } while (addr = next, addr != end);
+
+ return 0;
+}
+
static int __create_hyp_mappings(pgd_t *pgdp,
unsigned long start, unsigned long end,
unsigned long pfn, pgprot_t prot)
{
pgd_t *pgd;
pud_t *pud;
- pmd_t *pmd;
unsigned long addr, next;
int err = 0;
@@ -416,22 +449,21 @@ static int __create_hyp_mappings(pgd_t *pgdp,
end = PAGE_ALIGN(end);
do {
pgd = pgdp + pgd_index(addr);
- pud = pud_offset(pgd, addr);
- if (pud_none_or_clear_bad(pud)) {
- pmd = pmd_alloc_one(NULL, addr);
- if (!pmd) {
- kvm_err("Cannot allocate Hyp pmd\n");
+ if (pgd_none(*pgd)) {
+ pud = pud_alloc_one(NULL, addr);
+ if (!pud) {
+ kvm_err("Cannot allocate Hyp pud\n");
err = -ENOMEM;
goto out;
}
- pud_populate(NULL, pud, pmd);
- get_page(virt_to_page(pud));
- kvm_flush_dcache_to_poc(pud, sizeof(*pud));
+ pgd_populate(NULL, pgd, pud);
+ get_page(virt_to_page(pgd));
+ kvm_flush_dcache_to_poc(pgd, sizeof(*pgd));
}
next = pgd_addr_end(addr, end);
- err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
+ err = create_hyp_pud_mappings(pgd, addr, next, pfn, prot);
if (err)
goto out;
pfn += (next - addr) >> PAGE_SHIFT;
@@ -521,6 +553,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
*/
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
+ int ret;
pgd_t *pgd;
if (kvm->arch.pgd != NULL) {
@@ -528,15 +561,38 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
return -EINVAL;
}
- pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
+ if (KVM_PREALLOC_LEVEL > 0) {
+ /*
+ * Allocate fake pgd for the page table manipulation macros to
+ * work. This is not used by the hardware and we have no
+ * alignment requirement for this allocation.
+ */
+ pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
+ GFP_KERNEL | __GFP_ZERO);
+ } else {
+ /*
+ * Allocate actual first-level Stage-2 page table used by the
+ * hardware for Stage-2 page table walks.
+ */
+ pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
+ }
+
if (!pgd)
return -ENOMEM;
- memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
+ ret = kvm_prealloc_hwpgd(kvm, pgd);
+ if (ret)
+ goto out_err;
+
kvm_clean_pgd(pgd);
kvm->arch.pgd = pgd;
-
return 0;
+out_err:
+ if (KVM_PREALLOC_LEVEL > 0)
+ kfree(pgd);
+ else
+ free_pages((unsigned long)pgd, S2_PGD_ORDER);
+ return ret;
}
/**
@@ -572,19 +628,39 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
return;
unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
- free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
+ kvm_free_hwpgd(kvm);
+ if (KVM_PREALLOC_LEVEL > 0)
+ kfree(kvm->arch.pgd);
+ else
+ free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
kvm->arch.pgd = NULL;
}
-static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
phys_addr_t addr)
{
pgd_t *pgd;
pud_t *pud;
- pmd_t *pmd;
pgd = kvm->arch.pgd + pgd_index(addr);
- pud = pud_offset(pgd, addr);
+ if (WARN_ON(pgd_none(*pgd))) {
+ if (!cache)
+ return NULL;
+ pud = mmu_memory_cache_alloc(cache);
+ pgd_populate(NULL, pgd, pud);
+ get_page(virt_to_page(pgd));
+ }
+
+ return pud_offset(pgd, addr);
+}
+
+static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+ phys_addr_t addr)
+{
+ pud_t *pud;
+ pmd_t *pmd;
+
+ pud = stage2_get_pud(kvm, cache, addr);
if (pud_none(*pud)) {
if (!cache)
return NULL;
@@ -630,7 +706,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
pmd_t *pmd;
pte_t *pte, old_pte;
- /* Create stage-2 page table mapping - Level 1 */
+ /* Create stage-2 page table mapping - Levels 0 and 1 */
pmd = stage2_get_pmd(kvm, cache, addr);
if (!pmd) {
/*
@@ -675,7 +751,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
* @size: The size of the mapping
*/
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
- phys_addr_t pa, unsigned long size)
+ phys_addr_t pa, unsigned long size, bool writable)
{
phys_addr_t addr, end;
int ret = 0;
@@ -688,7 +764,11 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
- ret = mmu_topup_memory_cache(&cache, 2, 2);
+ if (writable)
+ kvm_set_s2pte_writable(&pte);
+
+ ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
+ KVM_NR_MEM_OBJS);
if (ret)
goto out;
spin_lock(&kvm->mmu_lock);
@@ -777,6 +857,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
/* Let's check if we will get back a huge page backed by hugetlbfs */
down_read(&current->mm->mmap_sem);
vma = find_vma_intersection(current->mm, hva, hva + 1);
+ if (unlikely(!vma)) {
+ kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
+ up_read(&current->mm->mmap_sem);
+ return -EFAULT;
+ }
+
if (is_vm_hugetlb_page(vma)) {
hugetlb = true;
gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
@@ -797,7 +883,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
up_read(&current->mm->mmap_sem);
/* We need minimum second+third level pages */
- ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
+ ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
+ KVM_NR_MEM_OBJS);
if (ret)
return ret;
@@ -843,7 +930,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
}
coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
- mem_type == PAGE_S2_DEVICE);
+ pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
}
@@ -916,6 +1003,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
goto out_unlock;
}
+ /* Userspace should not be able to register out-of-bounds IPAs */
+ VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);
+
ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
if (ret == 0)
ret = 1;
@@ -1072,8 +1162,8 @@ int kvm_mmu_init(void)
(unsigned long)phys_base);
}
- hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
- boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
+ hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
+ boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
if (!hyp_pgd || !boot_hyp_pgd) {
kvm_err("Hyp mode PGD not allocated\n");
@@ -1126,13 +1216,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
const struct kvm_memory_slot *old,
enum kvm_mr_change change)
{
- gpa_t gpa = old->base_gfn << PAGE_SHIFT;
- phys_addr_t size = old->npages << PAGE_SHIFT;
- if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
- spin_lock(&kvm->mmu_lock);
- unmap_stage2_range(kvm, gpa, size);
- spin_unlock(&kvm->mmu_lock);
- }
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -1140,7 +1223,77 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
enum kvm_mr_change change)
{
- return 0;
+ hva_t hva = mem->userspace_addr;
+ hva_t reg_end = hva + mem->memory_size;
+ bool writable = !(mem->flags & KVM_MEM_READONLY);
+ int ret = 0;
+
+ if (change != KVM_MR_CREATE && change != KVM_MR_MOVE)
+ return 0;
+
+ /*
+ * Prevent userspace from creating a memory region outside of the IPA
+ * space addressable by the KVM guest IPA space.
+ */
+ if (memslot->base_gfn + memslot->npages >=
+ (KVM_PHYS_SIZE >> PAGE_SHIFT))
+ return -EFAULT;
+
+ /*
+ * A memory region could potentially cover multiple VMAs, and any holes
+ * between them, so iterate over all of them to find out if we can map
+ * any of them right now.
+ *
+ * +--------------------------------------------+
+ * +---------------+----------------+ +----------------+
+ * | : VMA 1 | VMA 2 | | VMA 3 : |
+ * +---------------+----------------+ +----------------+
+ * | memory region |
+ * +--------------------------------------------+
+ */
+ do {
+ struct vm_area_struct *vma = find_vma(current->mm, hva);
+ hva_t vm_start, vm_end;
+
+ if (!vma || vma->vm_start >= reg_end)
+ break;
+
+ /*
+ * Mapping a read-only VMA is only allowed if the
+ * memory region is configured as read-only.
+ */
+ if (writable && !(vma->vm_flags & VM_WRITE)) {
+ ret = -EPERM;
+ break;
+ }
+
+ /*
+ * Take the intersection of this VMA with the memory region
+ */
+ vm_start = max(hva, vma->vm_start);
+ vm_end = min(reg_end, vma->vm_end);
+
+ if (vma->vm_flags & VM_PFNMAP) {
+ gpa_t gpa = mem->guest_phys_addr +
+ (vm_start - mem->userspace_addr);
+ phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) +
+ vm_start - vma->vm_start;
+
+ ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
+ vm_end - vm_start,
+ writable);
+ if (ret)
+ break;
+ }
+ hva = vm_end;
+ } while (hva < reg_end);
+
+ if (ret) {
+ spin_lock(&kvm->mmu_lock);
+ unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
+ spin_unlock(&kvm->mmu_lock);
+ }
+ return ret;
}
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
@@ -1165,4 +1318,10 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
+ gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
+ phys_addr_t size = slot->npages << PAGE_SHIFT;
+
+ spin_lock(&kvm->mmu_lock);
+ unmap_stage2_range(kvm, gpa, size);
+ spin_unlock(&kvm->mmu_lock);
}
OpenPOWER on IntegriCloud