summaryrefslogtreecommitdiffstats
path: root/arch/arm64/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kvm')
-rw-r--r--arch/arm64/kvm/Kconfig4
-rw-r--r--arch/arm64/kvm/Makefile2
-rw-r--r--arch/arm64/kvm/debug.c6
-rw-r--r--arch/arm64/kvm/guest.c28
-rw-r--r--arch/arm64/kvm/handle_exit.c4
-rw-r--r--arch/arm64/kvm/hyp/entry.S52
-rw-r--r--arch/arm64/kvm/hyp/switch.c156
-rw-r--r--arch/arm64/kvm/hyp/sysreg-sr.c35
-rw-r--r--arch/arm64/kvm/hyp/tlb.c81
-rw-r--r--arch/arm64/kvm/inject_fault.c74
-rw-r--r--arch/arm64/kvm/reset.c2
-rw-r--r--arch/arm64/kvm/sys_regs.c31
-rw-r--r--arch/arm64/kvm/sys_regs.h17
-rw-r--r--arch/arm64/kvm/va_layout.c68
14 files changed, 418 insertions, 142 deletions
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index a67121d419a2..a475c68cbfec 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -21,6 +21,8 @@ if VIRTUALIZATION
config KVM
bool "Kernel-based Virtual Machine (KVM) support"
depends on OF
+ # for TASKSTATS/TASK_DELAY_ACCT:
+ depends on NET && MULTIUSER
select MMU_NOTIFIER
select PREEMPT_NOTIFIERS
select HAVE_KVM_CPU_RELAX_INTERCEPT
@@ -39,6 +41,8 @@ config KVM
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
select HAVE_KVM_VCPU_RUN_PID_CHANGE
+ select TASKSTATS
+ select TASK_DELAY_ACCT
---help---
Support hosting virtualized guest machines.
We don't support KVM with 16K page tables yet, due to the multiple
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 3ac1a64d2fb9..5ffbdc39e780 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -13,6 +13,8 @@ obj-$(CONFIG_KVM_ARM_HOST) += hyp/
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hypercalls.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/pvtime.o
kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o
kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index 43487f035385..7a7e425616b5 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -101,7 +101,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
{
bool trap_debug = !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY);
- unsigned long mdscr;
+ unsigned long mdscr, orig_mdcr_el2 = vcpu->arch.mdcr_el2;
trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug);
@@ -197,6 +197,10 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE))
vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
+ /* Write mdcr_el2 changes since vcpu_load on VHE systems */
+ if (has_vhe() && orig_mdcr_el2 != vcpu->arch.mdcr_el2)
+ write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
+
trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2);
trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1));
}
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index dfd626447482..2bd92301d32f 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -34,6 +34,10 @@
#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU }
struct kvm_stats_debugfs_item debugfs_entries[] = {
+ VCPU_STAT(halt_successful_poll),
+ VCPU_STAT(halt_attempted_poll),
+ VCPU_STAT(halt_poll_invalid),
+ VCPU_STAT(halt_wakeup),
VCPU_STAT(hvc_exit_stat),
VCPU_STAT(wfe_exit_stat),
VCPU_STAT(wfi_exit_stat),
@@ -43,11 +47,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
};
-int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
-{
- return 0;
-}
-
static bool core_reg_offset_is_vreg(u64 off)
{
return off >= KVM_REG_ARM_CORE_REG(fp_regs.vregs) &&
@@ -712,6 +711,12 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
if (events->exception.serror_pending && events->exception.serror_has_esr)
events->exception.serror_esr = vcpu_get_vsesr(vcpu);
+ /*
+ * We never return a pending ext_dabt here because we deliver it to
+ * the virtual CPU directly when setting the event and it's no longer
+ * 'pending' at this point.
+ */
+
return 0;
}
@@ -720,6 +725,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
{
bool serror_pending = events->exception.serror_pending;
bool has_esr = events->exception.serror_has_esr;
+ bool ext_dabt_pending = events->exception.ext_dabt_pending;
if (serror_pending && has_esr) {
if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
@@ -733,6 +739,9 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
kvm_inject_vabt(vcpu);
}
+ if (ext_dabt_pending)
+ kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+
return 0;
}
@@ -858,6 +867,9 @@ int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
case KVM_ARM_VCPU_TIMER_CTRL:
ret = kvm_arm_timer_set_attr(vcpu, attr);
break;
+ case KVM_ARM_VCPU_PVTIME_CTRL:
+ ret = kvm_arm_pvtime_set_attr(vcpu, attr);
+ break;
default:
ret = -ENXIO;
break;
@@ -878,6 +890,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
case KVM_ARM_VCPU_TIMER_CTRL:
ret = kvm_arm_timer_get_attr(vcpu, attr);
break;
+ case KVM_ARM_VCPU_PVTIME_CTRL:
+ ret = kvm_arm_pvtime_get_attr(vcpu, attr);
+ break;
default:
ret = -ENXIO;
break;
@@ -898,6 +913,9 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
case KVM_ARM_VCPU_TIMER_CTRL:
ret = kvm_arm_timer_has_attr(vcpu, attr);
break;
+ case KVM_ARM_VCPU_PVTIME_CTRL:
+ ret = kvm_arm_pvtime_has_attr(vcpu, attr);
+ break;
default:
ret = -ENXIO;
break;
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 706cca23f0d2..aacfc55de44c 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -11,8 +11,6 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
-#include <kvm/arm_psci.h>
-
#include <asm/esr.h>
#include <asm/exception.h>
#include <asm/kvm_asm.h>
@@ -22,6 +20,8 @@
#include <asm/debug-monitors.h>
#include <asm/traps.h>
+#include <kvm/arm_hypercalls.h>
+
#define CREATE_TRACE_POINTS
#include "trace.h"
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index e5cc8d66bf53..d22d0534dd60 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -22,7 +22,12 @@
.text
.pushsection .hyp.text, "ax"
+/*
+ * We treat x18 as callee-saved as the host may use it as a platform
+ * register (e.g. for shadow call stack).
+ */
.macro save_callee_saved_regs ctxt
+ str x18, [\ctxt, #CPU_XREG_OFFSET(18)]
stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
@@ -32,6 +37,8 @@
.endm
.macro restore_callee_saved_regs ctxt
+ // We require \ctxt is not x18-x28
+ ldr x18, [\ctxt, #CPU_XREG_OFFSET(18)]
ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
@@ -44,11 +51,11 @@
* u64 __guest_enter(struct kvm_vcpu *vcpu,
* struct kvm_cpu_context *host_ctxt);
*/
-ENTRY(__guest_enter)
+SYM_FUNC_START(__guest_enter)
// x0: vcpu
// x1: host context
// x2-x17: clobbered by macros
- // x18: guest context
+ // x29: guest context
// Store the host regs
save_callee_saved_regs x1
@@ -67,38 +74,34 @@ alternative_else_nop_endif
ret
1:
- add x18, x0, #VCPU_CONTEXT
+ add x29, x0, #VCPU_CONTEXT
// Macro ptrauth_switch_to_guest format:
// ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3)
// The below macro to restore guest keys is not implemented in C code
// as it may cause Pointer Authentication key signing mismatch errors
// when this feature is enabled for kernel code.
- ptrauth_switch_to_guest x18, x0, x1, x2
+ ptrauth_switch_to_guest x29, x0, x1, x2
// Restore guest regs x0-x17
- ldp x0, x1, [x18, #CPU_XREG_OFFSET(0)]
- ldp x2, x3, [x18, #CPU_XREG_OFFSET(2)]
- ldp x4, x5, [x18, #CPU_XREG_OFFSET(4)]
- ldp x6, x7, [x18, #CPU_XREG_OFFSET(6)]
- ldp x8, x9, [x18, #CPU_XREG_OFFSET(8)]
- ldp x10, x11, [x18, #CPU_XREG_OFFSET(10)]
- ldp x12, x13, [x18, #CPU_XREG_OFFSET(12)]
- ldp x14, x15, [x18, #CPU_XREG_OFFSET(14)]
- ldp x16, x17, [x18, #CPU_XREG_OFFSET(16)]
-
- // Restore guest regs x19-x29, lr
- restore_callee_saved_regs x18
-
- // Restore guest reg x18
- ldr x18, [x18, #CPU_XREG_OFFSET(18)]
+ ldp x0, x1, [x29, #CPU_XREG_OFFSET(0)]
+ ldp x2, x3, [x29, #CPU_XREG_OFFSET(2)]
+ ldp x4, x5, [x29, #CPU_XREG_OFFSET(4)]
+ ldp x6, x7, [x29, #CPU_XREG_OFFSET(6)]
+ ldp x8, x9, [x29, #CPU_XREG_OFFSET(8)]
+ ldp x10, x11, [x29, #CPU_XREG_OFFSET(10)]
+ ldp x12, x13, [x29, #CPU_XREG_OFFSET(12)]
+ ldp x14, x15, [x29, #CPU_XREG_OFFSET(14)]
+ ldp x16, x17, [x29, #CPU_XREG_OFFSET(16)]
+
+ // Restore guest regs x18-x29, lr
+ restore_callee_saved_regs x29
// Do not touch any register after this!
eret
sb
-ENDPROC(__guest_enter)
-ENTRY(__guest_exit)
+SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
// x0: return code
// x1: vcpu
// x2-x29,lr: vcpu regs
@@ -114,7 +117,7 @@ ENTRY(__guest_exit)
// Retrieve the guest regs x0-x1 from the stack
ldp x2, x3, [sp], #16 // x0, x1
- // Store the guest regs x0-x1 and x4-x18
+ // Store the guest regs x0-x1 and x4-x17
stp x2, x3, [x1, #CPU_XREG_OFFSET(0)]
stp x4, x5, [x1, #CPU_XREG_OFFSET(4)]
stp x6, x7, [x1, #CPU_XREG_OFFSET(6)]
@@ -123,9 +126,8 @@ ENTRY(__guest_exit)
stp x12, x13, [x1, #CPU_XREG_OFFSET(12)]
stp x14, x15, [x1, #CPU_XREG_OFFSET(14)]
stp x16, x17, [x1, #CPU_XREG_OFFSET(16)]
- str x18, [x1, #CPU_XREG_OFFSET(18)]
- // Store the guest regs x19-x29, lr
+ // Store the guest regs x18-x29, lr
save_callee_saved_regs x1
get_host_ctxt x2, x3
@@ -192,4 +194,4 @@ abort_guest_exit_end:
msr spsr_el2, x4
orr x0, x0, x5
1: ret
-ENDPROC(__guest_exit)
+SYM_FUNC_END(__guest_enter)
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index adaf266d8de8..dfe8dd172512 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -12,7 +12,7 @@
#include <kvm/arm_psci.h>
-#include <asm/arch_gicv3.h>
+#include <asm/barrier.h>
#include <asm/cpufeature.h>
#include <asm/kprobes.h>
#include <asm/kvm_asm.h>
@@ -28,7 +28,15 @@
/* Check whether the FP regs were dirtied while in the host-side run loop: */
static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
+ /*
+ * When the system doesn't support FP/SIMD, we cannot rely on
+ * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an
+ * abort on the very first access to FP and thus we should never
+ * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always
+ * trap the accesses.
+ */
+ if (!system_supports_fpsimd() ||
+ vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
KVM_ARM64_FP_HOST);
@@ -118,12 +126,29 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
}
write_sysreg(val, cptr_el2);
+
+ if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
+ struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
+
+ isb();
+ /*
+ * At this stage, and thanks to the above isb(), S2 is
+ * configured and enabled. We can now restore the guest's S1
+ * configuration: SCTLR, and only then TCR.
+ */
+ write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR);
+ isb();
+ write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR);
+ }
}
static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
{
u64 hcr = vcpu->arch.hcr_el2;
+ if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
+ hcr |= HCR_TVM;
+
write_sysreg(hcr, hcr_el2);
if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
@@ -141,11 +166,11 @@ static void deactivate_traps_vhe(void)
write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
/*
- * ARM erratum 1165522 requires the actual execution of the above
- * before we can switch to the EL2/EL0 translation regime used by
+ * ARM errata 1165522 and 1530923 require the actual execution of the
+ * above before we can switch to the EL2/EL0 translation regime used by
* the host.
*/
- asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_1165522));
+ asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT_VHE));
write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
write_sysreg(vectors, vbar_el1);
@@ -156,6 +181,23 @@ static void __hyp_text __deactivate_traps_nvhe(void)
{
u64 mdcr_el2 = read_sysreg(mdcr_el2);
+ if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
+ u64 val;
+
+ /*
+ * Set the TCR and SCTLR registers in the exact opposite
+ * sequence as __activate_traps_nvhe (first prevent walks,
+ * then force the MMU on). A generous sprinkling of isb()
+ * ensure that things happen in this exact order.
+ */
+ val = read_sysreg_el1(SYS_TCR);
+ write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR);
+ isb();
+ val = read_sysreg_el1(SYS_SCTLR);
+ write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR);
+ isb();
+ }
+
__deactivate_traps_common();
mdcr_el2 &= MDCR_EL2_HPMN_MASK;
@@ -174,8 +216,10 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
* the crucial bit is "On taking a vSError interrupt,
* HCR_EL2.VSE is cleared to 0."
*/
- if (vcpu->arch.hcr_el2 & HCR_VSE)
- vcpu->arch.hcr_el2 = read_sysreg(hcr_el2);
+ if (vcpu->arch.hcr_el2 & HCR_VSE) {
+ vcpu->arch.hcr_el2 &= ~HCR_VSE;
+ vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
+ }
if (has_vhe())
deactivate_traps_vhe();
@@ -229,20 +273,6 @@ static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
}
}
-static bool __hyp_text __true_value(void)
-{
- return true;
-}
-
-static bool __hyp_text __false_value(void)
-{
- return false;
-}
-
-static hyp_alternate_select(__check_arm_834220,
- __false_value, __true_value,
- ARM64_WORKAROUND_834220);
-
static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar)
{
u64 par, tmp;
@@ -264,7 +294,7 @@ static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar)
tmp = read_sysreg(par_el1);
write_sysreg(par, par_el1);
- if (unlikely(tmp & 1))
+ if (unlikely(tmp & SYS_PAR_EL1_F))
return false; /* Translation failed, back to guest */
/* Convert PAR to HPFAR format */
@@ -298,7 +328,8 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
* resolve the IPA using the AT instruction.
*/
if (!(esr & ESR_ELx_S1PTW) &&
- (__check_arm_834220()() || (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
+ (cpus_have_const_cap(ARM64_WORKAROUND_834220) ||
+ (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
if (!__translate_far_to_hpfar(far, &hpfar))
return false;
} else {
@@ -393,6 +424,61 @@ static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
return true;
}
+static bool __hyp_text handle_tx2_tvm(struct kvm_vcpu *vcpu)
+{
+ u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_hsr(vcpu));
+ int rt = kvm_vcpu_sys_get_rt(vcpu);
+ u64 val = vcpu_get_reg(vcpu, rt);
+
+ /*
+ * The normal sysreg handling code expects to see the traps,
+ * let's not do anything here.
+ */
+ if (vcpu->arch.hcr_el2 & HCR_TVM)
+ return false;
+
+ switch (sysreg) {
+ case SYS_SCTLR_EL1:
+ write_sysreg_el1(val, SYS_SCTLR);
+ break;
+ case SYS_TTBR0_EL1:
+ write_sysreg_el1(val, SYS_TTBR0);
+ break;
+ case SYS_TTBR1_EL1:
+ write_sysreg_el1(val, SYS_TTBR1);
+ break;
+ case SYS_TCR_EL1:
+ write_sysreg_el1(val, SYS_TCR);
+ break;
+ case SYS_ESR_EL1:
+ write_sysreg_el1(val, SYS_ESR);
+ break;
+ case SYS_FAR_EL1:
+ write_sysreg_el1(val, SYS_FAR);
+ break;
+ case SYS_AFSR0_EL1:
+ write_sysreg_el1(val, SYS_AFSR0);
+ break;
+ case SYS_AFSR1_EL1:
+ write_sysreg_el1(val, SYS_AFSR1);
+ break;
+ case SYS_MAIR_EL1:
+ write_sysreg_el1(val, SYS_MAIR);
+ break;
+ case SYS_AMAIR_EL1:
+ write_sysreg_el1(val, SYS_AMAIR);
+ break;
+ case SYS_CONTEXTIDR_EL1:
+ write_sysreg_el1(val, SYS_CONTEXTIDR);
+ break;
+ default:
+ return false;
+ }
+
+ __kvm_skip_instr(vcpu);
+ return true;
+}
+
/*
* Return true when we were able to fixup the guest exit and should return to
* the guest, false when we should restore the host state and return to the
@@ -412,6 +498,11 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
if (*exit_code != ARM_EXCEPTION_TRAP)
goto exit;
+ if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
+ kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 &&
+ handle_tx2_tvm(vcpu))
+ return true;
+
/*
* We trap the first access to the FP/SIMD to save the host context
* and restore the guest context lazily.
@@ -605,7 +696,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
*/
if (system_uses_irq_prio_masking()) {
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
- dsb(sy);
+ pmr_sync();
}
vcpu = kern_hyp_va(vcpu);
@@ -618,18 +709,23 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
__sysreg_save_state_nvhe(host_ctxt);
- __activate_vm(kern_hyp_va(vcpu->kvm));
- __activate_traps(vcpu);
-
- __hyp_vgic_restore_state(vcpu);
- __timer_enable_traps(vcpu);
-
/*
* We must restore the 32-bit state before the sysregs, thanks
* to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
+ *
+ * Also, and in order to be able to deal with erratum #1319537 (A57)
+ * and #1319367 (A72), we must ensure that all VM-related sysreg are
+ * restored before we enable S2 translation.
*/
__sysreg32_restore_state(vcpu);
__sysreg_restore_state_nvhe(guest_ctxt);
+
+ __activate_vm(kern_hyp_va(vcpu->kvm));
+ __activate_traps(vcpu);
+
+ __hyp_vgic_restore_state(vcpu);
+ __timer_enable_traps(vcpu);
+
__debug_switch_to_guest(vcpu);
__set_guest_arch_workaround_state(vcpu);
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 7ddbc849b580..7672a978926c 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -117,12 +117,26 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
{
write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2);
write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1);
- write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR);
+
+ if (!cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
+ write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR);
+ write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR);
+ } else if (!ctxt->__hyp_running_vcpu) {
+ /*
+ * Must only be done for guest registers, hence the context
+ * test. We're coming from the host, so SCTLR.M is already
+ * set. Pairs with __activate_traps_nvhe().
+ */
+ write_sysreg_el1((ctxt->sys_regs[TCR_EL1] |
+ TCR_EPD1_MASK | TCR_EPD0_MASK),
+ SYS_TCR);
+ isb();
+ }
+
write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1);
write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], SYS_CPACR);
write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], SYS_TTBR0);
write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], SYS_TTBR1);
- write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR);
write_sysreg_el1(ctxt->sys_regs[ESR_EL1], SYS_ESR);
write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], SYS_AFSR0);
write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], SYS_AFSR1);
@@ -135,6 +149,23 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1);
write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
+ if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE) &&
+ ctxt->__hyp_running_vcpu) {
+ /*
+ * Must only be done for host registers, hence the context
+ * test. Pairs with __deactivate_traps_nvhe().
+ */
+ isb();
+ /*
+ * At this stage, and thanks to the above isb(), S2 is
+ * deconfigured and disabled. We can now restore the host's
+ * S1 configuration: SCTLR, and only then TCR.
+ */
+ write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR);
+ isb();
+ write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR);
+ }
+
write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
write_sysreg_el1(ctxt->gp_regs.elr_el1, SYS_ELR);
write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],SYS_SPSR);
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index d49a14497715..92f560e3e1aa 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -23,10 +23,10 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm,
local_irq_save(cxt->flags);
- if (cpus_have_const_cap(ARM64_WORKAROUND_1165522)) {
+ if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) {
/*
- * For CPUs that are affected by ARM erratum 1165522, we
- * cannot trust stage-1 to be in a correct state at that
+ * For CPUs that are affected by ARM errata 1165522 or 1530923,
+ * we cannot trust stage-1 to be in a correct state at that
* point. Since we do not want to force a full load of the
* vcpu state, we prevent the EL1 page-table walker to
* allocate new TLBs. This is done by setting the EPD bits
@@ -63,14 +63,34 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm,
static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm,
struct tlb_inv_context *cxt)
{
+ if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
+ u64 val;
+
+ /*
+ * For CPUs that are affected by ARM 1319367, we need to
+ * avoid a host Stage-1 walk while we have the guest's
+ * VMID set in the VTTBR in order to invalidate TLBs.
+ * We're guaranteed that the S1 MMU is enabled, so we can
+ * simply set the EPD bits to avoid any further TLB fill.
+ */
+ val = cxt->tcr = read_sysreg_el1(SYS_TCR);
+ val |= TCR_EPD1_MASK | TCR_EPD0_MASK;
+ write_sysreg_el1(val, SYS_TCR);
+ isb();
+ }
+
__load_guest_stage2(kvm);
isb();
}
-static hyp_alternate_select(__tlb_switch_to_guest,
- __tlb_switch_to_guest_nvhe,
- __tlb_switch_to_guest_vhe,
- ARM64_HAS_VIRT_HOST_EXTN);
+static void __hyp_text __tlb_switch_to_guest(struct kvm *kvm,
+ struct tlb_inv_context *cxt)
+{
+ if (has_vhe())
+ __tlb_switch_to_guest_vhe(kvm, cxt);
+ else
+ __tlb_switch_to_guest_nvhe(kvm, cxt);
+}
static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm,
struct tlb_inv_context *cxt)
@@ -83,7 +103,7 @@ static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm,
write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
isb();
- if (cpus_have_const_cap(ARM64_WORKAROUND_1165522)) {
+ if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) {
/* Restore the registers to what they were */
write_sysreg_el1(cxt->tcr, SYS_TCR);
write_sysreg_el1(cxt->sctlr, SYS_SCTLR);
@@ -96,12 +116,23 @@ static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm,
struct tlb_inv_context *cxt)
{
write_sysreg(0, vttbr_el2);
+
+ if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
+ /* Ensure write of the host VMID */
+ isb();
+ /* Restore the host's TCR_EL1 */
+ write_sysreg_el1(cxt->tcr, SYS_TCR);
+ }
}
-static hyp_alternate_select(__tlb_switch_to_host,
- __tlb_switch_to_host_nvhe,
- __tlb_switch_to_host_vhe,
- ARM64_HAS_VIRT_HOST_EXTN);
+static void __hyp_text __tlb_switch_to_host(struct kvm *kvm,
+ struct tlb_inv_context *cxt)
+{
+ if (has_vhe())
+ __tlb_switch_to_host_vhe(kvm, cxt);
+ else
+ __tlb_switch_to_host_nvhe(kvm, cxt);
+}
void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
@@ -111,7 +142,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
/* Switch to requested VMID */
kvm = kern_hyp_va(kvm);
- __tlb_switch_to_guest()(kvm, &cxt);
+ __tlb_switch_to_guest(kvm, &cxt);
/*
* We could do so much better if we had the VA as well.
@@ -154,7 +185,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
if (!has_vhe() && icache_is_vpipt())
__flush_icache_all();
- __tlb_switch_to_host()(kvm, &cxt);
+ __tlb_switch_to_host(kvm, &cxt);
}
void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
@@ -165,13 +196,13 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
/* Switch to requested VMID */
kvm = kern_hyp_va(kvm);
- __tlb_switch_to_guest()(kvm, &cxt);
+ __tlb_switch_to_guest(kvm, &cxt);
__tlbi(vmalls12e1is);
dsb(ish);
isb();
- __tlb_switch_to_host()(kvm, &cxt);
+ __tlb_switch_to_host(kvm, &cxt);
}
void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
@@ -180,19 +211,31 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest()(kvm, &cxt);
+ __tlb_switch_to_guest(kvm, &cxt);
__tlbi(vmalle1);
dsb(nsh);
isb();
- __tlb_switch_to_host()(kvm, &cxt);
+ __tlb_switch_to_host(kvm, &cxt);
}
void __hyp_text __kvm_flush_vm_context(void)
{
dsb(ishst);
__tlbi(alle1is);
- asm volatile("ic ialluis" : : );
+
+ /*
+ * VIPT and PIPT caches are not affected by VMID, so no maintenance
+ * is necessary across a VMID rollover.
+ *
+ * VPIPT caches constrain lookup and maintenance to the active VMID,
+ * so we need to invalidate lines with a stale VMID to avoid an ABA
+ * race after multiple rollovers.
+ *
+ */
+ if (icache_is_vpipt())
+ asm volatile("ic ialluis");
+
dsb(ish);
}
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index a9d25a305af5..6aafc2825c1c 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -14,9 +14,6 @@
#include <asm/kvm_emulate.h>
#include <asm/esr.h>
-#define PSTATE_FAULT_BITS_64 (PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \
- PSR_I_BIT | PSR_D_BIT)
-
#define CURRENT_EL_SP_EL0_VECTOR 0x0
#define CURRENT_EL_SP_ELx_VECTOR 0x200
#define LOWER_EL_AArch64_VECTOR 0x400
@@ -50,6 +47,69 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type)
return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type;
}
+/*
+ * When an exception is taken, most PSTATE fields are left unchanged in the
+ * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all
+ * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx
+ * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0.
+ *
+ * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429.
+ * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426.
+ *
+ * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from
+ * MSB to LSB.
+ */
+static unsigned long get_except64_pstate(struct kvm_vcpu *vcpu)
+{
+ unsigned long sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
+ unsigned long old, new;
+
+ old = *vcpu_cpsr(vcpu);
+ new = 0;
+
+ new |= (old & PSR_N_BIT);
+ new |= (old & PSR_Z_BIT);
+ new |= (old & PSR_C_BIT);
+ new |= (old & PSR_V_BIT);
+
+ // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests)
+
+ new |= (old & PSR_DIT_BIT);
+
+ // PSTATE.UAO is set to zero upon any exception to AArch64
+ // See ARM DDI 0487E.a, page D5-2579.
+
+ // PSTATE.PAN is unchanged unless SCTLR_ELx.SPAN == 0b0
+ // SCTLR_ELx.SPAN is RES1 when ARMv8.1-PAN is not implemented
+ // See ARM DDI 0487E.a, page D5-2578.
+ new |= (old & PSR_PAN_BIT);
+ if (!(sctlr & SCTLR_EL1_SPAN))
+ new |= PSR_PAN_BIT;
+
+ // PSTATE.SS is set to zero upon any exception to AArch64
+ // See ARM DDI 0487E.a, page D2-2452.
+
+ // PSTATE.IL is set to zero upon any exception to AArch64
+ // See ARM DDI 0487E.a, page D1-2306.
+
+ // PSTATE.SSBS is set to SCTLR_ELx.DSSBS upon any exception to AArch64
+ // See ARM DDI 0487E.a, page D13-3258
+ if (sctlr & SCTLR_ELx_DSSBS)
+ new |= PSR_SSBS_BIT;
+
+ // PSTATE.BTYPE is set to zero upon any exception to AArch64
+ // See ARM DDI 0487E.a, pages D1-2293 to D1-2294.
+
+ new |= PSR_D_BIT;
+ new |= PSR_A_BIT;
+ new |= PSR_I_BIT;
+ new |= PSR_F_BIT;
+
+ new |= PSR_MODE_EL1h;
+
+ return new;
+}
+
static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
{
unsigned long cpsr = *vcpu_cpsr(vcpu);
@@ -59,7 +119,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
- *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+ *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu);
vcpu_write_spsr(vcpu, cpsr);
vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
@@ -94,7 +154,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
- *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+ *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu);
vcpu_write_spsr(vcpu, cpsr);
/*
@@ -109,7 +169,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
/**
* kvm_inject_dabt - inject a data abort into the guest
- * @vcpu: The VCPU to receive the undefined exception
+ * @vcpu: The VCPU to receive the data abort
* @addr: The address to report in the DFAR
*
* It is assumed that this code is called from the VCPU thread and that the
@@ -125,7 +185,7 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
/**
* kvm_inject_pabt - inject a prefetch abort into the guest
- * @vcpu: The VCPU to receive the undefined exception
+ * @vcpu: The VCPU to receive the prefetch abort
* @addr: The address to report in the DFAR
*
* It is assumed that this code is called from the VCPU thread and that the
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index f4a8ae918827..30b7ea680f66 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -204,7 +204,7 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu)
return true;
}
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
+void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
{
kfree(vcpu->arch.sve_state);
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 2071260a275b..3e909b117f0c 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -632,6 +632,8 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
*/
val = ((pmcr & ~ARMV8_PMU_PMCR_MASK)
| (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E);
+ if (!system_supports_32bit_el0())
+ val |= ARMV8_PMU_PMCR_LC;
__vcpu_sys_reg(vcpu, r->reg) = val;
}
@@ -682,6 +684,8 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
val = __vcpu_sys_reg(vcpu, PMCR_EL0);
val &= ~ARMV8_PMU_PMCR_MASK;
val |= p->regval & ARMV8_PMU_PMCR_MASK;
+ if (!system_supports_32bit_el0())
+ val |= ARMV8_PMU_PMCR_LC;
__vcpu_sys_reg(vcpu, PMCR_EL0) = val;
kvm_pmu_handle_pmcr(vcpu, val);
kvm_vcpu_pmu_restore_guest(vcpu);
@@ -1420,7 +1424,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_SANITISED(ID_ISAR4_EL1),
ID_SANITISED(ID_ISAR5_EL1),
ID_SANITISED(ID_MMFR4_EL1),
- ID_UNALLOCATED(2,7),
+ ID_SANITISED(ID_ISAR6_EL1),
/* CRm=3 */
ID_SANITISED(MVFR0_EL1),
@@ -2094,9 +2098,9 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu,
WARN_ON(1);
}
- kvm_err("Unsupported guest CP%d access at: %08lx [%08lx]\n",
- cp, *vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
- print_sys_reg_instr(params);
+ print_sys_reg_msg(params,
+ "Unsupported guest CP%d access at: %08lx [%08lx]\n",
+ cp, *vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
kvm_inject_undefined(vcpu);
}
@@ -2229,6 +2233,12 @@ int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
NULL, 0);
}
+static bool is_imp_def_sys_reg(struct sys_reg_params *params)
+{
+ // See ARM DDI 0487E.a, section D12.3.2
+ return params->Op0 == 3 && (params->CRn & 0b1011) == 0b1011;
+}
+
static int emulate_sys_reg(struct kvm_vcpu *vcpu,
struct sys_reg_params *params)
{
@@ -2244,10 +2254,12 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu,
if (likely(r)) {
perform_access(vcpu, params, r);
+ } else if (is_imp_def_sys_reg(params)) {
+ kvm_inject_undefined(vcpu);
} else {
- kvm_err("Unsupported guest sys_reg access at: %lx [%08lx]\n",
- *vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
- print_sys_reg_instr(params);
+ print_sys_reg_msg(params,
+ "Unsupported guest sys_reg access at: %lx [%08lx]\n",
+ *vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
kvm_inject_undefined(vcpu);
}
return 1;
@@ -2356,8 +2368,11 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG)
return NULL;
+ if (!index_to_params(id, &params))
+ return NULL;
+
table = get_target_table(vcpu->arch.target, true, &num);
- r = find_reg_by_id(id, &params, table, num);
+ r = find_reg(&params, table, num);
if (!r)
r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index 9bca0312d798..5a6fc30f5989 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -62,11 +62,24 @@ struct sys_reg_desc {
#define REG_HIDDEN_USER (1 << 0) /* hidden from userspace ioctls */
#define REG_HIDDEN_GUEST (1 << 1) /* hidden from guest */
-static inline void print_sys_reg_instr(const struct sys_reg_params *p)
+static __printf(2, 3)
+inline void print_sys_reg_msg(const struct sys_reg_params *p,
+ char *fmt, ...)
{
+ va_list va;
+
+ va_start(va, fmt);
/* Look, we even formatted it for you to paste into the table! */
- kvm_pr_unimpl(" { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n",
+ kvm_pr_unimpl("%pV { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n",
+ &(struct va_format){ fmt, &va },
p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, p->is_write ? "write" : "read");
+ va_end(va);
+}
+
+static inline void print_sys_reg_instr(const struct sys_reg_params *p)
+{
+ /* GCC warns on an empty format string */
+ print_sys_reg_msg(p, "%s", "");
}
static inline bool ignore_write(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
index acd8084f1f2c..a4f48c1ac28c 100644
--- a/arch/arm64/kvm/va_layout.c
+++ b/arch/arm64/kvm/va_layout.c
@@ -13,52 +13,46 @@
#include <asm/kvm_mmu.h>
/*
- * The LSB of the random hyp VA tag or 0 if no randomization is used.
+ * The LSB of the HYP VA tag
*/
static u8 tag_lsb;
/*
- * The random hyp VA tag value with the region bit if hyp randomization is used
+ * The HYP VA tag value with the region bit
*/
static u64 tag_val;
static u64 va_mask;
-static void compute_layout(void)
+/*
+ * We want to generate a hyp VA with the following format (with V ==
+ * vabits_actual):
+ *
+ * 63 ... V | V-1 | V-2 .. tag_lsb | tag_lsb - 1 .. 0
+ * ---------------------------------------------------------
+ * | 0000000 | hyp_va_msb | random tag | kern linear VA |
+ * |--------- tag_val -----------|----- va_mask ---|
+ *
+ * which does not conflict with the idmap regions.
+ */
+__init void kvm_compute_layout(void)
{
phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start);
u64 hyp_va_msb;
- int kva_msb;
/* Where is my RAM region? */
- hyp_va_msb = idmap_addr & BIT(VA_BITS - 1);
- hyp_va_msb ^= BIT(VA_BITS - 1);
+ hyp_va_msb = idmap_addr & BIT(vabits_actual - 1);
+ hyp_va_msb ^= BIT(vabits_actual - 1);
- kva_msb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^
+ tag_lsb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^
(u64)(high_memory - 1));
- if (kva_msb == (VA_BITS - 1)) {
- /*
- * No space in the address, let's compute the mask so
- * that it covers (VA_BITS - 1) bits, and the region
- * bit. The tag stays set to zero.
- */
- va_mask = BIT(VA_BITS - 1) - 1;
- va_mask |= hyp_va_msb;
- } else {
- /*
- * We do have some free bits to insert a random tag.
- * Hyp VAs are now created from kernel linear map VAs
- * using the following formula (with V == VA_BITS):
- *
- * 63 ... V | V-1 | V-2 .. tag_lsb | tag_lsb - 1 .. 0
- * ---------------------------------------------------------
- * | 0000000 | hyp_va_msb | random tag | kern linear VA |
- */
- tag_lsb = kva_msb;
- va_mask = GENMASK_ULL(tag_lsb - 1, 0);
- tag_val = get_random_long() & GENMASK_ULL(VA_BITS - 2, tag_lsb);
- tag_val |= hyp_va_msb;
- tag_val >>= tag_lsb;
+ va_mask = GENMASK_ULL(tag_lsb - 1, 0);
+ tag_val = hyp_va_msb;
+
+ if (tag_lsb != (vabits_actual - 1)) {
+ /* We have some free bits to insert a random tag. */
+ tag_val |= get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb);
}
+ tag_val >>= tag_lsb;
}
static u32 compute_instruction(int n, u32 rd, u32 rn)
@@ -110,9 +104,6 @@ void __init kvm_update_va_mask(struct alt_instr *alt,
BUG_ON(nr_inst != 5);
- if (!has_vhe() && !va_mask)
- compute_layout();
-
for (i = 0; i < nr_inst; i++) {
u32 rd, rn, insn, oinsn;
@@ -120,11 +111,11 @@ void __init kvm_update_va_mask(struct alt_instr *alt,
* VHE doesn't need any address translation, let's NOP
* everything.
*
- * Alternatively, if we don't have any spare bits in
- * the address, NOP everything after masking that
- * kernel VA.
+ * Alternatively, if the tag is zero (because the layout
+ * dictates it and we don't have any spare bits in the
+ * address), NOP everything after masking the kernel VA.
*/
- if (has_vhe() || (!tag_lsb && i > 0)) {
+ if (has_vhe() || (!tag_val && i > 0)) {
updptr[i] = cpu_to_le32(aarch64_insn_gen_nop());
continue;
}
@@ -156,9 +147,6 @@ void kvm_patch_vector_branch(struct alt_instr *alt,
return;
}
- if (!va_mask)
- compute_layout();
-
/*
* Compute HYP VA by using the same computation as kern_hyp_va()
*/
OpenPOWER on IntegriCloud