Diffstat (limited to 'virt/kvm/arm')
-rw-r--r-- | virt/kvm/arm/aarch32.c           | 131
-rw-r--r-- | virt/kvm/arm/arch_timer.c        |  13
-rw-r--r-- | virt/kvm/arm/arm.c               | 164
-rw-r--r-- | virt/kvm/arm/hypercalls.c        |  71
-rw-r--r-- | virt/kvm/arm/mmio.c              |  82
-rw-r--r-- | virt/kvm/arm/mmu.c               |  62
-rw-r--r-- | virt/kvm/arm/perf.c              |   6
-rw-r--r-- | virt/kvm/arm/pmu.c               | 158
-rw-r--r-- | virt/kvm/arm/psci.c              |  84
-rw-r--r-- | virt/kvm/arm/pvtime.c            | 131
-rw-r--r-- | virt/kvm/arm/vgic/trace.h        |   2
-rw-r--r-- | virt/kvm/arm/vgic/vgic-init.c    |  59
-rw-r--r-- | virt/kvm/arm/vgic/vgic-irqfd.c   |  36
-rw-r--r-- | virt/kvm/arm/vgic/vgic-its.c     | 216
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio-v3.c |  90
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio.c    |  45
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio.h    |   5
-rw-r--r-- | virt/kvm/arm/vgic/vgic-v2.c      |  12
-rw-r--r-- | virt/kvm/arm/vgic/vgic-v3.c      |  24
-rw-r--r-- | virt/kvm/arm/vgic/vgic-v4.c      |  59
-rw-r--r-- | virt/kvm/arm/vgic/vgic.c         |  37
-rw-r--r-- | virt/kvm/arm/vgic/vgic.h         |   7
22 files changed, 980 insertions, 514 deletions
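
The two new files in this diffstat, hypercalls.c and pvtime.c, route the SMCCC paravirtualized-time calls (ARM_SMCCC_HV_PV_TIME_FEATURES / ARM_SMCCC_HV_PV_TIME_ST) and maintain a per-vCPU stolen-time page in guest memory. As a rough guest-side sketch of how that interface is meant to be driven — the example_* names, the memremap()-based mapping and the error handling are illustrative assumptions; only the SMCCC function IDs, return codes and the pvclock_vcpu_stolen_time layout come from the patches below:

/* Hypothetical guest-side probe of the stolen-time interface added below. */
#include <linux/arm-smccc.h>
#include <linux/errno.h>
#include <linux/io.h>
#include <linux/printk.h>
#include <asm/pvclock-abi.h>

static struct pvclock_vcpu_stolen_time *example_stolen_page;

static int example_pv_time_init(void)
{
	struct arm_smccc_res res;

	/* 1. Does the hypervisor implement ARM_SMCCC_HV_PV_TIME_ST? */
	arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_FEATURES,
			     ARM_SMCCC_HV_PV_TIME_ST, &res);
	if (res.a0 != SMCCC_RET_SUCCESS)
		return -EOPNOTSUPP;

	/* 2. Ask for the IPA of this vCPU's stolen-time structure. */
	arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_ST, &res);
	if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
		return -EOPNOTSUPP;

	/* 3. Map it and read the running total (little-endian, nanoseconds). */
	example_stolen_page = memremap(res.a0, sizeof(*example_stolen_page),
				       MEMREMAP_WB);
	if (!example_stolen_page)
		return -ENOMEM;

	pr_info("stolen time so far: %llu ns\n",
		le64_to_cpu(example_stolen_page->stolen_time));
	return 0;
}

On the host side, the VMM tells KVM where to place this structure through the new KVM_ARM_VCPU_PVTIME_IPA vCPU device attribute (see kvm_arm_pvtime_set_attr() in pvtime.c), and arm.c raises KVM_REQ_RECORD_STEAL on vcpu_load so the page is refreshed before the guest runs.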
diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c index c4c57ba99e90..0a356aa91aa1 100644 --- a/virt/kvm/arm/aarch32.c +++ b/virt/kvm/arm/aarch32.c @@ -10,10 +10,15 @@ * Author: Christoffer Dall <c.dall@virtualopensystems.com> */ +#include <linux/bits.h> #include <linux/kvm_host.h> #include <asm/kvm_emulate.h> #include <asm/kvm_hyp.h> +#define DFSR_FSC_EXTABT_LPAE 0x10 +#define DFSR_FSC_EXTABT_nLPAE 0x08 +#define DFSR_LPAE BIT(9) + /* * Table taken from ARMv8 ARM DDI0487B-B, table G1-10. */ @@ -28,25 +33,115 @@ static const u8 return_offsets[8][2] = { [7] = { 4, 4 }, /* FIQ, unused */ }; +/* + * When an exception is taken, most CPSR fields are left unchanged in the + * handler. However, some are explicitly overridden (e.g. M[4:0]). + * + * The SPSR/SPSR_ELx layouts differ, and the below is intended to work with + * either format. Note: SPSR.J bit doesn't exist in SPSR_ELx, but this bit was + * obsoleted by the ARMv7 virtualization extensions and is RES0. + * + * For the SPSR layout seen from AArch32, see: + * - ARM DDI 0406C.d, page B1-1148 + * - ARM DDI 0487E.a, page G8-6264 + * + * For the SPSR_ELx layout for AArch32 seen from AArch64, see: + * - ARM DDI 0487E.a, page C5-426 + * + * Here we manipulate the fields in order of the AArch32 SPSR_ELx layout, from + * MSB to LSB. + */ +static unsigned long get_except32_cpsr(struct kvm_vcpu *vcpu, u32 mode) +{ + u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); + unsigned long old, new; + + old = *vcpu_cpsr(vcpu); + new = 0; + + new |= (old & PSR_AA32_N_BIT); + new |= (old & PSR_AA32_Z_BIT); + new |= (old & PSR_AA32_C_BIT); + new |= (old & PSR_AA32_V_BIT); + new |= (old & PSR_AA32_Q_BIT); + + // CPSR.IT[7:0] are set to zero upon any exception + // See ARM DDI 0487E.a, section G1.12.3 + // See ARM DDI 0406C.d, section B1.8.3 + + new |= (old & PSR_AA32_DIT_BIT); + + // CPSR.SSBS is set to SCTLR.DSSBS upon any exception + // See ARM DDI 0487E.a, page G8-6244 + if (sctlr & BIT(31)) + new |= PSR_AA32_SSBS_BIT; + + // CPSR.PAN is unchanged unless SCTLR.SPAN == 0b0 + // SCTLR.SPAN is RES1 when ARMv8.1-PAN is not implemented + // See ARM DDI 0487E.a, page G8-6246 + new |= (old & PSR_AA32_PAN_BIT); + if (!(sctlr & BIT(23))) + new |= PSR_AA32_PAN_BIT; + + // SS does not exist in AArch32, so ignore + + // CPSR.IL is set to zero upon any exception + // See ARM DDI 0487E.a, page G1-5527 + + new |= (old & PSR_AA32_GE_MASK); + + // CPSR.IT[7:0] are set to zero upon any exception + // See prior comment above + + // CPSR.E is set to SCTLR.EE upon any exception + // See ARM DDI 0487E.a, page G8-6245 + // See ARM DDI 0406C.d, page B4-1701 + if (sctlr & BIT(25)) + new |= PSR_AA32_E_BIT; + + // CPSR.A is unchanged upon an exception to Undefined, Supervisor + // CPSR.A is set upon an exception to other modes + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= (old & PSR_AA32_A_BIT); + if (mode != PSR_AA32_MODE_UND && mode != PSR_AA32_MODE_SVC) + new |= PSR_AA32_A_BIT; + + // CPSR.I is set upon any exception + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= PSR_AA32_I_BIT; + + // CPSR.F is set upon an exception to FIQ + // CPSR.F is unchanged upon an exception to other modes + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= (old & PSR_AA32_F_BIT); + if (mode == PSR_AA32_MODE_FIQ) + new |= PSR_AA32_F_BIT; + + // CPSR.T is set to SCTLR.TE upon any exception + // See ARM DDI 0487E.a, page G8-5514 + // See ARM DDI 0406C.d, page 
B1-1181 + if (sctlr & BIT(30)) + new |= PSR_AA32_T_BIT; + + new |= mode; + + return new; +} + static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) { - unsigned long cpsr; - unsigned long new_spsr_value = *vcpu_cpsr(vcpu); - bool is_thumb = (new_spsr_value & PSR_AA32_T_BIT); + unsigned long spsr = *vcpu_cpsr(vcpu); + bool is_thumb = (spsr & PSR_AA32_T_BIT); u32 return_offset = return_offsets[vect_offset >> 2][is_thumb]; u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); - cpsr = mode | PSR_AA32_I_BIT; - - if (sctlr & (1 << 30)) - cpsr |= PSR_AA32_T_BIT; - if (sctlr & (1 << 25)) - cpsr |= PSR_AA32_E_BIT; - - *vcpu_cpsr(vcpu) = cpsr; + *vcpu_cpsr(vcpu) = get_except32_cpsr(vcpu, mode); /* Note: These now point to the banked copies */ - vcpu_write_spsr(vcpu, new_spsr_value); + vcpu_write_spsr(vcpu, host_spsr_to_spsr32(spsr)); *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; /* Branch to exception vector */ @@ -84,16 +179,18 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, fsr = &vcpu_cp15(vcpu, c5_DFSR); } - prepare_fault32(vcpu, PSR_AA32_MODE_ABT | PSR_AA32_A_BIT, vect_offset); + prepare_fault32(vcpu, PSR_AA32_MODE_ABT, vect_offset); *far = addr; /* Give the guest an IMPLEMENTATION DEFINED exception */ is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); - if (is_lpae) - *fsr = 1 << 9 | 0x34; - else - *fsr = 0x14; + if (is_lpae) { + *fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE; + } else { + /* no need to shuffle FS[4] into DFSR[10] as its 0 */ + *fsr = DFSR_FSC_EXTABT_nLPAE; + } } void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index e2bb5bd60227..0d9438e9de2a 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -80,7 +80,7 @@ static inline bool userspace_irqchip(struct kvm *kvm) static void soft_timer_start(struct hrtimer *hrt, u64 ns) { hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns), - HRTIMER_MODE_ABS); + HRTIMER_MODE_ABS_HARD); } static void soft_timer_cancel(struct hrtimer *hrt) @@ -697,11 +697,11 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); ptimer->cntvoff = 0; - hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); timer->bg_timer.function = kvm_bg_timer_expire; - hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); + hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); vtimer->hrtimer.function = kvm_hrtimer_expire; ptimer->hrtimer.function = kvm_hrtimer_expire; @@ -805,6 +805,7 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, switch (treg) { case TIMER_REG_TVAL: val = timer->cnt_cval - kvm_phys_timer_read() + timer->cntvoff; + val &= lower_32_bits(val); break; case TIMER_REG_CTL: @@ -850,7 +851,7 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, { switch (treg) { case TIMER_REG_TVAL: - timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + val; + timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + (s32)val; break; case TIMER_REG_CTL: @@ -1022,7 +1023,7 @@ static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) bool kvm_arch_timer_get_input_level(int vintid) { - struct kvm_vcpu *vcpu = kvm_arm_get_running_vcpu(); + struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); struct arch_timer_context *timer; if (vintid == 
vcpu_vtimer(vcpu)->irq.irq) diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 35a069815baf..d65a0faa46d8 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -20,8 +20,6 @@ #include <linux/irqbypass.h> #include <linux/sched/stat.h> #include <trace/events/kvm.h> -#include <kvm/arm_pmu.h> -#include <kvm/arm_psci.h> #define CREATE_TRACE_POINTS #include "trace.h" @@ -40,6 +38,10 @@ #include <asm/kvm_coproc.h> #include <asm/sections.h> +#include <kvm/arm_hypercalls.h> +#include <kvm/arm_pmu.h> +#include <kvm/arm_psci.h> + #ifdef REQUIRES_VIRT __asm__(".arch_extension virt"); #endif @@ -47,9 +49,6 @@ __asm__(".arch_extension virt"); DEFINE_PER_CPU(kvm_host_data_t, kvm_host_data); static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); -/* Per-CPU variable containing the currently running vcpu. */ -static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu); - /* The VMID used in the VTTBR */ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); static u32 kvm_next_vmid; @@ -58,31 +57,8 @@ static DEFINE_SPINLOCK(kvm_vmid_lock); static bool vgic_present; static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled); - -static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) -{ - __this_cpu_write(kvm_arm_running_vcpu, vcpu); -} - DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use); -/** - * kvm_arm_get_running_vcpu - get the vcpu running on the current CPU. - * Must be called from non-preemptible context - */ -struct kvm_vcpu *kvm_arm_get_running_vcpu(void) -{ - return __this_cpu_read(kvm_arm_running_vcpu); -} - -/** - * kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus. - */ -struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void) -{ - return &kvm_arm_running_vcpu; -} - int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; @@ -98,6 +74,26 @@ int kvm_arch_check_processor_compat(void) return 0; } +int kvm_vm_ioctl_enable_cap(struct kvm *kvm, + struct kvm_enable_cap *cap) +{ + int r; + + if (cap->flags) + return -EINVAL; + + switch (cap->cap) { + case KVM_CAP_ARM_NISV_TO_USER: + r = 0; + kvm->arch.return_nisv_io_abort_to_user = true; + break; + default: + r = -EINVAL; + break; + } + + return r; +} /** * kvm_arch_init_vm - initializes a VM data structure @@ -170,7 +166,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) for (i = 0; i < KVM_MAX_VCPUS; ++i) { if (kvm->vcpus[i]) { - kvm_arch_vcpu_free(kvm->vcpus[i]); + kvm_vcpu_destroy(kvm->vcpus[i]); kvm->vcpus[i] = NULL; } } @@ -196,6 +192,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_MP_STATE: case KVM_CAP_IMMEDIATE_EXIT: case KVM_CAP_VCPU_EVENTS: + case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2: + case KVM_CAP_ARM_NISV_TO_USER: + case KVM_CAP_ARM_INJECT_EXT_DABT: r = 1; break; case KVM_CAP_ARM_SET_DEVICE_ADDR: @@ -252,49 +251,46 @@ void kvm_arch_free_vm(struct kvm *kvm) vfree(kvm); } -struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) +int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) +{ + if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) + return -EBUSY; + + if (id >= kvm->arch.max_vcpus) + return -EINVAL; + + return 0; +} + +int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) { int err; - struct kvm_vcpu *vcpu; - if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) { - err = -EBUSY; - goto out; - } + /* Force users to call KVM_ARM_VCPU_INIT */ + vcpu->arch.target = -1; + bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); - if (id >= kvm->arch.max_vcpus) { - err = -EINVAL; - goto out; - 
} + /* Set up the timer */ + kvm_timer_vcpu_init(vcpu); - vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); - if (!vcpu) { - err = -ENOMEM; - goto out; - } + kvm_pmu_vcpu_init(vcpu); - err = kvm_vcpu_init(vcpu, kvm, id); - if (err) - goto free_vcpu; + kvm_arm_reset_debug_ptr(vcpu); + + kvm_arm_pvtime_vcpu_init(&vcpu->arch); - err = create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP); + err = kvm_vgic_vcpu_init(vcpu); if (err) - goto vcpu_uninit; + return err; - return vcpu; -vcpu_uninit: - kvm_vcpu_uninit(vcpu); -free_vcpu: - kmem_cache_free(kvm_vcpu_cache, vcpu); -out: - return ERR_PTR(err); + return create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP); } void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) { } -void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm))) static_branch_dec(&userspace_irqchip_in_use); @@ -302,13 +298,8 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) kvm_mmu_free_memory_caches(vcpu); kvm_timer_vcpu_terminate(vcpu); kvm_pmu_vcpu_destroy(vcpu); - kvm_vcpu_uninit(vcpu); - kmem_cache_free(kvm_vcpu_cache, vcpu); -} -void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) -{ - kvm_arch_vcpu_free(vcpu); + kvm_arm_vcpu_destroy(vcpu); } int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) @@ -321,36 +312,24 @@ void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) /* * If we're about to block (most likely because we've just hit a * WFI), we need to sync back the state of the GIC CPU interface - * so that we have the lastest PMR and group enables. This ensures + * so that we have the latest PMR and group enables. This ensures * that kvm_arch_vcpu_runnable has up-to-date data to decide * whether we have pending interrupts. + * + * For the same reason, we want to tell GICv4 that we need + * doorbells to be signalled, should an interrupt become pending. */ preempt_disable(); kvm_vgic_vmcr_sync(vcpu); + vgic_v4_put(vcpu, true); preempt_enable(); - - kvm_vgic_v4_enable_doorbell(vcpu); } void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) { - kvm_vgic_v4_disable_doorbell(vcpu); -} - -int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) -{ - /* Force users to call KVM_ARM_VCPU_INIT */ - vcpu->arch.target = -1; - bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); - - /* Set up the timer */ - kvm_timer_vcpu_init(vcpu); - - kvm_pmu_vcpu_init(vcpu); - - kvm_arm_reset_debug_ptr(vcpu); - - return kvm_vgic_vcpu_init(vcpu); + preempt_disable(); + vgic_v4_load(vcpu); + preempt_enable(); } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -373,17 +352,18 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu->cpu = cpu; vcpu->arch.host_cpu_context = &cpu_data->host_ctxt; - kvm_arm_set_running_vcpu(vcpu); kvm_vgic_load(vcpu); kvm_timer_vcpu_load(vcpu); kvm_vcpu_load_sysregs(vcpu); kvm_arch_vcpu_load_fp(vcpu); kvm_vcpu_pmu_restore_guest(vcpu); + if (kvm_arm_is_pvtime_enabled(&vcpu->arch)) + kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); if (single_task_running()) - vcpu_clear_wfe_traps(vcpu); + vcpu_clear_wfx_traps(vcpu); else - vcpu_set_wfe_traps(vcpu); + vcpu_set_wfx_traps(vcpu); vcpu_ptrauth_setup_lazy(vcpu); } @@ -397,8 +377,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) kvm_vcpu_pmu_restore_host(vcpu); vcpu->cpu = -1; - - kvm_arm_set_running_vcpu(NULL); } static void vcpu_power_off(struct kvm_vcpu *vcpu) @@ -644,6 +622,9 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu) * that a VCPU sees new virtual interrupts. 
*/ kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu); + + if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu)) + kvm_update_stolen_time(vcpu); } } @@ -888,6 +869,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK; vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK; + vcpu_idx += ((irq >> KVM_ARM_IRQ_VCPU2_SHIFT) & KVM_ARM_IRQ_VCPU2_MASK) * (KVM_ARM_IRQ_VCPU_MASK + 1); irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK; trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level); @@ -1313,7 +1295,7 @@ long kvm_arch_vm_ioctl(struct file *filp, } } -static void cpu_init_hyp_mode(void *dummy) +static void cpu_init_hyp_mode(void) { phys_addr_t pgd_ptr; unsigned long hyp_stack_ptr; @@ -1347,7 +1329,7 @@ static void cpu_hyp_reinit(void) if (is_kernel_in_hyp_mode()) kvm_timer_init_vhe(); else - cpu_init_hyp_mode(NULL); + cpu_init_hyp_mode(); kvm_arm_init_debug(); @@ -1498,7 +1480,6 @@ static void teardown_hyp_mode(void) free_hyp_pgds(); for_each_possible_cpu(cpu) free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); - hyp_cpu_pm_exit(); } /** @@ -1712,6 +1693,7 @@ int kvm_arch_init(void *opaque) return 0; out_hyp: + hyp_cpu_pm_exit(); if (!in_hyp_mode) teardown_hyp_mode(); out_err: diff --git a/virt/kvm/arm/hypercalls.c b/virt/kvm/arm/hypercalls.c new file mode 100644 index 000000000000..550dfa3e53cd --- /dev/null +++ b/virt/kvm/arm/hypercalls.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2019 Arm Ltd. + +#include <linux/arm-smccc.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_emulate.h> + +#include <kvm/arm_hypercalls.h> +#include <kvm/arm_psci.h> + +int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) +{ + u32 func_id = smccc_get_function(vcpu); + long val = SMCCC_RET_NOT_SUPPORTED; + u32 feature; + gpa_t gpa; + + switch (func_id) { + case ARM_SMCCC_VERSION_FUNC_ID: + val = ARM_SMCCC_VERSION_1_1; + break; + case ARM_SMCCC_ARCH_FEATURES_FUNC_ID: + feature = smccc_get_arg1(vcpu); + switch (feature) { + case ARM_SMCCC_ARCH_WORKAROUND_1: + switch (kvm_arm_harden_branch_predictor()) { + case KVM_BP_HARDEN_UNKNOWN: + break; + case KVM_BP_HARDEN_WA_NEEDED: + val = SMCCC_RET_SUCCESS; + break; + case KVM_BP_HARDEN_NOT_REQUIRED: + val = SMCCC_RET_NOT_REQUIRED; + break; + } + break; + case ARM_SMCCC_ARCH_WORKAROUND_2: + switch (kvm_arm_have_ssbd()) { + case KVM_SSBD_FORCE_DISABLE: + case KVM_SSBD_UNKNOWN: + break; + case KVM_SSBD_KERNEL: + val = SMCCC_RET_SUCCESS; + break; + case KVM_SSBD_FORCE_ENABLE: + case KVM_SSBD_MITIGATED: + val = SMCCC_RET_NOT_REQUIRED; + break; + } + break; + case ARM_SMCCC_HV_PV_TIME_FEATURES: + val = SMCCC_RET_SUCCESS; + break; + } + break; + case ARM_SMCCC_HV_PV_TIME_FEATURES: + val = kvm_hypercall_pv_features(vcpu); + break; + case ARM_SMCCC_HV_PV_TIME_ST: + gpa = kvm_init_stolen_time(vcpu); + if (gpa != GPA_INVALID) + val = gpa; + break; + default: + return kvm_psci_call(vcpu); + } + + smccc_set_retval(vcpu, val, 0, 0, 0); + return 1; +} diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c index a8a6a0c883f1..aedfcff99ac5 100644 --- a/virt/kvm/arm/mmio.c +++ b/virt/kvm/arm/mmio.c @@ -5,7 +5,6 @@ */ #include <linux/kvm_host.h> -#include <asm/kvm_mmio.h> #include <asm/kvm_emulate.h> #include <trace/events/kvm.h> @@ -86,23 +85,29 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) unsigned int len; int mask; - if (!run->mmio.is_write) { - len = run->mmio.len; - if (len > sizeof(unsigned long)) - return 
-EINVAL; + /* Detect an already handled MMIO return */ + if (unlikely(!vcpu->mmio_needed)) + return 0; + vcpu->mmio_needed = 0; + + if (!kvm_vcpu_dabt_iswrite(vcpu)) { + len = kvm_vcpu_dabt_get_as(vcpu); data = kvm_mmio_read_buf(run->mmio.data, len); - if (vcpu->arch.mmio_decode.sign_extend && + if (kvm_vcpu_dabt_issext(vcpu) && len < sizeof(unsigned long)) { mask = 1U << ((len * 8) - 1); data = (data ^ mask) - mask; } + if (!kvm_vcpu_dabt_issf(vcpu)) + data = data & 0xffffffff; + trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, &data); data = vcpu_data_host_to_guest(vcpu, data, len); - vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data); + vcpu_set_reg(vcpu, kvm_vcpu_dabt_get_rd(vcpu), data); } /* @@ -114,33 +119,6 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) return 0; } -static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len) -{ - unsigned long rt; - int access_size; - bool sign_extend; - - if (kvm_vcpu_dabt_iss1tw(vcpu)) { - /* page table accesses IO mem: tell guest to fix its TTBR */ - kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); - return 1; - } - - access_size = kvm_vcpu_dabt_get_as(vcpu); - if (unlikely(access_size < 0)) - return access_size; - - *is_write = kvm_vcpu_dabt_iswrite(vcpu); - sign_extend = kvm_vcpu_dabt_issext(vcpu); - rt = kvm_vcpu_dabt_get_rd(vcpu); - - *len = access_size; - vcpu->arch.mmio_decode.sign_extend = sign_extend; - vcpu->arch.mmio_decode.rt = rt; - - return 0; -} - int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, phys_addr_t fault_ipa) { @@ -152,20 +130,35 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, u8 data_buf[8]; /* - * Prepare MMIO operation. First decode the syndrome data we get - * from the CPU. Then try if some in-kernel emulation feels - * responsible, otherwise let user space do its magic. + * No valid syndrome? Ask userspace for help if it has + * voluntered to do so, and bail out otherwise. */ - if (kvm_vcpu_dabt_isvalid(vcpu)) { - ret = decode_hsr(vcpu, &is_write, &len); - if (ret) - return ret; - } else { - kvm_err("load/store instruction decoding not implemented\n"); + if (!kvm_vcpu_dabt_isvalid(vcpu)) { + if (vcpu->kvm->arch.return_nisv_io_abort_to_user) { + run->exit_reason = KVM_EXIT_ARM_NISV; + run->arm_nisv.esr_iss = kvm_vcpu_dabt_iss_nisv_sanitized(vcpu); + run->arm_nisv.fault_ipa = fault_ipa; + return 0; + } + + kvm_pr_unimpl("Data abort outside memslots with no valid syndrome info\n"); return -ENOSYS; } - rt = vcpu->arch.mmio_decode.rt; + /* Page table accesses IO mem: tell guest to fix its TTBR */ + if (kvm_vcpu_dabt_iss1tw(vcpu)) { + kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + return 1; + } + + /* + * Prepare MMIO operation. First decode the syndrome data we get + * from the CPU. Then try if some in-kernel emulation feels + * responsible, otherwise let user space do its magic. + */ + is_write = kvm_vcpu_dabt_iswrite(vcpu); + len = kvm_vcpu_dabt_get_as(vcpu); + rt = kvm_vcpu_dabt_get_rd(vcpu); if (is_write) { data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt), @@ -188,6 +181,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, run->mmio.is_write = is_write; run->mmio.phys_addr = fault_ipa; run->mmio.len = len; + vcpu->mmio_needed = 1; if (!ret) { /* We handled the access successfully in the kernel. 
*/ diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 38b4c910b6c3..19c961ac4e3c 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -14,7 +14,6 @@ #include <asm/cacheflush.h> #include <asm/kvm_arm.h> #include <asm/kvm_mmu.h> -#include <asm/kvm_mmio.h> #include <asm/kvm_ras.h> #include <asm/kvm_asm.h> #include <asm/kvm_emulate.h> @@ -38,6 +37,11 @@ static unsigned long io_map_base; #define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0) #define KVM_S2_FLAG_LOGGING_ACTIVE (1UL << 1) +static bool is_iomap(unsigned long flags) +{ + return flags & KVM_S2PTE_FLAG_IS_IOMAP; +} + static bool memslot_is_logging(struct kvm_memory_slot *memslot) { return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY); @@ -1372,14 +1376,8 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap) { kvm_pfn_t pfn = *pfnp; gfn_t gfn = *ipap >> PAGE_SHIFT; - struct page *page = pfn_to_page(pfn); - /* - * PageTransCompoundMap() returns true for THP and - * hugetlbfs. Make sure the adjustment is done only for THP - * pages. - */ - if (!PageHuge(page) && PageTransCompoundMap(page)) { + if (kvm_is_transparent_hugepage(pfn)) { unsigned long mask; /* * The address we faulted on is backed by a transparent huge @@ -1591,16 +1589,8 @@ static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size) __invalidate_icache_guest_page(pfn, size); } -static void kvm_send_hwpoison_signal(unsigned long address, - struct vm_area_struct *vma) +static void kvm_send_hwpoison_signal(unsigned long address, short lsb) { - short lsb; - - if (is_vm_hugetlb_page(vma)) - lsb = huge_page_shift(hstate_vma(vma)); - else - lsb = PAGE_SHIFT; - send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current); } @@ -1673,6 +1663,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm *kvm = vcpu->kvm; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; struct vm_area_struct *vma; + short vma_shift; kvm_pfn_t pfn; pgprot_t mem_type = PAGE_S2; bool logging_active = memslot_is_logging(memslot); @@ -1696,8 +1687,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } - vma_pagesize = vma_kernel_pagesize(vma); + if (is_vm_hugetlb_page(vma)) + vma_shift = huge_page_shift(hstate_vma(vma)); + else + vma_shift = PAGE_SHIFT; + + vma_pagesize = 1ULL << vma_shift; if (logging_active || + (vma->vm_flags & VM_PFNMAP) || !fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) { force_pte = true; vma_pagesize = PAGE_SIZE; @@ -1735,7 +1732,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable); if (pfn == KVM_PFN_ERR_HWPOISON) { - kvm_send_hwpoison_signal(hva, vma); + kvm_send_hwpoison_signal(hva, vma_shift); return 0; } if (is_error_noslot_pfn(pfn)) @@ -1760,6 +1757,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, writable = false; } + if (exec_fault && is_iomap(flags)) + return -ENOEXEC; + spin_lock(&kvm->mmu_lock); if (mmu_notifier_retry(kvm, mmu_seq)) goto out_unlock; @@ -1781,7 +1781,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (writable) kvm_set_pfn_dirty(pfn); - if (fault_status != FSC_PERM) + if (fault_status != FSC_PERM && !is_iomap(flags)) clean_dcache_guest_page(pfn, vma_pagesize); if (exec_fault) @@ -1948,9 +1948,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) if (kvm_is_error_hva(hva) || (write_fault && !writable)) { if (is_iabt) { /* Prefetch 
Abort on I/O address */ - kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); - ret = 1; - goto out_unlock; + ret = -ENOEXEC; + goto out; } /* @@ -1992,6 +1991,11 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status); if (ret == 0) ret = 1; +out: + if (ret == -ENOEXEC) { + kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + ret = 1; + } out_unlock: srcu_read_unlock(&vcpu->kvm->srcu, idx); return ret; @@ -2134,7 +2138,8 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) if (!kvm->arch.pgd) return 0; trace_kvm_test_age_hva(hva); - return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL); + return handle_hva_to_gpa(kvm, hva, hva + PAGE_SIZE, + kvm_test_age_hva_handler, NULL); } void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) @@ -2302,15 +2307,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, break; /* - * Mapping a read-only VMA is only allowed if the - * memory region is configured as read-only. - */ - if (writable && !(vma->vm_flags & VM_WRITE)) { - ret = -EPERM; - break; - } - - /* * Take the intersection of this VMA with the memory region */ vm_start = max(hva, vma->vm_start); diff --git a/virt/kvm/arm/perf.c b/virt/kvm/arm/perf.c index 918cdc3839ea..d45b8b9a4415 100644 --- a/virt/kvm/arm/perf.c +++ b/virt/kvm/arm/perf.c @@ -13,14 +13,14 @@ static int kvm_is_in_guest(void) { - return kvm_arm_get_running_vcpu() != NULL; + return kvm_get_running_vcpu() != NULL; } static int kvm_is_user_mode(void) { struct kvm_vcpu *vcpu; - vcpu = kvm_arm_get_running_vcpu(); + vcpu = kvm_get_running_vcpu(); if (vcpu) return !vcpu_mode_priv(vcpu); @@ -32,7 +32,7 @@ static unsigned long kvm_get_guest_ip(void) { struct kvm_vcpu *vcpu; - vcpu = kvm_arm_get_running_vcpu(); + vcpu = kvm_get_running_vcpu(); if (vcpu) return *vcpu_pc(vcpu); diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 362a01886bab..f0d0312c0a55 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -8,12 +8,15 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> #include <linux/perf_event.h> +#include <linux/perf/arm_pmu.h> #include <linux/uaccess.h> #include <asm/kvm_emulate.h> #include <kvm/arm_pmu.h> #include <kvm/arm_vgic.h> static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx); +static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx); +static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc); #define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1 @@ -74,6 +77,13 @@ static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc) return pmc; } +static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc) +{ + if (kvm_pmu_idx_is_high_counter(pmc->idx)) + return pmc - 1; + else + return pmc + 1; +} /** * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain @@ -146,8 +156,7 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) if (kvm_pmu_pmc_is_chained(pmc) && kvm_pmu_idx_is_high_counter(select_idx)) counter = upper_32_bits(counter); - - else if (!kvm_pmu_idx_is_64bit(vcpu, select_idx)) + else if (select_idx != ARMV8_PMU_CYCLE_IDX) counter = lower_32_bits(counter); return counter; @@ -193,7 +202,7 @@ static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc) */ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) { - u64 counter, reg; + u64 counter, reg, val; pmc = kvm_pmu_get_canonical_pmc(pmc); if (!pmc->perf_event) @@ -201,16 +210,19 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu 
*vcpu, struct kvm_pmc *pmc) counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); - if (kvm_pmu_pmc_is_chained(pmc)) { - reg = PMEVCNTR0_EL0 + pmc->idx; - __vcpu_sys_reg(vcpu, reg) = lower_32_bits(counter); - __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter); + if (pmc->idx == ARMV8_PMU_CYCLE_IDX) { + reg = PMCCNTR_EL0; + val = counter; } else { - reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) - ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; - __vcpu_sys_reg(vcpu, reg) = lower_32_bits(counter); + reg = PMEVCNTR0_EL0 + pmc->idx; + val = lower_32_bits(counter); } + __vcpu_sys_reg(vcpu, reg) = val; + + if (kvm_pmu_pmc_is_chained(pmc)) + __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter); + kvm_pmu_release_perf_event(pmc); } @@ -235,10 +247,11 @@ void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu) */ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) { - int i; + unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); struct kvm_pmu *pmu = &vcpu->arch.pmu; + int i; - for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) + for_each_set_bit(i, &mask, 32) kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]); bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS); @@ -291,15 +304,9 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) pmc = &pmu->pmc[i]; - /* - * For high counters of chained events we must recreate the - * perf event with the long (64bit) attribute set. - */ - if (kvm_pmu_pmc_is_chained(pmc) && - kvm_pmu_idx_is_high_counter(i)) { - kvm_pmu_create_perf_event(vcpu, i); - continue; - } + /* A change in the enable state may affect the chain state */ + kvm_pmu_update_pmc_chained(vcpu, i); + kvm_pmu_create_perf_event(vcpu, i); /* At this point, pmc must be the canonical */ if (pmc->perf_event) { @@ -332,15 +339,9 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) pmc = &pmu->pmc[i]; - /* - * For high counters of chained events we must recreate the - * perf event with the long (64bit) attribute unset. - */ - if (kvm_pmu_pmc_is_chained(pmc) && - kvm_pmu_idx_is_high_counter(i)) { - kvm_pmu_create_perf_event(vcpu, i); - continue; - } + /* A change in the enable state may affect the chain state */ + kvm_pmu_update_pmc_chained(vcpu, i); + kvm_pmu_create_perf_event(vcpu, i); /* At this point, pmc must be the canonical */ if (pmc->perf_event) @@ -440,8 +441,25 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, struct pt_regs *regs) { struct kvm_pmc *pmc = perf_event->overflow_handler_context; + struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu); struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); int idx = pmc->idx; + u64 period; + + cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE); + + /* + * Reset the sample period to the architectural limit, + * i.e. the point where the counter overflows. 
+ */ + period = -(local64_read(&perf_event->count)); + + if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx)) + period &= GENMASK(31, 0); + + local64_set(&perf_event->hw.period_left, 0); + perf_event->attr.sample_period = period; + perf_event->hw.sample_period = period; __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); @@ -449,6 +467,8 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); kvm_vcpu_kick(vcpu); } + + cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD); } /** @@ -458,25 +478,45 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, */ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) { + struct kvm_pmu *pmu = &vcpu->arch.pmu; int i; - u64 type, enable, reg; - if (val == 0) + if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) return; - enable = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); + /* Weed out disabled counters */ + val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); + for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) { + u64 type, reg; + if (!(val & BIT(i))) continue; - type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i) - & ARMV8_PMU_EVTYPE_EVENT; - if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR) - && (enable & BIT(i))) { - reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; + + /* PMSWINC only applies to ... SW_INC! */ + type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i); + type &= ARMV8_PMU_EVTYPE_EVENT; + if (type != ARMV8_PMUV3_PERFCTR_SW_INCR) + continue; + + /* increment this even SW_INC counter */ + reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; + reg = lower_32_bits(reg); + __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; + + if (reg) /* no overflow on the low part */ + continue; + + if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) { + /* increment the high counter */ + reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1; reg = lower_32_bits(reg); - __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; - if (!reg) - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); + __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg; + if (!reg) /* mark overflow on the high counter */ + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1); + } else { + /* mark overflow on low counter */ + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); } } } @@ -488,10 +528,9 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) */ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) { - u64 mask; + unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); int i; - mask = kvm_pmu_valid_counter_mask(vcpu); if (val & ARMV8_PMU_PMCR_E) { kvm_pmu_enable_counter_mask(vcpu, __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask); @@ -503,7 +542,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0); if (val & ARMV8_PMU_PMCR_P) { - for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) + for_each_set_bit(i, &mask, 32) kvm_pmu_set_counter_value(vcpu, i, 0); } } @@ -560,19 +599,18 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); - if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) { + if (kvm_pmu_pmc_is_chained(pmc)) { /** * The initial sample period (overflow count) of an event. For * chained counters we only support overflow interrupts on the * high counter. 
*/ attr.sample_period = (-counter) & GENMASK(63, 0); + attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED; + event = perf_event_create_kernel_counter(&attr, -1, current, kvm_pmu_perf_overflow, pmc + 1); - - if (kvm_pmu_counter_is_enabled(vcpu, pmc->idx + 1)) - attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED; } else { /* The initial sample period (overflow count) of an event. */ if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx)) @@ -599,25 +637,33 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) * @select_idx: The number of selected counter * * Update the chained bitmap based on the event type written in the - * typer register. + * typer register and the enable state of the odd register. */ static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx) { struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc = &pmu->pmc[select_idx]; + struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc; + bool new_state, old_state; - if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) { + old_state = kvm_pmu_pmc_is_chained(pmc); + new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) && + kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1); + + if (old_state == new_state) + return; + + canonical_pmc = kvm_pmu_get_canonical_pmc(pmc); + kvm_pmu_stop_counter(vcpu, canonical_pmc); + if (new_state) { /* * During promotion from !chained to chained we must ensure * the adjacent counter is stopped and its event destroyed */ - if (!kvm_pmu_pmc_is_chained(pmc)) - kvm_pmu_stop_counter(vcpu, pmc); - + kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc)); set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); - } else { - clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); + return; } + clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); } /** diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index 87927f7e1ee7..17e2bdd4b76f 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -15,6 +15,7 @@ #include <asm/kvm_host.h> #include <kvm/arm_psci.h> +#include <kvm/arm_hypercalls.h> /* * This is an implementation of the Power State Coordination Interface @@ -23,38 +24,6 @@ #define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1) -static u32 smccc_get_function(struct kvm_vcpu *vcpu) -{ - return vcpu_get_reg(vcpu, 0); -} - -static unsigned long smccc_get_arg1(struct kvm_vcpu *vcpu) -{ - return vcpu_get_reg(vcpu, 1); -} - -static unsigned long smccc_get_arg2(struct kvm_vcpu *vcpu) -{ - return vcpu_get_reg(vcpu, 2); -} - -static unsigned long smccc_get_arg3(struct kvm_vcpu *vcpu) -{ - return vcpu_get_reg(vcpu, 3); -} - -static void smccc_set_retval(struct kvm_vcpu *vcpu, - unsigned long a0, - unsigned long a1, - unsigned long a2, - unsigned long a3) -{ - vcpu_set_reg(vcpu, 0, a0); - vcpu_set_reg(vcpu, 1, a1); - vcpu_set_reg(vcpu, 2, a2); - vcpu_set_reg(vcpu, 3, a3); -} - static unsigned long psci_affinity_mask(unsigned long affinity_level) { if (affinity_level <= 3) @@ -373,7 +342,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) * Errors: * -EINVAL: Unrecognized PSCI function */ -static int kvm_psci_call(struct kvm_vcpu *vcpu) +int kvm_psci_call(struct kvm_vcpu *vcpu) { switch (kvm_psci_version(vcpu, vcpu->kvm)) { case KVM_ARM_PSCI_1_0: @@ -387,55 +356,6 @@ static int kvm_psci_call(struct kvm_vcpu *vcpu) }; } -int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) -{ - u32 func_id = smccc_get_function(vcpu); - u32 val = SMCCC_RET_NOT_SUPPORTED; - u32 feature; - - switch (func_id) { - case ARM_SMCCC_VERSION_FUNC_ID: - val = ARM_SMCCC_VERSION_1_1; - break; - 
case ARM_SMCCC_ARCH_FEATURES_FUNC_ID: - feature = smccc_get_arg1(vcpu); - switch(feature) { - case ARM_SMCCC_ARCH_WORKAROUND_1: - switch (kvm_arm_harden_branch_predictor()) { - case KVM_BP_HARDEN_UNKNOWN: - break; - case KVM_BP_HARDEN_WA_NEEDED: - val = SMCCC_RET_SUCCESS; - break; - case KVM_BP_HARDEN_NOT_REQUIRED: - val = SMCCC_RET_NOT_REQUIRED; - break; - } - break; - case ARM_SMCCC_ARCH_WORKAROUND_2: - switch (kvm_arm_have_ssbd()) { - case KVM_SSBD_FORCE_DISABLE: - case KVM_SSBD_UNKNOWN: - break; - case KVM_SSBD_KERNEL: - val = SMCCC_RET_SUCCESS; - break; - case KVM_SSBD_FORCE_ENABLE: - case KVM_SSBD_MITIGATED: - val = SMCCC_RET_NOT_REQUIRED; - break; - } - break; - } - break; - default: - return kvm_psci_call(vcpu); - } - - smccc_set_retval(vcpu, val, 0, 0, 0); - return 1; -} - int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) { return 3; /* PSCI version and two workaround registers */ diff --git a/virt/kvm/arm/pvtime.c b/virt/kvm/arm/pvtime.c new file mode 100644 index 000000000000..1e0f4c284888 --- /dev/null +++ b/virt/kvm/arm/pvtime.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2019 Arm Ltd. + +#include <linux/arm-smccc.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_mmu.h> +#include <asm/pvclock-abi.h> + +#include <kvm/arm_hypercalls.h> + +void kvm_update_stolen_time(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + u64 steal; + __le64 steal_le; + u64 offset; + int idx; + u64 base = vcpu->arch.steal.base; + + if (base == GPA_INVALID) + return; + + /* Let's do the local bookkeeping */ + steal = vcpu->arch.steal.steal; + steal += current->sched_info.run_delay - vcpu->arch.steal.last_steal; + vcpu->arch.steal.last_steal = current->sched_info.run_delay; + vcpu->arch.steal.steal = steal; + + steal_le = cpu_to_le64(steal); + idx = srcu_read_lock(&kvm->srcu); + offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time); + kvm_put_guest(kvm, base + offset, steal_le, u64); + srcu_read_unlock(&kvm->srcu, idx); +} + +long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu) +{ + u32 feature = smccc_get_arg1(vcpu); + long val = SMCCC_RET_NOT_SUPPORTED; + + switch (feature) { + case ARM_SMCCC_HV_PV_TIME_FEATURES: + case ARM_SMCCC_HV_PV_TIME_ST: + val = SMCCC_RET_SUCCESS; + break; + } + + return val; +} + +gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu) +{ + struct pvclock_vcpu_stolen_time init_values = {}; + struct kvm *kvm = vcpu->kvm; + u64 base = vcpu->arch.steal.base; + int idx; + + if (base == GPA_INVALID) + return base; + + /* + * Start counting stolen time from the time the guest requests + * the feature enabled. 
+ */ + vcpu->arch.steal.steal = 0; + vcpu->arch.steal.last_steal = current->sched_info.run_delay; + + idx = srcu_read_lock(&kvm->srcu); + kvm_write_guest(kvm, base, &init_values, sizeof(init_values)); + srcu_read_unlock(&kvm->srcu, idx); + + return base; +} + +int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + u64 __user *user = (u64 __user *)attr->addr; + struct kvm *kvm = vcpu->kvm; + u64 ipa; + int ret = 0; + int idx; + + if (attr->attr != KVM_ARM_VCPU_PVTIME_IPA) + return -ENXIO; + + if (get_user(ipa, user)) + return -EFAULT; + if (!IS_ALIGNED(ipa, 64)) + return -EINVAL; + if (vcpu->arch.steal.base != GPA_INVALID) + return -EEXIST; + + /* Check the address is in a valid memslot */ + idx = srcu_read_lock(&kvm->srcu); + if (kvm_is_error_hva(gfn_to_hva(kvm, ipa >> PAGE_SHIFT))) + ret = -EINVAL; + srcu_read_unlock(&kvm->srcu, idx); + + if (!ret) + vcpu->arch.steal.base = ipa; + + return ret; +} + +int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + u64 __user *user = (u64 __user *)attr->addr; + u64 ipa; + + if (attr->attr != KVM_ARM_VCPU_PVTIME_IPA) + return -ENXIO; + + ipa = vcpu->arch.steal.base; + + if (put_user(ipa, user)) + return -EFAULT; + return 0; +} + +int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case KVM_ARM_VCPU_PVTIME_IPA: + return 0; + } + return -ENXIO; +} diff --git a/virt/kvm/arm/vgic/trace.h b/virt/kvm/arm/vgic/trace.h index 55fed77a9f73..4fd4f6db181b 100644 --- a/virt/kvm/arm/vgic/trace.h +++ b/virt/kvm/arm/vgic/trace.h @@ -30,7 +30,7 @@ TRACE_EVENT(vgic_update_irq_pending, #endif /* _TRACE_VGIC_H */ #undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm/vgic +#define TRACE_INCLUDE_PATH ../../virt/kvm/arm/vgic #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE trace diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index bdbc297d06fb..a963b9d766b7 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -8,6 +8,7 @@ #include <linux/cpu.h> #include <linux/kvm_host.h> #include <kvm/arm_vgic.h> +#include <asm/kvm_emulate.h> #include <asm/kvm_mmu.h> #include "vgic.h" @@ -53,6 +54,7 @@ void kvm_vgic_early_init(struct kvm *kvm) struct vgic_dist *dist = &kvm->arch.vgic; INIT_LIST_HEAD(&dist->lpi_list_head); + INIT_LIST_HEAD(&dist->lpi_translation_cache); raw_spin_lock_init(&dist->lpi_list_lock); } @@ -68,7 +70,7 @@ void kvm_vgic_early_init(struct kvm *kvm) */ int kvm_vgic_create(struct kvm *kvm, u32 type) { - int i, vcpu_lock_idx = -1, ret; + int i, ret; struct kvm_vcpu *vcpu; if (irqchip_in_kernel(kvm)) @@ -84,17 +86,9 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) !kvm_vgic_global_state.can_emulate_gicv2) return -ENODEV; - /* - * Any time a vcpu is run, vcpu_load is called which tries to grab the - * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure - * that no other VCPUs are run while we create the vgic. 
- */ ret = -EBUSY; - kvm_for_each_vcpu(i, vcpu, kvm) { - if (!mutex_trylock(&vcpu->mutex)) - goto out_unlock; - vcpu_lock_idx = i; - } + if (!lock_all_vcpus(kvm)) + return ret; kvm_for_each_vcpu(i, vcpu, kvm) { if (vcpu->arch.has_run_once) @@ -123,10 +117,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions); out_unlock: - for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { - vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); - mutex_unlock(&vcpu->mutex); - } + unlock_all_vcpus(kvm); return ret; } @@ -164,12 +155,19 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) irq->vcpu = NULL; irq->target_vcpu = vcpu0; kref_init(&irq->refcount); - if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) { + switch (dist->vgic_model) { + case KVM_DEV_TYPE_ARM_VGIC_V2: irq->targets = 0; irq->group = 0; - } else { + break; + case KVM_DEV_TYPE_ARM_VGIC_V3: irq->mpidr = 0; irq->group = 1; + break; + default: + kfree(dist->spis); + dist->spis = NULL; + return -EINVAL; } } return 0; @@ -192,10 +190,10 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) int i; vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; - vgic_cpu->sgi_iodev.base_addr = VGIC_ADDR_UNDEF; INIT_LIST_HEAD(&vgic_cpu->ap_list_head); raw_spin_lock_init(&vgic_cpu->ap_list_lock); + atomic_set(&vgic_cpu->vgic_v3.its_vpe.vlpi_count, 0); /* * Enable and configure all SGIs to be edge-triggered and @@ -209,7 +207,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) irq->intid = i; irq->vcpu = NULL; irq->target_vcpu = vcpu; - irq->targets = 1U << vcpu->vcpu_id; kref_init(&irq->refcount); if (vgic_irq_is_sgi(i)) { /* SGIs */ @@ -219,11 +216,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) /* PPIs */ irq->config = VGIC_CONFIG_LEVEL; } - - if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) - irq->group = 1; - else - irq->group = 0; } if (!irqchip_in_kernel(vcpu->kvm)) @@ -286,14 +278,24 @@ int vgic_init(struct kvm *kvm) for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { struct vgic_irq *irq = &vgic_cpu->private_irqs[i]; - if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) + switch (dist->vgic_model) { + case KVM_DEV_TYPE_ARM_VGIC_V3: irq->group = 1; - else + irq->mpidr = kvm_vcpu_get_mpidr_aff(vcpu); + break; + case KVM_DEV_TYPE_ARM_VGIC_V2: irq->group = 0; + irq->targets = 1U << idx; + break; + default: + ret = -EINVAL; + goto out; + } } } if (vgic_has_its(kvm)) { + vgic_lpi_translation_cache_init(kvm); ret = vgic_v4_init(kvm); if (ret) goto out; @@ -335,6 +337,9 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm) INIT_LIST_HEAD(&dist->rd_regions); } + if (vgic_has_its(kvm)) + vgic_lpi_translation_cache_destroy(kvm); + if (vgic_supports_direct_msis(kvm)) vgic_v4_teardown(kvm); } @@ -505,7 +510,7 @@ int kvm_vgic_hyp_init(void) break; default: ret = -ENODEV; - }; + } if (ret) return ret; diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c index c9304b88e720..d8cdfea5cc96 100644 --- a/virt/kvm/arm/vgic/vgic-irqfd.c +++ b/virt/kvm/arm/vgic/vgic-irqfd.c @@ -66,6 +66,15 @@ out: return r; } +static void kvm_populate_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm_msi *msi) +{ + msi->address_lo = e->msi.address_lo; + msi->address_hi = e->msi.address_hi; + msi->data = e->msi.data; + msi->flags = e->msi.flags; + msi->devid = e->msi.devid; +} /** * kvm_set_msi: inject the MSI corresponding to the * MSI routing entry @@ -79,21 +88,36 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, { struct kvm_msi msi; - msi.address_lo = e->msi.address_lo; - msi.address_hi = e->msi.address_hi; - msi.data = 
e->msi.data; - msi.flags = e->msi.flags; - msi.devid = e->msi.devid; - if (!vgic_has_its(kvm)) return -ENODEV; if (!level) return -1; + kvm_populate_msi(e, &msi); return vgic_its_inject_msi(kvm, &msi); } +/** + * kvm_arch_set_irq_inatomic: fast-path for irqfd injection + * + * Currently only direct MSI injection is supported. + */ +int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + if (e->type == KVM_IRQ_ROUTING_MSI && vgic_has_its(kvm) && level) { + struct kvm_msi msi; + + kvm_populate_msi(e, &msi); + if (!vgic_its_inject_cached_translation(kvm, &msi)) + return 0; + } + + return -EWOULDBLOCK; +} + int kvm_vgic_setup_default_irq_routing(struct kvm *kvm) { struct kvm_irq_routing_entry *entries; diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 482036612adf..d53d34a33e35 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -138,6 +138,14 @@ struct its_ite { u32 event_id; }; +struct vgic_translation_cache_entry { + struct list_head entry; + phys_addr_t db; + u32 devid; + u32 eventid; + struct vgic_irq *irq; +}; + /** * struct vgic_its_abi - ITS abi ops and settings * @cte_esz: collection table entry size @@ -352,7 +360,10 @@ static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu) if (ret) return ret; + if (map.vpe) + atomic_dec(&map.vpe->vlpi_count); map.vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + atomic_inc(&map.vpe->vlpi_count); ret = its_map_vlpi(irq->host_irq, &map); } @@ -527,6 +538,127 @@ static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm, return 0; } +static struct vgic_irq *__vgic_its_check_cache(struct vgic_dist *dist, + phys_addr_t db, + u32 devid, u32 eventid) +{ + struct vgic_translation_cache_entry *cte; + + list_for_each_entry(cte, &dist->lpi_translation_cache, entry) { + /* + * If we hit a NULL entry, there is nothing after this + * point. + */ + if (!cte->irq) + break; + + if (cte->db != db || cte->devid != devid || + cte->eventid != eventid) + continue; + + /* + * Move this entry to the head, as it is the most + * recently used. 
+ */ + if (!list_is_first(&cte->entry, &dist->lpi_translation_cache)) + list_move(&cte->entry, &dist->lpi_translation_cache); + + return cte->irq; + } + + return NULL; +} + +static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db, + u32 devid, u32 eventid) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq; + unsigned long flags; + + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + irq = __vgic_its_check_cache(dist, db, devid, eventid); + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + + return irq; +} + +static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its, + u32 devid, u32 eventid, + struct vgic_irq *irq) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_translation_cache_entry *cte; + unsigned long flags; + phys_addr_t db; + + /* Do not cache a directly injected interrupt */ + if (irq->hw) + return; + + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + + if (unlikely(list_empty(&dist->lpi_translation_cache))) + goto out; + + /* + * We could have raced with another CPU caching the same + * translation behind our back, so let's check it is not in + * already + */ + db = its->vgic_its_base + GITS_TRANSLATER; + if (__vgic_its_check_cache(dist, db, devid, eventid)) + goto out; + + /* Always reuse the last entry (LRU policy) */ + cte = list_last_entry(&dist->lpi_translation_cache, + typeof(*cte), entry); + + /* + * Caching the translation implies having an extra reference + * to the interrupt, so drop the potential reference on what + * was in the cache, and increment it on the new interrupt. + */ + if (cte->irq) + __vgic_put_lpi_locked(kvm, cte->irq); + + vgic_get_irq_kref(irq); + + cte->db = db; + cte->devid = devid; + cte->eventid = eventid; + cte->irq = irq; + + /* Move the new translation to the head of the list */ + list_move(&cte->entry, &dist->lpi_translation_cache); + +out: + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); +} + +void vgic_its_invalidate_cache(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_translation_cache_entry *cte; + unsigned long flags; + + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + + list_for_each_entry(cte, &dist->lpi_translation_cache, entry) { + /* + * If we hit a NULL entry, there is nothing after this + * point. + */ + if (!cte->irq) + break; + + __vgic_put_lpi_locked(kvm, cte->irq); + cte->irq = NULL; + } + + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); +} + int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, u32 devid, u32 eventid, struct vgic_irq **irq) { @@ -547,6 +679,8 @@ int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, if (!vcpu->arch.vgic_cpu.lpis_enabled) return -EBUSY; + vgic_its_cache_translation(kvm, its, devid, eventid, ite->irq); + *irq = ite->irq; return 0; } @@ -608,6 +742,25 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, return 0; } +int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi) +{ + struct vgic_irq *irq; + unsigned long flags; + phys_addr_t db; + + db = (u64)msi->address_hi << 32 | msi->address_lo; + irq = vgic_its_check_cache(kvm, db, msi->devid, msi->data); + + if (!irq) + return -1; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->pending_latch = true; + vgic_queue_irq_unlock(kvm, irq, flags); + + return 0; +} + /* * Queries the KVM IO bus framework to get the ITS pointer from the given * doorbell address. 
@@ -619,6 +772,9 @@ int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi) struct vgic_its *its; int ret; + if (!vgic_its_inject_cached_translation(kvm, msi)) + return 1; + its = vgic_msi_to_its(kvm, msi); if (IS_ERR(its)) return PTR_ERR(its); @@ -683,14 +839,15 @@ static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its, u32 event_id = its_cmd_get_id(its_cmd); struct its_ite *ite; - ite = find_ite(its, device_id, event_id); - if (ite && ite->collection) { + if (ite && its_is_collection_mapped(ite->collection)) { /* * Though the spec talks about removing the pending state, we * don't bother here since we clear the ITTE anyway and the * pending state is a property of the ITTE struct. */ + vgic_its_invalidate_cache(kvm); + its_free_ite(kvm, ite); return 0; } @@ -726,6 +883,8 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its, ite->collection = collection; vcpu = kvm_get_vcpu(kvm, collection->target_addr); + vgic_its_invalidate_cache(kvm); + return update_affinity(ite->irq, vcpu); } @@ -954,6 +1113,8 @@ static void vgic_its_free_device(struct kvm *kvm, struct its_device *device) list_for_each_entry_safe(ite, temp, &device->itt_head, ite_list) its_free_ite(kvm, ite); + vgic_its_invalidate_cache(kvm); + list_del(&device->dev_list); kfree(device); } @@ -1059,6 +1220,7 @@ static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its, if (!valid) { vgic_its_free_collection(its, coll_id); + vgic_its_invalidate_cache(kvm); } else { collection = find_collection(its, coll_id); @@ -1207,6 +1369,8 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, vgic_put_irq(kvm, irq); } + vgic_its_invalidate_cache(kvm); + kfree(intids); return 0; } @@ -1557,6 +1721,8 @@ static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its, goto out; its->enabled = !!(val & GITS_CTLR_ENABLE); + if (!its->enabled) + vgic_its_invalidate_cache(kvm); /* * Try to process any pending commands. 
This function bails out early @@ -1657,6 +1823,47 @@ out: return ret; } +/* Default is 16 cached LPIs per vcpu */ +#define LPI_DEFAULT_PCPU_CACHE_SIZE 16 + +void vgic_lpi_translation_cache_init(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + unsigned int sz; + int i; + + if (!list_empty(&dist->lpi_translation_cache)) + return; + + sz = atomic_read(&kvm->online_vcpus) * LPI_DEFAULT_PCPU_CACHE_SIZE; + + for (i = 0; i < sz; i++) { + struct vgic_translation_cache_entry *cte; + + /* An allocation failure is not fatal */ + cte = kzalloc(sizeof(*cte), GFP_KERNEL); + if (WARN_ON(!cte)) + break; + + INIT_LIST_HEAD(&cte->entry); + list_add(&cte->entry, &dist->lpi_translation_cache); + } +} + +void vgic_lpi_translation_cache_destroy(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_translation_cache_entry *cte, *tmp; + + vgic_its_invalidate_cache(kvm); + + list_for_each_entry_safe(cte, tmp, + &dist->lpi_translation_cache, entry) { + list_del(&cte->entry); + kfree(cte); + } +} + #define INITIAL_BASER_VALUE \ (GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb) | \ GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, SameAsInner) | \ @@ -1685,6 +1892,8 @@ static int vgic_its_create(struct kvm_device *dev, u32 type) kfree(its); return ret; } + + vgic_lpi_translation_cache_init(dev->kvm); } mutex_init(&its->its_lock); @@ -2265,7 +2474,8 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) target_addr = (u32)(val >> KVM_ITS_CTE_RDBASE_SHIFT); coll_id = val & KVM_ITS_CTE_ICID_MASK; - if (target_addr >= atomic_read(&kvm->online_vcpus)) + if (target_addr != COLLECTION_NOT_MAPPED && + target_addr >= atomic_read(&kvm->online_vcpus)) return -EINVAL; collection = find_collection(its, coll_id); diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index c45e2d7e942f..ebc218840fc2 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -192,8 +192,10 @@ static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu, vgic_cpu->lpis_enabled = val & GICR_CTLR_ENABLE_LPIS; - if (was_enabled && !vgic_cpu->lpis_enabled) + if (was_enabled && !vgic_cpu->lpis_enabled) { vgic_flush_pending_lpis(vcpu); + vgic_its_invalidate_cache(vcpu->kvm); + } if (!was_enabled && vgic_cpu->lpis_enabled) vgic_enable_lpis(vcpu); @@ -412,8 +414,11 @@ static unsigned long vgic_mmio_read_pendbase(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u64 value = vgic_cpu->pendbaser; + + value &= ~GICR_PENDBASER_PTZ; - return extract_bytes(vgic_cpu->pendbaser, addr & 7, len); + return extract_bytes(value, addr & 7, len); } static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu, @@ -515,7 +520,8 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = { VGIC_ACCESS_32bit), }; -static const struct vgic_register_region vgic_v3_rdbase_registers[] = { +static const struct vgic_register_region vgic_v3_rd_registers[] = { + /* RD_base registers */ REGISTER_DESC_WITH_LENGTH(GICR_CTLR, vgic_mmio_read_v3r_ctlr, vgic_mmio_write_v3r_ctlr, 4, VGIC_ACCESS_32bit), @@ -540,44 +546,42 @@ static const struct vgic_register_region vgic_v3_rdbase_registers[] = { REGISTER_DESC_WITH_LENGTH(GICR_IDREGS, vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48, VGIC_ACCESS_32bit), -}; - -static const struct vgic_register_region vgic_v3_sgibase_registers[] = { - REGISTER_DESC_WITH_LENGTH(GICR_IGROUPR0, + /* SGI_base registers */ + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IGROUPR0, vgic_mmio_read_group, 
vgic_mmio_write_group, 4, VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_ISENABLER0, + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ISENABLER0, vgic_mmio_read_enable, vgic_mmio_write_senable, 4, VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_ICENABLER0, + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ICENABLER0, vgic_mmio_read_enable, vgic_mmio_write_cenable, 4, VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISPENDR0, + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0, vgic_mmio_read_pending, vgic_mmio_write_spending, vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4, VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICPENDR0, + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICPENDR0, vgic_mmio_read_pending, vgic_mmio_write_cpending, vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 4, VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISACTIVER0, + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISACTIVER0, vgic_mmio_read_active, vgic_mmio_write_sactive, NULL, vgic_mmio_uaccess_write_sactive, 4, VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICACTIVER0, + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICACTIVER0, vgic_mmio_read_active, vgic_mmio_write_cactive, NULL, vgic_mmio_uaccess_write_cactive, 4, VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_IPRIORITYR0, + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IPRIORITYR0, vgic_mmio_read_priority, vgic_mmio_write_priority, 32, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), - REGISTER_DESC_WITH_LENGTH(GICR_ICFGR0, + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ICFGR0, vgic_mmio_read_config, vgic_mmio_write_config, 8, VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_IGRPMODR0, + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IGRPMODR0, vgic_mmio_read_raz, vgic_mmio_write_wi, 4, VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_NSACR, + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_NSACR, vgic_mmio_read_raz, vgic_mmio_write_wi, 4, VGIC_ACCESS_32bit), }; @@ -607,9 +611,8 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) struct vgic_dist *vgic = &kvm->arch.vgic; struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; - struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev; struct vgic_redist_region *rdreg; - gpa_t rd_base, sgi_base; + gpa_t rd_base; int ret; if (!IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) @@ -631,52 +634,31 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) vgic_cpu->rdreg = rdreg; rd_base = rdreg->base + rdreg->free_index * KVM_VGIC_V3_REDIST_SIZE; - sgi_base = rd_base + SZ_64K; kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops); rd_dev->base_addr = rd_base; rd_dev->iodev_type = IODEV_REDIST; - rd_dev->regions = vgic_v3_rdbase_registers; - rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers); + rd_dev->regions = vgic_v3_rd_registers; + rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rd_registers); rd_dev->redist_vcpu = vcpu; mutex_lock(&kvm->slots_lock); ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base, - SZ_64K, &rd_dev->dev); + 2 * SZ_64K, &rd_dev->dev); mutex_unlock(&kvm->slots_lock); if (ret) return ret; - kvm_iodevice_init(&sgi_dev->dev, &kvm_io_gic_ops); - sgi_dev->base_addr = sgi_base; - sgi_dev->iodev_type = IODEV_REDIST; - sgi_dev->regions = vgic_v3_sgibase_registers; - sgi_dev->nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers); - sgi_dev->redist_vcpu = vcpu; - - mutex_lock(&kvm->slots_lock); - ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base, - SZ_64K, &sgi_dev->dev); - 
if (ret) { - kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, - &rd_dev->dev); - goto out; - } - rdreg->free_index++; -out: - mutex_unlock(&kvm->slots_lock); - return ret; + return 0; } static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu) { struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; - struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev; kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &rd_dev->dev); - kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &sgi_dev->dev); } static int vgic_register_all_redist_iodevs(struct kvm *kvm) @@ -826,8 +808,8 @@ int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) iodev.base_addr = 0; break; case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:{ - iodev.regions = vgic_v3_rdbase_registers; - iodev.nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers); + iodev.regions = vgic_v3_rd_registers; + iodev.nr_regions = ARRAY_SIZE(vgic_v3_rd_registers); iodev.base_addr = 0; break; } @@ -985,21 +967,11 @@ int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write, int offset, u32 *val) { struct vgic_io_device rd_dev = { - .regions = vgic_v3_rdbase_registers, - .nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers), - }; - - struct vgic_io_device sgi_dev = { - .regions = vgic_v3_sgibase_registers, - .nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers), + .regions = vgic_v3_rd_registers, + .nr_regions = ARRAY_SIZE(vgic_v3_rd_registers), }; - /* SGI_base is the next 64K frame after RD_base */ - if (offset >= SZ_64K) - return vgic_uaccess(vcpu, &sgi_dev, is_write, offset - SZ_64K, - val); - else - return vgic_uaccess(vcpu, &rd_dev, is_write, offset, val); + return vgic_uaccess(vcpu, &rd_dev, is_write, offset, val); } int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write, diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index 44efc2ff863f..97fb2a40e6ba 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -179,27 +179,6 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, return value; } -/* - * This function will return the VCPU that performed the MMIO access and - * trapped from within the VM, and will return NULL if this is a userspace - * access. - * - * We can disable preemption locally around accessing the per-CPU variable, - * and use the resolved vcpu pointer after enabling preemption again, because - * even if the current thread is migrated to another CPU, reading the per-CPU - * value later will give us the same value as we update the per-CPU variable - * in the preempt notifier handlers. 
- */ -static struct kvm_vcpu *vgic_get_mmio_requester_vcpu(void) -{ - struct kvm_vcpu *vcpu; - - preempt_disable(); - vcpu = kvm_arm_get_running_vcpu(); - preempt_enable(); - return vcpu; -} - /* Must be called with irq->irq_lock held */ static void vgic_hw_irq_spending(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool is_uaccess) @@ -211,11 +190,17 @@ static void vgic_hw_irq_spending(struct kvm_vcpu *vcpu, struct vgic_irq *irq, vgic_irq_set_phys_active(irq, true); } +static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq) +{ + return (vgic_irq_is_sgi(irq->intid) && + vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2); +} + void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { - bool is_uaccess = !vgic_get_mmio_requester_vcpu(); + bool is_uaccess = !kvm_get_running_vcpu(); u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; unsigned long flags; @@ -223,6 +208,12 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + /* GICD_ISPENDR0 SGI bits are WI */ + if (is_vgic_v2_sgi(vcpu, irq)) { + vgic_put_irq(vcpu->kvm, irq); + continue; + } + raw_spin_lock_irqsave(&irq->irq_lock, flags); if (irq->hw) vgic_hw_irq_spending(vcpu, irq, is_uaccess); @@ -262,7 +253,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { - bool is_uaccess = !vgic_get_mmio_requester_vcpu(); + bool is_uaccess = !kvm_get_running_vcpu(); u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; unsigned long flags; @@ -270,6 +261,12 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + /* GICD_ICPENDR0 SGI bits are WI */ + if (is_vgic_v2_sgi(vcpu, irq)) { + vgic_put_irq(vcpu->kvm, irq); + continue; + } + raw_spin_lock_irqsave(&irq->irq_lock, flags); if (irq->hw) @@ -317,7 +314,7 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool active) { unsigned long flags; - struct kvm_vcpu *requester_vcpu = vgic_get_mmio_requester_vcpu(); + struct kvm_vcpu *requester_vcpu = kvm_get_running_vcpu(); raw_spin_lock_irqsave(&irq->irq_lock, flags); diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h index 836f418f1ee8..5af2aefad435 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.h +++ b/virt/kvm/arm/vgic/vgic-mmio.h @@ -98,11 +98,6 @@ extern struct kvm_io_device_ops kvm_io_gic_ops; .uaccess_write = uwr, \ } -int kvm_vgic_register_mmio_region(struct kvm *kvm, struct kvm_vcpu *vcpu, - struct vgic_register_region *reg_desc, - struct vgic_io_device *region, - int nr_irqs, bool offset_private); - unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len); void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 96aab77d0471..621cc168fe3f 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -184,7 +184,10 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) if (vgic_irq_is_sgi(irq->intid)) { u32 src = ffs(irq->source); - BUG_ON(!src); + if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n", + irq->intid)) + return; + val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; irq->source &= ~(1 << (src - 1)); if (irq->source) { @@ -354,10 +357,11 @@ out: DEFINE_STATIC_KEY_FALSE(vgic_v2_cpuif_trap); /** - * vgic_v2_probe - probe for a 
GICv2 compatible interrupt controller in DT - * @node: pointer to the DT node + * vgic_v2_probe - probe for a VGICv2 compatible interrupt controller + * @info: pointer to the GIC description * - * Returns 0 if a GICv2 has been found, returns an error code otherwise + * Returns 0 if the VGICv2 has been probed successfully, returns an error code + * otherwise */ int vgic_v2_probe(const struct gic_kvm_info *info) { diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 0c653a1e5215..f45635a6f0ec 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -167,7 +167,10 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) model == KVM_DEV_TYPE_ARM_VGIC_V2) { u32 src = ffs(irq->source); - BUG_ON(!src); + if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n", + irq->intid)) + return; + val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; irq->source &= ~(1 << (src - 1)); if (irq->source) { @@ -354,14 +357,14 @@ retry: } /** - * vgic_its_save_pending_tables - Save the pending tables into guest RAM + * vgic_v3_save_pending_tables - Save the pending tables into guest RAM * kvm lock and all vcpu lock must be held */ int vgic_v3_save_pending_tables(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - int last_byte_offset = -1; struct vgic_irq *irq; + gpa_t last_ptr = ~(gpa_t)0; int ret; u8 val; @@ -381,11 +384,11 @@ int vgic_v3_save_pending_tables(struct kvm *kvm) bit_nr = irq->intid % BITS_PER_BYTE; ptr = pendbase + byte_offset; - if (byte_offset != last_byte_offset) { + if (ptr != last_ptr) { ret = kvm_read_guest_lock(kvm, ptr, &val, 1); if (ret) return ret; - last_byte_offset = byte_offset; + last_ptr = ptr; } stored = val & (1U << bit_nr); @@ -570,10 +573,11 @@ static int __init early_gicv4_enable(char *buf) early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); /** - * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT - * @node: pointer to the DT node + * vgic_v3_probe - probe for a VGICv3 compatible interrupt controller + * @info: pointer to the GIC description * - * Returns 0 if a GICv3 has been found, returns an error code otherwise + * Returns 0 if the VGICv3 has been probed successfully, returns an error code + * otherwise */ int vgic_v3_probe(const struct gic_kvm_info *info) { @@ -660,6 +664,8 @@ void vgic_v3_load(struct kvm_vcpu *vcpu) if (has_vhe()) __vgic_v3_activate_traps(vcpu); + + WARN_ON(vgic_v4_load(vcpu)); } void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu) @@ -672,6 +678,8 @@ void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu) void vgic_v3_put(struct kvm_vcpu *vcpu) { + WARN_ON(vgic_v4_put(vcpu, false)); + vgic_v3_vmcr_sync(vcpu); kvm_call_hyp(__vgic_v3_save_aprs, vcpu); diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c index 477af6aebb97..46f875589c47 100644 --- a/virt/kvm/arm/vgic/vgic-v4.c +++ b/virt/kvm/arm/vgic/vgic-v4.c @@ -85,6 +85,10 @@ static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info) { struct kvm_vcpu *vcpu = info; + /* We got the message, no need to fire again */ + if (!irqd_irq_disabled(&irq_to_desc(irq)->irq_data)) + disable_irq_nosync(irq); + vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true; kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); kvm_vcpu_kick(vcpu); @@ -192,20 +196,30 @@ void vgic_v4_teardown(struct kvm *kvm) its_vm->vpes = NULL; } -int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu) +int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db) { - if (!vgic_supports_direct_msis(vcpu->kvm)) + struct its_vpe *vpe = 
&vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + struct irq_desc *desc = irq_to_desc(vpe->irq); + + if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident) return 0; - return its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, false); + /* + * If blocking, a doorbell is required. Undo the nested + * disable_irq() calls... + */ + while (need_db && irqd_irq_disabled(&desc->irq_data)) + enable_irq(vpe->irq); + + return its_schedule_vpe(vpe, false); } -int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu) +int vgic_v4_load(struct kvm_vcpu *vcpu) { - int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq; + struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; int err; - if (!vgic_supports_direct_msis(vcpu->kvm)) + if (!vgic_supports_direct_msis(vcpu->kvm) || vpe->resident) return 0; /* @@ -214,11 +228,14 @@ int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu) * doc in drivers/irqchip/irq-gic-v4.c to understand how this * turns into a VMOVP command at the ITS level. */ - err = irq_set_affinity(irq, cpumask_of(smp_processor_id())); + err = irq_set_affinity(vpe->irq, cpumask_of(smp_processor_id())); if (err) return err; - err = its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, true); + /* Disabled the doorbell, as we're about to enter the guest */ + disable_irq_nosync(vpe->irq); + + err = its_schedule_vpe(vpe, true); if (err) return err; @@ -226,9 +243,7 @@ int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu) * Now that the VPE is resident, let's get rid of a potential * doorbell interrupt that would still be pending. */ - err = irq_set_irqchip_state(irq, IRQCHIP_STATE_PENDING, false); - - return err; + return irq_set_irqchip_state(vpe->irq, IRQCHIP_STATE_PENDING, false); } static struct vgic_its *vgic_get_its(struct kvm *kvm, @@ -266,7 +281,7 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq, mutex_lock(&its->its_lock); - /* Perform then actual DevID/EventID -> LPI translation. */ + /* Perform the actual DevID/EventID -> LPI translation. */ ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid, irq_entry->msi.data, &irq); if (ret) @@ -294,6 +309,7 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq, irq->hw = true; irq->host_irq = virq; + atomic_inc(&map.vpe->vlpi_count); out: mutex_unlock(&its->its_lock); @@ -327,6 +343,7 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq, WARN_ON(!(irq->hw && irq->host_irq == virq)); if (irq->hw) { + atomic_dec(&irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count); irq->hw = false; ret = its_unmap_vlpi(virq); } @@ -335,21 +352,3 @@ out: mutex_unlock(&its->its_lock); return ret; } - -void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu) -{ - if (vgic_supports_direct_msis(vcpu->kvm)) { - int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq; - if (irq) - enable_irq(irq); - } -} - -void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu) -{ - if (vgic_supports_direct_msis(vcpu->kvm)) { - int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq; - if (irq) - disable_irq(irq); - } -} diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 13d4b38a94ec..99b02ca730a8 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -119,6 +119,22 @@ static void vgic_irq_release(struct kref *ref) { } +/* + * Drop the refcount on the LPI. Must be called with lpi_list_lock held. 
+ */ +void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + + if (!kref_put(&irq->refcount, vgic_irq_release)) + return; + + list_del(&irq->lpi_list); + dist->lpi_list_count--; + + kfree(irq); +} + void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) { struct vgic_dist *dist = &kvm->arch.vgic; @@ -128,16 +144,8 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) return; raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); - if (!kref_put(&irq->refcount, vgic_irq_release)) { - raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); - return; - }; - - list_del(&irq->lpi_list); - dist->lpi_list_count--; + __vgic_put_lpi_locked(kvm, irq); raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); - - kfree(irq); } void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu) @@ -254,6 +262,13 @@ static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b) bool penda, pendb; int ret; + /* + * list_sort may call this function with the same element when + * the list is fairly long. + */ + if (unlikely(irqa == irqb)) + return 0; + raw_spin_lock(&irqa->irq_lock); raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING); @@ -842,8 +857,6 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - WARN_ON(vgic_v4_sync_hwstate(vcpu)); - /* An empty ap_list_head implies used_lrs == 0 */ if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) return; @@ -867,8 +880,6 @@ static inline void vgic_restore_state(struct kvm_vcpu *vcpu) /* Flush our emulation state into the GIC hardware before entering the guest. */ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) { - WARN_ON(vgic_v4_flush_hwstate(vcpu)); - /* * If there are no virtual interrupts active or pending for this * VCPU, then there is no work to do and we can bail out without diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 797e05004d80..c7fefd6b1c80 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -161,6 +161,7 @@ vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev, gpa_t addr, int len); struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 intid); +void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq); void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq); bool vgic_get_phys_line_level(struct vgic_irq *irq); void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending); @@ -307,11 +308,13 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr); int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, u32 devid, u32 eventid, struct vgic_irq **irq); struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi); +int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi); +void vgic_lpi_translation_cache_init(struct kvm *kvm); +void vgic_lpi_translation_cache_destroy(struct kvm *kvm); +void vgic_its_invalidate_cache(struct kvm *kvm); bool vgic_supports_direct_msis(struct kvm *kvm); int vgic_v4_init(struct kvm *kvm); void vgic_v4_teardown(struct kvm *kvm); -int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu); -int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu); #endif |
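Illustrative note on the MSI -> LPI translation cache added by the vgic-its hunks above: vgic_its_inject_msi() first tries vgic_its_inject_cached_translation(), and the cache is flushed on DISCARD, MOVI, MAPC, MOVALL, on disabling the ITS, and on disabling LPIs at the redistributor. The user-space sketch below models only the data-structure idea (a small LRU list keyed by doorbell/DevID/EventID with a "16 entries per vcpu" sizing); the names cache_entry, cache_lookup and cache_insert are assumptions of this example, not the kernel's API, and the kernel version additionally takes lpi_list_lock and holds a reference on the vgic_irq.

/*
 * Minimal, self-contained sketch of an LRU translation cache, assuming
 * invented helper names; this is not the kernel implementation.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct cache_entry {
	uint64_t db;		/* doorbell (GITS_TRANSLATER) address */
	uint32_t devid;
	uint32_t eventid;
	uint32_t intid;		/* cached LPI number; 0 means "empty" */
	struct cache_entry *next;
};

static struct cache_entry *cache_head;

/* Pre-allocate a small cache, mirroring the "16 entries per vcpu" default. */
static void cache_init(unsigned int nr_vcpus)
{
	unsigned int i, sz = nr_vcpus * 16;

	for (i = 0; i < sz; i++) {
		struct cache_entry *cte = calloc(1, sizeof(*cte));

		if (!cte)
			break;	/* an allocation failure is not fatal */
		cte->next = cache_head;
		cache_head = cte;
	}
}

/* Hit: move the entry to the head so hot translations stay cheap. */
static uint32_t cache_lookup(uint64_t db, uint32_t devid, uint32_t eventid)
{
	struct cache_entry **prev = &cache_head, *cte;

	for (cte = cache_head; cte; prev = &cte->next, cte = cte->next) {
		if (!cte->intid)
			continue;
		if (cte->db == db && cte->devid == devid &&
		    cte->eventid == eventid) {
			*prev = cte->next;	/* unlink ... */
			cte->next = cache_head;	/* ... and push to the front */
			cache_head = cte;
			return cte->intid;
		}
	}
	return 0;	/* miss: fall back to the full ITS table walk */
}

/* Insert after a successful full translation; recycle the LRU tail. */
static void cache_insert(uint64_t db, uint32_t devid, uint32_t eventid,
			 uint32_t intid)
{
	struct cache_entry *cte = cache_head;

	if (!cte)
		return;
	while (cte->next)	/* the last entry is the least recently used */
		cte = cte->next;
	cte->db = db;
	cte->devid = devid;
	cte->eventid = eventid;
	cte->intid = intid;
	(void)cache_lookup(db, devid, eventid);	/* promote to the head */
}

int main(void)
{
	cache_init(4);
	cache_insert(0x8080000, 0x10, 0x3, 8192);
	printf("hit  -> LPI %u\n", cache_lookup(0x8080000, 0x10, 0x3));
	printf("miss -> %u\n", cache_lookup(0x8080000, 0x10, 0x4));
	return 0;
}

Invalidation in the sketch would simply clear every intid; the kernel instead drops the irq references under lpi_list_lock, which is why the commands listed above all end up in vgic_its_invalidate_cache().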
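A second illustrative note, this time on the vgic-v4.c hunks: vgic_v4_load() masks the doorbell with disable_irq_nosync() before making the vPE resident, vgic_v4_put() only re-enables it when the vcpu is actually blocking (need_db), and vgic_v4_doorbell_handler() masks it again after a single wake-up. The sketch below models that discipline with a plain depth counter; the counter and the vpe_load/vpe_put/doorbell_fired names are assumptions for the example and stand in for the kernel's genirq disable-depth bookkeeping, its_schedule_vpe() and the KVM vcpu kick.

/*
 * Minimal sketch of the doorbell masking discipline, under the assumptions
 * stated above; not the kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

struct vpe_state {
	bool resident;		/* vPE scheduled on a redistributor */
	int db_disable_depth;	/* nested disable count for the doorbell */
};

static void doorbell_disable(struct vpe_state *v) { v->db_disable_depth++; }
static void doorbell_enable(struct vpe_state *v)  { v->db_disable_depth--; }

/* vcpu_load path: mask the doorbell, then make the vPE resident. */
static void vpe_load(struct vpe_state *v)
{
	if (v->resident)
		return;
	doorbell_disable(v);	/* pending vLPIs are seen directly while resident */
	v->resident = true;
}

/* vcpu_put path: descheduling; unmask the doorbell only when blocking. */
static void vpe_put(struct vpe_state *v, bool need_db)
{
	if (!v->resident)
		return;
	while (need_db && v->db_disable_depth > 0)
		doorbell_enable(v);	/* undo the nested disables */
	v->resident = false;
}

/* Doorbell handler: one wake-up is enough, so mask it again. */
static void doorbell_fired(struct vpe_state *v)
{
	if (v->db_disable_depth == 0)
		doorbell_disable(v);
	/* ...the real handler then sets pending_last and kicks the vcpu. */
}

int main(void)
{
	struct vpe_state v = { 0 };

	vpe_load(&v);		/* entering the guest */
	vpe_put(&v, true);	/* blocking: the doorbell must be live */
	printf("blocked: depth=%d resident=%d\n", v.db_disable_depth, v.resident);
	doorbell_fired(&v);	/* a vLPI arrives while blocked */
	printf("woken:   depth=%d resident=%d\n", v.db_disable_depth, v.resident);
	return 0;
}

This is also why the old kvm_vgic_v4_enable_doorbell()/kvm_vgic_v4_disable_doorbell() helpers disappear in the diff: the enable/disable decisions move into the load/put paths shown above.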