summaryrefslogtreecommitdiffstats
path: root/virt/kvm/arm
diff options
context:
space:
mode:
Diffstat (limited to 'virt/kvm/arm')
-rw-r--r--virt/kvm/arm/aarch32.c131
-rw-r--r--virt/kvm/arm/arch_timer.c13
-rw-r--r--virt/kvm/arm/arm.c164
-rw-r--r--virt/kvm/arm/hypercalls.c71
-rw-r--r--virt/kvm/arm/mmio.c82
-rw-r--r--virt/kvm/arm/mmu.c62
-rw-r--r--virt/kvm/arm/perf.c6
-rw-r--r--virt/kvm/arm/pmu.c158
-rw-r--r--virt/kvm/arm/psci.c84
-rw-r--r--virt/kvm/arm/pvtime.c131
-rw-r--r--virt/kvm/arm/vgic/trace.h2
-rw-r--r--virt/kvm/arm/vgic/vgic-init.c59
-rw-r--r--virt/kvm/arm/vgic/vgic-irqfd.c36
-rw-r--r--virt/kvm/arm/vgic/vgic-its.c216
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio-v3.c90
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.c45
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.h5
-rw-r--r--virt/kvm/arm/vgic/vgic-v2.c12
-rw-r--r--virt/kvm/arm/vgic/vgic-v3.c24
-rw-r--r--virt/kvm/arm/vgic/vgic-v4.c59
-rw-r--r--virt/kvm/arm/vgic/vgic.c37
-rw-r--r--virt/kvm/arm/vgic/vgic.h7
22 files changed, 980 insertions, 514 deletions
diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c
index c4c57ba99e90..0a356aa91aa1 100644
--- a/virt/kvm/arm/aarch32.c
+++ b/virt/kvm/arm/aarch32.c
@@ -10,10 +10,15 @@
* Author: Christoffer Dall <c.dall@virtualopensystems.com>
*/
+#include <linux/bits.h>
#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
+#define DFSR_FSC_EXTABT_LPAE 0x10
+#define DFSR_FSC_EXTABT_nLPAE 0x08
+#define DFSR_LPAE BIT(9)
+
/*
* Table taken from ARMv8 ARM DDI0487B-B, table G1-10.
*/
@@ -28,25 +33,115 @@ static const u8 return_offsets[8][2] = {
[7] = { 4, 4 }, /* FIQ, unused */
};
+/*
+ * When an exception is taken, most CPSR fields are left unchanged in the
+ * handler. However, some are explicitly overridden (e.g. M[4:0]).
+ *
+ * The SPSR/SPSR_ELx layouts differ, and the below is intended to work with
+ * either format. Note: SPSR.J bit doesn't exist in SPSR_ELx, but this bit was
+ * obsoleted by the ARMv7 virtualization extensions and is RES0.
+ *
+ * For the SPSR layout seen from AArch32, see:
+ * - ARM DDI 0406C.d, page B1-1148
+ * - ARM DDI 0487E.a, page G8-6264
+ *
+ * For the SPSR_ELx layout for AArch32 seen from AArch64, see:
+ * - ARM DDI 0487E.a, page C5-426
+ *
+ * Here we manipulate the fields in order of the AArch32 SPSR_ELx layout, from
+ * MSB to LSB.
+ */
+static unsigned long get_except32_cpsr(struct kvm_vcpu *vcpu, u32 mode)
+{
+ u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR);
+ unsigned long old, new;
+
+ old = *vcpu_cpsr(vcpu);
+ new = 0;
+
+ new |= (old & PSR_AA32_N_BIT);
+ new |= (old & PSR_AA32_Z_BIT);
+ new |= (old & PSR_AA32_C_BIT);
+ new |= (old & PSR_AA32_V_BIT);
+ new |= (old & PSR_AA32_Q_BIT);
+
+ // CPSR.IT[7:0] are set to zero upon any exception
+ // See ARM DDI 0487E.a, section G1.12.3
+ // See ARM DDI 0406C.d, section B1.8.3
+
+ new |= (old & PSR_AA32_DIT_BIT);
+
+ // CPSR.SSBS is set to SCTLR.DSSBS upon any exception
+ // See ARM DDI 0487E.a, page G8-6244
+ if (sctlr & BIT(31))
+ new |= PSR_AA32_SSBS_BIT;
+
+ // CPSR.PAN is unchanged unless SCTLR.SPAN == 0b0
+ // SCTLR.SPAN is RES1 when ARMv8.1-PAN is not implemented
+ // See ARM DDI 0487E.a, page G8-6246
+ new |= (old & PSR_AA32_PAN_BIT);
+ if (!(sctlr & BIT(23)))
+ new |= PSR_AA32_PAN_BIT;
+
+ // SS does not exist in AArch32, so ignore
+
+ // CPSR.IL is set to zero upon any exception
+ // See ARM DDI 0487E.a, page G1-5527
+
+ new |= (old & PSR_AA32_GE_MASK);
+
+ // CPSR.IT[7:0] are set to zero upon any exception
+ // See prior comment above
+
+ // CPSR.E is set to SCTLR.EE upon any exception
+ // See ARM DDI 0487E.a, page G8-6245
+ // See ARM DDI 0406C.d, page B4-1701
+ if (sctlr & BIT(25))
+ new |= PSR_AA32_E_BIT;
+
+ // CPSR.A is unchanged upon an exception to Undefined, Supervisor
+ // CPSR.A is set upon an exception to other modes
+ // See ARM DDI 0487E.a, pages G1-5515 to G1-5516
+ // See ARM DDI 0406C.d, page B1-1182
+ new |= (old & PSR_AA32_A_BIT);
+ if (mode != PSR_AA32_MODE_UND && mode != PSR_AA32_MODE_SVC)
+ new |= PSR_AA32_A_BIT;
+
+ // CPSR.I is set upon any exception
+ // See ARM DDI 0487E.a, pages G1-5515 to G1-5516
+ // See ARM DDI 0406C.d, page B1-1182
+ new |= PSR_AA32_I_BIT;
+
+ // CPSR.F is set upon an exception to FIQ
+ // CPSR.F is unchanged upon an exception to other modes
+ // See ARM DDI 0487E.a, pages G1-5515 to G1-5516
+ // See ARM DDI 0406C.d, page B1-1182
+ new |= (old & PSR_AA32_F_BIT);
+ if (mode == PSR_AA32_MODE_FIQ)
+ new |= PSR_AA32_F_BIT;
+
+ // CPSR.T is set to SCTLR.TE upon any exception
+ // See ARM DDI 0487E.a, page G8-5514
+ // See ARM DDI 0406C.d, page B1-1181
+ if (sctlr & BIT(30))
+ new |= PSR_AA32_T_BIT;
+
+ new |= mode;
+
+ return new;
+}
+
static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
{
- unsigned long cpsr;
- unsigned long new_spsr_value = *vcpu_cpsr(vcpu);
- bool is_thumb = (new_spsr_value & PSR_AA32_T_BIT);
+ unsigned long spsr = *vcpu_cpsr(vcpu);
+ bool is_thumb = (spsr & PSR_AA32_T_BIT);
u32 return_offset = return_offsets[vect_offset >> 2][is_thumb];
u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR);
- cpsr = mode | PSR_AA32_I_BIT;
-
- if (sctlr & (1 << 30))
- cpsr |= PSR_AA32_T_BIT;
- if (sctlr & (1 << 25))
- cpsr |= PSR_AA32_E_BIT;
-
- *vcpu_cpsr(vcpu) = cpsr;
+ *vcpu_cpsr(vcpu) = get_except32_cpsr(vcpu, mode);
/* Note: These now point to the banked copies */
- vcpu_write_spsr(vcpu, new_spsr_value);
+ vcpu_write_spsr(vcpu, host_spsr_to_spsr32(spsr));
*vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
/* Branch to exception vector */
@@ -84,16 +179,18 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt,
fsr = &vcpu_cp15(vcpu, c5_DFSR);
}
- prepare_fault32(vcpu, PSR_AA32_MODE_ABT | PSR_AA32_A_BIT, vect_offset);
+ prepare_fault32(vcpu, PSR_AA32_MODE_ABT, vect_offset);
*far = addr;
/* Give the guest an IMPLEMENTATION DEFINED exception */
is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31);
- if (is_lpae)
- *fsr = 1 << 9 | 0x34;
- else
- *fsr = 0x14;
+ if (is_lpae) {
+ *fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE;
+ } else {
+ /* no need to shuffle FS[4] into DFSR[10] as its 0 */
+ *fsr = DFSR_FSC_EXTABT_nLPAE;
+ }
}
void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr)
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index e2bb5bd60227..0d9438e9de2a 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -80,7 +80,7 @@ static inline bool userspace_irqchip(struct kvm *kvm)
static void soft_timer_start(struct hrtimer *hrt, u64 ns)
{
hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
- HRTIMER_MODE_ABS);
+ HRTIMER_MODE_ABS_HARD);
}
static void soft_timer_cancel(struct hrtimer *hrt)
@@ -697,11 +697,11 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
update_vtimer_cntvoff(vcpu, kvm_phys_timer_read());
ptimer->cntvoff = 0;
- hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
timer->bg_timer.function = kvm_bg_timer_expire;
- hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
- hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
+ hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
vtimer->hrtimer.function = kvm_hrtimer_expire;
ptimer->hrtimer.function = kvm_hrtimer_expire;
@@ -805,6 +805,7 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
switch (treg) {
case TIMER_REG_TVAL:
val = timer->cnt_cval - kvm_phys_timer_read() + timer->cntvoff;
+ val &= lower_32_bits(val);
break;
case TIMER_REG_CTL:
@@ -850,7 +851,7 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
{
switch (treg) {
case TIMER_REG_TVAL:
- timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + val;
+ timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + (s32)val;
break;
case TIMER_REG_CTL:
@@ -1022,7 +1023,7 @@ static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
bool kvm_arch_timer_get_input_level(int vintid)
{
- struct kvm_vcpu *vcpu = kvm_arm_get_running_vcpu();
+ struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
struct arch_timer_context *timer;
if (vintid == vcpu_vtimer(vcpu)->irq.irq)
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 35a069815baf..d65a0faa46d8 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -20,8 +20,6 @@
#include <linux/irqbypass.h>
#include <linux/sched/stat.h>
#include <trace/events/kvm.h>
-#include <kvm/arm_pmu.h>
-#include <kvm/arm_psci.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -40,6 +38,10 @@
#include <asm/kvm_coproc.h>
#include <asm/sections.h>
+#include <kvm/arm_hypercalls.h>
+#include <kvm/arm_pmu.h>
+#include <kvm/arm_psci.h>
+
#ifdef REQUIRES_VIRT
__asm__(".arch_extension virt");
#endif
@@ -47,9 +49,6 @@ __asm__(".arch_extension virt");
DEFINE_PER_CPU(kvm_host_data_t, kvm_host_data);
static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
-/* Per-CPU variable containing the currently running vcpu. */
-static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
-
/* The VMID used in the VTTBR */
static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
static u32 kvm_next_vmid;
@@ -58,31 +57,8 @@ static DEFINE_SPINLOCK(kvm_vmid_lock);
static bool vgic_present;
static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
-
-static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
-{
- __this_cpu_write(kvm_arm_running_vcpu, vcpu);
-}
-
DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
-/**
- * kvm_arm_get_running_vcpu - get the vcpu running on the current CPU.
- * Must be called from non-preemptible context
- */
-struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
-{
- return __this_cpu_read(kvm_arm_running_vcpu);
-}
-
-/**
- * kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus.
- */
-struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
-{
- return &kvm_arm_running_vcpu;
-}
-
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
@@ -98,6 +74,26 @@ int kvm_arch_check_processor_compat(void)
return 0;
}
+int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
+ struct kvm_enable_cap *cap)
+{
+ int r;
+
+ if (cap->flags)
+ return -EINVAL;
+
+ switch (cap->cap) {
+ case KVM_CAP_ARM_NISV_TO_USER:
+ r = 0;
+ kvm->arch.return_nisv_io_abort_to_user = true;
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+
+ return r;
+}
/**
* kvm_arch_init_vm - initializes a VM data structure
@@ -170,7 +166,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
if (kvm->vcpus[i]) {
- kvm_arch_vcpu_free(kvm->vcpus[i]);
+ kvm_vcpu_destroy(kvm->vcpus[i]);
kvm->vcpus[i] = NULL;
}
}
@@ -196,6 +192,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_MP_STATE:
case KVM_CAP_IMMEDIATE_EXIT:
case KVM_CAP_VCPU_EVENTS:
+ case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2:
+ case KVM_CAP_ARM_NISV_TO_USER:
+ case KVM_CAP_ARM_INJECT_EXT_DABT:
r = 1;
break;
case KVM_CAP_ARM_SET_DEVICE_ADDR:
@@ -252,49 +251,46 @@ void kvm_arch_free_vm(struct kvm *kvm)
vfree(kvm);
}
-struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
+int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
+{
+ if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
+ return -EBUSY;
+
+ if (id >= kvm->arch.max_vcpus)
+ return -EINVAL;
+
+ return 0;
+}
+
+int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
int err;
- struct kvm_vcpu *vcpu;
- if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) {
- err = -EBUSY;
- goto out;
- }
+ /* Force users to call KVM_ARM_VCPU_INIT */
+ vcpu->arch.target = -1;
+ bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
- if (id >= kvm->arch.max_vcpus) {
- err = -EINVAL;
- goto out;
- }
+ /* Set up the timer */
+ kvm_timer_vcpu_init(vcpu);
- vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
- if (!vcpu) {
- err = -ENOMEM;
- goto out;
- }
+ kvm_pmu_vcpu_init(vcpu);
- err = kvm_vcpu_init(vcpu, kvm, id);
- if (err)
- goto free_vcpu;
+ kvm_arm_reset_debug_ptr(vcpu);
+
+ kvm_arm_pvtime_vcpu_init(&vcpu->arch);
- err = create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP);
+ err = kvm_vgic_vcpu_init(vcpu);
if (err)
- goto vcpu_uninit;
+ return err;
- return vcpu;
-vcpu_uninit:
- kvm_vcpu_uninit(vcpu);
-free_vcpu:
- kmem_cache_free(kvm_vcpu_cache, vcpu);
-out:
- return ERR_PTR(err);
+ return create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
}
-void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm)))
static_branch_dec(&userspace_irqchip_in_use);
@@ -302,13 +298,8 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
kvm_mmu_free_memory_caches(vcpu);
kvm_timer_vcpu_terminate(vcpu);
kvm_pmu_vcpu_destroy(vcpu);
- kvm_vcpu_uninit(vcpu);
- kmem_cache_free(kvm_vcpu_cache, vcpu);
-}
-void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
-{
- kvm_arch_vcpu_free(vcpu);
+ kvm_arm_vcpu_destroy(vcpu);
}
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
@@ -321,36 +312,24 @@ void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
/*
* If we're about to block (most likely because we've just hit a
* WFI), we need to sync back the state of the GIC CPU interface
- * so that we have the lastest PMR and group enables. This ensures
+ * so that we have the latest PMR and group enables. This ensures
* that kvm_arch_vcpu_runnable has up-to-date data to decide
* whether we have pending interrupts.
+ *
+ * For the same reason, we want to tell GICv4 that we need
+ * doorbells to be signalled, should an interrupt become pending.
*/
preempt_disable();
kvm_vgic_vmcr_sync(vcpu);
+ vgic_v4_put(vcpu, true);
preempt_enable();
-
- kvm_vgic_v4_enable_doorbell(vcpu);
}
void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
- kvm_vgic_v4_disable_doorbell(vcpu);
-}
-
-int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
-{
- /* Force users to call KVM_ARM_VCPU_INIT */
- vcpu->arch.target = -1;
- bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
-
- /* Set up the timer */
- kvm_timer_vcpu_init(vcpu);
-
- kvm_pmu_vcpu_init(vcpu);
-
- kvm_arm_reset_debug_ptr(vcpu);
-
- return kvm_vgic_vcpu_init(vcpu);
+ preempt_disable();
+ vgic_v4_load(vcpu);
+ preempt_enable();
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -373,17 +352,18 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
vcpu->cpu = cpu;
vcpu->arch.host_cpu_context = &cpu_data->host_ctxt;
- kvm_arm_set_running_vcpu(vcpu);
kvm_vgic_load(vcpu);
kvm_timer_vcpu_load(vcpu);
kvm_vcpu_load_sysregs(vcpu);
kvm_arch_vcpu_load_fp(vcpu);
kvm_vcpu_pmu_restore_guest(vcpu);
+ if (kvm_arm_is_pvtime_enabled(&vcpu->arch))
+ kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu);
if (single_task_running())
- vcpu_clear_wfe_traps(vcpu);
+ vcpu_clear_wfx_traps(vcpu);
else
- vcpu_set_wfe_traps(vcpu);
+ vcpu_set_wfx_traps(vcpu);
vcpu_ptrauth_setup_lazy(vcpu);
}
@@ -397,8 +377,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
kvm_vcpu_pmu_restore_host(vcpu);
vcpu->cpu = -1;
-
- kvm_arm_set_running_vcpu(NULL);
}
static void vcpu_power_off(struct kvm_vcpu *vcpu)
@@ -644,6 +622,9 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
* that a VCPU sees new virtual interrupts.
*/
kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
+
+ if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu))
+ kvm_update_stolen_time(vcpu);
}
}
@@ -888,6 +869,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK;
vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK;
+ vcpu_idx += ((irq >> KVM_ARM_IRQ_VCPU2_SHIFT) & KVM_ARM_IRQ_VCPU2_MASK) * (KVM_ARM_IRQ_VCPU_MASK + 1);
irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK;
trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level);
@@ -1313,7 +1295,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
}
-static void cpu_init_hyp_mode(void *dummy)
+static void cpu_init_hyp_mode(void)
{
phys_addr_t pgd_ptr;
unsigned long hyp_stack_ptr;
@@ -1347,7 +1329,7 @@ static void cpu_hyp_reinit(void)
if (is_kernel_in_hyp_mode())
kvm_timer_init_vhe();
else
- cpu_init_hyp_mode(NULL);
+ cpu_init_hyp_mode();
kvm_arm_init_debug();
@@ -1498,7 +1480,6 @@ static void teardown_hyp_mode(void)
free_hyp_pgds();
for_each_possible_cpu(cpu)
free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
- hyp_cpu_pm_exit();
}
/**
@@ -1712,6 +1693,7 @@ int kvm_arch_init(void *opaque)
return 0;
out_hyp:
+ hyp_cpu_pm_exit();
if (!in_hyp_mode)
teardown_hyp_mode();
out_err:
diff --git a/virt/kvm/arm/hypercalls.c b/virt/kvm/arm/hypercalls.c
new file mode 100644
index 000000000000..550dfa3e53cd
--- /dev/null
+++ b/virt/kvm/arm/hypercalls.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 Arm Ltd.
+
+#include <linux/arm-smccc.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_emulate.h>
+
+#include <kvm/arm_hypercalls.h>
+#include <kvm/arm_psci.h>
+
+int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
+{
+ u32 func_id = smccc_get_function(vcpu);
+ long val = SMCCC_RET_NOT_SUPPORTED;
+ u32 feature;
+ gpa_t gpa;
+
+ switch (func_id) {
+ case ARM_SMCCC_VERSION_FUNC_ID:
+ val = ARM_SMCCC_VERSION_1_1;
+ break;
+ case ARM_SMCCC_ARCH_FEATURES_FUNC_ID:
+ feature = smccc_get_arg1(vcpu);
+ switch (feature) {
+ case ARM_SMCCC_ARCH_WORKAROUND_1:
+ switch (kvm_arm_harden_branch_predictor()) {
+ case KVM_BP_HARDEN_UNKNOWN:
+ break;
+ case KVM_BP_HARDEN_WA_NEEDED:
+ val = SMCCC_RET_SUCCESS;
+ break;
+ case KVM_BP_HARDEN_NOT_REQUIRED:
+ val = SMCCC_RET_NOT_REQUIRED;
+ break;
+ }
+ break;
+ case ARM_SMCCC_ARCH_WORKAROUND_2:
+ switch (kvm_arm_have_ssbd()) {
+ case KVM_SSBD_FORCE_DISABLE:
+ case KVM_SSBD_UNKNOWN:
+ break;
+ case KVM_SSBD_KERNEL:
+ val = SMCCC_RET_SUCCESS;
+ break;
+ case KVM_SSBD_FORCE_ENABLE:
+ case KVM_SSBD_MITIGATED:
+ val = SMCCC_RET_NOT_REQUIRED;
+ break;
+ }
+ break;
+ case ARM_SMCCC_HV_PV_TIME_FEATURES:
+ val = SMCCC_RET_SUCCESS;
+ break;
+ }
+ break;
+ case ARM_SMCCC_HV_PV_TIME_FEATURES:
+ val = kvm_hypercall_pv_features(vcpu);
+ break;
+ case ARM_SMCCC_HV_PV_TIME_ST:
+ gpa = kvm_init_stolen_time(vcpu);
+ if (gpa != GPA_INVALID)
+ val = gpa;
+ break;
+ default:
+ return kvm_psci_call(vcpu);
+ }
+
+ smccc_set_retval(vcpu, val, 0, 0, 0);
+ return 1;
+}
diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c
index a8a6a0c883f1..aedfcff99ac5 100644
--- a/virt/kvm/arm/mmio.c
+++ b/virt/kvm/arm/mmio.c
@@ -5,7 +5,6 @@
*/
#include <linux/kvm_host.h>
-#include <asm/kvm_mmio.h>
#include <asm/kvm_emulate.h>
#include <trace/events/kvm.h>
@@ -86,23 +85,29 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
unsigned int len;
int mask;
- if (!run->mmio.is_write) {
- len = run->mmio.len;
- if (len > sizeof(unsigned long))
- return -EINVAL;
+ /* Detect an already handled MMIO return */
+ if (unlikely(!vcpu->mmio_needed))
+ return 0;
+ vcpu->mmio_needed = 0;
+
+ if (!kvm_vcpu_dabt_iswrite(vcpu)) {
+ len = kvm_vcpu_dabt_get_as(vcpu);
data = kvm_mmio_read_buf(run->mmio.data, len);
- if (vcpu->arch.mmio_decode.sign_extend &&
+ if (kvm_vcpu_dabt_issext(vcpu) &&
len < sizeof(unsigned long)) {
mask = 1U << ((len * 8) - 1);
data = (data ^ mask) - mask;
}
+ if (!kvm_vcpu_dabt_issf(vcpu))
+ data = data & 0xffffffff;
+
trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
&data);
data = vcpu_data_host_to_guest(vcpu, data, len);
- vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
+ vcpu_set_reg(vcpu, kvm_vcpu_dabt_get_rd(vcpu), data);
}
/*
@@ -114,33 +119,6 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 0;
}
-static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len)
-{
- unsigned long rt;
- int access_size;
- bool sign_extend;
-
- if (kvm_vcpu_dabt_iss1tw(vcpu)) {
- /* page table accesses IO mem: tell guest to fix its TTBR */
- kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
- return 1;
- }
-
- access_size = kvm_vcpu_dabt_get_as(vcpu);
- if (unlikely(access_size < 0))
- return access_size;
-
- *is_write = kvm_vcpu_dabt_iswrite(vcpu);
- sign_extend = kvm_vcpu_dabt_issext(vcpu);
- rt = kvm_vcpu_dabt_get_rd(vcpu);
-
- *len = access_size;
- vcpu->arch.mmio_decode.sign_extend = sign_extend;
- vcpu->arch.mmio_decode.rt = rt;
-
- return 0;
-}
-
int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
phys_addr_t fault_ipa)
{
@@ -152,20 +130,35 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
u8 data_buf[8];
/*
- * Prepare MMIO operation. First decode the syndrome data we get
- * from the CPU. Then try if some in-kernel emulation feels
- * responsible, otherwise let user space do its magic.
+ * No valid syndrome? Ask userspace for help if it has
+ * voluntered to do so, and bail out otherwise.
*/
- if (kvm_vcpu_dabt_isvalid(vcpu)) {
- ret = decode_hsr(vcpu, &is_write, &len);
- if (ret)
- return ret;
- } else {
- kvm_err("load/store instruction decoding not implemented\n");
+ if (!kvm_vcpu_dabt_isvalid(vcpu)) {
+ if (vcpu->kvm->arch.return_nisv_io_abort_to_user) {
+ run->exit_reason = KVM_EXIT_ARM_NISV;
+ run->arm_nisv.esr_iss = kvm_vcpu_dabt_iss_nisv_sanitized(vcpu);
+ run->arm_nisv.fault_ipa = fault_ipa;
+ return 0;
+ }
+
+ kvm_pr_unimpl("Data abort outside memslots with no valid syndrome info\n");
return -ENOSYS;
}
- rt = vcpu->arch.mmio_decode.rt;
+ /* Page table accesses IO mem: tell guest to fix its TTBR */
+ if (kvm_vcpu_dabt_iss1tw(vcpu)) {
+ kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+ return 1;
+ }
+
+ /*
+ * Prepare MMIO operation. First decode the syndrome data we get
+ * from the CPU. Then try if some in-kernel emulation feels
+ * responsible, otherwise let user space do its magic.
+ */
+ is_write = kvm_vcpu_dabt_iswrite(vcpu);
+ len = kvm_vcpu_dabt_get_as(vcpu);
+ rt = kvm_vcpu_dabt_get_rd(vcpu);
if (is_write) {
data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
@@ -188,6 +181,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
run->mmio.is_write = is_write;
run->mmio.phys_addr = fault_ipa;
run->mmio.len = len;
+ vcpu->mmio_needed = 1;
if (!ret) {
/* We handled the access successfully in the kernel. */
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 38b4c910b6c3..19c961ac4e3c 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -14,7 +14,6 @@
#include <asm/cacheflush.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
-#include <asm/kvm_mmio.h>
#include <asm/kvm_ras.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
@@ -38,6 +37,11 @@ static unsigned long io_map_base;
#define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0)
#define KVM_S2_FLAG_LOGGING_ACTIVE (1UL << 1)
+static bool is_iomap(unsigned long flags)
+{
+ return flags & KVM_S2PTE_FLAG_IS_IOMAP;
+}
+
static bool memslot_is_logging(struct kvm_memory_slot *memslot)
{
return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
@@ -1372,14 +1376,8 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
{
kvm_pfn_t pfn = *pfnp;
gfn_t gfn = *ipap >> PAGE_SHIFT;
- struct page *page = pfn_to_page(pfn);
- /*
- * PageTransCompoundMap() returns true for THP and
- * hugetlbfs. Make sure the adjustment is done only for THP
- * pages.
- */
- if (!PageHuge(page) && PageTransCompoundMap(page)) {
+ if (kvm_is_transparent_hugepage(pfn)) {
unsigned long mask;
/*
* The address we faulted on is backed by a transparent huge
@@ -1591,16 +1589,8 @@ static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size)
__invalidate_icache_guest_page(pfn, size);
}
-static void kvm_send_hwpoison_signal(unsigned long address,
- struct vm_area_struct *vma)
+static void kvm_send_hwpoison_signal(unsigned long address, short lsb)
{
- short lsb;
-
- if (is_vm_hugetlb_page(vma))
- lsb = huge_page_shift(hstate_vma(vma));
- else
- lsb = PAGE_SHIFT;
-
send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current);
}
@@ -1673,6 +1663,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm *kvm = vcpu->kvm;
struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
struct vm_area_struct *vma;
+ short vma_shift;
kvm_pfn_t pfn;
pgprot_t mem_type = PAGE_S2;
bool logging_active = memslot_is_logging(memslot);
@@ -1696,8 +1687,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return -EFAULT;
}
- vma_pagesize = vma_kernel_pagesize(vma);
+ if (is_vm_hugetlb_page(vma))
+ vma_shift = huge_page_shift(hstate_vma(vma));
+ else
+ vma_shift = PAGE_SHIFT;
+
+ vma_pagesize = 1ULL << vma_shift;
if (logging_active ||
+ (vma->vm_flags & VM_PFNMAP) ||
!fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) {
force_pte = true;
vma_pagesize = PAGE_SIZE;
@@ -1735,7 +1732,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
if (pfn == KVM_PFN_ERR_HWPOISON) {
- kvm_send_hwpoison_signal(hva, vma);
+ kvm_send_hwpoison_signal(hva, vma_shift);
return 0;
}
if (is_error_noslot_pfn(pfn))
@@ -1760,6 +1757,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
writable = false;
}
+ if (exec_fault && is_iomap(flags))
+ return -ENOEXEC;
+
spin_lock(&kvm->mmu_lock);
if (mmu_notifier_retry(kvm, mmu_seq))
goto out_unlock;
@@ -1781,7 +1781,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (writable)
kvm_set_pfn_dirty(pfn);
- if (fault_status != FSC_PERM)
+ if (fault_status != FSC_PERM && !is_iomap(flags))
clean_dcache_guest_page(pfn, vma_pagesize);
if (exec_fault)
@@ -1948,9 +1948,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
if (is_iabt) {
/* Prefetch Abort on I/O address */
- kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
- ret = 1;
- goto out_unlock;
+ ret = -ENOEXEC;
+ goto out;
}
/*
@@ -1992,6 +1991,11 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
if (ret == 0)
ret = 1;
+out:
+ if (ret == -ENOEXEC) {
+ kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+ ret = 1;
+ }
out_unlock:
srcu_read_unlock(&vcpu->kvm->srcu, idx);
return ret;
@@ -2134,7 +2138,8 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
if (!kvm->arch.pgd)
return 0;
trace_kvm_test_age_hva(hva);
- return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
+ return handle_hva_to_gpa(kvm, hva, hva + PAGE_SIZE,
+ kvm_test_age_hva_handler, NULL);
}
void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
@@ -2302,15 +2307,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
break;
/*
- * Mapping a read-only VMA is only allowed if the
- * memory region is configured as read-only.
- */
- if (writable && !(vma->vm_flags & VM_WRITE)) {
- ret = -EPERM;
- break;
- }
-
- /*
* Take the intersection of this VMA with the memory region
*/
vm_start = max(hva, vma->vm_start);
diff --git a/virt/kvm/arm/perf.c b/virt/kvm/arm/perf.c
index 918cdc3839ea..d45b8b9a4415 100644
--- a/virt/kvm/arm/perf.c
+++ b/virt/kvm/arm/perf.c
@@ -13,14 +13,14 @@
static int kvm_is_in_guest(void)
{
- return kvm_arm_get_running_vcpu() != NULL;
+ return kvm_get_running_vcpu() != NULL;
}
static int kvm_is_user_mode(void)
{
struct kvm_vcpu *vcpu;
- vcpu = kvm_arm_get_running_vcpu();
+ vcpu = kvm_get_running_vcpu();
if (vcpu)
return !vcpu_mode_priv(vcpu);
@@ -32,7 +32,7 @@ static unsigned long kvm_get_guest_ip(void)
{
struct kvm_vcpu *vcpu;
- vcpu = kvm_arm_get_running_vcpu();
+ vcpu = kvm_get_running_vcpu();
if (vcpu)
return *vcpu_pc(vcpu);
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index 362a01886bab..f0d0312c0a55 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -8,12 +8,15 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
+#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
+static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
+static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
@@ -74,6 +77,13 @@ static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
return pmc;
}
+static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
+{
+ if (kvm_pmu_idx_is_high_counter(pmc->idx))
+ return pmc - 1;
+ else
+ return pmc + 1;
+}
/**
* kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
@@ -146,8 +156,7 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
if (kvm_pmu_pmc_is_chained(pmc) &&
kvm_pmu_idx_is_high_counter(select_idx))
counter = upper_32_bits(counter);
-
- else if (!kvm_pmu_idx_is_64bit(vcpu, select_idx))
+ else if (select_idx != ARMV8_PMU_CYCLE_IDX)
counter = lower_32_bits(counter);
return counter;
@@ -193,7 +202,7 @@ static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
*/
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
{
- u64 counter, reg;
+ u64 counter, reg, val;
pmc = kvm_pmu_get_canonical_pmc(pmc);
if (!pmc->perf_event)
@@ -201,16 +210,19 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
- if (kvm_pmu_pmc_is_chained(pmc)) {
- reg = PMEVCNTR0_EL0 + pmc->idx;
- __vcpu_sys_reg(vcpu, reg) = lower_32_bits(counter);
- __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);
+ if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
+ reg = PMCCNTR_EL0;
+ val = counter;
} else {
- reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
- ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
- __vcpu_sys_reg(vcpu, reg) = lower_32_bits(counter);
+ reg = PMEVCNTR0_EL0 + pmc->idx;
+ val = lower_32_bits(counter);
}
+ __vcpu_sys_reg(vcpu, reg) = val;
+
+ if (kvm_pmu_pmc_is_chained(pmc))
+ __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);
+
kvm_pmu_release_perf_event(pmc);
}
@@ -235,10 +247,11 @@ void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
*/
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
{
- int i;
+ unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
struct kvm_pmu *pmu = &vcpu->arch.pmu;
+ int i;
- for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
+ for_each_set_bit(i, &mask, 32)
kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
@@ -291,15 +304,9 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
pmc = &pmu->pmc[i];
- /*
- * For high counters of chained events we must recreate the
- * perf event with the long (64bit) attribute set.
- */
- if (kvm_pmu_pmc_is_chained(pmc) &&
- kvm_pmu_idx_is_high_counter(i)) {
- kvm_pmu_create_perf_event(vcpu, i);
- continue;
- }
+ /* A change in the enable state may affect the chain state */
+ kvm_pmu_update_pmc_chained(vcpu, i);
+ kvm_pmu_create_perf_event(vcpu, i);
/* At this point, pmc must be the canonical */
if (pmc->perf_event) {
@@ -332,15 +339,9 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
pmc = &pmu->pmc[i];
- /*
- * For high counters of chained events we must recreate the
- * perf event with the long (64bit) attribute unset.
- */
- if (kvm_pmu_pmc_is_chained(pmc) &&
- kvm_pmu_idx_is_high_counter(i)) {
- kvm_pmu_create_perf_event(vcpu, i);
- continue;
- }
+ /* A change in the enable state may affect the chain state */
+ kvm_pmu_update_pmc_chained(vcpu, i);
+ kvm_pmu_create_perf_event(vcpu, i);
/* At this point, pmc must be the canonical */
if (pmc->perf_event)
@@ -440,8 +441,25 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
struct pt_regs *regs)
{
struct kvm_pmc *pmc = perf_event->overflow_handler_context;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
int idx = pmc->idx;
+ u64 period;
+
+ cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);
+
+ /*
+ * Reset the sample period to the architectural limit,
+ * i.e. the point where the counter overflows.
+ */
+ period = -(local64_read(&perf_event->count));
+
+ if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
+ period &= GENMASK(31, 0);
+
+ local64_set(&perf_event->hw.period_left, 0);
+ perf_event->attr.sample_period = period;
+ perf_event->hw.sample_period = period;
__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
@@ -449,6 +467,8 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
}
+
+ cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
}
/**
@@ -458,25 +478,45 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
*/
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
{
+ struct kvm_pmu *pmu = &vcpu->arch.pmu;
int i;
- u64 type, enable, reg;
- if (val == 0)
+ if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
return;
- enable = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
+ /* Weed out disabled counters */
+ val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
+
for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
+ u64 type, reg;
+
if (!(val & BIT(i)))
continue;
- type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
- & ARMV8_PMU_EVTYPE_EVENT;
- if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR)
- && (enable & BIT(i))) {
- reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
+
+ /* PMSWINC only applies to ... SW_INC! */
+ type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
+ type &= ARMV8_PMU_EVTYPE_EVENT;
+ if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
+ continue;
+
+ /* increment this even SW_INC counter */
+ reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
+ reg = lower_32_bits(reg);
+ __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
+
+ if (reg) /* no overflow on the low part */
+ continue;
+
+ if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
+ /* increment the high counter */
+ reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
reg = lower_32_bits(reg);
- __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
- if (!reg)
- __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
+ __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
+ if (!reg) /* mark overflow on the high counter */
+ __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
+ } else {
+ /* mark overflow on low counter */
+ __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
}
}
}
@@ -488,10 +528,9 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
*/
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
- u64 mask;
+ unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
int i;
- mask = kvm_pmu_valid_counter_mask(vcpu);
if (val & ARMV8_PMU_PMCR_E) {
kvm_pmu_enable_counter_mask(vcpu,
__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask);
@@ -503,7 +542,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
if (val & ARMV8_PMU_PMCR_P) {
- for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++)
+ for_each_set_bit(i, &mask, 32)
kvm_pmu_set_counter_value(vcpu, i, 0);
}
}
@@ -560,19 +599,18 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
- if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) {
+ if (kvm_pmu_pmc_is_chained(pmc)) {
/**
* The initial sample period (overflow count) of an event. For
* chained counters we only support overflow interrupts on the
* high counter.
*/
attr.sample_period = (-counter) & GENMASK(63, 0);
+ attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
+
event = perf_event_create_kernel_counter(&attr, -1, current,
kvm_pmu_perf_overflow,
pmc + 1);
-
- if (kvm_pmu_counter_is_enabled(vcpu, pmc->idx + 1))
- attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
} else {
/* The initial sample period (overflow count) of an event. */
if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
@@ -599,25 +637,33 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
* @select_idx: The number of selected counter
*
* Update the chained bitmap based on the event type written in the
- * typer register.
+ * typer register and the enable state of the odd register.
*/
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
{
struct kvm_pmu *pmu = &vcpu->arch.pmu;
- struct kvm_pmc *pmc = &pmu->pmc[select_idx];
+ struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
+ bool new_state, old_state;
- if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) {
+ old_state = kvm_pmu_pmc_is_chained(pmc);
+ new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
+ kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);
+
+ if (old_state == new_state)
+ return;
+
+ canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
+ kvm_pmu_stop_counter(vcpu, canonical_pmc);
+ if (new_state) {
/*
* During promotion from !chained to chained we must ensure
* the adjacent counter is stopped and its event destroyed
*/
- if (!kvm_pmu_pmc_is_chained(pmc))
- kvm_pmu_stop_counter(vcpu, pmc);
-
+ kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
- } else {
- clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
+ return;
}
+ clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}
/**
diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c
index 87927f7e1ee7..17e2bdd4b76f 100644
--- a/virt/kvm/arm/psci.c
+++ b/virt/kvm/arm/psci.c
@@ -15,6 +15,7 @@
#include <asm/kvm_host.h>
#include <kvm/arm_psci.h>
+#include <kvm/arm_hypercalls.h>
/*
* This is an implementation of the Power State Coordination Interface
@@ -23,38 +24,6 @@
#define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1)
-static u32 smccc_get_function(struct kvm_vcpu *vcpu)
-{
- return vcpu_get_reg(vcpu, 0);
-}
-
-static unsigned long smccc_get_arg1(struct kvm_vcpu *vcpu)
-{
- return vcpu_get_reg(vcpu, 1);
-}
-
-static unsigned long smccc_get_arg2(struct kvm_vcpu *vcpu)
-{
- return vcpu_get_reg(vcpu, 2);
-}
-
-static unsigned long smccc_get_arg3(struct kvm_vcpu *vcpu)
-{
- return vcpu_get_reg(vcpu, 3);
-}
-
-static void smccc_set_retval(struct kvm_vcpu *vcpu,
- unsigned long a0,
- unsigned long a1,
- unsigned long a2,
- unsigned long a3)
-{
- vcpu_set_reg(vcpu, 0, a0);
- vcpu_set_reg(vcpu, 1, a1);
- vcpu_set_reg(vcpu, 2, a2);
- vcpu_set_reg(vcpu, 3, a3);
-}
-
static unsigned long psci_affinity_mask(unsigned long affinity_level)
{
if (affinity_level <= 3)
@@ -373,7 +342,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
* Errors:
* -EINVAL: Unrecognized PSCI function
*/
-static int kvm_psci_call(struct kvm_vcpu *vcpu)
+int kvm_psci_call(struct kvm_vcpu *vcpu)
{
switch (kvm_psci_version(vcpu, vcpu->kvm)) {
case KVM_ARM_PSCI_1_0:
@@ -387,55 +356,6 @@ static int kvm_psci_call(struct kvm_vcpu *vcpu)
};
}
-int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
-{
- u32 func_id = smccc_get_function(vcpu);
- u32 val = SMCCC_RET_NOT_SUPPORTED;
- u32 feature;
-
- switch (func_id) {
- case ARM_SMCCC_VERSION_FUNC_ID:
- val = ARM_SMCCC_VERSION_1_1;
- break;
- case ARM_SMCCC_ARCH_FEATURES_FUNC_ID:
- feature = smccc_get_arg1(vcpu);
- switch(feature) {
- case ARM_SMCCC_ARCH_WORKAROUND_1:
- switch (kvm_arm_harden_branch_predictor()) {
- case KVM_BP_HARDEN_UNKNOWN:
- break;
- case KVM_BP_HARDEN_WA_NEEDED:
- val = SMCCC_RET_SUCCESS;
- break;
- case KVM_BP_HARDEN_NOT_REQUIRED:
- val = SMCCC_RET_NOT_REQUIRED;
- break;
- }
- break;
- case ARM_SMCCC_ARCH_WORKAROUND_2:
- switch (kvm_arm_have_ssbd()) {
- case KVM_SSBD_FORCE_DISABLE:
- case KVM_SSBD_UNKNOWN:
- break;
- case KVM_SSBD_KERNEL:
- val = SMCCC_RET_SUCCESS;
- break;
- case KVM_SSBD_FORCE_ENABLE:
- case KVM_SSBD_MITIGATED:
- val = SMCCC_RET_NOT_REQUIRED;
- break;
- }
- break;
- }
- break;
- default:
- return kvm_psci_call(vcpu);
- }
-
- smccc_set_retval(vcpu, val, 0, 0, 0);
- return 1;
-}
-
int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
{
return 3; /* PSCI version and two workaround registers */
diff --git a/virt/kvm/arm/pvtime.c b/virt/kvm/arm/pvtime.c
new file mode 100644
index 000000000000..1e0f4c284888
--- /dev/null
+++ b/virt/kvm/arm/pvtime.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 Arm Ltd.
+
+#include <linux/arm-smccc.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_mmu.h>
+#include <asm/pvclock-abi.h>
+
+#include <kvm/arm_hypercalls.h>
+
+void kvm_update_stolen_time(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ u64 steal;
+ __le64 steal_le;
+ u64 offset;
+ int idx;
+ u64 base = vcpu->arch.steal.base;
+
+ if (base == GPA_INVALID)
+ return;
+
+ /* Let's do the local bookkeeping */
+ steal = vcpu->arch.steal.steal;
+ steal += current->sched_info.run_delay - vcpu->arch.steal.last_steal;
+ vcpu->arch.steal.last_steal = current->sched_info.run_delay;
+ vcpu->arch.steal.steal = steal;
+
+ steal_le = cpu_to_le64(steal);
+ idx = srcu_read_lock(&kvm->srcu);
+ offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time);
+ kvm_put_guest(kvm, base + offset, steal_le, u64);
+ srcu_read_unlock(&kvm->srcu, idx);
+}
+
+long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
+{
+ u32 feature = smccc_get_arg1(vcpu);
+ long val = SMCCC_RET_NOT_SUPPORTED;
+
+ switch (feature) {
+ case ARM_SMCCC_HV_PV_TIME_FEATURES:
+ case ARM_SMCCC_HV_PV_TIME_ST:
+ val = SMCCC_RET_SUCCESS;
+ break;
+ }
+
+ return val;
+}
+
+gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu)
+{
+ struct pvclock_vcpu_stolen_time init_values = {};
+ struct kvm *kvm = vcpu->kvm;
+ u64 base = vcpu->arch.steal.base;
+ int idx;
+
+ if (base == GPA_INVALID)
+ return base;
+
+ /*
+ * Start counting stolen time from the time the guest requests
+ * the feature enabled.
+ */
+ vcpu->arch.steal.steal = 0;
+ vcpu->arch.steal.last_steal = current->sched_info.run_delay;
+
+ idx = srcu_read_lock(&kvm->srcu);
+ kvm_write_guest(kvm, base, &init_values, sizeof(init_values));
+ srcu_read_unlock(&kvm->srcu, idx);
+
+ return base;
+}
+
+int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ u64 __user *user = (u64 __user *)attr->addr;
+ struct kvm *kvm = vcpu->kvm;
+ u64 ipa;
+ int ret = 0;
+ int idx;
+
+ if (attr->attr != KVM_ARM_VCPU_PVTIME_IPA)
+ return -ENXIO;
+
+ if (get_user(ipa, user))
+ return -EFAULT;
+ if (!IS_ALIGNED(ipa, 64))
+ return -EINVAL;
+ if (vcpu->arch.steal.base != GPA_INVALID)
+ return -EEXIST;
+
+ /* Check the address is in a valid memslot */
+ idx = srcu_read_lock(&kvm->srcu);
+ if (kvm_is_error_hva(gfn_to_hva(kvm, ipa >> PAGE_SHIFT)))
+ ret = -EINVAL;
+ srcu_read_unlock(&kvm->srcu, idx);
+
+ if (!ret)
+ vcpu->arch.steal.base = ipa;
+
+ return ret;
+}
+
+int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ u64 __user *user = (u64 __user *)attr->addr;
+ u64 ipa;
+
+ if (attr->attr != KVM_ARM_VCPU_PVTIME_IPA)
+ return -ENXIO;
+
+ ipa = vcpu->arch.steal.base;
+
+ if (put_user(ipa, user))
+ return -EFAULT;
+ return 0;
+}
+
+int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ switch (attr->attr) {
+ case KVM_ARM_VCPU_PVTIME_IPA:
+ return 0;
+ }
+ return -ENXIO;
+}
diff --git a/virt/kvm/arm/vgic/trace.h b/virt/kvm/arm/vgic/trace.h
index 55fed77a9f73..4fd4f6db181b 100644
--- a/virt/kvm/arm/vgic/trace.h
+++ b/virt/kvm/arm/vgic/trace.h
@@ -30,7 +30,7 @@ TRACE_EVENT(vgic_update_irq_pending,
#endif /* _TRACE_VGIC_H */
#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm/vgic
+#define TRACE_INCLUDE_PATH ../../virt/kvm/arm/vgic
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE trace
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index bdbc297d06fb..a963b9d766b7 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -8,6 +8,7 @@
#include <linux/cpu.h>
#include <linux/kvm_host.h>
#include <kvm/arm_vgic.h>
+#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
#include "vgic.h"
@@ -53,6 +54,7 @@ void kvm_vgic_early_init(struct kvm *kvm)
struct vgic_dist *dist = &kvm->arch.vgic;
INIT_LIST_HEAD(&dist->lpi_list_head);
+ INIT_LIST_HEAD(&dist->lpi_translation_cache);
raw_spin_lock_init(&dist->lpi_list_lock);
}
@@ -68,7 +70,7 @@ void kvm_vgic_early_init(struct kvm *kvm)
*/
int kvm_vgic_create(struct kvm *kvm, u32 type)
{
- int i, vcpu_lock_idx = -1, ret;
+ int i, ret;
struct kvm_vcpu *vcpu;
if (irqchip_in_kernel(kvm))
@@ -84,17 +86,9 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
!kvm_vgic_global_state.can_emulate_gicv2)
return -ENODEV;
- /*
- * Any time a vcpu is run, vcpu_load is called which tries to grab the
- * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure
- * that no other VCPUs are run while we create the vgic.
- */
ret = -EBUSY;
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if (!mutex_trylock(&vcpu->mutex))
- goto out_unlock;
- vcpu_lock_idx = i;
- }
+ if (!lock_all_vcpus(kvm))
+ return ret;
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->arch.has_run_once)
@@ -123,10 +117,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions);
out_unlock:
- for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
- vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
- mutex_unlock(&vcpu->mutex);
- }
+ unlock_all_vcpus(kvm);
return ret;
}
@@ -164,12 +155,19 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
irq->vcpu = NULL;
irq->target_vcpu = vcpu0;
kref_init(&irq->refcount);
- if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) {
+ switch (dist->vgic_model) {
+ case KVM_DEV_TYPE_ARM_VGIC_V2:
irq->targets = 0;
irq->group = 0;
- } else {
+ break;
+ case KVM_DEV_TYPE_ARM_VGIC_V3:
irq->mpidr = 0;
irq->group = 1;
+ break;
+ default:
+ kfree(dist->spis);
+ dist->spis = NULL;
+ return -EINVAL;
}
}
return 0;
@@ -192,10 +190,10 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
int i;
vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
- vgic_cpu->sgi_iodev.base_addr = VGIC_ADDR_UNDEF;
INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
raw_spin_lock_init(&vgic_cpu->ap_list_lock);
+ atomic_set(&vgic_cpu->vgic_v3.its_vpe.vlpi_count, 0);
/*
* Enable and configure all SGIs to be edge-triggered and
@@ -209,7 +207,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
irq->intid = i;
irq->vcpu = NULL;
irq->target_vcpu = vcpu;
- irq->targets = 1U << vcpu->vcpu_id;
kref_init(&irq->refcount);
if (vgic_irq_is_sgi(i)) {
/* SGIs */
@@ -219,11 +216,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
/* PPIs */
irq->config = VGIC_CONFIG_LEVEL;
}
-
- if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
- irq->group = 1;
- else
- irq->group = 0;
}
if (!irqchip_in_kernel(vcpu->kvm))
@@ -286,14 +278,24 @@ int vgic_init(struct kvm *kvm)
for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
struct vgic_irq *irq = &vgic_cpu->private_irqs[i];
- if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
+ switch (dist->vgic_model) {
+ case KVM_DEV_TYPE_ARM_VGIC_V3:
irq->group = 1;
- else
+ irq->mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
+ break;
+ case KVM_DEV_TYPE_ARM_VGIC_V2:
irq->group = 0;
+ irq->targets = 1U << idx;
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
}
}
if (vgic_has_its(kvm)) {
+ vgic_lpi_translation_cache_init(kvm);
ret = vgic_v4_init(kvm);
if (ret)
goto out;
@@ -335,6 +337,9 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
INIT_LIST_HEAD(&dist->rd_regions);
}
+ if (vgic_has_its(kvm))
+ vgic_lpi_translation_cache_destroy(kvm);
+
if (vgic_supports_direct_msis(kvm))
vgic_v4_teardown(kvm);
}
@@ -505,7 +510,7 @@ int kvm_vgic_hyp_init(void)
break;
default:
ret = -ENODEV;
- };
+ }
if (ret)
return ret;
diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c
index c9304b88e720..d8cdfea5cc96 100644
--- a/virt/kvm/arm/vgic/vgic-irqfd.c
+++ b/virt/kvm/arm/vgic/vgic-irqfd.c
@@ -66,6 +66,15 @@ out:
return r;
}
+static void kvm_populate_msi(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm_msi *msi)
+{
+ msi->address_lo = e->msi.address_lo;
+ msi->address_hi = e->msi.address_hi;
+ msi->data = e->msi.data;
+ msi->flags = e->msi.flags;
+ msi->devid = e->msi.devid;
+}
/**
* kvm_set_msi: inject the MSI corresponding to the
* MSI routing entry
@@ -79,21 +88,36 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
{
struct kvm_msi msi;
- msi.address_lo = e->msi.address_lo;
- msi.address_hi = e->msi.address_hi;
- msi.data = e->msi.data;
- msi.flags = e->msi.flags;
- msi.devid = e->msi.devid;
-
if (!vgic_has_its(kvm))
return -ENODEV;
if (!level)
return -1;
+ kvm_populate_msi(e, &msi);
return vgic_its_inject_msi(kvm, &msi);
}
+/**
+ * kvm_arch_set_irq_inatomic: fast-path for irqfd injection
+ *
+ * Currently only direct MSI injection is supported.
+ */
+int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id, int level,
+ bool line_status)
+{
+ if (e->type == KVM_IRQ_ROUTING_MSI && vgic_has_its(kvm) && level) {
+ struct kvm_msi msi;
+
+ kvm_populate_msi(e, &msi);
+ if (!vgic_its_inject_cached_translation(kvm, &msi))
+ return 0;
+ }
+
+ return -EWOULDBLOCK;
+}
+
int kvm_vgic_setup_default_irq_routing(struct kvm *kvm)
{
struct kvm_irq_routing_entry *entries;
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 482036612adf..d53d34a33e35 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -138,6 +138,14 @@ struct its_ite {
u32 event_id;
};
+struct vgic_translation_cache_entry {
+ struct list_head entry;
+ phys_addr_t db;
+ u32 devid;
+ u32 eventid;
+ struct vgic_irq *irq;
+};
+
/**
* struct vgic_its_abi - ITS abi ops and settings
* @cte_esz: collection table entry size
@@ -352,7 +360,10 @@ static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu)
if (ret)
return ret;
+ if (map.vpe)
+ atomic_dec(&map.vpe->vlpi_count);
map.vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
+ atomic_inc(&map.vpe->vlpi_count);
ret = its_map_vlpi(irq->host_irq, &map);
}
@@ -527,6 +538,127 @@ static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm,
return 0;
}
+static struct vgic_irq *__vgic_its_check_cache(struct vgic_dist *dist,
+ phys_addr_t db,
+ u32 devid, u32 eventid)
+{
+ struct vgic_translation_cache_entry *cte;
+
+ list_for_each_entry(cte, &dist->lpi_translation_cache, entry) {
+ /*
+ * If we hit a NULL entry, there is nothing after this
+ * point.
+ */
+ if (!cte->irq)
+ break;
+
+ if (cte->db != db || cte->devid != devid ||
+ cte->eventid != eventid)
+ continue;
+
+ /*
+ * Move this entry to the head, as it is the most
+ * recently used.
+ */
+ if (!list_is_first(&cte->entry, &dist->lpi_translation_cache))
+ list_move(&cte->entry, &dist->lpi_translation_cache);
+
+ return cte->irq;
+ }
+
+ return NULL;
+}
+
+static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db,
+ u32 devid, u32 eventid)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct vgic_irq *irq;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
+ irq = __vgic_its_check_cache(dist, db, devid, eventid);
+ raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
+
+ return irq;
+}
+
+static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
+ u32 devid, u32 eventid,
+ struct vgic_irq *irq)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct vgic_translation_cache_entry *cte;
+ unsigned long flags;
+ phys_addr_t db;
+
+ /* Do not cache a directly injected interrupt */
+ if (irq->hw)
+ return;
+
+ raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
+
+ if (unlikely(list_empty(&dist->lpi_translation_cache)))
+ goto out;
+
+ /*
+ * We could have raced with another CPU caching the same
+ * translation behind our back, so let's check it is not in
+ * already
+ */
+ db = its->vgic_its_base + GITS_TRANSLATER;
+ if (__vgic_its_check_cache(dist, db, devid, eventid))
+ goto out;
+
+ /* Always reuse the last entry (LRU policy) */
+ cte = list_last_entry(&dist->lpi_translation_cache,
+ typeof(*cte), entry);
+
+ /*
+ * Caching the translation implies having an extra reference
+ * to the interrupt, so drop the potential reference on what
+ * was in the cache, and increment it on the new interrupt.
+ */
+ if (cte->irq)
+ __vgic_put_lpi_locked(kvm, cte->irq);
+
+ vgic_get_irq_kref(irq);
+
+ cte->db = db;
+ cte->devid = devid;
+ cte->eventid = eventid;
+ cte->irq = irq;
+
+ /* Move the new translation to the head of the list */
+ list_move(&cte->entry, &dist->lpi_translation_cache);
+
+out:
+ raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
+}
+
+void vgic_its_invalidate_cache(struct kvm *kvm)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct vgic_translation_cache_entry *cte;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
+
+ list_for_each_entry(cte, &dist->lpi_translation_cache, entry) {
+ /*
+ * If we hit a NULL entry, there is nothing after this
+ * point.
+ */
+ if (!cte->irq)
+ break;
+
+ __vgic_put_lpi_locked(kvm, cte->irq);
+ cte->irq = NULL;
+ }
+
+ raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
+}
+
int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
u32 devid, u32 eventid, struct vgic_irq **irq)
{
@@ -547,6 +679,8 @@ int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
if (!vcpu->arch.vgic_cpu.lpis_enabled)
return -EBUSY;
+ vgic_its_cache_translation(kvm, its, devid, eventid, ite->irq);
+
*irq = ite->irq;
return 0;
}
@@ -608,6 +742,25 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
return 0;
}
+int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi)
+{
+ struct vgic_irq *irq;
+ unsigned long flags;
+ phys_addr_t db;
+
+ db = (u64)msi->address_hi << 32 | msi->address_lo;
+ irq = vgic_its_check_cache(kvm, db, msi->devid, msi->data);
+
+ if (!irq)
+ return -1;
+
+ raw_spin_lock_irqsave(&irq->irq_lock, flags);
+ irq->pending_latch = true;
+ vgic_queue_irq_unlock(kvm, irq, flags);
+
+ return 0;
+}
+
/*
* Queries the KVM IO bus framework to get the ITS pointer from the given
* doorbell address.
@@ -619,6 +772,9 @@ int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
struct vgic_its *its;
int ret;
+ if (!vgic_its_inject_cached_translation(kvm, msi))
+ return 1;
+
its = vgic_msi_to_its(kvm, msi);
if (IS_ERR(its))
return PTR_ERR(its);
@@ -683,14 +839,15 @@ static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its,
u32 event_id = its_cmd_get_id(its_cmd);
struct its_ite *ite;
-
ite = find_ite(its, device_id, event_id);
- if (ite && ite->collection) {
+ if (ite && its_is_collection_mapped(ite->collection)) {
/*
* Though the spec talks about removing the pending state, we
* don't bother here since we clear the ITTE anyway and the
* pending state is a property of the ITTE struct.
*/
+ vgic_its_invalidate_cache(kvm);
+
its_free_ite(kvm, ite);
return 0;
}
@@ -726,6 +883,8 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,
ite->collection = collection;
vcpu = kvm_get_vcpu(kvm, collection->target_addr);
+ vgic_its_invalidate_cache(kvm);
+
return update_affinity(ite->irq, vcpu);
}
@@ -954,6 +1113,8 @@ static void vgic_its_free_device(struct kvm *kvm, struct its_device *device)
list_for_each_entry_safe(ite, temp, &device->itt_head, ite_list)
its_free_ite(kvm, ite);
+ vgic_its_invalidate_cache(kvm);
+
list_del(&device->dev_list);
kfree(device);
}
@@ -1059,6 +1220,7 @@ static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its,
if (!valid) {
vgic_its_free_collection(its, coll_id);
+ vgic_its_invalidate_cache(kvm);
} else {
collection = find_collection(its, coll_id);
@@ -1207,6 +1369,8 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
vgic_put_irq(kvm, irq);
}
+ vgic_its_invalidate_cache(kvm);
+
kfree(intids);
return 0;
}
@@ -1557,6 +1721,8 @@ static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
goto out;
its->enabled = !!(val & GITS_CTLR_ENABLE);
+ if (!its->enabled)
+ vgic_its_invalidate_cache(kvm);
/*
* Try to process any pending commands. This function bails out early
@@ -1657,6 +1823,47 @@ out:
return ret;
}
+/* Default is 16 cached LPIs per vcpu */
+#define LPI_DEFAULT_PCPU_CACHE_SIZE 16
+
+void vgic_lpi_translation_cache_init(struct kvm *kvm)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ unsigned int sz;
+ int i;
+
+ if (!list_empty(&dist->lpi_translation_cache))
+ return;
+
+ sz = atomic_read(&kvm->online_vcpus) * LPI_DEFAULT_PCPU_CACHE_SIZE;
+
+ for (i = 0; i < sz; i++) {
+ struct vgic_translation_cache_entry *cte;
+
+ /* An allocation failure is not fatal */
+ cte = kzalloc(sizeof(*cte), GFP_KERNEL);
+ if (WARN_ON(!cte))
+ break;
+
+ INIT_LIST_HEAD(&cte->entry);
+ list_add(&cte->entry, &dist->lpi_translation_cache);
+ }
+}
+
+void vgic_lpi_translation_cache_destroy(struct kvm *kvm)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct vgic_translation_cache_entry *cte, *tmp;
+
+ vgic_its_invalidate_cache(kvm);
+
+ list_for_each_entry_safe(cte, tmp,
+ &dist->lpi_translation_cache, entry) {
+ list_del(&cte->entry);
+ kfree(cte);
+ }
+}
+
#define INITIAL_BASER_VALUE \
(GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb) | \
GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, SameAsInner) | \
@@ -1685,6 +1892,8 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
kfree(its);
return ret;
}
+
+ vgic_lpi_translation_cache_init(dev->kvm);
}
mutex_init(&its->its_lock);
@@ -2265,7 +2474,8 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz)
target_addr = (u32)(val >> KVM_ITS_CTE_RDBASE_SHIFT);
coll_id = val & KVM_ITS_CTE_ICID_MASK;
- if (target_addr >= atomic_read(&kvm->online_vcpus))
+ if (target_addr != COLLECTION_NOT_MAPPED &&
+ target_addr >= atomic_read(&kvm->online_vcpus))
return -EINVAL;
collection = find_collection(its, coll_id);
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index c45e2d7e942f..ebc218840fc2 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -192,8 +192,10 @@ static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu,
vgic_cpu->lpis_enabled = val & GICR_CTLR_ENABLE_LPIS;
- if (was_enabled && !vgic_cpu->lpis_enabled)
+ if (was_enabled && !vgic_cpu->lpis_enabled) {
vgic_flush_pending_lpis(vcpu);
+ vgic_its_invalidate_cache(vcpu->kvm);
+ }
if (!was_enabled && vgic_cpu->lpis_enabled)
vgic_enable_lpis(vcpu);
@@ -412,8 +414,11 @@ static unsigned long vgic_mmio_read_pendbase(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ u64 value = vgic_cpu->pendbaser;
+
+ value &= ~GICR_PENDBASER_PTZ;
- return extract_bytes(vgic_cpu->pendbaser, addr & 7, len);
+ return extract_bytes(value, addr & 7, len);
}
static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
@@ -515,7 +520,8 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = {
VGIC_ACCESS_32bit),
};
-static const struct vgic_register_region vgic_v3_rdbase_registers[] = {
+static const struct vgic_register_region vgic_v3_rd_registers[] = {
+ /* RD_base registers */
REGISTER_DESC_WITH_LENGTH(GICR_CTLR,
vgic_mmio_read_v3r_ctlr, vgic_mmio_write_v3r_ctlr, 4,
VGIC_ACCESS_32bit),
@@ -540,44 +546,42 @@ static const struct vgic_register_region vgic_v3_rdbase_registers[] = {
REGISTER_DESC_WITH_LENGTH(GICR_IDREGS,
vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48,
VGIC_ACCESS_32bit),
-};
-
-static const struct vgic_register_region vgic_v3_sgibase_registers[] = {
- REGISTER_DESC_WITH_LENGTH(GICR_IGROUPR0,
+ /* SGI_base registers */
+ REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IGROUPR0,
vgic_mmio_read_group, vgic_mmio_write_group, 4,
VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_ISENABLER0,
+ REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ISENABLER0,
vgic_mmio_read_enable, vgic_mmio_write_senable, 4,
VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_ICENABLER0,
+ REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ICENABLER0,
vgic_mmio_read_enable, vgic_mmio_write_cenable, 4,
VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISPENDR0,
+ REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0,
vgic_mmio_read_pending, vgic_mmio_write_spending,
vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4,
VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICPENDR0,
+ REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICPENDR0,
vgic_mmio_read_pending, vgic_mmio_write_cpending,
vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 4,
VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISACTIVER0,
+ REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISACTIVER0,
vgic_mmio_read_active, vgic_mmio_write_sactive,
NULL, vgic_mmio_uaccess_write_sactive,
4, VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICACTIVER0,
+ REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICACTIVER0,
vgic_mmio_read_active, vgic_mmio_write_cactive,
NULL, vgic_mmio_uaccess_write_cactive,
4, VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_IPRIORITYR0,
+ REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IPRIORITYR0,
vgic_mmio_read_priority, vgic_mmio_write_priority, 32,
VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
- REGISTER_DESC_WITH_LENGTH(GICR_ICFGR0,
+ REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ICFGR0,
vgic_mmio_read_config, vgic_mmio_write_config, 8,
VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_IGRPMODR0,
+ REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IGRPMODR0,
vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_NSACR,
+ REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_NSACR,
vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
VGIC_ACCESS_32bit),
};
@@ -607,9 +611,8 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
struct vgic_dist *vgic = &kvm->arch.vgic;
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
- struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev;
struct vgic_redist_region *rdreg;
- gpa_t rd_base, sgi_base;
+ gpa_t rd_base;
int ret;
if (!IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr))
@@ -631,52 +634,31 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
vgic_cpu->rdreg = rdreg;
rd_base = rdreg->base + rdreg->free_index * KVM_VGIC_V3_REDIST_SIZE;
- sgi_base = rd_base + SZ_64K;
kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops);
rd_dev->base_addr = rd_base;
rd_dev->iodev_type = IODEV_REDIST;
- rd_dev->regions = vgic_v3_rdbase_registers;
- rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers);
+ rd_dev->regions = vgic_v3_rd_registers;
+ rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rd_registers);
rd_dev->redist_vcpu = vcpu;
mutex_lock(&kvm->slots_lock);
ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base,
- SZ_64K, &rd_dev->dev);
+ 2 * SZ_64K, &rd_dev->dev);
mutex_unlock(&kvm->slots_lock);
if (ret)
return ret;
- kvm_iodevice_init(&sgi_dev->dev, &kvm_io_gic_ops);
- sgi_dev->base_addr = sgi_base;
- sgi_dev->iodev_type = IODEV_REDIST;
- sgi_dev->regions = vgic_v3_sgibase_registers;
- sgi_dev->nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers);
- sgi_dev->redist_vcpu = vcpu;
-
- mutex_lock(&kvm->slots_lock);
- ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base,
- SZ_64K, &sgi_dev->dev);
- if (ret) {
- kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
- &rd_dev->dev);
- goto out;
- }
-
rdreg->free_index++;
-out:
- mutex_unlock(&kvm->slots_lock);
- return ret;
+ return 0;
}
static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu)
{
struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
- struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev;
kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &rd_dev->dev);
- kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &sgi_dev->dev);
}
static int vgic_register_all_redist_iodevs(struct kvm *kvm)
@@ -826,8 +808,8 @@ int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
iodev.base_addr = 0;
break;
case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:{
- iodev.regions = vgic_v3_rdbase_registers;
- iodev.nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers);
+ iodev.regions = vgic_v3_rd_registers;
+ iodev.nr_regions = ARRAY_SIZE(vgic_v3_rd_registers);
iodev.base_addr = 0;
break;
}
@@ -985,21 +967,11 @@ int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
int offset, u32 *val)
{
struct vgic_io_device rd_dev = {
- .regions = vgic_v3_rdbase_registers,
- .nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers),
- };
-
- struct vgic_io_device sgi_dev = {
- .regions = vgic_v3_sgibase_registers,
- .nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers),
+ .regions = vgic_v3_rd_registers,
+ .nr_regions = ARRAY_SIZE(vgic_v3_rd_registers),
};
- /* SGI_base is the next 64K frame after RD_base */
- if (offset >= SZ_64K)
- return vgic_uaccess(vcpu, &sgi_dev, is_write, offset - SZ_64K,
- val);
- else
- return vgic_uaccess(vcpu, &rd_dev, is_write, offset, val);
+ return vgic_uaccess(vcpu, &rd_dev, is_write, offset, val);
}
int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write,
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index 44efc2ff863f..97fb2a40e6ba 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -179,27 +179,6 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
return value;
}
-/*
- * This function will return the VCPU that performed the MMIO access and
- * trapped from within the VM, and will return NULL if this is a userspace
- * access.
- *
- * We can disable preemption locally around accessing the per-CPU variable,
- * and use the resolved vcpu pointer after enabling preemption again, because
- * even if the current thread is migrated to another CPU, reading the per-CPU
- * value later will give us the same value as we update the per-CPU variable
- * in the preempt notifier handlers.
- */
-static struct kvm_vcpu *vgic_get_mmio_requester_vcpu(void)
-{
- struct kvm_vcpu *vcpu;
-
- preempt_disable();
- vcpu = kvm_arm_get_running_vcpu();
- preempt_enable();
- return vcpu;
-}
-
/* Must be called with irq->irq_lock held */
static void vgic_hw_irq_spending(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
bool is_uaccess)
@@ -211,11 +190,17 @@ static void vgic_hw_irq_spending(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
vgic_irq_set_phys_active(irq, true);
}
+static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
+{
+ return (vgic_irq_is_sgi(irq->intid) &&
+ vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2);
+}
+
void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
- bool is_uaccess = !vgic_get_mmio_requester_vcpu();
+ bool is_uaccess = !kvm_get_running_vcpu();
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
int i;
unsigned long flags;
@@ -223,6 +208,12 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
for_each_set_bit(i, &val, len * 8) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+ /* GICD_ISPENDR0 SGI bits are WI */
+ if (is_vgic_v2_sgi(vcpu, irq)) {
+ vgic_put_irq(vcpu->kvm, irq);
+ continue;
+ }
+
raw_spin_lock_irqsave(&irq->irq_lock, flags);
if (irq->hw)
vgic_hw_irq_spending(vcpu, irq, is_uaccess);
@@ -262,7 +253,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
- bool is_uaccess = !vgic_get_mmio_requester_vcpu();
+ bool is_uaccess = !kvm_get_running_vcpu();
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
int i;
unsigned long flags;
@@ -270,6 +261,12 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
for_each_set_bit(i, &val, len * 8) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+ /* GICD_ICPENDR0 SGI bits are WI */
+ if (is_vgic_v2_sgi(vcpu, irq)) {
+ vgic_put_irq(vcpu->kvm, irq);
+ continue;
+ }
+
raw_spin_lock_irqsave(&irq->irq_lock, flags);
if (irq->hw)
@@ -317,7 +314,7 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
bool active)
{
unsigned long flags;
- struct kvm_vcpu *requester_vcpu = vgic_get_mmio_requester_vcpu();
+ struct kvm_vcpu *requester_vcpu = kvm_get_running_vcpu();
raw_spin_lock_irqsave(&irq->irq_lock, flags);
diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h
index 836f418f1ee8..5af2aefad435 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.h
+++ b/virt/kvm/arm/vgic/vgic-mmio.h
@@ -98,11 +98,6 @@ extern struct kvm_io_device_ops kvm_io_gic_ops;
.uaccess_write = uwr, \
}
-int kvm_vgic_register_mmio_region(struct kvm *kvm, struct kvm_vcpu *vcpu,
- struct vgic_register_region *reg_desc,
- struct vgic_io_device *region,
- int nr_irqs, bool offset_private);
-
unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len);
void vgic_data_host_to_mmio_bus(void *buf, unsigned int len,
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index 96aab77d0471..621cc168fe3f 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -184,7 +184,10 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
if (vgic_irq_is_sgi(irq->intid)) {
u32 src = ffs(irq->source);
- BUG_ON(!src);
+ if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n",
+ irq->intid))
+ return;
+
val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
irq->source &= ~(1 << (src - 1));
if (irq->source) {
@@ -354,10 +357,11 @@ out:
DEFINE_STATIC_KEY_FALSE(vgic_v2_cpuif_trap);
/**
- * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT
- * @node: pointer to the DT node
+ * vgic_v2_probe - probe for a VGICv2 compatible interrupt controller
+ * @info: pointer to the GIC description
*
- * Returns 0 if a GICv2 has been found, returns an error code otherwise
+ * Returns 0 if the VGICv2 has been probed successfully, returns an error code
+ * otherwise
*/
int vgic_v2_probe(const struct gic_kvm_info *info)
{
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 0c653a1e5215..f45635a6f0ec 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -167,7 +167,10 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
model == KVM_DEV_TYPE_ARM_VGIC_V2) {
u32 src = ffs(irq->source);
- BUG_ON(!src);
+ if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n",
+ irq->intid))
+ return;
+
val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
irq->source &= ~(1 << (src - 1));
if (irq->source) {
@@ -354,14 +357,14 @@ retry:
}
/**
- * vgic_its_save_pending_tables - Save the pending tables into guest RAM
+ * vgic_v3_save_pending_tables - Save the pending tables into guest RAM
* kvm lock and all vcpu lock must be held
*/
int vgic_v3_save_pending_tables(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
- int last_byte_offset = -1;
struct vgic_irq *irq;
+ gpa_t last_ptr = ~(gpa_t)0;
int ret;
u8 val;
@@ -381,11 +384,11 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
bit_nr = irq->intid % BITS_PER_BYTE;
ptr = pendbase + byte_offset;
- if (byte_offset != last_byte_offset) {
+ if (ptr != last_ptr) {
ret = kvm_read_guest_lock(kvm, ptr, &val, 1);
if (ret)
return ret;
- last_byte_offset = byte_offset;
+ last_ptr = ptr;
}
stored = val & (1U << bit_nr);
@@ -570,10 +573,11 @@ static int __init early_gicv4_enable(char *buf)
early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);
/**
- * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
- * @node: pointer to the DT node
+ * vgic_v3_probe - probe for a VGICv3 compatible interrupt controller
+ * @info: pointer to the GIC description
*
- * Returns 0 if a GICv3 has been found, returns an error code otherwise
+ * Returns 0 if the VGICv3 has been probed successfully, returns an error code
+ * otherwise
*/
int vgic_v3_probe(const struct gic_kvm_info *info)
{
@@ -660,6 +664,8 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
if (has_vhe())
__vgic_v3_activate_traps(vcpu);
+
+ WARN_ON(vgic_v4_load(vcpu));
}
void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
@@ -672,6 +678,8 @@ void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
void vgic_v3_put(struct kvm_vcpu *vcpu)
{
+ WARN_ON(vgic_v4_put(vcpu, false));
+
vgic_v3_vmcr_sync(vcpu);
kvm_call_hyp(__vgic_v3_save_aprs, vcpu);
diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
index 477af6aebb97..46f875589c47 100644
--- a/virt/kvm/arm/vgic/vgic-v4.c
+++ b/virt/kvm/arm/vgic/vgic-v4.c
@@ -85,6 +85,10 @@ static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
{
struct kvm_vcpu *vcpu = info;
+ /* We got the message, no need to fire again */
+ if (!irqd_irq_disabled(&irq_to_desc(irq)->irq_data))
+ disable_irq_nosync(irq);
+
vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true;
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
@@ -192,20 +196,30 @@ void vgic_v4_teardown(struct kvm *kvm)
its_vm->vpes = NULL;
}
-int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu)
+int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db)
{
- if (!vgic_supports_direct_msis(vcpu->kvm))
+ struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
+ struct irq_desc *desc = irq_to_desc(vpe->irq);
+
+ if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident)
return 0;
- return its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, false);
+ /*
+ * If blocking, a doorbell is required. Undo the nested
+ * disable_irq() calls...
+ */
+ while (need_db && irqd_irq_disabled(&desc->irq_data))
+ enable_irq(vpe->irq);
+
+ return its_schedule_vpe(vpe, false);
}
-int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu)
+int vgic_v4_load(struct kvm_vcpu *vcpu)
{
- int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
+ struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
int err;
- if (!vgic_supports_direct_msis(vcpu->kvm))
+ if (!vgic_supports_direct_msis(vcpu->kvm) || vpe->resident)
return 0;
/*
@@ -214,11 +228,14 @@ int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu)
* doc in drivers/irqchip/irq-gic-v4.c to understand how this
* turns into a VMOVP command at the ITS level.
*/
- err = irq_set_affinity(irq, cpumask_of(smp_processor_id()));
+ err = irq_set_affinity(vpe->irq, cpumask_of(smp_processor_id()));
if (err)
return err;
- err = its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, true);
+ /* Disabled the doorbell, as we're about to enter the guest */
+ disable_irq_nosync(vpe->irq);
+
+ err = its_schedule_vpe(vpe, true);
if (err)
return err;
@@ -226,9 +243,7 @@ int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu)
* Now that the VPE is resident, let's get rid of a potential
* doorbell interrupt that would still be pending.
*/
- err = irq_set_irqchip_state(irq, IRQCHIP_STATE_PENDING, false);
-
- return err;
+ return irq_set_irqchip_state(vpe->irq, IRQCHIP_STATE_PENDING, false);
}
static struct vgic_its *vgic_get_its(struct kvm *kvm,
@@ -266,7 +281,7 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
mutex_lock(&its->its_lock);
- /* Perform then actual DevID/EventID -> LPI translation. */
+ /* Perform the actual DevID/EventID -> LPI translation. */
ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid,
irq_entry->msi.data, &irq);
if (ret)
@@ -294,6 +309,7 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
irq->hw = true;
irq->host_irq = virq;
+ atomic_inc(&map.vpe->vlpi_count);
out:
mutex_unlock(&its->its_lock);
@@ -327,6 +343,7 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq,
WARN_ON(!(irq->hw && irq->host_irq == virq));
if (irq->hw) {
+ atomic_dec(&irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count);
irq->hw = false;
ret = its_unmap_vlpi(virq);
}
@@ -335,21 +352,3 @@ out:
mutex_unlock(&its->its_lock);
return ret;
}
-
-void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu)
-{
- if (vgic_supports_direct_msis(vcpu->kvm)) {
- int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
- if (irq)
- enable_irq(irq);
- }
-}
-
-void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu)
-{
- if (vgic_supports_direct_msis(vcpu->kvm)) {
- int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
- if (irq)
- disable_irq(irq);
- }
-}
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 13d4b38a94ec..99b02ca730a8 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -119,6 +119,22 @@ static void vgic_irq_release(struct kref *ref)
{
}
+/*
+ * Drop the refcount on the LPI. Must be called with lpi_list_lock held.
+ */
+void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+
+ if (!kref_put(&irq->refcount, vgic_irq_release))
+ return;
+
+ list_del(&irq->lpi_list);
+ dist->lpi_list_count--;
+
+ kfree(irq);
+}
+
void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
{
struct vgic_dist *dist = &kvm->arch.vgic;
@@ -128,16 +144,8 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
return;
raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
- if (!kref_put(&irq->refcount, vgic_irq_release)) {
- raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
- return;
- };
-
- list_del(&irq->lpi_list);
- dist->lpi_list_count--;
+ __vgic_put_lpi_locked(kvm, irq);
raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
-
- kfree(irq);
}
void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
@@ -254,6 +262,13 @@ static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b)
bool penda, pendb;
int ret;
+ /*
+ * list_sort may call this function with the same element when
+ * the list is fairly long.
+ */
+ if (unlikely(irqa == irqb))
+ return 0;
+
raw_spin_lock(&irqa->irq_lock);
raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);
@@ -842,8 +857,6 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- WARN_ON(vgic_v4_sync_hwstate(vcpu));
-
/* An empty ap_list_head implies used_lrs == 0 */
if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
return;
@@ -867,8 +880,6 @@ static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
/* Flush our emulation state into the GIC hardware before entering the guest. */
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
{
- WARN_ON(vgic_v4_flush_hwstate(vcpu));
-
/*
* If there are no virtual interrupts active or pending for this
* VCPU, then there is no work to do and we can bail out without
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 797e05004d80..c7fefd6b1c80 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -161,6 +161,7 @@ vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
gpa_t addr, int len);
struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
u32 intid);
+void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq);
void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
bool vgic_get_phys_line_level(struct vgic_irq *irq);
void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending);
@@ -307,11 +308,13 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr);
int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
u32 devid, u32 eventid, struct vgic_irq **irq);
struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi);
+int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi);
+void vgic_lpi_translation_cache_init(struct kvm *kvm);
+void vgic_lpi_translation_cache_destroy(struct kvm *kvm);
+void vgic_its_invalidate_cache(struct kvm *kvm);
bool vgic_supports_direct_msis(struct kvm *kvm);
int vgic_v4_init(struct kvm *kvm);
void vgic_v4_teardown(struct kvm *kvm);
-int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu);
-int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu);
#endif
OpenPOWER on IntegriCloud