summaryrefslogtreecommitdiffstats
path: root/virt/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'virt/kvm')
-rw-r--r--virt/kvm/Kconfig3
-rw-r--r--virt/kvm/arm/arch_timer.c138
-rw-r--r--virt/kvm/arm/arm.c190
-rw-r--r--virt/kvm/arm/hyp/vgic-v2-sr.c1
-rw-r--r--virt/kvm/arm/mmu.c78
-rw-r--r--virt/kvm/arm/psci.c143
-rw-r--r--virt/kvm/arm/vgic/vgic-init.c8
-rw-r--r--virt/kvm/arm/vgic/vgic-its.c4
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.c115
-rw-r--r--virt/kvm/arm/vgic/vgic-v2.c29
-rw-r--r--virt/kvm/arm/vgic/vgic-v3.c29
-rw-r--r--virt/kvm/arm/vgic/vgic-v4.c2
-rw-r--r--virt/kvm/arm/vgic/vgic.c41
-rw-r--r--virt/kvm/arm/vgic/vgic.h8
-rw-r--r--virt/kvm/eventfd.c12
-rw-r--r--virt/kvm/kvm_main.c114
16 files changed, 640 insertions, 275 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 70691c08e1ed..cca7e065a075 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -51,3 +51,6 @@ config KVM_COMPAT
config HAVE_KVM_IRQ_BYPASS
bool
+
+config HAVE_KVM_VCPU_ASYNC_IOCTL
+ bool
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index cc29a8148328..70268c0bec79 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -92,7 +92,6 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
{
struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
struct arch_timer_context *vtimer;
- u32 cnt_ctl;
/*
* We may see a timer interrupt after vcpu_put() has been called which
@@ -104,15 +103,11 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
return IRQ_HANDLED;
vtimer = vcpu_vtimer(vcpu);
- if (!vtimer->irq.level) {
- cnt_ctl = read_sysreg_el0(cntv_ctl);
- cnt_ctl &= ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT |
- ARCH_TIMER_CTRL_IT_MASK;
- if (cnt_ctl == (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT))
- kvm_timer_update_irq(vcpu, true, vtimer);
- }
+ if (kvm_timer_should_fire(vtimer))
+ kvm_timer_update_irq(vcpu, true, vtimer);
- if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+ if (static_branch_unlikely(&userspace_irqchip_in_use) &&
+ unlikely(!irqchip_in_kernel(vcpu->kvm)))
kvm_vtimer_update_mask_user(vcpu);
return IRQ_HANDLED;
@@ -238,6 +233,16 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
{
u64 cval, now;
+ if (timer_ctx->loaded) {
+ u32 cnt_ctl;
+
+ /* Only the virtual timer can be loaded so far */
+ cnt_ctl = read_sysreg_el0(cntv_ctl);
+ return (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
+ (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
+ !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
+ }
+
if (!kvm_timer_irq_can_fire(timer_ctx))
return false;
@@ -252,15 +257,7 @@ bool kvm_timer_is_pending(struct kvm_vcpu *vcpu)
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
- if (vtimer->irq.level || ptimer->irq.level)
- return true;
-
- /*
- * When this is called from withing the wait loop of kvm_vcpu_block(),
- * the software view of the timer state is up to date (timer->loaded
- * is false), and so we can simply check if the timer should fire now.
- */
- if (!vtimer->loaded && kvm_timer_should_fire(vtimer))
+ if (kvm_timer_should_fire(vtimer))
return true;
return kvm_timer_should_fire(ptimer);
@@ -278,9 +275,9 @@ void kvm_timer_update_run(struct kvm_vcpu *vcpu)
/* Populate the device bitmap with the timer states */
regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
KVM_ARM_DEV_EL1_PTIMER);
- if (vtimer->irq.level)
+ if (kvm_timer_should_fire(vtimer))
regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
- if (ptimer->irq.level)
+ if (kvm_timer_should_fire(ptimer))
regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
}
@@ -293,7 +290,8 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
timer_ctx->irq.level);
- if (likely(irqchip_in_kernel(vcpu->kvm))) {
+ if (!static_branch_unlikely(&userspace_irqchip_in_use) ||
+ likely(irqchip_in_kernel(vcpu->kvm))) {
ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
timer_ctx->irq.irq,
timer_ctx->irq.level,
@@ -331,12 +329,20 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+ bool level;
if (unlikely(!timer->enabled))
return;
- if (kvm_timer_should_fire(vtimer) != vtimer->irq.level)
- kvm_timer_update_irq(vcpu, !vtimer->irq.level, vtimer);
+ /*
+ * The vtimer virtual interrupt is a 'mapped' interrupt, meaning part
+ * of its lifecycle is offloaded to the hardware, and we therefore may
+ * not have lowered the irq.level value before having to signal a new
+ * interrupt, but have to signal an interrupt every time the level is
+ * asserted.
+ */
+ level = kvm_timer_should_fire(vtimer);
+ kvm_timer_update_irq(vcpu, level, vtimer);
if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
@@ -344,6 +350,12 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
phys_timer_emulate(vcpu);
}
+static void __timer_snapshot_state(struct arch_timer_context *timer)
+{
+ timer->cnt_ctl = read_sysreg_el0(cntv_ctl);
+ timer->cnt_cval = read_sysreg_el0(cntv_cval);
+}
+
static void vtimer_save_state(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
@@ -355,10 +367,8 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu)
if (!vtimer->loaded)
goto out;
- if (timer->enabled) {
- vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
- vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
- }
+ if (timer->enabled)
+ __timer_snapshot_state(vtimer);
/* Disable the virtual timer */
write_sysreg_el0(0, cntv_ctl);
@@ -456,8 +466,7 @@ static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu)
bool phys_active;
int ret;
- phys_active = vtimer->irq.level ||
- kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
+ phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
ret = irq_set_irqchip_state(host_vtimer_irq,
IRQCHIP_STATE_ACTIVE,
@@ -504,8 +513,8 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;
- return vtimer->irq.level != vlevel ||
- ptimer->irq.level != plevel;
+ return kvm_timer_should_fire(vtimer) != vlevel ||
+ kvm_timer_should_fire(ptimer) != plevel;
}
void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
@@ -537,54 +546,27 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
set_cntvoff(0);
}
-static void unmask_vtimer_irq(struct kvm_vcpu *vcpu)
+/*
+ * With a userspace irqchip we have to check if the guest de-asserted the
+ * timer and if so, unmask the timer irq signal on the host interrupt
+ * controller to ensure that we see future timer signals.
+ */
+static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
- kvm_vtimer_update_mask_user(vcpu);
- return;
- }
-
- /*
- * If the guest disabled the timer without acking the interrupt, then
- * we must make sure the physical and virtual active states are in
- * sync by deactivating the physical interrupt, because otherwise we
- * wouldn't see the next timer interrupt in the host.
- */
- if (!kvm_vgic_map_is_active(vcpu, vtimer->irq.irq)) {
- int ret;
- ret = irq_set_irqchip_state(host_vtimer_irq,
- IRQCHIP_STATE_ACTIVE,
- false);
- WARN_ON(ret);
+ __timer_snapshot_state(vtimer);
+ if (!kvm_timer_should_fire(vtimer)) {
+ kvm_timer_update_irq(vcpu, false, vtimer);
+ kvm_vtimer_update_mask_user(vcpu);
+ }
}
}
-/**
- * kvm_timer_sync_hwstate - sync timer state from cpu
- * @vcpu: The vcpu pointer
- *
- * Check if any of the timers have expired while we were running in the guest,
- * and inject an interrupt if that was the case.
- */
void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
{
- struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-
- /*
- * If we entered the guest with the vtimer output asserted we have to
- * check if the guest has modified the timer so that we should lower
- * the line at this point.
- */
- if (vtimer->irq.level) {
- vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
- vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
- if (!kvm_timer_should_fire(vtimer)) {
- kvm_timer_update_irq(vcpu, false, vtimer);
- unmask_vtimer_irq(vcpu);
- }
- }
+ unmask_vtimer_irq_user(vcpu);
}
int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -818,6 +800,19 @@ static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
return true;
}
+bool kvm_arch_timer_get_input_level(int vintid)
+{
+ struct kvm_vcpu *vcpu = kvm_arm_get_running_vcpu();
+ struct arch_timer_context *timer;
+
+ if (vintid == vcpu_vtimer(vcpu)->irq.irq)
+ timer = vcpu_vtimer(vcpu);
+ else
+ BUG(); /* We only map the vtimer so far */
+
+ return kvm_timer_should_fire(timer);
+}
+
int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
@@ -839,7 +834,8 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
return -EINVAL;
}
- ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq);
+ ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq,
+ kvm_arch_timer_get_input_level);
if (ret)
return ret;
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 2e43f9d42bd5..86941f6181bb 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -31,6 +31,7 @@
#include <linux/irqbypass.h>
#include <trace/events/kvm.h>
#include <kvm/arm_pmu.h>
+#include <kvm/arm_psci.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -46,15 +47,14 @@
#include <asm/kvm_mmu.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_coproc.h>
-#include <asm/kvm_psci.h>
#include <asm/sections.h>
#ifdef REQUIRES_VIRT
__asm__(".arch_extension virt");
#endif
+DEFINE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
-static kvm_cpu_context_t __percpu *kvm_host_cpu_state;
/* Per-CPU variable containing the currently running vcpu. */
static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
@@ -71,17 +71,17 @@ static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
{
- BUG_ON(preemptible());
__this_cpu_write(kvm_arm_running_vcpu, vcpu);
}
+DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
+
/**
* kvm_arm_get_running_vcpu - get the vcpu running on the current CPU.
* Must be called from non-preemptible context
*/
struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
{
- BUG_ON(preemptible());
return __this_cpu_read(kvm_arm_running_vcpu);
}
@@ -295,6 +295,9 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
+ if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm)))
+ static_branch_dec(&userspace_irqchip_in_use);
+
kvm_mmu_free_memory_caches(vcpu);
kvm_timer_vcpu_terminate(vcpu);
kvm_pmu_vcpu_destroy(vcpu);
@@ -354,7 +357,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
}
vcpu->cpu = cpu;
- vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
+ vcpu->arch.host_cpu_context = this_cpu_ptr(&kvm_host_cpu_state);
kvm_arm_set_running_vcpu(vcpu);
kvm_vgic_load(vcpu);
@@ -381,17 +384,24 @@ static void vcpu_power_off(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
+ vcpu_load(vcpu);
+
if (vcpu->arch.power_off)
mp_state->mp_state = KVM_MP_STATE_STOPPED;
else
mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
+ int ret = 0;
+
+ vcpu_load(vcpu);
+
switch (mp_state->mp_state) {
case KVM_MP_STATE_RUNNABLE:
vcpu->arch.power_off = false;
@@ -400,10 +410,11 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
vcpu_power_off(vcpu);
break;
default:
- return -EINVAL;
+ ret = -EINVAL;
}
- return 0;
+ vcpu_put(vcpu);
+ return ret;
}
/**
@@ -509,7 +520,7 @@ static void update_vttbr(struct kvm *kvm)
pgd_phys = virt_to_phys(kvm->arch.pgd);
BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
- kvm->arch.vttbr = pgd_phys | vmid;
+ kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid;
spin_unlock(&kvm_vmid_lock);
}
@@ -524,14 +535,22 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
vcpu->arch.has_run_once = true;
- /*
- * Map the VGIC hardware resources before running a vcpu the first
- * time on this VM.
- */
- if (unlikely(irqchip_in_kernel(kvm) && !vgic_ready(kvm))) {
- ret = kvm_vgic_map_resources(kvm);
- if (ret)
- return ret;
+ if (likely(irqchip_in_kernel(kvm))) {
+ /*
+ * Map the VGIC hardware resources before running a vcpu the
+ * first time on this VM.
+ */
+ if (unlikely(!vgic_ready(kvm))) {
+ ret = kvm_vgic_map_resources(kvm);
+ if (ret)
+ return ret;
+ }
+ } else {
+ /*
+ * Tell the rest of the code that there are userspace irqchip
+ * VMs in the wild.
+ */
+ static_branch_inc(&userspace_irqchip_in_use);
}
ret = kvm_timer_enable(vcpu);
@@ -619,21 +638,27 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (unlikely(!kvm_vcpu_initialized(vcpu)))
return -ENOEXEC;
+ vcpu_load(vcpu);
+
ret = kvm_vcpu_first_run_init(vcpu);
if (ret)
- return ret;
+ goto out;
if (run->exit_reason == KVM_EXIT_MMIO) {
ret = kvm_handle_mmio_return(vcpu, vcpu->run);
if (ret)
- return ret;
- if (kvm_arm_handle_step_debug(vcpu, vcpu->run))
- return 0;
+ goto out;
+ if (kvm_arm_handle_step_debug(vcpu, vcpu->run)) {
+ ret = 0;
+ goto out;
+ }
}
- if (run->immediate_exit)
- return -EINTR;
+ if (run->immediate_exit) {
+ ret = -EINTR;
+ goto out;
+ }
kvm_sigset_activate(vcpu);
@@ -666,19 +691,30 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_vgic_flush_hwstate(vcpu);
/*
- * If we have a singal pending, or need to notify a userspace
- * irqchip about timer or PMU level changes, then we exit (and
- * update the timer level state in kvm_timer_update_run
- * below).
+ * Exit if we have a signal pending so that we can deliver the
+ * signal to user space.
*/
- if (signal_pending(current) ||
- kvm_timer_should_notify_user(vcpu) ||
- kvm_pmu_should_notify_user(vcpu)) {
+ if (signal_pending(current)) {
ret = -EINTR;
run->exit_reason = KVM_EXIT_INTR;
}
/*
+ * If we're using a userspace irqchip, then check if we need
+ * to tell a userspace irqchip about timer or PMU level
+ * changes and if so, exit to userspace (the actual level
+ * state gets updated in kvm_timer_update_run and
+ * kvm_pmu_update_run below).
+ */
+ if (static_branch_unlikely(&userspace_irqchip_in_use)) {
+ if (kvm_timer_should_notify_user(vcpu) ||
+ kvm_pmu_should_notify_user(vcpu)) {
+ ret = -EINTR;
+ run->exit_reason = KVM_EXIT_INTR;
+ }
+ }
+
+ /*
* Ensure we set mode to IN_GUEST_MODE after we disable
* interrupts and before the final VCPU requests check.
* See the comment in kvm_vcpu_exiting_guest_mode() and
@@ -690,7 +726,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_request_pending(vcpu)) {
vcpu->mode = OUTSIDE_GUEST_MODE;
kvm_pmu_sync_hwstate(vcpu);
- kvm_timer_sync_hwstate(vcpu);
+ if (static_branch_unlikely(&userspace_irqchip_in_use))
+ kvm_timer_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu);
local_irq_enable();
preempt_enable();
@@ -704,9 +741,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
*/
trace_kvm_entry(*vcpu_pc(vcpu));
guest_enter_irqoff();
+ if (has_vhe())
+ kvm_arm_vhe_guest_enter();
ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
+ if (has_vhe())
+ kvm_arm_vhe_guest_exit();
vcpu->mode = OUTSIDE_GUEST_MODE;
vcpu->stat.exits++;
/*
@@ -734,7 +775,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
* we don't want vtimer interrupts to race with syncing the
* timer virtual interrupt state.
*/
- kvm_timer_sync_hwstate(vcpu);
+ if (static_branch_unlikely(&userspace_irqchip_in_use))
+ kvm_timer_sync_hwstate(vcpu);
/*
* We may have taken a host interrupt in HYP mode (ie
@@ -759,6 +801,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
guest_exit();
trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
+ /* Exit types that need handling before we can be preempted */
+ handle_exit_early(vcpu, run, ret);
+
preempt_enable();
ret = handle_exit(vcpu, run, ret);
@@ -772,6 +817,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_sigset_deactivate(vcpu);
+out:
+ vcpu_put(vcpu);
return ret;
}
@@ -987,66 +1034,88 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
struct kvm_device_attr attr;
+ long r;
+
+ vcpu_load(vcpu);
switch (ioctl) {
case KVM_ARM_VCPU_INIT: {
struct kvm_vcpu_init init;
+ r = -EFAULT;
if (copy_from_user(&init, argp, sizeof(init)))
- return -EFAULT;
+ break;
- return kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init);
+ r = kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init);
+ break;
}
case KVM_SET_ONE_REG:
case KVM_GET_ONE_REG: {
struct kvm_one_reg reg;
+ r = -ENOEXEC;
if (unlikely(!kvm_vcpu_initialized(vcpu)))
- return -ENOEXEC;
+ break;
+ r = -EFAULT;
if (copy_from_user(&reg, argp, sizeof(reg)))
- return -EFAULT;
+ break;
+
if (ioctl == KVM_SET_ONE_REG)
- return kvm_arm_set_reg(vcpu, &reg);
+ r = kvm_arm_set_reg(vcpu, &reg);
else
- return kvm_arm_get_reg(vcpu, &reg);
+ r = kvm_arm_get_reg(vcpu, &reg);
+ break;
}
case KVM_GET_REG_LIST: {
struct kvm_reg_list __user *user_list = argp;
struct kvm_reg_list reg_list;
unsigned n;
+ r = -ENOEXEC;
if (unlikely(!kvm_vcpu_initialized(vcpu)))
- return -ENOEXEC;
+ break;
+ r = -EFAULT;
if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
- return -EFAULT;
+ break;
n = reg_list.n;
reg_list.n = kvm_arm_num_regs(vcpu);
if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
- return -EFAULT;
+ break;
+ r = -E2BIG;
if (n < reg_list.n)
- return -E2BIG;
- return kvm_arm_copy_reg_indices(vcpu, user_list->reg);
+ break;
+ r = kvm_arm_copy_reg_indices(vcpu, user_list->reg);
+ break;
}
case KVM_SET_DEVICE_ATTR: {
+ r = -EFAULT;
if (copy_from_user(&attr, argp, sizeof(attr)))
- return -EFAULT;
- return kvm_arm_vcpu_set_attr(vcpu, &attr);
+ break;
+ r = kvm_arm_vcpu_set_attr(vcpu, &attr);
+ break;
}
case KVM_GET_DEVICE_ATTR: {
+ r = -EFAULT;
if (copy_from_user(&attr, argp, sizeof(attr)))
- return -EFAULT;
- return kvm_arm_vcpu_get_attr(vcpu, &attr);
+ break;
+ r = kvm_arm_vcpu_get_attr(vcpu, &attr);
+ break;
}
case KVM_HAS_DEVICE_ATTR: {
+ r = -EFAULT;
if (copy_from_user(&attr, argp, sizeof(attr)))
- return -EFAULT;
- return kvm_arm_vcpu_has_attr(vcpu, &attr);
+ break;
+ r = kvm_arm_vcpu_has_attr(vcpu, &attr);
+ break;
}
default:
- return -EINVAL;
+ r = -EINVAL;
}
+
+ vcpu_put(vcpu);
+ return r;
}
/**
@@ -1158,7 +1227,7 @@ static void cpu_init_hyp_mode(void *dummy)
pgd_ptr = kvm_mmu_get_httbr();
stack_page = __this_cpu_read(kvm_arm_hyp_stack_page);
hyp_stack_ptr = stack_page + PAGE_SIZE;
- vector_ptr = (unsigned long)kvm_ksym_ref(__kvm_hyp_vector);
+ vector_ptr = (unsigned long)kvm_get_hyp_vector();
__cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
__cpu_init_stage2();
@@ -1239,6 +1308,7 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
cpu_hyp_reset();
return NOTIFY_OK;
+ case CPU_PM_ENTER_FAILED:
case CPU_PM_EXIT:
if (__this_cpu_read(kvm_arm_hardware_enabled))
/* The hardware was enabled before suspend. */
@@ -1272,19 +1342,8 @@ static inline void hyp_cpu_pm_exit(void)
}
#endif
-static void teardown_common_resources(void)
-{
- free_percpu(kvm_host_cpu_state);
-}
-
static int init_common_resources(void)
{
- kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t);
- if (!kvm_host_cpu_state) {
- kvm_err("Cannot allocate host CPU state\n");
- return -ENOMEM;
- }
-
/* set size of VMID supported by CPU */
kvm_vmid_bits = kvm_get_vmid_bits();
kvm_info("%d-bit VMID\n", kvm_vmid_bits);
@@ -1403,6 +1462,12 @@ static int init_hyp_mode(void)
goto out_err;
}
+ err = kvm_map_vectors();
+ if (err) {
+ kvm_err("Cannot map vectors\n");
+ goto out_err;
+ }
+
/*
* Map the Hyp stack pages
*/
@@ -1420,7 +1485,7 @@ static int init_hyp_mode(void)
for_each_possible_cpu(cpu) {
kvm_cpu_context_t *cpu_ctxt;
- cpu_ctxt = per_cpu_ptr(kvm_host_cpu_state, cpu);
+ cpu_ctxt = per_cpu_ptr(&kvm_host_cpu_state, cpu);
err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP);
if (err) {
@@ -1544,7 +1609,6 @@ out_hyp:
if (!in_hyp_mode)
teardown_hyp_mode();
out_err:
- teardown_common_resources();
return err;
}
diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c
index d7fd46fe9efb..4fe6e797e8b3 100644
--- a/virt/kvm/arm/hyp/vgic-v2-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v2-sr.c
@@ -21,6 +21,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base)
{
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index b4b69c2d1012..ec62d1cccab7 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -621,7 +621,7 @@ static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
return 0;
}
-static int __create_hyp_mappings(pgd_t *pgdp,
+static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd,
unsigned long start, unsigned long end,
unsigned long pfn, pgprot_t prot)
{
@@ -634,7 +634,7 @@ static int __create_hyp_mappings(pgd_t *pgdp,
addr = start & PAGE_MASK;
end = PAGE_ALIGN(end);
do {
- pgd = pgdp + pgd_index(addr);
+ pgd = pgdp + ((addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1));
if (pgd_none(*pgd)) {
pud = pud_alloc_one(NULL, addr);
@@ -697,8 +697,8 @@ int create_hyp_mappings(void *from, void *to, pgprot_t prot)
int err;
phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
- err = __create_hyp_mappings(hyp_pgd, virt_addr,
- virt_addr + PAGE_SIZE,
+ err = __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD,
+ virt_addr, virt_addr + PAGE_SIZE,
__phys_to_pfn(phys_addr),
prot);
if (err)
@@ -729,7 +729,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))
return -EINVAL;
- return __create_hyp_mappings(hyp_pgd, start, end,
+ return __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD, start, end,
__phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
}
@@ -924,6 +924,25 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
return 0;
}
+static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr)
+{
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ pmdp = stage2_get_pmd(kvm, NULL, addr);
+ if (!pmdp || pmd_none(*pmdp) || !pmd_present(*pmdp))
+ return false;
+
+ if (pmd_thp_or_huge(*pmdp))
+ return kvm_s2pmd_exec(pmdp);
+
+ ptep = pte_offset_kernel(pmdp, addr);
+ if (!ptep || pte_none(*ptep) || !pte_present(*ptep))
+ return false;
+
+ return kvm_s2pte_exec(ptep);
+}
+
static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
phys_addr_t addr, const pte_t *new_pte,
unsigned long flags)
@@ -1255,10 +1274,14 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
}
-static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn,
- unsigned long size)
+static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
{
- __coherent_cache_guest_page(vcpu, pfn, size);
+ __clean_dcache_guest_page(pfn, size);
+}
+
+static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size)
+{
+ __invalidate_icache_guest_page(pfn, size);
}
static void kvm_send_hwpoison_signal(unsigned long address,
@@ -1284,7 +1307,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
unsigned long fault_status)
{
int ret;
- bool write_fault, writable, hugetlb = false, force_pte = false;
+ bool write_fault, exec_fault, writable, hugetlb = false, force_pte = false;
unsigned long mmu_seq;
gfn_t gfn = fault_ipa >> PAGE_SHIFT;
struct kvm *kvm = vcpu->kvm;
@@ -1296,7 +1319,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
unsigned long flags = 0;
write_fault = kvm_is_write_fault(vcpu);
- if (fault_status == FSC_PERM && !write_fault) {
+ exec_fault = kvm_vcpu_trap_is_iabt(vcpu);
+ VM_BUG_ON(write_fault && exec_fault);
+
+ if (fault_status == FSC_PERM && !write_fault && !exec_fault) {
kvm_err("Unexpected L2 read permission error\n");
return -EFAULT;
}
@@ -1310,7 +1336,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return -EFAULT;
}
- if (is_vm_hugetlb_page(vma) && !logging_active) {
+ if (vma_kernel_pagesize(vma) == PMD_SIZE && !logging_active) {
hugetlb = true;
gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
} else {
@@ -1389,7 +1415,19 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
new_pmd = kvm_s2pmd_mkwrite(new_pmd);
kvm_set_pfn_dirty(pfn);
}
- coherent_cache_guest_page(vcpu, pfn, PMD_SIZE);
+
+ if (fault_status != FSC_PERM)
+ clean_dcache_guest_page(pfn, PMD_SIZE);
+
+ if (exec_fault) {
+ new_pmd = kvm_s2pmd_mkexec(new_pmd);
+ invalidate_icache_guest_page(pfn, PMD_SIZE);
+ } else if (fault_status == FSC_PERM) {
+ /* Preserve execute if XN was already cleared */
+ if (stage2_is_exec(kvm, fault_ipa))
+ new_pmd = kvm_s2pmd_mkexec(new_pmd);
+ }
+
ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
} else {
pte_t new_pte = pfn_pte(pfn, mem_type);
@@ -1399,7 +1437,19 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
kvm_set_pfn_dirty(pfn);
mark_page_dirty(kvm, gfn);
}
- coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE);
+
+ if (fault_status != FSC_PERM)
+ clean_dcache_guest_page(pfn, PAGE_SIZE);
+
+ if (exec_fault) {
+ new_pte = kvm_s2pte_mkexec(new_pte);
+ invalidate_icache_guest_page(pfn, PAGE_SIZE);
+ } else if (fault_status == FSC_PERM) {
+ /* Preserve execute if XN was already cleared */
+ if (stage2_is_exec(kvm, fault_ipa))
+ new_pte = kvm_s2pte_mkexec(new_pte);
+ }
+
ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
}
@@ -1735,7 +1785,7 @@ static int kvm_map_idmap_text(pgd_t *pgd)
int err;
/* Create the idmap in the boot page tables */
- err = __create_hyp_mappings(pgd,
+ err = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(),
hyp_idmap_start, hyp_idmap_end,
__phys_to_pfn(hyp_idmap_start),
PAGE_HYP_EXEC);
diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c
index f1e363bab5e8..6919352cbf15 100644
--- a/virt/kvm/arm/psci.c
+++ b/virt/kvm/arm/psci.c
@@ -15,16 +15,16 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/arm-smccc.h>
#include <linux/preempt.h>
#include <linux/kvm_host.h>
#include <linux/wait.h>
#include <asm/cputype.h>
#include <asm/kvm_emulate.h>
-#include <asm/kvm_psci.h>
#include <asm/kvm_host.h>
-#include <uapi/linux/psci.h>
+#include <kvm/arm_psci.h>
/*
* This is an implementation of the Power State Coordination Interface
@@ -33,6 +33,38 @@
#define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1)
+static u32 smccc_get_function(struct kvm_vcpu *vcpu)
+{
+ return vcpu_get_reg(vcpu, 0);
+}
+
+static unsigned long smccc_get_arg1(struct kvm_vcpu *vcpu)
+{
+ return vcpu_get_reg(vcpu, 1);
+}
+
+static unsigned long smccc_get_arg2(struct kvm_vcpu *vcpu)
+{
+ return vcpu_get_reg(vcpu, 2);
+}
+
+static unsigned long smccc_get_arg3(struct kvm_vcpu *vcpu)
+{
+ return vcpu_get_reg(vcpu, 3);
+}
+
+static void smccc_set_retval(struct kvm_vcpu *vcpu,
+ unsigned long a0,
+ unsigned long a1,
+ unsigned long a2,
+ unsigned long a3)
+{
+ vcpu_set_reg(vcpu, 0, a0);
+ vcpu_set_reg(vcpu, 1, a1);
+ vcpu_set_reg(vcpu, 2, a2);
+ vcpu_set_reg(vcpu, 3, a3);
+}
+
static unsigned long psci_affinity_mask(unsigned long affinity_level)
{
if (affinity_level <= 3)
@@ -78,7 +110,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
unsigned long context_id;
phys_addr_t target_pc;
- cpu_id = vcpu_get_reg(source_vcpu, 1) & MPIDR_HWID_BITMASK;
+ cpu_id = smccc_get_arg1(source_vcpu) & MPIDR_HWID_BITMASK;
if (vcpu_mode_is_32bit(source_vcpu))
cpu_id &= ~((u32) 0);
@@ -91,14 +123,14 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
if (!vcpu)
return PSCI_RET_INVALID_PARAMS;
if (!vcpu->arch.power_off) {
- if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
+ if (kvm_psci_version(source_vcpu, kvm) != KVM_ARM_PSCI_0_1)
return PSCI_RET_ALREADY_ON;
else
return PSCI_RET_INVALID_PARAMS;
}
- target_pc = vcpu_get_reg(source_vcpu, 2);
- context_id = vcpu_get_reg(source_vcpu, 3);
+ target_pc = smccc_get_arg2(source_vcpu);
+ context_id = smccc_get_arg3(source_vcpu);
kvm_reset_vcpu(vcpu);
@@ -117,7 +149,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
* NOTE: We always update r0 (or x0) because for PSCI v0.1
* the general puspose registers are undefined upon CPU_ON.
*/
- vcpu_set_reg(vcpu, 0, context_id);
+ smccc_set_retval(vcpu, context_id, 0, 0, 0);
vcpu->arch.power_off = false;
smp_mb(); /* Make sure the above is visible */
@@ -137,8 +169,8 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu *tmp;
- target_affinity = vcpu_get_reg(vcpu, 1);
- lowest_affinity_level = vcpu_get_reg(vcpu, 2);
+ target_affinity = smccc_get_arg1(vcpu);
+ lowest_affinity_level = smccc_get_arg2(vcpu);
/* Determine target affinity mask */
target_affinity_mask = psci_affinity_mask(lowest_affinity_level);
@@ -200,18 +232,10 @@ static void kvm_psci_system_reset(struct kvm_vcpu *vcpu)
kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET);
}
-int kvm_psci_version(struct kvm_vcpu *vcpu)
-{
- if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features))
- return KVM_ARM_PSCI_0_2;
-
- return KVM_ARM_PSCI_0_1;
-}
-
static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
- unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0);
+ u32 psci_fn = smccc_get_function(vcpu);
unsigned long val;
int ret = 1;
@@ -221,7 +245,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
* Bits[31:16] = Major Version = 0
* Bits[15:0] = Minor Version = 2
*/
- val = 2;
+ val = KVM_ARM_PSCI_0_2;
break;
case PSCI_0_2_FN_CPU_SUSPEND:
case PSCI_0_2_FN64_CPU_SUSPEND:
@@ -278,14 +302,56 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
break;
}
- vcpu_set_reg(vcpu, 0, val);
+ smccc_set_retval(vcpu, val, 0, 0, 0);
+ return ret;
+}
+
+static int kvm_psci_1_0_call(struct kvm_vcpu *vcpu)
+{
+ u32 psci_fn = smccc_get_function(vcpu);
+ u32 feature;
+ unsigned long val;
+ int ret = 1;
+
+ switch(psci_fn) {
+ case PSCI_0_2_FN_PSCI_VERSION:
+ val = KVM_ARM_PSCI_1_0;
+ break;
+ case PSCI_1_0_FN_PSCI_FEATURES:
+ feature = smccc_get_arg1(vcpu);
+ switch(feature) {
+ case PSCI_0_2_FN_PSCI_VERSION:
+ case PSCI_0_2_FN_CPU_SUSPEND:
+ case PSCI_0_2_FN64_CPU_SUSPEND:
+ case PSCI_0_2_FN_CPU_OFF:
+ case PSCI_0_2_FN_CPU_ON:
+ case PSCI_0_2_FN64_CPU_ON:
+ case PSCI_0_2_FN_AFFINITY_INFO:
+ case PSCI_0_2_FN64_AFFINITY_INFO:
+ case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
+ case PSCI_0_2_FN_SYSTEM_OFF:
+ case PSCI_0_2_FN_SYSTEM_RESET:
+ case PSCI_1_0_FN_PSCI_FEATURES:
+ case ARM_SMCCC_VERSION_FUNC_ID:
+ val = 0;
+ break;
+ default:
+ val = PSCI_RET_NOT_SUPPORTED;
+ break;
+ }
+ break;
+ default:
+ return kvm_psci_0_2_call(vcpu);
+ }
+
+ smccc_set_retval(vcpu, val, 0, 0, 0);
return ret;
}
static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
- unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0);
+ u32 psci_fn = smccc_get_function(vcpu);
unsigned long val;
switch (psci_fn) {
@@ -303,7 +369,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
break;
}
- vcpu_set_reg(vcpu, 0, val);
+ smccc_set_retval(vcpu, val, 0, 0, 0);
return 1;
}
@@ -321,9 +387,11 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
* Errors:
* -EINVAL: Unrecognized PSCI function
*/
-int kvm_psci_call(struct kvm_vcpu *vcpu)
+static int kvm_psci_call(struct kvm_vcpu *vcpu)
{
- switch (kvm_psci_version(vcpu)) {
+ switch (kvm_psci_version(vcpu, vcpu->kvm)) {
+ case KVM_ARM_PSCI_1_0:
+ return kvm_psci_1_0_call(vcpu);
case KVM_ARM_PSCI_0_2:
return kvm_psci_0_2_call(vcpu);
case KVM_ARM_PSCI_0_1:
@@ -332,3 +400,30 @@ int kvm_psci_call(struct kvm_vcpu *vcpu)
return -EINVAL;
};
}
+
+int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
+{
+ u32 func_id = smccc_get_function(vcpu);
+ u32 val = PSCI_RET_NOT_SUPPORTED;
+ u32 feature;
+
+ switch (func_id) {
+ case ARM_SMCCC_VERSION_FUNC_ID:
+ val = ARM_SMCCC_VERSION_1_1;
+ break;
+ case ARM_SMCCC_ARCH_FEATURES_FUNC_ID:
+ feature = smccc_get_arg1(vcpu);
+ switch(feature) {
+ case ARM_SMCCC_ARCH_WORKAROUND_1:
+ if (kvm_arm_harden_branch_predictor())
+ val = 0;
+ break;
+ }
+ break;
+ default:
+ return kvm_psci_call(vcpu);
+ }
+
+ smccc_set_retval(vcpu, val, 0, 0, 0);
+ return 1;
+}
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 62310122ee78..743ca5cb05ef 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -285,9 +285,11 @@ int vgic_init(struct kvm *kvm)
if (ret)
goto out;
- ret = vgic_v4_init(kvm);
- if (ret)
- goto out;
+ if (vgic_has_its(kvm)) {
+ ret = vgic_v4_init(kvm);
+ if (ret)
+ goto out;
+ }
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_vgic_vcpu_enable(vcpu);
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 8e633bd9cc1e..465095355666 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -1034,10 +1034,8 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its,
device = vgic_its_alloc_device(its, device_id, itt_addr,
num_eventid_bits);
- if (IS_ERR(device))
- return PTR_ERR(device);
- return 0;
+ return PTR_ERR_OR_ZERO(device);
}
/*
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index deb51ee16a3d..83d82bd7dc4e 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -16,6 +16,7 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <kvm/iodev.h>
+#include <kvm/arm_arch_timer.h>
#include <kvm/arm_vgic.h>
#include "vgic.h"
@@ -122,10 +123,43 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
return value;
}
+/*
+ * This function will return the VCPU that performed the MMIO access and
+ * trapped from within the VM, and will return NULL if this is a userspace
+ * access.
+ *
+ * We can disable preemption locally around accessing the per-CPU variable,
+ * and use the resolved vcpu pointer after enabling preemption again, because
+ * even if the current thread is migrated to another CPU, reading the per-CPU
+ * value later will give us the same value as we update the per-CPU variable
+ * in the preempt notifier handlers.
+ */
+static struct kvm_vcpu *vgic_get_mmio_requester_vcpu(void)
+{
+ struct kvm_vcpu *vcpu;
+
+ preempt_disable();
+ vcpu = kvm_arm_get_running_vcpu();
+ preempt_enable();
+ return vcpu;
+}
+
+/* Must be called with irq->irq_lock held */
+static void vgic_hw_irq_spending(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
+ bool is_uaccess)
+{
+ if (is_uaccess)
+ return;
+
+ irq->pending_latch = true;
+ vgic_irq_set_phys_active(irq, true);
+}
+
void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
+ bool is_uaccess = !vgic_get_mmio_requester_vcpu();
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
int i;
unsigned long flags;
@@ -134,17 +168,45 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
spin_lock_irqsave(&irq->irq_lock, flags);
- irq->pending_latch = true;
-
+ if (irq->hw)
+ vgic_hw_irq_spending(vcpu, irq, is_uaccess);
+ else
+ irq->pending_latch = true;
vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
vgic_put_irq(vcpu->kvm, irq);
}
}
+/* Must be called with irq->irq_lock held */
+static void vgic_hw_irq_cpending(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
+ bool is_uaccess)
+{
+ if (is_uaccess)
+ return;
+
+ irq->pending_latch = false;
+
+ /*
+ * We don't want the guest to effectively mask the physical
+ * interrupt by doing a write to SPENDR followed by a write to
+ * CPENDR for HW interrupts, so we clear the active state on
+ * the physical side if the virtual interrupt is not active.
+ * This may lead to taking an additional interrupt on the
+ * host, but that should not be a problem as the worst that
+ * can happen is an additional vgic injection. We also clear
+ * the pending state to maintain proper semantics for edge HW
+ * interrupts.
+ */
+ vgic_irq_set_phys_pending(irq, false);
+ if (!irq->active)
+ vgic_irq_set_phys_active(irq, false);
+}
+
void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
+ bool is_uaccess = !vgic_get_mmio_requester_vcpu();
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
int i;
unsigned long flags;
@@ -154,7 +216,10 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
spin_lock_irqsave(&irq->irq_lock, flags);
- irq->pending_latch = false;
+ if (irq->hw)
+ vgic_hw_irq_cpending(vcpu, irq, is_uaccess);
+ else
+ irq->pending_latch = false;
spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
@@ -181,27 +246,24 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
return value;
}
+/* Must be called with irq->irq_lock held */
+static void vgic_hw_irq_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
+ bool active, bool is_uaccess)
+{
+ if (is_uaccess)
+ return;
+
+ irq->active = active;
+ vgic_irq_set_phys_active(irq, active);
+}
+
static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
- bool new_active_state)
+ bool active)
{
- struct kvm_vcpu *requester_vcpu;
unsigned long flags;
- spin_lock_irqsave(&irq->irq_lock, flags);
+ struct kvm_vcpu *requester_vcpu = vgic_get_mmio_requester_vcpu();
- /*
- * The vcpu parameter here can mean multiple things depending on how
- * this function is called; when handling a trap from the kernel it
- * depends on the GIC version, and these functions are also called as
- * part of save/restore from userspace.
- *
- * Therefore, we have to figure out the requester in a reliable way.
- *
- * When accessing VGIC state from user space, the requester_vcpu is
- * NULL, which is fine, because we guarantee that no VCPUs are running
- * when accessing VGIC state from user space so irq->vcpu->cpu is
- * always -1.
- */
- requester_vcpu = kvm_arm_get_running_vcpu();
+ spin_lock_irqsave(&irq->irq_lock, flags);
/*
* If this virtual IRQ was written into a list register, we
@@ -213,14 +275,23 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
* vgic_change_active_prepare) and still has to sync back this IRQ,
* so we release and re-acquire the spin_lock to let the other thread
* sync back the IRQ.
+ *
+ * When accessing VGIC state from user space, requester_vcpu is
+ * NULL, which is fine, because we guarantee that no VCPUs are running
+ * when accessing VGIC state from user space so irq->vcpu->cpu is
+ * always -1.
*/
while (irq->vcpu && /* IRQ may have state in an LR somewhere */
irq->vcpu != requester_vcpu && /* Current thread is not the VCPU thread */
irq->vcpu->cpu != -1) /* VCPU thread is running */
cond_resched_lock(&irq->irq_lock);
- irq->active = new_active_state;
- if (new_active_state)
+ if (irq->hw)
+ vgic_hw_irq_change_active(vcpu, irq, active, !requester_vcpu);
+ else
+ irq->active = active;
+
+ if (irq->active)
vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
else
spin_unlock_irqrestore(&irq->irq_lock, flags);
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index 80897102da26..c32d7b93ffd1 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -105,6 +105,26 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
irq->pending_latch = false;
}
+ /*
+ * Level-triggered mapped IRQs are special because we only
+ * observe rising edges as input to the VGIC.
+ *
+ * If the guest never acked the interrupt we have to sample
+ * the physical line and set the line level, because the
+ * device state could have changed or we simply need to
+ * process the still pending interrupt later.
+ *
+ * If this causes us to lower the level, we have to also clear
+ * the physical active state, since we will otherwise never be
+ * told when the interrupt becomes asserted again.
+ */
+ if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT)) {
+ irq->line_level = vgic_get_phys_line_level(irq);
+
+ if (!irq->line_level)
+ vgic_irq_set_phys_active(irq, false);
+ }
+
spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
}
@@ -162,6 +182,15 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
val |= GICH_LR_EOI;
}
+ /*
+ * Level-triggered mapped IRQs are special because we only observe
+ * rising edges as input to the VGIC. We therefore lower the line
+ * level here, so that we can take new virtual IRQs. See
+ * vgic_v2_fold_lr_state for more info.
+ */
+ if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT))
+ irq->line_level = false;
+
/* The GICv2 LR only holds five bits of priority. */
val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT;
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index f47e8481fa45..6b329414e57a 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -96,6 +96,26 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
irq->pending_latch = false;
}
+ /*
+ * Level-triggered mapped IRQs are special because we only
+ * observe rising edges as input to the VGIC.
+ *
+ * If the guest never acked the interrupt we have to sample
+ * the physical line and set the line level, because the
+ * device state could have changed or we simply need to
+ * process the still pending interrupt later.
+ *
+ * If this causes us to lower the level, we have to also clear
+ * the physical active state, since we will otherwise never be
+ * told when the interrupt becomes asserted again.
+ */
+ if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) {
+ irq->line_level = vgic_get_phys_line_level(irq);
+
+ if (!irq->line_level)
+ vgic_irq_set_phys_active(irq, false);
+ }
+
spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
}
@@ -146,6 +166,15 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
}
/*
+ * Level-triggered mapped IRQs are special because we only observe
+ * rising edges as input to the VGIC. We therefore lower the line
+ * level here, so that we can take new virtual IRQs. See
+ * vgic_v3_fold_lr_state for more info.
+ */
+ if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT))
+ irq->line_level = false;
+
+ /*
* We currently only support Group1 interrupts, which is a
* known defect. This needs to be addressed at some point.
*/
diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
index 4a37292855bc..bc4265154bac 100644
--- a/virt/kvm/arm/vgic/vgic-v4.c
+++ b/virt/kvm/arm/vgic/vgic-v4.c
@@ -118,7 +118,7 @@ int vgic_v4_init(struct kvm *kvm)
struct kvm_vcpu *vcpu;
int i, nr_vcpus, ret;
- if (!vgic_supports_direct_msis(kvm))
+ if (!kvm_vgic_global_state.has_gicv4)
return 0; /* Nothing to see here... move along. */
if (dist->its_vm.vpes)
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index ecb8e25f5fe5..c7c5ef190afa 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -144,6 +144,38 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
kfree(irq);
}
+void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending)
+{
+ WARN_ON(irq_set_irqchip_state(irq->host_irq,
+ IRQCHIP_STATE_PENDING,
+ pending));
+}
+
+bool vgic_get_phys_line_level(struct vgic_irq *irq)
+{
+ bool line_level;
+
+ BUG_ON(!irq->hw);
+
+ if (irq->get_input_level)
+ return irq->get_input_level(irq->intid);
+
+ WARN_ON(irq_get_irqchip_state(irq->host_irq,
+ IRQCHIP_STATE_PENDING,
+ &line_level));
+ return line_level;
+}
+
+/* Set/Clear the physical active state */
+void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active)
+{
+
+ BUG_ON(!irq->hw);
+ WARN_ON(irq_set_irqchip_state(irq->host_irq,
+ IRQCHIP_STATE_ACTIVE,
+ active));
+}
+
/**
* kvm_vgic_target_oracle - compute the target vcpu for an irq
*
@@ -413,7 +445,8 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
/* @irq->irq_lock must be held */
static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
- unsigned int host_irq)
+ unsigned int host_irq,
+ bool (*get_input_level)(int vindid))
{
struct irq_desc *desc;
struct irq_data *data;
@@ -433,6 +466,7 @@ static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
irq->hw = true;
irq->host_irq = host_irq;
irq->hwintid = data->hwirq;
+ irq->get_input_level = get_input_level;
return 0;
}
@@ -441,10 +475,11 @@ static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
{
irq->hw = false;
irq->hwintid = 0;
+ irq->get_input_level = NULL;
}
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
- u32 vintid)
+ u32 vintid, bool (*get_input_level)(int vindid))
{
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
unsigned long flags;
@@ -453,7 +488,7 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
BUG_ON(!irq);
spin_lock_irqsave(&irq->irq_lock, flags);
- ret = kvm_vgic_map_irq(vcpu, irq, host_irq);
+ ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level);
spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index efbcf8f96f9c..12c37b89f7a3 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -104,6 +104,11 @@ static inline bool irq_is_pending(struct vgic_irq *irq)
return irq->pending_latch || irq->line_level;
}
+static inline bool vgic_irq_is_mapped_level(struct vgic_irq *irq)
+{
+ return irq->config == VGIC_CONFIG_LEVEL && irq->hw;
+}
+
/*
* This struct provides an intermediate representation of the fields contained
* in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC
@@ -140,6 +145,9 @@ vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
u32 intid);
void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
+bool vgic_get_phys_line_level(struct vgic_irq *irq);
+void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending);
+void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active);
bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
unsigned long flags);
void vgic_kick_vcpus(struct kvm *kvm);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index f2ac53ab8243..6e865e8b5b10 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -188,13 +188,13 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
struct kvm_kernel_irqfd *irqfd =
container_of(wait, struct kvm_kernel_irqfd, wait);
- unsigned long flags = (unsigned long)key;
+ __poll_t flags = key_to_poll(key);
struct kvm_kernel_irq_routing_entry irq;
struct kvm *kvm = irqfd->kvm;
unsigned seq;
int idx;
- if (flags & POLLIN) {
+ if (flags & EPOLLIN) {
idx = srcu_read_lock(&kvm->irq_srcu);
do {
seq = read_seqcount_begin(&irqfd->irq_entry_sc);
@@ -208,7 +208,7 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
srcu_read_unlock(&kvm->irq_srcu, idx);
}
- if (flags & POLLHUP) {
+ if (flags & EPOLLHUP) {
/* The eventfd is closing, detach from KVM */
unsigned long flags;
@@ -287,7 +287,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
struct fd f;
struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
int ret;
- unsigned int events;
+ __poll_t events;
int idx;
if (!kvm_arch_intc_initialized(kvm))
@@ -399,12 +399,12 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
*/
events = f.file->f_op->poll(f.file, &irqfd->pt);
- if (events & POLLIN)
+ if (events & EPOLLIN)
schedule_work(&irqfd->inject);
/*
* do not drop the file until the irqfd is fully initialized, otherwise
- * we might race against the POLLHUP
+ * we might race against the EPOLLHUP
*/
fdput(f);
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 210bf820385a..4501e658e8d6 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -151,17 +151,12 @@ bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
/*
* Switches to specified vcpu, until a matching vcpu_put()
*/
-int vcpu_load(struct kvm_vcpu *vcpu)
+void vcpu_load(struct kvm_vcpu *vcpu)
{
- int cpu;
-
- if (mutex_lock_killable(&vcpu->mutex))
- return -EINTR;
- cpu = get_cpu();
+ int cpu = get_cpu();
preempt_notifier_register(&vcpu->preempt_notifier);
kvm_arch_vcpu_load(vcpu, cpu);
put_cpu();
- return 0;
}
EXPORT_SYMBOL_GPL(vcpu_load);
@@ -171,7 +166,6 @@ void vcpu_put(struct kvm_vcpu *vcpu)
kvm_arch_vcpu_put(vcpu);
preempt_notifier_unregister(&vcpu->preempt_notifier);
preempt_enable();
- mutex_unlock(&vcpu->mutex);
}
EXPORT_SYMBOL_GPL(vcpu_put);
@@ -476,6 +470,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
}
static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
+ .flags = MMU_INVALIDATE_DOES_NOT_BLOCK,
.invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
.invalidate_range_end = kvm_mmu_notifier_invalidate_range_end,
.clear_flush_young = kvm_mmu_notifier_clear_flush_young,
@@ -1322,17 +1317,6 @@ unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *w
return gfn_to_hva_memslot_prot(slot, gfn, writable);
}
-static int get_user_page_nowait(unsigned long start, int write,
- struct page **page)
-{
- int flags = FOLL_NOWAIT | FOLL_HWPOISON;
-
- if (write)
- flags |= FOLL_WRITE;
-
- return get_user_pages(start, 1, flags, page, NULL);
-}
-
static inline int check_user_page_hwpoison(unsigned long addr)
{
int rc, flags = FOLL_HWPOISON | FOLL_WRITE;
@@ -1381,7 +1365,8 @@ static bool hva_to_pfn_fast(unsigned long addr, bool atomic, bool *async,
static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
bool *writable, kvm_pfn_t *pfn)
{
- struct page *page[1];
+ unsigned int flags = FOLL_HWPOISON;
+ struct page *page;
int npages = 0;
might_sleep();
@@ -1389,35 +1374,26 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
if (writable)
*writable = write_fault;
- if (async) {
- down_read(&current->mm->mmap_sem);
- npages = get_user_page_nowait(addr, write_fault, page);
- up_read(&current->mm->mmap_sem);
- } else {
- unsigned int flags = FOLL_HWPOISON;
-
- if (write_fault)
- flags |= FOLL_WRITE;
+ if (write_fault)
+ flags |= FOLL_WRITE;
+ if (async)
+ flags |= FOLL_NOWAIT;
- npages = get_user_pages_unlocked(addr, 1, page, flags);
- }
+ npages = get_user_pages_unlocked(addr, 1, &page, flags);
if (npages != 1)
return npages;
/* map read fault as writable if possible */
if (unlikely(!write_fault) && writable) {
- struct page *wpage[1];
+ struct page *wpage;
- npages = __get_user_pages_fast(addr, 1, 1, wpage);
- if (npages == 1) {
+ if (__get_user_pages_fast(addr, 1, 1, &wpage) == 1) {
*writable = true;
- put_page(page[0]);
- page[0] = wpage[0];
+ put_page(page);
+ page = wpage;
}
-
- npages = 1;
}
- *pfn = page_to_pfn(page[0]);
+ *pfn = page_to_pfn(page);
return npages;
}
@@ -1434,7 +1410,8 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)
static int hva_to_pfn_remapped(struct vm_area_struct *vma,
unsigned long addr, bool *async,
- bool write_fault, kvm_pfn_t *p_pfn)
+ bool write_fault, bool *writable,
+ kvm_pfn_t *p_pfn)
{
unsigned long pfn;
int r;
@@ -1460,6 +1437,8 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
}
+ if (writable)
+ *writable = true;
/*
* Get a reference here because callers of *hva_to_pfn* and
@@ -1525,7 +1504,7 @@ retry:
if (vma == NULL)
pfn = KVM_PFN_ERR_FAULT;
else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) {
- r = hva_to_pfn_remapped(vma, addr, async, write_fault, &pfn);
+ r = hva_to_pfn_remapped(vma, addr, async, write_fault, writable, &pfn);
if (r == -EAGAIN)
goto retry;
if (r < 0)
@@ -2418,7 +2397,10 @@ static struct file_operations kvm_vcpu_fops = {
*/
static int create_vcpu_fd(struct kvm_vcpu *vcpu)
{
- return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC);
+ char name[8 + 1 + ITOA_MAX_LEN + 1];
+
+ snprintf(name, sizeof(name), "kvm-vcpu:%d", vcpu->vcpu_id);
+ return anon_inode_getfd(name, &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC);
}
static int kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
@@ -2550,19 +2532,16 @@ static long kvm_vcpu_ioctl(struct file *filp,
if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
return -EINVAL;
-#if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
/*
- * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
- * so vcpu_load() would break it.
+ * Some architectures have vcpu ioctls that are asynchronous to vcpu
+ * execution; mutex_lock() would break them.
*/
- if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_S390_IRQ || ioctl == KVM_INTERRUPT)
- return kvm_arch_vcpu_ioctl(filp, ioctl, arg);
-#endif
-
-
- r = vcpu_load(vcpu);
- if (r)
+ r = kvm_arch_vcpu_async_ioctl(filp, ioctl, arg);
+ if (r != -ENOIOCTLCMD)
return r;
+
+ if (mutex_lock_killable(&vcpu->mutex))
+ return -EINTR;
switch (ioctl) {
case KVM_RUN: {
struct pid *oldpid;
@@ -2734,7 +2713,7 @@ out_free1:
r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
}
out:
- vcpu_put(vcpu);
+ mutex_unlock(&vcpu->mutex);
kfree(fpu);
kfree(kvm_sregs);
return r;
@@ -3186,21 +3165,18 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
return PTR_ERR(kvm);
#ifdef CONFIG_KVM_MMIO
r = kvm_coalesced_mmio_init(kvm);
- if (r < 0) {
- kvm_put_kvm(kvm);
- return r;
- }
+ if (r < 0)
+ goto put_kvm;
#endif
r = get_unused_fd_flags(O_CLOEXEC);
- if (r < 0) {
- kvm_put_kvm(kvm);
- return r;
- }
+ if (r < 0)
+ goto put_kvm;
+
file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
if (IS_ERR(file)) {
put_unused_fd(r);
- kvm_put_kvm(kvm);
- return PTR_ERR(file);
+ r = PTR_ERR(file);
+ goto put_kvm;
}
/*
@@ -3218,6 +3194,10 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
fd_install(r, file);
return r;
+
+put_kvm:
+ kvm_put_kvm(kvm);
+ return r;
}
static long kvm_dev_ioctl(struct file *filp,
@@ -4037,8 +4017,12 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
/* A kmem cache lets us meet the alignment requirements of fx_save. */
if (!vcpu_align)
vcpu_align = __alignof__(struct kvm_vcpu);
- kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align,
- SLAB_ACCOUNT, NULL);
+ kvm_vcpu_cache =
+ kmem_cache_create_usercopy("kvm_vcpu", vcpu_size, vcpu_align,
+ SLAB_ACCOUNT,
+ offsetof(struct kvm_vcpu, arch),
+ sizeof_field(struct kvm_vcpu, arch),
+ NULL);
if (!kvm_vcpu_cache) {
r = -ENOMEM;
goto out_free_3;
OpenPOWER on IntegriCloud