diff options
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 316 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rmhandlers.S | 168 | ||||
-rw-r--r-- | arch/powerpc/kvm/powerpc.c | 4 |
3 files changed, 455 insertions, 33 deletions
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 6fe469eabce8..36b6d98f1197 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -39,6 +39,7 @@ #include <asm/mmu_context.h> #include <asm/lppaca.h> #include <asm/processor.h> +#include <asm/cputhreads.h> #include <linux/gfp.h> #include <linux/sched.h> #include <linux/vmalloc.h> @@ -51,12 +52,16 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { local_paca->kvm_hstate.kvm_vcpu = vcpu; + local_paca->kvm_hstate.kvm_vcore = vcpu->arch.vcore; } void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) { } +static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu); +static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu); + void kvmppc_vcpu_block(struct kvm_vcpu *vcpu) { u64 now; @@ -74,11 +79,15 @@ void kvmppc_vcpu_block(struct kvm_vcpu *vcpu) HRTIMER_MODE_REL); } + kvmppc_vcpu_blocked(vcpu); + kvm_vcpu_block(vcpu); vcpu->stat.halt_wakeup++; if (vcpu->arch.dec_expires != ~(u64)0) hrtimer_try_to_cancel(&vcpu->arch.dec_timer); + + kvmppc_vcpu_unblocked(vcpu); } void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) @@ -429,9 +438,16 @@ int kvmppc_core_check_processor_compat(void) struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvm_vcpu *vcpu; - int err = -ENOMEM; + int err = -EINVAL; + int core; + struct kvmppc_vcore *vcore; unsigned long lpcr; + core = id / threads_per_core; + if (core >= KVM_MAX_VCORES) + goto out; + + err = -ENOMEM; vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); if (!vcpu) goto out; @@ -454,6 +470,38 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) kvmppc_mmu_book3s_hv_init(vcpu); + /* + * Some vcpus may start out in stopped state. If we initialize + * them to busy-in-host state they will stop other vcpus in the + * vcore from running. Instead we initialize them to blocked + * state, effectively considering them to be stopped until we + * see the first run ioctl for them. + */ + vcpu->arch.state = KVMPPC_VCPU_BLOCKED; + + init_waitqueue_head(&vcpu->arch.cpu_run); + + mutex_lock(&kvm->lock); + vcore = kvm->arch.vcores[core]; + if (!vcore) { + vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL); + if (vcore) { + INIT_LIST_HEAD(&vcore->runnable_threads); + spin_lock_init(&vcore->lock); + } + kvm->arch.vcores[core] = vcore; + } + mutex_unlock(&kvm->lock); + + if (!vcore) + goto free_vcpu; + + spin_lock(&vcore->lock); + ++vcore->num_threads; + ++vcore->n_blocked; + spin_unlock(&vcore->lock); + vcpu->arch.vcore = vcore; + return vcpu; free_vcpu: @@ -468,21 +516,121 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) kfree(vcpu); } +static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcore *vc = vcpu->arch.vcore; + + spin_lock(&vc->lock); + vcpu->arch.state = KVMPPC_VCPU_BLOCKED; + ++vc->n_blocked; + if (vc->n_runnable > 0 && + vc->n_runnable + vc->n_blocked == vc->num_threads) { + vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu, + arch.run_list); + wake_up(&vcpu->arch.cpu_run); + } + spin_unlock(&vc->lock); +} + +static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcore *vc = vcpu->arch.vcore; + + spin_lock(&vc->lock); + vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; + --vc->n_blocked; + spin_unlock(&vc->lock); +} + extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); +extern void xics_wake_cpu(int cpu); -static int kvmppc_run_vcpu(struct kvm_run *run, struct kvm_vcpu *vcpu) +static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, + struct kvm_vcpu *vcpu) { - u64 now; + struct kvm_vcpu *v; - if (signal_pending(current)) { - run->exit_reason = KVM_EXIT_INTR; - return -EINTR; + if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE) + return; + vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; + --vc->n_runnable; + /* decrement the physical thread id of each following vcpu */ + v = vcpu; + list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list) + --v->arch.ptid; + list_del(&vcpu->arch.run_list); +} + +static void kvmppc_start_thread(struct kvm_vcpu *vcpu) +{ + int cpu; + struct paca_struct *tpaca; + struct kvmppc_vcore *vc = vcpu->arch.vcore; + + cpu = vc->pcpu + vcpu->arch.ptid; + tpaca = &paca[cpu]; + tpaca->kvm_hstate.kvm_vcpu = vcpu; + tpaca->kvm_hstate.kvm_vcore = vc; + smp_wmb(); +#ifdef CONFIG_PPC_ICP_NATIVE + if (vcpu->arch.ptid) { + tpaca->cpu_start = 0x80; + tpaca->kvm_hstate.in_guest = KVM_GUEST_MODE_GUEST; + wmb(); + xics_wake_cpu(cpu); + ++vc->n_woken; } +#endif +} - flush_fp_to_thread(current); - flush_altivec_to_thread(current); - flush_vsx_to_thread(current); - preempt_disable(); +static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc) +{ + int i; + + HMT_low(); + i = 0; + while (vc->nap_count < vc->n_woken) { + if (++i >= 1000000) { + pr_err("kvmppc_wait_for_nap timeout %d %d\n", + vc->nap_count, vc->n_woken); + break; + } + cpu_relax(); + } + HMT_medium(); +} + +/* + * Check that we are on thread 0 and that any other threads in + * this core are off-line. + */ +static int on_primary_thread(void) +{ + int cpu = smp_processor_id(); + int thr = cpu_thread_in_core(cpu); + + if (thr) + return 0; + while (++thr < threads_per_core) + if (cpu_online(cpu + thr)) + return 0; + return 1; +} + +/* + * Run a set of guest threads on a physical core. + * Called with vc->lock held. + */ +static int kvmppc_run_core(struct kvmppc_vcore *vc) +{ + struct kvm_vcpu *vcpu, *vnext; + long ret; + u64 now; + + /* don't start if any threads have a signal pending */ + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) + if (signal_pending(vcpu->arch.run_task)) + return 0; /* * Make sure we are running on thread 0, and that @@ -490,36 +638,150 @@ static int kvmppc_run_vcpu(struct kvm_run *run, struct kvm_vcpu *vcpu) * XXX we should also block attempts to bring any * secondary threads online. */ - if (threads_per_core > 1) { - int cpu = smp_processor_id(); - int thr = cpu_thread_in_core(cpu); - - if (thr) - goto out; - while (++thr < threads_per_core) - if (cpu_online(cpu + thr)) - goto out; + if (threads_per_core > 1 && !on_primary_thread()) { + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) + vcpu->arch.ret = -EBUSY; + goto out; } - kvm_guest_enter(); + vc->n_woken = 0; + vc->nap_count = 0; + vc->entry_exit_count = 0; + vc->vcore_running = 1; + vc->in_guest = 0; + vc->pcpu = smp_processor_id(); + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) + kvmppc_start_thread(vcpu); + vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu, + arch.run_list); + + spin_unlock(&vc->lock); + preempt_disable(); + kvm_guest_enter(); __kvmppc_vcore_entry(NULL, vcpu); + /* wait for secondary threads to finish writing their state to memory */ + spin_lock(&vc->lock); + if (vc->nap_count < vc->n_woken) + kvmppc_wait_for_nap(vc); + /* prevent other vcpu threads from doing kvmppc_start_thread() now */ + vc->vcore_running = 2; + spin_unlock(&vc->lock); + + /* make sure updates to secondary vcpu structs are visible now */ + smp_mb(); kvm_guest_exit(); preempt_enable(); kvm_resched(vcpu); now = get_tb(); - /* cancel pending dec exception if dec is positive */ - if (now < vcpu->arch.dec_expires && kvmppc_core_pending_dec(vcpu)) - kvmppc_core_dequeue_dec(vcpu); - - return kvmppc_handle_exit(run, vcpu, current); + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { + /* cancel pending dec exception if dec is positive */ + if (now < vcpu->arch.dec_expires && + kvmppc_core_pending_dec(vcpu)) + kvmppc_core_dequeue_dec(vcpu); + if (!vcpu->arch.trap) { + if (signal_pending(vcpu->arch.run_task)) { + vcpu->arch.kvm_run->exit_reason = KVM_EXIT_INTR; + vcpu->arch.ret = -EINTR; + } + continue; /* didn't get to run */ + } + ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu, + vcpu->arch.run_task); + vcpu->arch.ret = ret; + vcpu->arch.trap = 0; + } + spin_lock(&vc->lock); out: - preempt_enable(); - return -EBUSY; + vc->vcore_running = 0; + list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, + arch.run_list) { + if (vcpu->arch.ret != RESUME_GUEST) { + kvmppc_remove_runnable(vc, vcpu); + wake_up(&vcpu->arch.cpu_run); + } + } + + return 1; +} + +static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +{ + int ptid; + int wait_state; + struct kvmppc_vcore *vc; + DEFINE_WAIT(wait); + + /* No need to go into the guest when all we do is going out */ + if (signal_pending(current)) { + kvm_run->exit_reason = KVM_EXIT_INTR; + return -EINTR; + } + + kvm_run->exit_reason = 0; + vcpu->arch.ret = RESUME_GUEST; + vcpu->arch.trap = 0; + + flush_fp_to_thread(current); + flush_altivec_to_thread(current); + flush_vsx_to_thread(current); + + /* + * Synchronize with other threads in this virtual core + */ + vc = vcpu->arch.vcore; + spin_lock(&vc->lock); + /* This happens the first time this is called for a vcpu */ + if (vcpu->arch.state == KVMPPC_VCPU_BLOCKED) + --vc->n_blocked; + vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; + ptid = vc->n_runnable; + vcpu->arch.run_task = current; + vcpu->arch.kvm_run = kvm_run; + vcpu->arch.ptid = ptid; + list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads); + ++vc->n_runnable; + + wait_state = TASK_INTERRUPTIBLE; + while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) { + if (signal_pending(current)) { + if (!vc->vcore_running) { + kvm_run->exit_reason = KVM_EXIT_INTR; + vcpu->arch.ret = -EINTR; + break; + } + /* have to wait for vcore to stop executing guest */ + wait_state = TASK_UNINTERRUPTIBLE; + smp_send_reschedule(vc->pcpu); + } + + if (!vc->vcore_running && + vc->n_runnable + vc->n_blocked == vc->num_threads) { + /* we can run now */ + if (kvmppc_run_core(vc)) + continue; + } + + if (vc->vcore_running == 1 && VCORE_EXIT_COUNT(vc) == 0) + kvmppc_start_thread(vcpu); + + /* wait for other threads to come in, or wait for vcore */ + prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state); + spin_unlock(&vc->lock); + schedule(); + finish_wait(&vcpu->arch.cpu_run, &wait); + spin_lock(&vc->lock); + } + + if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) + kvmppc_remove_runnable(vc, vcpu); + spin_unlock(&vc->lock); + + return vcpu->arch.ret; } int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index e6adaadcdff2..c9bf177b7cf2 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -30,8 +30,6 @@ * * ****************************************************************************/ -#define SHADOW_VCPU_OFF PACA_KVM_SVCPU - .globl kvmppc_skip_interrupt kvmppc_skip_interrupt: mfspr r13,SPRN_SRR0 @@ -79,6 +77,32 @@ _GLOBAL(kvmppc_hv_entry_trampoline) * * *****************************************************************************/ +#define XICS_XIRR 4 +#define XICS_QIRR 0xc + +/* + * We come in here when wakened from nap mode on a secondary hw thread. + * Relocation is off and most register values are lost. + * r13 points to the PACA. + */ + .globl kvm_start_guest +kvm_start_guest: + ld r1,PACAEMERGSP(r13) + subi r1,r1,STACK_FRAME_OVERHEAD + + /* get vcpu pointer */ + ld r4, HSTATE_KVM_VCPU(r13) + + /* We got here with an IPI; clear it */ + ld r5, HSTATE_XICS_PHYS(r13) + li r0, 0xff + li r6, XICS_QIRR + li r7, XICS_XIRR + lwzcix r8, r5, r7 /* ack the interrupt */ + sync + stbcix r0, r5, r6 /* clear it */ + stwcix r8, r5, r7 /* EOI it */ + .global kvmppc_hv_entry kvmppc_hv_entry: @@ -200,7 +224,20 @@ kvmppc_hv_entry: slbia ptesync - /* Switch to guest partition. */ + /* Increment entry count iff exit count is zero. */ + ld r5,HSTATE_KVM_VCORE(r13) + addi r9,r5,VCORE_ENTRY_EXIT +21: lwarx r3,0,r9 + cmpwi r3,0x100 /* any threads starting to exit? */ + bge secondary_too_late /* if so we're too late to the party */ + addi r3,r3,1 + stwcx. r3,0,r9 + bne 21b + + /* Primary thread switches to guest partition. */ + lwz r6,VCPU_PTID(r4) + cmpwi r6,0 + bne 20f ld r9,VCPU_KVM(r4) /* pointer to struct kvm */ ld r6,KVM_SDR1(r9) lwz r7,KVM_LPID(r9) @@ -210,7 +247,15 @@ kvmppc_hv_entry: mtspr SPRN_SDR1,r6 /* switch to partition page table */ mtspr SPRN_LPID,r7 isync - ld r8,VCPU_LPCR(r4) + li r0,1 + stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */ + b 10f + + /* Secondary threads wait for primary to have done partition switch */ +20: lbz r0,VCORE_IN_GUEST(r5) + cmpwi r0,0 + beq 20b +10: ld r8,VCPU_LPCR(r4) mtspr SPRN_LPCR,r8 isync @@ -225,10 +270,12 @@ kvmppc_hv_entry: * Invalidate the TLB if we could possibly have stale TLB * entries for this partition on this core due to the use * of tlbiel. + * XXX maybe only need this on primary thread? */ ld r9,VCPU_KVM(r4) /* pointer to struct kvm */ lwz r5,VCPU_VCPUID(r4) lhz r6,PACAPACAINDEX(r13) + rldimi r6,r5,0,62 /* XXX map as if threads 1:1 p:v */ lhz r8,VCPU_LAST_CPU(r4) sldi r7,r6,1 /* see if this is the same vcpu */ add r7,r7,r9 /* as last ran on this pcpu */ @@ -512,8 +559,60 @@ hcall_real_cont: ptesync hdec_soon: - /* Switch back to host partition */ + /* Increment the threads-exiting-guest count in the 0xff00 + bits of vcore->entry_exit_count */ + lwsync + ld r5,HSTATE_KVM_VCORE(r13) + addi r6,r5,VCORE_ENTRY_EXIT +41: lwarx r3,0,r6 + addi r0,r3,0x100 + stwcx. r0,0,r6 + bne 41b + + /* + * At this point we have an interrupt that we have to pass + * up to the kernel or qemu; we can't handle it in real mode. + * Thus we have to do a partition switch, so we have to + * collect the other threads, if we are the first thread + * to take an interrupt. To do this, we set the HDEC to 0, + * which causes an HDEC interrupt in all threads within 2ns + * because the HDEC register is shared between all 4 threads. + * However, we don't need to bother if this is an HDEC + * interrupt, since the other threads will already be on their + * way here in that case. + */ + cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER + beq 40f + cmpwi r3,0x100 /* Are we the first here? */ + bge 40f + cmpwi r3,1 + ble 40f + li r0,0 + mtspr SPRN_HDEC,r0 +40: + + /* Secondary threads wait for primary to do partition switch */ ld r4,VCPU_KVM(r9) /* pointer to struct kvm */ + ld r5,HSTATE_KVM_VCORE(r13) + lwz r3,VCPU_PTID(r9) + cmpwi r3,0 + beq 15f + HMT_LOW +13: lbz r3,VCORE_IN_GUEST(r5) + cmpwi r3,0 + bne 13b + HMT_MEDIUM + b 16f + + /* Primary thread waits for all the secondaries to exit guest */ +15: lwz r3,VCORE_ENTRY_EXIT(r5) + srwi r0,r3,8 + clrldi r3,r3,56 + cmpw r3,r0 + bne 15b + isync + + /* Primary thread switches back to host partition */ ld r6,KVM_HOST_SDR1(r4) lwz r7,KVM_HOST_LPID(r4) li r8,LPID_RSVD /* switch to reserved LPID */ @@ -522,10 +621,12 @@ hdec_soon: mtspr SPRN_SDR1,r6 /* switch to partition page table */ mtspr SPRN_LPID,r7 isync + li r0,0 + stb r0,VCORE_IN_GUEST(r5) lis r8,0x7fff /* MAX_INT@h */ mtspr SPRN_HDEC,r8 - ld r8,KVM_HOST_LPCR(r4) +16: ld r8,KVM_HOST_LPCR(r4) mtspr SPRN_LPCR,r8 isync @@ -634,6 +735,11 @@ hdec_soon: mr r3, r9 bl .kvmppc_save_fp + /* Secondary threads go off to take a nap */ + lwz r0,VCPU_PTID(r3) + cmpwi r0,0 + bne secondary_nap + /* * Reload DEC. HDEC interrupts were disabled when * we reloaded the host's LPCR value. @@ -840,6 +946,56 @@ _GLOBAL(kvmppc_h_set_dabr) li r3,0 blr +secondary_too_late: + ld r5,HSTATE_KVM_VCORE(r13) + HMT_LOW +13: lbz r3,VCORE_IN_GUEST(r5) + cmpwi r3,0 + bne 13b + HMT_MEDIUM + ld r11,PACA_SLBSHADOWPTR(r13) + + .rept SLB_NUM_BOLTED + ld r5,SLBSHADOW_SAVEAREA(r11) + ld r6,SLBSHADOW_SAVEAREA+8(r11) + andis. r7,r5,SLB_ESID_V@h + beq 1f + slbmte r6,r5 +1: addi r11,r11,16 + .endr + b 50f + +secondary_nap: + /* Clear any pending IPI */ +50: ld r5, HSTATE_XICS_PHYS(r13) + li r0, 0xff + li r6, XICS_QIRR + stbcix r0, r5, r6 + + /* increment the nap count and then go to nap mode */ + ld r4, HSTATE_KVM_VCORE(r13) + addi r4, r4, VCORE_NAP_COUNT + lwsync /* make previous updates visible */ +51: lwarx r3, 0, r4 + addi r3, r3, 1 + stwcx. r3, 0, r4 + bne 51b + isync + + mfspr r4, SPRN_LPCR + li r0, LPCR_PECE + andc r4, r4, r0 + ori r4, r4, LPCR_PECE0 /* exit nap on interrupt */ + mtspr SPRN_LPCR, r4 + li r0, 0 + std r0, HSTATE_SCRATCH0(r13) + ptesync + ld r0, HSTATE_SCRATCH0(r13) +1: cmpd r0, r0 + bne 1b + nap + b . + /* * Save away FP, VMX and VSX registers. * r3 = vcpu pointer diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index c78ceb9d5605..4c549664c987 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -30,6 +30,7 @@ #include <asm/uaccess.h> #include <asm/kvm_ppc.h> #include <asm/tlbflush.h> +#include <asm/cputhreads.h> #include "timing.h" #include "../mm/mmu_decl.h" @@ -207,6 +208,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_SPAPR_TCE: r = 1; break; + case KVM_CAP_PPC_SMT: + r = threads_per_core; + break; #endif default: r = 0; |