diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/include/asm/kexec.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/vmx.h | 3 | ||||
-rw-r--r-- | arch/x86/kernel/crash.c | 32 | ||||
-rw-r--r-- | arch/x86/kvm/cpuid.c | 1 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 16 | ||||
-rw-r--r-- | arch/x86/kvm/svm.c | 14 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 96 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 10 |
8 files changed, 131 insertions, 43 deletions
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 317ff1703d0b..28feeba2fdd6 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -163,6 +163,8 @@ struct kimage_arch { }; #endif +extern void (*crash_vmclear_loaded_vmcss)(void); + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_KEXEC_H */ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 36ec21c36d68..c2d56b34830d 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -445,8 +445,7 @@ enum vmcs_field { #define VMX_EPTP_WB_BIT (1ull << 14) #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) #define VMX_EPT_1GB_PAGE_BIT (1ull << 17) -#define VMX_EPT_AD_BIT (1ull << 21) -#define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24) +#define VMX_EPT_AD_BIT (1ull << 21) #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 13ad89971d47..2f6b8e838d18 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -16,6 +16,7 @@ #include <linux/delay.h> #include <linux/elf.h> #include <linux/elfcore.h> +#include <linux/module.h> #include <asm/processor.h> #include <asm/hardirq.h> @@ -30,6 +31,27 @@ int in_crash_kexec; +/* + * This is used to VMCLEAR all VMCSs loaded on the + * processor. And when loading kvm_intel module, the + * callback function pointer will be assigned. + * + * protected by rcu. + */ +void (*crash_vmclear_loaded_vmcss)(void) = NULL; +EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss); + +static inline void cpu_crash_vmclear_loaded_vmcss(void) +{ + void (*do_vmclear_operation)(void) = NULL; + + rcu_read_lock(); + do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss); + if (do_vmclear_operation) + do_vmclear_operation(); + rcu_read_unlock(); +} + #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) static void kdump_nmi_callback(int cpu, struct pt_regs *regs) @@ -46,6 +68,11 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs) #endif crash_save_cpu(regs, cpu); + /* + * VMCLEAR VMCSs loaded on all cpus if needed. + */ + cpu_crash_vmclear_loaded_vmcss(); + /* Disable VMX or SVM if needed. * * We need to disable virtualization on all CPUs. @@ -88,6 +115,11 @@ void native_machine_crash_shutdown(struct pt_regs *regs) kdump_nmi_shootdown_cpus(); + /* + * VMCLEAR VMCSs loaded on this cpu if needed. + */ + cpu_crash_vmclear_loaded_vmcss(); + /* Booting kdump kernel with VMX or SVM enabled won't work, * because (among other limitations) we can't disable paging * with the virt flags. diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 52f6166ef92c..a20ecb5b6cbf 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -661,6 +661,7 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) } else *eax = *ebx = *ecx = *edx = 0; } +EXPORT_SYMBOL_GPL(kvm_cpuid); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) { diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index b875a9ed9b8e..01d7c2ad05f5 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2382,12 +2382,20 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, || (!vcpu->arch.mmu.direct_map && write_fault && !is_write_protection(vcpu) && !user_fault)) { + /* + * There are two cases: + * - the one is other vcpu creates new sp in the window + * between mapping_level() and acquiring mmu-lock. + * - the another case is the new sp is created by itself + * (page-fault path) when guest uses the target gfn as + * its page table. + * Both of these cases can be fixed by allowing guest to + * retry the access, it will refault, then we can establish + * the mapping by using small page. + */ if (level > PT_PAGE_TABLE_LEVEL && - has_wrprotected_page(vcpu->kvm, gfn, level)) { - ret = 1; - drop_spte(vcpu->kvm, sptep); + has_wrprotected_page(vcpu->kvm, gfn, level)) goto done; - } spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index dcb79527e7aa..d29d3cd1c156 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -20,6 +20,7 @@ #include "mmu.h" #include "kvm_cache_regs.h" #include "x86.h" +#include "cpuid.h" #include <linux/module.h> #include <linux/mod_devicetable.h> @@ -1193,6 +1194,8 @@ static void init_vmcb(struct vcpu_svm *svm) static int svm_vcpu_reset(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + u32 dummy; + u32 eax = 1; init_vmcb(svm); @@ -1201,8 +1204,9 @@ static int svm_vcpu_reset(struct kvm_vcpu *vcpu) svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12; svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8; } - vcpu->arch.regs_avail = ~0; - vcpu->arch.regs_dirty = ~0; + + kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); + kvm_register_write(vcpu, VCPU_REGS_RDX, eax); return 0; } @@ -1259,10 +1263,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) svm->asid_generation = 0; init_vmcb(svm); - err = fx_init(&svm->vcpu); - if (err) - goto free_page4; - svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; if (kvm_vcpu_is_bsp(&svm->vcpu)) svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; @@ -1271,8 +1271,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) return &svm->vcpu; -free_page4: - __free_page(hsave_page); free_page3: __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); free_page2: diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2fd2046dc94c..1a30fd5c3fb2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -42,6 +42,7 @@ #include <asm/i387.h> #include <asm/xcr.h> #include <asm/perf_event.h> +#include <asm/kexec.h> #include "trace.h" @@ -802,11 +803,6 @@ static inline bool cpu_has_vmx_ept_ad_bits(void) return vmx_capability.ept & VMX_EPT_AD_BIT; } -static inline bool cpu_has_vmx_invept_individual_addr(void) -{ - return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT; -} - static inline bool cpu_has_vmx_invept_context(void) { return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT; @@ -992,6 +988,46 @@ static void vmcs_load(struct vmcs *vmcs) vmcs, phys_addr); } +#ifdef CONFIG_KEXEC +/* + * This bitmap is used to indicate whether the vmclear + * operation is enabled on all cpus. All disabled by + * default. + */ +static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE; + +static inline void crash_enable_local_vmclear(int cpu) +{ + cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap); +} + +static inline void crash_disable_local_vmclear(int cpu) +{ + cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap); +} + +static inline int crash_local_vmclear_enabled(int cpu) +{ + return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap); +} + +static void crash_vmclear_local_loaded_vmcss(void) +{ + int cpu = raw_smp_processor_id(); + struct loaded_vmcs *v; + + if (!crash_local_vmclear_enabled(cpu)) + return; + + list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu), + loaded_vmcss_on_cpu_link) + vmcs_clear(v->vmcs); +} +#else +static inline void crash_enable_local_vmclear(int cpu) { } +static inline void crash_disable_local_vmclear(int cpu) { } +#endif /* CONFIG_KEXEC */ + static void __loaded_vmcs_clear(void *arg) { struct loaded_vmcs *loaded_vmcs = arg; @@ -1001,6 +1037,7 @@ static void __loaded_vmcs_clear(void *arg) return; /* vcpu migration can race with cpu offline */ if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs) per_cpu(current_vmcs, cpu) = NULL; + crash_disable_local_vmclear(cpu); list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link); /* @@ -1012,6 +1049,7 @@ static void __loaded_vmcs_clear(void *arg) smp_wmb(); loaded_vmcs_init(loaded_vmcs); + crash_enable_local_vmclear(cpu); } static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs) @@ -1062,17 +1100,6 @@ static inline void ept_sync_context(u64 eptp) } } -static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa) -{ - if (enable_ept) { - if (cpu_has_vmx_invept_individual_addr()) - __invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR, - eptp, gpa); - else - ept_sync_context(eptp); - } -} - static __always_inline unsigned long vmcs_readl(unsigned long field) { unsigned long value; @@ -1546,6 +1573,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); local_irq_disable(); + crash_disable_local_vmclear(cpu); /* * Read loaded_vmcs->cpu should be before fetching @@ -1556,6 +1584,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link, &per_cpu(loaded_vmcss_on_cpu, cpu)); + crash_enable_local_vmclear(cpu); local_irq_enable(); /* @@ -2369,6 +2398,18 @@ static int hardware_enable(void *garbage) return -EBUSY; INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); + + /* + * Now we can enable the vmclear operation in kdump + * since the loaded_vmcss_on_cpu list on this cpu + * has been initialized. + * + * Though the cpu is not in VMX operation now, there + * is no problem to enable the vmclear operation + * for the loaded_vmcss_on_cpu list is empty! + */ + crash_enable_local_vmclear(cpu); + rdmsrl(MSR_IA32_FEATURE_CONTROL, old); test_bits = FEATURE_CONTROL_LOCKED; @@ -3934,8 +3975,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) u64 msr; int ret; - vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); - vmx->rmode.vm86_active = 0; vmx->soft_vnmi_blocked = 0; @@ -3947,10 +3986,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) msr |= MSR_IA32_APICBASE_BSP; kvm_set_apic_base(&vmx->vcpu, msr); - ret = fx_init(&vmx->vcpu); - if (ret != 0) - goto out; - vmx_segment_cache_clear(vmx); seg_setup(VCPU_SREG_CS); @@ -3991,7 +4026,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) kvm_rip_write(vcpu, 0xfff0); else kvm_rip_write(vcpu, 0); - kvm_register_write(vcpu, VCPU_REGS_RSP, 0); vmcs_writel(GUEST_GDTR_BASE, 0); vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); @@ -4041,7 +4075,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) /* HACK: Don't enable emulation on guest boot/reset */ vmx->emulation_required = 0; -out: return ret; } @@ -4863,11 +4896,6 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - if (exit_qualification & (1 << 6)) { - printk(KERN_ERR "EPT: GPA exceeds GAW!\n"); - return -EINVAL; - } - gla_validity = (exit_qualification >> 7) & 0x3; if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) { printk(KERN_ERR "EPT: Handling EPT violation failed!\n"); @@ -7412,6 +7440,11 @@ static int __init vmx_init(void) if (r) goto out3; +#ifdef CONFIG_KEXEC + rcu_assign_pointer(crash_vmclear_loaded_vmcss, + crash_vmclear_local_loaded_vmcss); +#endif + vmx_disable_intercept_for_msr(MSR_FS_BASE, false); vmx_disable_intercept_for_msr(MSR_GS_BASE, false); vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); @@ -7449,6 +7482,11 @@ static void __exit vmx_exit(void) free_page((unsigned long)vmx_io_bitmap_b); free_page((unsigned long)vmx_io_bitmap_a); +#ifdef CONFIG_KEXEC + rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL); + synchronize_rcu(); +#endif + kvm_exit(); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3bdaf298b8c7..57c76e86e9bd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6461,6 +6461,10 @@ static int kvm_vcpu_reset(struct kvm_vcpu *vcpu) kvm_pmu_reset(vcpu); + memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs)); + vcpu->arch.regs_avail = ~0; + vcpu->arch.regs_dirty = ~0; + return kvm_x86_ops->vcpu_reset(vcpu); } @@ -6629,11 +6633,17 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) goto fail_free_mce_banks; + r = fx_init(vcpu); + if (r) + goto fail_free_wbinvd_dirty_mask; + vcpu->arch.ia32_tsc_adjust_msr = 0x0; kvm_async_pf_hash_reset(vcpu); kvm_pmu_init(vcpu); return 0; +fail_free_wbinvd_dirty_mask: + free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); fail_free_mce_banks: kfree(vcpu->arch.mce_banks); fail_free_lapic: |