summaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2015-07-31 10:23:35 +0200
committerIngo Molnar <mingo@kernel.org>2015-07-31 10:23:35 +0200
commit5b929bd11df23922daf1be5d52731cc3900c1d79 (patch)
tree105fa5987b03c9f4e0260a9eb04dff7bb3d16839 /arch/x86/kvm
parentb2c51106c7581866c37ffc77c5d739f3d4b7cbc9 (diff)
parent37868fe113ff2ba814b3b4eb12df214df555f8dc (diff)
downloadblackbird-obmc-linux-5b929bd11df23922daf1be5d52731cc3900c1d79.tar.gz
blackbird-obmc-linux-5b929bd11df23922daf1be5d52731cc3900c1d79.zip
Merge branch 'x86/urgent' into x86/asm, before applying dependent patches
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--arch/x86/kvm/cpuid.c2
-rw-r--r--arch/x86/kvm/iommu.c2
-rw-r--r--arch/x86/kvm/lapic.c2
-rw-r--r--arch/x86/kvm/mmu.c10
-rw-r--r--arch/x86/kvm/mtrr.c40
-rw-r--r--arch/x86/kvm/svm.c110
-rw-r--r--arch/x86/kvm/vmx.c16
-rw-r--r--arch/x86/kvm/x86.c26
-rw-r--r--arch/x86/kvm/x86.h5
9 files changed, 179 insertions, 34 deletions
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 64dd46793099..2fbea2544f24 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -98,6 +98,8 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
vcpu->arch.eager_fpu = use_eager_fpu() || guest_cpuid_has_mpx(vcpu);
+ if (vcpu->arch.eager_fpu)
+ kvm_x86_ops->fpu_activate(vcpu);
/*
* The existing code assumes virtual address is 48-bit in the canonical
diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c
index 7dbced309ddb..5c520ebf6343 100644
--- a/arch/x86/kvm/iommu.c
+++ b/arch/x86/kvm/iommu.c
@@ -200,6 +200,7 @@ int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev)
goto out_unmap;
}
+ kvm_arch_start_assignment(kvm);
pci_set_dev_assigned(pdev);
dev_info(&pdev->dev, "kvm assign device\n");
@@ -224,6 +225,7 @@ int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev)
iommu_detach_device(domain, &pdev->dev);
pci_clear_dev_assigned(pdev);
+ kvm_arch_end_assignment(kvm);
dev_info(&pdev->dev, "kvm deassign device\n");
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 2f0ade48614f..946f52c43b8f 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1595,7 +1595,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
for (i = 0; i < APIC_LVT_NUM; i++)
apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
apic_update_lvtt(apic);
- if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_LINT0_REENABLED))
+ if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
apic_set_reg(apic, APIC_LVT0,
SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
apic_manage_nmi_watchdog(apic, kvm_apic_get_reg(apic, APIC_LVT0));
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f807496b62c2..44171462bd2a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2479,6 +2479,14 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
return 0;
}
+static bool kvm_is_mmio_pfn(pfn_t pfn)
+{
+ if (pfn_valid(pfn))
+ return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn));
+
+ return true;
+}
+
static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
unsigned pte_access, int level,
gfn_t gfn, pfn_t pfn, bool speculative,
@@ -2506,7 +2514,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
spte |= PT_PAGE_SIZE_MASK;
if (tdp_enabled)
spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
- kvm_is_reserved_pfn(pfn));
+ kvm_is_mmio_pfn(pfn));
if (host_writable)
spte |= SPTE_HOST_WRITEABLE;
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index de1d2d8062e2..dc0a84a6f309 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -120,6 +120,16 @@ static u8 mtrr_default_type(struct kvm_mtrr *mtrr_state)
return mtrr_state->deftype & IA32_MTRR_DEF_TYPE_TYPE_MASK;
}
+static u8 mtrr_disabled_type(void)
+{
+ /*
+ * Intel SDM 11.11.2.2: all MTRRs are disabled when
+ * IA32_MTRR_DEF_TYPE.E bit is cleared, and the UC
+ * memory type is applied to all of physical memory.
+ */
+ return MTRR_TYPE_UNCACHABLE;
+}
+
/*
* Three terms are used in the following code:
* - segment, it indicates the address segments covered by fixed MTRRs.
@@ -434,6 +444,8 @@ struct mtrr_iter {
/* output fields. */
int mem_type;
+ /* mtrr is completely disabled? */
+ bool mtrr_disabled;
/* [start, end) is not fully covered in MTRRs? */
bool partial_map;
@@ -549,7 +561,7 @@ static void mtrr_lookup_var_next(struct mtrr_iter *iter)
static void mtrr_lookup_start(struct mtrr_iter *iter)
{
if (!mtrr_is_enabled(iter->mtrr_state)) {
- iter->partial_map = true;
+ iter->mtrr_disabled = true;
return;
}
@@ -563,6 +575,7 @@ static void mtrr_lookup_init(struct mtrr_iter *iter,
iter->mtrr_state = mtrr_state;
iter->start = start;
iter->end = end;
+ iter->mtrr_disabled = false;
iter->partial_map = false;
iter->fixed = false;
iter->range = NULL;
@@ -656,15 +669,19 @@ u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
return MTRR_TYPE_WRBACK;
}
- /* It is not covered by MTRRs. */
- if (iter.partial_map) {
- /*
- * We just check one page, partially covered by MTRRs is
- * impossible.
- */
- WARN_ON(type != -1);
- type = mtrr_default_type(mtrr_state);
- }
+ if (iter.mtrr_disabled)
+ return mtrr_disabled_type();
+
+ /*
+ * We just check one page, partially covered by MTRRs is
+ * impossible.
+ */
+ WARN_ON(iter.partial_map);
+
+ /* not contained in any MTRRs. */
+ if (type == -1)
+ return mtrr_default_type(mtrr_state);
+
return type;
}
EXPORT_SYMBOL_GPL(kvm_mtrr_get_guest_memory_type);
@@ -689,6 +706,9 @@ bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
return false;
}
+ if (iter.mtrr_disabled)
+ return true;
+
if (!iter.partial_map)
return true;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8dfbad7a2c44..a4b8c8d57b96 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -865,6 +865,64 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
}
+#define MTRR_TYPE_UC_MINUS 7
+#define MTRR2PROTVAL_INVALID 0xff
+
+static u8 mtrr2protval[8];
+
+static u8 fallback_mtrr_type(int mtrr)
+{
+ /*
+ * WT and WP aren't always available in the host PAT. Treat
+ * them as UC and UC- respectively. Everything else should be
+ * there.
+ */
+ switch (mtrr)
+ {
+ case MTRR_TYPE_WRTHROUGH:
+ return MTRR_TYPE_UNCACHABLE;
+ case MTRR_TYPE_WRPROT:
+ return MTRR_TYPE_UC_MINUS;
+ default:
+ BUG();
+ }
+}
+
+static void build_mtrr2protval(void)
+{
+ int i;
+ u64 pat;
+
+ for (i = 0; i < 8; i++)
+ mtrr2protval[i] = MTRR2PROTVAL_INVALID;
+
+ /* Ignore the invalid MTRR types. */
+ mtrr2protval[2] = 0;
+ mtrr2protval[3] = 0;
+
+ /*
+ * Use host PAT value to figure out the mapping from guest MTRR
+ * values to nested page table PAT/PCD/PWT values. We do not
+ * want to change the host PAT value every time we enter the
+ * guest.
+ */
+ rdmsrl(MSR_IA32_CR_PAT, pat);
+ for (i = 0; i < 8; i++) {
+ u8 mtrr = pat >> (8 * i);
+
+ if (mtrr2protval[mtrr] == MTRR2PROTVAL_INVALID)
+ mtrr2protval[mtrr] = __cm_idx2pte(i);
+ }
+
+ for (i = 0; i < 8; i++) {
+ if (mtrr2protval[i] == MTRR2PROTVAL_INVALID) {
+ u8 fallback = fallback_mtrr_type(i);
+ mtrr2protval[i] = mtrr2protval[fallback];
+ BUG_ON(mtrr2protval[i] == MTRR2PROTVAL_INVALID);
+ }
+ }
+}
+
static __init int svm_hardware_setup(void)
{
int cpu;
@@ -931,6 +989,7 @@ static __init int svm_hardware_setup(void)
} else
kvm_disable_tdp();
+ build_mtrr2protval();
return 0;
err:
@@ -1085,6 +1144,39 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
return target_tsc - tsc;
}
+static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat)
+{
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+
+ /* Unlike Intel, AMD takes the guest's CR0.CD into account.
+ *
+ * AMD does not have IPAT. To emulate it for the case of guests
+ * with no assigned devices, just set everything to WB. If guests
+ * have assigned devices, however, we cannot force WB for RAM
+ * pages only, so use the guest PAT directly.
+ */
+ if (!kvm_arch_has_assigned_device(vcpu->kvm))
+ *g_pat = 0x0606060606060606;
+ else
+ *g_pat = vcpu->arch.pat;
+}
+
+static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
+{
+ u8 mtrr;
+
+ /*
+ * 1. MMIO: trust guest MTRR, so same as item 3.
+ * 2. No passthrough: always map as WB, and force guest PAT to WB as well
+ * 3. Passthrough: can't guarantee the result, try to trust guest.
+ */
+ if (!is_mmio && !kvm_arch_has_assigned_device(vcpu->kvm))
+ return 0;
+
+ mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
+ return mtrr2protval[mtrr];
+}
+
static void init_vmcb(struct vcpu_svm *svm, bool init_event)
{
struct vmcb_control_area *control = &svm->vmcb->control;
@@ -1180,6 +1272,7 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event)
clr_cr_intercept(svm, INTERCEPT_CR3_READ);
clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
save->g_pat = svm->vcpu.arch.pat;
+ svm_set_guest_pat(svm, &save->g_pat);
save->cr3 = 0;
save->cr4 = 0;
}
@@ -1579,7 +1672,7 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
* does not do it - this results in some delay at
* reboot
*/
- if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_CD_NW_CLEARED))
+ if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
svm->vmcb->save.cr0 = cr0;
mark_dirty(svm->vmcb, VMCB_CR);
@@ -3254,6 +3347,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_VM_IGNNE:
vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
break;
+ case MSR_IA32_CR_PAT:
+ if (npt_enabled) {
+ if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+ return 1;
+ vcpu->arch.pat = data;
+ svm_set_guest_pat(svm, &svm->vmcb->save.g_pat);
+ mark_dirty(svm->vmcb, VMCB_NPT);
+ break;
+ }
+ /* fall through */
default:
return kvm_set_msr_common(vcpu, msr);
}
@@ -4088,11 +4191,6 @@ static bool svm_has_high_real_mode_segbase(void)
return true;
}
-static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
-{
- return 0;
-}
-
static void svm_cpuid_update(struct kvm_vcpu *vcpu)
{
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 10d69a6df14f..af30c265412c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8632,22 +8632,17 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
u64 ipat = 0;
/* For VT-d and EPT combination
- * 1. MMIO: always map as UC
+ * 1. MMIO: guest may want to apply WC, trust it.
* 2. EPT with VT-d:
* a. VT-d without snooping control feature: can't guarantee the
- * result, try to trust guest.
+ * result, try to trust guest. So the same as item 1.
* b. VT-d with snooping control feature: snooping control feature of
* VT-d engine can guarantee the cache correctness. Just set it
* to WB to keep consistent with host. So the same as item 3.
* 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
* consistent with host MTRR
*/
- if (is_mmio) {
- cache = MTRR_TYPE_UNCACHABLE;
- goto exit;
- }
-
- if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
+ if (!is_mmio && !kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
ipat = VMX_EPT_IPAT_BIT;
cache = MTRR_TYPE_WRBACK;
goto exit;
@@ -8655,7 +8650,10 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
ipat = VMX_EPT_IPAT_BIT;
- cache = MTRR_TYPE_UNCACHABLE;
+ if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
+ cache = MTRR_TYPE_WRBACK;
+ else
+ cache = MTRR_TYPE_UNCACHABLE;
goto exit;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8d73ec8a2364..a51f94d3193d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3145,8 +3145,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
cpuid_count(XSTATE_CPUID, index,
&size, &offset, &ecx, &edx);
memcpy(dest, src + offset, size);
- } else
- WARN_ON_ONCE(1);
+ }
valid -= feature;
}
@@ -7303,11 +7302,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
vcpu = kvm_x86_ops->vcpu_create(kvm, id);
- /*
- * Activate fpu unconditionally in case the guest needs eager FPU. It will be
- * deactivated soon if it doesn't.
- */
- kvm_x86_ops->fpu_activate(vcpu);
return vcpu;
}
@@ -8206,6 +8200,24 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
kvm_x86_ops->interrupt_allowed(vcpu);
}
+void kvm_arch_start_assignment(struct kvm *kvm)
+{
+ atomic_inc(&kvm->arch.assigned_device_count);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);
+
+void kvm_arch_end_assignment(struct kvm *kvm)
+{
+ atomic_dec(&kvm->arch.assigned_device_count);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
+
+bool kvm_arch_has_assigned_device(struct kvm *kvm)
+{
+ return atomic_read(&kvm->arch.assigned_device_count);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
+
void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
{
atomic_inc(&kvm->arch.noncoherent_dma_count);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index edc8cdcd786b..0ca2f3e4803c 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -147,6 +147,11 @@ static inline void kvm_register_writel(struct kvm_vcpu *vcpu,
return kvm_register_write(vcpu, reg, val);
}
+static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
+{
+ return !(kvm->arch.disabled_quirks & quirk);
+}
+
void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
void kvm_set_pending_timer(struct kvm_vcpu *vcpu);
OpenPOWER on IntegriCloud