From ffde22ac53b6d6b1d7206f1172176a667eead778 Mon Sep 17 00:00:00 2001 From: Ed Swierk Date: Thu, 15 Oct 2009 15:21:43 -0700 Subject: KVM: Xen PV-on-HVM guest support Support for Xen PV-on-HVM guests can be implemented almost entirely in userspace, except for handling one annoying MSR that maps a Xen hypercall blob into guest address space. A generic mechanism to delegate MSR writes to userspace seems overkill and risks encouraging similar MSR abuse in the future. Thus this patch adds special support for the Xen HVM MSR. I implemented a new ioctl, KVM_XEN_HVM_CONFIG, that lets userspace tell KVM which MSR the guest will write to, as well as the starting address and size of the hypercall blobs (one each for 32-bit and 64-bit) that userspace has loaded from files. When the guest writes to the MSR, KVM copies one page of the blob from userspace to the guest. I've tested this patch with a hacked-up version of Gerd's userspace code, booting a number of guests (CentOS 5.3 i386 and x86_64, and FreeBSD 8.0-RC1 amd64) and exercising PV network and block devices. 
[jan: fix i386 build warning] [avi: future proof abi with a flags field] Signed-off-by: Ed Swierk Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- include/linux/kvm.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux/kvm.h') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index f8f8900fc5ec..b694c1d2f918 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -436,6 +436,9 @@ struct kvm_ioeventfd { #endif #define KVM_CAP_IOEVENTFD 36 #define KVM_CAP_SET_IDENTITY_MAP_ADDR 37 +#ifdef __KVM_HAVE_XEN_HVM +#define KVM_CAP_XEN_HVM 38 +#endif #ifdef KVM_CAP_IRQ_ROUTING @@ -488,6 +491,18 @@ struct kvm_x86_mce { }; #endif +#ifdef KVM_CAP_XEN_HVM +struct kvm_xen_hvm_config { + __u32 flags; + __u32 msr; + __u64 blob_addr_32; + __u64 blob_addr_64; + __u8 blob_size_32; + __u8 blob_size_64; + __u8 pad2[30]; +}; +#endif + #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) struct kvm_irqfd { @@ -546,6 +561,7 @@ struct kvm_irqfd { #define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) #define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78) #define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd) +#define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config) /* * ioctls for vcpu fds -- cgit v1.2.3 From afbcf7ab8d1bc8c2d04792f6d9e786e0adeb328d Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 16 Oct 2009 15:28:36 -0400 Subject: KVM: allow userspace to adjust kvmclock offset When we migrate a kvm guest that uses pvclock between two hosts, we may suffer a large skew. This is because there can be significant differences between the monotonic clock of the hosts involved. When a new host with a much larger monotonic time starts running the guest, the view of time will be significantly impacted. Situation is much worse when we do the opposite, and migrate to a host with a smaller monotonic clock. 
This proposed ioctl will allow userspace to inform us what is the monotonic clock value in the source host, so we can keep the time skew short and, more importantly, ensure that time never goes backwards. Userspace may also need to trigger the current data, since from the first migration onwards, it won't be reflected by a simple call to clock_gettime() anymore. [marcelo: future-proof abi with a flags field] [jan: fix KVM_GET_CLOCK by clearing flags field instead of checking it] Signed-off-by: Glauber Costa Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- Documentation/kvm/api.txt | 36 +++++++++++++++++++++++++++++++++++ arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 42 ++++++++++++++++++++++++++++++++++++++++- include/linux/kvm.h | 10 ++++++++++ 4 files changed, 88 insertions(+), 1 deletion(-) (limited to 'include/linux/kvm.h') diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 3e8684e48506..36594ba57723 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -617,6 +617,42 @@ struct kvm_xen_hvm_config { __u8 pad2[30]; }; +4.27 KVM_GET_CLOCK + +Capability: KVM_CAP_ADJUST_CLOCK +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_clock_data (out) +Returns: 0 on success, -1 on error + +Gets the current timestamp of kvmclock as seen by the current guest. In +conjunction with KVM_SET_CLOCK, it is used to ensure monotonicity on scenarios +such as migration. + +struct kvm_clock_data { + __u64 clock; /* kvmclock current value */ + __u32 flags; + __u32 pad[9]; +}; + +4.28 KVM_SET_CLOCK + +Capability: KVM_CAP_ADJUST_CLOCK +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_clock_data (in) +Returns: 0 on success, -1 on error + +Sets the current timestamp of kvmclock to the value specified in its parameter. +In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity on scenarios +such as migration. 
+ +struct kvm_clock_data { + __u64 clock; /* kvmclock current value */ + __u32 flags; + __u32 pad[9]; +}; + 5. The kvm_run structure Application code obtains a pointer to the kvm_run structure by diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4d994ad5051a..0558ff8c32ae 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -413,6 +413,7 @@ struct kvm_arch{ unsigned long irq_sources_bitmap; u64 vm_init_tsc; + s64 kvmclock_offset; struct kvm_xen_hvm_config xen_hvm_config; }; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 13f30aac460b..e16cdc9ec0c1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -680,7 +680,8 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) /* With all the info we got, fill in the values */ vcpu->hv_clock.system_time = ts.tv_nsec + - (NSEC_PER_SEC * (u64)ts.tv_sec); + (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset; + /* * The interface expects us to write an even number signaling that the * update is finished. 
Since the guest won't see the intermediate @@ -1262,6 +1263,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PIT_STATE2: case KVM_CAP_SET_IDENTITY_MAP_ADDR: case KVM_CAP_XEN_HVM: + case KVM_CAP_ADJUST_CLOCK: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -2468,6 +2470,44 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } + case KVM_SET_CLOCK: { + struct timespec now; + struct kvm_clock_data user_ns; + u64 now_ns; + s64 delta; + + r = -EFAULT; + if (copy_from_user(&user_ns, argp, sizeof(user_ns))) + goto out; + + r = -EINVAL; + if (user_ns.flags) + goto out; + + r = 0; + ktime_get_ts(&now); + now_ns = timespec_to_ns(&now); + delta = user_ns.clock - now_ns; + kvm->arch.kvmclock_offset = delta; + break; + } + case KVM_GET_CLOCK: { + struct timespec now; + struct kvm_clock_data user_ns; + u64 now_ns; + + ktime_get_ts(&now); + now_ns = timespec_to_ns(&now); + user_ns.clock = kvm->arch.kvmclock_offset + now_ns; + user_ns.flags = 0; + + r = -EFAULT; + if (copy_to_user(argp, &user_ns, sizeof(user_ns))) + goto out; + r = 0; + break; + } + default: ; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index b694c1d2f918..6ed1a12ed526 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -439,6 +439,7 @@ struct kvm_ioeventfd { #ifdef __KVM_HAVE_XEN_HVM #define KVM_CAP_XEN_HVM 38 #endif +#define KVM_CAP_ADJUST_CLOCK 39 #ifdef KVM_CAP_IRQ_ROUTING @@ -512,6 +513,12 @@ struct kvm_irqfd { __u8 pad[20]; }; +struct kvm_clock_data { + __u64 clock; + __u32 flags; + __u32 pad[9]; +}; + /* * ioctls for VM fds */ @@ -562,6 +569,9 @@ struct kvm_irqfd { #define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78) #define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd) #define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config) +#define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data) +#define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data) + /* * ioctls for vcpu fds -- cgit v1.2.3 From c54d2aba27f0c505d61700d656c5943e96982e60 Mon Sep 17 
00:00:00 2001 From: Jan Kiszka Date: Mon, 2 Nov 2009 17:20:28 +0100 Subject: KVM: Reorder IOCTLs in main kvm.h Obviously, people tend to extend this header at the bottom - more or less blindly. Ensure that deprecated stuff gets its own corner again by moving things to the top. Also add some comments and reindent IOCTLs to make them more readable and reduce the risk of number collisions. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- include/linux/kvm.h | 235 ++++++++++++++++++++++++++-------------------------- 1 file changed, 117 insertions(+), 118 deletions(-) (limited to 'include/linux/kvm.h') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 6ed1a12ed526..ca62b8e056f9 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -14,12 +14,76 @@ #define KVM_API_VERSION 12 -/* for KVM_TRACE_ENABLE, deprecated */ +/* *** Deprecated interfaces *** */ + +#define KVM_TRC_SHIFT 16 + +#define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT) +#define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1)) + +#define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01) +#define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02) +#define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01) + +#define KVM_TRC_HEAD_SIZE 12 +#define KVM_TRC_CYCLE_SIZE 8 +#define KVM_TRC_EXTRA_MAX 7 + +#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) +#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) +#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04) +#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05) +#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06) +#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07) +#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08) +#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09) +#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A) +#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B) +#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C) +#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D) +#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E) +#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F) +#define KVM_TRC_VMMCALL 
(KVM_TRC_HANDLER + 0x10) +#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11) +#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12) +#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) +#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) +#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) +#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16) +#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17) +#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) +#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) + struct kvm_user_trace_setup { - __u32 buf_size; /* sub_buffer size of each per-cpu */ - __u32 buf_nr; /* the number of sub_buffers of each per-cpu */ + __u32 buf_size; + __u32 buf_nr; +}; + +#define __KVM_DEPRECATED_MAIN_W_0x06 \ + _IOW(KVMIO, 0x06, struct kvm_user_trace_setup) +#define __KVM_DEPRECATED_MAIN_0x07 _IO(KVMIO, 0x07) +#define __KVM_DEPRECATED_MAIN_0x08 _IO(KVMIO, 0x08) + +#define __KVM_DEPRECATED_VM_R_0x70 _IOR(KVMIO, 0x70, struct kvm_assigned_irq) + +struct kvm_breakpoint { + __u32 enabled; + __u32 padding; + __u64 address; +}; + +struct kvm_debug_guest { + __u32 enabled; + __u32 pad; + struct kvm_breakpoint breakpoints[4]; + __u32 singlestep; }; +#define __KVM_DEPRECATED_VCPU_W_0x87 _IOW(KVMIO, 0x87, struct kvm_debug_guest) + +/* *** End of deprecated interfaces *** */ + + /* for KVM_CREATE_MEMORY_REGION */ struct kvm_memory_region { __u32 slot; @@ -329,24 +393,6 @@ struct kvm_ioeventfd { __u8 pad[36]; }; -#define KVM_TRC_SHIFT 16 -/* - * kvm trace categories - */ -#define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT) -#define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1)) /* only 12 bits */ - -/* - * kvm trace action - */ -#define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01) -#define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02) -#define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01) - -#define KVM_TRC_HEAD_SIZE 12 -#define KVM_TRC_CYCLE_SIZE 8 -#define KVM_TRC_EXTRA_MAX 7 - #define KVMIO 0xAE /* @@ -367,12 +413,10 @@ struct kvm_ioeventfd { */ #define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* 
in bytes */ #define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2) -/* - * ioctls for kvm trace - */ -#define KVM_TRACE_ENABLE _IOW(KVMIO, 0x06, struct kvm_user_trace_setup) -#define KVM_TRACE_PAUSE _IO(KVMIO, 0x07) -#define KVM_TRACE_DISABLE _IO(KVMIO, 0x08) +#define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06 +#define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07 +#define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08 + /* * Extension capability list. */ @@ -522,56 +566,57 @@ struct kvm_clock_data { /* * ioctls for VM fds */ -#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) +#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) /* * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns * a vcpu fd. */ -#define KVM_CREATE_VCPU _IO(KVMIO, 0x41) -#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) -#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) -#define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44) -#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) -#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\ +#define KVM_CREATE_VCPU _IO(KVMIO, 0x41) +#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) +#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) +#define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44) +#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) +#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \ struct kvm_userspace_memory_region) -#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) -#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) +#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) +#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) /* Device model IOC */ -#define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) -#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) -#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip) -#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip) -#define KVM_CREATE_PIT _IO(KVMIO, 0x64) -#define 
KVM_GET_PIT _IOWR(KVMIO, 0x65, struct kvm_pit_state) -#define KVM_SET_PIT _IOR(KVMIO, 0x66, struct kvm_pit_state) -#define KVM_IRQ_LINE_STATUS _IOWR(KVMIO, 0x67, struct kvm_irq_level) +#define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) +#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) +#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip) +#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip) +#define KVM_CREATE_PIT _IO(KVMIO, 0x64) +#define KVM_GET_PIT _IOWR(KVMIO, 0x65, struct kvm_pit_state) +#define KVM_SET_PIT _IOR(KVMIO, 0x66, struct kvm_pit_state) +#define KVM_IRQ_LINE_STATUS _IOWR(KVMIO, 0x67, struct kvm_irq_level) #define KVM_REGISTER_COALESCED_MMIO \ _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone) #define KVM_UNREGISTER_COALESCED_MMIO \ _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone) -#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ - struct kvm_assigned_pci_dev) -#define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) +#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ + struct kvm_assigned_pci_dev) +#define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) /* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */ -#define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ - struct kvm_assigned_irq) -#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq) -#define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) -#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ - struct kvm_assigned_pci_dev) -#define KVM_ASSIGN_SET_MSIX_NR \ - _IOW(KVMIO, 0x73, struct kvm_assigned_msix_nr) -#define KVM_ASSIGN_SET_MSIX_ENTRY \ - _IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry) -#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) -#define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) -#define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) -#define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78) -#define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd) -#define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct 
kvm_xen_hvm_config) -#define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data) -#define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data) - +#define KVM_ASSIGN_IRQ __KVM_DEPRECATED_VM_R_0x70 +#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq) +#define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) +#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ + struct kvm_assigned_pci_dev) +#define KVM_ASSIGN_SET_MSIX_NR _IOW(KVMIO, 0x73, \ + struct kvm_assigned_msix_nr) +#define KVM_ASSIGN_SET_MSIX_ENTRY _IOW(KVMIO, 0x74, \ + struct kvm_assigned_msix_entry) +#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) +#define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) +#define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) +#define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78) +#define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd) +#define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config) +#define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data) +#define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data) +/* Available with KVM_CAP_PIT_STATE2 */ +#define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2) +#define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2) /* * ioctls for vcpu fds @@ -584,7 +629,7 @@ struct kvm_clock_data { #define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation) #define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt) /* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */ -#define KVM_DEBUG_GUEST __KVM_DEPRECATED_DEBUG_GUEST +#define KVM_DEBUG_GUEST __KVM_DEPRECATED_VCPU_W_0x87 #define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs) #define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs) #define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid) @@ -596,7 +641,7 @@ struct kvm_clock_data { #define KVM_SET_CPUID2 _IOW(KVMIO, 0x90, struct kvm_cpuid2) #define KVM_GET_CPUID2 _IOWR(KVMIO, 0x91, struct kvm_cpuid2) /* Available with KVM_CAP_VAPIC */ 
-#define KVM_TPR_ACCESS_REPORTING _IOWR(KVMIO, 0x92, struct kvm_tpr_access_ctl) +#define KVM_TPR_ACCESS_REPORTING _IOWR(KVMIO, 0x92, struct kvm_tpr_access_ctl) /* Available with KVM_CAP_VAPIC */ #define KVM_SET_VAPIC_ADDR _IOW(KVMIO, 0x93, struct kvm_vapic_addr) /* valid for virtual machine (for floating interrupt)_and_ vcpu */ @@ -608,67 +653,21 @@ struct kvm_clock_data { /* initial ipl psw for s390 */ #define KVM_S390_SET_INITIAL_PSW _IOW(KVMIO, 0x96, struct kvm_s390_psw) /* initial reset for s390 */ -#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) +#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) #define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) #define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) /* Available with KVM_CAP_NMI */ -#define KVM_NMI _IO(KVMIO, 0x9a) +#define KVM_NMI _IO(KVMIO, 0x9a) /* Available with KVM_CAP_SET_GUEST_DEBUG */ #define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug) /* MCE for x86 */ #define KVM_X86_SETUP_MCE _IOW(KVMIO, 0x9c, __u64) #define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO, 0x9d, __u64) #define KVM_X86_SET_MCE _IOW(KVMIO, 0x9e, struct kvm_x86_mce) - -/* - * Deprecated interfaces - */ -struct kvm_breakpoint { - __u32 enabled; - __u32 padding; - __u64 address; -}; - -struct kvm_debug_guest { - __u32 enabled; - __u32 pad; - struct kvm_breakpoint breakpoints[4]; - __u32 singlestep; -}; - -#define __KVM_DEPRECATED_DEBUG_GUEST _IOW(KVMIO, 0x87, struct kvm_debug_guest) - +/* IA64 stack access */ #define KVM_IA64_VCPU_GET_STACK _IOR(KVMIO, 0x9a, void *) #define KVM_IA64_VCPU_SET_STACK _IOW(KVMIO, 0x9b, void *) -#define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2) -#define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2) - -#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) -#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) -#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04) -#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05) -#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06) 
-#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07) -#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08) -#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09) -#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A) -#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B) -#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C) -#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D) -#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E) -#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F) -#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10) -#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11) -#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12) -#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) -#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) -#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) -#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16) -#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17) -#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) -#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) - #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) struct kvm_assigned_pci_dev { @@ -722,4 +721,4 @@ struct kvm_assigned_msix_entry { __u16 padding[3]; }; -#endif +#endif /* __LINUX_KVM_H */ -- cgit v1.2.3 From a9c7399d6cda0a092b347f8ee49bbe44f6e1fe66 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 4 Nov 2009 11:54:59 +0200 Subject: KVM: Allow internal errors reported to userspace to carry extra data Usually userspace will freeze the guest so we can inspect it, but some internal state is not available. Add extra data to internal error reporting so we can expose it to the debugger. Extra data is specific to the suberror. 
Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 1 + arch/x86/kvm/vmx.c | 1 + include/linux/kvm.h | 4 ++++ virt/kvm/kvm_main.c | 1 + 4 files changed, 7 insertions(+) (limited to 'include/linux/kvm.h') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a9024797b21f..4c3e5b2314cb 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2800,6 +2800,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) case EMULATE_FAIL: vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; return 0; default: BUG(); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c9cc9596e1a6..c0e66dd58a47 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3352,6 +3352,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) kvm_report_emulation_failure(vcpu, "emulation failure"); vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; ret = 0; goto out; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index ca62b8e056f9..172639e94392 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -251,6 +251,9 @@ struct kvm_run { } dcr; struct { __u32 suberror; + /* Available with KVM_CAP_INTERNAL_ERROR_DATA: */ + __u32 ndata; + __u64 data[16]; } internal; /* Fix the size of the union. 
*/ char padding[256]; @@ -484,6 +487,7 @@ struct kvm_ioeventfd { #define KVM_CAP_XEN_HVM 38 #endif #define KVM_CAP_ADJUST_CLOCK 39 +#define KVM_CAP_INTERNAL_ERROR_DATA 40 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index bd44fb48ac43..f92ba138007a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1653,6 +1653,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) #ifdef CONFIG_KVM_APIC_ARCHITECTURE case KVM_CAP_SET_BOOT_CPU_ID: #endif + case KVM_CAP_INTERNAL_ERROR_DATA: return 1; #ifdef CONFIG_HAVE_KVM_IRQCHIP case KVM_CAP_IRQ_ROUTING: -- cgit v1.2.3 From 65ac7264043740572ba804edca03c374d70427c9 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 4 Nov 2009 11:59:01 +0200 Subject: KVM: VMX: Report unexpected simultaneous exceptions as internal errors These happen when we trap an exception when another exception is being delivered; we only expect these with MCEs and page faults. If something unexpected happens, things probably went south and we're better off reporting an internal error and freezing. 
Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 11 ++++++++--- include/linux/kvm.h | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux/kvm.h') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c0e66dd58a47..22fcd27a0b58 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2744,9 +2744,14 @@ static int handle_exception(struct kvm_vcpu *vcpu) return handle_machine_check(vcpu); if ((vect_info & VECTORING_INFO_VALID_MASK) && - !is_page_fault(intr_info)) - printk(KERN_ERR "%s: unexpected, vectoring info 0x%x " - "intr info 0x%x\n", __func__, vect_info, intr_info); + !is_page_fault(intr_info)) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX; + vcpu->run->internal.ndata = 2; + vcpu->run->internal.data[0] = vect_info; + vcpu->run->internal.data[1] = intr_info; + return 0; + } if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) return 1; /* already handled by vmx_vcpu_run() */ diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 172639e94392..976f4d181858 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -163,6 +163,7 @@ struct kvm_pit_config { /* For KVM_EXIT_INTERNAL_ERROR */ #define KVM_INTERNAL_ERROR_EMULATION 1 +#define KVM_INTERNAL_ERROR_SIMUL_EX 2 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { -- cgit v1.2.3 From 3cfc3092f40bc37c57ba556cfd8de4218f2135ab Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 12 Nov 2009 01:04:25 +0100 Subject: KVM: x86: Add KVM_GET/SET_VCPU_EVENTS This new IOCTL exports all yet user-invisible states related to exceptions, interrupts, and NMIs. Together with appropriate user space changes, this fixes sporadic problems of vmsave/restore, live migration and system reset. 
[avi: future-proof abi by adding a flags field] Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- Documentation/kvm/api.txt | 49 ++++++++++++++++++++++++++ arch/x86/include/asm/kvm.h | 28 +++++++++++++++ arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/svm.c | 22 ++++++++++++ arch/x86/kvm/vmx.c | 30 ++++++++++++++++ arch/x86/kvm/x86.c | 77 +++++++++++++++++++++++++++++++++++++++++ include/linux/kvm.h | 6 ++++ 7 files changed, 214 insertions(+) (limited to 'include/linux/kvm.h') diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 36594ba57723..e1a114161027 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -653,6 +653,55 @@ struct kvm_clock_data { __u32 pad[9]; }; +4.29 KVM_GET_VCPU_EVENTS + +Capability: KVM_CAP_VCPU_EVENTS +Architectures: x86 +Type: vcpu ioctl +Parameters: struct kvm_vcpu_events (out) +Returns: 0 on success, -1 on error + +Gets currently pending exceptions, interrupts, and NMIs as well as related +states of the vcpu. + +struct kvm_vcpu_events { + struct { + __u8 injected; + __u8 nr; + __u8 has_error_code; + __u8 pad; + __u32 error_code; + } exception; + struct { + __u8 injected; + __u8 nr; + __u8 soft; + __u8 pad; + } interrupt; + struct { + __u8 injected; + __u8 pending; + __u8 masked; + __u8 pad; + } nmi; + __u32 sipi_vector; + __u32 flags; /* must be zero */ +}; + +4.30 KVM_SET_VCPU_EVENTS + +Capability: KVM_CAP_VCPU_EVENTS +Architectures: x86 +Type: vcpu ioctl +Parameters: struct kvm_vcpu_events (in) +Returns: 0 on success, -1 on error + +Set pending exceptions, interrupts, and NMIs as well as related states of the +vcpu. + +See KVM_GET_VCPU_EVENTS for the data structure. + + 5. 
The kvm_run structure Application code obtains a pointer to the kvm_run structure by diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index ef9b4b73cce4..950df434763f 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -20,6 +20,7 @@ #define __KVM_HAVE_MCE #define __KVM_HAVE_PIT_STATE2 #define __KVM_HAVE_XEN_HVM +#define __KVM_HAVE_VCPU_EVENTS /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 @@ -252,4 +253,31 @@ struct kvm_reinject_control { __u8 pit_reinject; __u8 reserved[31]; }; + +/* for KVM_GET/SET_VCPU_EVENTS */ +struct kvm_vcpu_events { + struct { + __u8 injected; + __u8 nr; + __u8 has_error_code; + __u8 pad; + __u32 error_code; + } exception; + struct { + __u8 injected; + __u8 nr; + __u8 soft; + __u8 pad; + } interrupt; + struct { + __u8 injected; + __u8 pending; + __u8 masked; + __u8 pad; + } nmi; + __u32 sipi_vector; + __u32 flags; + __u32 reserved[10]; +}; + #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 26a74b7bb6bc..06e085614dad 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -523,6 +523,8 @@ struct kvm_x86_ops { bool has_error_code, u32 error_code); int (*interrupt_allowed)(struct kvm_vcpu *vcpu); int (*nmi_allowed)(struct kvm_vcpu *vcpu); + bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); + void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 34b700f9e498..3de0b37ec038 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2499,6 +2499,26 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu) !(svm->vcpu.arch.hflags & HF_NMI_MASK); } +static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + return 
!!(svm->vcpu.arch.hflags & HF_NMI_MASK); +} + +static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + if (masked) { + svm->vcpu.arch.hflags |= HF_NMI_MASK; + svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET); + } else { + svm->vcpu.arch.hflags &= ~HF_NMI_MASK; + svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); + } +} + static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -2946,6 +2966,8 @@ static struct kvm_x86_ops svm_x86_ops = { .queue_exception = svm_queue_exception, .interrupt_allowed = svm_interrupt_allowed, .nmi_allowed = svm_nmi_allowed, + .get_nmi_mask = svm_get_nmi_mask, + .set_nmi_mask = svm_set_nmi_mask, .enable_nmi_window = enable_nmi_window, .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 22fcd27a0b58..778f059ae423 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2639,6 +2639,34 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) GUEST_INTR_STATE_NMI)); } +static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) +{ + if (!cpu_has_virtual_nmis()) + return to_vmx(vcpu)->soft_vnmi_blocked; + else + return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + GUEST_INTR_STATE_NMI); +} + +static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!cpu_has_virtual_nmis()) { + if (vmx->soft_vnmi_blocked != masked) { + vmx->soft_vnmi_blocked = masked; + vmx->vnmi_blocked_time = 0; + } + } else { + if (masked) + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + else + vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + } +} + static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) { return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && @@ -3985,6 +4013,8 @@ static struct kvm_x86_ops vmx_x86_ops = { .queue_exception = vmx_queue_exception, .interrupt_allowed 
= vmx_interrupt_allowed, .nmi_allowed = vmx_nmi_allowed, + .get_nmi_mask = vmx_get_nmi_mask, + .set_nmi_mask = vmx_set_nmi_mask, .enable_nmi_window = enable_nmi_window, .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ba8958dca3c4..35eea30821d6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1342,6 +1342,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_SET_IDENTITY_MAP_ADDR: case KVM_CAP_XEN_HVM: case KVM_CAP_ADJUST_CLOCK: + case KVM_CAP_VCPU_EVENTS: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -1883,6 +1884,61 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, return 0; } +static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + vcpu_load(vcpu); + + events->exception.injected = vcpu->arch.exception.pending; + events->exception.nr = vcpu->arch.exception.nr; + events->exception.has_error_code = vcpu->arch.exception.has_error_code; + events->exception.error_code = vcpu->arch.exception.error_code; + + events->interrupt.injected = vcpu->arch.interrupt.pending; + events->interrupt.nr = vcpu->arch.interrupt.nr; + events->interrupt.soft = vcpu->arch.interrupt.soft; + + events->nmi.injected = vcpu->arch.nmi_injected; + events->nmi.pending = vcpu->arch.nmi_pending; + events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); + + events->sipi_vector = vcpu->arch.sipi_vector; + + events->flags = 0; + + vcpu_put(vcpu); +} + +static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + if (events->flags) + return -EINVAL; + + vcpu_load(vcpu); + + vcpu->arch.exception.pending = events->exception.injected; + vcpu->arch.exception.nr = events->exception.nr; + vcpu->arch.exception.has_error_code = events->exception.has_error_code; + vcpu->arch.exception.error_code = events->exception.error_code; + + vcpu->arch.interrupt.pending = events->interrupt.injected; 
+ vcpu->arch.interrupt.nr = events->interrupt.nr; + vcpu->arch.interrupt.soft = events->interrupt.soft; + if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) + kvm_pic_clear_isr_ack(vcpu->kvm); + + vcpu->arch.nmi_injected = events->nmi.injected; + vcpu->arch.nmi_pending = events->nmi.pending; + kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); + + vcpu->arch.sipi_vector = events->sipi_vector; + + vcpu_put(vcpu); + + return 0; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2040,6 +2096,27 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); break; } + case KVM_GET_VCPU_EVENTS: { + struct kvm_vcpu_events events; + + kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events); + + r = -EFAULT; + if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events))) + break; + r = 0; + break; + } + case KVM_SET_VCPU_EVENTS: { + struct kvm_vcpu_events events; + + r = -EFAULT; + if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events))) + break; + + r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); + break; + } default: r = -EINVAL; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 976f4d181858..92045a92d714 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -489,6 +489,9 @@ struct kvm_ioeventfd { #endif #define KVM_CAP_ADJUST_CLOCK 39 #define KVM_CAP_INTERNAL_ERROR_DATA 40 +#ifdef __KVM_HAVE_VCPU_EVENTS +#define KVM_CAP_VCPU_EVENTS 41 +#endif #ifdef KVM_CAP_IRQ_ROUTING @@ -672,6 +675,9 @@ struct kvm_clock_data { /* IA64 stack access */ #define KVM_IA64_VCPU_GET_STACK _IOR(KVMIO, 0x9a, void *) #define KVM_IA64_VCPU_SET_STACK _IOW(KVMIO, 0x9b, void *) +/* Available with KVM_CAP_VCPU_EVENTS */ +#define KVM_GET_VCPU_EVENTS _IOR(KVMIO, 0x9f, struct kvm_vcpu_events) +#define KVM_SET_VCPU_EVENTS _IOW(KVMIO, 0xa0, struct kvm_vcpu_events) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) -- cgit v1.2.3 From d7b0b5eb3000c6fb902f08c619fcd673a23d8fab Mon Sep 17 
00:00:00 2001 From: Carsten Otte Date: Thu, 19 Nov 2009 14:21:16 +0100 Subject: KVM: s390: Make psw available on all exits, not just a subset This patch moves s390 processor status word into the base kvm_run struct and keeps it up to date on all userspace exits. The userspace ABI is broken by this; however, there are no applications in the wild using this. A capability check is provided so users can verify the updated API exists. Cc: stable@kernel.org Signed-off-by: Carsten Otte Signed-off-by: Avi Kivity --- arch/s390/include/asm/kvm.h | 3 ++- arch/s390/kvm/kvm-s390.c | 25 +++++++++++++++++-------- include/linux/kvm.h | 8 ++++++-- 3 files changed, 25 insertions(+), 11 deletions(-) (limited to 'include/linux/kvm.h') diff --git a/arch/s390/include/asm/kvm.h b/arch/s390/include/asm/kvm.h index 3dfcaeb5d7f4..82b32a100c7d 100644 --- a/arch/s390/include/asm/kvm.h +++ b/arch/s390/include/asm/kvm.h @@ -1,6 +1,5 @@ #ifndef __LINUX_KVM_S390_H #define __LINUX_KVM_S390_H - /* * asm-s390/kvm.h - KVM s390 specific structures and definitions * @@ -15,6 +14,8 @@ */ #include <linux/types.h> +#define __KVM_S390 + /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { /* general purpose regs for s390 */ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 544505893c9f..f8bcaefd7d34 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -117,10 +117,16 @@ long kvm_arch_dev_ioctl(struct file *filp, int kvm_dev_ioctl_check_extension(long ext) { + int r; + switch (ext) { + case KVM_CAP_S390_PSW: + r = 1; + break; default: - return 0; + r = 0; } + return r; } /* Section: vm related */ @@ -420,8 +426,10 @@ static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) vcpu_load(vcpu); if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING) rc = -EBUSY; - else - vcpu->arch.sie_block->gpsw = psw; + else { + vcpu->run->psw_mask = psw.mask; + vcpu->run->psw_addr = psw.addr; + } vcpu_put(vcpu); return rc; } @@ -509,9 +517,6 @@ rerun_vcpu: 
switch (kvm_run->exit_reason) { case KVM_EXIT_S390_SIEIC: - vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask; - vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr; - break; case KVM_EXIT_UNKNOWN: case KVM_EXIT_INTR: case KVM_EXIT_S390_RESET: @@ -520,6 +525,9 @@ rerun_vcpu: BUG(); } + vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; + vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; + might_fault(); do { @@ -539,8 +547,6 @@ rerun_vcpu: /* intercept cannot be handled in-kernel, prepare kvm-run */ kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; - kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask; - kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr; kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; rc = 0; @@ -552,6 +558,9 @@ rerun_vcpu: rc = 0; } + kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; + kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; + if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &sigsaved, NULL); diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 92045a92d714..2d241da07236 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -181,6 +181,11 @@ struct kvm_run { __u64 cr8; __u64 apic_base; +#ifdef __KVM_S390 + /* the processor status word for s390 */ + __u64 psw_mask; /* psw upper half */ + __u64 psw_addr; /* psw lower half */ +#endif union { /* KVM_EXIT_UNKNOWN */ struct { @@ -232,8 +237,6 @@ struct kvm_run { /* KVM_EXIT_S390_SIEIC */ struct { __u8 icptcode; - __u64 mask; /* psw upper half */ - __u64 addr; /* psw lower half */ __u16 ipa; __u32 ipb; } s390_sieic; @@ -492,6 +495,7 @@ struct kvm_ioeventfd { #ifdef __KVM_HAVE_VCPU_EVENTS #define KVM_CAP_VCPU_EVENTS 41 #endif +#define KVM_CAP_S390_PSW 42 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3