From 5fdf5d37f40a3b18c0d613463867f71c017b75ef Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Thu, 19 Nov 2015 16:55:45 -0500 Subject: x86/xen: Avoid fast syscall path for Xen PV guests After 32-bit syscall rewrite, and specifically after commit: 5f310f739b4c ("x86/entry/32: Re-implement SYSENTER using the new C path") ... the stack frame that is passed to xen_sysexit is no longer a "standard" one (i.e. it's not pt_regs). Since we end up calling xen_iret from xen_sysexit we don't need to fix up the stack and instead follow entry_SYSENTER_32's IRET path directly to xen_iret. We can do the same thing for compat mode even though stack does not need to be fixed. This will allow us to drop usergs_sysret32 paravirt op (in the subsequent patch) Suggested-by: Andy Lutomirski Signed-off-by: Boris Ostrovsky Reviewed-by: Borislav Petkov Acked-by: Andy Lutomirski Cc: Andrew Morton Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: david.vrabel@citrix.com Cc: konrad.wilk@oracle.com Cc: virtualization@lists.linux-foundation.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1447970147-1733-2-git-send-email-boris.ostrovsky@oracle.com Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 5 +++-- arch/x86/entry/entry_64_compat.S | 10 ++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 3eb572ed3d7a..0870825a9568 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -308,8 +308,9 @@ sysenter_past_esp: movl %esp, %eax call do_fast_syscall_32 - testl %eax, %eax - jz .Lsyscall_32_done + /* XEN PV guests always use IRET path */ + ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \ + "jmp .Lsyscall_32_done", X86_FEATURE_XENPV /* Opportunistic SYSEXIT */ TRACE_IRQS_ON /* User mode traces as IRQs on. */ diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index c3201830a85e..402e34a21559 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -121,8 +121,9 @@ sysenter_flags_fixed: movq %rsp, %rdi call do_fast_syscall_32 - testl %eax, %eax - jz .Lsyscall_32_done + /* XEN PV guests always use IRET path */ + ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \ + "jmp .Lsyscall_32_done", X86_FEATURE_XENPV jmp sysret32_from_system_call sysenter_fix_flags: @@ -200,8 +201,9 @@ ENTRY(entry_SYSCALL_compat) movq %rsp, %rdi call do_fast_syscall_32 - testl %eax, %eax - jz .Lsyscall_32_done + /* XEN PV guests always use IRET path */ + ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \ + "jmp .Lsyscall_32_done", X86_FEATURE_XENPV /* Opportunistic SYSRET */ sysret32_from_system_call: -- cgit v1.2.1 From 88c15ec90ff16880efab92b519436ee17b198477 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Thu, 19 Nov 2015 16:55:46 -0500 Subject: x86/paravirt: Remove the unused irq_enable_sysexit pv op As result of commit "x86/xen: Avoid fast syscall path for Xen PV guests", the irq_enable_sysexit pv op is not called by Xen PV guests anymore and since they were the only ones who used it we can safely remove it. Signed-off-by: Boris Ostrovsky Reviewed-by: Borislav Petkov Acked-by: Andy Lutomirski Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: david.vrabel@citrix.com Cc: konrad.wilk@oracle.com Cc: virtualization@lists.linux-foundation.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1447970147-1733-3-git-send-email-boris.ostrovsky@oracle.com Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 0870825a9568..9870c972d345 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -329,7 +329,8 @@ sysenter_past_esp: * Return back to the vDSO, which will pop ecx and edx. * Don't bother with DS and ES (they already contain __USER_DS). */ - ENABLE_INTERRUPTS_SYSEXIT + sti + sysexit .pushsection .fixup, "ax" 2: movl $0, PT_FS(%esp) @@ -552,11 +553,6 @@ ENTRY(native_iret) iret _ASM_EXTABLE(native_iret, iret_exc) END(native_iret) - -ENTRY(native_irq_enable_sysexit) - sti - sysexit -END(native_irq_enable_sysexit) #endif ENTRY(overflow) -- cgit v1.2.1 From 75ef82190dceac3d84cdc209fdf82800a7cc6609 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Thu, 19 Nov 2015 16:55:47 -0500 Subject: x86/entry, x86/paravirt: Remove the unused usergs_sysret32 PV op As result of commit "x86/xen: Avoid fast syscall path for Xen PV guests", usergs_sysret32 pv op is not called by Xen PV guests anymore and since they were the only ones who used it we can safely remove it. Signed-off-by: Boris Ostrovsky Reviewed-by: Borislav Petkov Acked-by: Andy Lutomirski Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: david.vrabel@citrix.com Cc: konrad.wilk@oracle.com Cc: virtualization@lists.linux-foundation.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1447970147-1733-4-git-send-email-boris.ostrovsky@oracle.com Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64_compat.S | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 402e34a21559..bbcb285ac781 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -18,13 +18,6 @@ .section .entry.text, "ax" -#ifdef CONFIG_PARAVIRT -ENTRY(native_usergs_sysret32) - swapgs - sysretl -ENDPROC(native_usergs_sysret32) -#endif - /* * 32-bit SYSENTER instruction entry. * @@ -238,7 +231,8 @@ sysret32_from_system_call: xorq %r9, %r9 xorq %r10, %r10 movq RSP-ORIG_RAX(%rsp), %rsp - USERGS_SYSRET32 + swapgs + sysretl END(entry_SYSCALL_compat) /* -- cgit v1.2.1 From 478dc89cf316697e8029411a64ea2b30c528434d Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 12 Nov 2015 12:59:04 -0800 Subject: x86/entry/64: Bypass enter_from_user_mode on non-context-tracking boots On CONFIG_CONTEXT_TRACKING kernels that have context tracking disabled at runtime (which includes most distro kernels), we still have the overhead of a call to enter_from_user_mode in interrupt and exception entries. If jump labels are available, this uses the jump label infrastructure to skip the call. Signed-off-by: Andy Lutomirski Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/73ee804fff48cd8c66b65b724f9f728a11a8c686.1447361906.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/calling.h | 15 +++++++++++++++ arch/x86/entry/entry_64.S | 8 ++------ 2 files changed, 17 insertions(+), 6 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 3c71dd947c7b..e32206e09868 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -1,3 +1,5 @@ +#include + /* x86 function call convention, 64-bit: @@ -232,3 +234,16 @@ For 32-bit we have the following conventions - kernel is built with #endif /* CONFIG_X86_64 */ +/* + * This does 'call enter_from_user_mode' unless we can avoid it based on + * kernel config or using the static jump infrastructure. + */ +.macro CALL_enter_from_user_mode +#ifdef CONFIG_CONTEXT_TRACKING +#ifdef HAVE_JUMP_LABEL + STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_enabled, def=0 +#endif + call enter_from_user_mode +.Lafter_call_\@: +#endif +.endm diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index a55697d19824..9d34d3cfceb6 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -520,9 +520,7 @@ END(irq_entries_start) */ TRACE_IRQS_OFF -#ifdef CONFIG_CONTEXT_TRACKING - call enter_from_user_mode -#endif + CALL_enter_from_user_mode 1: /* @@ -1066,9 +1064,7 @@ ENTRY(error_entry) * (which can take locks). */ TRACE_IRQS_OFF -#ifdef CONFIG_CONTEXT_TRACKING - call enter_from_user_mode -#endif + CALL_enter_from_user_mode ret .Lerror_entry_done: -- cgit v1.2.1 From 6b078f5de7fc0851af4102493c7b5bb07e49c4cb Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 10 Dec 2015 19:20:19 -0800 Subject: x86, vdso, pvclock: Simplify and speed up the vdso pvclock reader The pvclock vdso code was too abstracted to understand easily and excessively paranoid. Simplify it for a huge speedup. This opens the door for additional simplifications, as the vdso no longer accesses the pvti for any vcpu other than vcpu 0. Before, vclock_gettime using kvm-clock took about 45ns on my machine. With this change, it takes 29ns, which is almost as fast as the pure TSC implementation. Signed-off-by: Andy Lutomirski Reviewed-by: Paolo Bonzini Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/6b51dcc41f1b101f963945c5ec7093d72bdac429.1449702533.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/vdso/vclock_gettime.c | 81 ++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 35 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index ca94fa649251..c325ba1bdddf 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -78,47 +78,58 @@ static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu) static notrace cycle_t vread_pvclock(int *mode) { - const struct pvclock_vsyscall_time_info *pvti; + const struct pvclock_vcpu_time_info *pvti = &get_pvti(0)->pvti; cycle_t ret; - u64 last; - u32 version; - u8 flags; - unsigned cpu, cpu1; - + u64 tsc, pvti_tsc; + u64 last, delta, pvti_system_time; + u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift; /* - * Note: hypervisor must guarantee that: - * 1. cpu ID number maps 1:1 to per-CPU pvclock time info. - * 2. that per-CPU pvclock time info is updated if the - * underlying CPU changes. - * 3. that version is increased whenever underlying CPU - * changes. + * Note: The kernel and hypervisor must guarantee that cpu ID + * number maps 1:1 to per-CPU pvclock time info. + * + * Because the hypervisor is entirely unaware of guest userspace + * preemption, it cannot guarantee that per-CPU pvclock time + * info is updated if the underlying CPU changes or that that + * version is increased whenever underlying CPU changes. * + * On KVM, we are guaranteed that pvti updates for any vCPU are + * atomic as seen by *all* vCPUs. This is an even stronger + * guarantee than we get with a normal seqlock. + * + * On Xen, we don't appear to have that guarantee, but Xen still + * supplies a valid seqlock using the version field. + + * We only do pvclock vdso timing at all if + * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to + * mean that all vCPUs have matching pvti and that the TSC is + * synced, so we can just look at vCPU 0's pvti. */ - do { - cpu = __getcpu() & VGETCPU_CPU_MASK; - /* TODO: We can put vcpu id into higher bits of pvti.version. - * This will save a couple of cycles by getting rid of - * __getcpu() calls (Gleb). - */ - - pvti = get_pvti(cpu); - - version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); - - /* - * Test we're still on the cpu as well as the version. - * We could have been migrated just after the first - * vgetcpu but before fetching the version, so we - * wouldn't notice a version change. - */ - cpu1 = __getcpu() & VGETCPU_CPU_MASK; - } while (unlikely(cpu != cpu1 || - (pvti->pvti.version & 1) || - pvti->pvti.version != version)); - - if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) + + if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) { *mode = VCLOCK_NONE; + return 0; + } + + do { + version = pvti->version; + + /* This is also a read barrier, so we'll read version first. */ + tsc = rdtsc_ordered(); + + pvti_tsc_to_system_mul = pvti->tsc_to_system_mul; + pvti_tsc_shift = pvti->tsc_shift; + pvti_system_time = pvti->system_time; + pvti_tsc = pvti->tsc_timestamp; + + /* Make sure that the version double-check is last. */ + smp_rmb(); + } while (unlikely((version & 1) || version != pvti->version)); + + delta = tsc - pvti_tsc; + ret = pvti_system_time + + pvclock_scale_delta(delta, pvti_tsc_to_system_mul, + pvti_tsc_shift); /* refer to tsc.c read_tsc() comment for rationale */ last = gtod->cycle_last; -- cgit v1.2.1 From dac16fba6fc590fa7239676b35ed75dae4c4cd2b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 10 Dec 2015 19:20:20 -0800 Subject: x86/vdso: Get pvclock data from the vvar VMA instead of the fixmap Signed-off-by: Andy Lutomirski Reviewed-by: Paolo Bonzini Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/9d37826fdc7e2d2809efe31d5345f97186859284.1449702533.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/vdso/vclock_gettime.c | 20 ++++++++------------ arch/x86/entry/vdso/vdso-layout.lds.S | 3 ++- arch/x86/entry/vdso/vdso2c.c | 3 +++ arch/x86/entry/vdso/vma.c | 13 +++++++++++++ 4 files changed, 26 insertions(+), 13 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index c325ba1bdddf..5dd363d54348 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -36,6 +36,11 @@ static notrace cycle_t vread_hpet(void) } #endif +#ifdef CONFIG_PARAVIRT_CLOCK +extern u8 pvclock_page + __attribute__((visibility("hidden"))); +#endif + #ifndef BUILD_VDSO32 #include @@ -62,23 +67,14 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) #ifdef CONFIG_PARAVIRT_CLOCK -static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu) +static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void) { - const struct pvclock_vsyscall_time_info *pvti_base; - int idx = cpu / (PAGE_SIZE/PVTI_SIZE); - int offset = cpu % (PAGE_SIZE/PVTI_SIZE); - - BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END); - - pvti_base = (struct pvclock_vsyscall_time_info *) - __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx); - - return &pvti_base[offset]; + return (const struct pvclock_vsyscall_time_info *)&pvclock_page; } static notrace cycle_t vread_pvclock(int *mode) { - const struct pvclock_vcpu_time_info *pvti = &get_pvti(0)->pvti; + const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti; cycle_t ret; u64 tsc, pvti_tsc; u64 last, delta, pvti_system_time; diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S index de2c921025f5..4158acc17df0 100644 --- a/arch/x86/entry/vdso/vdso-layout.lds.S +++ b/arch/x86/entry/vdso/vdso-layout.lds.S @@ -25,7 +25,7 @@ SECTIONS * segment. */ - vvar_start = . - 2 * PAGE_SIZE; + vvar_start = . - 3 * PAGE_SIZE; vvar_page = vvar_start; /* Place all vvars at the offsets in asm/vvar.h. */ @@ -36,6 +36,7 @@ SECTIONS #undef EMIT_VVAR hpet_page = vvar_start + PAGE_SIZE; + pvclock_page = vvar_start + 2 * PAGE_SIZE; . = SIZEOF_HEADERS; diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c index 785d9922b106..491020b2826d 100644 --- a/arch/x86/entry/vdso/vdso2c.c +++ b/arch/x86/entry/vdso/vdso2c.c @@ -73,6 +73,7 @@ enum { sym_vvar_start, sym_vvar_page, sym_hpet_page, + sym_pvclock_page, sym_VDSO_FAKE_SECTION_TABLE_START, sym_VDSO_FAKE_SECTION_TABLE_END, }; @@ -80,6 +81,7 @@ enum { const int special_pages[] = { sym_vvar_page, sym_hpet_page, + sym_pvclock_page, }; struct vdso_sym { @@ -91,6 +93,7 @@ struct vdso_sym required_syms[] = { [sym_vvar_start] = {"vvar_start", true}, [sym_vvar_page] = {"vvar_page", true}, [sym_hpet_page] = {"hpet_page", true}, + [sym_pvclock_page] = {"pvclock_page", true}, [sym_VDSO_FAKE_SECTION_TABLE_START] = { "VDSO_FAKE_SECTION_TABLE_START", false }, diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 64df47148160..aa828191c654 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -100,6 +100,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) .name = "[vvar]", .pages = no_pages, }; + struct pvclock_vsyscall_time_info *pvti; if (calculate_addr) { addr = vdso_addr(current->mm->start_stack, @@ -169,6 +170,18 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) } #endif + pvti = pvclock_pvti_cpu0_va(); + if (pvti && image->sym_pvclock_page) { + ret = remap_pfn_range(vma, + text_start + image->sym_pvclock_page, + __pa(pvti) >> PAGE_SHIFT, + PAGE_SIZE, + PAGE_READONLY); + + if (ret) + goto up_fail; + } + up_fail: if (ret) current->mm->context.vdso = NULL; -- cgit v1.2.1 From cc1e24fdb064d3126a494716f22ad4fc39306742 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 10 Dec 2015 19:20:21 -0800 Subject: x86/vdso: Remove pvclock fixmap machinery Signed-off-by: Andy Lutomirski Reviewed-by: Paolo Bonzini Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/4933029991103ae44672c82b97a20035f5c1fe4f.1449702533.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/vdso/vclock_gettime.c | 1 - arch/x86/entry/vdso/vma.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 5dd363d54348..59a98c25bde7 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -45,7 +45,6 @@ extern u8 pvclock_page #include #include -#include #include notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index aa828191c654..b8f69e264ac4 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.1 From 76480a6a55a03d0fe5dd6290ccde7f78678ab85e Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 10 Dec 2015 19:20:22 -0800 Subject: x86/vdso: Enable vdso pvclock access on all vdso variants Now that pvclock doesn't require access to the fixmap, all vdso variants can use it. The kernel side isn't wired up for 32-bit kernels yet, but this covers 32-bit and x32 userspace on 64-bit kernels. Signed-off-by: Andy Lutomirski Reviewed-by: Paolo Bonzini Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/a7ef693b7a4c88dd2173dc1d4bf6bc27023626eb.1449702533.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/vdso/vclock_gettime.c | 91 ++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 51 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 59a98c25bde7..8602f06c759f 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -17,8 +17,10 @@ #include #include #include +#include #include #include +#include #define gtod (&VVAR(vsyscall_gtod_data)) @@ -43,10 +45,6 @@ extern u8 pvclock_page #ifndef BUILD_VDSO32 -#include -#include -#include - notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) { long ret; @@ -64,8 +62,42 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) return ret; } -#ifdef CONFIG_PARAVIRT_CLOCK +#else + +notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) +{ + long ret; + + asm( + "mov %%ebx, %%edx \n" + "mov %2, %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret) + : "0" (__NR_clock_gettime), "g" (clock), "c" (ts) + : "memory", "edx"); + return ret; +} + +notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) +{ + long ret; + + asm( + "mov %%ebx, %%edx \n" + "mov %2, %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret) + : "0" (__NR_gettimeofday), "g" (tv), "c" (tz) + : "memory", "edx"); + return ret; +} + +#endif + +#ifdef CONFIG_PARAVIRT_CLOCK static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void) { return (const struct pvclock_vsyscall_time_info *)&pvclock_page; @@ -109,9 +141,9 @@ static notrace cycle_t vread_pvclock(int *mode) do { version = pvti->version; - /* This is also a read barrier, so we'll read version first. */ - tsc = rdtsc_ordered(); + smp_rmb(); + tsc = rdtsc_ordered(); pvti_tsc_to_system_mul = pvti->tsc_to_system_mul; pvti_tsc_shift = pvti->tsc_shift; pvti_system_time = pvti->system_time; @@ -126,7 +158,7 @@ static notrace cycle_t vread_pvclock(int *mode) pvclock_scale_delta(delta, pvti_tsc_to_system_mul, pvti_tsc_shift); - /* refer to tsc.c read_tsc() comment for rationale */ + /* refer to vread_tsc() comment for rationale */ last = gtod->cycle_last; if (likely(ret >= last)) @@ -136,49 +168,6 @@ static notrace cycle_t vread_pvclock(int *mode) } #endif -#else - -notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) -{ - long ret; - - asm( - "mov %%ebx, %%edx \n" - "mov %2, %%ebx \n" - "call __kernel_vsyscall \n" - "mov %%edx, %%ebx \n" - : "=a" (ret) - : "0" (__NR_clock_gettime), "g" (clock), "c" (ts) - : "memory", "edx"); - return ret; -} - -notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) -{ - long ret; - - asm( - "mov %%ebx, %%edx \n" - "mov %2, %%ebx \n" - "call __kernel_vsyscall \n" - "mov %%edx, %%ebx \n" - : "=a" (ret) - : "0" (__NR_gettimeofday), "g" (tv), "c" (tz) - : "memory", "edx"); - return ret; -} - -#ifdef CONFIG_PARAVIRT_CLOCK - -static notrace cycle_t vread_pvclock(int *mode) -{ - *mode = VCLOCK_NONE; - return 0; -} -#endif - -#endif - notrace static cycle_t vread_tsc(void) { cycle_t ret = (cycle_t)rdtsc_ordered(); -- cgit v1.2.1 From f74acf0e4326bfaa2c0be1e82f23801fe347cd9c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 12 Dec 2015 11:27:57 +0100 Subject: x86/entry/64_compat: Make labels local ... so that they don't appear as symbols in the final ELF. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1449916077-6506-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64_compat.S | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index bbcb285ac781..8d802a109fac 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -96,15 +96,15 @@ ENTRY(entry_SYSENTER_compat) * This needs to happen before enabling interrupts so that * we don't get preempted with NT set. * - * NB.: sysenter_fix_flags is a label with the code under it moved + * NB.: .Lsysenter_fix_flags is a label with the code under it moved * out-of-line as an optimization: NT is unlikely to be set in the * majority of the cases and instead of polluting the I$ unnecessarily, * we're keeping that code behind a branch which will predict as * not-taken and therefore its instructions won't be fetched. */ testl $X86_EFLAGS_NT, EFLAGS(%rsp) - jnz sysenter_fix_flags -sysenter_flags_fixed: + jnz .Lsysenter_fix_flags +.Lsysenter_flags_fixed: /* * User mode is traced as though IRQs are on, and SYSENTER @@ -119,10 +119,10 @@ sysenter_flags_fixed: "jmp .Lsyscall_32_done", X86_FEATURE_XENPV jmp sysret32_from_system_call -sysenter_fix_flags: +.Lsysenter_fix_flags: pushq $X86_EFLAGS_FIXED popfq - jmp sysenter_flags_fixed + jmp .Lsysenter_flags_fixed ENDPROC(entry_SYSENTER_compat) /* -- cgit v1.2.1