summaryrefslogtreecommitdiffstats
path: root/arch/x86/entry/vdso
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/entry/vdso')
-rw-r--r--arch/x86/entry/vdso/Makefile16
-rw-r--r--arch/x86/entry/vdso/vclock_gettime.c220
-rw-r--r--arch/x86/entry/vdso/vgetcpu.c8
-rw-r--r--arch/x86/entry/vdso/vma.c62
4 files changed, 91 insertions, 215 deletions
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index fa3f439f0a92..141d415a8c80 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -68,7 +68,13 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
$(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \
-fno-omit-frame-pointer -foptimize-sibling-calls \
- -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO $(RETPOLINE_VDSO_CFLAGS)
+ -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
+
+ifdef CONFIG_RETPOLINE
+ifneq ($(RETPOLINE_VDSO_CFLAGS),)
+ CFL += $(RETPOLINE_VDSO_CFLAGS)
+endif
+endif
$(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
@@ -138,7 +144,13 @@ KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
-KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS)
+
+ifdef CONFIG_RETPOLINE
+ifneq ($(RETPOLINE_VDSO_CFLAGS),)
+ KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS)
+endif
+endif
+
$(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
$(obj)/vdso32.so.dbg: FORCE \
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index f19856d95c60..007b3fe9d727 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -43,50 +43,26 @@ extern u8 hvclock_page
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
long ret;
- asm("syscall" : "=a" (ret) :
- "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
+ asm ("syscall" : "=a" (ret), "=m" (*ts) :
+ "0" (__NR_clock_gettime), "D" (clock), "S" (ts) :
+ "rcx", "r11");
return ret;
}
-notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
-{
- long ret;
-
- asm("syscall" : "=a" (ret) :
- "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
- return ret;
-}
-
-
#else
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
long ret;
- asm(
+ asm (
"mov %%ebx, %%edx \n"
- "mov %2, %%ebx \n"
+ "mov %[clock], %%ebx \n"
"call __kernel_vsyscall \n"
"mov %%edx, %%ebx \n"
- : "=a" (ret)
- : "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
- : "memory", "edx");
- return ret;
-}
-
-notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
-{
- long ret;
-
- asm(
- "mov %%ebx, %%edx \n"
- "mov %2, %%ebx \n"
- "call __kernel_vsyscall \n"
- "mov %%edx, %%ebx \n"
- : "=a" (ret)
- : "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
- : "memory", "edx");
+ : "=a" (ret), "=m" (*ts)
+ : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts)
+ : "edx");
return ret;
}
@@ -98,12 +74,11 @@ static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
}
-static notrace u64 vread_pvclock(int *mode)
+static notrace u64 vread_pvclock(void)
{
const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
- u64 ret;
- u64 last;
u32 version;
+ u64 ret;
/*
* Note: The kernel and hypervisor must guarantee that cpu ID
@@ -130,175 +105,112 @@ static notrace u64 vread_pvclock(int *mode)
do {
version = pvclock_read_begin(pvti);
- if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
- *mode = VCLOCK_NONE;
- return 0;
- }
+ if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT)))
+ return U64_MAX;
ret = __pvclock_read_cycles(pvti, rdtsc_ordered());
} while (pvclock_read_retry(pvti, version));
- /* refer to vread_tsc() comment for rationale */
- last = gtod->cycle_last;
-
- if (likely(ret >= last))
- return ret;
-
- return last;
+ return ret;
}
#endif
#ifdef CONFIG_HYPERV_TSCPAGE
-static notrace u64 vread_hvclock(int *mode)
+static notrace u64 vread_hvclock(void)
{
const struct ms_hyperv_tsc_page *tsc_pg =
(const struct ms_hyperv_tsc_page *)&hvclock_page;
- u64 current_tick = hv_read_tsc_page(tsc_pg);
-
- if (current_tick != U64_MAX)
- return current_tick;
- *mode = VCLOCK_NONE;
- return 0;
+ return hv_read_tsc_page(tsc_pg);
}
#endif
-notrace static u64 vread_tsc(void)
-{
- u64 ret = (u64)rdtsc_ordered();
- u64 last = gtod->cycle_last;
-
- if (likely(ret >= last))
- return ret;
-
- /*
- * GCC likes to generate cmov here, but this branch is extremely
- * predictable (it's just a function of time and the likely is
- * very likely) and there's a data dependence, so force GCC
- * to generate a branch instead. I don't barrier() because
- * we don't actually need a barrier, and if this function
- * ever gets inlined it will generate worse code.
- */
- asm volatile ("");
- return last;
-}
-
-notrace static inline u64 vgetsns(int *mode)
+notrace static inline u64 vgetcyc(int mode)
{
- u64 v;
- cycles_t cycles;
-
- if (gtod->vclock_mode == VCLOCK_TSC)
- cycles = vread_tsc();
+ if (mode == VCLOCK_TSC)
+ return (u64)rdtsc_ordered();
#ifdef CONFIG_PARAVIRT_CLOCK
- else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
- cycles = vread_pvclock(mode);
+ else if (mode == VCLOCK_PVCLOCK)
+ return vread_pvclock();
#endif
#ifdef CONFIG_HYPERV_TSCPAGE
- else if (gtod->vclock_mode == VCLOCK_HVCLOCK)
- cycles = vread_hvclock(mode);
+ else if (mode == VCLOCK_HVCLOCK)
+ return vread_hvclock();
#endif
- else
- return 0;
- v = (cycles - gtod->cycle_last) & gtod->mask;
- return v * gtod->mult;
+ return U64_MAX;
}
-/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
-notrace static int __always_inline do_realtime(struct timespec *ts)
+notrace static int do_hres(clockid_t clk, struct timespec *ts)
{
- unsigned long seq;
- u64 ns;
- int mode;
+ struct vgtod_ts *base = &gtod->basetime[clk];
+ u64 cycles, last, sec, ns;
+ unsigned int seq;
do {
seq = gtod_read_begin(gtod);
- mode = gtod->vclock_mode;
- ts->tv_sec = gtod->wall_time_sec;
- ns = gtod->wall_time_snsec;
- ns += vgetsns(&mode);
+ cycles = vgetcyc(gtod->vclock_mode);
+ ns = base->nsec;
+ last = gtod->cycle_last;
+ if (unlikely((s64)cycles < 0))
+ return vdso_fallback_gettime(clk, ts);
+ if (cycles > last)
+ ns += (cycles - last) * gtod->mult;
ns >>= gtod->shift;
+ sec = base->sec;
} while (unlikely(gtod_read_retry(gtod, seq)));
- ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+ /*
+ * Do this outside the loop: a race inside the loop could result
+ * in __iter_div_u64_rem() being extremely slow.
+ */
+ ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
ts->tv_nsec = ns;
- return mode;
+ return 0;
}
-notrace static int __always_inline do_monotonic(struct timespec *ts)
+notrace static void do_coarse(clockid_t clk, struct timespec *ts)
{
- unsigned long seq;
- u64 ns;
- int mode;
+ struct vgtod_ts *base = &gtod->basetime[clk];
+ unsigned int seq;
do {
seq = gtod_read_begin(gtod);
- mode = gtod->vclock_mode;
- ts->tv_sec = gtod->monotonic_time_sec;
- ns = gtod->monotonic_time_snsec;
- ns += vgetsns(&mode);
- ns >>= gtod->shift;
+ ts->tv_sec = base->sec;
+ ts->tv_nsec = base->nsec;
} while (unlikely(gtod_read_retry(gtod, seq)));
-
- ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
- ts->tv_nsec = ns;
-
- return mode;
}
-notrace static void do_realtime_coarse(struct timespec *ts)
+notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
- unsigned long seq;
- do {
- seq = gtod_read_begin(gtod);
- ts->tv_sec = gtod->wall_time_coarse_sec;
- ts->tv_nsec = gtod->wall_time_coarse_nsec;
- } while (unlikely(gtod_read_retry(gtod, seq)));
-}
+ unsigned int msk;
-notrace static void do_monotonic_coarse(struct timespec *ts)
-{
- unsigned long seq;
- do {
- seq = gtod_read_begin(gtod);
- ts->tv_sec = gtod->monotonic_time_coarse_sec;
- ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
- } while (unlikely(gtod_read_retry(gtod, seq)));
-}
+ /* Sort out negative (CPU/FD) and invalid clocks */
+ if (unlikely((unsigned int) clock >= MAX_CLOCKS))
+ return vdso_fallback_gettime(clock, ts);
-notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
-{
- switch (clock) {
- case CLOCK_REALTIME:
- if (do_realtime(ts) == VCLOCK_NONE)
- goto fallback;
- break;
- case CLOCK_MONOTONIC:
- if (do_monotonic(ts) == VCLOCK_NONE)
- goto fallback;
- break;
- case CLOCK_REALTIME_COARSE:
- do_realtime_coarse(ts);
- break;
- case CLOCK_MONOTONIC_COARSE:
- do_monotonic_coarse(ts);
- break;
- default:
- goto fallback;
+ /*
+ * Convert the clockid to a bitmask and use it to check which
+ * clocks are handled in the VDSO directly.
+ */
+ msk = 1U << clock;
+ if (likely(msk & VGTOD_HRES)) {
+ return do_hres(clock, ts);
+ } else if (msk & VGTOD_COARSE) {
+ do_coarse(clock, ts);
+ return 0;
}
-
- return 0;
-fallback:
return vdso_fallback_gettime(clock, ts);
}
+
int clock_gettime(clockid_t, struct timespec *)
__attribute__((weak, alias("__vdso_clock_gettime")));
notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
if (likely(tv != NULL)) {
- if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
- return vdso_fallback_gtod(tv, tz);
+ struct timespec *ts = (struct timespec *) tv;
+
+ do_hres(CLOCK_REALTIME, ts);
tv->tv_usec /= 1000;
}
if (unlikely(tz != NULL)) {
@@ -318,7 +230,7 @@ int gettimeofday(struct timeval *, struct timezone *)
notrace time_t __vdso_time(time_t *t)
{
/* This is atomic on x86 so we don't need any locks. */
- time_t result = READ_ONCE(gtod->wall_time_sec);
+ time_t result = READ_ONCE(gtod->basetime[CLOCK_REALTIME].sec);
if (t)
*t = result;
diff --git a/arch/x86/entry/vdso/vgetcpu.c b/arch/x86/entry/vdso/vgetcpu.c
index 8ec3d1f4ce9a..f86ab0ae1777 100644
--- a/arch/x86/entry/vdso/vgetcpu.c
+++ b/arch/x86/entry/vdso/vgetcpu.c
@@ -13,14 +13,8 @@
notrace long
__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
{
- unsigned int p;
+ vdso_read_cpunode(cpu, node);
- p = __getcpu();
-
- if (cpu)
- *cpu = p & VGETCPU_CPU_MASK;
- if (node)
- *node = p >> 12;
return 0;
}
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 5b8b556dbb12..7eb878561910 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -39,7 +39,7 @@ void __init init_vdso_image(const struct vdso_image *image)
struct linux_binprm;
-static int vdso_fault(const struct vm_special_mapping *sm,
+static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
const struct vdso_image *image = vma->vm_mm->context.vdso_image;
@@ -84,12 +84,11 @@ static int vdso_mremap(const struct vm_special_mapping *sm,
return 0;
}
-static int vvar_fault(const struct vm_special_mapping *sm,
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
const struct vdso_image *image = vma->vm_mm->context.vdso_image;
long sym_offset;
- int ret = -EFAULT;
if (!image)
return VM_FAULT_SIGBUS;
@@ -108,29 +107,24 @@ static int vvar_fault(const struct vm_special_mapping *sm,
return VM_FAULT_SIGBUS;
if (sym_offset == image->sym_vvar_page) {
- ret = vm_insert_pfn(vma, vmf->address,
- __pa_symbol(&__vvar_page) >> PAGE_SHIFT);
+ return vmf_insert_pfn(vma, vmf->address,
+ __pa_symbol(&__vvar_page) >> PAGE_SHIFT);
} else if (sym_offset == image->sym_pvclock_page) {
struct pvclock_vsyscall_time_info *pvti =
pvclock_get_pvti_cpu0_va();
if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) {
- ret = vm_insert_pfn_prot(
- vma,
- vmf->address,
- __pa(pvti) >> PAGE_SHIFT,
- pgprot_decrypted(vma->vm_page_prot));
+ return vmf_insert_pfn_prot(vma, vmf->address,
+ __pa(pvti) >> PAGE_SHIFT,
+ pgprot_decrypted(vma->vm_page_prot));
}
} else if (sym_offset == image->sym_hvclock_page) {
struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page();
if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK))
- ret = vm_insert_pfn(vma, vmf->address,
- vmalloc_to_pfn(tsc_pg));
+ return vmf_insert_pfn(vma, vmf->address,
+ vmalloc_to_pfn(tsc_pg));
}
- if (ret == 0 || ret == -EBUSY)
- return VM_FAULT_NOPAGE;
-
return VM_FAULT_SIGBUS;
}
@@ -332,40 +326,6 @@ static __init int vdso_setup(char *s)
return 0;
}
__setup("vdso=", vdso_setup);
-#endif
-
-#ifdef CONFIG_X86_64
-static void vgetcpu_cpu_init(void *arg)
-{
- int cpu = smp_processor_id();
- struct desc_struct d = { };
- unsigned long node = 0;
-#ifdef CONFIG_NUMA
- node = cpu_to_node(cpu);
-#endif
- if (static_cpu_has(X86_FEATURE_RDTSCP))
- write_rdtscp_aux((node << 12) | cpu);
-
- /*
- * Store cpu number in limit so that it can be loaded
- * quickly in user space in vgetcpu. (12 bits for the CPU
- * and 8 bits for the node)
- */
- d.limit0 = cpu | ((node & 0xf) << 12);
- d.limit1 = node >> 4;
- d.type = 5; /* RO data, expand down, accessed */
- d.dpl = 3; /* Visible to user code */
- d.s = 1; /* Not a system segment */
- d.p = 1; /* Present */
- d.d = 1; /* 32-bit */
-
- write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
-}
-
-static int vgetcpu_online(unsigned int cpu)
-{
- return smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);
-}
static int __init init_vdso(void)
{
@@ -375,9 +335,7 @@ static int __init init_vdso(void)
init_vdso_image(&vdso_image_x32);
#endif
- /* notifier priority > KVM */
- return cpuhp_setup_state(CPUHP_AP_X86_VDSO_VMA_ONLINE,
- "x86/vdso/vma:online", vgetcpu_online, NULL);
+ return 0;
}
subsys_initcall(init_vdso);
#endif /* CONFIG_X86_64 */
OpenPOWER on IntegriCloud