diff options
Diffstat (limited to 'arch/x86/vdso')
-rw-r--r-- | arch/x86/vdso/Makefile | 26 | ||||
-rw-r--r-- | arch/x86/vdso/vclock_gettime.c | 256 | ||||
-rw-r--r-- | arch/x86/vdso/vdso-layout.lds.S | 29 | ||||
-rw-r--r-- | arch/x86/vdso/vdso.S | 22 | ||||
-rw-r--r-- | arch/x86/vdso/vdso32-setup.c | 301 | ||||
-rw-r--r-- | arch/x86/vdso/vdso32.S | 21 | ||||
-rw-r--r-- | arch/x86/vdso/vdso32/vclock_gettime.c | 30 | ||||
-rw-r--r-- | arch/x86/vdso/vdso32/vdso32.lds.S | 15 | ||||
-rw-r--r-- | arch/x86/vdso/vdsox32.S | 22 | ||||
-rw-r--r-- | arch/x86/vdso/vma.c | 20 |
10 files changed, 356 insertions, 386 deletions
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index fd14be1d1472..c580d1210ffe 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -2,6 +2,8 @@ # Building vDSO images for x86. # +KBUILD_CFLAGS += $(DISABLE_LTO) + VDSO64-$(CONFIG_X86_64) := y VDSOX32-$(CONFIG_X86_X32_ABI) := y VDSO32-$(CONFIG_X86_32) := y @@ -21,7 +23,8 @@ vobjs-$(VDSOX32-y) += $(vobjx32s-compat) vobj64s := $(filter-out $(vobjx32s-compat),$(vobjs-y)) # files to link into kernel -obj-$(VDSO64-y) += vma.o vdso.o +obj-y += vma.o +obj-$(VDSO64-y) += vdso.o obj-$(VDSOX32-y) += vdsox32.o obj-$(VDSO32-y) += vdso32.o vdso32-setup.o @@ -35,7 +38,8 @@ export CPPFLAGS_vdso.lds += -P -C VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ -Wl,--no-undefined \ - -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 + -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 \ + $(DISABLE_LTO) $(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so @@ -127,7 +131,7 @@ vdso32.so-$(VDSO32-y) += sysenter vdso32-images = $(vdso32.so-y:%=vdso32-%.so) CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) -VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-soname=linux-gate.so.1 +VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1 # This makes sure the $(obj) subdirectory exists even though vdso32/ # is not a kbuild sub-make subdirectory. @@ -135,7 +139,7 @@ override obj-dirs = $(dir $(obj)) $(obj)/vdso32/ targets += vdso32/vdso32.lds targets += $(vdso32-images) $(vdso32-images:=.dbg) -targets += vdso32/note.o $(vdso32.so-y:%=vdso32/%.o) +targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o) extra-y += $(vdso32-images) @@ -145,8 +149,19 @@ KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) $(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) $(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32 +KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) +KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic +KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector) +KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) +KBUILD_CFLAGS_32 += -fno-omit-frame-pointer +$(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) + $(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \ $(obj)/vdso32/vdso32.lds \ + $(obj)/vdso32/vclock_gettime.o \ $(obj)/vdso32/note.o \ $(obj)/vdso32/%.o $(call if_changed,vdso) @@ -181,7 +196,8 @@ quiet_cmd_vdso = VDSO $@ -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \ sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' -VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \ + $(LTO_CFLAGS) GCOV_PROFILE := n # diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index eb5d7a56f8d4..16d686171e9a 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -4,6 +4,9 @@ * * Fast user context implementation of clock_gettime, gettimeofday, and time. * + * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net> + * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany + * * The code should have no internal unresolved relocations. * Check with readelf after changing. */ @@ -11,56 +14,55 @@ /* Disable profiling for userspace code: */ #define DISABLE_BRANCH_PROFILING -#include <linux/kernel.h> -#include <linux/posix-timers.h> -#include <linux/time.h> -#include <linux/string.h> -#include <asm/vsyscall.h> -#include <asm/fixmap.h> +#include <uapi/linux/time.h> #include <asm/vgtod.h> -#include <asm/timex.h> #include <asm/hpet.h> +#include <asm/vvar.h> #include <asm/unistd.h> -#include <asm/io.h> -#include <asm/pvclock.h> +#include <asm/msr.h> +#include <linux/math64.h> +#include <linux/time.h> #define gtod (&VVAR(vsyscall_gtod_data)) -notrace static cycle_t vread_tsc(void) +extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts); +extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); +extern time_t __vdso_time(time_t *t); + +#ifdef CONFIG_HPET_TIMER +static inline u32 read_hpet_counter(const volatile void *addr) { - cycle_t ret; - u64 last; + return *(const volatile u32 *) (addr + HPET_COUNTER); +} +#endif - /* - * Empirically, a fence (of type that depends on the CPU) - * before rdtsc is enough to ensure that rdtsc is ordered - * with respect to loads. The various CPU manuals are unclear - * as to whether rdtsc can be reordered with later loads, - * but no one has ever seen it happen. - */ - rdtsc_barrier(); - ret = (cycle_t)vget_cycles(); +#ifndef BUILD_VDSO32 - last = VVAR(vsyscall_gtod_data).clock.cycle_last; +#include <linux/kernel.h> +#include <asm/vsyscall.h> +#include <asm/fixmap.h> +#include <asm/pvclock.h> - if (likely(ret >= last)) - return ret; +static notrace cycle_t vread_hpet(void) +{ + return read_hpet_counter((const void *)fix_to_virt(VSYSCALL_HPET)); +} - /* - * GCC likes to generate cmov here, but this branch is extremely - * predictable (it's just a funciton of time and the likely is - * very likely) and there's a data dependence, so force GCC - * to generate a branch instead. I don't barrier() because - * we don't actually need a barrier, and if this function - * ever gets inlined it will generate worse code. - */ - asm volatile (""); - return last; +notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) +{ + long ret; + asm("syscall" : "=a" (ret) : + "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory"); + return ret; } -static notrace cycle_t vread_hpet(void) +notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) { - return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + HPET_COUNTER); + long ret; + + asm("syscall" : "=a" (ret) : + "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); + return ret; } #ifdef CONFIG_PARAVIRT_CLOCK @@ -124,7 +126,7 @@ static notrace cycle_t vread_pvclock(int *mode) *mode = VCLOCK_NONE; /* refer to tsc.c read_tsc() comment for rationale */ - last = VVAR(vsyscall_gtod_data).clock.cycle_last; + last = gtod->cycle_last; if (likely(ret >= last)) return ret; @@ -133,11 +135,30 @@ static notrace cycle_t vread_pvclock(int *mode) } #endif +#else + +extern u8 hpet_page + __attribute__((visibility("hidden"))); + +#ifdef CONFIG_HPET_TIMER +static notrace cycle_t vread_hpet(void) +{ + return read_hpet_counter((const void *)(&hpet_page)); +} +#endif + notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) { long ret; - asm("syscall" : "=a" (ret) : - "0" (__NR_clock_gettime),"D" (clock), "S" (ts) : "memory"); + + asm( + "mov %%ebx, %%edx \n" + "mov %2, %%ebx \n" + "call VDSO32_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret) + : "0" (__NR_clock_gettime), "g" (clock), "c" (ts) + : "memory", "edx"); return ret; } @@ -145,28 +166,79 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) { long ret; - asm("syscall" : "=a" (ret) : - "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); + asm( + "mov %%ebx, %%edx \n" + "mov %2, %%ebx \n" + "call VDSO32_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret) + : "0" (__NR_gettimeofday), "g" (tv), "c" (tz) + : "memory", "edx"); return ret; } +#ifdef CONFIG_PARAVIRT_CLOCK + +static notrace cycle_t vread_pvclock(int *mode) +{ + *mode = VCLOCK_NONE; + return 0; +} +#endif + +#endif + +notrace static cycle_t vread_tsc(void) +{ + cycle_t ret; + u64 last; + + /* + * Empirically, a fence (of type that depends on the CPU) + * before rdtsc is enough to ensure that rdtsc is ordered + * with respect to loads. The various CPU manuals are unclear + * as to whether rdtsc can be reordered with later loads, + * but no one has ever seen it happen. + */ + rdtsc_barrier(); + ret = (cycle_t)__native_read_tsc(); + + last = gtod->cycle_last; + + if (likely(ret >= last)) + return ret; + + /* + * GCC likes to generate cmov here, but this branch is extremely + * predictable (it's just a funciton of time and the likely is + * very likely) and there's a data dependence, so force GCC + * to generate a branch instead. I don't barrier() because + * we don't actually need a barrier, and if this function + * ever gets inlined it will generate worse code. + */ + asm volatile (""); + return last; +} notrace static inline u64 vgetsns(int *mode) { - long v; + u64 v; cycles_t cycles; - if (gtod->clock.vclock_mode == VCLOCK_TSC) + + if (gtod->vclock_mode == VCLOCK_TSC) cycles = vread_tsc(); - else if (gtod->clock.vclock_mode == VCLOCK_HPET) +#ifdef CONFIG_HPET_TIMER + else if (gtod->vclock_mode == VCLOCK_HPET) cycles = vread_hpet(); +#endif #ifdef CONFIG_PARAVIRT_CLOCK - else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK) + else if (gtod->vclock_mode == VCLOCK_PVCLOCK) cycles = vread_pvclock(mode); #endif else return 0; - v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask; - return v * gtod->clock.mult; + v = (cycles - gtod->cycle_last) & gtod->mask; + return v * gtod->mult; } /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */ @@ -176,106 +248,102 @@ notrace static int __always_inline do_realtime(struct timespec *ts) u64 ns; int mode; - ts->tv_nsec = 0; do { - seq = raw_read_seqcount_begin(>od->seq); - mode = gtod->clock.vclock_mode; + seq = gtod_read_begin(gtod); + mode = gtod->vclock_mode; ts->tv_sec = gtod->wall_time_sec; ns = gtod->wall_time_snsec; ns += vgetsns(&mode); - ns >>= gtod->clock.shift; - } while (unlikely(read_seqcount_retry(>od->seq, seq))); + ns >>= gtod->shift; + } while (unlikely(gtod_read_retry(gtod, seq))); + + ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); + ts->tv_nsec = ns; - timespec_add_ns(ts, ns); return mode; } -notrace static int do_monotonic(struct timespec *ts) +notrace static int __always_inline do_monotonic(struct timespec *ts) { unsigned long seq; u64 ns; int mode; - ts->tv_nsec = 0; do { - seq = raw_read_seqcount_begin(>od->seq); - mode = gtod->clock.vclock_mode; + seq = gtod_read_begin(gtod); + mode = gtod->vclock_mode; ts->tv_sec = gtod->monotonic_time_sec; ns = gtod->monotonic_time_snsec; ns += vgetsns(&mode); - ns >>= gtod->clock.shift; - } while (unlikely(read_seqcount_retry(>od->seq, seq))); - timespec_add_ns(ts, ns); + ns >>= gtod->shift; + } while (unlikely(gtod_read_retry(gtod, seq))); + + ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); + ts->tv_nsec = ns; return mode; } -notrace static int do_realtime_coarse(struct timespec *ts) +notrace static void do_realtime_coarse(struct timespec *ts) { unsigned long seq; do { - seq = raw_read_seqcount_begin(>od->seq); - ts->tv_sec = gtod->wall_time_coarse.tv_sec; - ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; - } while (unlikely(read_seqcount_retry(>od->seq, seq))); - return 0; + seq = gtod_read_begin(gtod); + ts->tv_sec = gtod->wall_time_coarse_sec; + ts->tv_nsec = gtod->wall_time_coarse_nsec; + } while (unlikely(gtod_read_retry(gtod, seq))); } -notrace static int do_monotonic_coarse(struct timespec *ts) +notrace static void do_monotonic_coarse(struct timespec *ts) { unsigned long seq; do { - seq = raw_read_seqcount_begin(>od->seq); - ts->tv_sec = gtod->monotonic_time_coarse.tv_sec; - ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec; - } while (unlikely(read_seqcount_retry(>od->seq, seq))); - - return 0; + seq = gtod_read_begin(gtod); + ts->tv_sec = gtod->monotonic_time_coarse_sec; + ts->tv_nsec = gtod->monotonic_time_coarse_nsec; + } while (unlikely(gtod_read_retry(gtod, seq))); } notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) { - int ret = VCLOCK_NONE; - switch (clock) { case CLOCK_REALTIME: - ret = do_realtime(ts); + if (do_realtime(ts) == VCLOCK_NONE) + goto fallback; break; case CLOCK_MONOTONIC: - ret = do_monotonic(ts); + if (do_monotonic(ts) == VCLOCK_NONE) + goto fallback; break; case CLOCK_REALTIME_COARSE: - return do_realtime_coarse(ts); + do_realtime_coarse(ts); + break; case CLOCK_MONOTONIC_COARSE: - return do_monotonic_coarse(ts); + do_monotonic_coarse(ts); + break; + default: + goto fallback; } - if (ret == VCLOCK_NONE) - return vdso_fallback_gettime(clock, ts); return 0; +fallback: + return vdso_fallback_gettime(clock, ts); } int clock_gettime(clockid_t, struct timespec *) __attribute__((weak, alias("__vdso_clock_gettime"))); notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) { - long ret = VCLOCK_NONE; - if (likely(tv != NULL)) { - BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != - offsetof(struct timespec, tv_nsec) || - sizeof(*tv) != sizeof(struct timespec)); - ret = do_realtime((struct timespec *)tv); + if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE)) + return vdso_fallback_gtod(tv, tz); tv->tv_usec /= 1000; } if (unlikely(tz != NULL)) { - /* Avoid memcpy. Some old compilers fail to inline it */ - tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest; - tz->tz_dsttime = gtod->sys_tz.tz_dsttime; + tz->tz_minuteswest = gtod->tz_minuteswest; + tz->tz_dsttime = gtod->tz_dsttime; } - if (ret == VCLOCK_NONE) - return vdso_fallback_gtod(tv, tz); return 0; } int gettimeofday(struct timeval *, struct timezone *) @@ -287,8 +355,8 @@ int gettimeofday(struct timeval *, struct timezone *) */ notrace time_t __vdso_time(time_t *t) { - /* This is atomic on x86_64 so we don't need any locks. */ - time_t result = ACCESS_ONCE(VVAR(vsyscall_gtod_data).wall_time_sec); + /* This is atomic on x86 so we don't need any locks. */ + time_t result = ACCESS_ONCE(gtod->wall_time_sec); if (t) *t = result; diff --git a/arch/x86/vdso/vdso-layout.lds.S b/arch/x86/vdso/vdso-layout.lds.S index 634a2cf62046..2e263f367b13 100644 --- a/arch/x86/vdso/vdso-layout.lds.S +++ b/arch/x86/vdso/vdso-layout.lds.S @@ -6,7 +6,25 @@ SECTIONS { - . = VDSO_PRELINK + SIZEOF_HEADERS; +#ifdef BUILD_VDSO32 +#include <asm/vdso32.h> + + .hpet_sect : { + hpet_page = . - VDSO_OFFSET(VDSO_HPET_PAGE); + } :text :hpet_sect + + .vvar_sect : { + vvar = . - VDSO_OFFSET(VDSO_VVAR_PAGE); + + /* Place all vvars at the offsets in asm/vvar.h. */ +#define EMIT_VVAR(name, offset) vvar_ ## name = vvar + offset; +#define __VVAR_KERNEL_LDS +#include <asm/vvar.h> +#undef __VVAR_KERNEL_LDS +#undef EMIT_VVAR + } :text :vvar_sect +#endif + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text .gnu.hash : { *(.gnu.hash) } @@ -44,6 +62,11 @@ SECTIONS . = ALIGN(0x100); .text : { *(.text*) } :text =0x90909090 + + /DISCARD/ : { + *(.discard) + *(.discard.*) + } } /* @@ -61,4 +84,8 @@ PHDRS dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ note PT_NOTE FLAGS(4); /* PF_R */ eh_frame_hdr PT_GNU_EH_FRAME; +#ifdef BUILD_VDSO32 + vvar_sect PT_NULL FLAGS(4); /* PF_R */ + hpet_sect PT_NULL FLAGS(4); /* PF_R */ +#endif } diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S index 1e13eb8c9656..be3f23b09af5 100644 --- a/arch/x86/vdso/vdso.S +++ b/arch/x86/vdso/vdso.S @@ -1,21 +1,3 @@ -#include <asm/page_types.h> -#include <linux/linkage.h> +#include <asm/vdso.h> -__PAGE_ALIGNED_DATA - - .globl vdso_start, vdso_end - .align PAGE_SIZE -vdso_start: - .incbin "arch/x86/vdso/vdso.so" -vdso_end: - .align PAGE_SIZE /* extra data here leaks to userspace. */ - -.previous - - .globl vdso_pages - .bss - .align 8 - .type vdso_pages, @object -vdso_pages: - .zero (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE * 8 - .size vdso_pages, .-vdso_pages +DEFINE_VDSO_IMAGE(vdso, "arch/x86/vdso/vdso.so") diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index d6bfb876cfb0..00348980a3a6 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -16,6 +16,7 @@ #include <linux/mm.h> #include <linux/err.h> #include <linux/module.h> +#include <linux/slab.h> #include <asm/cpufeature.h> #include <asm/msr.h> @@ -25,17 +26,14 @@ #include <asm/tlbflush.h> #include <asm/vdso.h> #include <asm/proto.h> - -enum { - VDSO_DISABLED = 0, - VDSO_ENABLED = 1, - VDSO_COMPAT = 2, -}; +#include <asm/fixmap.h> +#include <asm/hpet.h> +#include <asm/vvar.h> #ifdef CONFIG_COMPAT_VDSO -#define VDSO_DEFAULT VDSO_COMPAT +#define VDSO_DEFAULT 0 #else -#define VDSO_DEFAULT VDSO_ENABLED +#define VDSO_DEFAULT 1 #endif #ifdef CONFIG_X86_64 @@ -44,13 +42,6 @@ enum { #endif /* - * This is the difference between the prelinked addresses in the vDSO images - * and the VDSO_HIGH_BASE address where CONFIG_COMPAT_VDSO places the vDSO - * in the user address space. - */ -#define VDSO_ADDR_ADJUST (VDSO_HIGH_BASE - (unsigned long)VDSO32_PRELINK) - -/* * Should the kernel map a VDSO page into processes and pass its * address down to glibc upon exec()? */ @@ -60,6 +51,9 @@ static int __init vdso_setup(char *s) { vdso_enabled = simple_strtoul(s, NULL, 0); + if (vdso_enabled > 1) + pr_warn("vdso32 values other than 0 and 1 are no longer allowed; vdso disabled\n"); + return 1; } @@ -76,124 +70,8 @@ __setup_param("vdso=", vdso32_setup, vdso_setup, 0); EXPORT_SYMBOL_GPL(vdso_enabled); #endif -static __init void reloc_symtab(Elf32_Ehdr *ehdr, - unsigned offset, unsigned size) -{ - Elf32_Sym *sym = (void *)ehdr + offset; - unsigned nsym = size / sizeof(*sym); - unsigned i; - - for(i = 0; i < nsym; i++, sym++) { - if (sym->st_shndx == SHN_UNDEF || - sym->st_shndx == SHN_ABS) - continue; /* skip */ - - if (sym->st_shndx > SHN_LORESERVE) { - printk(KERN_INFO "VDSO: unexpected st_shndx %x\n", - sym->st_shndx); - continue; - } - - switch(ELF_ST_TYPE(sym->st_info)) { - case STT_OBJECT: - case STT_FUNC: - case STT_SECTION: - case STT_FILE: - sym->st_value += VDSO_ADDR_ADJUST; - } - } -} - -static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset) -{ - Elf32_Dyn *dyn = (void *)ehdr + offset; - - for(; dyn->d_tag != DT_NULL; dyn++) - switch(dyn->d_tag) { - case DT_PLTGOT: - case DT_HASH: - case DT_STRTAB: - case DT_SYMTAB: - case DT_RELA: - case DT_INIT: - case DT_FINI: - case DT_REL: - case DT_DEBUG: - case DT_JMPREL: - case DT_VERSYM: - case DT_VERDEF: - case DT_VERNEED: - case DT_ADDRRNGLO ... DT_ADDRRNGHI: - /* definitely pointers needing relocation */ - dyn->d_un.d_ptr += VDSO_ADDR_ADJUST; - break; - - case DT_ENCODING ... OLD_DT_LOOS-1: - case DT_LOOS ... DT_HIOS-1: - /* Tags above DT_ENCODING are pointers if - they're even */ - if (dyn->d_tag >= DT_ENCODING && - (dyn->d_tag & 1) == 0) - dyn->d_un.d_ptr += VDSO_ADDR_ADJUST; - break; - - case DT_VERDEFNUM: - case DT_VERNEEDNUM: - case DT_FLAGS_1: - case DT_RELACOUNT: - case DT_RELCOUNT: - case DT_VALRNGLO ... DT_VALRNGHI: - /* definitely not pointers */ - break; - - case OLD_DT_LOOS ... DT_LOOS-1: - case DT_HIOS ... DT_VALRNGLO-1: - default: - if (dyn->d_tag > DT_ENCODING) - printk(KERN_INFO "VDSO: unexpected DT_tag %x\n", - dyn->d_tag); - break; - } -} - -static __init void relocate_vdso(Elf32_Ehdr *ehdr) -{ - Elf32_Phdr *phdr; - Elf32_Shdr *shdr; - int i; - - BUG_ON(memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0 || - !elf_check_arch_ia32(ehdr) || - ehdr->e_type != ET_DYN); - - ehdr->e_entry += VDSO_ADDR_ADJUST; - - /* rebase phdrs */ - phdr = (void *)ehdr + ehdr->e_phoff; - for (i = 0; i < ehdr->e_phnum; i++) { - phdr[i].p_vaddr += VDSO_ADDR_ADJUST; - - /* relocate dynamic stuff */ - if (phdr[i].p_type == PT_DYNAMIC) - reloc_dyn(ehdr, phdr[i].p_offset); - } - - /* rebase sections */ - shdr = (void *)ehdr + ehdr->e_shoff; - for(i = 0; i < ehdr->e_shnum; i++) { - if (!(shdr[i].sh_flags & SHF_ALLOC)) - continue; - - shdr[i].sh_addr += VDSO_ADDR_ADJUST; - - if (shdr[i].sh_type == SHT_SYMTAB || - shdr[i].sh_type == SHT_DYNSYM) - reloc_symtab(ehdr, shdr[i].sh_offset, - shdr[i].sh_size); - } -} - -static struct page *vdso32_pages[1]; +static struct page **vdso32_pages; +static unsigned vdso32_size; #ifdef CONFIG_X86_64 @@ -212,12 +90,6 @@ void syscall32_cpu_init(void) wrmsrl(MSR_CSTAR, ia32_cstar_target); } -#define compat_uses_vma 1 - -static inline void map_compat_vdso(int map) -{ -} - #else /* CONFIG_X86_32 */ #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP)) @@ -241,64 +113,36 @@ void enable_sep_cpu(void) put_cpu(); } -static struct vm_area_struct gate_vma; - -static int __init gate_vma_init(void) -{ - gate_vma.vm_mm = NULL; - gate_vma.vm_start = FIXADDR_USER_START; - gate_vma.vm_end = FIXADDR_USER_END; - gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; - gate_vma.vm_page_prot = __P101; - - return 0; -} - -#define compat_uses_vma 0 - -static void map_compat_vdso(int map) -{ - static int vdso_mapped; - - if (map == vdso_mapped) - return; - - vdso_mapped = map; - - __set_fixmap(FIX_VDSO, page_to_pfn(vdso32_pages[0]) << PAGE_SHIFT, - map ? PAGE_READONLY_EXEC : PAGE_NONE); - - /* flush stray tlbs */ - flush_tlb_all(); -} - #endif /* CONFIG_X86_64 */ int __init sysenter_setup(void) { - void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); - const void *vsyscall; - size_t vsyscall_len; - - vdso32_pages[0] = virt_to_page(syscall_page); - -#ifdef CONFIG_X86_32 - gate_vma_init(); -#endif + char *vdso32_start, *vdso32_end; + int npages, i; +#ifdef CONFIG_COMPAT if (vdso32_syscall()) { - vsyscall = &vdso32_syscall_start; - vsyscall_len = &vdso32_syscall_end - &vdso32_syscall_start; - } else if (vdso32_sysenter()){ - vsyscall = &vdso32_sysenter_start; - vsyscall_len = &vdso32_sysenter_end - &vdso32_sysenter_start; + vdso32_start = vdso32_syscall_start; + vdso32_end = vdso32_syscall_end; + vdso32_pages = vdso32_syscall_pages; + } else +#endif + if (vdso32_sysenter()) { + vdso32_start = vdso32_sysenter_start; + vdso32_end = vdso32_sysenter_end; + vdso32_pages = vdso32_sysenter_pages; } else { - vsyscall = &vdso32_int80_start; - vsyscall_len = &vdso32_int80_end - &vdso32_int80_start; + vdso32_start = vdso32_int80_start; + vdso32_end = vdso32_int80_end; + vdso32_pages = vdso32_int80_pages; } - memcpy(syscall_page, vsyscall, vsyscall_len); - relocate_vdso(syscall_page); + npages = ((vdso32_end - vdso32_start) + PAGE_SIZE - 1) / PAGE_SIZE; + vdso32_size = npages << PAGE_SHIFT; + for (i = 0; i < npages; i++) + vdso32_pages[i] = virt_to_page(vdso32_start + i*PAGE_SIZE); + + patch_vdso32(vdso32_start, vdso32_size); return 0; } @@ -309,48 +153,73 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) struct mm_struct *mm = current->mm; unsigned long addr; int ret = 0; - bool compat; + struct vm_area_struct *vma; #ifdef CONFIG_X86_X32_ABI if (test_thread_flag(TIF_X32)) return x32_setup_additional_pages(bprm, uses_interp); #endif - if (vdso_enabled == VDSO_DISABLED) + if (vdso_enabled != 1) /* Other values all mean "disabled" */ return 0; down_write(&mm->mmap_sem); - /* Test compat mode once here, in case someone - changes it via sysctl */ - compat = (vdso_enabled == VDSO_COMPAT); + addr = get_unmapped_area(NULL, 0, vdso32_size + VDSO_OFFSET(VDSO_PREV_PAGES), 0, 0); + if (IS_ERR_VALUE(addr)) { + ret = addr; + goto up_fail; + } + + addr += VDSO_OFFSET(VDSO_PREV_PAGES); - map_compat_vdso(compat); + current->mm->context.vdso = (void *)addr; - if (compat) - addr = VDSO_HIGH_BASE; - else { - addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); - if (IS_ERR_VALUE(addr)) { - ret = addr; - goto up_fail; - } + /* + * MAYWRITE to allow gdb to COW and set breakpoints + */ + ret = install_special_mapping(mm, + addr, + vdso32_size, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + vdso32_pages); + + if (ret) + goto up_fail; + + vma = _install_special_mapping(mm, + addr - VDSO_OFFSET(VDSO_PREV_PAGES), + VDSO_OFFSET(VDSO_PREV_PAGES), + VM_READ, + NULL); + + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto up_fail; } - current->mm->context.vdso = (void *)addr; + ret = remap_pfn_range(vma, + addr - VDSO_OFFSET(VDSO_VVAR_PAGE), + __pa_symbol(&__vvar_page) >> PAGE_SHIFT, + PAGE_SIZE, + PAGE_READONLY); + + if (ret) + goto up_fail; - if (compat_uses_vma || !compat) { - /* - * MAYWRITE to allow gdb to COW and set breakpoints - */ - ret = install_special_mapping(mm, addr, PAGE_SIZE, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - vdso32_pages); +#ifdef CONFIG_HPET_TIMER + if (hpet_address) { + ret = io_remap_pfn_range(vma, + addr - VDSO_OFFSET(VDSO_HPET_PAGE), + hpet_address >> PAGE_SHIFT, + PAGE_SIZE, + pgprot_noncached(PAGE_READONLY)); if (ret) goto up_fail; } +#endif current_thread_info()->sysenter_return = VDSO32_SYMBOL(addr, SYSENTER_RETURN); @@ -411,20 +280,12 @@ const char *arch_vma_name(struct vm_area_struct *vma) struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { - /* - * Check to see if the corresponding task was created in compat vdso - * mode. - */ - if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE) - return &gate_vma; return NULL; } int in_gate_area(struct mm_struct *mm, unsigned long addr) { - const struct vm_area_struct *vma = get_gate_vma(mm); - - return vma && addr >= vma->vm_start && addr < vma->vm_end; + return 0; } int in_gate_area_no_mm(unsigned long addr) diff --git a/arch/x86/vdso/vdso32.S b/arch/x86/vdso/vdso32.S index 2ce5f82c333b..018bcd9f97b4 100644 --- a/arch/x86/vdso/vdso32.S +++ b/arch/x86/vdso/vdso32.S @@ -1,22 +1,9 @@ -#include <linux/init.h> +#include <asm/vdso.h> -__INITDATA +DEFINE_VDSO_IMAGE(vdso32_int80, "arch/x86/vdso/vdso32-int80.so") - .globl vdso32_int80_start, vdso32_int80_end -vdso32_int80_start: - .incbin "arch/x86/vdso/vdso32-int80.so" -vdso32_int80_end: - - .globl vdso32_syscall_start, vdso32_syscall_end -vdso32_syscall_start: #ifdef CONFIG_COMPAT - .incbin "arch/x86/vdso/vdso32-syscall.so" +DEFINE_VDSO_IMAGE(vdso32_syscall, "arch/x86/vdso/vdso32-syscall.so") #endif -vdso32_syscall_end: - - .globl vdso32_sysenter_start, vdso32_sysenter_end -vdso32_sysenter_start: - .incbin "arch/x86/vdso/vdso32-sysenter.so" -vdso32_sysenter_end: -__FINIT +DEFINE_VDSO_IMAGE(vdso32_sysenter, "arch/x86/vdso/vdso32-sysenter.so") diff --git a/arch/x86/vdso/vdso32/vclock_gettime.c b/arch/x86/vdso/vdso32/vclock_gettime.c new file mode 100644 index 000000000000..175cc72c0f68 --- /dev/null +++ b/arch/x86/vdso/vdso32/vclock_gettime.c @@ -0,0 +1,30 @@ +#define BUILD_VDSO32 + +#ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE +#undef CONFIG_OPTIMIZE_INLINING +#endif + +#undef CONFIG_X86_PPRO_FENCE + +#ifdef CONFIG_X86_64 + +/* + * in case of a 32 bit VDSO for a 64 bit kernel fake a 32 bit kernel + * configuration + */ +#undef CONFIG_64BIT +#undef CONFIG_X86_64 +#undef CONFIG_ILLEGAL_POINTER_VALUE +#undef CONFIG_SPARSEMEM_VMEMMAP +#undef CONFIG_NR_CPUS + +#define CONFIG_X86_32 1 +#define CONFIG_PAGE_OFFSET 0 +#define CONFIG_ILLEGAL_POINTER_VALUE 0 +#define CONFIG_NR_CPUS 1 + +#define BUILD_VDSO32_64 + +#endif + +#include "../vclock_gettime.c" diff --git a/arch/x86/vdso/vdso32/vdso32.lds.S b/arch/x86/vdso/vdso32/vdso32.lds.S index 976124bb5f92..aadb8b9994cd 100644 --- a/arch/x86/vdso/vdso32/vdso32.lds.S +++ b/arch/x86/vdso/vdso32/vdso32.lds.S @@ -8,7 +8,11 @@ * values visible using the asm-x86/vdso.h macros from the kernel proper. */ +#include <asm/page.h> + +#define BUILD_VDSO32 #define VDSO_PRELINK 0 + #include "../vdso-layout.lds.S" /* The ELF entry point can be used to set the AT_SYSINFO value. */ @@ -19,6 +23,13 @@ ENTRY(__kernel_vsyscall); */ VERSION { + LINUX_2.6 { + global: + __vdso_clock_gettime; + __vdso_gettimeofday; + __vdso_time; + }; + LINUX_2.5 { global: __kernel_vsyscall; @@ -31,7 +42,9 @@ VERSION /* * Symbols we define here called VDSO* get their values into vdso32-syms.h. */ -VDSO32_PRELINK = VDSO_PRELINK; VDSO32_vsyscall = __kernel_vsyscall; VDSO32_sigreturn = __kernel_sigreturn; VDSO32_rt_sigreturn = __kernel_rt_sigreturn; +VDSO32_clock_gettime = clock_gettime; +VDSO32_gettimeofday = gettimeofday; +VDSO32_time = time; diff --git a/arch/x86/vdso/vdsox32.S b/arch/x86/vdso/vdsox32.S index 295f1c7543d8..f4aa34e7f370 100644 --- a/arch/x86/vdso/vdsox32.S +++ b/arch/x86/vdso/vdsox32.S @@ -1,21 +1,3 @@ -#include <asm/page_types.h> -#include <linux/linkage.h> +#include <asm/vdso.h> -__PAGE_ALIGNED_DATA - - .globl vdsox32_start, vdsox32_end - .align PAGE_SIZE -vdsox32_start: - .incbin "arch/x86/vdso/vdsox32.so" -vdsox32_end: - .align PAGE_SIZE /* extra data here leaks to userspace. */ - -.previous - - .globl vdsox32_pages - .bss - .align 8 - .type vdsox32_pages, @object -vdsox32_pages: - .zero (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE * 8 - .size vdsox32_pages, .-vdsox32_pages +DEFINE_VDSO_IMAGE(vdsox32, "arch/x86/vdso/vdsox32.so") diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 431e87544411..1ad102613127 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -16,20 +16,22 @@ #include <asm/vdso.h> #include <asm/page.h> +#if defined(CONFIG_X86_64) unsigned int __read_mostly vdso_enabled = 1; -extern char vdso_start[], vdso_end[]; +DECLARE_VDSO_IMAGE(vdso); extern unsigned short vdso_sync_cpuid; - -extern struct page *vdso_pages[]; static unsigned vdso_size; #ifdef CONFIG_X86_X32_ABI -extern char vdsox32_start[], vdsox32_end[]; -extern struct page *vdsox32_pages[]; +DECLARE_VDSO_IMAGE(vdsox32); static unsigned vdsox32_size; +#endif +#endif -static void __init patch_vdsox32(void *vdso, size_t len) +#if defined(CONFIG_X86_32) || defined(CONFIG_X86_X32_ABI) || \ + defined(CONFIG_COMPAT) +void __init patch_vdso32(void *vdso, size_t len) { Elf32_Ehdr *hdr = vdso; Elf32_Shdr *sechdrs, *alt_sec = 0; @@ -52,7 +54,7 @@ static void __init patch_vdsox32(void *vdso, size_t len) } /* If we get here, it's probably a bug. */ - pr_warning("patch_vdsox32: .altinstructions not found\n"); + pr_warning("patch_vdso32: .altinstructions not found\n"); return; /* nothing to patch */ found: @@ -61,6 +63,7 @@ found: } #endif +#if defined(CONFIG_X86_64) static void __init patch_vdso64(void *vdso, size_t len) { Elf64_Ehdr *hdr = vdso; @@ -104,7 +107,7 @@ static int __init init_vdso(void) vdso_pages[i] = virt_to_page(vdso_start + i*PAGE_SIZE); #ifdef CONFIG_X86_X32_ABI - patch_vdsox32(vdsox32_start, vdsox32_end - vdsox32_start); + patch_vdso32(vdsox32_start, vdsox32_end - vdsox32_start); npages = (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE; vdsox32_size = npages << PAGE_SHIFT; for (i = 0; i < npages; i++) @@ -204,3 +207,4 @@ static __init int vdso_setup(char *s) return 0; } __setup("vdso=", vdso_setup); +#endif |