Diffstat (limited to 'arch/x86')
79 files changed, 1829 insertions, 1094 deletions
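The headline change in this series is the x32 ABI (CONFIG_X86_X32): 64-bit code and registers with 32-bit pointers. As the asm/unistd.h and asm/syscall.h hunks below show, x32 programs enter through the regular 64-bit syscall path but set __X32_SYSCALL_BIT (0x40000000) in the syscall number, and the kernel masks that bit off with __SYSCALL_MASK before indexing the syscall table. A minimal user-space sketch of that masking, using only the constants from the patch (the call number here is made up purely for illustration, not a real x32 syscall number):

	#include <stdbool.h>
	#include <stdio.h>

	/* Constants as defined in the asm/unistd.h hunk below. */
	#define __X32_SYSCALL_BIT	0x40000000
	#define __SYSCALL_MASK		(~(__X32_SYSCALL_BIT))	/* with CONFIG_X86_X32_ABI */

	/* Mirrors syscall_get_nr(): the table index ignores the x32 flag bit. */
	static unsigned int table_index(unsigned int orig_ax)
	{
		return orig_ax & __SYSCALL_MASK;
	}

	/* Mirrors the is_x32_task() test: an x32 system call carries the flag bit. */
	static bool is_x32_call(unsigned int orig_ax)
	{
		return orig_ax & __X32_SYSCALL_BIT;
	}

	int main(void)
	{
		unsigned int nr = __X32_SYSCALL_BIT | 39;	/* made-up call number, for illustration only */

		printf("index=%u x32=%d\n", table_index(nr), is_x32_call(nr));
		return 0;
	}

Inside the kernel, the asm/compat.h hunk applies the same test to task_pt_regs(current)->orig_ax in is_x32_task(), and COMPAT_USE_64BIT_TIME is derived from it as well.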
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3ad653de7100..1d14cc6b79ad 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -69,7 +69,6 @@ config X86 select HAVE_ARCH_JUMP_LABEL select HAVE_TEXT_POKE_SMP select HAVE_GENERIC_HARDIRQS - select HAVE_SPARSE_IRQ select SPARSE_IRQ select GENERIC_FIND_FIRST_BIT select GENERIC_IRQ_PROBE @@ -2164,9 +2163,9 @@ config IA32_EMULATION depends on X86_64 select COMPAT_BINFMT_ELF ---help--- - Include code to run 32-bit programs under a 64-bit kernel. You should - likely turn this on, unless you're 100% sure that you don't have any - 32-bit programs left. + Include code to run legacy 32-bit programs under a + 64-bit kernel. You should likely turn this on, unless you're + 100% sure that you don't have any 32-bit programs left. config IA32_AOUT tristate "IA32 a.out support" @@ -2174,9 +2173,23 @@ config IA32_AOUT ---help--- Support old a.out binaries in the 32bit emulation. +config X86_X32 + bool "x32 ABI for 64-bit mode (EXPERIMENTAL)" + depends on X86_64 && IA32_EMULATION && EXPERIMENTAL + ---help--- + Include code to run binaries for the x32 native 32-bit ABI + for 64-bit processors. An x32 process gets access to the + full 64-bit register file and wide data path while leaving + pointers at 32 bits for smaller memory footprint. + + You will need a recent binutils (2.22 or later) with + elf32_x86_64 support enabled to compile a kernel with this + option set. + config COMPAT def_bool y - depends on IA32_EMULATION + depends on IA32_EMULATION || X86_X32 + select ARCH_WANT_OLD_COMPAT_IPC config COMPAT_FOR_U64_ALIGNMENT def_bool COMPAT diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 209ba1294592..968dbe24a255 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -82,6 +82,22 @@ ifdef CONFIG_CC_STACKPROTECTOR endif endif +ifdef CONFIG_X86_X32 + x32_ld_ok := $(call try-run,\ + /bin/echo -e '1: .quad 1b' | \ + $(CC) $(KBUILD_AFLAGS) -c -xassembler -o "$$TMP" - && \ + $(OBJCOPY) -O elf32-x86-64 "$$TMP" "$$TMPO" && \ + $(LD) -m elf32_x86_64 "$$TMPO" -o "$$TMP",y,n) + ifeq ($(x32_ld_ok),y) + CONFIG_X86_X32_ABI := y + KBUILD_AFLAGS += -DCONFIG_X86_X32_ABI + KBUILD_CFLAGS += -DCONFIG_X86_X32_ABI + else + $(warning CONFIG_X86_X32 enabled but no binutils support) + endif +endif +export CONFIG_X86_X32_ABI + # Don't unroll struct assignments with kmemcheck enabled ifeq ($(CONFIG_KMEMCHECK),y) KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy) diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 2bf18059fbea..119db67dcb03 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -15,23 +15,28 @@ CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y CONFIG_CGROUP_SCHED=y -CONFIG_UTS_NS=y -CONFIG_IPC_NS=y -CONFIG_USER_NS=y -CONFIG_PID_NS=y -CONFIG_NET_NS=y CONFIG_BLK_DEV_INITRD=y -CONFIG_KALLSYMS_EXTRA_PASS=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_KPROBES=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +CONFIG_MAC_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_SGI_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_EFI_PARTITION=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_SMP=y -CONFIG_SPARSE_IRQ=y CONFIG_X86_GENERIC=y CONFIG_HPET_TIMER=y CONFIG_SCHED_SMT=y @@ -51,14 +56,12 @@ CONFIG_HZ_1000=y CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y # 
CONFIG_COMPAT_VDSO is not set -CONFIG_PM=y +CONFIG_HIBERNATION=y CONFIG_PM_DEBUG=y CONFIG_PM_TRACE_RTC=y -CONFIG_HIBERNATION=y CONFIG_ACPI_PROCFS=y CONFIG_ACPI_DOCK=y CONFIG_CPU_FREQ=y -CONFIG_CPU_FREQ_DEBUG=y # CONFIG_CPU_FREQ_STAT is not set CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_PERFORMANCE=y @@ -69,7 +72,6 @@ CONFIG_PCI_MSI=y CONFIG_PCCARD=y CONFIG_YENTA=y CONFIG_HOTPLUG_PCI=y -CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_BINFMT_MISC=y CONFIG_NET=y CONFIG_PACKET=y @@ -120,7 +122,6 @@ CONFIG_NF_CONNTRACK_IPV4=y CONFIG_IP_NF_IPTABLES=y CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_TARGET_REJECT=y -CONFIG_IP_NF_TARGET_LOG=y CONFIG_IP_NF_TARGET_ULOG=y CONFIG_NF_NAT=y CONFIG_IP_NF_TARGET_MASQUERADE=y @@ -128,7 +129,6 @@ CONFIG_IP_NF_MANGLE=y CONFIG_NF_CONNTRACK_IPV6=y CONFIG_IP6_NF_IPTABLES=y CONFIG_IP6_NF_MATCH_IPV6HEADER=y -CONFIG_IP6_NF_TARGET_LOG=y CONFIG_IP6_NF_FILTER=y CONFIG_IP6_NF_TARGET_REJECT=y CONFIG_IP6_NF_MANGLE=y @@ -169,25 +169,20 @@ CONFIG_DM_ZERO=y CONFIG_MACINTOSH_DRIVERS=y CONFIG_MAC_EMUMOUSEBTN=y CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -CONFIG_NET_VENDOR_3COM=y +CONFIG_NETCONSOLE=y +CONFIG_BNX2=y +CONFIG_TIGON3=y CONFIG_NET_TULIP=y -CONFIG_NET_PCI=y -CONFIG_FORCEDETH=y CONFIG_E100=y +CONFIG_E1000=y +CONFIG_E1000E=y +CONFIG_SKY2=y CONFIG_NE2K_PCI=y +CONFIG_FORCEDETH=y CONFIG_8139TOO=y # CONFIG_8139TOO_PIO is not set -CONFIG_E1000=y -CONFIG_E1000E=y CONFIG_R8169=y -CONFIG_SKY2=y -CONFIG_TIGON3=y -CONFIG_BNX2=y -CONFIG_TR=y -CONFIG_NET_PCMCIA=y CONFIG_FDDI=y -CONFIG_NETCONSOLE=y CONFIG_INPUT_POLLDEV=y # CONFIG_INPUT_MOUSEDEV_PSAUX is not set CONFIG_INPUT_EVDEV=y @@ -196,6 +191,7 @@ CONFIG_INPUT_TABLET=y CONFIG_INPUT_TOUCHSCREEN=y CONFIG_INPUT_MISC=y CONFIG_VT_HW_CONSOLE_BINDING=y +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_NONSTANDARD=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y @@ -205,7 +201,6 @@ CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_8250_DETECT_IRQ=y CONFIG_SERIAL_8250_RSA=y -# CONFIG_LEGACY_PTYS is not set CONFIG_HW_RANDOM=y CONFIG_NVRAM=y CONFIG_HPET=y @@ -220,7 +215,6 @@ CONFIG_DRM_I915=y CONFIG_FB_MODE_HELPERS=y CONFIG_FB_TILEBLITTING=y CONFIG_FB_EFI=y -CONFIG_BACKLIGHT_LCD_SUPPORT=y # CONFIG_LCD_CLASS_DEVICE is not set CONFIG_VGACON_SOFT_SCROLLBACK=y CONFIG_LOGO=y @@ -283,7 +277,6 @@ CONFIG_ZISOFS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y @@ -291,18 +284,6 @@ CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_OSF_PARTITION=y -CONFIG_AMIGA_PARTITION=y -CONFIG_MAC_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -CONFIG_SGI_PARTITION=y -CONFIG_SUN_PARTITION=y -CONFIG_KARMA_PARTITION=y -CONFIG_EFI_PARTITION=y CONFIG_NLS_DEFAULT="utf8" CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y @@ -317,13 +298,12 @@ CONFIG_DEBUG_KERNEL=y # CONFIG_SCHED_DEBUG is not set CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set +CONFIG_DEBUG_STACK_USAGE=y CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_PROVIDE_OHCI1394_DMA_INIT=y CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_DEBUG_STACK_USAGE=y # CONFIG_DEBUG_RODATA_TEST is not set CONFIG_DEBUG_NX_TEST=m CONFIG_DEBUG_BOOT_PARAMS=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 058a35b8286c..76eb2903809f 100644 --- a/arch/x86/configs/x86_64_defconfig +++ 
b/arch/x86/configs/x86_64_defconfig @@ -1,4 +1,3 @@ -CONFIG_64BIT=y CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y @@ -16,26 +15,29 @@ CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y CONFIG_CGROUP_SCHED=y -CONFIG_UTS_NS=y -CONFIG_IPC_NS=y -CONFIG_USER_NS=y -CONFIG_PID_NS=y -CONFIG_NET_NS=y CONFIG_BLK_DEV_INITRD=y -CONFIG_KALLSYMS_EXTRA_PASS=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_KPROBES=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +CONFIG_MAC_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_SGI_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_EFI_PARTITION=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_SMP=y -CONFIG_SPARSE_IRQ=y CONFIG_CALGARY_IOMMU=y -CONFIG_AMD_IOMMU=y -CONFIG_AMD_IOMMU_STATS=y CONFIG_NR_CPUS=64 CONFIG_SCHED_SMT=y CONFIG_PREEMPT_VOLUNTARY=y @@ -53,27 +55,22 @@ CONFIG_HZ_1000=y CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y # CONFIG_COMPAT_VDSO is not set -CONFIG_PM=y +CONFIG_HIBERNATION=y CONFIG_PM_DEBUG=y CONFIG_PM_TRACE_RTC=y -CONFIG_HIBERNATION=y CONFIG_ACPI_PROCFS=y CONFIG_ACPI_DOCK=y CONFIG_CPU_FREQ=y -CONFIG_CPU_FREQ_DEBUG=y # CONFIG_CPU_FREQ_STAT is not set CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_PERFORMANCE=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y CONFIG_X86_ACPI_CPUFREQ=y CONFIG_PCI_MMCONFIG=y -CONFIG_INTEL_IOMMU=y -# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set CONFIG_PCIEPORTBUS=y CONFIG_PCCARD=y CONFIG_YENTA=y CONFIG_HOTPLUG_PCI=y -CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_BINFMT_MISC=y CONFIG_IA32_EMULATION=y CONFIG_NET=y @@ -125,7 +122,6 @@ CONFIG_NF_CONNTRACK_IPV4=y CONFIG_IP_NF_IPTABLES=y CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_TARGET_REJECT=y -CONFIG_IP_NF_TARGET_LOG=y CONFIG_IP_NF_TARGET_ULOG=y CONFIG_NF_NAT=y CONFIG_IP_NF_TARGET_MASQUERADE=y @@ -133,7 +129,6 @@ CONFIG_IP_NF_MANGLE=y CONFIG_NF_CONNTRACK_IPV6=y CONFIG_IP6_NF_IPTABLES=y CONFIG_IP6_NF_MATCH_IPV6HEADER=y -CONFIG_IP6_NF_TARGET_LOG=y CONFIG_IP6_NF_FILTER=y CONFIG_IP6_NF_TARGET_REJECT=y CONFIG_IP6_NF_MANGLE=y @@ -172,20 +167,15 @@ CONFIG_DM_ZERO=y CONFIG_MACINTOSH_DRIVERS=y CONFIG_MAC_EMUMOUSEBTN=y CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -CONFIG_NET_VENDOR_3COM=y +CONFIG_NETCONSOLE=y +CONFIG_TIGON3=y CONFIG_NET_TULIP=y -CONFIG_NET_PCI=y -CONFIG_FORCEDETH=y CONFIG_E100=y -CONFIG_8139TOO=y CONFIG_E1000=y CONFIG_SKY2=y -CONFIG_TIGON3=y -CONFIG_TR=y -CONFIG_NET_PCMCIA=y +CONFIG_FORCEDETH=y +CONFIG_8139TOO=y CONFIG_FDDI=y -CONFIG_NETCONSOLE=y CONFIG_INPUT_POLLDEV=y # CONFIG_INPUT_MOUSEDEV_PSAUX is not set CONFIG_INPUT_EVDEV=y @@ -194,6 +184,7 @@ CONFIG_INPUT_TABLET=y CONFIG_INPUT_TOUCHSCREEN=y CONFIG_INPUT_MISC=y CONFIG_VT_HW_CONSOLE_BINDING=y +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_NONSTANDARD=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y @@ -203,7 +194,6 @@ CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_8250_DETECT_IRQ=y CONFIG_SERIAL_8250_RSA=y -# CONFIG_LEGACY_PTYS is not set CONFIG_HW_RANDOM=y # CONFIG_HW_RANDOM_INTEL is not set # CONFIG_HW_RANDOM_AMD is not set @@ -221,7 +211,6 @@ CONFIG_DRM_I915_KMS=y CONFIG_FB_MODE_HELPERS=y CONFIG_FB_TILEBLITTING=y CONFIG_FB_EFI=y -CONFIG_BACKLIGHT_LCD_SUPPORT=y # CONFIG_LCD_CLASS_DEVICE is not set CONFIG_VGACON_SOFT_SCROLLBACK=y CONFIG_LOGO=y @@ -268,6 +257,10 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_HCTOSYS is not set CONFIG_DMADEVICES=y 
CONFIG_EEEPC_LAPTOP=y +CONFIG_AMD_IOMMU=y +CONFIG_AMD_IOMMU_STATS=y +CONFIG_INTEL_IOMMU=y +# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set CONFIG_EFI_VARS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set @@ -284,7 +277,6 @@ CONFIG_ZISOFS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y @@ -292,18 +284,6 @@ CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_OSF_PARTITION=y -CONFIG_AMIGA_PARTITION=y -CONFIG_MAC_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -CONFIG_SGI_PARTITION=y -CONFIG_SUN_PARTITION=y -CONFIG_KARMA_PARTITION=y -CONFIG_EFI_PARTITION=y CONFIG_NLS_DEFAULT="utf8" CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y @@ -317,13 +297,12 @@ CONFIG_DEBUG_KERNEL=y # CONFIG_SCHED_DEBUG is not set CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set +CONFIG_DEBUG_STACK_USAGE=y CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_PROVIDE_OHCI1394_DMA_INIT=y CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_DEBUG_STACK_USAGE=y # CONFIG_DEBUG_RODATA_TEST is not set CONFIG_DEBUG_NX_TEST=m CONFIG_DEBUG_BOOT_PARAMS=y diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 5563ba1cf513..a69245ba27e3 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -12,10 +12,8 @@ #include <linux/mm.h> #include <linux/smp.h> #include <linux/kernel.h> -#include <linux/signal.h> #include <linux/errno.h> #include <linux/wait.h> -#include <linux/ptrace.h> #include <linux/unistd.h> #include <linux/stddef.h> #include <linux/personality.h> @@ -32,20 +30,15 @@ #include <asm/proto.h> #include <asm/vdso.h> #include <asm/sigframe.h> +#include <asm/sighandling.h> #include <asm/sys_ia32.h> -#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) - -#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ - X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \ - X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ - X86_EFLAGS_CF) - -void signal_fault(struct pt_regs *regs, void __user *frame, char *where); +#define FIX_EFLAGS __FIX_EFLAGS int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) { int err = 0; + bool ia32 = is_ia32_task(); if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) return -EFAULT; @@ -75,8 +68,13 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) case __SI_FAULT >> 16: break; case __SI_CHLD >> 16: - put_user_ex(from->si_utime, &to->si_utime); - put_user_ex(from->si_stime, &to->si_stime); + if (ia32) { + put_user_ex(from->si_utime, &to->si_utime); + put_user_ex(from->si_stime, &to->si_stime); + } else { + put_user_ex(from->si_utime, &to->_sifields._sigchld_x32._utime); + put_user_ex(from->si_stime, &to->_sifields._sigchld_x32._stime); + } put_user_ex(from->si_status, &to->si_status); /* FALL THROUGH */ default: @@ -348,7 +346,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, put_user_ex(regs->dx, &sc->dx); put_user_ex(regs->cx, &sc->cx); put_user_ex(regs->ax, &sc->ax); - put_user_ex(current->thread.trap_no, &sc->trapno); + put_user_ex(current->thread.trap_nr, &sc->trapno); put_user_ex(current->thread.error_code, &sc->err); put_user_ex(regs->ip, &sc->ip); put_user_ex(regs->cs, (unsigned int __user *)&sc->cs); diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index f6f5c53dc903..aec2202a596c 100644 
--- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -287,46 +287,6 @@ asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act, return ret; } -asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, - compat_sigset_t __user *oset, - unsigned int sigsetsize) -{ - sigset_t s; - compat_sigset_t s32; - int ret; - mm_segment_t old_fs = get_fs(); - - if (set) { - if (copy_from_user(&s32, set, sizeof(compat_sigset_t))) - return -EFAULT; - switch (_NSIG_WORDS) { - case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32); - case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32); - case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32); - case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); - } - } - set_fs(KERNEL_DS); - ret = sys_rt_sigprocmask(how, - set ? (sigset_t __user *)&s : NULL, - oset ? (sigset_t __user *)&s : NULL, - sigsetsize); - set_fs(old_fs); - if (ret) - return ret; - if (oset) { - switch (_NSIG_WORDS) { - case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; - case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; - case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; - case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; - } - if (copy_to_user(oset, &s32, sizeof(compat_sigset_t))) - return -EFAULT; - } - return 0; -} - asmlinkage long sys32_alarm(unsigned int seconds) { return alarm_setitimer(seconds); diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index b57e6a43a37a..f9c0d3ba9e84 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -14,6 +14,7 @@ header-y += msr.h header-y += mtrr.h header-y += posix_types_32.h header-y += posix_types_64.h +header-y += posix_types_x32.h header-y += prctl.h header-y += processor-flags.h header-y += ptrace-abi.h @@ -24,3 +25,4 @@ header-y += vsyscall.h genhdr-y += unistd_32.h genhdr-y += unistd_64.h +genhdr-y += unistd_x32.h diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 4b2caeefe1a2..d85410171260 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -534,7 +534,7 @@ static inline unsigned int read_apic_id(void) static inline int default_apic_id_valid(int apicid) { - return x2apic_mode || (apicid < 255); + return (apicid < 255); } extern void default_setup_apic_routing(void); diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 30d737ef2a42..d6805798d6fc 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -6,7 +6,9 @@ */ #include <linux/types.h> #include <linux/sched.h> +#include <asm/processor.h> #include <asm/user32.h> +#include <asm/unistd.h> #define COMPAT_USER_HZ 100 #define COMPAT_UTS_MACHINE "i686\0\0" @@ -186,7 +188,20 @@ struct compat_shmid64_ds { /* * The type of struct elf_prstatus.pr_reg in compatible core dumps. */ +#ifdef CONFIG_X86_X32_ABI +typedef struct user_regs_struct compat_elf_gregset_t; + +#define PR_REG_SIZE(S) (test_thread_flag(TIF_IA32) ? 68 : 216) +#define PRSTATUS_SIZE(S) (test_thread_flag(TIF_IA32) ? 144 : 296) +#define SET_PR_FPVALID(S,V) \ + do { *(int *) (((void *) &((S)->pr_reg)) + PR_REG_SIZE(0)) = (V); } \ + while (0) + +#define COMPAT_USE_64BIT_TIME \ + (!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)) +#else typedef struct user_regs_struct32 compat_elf_gregset_t; +#endif /* * A pointer passed in from user mode. 
This should not @@ -208,13 +223,30 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) static inline void __user *arch_compat_alloc_user_space(long len) { - struct pt_regs *regs = task_pt_regs(current); - return (void __user *)regs->sp - len; + compat_uptr_t sp; + + if (test_thread_flag(TIF_IA32)) { + sp = task_pt_regs(current)->sp; + } else { + /* -128 for the x32 ABI redzone */ + sp = percpu_read(old_rsp) - 128; + } + + return (void __user *)round_down(sp - len, 16); +} + +static inline bool is_x32_task(void) +{ +#ifdef CONFIG_X86_X32_ABI + if (task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT) + return true; +#endif + return false; } -static inline int is_compat_task(void) +static inline bool is_compat_task(void) { - return current_thread_info()->status & TS_COMPAT; + return is_ia32_task() || is_x32_task(); } #endif /* _ASM_X86_COMPAT_H */ diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index f27f79abe021..5939f44fe0c0 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -155,7 +155,12 @@ do { \ #define elf_check_arch(x) \ ((x)->e_machine == EM_X86_64) -#define compat_elf_check_arch(x) elf_check_arch_ia32(x) +#define compat_elf_check_arch(x) \ + (elf_check_arch_ia32(x) || (x)->e_machine == EM_X86_64) + +#if __USER32_DS != __USER_DS +# error "The following code assumes __USER32_DS == __USER_DS" +#endif static inline void elf_common_init(struct thread_struct *t, struct pt_regs *regs, const u16 ds) @@ -178,8 +183,9 @@ static inline void elf_common_init(struct thread_struct *t, void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp); #define compat_start_thread start_thread_ia32 -void set_personality_ia32(void); -#define COMPAT_SET_PERSONALITY(ex) set_personality_ia32() +void set_personality_ia32(bool); +#define COMPAT_SET_PERSONALITY(ex) \ + set_personality_ia32((ex).e_machine == EM_X86_64) #define COMPAT_ELF_PLATFORM ("i686") @@ -286,7 +292,7 @@ do { \ #define VDSO_HIGH_BASE 0xffffe000U /* CONFIG_COMPAT_VDSO address */ /* 1GB for 64bit, 8MB for 32bit */ -#define STACK_RND_MASK (test_thread_flag(TIF_IA32) ? 0x7ff : 0x3fffff) +#define STACK_RND_MASK (test_thread_flag(TIF_ADDR32) ? 
0x7ff : 0x3fffff) #define ARCH_DLINFO \ do { \ @@ -295,9 +301,20 @@ do { \ (unsigned long)current->mm->context.vdso); \ } while (0) +#define ARCH_DLINFO_X32 \ +do { \ + if (vdso_enabled) \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, \ + (unsigned long)current->mm->context.vdso); \ +} while (0) + #define AT_SYSINFO 32 -#define COMPAT_ARCH_DLINFO ARCH_DLINFO_IA32(sysctl_vsyscall32) +#define COMPAT_ARCH_DLINFO \ +if (test_thread_flag(TIF_X32)) \ + ARCH_DLINFO_X32; \ +else \ + ARCH_DLINFO_IA32(sysctl_vsyscall32) #define COMPAT_ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) @@ -313,6 +330,8 @@ struct linux_binprm; #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 extern int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp); +extern int x32_setup_additional_pages(struct linux_binprm *bprm, + int uses_interp); extern int syscall32_setup_pages(struct linux_binprm *, int exstack); #define compat_arch_setup_additional_pages syscall32_setup_pages @@ -329,7 +348,7 @@ static inline int mmap_is_ia32(void) return 1; #endif #ifdef CONFIG_IA32_EMULATION - if (test_thread_flag(TIF_IA32)) + if (test_thread_flag(TIF_ADDR32)) return 1; #endif return 0; diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index 1f7e62517284..ee52760549f0 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -43,6 +43,15 @@ struct ucontext_ia32 { compat_sigset_t uc_sigmask; /* mask last for extensibility */ }; +struct ucontext_x32 { + unsigned int uc_flags; + unsigned int uc_link; + stack_ia32_t uc_stack; + unsigned int uc__pad0; /* needed for alignment */ + struct sigcontext uc_mcontext; /* the 64-bit sigcontext type */ + compat_sigset_t uc_sigmask; /* mask last for extensibility */ +}; + /* This matches struct stat64 in glibc2.2, hence the absolutely * insane amounts of padding around dev_t's. */ @@ -116,6 +125,15 @@ typedef struct compat_siginfo { compat_clock_t _stime; } _sigchld; + /* SIGCHLD (x32 version) */ + struct { + unsigned int _pid; /* which child */ + unsigned int _uid; /* sender's uid */ + int _status; /* exit code */ + compat_s64 _utime; + compat_s64 _stime; + } _sigchld_x32; + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ struct { unsigned int _addr; /* faulting insn/memory ref. 
*/ diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h index f49253d75710..c5d1785373ed 100644 --- a/arch/x86/include/asm/idle.h +++ b/arch/x86/include/asm/idle.h @@ -14,6 +14,7 @@ void exit_idle(void); #else /* !CONFIG_X86_64 */ static inline void enter_idle(void) { } static inline void exit_idle(void) { } +static inline void __exit_idle(void) { } #endif /* CONFIG_X86_64 */ void amd_e400_remove_cpu(int cpu); diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 690d1cc9a877..2c4943de5150 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -21,6 +21,15 @@ #define IO_APIC_REDIR_LEVEL_TRIGGER (1 << 15) #define IO_APIC_REDIR_MASKED (1 << 16) +struct io_apic_ops { + void (*init) (void); + unsigned int (*read) (unsigned int apic, unsigned int reg); + void (*write) (unsigned int apic, unsigned int reg, unsigned int value); + void (*modify)(unsigned int apic, unsigned int reg, unsigned int value); +}; + +void __init set_io_apic_ops(const struct io_apic_ops *); + /* * The structure of the IO-APIC: */ diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index 4365ffdb461f..7e3f17f92c66 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -29,18 +29,18 @@ #define MTRR_IOCTL_BASE 'M' -struct mtrr_sentry { - unsigned long base; /* Base address */ - unsigned int size; /* Size of region */ - unsigned int type; /* Type of region */ -}; - /* Warning: this structure has a different order from i386 on x86-64. The 32bit emulation code takes care of that. But you need to use this for 64bit, otherwise your X server will break. */ #ifdef __i386__ +struct mtrr_sentry { + unsigned long base; /* Base address */ + unsigned int size; /* Size of region */ + unsigned int type; /* Type of region */ +}; + struct mtrr_gentry { unsigned int regnum; /* Register number */ unsigned long base; /* Base address */ @@ -50,12 +50,20 @@ struct mtrr_gentry { #else /* __i386__ */ +struct mtrr_sentry { + __u64 base; /* Base address */ + __u32 size; /* Size of region */ + __u32 type; /* Type of region */ +}; + struct mtrr_gentry { - unsigned long base; /* Base address */ - unsigned int size; /* Size of region */ - unsigned int regnum; /* Register number */ - unsigned int type; /* Type of region */ + __u64 base; /* Base address */ + __u32 size; /* Size of region */ + __u32 regnum; /* Register number */ + __u32 type; /* Type of region */ + __u32 _pad; /* Unused */ }; + #endif /* !__i386__ */ struct mtrr_var_range { diff --git a/arch/x86/include/asm/posix_types.h b/arch/x86/include/asm/posix_types.h index bb7133dc155d..3427b7798dbc 100644 --- a/arch/x86/include/asm/posix_types.h +++ b/arch/x86/include/asm/posix_types.h @@ -7,7 +7,9 @@ #else # ifdef __i386__ # include "posix_types_32.h" -# else +# elif defined(__LP64__) # include "posix_types_64.h" +# else +# include "posix_types_x32.h" # endif #endif diff --git a/arch/x86/include/asm/posix_types_32.h b/arch/x86/include/asm/posix_types_32.h index f7d9adf82e53..99f262e04b91 100644 --- a/arch/x86/include/asm/posix_types_32.h +++ b/arch/x86/include/asm/posix_types_32.h @@ -7,79 +7,22 @@ * assume GCC is being used. 
*/ -typedef unsigned long __kernel_ino_t; typedef unsigned short __kernel_mode_t; +#define __kernel_mode_t __kernel_mode_t + typedef unsigned short __kernel_nlink_t; -typedef long __kernel_off_t; -typedef int __kernel_pid_t; +#define __kernel_nlink_t __kernel_nlink_t + typedef unsigned short __kernel_ipc_pid_t; +#define __kernel_ipc_pid_t __kernel_ipc_pid_t + typedef unsigned short __kernel_uid_t; typedef unsigned short __kernel_gid_t; -typedef unsigned int __kernel_size_t; -typedef int __kernel_ssize_t; -typedef int __kernel_ptrdiff_t; -typedef long __kernel_time_t; -typedef long __kernel_suseconds_t; -typedef long __kernel_clock_t; -typedef int __kernel_timer_t; -typedef int __kernel_clockid_t; -typedef int __kernel_daddr_t; -typedef char * __kernel_caddr_t; -typedef unsigned short __kernel_uid16_t; -typedef unsigned short __kernel_gid16_t; -typedef unsigned int __kernel_uid32_t; -typedef unsigned int __kernel_gid32_t; +#define __kernel_uid_t __kernel_uid_t -typedef unsigned short __kernel_old_uid_t; -typedef unsigned short __kernel_old_gid_t; typedef unsigned short __kernel_old_dev_t; +#define __kernel_old_dev_t __kernel_old_dev_t -#ifdef __GNUC__ -typedef long long __kernel_loff_t; -#endif - -typedef struct { - int val[2]; -} __kernel_fsid_t; - -#if defined(__KERNEL__) - -#undef __FD_SET -#define __FD_SET(fd,fdsetp) \ - asm volatile("btsl %1,%0": \ - "+m" (*(__kernel_fd_set *)(fdsetp)) \ - : "r" ((int)(fd))) - -#undef __FD_CLR -#define __FD_CLR(fd,fdsetp) \ - asm volatile("btrl %1,%0": \ - "+m" (*(__kernel_fd_set *)(fdsetp)) \ - : "r" ((int) (fd))) - -#undef __FD_ISSET -#define __FD_ISSET(fd,fdsetp) \ - (__extension__ \ - ({ \ - unsigned char __result; \ - asm volatile("btl %1,%2 ; setb %0" \ - : "=q" (__result) \ - : "r" ((int)(fd)), \ - "m" (*(__kernel_fd_set *)(fdsetp))); \ - __result; \ -})) - -#undef __FD_ZERO -#define __FD_ZERO(fdsetp) \ -do { \ - int __d0, __d1; \ - asm volatile("cld ; rep ; stosl" \ - : "=m" (*(__kernel_fd_set *)(fdsetp)), \ - "=&c" (__d0), "=&D" (__d1) \ - : "a" (0), "1" (__FDSET_LONGS), \ - "2" ((__kernel_fd_set *)(fdsetp)) \ - : "memory"); \ -} while (0) - -#endif /* defined(__KERNEL__) */ +#include <asm-generic/posix_types.h> #endif /* _ASM_X86_POSIX_TYPES_32_H */ diff --git a/arch/x86/include/asm/posix_types_64.h b/arch/x86/include/asm/posix_types_64.h index eb8d2d92b63e..cba0c1ead162 100644 --- a/arch/x86/include/asm/posix_types_64.h +++ b/arch/x86/include/asm/posix_types_64.h @@ -7,113 +7,13 @@ * assume GCC is being used. 
*/ -typedef unsigned long __kernel_ino_t; -typedef unsigned int __kernel_mode_t; -typedef unsigned long __kernel_nlink_t; -typedef long __kernel_off_t; -typedef int __kernel_pid_t; -typedef int __kernel_ipc_pid_t; -typedef unsigned int __kernel_uid_t; -typedef unsigned int __kernel_gid_t; -typedef unsigned long __kernel_size_t; -typedef long __kernel_ssize_t; -typedef long __kernel_ptrdiff_t; -typedef long __kernel_time_t; -typedef long __kernel_suseconds_t; -typedef long __kernel_clock_t; -typedef int __kernel_timer_t; -typedef int __kernel_clockid_t; -typedef int __kernel_daddr_t; -typedef char * __kernel_caddr_t; -typedef unsigned short __kernel_uid16_t; -typedef unsigned short __kernel_gid16_t; - -#ifdef __GNUC__ -typedef long long __kernel_loff_t; -#endif - -typedef struct { - int val[2]; -} __kernel_fsid_t; - typedef unsigned short __kernel_old_uid_t; typedef unsigned short __kernel_old_gid_t; -typedef __kernel_uid_t __kernel_uid32_t; -typedef __kernel_gid_t __kernel_gid32_t; +#define __kernel_old_uid_t __kernel_old_uid_t typedef unsigned long __kernel_old_dev_t; +#define __kernel_old_dev_t __kernel_old_dev_t -#ifdef __KERNEL__ - -#undef __FD_SET -static inline void __FD_SET(unsigned long fd, __kernel_fd_set *fdsetp) -{ - unsigned long _tmp = fd / __NFDBITS; - unsigned long _rem = fd % __NFDBITS; - fdsetp->fds_bits[_tmp] |= (1UL<<_rem); -} - -#undef __FD_CLR -static inline void __FD_CLR(unsigned long fd, __kernel_fd_set *fdsetp) -{ - unsigned long _tmp = fd / __NFDBITS; - unsigned long _rem = fd % __NFDBITS; - fdsetp->fds_bits[_tmp] &= ~(1UL<<_rem); -} - -#undef __FD_ISSET -static inline int __FD_ISSET(unsigned long fd, __const__ __kernel_fd_set *p) -{ - unsigned long _tmp = fd / __NFDBITS; - unsigned long _rem = fd % __NFDBITS; - return (p->fds_bits[_tmp] & (1UL<<_rem)) != 0; -} - -/* - * This will unroll the loop for the normal constant cases (8 or 32 longs, - * for 256 and 1024-bit fd_sets respectively) - */ -#undef __FD_ZERO -static inline void __FD_ZERO(__kernel_fd_set *p) -{ - unsigned long *tmp = p->fds_bits; - int i; - - if (__builtin_constant_p(__FDSET_LONGS)) { - switch (__FDSET_LONGS) { - case 32: - tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; - tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; - tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0; - tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0; - tmp[16] = 0; tmp[17] = 0; tmp[18] = 0; tmp[19] = 0; - tmp[20] = 0; tmp[21] = 0; tmp[22] = 0; tmp[23] = 0; - tmp[24] = 0; tmp[25] = 0; tmp[26] = 0; tmp[27] = 0; - tmp[28] = 0; tmp[29] = 0; tmp[30] = 0; tmp[31] = 0; - return; - case 16: - tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; - tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; - tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0; - tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0; - return; - case 8: - tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; - tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; - return; - case 4: - tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; - return; - } - } - i = __FDSET_LONGS; - while (i) { - i--; - *tmp = 0; - tmp++; - } -} - -#endif /* defined(__KERNEL__) */ +#include <asm-generic/posix_types.h> #endif /* _ASM_X86_POSIX_TYPES_64_H */ diff --git a/arch/x86/include/asm/posix_types_x32.h b/arch/x86/include/asm/posix_types_x32.h new file mode 100644 index 000000000000..85f9bdafa93c --- /dev/null +++ b/arch/x86/include/asm/posix_types_x32.h @@ -0,0 +1,19 @@ +#ifndef _ASM_X86_POSIX_TYPES_X32_H +#define _ASM_X86_POSIX_TYPES_X32_H + +/* + * This file is only used by 
user-level software, so you need to + * be a little careful about namespace pollution etc. Also, we cannot + * assume GCC is being used. + * + * These types should generally match the ones used by the 64-bit kernel, + * + */ + +typedef long long __kernel_long_t; +typedef unsigned long long __kernel_ulong_t; +#define __kernel_long_t __kernel_long_t + +#include <asm/posix_types_64.h> + +#endif /* _ASM_X86_POSIX_TYPES_X32_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index a19542c1685e..7284c9a6a0b5 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -463,7 +463,7 @@ struct thread_struct { unsigned long ptrace_dr7; /* Fault info: */ unsigned long cr2; - unsigned long trap_no; + unsigned long trap_nr; unsigned long error_code; /* floating point and extended processor state */ struct fpu fpu; @@ -873,9 +873,9 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); #define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \ 0xc0000000 : 0xFFFFe000) -#define TASK_SIZE (test_thread_flag(TIF_IA32) ? \ +#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \ IA32_PAGE_OFFSET : TASK_SIZE_MAX) -#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? \ +#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \ IA32_PAGE_OFFSET : TASK_SIZE_MAX) #define STACK_TOP TASK_SIZE @@ -897,6 +897,12 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) extern unsigned long KSTK_ESP(struct task_struct *task); + +/* + * User space RSP while inside the SYSCALL fast path + */ +DECLARE_PER_CPU(unsigned long, old_rsp); + #endif /* CONFIG_X86_64 */ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 35664547125b..dcfde52979c3 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -145,7 +145,6 @@ extern unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs); extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code, int si_code); -void signal_fault(struct pt_regs *regs, void __user *frame, char *where); extern long syscall_trace_enter(struct pt_regs *); extern void syscall_trace_leave(struct pt_regs *); diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h index 04459d25e66e..4a085383af27 100644 --- a/arch/x86/include/asm/sigcontext.h +++ b/arch/x86/include/asm/sigcontext.h @@ -230,34 +230,37 @@ struct sigcontext { * User-space might still rely on the old definition: */ struct sigcontext { - unsigned long r8; - unsigned long r9; - unsigned long r10; - unsigned long r11; - unsigned long r12; - unsigned long r13; - unsigned long r14; - unsigned long r15; - unsigned long rdi; - unsigned long rsi; - unsigned long rbp; - unsigned long rbx; - unsigned long rdx; - unsigned long rax; - unsigned long rcx; - unsigned long rsp; - unsigned long rip; - unsigned long eflags; /* RFLAGS */ - unsigned short cs; - unsigned short gs; - unsigned short fs; - unsigned short __pad0; - unsigned long err; - unsigned long trapno; - unsigned long oldmask; - unsigned long cr2; + __u64 r8; + __u64 r9; + __u64 r10; + __u64 r11; + __u64 r12; + __u64 r13; + __u64 r14; + __u64 r15; + __u64 rdi; + __u64 rsi; + __u64 rbp; + __u64 rbx; + __u64 rdx; + __u64 rax; + __u64 rcx; + __u64 rsp; + __u64 rip; + __u64 eflags; /* RFLAGS */ + __u16 cs; + __u16 
gs; + __u16 fs; + __u16 __pad0; + __u64 err; + __u64 trapno; + __u64 oldmask; + __u64 cr2; struct _fpstate __user *fpstate; /* zero when no FPU context */ - unsigned long reserved1[8]; +#ifndef __LP64__ + __u32 __fpstate_pad; +#endif + __u64 reserved1[8]; }; #endif /* !__KERNEL__ */ diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h index 4e0fe26d27d3..7c7c27c97daa 100644 --- a/arch/x86/include/asm/sigframe.h +++ b/arch/x86/include/asm/sigframe.h @@ -59,12 +59,25 @@ struct rt_sigframe_ia32 { #endif /* defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) */ #ifdef CONFIG_X86_64 + struct rt_sigframe { char __user *pretcode; struct ucontext uc; struct siginfo info; /* fp state follows here */ }; + +#ifdef CONFIG_X86_X32_ABI + +struct rt_sigframe_x32 { + u64 pretcode; + struct ucontext_x32 uc; + compat_siginfo_t info; + /* fp state follows here */ +}; + +#endif /* CONFIG_X86_X32_ABI */ + #endif /* CONFIG_X86_64 */ #endif /* _ASM_X86_SIGFRAME_H */ diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h new file mode 100644 index 000000000000..ada93b3b8c66 --- /dev/null +++ b/arch/x86/include/asm/sighandling.h @@ -0,0 +1,24 @@ +#ifndef _ASM_X86_SIGHANDLING_H +#define _ASM_X86_SIGHANDLING_H + +#include <linux/compiler.h> +#include <linux/ptrace.h> +#include <linux/signal.h> + +#include <asm/processor-flags.h> + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +#define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ + X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \ + X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ + X86_EFLAGS_CF) + +void signal_fault(struct pt_regs *regs, void __user *frame, char *where); + +int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, + unsigned long *pax); +int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, + struct pt_regs *regs, unsigned long mask); + +#endif /* _ASM_X86_SIGHANDLING_H */ diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index cb238526a9f1..3fda9db48819 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -10,6 +10,8 @@ #ifndef _ASM_X86_SYS_IA32_H #define _ASM_X86_SYS_IA32_H +#ifdef CONFIG_COMPAT + #include <linux/compiler.h> #include <linux/linkage.h> #include <linux/types.h> @@ -36,8 +38,6 @@ asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *, struct sigaction32 __user *, unsigned int); asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *, struct old_sigaction32 __user *); -asmlinkage long sys32_rt_sigprocmask(int, compat_sigset_t __user *, - compat_sigset_t __user *, unsigned int); asmlinkage long sys32_alarm(unsigned int); asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int); @@ -83,4 +83,7 @@ asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32); asmlinkage long sys32_fanotify_mark(int, unsigned int, u32, u32, int, const char __user *); + +#endif /* CONFIG_COMPAT */ + #endif /* _ASM_X86_SYS_IA32_H */ diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index d962e5652a73..386b78686c4d 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -16,6 +16,7 @@ #include <linux/sched.h> #include <linux/err.h> #include <asm/asm-offsets.h> /* For NR_syscalls */ +#include <asm/unistd.h> extern const unsigned long sys_call_table[]; @@ -26,13 +27,13 @@ extern const unsigned long sys_call_table[]; */ static inline int syscall_get_nr(struct task_struct *task, struct pt_regs 
*regs) { - return regs->orig_ax; + return regs->orig_ax & __SYSCALL_MASK; } static inline void syscall_rollback(struct task_struct *task, struct pt_regs *regs) { - regs->ax = regs->orig_ax; + regs->ax = regs->orig_ax & __SYSCALL_MASK; } static inline long syscall_get_error(struct task_struct *task, diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index cfd8144d5527..ad6df8ccd715 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -86,7 +86,7 @@ struct thread_info { #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ -#define TIF_IA32 17 /* 32bit process */ +#define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_FORK 18 /* ret_from_fork */ #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_DEBUG 21 /* uses debug registers */ @@ -95,6 +95,8 @@ struct thread_info { #define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */ #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ +#define TIF_ADDR32 29 /* 32-bit address space on 64 bits */ +#define TIF_X32 30 /* 32-bit native x86-64 binary */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -116,6 +118,8 @@ struct thread_info { #define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) +#define _TIF_ADDR32 (1 << TIF_ADDR32) +#define _TIF_X32 (1 << TIF_X32) /* work to do in syscall_trace_enter() */ #define _TIF_WORK_SYSCALL_ENTRY \ @@ -262,6 +266,18 @@ static inline void set_restore_sigmask(void) ti->status |= TS_RESTORE_SIGMASK; set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags); } + +static inline bool is_ia32_task(void) +{ +#ifdef CONFIG_X86_32 + return true; +#endif +#ifdef CONFIG_IA32_EMULATION + if (current_thread_info()->status & TS_COMPAT) + return true; +#endif + return false; +} #endif /* !__ASSEMBLY__ */ #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 0012d0902c5f..88eae2aec619 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -89,4 +89,29 @@ asmlinkage void smp_thermal_interrupt(void); asmlinkage void mce_threshold_interrupt(void); #endif +/* Interrupts/Exceptions */ +enum { + X86_TRAP_DE = 0, /* 0, Divide-by-zero */ + X86_TRAP_DB, /* 1, Debug */ + X86_TRAP_NMI, /* 2, Non-maskable Interrupt */ + X86_TRAP_BP, /* 3, Breakpoint */ + X86_TRAP_OF, /* 4, Overflow */ + X86_TRAP_BR, /* 5, Bound Range Exceeded */ + X86_TRAP_UD, /* 6, Invalid Opcode */ + X86_TRAP_NM, /* 7, Device Not Available */ + X86_TRAP_DF, /* 8, Double Fault */ + X86_TRAP_OLD_MF, /* 9, Coprocessor Segment Overrun */ + X86_TRAP_TS, /* 10, Invalid TSS */ + X86_TRAP_NP, /* 11, Segment Not Present */ + X86_TRAP_SS, /* 12, Stack Segment Fault */ + X86_TRAP_GP, /* 13, General Protection Fault */ + X86_TRAP_PF, /* 14, Page Fault */ + X86_TRAP_SPURIOUS, /* 15, Spurious Interrupt */ + X86_TRAP_MF, /* 16, x87 Floating-Point Exception */ + X86_TRAP_AC, /* 17, Alignment Check */ + X86_TRAP_MC, /* 18, Machine Check */ + X86_TRAP_XF, /* 19, SIMD Floating-Point Exception */ + X86_TRAP_IRET = 32, /* 32, IRET Exception */ +}; + #endif /* _ASM_X86_TRAPS_H */ diff --git a/arch/x86/include/asm/unistd.h 
b/arch/x86/include/asm/unistd.h index 21f77b89e47a..37cdc9d99bb1 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -1,7 +1,17 @@ #ifndef _ASM_X86_UNISTD_H #define _ASM_X86_UNISTD_H 1 +/* x32 syscall flag bit */ +#define __X32_SYSCALL_BIT 0x40000000 + #ifdef __KERNEL__ + +# ifdef CONFIG_X86_X32_ABI +# define __SYSCALL_MASK (~(__X32_SYSCALL_BIT)) +# else +# define __SYSCALL_MASK (~0) +# endif + # ifdef CONFIG_X86_32 # include <asm/unistd_32.h> @@ -14,6 +24,7 @@ # else # include <asm/unistd_64.h> +# include <asm/unistd_64_x32.h> # define __ARCH_WANT_COMPAT_SYS_TIME # endif @@ -52,8 +63,10 @@ #else # ifdef __i386__ # include <asm/unistd_32.h> -# else +# elif defined(__LP64__) # include <asm/unistd_64.h> +# else +# include <asm/unistd_x32.h> # endif #endif diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 815285bcaceb..8b38be2de9e1 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -5,13 +5,8 @@ #include <linux/clocksource.h> struct vsyscall_gtod_data { - seqlock_t lock; + seqcount_t seq; - /* open coded 'struct timespec' */ - time_t wall_time_sec; - u32 wall_time_nsec; - - struct timezone sys_tz; struct { /* extract of a clocksource struct */ int vclock_mode; cycle_t cycle_last; @@ -19,8 +14,16 @@ struct vsyscall_gtod_data { u32 mult; u32 shift; } clock; - struct timespec wall_to_monotonic; + + /* open coded 'struct timespec' */ + time_t wall_time_sec; + u32 wall_time_nsec; + u32 monotonic_time_nsec; + time_t monotonic_time_sec; + + struct timezone sys_tz; struct timespec wall_time_coarse; + struct timespec monotonic_time_coarse; }; extern struct vsyscall_gtod_data vsyscall_gtod_data; diff --git a/arch/x86/include/asm/x2apic.h b/arch/x86/include/asm/x2apic.h index 6bf5b8e478c0..92e54abf89e0 100644 --- a/arch/x86/include/asm/x2apic.h +++ b/arch/x86/include/asm/x2apic.h @@ -18,6 +18,11 @@ static const struct cpumask *x2apic_target_cpus(void) return cpu_online_mask; } +static int x2apic_apic_id_valid(int apicid) +{ + return 1; +} + static int x2apic_apic_id_registered(void) { return 1; diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 406ed77216d0..a415b1f44365 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -239,7 +239,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) * to not preallocating memory for all NR_CPUS * when we use CPU hotplug. 
*/ - if (!cpu_has_x2apic && (apic_id >= 0xff) && enabled) + if (!apic->apic_id_valid(apic_id) && enabled) printk(KERN_WARNING PREFIX "x2apic entry ignored\n"); else acpi_register_lapic(apic_id, enabled); @@ -642,6 +642,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) kfree(buffer.pointer); buffer.length = ACPI_ALLOCATE_BUFFER; buffer.pointer = NULL; + lapic = NULL; if (!alloc_cpumask_var(&tmp_map, GFP_KERNEL)) goto out; @@ -650,7 +651,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) goto free_tmp_map; cpumask_copy(tmp_map, cpu_present_mask); - acpi_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED); + acpi_register_lapic(physid, ACPI_MADT_ENABLED); /* * If mp_register_lapic successfully generates a new logical cpu diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 2eec05b6d1b8..11544d8f1e97 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -383,20 +383,25 @@ static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new) static unsigned int reserve_eilvt_offset(int offset, unsigned int new) { - unsigned int rsvd; /* 0: uninitialized */ + unsigned int rsvd, vector; if (offset >= APIC_EILVT_NR_MAX) return ~0; - rsvd = atomic_read(&eilvt_offsets[offset]) & ~APIC_EILVT_MASKED; + rsvd = atomic_read(&eilvt_offsets[offset]); do { - if (rsvd && - !eilvt_entry_is_changeable(rsvd, new)) + vector = rsvd & ~APIC_EILVT_MASKED; /* 0: unassigned */ + if (vector && !eilvt_entry_is_changeable(vector, new)) /* may not change if vectors are different */ return rsvd; rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new); } while (rsvd != new); + rsvd &= ~APIC_EILVT_MASKED; + if (rsvd && rsvd != vector) + pr_info("LVT offset %d assigned for vector 0x%02x\n", + offset, rsvd); + return new; } diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index d9ea5f331ac5..899803e03214 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -229,11 +229,10 @@ static int __init numachip_system_init(void) } early_initcall(numachip_system_init); -static int __cpuinit numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { if (!strncmp(oem_id, "NUMASC", 6)) { numachip_system = 1; - setup_force_cpu_cap(X86_FEATURE_X2APIC); return 1; } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 6d10a66fc5a9..e88300d8e80a 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -64,9 +64,28 @@ #include <asm/apic.h> #define __apicdebuginit(type) static type __init + #define for_each_irq_pin(entry, head) \ for (entry = head; entry; entry = entry->next) +static void __init __ioapic_init_mappings(void); + +static unsigned int __io_apic_read (unsigned int apic, unsigned int reg); +static void __io_apic_write (unsigned int apic, unsigned int reg, unsigned int val); +static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val); + +static struct io_apic_ops io_apic_ops = { + .init = __ioapic_init_mappings, + .read = __io_apic_read, + .write = __io_apic_write, + .modify = __io_apic_modify, +}; + +void __init set_io_apic_ops(const struct io_apic_ops *ops) +{ + io_apic_ops = *ops; +} + /* * Is the SiS APIC rmw bug present ? 
* -1 = don't know, 0 = no, 1 = yes @@ -294,6 +313,22 @@ static void free_irq_at(unsigned int at, struct irq_cfg *cfg) irq_free_desc(at); } +static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) +{ + return io_apic_ops.read(apic, reg); +} + +static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) +{ + io_apic_ops.write(apic, reg, value); +} + +static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) +{ + io_apic_ops.modify(apic, reg, value); +} + + struct io_apic { unsigned int index; unsigned int unused[3]; @@ -314,16 +349,17 @@ static inline void io_apic_eoi(unsigned int apic, unsigned int vector) writel(vector, &io_apic->eoi); } -static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) +static unsigned int __io_apic_read(unsigned int apic, unsigned int reg) { struct io_apic __iomem *io_apic = io_apic_base(apic); writel(reg, &io_apic->index); return readl(&io_apic->data); } -static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) +static void __io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) { struct io_apic __iomem *io_apic = io_apic_base(apic); + writel(reg, &io_apic->index); writel(value, &io_apic->data); } @@ -334,7 +370,7 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i * * Older SiS APIC requires we rewrite the index register */ -static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) +static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) { struct io_apic __iomem *io_apic = io_apic_base(apic); @@ -377,6 +413,7 @@ static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin) eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); + return eu.entry; } @@ -384,9 +421,11 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) { union entry_union eu; unsigned long flags; + raw_spin_lock_irqsave(&ioapic_lock, flags); eu.entry = __ioapic_read_entry(apic, pin); raw_spin_unlock_irqrestore(&ioapic_lock, flags); + return eu.entry; } @@ -396,8 +435,7 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) * the interrupt, and we need to make sure the entry is fully populated * before that happens. */ -static void -__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +static void __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) { union entry_union eu = {{0, 0}}; @@ -409,6 +447,7 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) { unsigned long flags; + raw_spin_lock_irqsave(&ioapic_lock, flags); __ioapic_write_entry(apic, pin, e); raw_spin_unlock_irqrestore(&ioapic_lock, flags); @@ -435,8 +474,7 @@ static void ioapic_mask_entry(int apic, int pin) * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. 
*/ -static int -__add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) +static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) { struct irq_pin_list **last, *entry; @@ -521,6 +559,7 @@ static void io_apic_sync(struct irq_pin_list *entry) * a dummy read from the IO-APIC */ struct io_apic __iomem *io_apic; + io_apic = io_apic_base(entry->apic); readl(&io_apic->data); } @@ -2512,21 +2551,73 @@ static void ack_apic_edge(struct irq_data *data) atomic_t irq_mis_count; -static void ack_apic_level(struct irq_data *data) -{ - struct irq_cfg *cfg = data->chip_data; - int i, do_unmask_irq = 0, irq = data->irq; - unsigned long v; - - irq_complete_move(cfg); #ifdef CONFIG_GENERIC_PENDING_IRQ +static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg) +{ /* If we are moving the irq we need to mask it */ if (unlikely(irqd_is_setaffinity_pending(data))) { - do_unmask_irq = 1; mask_ioapic(cfg); + return true; } + return false; +} + +static inline void ioapic_irqd_unmask(struct irq_data *data, + struct irq_cfg *cfg, bool masked) +{ + if (unlikely(masked)) { + /* Only migrate the irq if the ack has been received. + * + * On rare occasions the broadcast level triggered ack gets + * delayed going to ioapics, and if we reprogram the + * vector while Remote IRR is still set the irq will never + * fire again. + * + * To prevent this scenario we read the Remote IRR bit + * of the ioapic. This has two effects. + * - On any sane system the read of the ioapic will + * flush writes (and acks) going to the ioapic from + * this cpu. + * - We get to see if the ACK has actually been delivered. + * + * Based on failed experiments of reprogramming the + * ioapic entry from outside of irq context starting + * with masking the ioapic entry and then polling until + * Remote IRR was clear before reprogramming the + * ioapic I don't trust the Remote IRR bit to be + * completey accurate. + * + * However there appears to be no other way to plug + * this race, so if the Remote IRR bit is not + * accurate and is causing problems then it is a hardware bug + * and you can go talk to the chipset vendor about it. + */ + if (!io_apic_level_ack_pending(cfg)) + irq_move_masked_irq(data); + unmask_ioapic(cfg); + } +} +#else +static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg) +{ + return false; +} +static inline void ioapic_irqd_unmask(struct irq_data *data, + struct irq_cfg *cfg, bool masked) +{ +} #endif +static void ack_apic_level(struct irq_data *data) +{ + struct irq_cfg *cfg = data->chip_data; + int i, irq = data->irq; + unsigned long v; + bool masked; + + irq_complete_move(cfg); + masked = ioapic_irqd_mask(data, cfg); + /* * It appears there is an erratum which affects at least version 0x11 * of I/O APIC (that's the 82093AA and cores integrated into various @@ -2581,38 +2672,7 @@ static void ack_apic_level(struct irq_data *data) eoi_ioapic_irq(irq, cfg); } - /* Now we can move and renable the irq */ - if (unlikely(do_unmask_irq)) { - /* Only migrate the irq if the ack has been received. - * - * On rare occasions the broadcast level triggered ack gets - * delayed going to ioapics, and if we reprogram the - * vector while Remote IRR is still set the irq will never - * fire again. - * - * To prevent this scenario we read the Remote IRR bit - * of the ioapic. This has two effects. - * - On any sane system the read of the ioapic will - * flush writes (and acks) going to the ioapic from - * this cpu. 
- * - We get to see if the ACK has actually been delivered. - * - * Based on failed experiments of reprogramming the - * ioapic entry from outside of irq context starting - * with masking the ioapic entry and then polling until - * Remote IRR was clear before reprogramming the - * ioapic I don't trust the Remote IRR bit to be - * completey accurate. - * - * However there appears to be no other way to plug - * this race, so if the Remote IRR bit is not - * accurate and is causing problems then it is a hardware bug - * and you can go talk to the chipset vendor about it. - */ - if (!io_apic_level_ack_pending(cfg)) - irq_move_masked_irq(data); - unmask_ioapic(cfg); - } + ioapic_irqd_unmask(data, cfg, masked); } #ifdef CONFIG_IRQ_REMAP @@ -3873,6 +3933,11 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics) void __init ioapic_and_gsi_init(void) { + io_apic_ops.init(); +} + +static void __init __ioapic_init_mappings(void) +{ unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; struct resource *ioapic_res; int i; diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 9193713060a9..48f3103b3c93 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -213,7 +213,7 @@ static struct apic apic_x2apic_cluster = { .name = "cluster x2apic", .probe = x2apic_cluster_probe, .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, - .apic_id_valid = default_apic_id_valid, + .apic_id_valid = x2apic_apic_id_valid, .apic_id_registered = x2apic_apic_id_registered, .irq_delivery_mode = dest_LowestPrio, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index bcd1db6eaca9..8a778db45e3a 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -119,7 +119,7 @@ static struct apic apic_x2apic_phys = { .name = "physical x2apic", .probe = x2apic_phys_probe, .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, - .apic_id_valid = default_apic_id_valid, + .apic_id_valid = x2apic_apic_id_valid, .apic_id_registered = x2apic_apic_id_registered, .irq_delivery_mode = dest_Fixed, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index fc4771425852..87bfa69e216e 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -266,6 +266,11 @@ static void uv_send_IPI_all(int vector) uv_send_IPI_mask(cpu_online_mask, vector); } +static int uv_apic_id_valid(int apicid) +{ + return 1; +} + static int uv_apic_id_registered(void) { return 1; @@ -351,7 +356,7 @@ static struct apic __refdata apic_x2apic_uv_x = { .name = "UV large system", .probe = uv_probe, .acpi_madt_oem_check = uv_acpi_madt_oem_check, - .apic_id_valid = default_apic_id_valid, + .apic_id_valid = uv_apic_id_valid, .apic_id_registered = uv_apic_id_registered, .irq_delivery_mode = dest_Fixed, diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 834e897b1e25..1b4754f82ba7 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -1,6 +1,12 @@ #include <asm/ia32.h> #define __SYSCALL_64(nr, sym, compat) [nr] = 1, +#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1, +#ifdef CONFIG_X86_X32_ABI +# define __SYSCALL_X32(nr, sym, compat) [nr] = 1, +#else +# define __SYSCALL_X32(nr, sym, compat) /* nothing */ +#endif static char syscalls_64[] = { #include <asm/syscalls_64.h> }; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e49477444fff..67e258362a3d 100644 --- 
a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -999,7 +999,7 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) else printk(KERN_CONT "\n"); - __print_cpu_msr(); + print_cpu_msr(c); } void __cpuinit print_cpu_msr(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 79289632cb27..a041e094b8b9 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -167,6 +167,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) { int err = 0; mtrr_type type; + unsigned long base; unsigned long size; struct mtrr_sentry sentry; struct mtrr_gentry gentry; @@ -267,14 +268,14 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) #endif if (gentry.regnum >= num_var_ranges) return -EINVAL; - mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); + mtrr_if->get(gentry.regnum, &base, &size, &type); /* Hide entries that go above 4GB */ - if (gentry.base + size - 1 >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT)) + if (base + size - 1 >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT)) || size >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT))) gentry.base = gentry.size = gentry.type = 0; else { - gentry.base <<= PAGE_SHIFT; + gentry.base = base << PAGE_SHIFT; gentry.size = size << PAGE_SHIFT; gentry.type = type; } @@ -321,11 +322,12 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) #endif if (gentry.regnum >= num_var_ranges) return -EINVAL; - mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); + mtrr_if->get(gentry.regnum, &base, &size, &type); /* Hide entries that would overflow */ if (size != (__typeof__(gentry.size))size) gentry.base = gentry.size = gentry.type = 0; else { + gentry.base = base; gentry.size = size; gentry.type = type; } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index fa2900c0e398..bb8e03407e18 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -29,7 +29,6 @@ #include <asm/apic.h> #include <asm/stacktrace.h> #include <asm/nmi.h> -#include <asm/compat.h> #include <asm/smp.h> #include <asm/alternative.h> #include <asm/timer.h> @@ -1314,6 +1313,11 @@ static void __init pmu_check_apic(void) pr_info("no hardware sampling interrupt available.\n"); } +static struct attribute_group x86_pmu_format_group = { + .name = "format", + .attrs = NULL, +}; + static int __init init_hw_perf_events(void) { struct x86_pmu_quirk *quirk; @@ -1388,6 +1392,7 @@ static int __init init_hw_perf_events(void) } x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ + x86_pmu_format_group.attrs = x86_pmu.format_attrs; pr_info("... version: %d\n", x86_pmu.version); pr_info("... 
bit width: %d\n", x86_pmu.cntval_bits); @@ -1616,6 +1621,9 @@ static int x86_pmu_event_idx(struct perf_event *event) { int idx = event->hw.idx; + if (!x86_pmu.attr_rdpmc) + return 0; + if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) { idx -= X86_PMC_IDX_FIXED; idx |= 1 << 30; @@ -1668,6 +1676,7 @@ static struct attribute_group x86_pmu_attr_group = { static const struct attribute_group *x86_pmu_attr_groups[] = { &x86_pmu_attr_group, + &x86_pmu_format_group, NULL, }; @@ -1699,14 +1708,19 @@ static struct pmu pmu = { .flush_branch_stack = x86_pmu_flush_branch_stack, }; -void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) +void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) { + userpg->cap_usr_time = 0; + userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc; + userpg->pmc_width = x86_pmu.cntval_bits; + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) return; if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) return; + userpg->cap_usr_time = 1; userpg->time_mult = this_cpu_read(cyc2ns); userpg->time_shift = CYC2NS_SCALE_FACTOR; userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; @@ -1748,6 +1762,9 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) } #ifdef CONFIG_COMPAT + +#include <asm/compat.h> + static inline int perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) { diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 8484e77c211e..6638aaf54493 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -339,6 +339,7 @@ struct x86_pmu { * sysfs attrs */ int attr_rdpmc; + struct attribute **format_attrs; /* * CPU Hotplug hooks diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index dd002faff7a6..95e7fe1c5f0b 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -404,6 +404,21 @@ static void amd_pmu_cpu_dead(int cpu) } } +PMU_FORMAT_ATTR(event, "config:0-7,32-35"); +PMU_FORMAT_ATTR(umask, "config:8-15" ); +PMU_FORMAT_ATTR(edge, "config:18" ); +PMU_FORMAT_ATTR(inv, "config:23" ); +PMU_FORMAT_ATTR(cmask, "config:24-31" ); + +static struct attribute *amd_format_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_cmask.attr, + NULL, +}; + static __initconst const struct x86_pmu amd_pmu = { .name = "AMD", .handle_irq = x86_pmu_handle_irq, @@ -426,6 +441,8 @@ static __initconst const struct x86_pmu amd_pmu = { .get_event_constraints = amd_get_event_constraints, .put_event_constraints = amd_put_event_constraints, + .format_attrs = amd_format_attr, + .cpu_prepare = amd_pmu_cpu_prepare, .cpu_starting = amd_pmu_cpu_starting, .cpu_dead = amd_pmu_cpu_dead, @@ -596,6 +613,7 @@ static __initconst const struct x86_pmu amd_pmu_f15h = { .cpu_dead = amd_pmu_cpu_dead, #endif .cpu_starting = amd_pmu_cpu_starting, + .format_attrs = amd_format_attr, }; __init int amd_pmu_init(void) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 6a84e7f28f05..26b3e2fef104 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1431,6 +1431,24 @@ static void core_pmu_enable_all(int added) } } +PMU_FORMAT_ATTR(event, "config:0-7" ); +PMU_FORMAT_ATTR(umask, "config:8-15" ); +PMU_FORMAT_ATTR(edge, "config:18" ); +PMU_FORMAT_ATTR(pc, "config:19" ); +PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */ +PMU_FORMAT_ATTR(inv, 
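Two of the perf changes above are userspace-visible: the new "format" attribute group publishes the PMU_FORMAT_ATTR() strings (such as "config:0-7") under sysfs, and arch_perf_update_userpage() now exports cap_usr_rdpmc and pmc_width through the event's mmap()ed control page. A minimal self-monitoring sketch of the latter, assuming the usual perf_event_open()/mmap() setup has already been done; the field names are those of struct perf_event_mmap_page, and the retry loop is the conventional lock-free read pattern rather than anything mandated by this patch:

	#include <linux/perf_event.h>
	#include <stdint.h>

	/* Read the counter directly with RDPMC, guided by the fields that
	 * arch_perf_update_userpage() fills in.  'pc' points to the first
	 * page of the event's mmap() area. */
	static uint64_t read_count(volatile struct perf_event_mmap_page *pc)
	{
		uint32_t seq, idx;
		uint64_t count;

		do {
			seq = pc->lock;
			__sync_synchronize();
			idx   = pc->index;	/* hw counter index + 1; 0 = not running here */
			count = pc->offset;
			if (pc->cap_usr_rdpmc && idx) {
				uint32_t lo, hi;
				uint64_t pmc;

				asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (idx - 1));
				pmc = ((uint64_t)hi << 32) | lo;
				/* sign-extend the raw value to the advertised width */
				pmc <<= 64 - pc->pmc_width;
				count += (int64_t)pmc >> (64 - pc->pmc_width);
			}
			__sync_synchronize();
		} while (pc->lock != seq);

		return count;
	}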
"config:23" ); +PMU_FORMAT_ATTR(cmask, "config:24-31" ); + +static struct attribute *intel_arch_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_pc.attr, + &format_attr_inv.attr, + &format_attr_cmask.attr, + NULL, +}; + static __initconst const struct x86_pmu core_pmu = { .name = "core", .handle_irq = x86_pmu_handle_irq, @@ -1455,6 +1473,7 @@ static __initconst const struct x86_pmu core_pmu = { .put_event_constraints = intel_put_event_constraints, .event_constraints = intel_core_event_constraints, .guest_get_msrs = core_guest_get_msrs, + .format_attrs = intel_arch_formats_attr, }; struct intel_shared_regs *allocate_shared_regs(int cpu) @@ -1553,6 +1572,21 @@ static void intel_pmu_flush_branch_stack(void) intel_pmu_lbr_reset(); } +PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); + +static struct attribute *intel_arch3_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_pc.attr, + &format_attr_any.attr, + &format_attr_inv.attr, + &format_attr_cmask.attr, + + &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */ + NULL, +}; + static __initconst const struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, @@ -1576,6 +1610,8 @@ static __initconst const struct x86_pmu intel_pmu = { .get_event_constraints = intel_get_event_constraints, .put_event_constraints = intel_put_event_constraints, + .format_attrs = intel_arch3_formats_attr, + .cpu_prepare = intel_pmu_cpu_prepare, .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index c7181befecde..32bcfc7dd230 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -87,6 +87,23 @@ static void p6_pmu_enable_event(struct perf_event *event) (void)checking_wrmsrl(hwc->config_base, val); } +PMU_FORMAT_ATTR(event, "config:0-7" ); +PMU_FORMAT_ATTR(umask, "config:8-15" ); +PMU_FORMAT_ATTR(edge, "config:18" ); +PMU_FORMAT_ATTR(pc, "config:19" ); +PMU_FORMAT_ATTR(inv, "config:23" ); +PMU_FORMAT_ATTR(cmask, "config:24-31" ); + +static struct attribute *intel_p6_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_pc.attr, + &format_attr_inv.attr, + &format_attr_cmask.attr, + NULL, +}; + static __initconst const struct x86_pmu p6_pmu = { .name = "p6", .handle_irq = x86_pmu_handle_irq, @@ -115,6 +132,8 @@ static __initconst const struct x86_pmu p6_pmu = { .cntval_mask = (1ULL << 32) - 1, .get_event_constraints = x86_get_event_constraints, .event_constraints = p6_event_constraints, + + .format_attrs = intel_p6_formats_attr, }; __init int p6_pmu_init(void) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 4025fe4f928f..1b81839b6c88 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -37,13 +37,16 @@ print_ftrace_graph_addr(unsigned long addr, void *data, const struct stacktrace_ops *ops, struct thread_info *tinfo, int *graph) { - struct task_struct *task = tinfo->task; + struct task_struct *task; unsigned long ret_addr; - int index = task->curr_ret_stack; + int index; if (addr != (unsigned long)return_to_handler) return; + task = tinfo->task; + index = task->curr_ret_stack; + if (!task->ret_stack || index < *graph) return; @@ -265,7 +268,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) #endif printk("\n"); if 
(notify_die(DIE_OOPS, str, regs, err, - current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) + current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) return 1; show_registers(regs); diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 734ebd1d3caa..cdc79b5cfcd9 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -481,7 +481,12 @@ GLOBAL(system_call_after_swapgs) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) jnz tracesys system_call_fastpath: +#if __SYSCALL_MASK == ~0 cmpq $__NR_syscall_max,%rax +#else + andl $__SYSCALL_MASK,%eax + cmpl $__NR_syscall_max,%eax +#endif ja badsys movq %r10,%rcx call *sys_call_table(,%rax,8) # XXX: rip relative @@ -595,7 +600,12 @@ tracesys: */ LOAD_ARGS ARGOFFSET, 1 RESTORE_REST +#if __SYSCALL_MASK == ~0 cmpq $__NR_syscall_max,%rax +#else + andl $__SYSCALL_MASK,%eax + cmpl $__NR_syscall_max,%eax +#endif ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ movq %r10,%rcx /* fixup for C */ call *sys_call_table(,%rax,8) @@ -735,6 +745,40 @@ ENTRY(stub_rt_sigreturn) CFI_ENDPROC END(stub_rt_sigreturn) +#ifdef CONFIG_X86_X32_ABI + PTREGSCALL stub_x32_sigaltstack, sys32_sigaltstack, %rdx + +ENTRY(stub_x32_rt_sigreturn) + CFI_STARTPROC + addq $8, %rsp + PARTIAL_FRAME 0 + SAVE_REST + movq %rsp,%rdi + FIXUP_TOP_OF_STACK %r11 + call sys32_x32_rt_sigreturn + movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer + RESTORE_REST + jmp int_ret_from_sys_call + CFI_ENDPROC +END(stub_x32_rt_sigreturn) + +ENTRY(stub_x32_execve) + CFI_STARTPROC + addq $8, %rsp + PARTIAL_FRAME 0 + SAVE_REST + FIXUP_TOP_OF_STACK %r11 + movq %rsp, %rcx + call sys32_execve + RESTORE_TOP_OF_STACK %r11 + movq %rax,RAX(%rsp) + RESTORE_REST + jmp int_ret_from_sys_call + CFI_ENDPROC +END(stub_x32_execve) + +#endif + /* * Build the entry stubs and pointer table with some assembler magic. 
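The __SYSCALL_MASK handling added to the system_call fast path above is what lets x32 share the 64-bit table: x32 syscall numbers carry a marker bit (__X32_SYSCALL_BIT, 0x40000000 elsewhere in this series) that is masked off before the range check and the indirect call, so "common" entries need no duplication. A small user-space sketch, assuming a kernel built with CONFIG_X86_X32; getpid is a "common" entry, so both invocations reach sys_getpid:

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>

	/* __X32_SYSCALL_BIT from this series; spelled out here because older
	 * libc headers do not define it. */
	#define X32_BIT 0x40000000L

	int main(void)
	{
		long plain = syscall(SYS_getpid);
		long x32   = syscall(SYS_getpid | X32_BIT);

		printf("getpid: %ld via the 64-bit number, %ld via the x32 number\n",
		       plain, x32);
		return 0;
	}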
* We pack 7 stubs into a single 32-byte chunk, which will fit in a diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 6d5fc8cfd5d6..252981afd6c4 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -60,7 +60,7 @@ static irqreturn_t math_error_irq(int cpl, void *dev_id) outb(0, 0xF0); if (ignore_fpu_irq || !boot_cpu_data.hard_math) return IRQ_NONE; - math_error(get_irq_regs(), 0, 16); + math_error(get_irq_regs(), 0, X86_TRAP_MF); return IRQ_HANDLED; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 9b24f36eb55f..a33afaa5ddb7 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -12,6 +12,9 @@ #include <linux/user-return-notifier.h> #include <linux/dmi.h> #include <linux/utsname.h> +#include <linux/stackprotector.h> +#include <linux/tick.h> +#include <linux/cpuidle.h> #include <trace/events/power.h> #include <linux/hw_breakpoint.h> #include <asm/cpu.h> @@ -22,6 +25,24 @@ #include <asm/i387.h> #include <asm/fpu-internal.h> #include <asm/debugreg.h> +#include <asm/nmi.h> + +#ifdef CONFIG_X86_64 +static DEFINE_PER_CPU(unsigned char, is_idle); +static ATOMIC_NOTIFIER_HEAD(idle_notifier); + +void idle_notifier_register(struct notifier_block *n) +{ + atomic_notifier_chain_register(&idle_notifier, n); +} +EXPORT_SYMBOL_GPL(idle_notifier_register); + +void idle_notifier_unregister(struct notifier_block *n) +{ + atomic_notifier_chain_unregister(&idle_notifier, n); +} +EXPORT_SYMBOL_GPL(idle_notifier_unregister); +#endif struct kmem_cache *task_xstate_cachep; EXPORT_SYMBOL_GPL(task_xstate_cachep); @@ -370,6 +391,99 @@ static inline int hlt_use_halt(void) } #endif +#ifndef CONFIG_SMP +static inline void play_dead(void) +{ + BUG(); +} +#endif + +#ifdef CONFIG_X86_64 +void enter_idle(void) +{ + percpu_write(is_idle, 1); + atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); +} + +static void __exit_idle(void) +{ + if (x86_test_and_clear_bit_percpu(0, is_idle) == 0) + return; + atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); +} + +/* Called from interrupts to signify idle end */ +void exit_idle(void) +{ + /* idle loop has pid 0 */ + if (current->pid) + return; + __exit_idle(); +} +#endif + +/* + * The idle thread. There's no useful work to be + * done, so just try to conserve power and have a + * low exit latency (ie sit in a loop waiting for + * somebody to say that they'd like to reschedule) + */ +void cpu_idle(void) +{ + /* + * If we're the non-boot CPU, nothing set the stack canary up + * for us. CPU0 already has it initialized but no harm in + * doing it again. This is a good place for updating it, as + * we wont ever return from this function (so the invalid + * canaries already on the stack wont ever trigger). + */ + boot_init_stack_canary(); + current_thread_info()->status |= TS_POLLING; + + while (1) { + tick_nohz_idle_enter(); + + while (!need_resched()) { + rmb(); + + if (cpu_is_offline(smp_processor_id())) + play_dead(); + + /* + * Idle routines should keep interrupts disabled + * from here on, until they go to idle. + * Otherwise, idle callbacks can misfire. + */ + local_touch_nmi(); + local_irq_disable(); + + enter_idle(); + + /* Don't trace irqs off for idle */ + stop_critical_timings(); + + /* enter_idle() needs rcu for notifiers */ + rcu_idle_enter(); + + if (cpuidle_idle_call()) + pm_idle(); + + rcu_idle_exit(); + start_critical_timings(); + + /* In many cases the interrupt that ended idle + has already called exit_idle. But some idle + loops can be woken up without interrupt. 
*/ + __exit_idle(); + } + + tick_nohz_idle_exit(); + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + } +} + /* * We use this if we don't have any better * idle routine.. diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index aae4f4bbbe88..ae6847303e26 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -9,7 +9,6 @@ * This file handles the architecture-dependent parts of process handling.. */ -#include <linux/stackprotector.h> #include <linux/cpu.h> #include <linux/errno.h> #include <linux/sched.h> @@ -31,14 +30,12 @@ #include <linux/kallsyms.h> #include <linux/ptrace.h> #include <linux/personality.h> -#include <linux/tick.h> #include <linux/percpu.h> #include <linux/prctl.h> #include <linux/ftrace.h> #include <linux/uaccess.h> #include <linux/io.h> #include <linux/kdebug.h> -#include <linux/cpuidle.h> #include <asm/pgtable.h> #include <asm/ldt.h> @@ -57,7 +54,6 @@ #include <asm/idle.h> #include <asm/syscalls.h> #include <asm/debugreg.h> -#include <asm/nmi.h> #include <asm/switch_to.h> asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -70,60 +66,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk) return ((unsigned long *)tsk->thread.sp)[3]; } -#ifndef CONFIG_SMP -static inline void play_dead(void) -{ - BUG(); -} -#endif - -/* - * The idle thread. There's no useful work to be - * done, so just try to conserve power and have a - * low exit latency (ie sit in a loop waiting for - * somebody to say that they'd like to reschedule) - */ -void cpu_idle(void) -{ - int cpu = smp_processor_id(); - - /* - * If we're the non-boot CPU, nothing set the stack canary up - * for us. CPU0 already has it initialized but no harm in - * doing it again. This is a good place for updating it, as - * we wont ever return from this function (so the invalid - * canaries already on the stack wont ever trigger). - */ - boot_init_stack_canary(); - - current_thread_info()->status |= TS_POLLING; - - /* endless idle loop with no priority at all */ - while (1) { - tick_nohz_idle_enter(); - rcu_idle_enter(); - while (!need_resched()) { - - check_pgt_cache(); - rmb(); - - if (cpu_is_offline(cpu)) - play_dead(); - - local_touch_nmi(); - local_irq_disable(); - /* Don't trace irqs off for idle */ - stop_critical_timings(); - if (cpuidle_idle_call()) - pm_idle(); - start_critical_timings(); - } - rcu_idle_exit(); - tick_nohz_idle_exit(); - schedule_preempt_disabled(); - } -} - void __show_regs(struct pt_regs *regs, int all) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 61270e8d428a..733ca39f367e 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -14,7 +14,6 @@ * This file handles the architecture-dependent parts of process handling.. 
*/ -#include <linux/stackprotector.h> #include <linux/cpu.h> #include <linux/errno.h> #include <linux/sched.h> @@ -32,12 +31,10 @@ #include <linux/notifier.h> #include <linux/kprobes.h> #include <linux/kdebug.h> -#include <linux/tick.h> #include <linux/prctl.h> #include <linux/uaccess.h> #include <linux/io.h> #include <linux/ftrace.h> -#include <linux/cpuidle.h> #include <asm/pgtable.h> #include <asm/processor.h> @@ -51,115 +48,11 @@ #include <asm/idle.h> #include <asm/syscalls.h> #include <asm/debugreg.h> -#include <asm/nmi.h> #include <asm/switch_to.h> asmlinkage extern void ret_from_fork(void); DEFINE_PER_CPU(unsigned long, old_rsp); -static DEFINE_PER_CPU(unsigned char, is_idle); - -static ATOMIC_NOTIFIER_HEAD(idle_notifier); - -void idle_notifier_register(struct notifier_block *n) -{ - atomic_notifier_chain_register(&idle_notifier, n); -} -EXPORT_SYMBOL_GPL(idle_notifier_register); - -void idle_notifier_unregister(struct notifier_block *n) -{ - atomic_notifier_chain_unregister(&idle_notifier, n); -} -EXPORT_SYMBOL_GPL(idle_notifier_unregister); - -void enter_idle(void) -{ - percpu_write(is_idle, 1); - atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); -} - -static void __exit_idle(void) -{ - if (x86_test_and_clear_bit_percpu(0, is_idle) == 0) - return; - atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); -} - -/* Called from interrupts to signify idle end */ -void exit_idle(void) -{ - /* idle loop has pid 0 */ - if (current->pid) - return; - __exit_idle(); -} - -#ifndef CONFIG_SMP -static inline void play_dead(void) -{ - BUG(); -} -#endif - -/* - * The idle thread. There's no useful work to be - * done, so just try to conserve power and have a - * low exit latency (ie sit in a loop waiting for - * somebody to say that they'd like to reschedule) - */ -void cpu_idle(void) -{ - current_thread_info()->status |= TS_POLLING; - - /* - * If we're the non-boot CPU, nothing set the stack canary up - * for us. CPU0 already has it initialized but no harm in - * doing it again. This is a good place for updating it, as - * we wont ever return from this function (so the invalid - * canaries already on the stack wont ever trigger). - */ - boot_init_stack_canary(); - - /* endless idle loop with no priority at all */ - while (1) { - tick_nohz_idle_enter(); - while (!need_resched()) { - - rmb(); - - if (cpu_is_offline(smp_processor_id())) - play_dead(); - /* - * Idle routines should keep interrupts disabled - * from here on, until they go to idle. - * Otherwise, idle callbacks can misfire. - */ - local_touch_nmi(); - local_irq_disable(); - enter_idle(); - /* Don't trace irqs off for idle */ - stop_critical_timings(); - - /* enter_idle() needs rcu for notifiers */ - rcu_idle_enter(); - - if (cpuidle_idle_call()) - pm_idle(); - - rcu_idle_exit(); - start_critical_timings(); - - /* In many cases the interrupt that ended idle - has already called exit_idle. But some idle - loops can be woken up without interrupt. */ - __exit_idle(); - } - - tick_nohz_idle_exit(); - schedule_preempt_disabled(); - } -} /* Prints also some state that isn't saved in the pt_regs */ void __show_regs(struct pt_regs *regs, int all) @@ -365,7 +258,9 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp) { start_thread_common(regs, new_ip, new_sp, - __USER32_CS, __USER32_DS, __USER32_DS); + test_thread_flag(TIF_X32) + ? 
__USER_CS : __USER32_CS, + __USER_DS, __USER_DS); } #endif @@ -488,6 +383,8 @@ void set_personality_64bit(void) /* Make sure to be in 64bit mode */ clear_thread_flag(TIF_IA32); + clear_thread_flag(TIF_ADDR32); + clear_thread_flag(TIF_X32); /* Ensure the corresponding mm is not marked. */ if (current->mm) @@ -500,20 +397,31 @@ void set_personality_64bit(void) current->personality &= ~READ_IMPLIES_EXEC; } -void set_personality_ia32(void) +void set_personality_ia32(bool x32) { /* inherit personality from parent */ /* Make sure to be in 32bit mode */ - set_thread_flag(TIF_IA32); - current->personality |= force_personality32; + set_thread_flag(TIF_ADDR32); /* Mark the associated mm as containing 32-bit tasks. */ if (current->mm) current->mm->context.ia32_compat = 1; - /* Prepare the first "return" to user space */ - current_thread_info()->status |= TS_COMPAT; + if (x32) { + clear_thread_flag(TIF_IA32); + set_thread_flag(TIF_X32); + current->personality &= ~READ_IMPLIES_EXEC; + /* is_compat_task() uses the presence of the x32 + syscall bit flag to determine compat status */ + current_thread_info()->status &= ~TS_COMPAT; + } else { + set_thread_flag(TIF_IA32); + clear_thread_flag(TIF_X32); + current->personality |= force_personality32; + /* Prepare the first "return" to user space */ + current_thread_info()->status |= TS_COMPAT; + } } unsigned long get_wchan(struct task_struct *p) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 8a634c887652..685845cf16e0 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -33,6 +33,7 @@ #include <asm/prctl.h> #include <asm/proto.h> #include <asm/hw_breakpoint.h> +#include <asm/traps.h> #include "tls.h" @@ -1130,6 +1131,100 @@ static int genregs32_set(struct task_struct *target, return ret; } +#ifdef CONFIG_X86_X32_ABI +static long x32_arch_ptrace(struct task_struct *child, + compat_long_t request, compat_ulong_t caddr, + compat_ulong_t cdata) +{ + unsigned long addr = caddr; + unsigned long data = cdata; + void __user *datap = compat_ptr(data); + int ret; + + switch (request) { + /* Read 32bits at location addr in the USER area. Only allow + to return the lower 32bits of segment and debug registers. */ + case PTRACE_PEEKUSR: { + u32 tmp; + + ret = -EIO; + if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user) || + addr < offsetof(struct user_regs_struct, cs)) + break; + + tmp = 0; /* Default return condition */ + if (addr < sizeof(struct user_regs_struct)) + tmp = getreg(child, addr); + else if (addr >= offsetof(struct user, u_debugreg[0]) && + addr <= offsetof(struct user, u_debugreg[7])) { + addr -= offsetof(struct user, u_debugreg[0]); + tmp = ptrace_get_debugreg(child, addr / sizeof(data)); + } + ret = put_user(tmp, (__u32 __user *)datap); + break; + } + + /* Write the word at location addr in the USER area. Only allow + to update segment and debug registers with the upper 32bits + zero-extended. */ + case PTRACE_POKEUSR: + ret = -EIO; + if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user) || + addr < offsetof(struct user_regs_struct, cs)) + break; + + if (addr < sizeof(struct user_regs_struct)) + ret = putreg(child, addr, data); + else if (addr >= offsetof(struct user, u_debugreg[0]) && + addr <= offsetof(struct user, u_debugreg[7])) { + addr -= offsetof(struct user, u_debugreg[0]); + ret = ptrace_set_debugreg(child, + addr / sizeof(data), data); + } + break; + + case PTRACE_GETREGS: /* Get all gp regs from the child. 
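A reading aid for the personality and ptrace changes in this area: what used to be a single TIF_IA32 test is now a three-way split. The helper below is illustrative only (it is not part of the patch) and simply restates how set_personality_64bit() and set_personality_ia32() above leave the flags:

	/*
	 *   TIF_ADDR32 - 32-bit address space / pointers (set for ia32 and x32)
	 *   TIF_IA32   - legacy ia32 emulation: 32-bit registers, compat syscalls
	 *   TIF_X32    - x32 ABI: 64-bit registers, 32-bit pointers
	 */
	static inline const char *task_abi_name(void)
	{
		if (test_thread_flag(TIF_IA32))
			return "ia32";
		if (test_thread_flag(TIF_X32))
			return "x32";
		return "64";
	}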
*/ + return copy_regset_to_user(child, + task_user_regset_view(current), + REGSET_GENERAL, + 0, sizeof(struct user_regs_struct), + datap); + + case PTRACE_SETREGS: /* Set all gp regs in the child. */ + return copy_regset_from_user(child, + task_user_regset_view(current), + REGSET_GENERAL, + 0, sizeof(struct user_regs_struct), + datap); + + case PTRACE_GETFPREGS: /* Get the child FPU state. */ + return copy_regset_to_user(child, + task_user_regset_view(current), + REGSET_FP, + 0, sizeof(struct user_i387_struct), + datap); + + case PTRACE_SETFPREGS: /* Set the child FPU state. */ + return copy_regset_from_user(child, + task_user_regset_view(current), + REGSET_FP, + 0, sizeof(struct user_i387_struct), + datap); + + /* normal 64bit interface to access TLS data. + Works just like arch_prctl, except that the arguments + are reversed. */ + case PTRACE_ARCH_PRCTL: + return do_arch_prctl(child, data, addr); + + default: + return compat_ptrace_request(child, request, addr, data); + } + + return ret; +} +#endif + long compat_arch_ptrace(struct task_struct *child, compat_long_t request, compat_ulong_t caddr, compat_ulong_t cdata) { @@ -1139,6 +1234,11 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, int ret; __u32 val; +#ifdef CONFIG_X86_X32_ABI + if (!is_ia32_task()) + return x32_arch_ptrace(child, request, caddr, cdata); +#endif + switch (request) { case PTRACE_PEEKUSR: ret = getreg32(child, addr, &val); @@ -1326,7 +1426,7 @@ static void fill_sigtrap_info(struct task_struct *tsk, int error_code, int si_code, struct siginfo *info) { - tsk->thread.trap_no = 1; + tsk->thread.trap_nr = X86_TRAP_DB; tsk->thread.error_code = error_code; memset(info, 0, sizeof(*info)); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 25edcfc9ba5b..115eac431483 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -10,10 +10,8 @@ #include <linux/mm.h> #include <linux/smp.h> #include <linux/kernel.h> -#include <linux/signal.h> #include <linux/errno.h> #include <linux/wait.h> -#include <linux/ptrace.h> #include <linux/tracehook.h> #include <linux/unistd.h> #include <linux/stddef.h> @@ -27,10 +25,12 @@ #include <asm/fpu-internal.h> #include <asm/vdso.h> #include <asm/mce.h> +#include <asm/sighandling.h> #ifdef CONFIG_X86_64 #include <asm/proto.h> #include <asm/ia32_unistd.h> +#include <asm/sys_ia32.h> #endif /* CONFIG_X86_64 */ #include <asm/syscall.h> @@ -38,13 +38,6 @@ #include <asm/sigframe.h> -#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) - -#define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ - X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \ - X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ - X86_EFLAGS_CF) - #ifdef CONFIG_X86_32 # define FIX_EFLAGS (__FIX_EFLAGS | X86_EFLAGS_RF) #else @@ -69,9 +62,8 @@ regs->seg = GET_SEG(seg) | 3; \ } while (0) -static int -restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, - unsigned long *pax) +int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, + unsigned long *pax) { void __user *buf; unsigned int tmpflags; @@ -126,9 +118,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, return err; } -static int -setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, - struct pt_regs *regs, unsigned long mask) +int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, + struct pt_regs *regs, unsigned long mask) { int err = 0; @@ -160,7 +151,7 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, 
put_user_ex(regs->r15, &sc->r15); #endif /* CONFIG_X86_64 */ - put_user_ex(current->thread.trap_no, &sc->trapno); + put_user_ex(current->thread.trap_nr, &sc->trapno); put_user_ex(current->thread.error_code, &sc->err); put_user_ex(regs->ip, &sc->ip); #ifdef CONFIG_X86_32 @@ -643,6 +634,16 @@ static int signr_convert(int sig) #define is_ia32 0 #endif /* CONFIG_IA32_EMULATION */ +#ifdef CONFIG_X86_X32_ABI +#define is_x32 test_thread_flag(TIF_X32) + +static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, + siginfo_t *info, compat_sigset_t *set, + struct pt_regs *regs); +#else /* !CONFIG_X86_X32_ABI */ +#define is_x32 0 +#endif /* CONFIG_X86_X32_ABI */ + int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs); int ia32_setup_frame(int sig, struct k_sigaction *ka, @@ -667,8 +668,14 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, ret = ia32_setup_rt_frame(usig, ka, info, set, regs); else ret = ia32_setup_frame(usig, ka, set, regs); - } else +#ifdef CONFIG_X86_X32_ABI + } else if (is_x32) { + ret = x32_setup_rt_frame(usig, ka, info, + (compat_sigset_t *)set, regs); +#endif + } else { ret = __setup_rt_frame(sig, ka, info, set, regs); + } if (ret) { force_sigsegv(sig, current); @@ -851,3 +858,102 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where) force_sig(SIGSEGV, me); } + +#ifdef CONFIG_X86_X32_ABI +static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, + siginfo_t *info, compat_sigset_t *set, + struct pt_regs *regs) +{ + struct rt_sigframe_x32 __user *frame; + void __user *restorer; + int err = 0; + void __user *fpstate = NULL; + + frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + return -EFAULT; + + if (ka->sa.sa_flags & SA_SIGINFO) { + if (copy_siginfo_to_user32(&frame->info, info)) + return -EFAULT; + } + + put_user_try { + /* Create the ucontext. */ + if (cpu_has_xsave) + put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); + else + put_user_ex(0, &frame->uc.uc_flags); + put_user_ex(0, &frame->uc.uc_link); + put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + put_user_ex(sas_ss_flags(regs->sp), + &frame->uc.uc_stack.ss_flags); + put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + put_user_ex(0, &frame->uc.uc__pad0); + err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + + if (ka->sa.sa_flags & SA_RESTORER) { + restorer = ka->sa.sa_restorer; + } else { + /* could use a vstub here */ + restorer = NULL; + err |= -EFAULT; + } + put_user_ex(restorer, &frame->pretcode); + } put_user_catch(err); + + if (err) + return -EFAULT; + + /* Set up registers for signal handler */ + regs->sp = (unsigned long) frame; + regs->ip = (unsigned long) ka->sa.sa_handler; + + /* We use the x32 calling convention here... 
*/ + regs->di = sig; + regs->si = (unsigned long) &frame->info; + regs->dx = (unsigned long) &frame->uc; + + loadsegment(ds, __USER_DS); + loadsegment(es, __USER_DS); + + regs->cs = __USER_CS; + regs->ss = __USER_DS; + + return 0; +} + +asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs) +{ + struct rt_sigframe_x32 __user *frame; + sigset_t set; + unsigned long ax; + struct pt_regs tregs; + + frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8); + + if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + set_current_blocked(&set); + + if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) + goto badframe; + + tregs = *regs; + if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT) + goto badframe; + + return ax; + +badframe: + signal_fault(regs, frame, "x32 rt_sigreturn"); + return 0; +} +#endif diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5104a2b685cf..6e1e406038c2 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -50,6 +50,7 @@ #include <linux/tboot.h> #include <linux/stackprotector.h> #include <linux/gfp.h> +#include <linux/cpuidle.h> #include <asm/acpi.h> #include <asm/desc.h> @@ -219,14 +220,9 @@ static void __cpuinit smp_callin(void) * Update loops_per_jiffy in cpu_data. Previous call to * smp_store_cpu_info() stored a value that is close but not as * accurate as the value just calculated. - * - * Need to enable IRQs because it can take longer and then - * the NMI watchdog might kill us. */ - local_irq_enable(); calibrate_delay(); cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy; - local_irq_disable(); pr_debug("Stack at about %p\n", &cpuid); /* @@ -1409,7 +1405,8 @@ void native_play_dead(void) tboot_shutdown(TB_SHUTDOWN_WFS); mwait_play_dead(); /* Only returns on failure */ - hlt_play_dead(); + if (cpuidle_play_dead()) + hlt_play_dead(); } #else /* ... !CONFIG_HOTPLUG_CPU */ diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index ef59642ff1bf..b4d3c3927dd8 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -98,7 +98,7 @@ out: static void find_start_end(unsigned long flags, unsigned long *begin, unsigned long *end) { - if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT)) { + if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT)) { unsigned long new_begin; /* This is usually used needed to map code in small model, so it needs to be in the first 31bit. 
Limit @@ -144,7 +144,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, (!vma || addr + len <= vma->vm_start)) return addr; } - if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32)) + if (((flags & MAP_32BIT) || test_thread_flag(TIF_ADDR32)) && len <= mm->cached_hole_size) { mm->cached_hole_size = 0; mm->free_area_cache = begin; @@ -205,7 +205,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, return addr; /* for MAP_32BIT mappings we force the legact mmap base */ - if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT)) + if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT)) goto bottomup; /* requesting a specific address */ diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c index 7ac7943be02c..5c7f8c20da74 100644 --- a/arch/x86/kernel/syscall_64.c +++ b/arch/x86/kernel/syscall_64.c @@ -5,6 +5,14 @@ #include <linux/cache.h> #include <asm/asm-offsets.h> +#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat) + +#ifdef CONFIG_X86_X32_ABI +# define __SYSCALL_X32(nr, sym, compat) __SYSCALL_64(nr, sym, compat) +#else +# define __SYSCALL_X32(nr, sym, compat) /* nothing */ +#endif + #define __SYSCALL_64(nr, sym, compat) extern asmlinkage void sym(void) ; #include <asm/syscalls_64.h> #undef __SYSCALL_64 diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index e2410e27f97e..6410744ac5cb 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -272,7 +272,7 @@ static void tboot_copy_fadt(const struct acpi_table_fadt *fadt) offsetof(struct acpi_table_facs, firmware_waking_vector); } -void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control) +static int tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control) { static u32 acpi_shutdown_map[ACPI_S_STATE_COUNT] = { /* S0,1,2: */ -1, -1, -1, @@ -281,7 +281,7 @@ void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control) /* S5: */ TB_SHUTDOWN_S5 }; if (!tboot_enabled()) - return; + return 0; tboot_copy_fadt(&acpi_gbl_FADT); tboot->acpi_sinfo.pm1a_cnt_val = pm1a_control; @@ -292,10 +292,11 @@ void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control) if (sleep_state >= ACPI_S_STATE_COUNT || acpi_shutdown_map[sleep_state] == -1) { pr_warning("unsupported sleep state 0x%x\n", sleep_state); - return; + return -1; } tboot_shutdown(acpi_shutdown_map[sleep_state]); + return 0; } static atomic_t ap_wfs_count; @@ -345,6 +346,8 @@ static __init int tboot_late_init(void) atomic_set(&ap_wfs_count, 0); register_hotcpu_notifier(&tboot_cpu_notifier); + + acpi_os_set_prepare_sleep(&tboot_sleep); return 0; } diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index 73920e4c6dc5..9d9d2f9e77a5 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -162,7 +162,7 @@ int regset_tls_get(struct task_struct *target, const struct user_regset *regset, { const struct desc_struct *tls; - if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || + if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || (pos % sizeof(struct user_desc)) != 0 || (count % sizeof(struct user_desc)) != 0) return -EINVAL; @@ -197,7 +197,7 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset, struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES]; const struct user_desc *info; - if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || + if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || (pos % sizeof(struct user_desc)) != 0 || (count % sizeof(struct user_desc)) != 0) return -EINVAL; diff 
--git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 860f126ca233..ff9281f16029 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -119,7 +119,7 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. * On nmi (interrupt 2), do_trap should not be called. */ - if (trapnr < 6) + if (trapnr < X86_TRAP_UD) goto vm86_trap; goto trap_signal; } @@ -132,7 +132,7 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, trap_signal: #endif /* - * We want error_code and trap_no set for userspace faults and + * We want error_code and trap_nr set for userspace faults and * kernelspace faults which result in die(), but not * kernelspace faults which are fixed up. die() gives the * process no chance to handle the signal and notice the @@ -141,7 +141,7 @@ trap_signal: * delivered, faults. See also do_general_protection below. */ tsk->thread.error_code = error_code; - tsk->thread.trap_no = trapnr; + tsk->thread.trap_nr = trapnr; #ifdef CONFIG_X86_64 if (show_unhandled_signals && unhandled_signal(tsk, signr) && @@ -164,7 +164,7 @@ trap_signal: kernel_trap: if (!fixup_exception(regs)) { tsk->thread.error_code = error_code; - tsk->thread.trap_no = trapnr; + tsk->thread.trap_nr = trapnr; die(str, regs, error_code); } return; @@ -203,27 +203,31 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ do_trap(trapnr, signr, str, regs, error_code, &info); \ } -DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) -DO_ERROR(4, SIGSEGV, "overflow", overflow) -DO_ERROR(5, SIGSEGV, "bounds", bounds) -DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) -DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) -DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) -DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) +DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, + regs->ip) +DO_ERROR(X86_TRAP_OF, SIGSEGV, "overflow", overflow) +DO_ERROR(X86_TRAP_BR, SIGSEGV, "bounds", bounds) +DO_ERROR_INFO(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, + regs->ip) +DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun", + coprocessor_segment_overrun) +DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) +DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) #ifdef CONFIG_X86_32 -DO_ERROR(12, SIGBUS, "stack segment", stack_segment) +DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) #endif -DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) +DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, + BUS_ADRALN, 0) #ifdef CONFIG_X86_64 /* Runs on IST stack */ dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) { if (notify_die(DIE_TRAP, "stack segment", regs, error_code, - 12, SIGBUS) == NOTIFY_STOP) + X86_TRAP_SS, SIGBUS) == NOTIFY_STOP) return; preempt_conditional_sti(regs); - do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL); + do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); preempt_conditional_cli(regs); } @@ -233,10 +237,10 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) struct task_struct *tsk = current; /* Return not checked because double check cannot be ignored */ - notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV); + notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); tsk->thread.error_code = 
error_code; - tsk->thread.trap_no = 8; + tsk->thread.trap_nr = X86_TRAP_DF; /* * This is always a kernel trap and never fixable (and thus must @@ -264,7 +268,7 @@ do_general_protection(struct pt_regs *regs, long error_code) goto gp_in_kernel; tsk->thread.error_code = error_code; - tsk->thread.trap_no = 13; + tsk->thread.trap_nr = X86_TRAP_GP; if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && printk_ratelimit()) { @@ -291,9 +295,9 @@ gp_in_kernel: return; tsk->thread.error_code = error_code; - tsk->thread.trap_no = 13; - if (notify_die(DIE_GPF, "general protection fault", regs, - error_code, 13, SIGSEGV) == NOTIFY_STOP) + tsk->thread.trap_nr = X86_TRAP_GP; + if (notify_die(DIE_GPF, "general protection fault", regs, error_code, + X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP) return; die("general protection fault", regs, error_code); } @@ -302,13 +306,13 @@ gp_in_kernel: dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) { #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP - if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) - == NOTIFY_STOP) + if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, + SIGTRAP) == NOTIFY_STOP) return; #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ - if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) - == NOTIFY_STOP) + if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, + SIGTRAP) == NOTIFY_STOP) return; /* @@ -317,7 +321,7 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) */ debug_stack_usage_inc(); preempt_conditional_sti(regs); - do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); + do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); preempt_conditional_cli(regs); debug_stack_usage_dec(); } @@ -422,8 +426,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) preempt_conditional_sti(regs); if (regs->flags & X86_VM_MASK) { - handle_vm86_trap((struct kernel_vm86_regs *) regs, - error_code, 1); + handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, + X86_TRAP_DB); preempt_conditional_cli(regs); debug_stack_usage_dec(); return; @@ -460,7 +464,8 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr) struct task_struct *task = current; siginfo_t info; unsigned short err; - char *str = (trapnr == 16) ? "fpu exception" : "simd exception"; + char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" : + "simd exception"; if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP) return; @@ -470,7 +475,7 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr) { if (!fixup_exception(regs)) { task->thread.error_code = error_code; - task->thread.trap_no = trapnr; + task->thread.trap_nr = trapnr; die(str, regs, error_code); } return; @@ -480,12 +485,12 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr) * Save the info for the exception handler and clear the error. */ save_init_fpu(task); - task->thread.trap_no = trapnr; + task->thread.trap_nr = trapnr; task->thread.error_code = error_code; info.si_signo = SIGFPE; info.si_errno = 0; info.si_addr = (void __user *)regs->ip; - if (trapnr == 16) { + if (trapnr == X86_TRAP_MF) { unsigned short cwd, swd; /* * (~cwd & swd) will mask out exceptions that are not set to unmasked @@ -529,10 +534,11 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr) info.si_code = FPE_FLTRES; } else { /* - * If we're using IRQ 13, or supposedly even some trap 16 - * implementations, it's possible we get a spurious trap... 
+ * If we're using IRQ 13, or supposedly even some trap + * X86_TRAP_MF implementations, it's possible + * we get a spurious trap, which is not an error. */ - return; /* Spurious trap, no error */ + return; } force_sig_info(SIGFPE, &info, task); } @@ -543,13 +549,13 @@ dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) ignore_fpu_irq = 1; #endif - math_error(regs, error_code, 16); + math_error(regs, error_code, X86_TRAP_MF); } dotraplinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) { - math_error(regs, error_code, 19); + math_error(regs, error_code, X86_TRAP_XF); } dotraplinkage void @@ -643,20 +649,21 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) info.si_errno = 0; info.si_code = ILL_BADSTK; info.si_addr = NULL; - if (notify_die(DIE_TRAP, "iret exception", - regs, error_code, 32, SIGILL) == NOTIFY_STOP) + if (notify_die(DIE_TRAP, "iret exception", regs, error_code, + X86_TRAP_IRET, SIGILL) == NOTIFY_STOP) return; - do_trap(32, SIGILL, "iret exception", regs, error_code, &info); + do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, + &info); } #endif /* Set of traps needed for early debugging. */ void __init early_trap_init(void) { - set_intr_gate_ist(1, &debug, DEBUG_STACK); + set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK); /* int3 can be called from all */ - set_system_intr_gate_ist(3, &int3, DEBUG_STACK); - set_intr_gate(14, &page_fault); + set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK); + set_intr_gate(X86_TRAP_PF, &page_fault); load_idt(&idt_descr); } @@ -672,30 +679,30 @@ void __init trap_init(void) early_iounmap(p, 4); #endif - set_intr_gate(0, &divide_error); - set_intr_gate_ist(2, &nmi, NMI_STACK); + set_intr_gate(X86_TRAP_DE, &divide_error); + set_intr_gate_ist(X86_TRAP_NMI, &nmi, NMI_STACK); /* int4 can be called from all */ - set_system_intr_gate(4, &overflow); - set_intr_gate(5, &bounds); - set_intr_gate(6, &invalid_op); - set_intr_gate(7, &device_not_available); + set_system_intr_gate(X86_TRAP_OF, &overflow); + set_intr_gate(X86_TRAP_BR, &bounds); + set_intr_gate(X86_TRAP_UD, &invalid_op); + set_intr_gate(X86_TRAP_NM, &device_not_available); #ifdef CONFIG_X86_32 - set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); + set_task_gate(X86_TRAP_DF, GDT_ENTRY_DOUBLEFAULT_TSS); #else - set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK); + set_intr_gate_ist(X86_TRAP_DF, &double_fault, DOUBLEFAULT_STACK); #endif - set_intr_gate(9, &coprocessor_segment_overrun); - set_intr_gate(10, &invalid_TSS); - set_intr_gate(11, &segment_not_present); - set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK); - set_intr_gate(13, &general_protection); - set_intr_gate(15, &spurious_interrupt_bug); - set_intr_gate(16, &coprocessor_error); - set_intr_gate(17, &alignment_check); + set_intr_gate(X86_TRAP_OLD_MF, &coprocessor_segment_overrun); + set_intr_gate(X86_TRAP_TS, &invalid_TSS); + set_intr_gate(X86_TRAP_NP, &segment_not_present); + set_intr_gate_ist(X86_TRAP_SS, &stack_segment, STACKFAULT_STACK); + set_intr_gate(X86_TRAP_GP, &general_protection); + set_intr_gate(X86_TRAP_SPURIOUS, &spurious_interrupt_bug); + set_intr_gate(X86_TRAP_MF, &coprocessor_error); + set_intr_gate(X86_TRAP_AC, &alignment_check); #ifdef CONFIG_X86_MCE - set_intr_gate_ist(18, &machine_check, MCE_STACK); + set_intr_gate_ist(X86_TRAP_MC, &machine_check, MCE_STACK); #endif - set_intr_gate(19, &simd_coprocessor_error); + set_intr_gate(X86_TRAP_XF, &simd_coprocessor_error); /* Reserve all the builtin and the syscall vector: */ for
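The magic vector numbers being replaced throughout traps.c map one-to-one onto the new X86_TRAP_* names; the values are simply the architectural exception vectors. An excerpt for reference (the full enum lives in asm/traps.h as extended by this series):

	enum {
		X86_TRAP_DE = 0,	/*  0: divide error */
		X86_TRAP_DB,		/*  1: debug */
		X86_TRAP_NMI,		/*  2: non-maskable interrupt */
		X86_TRAP_BP,		/*  3: int3 breakpoint */
		X86_TRAP_OF,		/*  4: overflow */
		X86_TRAP_BR,		/*  5: bound range exceeded */
		X86_TRAP_UD,		/*  6: invalid opcode */
		X86_TRAP_NM,		/*  7: device not available */
		X86_TRAP_DF,		/*  8: double fault */
		X86_TRAP_OLD_MF,	/*  9: coprocessor segment overrun */
		X86_TRAP_TS,		/* 10: invalid TSS */
		X86_TRAP_NP,		/* 11: segment not present */
		X86_TRAP_SS,		/* 12: stack-segment fault */
		X86_TRAP_GP,		/* 13: general protection fault */
		X86_TRAP_PF,		/* 14: page fault */
		X86_TRAP_SPURIOUS,	/* 15: spurious interrupt */
		X86_TRAP_MF,		/* 16: x87 FPU error */
		X86_TRAP_AC,		/* 17: alignment check */
		X86_TRAP_MC,		/* 18: machine check */
		X86_TRAP_XF,		/* 19: SIMD exception */
		X86_TRAP_IRET = 32,	/* 32: IRET exception */
	};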
(i = 0; i < FIRST_EXTERNAL_VECTOR; i++) @@ -720,7 +727,7 @@ void __init trap_init(void) #ifdef CONFIG_X86_64 memcpy(&nmi_idt_table, &idt_table, IDT_ENTRIES * 16); - set_nmi_gate(1, &debug); - set_nmi_gate(3, &int3); + set_nmi_gate(X86_TRAP_DB, &debug); + set_nmi_gate(X86_TRAP_BP, &int3); #endif } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 899a03f2d181..fc0a147e3727 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -933,6 +933,16 @@ static int __init init_tsc_clocksource(void) clocksource_tsc.rating = 0; clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; } + + /* + * Trust the results of the earlier calibration on systems + * exporting a reliable TSC. + */ + if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { + clocksource_register_khz(&clocksource_tsc, tsc_khz); + return 0; + } + schedule_delayed_work(&tsc_irqwork, 0); return 0; } diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 328cb37bb827..255f58ae71e8 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -569,7 +569,7 @@ int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno) } if (trapno != 1) return 1; /* we let this handle by the calling routine */ - current->thread.trap_no = trapno; + current->thread.trap_nr = trapno; current->thread.error_code = error_code; force_sig(SIGTRAP, current); return 0; diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index b07ba9393564..f386dc49f988 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -52,10 +52,7 @@ #include "vsyscall_trace.h" DEFINE_VVAR(int, vgetcpu_mode); -DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = -{ - .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), -}; +DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data); static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE; @@ -80,20 +77,15 @@ early_param("vsyscall", vsyscall_setup); void update_vsyscall_tz(void) { - unsigned long flags; - - write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); - /* sys_tz has changed */ vsyscall_gtod_data.sys_tz = sys_tz; - write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, struct clocksource *clock, u32 mult) { - unsigned long flags; + struct timespec monotonic; - write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); + write_seqcount_begin(&vsyscall_gtod_data.seq); /* copy vsyscall data */ vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode; @@ -101,12 +93,19 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, vsyscall_gtod_data.clock.mask = clock->mask; vsyscall_gtod_data.clock.mult = mult; vsyscall_gtod_data.clock.shift = clock->shift; + vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; - vsyscall_gtod_data.wall_to_monotonic = *wtm; + + monotonic = timespec_add(*wall_time, *wtm); + vsyscall_gtod_data.monotonic_time_sec = monotonic.tv_sec; + vsyscall_gtod_data.monotonic_time_nsec = monotonic.tv_nsec; + vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); + vsyscall_gtod_data.monotonic_time_coarse = + timespec_add(vsyscall_gtod_data.wall_time_coarse, *wtm); - write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); + write_seqcount_end(&vsyscall_gtod_data.seq); } static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, @@ -153,7 +152,7 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size) 
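The vsyscall_gtod_data rework above replaces the IRQ-disabling seqlock with a plain seqcount plus precomputed wall/monotonic sums; the matching lock-free readers live in the user-mapped vDSO, which is outside this hunk. A schematic of the read side, not taken from the patch, using the generic seqcount API and the field names introduced above:

	static void read_wall_time(struct timespec *ts)
	{
		unsigned seq;

		do {
			seq = read_seqcount_begin(&vsyscall_gtod_data.seq);
			ts->tv_sec  = vsyscall_gtod_data.wall_time_sec;
			ts->tv_nsec = vsyscall_gtod_data.wall_time_nsec;
			/* a real reader also folds in the clocksource delta here */
		} while (read_seqcount_retry(&vsyscall_gtod_data.seq, seq));
	}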
thread->error_code = 6; /* user fault, no page, write */ thread->cr2 = ptr; - thread->trap_no = 14; + thread->trap_nr = X86_TRAP_PF; memset(&info, 0, sizeof(info)); info.si_signo = SIGSEGV; diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 7718541541d4..9b868124128d 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -28,6 +28,7 @@ #include <linux/regset.h> #include <asm/uaccess.h> +#include <asm/traps.h> #include <asm/desc.h> #include <asm/user.h> #include <asm/i387.h> @@ -269,7 +270,7 @@ void math_emulate(struct math_emu_info *info) FPU_EIP = FPU_ORIG_EIP; /* Point to current FPU instruction. */ RE_ENTRANT_CHECK_OFF; - current->thread.trap_no = 16; + current->thread.trap_nr = X86_TRAP_MF; current->thread.error_code = 0; send_sig(SIGFPE, current, 1); return; @@ -662,7 +663,7 @@ static int valid_prefix(u_char *Byte, u_char __user **fpu_eip, void math_abort(struct math_emu_info *info, unsigned int signal) { FPU_EIP = FPU_ORIG_EIP; - current->thread.trap_no = 16; + current->thread.trap_nr = X86_TRAP_MF; current->thread.error_code = 0; send_sig(signal, current, 1); RE_ENTRANT_CHECK_OFF; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index f0b4caf85c1a..3ecfd1aaf214 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -615,7 +615,7 @@ pgtable_bad(struct pt_regs *regs, unsigned long error_code, dump_pagetable(address); tsk->thread.cr2 = address; - tsk->thread.trap_no = 14; + tsk->thread.trap_nr = X86_TRAP_PF; tsk->thread.error_code = error_code; if (__die("Bad pagetable", regs, error_code)) @@ -636,7 +636,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, /* Are we prepared to handle this kernel fault? */ if (fixup_exception(regs)) { if (current_thread_info()->sig_on_uaccess_error && signal) { - tsk->thread.trap_no = 14; + tsk->thread.trap_nr = X86_TRAP_PF; tsk->thread.error_code = error_code | PF_USER; tsk->thread.cr2 = address; @@ -676,7 +676,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); tsk->thread.cr2 = address; - tsk->thread.trap_no = 14; + tsk->thread.trap_nr = X86_TRAP_PF; tsk->thread.error_code = error_code; sig = SIGKILL; @@ -754,7 +754,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, /* Kernel addresses are always protection faults: */ tsk->thread.cr2 = address; tsk->thread.error_code = error_code | (address >= TASK_SIZE); - tsk->thread.trap_no = 14; + tsk->thread.trap_nr = X86_TRAP_PF; force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0); @@ -838,7 +838,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, tsk->thread.cr2 = address; tsk->thread.error_code = error_code; - tsk->thread.trap_no = 14; + tsk->thread.trap_nr = X86_TRAP_PF; #ifdef CONFIG_MEMORY_FAILURE if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 1c1c4f46a7c1..efb5b4b93711 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -70,7 +70,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) return; pxm = pa->proximity_domain; apic_id = pa->apic_id; - if (!cpu_has_x2apic && (apic_id >= 0xff)) { + if (!apic->apic_id_valid(apic_id)) { printk(KERN_INFO "SRAT: PXM %u -> X2APIC 0x%04x ignored\n", pxm, apic_id); return; diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index bff89dfe3619..d6aa6e8315d1 100644 --- a/arch/x86/oprofile/backtrace.c +++ 
b/arch/x86/oprofile/backtrace.c @@ -67,7 +67,7 @@ x86_backtrace_32(struct pt_regs * const regs, unsigned int depth) { struct stack_frame_ia32 *head; - /* User process is 32-bit */ + /* User process is IA32 */ if (!current || !test_thread_flag(TIF_IA32)) return 0; diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c index 7cce722667b8..a4bee53c2e54 100644 --- a/arch/x86/platform/olpc/olpc.c +++ b/arch/x86/platform/olpc/olpc.c @@ -20,6 +20,8 @@ #include <linux/platform_device.h> #include <linux/of.h> #include <linux/syscore_ops.h> +#include <linux/debugfs.h> +#include <linux/mutex.h> #include <asm/geode.h> #include <asm/setup.h> @@ -31,6 +33,15 @@ EXPORT_SYMBOL_GPL(olpc_platform_info); static DEFINE_SPINLOCK(ec_lock); +/* debugfs interface to EC commands */ +#define EC_MAX_CMD_ARGS (5 + 1) /* cmd byte + 5 args */ +#define EC_MAX_CMD_REPLY (8) + +static struct dentry *ec_debugfs_dir; +static DEFINE_MUTEX(ec_debugfs_cmd_lock); +static unsigned char ec_debugfs_resp[EC_MAX_CMD_REPLY]; +static unsigned int ec_debugfs_resp_bytes; + /* EC event mask to be applied during suspend (defining wakeup sources). */ static u16 ec_wakeup_mask; @@ -269,6 +280,91 @@ int olpc_ec_sci_query(u16 *sci_value) } EXPORT_SYMBOL_GPL(olpc_ec_sci_query); +static ssize_t ec_debugfs_cmd_write(struct file *file, const char __user *buf, + size_t size, loff_t *ppos) +{ + int i, m; + unsigned char ec_cmd[EC_MAX_CMD_ARGS]; + unsigned int ec_cmd_int[EC_MAX_CMD_ARGS]; + char cmdbuf[64]; + int ec_cmd_bytes; + + mutex_lock(&ec_debugfs_cmd_lock); + + size = simple_write_to_buffer(cmdbuf, sizeof(cmdbuf), ppos, buf, size); + + m = sscanf(cmdbuf, "%x:%u %x %x %x %x %x", &ec_cmd_int[0], + &ec_debugfs_resp_bytes, + &ec_cmd_int[1], &ec_cmd_int[2], &ec_cmd_int[3], + &ec_cmd_int[4], &ec_cmd_int[5]); + if (m < 2 || ec_debugfs_resp_bytes > EC_MAX_CMD_REPLY) { + /* reset to prevent overflow on read */ + ec_debugfs_resp_bytes = 0; + + printk(KERN_DEBUG "olpc-ec: bad ec cmd: " + "cmd:response-count [arg1 [arg2 ...]]\n"); + size = -EINVAL; + goto out; + } + + /* convert scanf'd ints to char */ + ec_cmd_bytes = m - 2; + for (i = 0; i <= ec_cmd_bytes; i++) + ec_cmd[i] = ec_cmd_int[i]; + + printk(KERN_DEBUG "olpc-ec: debugfs cmd 0x%02x with %d args " + "%02x %02x %02x %02x %02x, want %d returns\n", + ec_cmd[0], ec_cmd_bytes, ec_cmd[1], ec_cmd[2], ec_cmd[3], + ec_cmd[4], ec_cmd[5], ec_debugfs_resp_bytes); + + olpc_ec_cmd(ec_cmd[0], (ec_cmd_bytes == 0) ? 
NULL : &ec_cmd[1], + ec_cmd_bytes, ec_debugfs_resp, ec_debugfs_resp_bytes); + + printk(KERN_DEBUG "olpc-ec: response " + "%02x %02x %02x %02x %02x %02x %02x %02x (%d bytes expected)\n", + ec_debugfs_resp[0], ec_debugfs_resp[1], ec_debugfs_resp[2], + ec_debugfs_resp[3], ec_debugfs_resp[4], ec_debugfs_resp[5], + ec_debugfs_resp[6], ec_debugfs_resp[7], ec_debugfs_resp_bytes); + +out: + mutex_unlock(&ec_debugfs_cmd_lock); + return size; +} + +static ssize_t ec_debugfs_cmd_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + unsigned int i, r; + char *rp; + char respbuf[64]; + + mutex_lock(&ec_debugfs_cmd_lock); + rp = respbuf; + rp += sprintf(rp, "%02x", ec_debugfs_resp[0]); + for (i = 1; i < ec_debugfs_resp_bytes; i++) + rp += sprintf(rp, ", %02x", ec_debugfs_resp[i]); + mutex_unlock(&ec_debugfs_cmd_lock); + rp += sprintf(rp, "\n"); + + r = rp - respbuf; + return simple_read_from_buffer(buf, size, ppos, respbuf, r); +} + +static const struct file_operations ec_debugfs_genops = { + .write = ec_debugfs_cmd_write, + .read = ec_debugfs_cmd_read, +}; + +static void setup_debugfs(void) +{ + ec_debugfs_dir = debugfs_create_dir("olpc-ec", 0); + if (ec_debugfs_dir == ERR_PTR(-ENODEV)) + return; + + debugfs_create_file("cmd", 0600, ec_debugfs_dir, NULL, + &ec_debugfs_genops); +} + static int olpc_ec_suspend(void) { return olpc_ec_mask_write(ec_wakeup_mask); @@ -372,6 +468,7 @@ static int __init olpc_init(void) } register_syscore_ops(&olpc_syscore_ops); + setup_debugfs(); return 0; } diff --git a/arch/x86/syscalls/Makefile b/arch/x86/syscalls/Makefile index 564b2476fede..3236aebc828d 100644 --- a/arch/x86/syscalls/Makefile +++ b/arch/x86/syscalls/Makefile @@ -10,8 +10,10 @@ syshdr := $(srctree)/$(src)/syscallhdr.sh systbl := $(srctree)/$(src)/syscalltbl.sh quiet_cmd_syshdr = SYSHDR $@ - cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' $< $@ \ - $(syshdr_abi_$(basetarget)) $(syshdr_pfx_$(basetarget)) + cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' '$<' '$@' \ + '$(syshdr_abi_$(basetarget))' \ + '$(syshdr_pfx_$(basetarget))' \ + '$(syshdr_offset_$(basetarget))' quiet_cmd_systbl = SYSTBL $@ cmd_systbl = $(CONFIG_SHELL) '$(systbl)' $< $@ @@ -24,18 +26,28 @@ syshdr_pfx_unistd_32_ia32 := ia32_ $(out)/unistd_32_ia32.h: $(syscall32) $(syshdr) $(call if_changed,syshdr) -syshdr_abi_unistd_64 := 64 +syshdr_abi_unistd_x32 := common,x32 +syshdr_offset_unistd_x32 := __X32_SYSCALL_BIT +$(out)/unistd_x32.h: $(syscall64) $(syshdr) + $(call if_changed,syshdr) + +syshdr_abi_unistd_64 := common,64 $(out)/unistd_64.h: $(syscall64) $(syshdr) $(call if_changed,syshdr) +syshdr_abi_unistd_64_x32 := x32 +syshdr_pfx_unistd_64_x32 := x32_ +$(out)/unistd_64_x32.h: $(syscall64) $(syshdr) + $(call if_changed,syshdr) + $(out)/syscalls_32.h: $(syscall32) $(systbl) $(call if_changed,systbl) $(out)/syscalls_64.h: $(syscall64) $(systbl) $(call if_changed,systbl) -syshdr-y += unistd_32.h unistd_64.h +syshdr-y += unistd_32.h unistd_64.h unistd_x32.h syshdr-y += syscalls_32.h -syshdr-$(CONFIG_X86_64) += unistd_32_ia32.h +syshdr-$(CONFIG_X86_64) += unistd_32_ia32.h unistd_64_x32.h syshdr-$(CONFIG_X86_64) += syscalls_64.h targets += $(syshdr-y) diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index e7e67cc3c14b..29f9f0554f7d 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -181,7 +181,7 @@ 172 i386 prctl sys_prctl 173 i386 rt_sigreturn ptregs_rt_sigreturn stub32_rt_sigreturn 174 i386 rt_sigaction sys_rt_sigaction sys32_rt_sigaction -175 i386 rt_sigprocmask 
sys_rt_sigprocmask sys32_rt_sigprocmask +175 i386 rt_sigprocmask sys_rt_sigprocmask 176 i386 rt_sigpending sys_rt_sigpending sys32_rt_sigpending 177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait 178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo sys32_rt_sigqueueinfo diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index b440a8f7eefa..dd29a9ea27c5 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -4,317 +4,350 @@ # The format is: # <number> <abi> <name> <entry point> # -# The abi is always "64" for this file (for now.) +# The abi is "common", "64" or "x32" for this file. # -0 64 read sys_read -1 64 write sys_write -2 64 open sys_open -3 64 close sys_close -4 64 stat sys_newstat -5 64 fstat sys_newfstat -6 64 lstat sys_newlstat -7 64 poll sys_poll -8 64 lseek sys_lseek -9 64 mmap sys_mmap -10 64 mprotect sys_mprotect -11 64 munmap sys_munmap -12 64 brk sys_brk +0 common read sys_read +1 common write sys_write +2 common open sys_open +3 common close sys_close +4 common stat sys_newstat +5 common fstat sys_newfstat +6 common lstat sys_newlstat +7 common poll sys_poll +8 common lseek sys_lseek +9 common mmap sys_mmap +10 common mprotect sys_mprotect +11 common munmap sys_munmap +12 common brk sys_brk 13 64 rt_sigaction sys_rt_sigaction -14 64 rt_sigprocmask sys_rt_sigprocmask +14 common rt_sigprocmask sys_rt_sigprocmask 15 64 rt_sigreturn stub_rt_sigreturn 16 64 ioctl sys_ioctl -17 64 pread64 sys_pread64 -18 64 pwrite64 sys_pwrite64 +17 common pread64 sys_pread64 +18 common pwrite64 sys_pwrite64 19 64 readv sys_readv 20 64 writev sys_writev -21 64 access sys_access -22 64 pipe sys_pipe -23 64 select sys_select -24 64 sched_yield sys_sched_yield -25 64 mremap sys_mremap -26 64 msync sys_msync -27 64 mincore sys_mincore -28 64 madvise sys_madvise -29 64 shmget sys_shmget -30 64 shmat sys_shmat -31 64 shmctl sys_shmctl -32 64 dup sys_dup -33 64 dup2 sys_dup2 -34 64 pause sys_pause -35 64 nanosleep sys_nanosleep -36 64 getitimer sys_getitimer -37 64 alarm sys_alarm -38 64 setitimer sys_setitimer -39 64 getpid sys_getpid -40 64 sendfile sys_sendfile64 -41 64 socket sys_socket -42 64 connect sys_connect -43 64 accept sys_accept -44 64 sendto sys_sendto +21 common access sys_access +22 common pipe sys_pipe +23 common select sys_select +24 common sched_yield sys_sched_yield +25 common mremap sys_mremap +26 common msync sys_msync +27 common mincore sys_mincore +28 common madvise sys_madvise +29 common shmget sys_shmget +30 common shmat sys_shmat +31 common shmctl sys_shmctl +32 common dup sys_dup +33 common dup2 sys_dup2 +34 common pause sys_pause +35 common nanosleep sys_nanosleep +36 common getitimer sys_getitimer +37 common alarm sys_alarm +38 common setitimer sys_setitimer +39 common getpid sys_getpid +40 common sendfile sys_sendfile64 +41 common socket sys_socket +42 common connect sys_connect +43 common accept sys_accept +44 common sendto sys_sendto 45 64 recvfrom sys_recvfrom 46 64 sendmsg sys_sendmsg 47 64 recvmsg sys_recvmsg -48 64 shutdown sys_shutdown -49 64 bind sys_bind -50 64 listen sys_listen -51 64 getsockname sys_getsockname -52 64 getpeername sys_getpeername -53 64 socketpair sys_socketpair -54 64 setsockopt sys_setsockopt -55 64 getsockopt sys_getsockopt -56 64 clone stub_clone -57 64 fork stub_fork -58 64 vfork stub_vfork +48 common shutdown sys_shutdown +49 common bind sys_bind +50 common listen sys_listen +51 common getsockname sys_getsockname +52 common getpeername sys_getpeername +53 common 
socketpair sys_socketpair +54 common setsockopt sys_setsockopt +55 common getsockopt sys_getsockopt +56 common clone stub_clone +57 common fork stub_fork +58 common vfork stub_vfork 59 64 execve stub_execve -60 64 exit sys_exit -61 64 wait4 sys_wait4 -62 64 kill sys_kill -63 64 uname sys_newuname -64 64 semget sys_semget -65 64 semop sys_semop -66 64 semctl sys_semctl -67 64 shmdt sys_shmdt -68 64 msgget sys_msgget -69 64 msgsnd sys_msgsnd -70 64 msgrcv sys_msgrcv -71 64 msgctl sys_msgctl -72 64 fcntl sys_fcntl -73 64 flock sys_flock -74 64 fsync sys_fsync -75 64 fdatasync sys_fdatasync -76 64 truncate sys_truncate -77 64 ftruncate sys_ftruncate -78 64 getdents sys_getdents -79 64 getcwd sys_getcwd -80 64 chdir sys_chdir -81 64 fchdir sys_fchdir -82 64 rename sys_rename -83 64 mkdir sys_mkdir -84 64 rmdir sys_rmdir -85 64 creat sys_creat -86 64 link sys_link -87 64 unlink sys_unlink -88 64 symlink sys_symlink -89 64 readlink sys_readlink -90 64 chmod sys_chmod -91 64 fchmod sys_fchmod -92 64 chown sys_chown -93 64 fchown sys_fchown -94 64 lchown sys_lchown -95 64 umask sys_umask -96 64 gettimeofday sys_gettimeofday -97 64 getrlimit sys_getrlimit -98 64 getrusage sys_getrusage -99 64 sysinfo sys_sysinfo -100 64 times sys_times +60 common exit sys_exit +61 common wait4 sys_wait4 +62 common kill sys_kill +63 common uname sys_newuname +64 common semget sys_semget +65 common semop sys_semop +66 common semctl sys_semctl +67 common shmdt sys_shmdt +68 common msgget sys_msgget +69 common msgsnd sys_msgsnd +70 common msgrcv sys_msgrcv +71 common msgctl sys_msgctl +72 common fcntl sys_fcntl +73 common flock sys_flock +74 common fsync sys_fsync +75 common fdatasync sys_fdatasync +76 common truncate sys_truncate +77 common ftruncate sys_ftruncate +78 common getdents sys_getdents +79 common getcwd sys_getcwd +80 common chdir sys_chdir +81 common fchdir sys_fchdir +82 common rename sys_rename +83 common mkdir sys_mkdir +84 common rmdir sys_rmdir +85 common creat sys_creat +86 common link sys_link +87 common unlink sys_unlink +88 common symlink sys_symlink +89 common readlink sys_readlink +90 common chmod sys_chmod +91 common fchmod sys_fchmod +92 common chown sys_chown +93 common fchown sys_fchown +94 common lchown sys_lchown +95 common umask sys_umask +96 common gettimeofday sys_gettimeofday +97 common getrlimit sys_getrlimit +98 common getrusage sys_getrusage +99 common sysinfo sys_sysinfo +100 common times sys_times 101 64 ptrace sys_ptrace -102 64 getuid sys_getuid -103 64 syslog sys_syslog -104 64 getgid sys_getgid -105 64 setuid sys_setuid -106 64 setgid sys_setgid -107 64 geteuid sys_geteuid -108 64 getegid sys_getegid -109 64 setpgid sys_setpgid -110 64 getppid sys_getppid -111 64 getpgrp sys_getpgrp -112 64 setsid sys_setsid -113 64 setreuid sys_setreuid -114 64 setregid sys_setregid -115 64 getgroups sys_getgroups -116 64 setgroups sys_setgroups -117 64 setresuid sys_setresuid -118 64 getresuid sys_getresuid -119 64 setresgid sys_setresgid -120 64 getresgid sys_getresgid -121 64 getpgid sys_getpgid -122 64 setfsuid sys_setfsuid -123 64 setfsgid sys_setfsgid -124 64 getsid sys_getsid -125 64 capget sys_capget -126 64 capset sys_capset +102 common getuid sys_getuid +103 common syslog sys_syslog +104 common getgid sys_getgid +105 common setuid sys_setuid +106 common setgid sys_setgid +107 common geteuid sys_geteuid +108 common getegid sys_getegid +109 common setpgid sys_setpgid +110 common getppid sys_getppid +111 common getpgrp sys_getpgrp +112 common setsid sys_setsid +113 common setreuid 
sys_setreuid +114 common setregid sys_setregid +115 common getgroups sys_getgroups +116 common setgroups sys_setgroups +117 common setresuid sys_setresuid +118 common getresuid sys_getresuid +119 common setresgid sys_setresgid +120 common getresgid sys_getresgid +121 common getpgid sys_getpgid +122 common setfsuid sys_setfsuid +123 common setfsgid sys_setfsgid +124 common getsid sys_getsid +125 common capget sys_capget +126 common capset sys_capset 127 64 rt_sigpending sys_rt_sigpending 128 64 rt_sigtimedwait sys_rt_sigtimedwait 129 64 rt_sigqueueinfo sys_rt_sigqueueinfo -130 64 rt_sigsuspend sys_rt_sigsuspend +130 common rt_sigsuspend sys_rt_sigsuspend 131 64 sigaltstack stub_sigaltstack -132 64 utime sys_utime -133 64 mknod sys_mknod +132 common utime sys_utime +133 common mknod sys_mknod 134 64 uselib -135 64 personality sys_personality -136 64 ustat sys_ustat -137 64 statfs sys_statfs -138 64 fstatfs sys_fstatfs -139 64 sysfs sys_sysfs -140 64 getpriority sys_getpriority -141 64 setpriority sys_setpriority -142 64 sched_setparam sys_sched_setparam -143 64 sched_getparam sys_sched_getparam -144 64 sched_setscheduler sys_sched_setscheduler -145 64 sched_getscheduler sys_sched_getscheduler -146 64 sched_get_priority_max sys_sched_get_priority_max -147 64 sched_get_priority_min sys_sched_get_priority_min -148 64 sched_rr_get_interval sys_sched_rr_get_interval -149 64 mlock sys_mlock -150 64 munlock sys_munlock -151 64 mlockall sys_mlockall -152 64 munlockall sys_munlockall -153 64 vhangup sys_vhangup -154 64 modify_ldt sys_modify_ldt -155 64 pivot_root sys_pivot_root +135 common personality sys_personality +136 common ustat sys_ustat +137 common statfs sys_statfs +138 common fstatfs sys_fstatfs +139 common sysfs sys_sysfs +140 common getpriority sys_getpriority +141 common setpriority sys_setpriority +142 common sched_setparam sys_sched_setparam +143 common sched_getparam sys_sched_getparam +144 common sched_setscheduler sys_sched_setscheduler +145 common sched_getscheduler sys_sched_getscheduler +146 common sched_get_priority_max sys_sched_get_priority_max +147 common sched_get_priority_min sys_sched_get_priority_min +148 common sched_rr_get_interval sys_sched_rr_get_interval +149 common mlock sys_mlock +150 common munlock sys_munlock +151 common mlockall sys_mlockall +152 common munlockall sys_munlockall +153 common vhangup sys_vhangup +154 common modify_ldt sys_modify_ldt +155 common pivot_root sys_pivot_root 156 64 _sysctl sys_sysctl -157 64 prctl sys_prctl -158 64 arch_prctl sys_arch_prctl -159 64 adjtimex sys_adjtimex -160 64 setrlimit sys_setrlimit -161 64 chroot sys_chroot -162 64 sync sys_sync -163 64 acct sys_acct -164 64 settimeofday sys_settimeofday -165 64 mount sys_mount -166 64 umount2 sys_umount -167 64 swapon sys_swapon -168 64 swapoff sys_swapoff -169 64 reboot sys_reboot -170 64 sethostname sys_sethostname -171 64 setdomainname sys_setdomainname -172 64 iopl stub_iopl -173 64 ioperm sys_ioperm +157 common prctl sys_prctl +158 common arch_prctl sys_arch_prctl +159 common adjtimex sys_adjtimex +160 common setrlimit sys_setrlimit +161 common chroot sys_chroot +162 common sync sys_sync +163 common acct sys_acct +164 common settimeofday sys_settimeofday +165 common mount sys_mount +166 common umount2 sys_umount +167 common swapon sys_swapon +168 common swapoff sys_swapoff +169 common reboot sys_reboot +170 common sethostname sys_sethostname +171 common setdomainname sys_setdomainname +172 common iopl stub_iopl +173 common ioperm sys_ioperm 174 64 create_module -175 64 
init_module sys_init_module -176 64 delete_module sys_delete_module +175 common init_module sys_init_module +176 common delete_module sys_delete_module 177 64 get_kernel_syms 178 64 query_module -179 64 quotactl sys_quotactl +179 common quotactl sys_quotactl 180 64 nfsservctl -181 64 getpmsg -182 64 putpmsg -183 64 afs_syscall -184 64 tuxcall -185 64 security -186 64 gettid sys_gettid -187 64 readahead sys_readahead -188 64 setxattr sys_setxattr -189 64 lsetxattr sys_lsetxattr -190 64 fsetxattr sys_fsetxattr -191 64 getxattr sys_getxattr -192 64 lgetxattr sys_lgetxattr -193 64 fgetxattr sys_fgetxattr -194 64 listxattr sys_listxattr -195 64 llistxattr sys_llistxattr -196 64 flistxattr sys_flistxattr -197 64 removexattr sys_removexattr -198 64 lremovexattr sys_lremovexattr -199 64 fremovexattr sys_fremovexattr -200 64 tkill sys_tkill -201 64 time sys_time -202 64 futex sys_futex -203 64 sched_setaffinity sys_sched_setaffinity -204 64 sched_getaffinity sys_sched_getaffinity +181 common getpmsg +182 common putpmsg +183 common afs_syscall +184 common tuxcall +185 common security +186 common gettid sys_gettid +187 common readahead sys_readahead +188 common setxattr sys_setxattr +189 common lsetxattr sys_lsetxattr +190 common fsetxattr sys_fsetxattr +191 common getxattr sys_getxattr +192 common lgetxattr sys_lgetxattr +193 common fgetxattr sys_fgetxattr +194 common listxattr sys_listxattr +195 common llistxattr sys_llistxattr +196 common flistxattr sys_flistxattr +197 common removexattr sys_removexattr +198 common lremovexattr sys_lremovexattr +199 common fremovexattr sys_fremovexattr +200 common tkill sys_tkill +201 common time sys_time +202 common futex sys_futex +203 common sched_setaffinity sys_sched_setaffinity +204 common sched_getaffinity sys_sched_getaffinity 205 64 set_thread_area -206 64 io_setup sys_io_setup -207 64 io_destroy sys_io_destroy -208 64 io_getevents sys_io_getevents -209 64 io_submit sys_io_submit -210 64 io_cancel sys_io_cancel +206 common io_setup sys_io_setup +207 common io_destroy sys_io_destroy +208 common io_getevents sys_io_getevents +209 common io_submit sys_io_submit +210 common io_cancel sys_io_cancel 211 64 get_thread_area -212 64 lookup_dcookie sys_lookup_dcookie -213 64 epoll_create sys_epoll_create +212 common lookup_dcookie sys_lookup_dcookie +213 common epoll_create sys_epoll_create 214 64 epoll_ctl_old 215 64 epoll_wait_old -216 64 remap_file_pages sys_remap_file_pages -217 64 getdents64 sys_getdents64 -218 64 set_tid_address sys_set_tid_address -219 64 restart_syscall sys_restart_syscall -220 64 semtimedop sys_semtimedop -221 64 fadvise64 sys_fadvise64 +216 common remap_file_pages sys_remap_file_pages +217 common getdents64 sys_getdents64 +218 common set_tid_address sys_set_tid_address +219 common restart_syscall sys_restart_syscall +220 common semtimedop sys_semtimedop +221 common fadvise64 sys_fadvise64 222 64 timer_create sys_timer_create -223 64 timer_settime sys_timer_settime -224 64 timer_gettime sys_timer_gettime -225 64 timer_getoverrun sys_timer_getoverrun -226 64 timer_delete sys_timer_delete -227 64 clock_settime sys_clock_settime -228 64 clock_gettime sys_clock_gettime -229 64 clock_getres sys_clock_getres -230 64 clock_nanosleep sys_clock_nanosleep -231 64 exit_group sys_exit_group -232 64 epoll_wait sys_epoll_wait -233 64 epoll_ctl sys_epoll_ctl -234 64 tgkill sys_tgkill -235 64 utimes sys_utimes +223 common timer_settime sys_timer_settime +224 common timer_gettime sys_timer_gettime +225 common timer_getoverrun sys_timer_getoverrun +226 
common timer_delete sys_timer_delete +227 common clock_settime sys_clock_settime +228 common clock_gettime sys_clock_gettime +229 common clock_getres sys_clock_getres +230 common clock_nanosleep sys_clock_nanosleep +231 common exit_group sys_exit_group +232 common epoll_wait sys_epoll_wait +233 common epoll_ctl sys_epoll_ctl +234 common tgkill sys_tgkill +235 common utimes sys_utimes 236 64 vserver -237 64 mbind sys_mbind -238 64 set_mempolicy sys_set_mempolicy -239 64 get_mempolicy sys_get_mempolicy -240 64 mq_open sys_mq_open -241 64 mq_unlink sys_mq_unlink -242 64 mq_timedsend sys_mq_timedsend -243 64 mq_timedreceive sys_mq_timedreceive +237 common mbind sys_mbind +238 common set_mempolicy sys_set_mempolicy +239 common get_mempolicy sys_get_mempolicy +240 common mq_open sys_mq_open +241 common mq_unlink sys_mq_unlink +242 common mq_timedsend sys_mq_timedsend +243 common mq_timedreceive sys_mq_timedreceive 244 64 mq_notify sys_mq_notify -245 64 mq_getsetattr sys_mq_getsetattr +245 common mq_getsetattr sys_mq_getsetattr 246 64 kexec_load sys_kexec_load 247 64 waitid sys_waitid -248 64 add_key sys_add_key -249 64 request_key sys_request_key -250 64 keyctl sys_keyctl -251 64 ioprio_set sys_ioprio_set -252 64 ioprio_get sys_ioprio_get -253 64 inotify_init sys_inotify_init -254 64 inotify_add_watch sys_inotify_add_watch -255 64 inotify_rm_watch sys_inotify_rm_watch -256 64 migrate_pages sys_migrate_pages -257 64 openat sys_openat -258 64 mkdirat sys_mkdirat -259 64 mknodat sys_mknodat -260 64 fchownat sys_fchownat -261 64 futimesat sys_futimesat -262 64 newfstatat sys_newfstatat -263 64 unlinkat sys_unlinkat -264 64 renameat sys_renameat -265 64 linkat sys_linkat -266 64 symlinkat sys_symlinkat -267 64 readlinkat sys_readlinkat -268 64 fchmodat sys_fchmodat -269 64 faccessat sys_faccessat -270 64 pselect6 sys_pselect6 -271 64 ppoll sys_ppoll -272 64 unshare sys_unshare +248 common add_key sys_add_key +249 common request_key sys_request_key +250 common keyctl sys_keyctl +251 common ioprio_set sys_ioprio_set +252 common ioprio_get sys_ioprio_get +253 common inotify_init sys_inotify_init +254 common inotify_add_watch sys_inotify_add_watch +255 common inotify_rm_watch sys_inotify_rm_watch +256 common migrate_pages sys_migrate_pages +257 common openat sys_openat +258 common mkdirat sys_mkdirat +259 common mknodat sys_mknodat +260 common fchownat sys_fchownat +261 common futimesat sys_futimesat +262 common newfstatat sys_newfstatat +263 common unlinkat sys_unlinkat +264 common renameat sys_renameat +265 common linkat sys_linkat +266 common symlinkat sys_symlinkat +267 common readlinkat sys_readlinkat +268 common fchmodat sys_fchmodat +269 common faccessat sys_faccessat +270 common pselect6 sys_pselect6 +271 common ppoll sys_ppoll +272 common unshare sys_unshare 273 64 set_robust_list sys_set_robust_list 274 64 get_robust_list sys_get_robust_list -275 64 splice sys_splice -276 64 tee sys_tee -277 64 sync_file_range sys_sync_file_range +275 common splice sys_splice +276 common tee sys_tee +277 common sync_file_range sys_sync_file_range 278 64 vmsplice sys_vmsplice 279 64 move_pages sys_move_pages -280 64 utimensat sys_utimensat -281 64 epoll_pwait sys_epoll_pwait -282 64 signalfd sys_signalfd -283 64 timerfd_create sys_timerfd_create -284 64 eventfd sys_eventfd -285 64 fallocate sys_fallocate -286 64 timerfd_settime sys_timerfd_settime -287 64 timerfd_gettime sys_timerfd_gettime -288 64 accept4 sys_accept4 -289 64 signalfd4 sys_signalfd4 -290 64 eventfd2 sys_eventfd2 -291 64 epoll_create1 
sys_epoll_create1 -292 64 dup3 sys_dup3 -293 64 pipe2 sys_pipe2 -294 64 inotify_init1 sys_inotify_init1 +280 common utimensat sys_utimensat +281 common epoll_pwait sys_epoll_pwait +282 common signalfd sys_signalfd +283 common timerfd_create sys_timerfd_create +284 common eventfd sys_eventfd +285 common fallocate sys_fallocate +286 common timerfd_settime sys_timerfd_settime +287 common timerfd_gettime sys_timerfd_gettime +288 common accept4 sys_accept4 +289 common signalfd4 sys_signalfd4 +290 common eventfd2 sys_eventfd2 +291 common epoll_create1 sys_epoll_create1 +292 common dup3 sys_dup3 +293 common pipe2 sys_pipe2 +294 common inotify_init1 sys_inotify_init1 295 64 preadv sys_preadv 296 64 pwritev sys_pwritev 297 64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo -298 64 perf_event_open sys_perf_event_open +298 common perf_event_open sys_perf_event_open 299 64 recvmmsg sys_recvmmsg -300 64 fanotify_init sys_fanotify_init -301 64 fanotify_mark sys_fanotify_mark -302 64 prlimit64 sys_prlimit64 -303 64 name_to_handle_at sys_name_to_handle_at -304 64 open_by_handle_at sys_open_by_handle_at -305 64 clock_adjtime sys_clock_adjtime -306 64 syncfs sys_syncfs +300 common fanotify_init sys_fanotify_init +301 common fanotify_mark sys_fanotify_mark +302 common prlimit64 sys_prlimit64 +303 common name_to_handle_at sys_name_to_handle_at +304 common open_by_handle_at sys_open_by_handle_at +305 common clock_adjtime sys_clock_adjtime +306 common syncfs sys_syncfs 307 64 sendmmsg sys_sendmmsg -308 64 setns sys_setns -309 64 getcpu sys_getcpu +308 common setns sys_setns +309 common getcpu sys_getcpu 310 64 process_vm_readv sys_process_vm_readv 311 64 process_vm_writev sys_process_vm_writev +# +# x32-specific system call numbers start at 512 to avoid cache impact +# for native 64-bit operation. 
+# +512 x32 rt_sigaction sys32_rt_sigaction +513 x32 rt_sigreturn stub_x32_rt_sigreturn +514 x32 ioctl compat_sys_ioctl +515 x32 readv compat_sys_readv +516 x32 writev compat_sys_writev +517 x32 recvfrom compat_sys_recvfrom +518 x32 sendmsg compat_sys_sendmsg +519 x32 recvmsg compat_sys_recvmsg +520 x32 execve stub_x32_execve +521 x32 ptrace compat_sys_ptrace +522 x32 rt_sigpending sys32_rt_sigpending +523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait +524 x32 rt_sigqueueinfo sys32_rt_sigqueueinfo +525 x32 sigaltstack stub_x32_sigaltstack +526 x32 timer_create compat_sys_timer_create +527 x32 mq_notify compat_sys_mq_notify +528 x32 kexec_load compat_sys_kexec_load +529 x32 waitid compat_sys_waitid +530 x32 set_robust_list compat_sys_set_robust_list +531 x32 get_robust_list compat_sys_get_robust_list +532 x32 vmsplice compat_sys_vmsplice +533 x32 move_pages compat_sys_move_pages +534 x32 preadv compat_sys_preadv64 +535 x32 pwritev compat_sys_pwritev64 +536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo +537 x32 recvmmsg compat_sys_recvmmsg +538 x32 sendmmsg compat_sys_sendmmsg +539 x32 process_vm_readv compat_sys_process_vm_readv +540 x32 process_vm_writev compat_sys_process_vm_writev diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c index fe626c3ba01b..9924776f4265 100644 --- a/arch/x86/um/sys_call_table_64.c +++ b/arch/x86/um/sys_call_table_64.c @@ -35,6 +35,9 @@ #define stub_sigaltstack sys_sigaltstack #define stub_rt_sigreturn sys_rt_sigreturn +#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat) +#define __SYSCALL_X32(nr, sym, compat) /* Not supported */ + #define __SYSCALL_64(nr, sym, compat) extern asmlinkage void sym(void) ; #include <asm/syscalls_64.h> diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c index 5edf4f4bbf53..ce7e3607a870 100644 --- a/arch/x86/um/user-offsets.c +++ b/arch/x86/um/user-offsets.c @@ -15,6 +15,8 @@ static char syscalls[] = { }; #else #define __SYSCALL_64(nr, sym, compat) [nr] = 1, +#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1, +#define __SYSCALL_X32(nr, sym, compat) /* Not supported */ static char syscalls[] = { #include <asm/syscalls_64.h> }; diff --git a/arch/x86/vdso/.gitignore b/arch/x86/vdso/.gitignore index 60274d5746e1..3282874bc61d 100644 --- a/arch/x86/vdso/.gitignore +++ b/arch/x86/vdso/.gitignore @@ -1,5 +1,7 @@ vdso.lds vdso-syms.lds +vdsox32.lds +vdsox32-syms.lds vdso32-syms.lds vdso32-syscall-syms.lds vdso32-sysenter-syms.lds diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 5d179502a52c..fd14be1d1472 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -3,21 +3,29 @@ # VDSO64-$(CONFIG_X86_64) := y +VDSOX32-$(CONFIG_X86_X32_ABI) := y VDSO32-$(CONFIG_X86_32) := y VDSO32-$(CONFIG_COMPAT) := y vdso-install-$(VDSO64-y) += vdso.so +vdso-install-$(VDSOX32-y) += vdsox32.so vdso-install-$(VDSO32-y) += $(vdso32-images) # files to link into the vdso vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o +vobjs-$(VDSOX32-y) += $(vobjx32s-compat) + +# Filter out x32 objects. +vobj64s := $(filter-out $(vobjx32s-compat),$(vobjs-y)) + # files to link into kernel obj-$(VDSO64-y) += vma.o vdso.o +obj-$(VDSOX32-y) += vdsox32.o obj-$(VDSO32-y) += vdso32.o vdso32-setup.o -vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) +vobjs := $(foreach F,$(vobj64s),$(obj)/$F) $(obj)/vdso.o: $(obj)/vdso.so @@ -73,6 +81,42 @@ $(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE $(call if_changed,vdsosym) # +# X32 processes use x32 vDSO to access 64bit kernel data. 
+# +# Build x32 vDSO image: +# 1. Compile x32 vDSO as 64bit. +# 2. Convert object files to x32. +# 3. Build x32 VDSO image with x32 objects, which contains 64bit codes +# so that it can reach 64bit address space with 64bit pointers. +# + +targets += vdsox32-syms.lds +obj-$(VDSOX32-y) += vdsox32-syms.lds + +CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds) +VDSO_LDFLAGS_vdsox32.lds = -Wl,-m,elf32_x86_64 \ + -Wl,-soname=linux-vdso.so.1 \ + -Wl,-z,max-page-size=4096 \ + -Wl,-z,common-page-size=4096 + +vobjx32s-y := $(vobj64s:.o=-x32.o) +vobjx32s := $(foreach F,$(vobjx32s-y),$(obj)/$F) + +# Convert 64bit object file to x32 for x32 vDSO. +quiet_cmd_x32 = X32 $@ + cmd_x32 = $(OBJCOPY) -O elf32-x86-64 $< $@ + +$(obj)/%-x32.o: $(obj)/%.o FORCE + $(call if_changed,x32) + +targets += vdsox32.so vdsox32.so.dbg vdsox32.lds $(vobjx32s-y) + +$(obj)/vdsox32.o: $(src)/vdsox32.S $(obj)/vdsox32.so + +$(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE + $(call if_changed,vdso) + +# # Build multiple 32-bit vDSO images to choose from at boot time. # obj-$(VDSO32-y) += vdso32-syms.lds diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 6bc0e723b6e8..885eff49d6ab 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -70,100 +70,98 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) return ret; } +notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) +{ + long ret; + + asm("syscall" : "=a" (ret) : + "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); + return ret; +} + + notrace static inline long vgetns(void) { long v; cycles_t cycles; if (gtod->clock.vclock_mode == VCLOCK_TSC) cycles = vread_tsc(); - else + else if (gtod->clock.vclock_mode == VCLOCK_HPET) cycles = vread_hpet(); + else + return 0; v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask; return (v * gtod->clock.mult) >> gtod->clock.shift; } -notrace static noinline int do_realtime(struct timespec *ts) +/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */ +notrace static int __always_inline do_realtime(struct timespec *ts) { unsigned long seq, ns; + int mode; + do { - seq = read_seqbegin(&gtod->lock); + seq = read_seqcount_begin(&gtod->seq); + mode = gtod->clock.vclock_mode; ts->tv_sec = gtod->wall_time_sec; ts->tv_nsec = gtod->wall_time_nsec; ns = vgetns(); - } while (unlikely(read_seqretry(&gtod->lock, seq))); + } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); + timespec_add_ns(ts, ns); - return 0; + return mode; } -notrace static noinline int do_monotonic(struct timespec *ts) +notrace static int do_monotonic(struct timespec *ts) { - unsigned long seq, ns, secs; + unsigned long seq, ns; + int mode; + do { - seq = read_seqbegin(&gtod->lock); - secs = gtod->wall_time_sec; - ns = gtod->wall_time_nsec + vgetns(); - secs += gtod->wall_to_monotonic.tv_sec; - ns += gtod->wall_to_monotonic.tv_nsec; - } while (unlikely(read_seqretry(&gtod->lock, seq))); - - /* wall_time_nsec, vgetns(), and wall_to_monotonic.tv_nsec - * are all guaranteed to be nonnegative.
- */ - while (ns >= NSEC_PER_SEC) { - ns -= NSEC_PER_SEC; - ++secs; - } - ts->tv_sec = secs; - ts->tv_nsec = ns; + seq = read_seqcount_begin(&gtod->seq); + mode = gtod->clock.vclock_mode; + ts->tv_sec = gtod->monotonic_time_sec; + ts->tv_nsec = gtod->monotonic_time_nsec; + ns = vgetns(); + } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); + timespec_add_ns(ts, ns); - return 0; + return mode; } -notrace static noinline int do_realtime_coarse(struct timespec *ts) +notrace static int do_realtime_coarse(struct timespec *ts) { unsigned long seq; do { - seq = read_seqbegin(&gtod->lock); + seq = read_seqcount_begin(&gtod->seq); ts->tv_sec = gtod->wall_time_coarse.tv_sec; ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; - } while (unlikely(read_seqretry(&gtod->lock, seq))); + } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); return 0; } -notrace static noinline int do_monotonic_coarse(struct timespec *ts) +notrace static int do_monotonic_coarse(struct timespec *ts) { - unsigned long seq, ns, secs; + unsigned long seq; do { - seq = read_seqbegin(&gtod->lock); - secs = gtod->wall_time_coarse.tv_sec; - ns = gtod->wall_time_coarse.tv_nsec; - secs += gtod->wall_to_monotonic.tv_sec; - ns += gtod->wall_to_monotonic.tv_nsec; - } while (unlikely(read_seqretry(&gtod->lock, seq))); - - /* wall_time_nsec and wall_to_monotonic.tv_nsec are - * guaranteed to be between 0 and NSEC_PER_SEC. - */ - if (ns >= NSEC_PER_SEC) { - ns -= NSEC_PER_SEC; - ++secs; - } - ts->tv_sec = secs; - ts->tv_nsec = ns; + seq = read_seqcount_begin(&gtod->seq); + ts->tv_sec = gtod->monotonic_time_coarse.tv_sec; + ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec; + } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); return 0; } notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) { + int ret = VCLOCK_NONE; + switch (clock) { case CLOCK_REALTIME: - if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) - return do_realtime(ts); + ret = do_realtime(ts); break; case CLOCK_MONOTONIC: - if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) - return do_monotonic(ts); + ret = do_monotonic(ts); break; case CLOCK_REALTIME_COARSE: return do_realtime_coarse(ts); @@ -171,32 +169,33 @@ notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) return do_monotonic_coarse(ts); } - return vdso_fallback_gettime(clock, ts); + if (ret == VCLOCK_NONE) + return vdso_fallback_gettime(clock, ts); + return 0; } int clock_gettime(clockid_t, struct timespec *) __attribute__((weak, alias("__vdso_clock_gettime"))); notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) { - long ret; - if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) { - if (likely(tv != NULL)) { - BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != - offsetof(struct timespec, tv_nsec) || - sizeof(*tv) != sizeof(struct timespec)); - do_realtime((struct timespec *)tv); - tv->tv_usec /= 1000; - } - if (unlikely(tz != NULL)) { - /* Avoid memcpy. Some old compilers fail to inline it */ - tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest; - tz->tz_dsttime = gtod->sys_tz.tz_dsttime; - } - return 0; + long ret = VCLOCK_NONE; + + if (likely(tv != NULL)) { + BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != + offsetof(struct timespec, tv_nsec) || + sizeof(*tv) != sizeof(struct timespec)); + ret = do_realtime((struct timespec *)tv); + tv->tv_usec /= 1000; } - asm("syscall" : "=a" (ret) : - "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); - return ret; + if (unlikely(tz != NULL)) { + /* Avoid memcpy.
Some old compilers fail to inline it */ + tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest; + tz->tz_dsttime = gtod->sys_tz.tz_dsttime; + } + + if (ret == VCLOCK_NONE) + return vdso_fallback_gtod(tv, tz); + return 0; } int gettimeofday(struct timeval *, struct timezone *) __attribute__((weak, alias("__vdso_gettimeofday"))); diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index a944020fa859..66e6d9359826 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -311,6 +311,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) int ret = 0; bool compat; +#ifdef CONFIG_X86_X32_ABI + if (test_thread_flag(TIF_X32)) + return x32_setup_additional_pages(bprm, uses_interp); +#endif + if (vdso_enabled == VDSO_DISABLED) return 0; diff --git a/arch/x86/vdso/vdsox32.S b/arch/x86/vdso/vdsox32.S new file mode 100644 index 000000000000..d6b9a7f42a8a --- /dev/null +++ b/arch/x86/vdso/vdsox32.S @@ -0,0 +1,22 @@ +#include <asm/page_types.h> +#include <linux/linkage.h> +#include <linux/init.h> + +__PAGE_ALIGNED_DATA + + .globl vdsox32_start, vdsox32_end + .align PAGE_SIZE +vdsox32_start: + .incbin "arch/x86/vdso/vdsox32.so" +vdsox32_end: + .align PAGE_SIZE /* extra data here leaks to userspace. */ + +.previous + + .globl vdsox32_pages + .bss + .align 8 + .type vdsox32_pages, @object +vdsox32_pages: + .zero (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE * 8 + .size vdsox32_pages, .-vdsox32_pages diff --git a/arch/x86/vdso/vdsox32.lds.S b/arch/x86/vdso/vdsox32.lds.S new file mode 100644 index 000000000000..62272aa2ae0a --- /dev/null +++ b/arch/x86/vdso/vdsox32.lds.S @@ -0,0 +1,28 @@ +/* + * Linker script for x32 vDSO. + * We #include the file to define the layout details. + * Here we only choose the prelinked virtual address. + * + * This file defines the version script giving the user-exported symbols in + * the DSO. We can define local symbols here called VDSO* to make their + * values visible using the asm-x86/vdso.h macros from the kernel proper. + */ + +#define VDSO_PRELINK 0 +#include "vdso-layout.lds.S" + +/* + * This controls what userland symbols we export from the vDSO. + */ +VERSION { + LINUX_2.6 { + global: + __vdso_clock_gettime; + __vdso_gettimeofday; + __vdso_getcpu; + __vdso_time; + local: *; + }; +} + +VDSOX32_PRELINK = VDSO_PRELINK; diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 17e18279649f..00aaf047b39f 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -24,7 +24,44 @@ extern unsigned short vdso_sync_cpuid; extern struct page *vdso_pages[]; static unsigned vdso_size; -static void __init patch_vdso(void *vdso, size_t len) +#ifdef CONFIG_X86_X32_ABI +extern char vdsox32_start[], vdsox32_end[]; +extern struct page *vdsox32_pages[]; +static unsigned vdsox32_size; + +static void __init patch_vdsox32(void *vdso, size_t len) +{ + Elf32_Ehdr *hdr = vdso; + Elf32_Shdr *sechdrs, *alt_sec = 0; + char *secstrings; + void *alt_data; + int i; + + BUG_ON(len < sizeof(Elf32_Ehdr)); + BUG_ON(memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0); + + sechdrs = (void *)hdr + hdr->e_shoff; + secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + for (i = 1; i < hdr->e_shnum; i++) { + Elf32_Shdr *shdr = &sechdrs[i]; + if (!strcmp(secstrings + shdr->sh_name, ".altinstructions")) { + alt_sec = shdr; + goto found; + } + } + + /* If we get here, it's probably a bug. 
*/ + pr_warning("patch_vdsox32: .altinstructions not found\n"); + return; /* nothing to patch */ + +found: + alt_data = (void *)hdr + alt_sec->sh_offset; + apply_alternatives(alt_data, alt_data + alt_sec->sh_size); +} +#endif + +static void __init patch_vdso64(void *vdso, size_t len) { Elf64_Ehdr *hdr = vdso; Elf64_Shdr *sechdrs, *alt_sec = 0; @@ -47,7 +84,7 @@ static void __init patch_vdso(void *vdso, size_t len) } /* If we get here, it's probably a bug. */ - pr_warning("patch_vdso: .altinstructions not found\n"); + pr_warning("patch_vdso64: .altinstructions not found\n"); return; /* nothing to patch */ found: @@ -60,12 +97,20 @@ static int __init init_vdso(void) int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE; int i; - patch_vdso(vdso_start, vdso_end - vdso_start); + patch_vdso64(vdso_start, vdso_end - vdso_start); vdso_size = npages << PAGE_SHIFT; for (i = 0; i < npages; i++) vdso_pages[i] = virt_to_page(vdso_start + i*PAGE_SIZE); +#ifdef CONFIG_X86_X32_ABI + patch_vdsox32(vdsox32_start, vdsox32_end - vdsox32_start); + npages = (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE; + vdsox32_size = npages << PAGE_SHIFT; + for (i = 0; i < npages; i++) + vdsox32_pages[i] = virt_to_page(vdsox32_start + i*PAGE_SIZE); +#endif + return 0; } subsys_initcall(init_vdso); @@ -103,7 +148,10 @@ static unsigned long vdso_addr(unsigned long start, unsigned len) /* Setup a VMA at program startup for the vsyscall page. Not called for compat tasks */ -int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +static int setup_additional_pages(struct linux_binprm *bprm, + int uses_interp, + struct page **pages, + unsigned size) { struct mm_struct *mm = current->mm; unsigned long addr; @@ -113,8 +161,8 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) return 0; down_write(&mm->mmap_sem); - addr = vdso_addr(mm->start_stack, vdso_size); - addr = get_unmapped_area(NULL, addr, vdso_size, 0, 0); + addr = vdso_addr(mm->start_stack, size); + addr = get_unmapped_area(NULL, addr, size, 0, 0); if (IS_ERR_VALUE(addr)) { ret = addr; goto up_fail; @@ -122,10 +170,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) current->mm->context.vdso = (void *)addr; - ret = install_special_mapping(mm, addr, vdso_size, + ret = install_special_mapping(mm, addr, size, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - vdso_pages); + pages); if (ret) { current->mm->context.vdso = NULL; goto up_fail; @@ -136,6 +184,20 @@ up_fail: return ret; } +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + return setup_additional_pages(bprm, uses_interp, vdso_pages, + vdso_size); +} + +#ifdef CONFIG_X86_X32_ABI +int x32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + return setup_additional_pages(bprm, uses_interp, vdsox32_pages, + vdsox32_size); +} +#endif + static __init int vdso_setup(char *s) { vdso_enabled = simple_strtoul(s, NULL, 0); |
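
The syscall_64.tbl comment above gives x32-only entries numbers from 512 up; at run time the x32 ABI additionally sets __X32_SYSCALL_BIT in the syscall number (the same offset the syscalls Makefile passes via syshdr_offset_unistd_x32), and the 64-bit entry code masks that bit off before indexing the table, so the "common" rows stay shared with the native ABI. A rough userspace sketch of this numbering convention, not part of the patch: it assumes __X32_SYSCALL_BIT is 0x40000000 as in asm/unistd.h and a kernel built with the new CONFIG_X86_X32 option; on other kernels the call simply fails with ENOSYS.

/* Hypothetical illustration only: invoke the "common" getpid entry
 * (number 39 in the table above) with the x32 marker bit set, using
 * glibc's generic syscall(2) wrapper from a 64-bit process. */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __X32_SYSCALL_BIT
#define __X32_SYSCALL_BIT 0x40000000	/* assumed value, see asm/unistd.h */
#endif

int main(void)
{
	long pid = syscall(__X32_SYSCALL_BIT + 39);	/* 39 = getpid (common) */

	printf("getpid via x32 numbering: %ld\n", pid);
	return 0;
}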