From a8d03c3f300eefff3b5c14798409e4b43e37dd9b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 3 Jul 2019 13:34:02 -0700 Subject: x86/syscalls: Use the compat versions of rt_sigsuspend() and rt_sigprocmask() I'm working on some code that detects at build time if there's a COMPAT_SYSCALL_DEFINE() that is not referenced in the x86 syscall tables. It catches three offenders: rt_sigsuspend(), rt_sigprocmask(), and sendfile64(). For rt_sigsuspend() and rt_sigprocmask(), the only potential difference between the native and compat versions is that the compat version converts the sigset_t, but, on little endian architectures, the conversion is a no-op. This is why they both currently work on x86. To make the code more consistent, and to make the upcoming patches work, rewire x86 to use the compat vesions. sendfile64() is more complicated, and will be addressed separately. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/51643ac3157b5921eae0e172a8a0b1d953e68ebb.1562185330.git.luto@kernel.org --- arch/x86/entry/syscalls/syscall_32.tbl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index c00019abd076..3fe02546aed3 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -186,11 +186,11 @@ 172 i386 prctl sys_prctl __ia32_sys_prctl 173 i386 rt_sigreturn sys_rt_sigreturn sys32_rt_sigreturn 174 i386 rt_sigaction sys_rt_sigaction __ia32_compat_sys_rt_sigaction -175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_sys_rt_sigprocmask +175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_compat_sys_rt_sigprocmask 176 i386 rt_sigpending sys_rt_sigpending __ia32_compat_sys_rt_sigpending 177 i386 rt_sigtimedwait sys_rt_sigtimedwait_time32 __ia32_compat_sys_rt_sigtimedwait_time32 178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo __ia32_compat_sys_rt_sigqueueinfo -179 i386 rt_sigsuspend sys_rt_sigsuspend __ia32_sys_rt_sigsuspend +179 i386 rt_sigsuspend sys_rt_sigsuspend __ia32_compat_sys_rt_sigsuspend 180 i386 pread64 sys_pread64 __ia32_compat_sys_x86_pread 181 i386 pwrite64 sys_pwrite64 __ia32_compat_sys_x86_pwrite 182 i386 chown sys_chown16 __ia32_sys_chown16 -- cgit v1.2.1 From f85a8573ceb225e606fcf38a9320782316f47c71 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 3 Jul 2019 13:34:03 -0700 Subject: x86/syscalls: Disallow compat entries for all types of 64-bit syscalls A "compat" entry in the syscall tables means to use a different entry on 32-bit and 64-bit builds. This only makes sense for syscalls that exist in the first place in 32-bit builds, so disallow it for anything other than i386. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/4b7565954c5a06530ac01d98cb1592538fd8ae51.1562185330.git.luto@kernel.org --- arch/x86/entry/syscalls/syscalltbl.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh index 94fcd1951aca..53c8c1a9adf9 100644 --- a/arch/x86/entry/syscalls/syscalltbl.sh +++ b/arch/x86/entry/syscalls/syscalltbl.sh @@ -27,8 +27,8 @@ emit() { compat="$4" umlentry="" - if [ "$abi" = "64" -a -n "$compat" ]; then - echo "a compat entry for a 64-bit syscall makes no sense" >&2 + if [ "$abi" != "I386" -a -n "$compat" ]; then + echo "a compat entry ($abi: $compat) for a 64-bit syscall makes no sense" >&2 exit 1 fi -- cgit v1.2.1 From 6365b842aae4490ebfafadfc6bb27a6d3cc54757 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 3 Jul 2019 13:34:04 -0700 Subject: x86/syscalls: Split the x32 syscalls into their own table For unfortunate historical reasons, the x32 syscalls and the x86_64 syscalls are not all numbered the same. As an example, ioctl() is nr 16 on x86_64 but 514 on x32. This has potentially nasty consequences, since it means that there are two valid RAX values to do ioctl(2) and two invalid RAX values. The valid values are 16 (i.e. ioctl(2) using the x86_64 ABI) and (514 | 0x40000000) (i.e. ioctl(2) using the x32 ABI). The invalid values are 514 and (16 | 0x40000000). 514 will enter the "COMPAT_SYSCALL_DEFINE3(ioctl, ...)" entry point with in_compat_syscall() and in_x32_syscall() returning false, whereas (16 | 0x40000000) will enter the native entry point with in_compat_syscall() and in_x32_syscall() returning true. Both are bogus, and both will exercise code paths in the kernel and in any running seccomp filters that really ought to be unreachable. Splitting out the x32 syscalls into their own tables, allows both bogus invocations to return -ENOSYS. I've checked glibc, musl, and Bionic, and all of them appear to call syscalls with their correct numbers, so this change should have no effect on them. There is an added benefit going forward: new syscalls that need special handling on x32 can share the same number on x32 and x86_64. This means that the special syscall range 512-547 can be treated as a legacy wart instead of something that may need to be extended in the future. Also add a selftest to verify the new behavior. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/208024256b764312598f014ebfb0a42472c19354.1562185330.git.luto@kernel.org --- arch/x86/entry/common.c | 13 +++++++------ arch/x86/entry/syscall_64.c | 25 +++++++++++++++++++++++++ arch/x86/entry/syscalls/syscalltbl.sh | 31 +++++++++++++++++-------------- 3 files changed, 49 insertions(+), 20 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 536b574b6161..3f8e22615812 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -285,15 +285,16 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs) if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) nr = syscall_trace_enter(regs); - /* - * NB: Native and x32 syscalls are dispatched from the same - * table. The only functional difference is the x32 bit in - * regs->orig_ax, which changes the behavior of some syscalls. - */ - nr &= __SYSCALL_MASK; if (likely(nr < NR_syscalls)) { nr = array_index_nospec(nr, NR_syscalls); regs->ax = sys_call_table[nr](regs); +#ifdef CONFIG_X86_X32_ABI + } else if (likely((nr & __X32_SYSCALL_BIT) && + (nr & ~__X32_SYSCALL_BIT) < X32_NR_syscalls)) { + nr = array_index_nospec(nr & ~__X32_SYSCALL_BIT, + X32_NR_syscalls); + regs->ax = x32_sys_call_table[nr](regs); +#endif } syscall_return_slowpath(regs); diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index d5252bc1e380..b1bf31713374 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -10,10 +10,13 @@ /* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */ extern asmlinkage long sys_ni_syscall(const struct pt_regs *); #define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *); +#define __SYSCALL_X32(nr, sym, qual) __SYSCALL_64(nr, sym, qual) #include #undef __SYSCALL_64 +#undef __SYSCALL_X32 #define __SYSCALL_64(nr, sym, qual) [nr] = sym, +#define __SYSCALL_X32(nr, sym, qual) asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { /* @@ -23,3 +26,25 @@ asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { [0 ... __NR_syscall_max] = &sys_ni_syscall, #include }; + +#undef __SYSCALL_64 +#undef __SYSCALL_X32 + +#ifdef CONFIG_X86_X32_ABI + +#define __SYSCALL_64(nr, sym, qual) +#define __SYSCALL_X32(nr, sym, qual) [nr] = sym, + +asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_syscall_x32_max+1] = { + /* + * Smells like a compiler bug -- it doesn't work + * when the & below is removed. + */ + [0 ... __NR_syscall_x32_max] = &sys_ni_syscall, +#include +}; + +#undef __SYSCALL_64 +#undef __SYSCALL_X32 + +#endif diff --git a/arch/x86/entry/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh index 53c8c1a9adf9..1af2be39e7d9 100644 --- a/arch/x86/entry/syscalls/syscalltbl.sh +++ b/arch/x86/entry/syscalls/syscalltbl.sh @@ -1,13 +1,13 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 in="$1" out="$2" syscall_macro() { - abi="$1" - nr="$2" - entry="$3" + local abi="$1" + local nr="$2" + local entry="$3" # Entry can be either just a function name or "function/qualifier" real_entry="${entry%%/*}" @@ -21,11 +21,11 @@ syscall_macro() { } emit() { - abi="$1" - nr="$2" - entry="$3" - compat="$4" - umlentry="" + local abi="$1" + local nr="$2" + local entry="$3" + local compat="$4" + local umlentry="" if [ "$abi" != "I386" -a -n "$compat" ]; then echo "a compat entry ($abi: $compat) for a 64-bit syscall makes no sense" >&2 @@ -62,14 +62,17 @@ grep '^[0-9]' "$in" | sort -n | ( while read nr abi name entry compat; do abi=`echo "$abi" | tr '[a-z]' '[A-Z]'` if [ "$abi" = "COMMON" -o "$abi" = "64" ]; then - # COMMON is the same as 64, except that we don't expect X32 - # programs to use it. Our expectation has nothing to do with - # any generated code, so treat them the same. emit 64 "$nr" "$entry" "$compat" + if [ "$abi" = "COMMON" ]; then + # COMMON means that this syscall exists in the same form for + # 64-bit and X32. + echo "#ifdef CONFIG_X86_X32_ABI" + emit X32 "$nr" "$entry" "$compat" + echo "#endif" + fi elif [ "$abi" = "X32" ]; then - # X32 is equivalent to 64 on an X32-compatible kernel. echo "#ifdef CONFIG_X86_X32_ABI" - emit 64 "$nr" "$entry" "$compat" + emit X32 "$nr" "$entry" "$compat" echo "#endif" elif [ "$abi" = "I386" ]; then emit "$abi" "$nr" "$entry" "$compat" -- cgit v1.2.1