From ffecee4f2442bb8cb6b34c3335fef4eb50c22fdd Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@oracle.com>
Date: Sat, 8 Oct 2016 11:18:07 +0200
Subject: iov_iter: kernel-doc import_iovec() and rw_copy_check_uvector()

Both import_iovec() and rw_copy_check_uvector() take an array
(typically small and on-stack) which is used to hold an iovec array copy
from userspace. This is to avoid an expensive memory allocation in the
fast path (i.e. few iovec elements).

The caller may have to check whether these functions actually used
the provided buffer or allocated a new one -- but this differs between
the too. Let's just add a kernel doc to clarify what the semantics are
for each function.

Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/read_write.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'fs')

diff --git a/fs/read_write.c b/fs/read_write.c
index 66215a7b17cf..190e0d362581 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -730,6 +730,35 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
 /* A write operation does a read from user space and vice versa */
 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
 
+/**
+ * rw_copy_check_uvector() - Copy an array of &struct iovec from userspace
+ *     into the kernel and check that it is valid.
+ *
+ * @type: One of %CHECK_IOVEC_ONLY, %READ, or %WRITE.
+ * @uvector: Pointer to the userspace array.
+ * @nr_segs: Number of elements in userspace array.
+ * @fast_segs: Number of elements in @fast_pointer.
+ * @fast_pointer: Pointer to (usually small on-stack) kernel array.
+ * @ret_pointer: (output parameter) Pointer to a variable that will point to
+ *     either @fast_pointer, a newly allocated kernel array, or NULL,
+ *     depending on which array was used.
+ *
+ * This function copies an array of &struct iovec of @nr_segs from
+ * userspace into the kernel and checks that each element is valid (e.g.
+ * it does not point to a kernel address or cause overflow by being too
+ * large, etc.).
+ *
+ * As an optimization, the caller may provide a pointer to a small
+ * on-stack array in @fast_pointer, typically %UIO_FASTIOV elements long
+ * (the size of this array, or 0 if unused, should be given in @fast_segs).
+ *
+ * @ret_pointer will always point to the array that was used, so the
+ * caller must take care not to call kfree() on it e.g. in case the
+ * @fast_pointer array was used and it was allocated on the stack.
+ *
+ * Return: The total number of bytes covered by the iovec array on success
+ *   or a negative error code on error.
+ */
 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 			      unsigned long nr_segs, unsigned long fast_segs,
 			      struct iovec *fast_pointer,
-- 
cgit v1.2.1


From 655042cc1406fcec20aa7ffd7d790ada18ac5211 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Fri, 14 Oct 2016 03:03:36 +0200
Subject: overlayfs: Fix setting IOP_XATTR flag

ovl_fill_super calls ovl_new_inode to create a root inode for the new
superblock before initializing sb->s_xattr.  This wrongly causes
IOP_XATTR to be cleared in i_opflags of the new inode, causing SELinux
to log the following message:

  SELinux: (dev overlay, type overlay) has no xattr support

Fix this by initializing sb->s_xattr and similar fields before calling
ovl_new_inode.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/overlayfs/super.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 7e3f0127fc1a..0ffc8da1aa3f 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1292,6 +1292,12 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	if (!oe)
 		goto out_put_cred;
 
+	sb->s_magic = OVERLAYFS_SUPER_MAGIC;
+	sb->s_op = &ovl_super_operations;
+	sb->s_xattr = ovl_xattr_handlers;
+	sb->s_fs_info = ufs;
+	sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK;
+
 	root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR));
 	if (!root_dentry)
 		goto out_free_oe;
@@ -1315,12 +1321,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	ovl_inode_init(d_inode(root_dentry), realinode, !!upperpath.dentry);
 	ovl_copyattr(realinode, d_inode(root_dentry));
 
-	sb->s_magic = OVERLAYFS_SUPER_MAGIC;
-	sb->s_op = &ovl_super_operations;
-	sb->s_xattr = ovl_xattr_handlers;
 	sb->s_root = root_dentry;
-	sb->s_fs_info = ufs;
-	sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK;
 
 	return 0;
 
-- 
cgit v1.2.1


From 89f39af129382a40d7cd1f6914617282cfeee28e Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 26 Sep 2016 18:07:48 +0200
Subject: fs/super.c: fix race between freeze_super() and thaw_super()

Change thaw_super() to check frozen != SB_FREEZE_COMPLETE rather than
frozen == SB_UNFROZEN, otherwise it can race with freeze_super() which
drops sb->s_umount after SB_FREEZE_WRITE to preserve the lock ordering.

In this case thaw_super() will wrongly call s_op->unfreeze_fs() before
it was actually frozen, and call sb_freeze_unlock() which leads to the
unbalanced percpu_up_write(). Unfortunately lockdep can't detect this,
so this triggers misc BUG_ON()'s in kernel/rcu/sync.c.

Reported-and-tested-by: Nikolay Borisov <kernel@kyup.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/super.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/super.c b/fs/super.c
index c2ff475c1711..47d11e0462d0 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1379,8 +1379,8 @@ int freeze_super(struct super_block *sb)
 		}
 	}
 	/*
-	 * This is just for debugging purposes so that fs can warn if it
-	 * sees write activity when frozen is set to SB_FREEZE_COMPLETE.
+	 * For debugging purposes so that fs can warn if it sees write activity
+	 * when frozen is set to SB_FREEZE_COMPLETE, and for thaw_super().
 	 */
 	sb->s_writers.frozen = SB_FREEZE_COMPLETE;
 	up_write(&sb->s_umount);
@@ -1399,7 +1399,7 @@ int thaw_super(struct super_block *sb)
 	int error;
 
 	down_write(&sb->s_umount);
-	if (sb->s_writers.frozen == SB_UNFROZEN) {
+	if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) {
 		up_write(&sb->s_umount);
 		return -EINVAL;
 	}
-- 
cgit v1.2.1


From f1a9622037cd370460fd06bb7e28d0f01ceb8ef1 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 26 Sep 2016 18:55:25 +0200
Subject: fs/super.c: don't fool lockdep in freeze_super() and thaw_super()
 paths

sb_wait_write()->percpu_rwsem_release() fools lockdep to avoid the
false-positives. Now that xfs was fixed by Dave's commit dbad7c993053
("xfs: stop holding ILOCK over filldir callbacks") we can remove it and
change freeze_super() and thaw_super() to run with s_writers.rw_sem locks
held; we add two trivial helpers for that, lockdep_sb_freeze_release()
and lockdep_sb_freeze_acquire().

xfstests-dev/check `grep -il freeze tests/*/???` does not trigger any
warning from lockdep.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/super.c | 37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/super.c b/fs/super.c
index 47d11e0462d0..c183835566c1 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1269,25 +1269,34 @@ EXPORT_SYMBOL(__sb_start_write);
 static void sb_wait_write(struct super_block *sb, int level)
 {
 	percpu_down_write(sb->s_writers.rw_sem + level-1);
-	/*
-	 * We are going to return to userspace and forget about this lock, the
-	 * ownership goes to the caller of thaw_super() which does unlock.
-	 *
-	 * FIXME: we should do this before return from freeze_super() after we
-	 * called sync_filesystem(sb) and s_op->freeze_fs(sb), and thaw_super()
-	 * should re-acquire these locks before s_op->unfreeze_fs(sb). However
-	 * this leads to lockdep false-positives, so currently we do the early
-	 * release right after acquire.
-	 */
-	percpu_rwsem_release(sb->s_writers.rw_sem + level-1, 0, _THIS_IP_);
 }
 
-static void sb_freeze_unlock(struct super_block *sb)
+/*
+ * We are going to return to userspace and forget about these locks, the
+ * ownership goes to the caller of thaw_super() which does unlock().
+ */
+static void lockdep_sb_freeze_release(struct super_block *sb)
+{
+	int level;
+
+	for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
+		percpu_rwsem_release(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
+}
+
+/*
+ * Tell lockdep we are holding these locks before we call ->unfreeze_fs(sb).
+ */
+static void lockdep_sb_freeze_acquire(struct super_block *sb)
 {
 	int level;
 
 	for (level = 0; level < SB_FREEZE_LEVELS; ++level)
 		percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
+}
+
+static void sb_freeze_unlock(struct super_block *sb)
+{
+	int level;
 
 	for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
 		percpu_up_write(sb->s_writers.rw_sem + level);
@@ -1383,6 +1392,7 @@ int freeze_super(struct super_block *sb)
 	 * when frozen is set to SB_FREEZE_COMPLETE, and for thaw_super().
 	 */
 	sb->s_writers.frozen = SB_FREEZE_COMPLETE;
+	lockdep_sb_freeze_release(sb);
 	up_write(&sb->s_umount);
 	return 0;
 }
@@ -1409,11 +1419,14 @@ int thaw_super(struct super_block *sb)
 		goto out;
 	}
 
+	lockdep_sb_freeze_acquire(sb);
+
 	if (sb->s_op->unfreeze_fs) {
 		error = sb->s_op->unfreeze_fs(sb);
 		if (error) {
 			printk(KERN_ERR
 				"VFS:Filesystem thaw failed\n");
+			lockdep_sb_freeze_release(sb);
 			up_write(&sb->s_umount);
 			return error;
 		}
-- 
cgit v1.2.1