From ffecee4f2442bb8cb6b34c3335fef4eb50c22fdd Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Sat, 8 Oct 2016 11:18:07 +0200 Subject: iov_iter: kernel-doc import_iovec() and rw_copy_check_uvector() Both import_iovec() and rw_copy_check_uvector() take an array (typically small and on-stack) which is used to hold an iovec array copy from userspace. This is to avoid an expensive memory allocation in the fast path (i.e. few iovec elements). The caller may have to check whether these functions actually used the provided buffer or allocated a new one -- but this differs between the too. Let's just add a kernel doc to clarify what the semantics are for each function. Signed-off-by: Vegard Nossum Signed-off-by: Al Viro --- fs/read_write.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'fs') diff --git a/fs/read_write.c b/fs/read_write.c index 66215a7b17cf..190e0d362581 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -730,6 +730,35 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, /* A write operation does a read from user space and vice versa */ #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) +/** + * rw_copy_check_uvector() - Copy an array of &struct iovec from userspace + * into the kernel and check that it is valid. + * + * @type: One of %CHECK_IOVEC_ONLY, %READ, or %WRITE. + * @uvector: Pointer to the userspace array. + * @nr_segs: Number of elements in userspace array. + * @fast_segs: Number of elements in @fast_pointer. + * @fast_pointer: Pointer to (usually small on-stack) kernel array. + * @ret_pointer: (output parameter) Pointer to a variable that will point to + * either @fast_pointer, a newly allocated kernel array, or NULL, + * depending on which array was used. + * + * This function copies an array of &struct iovec of @nr_segs from + * userspace into the kernel and checks that each element is valid (e.g. + * it does not point to a kernel address or cause overflow by being too + * large, etc.). + * + * As an optimization, the caller may provide a pointer to a small + * on-stack array in @fast_pointer, typically %UIO_FASTIOV elements long + * (the size of this array, or 0 if unused, should be given in @fast_segs). + * + * @ret_pointer will always point to the array that was used, so the + * caller must take care not to call kfree() on it e.g. in case the + * @fast_pointer array was used and it was allocated on the stack. + * + * Return: The total number of bytes covered by the iovec array on success + * or a negative error code on error. + */ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, -- cgit v1.2.1 From 655042cc1406fcec20aa7ffd7d790ada18ac5211 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 14 Oct 2016 03:03:36 +0200 Subject: overlayfs: Fix setting IOP_XATTR flag ovl_fill_super calls ovl_new_inode to create a root inode for the new superblock before initializing sb->s_xattr. This wrongly causes IOP_XATTR to be cleared in i_opflags of the new inode, causing SELinux to log the following message: SELinux: (dev overlay, type overlay) has no xattr support Fix this by initializing sb->s_xattr and similar fields before calling ovl_new_inode. Signed-off-by: Andreas Gruenbacher Signed-off-by: Al Viro --- fs/overlayfs/super.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 7e3f0127fc1a..0ffc8da1aa3f 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1292,6 +1292,12 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (!oe) goto out_put_cred; + sb->s_magic = OVERLAYFS_SUPER_MAGIC; + sb->s_op = &ovl_super_operations; + sb->s_xattr = ovl_xattr_handlers; + sb->s_fs_info = ufs; + sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK; + root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR)); if (!root_dentry) goto out_free_oe; @@ -1315,12 +1321,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) ovl_inode_init(d_inode(root_dentry), realinode, !!upperpath.dentry); ovl_copyattr(realinode, d_inode(root_dentry)); - sb->s_magic = OVERLAYFS_SUPER_MAGIC; - sb->s_op = &ovl_super_operations; - sb->s_xattr = ovl_xattr_handlers; sb->s_root = root_dentry; - sb->s_fs_info = ufs; - sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK; return 0; -- cgit v1.2.1 From 89f39af129382a40d7cd1f6914617282cfeee28e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 26 Sep 2016 18:07:48 +0200 Subject: fs/super.c: fix race between freeze_super() and thaw_super() Change thaw_super() to check frozen != SB_FREEZE_COMPLETE rather than frozen == SB_UNFROZEN, otherwise it can race with freeze_super() which drops sb->s_umount after SB_FREEZE_WRITE to preserve the lock ordering. In this case thaw_super() will wrongly call s_op->unfreeze_fs() before it was actually frozen, and call sb_freeze_unlock() which leads to the unbalanced percpu_up_write(). Unfortunately lockdep can't detect this, so this triggers misc BUG_ON()'s in kernel/rcu/sync.c. Reported-and-tested-by: Nikolay Borisov Signed-off-by: Oleg Nesterov Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- fs/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/super.c b/fs/super.c index c2ff475c1711..47d11e0462d0 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1379,8 +1379,8 @@ int freeze_super(struct super_block *sb) } } /* - * This is just for debugging purposes so that fs can warn if it - * sees write activity when frozen is set to SB_FREEZE_COMPLETE. + * For debugging purposes so that fs can warn if it sees write activity + * when frozen is set to SB_FREEZE_COMPLETE, and for thaw_super(). */ sb->s_writers.frozen = SB_FREEZE_COMPLETE; up_write(&sb->s_umount); @@ -1399,7 +1399,7 @@ int thaw_super(struct super_block *sb) int error; down_write(&sb->s_umount); - if (sb->s_writers.frozen == SB_UNFROZEN) { + if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) { up_write(&sb->s_umount); return -EINVAL; } -- cgit v1.2.1 From f1a9622037cd370460fd06bb7e28d0f01ceb8ef1 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 26 Sep 2016 18:55:25 +0200 Subject: fs/super.c: don't fool lockdep in freeze_super() and thaw_super() paths sb_wait_write()->percpu_rwsem_release() fools lockdep to avoid the false-positives. Now that xfs was fixed by Dave's commit dbad7c993053 ("xfs: stop holding ILOCK over filldir callbacks") we can remove it and change freeze_super() and thaw_super() to run with s_writers.rw_sem locks held; we add two trivial helpers for that, lockdep_sb_freeze_release() and lockdep_sb_freeze_acquire(). xfstests-dev/check `grep -il freeze tests/*/???` does not trigger any warning from lockdep. Signed-off-by: Oleg Nesterov Signed-off-by: Al Viro --- fs/super.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/super.c b/fs/super.c index 47d11e0462d0..c183835566c1 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1269,25 +1269,34 @@ EXPORT_SYMBOL(__sb_start_write); static void sb_wait_write(struct super_block *sb, int level) { percpu_down_write(sb->s_writers.rw_sem + level-1); - /* - * We are going to return to userspace and forget about this lock, the - * ownership goes to the caller of thaw_super() which does unlock. - * - * FIXME: we should do this before return from freeze_super() after we - * called sync_filesystem(sb) and s_op->freeze_fs(sb), and thaw_super() - * should re-acquire these locks before s_op->unfreeze_fs(sb). However - * this leads to lockdep false-positives, so currently we do the early - * release right after acquire. - */ - percpu_rwsem_release(sb->s_writers.rw_sem + level-1, 0, _THIS_IP_); } -static void sb_freeze_unlock(struct super_block *sb) +/* + * We are going to return to userspace and forget about these locks, the + * ownership goes to the caller of thaw_super() which does unlock(). + */ +static void lockdep_sb_freeze_release(struct super_block *sb) +{ + int level; + + for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--) + percpu_rwsem_release(sb->s_writers.rw_sem + level, 0, _THIS_IP_); +} + +/* + * Tell lockdep we are holding these locks before we call ->unfreeze_fs(sb). + */ +static void lockdep_sb_freeze_acquire(struct super_block *sb) { int level; for (level = 0; level < SB_FREEZE_LEVELS; ++level) percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_); +} + +static void sb_freeze_unlock(struct super_block *sb) +{ + int level; for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--) percpu_up_write(sb->s_writers.rw_sem + level); @@ -1383,6 +1392,7 @@ int freeze_super(struct super_block *sb) * when frozen is set to SB_FREEZE_COMPLETE, and for thaw_super(). */ sb->s_writers.frozen = SB_FREEZE_COMPLETE; + lockdep_sb_freeze_release(sb); up_write(&sb->s_umount); return 0; } @@ -1409,11 +1419,14 @@ int thaw_super(struct super_block *sb) goto out; } + lockdep_sb_freeze_acquire(sb); + if (sb->s_op->unfreeze_fs) { error = sb->s_op->unfreeze_fs(sb); if (error) { printk(KERN_ERR "VFS:Filesystem thaw failed\n"); + lockdep_sb_freeze_release(sb); up_write(&sb->s_umount); return error; } -- cgit v1.2.1