summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-12-23 10:52:43 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2016-12-23 10:52:43 -0800
commita307d0a0074c18bcbea5dec368c9f047be9dade3 (patch)
treea55e2ebfe213a45f58ec492aadd0c32457015638 /fs
parentfc26901b12f1deedc351bbe9fd9a018d61485c57 (diff)
parentfaf0dcebd7b387187f29ff811d47df465ea4c9f9 (diff)
downloadblackbird-op-linux-a307d0a0074c18bcbea5dec368c9f047be9dade3.tar.gz
blackbird-op-linux-a307d0a0074c18bcbea5dec368c9f047be9dade3.zip
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull final vfs updates from Al Viro: "Assorted cleanups and fixes all over the place" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: sg_write()/bsg_write() is not fit to be called under KERNEL_DS ufs: fix function declaration for ufs_truncate_blocks fs: exec: apply CLOEXEC before changing dumpable task flags seq_file: reset iterator to first record for zero offset vfs: fix isize/pos/len checks for reflink & dedupe [iov_iter] fix iterate_all_kinds() on empty iterators move aio compat to fs/aio.c reorganize do_make_slave() clone_private_mount() doesn't need to touch namespace_sem remove a bogus claim about namespace_sem being held by callers of mnt_alloc_id()
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c97
-rw-r--r--fs/compat.c75
-rw-r--r--fs/exec.c10
-rw-r--r--fs/namespace.c8
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/pnode.c74
-rw-r--r--fs/read_write.c18
-rw-r--r--fs/seq_file.c7
-rw-r--r--fs/ufs/inode.c2
-rw-r--r--fs/xfs/xfs_reflink.c2
10 files changed, 162 insertions, 133 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 8edf253484af..8c79e1a53af9 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1367,6 +1367,39 @@ out:
return ret;
}
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p)
+{
+ struct kioctx *ioctx = NULL;
+ unsigned long ctx;
+ long ret;
+
+ ret = get_user(ctx, ctx32p);
+ if (unlikely(ret))
+ goto out;
+
+ ret = -EINVAL;
+ if (unlikely(ctx || nr_events == 0)) {
+ pr_debug("EINVAL: ctx %lu nr_events %u\n",
+ ctx, nr_events);
+ goto out;
+ }
+
+ ioctx = ioctx_alloc(nr_events);
+ ret = PTR_ERR(ioctx);
+ if (!IS_ERR(ioctx)) {
+ /* truncating is ok because it's a user address */
+ ret = put_user((u32)ioctx->user_id, ctx32p);
+ if (ret)
+ kill_ioctx(current->mm, ioctx, NULL);
+ percpu_ref_put(&ioctx->users);
+ }
+
+out:
+ return ret;
+}
+#endif
+
/* sys_io_destroy:
* Destroy the aio_context specified. May cancel any outstanding
* AIOs and block on completion. Will fail with -ENOSYS if not
@@ -1591,8 +1624,8 @@ out_put_req:
return ret;
}
-long do_io_submit(aio_context_t ctx_id, long nr,
- struct iocb __user *__user *iocbpp, bool compat)
+static long do_io_submit(aio_context_t ctx_id, long nr,
+ struct iocb __user *__user *iocbpp, bool compat)
{
struct kioctx *ctx;
long ret = 0;
@@ -1662,6 +1695,44 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
return do_io_submit(ctx_id, nr, iocbpp, 0);
}
+#ifdef CONFIG_COMPAT
+static inline long
+copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64)
+{
+ compat_uptr_t uptr;
+ int i;
+
+ for (i = 0; i < nr; ++i) {
+ if (get_user(uptr, ptr32 + i))
+ return -EFAULT;
+ if (put_user(compat_ptr(uptr), ptr64 + i))
+ return -EFAULT;
+ }
+ return 0;
+}
+
+#define MAX_AIO_SUBMITS (PAGE_SIZE/sizeof(struct iocb *))
+
+COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
+ int, nr, u32 __user *, iocb)
+{
+ struct iocb __user * __user *iocb64;
+ long ret;
+
+ if (unlikely(nr < 0))
+ return -EINVAL;
+
+ if (nr > MAX_AIO_SUBMITS)
+ nr = MAX_AIO_SUBMITS;
+
+ iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64));
+ ret = copy_iocb(nr, iocb, iocb64);
+ if (!ret)
+ ret = do_io_submit(ctx_id, nr, iocb64, 1);
+ return ret;
+}
+#endif
+
/* lookup_kiocb
* Finds a given iocb for cancellation.
*/
@@ -1761,3 +1832,25 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
}
return ret;
}
+
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
+ compat_long_t, min_nr,
+ compat_long_t, nr,
+ struct io_event __user *, events,
+ struct compat_timespec __user *, timeout)
+{
+ struct timespec t;
+ struct timespec __user *ut = NULL;
+
+ if (timeout) {
+ if (compat_get_timespec(&t, timeout))
+ return -EFAULT;
+
+ ut = compat_alloc_user_space(sizeof(*ut));
+ if (copy_to_user(ut, &t, sizeof(t)))
+ return -EFAULT;
+ }
+ return sys_io_getevents(ctx_id, min_nr, nr, events, ut);
+}
+#endif
diff --git a/fs/compat.c b/fs/compat.c
index 543b48c29ac3..3f4908c28698 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -487,45 +487,6 @@ COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
return compat_sys_fcntl64(fd, cmd, arg);
}
-COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_reqs, u32 __user *, ctx32p)
-{
- long ret;
- aio_context_t ctx64;
-
- mm_segment_t oldfs = get_fs();
- if (unlikely(get_user(ctx64, ctx32p)))
- return -EFAULT;
-
- set_fs(KERNEL_DS);
- /* The __user pointer cast is valid because of the set_fs() */
- ret = sys_io_setup(nr_reqs, (aio_context_t __user *) &ctx64);
- set_fs(oldfs);
- /* truncating is ok because it's a user address */
- if (!ret)
- ret = put_user((u32) ctx64, ctx32p);
- return ret;
-}
-
-COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
- compat_long_t, min_nr,
- compat_long_t, nr,
- struct io_event __user *, events,
- struct compat_timespec __user *, timeout)
-{
- struct timespec t;
- struct timespec __user *ut = NULL;
-
- if (timeout) {
- if (compat_get_timespec(&t, timeout))
- return -EFAULT;
-
- ut = compat_alloc_user_space(sizeof(*ut));
- if (copy_to_user(ut, &t, sizeof(t)) )
- return -EFAULT;
- }
- return sys_io_getevents(ctx_id, min_nr, nr, events, ut);
-}
-
/* A write operation does a read from user space and vice versa */
#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
@@ -602,42 +563,6 @@ out:
return ret;
}
-static inline long
-copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64)
-{
- compat_uptr_t uptr;
- int i;
-
- for (i = 0; i < nr; ++i) {
- if (get_user(uptr, ptr32 + i))
- return -EFAULT;
- if (put_user(compat_ptr(uptr), ptr64 + i))
- return -EFAULT;
- }
- return 0;
-}
-
-#define MAX_AIO_SUBMITS (PAGE_SIZE/sizeof(struct iocb *))
-
-COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
- int, nr, u32 __user *, iocb)
-{
- struct iocb __user * __user *iocb64;
- long ret;
-
- if (unlikely(nr < 0))
- return -EINVAL;
-
- if (nr > MAX_AIO_SUBMITS)
- nr = MAX_AIO_SUBMITS;
-
- iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64));
- ret = copy_iocb(nr, iocb, iocb64);
- if (!ret)
- ret = do_io_submit(ctx_id, nr, iocb64, 1);
- return ret;
-}
-
struct compat_ncp_mount_data {
compat_int_t version;
compat_uint_t ncp_fd;
diff --git a/fs/exec.c b/fs/exec.c
index 8112eacf10f3..eadbf5069c38 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -19,7 +19,7 @@
* current->executable is only used by the procfs. This allows a dispatch
* table to check for several different types of binary formats. We keep
* trying until we recognize the file or we run out of supported binary
- * formats.
+ * formats.
*/
#include <linux/slab.h>
@@ -1268,6 +1268,13 @@ int flush_old_exec(struct linux_binprm * bprm)
flush_thread();
current->personality &= ~bprm->per_clear;
+ /*
+ * We have to apply CLOEXEC before we change whether the process is
+ * dumpable (in setup_new_exec) to avoid a race with a process in userspace
+ * trying to access the should-be-closed file descriptors of a process
+ * undergoing exec(2).
+ */
+ do_close_on_exec(current->files);
return 0;
out:
@@ -1330,7 +1337,6 @@ void setup_new_exec(struct linux_binprm * bprm)
group */
current->self_exec_id++;
flush_signal_handlers(current, 0);
- do_close_on_exec(current->files);
}
EXPORT_SYMBOL(setup_new_exec);
diff --git a/fs/namespace.c b/fs/namespace.c
index f7e28f8ea04d..b5b1259e064f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -96,10 +96,6 @@ static inline struct hlist_head *mp_hash(struct dentry *dentry)
return &mountpoint_hashtable[tmp & mp_hash_mask];
}
-/*
- * allocation is serialized by namespace_sem, but we need the spinlock to
- * serialize with freeing.
- */
static int mnt_alloc_id(struct mount *mnt)
{
int res;
@@ -1034,6 +1030,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
if (IS_MNT_SLAVE(old))
list_add(&mnt->mnt_slave, &old->mnt_slave);
mnt->mnt_master = old->mnt_master;
+ } else {
+ CLEAR_MNT_SHARED(mnt);
}
if (flag & CL_MAKE_SHARED)
set_mnt_shared(mnt);
@@ -1828,9 +1826,7 @@ struct vfsmount *clone_private_mount(const struct path *path)
if (IS_MNT_UNBINDABLE(old_mnt))
return ERR_PTR(-EINVAL);
- down_read(&namespace_sem);
new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
- up_read(&namespace_sem);
if (IS_ERR(new_mnt))
return ERR_CAST(new_mnt);
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index d171d2c53f7f..f8933cb53d68 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4834,7 +4834,7 @@ int ocfs2_reflink_remap_range(struct file *file_in,
ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
&len, is_dedupe);
- if (ret || len == 0)
+ if (ret <= 0)
goto out_unlock;
/* Lock out changes to the allocation maps and remap. */
diff --git a/fs/pnode.c b/fs/pnode.c
index 234a9ac49958..06a793f4ae38 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -67,49 +67,47 @@ int get_dominating_id(struct mount *mnt, const struct path *root)
static int do_make_slave(struct mount *mnt)
{
- struct mount *peer_mnt = mnt, *master = mnt->mnt_master;
- struct mount *slave_mnt;
+ struct mount *master, *slave_mnt;
- /*
- * slave 'mnt' to a peer mount that has the
- * same root dentry. If none is available then
- * slave it to anything that is available.
- */
- while ((peer_mnt = next_peer(peer_mnt)) != mnt &&
- peer_mnt->mnt.mnt_root != mnt->mnt.mnt_root) ;
-
- if (peer_mnt == mnt) {
- peer_mnt = next_peer(mnt);
- if (peer_mnt == mnt)
- peer_mnt = NULL;
- }
- if (mnt->mnt_group_id && IS_MNT_SHARED(mnt) &&
- list_empty(&mnt->mnt_share))
- mnt_release_group_id(mnt);
-
- list_del_init(&mnt->mnt_share);
- mnt->mnt_group_id = 0;
-
- if (peer_mnt)
- master = peer_mnt;
-
- if (master) {
- list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
- slave_mnt->mnt_master = master;
- list_move(&mnt->mnt_slave, &master->mnt_slave_list);
- list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
- INIT_LIST_HEAD(&mnt->mnt_slave_list);
+ if (list_empty(&mnt->mnt_share)) {
+ if (IS_MNT_SHARED(mnt)) {
+ mnt_release_group_id(mnt);
+ CLEAR_MNT_SHARED(mnt);
+ }
+ master = mnt->mnt_master;
+ if (!master) {
+ struct list_head *p = &mnt->mnt_slave_list;
+ while (!list_empty(p)) {
+ slave_mnt = list_first_entry(p,
+ struct mount, mnt_slave);
+ list_del_init(&slave_mnt->mnt_slave);
+ slave_mnt->mnt_master = NULL;
+ }
+ return 0;
+ }
} else {
- struct list_head *p = &mnt->mnt_slave_list;
- while (!list_empty(p)) {
- slave_mnt = list_first_entry(p,
- struct mount, mnt_slave);
- list_del_init(&slave_mnt->mnt_slave);
- slave_mnt->mnt_master = NULL;
+ struct mount *m;
+ /*
+ * slave 'mnt' to a peer mount that has the
+ * same root dentry. If none is available then
+ * slave it to anything that is available.
+ */
+ for (m = master = next_peer(mnt); m != mnt; m = next_peer(m)) {
+ if (m->mnt.mnt_root == mnt->mnt.mnt_root) {
+ master = m;
+ break;
+ }
}
+ list_del_init(&mnt->mnt_share);
+ mnt->mnt_group_id = 0;
+ CLEAR_MNT_SHARED(mnt);
}
+ list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
+ slave_mnt->mnt_master = master;
+ list_move(&mnt->mnt_slave, &master->mnt_slave_list);
+ list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
+ INIT_LIST_HEAD(&mnt->mnt_slave_list);
mnt->mnt_master = master;
- CLEAR_MNT_SHARED(mnt);
return 0;
}
diff --git a/fs/read_write.c b/fs/read_write.c
index da6de12b5c46..7537b6b6b5a2 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1669,6 +1669,9 @@ static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
* Check that the two inodes are eligible for cloning, the ranges make
* sense, and then flush all dirty data. Caller must ensure that the
* inodes have been locked against any other modifications.
+ *
+ * Returns: 0 for "nothing to clone", 1 for "something to clone", or
+ * the usual negative error code.
*/
int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
struct inode *inode_out, loff_t pos_out,
@@ -1695,17 +1698,15 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
/* Are we going all the way to the end? */
isize = i_size_read(inode_in);
- if (isize == 0) {
- *len = 0;
+ if (isize == 0)
return 0;
- }
/* Zero length dedupe exits immediately; reflink goes to EOF. */
if (*len == 0) {
- if (is_dedupe) {
- *len = 0;
+ if (is_dedupe || pos_in == isize)
return 0;
- }
+ if (pos_in > isize)
+ return -EINVAL;
*len = isize - pos_in;
}
@@ -1769,7 +1770,7 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
return -EBADE;
}
- return 0;
+ return 1;
}
EXPORT_SYMBOL(vfs_clone_file_prep_inodes);
@@ -1955,6 +1956,9 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
goto out;
ret = 0;
+ if (off + len > i_size_read(src))
+ return -EINVAL;
+
/* pre-format output fields to sane values */
for (i = 0; i < count; i++) {
same->info[i].bytes_deduped = 0ULL;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 368bfb92b115..a11f271800ef 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -190,6 +190,13 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
*/
m->version = file->f_version;
+ /*
+ * if request is to read from zero offset, reset iterator to first
+ * record as it might have been already advanced by previous requests
+ */
+ if (*ppos == 0)
+ m->index = 0;
+
/* Don't assume *ppos is where we left it */
if (unlikely(*ppos != m->read_pos)) {
while ((err = traverse(m, *ppos)) == -EAGAIN)
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 45ceb94e89e4..1bc0bd6a9848 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -1191,7 +1191,7 @@ out:
return err;
}
-void ufs_truncate_blocks(struct inode *inode)
+static void ufs_truncate_blocks(struct inode *inode)
{
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)))
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index aca2d4bd4303..07593a362cd0 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1161,7 +1161,7 @@ xfs_reflink_remap_range(
ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
&len, is_dedupe);
- if (ret || len == 0)
+ if (ret <= 0)
goto out_unlock;
trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
OpenPOWER on IntegriCloud