diff options
Diffstat (limited to 'fs/namespace.c')
-rw-r--r-- | fs/namespace.c | 670 |
1 files changed, 497 insertions, 173 deletions
diff --git a/fs/namespace.c b/fs/namespace.c index 8a415c9c5e55..9263995bf6a1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -13,7 +13,6 @@ #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/percpu.h> -#include <linux/smp_lock.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/acct.h> @@ -139,7 +138,65 @@ void mnt_release_group_id(struct vfsmount *mnt) mnt->mnt_group_id = 0; } -struct vfsmount *alloc_vfsmnt(const char *name) +/* + * vfsmount lock must be held for read + */ +static inline void mnt_add_count(struct vfsmount *mnt, int n) +{ +#ifdef CONFIG_SMP + this_cpu_add(mnt->mnt_pcp->mnt_count, n); +#else + preempt_disable(); + mnt->mnt_count += n; + preempt_enable(); +#endif +} + +static inline void mnt_set_count(struct vfsmount *mnt, int n) +{ +#ifdef CONFIG_SMP + this_cpu_write(mnt->mnt_pcp->mnt_count, n); +#else + mnt->mnt_count = n; +#endif +} + +/* + * vfsmount lock must be held for read + */ +static inline void mnt_inc_count(struct vfsmount *mnt) +{ + mnt_add_count(mnt, 1); +} + +/* + * vfsmount lock must be held for read + */ +static inline void mnt_dec_count(struct vfsmount *mnt) +{ + mnt_add_count(mnt, -1); +} + +/* + * vfsmount lock must be held for write + */ +unsigned int mnt_get_count(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + unsigned int count = 0; + int cpu; + + for_each_possible_cpu(cpu) { + count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count; + } + + return count; +#else + return mnt->mnt_count; +#endif +} + +static struct vfsmount *alloc_vfsmnt(const char *name) { struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); if (mnt) { @@ -155,7 +212,17 @@ struct vfsmount *alloc_vfsmnt(const char *name) goto out_free_id; } - atomic_set(&mnt->mnt_count, 1); +#ifdef CONFIG_SMP + mnt->mnt_pcp = alloc_percpu(struct mnt_pcp); + if (!mnt->mnt_pcp) + goto out_free_devname; + + this_cpu_add(mnt->mnt_pcp->mnt_count, 1); +#else + mnt->mnt_count = 1; + mnt->mnt_writers = 0; +#endif + INIT_LIST_HEAD(&mnt->mnt_hash); INIT_LIST_HEAD(&mnt->mnt_child); INIT_LIST_HEAD(&mnt->mnt_mounts); @@ -167,13 +234,6 @@ struct vfsmount *alloc_vfsmnt(const char *name) #ifdef CONFIG_FSNOTIFY INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); #endif -#ifdef CONFIG_SMP - mnt->mnt_writers = alloc_percpu(int); - if (!mnt->mnt_writers) - goto out_free_devname; -#else - mnt->mnt_writers = 0; -#endif } return mnt; @@ -217,32 +277,32 @@ int __mnt_is_readonly(struct vfsmount *mnt) } EXPORT_SYMBOL_GPL(__mnt_is_readonly); -static inline void inc_mnt_writers(struct vfsmount *mnt) +static inline void mnt_inc_writers(struct vfsmount *mnt) { #ifdef CONFIG_SMP - (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++; + this_cpu_inc(mnt->mnt_pcp->mnt_writers); #else mnt->mnt_writers++; #endif } -static inline void dec_mnt_writers(struct vfsmount *mnt) +static inline void mnt_dec_writers(struct vfsmount *mnt) { #ifdef CONFIG_SMP - (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--; + this_cpu_dec(mnt->mnt_pcp->mnt_writers); #else mnt->mnt_writers--; #endif } -static unsigned int count_mnt_writers(struct vfsmount *mnt) +static unsigned int mnt_get_writers(struct vfsmount *mnt) { #ifdef CONFIG_SMP unsigned int count = 0; int cpu; for_each_possible_cpu(cpu) { - count += *per_cpu_ptr(mnt->mnt_writers, cpu); + count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers; } return count; @@ -274,9 +334,9 @@ int mnt_want_write(struct vfsmount *mnt) int ret = 0; preempt_disable(); - inc_mnt_writers(mnt); + mnt_inc_writers(mnt); /* - * The store to inc_mnt_writers must be visible before we pass + * The store to mnt_inc_writers must be visible before we pass * MNT_WRITE_HOLD loop below, so that the slowpath can see our * incremented count after it has set MNT_WRITE_HOLD. */ @@ -290,7 +350,7 @@ int mnt_want_write(struct vfsmount *mnt) */ smp_rmb(); if (__mnt_is_readonly(mnt)) { - dec_mnt_writers(mnt); + mnt_dec_writers(mnt); ret = -EROFS; goto out; } @@ -318,7 +378,7 @@ int mnt_clone_write(struct vfsmount *mnt) if (__mnt_is_readonly(mnt)) return -EROFS; preempt_disable(); - inc_mnt_writers(mnt); + mnt_inc_writers(mnt); preempt_enable(); return 0; } @@ -352,7 +412,7 @@ EXPORT_SYMBOL_GPL(mnt_want_write_file); void mnt_drop_write(struct vfsmount *mnt) { preempt_disable(); - dec_mnt_writers(mnt); + mnt_dec_writers(mnt); preempt_enable(); } EXPORT_SYMBOL_GPL(mnt_drop_write); @@ -385,7 +445,7 @@ static int mnt_make_readonly(struct vfsmount *mnt) * MNT_WRITE_HOLD, so it can't be decremented by another CPU while * we're counting up here. */ - if (count_mnt_writers(mnt) > 0) + if (mnt_get_writers(mnt) > 0) ret = -EBUSY; else mnt->mnt_flags |= MNT_READONLY; @@ -406,20 +466,12 @@ static void __mnt_unmake_readonly(struct vfsmount *mnt) br_write_unlock(vfsmount_lock); } -void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) -{ - mnt->mnt_sb = sb; - mnt->mnt_root = dget(sb->s_root); -} - -EXPORT_SYMBOL(simple_set_mnt); - -void free_vfsmnt(struct vfsmount *mnt) +static void free_vfsmnt(struct vfsmount *mnt) { kfree(mnt->mnt_devname); mnt_free_id(mnt); #ifdef CONFIG_SMP - free_percpu(mnt->mnt_writers); + free_percpu(mnt->mnt_pcp); #endif kmem_cache_free(mnt_cache, mnt); } @@ -493,6 +545,27 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns) } /* + * Clear dentry's mounted state if it has no remaining mounts. + * vfsmount_lock must be held for write. + */ +static void dentry_reset_mounted(struct vfsmount *mnt, struct dentry *dentry) +{ + unsigned u; + + for (u = 0; u < HASH_SIZE; u++) { + struct vfsmount *p; + + list_for_each_entry(p, &mount_hashtable[u], mnt_hash) { + if (p->mnt_mountpoint == dentry) + return; + } + } + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~DCACHE_MOUNTED; + spin_unlock(&dentry->d_lock); +} + +/* * vfsmount lock must be held for write */ static void detach_mnt(struct vfsmount *mnt, struct path *old_path) @@ -503,7 +576,7 @@ static void detach_mnt(struct vfsmount *mnt, struct path *old_path) mnt->mnt_mountpoint = mnt->mnt_root; list_del_init(&mnt->mnt_child); list_del_init(&mnt->mnt_hash); - old_path->dentry->d_mounted--; + dentry_reset_mounted(old_path->mnt, old_path->dentry); } /* @@ -514,7 +587,9 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, { child_mnt->mnt_parent = mntget(mnt); child_mnt->mnt_mountpoint = dget(dentry); - dentry->d_mounted++; + spin_lock(&dentry->d_lock); + dentry->d_flags |= DCACHE_MOUNTED; + spin_unlock(&dentry->d_lock); } /* @@ -528,6 +603,21 @@ static void attach_mnt(struct vfsmount *mnt, struct path *path) list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts); } +static inline void __mnt_make_longterm(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + atomic_inc(&mnt->mnt_longterm); +#endif +} + +/* needs vfsmount lock for write */ +static inline void __mnt_make_shortterm(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + atomic_dec(&mnt->mnt_longterm); +#endif +} + /* * vfsmount lock must be held for write */ @@ -541,8 +631,11 @@ static void commit_tree(struct vfsmount *mnt) BUG_ON(parent == mnt); list_add_tail(&head, &mnt->mnt_list); - list_for_each_entry(m, &head, mnt_list) + list_for_each_entry(m, &head, mnt_list) { m->mnt_ns = n; + __mnt_make_longterm(m); + } + list_splice(&head, n->list.prev); list_add_tail(&mnt->mnt_hash, mount_hashtable + @@ -577,6 +670,36 @@ static struct vfsmount *skip_mnt_tree(struct vfsmount *p) return p; } +struct vfsmount * +vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) +{ + struct vfsmount *mnt; + struct dentry *root; + + if (!type) + return ERR_PTR(-ENODEV); + + mnt = alloc_vfsmnt(name); + if (!mnt) + return ERR_PTR(-ENOMEM); + + if (flags & MS_KERNMOUNT) + mnt->mnt_flags = MNT_INTERNAL; + + root = mount_fs(type, flags, name, data); + if (IS_ERR(root)) { + free_vfsmnt(mnt); + return ERR_CAST(root); + } + + mnt->mnt_root = root; + mnt->mnt_sb = root->d_sb; + mnt->mnt_mountpoint = mnt->mnt_root; + mnt->mnt_parent = mnt; + return mnt; +} +EXPORT_SYMBOL_GPL(vfs_kern_mount); + static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, int flag) { @@ -630,9 +753,10 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, return NULL; } -static inline void __mntput(struct vfsmount *mnt) +static inline void mntfree(struct vfsmount *mnt) { struct super_block *sb = mnt->mnt_sb; + /* * This probably indicates that somebody messed * up a mnt_want/drop_write() pair. If this @@ -640,38 +764,69 @@ static inline void __mntput(struct vfsmount *mnt) * to make r/w->r/o transitions. */ /* - * atomic_dec_and_lock() used to deal with ->mnt_count decrements - * provides barriers, so count_mnt_writers() below is safe. AV + * The locking used to deal with mnt_count decrement provides barriers, + * so mnt_get_writers() below is safe. */ - WARN_ON(count_mnt_writers(mnt)); + WARN_ON(mnt_get_writers(mnt)); fsnotify_vfsmount_delete(mnt); dput(mnt->mnt_root); free_vfsmnt(mnt); deactivate_super(sb); } -void mntput_no_expire(struct vfsmount *mnt) +static void mntput_no_expire(struct vfsmount *mnt) { -repeat: - if (atomic_add_unless(&mnt->mnt_count, -1, 1)) +put_again: +#ifdef CONFIG_SMP + br_read_lock(vfsmount_lock); + if (likely(atomic_read(&mnt->mnt_longterm))) { + mnt_dec_count(mnt); + br_read_unlock(vfsmount_lock); return; + } + br_read_unlock(vfsmount_lock); + br_write_lock(vfsmount_lock); - if (!atomic_dec_and_test(&mnt->mnt_count)) { + mnt_dec_count(mnt); + if (mnt_get_count(mnt)) { br_write_unlock(vfsmount_lock); return; } - if (likely(!mnt->mnt_pinned)) { - br_write_unlock(vfsmount_lock); - __mntput(mnt); +#else + mnt_dec_count(mnt); + if (likely(mnt_get_count(mnt))) return; + br_write_lock(vfsmount_lock); +#endif + if (unlikely(mnt->mnt_pinned)) { + mnt_add_count(mnt, mnt->mnt_pinned + 1); + mnt->mnt_pinned = 0; + br_write_unlock(vfsmount_lock); + acct_auto_close_mnt(mnt); + goto put_again; } - atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); - mnt->mnt_pinned = 0; br_write_unlock(vfsmount_lock); - acct_auto_close_mnt(mnt); - goto repeat; + mntfree(mnt); } -EXPORT_SYMBOL(mntput_no_expire); + +void mntput(struct vfsmount *mnt) +{ + if (mnt) { + /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ + if (unlikely(mnt->mnt_expiry_mark)) + mnt->mnt_expiry_mark = 0; + mntput_no_expire(mnt); + } +} +EXPORT_SYMBOL(mntput); + +struct vfsmount *mntget(struct vfsmount *mnt) +{ + if (mnt) + mnt_inc_count(mnt); + return mnt; +} +EXPORT_SYMBOL(mntget); void mnt_pin(struct vfsmount *mnt) { @@ -679,19 +834,17 @@ void mnt_pin(struct vfsmount *mnt) mnt->mnt_pinned++; br_write_unlock(vfsmount_lock); } - EXPORT_SYMBOL(mnt_pin); void mnt_unpin(struct vfsmount *mnt) { br_write_lock(vfsmount_lock); if (mnt->mnt_pinned) { - atomic_inc(&mnt->mnt_count); + mnt_inc_count(mnt); mnt->mnt_pinned--; } br_write_unlock(vfsmount_lock); } - EXPORT_SYMBOL(mnt_unpin); static inline void mangle(struct seq_file *m, const char *s) @@ -847,7 +1000,13 @@ static int show_vfsmnt(struct seq_file *m, void *v) int err = 0; struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; - mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); + if (mnt->mnt_sb->s_op->show_devname) { + err = mnt->mnt_sb->s_op->show_devname(m, mnt); + if (err) + goto out; + } else { + mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); + } seq_putc(m, ' '); seq_path(m, &mnt_path, " \t\n\\"); seq_putc(m, ' '); @@ -871,6 +1030,18 @@ const struct seq_operations mounts_op = { .show = show_vfsmnt }; +static int uuid_is_nil(u8 *uuid) +{ + int i; + u8 *cp = (u8 *)uuid; + + for (i = 0; i < 16; i++) { + if (*cp++) + return 0; + } + return 1; +} + static int show_mountinfo(struct seq_file *m, void *v) { struct proc_mounts *p = m->private; @@ -882,7 +1053,12 @@ static int show_mountinfo(struct seq_file *m, void *v) seq_printf(m, "%i %i %u:%u ", mnt->mnt_id, mnt->mnt_parent->mnt_id, MAJOR(sb->s_dev), MINOR(sb->s_dev)); - seq_dentry(m, mnt->mnt_root, " \t\n\\"); + if (sb->s_op->show_path) + err = sb->s_op->show_path(m, mnt); + else + seq_dentry(m, mnt->mnt_root, " \t\n\\"); + if (err) + goto out; seq_putc(m, ' '); seq_path_root(m, &mnt_path, &root, " \t\n\\"); if (root.mnt != p->root.mnt || root.dentry != p->root.dentry) { @@ -909,11 +1085,20 @@ static int show_mountinfo(struct seq_file *m, void *v) if (IS_MNT_UNBINDABLE(mnt)) seq_puts(m, " unbindable"); + if (!uuid_is_nil(mnt->mnt_sb->s_uuid)) + /* print the uuid */ + seq_printf(m, " uuid:%pU", mnt->mnt_sb->s_uuid); + /* Filesystem specific data */ seq_puts(m, " - "); show_type(m, sb); seq_putc(m, ' '); - mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); + if (sb->s_op->show_devname) + err = sb->s_op->show_devname(m, mnt); + else + mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); + if (err) + goto out; seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw"); err = show_sb_opts(m, sb); if (err) @@ -939,11 +1124,15 @@ static int show_vfsstat(struct seq_file *m, void *v) int err = 0; /* device */ - if (mnt->mnt_devname) { - seq_puts(m, "device "); - mangle(m, mnt->mnt_devname); - } else - seq_puts(m, "no device"); + if (mnt->mnt_sb->s_op->show_devname) { + err = mnt->mnt_sb->s_op->show_devname(m, mnt); + } else { + if (mnt->mnt_devname) { + seq_puts(m, "device "); + mangle(m, mnt->mnt_devname); + } else + seq_puts(m, "no device"); + } /* mount point */ seq_puts(m, " mounted on "); @@ -957,7 +1146,8 @@ static int show_vfsstat(struct seq_file *m, void *v) /* optional statistics */ if (mnt->mnt_sb->s_op->show_stats) { seq_putc(m, ' '); - err = mnt->mnt_sb->s_op->show_stats(m, mnt); + if (!err) + err = mnt->mnt_sb->s_op->show_stats(m, mnt); } seq_putc(m, '\n'); @@ -986,12 +1176,13 @@ int may_umount_tree(struct vfsmount *mnt) int minimum_refs = 0; struct vfsmount *p; - br_read_lock(vfsmount_lock); + /* write lock needed for mnt_get_count */ + br_write_lock(vfsmount_lock); for (p = mnt; p; p = next_mnt(p, mnt)) { - actual_refs += atomic_read(&p->mnt_count); + actual_refs += mnt_get_count(p); minimum_refs += 2; } - br_read_unlock(vfsmount_lock); + br_write_unlock(vfsmount_lock); if (actual_refs > minimum_refs) return 0; @@ -1018,10 +1209,10 @@ int may_umount(struct vfsmount *mnt) { int ret = 1; down_read(&namespace_sem); - br_read_lock(vfsmount_lock); + br_write_lock(vfsmount_lock); if (propagate_mount_busy(mnt, 2)) ret = 0; - br_read_unlock(vfsmount_lock); + br_write_unlock(vfsmount_lock); up_read(&namespace_sem); return ret; } @@ -1058,26 +1249,29 @@ void release_mounts(struct list_head *head) */ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) { + LIST_HEAD(tmp_list); struct vfsmount *p; for (p = mnt; p; p = next_mnt(p, mnt)) - list_move(&p->mnt_hash, kill); + list_move(&p->mnt_hash, &tmp_list); if (propagate) - propagate_umount(kill); + propagate_umount(&tmp_list); - list_for_each_entry(p, kill, mnt_hash) { + list_for_each_entry(p, &tmp_list, mnt_hash) { list_del_init(&p->mnt_expire); list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); p->mnt_ns = NULL; + __mnt_make_shortterm(p); list_del_init(&p->mnt_child); if (p->mnt_parent != p) { p->mnt_parent->mnt_ghosts++; - p->mnt_mountpoint->d_mounted--; + dentry_reset_mounted(p->mnt_parent, p->mnt_mountpoint); } change_mnt_propagation(p, MS_PRIVATE); } + list_splice(&tmp_list, kill); } static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts); @@ -1103,8 +1297,16 @@ static int do_umount(struct vfsmount *mnt, int flags) flags & (MNT_FORCE | MNT_DETACH)) return -EINVAL; - if (atomic_read(&mnt->mnt_count) != 2) + /* + * probably don't strictly need the lock here if we examined + * all race cases, but it's a slowpath. + */ + br_write_lock(vfsmount_lock); + if (mnt_get_count(mnt) != 2) { + br_write_unlock(vfsmount_lock); return -EBUSY; + } + br_write_unlock(vfsmount_lock); if (!xchg(&mnt->mnt_expiry_mark, 1)) return -EAGAIN; @@ -1461,9 +1663,35 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, return err; } +static int lock_mount(struct path *path) +{ + struct vfsmount *mnt; +retry: + mutex_lock(&path->dentry->d_inode->i_mutex); + if (unlikely(cant_mount(path->dentry))) { + mutex_unlock(&path->dentry->d_inode->i_mutex); + return -ENOENT; + } + down_write(&namespace_sem); + mnt = lookup_mnt(path); + if (likely(!mnt)) + return 0; + up_write(&namespace_sem); + mutex_unlock(&path->dentry->d_inode->i_mutex); + path_put(path); + path->mnt = mnt; + path->dentry = dget(mnt->mnt_root); + goto retry; +} + +static void unlock_mount(struct path *path) +{ + up_write(&namespace_sem); + mutex_unlock(&path->dentry->d_inode->i_mutex); +} + static int graft_tree(struct vfsmount *mnt, struct path *path) { - int err; if (mnt->mnt_sb->s_flags & MS_NOUSER) return -EINVAL; @@ -1471,16 +1699,10 @@ static int graft_tree(struct vfsmount *mnt, struct path *path) S_ISDIR(mnt->mnt_root->d_inode->i_mode)) return -ENOTDIR; - err = -ENOENT; - mutex_lock(&path->dentry->d_inode->i_mutex); - if (cant_mount(path->dentry)) - goto out_unlock; + if (d_unlinked(path->dentry)) + return -ENOENT; - if (!d_unlinked(path->dentry)) - err = attach_recursive_mnt(mnt, path, NULL); -out_unlock: - mutex_unlock(&path->dentry->d_inode->i_mutex); - return err; + return attach_recursive_mnt(mnt, path, NULL); } /* @@ -1543,6 +1765,7 @@ static int do_change_type(struct path *path, int flag) static int do_loopback(struct path *path, char *old_name, int recurse) { + LIST_HEAD(umount_list); struct path old_path; struct vfsmount *mnt = NULL; int err = mount_is_safe(path); @@ -1554,13 +1777,16 @@ static int do_loopback(struct path *path, char *old_name, if (err) return err; - down_write(&namespace_sem); + err = lock_mount(path); + if (err) + goto out; + err = -EINVAL; if (IS_MNT_UNBINDABLE(old_path.mnt)) - goto out; + goto out2; if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) - goto out; + goto out2; err = -ENOMEM; if (recurse) @@ -1569,20 +1795,18 @@ static int do_loopback(struct path *path, char *old_name, mnt = clone_mnt(old_path.mnt, old_path.dentry, 0); if (!mnt) - goto out; + goto out2; err = graft_tree(mnt, path); if (err) { - LIST_HEAD(umount_list); - br_write_lock(vfsmount_lock); umount_tree(mnt, 0, &umount_list); br_write_unlock(vfsmount_lock); - release_mounts(&umount_list); } - +out2: + unlock_mount(path); + release_mounts(&umount_list); out: - up_write(&namespace_sem); path_put(&old_path); return err; } @@ -1624,6 +1848,10 @@ static int do_remount(struct path *path, int flags, int mnt_flags, if (path->dentry != path->mnt->mnt_root) return -EINVAL; + err = security_sb_remount(sb, data); + if (err) + return err; + down_write(&sb->s_umount); if (flags & MS_BIND) err = change_mount_flags(path->mnt, flags); @@ -1667,17 +1895,12 @@ static int do_move_mount(struct path *path, char *old_name) if (err) return err; - down_write(&namespace_sem); - while (d_mountpoint(path->dentry) && - follow_down(path)) - ; - err = -EINVAL; - if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) + err = lock_mount(path); + if (err < 0) goto out; - err = -ENOENT; - mutex_lock(&path->dentry->d_inode->i_mutex); - if (cant_mount(path->dentry)) + err = -EINVAL; + if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) goto out1; if (d_unlinked(path->dentry)) @@ -1719,54 +1942,66 @@ static int do_move_mount(struct path *path, char *old_name) * automatically */ list_del_init(&old_path.mnt->mnt_expire); out1: - mutex_unlock(&path->dentry->d_inode->i_mutex); + unlock_mount(path); out: - up_write(&namespace_sem); if (!err) path_put(&parent_path); path_put(&old_path); return err; } -/* - * create a new mount for userspace and request it to be added into the - * namespace's tree - */ -static int do_new_mount(struct path *path, char *type, int flags, - int mnt_flags, char *name, void *data) +static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) { - struct vfsmount *mnt; - - if (!type) - return -EINVAL; + int err; + const char *subtype = strchr(fstype, '.'); + if (subtype) { + subtype++; + err = -EINVAL; + if (!subtype[0]) + goto err; + } else + subtype = ""; - /* we need capabilities... */ - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; + mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL); + err = -ENOMEM; + if (!mnt->mnt_sb->s_subtype) + goto err; + return mnt; - mnt = do_kern_mount(type, flags, name, data); - if (IS_ERR(mnt)) - return PTR_ERR(mnt); + err: + mntput(mnt); + return ERR_PTR(err); +} - return do_add_mount(mnt, path, mnt_flags, NULL); +struct vfsmount * +do_kern_mount(const char *fstype, int flags, const char *name, void *data) +{ + struct file_system_type *type = get_fs_type(fstype); + struct vfsmount *mnt; + if (!type) + return ERR_PTR(-ENODEV); + mnt = vfs_kern_mount(type, flags, name, data); + if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && + !mnt->mnt_sb->s_subtype) + mnt = fs_set_subtype(mnt, fstype); + put_filesystem(type); + return mnt; } +EXPORT_SYMBOL_GPL(do_kern_mount); /* * add a mount into a namespace's mount tree - * - provide the option of adding the new mount to an expiration list */ -int do_add_mount(struct vfsmount *newmnt, struct path *path, - int mnt_flags, struct list_head *fslist) +static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags) { int err; mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL); - down_write(&namespace_sem); - /* Something was mounted here while we slept */ - while (d_mountpoint(path->dentry) && - follow_down(path)) - ; + err = lock_mount(path); + if (err) + return err; + err = -EINVAL; if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt)) goto unlock; @@ -1782,22 +2017,87 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, goto unlock; newmnt->mnt_flags = mnt_flags; - if ((err = graft_tree(newmnt, path))) - goto unlock; + err = graft_tree(newmnt, path); - if (fslist) /* add to the specified expiration list */ - list_add_tail(&newmnt->mnt_expire, fslist); +unlock: + unlock_mount(path); + return err; +} - up_write(&namespace_sem); - return 0; +/* + * create a new mount for userspace and request it to be added into the + * namespace's tree + */ +static int do_new_mount(struct path *path, char *type, int flags, + int mnt_flags, char *name, void *data) +{ + struct vfsmount *mnt; + int err; -unlock: - up_write(&namespace_sem); - mntput(newmnt); + if (!type) + return -EINVAL; + + /* we need capabilities... */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + mnt = do_kern_mount(type, flags, name, data); + if (IS_ERR(mnt)) + return PTR_ERR(mnt); + + err = do_add_mount(mnt, path, mnt_flags); + if (err) + mntput(mnt); + return err; +} + +int finish_automount(struct vfsmount *m, struct path *path) +{ + int err; + /* The new mount record should have at least 2 refs to prevent it being + * expired before we get a chance to add it + */ + BUG_ON(mnt_get_count(m) < 2); + + if (m->mnt_sb == path->mnt->mnt_sb && + m->mnt_root == path->dentry) { + err = -ELOOP; + goto fail; + } + + err = do_add_mount(m, path, path->mnt->mnt_flags | MNT_SHRINKABLE); + if (!err) + return 0; +fail: + /* remove m from any expiration list it may be on */ + if (!list_empty(&m->mnt_expire)) { + down_write(&namespace_sem); + br_write_lock(vfsmount_lock); + list_del_init(&m->mnt_expire); + br_write_unlock(vfsmount_lock); + up_write(&namespace_sem); + } + mntput(m); + mntput(m); return err; } -EXPORT_SYMBOL_GPL(do_add_mount); +/** + * mnt_set_expiry - Put a mount on an expiration list + * @mnt: The mount to list. + * @expiry_list: The list to add the mount to. + */ +void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) +{ + down_write(&namespace_sem); + br_write_lock(vfsmount_lock); + + list_add_tail(&mnt->mnt_expire, expiry_list); + + br_write_unlock(vfsmount_lock); + up_write(&namespace_sem); +} +EXPORT_SYMBOL(mnt_set_expiry); /* * process a list of expirable mountpoints with the intent of discarding any @@ -2086,6 +2386,22 @@ static struct mnt_namespace *alloc_mnt_ns(void) return new_ns; } +void mnt_make_longterm(struct vfsmount *mnt) +{ + __mnt_make_longterm(mnt); +} + +void mnt_make_shortterm(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + if (atomic_add_unless(&mnt->mnt_longterm, -1, 1)) + return; + br_write_lock(vfsmount_lock); + atomic_dec(&mnt->mnt_longterm); + br_write_unlock(vfsmount_lock); +#endif +} + /* * Allocate a new namespace structure and populate it with contents * copied from the namespace of the passed in task structure. @@ -2123,14 +2439,19 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, q = new_ns->root; while (p) { q->mnt_ns = new_ns; + __mnt_make_longterm(q); if (fs) { if (p == fs->root.mnt) { - rootmnt = p; fs->root.mnt = mntget(q); + __mnt_make_longterm(q); + mnt_make_shortterm(p); + rootmnt = p; } if (p == fs->pwd.mnt) { - pwdmnt = p; fs->pwd.mnt = mntget(q); + __mnt_make_longterm(q); + mnt_make_shortterm(p); + pwdmnt = p; } } p = next_mnt(p, mnt_ns->root); @@ -2174,6 +2495,7 @@ struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt) new_ns = alloc_mnt_ns(); if (!IS_ERR(new_ns)) { mnt->mnt_ns = new_ns; + __mnt_make_longterm(mnt); new_ns->root = mnt; list_add(&new_ns->list, &new_ns->root->mnt_list); } @@ -2260,65 +2582,60 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, error = user_path_dir(new_root, &new); if (error) goto out0; - error = -EINVAL; - if (!check_mnt(new.mnt)) - goto out1; error = user_path_dir(put_old, &old); if (error) goto out1; error = security_sb_pivotroot(&old, &new); - if (error) { - path_put(&old); - goto out1; - } + if (error) + goto out2; get_fs_root(current->fs, &root); - down_write(&namespace_sem); - mutex_lock(&old.dentry->d_inode->i_mutex); + error = lock_mount(&old); + if (error) + goto out3; + error = -EINVAL; if (IS_MNT_SHARED(old.mnt) || IS_MNT_SHARED(new.mnt->mnt_parent) || IS_MNT_SHARED(root.mnt->mnt_parent)) - goto out2; - if (!check_mnt(root.mnt)) - goto out2; + goto out4; + if (!check_mnt(root.mnt) || !check_mnt(new.mnt)) + goto out4; error = -ENOENT; - if (cant_mount(old.dentry)) - goto out2; if (d_unlinked(new.dentry)) - goto out2; + goto out4; if (d_unlinked(old.dentry)) - goto out2; + goto out4; error = -EBUSY; if (new.mnt == root.mnt || old.mnt == root.mnt) - goto out2; /* loop, on the same file system */ + goto out4; /* loop, on the same file system */ error = -EINVAL; if (root.mnt->mnt_root != root.dentry) - goto out2; /* not a mountpoint */ + goto out4; /* not a mountpoint */ if (root.mnt->mnt_parent == root.mnt) - goto out2; /* not attached */ + goto out4; /* not attached */ if (new.mnt->mnt_root != new.dentry) - goto out2; /* not a mountpoint */ + goto out4; /* not a mountpoint */ if (new.mnt->mnt_parent == new.mnt) - goto out2; /* not attached */ + goto out4; /* not attached */ /* make sure we can reach put_old from new_root */ tmp = old.mnt; - br_write_lock(vfsmount_lock); if (tmp != new.mnt) { for (;;) { if (tmp->mnt_parent == tmp) - goto out3; /* already mounted on put_old */ + goto out4; /* already mounted on put_old */ if (tmp->mnt_parent == new.mnt) break; tmp = tmp->mnt_parent; } if (!is_subdir(tmp->mnt_mountpoint, new.dentry)) - goto out3; + goto out4; } else if (!is_subdir(old.dentry, new.dentry)) - goto out3; + goto out4; + br_write_lock(vfsmount_lock); detach_mnt(new.mnt, &parent_path); detach_mnt(root.mnt, &root_parent); /* mount old root on put_old */ @@ -2329,20 +2646,20 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, br_write_unlock(vfsmount_lock); chroot_fs_refs(&root, &new); error = 0; - path_put(&root_parent); - path_put(&parent_path); -out2: - mutex_unlock(&old.dentry->d_inode->i_mutex); - up_write(&namespace_sem); +out4: + unlock_mount(&old); + if (!error) { + path_put(&root_parent); + path_put(&parent_path); + } +out3: path_put(&root); +out2: path_put(&old); out1: path_put(&new); out0: return error; -out3: - br_write_unlock(vfsmount_lock); - goto out2; } static void __init init_mount_tree(void) @@ -2354,6 +2671,7 @@ static void __init init_mount_tree(void) mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); if (IS_ERR(mnt)) panic("Can't create rootfs"); + ns = create_mnt_ns(mnt); if (IS_ERR(ns)) panic("Can't allocate initial namespace"); @@ -2416,3 +2734,9 @@ void put_mnt_ns(struct mnt_namespace *ns) kfree(ns); } EXPORT_SYMBOL(put_mnt_ns); + +struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) +{ + return vfs_kern_mount(type, MS_KERNMOUNT, type->name, data); +} +EXPORT_SYMBOL_GPL(kern_mount_data); |