diff options
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- | fs/namei.c | 613 |
1 files changed, 332 insertions, 281 deletions
diff --git a/fs/namei.c b/fs/namei.c index 3531deebad30..a7b05bf82d31 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -34,6 +34,7 @@ #include <linux/device_cgroup.h> #include <linux/fs_struct.h> #include <linux/posix_acl.h> +#include <linux/hash.h> #include <asm/uaccess.h> #include "internal.h" @@ -196,6 +197,7 @@ recopy: goto error; result->uptr = filename; + result->aname = NULL; audit_getname(result); return result; @@ -209,7 +211,35 @@ getname(const char __user * filename) { return getname_flags(filename, 0, NULL); } -EXPORT_SYMBOL(getname); + +/* + * The "getname_kernel()" interface doesn't do pathnames longer + * than EMBEDDED_NAME_MAX. Deal with it - you're a kernel user. + */ +struct filename * +getname_kernel(const char * filename) +{ + struct filename *result; + char *kname; + int len; + + len = strlen(filename); + if (len >= EMBEDDED_NAME_MAX) + return ERR_PTR(-ENAMETOOLONG); + + result = __getname(); + if (unlikely(!result)) + return ERR_PTR(-ENOMEM); + + kname = (char *)result + sizeof(*result); + result->name = kname; + result->uptr = NULL; + result->aname = NULL; + result->separate = false; + + strlcpy(kname, filename, EMBEDDED_NAME_MAX); + return result; +} #ifdef CONFIG_AUDITSYSCALL void putname(struct filename *name) @@ -235,27 +265,9 @@ static int check_acl(struct inode *inode, int mask) return posix_acl_permission(inode, acl, mask & ~MAY_NOT_BLOCK); } - acl = get_cached_acl(inode, ACL_TYPE_ACCESS); - - /* - * A filesystem can force a ACL callback by just never filling the - * ACL cache. But normally you'd fill the cache either at inode - * instantiation time, or on the first ->get_acl call. - * - * If the filesystem doesn't have a get_acl() function at all, we'll - * just create the negative cache entry. - */ - if (acl == ACL_NOT_CACHED) { - if (inode->i_op->get_acl) { - acl = inode->i_op->get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl)) - return PTR_ERR(acl); - } else { - set_cached_acl(inode, ACL_TYPE_ACCESS, NULL); - return -EAGAIN; - } - } - + acl = get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl)) + return PTR_ERR(acl); if (acl) { int error = posix_acl_permission(inode, acl, mask); posix_acl_release(acl); @@ -321,10 +333,11 @@ int generic_permission(struct inode *inode, int mask) if (S_ISDIR(inode->i_mode)) { /* DACs are overridable for directories */ - if (inode_capable(inode, CAP_DAC_OVERRIDE)) + if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE)) return 0; if (!(mask & MAY_WRITE)) - if (inode_capable(inode, CAP_DAC_READ_SEARCH)) + if (capable_wrt_inode_uidgid(inode, + CAP_DAC_READ_SEARCH)) return 0; return -EACCES; } @@ -334,7 +347,7 @@ int generic_permission(struct inode *inode, int mask) * at least one exec bit set. */ if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO)) - if (inode_capable(inode, CAP_DAC_OVERRIDE)) + if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE)) return 0; /* @@ -342,11 +355,12 @@ int generic_permission(struct inode *inode, int mask) */ mask &= MAY_READ | MAY_WRITE | MAY_EXEC; if (mask == MAY_READ) - if (inode_capable(inode, CAP_DAC_READ_SEARCH)) + if (capable_wrt_inode_uidgid(inode, CAP_DAC_READ_SEARCH)) return 0; return -EACCES; } +EXPORT_SYMBOL(generic_permission); /* * We _really_ want to just do "generic_permission()" without @@ -444,6 +458,7 @@ int inode_permission(struct inode *inode, int mask) return retval; return __inode_permission(inode, mask); } +EXPORT_SYMBOL(inode_permission); /** * path_get - get a reference to a path @@ -629,24 +644,22 @@ static int complete_walk(struct nameidata *nd) static __always_inline void set_root(struct nameidata *nd) { - if (!nd->root.mnt) - get_fs_root(current->fs, &nd->root); + get_fs_root(current->fs, &nd->root); } static int link_path_walk(const char *, struct nameidata *); -static __always_inline void set_root_rcu(struct nameidata *nd) +static __always_inline unsigned set_root_rcu(struct nameidata *nd) { - if (!nd->root.mnt) { - struct fs_struct *fs = current->fs; - unsigned seq; + struct fs_struct *fs = current->fs; + unsigned seq, res; - do { - seq = read_seqcount_begin(&fs->seq); - nd->root = fs->root; - nd->seq = __read_seqcount_begin(&nd->root.dentry->d_seq); - } while (read_seqcount_retry(&fs->seq, seq)); - } + do { + seq = read_seqcount_begin(&fs->seq); + nd->root = fs->root; + res = __read_seqcount_begin(&nd->root.dentry->d_seq); + } while (read_seqcount_retry(&fs->seq, seq)); + return res; } static void path_put_conditional(struct path *path, struct nameidata *nd) @@ -846,7 +859,8 @@ follow_link(struct path *link, struct nameidata *nd, void **p) return PTR_ERR(s); } if (*s == '/') { - set_root(nd); + if (!nd->root.mnt) + set_root(nd); path_put(&nd->path); nd->path = nd->root; path_get(&nd->root); @@ -913,6 +927,7 @@ int follow_up(struct path *path) path->mnt = &parent->mnt; return 1; } +EXPORT_SYMBOL(follow_up); /* * Perform an automount @@ -1074,11 +1089,12 @@ int follow_down_one(struct path *path) } return 0; } +EXPORT_SYMBOL(follow_down_one); -static inline bool managed_dentry_might_block(struct dentry *dentry) +static inline int managed_dentry_rcu(struct dentry *dentry) { - return (dentry->d_flags & DCACHE_MANAGE_TRANSIT && - dentry->d_op->d_manage(dentry, true) < 0); + return (dentry->d_flags & DCACHE_MANAGE_TRANSIT) ? + dentry->d_op->d_manage(dentry, true) : 0; } /* @@ -1094,11 +1110,18 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, * Don't forget we might have a non-mountpoint managed dentry * that wants to block transit. */ - if (unlikely(managed_dentry_might_block(path->dentry))) + switch (managed_dentry_rcu(path->dentry)) { + case -ECHILD: + default: return false; + case -EISDIR: + return true; + case 0: + break; + } if (!d_mountpoint(path->dentry)) - break; + return !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT); mounted = __lookup_mnt(path->mnt, path->dentry); if (!mounted) @@ -1114,25 +1137,15 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, */ *inode = path->dentry->d_inode; } - return true; -} - -static void follow_mount_rcu(struct nameidata *nd) -{ - while (d_mountpoint(nd->path.dentry)) { - struct mount *mounted; - mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry); - if (!mounted) - break; - nd->path.mnt = &mounted->mnt; - nd->path.dentry = mounted->mnt.mnt_root; - nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); - } + return !read_seqretry(&mount_lock, nd->m_seq) && + !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT); } static int follow_dotdot_rcu(struct nameidata *nd) { - set_root_rcu(nd); + struct inode *inode = nd->inode; + if (!nd->root.mnt) + set_root_rcu(nd); while (1) { if (nd->path.dentry == nd->root.dentry && @@ -1144,6 +1157,7 @@ static int follow_dotdot_rcu(struct nameidata *nd) struct dentry *parent = old->d_parent; unsigned seq; + inode = parent->d_inode; seq = read_seqcount_begin(&parent->d_seq); if (read_seqcount_retry(&old->d_seq, nd->seq)) goto failed; @@ -1153,10 +1167,22 @@ static int follow_dotdot_rcu(struct nameidata *nd) } if (!follow_up_rcu(&nd->path)) break; + inode = nd->path.dentry->d_inode; nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); } - follow_mount_rcu(nd); - nd->inode = nd->path.dentry->d_inode; + while (d_mountpoint(nd->path.dentry)) { + struct mount *mounted; + mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry); + if (!mounted) + break; + nd->path.mnt = &mounted->mnt; + nd->path.dentry = mounted->mnt.mnt_root; + inode = nd->path.dentry->d_inode; + nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); + if (read_seqretry(&mount_lock, nd->m_seq)) + goto failed; + } + nd->inode = inode; return 0; failed: @@ -1215,6 +1241,7 @@ int follow_down(struct path *path) } return 0; } +EXPORT_SYMBOL(follow_down); /* * Skip to top of mountpoint pile in refwalk mode for follow_dotdot() @@ -1234,7 +1261,8 @@ static void follow_mount(struct path *path) static void follow_dotdot(struct nameidata *nd) { - set_root(nd); + if (!nd->root.mnt) + set_root(nd); while(1) { struct dentry *old = nd->path.dentry; @@ -1388,11 +1416,8 @@ static int lookup_fast(struct nameidata *nd, } path->mnt = mnt; path->dentry = dentry; - if (unlikely(!__follow_mount_rcu(nd, path, inode))) - goto unlazy; - if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) - goto unlazy; - return 0; + if (likely(__follow_mount_rcu(nd, path, inode))) + return 0; unlazy: if (unlazy_walk(nd, dentry)) return -ECHILD; @@ -1529,7 +1554,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path, inode = path->dentry->d_inode; } err = -ENOENT; - if (!inode) + if (!inode || d_is_negative(path->dentry)) goto out_path_put; if (should_follow_link(path->dentry, follow)) { @@ -1615,8 +1640,7 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd) static inline unsigned int fold_hash(unsigned long hash) { - hash += hash >> (8*sizeof(int)); - return hash; + return hash_64(hash, 32); } #else /* 32-bit case */ @@ -1650,9 +1674,9 @@ EXPORT_SYMBOL(full_name_hash); /* * Calculate the length and hash of the path component, and - * return the length of the component; + * return the "hash_len" as the result. */ -static inline unsigned long hash_name(const char *name, unsigned int *hashp) +static inline u64 hash_name(const char *name) { unsigned long a, b, adata, bdata, mask, hash, len; const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; @@ -1672,9 +1696,8 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp) mask = create_zero_mask(adata | bdata); hash += a & zero_bytemask(mask); - *hashp = fold_hash(hash); - - return len + find_zero(mask); + len += find_zero(mask); + return hashlen_create(fold_hash(hash), len); } #else @@ -1692,7 +1715,7 @@ EXPORT_SYMBOL(full_name_hash); * We know there's a real path component here of at least * one character. */ -static inline unsigned long hash_name(const char *name, unsigned int *hashp) +static inline u64 hash_name(const char *name) { unsigned long hash = init_name_hash(); unsigned long len = 0, c; @@ -1703,8 +1726,7 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp) hash = partial_name_hash(c, hash); c = (unsigned char)name[len]; } while (c && c != '/'); - *hashp = end_name_hash(hash); - return len; + return hashlen_create(end_name_hash(hash), len); } #endif @@ -1729,20 +1751,17 @@ static int link_path_walk(const char *name, struct nameidata *nd) /* At this point we know we have a real path component. */ for(;;) { - struct qstr this; - long len; + u64 hash_len; int type; err = may_lookup(nd); if (err) break; - len = hash_name(name, &this.hash); - this.name = name; - this.len = len; + hash_len = hash_name(name); type = LAST_NORM; - if (name[0] == '.') switch (len) { + if (name[0] == '.') switch (hashlen_len(hash_len)) { case 2: if (name[1] == '.') { type = LAST_DOTDOT; @@ -1756,29 +1775,32 @@ static int link_path_walk(const char *name, struct nameidata *nd) struct dentry *parent = nd->path.dentry; nd->flags &= ~LOOKUP_JUMPED; if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { + struct qstr this = { { .hash_len = hash_len }, .name = name }; err = parent->d_op->d_hash(parent, &this); if (err < 0) break; + hash_len = this.hash_len; + name = this.name; } } - nd->last = this; + nd->last.hash_len = hash_len; + nd->last.name = name; nd->last_type = type; - if (!name[len]) + name += hashlen_len(hash_len); + if (!*name) return 0; /* * If it wasn't NUL, we know it was '/'. Skip that * slash, and continue until no more slashes. */ do { - len++; - } while (unlikely(name[len] == '/')); - if (!name[len]) + name++; + } while (unlikely(*name == '/')); + if (!*name) return 0; - name += len; - err = walk_component(nd, &next, LOOKUP_FOLLOW); if (err < 0) return err; @@ -1788,7 +1810,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) if (err) return err; } - if (!d_is_directory(nd->path.dentry)) { + if (!d_can_lookup(nd->path.dentry)) { err = -ENOTDIR; break; } @@ -1809,7 +1831,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct dentry *root = nd->root.dentry; struct inode *inode = root->d_inode; if (*name) { - if (!d_is_directory(root)) + if (!d_can_lookup(root)) return -ENOTDIR; retval = inode_permission(inode, MAY_EXEC); if (retval) @@ -1833,7 +1855,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, if (*name=='/') { if (flags & LOOKUP_RCU) { rcu_read_lock(); - set_root_rcu(nd); + nd->seq = set_root_rcu(nd); } else { set_root(nd); path_get(&nd->root); @@ -1865,7 +1887,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, dentry = f.file->f_path.dentry; if (*name) { - if (!d_is_directory(dentry)) { + if (!d_can_lookup(dentry)) { fdput(f); return -ENOTDIR; } @@ -1873,7 +1895,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, nd->path = f.file->f_path; if (flags & LOOKUP_RCU) { - if (f.need_put) + if (f.flags & FDPUT_FPUT) *fp = f.file; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); rcu_read_lock(); @@ -1884,7 +1906,14 @@ static int path_init(int dfd, const char *name, unsigned int flags, } nd->inode = nd->path.dentry->d_inode; - return 0; + if (!(flags & LOOKUP_RCU)) + return 0; + if (likely(!read_seqcount_retry(&nd->path.dentry->d_seq, nd->seq))) + return 0; + if (!(nd->flags & LOOKUP_ROOT)) + nd->root.mnt = NULL; + rcu_read_unlock(); + return -ECHILD; } static inline int lookup_last(struct nameidata *nd, struct path *path) @@ -1947,7 +1976,7 @@ static int path_lookupat(int dfd, const char *name, err = complete_walk(nd); if (!err && nd->flags & LOOKUP_DIRECTORY) { - if (!d_is_directory(nd->path.dentry)) { + if (!d_can_lookup(nd->path.dentry)) { path_put(&nd->path); err = -ENOTDIR; } @@ -2017,6 +2046,7 @@ int kern_path(const char *name, unsigned int flags, struct path *path) *path = nd.path; return res; } +EXPORT_SYMBOL(kern_path); /** * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair @@ -2041,6 +2071,7 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, *path = nd.path; return err; } +EXPORT_SYMBOL(vfs_path_lookup); /* * Restricted form of lookup. Doesn't follow links, single-component only, @@ -2103,6 +2134,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) return __lookup_hash(&this, base, 0); } +EXPORT_SYMBOL(lookup_one_len); int user_path_at_empty(int dfd, const char __user *name, unsigned flags, struct path *path, int *empty) @@ -2127,6 +2159,7 @@ int user_path_at(int dfd, const char __user *name, unsigned flags, { return user_path_at_empty(dfd, name, flags, path, NULL); } +EXPORT_SYMBOL(user_path_at); /* * NB: most callers don't do anything directly with the reference to the @@ -2232,15 +2265,16 @@ mountpoint_last(struct nameidata *nd, struct path *path) mutex_unlock(&dir->d_inode->i_mutex); done: - if (!dentry->d_inode) { + if (!dentry->d_inode || d_is_negative(dentry)) { error = -ENOENT; dput(dentry); goto out; } path->dentry = dentry; - path->mnt = mntget(nd->path.mnt); + path->mnt = nd->path.mnt; if (should_follow_link(dentry, nd->flags & LOOKUP_FOLLOW)) return 1; + mntget(path->mnt); follow_mount(path); error = 0; out: @@ -2362,7 +2396,7 @@ static inline int check_sticky(struct inode *dir, struct inode *inode) return 0; if (uid_eq(dir->i_uid, fsuid)) return 0; - return !inode_capable(inode, CAP_FOWNER); + return !capable_wrt_inode_uidgid(inode, CAP_FOWNER); } /* @@ -2406,11 +2440,11 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) IS_IMMUTABLE(inode) || IS_SWAPFILE(inode)) return -EPERM; if (isdir) { - if (!d_is_directory(victim) && !d_is_autodir(victim)) + if (!d_is_dir(victim)) return -ENOTDIR; if (IS_ROOT(victim)) return -EBUSY; - } else if (d_is_directory(victim) || d_is_autodir(victim)) + } else if (d_is_dir(victim)) return -EISDIR; if (IS_DEADDIR(dir)) return -ENOENT; @@ -2469,6 +2503,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); return NULL; } +EXPORT_SYMBOL(lock_rename); void unlock_rename(struct dentry *p1, struct dentry *p2) { @@ -2478,6 +2513,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2) mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex); } } +EXPORT_SYMBOL(unlock_rename); int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool want_excl) @@ -2498,6 +2534,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, fsnotify_create(dir, dentry); return error; } +EXPORT_SYMBOL(vfs_create); static int may_open(struct path *path, int acc_mode, int flag) { @@ -2561,7 +2598,7 @@ static int handle_truncate(struct file *filp) /* * Refuse to truncate files with mandatory locks held on them. */ - error = locks_verify_locked(inode); + error = locks_verify_locked(filp); if (!error) error = security_path_truncate(path); if (!error) { @@ -2974,7 +3011,7 @@ retry_lookup: finish_lookup: /* we _can_ be in RCU mode here */ error = -ENOENT; - if (d_is_negative(path->dentry)) { + if (!inode || d_is_negative(path->dentry)) { path_to_nameidata(path, nd); goto out; } @@ -3008,11 +3045,10 @@ finish_open: } audit_inode(name, nd->path.dentry, 0); error = -EISDIR; - if ((open_flag & O_CREAT) && - (d_is_directory(nd->path.dentry) || d_is_autodir(nd->path.dentry))) + if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry)) goto out; error = -ENOTDIR; - if ((nd->flags & LOOKUP_DIRECTORY) && !d_is_directory(nd->path.dentry)) + if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry)) goto out; if (!S_ISREG(nd->inode->i_mode)) will_truncate = false; @@ -3368,6 +3404,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) fsnotify_create(dir, dentry); return error; } +EXPORT_SYMBOL(vfs_mknod); static int may_mknod(umode_t mode) { @@ -3457,6 +3494,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) fsnotify_mkdir(dir, dentry); return error; } +EXPORT_SYMBOL(vfs_mkdir); SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) { @@ -3511,6 +3549,7 @@ void dentry_unhash(struct dentry *dentry) __d_drop(dentry); spin_unlock(&dentry->d_lock); } +EXPORT_SYMBOL(dentry_unhash); int vfs_rmdir(struct inode *dir, struct dentry *dentry) { @@ -3548,6 +3587,7 @@ out: d_delete(dentry); return error; } +EXPORT_SYMBOL(vfs_rmdir); static long do_rmdir(int dfd, const char __user *pathname) { @@ -3665,6 +3705,7 @@ out: return error; } +EXPORT_SYMBOL(vfs_unlink); /* * Make sure that the actual truncation of the file will occur outside its @@ -3736,7 +3777,7 @@ exit1: slashes: if (d_is_negative(dentry)) error = -ENOENT; - else if (d_is_directory(dentry) || d_is_autodir(dentry)) + else if (d_is_dir(dentry)) error = -EISDIR; else error = -ENOTDIR; @@ -3778,6 +3819,7 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) fsnotify_create(dir, dentry); return error; } +EXPORT_SYMBOL(vfs_symlink); SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, int, newdfd, const char __user *, newname) @@ -3886,6 +3928,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de fsnotify_link(dir, inode, new_dentry); return error; } +EXPORT_SYMBOL(vfs_link); /* * Hardlinks are often used in delicate situations. We avoid @@ -3945,10 +3988,13 @@ out_dput: done_path_create(&new_path, new_dentry); if (delegated_inode) { error = break_deleg_wait(&delegated_inode); - if (!error) + if (!error) { + path_put(&old_path); goto retry; + } } if (retry_estale(error, how)) { + path_put(&old_path); how |= LOOKUP_REVAL; goto retry; } @@ -3963,11 +4009,32 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0); } -/* +/** + * vfs_rename - rename a filesystem object + * @old_dir: parent of source + * @old_dentry: source + * @new_dir: parent of destination + * @new_dentry: destination + * @delegated_inode: returns an inode needing a delegation break + * @flags: rename flags + * + * The caller must hold multiple mutexes--see lock_rename()). + * + * If vfs_rename discovers a delegation in need of breaking at either + * the source or destination, it will return -EWOULDBLOCK and return a + * reference to the inode in delegated_inode. The caller should then + * break the delegation and retry. Because breaking a delegation may + * take a long time, the caller should drop all locks before doing + * so. + * + * Alternatively, a caller may pass NULL for delegated_inode. This may + * be appropriate for callers that expect the underlying filesystem not + * to be NFS exported. + * * The worst of all namespace operations - renaming directory. "Perverted" * doesn't even start to describe it. Somebody in UCB had a heck of a trip... * Problems: - * a) we can get into loop creation. Check is done in is_subdir(). + * a) we can get into loop creation. * b) race potential - two innocent renames can create a loop together. * That's where 4.4 screws up. Current fix: serialization on * sb->s_vfs_rename_mutex. We might be more accurate, but that's another @@ -3991,163 +4058,141 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname * ->i_mutex on parents, which works but leads to some truly excessive * locking]. */ -static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) +int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + struct inode **delegated_inode, unsigned int flags) { - int error = 0; + int error; + bool is_dir = d_is_dir(old_dentry); + const unsigned char *old_name; + struct inode *source = old_dentry->d_inode; struct inode *target = new_dentry->d_inode; + bool new_is_dir = false; unsigned max_links = new_dir->i_sb->s_max_links; + if (source == target) + return 0; + + error = may_delete(old_dir, old_dentry, is_dir); + if (error) + return error; + + if (!target) { + error = may_create(new_dir, new_dentry); + } else { + new_is_dir = d_is_dir(new_dentry); + + if (!(flags & RENAME_EXCHANGE)) + error = may_delete(new_dir, new_dentry, is_dir); + else + error = may_delete(new_dir, new_dentry, new_is_dir); + } + if (error) + return error; + + if (!old_dir->i_op->rename && !old_dir->i_op->rename2) + return -EPERM; + + if (flags && !old_dir->i_op->rename2) + return -EINVAL; + /* * If we are going to change the parent - check write permissions, * we'll need to flip '..'. */ if (new_dir != old_dir) { - error = inode_permission(old_dentry->d_inode, MAY_WRITE); - if (error) - return error; + if (is_dir) { + error = inode_permission(source, MAY_WRITE); + if (error) + return error; + } + if ((flags & RENAME_EXCHANGE) && new_is_dir) { + error = inode_permission(target, MAY_WRITE); + if (error) + return error; + } } - error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); + error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry, + flags); if (error) return error; + old_name = fsnotify_oldname_init(old_dentry->d_name.name); dget(new_dentry); - if (target) + if (!is_dir || (flags & RENAME_EXCHANGE)) + lock_two_nondirectories(source, target); + else if (target) mutex_lock(&target->i_mutex); error = -EBUSY; if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) goto out; - error = -EMLINK; - if (max_links && !target && new_dir != old_dir && - new_dir->i_nlink >= max_links) - goto out; - - if (target) + if (max_links && new_dir != old_dir) { + error = -EMLINK; + if (is_dir && !new_is_dir && new_dir->i_nlink >= max_links) + goto out; + if ((flags & RENAME_EXCHANGE) && !is_dir && new_is_dir && + old_dir->i_nlink >= max_links) + goto out; + } + if (is_dir && !(flags & RENAME_EXCHANGE) && target) shrink_dcache_parent(new_dentry); - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); - if (error) - goto out; - - if (target) { - target->i_flags |= S_DEAD; - dont_mount(new_dentry); + if (!is_dir) { + error = try_break_deleg(source, delegated_inode); + if (error) + goto out; } -out: - if (target) - mutex_unlock(&target->i_mutex); - dput(new_dentry); - if (!error) - if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) - d_move(old_dentry,new_dentry); - return error; -} - -static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry, - struct inode **delegated_inode) -{ - struct inode *target = new_dentry->d_inode; - struct inode *source = old_dentry->d_inode; - int error; - - error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); - if (error) - return error; - - dget(new_dentry); - lock_two_nondirectories(source, target); - - error = -EBUSY; - if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) - goto out; - - error = try_break_deleg(source, delegated_inode); - if (error) - goto out; - if (target) { + if (target && !new_is_dir) { error = try_break_deleg(target, delegated_inode); if (error) goto out; } - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); + if (!old_dir->i_op->rename2) { + error = old_dir->i_op->rename(old_dir, old_dentry, + new_dir, new_dentry); + } else { + WARN_ON(old_dir->i_op->rename != NULL); + error = old_dir->i_op->rename2(old_dir, old_dentry, + new_dir, new_dentry, flags); + } if (error) goto out; - if (target) + if (!(flags & RENAME_EXCHANGE) && target) { + if (is_dir) + target->i_flags |= S_DEAD; dont_mount(new_dentry); - if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) - d_move(old_dentry, new_dentry); + } + if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) { + if (!(flags & RENAME_EXCHANGE)) + d_move(old_dentry, new_dentry); + else + d_exchange(old_dentry, new_dentry); + } out: - unlock_two_nondirectories(source, target); + if (!is_dir || (flags & RENAME_EXCHANGE)) + unlock_two_nondirectories(source, target); + else if (target) + mutex_unlock(&target->i_mutex); dput(new_dentry); - return error; -} - -/** - * vfs_rename - rename a filesystem object - * @old_dir: parent of source - * @old_dentry: source - * @new_dir: parent of destination - * @new_dentry: destination - * @delegated_inode: returns an inode needing a delegation break - * - * The caller must hold multiple mutexes--see lock_rename()). - * - * If vfs_rename discovers a delegation in need of breaking at either - * the source or destination, it will return -EWOULDBLOCK and return a - * reference to the inode in delegated_inode. The caller should then - * break the delegation and retry. Because breaking a delegation may - * take a long time, the caller should drop all locks before doing - * so. - * - * Alternatively, a caller may pass NULL for delegated_inode. This may - * be appropriate for callers that expect the underlying filesystem not - * to be NFS exported. - */ -int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry, - struct inode **delegated_inode) -{ - int error; - int is_dir = d_is_directory(old_dentry) || d_is_autodir(old_dentry); - const unsigned char *old_name; - - if (old_dentry->d_inode == new_dentry->d_inode) - return 0; - - error = may_delete(old_dir, old_dentry, is_dir); - if (error) - return error; - - if (!new_dentry->d_inode) - error = may_create(new_dir, new_dentry); - else - error = may_delete(new_dir, new_dentry, is_dir); - if (error) - return error; - - if (!old_dir->i_op->rename) - return -EPERM; - - old_name = fsnotify_oldname_init(old_dentry->d_name.name); - - if (is_dir) - error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); - else - error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,delegated_inode); - if (!error) + if (!error) { fsnotify_move(old_dir, new_dir, old_name, is_dir, - new_dentry->d_inode, old_dentry); + !(flags & RENAME_EXCHANGE) ? target : NULL, old_dentry); + if (flags & RENAME_EXCHANGE) { + fsnotify_move(new_dir, old_dir, old_dentry->d_name.name, + new_is_dir, NULL, new_dentry); + } + } fsnotify_oldname_free(old_name); return error; } +EXPORT_SYMBOL(vfs_rename); -SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, - int, newdfd, const char __user *, newname) +SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, + int, newdfd, const char __user *, newname, unsigned int, flags) { struct dentry *old_dir, *new_dir; struct dentry *old_dentry, *new_dentry; @@ -4159,6 +4204,13 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, unsigned int lookup_flags = 0; bool should_retry = false; int error; + + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) + return -EINVAL; + + if ((flags & RENAME_NOREPLACE) && (flags & RENAME_EXCHANGE)) + return -EINVAL; + retry: from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags); if (IS_ERR(from)) { @@ -4182,6 +4234,8 @@ retry: goto exit2; new_dir = newnd.path.dentry; + if (flags & RENAME_NOREPLACE) + error = -EEXIST; if (newnd.last_type != LAST_NORM) goto exit2; @@ -4191,7 +4245,8 @@ retry: oldnd.flags &= ~LOOKUP_PARENT; newnd.flags &= ~LOOKUP_PARENT; - newnd.flags |= LOOKUP_RENAME_TARGET; + if (!(flags & RENAME_EXCHANGE)) + newnd.flags |= LOOKUP_RENAME_TARGET; retry_deleg: trap = lock_rename(new_dir, old_dir); @@ -4204,34 +4259,49 @@ retry_deleg: error = -ENOENT; if (d_is_negative(old_dentry)) goto exit4; + new_dentry = lookup_hash(&newnd); + error = PTR_ERR(new_dentry); + if (IS_ERR(new_dentry)) + goto exit4; + error = -EEXIST; + if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) + goto exit5; + if (flags & RENAME_EXCHANGE) { + error = -ENOENT; + if (d_is_negative(new_dentry)) + goto exit5; + + if (!d_is_dir(new_dentry)) { + error = -ENOTDIR; + if (newnd.last.name[newnd.last.len]) + goto exit5; + } + } /* unless the source is a directory trailing slashes give -ENOTDIR */ - if (!d_is_directory(old_dentry) && !d_is_autodir(old_dentry)) { + if (!d_is_dir(old_dentry)) { error = -ENOTDIR; if (oldnd.last.name[oldnd.last.len]) - goto exit4; - if (newnd.last.name[newnd.last.len]) - goto exit4; + goto exit5; + if (!(flags & RENAME_EXCHANGE) && newnd.last.name[newnd.last.len]) + goto exit5; } /* source should not be ancestor of target */ error = -EINVAL; if (old_dentry == trap) - goto exit4; - new_dentry = lookup_hash(&newnd); - error = PTR_ERR(new_dentry); - if (IS_ERR(new_dentry)) - goto exit4; + goto exit5; /* target should not be an ancestor of source */ - error = -ENOTEMPTY; + if (!(flags & RENAME_EXCHANGE)) + error = -ENOTEMPTY; if (new_dentry == trap) goto exit5; error = security_path_rename(&oldnd.path, old_dentry, - &newnd.path, new_dentry); + &newnd.path, new_dentry, flags); if (error) goto exit5; error = vfs_rename(old_dir->d_inode, old_dentry, - new_dir->d_inode, new_dentry, - &delegated_inode); + new_dir->d_inode, new_dentry, + &delegated_inode, flags); exit5: dput(new_dentry); exit4: @@ -4261,16 +4331,20 @@ exit: return error; } -SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname) +SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, + int, newdfd, const char __user *, newname) { - return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname); + return sys_renameat2(olddfd, oldname, newdfd, newname, 0); } -int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) +SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname) { - int len; + return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); +} - len = PTR_ERR(link); +int readlink_copy(char __user *buffer, int buflen, const char *link) +{ + int len = PTR_ERR(link); if (IS_ERR(link)) goto out; @@ -4282,6 +4356,7 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const c out: return len; } +EXPORT_SYMBOL(readlink_copy); /* * A helper for ->readlink(). This should be used *ONLY* for symlinks that @@ -4299,11 +4374,12 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) if (IS_ERR(cookie)) return PTR_ERR(cookie); - res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); + res = readlink_copy(buffer, buflen, nd_get_link(&nd)); if (dentry->d_inode->i_op->put_link) dentry->d_inode->i_op->put_link(dentry, &nd, cookie); return res; } +EXPORT_SYMBOL(generic_readlink); /* get the link contents into pagecache */ static char *page_getlink(struct dentry * dentry, struct page **ppage) @@ -4323,14 +4399,14 @@ static char *page_getlink(struct dentry * dentry, struct page **ppage) int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) { struct page *page = NULL; - char *s = page_getlink(dentry, &page); - int res = vfs_readlink(dentry,buffer,buflen,s); + int res = readlink_copy(buffer, buflen, page_getlink(dentry, &page)); if (page) { kunmap(page); page_cache_release(page); } return res; } +EXPORT_SYMBOL(page_readlink); void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd) { @@ -4338,6 +4414,7 @@ void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd) nd_set_link(nd, page_getlink(dentry, &page)); return page; } +EXPORT_SYMBOL(page_follow_link_light); void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) { @@ -4348,6 +4425,7 @@ void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) page_cache_release(page); } } +EXPORT_SYMBOL(page_put_link); /* * The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS @@ -4385,45 +4463,18 @@ retry: fail: return err; } +EXPORT_SYMBOL(__page_symlink); int page_symlink(struct inode *inode, const char *symname, int len) { return __page_symlink(inode, symname, len, !(mapping_gfp_mask(inode->i_mapping) & __GFP_FS)); } +EXPORT_SYMBOL(page_symlink); const struct inode_operations page_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = page_follow_link_light, .put_link = page_put_link, }; - -EXPORT_SYMBOL(user_path_at); -EXPORT_SYMBOL(follow_down_one); -EXPORT_SYMBOL(follow_down); -EXPORT_SYMBOL(follow_up); -EXPORT_SYMBOL(get_write_access); /* nfsd */ -EXPORT_SYMBOL(lock_rename); -EXPORT_SYMBOL(lookup_one_len); -EXPORT_SYMBOL(page_follow_link_light); -EXPORT_SYMBOL(page_put_link); -EXPORT_SYMBOL(page_readlink); -EXPORT_SYMBOL(__page_symlink); -EXPORT_SYMBOL(page_symlink); EXPORT_SYMBOL(page_symlink_inode_operations); -EXPORT_SYMBOL(kern_path); -EXPORT_SYMBOL(vfs_path_lookup); -EXPORT_SYMBOL(inode_permission); -EXPORT_SYMBOL(unlock_rename); -EXPORT_SYMBOL(vfs_create); -EXPORT_SYMBOL(vfs_link); -EXPORT_SYMBOL(vfs_mkdir); -EXPORT_SYMBOL(vfs_mknod); -EXPORT_SYMBOL(generic_permission); -EXPORT_SYMBOL(vfs_readlink); -EXPORT_SYMBOL(vfs_rename); -EXPORT_SYMBOL(vfs_rmdir); -EXPORT_SYMBOL(vfs_symlink); -EXPORT_SYMBOL(vfs_unlink); -EXPORT_SYMBOL(dentry_unhash); -EXPORT_SYMBOL(generic_readlink); |