diff options
Diffstat (limited to 'fs/overlayfs')
-rw-r--r-- | fs/overlayfs/Kconfig | 1 | ||||
-rw-r--r-- | fs/overlayfs/copy_up.c | 132 | ||||
-rw-r--r-- | fs/overlayfs/dir.c | 88 | ||||
-rw-r--r-- | fs/overlayfs/inode.c | 115 | ||||
-rw-r--r-- | fs/overlayfs/namei.c | 151 | ||||
-rw-r--r-- | fs/overlayfs/overlayfs.h | 57 | ||||
-rw-r--r-- | fs/overlayfs/ovl_entry.h | 4 | ||||
-rw-r--r-- | fs/overlayfs/super.c | 58 | ||||
-rw-r--r-- | fs/overlayfs/util.c | 91 |
9 files changed, 600 insertions, 97 deletions
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index 0daac5112f7a..c0c9683934b7 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -1,5 +1,6 @@ config OVERLAY_FS tristate "Overlay filesystem support" + select EXPORTFS help An overlay filesystem combines two filesystems - an 'upper' filesystem and a 'lower' filesystem. When a name exists in both filesystems, the diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 906ea6c93260..e5869f91b3ab 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -20,6 +20,7 @@ #include <linux/namei.h> #include <linux/fdtable.h> #include <linux/ratelimit.h> +#include <linux/exportfs.h> #include "overlayfs.h" #include "ovl_entry.h" @@ -232,6 +233,82 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) return err; } +static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_t *uuid) +{ + struct ovl_fh *fh; + int fh_type, fh_len, dwords; + void *buf; + int buflen = MAX_HANDLE_SZ; + + buf = kmalloc(buflen, GFP_TEMPORARY); + if (!buf) + return ERR_PTR(-ENOMEM); + + /* + * We encode a non-connectable file handle for non-dir, because we + * only need to find the lower inode number and we don't want to pay + * the price of reconnecting the dentry. 
+ */ + dwords = buflen >> 2; + fh_type = exportfs_encode_fh(lower, buf, &dwords, 0); + buflen = (dwords << 2); + + fh = ERR_PTR(-EIO); + if (WARN_ON(fh_type < 0) || + WARN_ON(buflen > MAX_HANDLE_SZ) || + WARN_ON(fh_type == FILEID_INVALID)) + goto out; + + BUILD_BUG_ON(MAX_HANDLE_SZ + offsetof(struct ovl_fh, fid) > 255); + fh_len = offsetof(struct ovl_fh, fid) + buflen; + fh = kmalloc(fh_len, GFP_KERNEL); + if (!fh) { + fh = ERR_PTR(-ENOMEM); + goto out; + } + + fh->version = OVL_FH_VERSION; + fh->magic = OVL_FH_MAGIC; + fh->type = fh_type; + fh->flags = OVL_FH_FLAG_CPU_ENDIAN; + fh->len = fh_len; + fh->uuid = *uuid; + memcpy(fh->fid, buf, buflen); + +out: + kfree(buf); + return fh; +} + +static int ovl_set_origin(struct dentry *dentry, struct dentry *lower, + struct dentry *upper) +{ + struct super_block *sb = lower->d_sb; + const struct ovl_fh *fh = NULL; + int err; + + /* + * When lower layer doesn't support export operations store a 'null' fh, + * so we can use the overlay.origin xattr to distinguish between a copy + * up and a pure upper inode. + */ + if (sb->s_export_op && sb->s_export_op->fh_to_dentry && + !uuid_is_null(&sb->s_uuid)) { + fh = ovl_encode_fh(lower, &sb->s_uuid); + if (IS_ERR(fh)) + return PTR_ERR(fh); + } + + /* + * Do not fail when upper doesn't support xattrs. + */ + err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh, + fh ? 
fh->len : 0, 0); + kfree(fh); + + return err; +} + static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, struct dentry *dentry, struct path *lowerpath, struct kstat *stat, const char *link, @@ -252,15 +329,9 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, .link = link }; - upper = lookup_one_len(dentry->d_name.name, upperdir, - dentry->d_name.len); - err = PTR_ERR(upper); - if (IS_ERR(upper)) - goto out; - err = security_inode_copy_up(dentry, &new_creds); if (err < 0) - goto out1; + goto out; if (new_creds) old_creds = override_creds(new_creds); @@ -268,13 +339,14 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, if (tmpfile) temp = ovl_do_tmpfile(upperdir, stat->mode); else - temp = ovl_lookup_temp(workdir, dentry); - err = PTR_ERR(temp); - if (IS_ERR(temp)) - goto out1; - + temp = ovl_lookup_temp(workdir); err = 0; - if (!tmpfile) + if (IS_ERR(temp)) { + err = PTR_ERR(temp); + temp = NULL; + } + + if (!err && !tmpfile) err = ovl_create_real(wdir, temp, &cattr, NULL, true); if (new_creds) { @@ -283,7 +355,7 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, } if (err) - goto out2; + goto out; if (S_ISREG(stat->mode)) { struct path upperpath; @@ -316,6 +388,27 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, if (err) goto out_cleanup; + /* + * Store identifier of lower inode in upper inode xattr to + * allow lookup of the copy up origin inode. + * + * Don't set origin when we are breaking the association with a lower + * hard link. 
+ */ + if (S_ISDIR(stat->mode) || stat->nlink == 1) { + err = ovl_set_origin(dentry, lowerpath->dentry, temp); + if (err) + goto out_cleanup; + } + + upper = lookup_one_len(dentry->d_name.name, upperdir, + dentry->d_name.len); + if (IS_ERR(upper)) { + err = PTR_ERR(upper); + upper = NULL; + goto out_cleanup; + } + if (tmpfile) err = ovl_do_link(temp, udir, upper, true); else @@ -329,17 +422,15 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, /* Restore timestamps on parent (best effort) */ ovl_set_timestamps(upperdir, pstat); -out2: +out: dput(temp); -out1: dput(upper); -out: return err; out_cleanup: if (!tmpfile) ovl_cleanup(wdir, temp); - goto out2; + goto out; } /* @@ -372,6 +463,11 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, ovl_path_upper(parent, &parentpath); upperdir = parentpath.dentry; + /* Mark parent "impure" because it may now contain non-pure upper */ + err = ovl_set_impure(parent, upperdir); + if (err) + return err; + err = vfs_getattr(&parentpath, &pstat, STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT); if (err) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 6515796460df..a63a71656e9b 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -41,7 +41,7 @@ void ovl_cleanup(struct inode *wdir, struct dentry *wdentry) } } -struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry) +struct dentry *ovl_lookup_temp(struct dentry *workdir) { struct dentry *temp; char name[20]; @@ -68,7 +68,7 @@ static struct dentry *ovl_whiteout(struct dentry *workdir, struct dentry *whiteout; struct inode *wdir = workdir->d_inode; - whiteout = ovl_lookup_temp(workdir, dentry); + whiteout = ovl_lookup_temp(workdir); if (IS_ERR(whiteout)) return whiteout; @@ -127,45 +127,26 @@ int ovl_create_real(struct inode *dir, struct dentry *newdentry, return err; } -static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry) +static int ovl_set_opaque_xerr(struct dentry 
*dentry, struct dentry *upper, + int xerr) { int err; - err = ovl_do_setxattr(upperdentry, OVL_XATTR_OPAQUE, "y", 1, 0); + err = ovl_check_setxattr(dentry, upper, OVL_XATTR_OPAQUE, "y", 1, xerr); if (!err) ovl_dentry_set_opaque(dentry); return err; } -static int ovl_dir_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int flags) +static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry) { - struct dentry *dentry = path->dentry; - int err; - enum ovl_path_type type; - struct path realpath; - const struct cred *old_cred; - - type = ovl_path_real(dentry, &realpath); - old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_getattr(&realpath, stat, request_mask, flags); - revert_creds(old_cred); - if (err) - return err; - - stat->dev = dentry->d_sb->s_dev; - stat->ino = dentry->d_inode->i_ino; - /* - * It's probably not worth it to count subdirs to get the - * correct link count. nlink=1 seems to pacify 'find' and - * other utilities. + * Fail with -EIO when trying to create opaque dir and upper doesn't + * support xattrs. ovl_rename() calls ovl_set_opaque_xerr(-EXDEV) to + * return a specific error for noxattr case. 
*/ - if (OVL_TYPE_MERGE(type)) - stat->nlink = 1; - - return 0; + return ovl_set_opaque_xerr(dentry, upperdentry, -EIO); } /* Common operations required to be done after creation of file on upper */ @@ -182,6 +163,9 @@ static void ovl_instantiate(struct dentry *dentry, struct inode *inode, inc_nlink(inode); } d_instantiate(dentry, inode); + /* Force lookup of new upper hardlink to find its lower */ + if (hardlink) + d_drop(dentry); } static bool ovl_type_merge(struct dentry *dentry) @@ -189,6 +173,11 @@ static bool ovl_type_merge(struct dentry *dentry) return OVL_TYPE_MERGE(ovl_path_type(dentry)); } +static bool ovl_type_origin(struct dentry *dentry) +{ + return OVL_TYPE_ORIGIN(ovl_path_type(dentry)); +} + static int ovl_create_upper(struct dentry *dentry, struct inode *inode, struct cattr *attr, struct dentry *hardlink) { @@ -210,7 +199,7 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode, if (err) goto out_dput; - if (ovl_type_merge(dentry->d_parent)) { + if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry)) { /* Setting opaque here is just an optimization, allow to fail */ ovl_set_opaque(dentry, newdentry); } @@ -277,7 +266,7 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, if (upper->d_parent->d_inode != udir) goto out_unlock; - opaquedir = ovl_lookup_temp(workdir, dentry); + opaquedir = ovl_lookup_temp(workdir); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) goto out_unlock; @@ -409,7 +398,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, if (err) goto out; - newdentry = ovl_lookup_temp(workdir, dentry); + newdentry = ovl_lookup_temp(workdir); err = PTR_ERR(newdentry); if (IS_ERR(newdentry)) goto out_unlock; @@ -873,18 +862,16 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir) if (IS_ERR(redirect)) return PTR_ERR(redirect); - err = ovl_do_setxattr(ovl_dentry_upper(dentry), OVL_XATTR_REDIRECT, - redirect, strlen(redirect), 0); + err = ovl_check_setxattr(dentry, 
ovl_dentry_upper(dentry), + OVL_XATTR_REDIRECT, + redirect, strlen(redirect), -EXDEV); if (!err) { spin_lock(&dentry->d_lock); ovl_dentry_set_redirect(dentry, redirect); spin_unlock(&dentry->d_lock); } else { kfree(redirect); - if (err == -EOPNOTSUPP) - ovl_clear_redirect_dir(dentry->d_sb); - else - pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err); + pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err); /* Fall back to userspace copy-up */ err = -EXDEV; } @@ -970,6 +957,25 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, old_upperdir = ovl_dentry_upper(old->d_parent); new_upperdir = ovl_dentry_upper(new->d_parent); + if (!samedir) { + /* + * When moving a merge dir or non-dir with copy up origin into + * a new parent, we are marking the new parent dir "impure". + * When ovl_iterate() iterates an "impure" upper dir, it will + * lookup the origin inodes of the entries to fill d_ino. + */ + if (ovl_type_origin(old)) { + err = ovl_set_impure(new->d_parent, new_upperdir); + if (err) + goto out_revert_creds; + } + if (!overwrite && ovl_type_origin(new)) { + err = ovl_set_impure(old->d_parent, old_upperdir); + if (err) + goto out_revert_creds; + } + } + trap = lock_rename(new_upperdir, old_upperdir); olddentry = lookup_one_len(old->d_name.name, old_upperdir, @@ -1019,7 +1025,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, if (ovl_type_merge_or_lower(old)) err = ovl_set_redirect(old, samedir); else if (!old_opaque && ovl_type_merge(new->d_parent)) - err = ovl_set_opaque(old, olddentry); + err = ovl_set_opaque_xerr(old, olddentry, -EXDEV); if (err) goto out_dput; } @@ -1027,7 +1033,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, if (ovl_type_merge_or_lower(new)) err = ovl_set_redirect(new, samedir); else if (!new_opaque && ovl_type_merge(old->d_parent)) - err = ovl_set_opaque(new, newdentry); + err = ovl_set_opaque_xerr(new, newdentry, -EXDEV); if (err) goto out_dput; } @@ -1070,7 
+1076,7 @@ const struct inode_operations ovl_dir_inode_operations = { .create = ovl_create, .mknod = ovl_mknod, .permission = ovl_permission, - .getattr = ovl_dir_getattr, + .getattr = ovl_getattr, .listxattr = ovl_listxattr, .get_acl = ovl_get_acl, .update_time = ovl_update_time, diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index f8fe6bf2036d..d613e2c41242 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -57,18 +57,78 @@ out: return err; } -static int ovl_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int flags) +int ovl_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { struct dentry *dentry = path->dentry; + enum ovl_path_type type; struct path realpath; const struct cred *old_cred; + bool is_dir = S_ISDIR(dentry->d_inode->i_mode); int err; - ovl_path_real(dentry, &realpath); + type = ovl_path_real(dentry, &realpath); old_cred = ovl_override_creds(dentry->d_sb); err = vfs_getattr(&realpath, stat, request_mask, flags); + if (err) + goto out; + + /* + * When all layers are on the same fs, all real inode numbers are + * unique, so we use the overlay st_dev, which is friendly to du -x. + * + * We also use st_ino of the copy up origin, if we know it. + * This guarantees constant st_dev/st_ino across copy up. + * + * If filesystem supports NFS export ops, this also guarantees + * persistent st_ino across mount cycle. + */ + if (ovl_same_sb(dentry->d_sb)) { + if (OVL_TYPE_ORIGIN(type)) { + struct kstat lowerstat; + u32 lowermask = STATX_INO | (!is_dir ? STATX_NLINK : 0); + + ovl_path_lower(dentry, &realpath); + err = vfs_getattr(&realpath, &lowerstat, + lowermask, flags); + if (err) + goto out; + + WARN_ON_ONCE(stat->dev != lowerstat.dev); + /* + * Lower hardlinks are broken on copy up to different + * upper files, so we cannot use the lower origin st_ino + * for those different files, even for the same fs case. 
+ */ + if (is_dir || lowerstat.nlink == 1) + stat->ino = lowerstat.ino; + } + stat->dev = dentry->d_sb->s_dev; + } else if (is_dir) { + /* + * If not all layers are on the same fs the pair {real st_ino; + * overlay st_dev} is not unique, so use the non persistent + * overlay st_ino. + * + * Always use the overlay st_dev for directories, so 'find + * -xdev' will scan the entire overlay mount and won't cross the + * overlay mount boundaries. + */ + stat->dev = dentry->d_sb->s_dev; + stat->ino = dentry->d_inode->i_ino; + } + + /* + * It's probably not worth it to count subdirs to get the + * correct link count. nlink=1 seems to pacify 'find' and + * other utilities. + */ + if (is_dir && OVL_TYPE_MERGE(type)) + stat->nlink = 1; + +out: revert_creds(old_cred); + return err; } @@ -180,6 +240,16 @@ int ovl_xattr_get(struct dentry *dentry, const char *name, return res; } +static bool ovl_can_list(const char *s) +{ + /* List all non-trusted xattrs */ + if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) + return true; + + /* Never list trusted.overlay, list other trusted for superuser only */ + return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN); +} + ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) { struct dentry *realdentry = ovl_dentry_real(dentry); @@ -203,7 +273,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) return -EIO; len -= slen; - if (ovl_is_private_xattr(s)) { + if (!ovl_can_list(s)) { res -= slen; memmove(s, s + slen, len); } else { @@ -303,6 +373,41 @@ static const struct inode_operations ovl_symlink_inode_operations = { .update_time = ovl_update_time, }; +/* + * It is possible to stack overlayfs instance on top of another + * overlayfs instance as lower layer. We need to annotate the + * stackable i_mutex locks according to stack level of the super + * block instance. An overlayfs instance can never be in stack + * depth 0 (there is always a real fs below it). 
An overlayfs + * inode lock will use the lockdep annotation ovl_i_mutex_key[depth]. + * + * For example, here is a snip from /proc/lockdep_chains after + * dir_iterate of nested overlayfs: + * + * [...] &ovl_i_mutex_dir_key[depth] (stack_depth=2) + * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1) + * [...] &type->i_mutex_dir_key (stack_depth=0) + */ +#define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH + +static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode) +{ +#ifdef CONFIG_LOCKDEP + static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING]; + static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING]; + + int depth = inode->i_sb->s_stack_depth - 1; + + if (WARN_ON_ONCE(depth < 0 || depth >= OVL_MAX_NESTING)) + depth = 0; + + if (S_ISDIR(inode->i_mode)) + lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]); + else + lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]); +#endif +} + static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev) { inode->i_ino = get_next_ino(); @@ -312,6 +417,8 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev) inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE; #endif + ovl_lockdep_annotate_inode_mutex_key(inode); + switch (mode & S_IFMT) { case S_IFREG: inode->i_op = &ovl_file_inode_operations; diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index b8b077821fb0..de0d4f742f36 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -12,6 +12,8 @@ #include <linux/namei.h> #include <linux/xattr.h> #include <linux/ratelimit.h> +#include <linux/mount.h> +#include <linux/exportfs.h> #include "overlayfs.h" #include "ovl_entry.h" @@ -81,19 +83,93 @@ invalid: goto err_free; } -static bool ovl_is_opaquedir(struct dentry *dentry) +static int ovl_acceptable(void *ctx, struct dentry *dentry) +{ + return 1; +} + +static struct dentry *ovl_get_origin(struct dentry *dentry, + struct vfsmount *mnt) { int res; - char val; + struct ovl_fh 
*fh = NULL; + struct dentry *origin = NULL; + int bytes; - if (!d_is_dir(dentry)) - return false; + res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0); + if (res < 0) { + if (res == -ENODATA || res == -EOPNOTSUPP) + return NULL; + goto fail; + } + /* Zero size value means "copied up but origin unknown" */ + if (res == 0) + return NULL; - res = vfs_getxattr(dentry, OVL_XATTR_OPAQUE, &val, 1); - if (res == 1 && val == 'y') - return true; + fh = kzalloc(res, GFP_TEMPORARY); + if (!fh) + return ERR_PTR(-ENOMEM); + + res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, fh, res); + if (res < 0) + goto fail; + + if (res < sizeof(struct ovl_fh) || res < fh->len) + goto invalid; + + if (fh->magic != OVL_FH_MAGIC) + goto invalid; + + /* Treat larger version and unknown flags as "origin unknown" */ + if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL) + goto out; + + /* Treat endianness mismatch as "origin unknown" */ + if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) && + (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN) + goto out; + + bytes = (fh->len - offsetof(struct ovl_fh, fid)); + + /* + * Make sure that the stored uuid matches the uuid of the lower + * layer where file handle will be decoded. 
+ */ + if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid)) + goto out; - return false; + origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid, + bytes >> 2, (int)fh->type, + ovl_acceptable, NULL); + if (IS_ERR(origin)) { + /* Treat stale file handle as "origin unknown" */ + if (origin == ERR_PTR(-ESTALE)) + origin = NULL; + goto out; + } + + if (ovl_dentry_weird(origin) || + ((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT)) { + dput(origin); + origin = NULL; + goto invalid; + } + +out: + kfree(fh); + return origin; + +fail: + pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res); + goto out; +invalid: + pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh); + goto out; +} + +static bool ovl_is_opaquedir(struct dentry *dentry) +{ + return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE); } static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, @@ -192,6 +268,45 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d, return 0; } + +static int ovl_check_origin(struct dentry *dentry, struct dentry *upperdentry, + struct path **stackp, unsigned int *ctrp) +{ + struct super_block *same_sb = ovl_same_sb(dentry->d_sb); + struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata; + struct vfsmount *mnt; + struct dentry *origin; + + if (!same_sb || !roe->numlower) + return 0; + + /* + * Since all layers are on the same fs, we use the first layer for + * decoding the file handle. We may get a disconnected dentry, + * which is fine, because we only need to hold the origin inode in + * cache and use its inode number. We may even get a connected dentry, + * that is not under the first layer's root. That is also fine for + * using its inode number - it's the same as if we held a reference + * to a dentry in first layer that was moved under us. 
+ */ + mnt = roe->lowerstack[0].mnt; + + origin = ovl_get_origin(upperdentry, mnt); + if (IS_ERR_OR_NULL(origin)) + return PTR_ERR(origin); + + BUG_ON(*stackp || *ctrp); + *stackp = kmalloc(sizeof(struct path), GFP_TEMPORARY); + if (!*stackp) { + dput(origin); + return -ENOMEM; + } + **stackp = (struct path) { .dentry = origin, .mnt = mnt }; + *ctrp = 1; + + return 0; +} + /* * Returns next layer in stack starting from top. * Returns -1 if this is the last layer. @@ -220,11 +335,13 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, const struct cred *old_cred; struct ovl_fs *ofs = dentry->d_sb->s_fs_info; struct ovl_entry *poe = dentry->d_parent->d_fsdata; + struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata; struct path *stack = NULL; struct dentry *upperdir, *upperdentry = NULL; unsigned int ctr = 0; struct inode *inode = NULL; bool upperopaque = false; + bool upperimpure = false; char *upperredirect = NULL; struct dentry *this; unsigned int i; @@ -253,15 +370,24 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, err = -EREMOTE; goto out; } + if (upperdentry && !d.is_dir) { + BUG_ON(!d.stop || d.redirect); + err = ovl_check_origin(dentry, upperdentry, + &stack, &ctr); + if (err) + goto out; + } if (d.redirect) { upperredirect = kstrdup(d.redirect, GFP_KERNEL); if (!upperredirect) goto out_put_upper; if (d.redirect[0] == '/') - poe = dentry->d_sb->s_root->d_fsdata; + poe = roe; } upperopaque = d.opaque; + if (upperdentry && d.is_dir) + upperimpure = ovl_is_impuredir(upperdentry); } if (!d.stop && poe->numlower) { @@ -290,10 +416,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, if (d.stop) break; - if (d.redirect && - d.redirect[0] == '/' && - poe != dentry->d_sb->s_root->d_fsdata) { - poe = dentry->d_sb->s_root->d_fsdata; + if (d.redirect && d.redirect[0] == '/' && poe != roe) { + poe = roe; /* Find the current layer on the root dentry */ for (i = 0; i < poe->numlower; i++) @@ -332,6 +456,7 @@ 
struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, revert_creds(old_cred); oe->opaque = upperopaque; + oe->impure = upperimpure; oe->redirect = upperredirect; oe->__upperdentry = upperdentry; memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 741dc0b6931f..10863b4105fa 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -8,18 +8,57 @@ */ #include <linux/kernel.h> +#include <linux/uuid.h> enum ovl_path_type { __OVL_PATH_UPPER = (1 << 0), __OVL_PATH_MERGE = (1 << 1), + __OVL_PATH_ORIGIN = (1 << 2), }; #define OVL_TYPE_UPPER(type) ((type) & __OVL_PATH_UPPER) #define OVL_TYPE_MERGE(type) ((type) & __OVL_PATH_MERGE) +#define OVL_TYPE_ORIGIN(type) ((type) & __OVL_PATH_ORIGIN) #define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay." #define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque" #define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect" +#define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin" +#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure" + +/* + * The tuple (fh,uuid) is a universal unique identifier for a copy up origin, + * where: + * origin.fh - exported file handle of the lower file + * origin.uuid - uuid of the lower filesystem + */ +#define OVL_FH_VERSION 0 +#define OVL_FH_MAGIC 0xfb + +/* CPU byte order required for fid decoding: */ +#define OVL_FH_FLAG_BIG_ENDIAN (1 << 0) +#define OVL_FH_FLAG_ANY_ENDIAN (1 << 1) + +#define OVL_FH_FLAG_ALL (OVL_FH_FLAG_BIG_ENDIAN | OVL_FH_FLAG_ANY_ENDIAN) + +#if defined(__LITTLE_ENDIAN) +#define OVL_FH_FLAG_CPU_ENDIAN 0 +#elif defined(__BIG_ENDIAN) +#define OVL_FH_FLAG_CPU_ENDIAN OVL_FH_FLAG_BIG_ENDIAN +#else +#error Endianness not defined +#endif + +/* On-disk and in-memory format for redirect by file handle */ +struct ovl_fh { + u8 version; /* 0 */ + u8 magic; /* 0xfb */ + u8 len; /* size of this header + size of fid */ + u8 flags; /* OVL_FH_FLAG_* */ + u8 type; /* fid_type of fid */ + uuid_t uuid; /* uuid of 
filesystem */ + u8 fid[0]; /* file identifier */ +} __packed; #define OVL_ISUPPER_MASK 1UL @@ -151,6 +190,7 @@ int ovl_want_write(struct dentry *dentry); void ovl_drop_write(struct dentry *dentry); struct dentry *ovl_workdir(struct dentry *dentry); const struct cred *ovl_override_creds(struct super_block *sb); +struct super_block *ovl_same_sb(struct super_block *sb); struct ovl_entry *ovl_alloc_entry(unsigned int numlower); bool ovl_dentry_remote(struct dentry *dentry); bool ovl_dentry_weird(struct dentry *dentry); @@ -164,10 +204,10 @@ struct dentry *ovl_dentry_real(struct dentry *dentry); struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry); void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache); bool ovl_dentry_is_opaque(struct dentry *dentry); +bool ovl_dentry_is_impure(struct dentry *dentry); bool ovl_dentry_is_whiteout(struct dentry *dentry); void ovl_dentry_set_opaque(struct dentry *dentry); bool ovl_redirect_dir(struct super_block *sb); -void ovl_clear_redirect_dir(struct super_block *sb); const char *ovl_dentry_get_redirect(struct dentry *dentry); void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect); void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry); @@ -180,6 +220,17 @@ bool ovl_is_whiteout(struct dentry *dentry); struct file *ovl_path_open(struct path *path, int flags); int ovl_copy_up_start(struct dentry *dentry); void ovl_copy_up_end(struct dentry *dentry); +bool ovl_check_dir_xattr(struct dentry *dentry, const char *name); +int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, + const char *name, const void *value, size_t size, + int xerr); +int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry); + +static inline bool ovl_is_impuredir(struct dentry *dentry) +{ + return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE); +} + /* namei.c */ int ovl_path_next(int idx, struct dentry *dentry, struct path *path); @@ -197,6 +248,8 @@ void 
ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, /* inode.c */ int ovl_setattr(struct dentry *dentry, struct iattr *attr); +int ovl_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); int ovl_permission(struct inode *inode, int mask); int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); @@ -222,7 +275,7 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to) /* dir.c */ extern const struct inode_operations ovl_dir_inode_operations; -struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry); +struct dentry *ovl_lookup_temp(struct dentry *workdir); struct cattr { dev_t rdev; umode_t mode; diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 59614faa14c3..34bc4a9f5c61 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -28,7 +28,10 @@ struct ovl_fs { /* creds of process who forced instantiation of super block */ const struct cred *creator_cred; bool tmpfile; + bool noxattr; wait_queue_head_t copyup_wq; + /* sb common to all layers */ + struct super_block *same_sb; }; /* private information held for every overlayfs dentry */ @@ -40,6 +43,7 @@ struct ovl_entry { u64 version; const char *redirect; bool opaque; + bool impure; bool copying; }; struct rcu_head rcu; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index c9e70d39c1ea..4882ffb37bae 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -49,11 +49,28 @@ static void ovl_dentry_release(struct dentry *dentry) } } +static int ovl_check_append_only(struct inode *inode, int flag) +{ + /* + * This test was moot in vfs may_open() because overlay inode does + * not have the S_APPEND flag, so re-check on real upper inode + */ + if (IS_APPEND(inode)) { + if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND)) + return -EPERM; + if (flag & O_TRUNC) + return -EPERM; + } + + return 0; +} + static struct dentry *ovl_d_real(struct 
dentry *dentry, const struct inode *inode, unsigned int open_flags) { struct dentry *real; + int err; if (!d_is_reg(dentry)) { if (!inode || inode == d_inode(dentry)) @@ -65,15 +82,20 @@ static struct dentry *ovl_d_real(struct dentry *dentry, return dentry; if (open_flags) { - int err = ovl_open_maybe_copy_up(dentry, open_flags); - + err = ovl_open_maybe_copy_up(dentry, open_flags); if (err) return ERR_PTR(err); } real = ovl_dentry_upper(dentry); - if (real && (!inode || inode == d_inode(real))) + if (real && (!inode || inode == d_inode(real))) { + if (!inode) { + err = ovl_check_append_only(d_inode(real), open_flags); + if (err) + return ERR_PTR(err); + } return real; + } real = ovl_dentry_lower(dentry); if (!real) @@ -709,8 +731,8 @@ static const struct xattr_handler *ovl_xattr_handlers[] = { static int ovl_fill_super(struct super_block *sb, void *data, int silent) { - struct path upperpath = { NULL, NULL }; - struct path workpath = { NULL, NULL }; + struct path upperpath = { }; + struct path workpath = { }; struct dentry *root_dentry; struct inode *realinode; struct ovl_entry *oe; @@ -869,6 +891,19 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) dput(temp); else pr_warn("overlayfs: upper fs does not support tmpfile.\n"); + + /* + * Check if upper/work fs supports trusted.overlay.* + * xattr + */ + err = ovl_do_setxattr(ufs->workdir, OVL_XATTR_OPAQUE, + "0", 1, 0); + if (err) { + ufs->noxattr = true; + pr_warn("overlayfs: upper fs does not support xattr.\n"); + } else { + vfs_removexattr(ufs->workdir, OVL_XATTR_OPAQUE); + } } } @@ -892,11 +927,19 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) ufs->lower_mnt[ufs->numlower] = mnt; ufs->numlower++; + + /* Check if all lower layers are on same sb */ + if (i == 0) + ufs->same_sb = mnt->mnt_sb; + else if (ufs->same_sb != mnt->mnt_sb) + ufs->same_sb = NULL; } /* If the upper fs is nonexistent, we mark overlayfs r/o too */ if (!ufs->upper_mnt) sb->s_flags |= 
MS_RDONLY; + else if (ufs->upper_mnt->mnt_sb != ufs->same_sb) + ufs->same_sb = NULL; if (remote) sb->s_d_op = &ovl_reval_dentry_operations; @@ -931,7 +974,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) path_put(&workpath); kfree(lowertmp); - oe->__upperdentry = upperpath.dentry; + if (upperpath.dentry) { + oe->__upperdentry = upperpath.dentry; + oe->impure = ovl_is_impuredir(upperpath.dentry); + } for (i = 0; i < numlower; i++) { oe->lowerstack[i].dentry = stack[i].dentry; oe->lowerstack[i].mnt = ufs->lower_mnt[i]; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 6e610a205e15..809048913889 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -40,6 +40,13 @@ const struct cred *ovl_override_creds(struct super_block *sb) return override_creds(ofs->creator_cred); } +struct super_block *ovl_same_sb(struct super_block *sb) +{ + struct ovl_fs *ofs = sb->s_fs_info; + + return ofs->same_sb; +} + struct ovl_entry *ovl_alloc_entry(unsigned int numlower) { size_t size = offsetof(struct ovl_entry, lowerstack[numlower]); @@ -75,11 +82,13 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry) type = __OVL_PATH_UPPER; /* - * Non-dir dentry can hold lower dentry from previous - * location. + * Non-dir dentry can hold lower dentry of its copy up origin. */ - if (oe->numlower && d_is_dir(dentry)) - type |= __OVL_PATH_MERGE; + if (oe->numlower) { + type |= __OVL_PATH_ORIGIN; + if (d_is_dir(dentry)) + type |= __OVL_PATH_MERGE; + } } else { if (oe->numlower > 1) type |= __OVL_PATH_MERGE; @@ -100,7 +109,7 @@ void ovl_path_lower(struct dentry *dentry, struct path *path) { struct ovl_entry *oe = dentry->d_fsdata; - *path = oe->numlower ? oe->lowerstack[0] : (struct path) { NULL, NULL }; + *path = oe->numlower ? 
oe->lowerstack[0] : (struct path) { }; } enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) @@ -166,6 +175,13 @@ bool ovl_dentry_is_opaque(struct dentry *dentry) return oe->opaque; } +bool ovl_dentry_is_impure(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + return oe->impure; +} + bool ovl_dentry_is_whiteout(struct dentry *dentry) { return !dentry->d_inode && ovl_dentry_is_opaque(dentry); @@ -182,14 +198,7 @@ bool ovl_redirect_dir(struct super_block *sb) { struct ovl_fs *ofs = sb->s_fs_info; - return ofs->config.redirect_dir; -} - -void ovl_clear_redirect_dir(struct super_block *sb) -{ - struct ovl_fs *ofs = sb->s_fs_info; - - ofs->config.redirect_dir = false; + return ofs->config.redirect_dir && !ofs->noxattr; } const char *ovl_dentry_get_redirect(struct dentry *dentry) @@ -294,3 +303,59 @@ void ovl_copy_up_end(struct dentry *dentry) wake_up_locked(&ofs->copyup_wq); spin_unlock(&ofs->copyup_wq.lock); } + +bool ovl_check_dir_xattr(struct dentry *dentry, const char *name) +{ + int res; + char val; + + if (!d_is_dir(dentry)) + return false; + + res = vfs_getxattr(dentry, name, &val, 1); + if (res == 1 && val == 'y') + return true; + + return false; +} + +int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, + const char *name, const void *value, size_t size, + int xerr) +{ + int err; + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + + if (ofs->noxattr) + return xerr; + + err = ovl_do_setxattr(upperdentry, name, value, size, 0); + + if (err == -EOPNOTSUPP) { + pr_warn("overlayfs: cannot set %s xattr on upper\n", name); + ofs->noxattr = true; + return xerr; + } + + return err; +} + +int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry) +{ + int err; + struct ovl_entry *oe = dentry->d_fsdata; + + if (oe->impure) + return 0; + + /* + * Do not fail when upper doesn't support xattrs. + * Upper inodes won't have origin nor redirect xattr anyway. 
+ */ + err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE, + "y", 1, 0); + if (!err) + oe->impure = true; + + return err; +} |