summaryrefslogtreecommitdiffstats
path: root/fs/overlayfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/overlayfs')
-rw-r--r--fs/overlayfs/copy_up.c96
-rw-r--r--fs/overlayfs/dir.c12
-rw-r--r--fs/overlayfs/export.c111
-rw-r--r--fs/overlayfs/file.c162
-rw-r--r--fs/overlayfs/inode.c61
-rw-r--r--fs/overlayfs/namei.c110
-rw-r--r--fs/overlayfs/overlayfs.h58
-rw-r--r--fs/overlayfs/ovl_entry.h26
-rw-r--r--fs/overlayfs/readdir.c22
-rw-r--r--fs/overlayfs/super.c316
-rw-r--r--fs/overlayfs/util.c28
11 files changed, 650 insertions, 352 deletions
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index b801c6353100..9fc47c2e078d 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -24,7 +24,7 @@
static int ovl_ccup_set(const char *buf, const struct kernel_param *param)
{
- pr_warn("overlayfs: \"check_copy_up\" module option is obsolete\n");
+ pr_warn("\"check_copy_up\" module option is obsolete\n");
return 0;
}
@@ -123,6 +123,9 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
loff_t old_pos = 0;
loff_t new_pos = 0;
loff_t cloned;
+ loff_t data_pos = -1;
+ loff_t hole_len;
+ bool skip_hole = false;
int error = 0;
if (len == 0)
@@ -144,7 +147,11 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
goto out;
/* Couldn't clone, so now we try to copy the data */
- /* FIXME: copy up sparse files efficiently */
+ /* Check if lower fs supports seek operation */
+ if (old_file->f_mode & FMODE_LSEEK &&
+ old_file->f_op->llseek)
+ skip_hole = true;
+
while (len) {
size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
long bytes;
@@ -157,6 +164,36 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
break;
}
+ /*
+ * Fill zero for hole will cost unnecessary disk space
+ * and meanwhile slow down the copy-up speed, so we do
+ * an optimization for hole during copy-up, it relies
+ * on SEEK_DATA implementation in lower fs so if lower
+ * fs does not support it, copy-up will behave as before.
+ *
+ * Detail logic of hole detection as below:
+ * When we detect next data position is larger than current
+ * position we will skip that hole, otherwise we copy
+ * data in the size of OVL_COPY_UP_CHUNK_SIZE. Actually,
+ * it may not recognize all kind of holes and sometimes
+ * only skips partial of hole area. However, it will be
+ * enough for most of the use cases.
+ */
+
+ if (skip_hole && data_pos < old_pos) {
+ data_pos = vfs_llseek(old_file, old_pos, SEEK_DATA);
+ if (data_pos > old_pos) {
+ hole_len = data_pos - old_pos;
+ len -= hole_len;
+ old_pos = new_pos = data_pos;
+ continue;
+ } else if (data_pos == -ENXIO) {
+ break;
+ } else if (data_pos < 0) {
+ skip_hole = false;
+ }
+ }
+
bytes = do_splice_direct(old_file, &old_pos,
new_file, &new_pos,
this_len, SPLICE_F_MOVE);
@@ -227,13 +264,17 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
{
struct ovl_fh *fh;
- int fh_type, fh_len, dwords;
- void *buf;
+ int fh_type, dwords;
int buflen = MAX_HANDLE_SZ;
uuid_t *uuid = &real->d_sb->s_uuid;
+ int err;
- buf = kmalloc(buflen, GFP_KERNEL);
- if (!buf)
+ /* Make sure the real fid stays 32bit aligned */
+ BUILD_BUG_ON(OVL_FH_FID_OFFSET % 4);
+ BUILD_BUG_ON(MAX_HANDLE_SZ + OVL_FH_FID_OFFSET > 255);
+
+ fh = kzalloc(buflen + OVL_FH_FID_OFFSET, GFP_KERNEL);
+ if (!fh)
return ERR_PTR(-ENOMEM);
/*
@@ -242,27 +283,19 @@ struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
* the price or reconnecting the dentry.
*/
dwords = buflen >> 2;
- fh_type = exportfs_encode_fh(real, buf, &dwords, 0);
+ fh_type = exportfs_encode_fh(real, (void *)fh->fb.fid, &dwords, 0);
buflen = (dwords << 2);
- fh = ERR_PTR(-EIO);
+ err = -EIO;
if (WARN_ON(fh_type < 0) ||
WARN_ON(buflen > MAX_HANDLE_SZ) ||
WARN_ON(fh_type == FILEID_INVALID))
- goto out;
-
- BUILD_BUG_ON(MAX_HANDLE_SZ + offsetof(struct ovl_fh, fid) > 255);
- fh_len = offsetof(struct ovl_fh, fid) + buflen;
- fh = kmalloc(fh_len, GFP_KERNEL);
- if (!fh) {
- fh = ERR_PTR(-ENOMEM);
- goto out;
- }
+ goto out_err;
- fh->version = OVL_FH_VERSION;
- fh->magic = OVL_FH_MAGIC;
- fh->type = fh_type;
- fh->flags = OVL_FH_FLAG_CPU_ENDIAN;
+ fh->fb.version = OVL_FH_VERSION;
+ fh->fb.magic = OVL_FH_MAGIC;
+ fh->fb.type = fh_type;
+ fh->fb.flags = OVL_FH_FLAG_CPU_ENDIAN;
/*
* When we will want to decode an overlay dentry from this handle
* and all layers are on the same fs, if we get a disconncted real
@@ -270,14 +303,15 @@ struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
* it to upperdentry or to lowerstack is by checking this flag.
*/
if (is_upper)
- fh->flags |= OVL_FH_FLAG_PATH_UPPER;
- fh->len = fh_len;
- fh->uuid = *uuid;
- memcpy(fh->fid, buf, buflen);
+ fh->fb.flags |= OVL_FH_FLAG_PATH_UPPER;
+ fh->fb.len = sizeof(fh->fb) + buflen;
+ fh->fb.uuid = *uuid;
-out:
- kfree(buf);
return fh;
+
+out_err:
+ kfree(fh);
+ return ERR_PTR(err);
}
int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
@@ -300,8 +334,8 @@ int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
/*
* Do not fail when upper doesn't support xattrs.
*/
- err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh,
- fh ? fh->len : 0, 0);
+ err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh->buf,
+ fh ? fh->fb.len : 0, 0);
kfree(fh);
return err;
@@ -317,7 +351,7 @@ static int ovl_set_upper_fh(struct dentry *upper, struct dentry *index)
if (IS_ERR(fh))
return PTR_ERR(fh);
- err = ovl_do_setxattr(index, OVL_XATTR_UPPER, fh, fh->len, 0);
+ err = ovl_do_setxattr(index, OVL_XATTR_UPPER, fh->buf, fh->fb.len, 0);
kfree(fh);
return err;
@@ -483,7 +517,7 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
}
inode_lock(temp->d_inode);
- if (c->metacopy)
+ if (S_ISREG(c->stat.mode))
err = ovl_set_size(temp, &c->stat);
if (!err)
err = ovl_set_attr(temp, &c->stat);
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 702aa63f6774..8e57d5372b8f 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -35,7 +35,7 @@ int ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
dput(wdentry);
if (err) {
- pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n",
+ pr_err("cleanup of '%pd2' failed (%i)\n",
wdentry, err);
}
@@ -53,7 +53,7 @@ static struct dentry *ovl_lookup_temp(struct dentry *workdir)
temp = lookup_one_len(name, workdir, strlen(name));
if (!IS_ERR(temp) && temp->d_inode) {
- pr_err("overlayfs: workdir/%s already exists\n", name);
+ pr_err("workdir/%s already exists\n", name);
dput(temp);
temp = ERR_PTR(-EIO);
}
@@ -134,7 +134,7 @@ static int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry,
d = lookup_one_len(dentry->d_name.name, dentry->d_parent,
dentry->d_name.len);
if (IS_ERR(d)) {
- pr_warn("overlayfs: failed lookup after mkdir (%pd2, err=%i).\n",
+ pr_warn("failed lookup after mkdir (%pd2, err=%i).\n",
dentry, err);
return PTR_ERR(d);
}
@@ -267,7 +267,7 @@ static int ovl_instantiate(struct dentry *dentry, struct inode *inode,
d_instantiate(dentry, inode);
if (inode != oip.newinode) {
- pr_warn_ratelimited("overlayfs: newly created inode found in cache (%pd2)\n",
+ pr_warn_ratelimited("newly created inode found in cache (%pd2)\n",
dentry);
}
@@ -1009,7 +1009,7 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir)
spin_unlock(&dentry->d_lock);
} else {
kfree(redirect);
- pr_warn_ratelimited("overlayfs: failed to set redirect (%i)\n",
+ pr_warn_ratelimited("failed to set redirect (%i)\n",
err);
/* Fall back to userspace copy-up */
err = -EXDEV;
@@ -1170,7 +1170,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
if (newdentry == trap)
goto out_dput;
- if (WARN_ON(olddentry->d_inode == newdentry->d_inode))
+ if (olddentry->d_inode == newdentry->d_inode)
goto out_dput;
err = 0;
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index cb8ec1f65c03..6f54d70cef27 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -30,7 +30,7 @@ static int ovl_encode_maybe_copy_up(struct dentry *dentry)
}
if (err) {
- pr_warn_ratelimited("overlayfs: failed to copy up on encode (%pd2, err=%i)\n",
+ pr_warn_ratelimited("failed to copy up on encode (%pd2, err=%i)\n",
dentry, err);
}
@@ -211,10 +211,11 @@ static int ovl_check_encode_origin(struct dentry *dentry)
return 1;
}
-static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen)
+static int ovl_dentry_to_fid(struct dentry *dentry, u32 *fid, int buflen)
{
struct ovl_fh *fh = NULL;
int err, enc_lower;
+ int len;
/*
* Check if we should encode a lower or upper file handle and maybe
@@ -227,48 +228,33 @@ static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen)
/* Encode an upper or lower file handle */
fh = ovl_encode_real_fh(enc_lower ? ovl_dentry_lower(dentry) :
ovl_dentry_upper(dentry), !enc_lower);
- err = PTR_ERR(fh);
if (IS_ERR(fh))
- goto fail;
+ return PTR_ERR(fh);
err = -EOVERFLOW;
- if (fh->len > buflen)
+ len = OVL_FH_LEN(fh);
+ if (len > buflen)
goto fail;
- memcpy(buf, (char *)fh, fh->len);
- err = fh->len;
+ memcpy(fid, fh, len);
+ err = len;
out:
kfree(fh);
return err;
fail:
- pr_warn_ratelimited("overlayfs: failed to encode file handle (%pd2, err=%i, buflen=%d, len=%d, type=%d)\n",
- dentry, err, buflen, fh ? (int)fh->len : 0,
- fh ? fh->type : 0);
+ pr_warn_ratelimited("failed to encode file handle (%pd2, err=%i, buflen=%d, len=%d, type=%d)\n",
+ dentry, err, buflen, fh ? (int)fh->fb.len : 0,
+ fh ? fh->fb.type : 0);
goto out;
}
-static int ovl_dentry_to_fh(struct dentry *dentry, u32 *fid, int *max_len)
-{
- int res, len = *max_len << 2;
-
- res = ovl_d_to_fh(dentry, (char *)fid, len);
- if (res <= 0)
- return FILEID_INVALID;
-
- len = res;
-
- /* Round up to dwords */
- *max_len = (len + 3) >> 2;
- return OVL_FILEID;
-}
-
static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len,
struct inode *parent)
{
struct dentry *dentry;
- int type;
+ int bytes = *max_len << 2;
/* TODO: encode connectable file handles */
if (parent)
@@ -278,10 +264,14 @@ static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len,
if (WARN_ON(!dentry))
return FILEID_INVALID;
- type = ovl_dentry_to_fh(dentry, fid, max_len);
-
+ bytes = ovl_dentry_to_fid(dentry, fid, bytes);
dput(dentry);
- return type;
+ if (bytes <= 0)
+ return FILEID_INVALID;
+
+ *max_len = bytes >> 2;
+
+ return OVL_FILEID_V1;
}
/*
@@ -368,7 +358,7 @@ static struct dentry *ovl_dentry_real_at(struct dentry *dentry, int idx)
*/
static struct dentry *ovl_lookup_real_one(struct dentry *connected,
struct dentry *real,
- struct ovl_layer *layer)
+ const struct ovl_layer *layer)
{
struct inode *dir = d_inode(connected);
struct dentry *this, *parent = NULL;
@@ -416,7 +406,7 @@ out:
return this;
fail:
- pr_warn_ratelimited("overlayfs: failed to lookup one by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
+ pr_warn_ratelimited("failed to lookup one by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
real, layer->idx, connected, err);
this = ERR_PTR(err);
goto out;
@@ -424,17 +414,16 @@ fail:
static struct dentry *ovl_lookup_real(struct super_block *sb,
struct dentry *real,
- struct ovl_layer *layer);
+ const struct ovl_layer *layer);
/*
* Lookup an indexed or hashed overlay dentry by real inode.
*/
static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
struct dentry *real,
- struct ovl_layer *layer)
+ const struct ovl_layer *layer)
{
struct ovl_fs *ofs = sb->s_fs_info;
- struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt };
struct dentry *index = NULL;
struct dentry *this = NULL;
struct inode *inode;
@@ -476,7 +465,7 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
* recursive call walks back from indexed upper to the topmost
* connected/hashed upper parent (or up to root).
*/
- this = ovl_lookup_real(sb, upper, &upper_layer);
+ this = ovl_lookup_real(sb, upper, &ofs->layers[0]);
dput(upper);
}
@@ -497,7 +486,7 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
*/
static struct dentry *ovl_lookup_real_ancestor(struct super_block *sb,
struct dentry *real,
- struct ovl_layer *layer)
+ const struct ovl_layer *layer)
{
struct dentry *next, *parent = NULL;
struct dentry *ancestor = ERR_PTR(-EIO);
@@ -550,7 +539,7 @@ static struct dentry *ovl_lookup_real_ancestor(struct super_block *sb,
*/
static struct dentry *ovl_lookup_real(struct super_block *sb,
struct dentry *real,
- struct ovl_layer *layer)
+ const struct ovl_layer *layer)
{
struct dentry *connected;
int err = 0;
@@ -641,7 +630,7 @@ static struct dentry *ovl_lookup_real(struct super_block *sb,
return connected;
fail:
- pr_warn_ratelimited("overlayfs: failed to lookup by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
+ pr_warn_ratelimited("failed to lookup by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
real, layer->idx, connected, err);
dput(connected);
return ERR_PTR(err);
@@ -656,8 +645,7 @@ static struct dentry *ovl_get_dentry(struct super_block *sb,
struct dentry *index)
{
struct ovl_fs *ofs = sb->s_fs_info;
- struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt };
- struct ovl_layer *layer = upper ? &upper_layer : lowerpath->layer;
+ const struct ovl_layer *layer = upper ? &ofs->layers[0] : lowerpath->layer;
struct dentry *real = upper ?: (index ?: lowerpath->dentry);
/*
@@ -778,24 +766,45 @@ out_err:
goto out;
}
+static struct ovl_fh *ovl_fid_to_fh(struct fid *fid, int buflen, int fh_type)
+{
+ struct ovl_fh *fh;
+
+ /* If on-wire inner fid is aligned - nothing to do */
+ if (fh_type == OVL_FILEID_V1)
+ return (struct ovl_fh *)fid;
+
+ if (fh_type != OVL_FILEID_V0)
+ return ERR_PTR(-EINVAL);
+
+ fh = kzalloc(buflen, GFP_KERNEL);
+ if (!fh)
+ return ERR_PTR(-ENOMEM);
+
+ /* Copy unaligned inner fh into aligned buffer */
+ memcpy(&fh->fb, fid, buflen - OVL_FH_WIRE_OFFSET);
+ return fh;
+}
+
static struct dentry *ovl_fh_to_dentry(struct super_block *sb, struct fid *fid,
int fh_len, int fh_type)
{
struct dentry *dentry = NULL;
- struct ovl_fh *fh = (struct ovl_fh *) fid;
+ struct ovl_fh *fh = NULL;
int len = fh_len << 2;
unsigned int flags = 0;
int err;
- err = -EINVAL;
- if (fh_type != OVL_FILEID)
+ fh = ovl_fid_to_fh(fid, len, fh_type);
+ err = PTR_ERR(fh);
+ if (IS_ERR(fh))
goto out_err;
err = ovl_check_fh_len(fh, len);
if (err)
goto out_err;
- flags = fh->flags;
+ flags = fh->fb.flags;
dentry = (flags & OVL_FH_FLAG_PATH_UPPER) ?
ovl_upper_fh_to_d(sb, fh) :
ovl_lower_fh_to_d(sb, fh);
@@ -803,18 +812,24 @@ static struct dentry *ovl_fh_to_dentry(struct super_block *sb, struct fid *fid,
if (IS_ERR(dentry) && err != -ESTALE)
goto out_err;
+out:
+ /* We may have needed to re-align OVL_FILEID_V0 */
+ if (!IS_ERR_OR_NULL(fh) && fh != (void *)fid)
+ kfree(fh);
+
return dentry;
out_err:
- pr_warn_ratelimited("overlayfs: failed to decode file handle (len=%d, type=%d, flags=%x, err=%i)\n",
- len, fh_type, flags, err);
- return ERR_PTR(err);
+ pr_warn_ratelimited("failed to decode file handle (len=%d, type=%d, flags=%x, err=%i)\n",
+ fh_len, fh_type, flags, err);
+ dentry = ERR_PTR(err);
+ goto out;
}
static struct dentry *ovl_fh_to_parent(struct super_block *sb, struct fid *fid,
int fh_len, int fh_type)
{
- pr_warn_ratelimited("overlayfs: connectable file handles not supported; use 'no_subtree_check' exportfs option.\n");
+ pr_warn_ratelimited("connectable file handles not supported; use 'no_subtree_check' exportfs option.\n");
return ERR_PTR(-EACCES);
}
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index e235a635d9ec..a5317216de73 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -9,8 +9,19 @@
#include <linux/xattr.h>
#include <linux/uio.h>
#include <linux/uaccess.h>
+#include <linux/splice.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
#include "overlayfs.h"
+struct ovl_aio_req {
+ struct kiocb iocb;
+ struct kiocb *orig_iocb;
+ struct fd fd;
+};
+
+static struct kmem_cache *ovl_aio_request_cachep;
+
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
if (realinode != ovl_inode_upper(inode))
@@ -146,7 +157,7 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
struct inode *inode = file_inode(file);
struct fd real;
const struct cred *old_cred;
- ssize_t ret;
+ loff_t ret;
/*
* The two special cases below do not need to involve real fs,
@@ -171,7 +182,7 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
* limitations that are more strict than ->s_maxbytes for specific
* files, so we use the real file to perform seeks.
*/
- inode_lock(inode);
+ ovl_inode_lock(inode);
real.file->f_pos = file->f_pos;
old_cred = ovl_override_creds(inode->i_sb);
@@ -179,7 +190,7 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
revert_creds(old_cred);
file->f_pos = real.file->f_pos;
- inode_unlock(inode);
+ ovl_inode_unlock(inode);
fdput(real);
@@ -225,6 +236,33 @@ static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
return flags;
}
+static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
+{
+ struct kiocb *iocb = &aio_req->iocb;
+ struct kiocb *orig_iocb = aio_req->orig_iocb;
+
+ if (iocb->ki_flags & IOCB_WRITE) {
+ struct inode *inode = file_inode(orig_iocb->ki_filp);
+
+ file_end_write(iocb->ki_filp);
+ ovl_copyattr(ovl_inode_real(inode), inode);
+ }
+
+ orig_iocb->ki_pos = iocb->ki_pos;
+ fdput(aio_req->fd);
+ kmem_cache_free(ovl_aio_request_cachep, aio_req);
+}
+
+static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2)
+{
+ struct ovl_aio_req *aio_req = container_of(iocb,
+ struct ovl_aio_req, iocb);
+ struct kiocb *orig_iocb = aio_req->orig_iocb;
+
+ ovl_aio_cleanup_handler(aio_req);
+ orig_iocb->ki_complete(orig_iocb, res, res2);
+}
+
static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct file *file = iocb->ki_filp;
@@ -240,10 +278,28 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
return ret;
old_cred = ovl_override_creds(file_inode(file)->i_sb);
- ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
- ovl_iocb_to_rwf(iocb));
+ if (is_sync_kiocb(iocb)) {
+ ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
+ ovl_iocb_to_rwf(iocb));
+ } else {
+ struct ovl_aio_req *aio_req;
+
+ ret = -ENOMEM;
+ aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
+ if (!aio_req)
+ goto out;
+
+ aio_req->fd = real;
+ real.flags = 0;
+ aio_req->orig_iocb = iocb;
+ kiocb_clone(&aio_req->iocb, iocb, real.file);
+ aio_req->iocb.ki_complete = ovl_aio_rw_complete;
+ ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
+ if (ret != -EIOCBQUEUED)
+ ovl_aio_cleanup_handler(aio_req);
+ }
+out:
revert_creds(old_cred);
-
ovl_file_accessed(file);
fdput(real);
@@ -274,15 +330,33 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
goto out_unlock;
old_cred = ovl_override_creds(file_inode(file)->i_sb);
- file_start_write(real.file);
- ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
- ovl_iocb_to_rwf(iocb));
- file_end_write(real.file);
+ if (is_sync_kiocb(iocb)) {
+ file_start_write(real.file);
+ ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
+ ovl_iocb_to_rwf(iocb));
+ file_end_write(real.file);
+ /* Update size */
+ ovl_copyattr(ovl_inode_real(inode), inode);
+ } else {
+ struct ovl_aio_req *aio_req;
+
+ ret = -ENOMEM;
+ aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
+ if (!aio_req)
+ goto out;
+
+ file_start_write(real.file);
+ aio_req->fd = real;
+ real.flags = 0;
+ aio_req->orig_iocb = iocb;
+ kiocb_clone(&aio_req->iocb, iocb, real.file);
+ aio_req->iocb.ki_complete = ovl_aio_rw_complete;
+ ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
+ if (ret != -EIOCBQUEUED)
+ ovl_aio_cleanup_handler(aio_req);
+ }
+out:
revert_creds(old_cred);
-
- /* Update size */
- ovl_copyattr(ovl_inode_real(inode), inode);
-
fdput(real);
out_unlock:
@@ -291,6 +365,48 @@ out_unlock:
return ret;
}
+static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
+{
+ ssize_t ret;
+ struct fd real;
+ const struct cred *old_cred;
+
+ ret = ovl_real_fdget(in, &real);
+ if (ret)
+ return ret;
+
+ old_cred = ovl_override_creds(file_inode(in)->i_sb);
+ ret = generic_file_splice_read(real.file, ppos, pipe, len, flags);
+ revert_creds(old_cred);
+
+ ovl_file_accessed(in);
+ fdput(real);
+ return ret;
+}
+
+static ssize_t
+ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
+ loff_t *ppos, size_t len, unsigned int flags)
+{
+ struct fd real;
+ const struct cred *old_cred;
+ ssize_t ret;
+
+ ret = ovl_real_fdget(out, &real);
+ if (ret)
+ return ret;
+
+ old_cred = ovl_override_creds(file_inode(out)->i_sb);
+ ret = iter_file_splice_write(pipe, real.file, ppos, len, flags);
+ revert_creds(old_cred);
+
+ ovl_file_accessed(out);
+ fdput(real);
+ return ret;
+}
+
static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct fd real;
@@ -647,7 +763,25 @@ const struct file_operations ovl_file_operations = {
.fadvise = ovl_fadvise,
.unlocked_ioctl = ovl_ioctl,
.compat_ioctl = ovl_compat_ioctl,
+ .splice_read = ovl_splice_read,
+ .splice_write = ovl_splice_write,
.copy_file_range = ovl_copy_file_range,
.remap_file_range = ovl_remap_file_range,
};
+
+int __init ovl_aio_request_cache_init(void)
+{
+ ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req",
+ sizeof(struct ovl_aio_req),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!ovl_aio_request_cachep)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void ovl_aio_request_cache_destroy(void)
+{
+ kmem_cache_destroy(ovl_aio_request_cachep);
+}
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 7663aeb85fa3..79e8994e3bc1 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -75,10 +75,9 @@ out:
return err;
}
-static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat,
- struct ovl_layer *lower_layer)
+static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid)
{
- bool samefs = ovl_same_sb(dentry->d_sb);
+ bool samefs = ovl_same_fs(dentry->d_sb);
unsigned int xinobits = ovl_xino_bits(dentry->d_sb);
if (samefs) {
@@ -100,12 +99,10 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat,
* persistent for a given layer configuration.
*/
if (stat->ino >> shift) {
- pr_warn_ratelimited("overlayfs: inode number too big (%pd2, ino=%llu, xinobits=%d)\n",
+ pr_warn_ratelimited("inode number too big (%pd2, ino=%llu, xinobits=%d)\n",
dentry, stat->ino, xinobits);
} else {
- if (lower_layer)
- stat->ino |= ((u64)lower_layer->fsid) << shift;
-
+ stat->ino |= ((u64)fsid) << shift;
stat->dev = dentry->d_sb->s_dev;
return 0;
}
@@ -124,15 +121,14 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat,
*/
stat->dev = dentry->d_sb->s_dev;
stat->ino = dentry->d_inode->i_ino;
- } else if (lower_layer && lower_layer->fsid) {
+ } else {
/*
* For non-samefs setup, if we cannot map all layers st_ino
* to a unified address space, we need to make sure that st_dev
- * is unique per lower fs. Upper layer uses real st_dev and
- * lower layers use the unique anonymous bdev assigned to the
- * lower fs.
+ * is unique per underlying fs, so we use the unique anonymous
+ * bdev assigned to the underlying fs.
*/
- stat->dev = lower_layer->fs->pseudo_dev;
+ stat->dev = OVL_FS(dentry->d_sb)->fs[fsid].pseudo_dev;
}
return 0;
@@ -146,8 +142,7 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
struct path realpath;
const struct cred *old_cred;
bool is_dir = S_ISDIR(dentry->d_inode->i_mode);
- bool samefs = ovl_same_sb(dentry->d_sb);
- struct ovl_layer *lower_layer = NULL;
+ int fsid = 0;
int err;
bool metacopy_blocks = false;
@@ -168,9 +163,9 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
* If lower filesystem supports NFS file handles, this also guaranties
* persistent st_ino across mount cycle.
*/
- if (!is_dir || samefs || ovl_xino_bits(dentry->d_sb)) {
+ if (!is_dir || ovl_same_dev(dentry->d_sb)) {
if (!OVL_TYPE_UPPER(type)) {
- lower_layer = ovl_layer_lower(dentry);
+ fsid = ovl_layer_lower(dentry)->fsid;
} else if (OVL_TYPE_ORIGIN(type)) {
struct kstat lowerstat;
u32 lowermask = STATX_INO | STATX_BLOCKS |
@@ -200,8 +195,8 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) ||
(!ovl_verify_lower(dentry->d_sb) &&
(is_dir || lowerstat.nlink == 1))) {
+ fsid = ovl_layer_lower(dentry)->fsid;
stat->ino = lowerstat.ino;
- lower_layer = ovl_layer_lower(dentry);
}
/*
@@ -235,7 +230,7 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
}
}
- err = ovl_map_dev_ino(dentry, stat, lower_layer);
+ err = ovl_map_dev_ino(dentry, stat, fsid);
if (err)
goto out;
@@ -383,7 +378,8 @@ static bool ovl_can_list(const char *s)
return true;
/* Never list trusted.overlay, list other trusted for superuser only */
- return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN);
+ return !ovl_is_private_xattr(s) &&
+ ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
}
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
@@ -520,6 +516,27 @@ static const struct address_space_operations ovl_aops = {
* [...] &ovl_i_mutex_dir_key[depth] (stack_depth=2)
* [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1)
* [...] &type->i_mutex_dir_key (stack_depth=0)
+ *
+ * Locking order w.r.t ovl_want_write() is important for nested overlayfs.
+ *
+ * This chain is valid:
+ * - inode->i_rwsem (inode_lock[2])
+ * - upper_mnt->mnt_sb->s_writers (ovl_want_write[0])
+ * - OVL_I(inode)->lock (ovl_inode_lock[2])
+ * - OVL_I(lowerinode)->lock (ovl_inode_lock[1])
+ *
+ * And this chain is valid:
+ * - inode->i_rwsem (inode_lock[2])
+ * - OVL_I(inode)->lock (ovl_inode_lock[2])
+ * - lowerinode->i_rwsem (inode_lock[1])
+ * - OVL_I(lowerinode)->lock (ovl_inode_lock[1])
+ *
+ * But lowerinode->i_rwsem SHOULD NOT be acquired while ovl_want_write() is
+ * held, because it is in reverse order of the non-nested case using the same
+ * upper fs:
+ * - inode->i_rwsem (inode_lock[1])
+ * - upper_mnt->mnt_sb->s_writers (ovl_want_write[0])
+ * - OVL_I(inode)->lock (ovl_inode_lock[1])
*/
#define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH
@@ -558,7 +575,7 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
* ovl_new_inode(), ino arg is 0, so i_ino will be updated to real
* upper inode i_ino on ovl_inode_init() or ovl_inode_update().
*/
- if (ovl_same_sb(inode->i_sb) || xinobits) {
+ if (ovl_same_dev(inode->i_sb)) {
inode->i_ino = ino;
if (xinobits && fsid && !(ino >> (64 - xinobits)))
inode->i_ino |= (unsigned long)fsid << (64 - xinobits);
@@ -691,7 +708,7 @@ unsigned int ovl_get_nlink(struct dentry *lowerdentry,
return nlink;
fail:
- pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%i)\n",
+ pr_warn_ratelimited("failed to get index nlink (%pd2, err=%i)\n",
upperdentry, err);
return fallback;
}
@@ -962,7 +979,7 @@ out:
return inode;
out_err:
- pr_warn_ratelimited("overlayfs: failed to get inode (%i)\n", err);
+ pr_warn_ratelimited("failed to get inode (%i)\n", err);
inode = ERR_PTR(err);
goto out;
}
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index e9717c2f7d45..ed9e129fae04 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -84,21 +84,21 @@ static int ovl_acceptable(void *ctx, struct dentry *dentry)
* Return -ENODATA for "origin unknown".
* Return <0 for an invalid file handle.
*/
-int ovl_check_fh_len(struct ovl_fh *fh, int fh_len)
+int ovl_check_fb_len(struct ovl_fb *fb, int fb_len)
{
- if (fh_len < sizeof(struct ovl_fh) || fh_len < fh->len)
+ if (fb_len < sizeof(struct ovl_fb) || fb_len < fb->len)
return -EINVAL;
- if (fh->magic != OVL_FH_MAGIC)
+ if (fb->magic != OVL_FH_MAGIC)
return -EINVAL;
/* Treat larger version and unknown flags as "origin unknown" */
- if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL)
+ if (fb->version > OVL_FH_VERSION || fb->flags & ~OVL_FH_FLAG_ALL)
return -ENODATA;
/* Treat endianness mismatch as "origin unknown" */
- if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
- (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
+ if (!(fb->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
+ (fb->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
return -ENODATA;
return 0;
@@ -119,15 +119,15 @@ static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name)
if (res == 0)
return NULL;
- fh = kzalloc(res, GFP_KERNEL);
+ fh = kzalloc(res + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
if (!fh)
return ERR_PTR(-ENOMEM);
- res = vfs_getxattr(dentry, name, fh, res);
+ res = vfs_getxattr(dentry, name, fh->buf, res);
if (res < 0)
goto fail;
- err = ovl_check_fh_len(fh, res);
+ err = ovl_check_fb_len(&fh->fb, res);
if (err < 0) {
if (err == -ENODATA)
goto out;
@@ -141,10 +141,10 @@ out:
return NULL;
fail:
- pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res);
+ pr_warn_ratelimited("failed to get origin (%i)\n", res);
goto out;
invalid:
- pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh);
+ pr_warn_ratelimited("invalid origin (%*phN)\n", res, fh);
goto out;
}
@@ -158,12 +158,12 @@ struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
* Make sure that the stored uuid matches the uuid of the lower
* layer where file handle will be decoded.
*/
- if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid))
+ if (!uuid_equal(&fh->fb.uuid, &mnt->mnt_sb->s_uuid))
return NULL;
- bytes = (fh->len - offsetof(struct ovl_fh, fid));
- real = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
- bytes >> 2, (int)fh->type,
+ bytes = (fh->fb.len - offsetof(struct ovl_fb, fid));
+ real = exportfs_decode_fh(mnt, (struct fid *)fh->fb.fid,
+ bytes >> 2, (int)fh->fb.type,
connected ? ovl_acceptable : NULL, mnt);
if (IS_ERR(real)) {
/*
@@ -173,7 +173,7 @@ struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
* index entries correctly.
*/
if (real == ERR_PTR(-ESTALE) &&
- !(fh->flags & OVL_FH_FLAG_PATH_UPPER))
+ !(fh->fb.flags & OVL_FH_FLAG_PATH_UPPER))
real = NULL;
return real;
}
@@ -200,7 +200,7 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
int err;
bool last_element = !post[0];
- this = lookup_one_len_unlocked(name, base, namelen);
+ this = lookup_positive_unlocked(name, base, namelen);
if (IS_ERR(this)) {
err = PTR_ERR(this);
this = NULL;
@@ -208,8 +208,6 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
goto out;
goto out_err;
}
- if (!this->d_inode)
- goto put_and_out;
if (ovl_dentry_weird(this)) {
/* Don't support traversing automounts and other weirdness */
@@ -324,8 +322,16 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
struct dentry *origin = NULL;
int i;
- for (i = 0; i < ofs->numlower; i++) {
- origin = ovl_decode_real_fh(fh, ofs->lower_layers[i].mnt,
+ for (i = 1; i < ofs->numlayer; i++) {
+ /*
+ * If lower fs uuid is not unique among lower fs we cannot match
+ * fh->uuid to layer.
+ */
+ if (ofs->layers[i].fsid &&
+ ofs->layers[i].fs->bad_uuid)
+ continue;
+
+ origin = ovl_decode_real_fh(fh, ofs->layers[i].mnt,
connected);
if (origin)
break;
@@ -348,13 +354,13 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
}
**stackp = (struct ovl_path){
.dentry = origin,
- .layer = &ofs->lower_layers[i]
+ .layer = &ofs->layers[i]
};
return 0;
invalid:
- pr_warn_ratelimited("overlayfs: invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
+ pr_warn_ratelimited("invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
d_inode(origin)->i_mode & S_IFMT);
dput(origin);
@@ -402,7 +408,7 @@ static int ovl_verify_fh(struct dentry *dentry, const char *name,
if (IS_ERR(ofh))
return PTR_ERR(ofh);
- if (fh->len != ofh->len || memcmp(fh, ofh, fh->len))
+ if (fh->fb.len != ofh->fb.len || memcmp(&fh->fb, &ofh->fb, fh->fb.len))
err = -ESTALE;
kfree(ofh);
@@ -433,7 +439,7 @@ int ovl_verify_set_fh(struct dentry *dentry, const char *name,
err = ovl_verify_fh(dentry, name, fh);
if (set && err == -ENODATA)
- err = ovl_do_setxattr(dentry, name, fh, fh->len, 0);
+ err = ovl_do_setxattr(dentry, name, fh->buf, fh->fb.len, 0);
if (err)
goto fail;
@@ -443,7 +449,7 @@ out:
fail:
inode = d_inode(real);
- pr_warn_ratelimited("overlayfs: failed to verify %s (%pd2, ino=%lu, err=%i)\n",
+ pr_warn_ratelimited("failed to verify %s (%pd2, ino=%lu, err=%i)\n",
is_upper ? "upper" : "origin", real,
inode ? inode->i_ino : 0, err);
goto out;
@@ -469,7 +475,7 @@ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
return upper ?: ERR_PTR(-ESTALE);
if (!d_is_dir(upper)) {
- pr_warn_ratelimited("overlayfs: invalid index upper (%pd2, upper=%pd2).\n",
+ pr_warn_ratelimited("invalid index upper (%pd2, upper=%pd2).\n",
index, upper);
dput(upper);
return ERR_PTR(-EIO);
@@ -507,20 +513,20 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
goto fail;
err = -EINVAL;
- if (index->d_name.len < sizeof(struct ovl_fh)*2)
+ if (index->d_name.len < sizeof(struct ovl_fb)*2)
goto fail;
err = -ENOMEM;
len = index->d_name.len / 2;
- fh = kzalloc(len, GFP_KERNEL);
+ fh = kzalloc(len + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
if (!fh)
goto fail;
err = -EINVAL;
- if (hex2bin((u8 *)fh, index->d_name.name, len))
+ if (hex2bin(fh->buf, index->d_name.name, len))
goto fail;
- err = ovl_check_fh_len(fh, len);
+ err = ovl_check_fb_len(&fh->fb, len);
if (err)
goto fail;
@@ -583,12 +589,12 @@ out:
return err;
fail:
- pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n",
+ pr_warn_ratelimited("failed to verify index (%pd2, ftype=%x, err=%i)\n",
index, d_inode(index)->i_mode & S_IFMT, err);
goto out;
orphan:
- pr_warn_ratelimited("overlayfs: orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
+ pr_warn_ratelimited("orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
index, d_inode(index)->i_mode & S_IFMT,
d_inode(index)->i_nlink);
err = -ENOENT;
@@ -599,11 +605,11 @@ static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
{
char *n, *s;
- n = kcalloc(fh->len, 2, GFP_KERNEL);
+ n = kcalloc(fh->fb.len, 2, GFP_KERNEL);
if (!n)
return -ENOMEM;
- s = bin2hex(n, fh, fh->len);
+ s = bin2hex(n, fh->buf, fh->fb.len);
*name = (struct qstr) QSTR_INIT(n, s - n);
return 0;
@@ -651,7 +657,7 @@ struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
if (err)
return ERR_PTR(err);
- index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
+ index = lookup_positive_unlocked(name.name, ofs->indexdir, name.len);
kfree(name.name);
if (IS_ERR(index)) {
if (PTR_ERR(index) == -ENOENT)
@@ -659,9 +665,7 @@ struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
return index;
}
- if (d_is_negative(index))
- err = 0;
- else if (ovl_is_whiteout(index))
+ if (ovl_is_whiteout(index))
err = -ESTALE;
else if (ovl_dentry_weird(index))
err = -EIO;
@@ -685,14 +689,14 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
if (err)
return ERR_PTR(err);
- index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
+ index = lookup_positive_unlocked(name.name, ofs->indexdir, name.len);
if (IS_ERR(index)) {
err = PTR_ERR(index);
if (err == -ENOENT) {
index = NULL;
goto out;
}
- pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
+ pr_warn_ratelimited("failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
"overlayfs: mount with '-o index=off' to disable inodes index.\n",
d_inode(origin)->i_ino, name.len, name.name,
err);
@@ -700,9 +704,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
}
inode = d_inode(index);
- if (d_is_negative(index)) {
- goto out_dput;
- } else if (ovl_is_whiteout(index) && !verify) {
+ if (ovl_is_whiteout(index) && !verify) {
/*
* When index lookup is called with !verify for decoding an
* overlay file handle, a whiteout index implies that decode
@@ -721,13 +723,13 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
* unlinked, which means that finding a lower origin on lookup
* whose index is a whiteout should be treated as an error.
*/
- pr_warn_ratelimited("overlayfs: bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
+ pr_warn_ratelimited("bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
index, d_inode(index)->i_mode & S_IFMT,
d_inode(origin)->i_mode & S_IFMT);
goto fail;
} else if (is_dir && verify) {
if (!upper) {
- pr_warn_ratelimited("overlayfs: suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
+ pr_warn_ratelimited("suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
origin, index);
goto fail;
}
@@ -736,7 +738,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
err = ovl_verify_upper(index, upper, false);
if (err) {
if (err == -ESTALE) {
- pr_warn_ratelimited("overlayfs: suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
+ pr_warn_ratelimited("suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
upper, origin, index);
}
goto fail;
@@ -883,7 +885,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
if (!d.stop && poe->numlower) {
err = -ENOMEM;
- stack = kcalloc(ofs->numlower, sizeof(struct ovl_path),
+ stack = kcalloc(ofs->numlayer - 1, sizeof(struct ovl_path),
GFP_KERNEL);
if (!stack)
goto out_put_upper;
@@ -965,7 +967,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
*/
err = -EPERM;
if (d.redirect && !ofs->config.redirect_follow) {
- pr_warn_ratelimited("overlayfs: refusing to follow redirect for (%pd2)\n",
+ pr_warn_ratelimited("refusing to follow redirect for (%pd2)\n",
dentry);
goto out_put;
}
@@ -992,7 +994,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
err = -EPERM;
if (!ofs->config.metacopy) {
- pr_warn_ratelimited("overlay: refusing to follow metacopy origin for (%pd2)\n",
+ pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n",
dentry);
goto out_put;
}
@@ -1131,7 +1133,7 @@ bool ovl_lower_positive(struct dentry *dentry)
struct dentry *this;
struct dentry *lowerdir = poe->lowerstack[i].dentry;
- this = lookup_one_len_unlocked(name->name, lowerdir,
+ this = lookup_positive_unlocked(name->name, lowerdir,
name->len);
if (IS_ERR(this)) {
switch (PTR_ERR(this)) {
@@ -1148,10 +1150,8 @@ bool ovl_lower_positive(struct dentry *dentry)
break;
}
} else {
- if (this->d_inode) {
- positive = !ovl_is_whiteout(this);
- done = true;
- }
+ positive = !ovl_is_whiteout(this);
+ done = true;
dput(this);
}
}
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 6934bcf030f0..3623d28aa4fa 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -9,6 +9,9 @@
#include <linux/fs.h>
#include "ovl_entry.h"
+#undef pr_fmt
+#define pr_fmt(fmt) "overlayfs: " fmt
+
enum ovl_path_type {
__OVL_PATH_UPPER = (1 << 0),
__OVL_PATH_MERGE = (1 << 1),
@@ -71,20 +74,36 @@ enum ovl_entry_flag {
#error Endianness not defined
#endif
-/* The type returned by overlay exportfs ops when encoding an ovl_fh handle */
-#define OVL_FILEID 0xfb
+/* The type used to be returned by overlay exportfs for misaligned fid */
+#define OVL_FILEID_V0 0xfb
+/* The type returned by overlay exportfs for 32bit aligned fid */
+#define OVL_FILEID_V1 0xf8
-/* On-disk and in-memeory format for redirect by file handle */
-struct ovl_fh {
+/* On-disk format for "origin" file handle */
+struct ovl_fb {
u8 version; /* 0 */
u8 magic; /* 0xfb */
u8 len; /* size of this header + size of fid */
u8 flags; /* OVL_FH_FLAG_* */
u8 type; /* fid_type of fid */
uuid_t uuid; /* uuid of filesystem */
- u8 fid[0]; /* file identifier */
+ u32 fid[0]; /* file identifier should be 32bit aligned in-memory */
+} __packed;
+
+/* In-memory and on-wire format for overlay file handle */
+struct ovl_fh {
+ u8 padding[3]; /* make sure fb.fid is 32bit aligned */
+ union {
+ struct ovl_fb fb;
+ u8 buf[0];
+ };
} __packed;
+#define OVL_FH_WIRE_OFFSET offsetof(struct ovl_fh, fb)
+#define OVL_FH_LEN(fh) (OVL_FH_WIRE_OFFSET + (fh)->fb.len)
+#define OVL_FH_FID_OFFSET (OVL_FH_WIRE_OFFSET + \
+ offsetof(struct ovl_fb, fid))
+
static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
{
int err = vfs_rmdir(dir, dentry);
@@ -205,7 +224,6 @@ int ovl_want_write(struct dentry *dentry);
void ovl_drop_write(struct dentry *dentry);
struct dentry *ovl_workdir(struct dentry *dentry);
const struct cred *ovl_override_creds(struct super_block *sb);
-struct super_block *ovl_same_sb(struct super_block *sb);
int ovl_can_decode_fh(struct super_block *sb);
struct dentry *ovl_indexdir(struct super_block *sb);
bool ovl_index_all(struct super_block *sb);
@@ -221,7 +239,7 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
struct dentry *ovl_dentry_upper(struct dentry *dentry);
struct dentry *ovl_dentry_lower(struct dentry *dentry);
struct dentry *ovl_dentry_lowerdata(struct dentry *dentry);
-struct ovl_layer *ovl_layer_lower(struct dentry *dentry);
+const struct ovl_layer *ovl_layer_lower(struct dentry *dentry);
struct dentry *ovl_dentry_real(struct dentry *dentry);
struct dentry *ovl_i_dentry_upper(struct inode *inode);
struct inode *ovl_inode_upper(struct inode *inode);
@@ -283,11 +301,21 @@ static inline bool ovl_is_impuredir(struct dentry *dentry)
return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE);
}
-static inline unsigned int ovl_xino_bits(struct super_block *sb)
+/* All layers on same fs? */
+static inline bool ovl_same_fs(struct super_block *sb)
{
- struct ovl_fs *ofs = sb->s_fs_info;
+ return OVL_FS(sb)->xino_mode == 0;
+}
- return ofs->xino_bits;
+/* All overlay inodes have same st_dev? */
+static inline bool ovl_same_dev(struct super_block *sb)
+{
+ return OVL_FS(sb)->xino_mode >= 0;
+}
+
+static inline unsigned int ovl_xino_bits(struct super_block *sb)
+{
+ return ovl_same_dev(sb) ? OVL_FS(sb)->xino_mode : 0;
}
static inline int ovl_inode_lock(struct inode *inode)
@@ -302,7 +330,13 @@ static inline void ovl_inode_unlock(struct inode *inode)
/* namei.c */
-int ovl_check_fh_len(struct ovl_fh *fh, int fh_len);
+int ovl_check_fb_len(struct ovl_fb *fb, int fb_len);
+
+static inline int ovl_check_fh_len(struct ovl_fh *fh, int fh_len)
+{
+ return ovl_check_fb_len(&fh->fb, fh_len - OVL_FH_WIRE_OFFSET);
+}
+
struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
bool connected);
int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
@@ -416,6 +450,8 @@ struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr);
/* file.c */
extern const struct file_operations ovl_file_operations;
+int __init ovl_aio_request_cache_init(void);
+void ovl_aio_request_cache_destroy(void);
/* copy_up.c */
int ovl_copy_up(struct dentry *dentry);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 28a2d12a1029..89015ea822e7 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -22,6 +22,10 @@ struct ovl_config {
struct ovl_sb {
struct super_block *sb;
dev_t pseudo_dev;
+ /* Unusable (conflicting) uuid */
+ bool bad_uuid;
+ /* Used as a lower layer (but maybe also as upper) */
+ bool is_lower;
};
struct ovl_layer {
@@ -36,18 +40,18 @@ struct ovl_layer {
};
struct ovl_path {
- struct ovl_layer *layer;
+ const struct ovl_layer *layer;
struct dentry *dentry;
};
/* private information held for overlayfs's superblock */
struct ovl_fs {
struct vfsmount *upper_mnt;
- unsigned int numlower;
- /* Number of unique lower sb that differ from upper sb */
- unsigned int numlowerfs;
- struct ovl_layer *lower_layers;
- struct ovl_sb *lower_fs;
+ unsigned int numlayer;
+ /* Number of unique fs among layers including upper fs */
+ unsigned int numfs;
+ const struct ovl_layer *layers;
+ struct ovl_sb *fs;
/* workbasedir is the path at workdir= mount option */
struct dentry *workbasedir;
/* workdir is the 'work' directory under workbasedir */
@@ -66,12 +70,18 @@ struct ovl_fs {
bool workdir_locked;
/* Traps in ovl inode cache */
struct inode *upperdir_trap;
+ struct inode *workbasedir_trap;
struct inode *workdir_trap;
struct inode *indexdir_trap;
- /* Inode numbers in all layers do not use the high xino_bits */
- unsigned int xino_bits;
+ /* -1: disabled, 0: same fs, 1..32: number of unused ino bits */
+ int xino_mode;
};
+static inline struct ovl_fs *OVL_FS(struct super_block *sb)
+{
+ return (struct ovl_fs *)sb->s_fs_info;
+}
+
/* private information held for every overlayfs dentry */
struct ovl_entry {
union {
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 47a91c9733a5..40ac9ce2465a 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -441,7 +441,7 @@ static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
const char *name, int namelen)
{
if (ino >> (64 - xinobits)) {
- pr_warn_ratelimited("overlayfs: d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
+ pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
namelen, name, ino, xinobits);
return ino;
}
@@ -469,7 +469,7 @@ static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p)
int xinobits = ovl_xino_bits(dir->d_sb);
int err = 0;
- if (!ovl_same_sb(dir->d_sb) && !xinobits)
+ if (!ovl_same_dev(dir->d_sb))
goto out;
if (p->name[0] == '.') {
@@ -504,7 +504,13 @@ get:
if (err)
goto fail;
- WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev);
+ /*
+ * Directory inode is always on overlay st_dev.
+ * Non-dir with ovl_same_dev() could be on pseudo st_dev in case
+ * of xino bits overflow.
+ */
+ WARN_ON_ONCE(S_ISDIR(stat.mode) &&
+ dir->d_sb->s_dev != stat.dev);
ino = stat.ino;
} else if (xinobits && !OVL_TYPE_UPPER(type)) {
ino = ovl_remap_lower_ino(ino, xinobits,
@@ -518,7 +524,7 @@ out:
return err;
fail:
- pr_warn_ratelimited("overlayfs: failed to look up (%s) for ino (%i)\n",
+ pr_warn_ratelimited("failed to look up (%s) for ino (%i)\n",
p->name, err);
goto out;
}
@@ -685,7 +691,7 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
int err;
struct ovl_dir_file *od = file->private_data;
struct dentry *dir = file->f_path.dentry;
- struct ovl_layer *lower_layer = ovl_layer_lower(dir);
+ const struct ovl_layer *lower_layer = ovl_layer_lower(dir);
struct ovl_readdir_translate rdt = {
.ctx.actor = ovl_fill_real,
.orig_ctx = ctx,
@@ -738,7 +744,7 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx)
* entries.
*/
if (ovl_xino_bits(dentry->d_sb) ||
- (ovl_same_sb(dentry->d_sb) &&
+ (ovl_same_fs(dentry->d_sb) &&
(ovl_is_impure_dir(file) ||
OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) {
return ovl_iterate_real(file, ctx);
@@ -965,7 +971,7 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
dentry = lookup_one_len(p->name, upper, p->len);
if (IS_ERR(dentry)) {
- pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n",
+ pr_err("lookup '%s/%.*s' failed (%i)\n",
upper->d_name.name, p->len, p->name,
(int) PTR_ERR(dentry));
continue;
@@ -1147,6 +1153,6 @@ next:
out:
ovl_cache_free(&list);
if (err)
- pr_err("overlayfs: failed index dir cleanup (%i)\n", err);
+ pr_err("failed index dir cleanup (%i)\n", err);
return err;
}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index b368e2e102fa..319fe0d355b0 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -212,6 +212,7 @@ static void ovl_free_fs(struct ovl_fs *ofs)
{
unsigned i;
+ iput(ofs->workbasedir_trap);
iput(ofs->indexdir_trap);
iput(ofs->workdir_trap);
iput(ofs->upperdir_trap);
@@ -223,14 +224,14 @@ static void ovl_free_fs(struct ovl_fs *ofs)
if (ofs->upperdir_locked)
ovl_inuse_unlock(ofs->upper_mnt->mnt_root);
mntput(ofs->upper_mnt);
- for (i = 0; i < ofs->numlower; i++) {
- iput(ofs->lower_layers[i].trap);
- mntput(ofs->lower_layers[i].mnt);
+ for (i = 1; i < ofs->numlayer; i++) {
+ iput(ofs->layers[i].trap);
+ mntput(ofs->layers[i].mnt);
}
- for (i = 0; i < ofs->numlowerfs; i++)
- free_anon_bdev(ofs->lower_fs[i].pseudo_dev);
- kfree(ofs->lower_layers);
- kfree(ofs->lower_fs);
+ kfree(ofs->layers);
+ for (i = 0; i < ofs->numfs; i++)
+ free_anon_bdev(ofs->fs[i].pseudo_dev);
+ kfree(ofs->fs);
kfree(ofs->config.lowerdir);
kfree(ofs->config.upperdir);
@@ -357,7 +358,7 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
if (ofs->config.nfs_export != ovl_nfs_export_def)
seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
"on" : "off");
- if (ofs->config.xino != ovl_xino_def())
+ if (ofs->config.xino != ovl_xino_def() && !ovl_same_fs(sb))
seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]);
if (ofs->config.metacopy != ovl_metacopy_def)
seq_printf(m, ",metacopy=%s",
@@ -461,7 +462,7 @@ static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
if (ovl_redirect_always_follow)
config->redirect_follow = true;
} else if (strcmp(mode, "nofollow") != 0) {
- pr_err("overlayfs: bad mount option \"redirect_dir=%s\"\n",
+ pr_err("bad mount option \"redirect_dir=%s\"\n",
mode);
return -EINVAL;
}
@@ -559,14 +560,15 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
break;
default:
- pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
+ pr_err("unrecognized mount option \"%s\" or missing value\n",
+ p);
return -EINVAL;
}
}
/* Workdir is useless in non-upper mount */
if (!config->upperdir && config->workdir) {
- pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
+ pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
config->workdir);
kfree(config->workdir);
config->workdir = NULL;
@@ -586,7 +588,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
/* Resolve metacopy -> redirect_dir dependency */
if (config->metacopy && !config->redirect_dir) {
if (metacopy_opt && redirect_opt) {
- pr_err("overlayfs: conflicting options: metacopy=on,redirect_dir=%s\n",
+ pr_err("conflicting options: metacopy=on,redirect_dir=%s\n",
config->redirect_mode);
return -EINVAL;
}
@@ -595,7 +597,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
* There was an explicit redirect_dir=... that resulted
* in this conflict.
*/
- pr_info("overlayfs: disabling metacopy due to redirect_dir=%s\n",
+ pr_info("disabling metacopy due to redirect_dir=%s\n",
config->redirect_mode);
config->metacopy = false;
} else {
@@ -691,7 +693,7 @@ out_unlock:
out_dput:
dput(work);
out_err:
- pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
+ pr_warn("failed to create directory %s/%s (errno: %i); mounting read-only\n",
ofs->config.workdir, name, -err);
work = NULL;
goto out_unlock;
@@ -715,21 +717,21 @@ static int ovl_mount_dir_noesc(const char *name, struct path *path)
int err = -EINVAL;
if (!*name) {
- pr_err("overlayfs: empty lowerdir\n");
+ pr_err("empty lowerdir\n");
goto out;
}
err = kern_path(name, LOOKUP_FOLLOW, path);
if (err) {
- pr_err("overlayfs: failed to resolve '%s': %i\n", name, err);
+ pr_err("failed to resolve '%s': %i\n", name, err);
goto out;
}
err = -EINVAL;
if (ovl_dentry_weird(path->dentry)) {
- pr_err("overlayfs: filesystem on '%s' not supported\n", name);
+ pr_err("filesystem on '%s' not supported\n", name);
goto out_put;
}
if (!d_is_dir(path->dentry)) {
- pr_err("overlayfs: '%s' not a directory\n", name);
+ pr_err("'%s' not a directory\n", name);
goto out_put;
}
return 0;
@@ -751,7 +753,7 @@ static int ovl_mount_dir(const char *name, struct path *path)
if (!err)
if (ovl_dentry_remote(path->dentry)) {
- pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n",
+ pr_err("filesystem on '%s' not supported as upperdir\n",
tmp);
path_put_init(path);
err = -EINVAL;
@@ -768,7 +770,7 @@ static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs,
int err = vfs_statfs(path, &statfs);
if (err)
- pr_err("overlayfs: statfs failed on '%s'\n", name);
+ pr_err("statfs failed on '%s'\n", name);
else
ofs->namelen = max(ofs->namelen, statfs.f_namelen);
@@ -803,13 +805,13 @@ static int ovl_lower_dir(const char *name, struct path *path,
(ofs->config.index && ofs->config.upperdir)) && !fh_type) {
ofs->config.index = false;
ofs->config.nfs_export = false;
- pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
+ pr_warn("fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
name);
}
/* Check if lower fs has 32bit inode numbers */
if (fh_type != FILEID_INO32_GEN)
- ofs->xino_bits = 0;
+ ofs->xino_mode = -1;
return 0;
@@ -995,7 +997,7 @@ static int ovl_setup_trap(struct super_block *sb, struct dentry *dir,
err = PTR_ERR_OR_ZERO(trap);
if (err) {
if (err == -ELOOP)
- pr_err("overlayfs: conflicting %s path\n", name);
+ pr_err("conflicting %s path\n", name);
return err;
}
@@ -1003,6 +1005,25 @@ static int ovl_setup_trap(struct super_block *sb, struct dentry *dir,
return 0;
}
+/*
+ * Determine how we treat concurrent use of upperdir/workdir based on the
+ * index feature. This is papering over mount leaks of container runtimes,
+ * for example, an old overlay mount is leaked and now its upperdir is
+ * attempted to be used as a lower layer in a new overlay mount.
+ */
+static int ovl_report_in_use(struct ovl_fs *ofs, const char *name)
+{
+ if (ofs->config.index) {
+ pr_err("%s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n",
+ name);
+ return -EBUSY;
+ } else {
+ pr_warn("%s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n",
+ name);
+ return 0;
+ }
+}
+
static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
struct path *upperpath)
{
@@ -1015,7 +1036,7 @@ static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
/* Upper fs should not be r/o */
if (sb_rdonly(upperpath->mnt->mnt_sb)) {
- pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
+ pr_err("upper fs is r/o, try multi-lower layers mount\n");
err = -EINVAL;
goto out;
}
@@ -1032,7 +1053,7 @@ static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
upper_mnt = clone_private_mount(upperpath);
err = PTR_ERR(upper_mnt);
if (IS_ERR(upper_mnt)) {
- pr_err("overlayfs: failed to clone upperpath\n");
+ pr_err("failed to clone upperpath\n");
goto out;
}
@@ -1040,14 +1061,12 @@ static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
ofs->upper_mnt = upper_mnt;
- err = -EBUSY;
if (ovl_inuse_trylock(ofs->upper_mnt->mnt_root)) {
ofs->upperdir_locked = true;
- } else if (ofs->config.index) {
- pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n");
- goto out;
} else {
- pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
+ err = ovl_report_in_use(ofs, "upperdir");
+ if (err)
+ goto out;
}
err = 0;
@@ -1090,7 +1109,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
* kernel upgrade. So warn instead of erroring out.
*/
if (!err)
- pr_warn("overlayfs: upper fs needs to support d_type.\n");
+ pr_warn("upper fs needs to support d_type.\n");
/* Check if upper/work fs supports O_TMPFILE */
temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0);
@@ -1098,7 +1117,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
if (ofs->tmpfile)
dput(temp);
else
- pr_warn("overlayfs: upper fs does not support tmpfile.\n");
+ pr_warn("upper fs does not support tmpfile.\n");
/*
* Check if upper/work fs supports trusted.overlay.* xattr
@@ -1108,7 +1127,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
ofs->noxattr = true;
ofs->config.index = false;
ofs->config.metacopy = false;
- pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off and metacopy=off.\n");
+ pr_warn("upper fs does not support xattr, falling back to index=off and metacopy=off.\n");
err = 0;
} else {
vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
@@ -1118,16 +1137,16 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
if (ofs->config.index && !fh_type) {
ofs->config.index = false;
- pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
+ pr_warn("upper fs does not support file handles, falling back to index=off.\n");
}
/* Check if upper fs has 32bit inode numbers */
if (fh_type != FILEID_INO32_GEN)
- ofs->xino_bits = 0;
+ ofs->xino_mode = -1;
/* NFS export of r/w mount depends on index */
if (ofs->config.nfs_export && !ofs->config.index) {
- pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n");
+ pr_warn("NFS export requires \"index=on\", falling back to nfs_export=off.\n");
ofs->config.nfs_export = false;
}
out:
@@ -1147,26 +1166,29 @@ static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs,
err = -EINVAL;
if (upperpath->mnt != workpath.mnt) {
- pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
+ pr_err("workdir and upperdir must reside under the same mount\n");
goto out;
}
if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
- pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
+ pr_err("workdir and upperdir must be separate subtrees\n");
goto out;
}
ofs->workbasedir = dget(workpath.dentry);
- err = -EBUSY;
if (ovl_inuse_trylock(ofs->workbasedir)) {
ofs->workdir_locked = true;
- } else if (ofs->config.index) {
- pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n");
- goto out;
} else {
- pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
+ err = ovl_report_in_use(ofs, "workdir");
+ if (err)
+ goto out;
}
+ err = ovl_setup_trap(sb, ofs->workbasedir, &ofs->workbasedir_trap,
+ "workdir");
+ if (err)
+ goto out;
+
err = ovl_make_workdir(sb, ofs, &workpath);
out:
@@ -1189,7 +1211,7 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry,
true);
if (err) {
- pr_err("overlayfs: failed to verify upper root origin\n");
+ pr_err("failed to verify upper root origin\n");
goto out;
}
@@ -1212,18 +1234,18 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN,
upperpath->dentry, true, false);
if (err)
- pr_err("overlayfs: failed to verify index dir 'origin' xattr\n");
+ pr_err("failed to verify index dir 'origin' xattr\n");
}
err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true);
if (err)
- pr_err("overlayfs: failed to verify index dir 'upper' xattr\n");
+ pr_err("failed to verify index dir 'upper' xattr\n");
/* Cleanup bad/stale/orphan index entries */
if (!err)
err = ovl_indexdir_cleanup(ofs);
}
if (err || !ofs->indexdir)
- pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
+ pr_warn("try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
out:
mnt_drop_write(mnt);
@@ -1234,17 +1256,22 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
{
unsigned int i;
- if (!ofs->config.nfs_export && !(ofs->config.index && ofs->upper_mnt))
+ if (!ofs->config.nfs_export && !ofs->upper_mnt)
return true;
- for (i = 0; i < ofs->numlowerfs; i++) {
+ for (i = 0; i < ofs->numfs; i++) {
/*
* We use uuid to associate an overlay lower file handle with a
* lower layer, so we can accept lower fs with null uuid as long
* as all lower layers with null uuid are on the same fs.
+ * if we detect multiple lower fs with the same uuid, we
+ * disable lower file handle decoding on all of them.
*/
- if (uuid_equal(&ofs->lower_fs[i].sb->s_uuid, uuid))
+ if (ofs->fs[i].is_lower &&
+ uuid_equal(&ofs->fs[i].sb->s_uuid, uuid)) {
+ ofs->fs[i].bad_uuid = true;
return false;
+ }
}
return true;
}
@@ -1256,53 +1283,79 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
unsigned int i;
dev_t dev;
int err;
+ bool bad_uuid = false;
- /* fsid 0 is reserved for upper fs even with non upper overlay */
- if (ofs->upper_mnt && ofs->upper_mnt->mnt_sb == sb)
- return 0;
-
- for (i = 0; i < ofs->numlowerfs; i++) {
- if (ofs->lower_fs[i].sb == sb)
- return i + 1;
+ for (i = 0; i < ofs->numfs; i++) {
+ if (ofs->fs[i].sb == sb)
+ return i;
}
if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) {
- ofs->config.index = false;
- ofs->config.nfs_export = false;
- pr_warn("overlayfs: %s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n",
- uuid_is_null(&sb->s_uuid) ? "null" : "conflicting",
- path->dentry);
+ bad_uuid = true;
+ if (ofs->config.index || ofs->config.nfs_export) {
+ ofs->config.index = false;
+ ofs->config.nfs_export = false;
+ pr_warn("%s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n",
+ uuid_is_null(&sb->s_uuid) ? "null" :
+ "conflicting",
+ path->dentry);
+ }
}
err = get_anon_bdev(&dev);
if (err) {
- pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n");
+ pr_err("failed to get anonymous bdev for lowerpath\n");
return err;
}
- ofs->lower_fs[ofs->numlowerfs].sb = sb;
- ofs->lower_fs[ofs->numlowerfs].pseudo_dev = dev;
- ofs->numlowerfs++;
+ ofs->fs[ofs->numfs].sb = sb;
+ ofs->fs[ofs->numfs].pseudo_dev = dev;
+ ofs->fs[ofs->numfs].bad_uuid = bad_uuid;
- return ofs->numlowerfs;
+ return ofs->numfs++;
}
-static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs,
- struct path *stack, unsigned int numlower)
+static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
+ struct path *stack, unsigned int numlower)
{
int err;
unsigned int i;
+ struct ovl_layer *layers;
err = -ENOMEM;
- ofs->lower_layers = kcalloc(numlower, sizeof(struct ovl_layer),
- GFP_KERNEL);
- if (ofs->lower_layers == NULL)
+ layers = kcalloc(numlower + 1, sizeof(struct ovl_layer), GFP_KERNEL);
+ if (!layers)
+ goto out;
+ ofs->layers = layers;
+
+ ofs->fs = kcalloc(numlower + 1, sizeof(struct ovl_sb), GFP_KERNEL);
+ if (ofs->fs == NULL)
goto out;
- ofs->lower_fs = kcalloc(numlower, sizeof(struct ovl_sb),
- GFP_KERNEL);
- if (ofs->lower_fs == NULL)
+ /* idx/fsid 0 are reserved for upper fs even with lower only overlay */
+ ofs->numfs++;
+
+ layers[0].mnt = ofs->upper_mnt;
+ layers[0].idx = 0;
+ layers[0].fsid = 0;
+ ofs->numlayer = 1;
+
+ /*
+ * All lower layers that share the same fs as upper layer, use the same
+ * pseudo_dev as upper layer. Allocate fs[0].pseudo_dev even for lower
+ * only overlay to simplify ovl_fs_free().
+ * is_lower will be set if upper fs is shared with a lower layer.
+ */
+ err = get_anon_bdev(&ofs->fs[0].pseudo_dev);
+ if (err) {
+ pr_err("failed to get anonymous bdev for upper fs\n");
goto out;
+ }
+
+ if (ofs->upper_mnt) {
+ ofs->fs[0].sb = ofs->upper_mnt->mnt_sb;
+ ofs->fs[0].is_lower = false;
+ }
for (i = 0; i < numlower; i++) {
struct vfsmount *mnt;
@@ -1313,20 +1366,20 @@ static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs,
if (err < 0)
goto out;
- err = -EBUSY;
- if (ovl_is_inuse(stack[i].dentry)) {
- pr_err("overlayfs: lowerdir is in-use as upperdir/workdir\n");
- goto out;
- }
-
err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir");
if (err)
goto out;
+ if (ovl_is_inuse(stack[i].dentry)) {
+ err = ovl_report_in_use(ofs, "lowerdir");
+ if (err)
+ goto out;
+ }
+
mnt = clone_private_mount(&stack[i]);
err = PTR_ERR(mnt);
if (IS_ERR(mnt)) {
- pr_err("overlayfs: failed to clone lowerpath\n");
+ pr_err("failed to clone lowerpath\n");
iput(trap);
goto out;
}
@@ -1337,15 +1390,13 @@ static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs,
*/
mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
- ofs->lower_layers[ofs->numlower].trap = trap;
- ofs->lower_layers[ofs->numlower].mnt = mnt;
- ofs->lower_layers[ofs->numlower].idx = i + 1;
- ofs->lower_layers[ofs->numlower].fsid = fsid;
- if (fsid) {
- ofs->lower_layers[ofs->numlower].fs =
- &ofs->lower_fs[fsid - 1];
- }
- ofs->numlower++;
+ layers[ofs->numlayer].trap = trap;
+ layers[ofs->numlayer].mnt = mnt;
+ layers[ofs->numlayer].idx = ofs->numlayer;
+ layers[ofs->numlayer].fsid = fsid;
+ layers[ofs->numlayer].fs = &ofs->fs[fsid];
+ ofs->numlayer++;
+ ofs->fs[fsid].is_lower = true;
}
/*
@@ -1356,22 +1407,23 @@ static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs,
* bits reserved for fsid, it emits a warning and uses the original
* inode number.
*/
- if (!ofs->numlowerfs || (ofs->numlowerfs == 1 && !ofs->upper_mnt)) {
- ofs->xino_bits = 0;
- ofs->config.xino = OVL_XINO_OFF;
- } else if (ofs->config.xino == OVL_XINO_ON && !ofs->xino_bits) {
+ if (ofs->numfs - !ofs->upper_mnt == 1) {
+ if (ofs->config.xino == OVL_XINO_ON)
+ pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n");
+ ofs->xino_mode = 0;
+ } else if (ofs->config.xino == OVL_XINO_ON && ofs->xino_mode < 0) {
/*
- * This is a roundup of number of bits needed for numlowerfs+1
- * (i.e. ilog2(numlowerfs+1 - 1) + 1). fsid 0 is reserved for
- * upper fs even with non upper overlay.
+ * This is a roundup of number of bits needed for encoding
+ * fsid, where fsid 0 is reserved for upper fs even with
+ * lower only overlay.
*/
BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31);
- ofs->xino_bits = ilog2(ofs->numlowerfs) + 1;
+ ofs->xino_mode = ilog2(ofs->numfs - 1) + 1;
}
- if (ofs->xino_bits) {
- pr_info("overlayfs: \"xino\" feature enabled using %d upper inode bits.\n",
- ofs->xino_bits);
+ if (ofs->xino_mode > 0) {
+ pr_info("\"xino\" feature enabled using %d upper inode bits.\n",
+ ofs->xino_mode);
}
err = 0;
@@ -1397,15 +1449,15 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
err = -EINVAL;
stacklen = ovl_split_lowerdirs(lowertmp);
if (stacklen > OVL_MAX_STACK) {
- pr_err("overlayfs: too many lower directories, limit is %d\n",
+ pr_err("too many lower directories, limit is %d\n",
OVL_MAX_STACK);
goto out_err;
} else if (!ofs->config.upperdir && stacklen == 1) {
- pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
+ pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n");
goto out_err;
} else if (!ofs->config.upperdir && ofs->config.nfs_export &&
ofs->config.redirect_follow) {
- pr_warn("overlayfs: NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
+ pr_warn("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
ofs->config.nfs_export = false;
}
@@ -1428,11 +1480,11 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
err = -EINVAL;
sb->s_stack_depth++;
if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
- pr_err("overlayfs: maximum fs stacking depth exceeded\n");
+ pr_err("maximum fs stacking depth exceeded\n");
goto out_err;
}
- err = ovl_get_lower_layers(sb, ofs, stack, numlower);
+ err = ovl_get_layers(sb, ofs, stack, numlower);
if (err)
goto out_err;
@@ -1443,7 +1495,7 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
for (i = 0; i < numlower; i++) {
oe->lowerstack[i].dentry = dget(stack[i].dentry);
- oe->lowerstack[i].layer = &ofs->lower_layers[i];
+ oe->lowerstack[i].layer = &ofs->layers[i+1];
}
if (remote)
@@ -1469,8 +1521,8 @@ out_err:
* - another layer of this overlayfs instance
* - upper/work dir of any overlayfs instance
*/
-static int ovl_check_layer(struct super_block *sb, struct dentry *dentry,
- const char *name)
+static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs,
+ struct dentry *dentry, const char *name)
{
struct dentry *next = dentry, *parent;
int err = 0;
@@ -1482,13 +1534,11 @@ static int ovl_check_layer(struct super_block *sb, struct dentry *dentry,
/* Walk back ancestors to root (inclusive) looking for traps */
while (!err && parent != next) {
- if (ovl_is_inuse(parent)) {
- err = -EBUSY;
- pr_err("overlayfs: %s path overlapping in-use upperdir/workdir\n",
- name);
- } else if (ovl_lookup_trap_inode(sb, parent)) {
+ if (ovl_lookup_trap_inode(sb, parent)) {
err = -ELOOP;
- pr_err("overlayfs: overlapping %s path\n", name);
+ pr_err("overlapping %s path\n", name);
+ } else if (ovl_is_inuse(parent)) {
+ err = ovl_report_in_use(ofs, name);
}
next = parent;
parent = dget_parent(next);
@@ -1509,7 +1559,8 @@ static int ovl_check_overlapping_layers(struct super_block *sb,
int i, err;
if (ofs->upper_mnt) {
- err = ovl_check_layer(sb, ofs->upper_mnt->mnt_root, "upperdir");
+ err = ovl_check_layer(sb, ofs, ofs->upper_mnt->mnt_root,
+ "upperdir");
if (err)
return err;
@@ -1520,13 +1571,14 @@ static int ovl_check_overlapping_layers(struct super_block *sb,
* workbasedir. In that case, we already have their traps in
* inode cache and we will catch that case on lookup.
*/
- err = ovl_check_layer(sb, ofs->workbasedir, "workdir");
+ err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir");
if (err)
return err;
}
- for (i = 0; i < ofs->numlower; i++) {
- err = ovl_check_layer(sb, ofs->lower_layers[i].mnt->mnt_root,
+ for (i = 1; i < ofs->numlayer; i++) {
+ err = ovl_check_layer(sb, ofs,
+ ofs->layers[i].mnt->mnt_root,
"lowerdir");
if (err)
return err;
@@ -1564,7 +1616,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
err = -EINVAL;
if (!ofs->config.lowerdir) {
if (!silent)
- pr_err("overlayfs: missing 'lowerdir'\n");
+ pr_err("missing 'lowerdir'\n");
goto out_err;
}
@@ -1572,14 +1624,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
sb->s_maxbytes = MAX_LFS_FILESIZE;
/* Assume underlaying fs uses 32bit inodes unless proven otherwise */
if (ofs->config.xino != OVL_XINO_OFF)
- ofs->xino_bits = BITS_PER_LONG - 32;
+ ofs->xino_mode = BITS_PER_LONG - 32;
/* alloc/destroy_inode needed for setting up traps in inode cache */
sb->s_op = &ovl_super_operations;
if (ofs->config.upperdir) {
if (!ofs->config.workdir) {
- pr_err("overlayfs: missing 'workdir'\n");
+ pr_err("missing 'workdir'\n");
goto out_err;
}
@@ -1629,13 +1681,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
if (!ofs->indexdir) {
ofs->config.index = false;
if (ofs->upper_mnt && ofs->config.nfs_export) {
- pr_warn("overlayfs: NFS export requires an index dir, falling back to nfs_export=off.\n");
+ pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n");
ofs->config.nfs_export = false;
}
}
if (ofs->config.metacopy && ofs->config.nfs_export) {
- pr_warn("overlayfs: NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
+ pr_warn("NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
ofs->config.nfs_export = false;
}
@@ -1718,9 +1770,15 @@ static int __init ovl_init(void)
if (ovl_inode_cachep == NULL)
return -ENOMEM;
- err = register_filesystem(&ovl_fs_type);
- if (err)
- kmem_cache_destroy(ovl_inode_cachep);
+ err = ovl_aio_request_cache_init();
+ if (!err) {
+ err = register_filesystem(&ovl_fs_type);
+ if (!err)
+ return 0;
+
+ ovl_aio_request_cache_destroy();
+ }
+ kmem_cache_destroy(ovl_inode_cachep);
return err;
}
@@ -1735,7 +1793,7 @@ static void __exit ovl_exit(void)
*/
rcu_barrier();
kmem_cache_destroy(ovl_inode_cachep);
-
+ ovl_aio_request_cache_destroy();
}
module_init(ovl_init);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index f5678a3f8350..ea005085803f 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -40,18 +40,6 @@ const struct cred *ovl_override_creds(struct super_block *sb)
return override_creds(ofs->creator_cred);
}
-struct super_block *ovl_same_sb(struct super_block *sb)
-{
- struct ovl_fs *ofs = sb->s_fs_info;
-
- if (!ofs->numlowerfs)
- return ofs->upper_mnt->mnt_sb;
- else if (ofs->numlowerfs == 1 && !ofs->upper_mnt)
- return ofs->lower_fs[0].sb;
- else
- return NULL;
-}
-
/*
* Check if underlying fs supports file handles and try to determine encoding
* type, in order to deduce maximum inode number used by fs.
@@ -198,7 +186,7 @@ struct dentry *ovl_dentry_lower(struct dentry *dentry)
return oe->numlower ? oe->lowerstack[0].dentry : NULL;
}
-struct ovl_layer *ovl_layer_lower(struct dentry *dentry)
+const struct ovl_layer *ovl_layer_lower(struct dentry *dentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
@@ -576,7 +564,7 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
err = ovl_do_setxattr(upperdentry, name, value, size, 0);
if (err == -EOPNOTSUPP) {
- pr_warn("overlayfs: cannot set %s xattr on upper\n", name);
+ pr_warn("cannot set %s xattr on upper\n", name);
ofs->noxattr = true;
return xerr;
}
@@ -700,7 +688,7 @@ static void ovl_cleanup_index(struct dentry *dentry)
inode = d_inode(upperdentry);
if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) {
- pr_warn_ratelimited("overlayfs: cleanup linked index (%pd2, ino=%lu, nlink=%u)\n",
+ pr_warn_ratelimited("cleanup linked index (%pd2, ino=%lu, nlink=%u)\n",
upperdentry, inode->i_ino, inode->i_nlink);
/*
* We either have a bug with persistent union nlink or a lower
@@ -739,7 +727,7 @@ out:
return;
fail:
- pr_err("overlayfs: cleanup index of '%pd2' failed (%i)\n", dentry, err);
+ pr_err("cleanup index of '%pd2' failed (%i)\n", dentry, err);
goto out;
}
@@ -830,7 +818,7 @@ int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
err_unlock:
unlock_rename(workdir, upperdir);
err:
- pr_err("overlayfs: failed to lock workdir+upperdir\n");
+ pr_err("failed to lock workdir+upperdir\n");
return -EIO;
}
@@ -852,7 +840,7 @@ int ovl_check_metacopy_xattr(struct dentry *dentry)
return 1;
out:
- pr_warn_ratelimited("overlayfs: failed to get metacopy (%i)\n", res);
+ pr_warn_ratelimited("failed to get metacopy (%i)\n", res);
return res;
}
@@ -899,7 +887,7 @@ ssize_t ovl_getxattr(struct dentry *dentry, char *name, char **value,
return res;
fail:
- pr_warn_ratelimited("overlayfs: failed to get xattr %s: err=%zi)\n",
+ pr_warn_ratelimited("failed to get xattr %s: err=%zi)\n",
name, res);
kfree(buf);
return res;
@@ -931,7 +919,7 @@ char *ovl_get_redirect_xattr(struct dentry *dentry, int padding)
return buf;
invalid:
- pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", buf);
+ pr_warn_ratelimited("invalid redirect (%s)\n", buf);
res = -EINVAL;
kfree(buf);
return ERR_PTR(res);
OpenPOWER on IntegriCloud