From 69bca80744eef58fa155e8042996b968fec17b26 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 26 May 2016 14:46:43 +0200 Subject: xfs: Propagate dentry down to inode_change_ok() To avoid clearing of capabilities or security related extended attributes too early, inode_change_ok() will need to take dentry instead of inode. Propagate dentry down to functions calling inode_change_ok(). This is rather straightforward except for xfs_set_mode() function which does not have dentry easily available. Luckily that function does not call inode_change_ok() anyway so we just have to do a little dance with function prototypes. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/xfs/xfs_inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index e08eaea6327b..df22e1254c98 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1710,7 +1710,7 @@ xfs_inactive_truncate( /* * Log the inode size first to prevent stale data exposure in the event * of a system crash before the truncate completes. See the related - * comment in xfs_setattr_size() for details. + * comment in xfs_vn_setattr_size() for details. */ ip->i_d.di_size = 0; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); -- cgit v1.2.3 From c2050a454c7f123d7a57fa1d76ff61bd43643abb Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Wed, 14 Sep 2016 07:48:06 -0700 Subject: fs: Replace current_fs_time() with current_time() current_fs_time() uses struct super_block* as an argument. As per Linus's suggestion, this is changed to take struct inode* as a parameter instead. This is because the function is primarily meant for vfs inode timestamps. Also the function was renamed as per Arnd's suggestion. Change all calls to current_fs_time() to use the new current_time() function instead. current_fs_time() will be deleted. Signed-off-by: Deepa Dinamani Signed-off-by: Al Viro --- drivers/char/sonypi.c | 2 +- drivers/platform/x86/sony-laptop.c | 2 +- fs/attr.c | 2 +- fs/bad_inode.c | 2 +- fs/binfmt_misc.c | 2 +- fs/btrfs/file.c | 6 +++--- fs/btrfs/inode.c | 22 +++++++++++----------- fs/btrfs/ioctl.c | 8 ++++---- fs/btrfs/transaction.c | 4 ++-- fs/btrfs/xattr.c | 2 +- fs/ceph/file.c | 4 ++-- fs/ceph/inode.c | 2 +- fs/ceph/xattr.c | 2 +- fs/cifs/file.c | 4 ++-- fs/configfs/inode.c | 6 +++--- fs/debugfs/inode.c | 2 +- fs/f2fs/xattr.c | 2 +- fs/fat/file.c | 2 +- fs/fuse/dir.c | 2 +- fs/inode.c | 6 +++--- fs/jfs/namei.c | 2 +- fs/kernfs/inode.c | 2 +- fs/locks.c | 2 +- fs/nfsd/blocklayout.c | 2 +- fs/ntfs/inode.c | 2 +- fs/ntfs/mft.c | 2 +- fs/orangefs/namei.c | 10 +++++----- fs/udf/ialloc.c | 2 +- fs/udf/inode.c | 4 ++-- fs/udf/namei.c | 20 ++++++++++---------- fs/xfs/xfs_acl.c | 2 +- fs/xfs/xfs_inode.c | 2 +- fs/xfs/xfs_iops.c | 2 +- fs/xfs/xfs_trans_inode.c | 2 +- 34 files changed, 70 insertions(+), 70 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c index e496daefe9e0..719c5b4eed39 100644 --- a/drivers/char/sonypi.c +++ b/drivers/char/sonypi.c @@ -934,7 +934,7 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf, if (ret > 0) { struct inode *inode = file_inode(file); - inode->i_atime = current_fs_time(inode->i_sb); + inode->i_atime = current_time(inode); } return ret; diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 1dba3598cfcb..c890a49587e4 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -4116,7 +4116,7 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf, if (ret > 0) { struct inode *inode = file_inode(file); - inode->i_atime = current_fs_time(inode->i_sb); + inode->i_atime = current_time(inode); } return ret; diff --git a/fs/attr.c b/fs/attr.c index 42bb42bb3c72..236d11300216 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -209,7 +209,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de inode->i_flags &= ~S_NOSEC; } - now = current_fs_time(inode->i_sb); + now = current_time(inode); attr->ia_ctime = now; if (!(ia_valid & ATTR_ATIME_SET)) diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 3ba385eaa26e..3c8ec390b446 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -173,7 +173,7 @@ void make_bad_inode(struct inode *inode) inode->i_mode = S_IFREG; inode->i_atime = inode->i_mtime = inode->i_ctime = - current_fs_time(inode->i_sb); + current_time(inode); inode->i_op = &bad_inode_ops; inode->i_fop = &bad_file_ops; } diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 6103a6362ccd..9b4688ab1d8e 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -584,7 +584,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode) inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = - current_fs_time(inode->i_sb); + current_time(inode); } return inode; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index fea31a4a6e36..dad53ce54d91 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1757,7 +1757,7 @@ static void update_time_for_write(struct inode *inode) if (IS_NOCMTIME(inode)) return; - now = current_fs_time(inode->i_sb); + now = current_time(inode); if (!timespec_equal(&inode->i_mtime, &now)) inode->i_mtime = now; @@ -2578,7 +2578,7 @@ out_trans: goto out_free; inode_inc_iversion(inode); - inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); + inode->i_mtime = inode->i_ctime = current_time(inode); trans->block_rsv = &root->fs_info->trans_block_rsv; ret = btrfs_update_inode(trans, root, inode); @@ -2842,7 +2842,7 @@ static long btrfs_fallocate(struct file *file, int mode, if (IS_ERR(trans)) { ret = PTR_ERR(trans); } else { - inode->i_ctime = current_fs_time(inode->i_sb); + inode->i_ctime = current_time(inode); i_size_write(inode, actual_end); btrfs_ordered_update_i_size(inode, actual_end, NULL); ret = btrfs_update_inode(trans, root, inode); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e6811c42e41e..2924774343e3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4059,7 +4059,7 @@ err: inode_inc_iversion(inode); inode_inc_iversion(dir); inode->i_ctime = dir->i_mtime = - dir->i_ctime = current_fs_time(inode->i_sb); + dir->i_ctime = current_time(inode); ret = btrfs_update_inode(trans, root, dir); out: return ret; @@ -4202,7 +4202,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, btrfs_i_size_write(dir, dir->i_size - name_len * 2); inode_inc_iversion(dir); - dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb); + dir->i_mtime = dir->i_ctime = current_time(dir); ret = btrfs_update_inode_fallback(trans, root, dir); if (ret) btrfs_abort_transaction(trans, ret); @@ -4965,7 +4965,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) inode_inc_iversion(inode); if (!(mask & (ATTR_CTIME | ATTR_MTIME))) inode->i_ctime = inode->i_mtime = - current_fs_time(inode->i_sb); + current_time(inode); } if (newsize > oldsize) { @@ -5672,7 +5672,7 @@ static struct inode *new_simple_dir(struct super_block *s, inode->i_op = &btrfs_dir_ro_inode_operations; inode->i_fop = &simple_dir_operations; inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; - inode->i_mtime = current_fs_time(inode->i_sb); + inode->i_mtime = current_time(inode); inode->i_atime = inode->i_mtime; inode->i_ctime = inode->i_mtime; BTRFS_I(inode)->i_otime = inode->i_mtime; @@ -6258,7 +6258,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode_init_owner(inode, dir, mode); inode_set_bytes(inode, 0); - inode->i_mtime = current_fs_time(inode->i_sb); + inode->i_mtime = current_time(inode); inode->i_atime = inode->i_mtime; inode->i_ctime = inode->i_mtime; BTRFS_I(inode)->i_otime = inode->i_mtime; @@ -6372,7 +6372,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, name_len * 2); inode_inc_iversion(parent_inode); parent_inode->i_mtime = parent_inode->i_ctime = - current_fs_time(parent_inode->i_sb); + current_time(parent_inode); ret = btrfs_update_inode(trans, root, parent_inode); if (ret) btrfs_abort_transaction(trans, ret); @@ -6590,7 +6590,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, BTRFS_I(inode)->dir_index = 0ULL; inc_nlink(inode); inode_inc_iversion(inode); - inode->i_ctime = current_fs_time(inode->i_sb); + inode->i_ctime = current_time(inode); ihold(inode); set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); @@ -9492,7 +9492,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, struct btrfs_root *dest = BTRFS_I(new_dir)->root; struct inode *new_inode = new_dentry->d_inode; struct inode *old_inode = old_dentry->d_inode; - struct timespec ctime = CURRENT_TIME; + struct timespec ctime = current_time(old_inode); struct dentry *parent; u64 old_ino = btrfs_ino(old_inode); u64 new_ino = btrfs_ino(new_inode); @@ -9860,7 +9860,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, inode_inc_iversion(old_inode); old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime = new_dir->i_mtime = - old_inode->i_ctime = current_fs_time(old_dir->i_sb); + old_inode->i_ctime = current_time(old_dir); if (old_dentry->d_parent != new_dentry->d_parent) btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); @@ -9885,7 +9885,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (new_inode) { inode_inc_iversion(new_inode); - new_inode->i_ctime = current_fs_time(new_inode->i_sb); + new_inode->i_ctime = current_time(new_inode); if (unlikely(btrfs_ino(new_inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { root_objectid = BTRFS_I(new_inode)->location.objectid; @@ -10403,7 +10403,7 @@ next: *alloc_hint = ins.objectid + ins.offset; inode_inc_iversion(inode); - inode->i_ctime = current_fs_time(inode->i_sb); + inode->i_ctime = current_time(inode); BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; if (!(mode & FALLOC_FL_KEEP_SIZE) && (actual_len > inode->i_size) && diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index b2a2da5893af..c48e37444717 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -349,7 +349,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) btrfs_update_iflags(inode); inode_inc_iversion(inode); - inode->i_ctime = current_fs_time(inode->i_sb); + inode->i_ctime = current_time(inode); ret = btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); @@ -445,7 +445,7 @@ static noinline int create_subvol(struct inode *dir, struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_root *new_root; struct btrfs_block_rsv block_rsv; - struct timespec cur_time = current_fs_time(dir->i_sb); + struct timespec cur_time = current_time(dir); struct inode *inode; int ret; int err; @@ -3279,7 +3279,7 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans, inode_inc_iversion(inode); if (!no_time_update) - inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); + inode->i_mtime = inode->i_ctime = current_time(inode); /* * We round up to the block size at eof when determining which * extents to clone above, but shouldn't round up the file size. @@ -5094,7 +5094,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file, struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root_item *root_item = &root->root_item; struct btrfs_trans_handle *trans; - struct timespec ct = current_fs_time(inode->i_sb); + struct timespec ct = current_time(inode); int ret = 0; int received_uuid_changed; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 95d41919d034..c294313ea2c8 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1474,7 +1474,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, parent_root = BTRFS_I(parent_inode)->root; record_root_in_trans(trans, parent_root, 0); - cur_time = current_fs_time(parent_inode->i_sb); + cur_time = current_time(parent_inode); /* * insert the directory item @@ -1630,7 +1630,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_i_size_write(parent_inode, parent_inode->i_size + dentry->d_name.len * 2); parent_inode->i_mtime = parent_inode->i_ctime = - current_fs_time(parent_inode->i_sb); + current_time(parent_inode); ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode); if (ret) { btrfs_abort_transaction(trans, ret); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index d1a177a3dbe8..fccbf5567e78 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -252,7 +252,7 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, goto out; inode_inc_iversion(inode); - inode->i_ctime = current_fs_time(inode->i_sb); + inode->i_ctime = current_time(inode); set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 0f5375d8e030..a37a343fbd0f 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -886,7 +886,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, int num_pages = 0; int flags; int ret; - struct timespec mtime = current_fs_time(inode->i_sb); + struct timespec mtime = current_time(inode); size_t count = iov_iter_count(iter); loff_t pos = iocb->ki_pos; bool write = iov_iter_rw(iter) == WRITE; @@ -1091,7 +1091,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, int flags; int check_caps = 0; int ret; - struct timespec mtime = current_fs_time(inode->i_sb); + struct timespec mtime = current_time(inode); size_t count = iov_iter_count(from); if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index dd3a6dbf71eb..ca1ccf741771 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -2080,7 +2080,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) if (dirtied) { inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied, &prealloc_cf); - inode->i_ctime = current_fs_time(inode->i_sb); + inode->i_ctime = current_time(inode); } release &= issued; diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index adc231892b0d..40b703217977 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -1034,7 +1034,7 @@ retry: dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL, &prealloc_cf); ci->i_xattrs.dirty = true; - inode->i_ctime = current_fs_time(inode->i_sb); + inode->i_ctime = current_time(inode); } spin_unlock(&ci->i_ceph_lock); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 579e41b350a2..00b6bf08cdba 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1878,7 +1878,7 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) write_data, to - from, &offset); cifsFileInfo_put(open_file); /* Does mm or vfs already set times? */ - inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb); + inode->i_atime = inode->i_mtime = current_time(inode); if ((bytes_written > 0) && (offset)) rc = 0; else if (bytes_written < 0) @@ -3571,7 +3571,7 @@ static int cifs_readpage_worker(struct file *file, struct page *page, cifs_dbg(FYI, "Bytes read %d\n", rc); file_inode(file)->i_atime = - current_fs_time(file_inode(file)->i_sb); + current_time(file_inode(file)); if (PAGE_SIZE > rc) memset(read_data + rc, 0, PAGE_SIZE - rc); diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 0387968e6f47..ad718e5e37bb 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -76,7 +76,7 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr) sd_iattr->ia_uid = GLOBAL_ROOT_UID; sd_iattr->ia_gid = GLOBAL_ROOT_GID; sd_iattr->ia_atime = sd_iattr->ia_mtime = - sd_iattr->ia_ctime = current_fs_time(inode->i_sb); + sd_iattr->ia_ctime = current_time(inode); sd->s_iattr = sd_iattr; } /* attributes were changed atleast once in past */ @@ -113,7 +113,7 @@ static inline void set_default_inode_attr(struct inode * inode, umode_t mode) { inode->i_mode = mode; inode->i_atime = inode->i_mtime = - inode->i_ctime = current_fs_time(inode->i_sb); + inode->i_ctime = current_time(inode); } static inline void set_inode_attr(struct inode * inode, struct iattr * iattr) @@ -197,7 +197,7 @@ int configfs_create(struct dentry * dentry, umode_t mode, void (*init)(struct in return -ENOMEM; p_inode = d_inode(dentry->d_parent); - p_inode->i_mtime = p_inode->i_ctime = current_fs_time(p_inode->i_sb); + p_inode->i_mtime = p_inode->i_ctime = current_time(p_inode); configfs_set_inode_lock_class(sd, inode); init(inode); diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 72361baf9da7..56a3c82a5785 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -45,7 +45,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb) if (inode) { inode->i_ino = get_next_ino(); inode->i_atime = inode->i_mtime = - inode->i_ctime = current_fs_time(sb); + inode->i_ctime = current_time(inode); } return inode; } diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index c8898b5148eb..62f76ea6cb3c 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -541,7 +541,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, if (is_inode_flag_set(inode, FI_ACL_MODE)) { inode->i_mode = F2FS_I(inode)->i_acl_mode; - inode->i_ctime = CURRENT_TIME; + inode->i_ctime = current_time(inode); clear_inode_flag(inode, FI_ACL_MODE); } if (index == F2FS_XATTR_INDEX_ENCRYPTION && diff --git a/fs/fat/file.c b/fs/fat/file.c index 811bbe0ab7a3..fc71ff524367 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -63,7 +63,7 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr) /* Equivalent to a chmod() */ ia.ia_valid = ATTR_MODE | ATTR_CTIME; - ia.ia_ctime = current_fs_time(inode->i_sb); + ia.ia_ctime = current_time(inode); if (is_dir) ia.ia_mode = fat_make_mode(sbi, attr, S_IRWXUGO); else { diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index c47b7780ce37..8de80dc563c5 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -637,7 +637,7 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry, static inline void fuse_update_ctime(struct inode *inode) { if (!IS_NOCMTIME(inode)) { - inode->i_ctime = current_fs_time(inode->i_sb); + inode->i_ctime = current_time(inode); mark_inode_dirty_sync(inode); } } diff --git a/fs/inode.c b/fs/inode.c index 2172d0f77011..cadf75fb579e 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1636,7 +1636,7 @@ bool atime_needs_update(const struct path *path, struct inode *inode) if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) return false; - now = current_fs_time(inode->i_sb); + now = current_time(inode); if (!relatime_need_update(mnt, inode, now)) return false; @@ -1670,7 +1670,7 @@ void touch_atime(const struct path *path) * We may also fail on filesystems that have the ability to make parts * of the fs read only, e.g. subvolumes in Btrfs. */ - now = current_fs_time(inode->i_sb); + now = current_time(inode); update_time(inode, &now, S_ATIME); __mnt_drop_write(mnt); skip_update: @@ -1793,7 +1793,7 @@ int file_update_time(struct file *file) if (IS_NOCMTIME(inode)) return 0; - now = current_fs_time(inode->i_sb); + now = current_time(inode); if (!timespec_equal(&inode->i_mtime, &now)) sync_it = S_MTIME; diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index b05d0b4c68f6..10449c1deac0 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1281,7 +1281,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, old_ip->i_ctime = current_time(old_ip); mark_inode_dirty(old_ip); - new_dir->i_ctime = new_dir->i_mtime = current_fs_time(new_dir->i_sb); + new_dir->i_ctime = new_dir->i_mtime = current_time(new_dir); mark_inode_dirty(new_dir); /* Build list of inodes modified by this transaction */ diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index 63b925d5ba1e..43f6848266d5 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -241,7 +241,7 @@ static inline void set_default_inode_attr(struct inode *inode, umode_t mode) { inode->i_mode = mode; inode->i_atime = inode->i_mtime = - inode->i_ctime = current_fs_time(inode->i_sb); + inode->i_ctime = current_time(inode); } static inline void set_inode_attr(struct inode *inode, struct iattr *iattr) diff --git a/fs/locks.c b/fs/locks.c index ee1b15f6fc13..b5152b067f37 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1539,7 +1539,7 @@ void lease_get_mtime(struct inode *inode, struct timespec *time) } if (has_lease) - *time = current_fs_time(inode->i_sb); + *time = current_time(inode); else *time = inode->i_mtime; } diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 5a1708441510..0780ff864539 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -123,7 +123,7 @@ nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp, if (lcp->lc_mtime.tv_nsec == UTIME_NOW || timespec_compare(&lcp->lc_mtime, &inode->i_mtime) < 0) - lcp->lc_mtime = current_fs_time(inode->i_sb); + lcp->lc_mtime = current_time(inode); iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME; iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime; diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index e01287c964a8..6b7588a6f98c 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -2813,7 +2813,7 @@ done: * for real. */ if (!IS_NOCMTIME(VFS_I(base_ni)) && !IS_RDONLY(VFS_I(base_ni))) { - struct timespec now = current_fs_time(VFS_I(base_ni)->i_sb); + struct timespec now = current_time(VFS_I(base_ni)); int sync_it = 0; if (!timespec_equal(&VFS_I(base_ni)->i_mtime, &now) || diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index d15d492ce47b..d3c009626032 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -2692,7 +2692,7 @@ mft_rec_already_initialized: /* Set the inode times to the current time. */ vi->i_atime = vi->i_mtime = vi->i_ctime = - current_fs_time(vi->i_sb); + current_time(vi); /* * Set the file size to 0, the ntfs inode sizes are set to 0 by * the call to ntfs_init_big_inode() below. diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index 62c525936ee8..e1ab95529231 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -81,7 +81,7 @@ static int orangefs_create(struct inode *dir, dentry->d_name.name); SetMtimeFlag(parent); - dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb); + dir->i_mtime = dir->i_ctime = current_time(dir); mark_inode_dirty_sync(dir); ret = 0; out: @@ -254,7 +254,7 @@ static int orangefs_unlink(struct inode *dir, struct dentry *dentry) drop_nlink(inode); SetMtimeFlag(parent); - dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb); + dir->i_mtime = dir->i_ctime = current_time(dir); mark_inode_dirty_sync(dir); } return ret; @@ -331,7 +331,7 @@ static int orangefs_symlink(struct inode *dir, dentry->d_name.name); SetMtimeFlag(parent); - dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb); + dir->i_mtime = dir->i_ctime = current_time(dir); mark_inode_dirty_sync(dir); ret = 0; out: @@ -399,7 +399,7 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode * across clients; keep constant at 1. */ SetMtimeFlag(parent); - dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb); + dir->i_mtime = dir->i_ctime = current_time(dir); mark_inode_dirty_sync(dir); out: op_release(new_op); @@ -443,7 +443,7 @@ static int orangefs_rename(struct inode *old_dir, ret); if (new_dentry->d_inode) - new_dentry->d_inode->i_ctime = CURRENT_TIME; + new_dentry->d_inode->i_ctime = current_time(new_dentry->d_inode); op_release(new_op); return ret; diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index e77db621ec89..c1ed18a10ce4 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -121,7 +121,7 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode) else iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; inode->i_mtime = inode->i_atime = inode->i_ctime = - iinfo->i_crtime = current_fs_time(inode->i_sb); + iinfo->i_crtime = current_time(inode); if (unlikely(insert_inode_locked(inode) < 0)) { make_bad_inode(inode); iput(inode); diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 55aa587bbc38..aad46401ede5 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -886,7 +886,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, *new = 1; iinfo->i_next_alloc_block = block; iinfo->i_next_alloc_goal = newblocknum; - inode->i_ctime = current_fs_time(inode->i_sb); + inode->i_ctime = current_time(inode); if (IS_SYNC(inode)) udf_sync_inode(inode); @@ -1268,7 +1268,7 @@ set_size: up_write(&iinfo->i_data_sem); } update_time: - inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); + inode->i_mtime = inode->i_ctime = current_time(inode); if (IS_SYNC(inode)) udf_sync_inode(inode); else diff --git a/fs/udf/namei.c b/fs/udf/namei.c index c3e5c9679371..61af3f1e1973 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -616,7 +616,7 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode) *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = cpu_to_le32(iinfo->i_unique & 0x00000000FFFFFFFFUL); udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); - dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb); + dir->i_ctime = dir->i_mtime = current_time(dir); mark_inode_dirty(dir); if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); @@ -730,7 +730,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) cfi.fileCharacteristics |= FID_FILE_CHAR_DIRECTORY; udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); inc_nlink(dir); - dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb); + dir->i_ctime = dir->i_mtime = current_time(dir); mark_inode_dirty(dir); unlock_new_inode(inode); d_instantiate(dentry, inode); @@ -845,7 +845,7 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry) inode->i_size = 0; inode_dec_link_count(dir); inode->i_ctime = dir->i_ctime = dir->i_mtime = - current_fs_time(dir->i_sb); + current_time(inode); mark_inode_dirty(dir); end_rmdir: @@ -888,7 +888,7 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry) retval = udf_delete_entry(dir, fi, &fibh, &cfi); if (retval) goto end_unlink; - dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb); + dir->i_ctime = dir->i_mtime = current_time(dir); mark_inode_dirty(dir); inode_dec_link_count(inode); inode->i_ctime = dir->i_ctime; @@ -1079,9 +1079,9 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir, brelse(fibh.ebh); brelse(fibh.sbh); inc_nlink(inode); - inode->i_ctime = current_fs_time(inode->i_sb); + inode->i_ctime = current_time(inode); mark_inode_dirty(inode); - dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb); + dir->i_ctime = dir->i_mtime = current_time(dir); mark_inode_dirty(dir); ihold(inode); d_instantiate(dentry, inode); @@ -1172,7 +1172,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, * Like most other Unix systems, set the ctime for inodes on a * rename. */ - old_inode->i_ctime = current_fs_time(old_inode->i_sb); + old_inode->i_ctime = current_time(old_inode); mark_inode_dirty(old_inode); /* @@ -1188,11 +1188,11 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, udf_delete_entry(old_dir, ofi, &ofibh, &ocfi); if (new_inode) { - new_inode->i_ctime = current_fs_time(new_inode->i_sb); + new_inode->i_ctime = current_time(new_inode); inode_dec_link_count(new_inode); } - old_dir->i_ctime = old_dir->i_mtime = current_fs_time(old_dir->i_sb); - new_dir->i_ctime = new_dir->i_mtime = current_fs_time(new_dir->i_sb); + old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir); + new_dir->i_ctime = new_dir->i_mtime = current_time(new_dir); mark_inode_dirty(old_dir); mark_inode_dirty(new_dir); diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index b6e527b8eccb..74f7c68105b9 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -236,7 +236,7 @@ xfs_set_mode(struct inode *inode, umode_t mode) iattr.ia_valid = ATTR_MODE | ATTR_CTIME; iattr.ia_mode = mode; - iattr.ia_ctime = current_fs_time(inode->i_sb); + iattr.ia_ctime = current_time(inode); error = xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL); } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index e08eaea6327b..020110b76a40 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -821,7 +821,7 @@ xfs_ialloc( ip->i_d.di_nextents = 0; ASSERT(ip->i_d.di_nblocks == 0); - tv = current_fs_time(mp->m_super); + tv = current_time(inode); inode->i_mtime = tv; inode->i_atime = tv; inode->i_ctime = tv; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index b24c3102fa93..08c33f7fbe49 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -882,7 +882,7 @@ xfs_setattr_size( if (newsize != oldsize && !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) { iattr->ia_ctime = iattr->ia_mtime = - current_fs_time(inode->i_sb); + current_time(inode); iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME; } diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 11a3af08b5c7..dab8daa676f9 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -73,7 +73,7 @@ xfs_trans_ichgtime( ASSERT(tp); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - tv = current_fs_time(inode->i_sb); + tv = current_time(inode); if (flags & XFS_ICHGTIME_MOD) inode->i_mtime = tv; -- cgit v1.2.3 From 17c12bcd3030e4b3247d0c961ce598c731a9dcf9 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 3 Oct 2016 09:11:29 -0700 Subject: xfs: when replaying bmap operations, don't let unlinked inodes get reaped Log recovery will iget an inode to replay BUI items and iput the inode when it's done. Unfortunately, if the inode was unlinked, the iput will see that i_nlink == 0 and decide to truncate & free the inode, which prevents us from replaying subsequent BUIs. We can't skip the BUIs because we have to replay all the redo items to ensure that atomic operations complete. Since unlinked inode recovery will reap the inode anyway, we can safely introduce a new inode flag to indicate that an inode is in this 'unlinked recovery' state and should not be auto-reaped in the drop_inode path. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_bmap_item.c | 2 ++ fs/xfs/xfs_inode.c | 1 + fs/xfs/xfs_inode.h | 6 ++++++ fs/xfs/xfs_log_recover.c | 1 + fs/xfs/xfs_mount.c | 9 +++++++++ fs/xfs/xfs_super.c | 10 ++++++++++ 6 files changed, 29 insertions(+) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 02e8f93f0315..9bf57c76623b 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -456,6 +456,8 @@ xfs_bui_recover( if (error) goto err_inode; + if (VFS_I(ip)->i_nlink == 0) + xfs_iflags_set(ip, XFS_IRECOVERY); xfs_defer_init(&dfops, &firstfsb); /* Process deferred bmap item. */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index e08eaea6327b..edb453d531e1 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1850,6 +1850,7 @@ xfs_inactive( } mp = ip->i_mount; + ASSERT(!xfs_iflags_test(ip, XFS_IRECOVERY)); /* If this is a read-only mount, don't do this (would generate I/O) */ if (mp->m_flags & XFS_MOUNT_RDONLY) diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index a8658e68041a..4be32036e16a 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -222,6 +222,12 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip) #define XFS_IPINNED (1 << __XFS_IPINNED_BIT) #define XFS_IDONTCACHE (1 << 9) /* don't cache the inode long term */ #define XFS_IEOFBLOCKS (1 << 10)/* has the preallocblocks tag set */ +/* + * If this unlinked inode is in the middle of recovery, don't let drop_inode + * truncate and free the inode. This can happen if we iget the inode during + * log recovery to replay a bmap operation on the inode. + */ +#define XFS_IRECOVERY (1 << 11) /* * Per-lifetime flags need to be reset when re-using a reclaimable inode during diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 9697e946aa35..9b3d7c76915d 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -4969,6 +4969,7 @@ xlog_recover_process_one_iunlink( if (error) goto fail_iput; + xfs_iflags_clear(ip, XFS_IRECOVERY); ASSERT(VFS_I(ip)->i_nlink == 0); ASSERT(VFS_I(ip)->i_mode != 0); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 3f64615f58db..a78a1164bab3 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -924,6 +924,15 @@ xfs_mountfs( } } + /* + * During the second phase of log recovery, we need iget and + * iput to behave like they do for an active filesystem. + * xfs_fs_drop_inode needs to be able to prevent the deletion + * of inodes before we're done replaying log items on those + * inodes. + */ + mp->m_super->s_flags |= MS_ACTIVE; + /* * Finish recovering the file system. This part needed to be delayed * until after the root and real-time bitmap inodes were consistently diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 204b794cebd5..35c04a790754 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1008,6 +1008,16 @@ xfs_fs_drop_inode( { struct xfs_inode *ip = XFS_I(inode); + /* + * If this unlinked inode is in the middle of recovery, don't + * drop the inode just yet; log recovery will take care of + * that. See the comment for this inode flag. + */ + if (ip->i_flags & XFS_IRECOVERY) { + ASSERT(ip->i_mount->m_log->l_flags & XLOG_RECOVERY_NEEDED); + return 0; + } + return generic_drop_inode(inode) || (ip->i_flags & XFS_IDONTCACHE); } -- cgit v1.2.3 From aa8968f227a8c6c7468c9d498daf06f8dbc30af0 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 3 Oct 2016 09:11:38 -0700 Subject: xfs: cancel CoW reservations and clear inode reflink flag when freeing blocks When we're freeing blocks (truncate, punch, etc.), clear all CoW reservations in the range being freed. If the file block count drops to zero, also clear the inode reflink flag. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_inode.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index edb453d531e1..f072f48b3c04 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -49,6 +49,7 @@ #include "xfs_trans_priv.h" #include "xfs_log.h" #include "xfs_bmap_btree.h" +#include "xfs_reflink.h" kmem_zone_t *xfs_inode_zone; @@ -1586,6 +1587,18 @@ xfs_itruncate_extents( goto out; } + /* Remove all pending CoW reservations. */ + error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block, + last_block); + if (error) + goto out; + + /* + * Clear the reflink flag if we truncated everything. + */ + if (ip->i_d.di_nblocks == 0 && xfs_is_reflink_inode(ip)) + ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; + /* * Always re-log the inode so that our permanent transaction can keep * on rolling it forward in the log. -- cgit v1.2.3 From f7ca35227253dc8244fd908140b06010e67a31e5 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 3 Oct 2016 09:11:43 -0700 Subject: xfs: create a separate cow extent size hint for the allocator Create a per-inode extent size allocator hint for copy-on-write. This hint is separate from the existing extent size hint so that CoW can take advantage of the fragmentation-reducing properties of extent size hints without disabling delalloc for regular writes. The extent size hint that's fed to the allocator during a copy on write operation is the greater of the cowextsize and regular extsize hint. During reflink, if we're sharing the entire source file to the entire destination file and the destination file doesn't already have a cowextsize hint, propagate the source file's cowextsize hint to the destination file. Furthermore, zero the bulkstat buffer prior to setting the fields so that we don't copy kernel memory contents into userspace. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_bmap.c | 9 ++++-- fs/xfs/libxfs/xfs_format.h | 3 +- fs/xfs/libxfs/xfs_fs.h | 3 +- fs/xfs/libxfs/xfs_inode_buf.c | 4 ++- fs/xfs/libxfs/xfs_inode_buf.h | 1 + fs/xfs/libxfs/xfs_log_format.h | 3 +- fs/xfs/xfs_bmap_util.c | 9 ++++-- fs/xfs/xfs_inode.c | 33 +++++++++++++++++++++ fs/xfs/xfs_inode.h | 1 + fs/xfs/xfs_inode_item.c | 2 +- fs/xfs/xfs_ioctl.c | 67 ++++++++++++++++++++++++++++++++++++++++-- fs/xfs/xfs_iomap.c | 2 +- fs/xfs/xfs_iomap.h | 1 + fs/xfs/xfs_itable.c | 8 ++++- fs/xfs/xfs_reflink.c | 41 +++++++++++++++++++++----- 15 files changed, 167 insertions(+), 20 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index e4bf2c406e3b..9702f4c112bf 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3666,7 +3666,9 @@ xfs_bmap_btalloc( else if (mp->m_dalign) stripe_align = mp->m_dalign; - if (xfs_alloc_is_userdata(ap->datatype)) + if (ap->flags & XFS_BMAPI_COWFORK) + align = xfs_get_cowextsz_hint(ap->ip); + else if (xfs_alloc_is_userdata(ap->datatype)) align = xfs_get_extsz_hint(ap->ip); if (unlikely(align)) { error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, @@ -4192,7 +4194,10 @@ xfs_bmapi_reserve_delalloc( alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff); /* Figure out the extent size, adjust alen */ - extsz = xfs_get_extsz_hint(ip); + if (whichfork == XFS_COW_FORK) + extsz = xfs_get_cowextsz_hint(ip); + else + extsz = xfs_get_extsz_hint(ip); if (extsz) { error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof, 1, 0, &aoff, &alen); diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 777fa974a319..5e04ea2aa902 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -901,7 +901,8 @@ typedef struct xfs_dinode { __be64 di_changecount; /* number of attribute changes */ __be64 di_lsn; /* flush sequence */ __be64 di_flags2; /* more random flags */ - __u8 di_pad2[16]; /* more padding for future expansion */ + __be32 di_cowextsize; /* basic cow extent size for file */ + __u8 di_pad2[12]; /* more padding for future expansion */ /* fields only written to during inode creation */ xfs_timestamp_t di_crtime; /* time created */ diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 3d1efe5bf37b..b72dc821d78b 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -278,7 +278,8 @@ typedef struct xfs_bstat { #define bs_projid bs_projid_lo /* (previously just bs_projid) */ __u16 bs_forkoff; /* inode fork offset in bytes */ __u16 bs_projid_hi; /* higher part of project id */ - unsigned char bs_pad[10]; /* pad space, unused */ + unsigned char bs_pad[6]; /* pad space, unused */ + __u32 bs_cowextsize; /* cow extent size */ __u32 bs_dmevmask; /* DMIG event mask */ __u16 bs_dmstate; /* DMIG state info */ __u16 bs_aextents; /* attribute number of extents */ diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 4b9769e23c83..a3e803823a19 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -256,6 +256,7 @@ xfs_inode_from_disk( to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec); to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec); to->di_flags2 = be64_to_cpu(from->di_flags2); + to->di_cowextsize = be32_to_cpu(from->di_cowextsize); } } @@ -305,7 +306,7 @@ xfs_inode_to_disk( to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec); to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec); to->di_flags2 = cpu_to_be64(from->di_flags2); - + to->di_cowextsize = cpu_to_be32(from->di_cowextsize); to->di_ino = cpu_to_be64(ip->i_ino); to->di_lsn = cpu_to_be64(lsn); memset(to->di_pad2, 0, sizeof(to->di_pad2)); @@ -357,6 +358,7 @@ xfs_log_dinode_to_disk( to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec); to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec); to->di_flags2 = cpu_to_be64(from->di_flags2); + to->di_cowextsize = cpu_to_be32(from->di_cowextsize); to->di_ino = cpu_to_be64(from->di_ino); to->di_lsn = cpu_to_be64(from->di_lsn); memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h index 7c4dd321b215..62d9d4681c8c 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.h +++ b/fs/xfs/libxfs/xfs_inode_buf.h @@ -47,6 +47,7 @@ struct xfs_icdinode { __uint16_t di_flags; /* random flags, XFS_DIFLAG_... */ __uint64_t di_flags2; /* more random flags */ + __uint32_t di_cowextsize; /* basic cow extent size for file */ xfs_ictimestamp_t di_crtime; /* time created */ }; diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 5dd7c2e8f439..364ce6fcc213 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -423,7 +423,8 @@ struct xfs_log_dinode { __uint64_t di_changecount; /* number of attribute changes */ xfs_lsn_t di_lsn; /* flush sequence */ __uint64_t di_flags2; /* more random flags */ - __uint8_t di_pad2[16]; /* more padding for future expansion */ + __uint32_t di_cowextsize; /* basic cow extent size for file */ + __uint8_t di_pad2[12]; /* more padding for future expansion */ /* fields only written to during inode creation */ xfs_ictimestamp_t di_crtime; /* time created */ diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index e2f46e645612..4a807f05e460 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -582,8 +582,13 @@ xfs_getbmap( if (ip->i_cformat != XFS_DINODE_FMT_EXTENTS) return -EINVAL; - prealloced = 0; - fixlen = XFS_ISIZE(ip); + if (xfs_get_cowextsz_hint(ip)) { + prealloced = 1; + fixlen = mp->m_super->s_maxbytes; + } else { + prealloced = 0; + fixlen = XFS_ISIZE(ip); + } break; default: if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index f072f48b3c04..09640bdd3e44 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -77,6 +77,27 @@ xfs_get_extsz_hint( return 0; } +/* + * Helper function to extract CoW extent size hint from inode. + * Between the extent size hint and the CoW extent size hint, we + * return the greater of the two. + */ +xfs_extlen_t +xfs_get_cowextsz_hint( + struct xfs_inode *ip) +{ + xfs_extlen_t a, b; + + a = 0; + if (ip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) + a = ip->i_d.di_cowextsize; + b = xfs_get_extsz_hint(ip); + + if (a > b) + return a; + return b; +} + /* * These two are wrapper routines around the xfs_ilock() routine used to * centralize some grungy code. They are used in places that wish to lock the @@ -652,6 +673,8 @@ _xfs_dic2xflags( if (di_flags2 & XFS_DIFLAG2_ANY) { if (di_flags2 & XFS_DIFLAG2_DAX) flags |= FS_XFLAG_DAX; + if (di_flags2 & XFS_DIFLAG2_COWEXTSIZE) + flags |= FS_XFLAG_COWEXTSIZE; } if (has_attr) @@ -835,6 +858,7 @@ xfs_ialloc( if (ip->i_d.di_version == 3) { inode->i_version = 1; ip->i_d.di_flags2 = 0; + ip->i_d.di_cowextsize = 0; ip->i_d.di_crtime.t_sec = (__int32_t)tv.tv_sec; ip->i_d.di_crtime.t_nsec = (__int32_t)tv.tv_nsec; } @@ -897,6 +921,15 @@ xfs_ialloc( ip->i_d.di_flags |= di_flags; ip->i_d.di_flags2 |= di_flags2; } + if (pip && + (pip->i_d.di_flags2 & XFS_DIFLAG2_ANY) && + pip->i_d.di_version == 3 && + ip->i_d.di_version == 3) { + if (pip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) { + ip->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; + ip->i_d.di_cowextsize = pip->i_d.di_cowextsize; + } + } /* FALLTHROUGH */ case S_IFLNK: ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index ff6fc03c0092..6d63dc031127 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -426,6 +426,7 @@ int xfs_iflush(struct xfs_inode *, struct xfs_buf **); void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); +xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip); int xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t, xfs_nlink_t, xfs_dev_t, prid_t, int, diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 892c2aced207..9610e9c00952 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -368,7 +368,7 @@ xfs_inode_to_log_dinode( to->di_crtime.t_sec = from->di_crtime.t_sec; to->di_crtime.t_nsec = from->di_crtime.t_nsec; to->di_flags2 = from->di_flags2; - + to->di_cowextsize = from->di_cowextsize; to->di_ino = ip->i_ino; to->di_lsn = lsn; memset(to->di_pad2, 0, sizeof(to->di_pad2)); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 96a70fd1f5d6..1388a1275dc8 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -903,6 +903,8 @@ xfs_ioc_fsgetxattr( xfs_ilock(ip, XFS_ILOCK_SHARED); fa.fsx_xflags = xfs_ip2xflags(ip); fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; + fa.fsx_cowextsize = ip->i_d.di_cowextsize << + ip->i_mount->m_sb.sb_blocklog; fa.fsx_projid = xfs_get_projid(ip); if (attr) { @@ -973,12 +975,13 @@ xfs_set_diflags( if (ip->i_d.di_version < 3) return; - di_flags2 = 0; + di_flags2 = (ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK); if (xflags & FS_XFLAG_DAX) di_flags2 |= XFS_DIFLAG2_DAX; + if (xflags & FS_XFLAG_COWEXTSIZE) + di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; ip->i_d.di_flags2 = di_flags2; - } STATIC void @@ -1219,6 +1222,56 @@ xfs_ioctl_setattr_check_extsize( return 0; } +/* + * CoW extent size hint validation rules are: + * + * 1. CoW extent size hint can only be set if reflink is enabled on the fs. + * The inode does not have to have any shared blocks, but it must be a v3. + * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files; + * for a directory, the hint is propagated to new files. + * 3. Can be changed on files & directories at any time. + * 4. CoW extsize hint of 0 turns off hints, clears inode flags. + * 5. Extent size must be a multiple of the appropriate block size. + * 6. The extent size hint must be limited to half the AG size to avoid + * alignment extending the extent beyond the limits of the AG. + */ +static int +xfs_ioctl_setattr_check_cowextsize( + struct xfs_inode *ip, + struct fsxattr *fa) +{ + struct xfs_mount *mp = ip->i_mount; + + if (!(fa->fsx_xflags & FS_XFLAG_COWEXTSIZE)) + return 0; + + if (!xfs_sb_version_hasreflink(&ip->i_mount->m_sb) || + ip->i_d.di_version != 3) + return -EINVAL; + + if (!S_ISREG(VFS_I(ip)->i_mode) && !S_ISDIR(VFS_I(ip)->i_mode)) + return -EINVAL; + + if (fa->fsx_cowextsize != 0) { + xfs_extlen_t size; + xfs_fsblock_t cowextsize_fsb; + + cowextsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_cowextsize); + if (cowextsize_fsb > MAXEXTLEN) + return -EINVAL; + + size = mp->m_sb.sb_blocksize; + if (cowextsize_fsb > mp->m_sb.sb_agblocks / 2) + return -EINVAL; + + if (fa->fsx_cowextsize % size) + return -EINVAL; + } else + fa->fsx_xflags &= ~FS_XFLAG_COWEXTSIZE; + + return 0; +} + static int xfs_ioctl_setattr_check_projid( struct xfs_inode *ip, @@ -1311,6 +1364,10 @@ xfs_ioctl_setattr( if (code) goto error_trans_cancel; + code = xfs_ioctl_setattr_check_cowextsize(ip, fa); + if (code) + goto error_trans_cancel; + code = xfs_ioctl_setattr_xflags(tp, ip, fa); if (code) goto error_trans_cancel; @@ -1346,6 +1403,12 @@ xfs_ioctl_setattr( ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog; else ip->i_d.di_extsize = 0; + if (ip->i_d.di_version == 3 && + (ip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) + ip->i_d.di_cowextsize = fa->fsx_cowextsize >> + mp->m_sb.sb_blocklog; + else + ip->i_d.di_cowextsize = 0; code = xfs_trans_commit(tp); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 765849ed9b70..d907eb9f8ef3 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -71,7 +71,7 @@ xfs_bmbt_to_iomap( iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip)); } -static xfs_extlen_t +xfs_extlen_t xfs_eof_alignment( struct xfs_inode *ip, xfs_extlen_t extsize) diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index a16b9567e76b..6d45cf01fcff 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -31,6 +31,7 @@ int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t); void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *, struct xfs_bmbt_irec *); +xfs_extlen_t xfs_eof_alignment(struct xfs_inode *ip, xfs_extlen_t extsize); extern struct iomap_ops xfs_iomap_ops; extern struct iomap_ops xfs_xattr_iomap_ops; diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index ce73eb34620d..66e881790c17 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -66,7 +66,7 @@ xfs_bulkstat_one_int( if (!buffer || xfs_internal_inum(mp, ino)) return -EINVAL; - buf = kmem_alloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL); + buf = kmem_zalloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL); if (!buf) return -ENOMEM; @@ -111,6 +111,12 @@ xfs_bulkstat_one_int( buf->bs_aextents = dic->di_anextents; buf->bs_forkoff = XFS_IFORK_BOFF(ip); + if (dic->di_version == 3) { + if (dic->di_flags2 & XFS_DIFLAG2_COWEXTSIZE) + buf->bs_cowextsize = dic->di_cowextsize << + mp->m_sb.sb_blocklog; + } + switch (dic->di_format) { case XFS_DINODE_FMT_DEV: buf->bs_rdev = ip->i_df.if_u2.if_rdev; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index d4707e627a74..8e894118295f 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -238,6 +238,7 @@ __xfs_reflink_reserve_cow( int nimaps, eof = 0, error = 0; bool shared = false, trimmed = false; xfs_extnum_t idx; + xfs_extlen_t align; /* Already reserved? Skip the refcount btree access. */ xfs_bmap_search_extents(ip, *offset_fsb, XFS_COW_FORK, &eof, &idx, @@ -277,6 +278,10 @@ __xfs_reflink_reserve_cow( if (error) goto out_unlock; + align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip)); + if (align) + end_fsb = roundup_64(end_fsb, align); + retry: error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, *offset_fsb, end_fsb - *offset_fsb, &got, @@ -927,18 +932,19 @@ out_error: } /* - * Update destination inode size, if necessary. + * Update destination inode size & cowextsize hint, if necessary. */ STATIC int xfs_reflink_update_dest( struct xfs_inode *dest, - xfs_off_t newlen) + xfs_off_t newlen, + xfs_extlen_t cowextsize) { struct xfs_mount *mp = dest->i_mount; struct xfs_trans *tp; int error; - if (newlen <= i_size_read(VFS_I(dest))) + if (newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) return 0; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); @@ -948,9 +954,17 @@ xfs_reflink_update_dest( xfs_ilock(dest, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, dest, XFS_ILOCK_EXCL); - trace_xfs_reflink_update_inode_size(dest, newlen); - i_size_write(VFS_I(dest), newlen); - dest->i_d.di_size = newlen; + if (newlen > i_size_read(VFS_I(dest))) { + trace_xfs_reflink_update_inode_size(dest, newlen); + i_size_write(VFS_I(dest), newlen); + dest->i_d.di_size = newlen; + } + + if (cowextsize) { + dest->i_d.di_cowextsize = cowextsize; + dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; + } + xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE); error = xfs_trans_commit(tp); @@ -1270,6 +1284,7 @@ xfs_reflink_remap_range( xfs_fileoff_t sfsbno, dfsbno; xfs_filblks_t fsblen; int error; + xfs_extlen_t cowextsize; bool is_same; if (!xfs_sb_version_hasreflink(&mp->m_sb)) @@ -1330,7 +1345,19 @@ xfs_reflink_remap_range( if (error) goto out_error; - error = xfs_reflink_update_dest(dest, destoff + len); + /* + * Carry the cowextsize hint from src to dest if we're sharing the + * entire source file to the entire destination file, the source file + * has a cowextsize hint, and the destination file does not. + */ + cowextsize = 0; + if (srcoff == 0 && len == i_size_read(VFS_I(src)) && + (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) && + destoff == 0 && len >= i_size_read(VFS_I(dest)) && + !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) + cowextsize = src->i_d.di_cowextsize; + + error = xfs_reflink_update_dest(dest, destoff + len, cowextsize); if (error) goto out_error; -- cgit v1.2.3 From 83104d449e8c44e4870a795132437257cdf80006 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 3 Oct 2016 09:11:46 -0700 Subject: xfs: garbage collect old cowextsz reservations Trim CoW reservations made on behalf of a cowextsz hint if they get too old or we run low on quota, so long as we don't have dirty data awaiting writeback or directio operations in progress. Garbage collection of the cowextsize extents are kept separate from prealloc extent reaping because setting the CoW prealloc lifetime to a (much) higher value than the regular prealloc extent lifetime has been useful for combatting CoW fragmentation on VM hosts where the VMs experience bursty write behaviors and we can keep the utilization ratios low enough that we don't start to run out of space. IOWs, it benefits us to keep the CoW fork reservations around for as long as we can unless we run out of blocks or hit inode reclaim. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_bmap_util.c | 2 + fs/xfs/xfs_file.c | 3 + fs/xfs/xfs_globals.c | 5 +- fs/xfs/xfs_icache.c | 238 +++++++++++++++++++++++++++++++++++++++++++------ fs/xfs/xfs_icache.h | 7 ++ fs/xfs/xfs_inode.c | 4 +- fs/xfs/xfs_linux.h | 1 + fs/xfs/xfs_mount.c | 1 + fs/xfs/xfs_mount.h | 2 + fs/xfs/xfs_reflink.c | 38 ++++++++ fs/xfs/xfs_reflink.h | 2 + fs/xfs/xfs_super.c | 1 + fs/xfs/xfs_sysctl.c | 9 ++ fs/xfs/xfs_sysctl.h | 1 + fs/xfs/xfs_trace.h | 5 ++ 15 files changed, 287 insertions(+), 32 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 4a807f05e460..eacd4c203376 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1891,6 +1891,8 @@ xfs_swap_extents( cowfp = ip->i_cowfp; ip->i_cowfp = tip->i_cowfp; tip->i_cowfp = cowfp; + xfs_inode_set_cowblocks_tag(ip); + xfs_inode_set_cowblocks_tag(tip); } xfs_trans_log_inode(tp, ip, src_log_flags); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 07f951dd2685..0726df88bce2 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -782,6 +782,9 @@ write_retry: enospc = xfs_inode_free_quota_eofblocks(ip); if (enospc) goto write_retry; + enospc = xfs_inode_free_quota_cowblocks(ip); + if (enospc) + goto write_retry; } else if (ret == -ENOSPC && !enospc) { struct xfs_eofblocks eofb = {0}; diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c index 4d41b241298f..687a4b01fc53 100644 --- a/fs/xfs/xfs_globals.c +++ b/fs/xfs/xfs_globals.c @@ -21,8 +21,8 @@ /* * Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n, * other XFS code uses these values. Times are measured in centisecs (i.e. - * 100ths of a second) with the exception of eofb_timer, which is measured in - * seconds. + * 100ths of a second) with the exception of eofb_timer and cowb_timer, which + * are measured in seconds. */ xfs_param_t xfs_params = { /* MIN DFLT MAX */ @@ -42,6 +42,7 @@ xfs_param_t xfs_params = { .inherit_nodfrg = { 0, 1, 1 }, .fstrm_timer = { 1, 30*100, 3600*100}, .eofb_timer = { 1, 300, 3600*24}, + .cowb_timer = { 1, 1800, 3600*24}, }; struct xfs_globals xfs_globals = { diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 2d3de02f3529..14796b744e0a 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -33,6 +33,7 @@ #include "xfs_bmap_util.h" #include "xfs_dquot_item.h" #include "xfs_dquot.h" +#include "xfs_reflink.h" #include #include @@ -792,6 +793,33 @@ xfs_eofblocks_worker( xfs_queue_eofblocks(mp); } +/* + * Background scanning to trim preallocated CoW space. This is queued + * based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default). + * (We'll just piggyback on the post-EOF prealloc space workqueue.) + */ +STATIC void +xfs_queue_cowblocks( + struct xfs_mount *mp) +{ + rcu_read_lock(); + if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_COWBLOCKS_TAG)) + queue_delayed_work(mp->m_eofblocks_workqueue, + &mp->m_cowblocks_work, + msecs_to_jiffies(xfs_cowb_secs * 1000)); + rcu_read_unlock(); +} + +void +xfs_cowblocks_worker( + struct work_struct *work) +{ + struct xfs_mount *mp = container_of(to_delayed_work(work), + struct xfs_mount, m_cowblocks_work); + xfs_icache_free_cowblocks(mp, NULL); + xfs_queue_cowblocks(mp); +} + int xfs_inode_ag_iterator( struct xfs_mount *mp, @@ -1348,18 +1376,30 @@ xfs_inode_free_eofblocks( return ret; } -int -xfs_icache_free_eofblocks( +static int +__xfs_icache_free_eofblocks( struct xfs_mount *mp, - struct xfs_eofblocks *eofb) + struct xfs_eofblocks *eofb, + int (*execute)(struct xfs_inode *ip, int flags, + void *args), + int tag) { int flags = SYNC_TRYLOCK; if (eofb && (eofb->eof_flags & XFS_EOF_FLAGS_SYNC)) flags = SYNC_WAIT; - return xfs_inode_ag_iterator_tag(mp, xfs_inode_free_eofblocks, flags, - eofb, XFS_ICI_EOFBLOCKS_TAG); + return xfs_inode_ag_iterator_tag(mp, execute, flags, + eofb, tag); +} + +int +xfs_icache_free_eofblocks( + struct xfs_mount *mp, + struct xfs_eofblocks *eofb) +{ + return __xfs_icache_free_eofblocks(mp, eofb, xfs_inode_free_eofblocks, + XFS_ICI_EOFBLOCKS_TAG); } /* @@ -1368,9 +1408,11 @@ xfs_icache_free_eofblocks( * failure. We make a best effort by including each quota under low free space * conditions (less than 1% free space) in the scan. */ -int -xfs_inode_free_quota_eofblocks( - struct xfs_inode *ip) +static int +__xfs_inode_free_quota_eofblocks( + struct xfs_inode *ip, + int (*execute)(struct xfs_mount *mp, + struct xfs_eofblocks *eofb)) { int scan = 0; struct xfs_eofblocks eofb = {0}; @@ -1406,14 +1448,25 @@ xfs_inode_free_quota_eofblocks( } if (scan) - xfs_icache_free_eofblocks(ip->i_mount, &eofb); + execute(ip->i_mount, &eofb); return scan; } -void -xfs_inode_set_eofblocks_tag( - xfs_inode_t *ip) +int +xfs_inode_free_quota_eofblocks( + struct xfs_inode *ip) +{ + return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks); +} + +static void +__xfs_inode_set_eofblocks_tag( + xfs_inode_t *ip, + void (*execute)(struct xfs_mount *mp), + void (*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno, + int error, unsigned long caller_ip), + int tag) { struct xfs_mount *mp = ip->i_mount; struct xfs_perag *pag; @@ -1431,26 +1484,22 @@ xfs_inode_set_eofblocks_tag( pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); spin_lock(&pag->pag_ici_lock); - trace_xfs_inode_set_eofblocks_tag(ip); - tagged = radix_tree_tagged(&pag->pag_ici_root, - XFS_ICI_EOFBLOCKS_TAG); + tagged = radix_tree_tagged(&pag->pag_ici_root, tag); radix_tree_tag_set(&pag->pag_ici_root, - XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), - XFS_ICI_EOFBLOCKS_TAG); + XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), tag); if (!tagged) { /* propagate the eofblocks tag up into the perag radix tree */ spin_lock(&ip->i_mount->m_perag_lock); radix_tree_tag_set(&ip->i_mount->m_perag_tree, XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), - XFS_ICI_EOFBLOCKS_TAG); + tag); spin_unlock(&ip->i_mount->m_perag_lock); /* kick off background trimming */ - xfs_queue_eofblocks(ip->i_mount); + execute(ip->i_mount); - trace_xfs_perag_set_eofblocks(ip->i_mount, pag->pag_agno, - -1, _RET_IP_); + set_tp(ip->i_mount, pag->pag_agno, -1, _RET_IP_); } spin_unlock(&pag->pag_ici_lock); @@ -1458,8 +1507,21 @@ xfs_inode_set_eofblocks_tag( } void -xfs_inode_clear_eofblocks_tag( +xfs_inode_set_eofblocks_tag( xfs_inode_t *ip) +{ + trace_xfs_inode_set_eofblocks_tag(ip); + return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_eofblocks, + trace_xfs_perag_set_eofblocks, + XFS_ICI_EOFBLOCKS_TAG); +} + +static void +__xfs_inode_clear_eofblocks_tag( + xfs_inode_t *ip, + void (*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno, + int error, unsigned long caller_ip), + int tag) { struct xfs_mount *mp = ip->i_mount; struct xfs_perag *pag; @@ -1470,23 +1532,141 @@ xfs_inode_clear_eofblocks_tag( pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); spin_lock(&pag->pag_ici_lock); - trace_xfs_inode_clear_eofblocks_tag(ip); radix_tree_tag_clear(&pag->pag_ici_root, - XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), - XFS_ICI_EOFBLOCKS_TAG); - if (!radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_EOFBLOCKS_TAG)) { + XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), tag); + if (!radix_tree_tagged(&pag->pag_ici_root, tag)) { /* clear the eofblocks tag from the perag radix tree */ spin_lock(&ip->i_mount->m_perag_lock); radix_tree_tag_clear(&ip->i_mount->m_perag_tree, XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), - XFS_ICI_EOFBLOCKS_TAG); + tag); spin_unlock(&ip->i_mount->m_perag_lock); - trace_xfs_perag_clear_eofblocks(ip->i_mount, pag->pag_agno, - -1, _RET_IP_); + clear_tp(ip->i_mount, pag->pag_agno, -1, _RET_IP_); } spin_unlock(&pag->pag_ici_lock); xfs_perag_put(pag); } +void +xfs_inode_clear_eofblocks_tag( + xfs_inode_t *ip) +{ + trace_xfs_inode_clear_eofblocks_tag(ip); + return __xfs_inode_clear_eofblocks_tag(ip, + trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG); +} + +/* + * Automatic CoW Reservation Freeing + * + * These functions automatically garbage collect leftover CoW reservations + * that were made on behalf of a cowextsize hint when we start to run out + * of quota or when the reservations sit around for too long. If the file + * has dirty pages or is undergoing writeback, its CoW reservations will + * be retained. + * + * The actual garbage collection piggybacks off the same code that runs + * the speculative EOF preallocation garbage collector. + */ +STATIC int +xfs_inode_free_cowblocks( + struct xfs_inode *ip, + int flags, + void *args) +{ + int ret; + struct xfs_eofblocks *eofb = args; + bool need_iolock = true; + int match; + + ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0)); + + if (!xfs_reflink_has_real_cow_blocks(ip)) { + trace_xfs_inode_free_cowblocks_invalid(ip); + xfs_inode_clear_cowblocks_tag(ip); + return 0; + } + + /* + * If the mapping is dirty or under writeback we cannot touch the + * CoW fork. Leave it alone if we're in the midst of a directio. + */ + if (mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) || + mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) || + atomic_read(&VFS_I(ip)->i_dio_count)) + return 0; + + if (eofb) { + if (eofb->eof_flags & XFS_EOF_FLAGS_UNION) + match = xfs_inode_match_id_union(ip, eofb); + else + match = xfs_inode_match_id(ip, eofb); + if (!match) + return 0; + + /* skip the inode if the file size is too small */ + if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && + XFS_ISIZE(ip) < eofb->eof_min_file_size) + return 0; + + /* + * A scan owner implies we already hold the iolock. Skip it in + * xfs_free_eofblocks() to avoid deadlock. This also eliminates + * the possibility of EAGAIN being returned. + */ + if (eofb->eof_scan_owner == ip->i_ino) + need_iolock = false; + } + + /* Free the CoW blocks */ + if (need_iolock) { + xfs_ilock(ip, XFS_IOLOCK_EXCL); + xfs_ilock(ip, XFS_MMAPLOCK_EXCL); + } + + ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); + + if (need_iolock) { + xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + } + + return ret; +} + +int +xfs_icache_free_cowblocks( + struct xfs_mount *mp, + struct xfs_eofblocks *eofb) +{ + return __xfs_icache_free_eofblocks(mp, eofb, xfs_inode_free_cowblocks, + XFS_ICI_COWBLOCKS_TAG); +} + +int +xfs_inode_free_quota_cowblocks( + struct xfs_inode *ip) +{ + return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_cowblocks); +} + +void +xfs_inode_set_cowblocks_tag( + xfs_inode_t *ip) +{ + trace_xfs_inode_set_eofblocks_tag(ip); + return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks, + trace_xfs_perag_set_eofblocks, + XFS_ICI_COWBLOCKS_TAG); +} + +void +xfs_inode_clear_cowblocks_tag( + xfs_inode_t *ip) +{ + trace_xfs_inode_clear_eofblocks_tag(ip); + return __xfs_inode_clear_eofblocks_tag(ip, + trace_xfs_perag_clear_eofblocks, XFS_ICI_COWBLOCKS_TAG); +} diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 05bac99bef75..a1e02f4708ab 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -40,6 +40,7 @@ struct xfs_eofblocks { in xfs_inode_ag_iterator */ #define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */ #define XFS_ICI_EOFBLOCKS_TAG 1 /* inode has blocks beyond EOF */ +#define XFS_ICI_COWBLOCKS_TAG 2 /* inode can have cow blocks to gc */ /* * Flags for xfs_iget() @@ -70,6 +71,12 @@ int xfs_inode_free_quota_eofblocks(struct xfs_inode *ip); void xfs_eofblocks_worker(struct work_struct *); void xfs_queue_eofblocks(struct xfs_mount *); +void xfs_inode_set_cowblocks_tag(struct xfs_inode *ip); +void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip); +int xfs_icache_free_cowblocks(struct xfs_mount *, struct xfs_eofblocks *); +int xfs_inode_free_quota_cowblocks(struct xfs_inode *ip); +void xfs_cowblocks_worker(struct work_struct *); + int xfs_inode_ag_iterator(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, void *args); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 09640bdd3e44..89e6441f963f 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1629,8 +1629,10 @@ xfs_itruncate_extents( /* * Clear the reflink flag if we truncated everything. */ - if (ip->i_d.di_nblocks == 0 && xfs_is_reflink_inode(ip)) + if (ip->i_d.di_nblocks == 0 && xfs_is_reflink_inode(ip)) { ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; + xfs_inode_clear_cowblocks_tag(ip); + } /* * Always re-log the inode so that our permanent transaction can keep diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index b8d64d520e12..68640fb63a54 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -116,6 +116,7 @@ typedef __u32 xfs_nlink_t; #define xfs_inherit_nodefrag xfs_params.inherit_nodfrg.val #define xfs_fstrm_centisecs xfs_params.fstrm_timer.val #define xfs_eofb_secs xfs_params.eofb_timer.val +#define xfs_cowb_secs xfs_params.cowb_timer.val #define current_cpu() (raw_smp_processor_id()) #define current_pid() (current->pid) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 40fedc00b30d..fc7873942bea 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1049,6 +1049,7 @@ xfs_unmountfs( int error; cancel_delayed_work_sync(&mp->m_eofblocks_work); + cancel_delayed_work_sync(&mp->m_cowblocks_work); xfs_fs_unreserve_ag_blocks(mp); xfs_qm_unmount_quotas(mp); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 0be14a76216a..819b80b15bfb 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -164,6 +164,8 @@ typedef struct xfs_mount { struct delayed_work m_reclaim_work; /* background inode reclaim */ struct delayed_work m_eofblocks_work; /* background eof blocks trimming */ + struct delayed_work m_cowblocks_work; /* background cow blocks + trimming */ bool m_update_sb; /* sb needs update in mount */ int64_t m_low_space[XFS_LOWSP_MAX]; /* low free space thresholds */ diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index e92ccd316391..685c419ae011 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -304,6 +304,9 @@ retry: goto out_unlock; } + if (end_fsb != orig_end_fsb) + xfs_inode_set_cowblocks_tag(ip); + trace_xfs_reflink_cow_alloc(ip, &got); done: *offset_fsb = end_fsb; @@ -1562,6 +1565,7 @@ next: /* Clear the inode flag. */ trace_xfs_reflink_unset_inode_flag(ip); ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; + xfs_inode_clear_cowblocks_tag(ip); xfs_trans_ijoin(*tpp, ip, 0); xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE); @@ -1662,3 +1666,37 @@ out: trace_xfs_reflink_unshare_error(ip, error, _RET_IP_); return error; } + +/* + * Does this inode have any real CoW reservations? + */ +bool +xfs_reflink_has_real_cow_blocks( + struct xfs_inode *ip) +{ + struct xfs_bmbt_irec irec; + struct xfs_ifork *ifp; + struct xfs_bmbt_rec_host *gotp; + xfs_extnum_t idx; + + if (!xfs_is_reflink_inode(ip)) + return false; + + /* Go find the old extent in the CoW fork. */ + ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + gotp = xfs_iext_bno_to_ext(ifp, 0, &idx); + while (gotp) { + xfs_bmbt_get_all(gotp, &irec); + + if (!isnullstartblock(irec.br_startblock)) + return true; + + /* Roll on... */ + idx++; + if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) + break; + gotp = xfs_iext_get_ext(ifp, idx); + } + + return false; +} diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index bade5a61f3b0..5dc3c8ac12aa 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -53,4 +53,6 @@ extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip, extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t len); +extern bool xfs_reflink_has_real_cow_blocks(struct xfs_inode *ip); + #endif /* __XFS_REFLINK_H */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 90a8fd724abb..15b9fcd16c67 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1531,6 +1531,7 @@ xfs_fs_fill_super( atomic_set(&mp->m_active_trans, 0); INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker); + INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker); mp->m_kobj.kobject.kset = xfs_kset; mp->m_super = sb; diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c index aed74d3f8da9..afe1f66aaa69 100644 --- a/fs/xfs/xfs_sysctl.c +++ b/fs/xfs/xfs_sysctl.c @@ -184,6 +184,15 @@ static struct ctl_table xfs_table[] = { .extra1 = &xfs_params.eofb_timer.min, .extra2 = &xfs_params.eofb_timer.max, }, + { + .procname = "speculative_cow_prealloc_lifetime", + .data = &xfs_params.cowb_timer.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.cowb_timer.min, + .extra2 = &xfs_params.cowb_timer.max, + }, /* please keep this the last entry */ #ifdef CONFIG_PROC_FS { diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h index ffef45375754..984a3499cfe3 100644 --- a/fs/xfs/xfs_sysctl.h +++ b/fs/xfs/xfs_sysctl.h @@ -48,6 +48,7 @@ typedef struct xfs_param { xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */ xfs_sysctl_val_t fstrm_timer; /* Filestream dir-AG assoc'n timeout. */ xfs_sysctl_val_t eofb_timer; /* Interval between eofb scan wakeups */ + xfs_sysctl_val_t cowb_timer; /* Interval between cowb scan wakeups */ } xfs_param_t; /* diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 5c10b12170d0..263dab10c982 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -136,6 +136,8 @@ DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim); DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); DEFINE_PERAG_REF_EVENT(xfs_perag_set_eofblocks); DEFINE_PERAG_REF_EVENT(xfs_perag_clear_eofblocks); +DEFINE_PERAG_REF_EVENT(xfs_perag_set_cowblocks); +DEFINE_PERAG_REF_EVENT(xfs_perag_clear_cowblocks); DECLARE_EVENT_CLASS(xfs_ag_class, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno), @@ -687,6 +689,9 @@ DEFINE_INODE_EVENT(xfs_dquot_dqdetach); DEFINE_INODE_EVENT(xfs_inode_set_eofblocks_tag); DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag); DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid); +DEFINE_INODE_EVENT(xfs_inode_set_cowblocks_tag); +DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag); +DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid); DEFINE_INODE_EVENT(xfs_filemap_fault); DEFINE_INODE_EVENT(xfs_filemap_pmd_fault); -- cgit v1.2.3 From e153aa7990a09a8a12860fc1f79304b02a6bc03f Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 3 Oct 2016 09:11:49 -0700 Subject: xfs: set a default CoW extent size of 32 blocks If the admin doesn't set a CoW extent size or a regular extent size hint, default to creating CoW reservations 32 blocks long to reduce fragmentation. Signed-off-by: DarricK J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_inode.c | 10 ++++++---- fs/xfs/xfs_inode.h | 3 +++ 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 89e6441f963f..3332fb695cfd 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -80,7 +80,8 @@ xfs_get_extsz_hint( /* * Helper function to extract CoW extent size hint from inode. * Between the extent size hint and the CoW extent size hint, we - * return the greater of the two. + * return the greater of the two. If the value is zero (automatic), + * use the default size. */ xfs_extlen_t xfs_get_cowextsz_hint( @@ -93,9 +94,10 @@ xfs_get_cowextsz_hint( a = ip->i_d.di_cowextsize; b = xfs_get_extsz_hint(ip); - if (a > b) - return a; - return b; + a = max(a, b); + if (a == 0) + return XFS_DEFAULT_COWEXTSZ_HINT; + return a; } /* diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 6d63dc031127..f14c1de2549d 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -490,4 +490,7 @@ do { \ extern struct kmem_zone *xfs_inode_zone; +/* The default CoW extent size hint. */ +#define XFS_DEFAULT_COWEXTSZ_HINT 32 + #endif /* __XFS_INODE_H__ */ -- cgit v1.2.3