summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/Makefile3
-rw-r--r--fs/afs/dir.c1
-rw-r--r--fs/afs/file.c1
-rw-r--r--fs/afs/inode.c7
-rw-r--r--fs/afs/internal.h5
-rw-r--r--fs/afs/mntpt.c1
-rw-r--r--fs/afs/rxrpc.c18
-rw-r--r--fs/afs/security.c5
-rw-r--r--fs/afs/super.c1
-rw-r--r--fs/afs/xattr.c121
-rw-r--r--fs/aio.c15
-rw-r--r--fs/autofs4/dev-ioctl.c2
-rw-r--r--fs/block_dev.c31
-rw-r--r--fs/btrfs/acl.c13
-rw-r--r--fs/btrfs/backref.c10
-rw-r--r--fs/btrfs/btrfs_inode.h3
-rw-r--r--fs/btrfs/check-integrity.c57
-rw-r--r--fs/btrfs/compression.c138
-rw-r--r--fs/btrfs/compression.h48
-rw-r--r--fs/btrfs/ctree.c42
-rw-r--r--fs/btrfs/ctree.h90
-rw-r--r--fs/btrfs/delayed-ref.c29
-rw-r--r--fs/btrfs/delayed-ref.h6
-rw-r--r--fs/btrfs/dev-replace.c4
-rw-r--r--fs/btrfs/dir-item.c76
-rw-r--r--fs/btrfs/disk-io.c247
-rw-r--r--fs/btrfs/disk-io.h10
-rw-r--r--fs/btrfs/export.c5
-rw-r--r--fs/btrfs/extent-tree.c493
-rw-r--r--fs/btrfs/extent_io.c276
-rw-r--r--fs/btrfs/extent_io.h82
-rw-r--r--fs/btrfs/file-item.c45
-rw-r--r--fs/btrfs/file.c92
-rw-r--r--fs/btrfs/free-space-tree.c38
-rw-r--r--fs/btrfs/hash.c5
-rw-r--r--fs/btrfs/inode-map.c4
-rw-r--r--fs/btrfs/inode.c521
-rw-r--r--fs/btrfs/ioctl.c18
-rw-r--r--fs/btrfs/lzo.c33
-rw-r--r--fs/btrfs/ordered-data.c17
-rw-r--r--fs/btrfs/ordered-data.h4
-rw-r--r--fs/btrfs/print-tree.c7
-rw-r--r--fs/btrfs/props.c7
-rw-r--r--fs/btrfs/qgroup.c225
-rw-r--r--fs/btrfs/qgroup.h9
-rw-r--r--fs/btrfs/raid56.c32
-rw-r--r--fs/btrfs/reada.c1
-rw-r--r--fs/btrfs/relocation.c17
-rw-r--r--fs/btrfs/root-tree.c7
-rw-r--r--fs/btrfs/scrub.c237
-rw-r--r--fs/btrfs/send.c112
-rw-r--r--fs/btrfs/super.c74
-rw-r--r--fs/btrfs/sysfs.c41
-rw-r--r--fs/btrfs/tests/extent-io-tests.c2
-rw-r--r--fs/btrfs/transaction.c25
-rw-r--r--fs/btrfs/tree-log.c44
-rw-r--r--fs/btrfs/volumes.c85
-rw-r--r--fs/btrfs/volumes.h7
-rw-r--r--fs/btrfs/xattr.c2
-rw-r--r--fs/btrfs/zlib.c20
-rw-r--r--fs/buffer.c167
-rw-r--r--fs/cachefiles/internal.h4
-rw-r--r--fs/cachefiles/namei.c2
-rw-r--r--fs/cachefiles/rdwr.c4
-rw-r--r--fs/ceph/acl.c1
-rw-r--r--fs/ceph/export.c4
-rw-r--r--fs/ceph/inode.c5
-rw-r--r--fs/ceph/mds_client.c4
-rw-r--r--fs/cifs/Kconfig9
-rw-r--r--fs/cifs/cifs_unicode.c8
-rw-r--r--fs/cifs/file.c18
-rw-r--r--fs/cifs/inode.c1
-rw-r--r--fs/cifs/misc.c2
-rw-r--r--fs/cifs/smb1ops.c9
-rw-r--r--fs/cifs/smb2ops.c127
-rw-r--r--fs/cifs/smb2pdu.c52
-rw-r--r--fs/cifs/smb2proto.h3
-rw-r--r--fs/cifs/smb2transport.c28
-rw-r--r--fs/cifs/transport.c7
-rw-r--r--fs/cifs/xattr.c2
-rw-r--r--fs/coda/file.c4
-rw-r--r--fs/compat_ioctl.c25
-rw-r--r--fs/configfs/item.c8
-rw-r--r--fs/configfs/symlink.c3
-rw-r--r--fs/crypto/Kconfig1
-rw-r--r--fs/crypto/bio.c2
-rw-r--r--fs/crypto/crypto.c23
-rw-r--r--fs/crypto/fname.c9
-rw-r--r--fs/crypto/fscrypt_private.h9
-rw-r--r--fs/crypto/keyinfo.c173
-rw-r--r--fs/crypto/policy.c9
-rw-r--r--fs/dax.c20
-rw-r--r--fs/dcache.c60
-rw-r--r--fs/debugfs/file.c2
-rw-r--r--fs/debugfs/inode.c12
-rw-r--r--fs/direct-io.c25
-rw-r--r--fs/eventfd.c6
-rw-r--r--fs/eventpoll.c12
-rw-r--r--fs/exec.c35
-rw-r--r--fs/exofs/dir.c2
-rw-r--r--fs/ext2/dir.c2
-rw-r--r--fs/ext2/ext2.h2
-rw-r--r--fs/ext2/file.c5
-rw-r--r--fs/ext2/super.c16
-rw-r--r--fs/ext2/xattr.c48
-rw-r--r--fs/ext4/acl.c21
-rw-r--r--fs/ext4/ext4.h63
-rw-r--r--fs/ext4/ext4_jbd2.h23
-rw-r--r--fs/ext4/extents.c3
-rw-r--r--fs/ext4/file.c42
-rw-r--r--fs/ext4/fsmap.c4
-rw-r--r--fs/ext4/fsync.c2
-rw-r--r--fs/ext4/ialloc.c76
-rw-r--r--fs/ext4/indirect.c3
-rw-r--r--fs/ext4/inline.c2
-rw-r--r--fs/ext4/inode.c92
-rw-r--r--fs/ext4/ioctl.c10
-rw-r--r--fs/ext4/mballoc.c145
-rw-r--r--fs/ext4/mballoc.h6
-rw-r--r--fs/ext4/migrate.c2
-rw-r--r--fs/ext4/move_extent.c2
-rw-r--r--fs/ext4/namei.c131
-rw-r--r--fs/ext4/page-io.c15
-rw-r--r--fs/ext4/readpage.c4
-rw-r--r--fs/ext4/super.c111
-rw-r--r--fs/ext4/sysfs.c2
-rw-r--r--fs/ext4/xattr.c1699
-rw-r--r--fs/ext4/xattr.h35
-rw-r--r--fs/f2fs/Makefile2
-rw-r--r--fs/f2fs/acl.c2
-rw-r--r--fs/f2fs/checkpoint.c33
-rw-r--r--fs/f2fs/data.c225
-rw-r--r--fs/f2fs/dir.c3
-rw-r--r--fs/f2fs/extent_cache.c12
-rw-r--r--fs/f2fs/f2fs.h206
-rw-r--r--fs/f2fs/file.c183
-rw-r--r--fs/f2fs/gc.c40
-rw-r--r--fs/f2fs/inline.c22
-rw-r--r--fs/f2fs/inode.c17
-rw-r--r--fs/f2fs/namei.c71
-rw-r--r--fs/f2fs/node.c67
-rw-r--r--fs/f2fs/node.h6
-rw-r--r--fs/f2fs/segment.c238
-rw-r--r--fs/f2fs/segment.h4
-rw-r--r--fs/f2fs/super.c705
-rw-r--r--fs/f2fs/sysfs.c364
-rw-r--r--fs/fcntl.c304
-rw-r--r--fs/file.c22
-rw-r--r--fs/file_table.c1
-rw-r--r--fs/filesystems.c4
-rw-r--r--fs/fs-writeback.c12
-rw-r--r--fs/fs_pin.c4
-rw-r--r--fs/gfs2/bmap.c2
-rw-r--r--fs/gfs2/dir.c7
-rw-r--r--fs/gfs2/glock.c135
-rw-r--r--fs/gfs2/glock.h7
-rw-r--r--fs/gfs2/glops.c56
-rw-r--r--fs/gfs2/incore.h7
-rw-r--r--fs/gfs2/inode.c19
-rw-r--r--fs/gfs2/log.c3
-rw-r--r--fs/gfs2/lops.c18
-rw-r--r--fs/gfs2/main.c1
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/gfs2/ops_fstype.c4
-rw-r--r--fs/gfs2/rgrp.c6
-rw-r--r--fs/gfs2/super.c33
-rw-r--r--fs/gfs2/sys.c4
-rw-r--r--fs/gfs2/xattr.c4
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--fs/inode.c25
-rw-r--r--fs/iomap.c107
-rw-r--r--fs/jbd2/commit.c16
-rw-r--r--fs/jbd2/journal.c4
-rw-r--r--fs/jbd2/transaction.c42
-rw-r--r--fs/jfs/jfs_logmgr.c2
-rw-r--r--fs/jfs/jfs_metapage.c11
-rw-r--r--fs/jfs/jfs_metapage.h1
-rw-r--r--fs/libfs.c6
-rw-r--r--fs/locks.c97
-rw-r--r--fs/mbcache.c52
-rw-r--r--fs/minix/dir.c2
-rw-r--r--fs/minix/itree_common.c2
-rw-r--r--fs/mount.h1
-rw-r--r--fs/mpage.c5
-rw-r--r--fs/namei.c11
-rw-r--r--fs/namespace.c13
-rw-r--r--fs/ncpfs/mmap.c2
-rw-r--r--fs/nfs/blocklayout/blocklayout.c4
-rw-r--r--fs/nfs/callback_xdr.c1
-rw-r--r--fs/nfs/dir.c51
-rw-r--r--fs/nfs/internal.h1
-rw-r--r--fs/nfs/nfs4proc.c9
-rw-r--r--fs/nfs/nfs4state.c2
-rw-r--r--fs/nfs/super.c17
-rw-r--r--fs/nfsd/blocklayout.c4
-rw-r--r--fs/nfsd/vfs.c73
-rw-r--r--fs/nilfs2/segbuf.c2
-rw-r--r--fs/nilfs2/segment.c5
-rw-r--r--fs/notify/fsnotify.c8
-rw-r--r--fs/ocfs2/cluster/heartbeat.c6
-rw-r--r--fs/ocfs2/cluster/netdebug.c1
-rw-r--r--fs/ocfs2/dlmglue.c4
-rw-r--r--fs/ocfs2/inode.c2
-rw-r--r--fs/ocfs2/ocfs2_fs.h5
-rw-r--r--fs/ocfs2/stackglue.c2
-rw-r--r--fs/ocfs2/xattr.c23
-rw-r--r--fs/open.c4
-rw-r--r--fs/orangefs/orangefs-bufmap.c12
-rw-r--r--fs/overlayfs/copy_up.c33
-rw-r--r--fs/pnode.c212
-rw-r--r--fs/proc/kcore.c2
-rw-r--r--fs/proc/task_mmu.c4
-rw-r--r--fs/pstore/inode.c22
-rw-r--r--fs/pstore/internal.h2
-rw-r--r--fs/pstore/platform.c69
-rw-r--r--fs/pstore/pmsg.c10
-rw-r--r--fs/pstore/ram.c16
-rw-r--r--fs/quota/dquot.c16
-rw-r--r--fs/read_write.c234
-rw-r--r--fs/reiserfs/journal.c2
-rw-r--r--fs/select.c48
-rw-r--r--fs/signalfd.c2
-rw-r--r--fs/splice.c2
-rw-r--r--fs/statfs.c60
-rw-r--r--fs/sync.c2
-rw-r--r--fs/sysv/dir.c2
-rw-r--r--fs/timerfd.c43
-rw-r--r--fs/ufs/balloc.c44
-rw-r--r--fs/ufs/dir.c2
-rw-r--r--fs/ufs/inode.c74
-rw-r--r--fs/ufs/super.c73
-rw-r--r--fs/ufs/ufs_fs.h9
-rw-r--r--fs/ufs/util.c17
-rw-r--r--fs/ufs/util.h9
-rw-r--r--fs/userfaultfd.c71
-rw-r--r--fs/xfs/Kconfig13
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c3
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c8
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h2
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c26
-rw-r--r--fs/xfs/libxfs/xfs_attr.c26
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c2
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c13
-rw-r--r--fs/xfs/libxfs/xfs_attr_sf.h10
-rw-r--r--fs/xfs/libxfs/xfs_bit.h24
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c51
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h2
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c34
-rw-r--r--fs/xfs/libxfs/xfs_btree.c52
-rw-r--r--fs/xfs/libxfs/xfs_btree.h33
-rw-r--r--fs/xfs/libxfs/xfs_cksum.h16
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c14
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.h8
-rw-r--r--fs/xfs/libxfs/xfs_da_format.c28
-rw-r--r--fs/xfs/libxfs/xfs_da_format.h64
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c3
-rw-r--r--fs/xfs/libxfs/xfs_dir2.h8
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c18
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c10
-rw-r--r--fs/xfs/libxfs/xfs_dir2_priv.h10
-rw-r--r--fs/xfs/libxfs/xfs_dir2_sf.c2
-rw-r--r--fs/xfs/libxfs/xfs_format.h113
-rw-r--r--fs/xfs/libxfs/xfs_fs.h16
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c53
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.h5
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c36
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c7
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.h31
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h256
-rw-r--r--fs/xfs/libxfs/xfs_log_recover.h2
-rw-r--r--fs/xfs/libxfs/xfs_quota_defs.h6
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c16
-rw-r--r--fs/xfs/libxfs/xfs_refcount.h16
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.c12
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c14
-rw-r--r--fs/xfs/libxfs/xfs_rmap.h11
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.c34
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c4
-rw-r--r--fs/xfs/libxfs/xfs_sb.c4
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.c2
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c4
-rw-r--r--fs/xfs/libxfs/xfs_types.h46
-rw-r--r--fs/xfs/xfs.h4
-rw-r--r--fs/xfs/xfs_acl.c6
-rw-r--r--fs/xfs/xfs_acl.h1
-rw-r--r--fs/xfs/xfs_aops.c20
-rw-r--r--fs/xfs/xfs_attr.h3
-rw-r--r--fs/xfs/xfs_attr_list.c61
-rw-r--r--fs/xfs/xfs_bmap_item.c17
-rw-r--r--fs/xfs/xfs_bmap_util.c164
-rw-r--r--fs/xfs/xfs_bmap_util.h4
-rw-r--r--fs/xfs/xfs_buf.c71
-rw-r--r--fs/xfs/xfs_buf.h1
-rw-r--r--fs/xfs/xfs_buf_item.c21
-rw-r--r--fs/xfs/xfs_dir2_readdir.c341
-rw-r--r--fs/xfs/xfs_discard.c4
-rw-r--r--fs/xfs/xfs_dquot.c85
-rw-r--r--fs/xfs/xfs_error.c319
-rw-r--r--fs/xfs/xfs_error.h44
-rw-r--r--fs/xfs/xfs_file.c412
-rw-r--r--fs/xfs/xfs_fsops.c16
-rw-r--r--fs/xfs/xfs_fsops.h4
-rw-r--r--fs/xfs/xfs_globals.c5
-rw-r--r--fs/xfs/xfs_icache.c61
-rw-r--r--fs/xfs/xfs_icache.h4
-rw-r--r--fs/xfs/xfs_inode.c25
-rw-r--r--fs/xfs/xfs_inode.h7
-rw-r--r--fs/xfs/xfs_ioctl.c27
-rw-r--r--fs/xfs/xfs_ioctl.h10
-rw-r--r--fs/xfs/xfs_ioctl32.h6
-rw-r--r--fs/xfs/xfs_iomap.c26
-rw-r--r--fs/xfs/xfs_iops.c6
-rw-r--r--fs/xfs/xfs_itable.c2
-rw-r--r--fs/xfs/xfs_itable.h2
-rw-r--r--fs/xfs/xfs_linux.h21
-rw-r--r--fs/xfs/xfs_log.c87
-rw-r--r--fs/xfs/xfs_log.h2
-rw-r--r--fs/xfs/xfs_log_cil.c91
-rw-r--r--fs/xfs/xfs_log_priv.h3
-rw-r--r--fs/xfs/xfs_log_recover.c49
-rw-r--r--fs/xfs/xfs_message.c5
-rw-r--r--fs/xfs/xfs_mount.c30
-rw-r--r--fs/xfs/xfs_mount.h60
-rw-r--r--fs/xfs/xfs_qm.c28
-rw-r--r--fs/xfs/xfs_qm_bhv.c2
-rw-r--r--fs/xfs/xfs_quotaops.c1
-rw-r--r--fs/xfs/xfs_reflink.c101
-rw-r--r--fs/xfs/xfs_reflink.h8
-rw-r--r--fs/xfs/xfs_rtalloc.c8
-rw-r--r--fs/xfs/xfs_rtalloc.h3
-rw-r--r--fs/xfs/xfs_stats.c8
-rw-r--r--fs/xfs/xfs_stats.h190
-rw-r--r--fs/xfs/xfs_super.c29
-rw-r--r--fs/xfs/xfs_symlink.c12
-rw-r--r--fs/xfs/xfs_symlink.h1
-rw-r--r--fs/xfs/xfs_sysctl.h1
-rw-r--r--fs/xfs/xfs_sysfs.c81
-rw-r--r--fs/xfs/xfs_trace.h40
-rw-r--r--fs/xfs/xfs_trans.h8
-rw-r--r--fs/xfs/xfs_trans_bmap.c11
-rw-r--r--fs/xfs/xfs_trans_buf.c21
-rw-r--r--fs/xfs/xfs_trans_rmap.c2
343 files changed, 9682 insertions, 5776 deletions
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 4f64b95d57bd..095c54165dfd 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -27,6 +27,7 @@ kafs-objs := \
vlocation.o \
vnode.o \
volume.o \
- write.o
+ write.o \
+ xattr.o
obj-$(CONFIG_AFS_FS) := kafs.o
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 949f960337f5..613a77058263 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -61,6 +61,7 @@ const struct inode_operations afs_dir_inode_operations = {
.permission = afs_permission,
.getattr = afs_getattr,
.setattr = afs_setattr,
+ .listxattr = afs_listxattr,
};
const struct dentry_operations afs_fs_dentry_operations = {
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 0d5b8508869b..510cba15fa56 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -46,6 +46,7 @@ const struct inode_operations afs_file_inode_operations = {
.getattr = afs_getattr,
.setattr = afs_setattr,
.permission = afs_permission,
+ .listxattr = afs_listxattr,
};
const struct address_space_operations afs_fs_aops = {
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index aae55dd15108..342316a9e3e0 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -28,6 +28,11 @@ struct afs_iget_data {
struct afs_volume *volume; /* volume on which resides */
};
+static const struct inode_operations afs_symlink_inode_operations = { /* replaces page_symlink_inode_operations for AFS symlinks */
+ .get_link = page_get_link,
+ .listxattr = afs_listxattr, /* lets symlinks list the afs.* xattrs too */
+};
+
/*
* map the AFS file status to the inode member variables
*/
@@ -67,7 +72,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
inode->i_fop = &afs_mntpt_file_operations;
} else {
inode->i_mode = S_IFLNK | vnode->status.mode;
- inode->i_op = &page_symlink_inode_operations;
+ inode->i_op = &afs_symlink_inode_operations;
}
inode_nohighmem(inode);
break;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 4e2556606623..82e16556afea 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -731,6 +731,11 @@ extern int afs_writeback_all(struct afs_vnode *);
extern int afs_flush(struct file *, fl_owner_t);
extern int afs_fsync(struct file *, loff_t, loff_t, int);
+/*
+ * xattr.c
+ */
+extern const struct xattr_handler *afs_xattr_handlers[];
+extern ssize_t afs_listxattr(struct dentry *, char *, size_t);
/*****************************************************************************/
/*
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index bd3b65cde282..690fea9d84c3 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -35,6 +35,7 @@ const struct inode_operations afs_mntpt_inode_operations = {
.lookup = afs_mntpt_lookup,
.readlink = page_readlink,
.getattr = afs_getattr,
+ .listxattr = afs_listxattr,
};
const struct inode_operations afs_autocell_inode_operations = {
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index d5990eb160bd..02781e78ffb6 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -341,6 +341,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
struct msghdr msg;
struct kvec iov[1];
size_t offset;
+ s64 tx_total_len;
u32 abort_code;
int ret;
@@ -364,9 +365,20 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
srx.transport.sin.sin_port = call->port;
memcpy(&srx.transport.sin.sin_addr, addr, 4);
+ /* Work out the length we're going to transmit. This is awkward for
+ * calls such as FS.StoreData where there's an extra injection of data
+ * after the initial fixed part.
+ */
+ tx_total_len = call->request_size;
+ if (call->send_pages) {
+ tx_total_len += call->last_to - call->first_offset;
+ tx_total_len += (call->last - call->first) * PAGE_SIZE;
+ }
+
/* create a call */
rxcall = rxrpc_kernel_begin_call(afs_socket, &srx, call->key,
- (unsigned long) call, gfp,
+ (unsigned long)call,
+ tx_total_len, gfp,
(async ?
afs_wake_up_async_call :
afs_wake_up_call_waiter));
@@ -738,6 +750,8 @@ void afs_send_empty_reply(struct afs_call *call)
_enter("");
+ rxrpc_kernel_set_tx_length(afs_socket, call->rxcall, 0);
+
msg.msg_name = NULL;
msg.msg_namelen = 0;
iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, NULL, 0, 0);
@@ -772,6 +786,8 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
_enter("");
+ rxrpc_kernel_set_tx_length(afs_socket, call->rxcall, len);
+
iov[0].iov_base = (void *) buf;
iov[0].iov_len = len;
msg.msg_name = NULL;
diff --git a/fs/afs/security.c b/fs/afs/security.c
index ecb86a670180..faca66227ecf 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -327,12 +327,11 @@ int afs_permission(struct inode *inode, int mask)
if (!(access & AFS_ACE_LOOKUP))
goto permission_denied;
} else if (mask & MAY_READ) {
- if (!(access & AFS_ACE_READ))
+ if (!(access & AFS_ACE_LOOKUP))
goto permission_denied;
} else if (mask & MAY_WRITE) {
if (!(access & (AFS_ACE_DELETE | /* rmdir, unlink, rename from */
- AFS_ACE_INSERT | /* create, mkdir, symlink, rename to */
- AFS_ACE_WRITE))) /* chmod */
+ AFS_ACE_INSERT))) /* create, mkdir, symlink, rename to */
goto permission_denied;
} else {
BUG();
diff --git a/fs/afs/super.c b/fs/afs/super.c
index c79633e5cfd8..67680c2d96cf 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -319,6 +319,7 @@ static int afs_fill_super(struct super_block *sb,
sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = AFS_FS_MAGIC;
sb->s_op = &afs_super_ops;
+ sb->s_xattr = afs_xattr_handlers;
ret = super_setup_bdi(sb);
if (ret)
return ret;
diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c
new file mode 100644
index 000000000000..2830e4f48d85
--- /dev/null
+++ b/fs/afs/xattr.c
@@ -0,0 +1,121 @@
+/* Extended attribute handling for AFS. We use xattrs to get and set metadata
+ * instead of providing pioctl().
+ *
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/xattr.h>
+#include "internal.h"
+
+static const char afs_xattr_list[] = /* NUL-separated xattr names */
+ "afs.cell\0"
+ "afs.fid\0"
+ "afs.volume"; /* the literal's implicit terminator ends the last name */
+
+/*
+ * Retrieve a list of the supported xattrs (the same fixed list for any dentry).
+ */
+ssize_t afs_listxattr(struct dentry *dentry, char *buffer, size_t size)
+{
+ if (size == 0) /* length query: report the space needed, copy nothing */
+ return sizeof(afs_xattr_list);
+ if (size < sizeof(afs_xattr_list)) /* the list is never truncated */
+ return -ERANGE;
+ memcpy(buffer, afs_xattr_list, sizeof(afs_xattr_list));
+ return sizeof(afs_xattr_list); /* sizeof counts the final NUL as well */
+}
+
+/* Get the name of the cell on which a file resides.  Returns the name length
+ * (no NUL is stored); size 0 is a length query, a short buffer gives -ERANGE.
+ */
+static int afs_xattr_get_cell(const struct xattr_handler *handler,
+ struct dentry *dentry,
+ struct inode *inode, const char *name,
+ void *buffer, size_t size)
+{
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ struct afs_cell *cell = vnode->volume->cell;
+ size_t namelen;
+
+ namelen = strlen(cell->name);
+ if (size == 0)
+ return namelen;
+ if (namelen > size)
+ return -ERANGE;
+ memcpy(buffer, cell->name, namelen); /* not size: that reads past the name */
+ return namelen;
+}
+
+static const struct xattr_handler afs_xattr_afs_cell_handler = {
+ .name = "afs.cell", /* same name as advertised in afs_xattr_list */
+ .get = afs_xattr_get_cell, /* only .get is set here: no .set handler */
+};
+
+/*
+ * Get the volume ID, vnode ID and vnode uniquifier of a file as a sequence of
+ * hex numbers separated by colons (not NUL-terminated in the caller's buffer).
+ */
+static int afs_xattr_get_fid(const struct xattr_handler *handler,
+ struct dentry *dentry,
+ struct inode *inode, const char *name,
+ void *buffer, size_t size)
+{
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ char text[8 + 1 + 8 + 1 + 8 + 1]; /* 3 x 8 hex digits, 2 colons, NUL; assumes 32-bit fid fields - TODO confirm */
+ size_t len;
+
+ len = sprintf(text, "%x:%x:%x",
+ vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
+ if (size == 0) /* length query: report the text length only */
+ return len;
+ if (len > size)
+ return -ERANGE;
+ memcpy(buffer, text, len); /* len excludes sprintf's terminating NUL */
+ return len;
+}
+
+static const struct xattr_handler afs_xattr_afs_fid_handler = {
+ .name = "afs.fid", /* same name as advertised in afs_xattr_list */
+ .get = afs_xattr_get_fid, /* only .get is set here: no .set handler */
+};
+
+/* Get the name of the volume on which a file resides.  Returns the name length
+ * (no NUL is stored); size 0 is a length query, a short buffer gives -ERANGE.
+ */
+static int afs_xattr_get_volume(const struct xattr_handler *handler,
+ struct dentry *dentry,
+ struct inode *inode, const char *name,
+ void *buffer, size_t size)
+{
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ const char *volname = vnode->volume->vlocation->vldb.name;
+ size_t namelen;
+
+ namelen = strlen(volname);
+ if (size == 0)
+ return namelen;
+ if (namelen > size)
+ return -ERANGE;
+ memcpy(buffer, volname, namelen); /* not size: that reads past the name */
+ return namelen;
+}
+
+static const struct xattr_handler afs_xattr_afs_volume_handler = {
+ .name = "afs.volume", /* same name as advertised in afs_xattr_list */
+ .get = afs_xattr_get_volume, /* only .get is set here: no .set handler */
+};
+
+const struct xattr_handler *afs_xattr_handlers[] = { /* assigned to sb->s_xattr in afs_fill_super() */
+ &afs_xattr_afs_cell_handler,
+ &afs_xattr_afs_fid_handler,
+ &afs_xattr_afs_volume_handler,
+ NULL /* terminates the table */
+};
diff --git a/fs/aio.c b/fs/aio.c
index f52d925ee259..dcad3a66748c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1541,7 +1541,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
ssize_t ret;
/* enforce forwards compatibility on users */
- if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2)) {
+ if (unlikely(iocb->aio_reserved2)) {
pr_debug("EINVAL: reserve field set\n");
return -EINVAL;
}
@@ -1568,6 +1568,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
req->common.ki_pos = iocb->aio_offset;
req->common.ki_complete = aio_complete;
req->common.ki_flags = iocb_flags(req->common.ki_filp);
+ req->common.ki_hint = file_write_hint(file);
if (iocb->aio_flags & IOCB_FLAG_RESFD) {
/*
@@ -1586,6 +1587,18 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
req->common.ki_flags |= IOCB_EVENTFD;
}
+ ret = kiocb_set_rw_flags(&req->common, iocb->aio_rw_flags);
+ if (unlikely(ret)) {
+ pr_debug("EINVAL: aio_rw_flags\n");
+ goto out_put_req;
+ }
+
+ if ((req->common.ki_flags & IOCB_NOWAIT) &&
+ !(req->common.ki_flags & IOCB_DIRECT)) {
+ ret = -EOPNOTSUPP;
+ goto out_put_req;
+ }
+
ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
if (unlikely(ret)) {
pr_debug("EFAULT: aio_key\n");
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 734cbf8d9676..dd9f1bebb5a3 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -344,7 +344,7 @@ static int autofs_dev_ioctl_fail(struct file *fp,
int status;
token = (autofs_wqt_t) param->fail.token;
- status = param->fail.status ? param->fail.status : -ENOENT;
+ status = param->fail.status < 0 ? param->fail.status : -ENOENT;
return autofs4_wait_release(sbi, token, status);
}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 519599dddd36..9941dc8342df 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -225,6 +225,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
bio_init(&bio, vecs, nr_pages);
bio.bi_bdev = bdev;
bio.bi_iter.bi_sector = pos >> 9;
+ bio.bi_write_hint = iocb->ki_hint;
bio.bi_private = current;
bio.bi_end_io = blkdev_bio_end_io_simple;
@@ -262,8 +263,11 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
if (vecs != inline_vecs)
kfree(vecs);
- if (unlikely(bio.bi_error))
- return bio.bi_error;
+ if (unlikely(bio.bi_status))
+ ret = blk_status_to_errno(bio.bi_status);
+
+ bio_uninit(&bio);
+
return ret;
}
@@ -288,16 +292,18 @@ static void blkdev_bio_end_io(struct bio *bio)
bool should_dirty = dio->should_dirty;
if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) {
- if (bio->bi_error && !dio->bio.bi_error)
- dio->bio.bi_error = bio->bi_error;
+ if (bio->bi_status && !dio->bio.bi_status)
+ dio->bio.bi_status = bio->bi_status;
} else {
if (!dio->is_sync) {
struct kiocb *iocb = dio->iocb;
- ssize_t ret = dio->bio.bi_error;
+ ssize_t ret;
- if (likely(!ret)) {
+ if (likely(!dio->bio.bi_status)) {
ret = dio->size;
iocb->ki_pos += ret;
+ } else {
+ ret = blk_status_to_errno(dio->bio.bi_status);
}
dio->iocb->ki_complete(iocb, ret, 0);
@@ -334,7 +340,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
bool is_read = (iov_iter_rw(iter) == READ), is_sync;
loff_t pos = iocb->ki_pos;
blk_qc_t qc = BLK_QC_T_NONE;
- int ret;
+ int ret = 0;
if ((pos | iov_iter_alignment(iter)) &
(bdev_logical_block_size(bdev) - 1))
@@ -358,12 +364,13 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
for (;;) {
bio->bi_bdev = bdev;
bio->bi_iter.bi_sector = pos >> 9;
+ bio->bi_write_hint = iocb->ki_hint;
bio->bi_private = dio;
bio->bi_end_io = blkdev_bio_end_io;
ret = bio_iov_iter_get_pages(bio, iter);
if (unlikely(ret)) {
- bio->bi_error = ret;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
break;
}
@@ -412,7 +419,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
}
__set_current_state(TASK_RUNNING);
- ret = dio->bio.bi_error;
+ if (!ret)
+ ret = blk_status_to_errno(dio->bio.bi_status);
if (likely(!ret))
ret = dio->size;
@@ -436,7 +444,7 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
static __init int blkdev_init(void)
{
- blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio));
+ blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
if (!blkdev_dio_pool)
return -ENOMEM;
return 0;
@@ -624,7 +632,7 @@ int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
struct block_device *bdev = I_BDEV(bd_inode);
int error;
- error = filemap_write_and_wait_range(filp->f_mapping, start, end);
+ error = file_write_and_wait_range(filp, start, end);
if (error)
return error;
@@ -1743,6 +1751,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
return -ENOMEM;
filp->f_mapping = bdev->bd_inode->i_mapping;
+ filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
return blkdev_get(bdev, filp->f_mode, filp);
}
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 247b8dfaf6e5..8d8370ddb6b2 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -78,12 +78,6 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
switch (type) {
case ACL_TYPE_ACCESS:
name = XATTR_NAME_POSIX_ACL_ACCESS;
- if (acl) {
- ret = posix_acl_update_mode(inode, &inode->i_mode, &acl);
- if (ret)
- return ret;
- }
- ret = 0;
break;
case ACL_TYPE_DEFAULT:
if (!S_ISDIR(inode->i_mode))
@@ -119,6 +113,13 @@ out:
int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
+ int ret;
+
+ if (type == ACL_TYPE_ACCESS && acl) {
+ ret = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+ if (ret)
+ return ret;
+ }
return __btrfs_set_acl(NULL, inode, acl, type);
}
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 24865da63d8f..f723c11bb763 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -16,7 +16,7 @@
* Boston, MA 021110-1307, USA.
*/
-#include <linux/vmalloc.h>
+#include <linux/mm.h>
#include <linux/rbtree.h>
#include "ctree.h"
#include "disk-io.h"
@@ -2305,7 +2305,7 @@ struct btrfs_data_container *init_data_container(u32 total_bytes)
size_t alloc_bytes;
alloc_bytes = max_t(size_t, total_bytes, sizeof(*data));
- data = vmalloc(alloc_bytes);
+ data = kvmalloc(alloc_bytes, GFP_KERNEL);
if (!data)
return ERR_PTR(-ENOMEM);
@@ -2339,9 +2339,9 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
if (IS_ERR(fspath))
return (void *)fspath;
- ifp = kmalloc(sizeof(*ifp), GFP_NOFS);
+ ifp = kmalloc(sizeof(*ifp), GFP_KERNEL);
if (!ifp) {
- vfree(fspath);
+ kvfree(fspath);
return ERR_PTR(-ENOMEM);
}
@@ -2356,6 +2356,6 @@ void free_ipath(struct inode_fs_paths *ipath)
{
if (!ipath)
return;
- vfree(ipath->fspath);
+ kvfree(ipath->fspath);
kfree(ipath);
}
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index b8622e4d1744..d87ac27a5f2b 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -310,7 +310,8 @@ struct btrfs_dio_private {
* The original bio may be split to several sub-bios, this is
* done during endio of sub-bios
*/
- int (*subio_endio)(struct inode *, struct btrfs_io_bio *, int);
+ blk_status_t (*subio_endio)(struct inode *, struct btrfs_io_bio *,
+ blk_status_t);
};
/*
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index ab14c2e635ca..11d37c94ce05 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -94,7 +94,7 @@
#include <linux/mutex.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
-#include <linux/vmalloc.h>
+#include <linux/mm.h>
#include <linux/string.h>
#include "ctree.h"
#include "disk-io.h"
@@ -1638,12 +1638,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
struct bio *bio;
unsigned int j;
- bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i);
- if (!bio) {
- pr_info("btrfsic: bio_alloc() for %u pages failed!\n",
- num_pages - i);
- return -1;
- }
+ bio = btrfs_io_bio_alloc(num_pages - i);
bio->bi_bdev = block_ctx->dev->bdev;
bio->bi_iter.bi_sector = dev_bytenr >> 9;
bio_set_op_attrs(bio, REQ_OP_READ, 0);
@@ -1668,14 +1663,8 @@ static int btrfsic_read_block(struct btrfsic_state *state,
dev_bytenr += (j - i) * PAGE_SIZE;
i = j;
}
- for (i = 0; i < num_pages; i++) {
+ for (i = 0; i < num_pages; i++)
block_ctx->datav[i] = kmap(block_ctx->pagev[i]);
- if (!block_ctx->datav[i]) {
- pr_info("btrfsic: kmap() failed (dev %s)!\n",
- block_ctx->dev->name);
- return -1;
- }
- }
return block_ctx->len;
}
@@ -2129,7 +2118,7 @@ static void btrfsic_bio_end_io(struct bio *bp)
/* mutex is not held! This is not save if IO is not yet completed
* on umount */
iodone_w_error = 0;
- if (bp->bi_error)
+ if (bp->bi_status)
iodone_w_error = 1;
BUG_ON(NULL == block);
@@ -2143,7 +2132,7 @@ static void btrfsic_bio_end_io(struct bio *bp)
if ((dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
pr_info("bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
- bp->bi_error,
+ bp->bi_status,
btrfsic_get_block_type(dev_state->state, block),
block->logical_bytenr, dev_state->name,
block->dev_bytenr, block->mirror_num);
@@ -2822,44 +2811,47 @@ static void __btrfsic_submit_bio(struct bio *bio)
dev_state = btrfsic_dev_state_lookup(bio->bi_bdev);
if (NULL != dev_state &&
(bio_op(bio) == REQ_OP_WRITE) && bio_has_data(bio)) {
- unsigned int i;
+ unsigned int i = 0;
u64 dev_bytenr;
u64 cur_bytenr;
- struct bio_vec *bvec;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
int bio_is_patched;
char **mapped_datav;
+ unsigned int segs = bio_segments(bio);
dev_bytenr = 512 * bio->bi_iter.bi_sector;
bio_is_patched = 0;
if (dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
pr_info("submit_bio(rw=%d,0x%x, bi_vcnt=%u, bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
- bio_op(bio), bio->bi_opf, bio->bi_vcnt,
+ bio_op(bio), bio->bi_opf, segs,
(unsigned long long)bio->bi_iter.bi_sector,
dev_bytenr, bio->bi_bdev);
- mapped_datav = kmalloc_array(bio->bi_vcnt,
+ mapped_datav = kmalloc_array(segs,
sizeof(*mapped_datav), GFP_NOFS);
if (!mapped_datav)
goto leave;
cur_bytenr = dev_bytenr;
- bio_for_each_segment_all(bvec, bio, i) {
- BUG_ON(bvec->bv_len != PAGE_SIZE);
- mapped_datav[i] = kmap(bvec->bv_page);
+ bio_for_each_segment(bvec, bio, iter) {
+ BUG_ON(bvec.bv_len != PAGE_SIZE);
+ mapped_datav[i] = kmap(bvec.bv_page);
+ i++;
if (dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE)
pr_info("#%u: bytenr=%llu, len=%u, offset=%u\n",
- i, cur_bytenr, bvec->bv_len, bvec->bv_offset);
- cur_bytenr += bvec->bv_len;
+ i, cur_bytenr, bvec.bv_len, bvec.bv_offset);
+ cur_bytenr += bvec.bv_len;
}
btrfsic_process_written_block(dev_state, dev_bytenr,
- mapped_datav, bio->bi_vcnt,
+ mapped_datav, segs,
bio, &bio_is_patched,
NULL, bio->bi_opf);
- bio_for_each_segment_all(bvec, bio, i)
- kunmap(bvec->bv_page);
+ bio_for_each_segment(bvec, bio, iter)
+ kunmap(bvec.bv_page);
kfree(mapped_datav);
} else if (NULL != dev_state && (bio->bi_opf & REQ_PREFLUSH)) {
if (dev_state->state->print_mask &
@@ -2923,13 +2915,10 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
fs_info->sectorsize, PAGE_SIZE);
return -1;
}
- state = kzalloc(sizeof(*state), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+ state = kvzalloc(sizeof(*state), GFP_KERNEL);
if (!state) {
- state = vzalloc(sizeof(*state));
- if (!state) {
- pr_info("btrfs check-integrity: vzalloc() failed!\n");
- return -1;
- }
+ pr_info("btrfs check-integrity: allocation failed!\n");
+ return -1;
}
if (!btrfsic_is_initialized) {
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 10e6b282d09d..2c0b7b57fcd5 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -32,6 +32,7 @@
#include <linux/writeback.h>
#include <linux/bit_spinlock.h>
#include <linux/slab.h>
+#include <linux/sched/mm.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -42,48 +43,7 @@
#include "extent_io.h"
#include "extent_map.h"
-struct compressed_bio {
- /* number of bios pending for this compressed extent */
- refcount_t pending_bios;
-
- /* the pages with the compressed data on them */
- struct page **compressed_pages;
-
- /* inode that owns this data */
- struct inode *inode;
-
- /* starting offset in the inode for our pages */
- u64 start;
-
- /* number of bytes in the inode we're working on */
- unsigned long len;
-
- /* number of bytes on disk */
- unsigned long compressed_len;
-
- /* the compression algorithm for this bio */
- int compress_type;
-
- /* number of compressed pages in the array */
- unsigned long nr_pages;
-
- /* IO errors */
- int errors;
- int mirror_num;
-
- /* for reads, this is the bio we are copying the data into */
- struct bio *orig_bio;
-
- /*
- * the start of a variable length array of checksums only
- * used by reads
- */
- u32 sums;
-};
-
-static int btrfs_decompress_bio(int type, struct page **pages_in,
- u64 disk_start, struct bio *orig_bio,
- size_t srclen);
+static int btrfs_decompress_bio(struct compressed_bio *cb);
static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
unsigned long disk_size)
@@ -94,12 +54,6 @@ static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
(DIV_ROUND_UP(disk_size, fs_info->sectorsize)) * csum_size;
}
-static struct bio *compressed_bio_alloc(struct block_device *bdev,
- u64 first_byte, gfp_t gfp_flags)
-{
- return btrfs_bio_alloc(bdev, first_byte >> 9, BIO_MAX_PAGES, gfp_flags);
-}
-
static int check_compressed_csum(struct btrfs_inode *inode,
struct compressed_bio *cb,
u64 disk_start)
@@ -155,7 +109,7 @@ static void end_compressed_bio_read(struct bio *bio)
unsigned long index;
int ret;
- if (bio->bi_error)
+ if (bio->bi_status)
cb->errors = 1;
/* if there are more bios still pending for this compressed
@@ -173,11 +127,8 @@ static void end_compressed_bio_read(struct bio *bio)
/* ok, we're the last bio for this extent, lets start
* the decompression.
*/
- ret = btrfs_decompress_bio(cb->compress_type,
- cb->compressed_pages,
- cb->start,
- cb->orig_bio,
- cb->compressed_len);
+ ret = btrfs_decompress_bio(cb);
+
csum_failed:
if (ret)
cb->errors = 1;
@@ -268,7 +219,7 @@ static void end_compressed_bio_write(struct bio *bio)
struct page *page;
unsigned long index;
- if (bio->bi_error)
+ if (bio->bi_status)
cb->errors = 1;
/* if there are more bios still pending for this compressed
@@ -287,7 +238,7 @@ static void end_compressed_bio_write(struct bio *bio)
cb->start,
cb->start + cb->len - 1,
NULL,
- bio->bi_error ? 0 : 1);
+ bio->bi_status ? 0 : 1);
cb->compressed_pages[0]->mapping = NULL;
end_compressed_writeback(inode, cb);
@@ -320,7 +271,7 @@ out:
* This also checksums the file bytes and gets things ready for
* the end io hooks.
*/
-int btrfs_submit_compressed_write(struct inode *inode, u64 start,
+blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned long len, u64 disk_start,
unsigned long compressed_len,
struct page **compressed_pages,
@@ -335,13 +286,13 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
struct page *page;
u64 first_byte = disk_start;
struct block_device *bdev;
- int ret;
+ blk_status_t ret;
int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
WARN_ON(start & ((u64)PAGE_SIZE - 1));
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
if (!cb)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
refcount_set(&cb->pending_bios, 0);
cb->errors = 0;
cb->inode = inode;
@@ -355,11 +306,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
bdev = fs_info->fs_devices->latest_bdev;
- bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
- if (!bio) {
- kfree(cb);
- return -ENOMEM;
- }
+ bio = btrfs_bio_alloc(bdev, first_byte);
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
@@ -368,17 +315,17 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
/* create and submit bios for the compressed pages */
bytes_left = compressed_len;
for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
+ int submit = 0;
+
page = compressed_pages[pg_index];
page->mapping = inode->i_mapping;
if (bio->bi_iter.bi_size)
- ret = io_tree->ops->merge_bio_hook(page, 0,
+ submit = io_tree->ops->merge_bio_hook(page, 0,
PAGE_SIZE,
bio, 0);
- else
- ret = 0;
page->mapping = NULL;
- if (ret || bio_add_page(bio, page, PAGE_SIZE, 0) <
+ if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
PAGE_SIZE) {
bio_get(bio);
@@ -400,14 +347,13 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
ret = btrfs_map_bio(fs_info, bio, 0, 1);
if (ret) {
- bio->bi_error = ret;
+ bio->bi_status = ret;
bio_endio(bio);
}
bio_put(bio);
- bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
- BUG_ON(!bio);
+ bio = btrfs_bio_alloc(bdev, first_byte);
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
@@ -434,7 +380,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
ret = btrfs_map_bio(fs_info, bio, 0, 1);
if (ret) {
- bio->bi_error = ret;
+ bio->bi_status = ret;
bio_endio(bio);
}
@@ -569,7 +515,7 @@ next:
* After the compressed pages are read, we copy the bytes into the
* bio we were passed and then call the bio end_io calls
*/
-int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -586,7 +532,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
u64 em_len;
u64 em_start;
struct extent_map *em;
- int ret = -ENOMEM;
+ blk_status_t ret = BLK_STS_RESOURCE;
int faili = 0;
u32 *sums;
@@ -600,7 +546,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
PAGE_SIZE);
read_unlock(&em_tree->lock);
if (!em)
- return -EIO;
+ return BLK_STS_IOERR;
compressed_len = em->block_len;
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
@@ -638,7 +584,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
__GFP_HIGHMEM);
if (!cb->compressed_pages[pg_index]) {
faili = pg_index - 1;
- ret = -ENOMEM;
+ ret = BLK_STS_RESOURCE;
goto fail2;
}
}
@@ -650,28 +596,26 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
/* include any pages we added in add_ra-bio_pages */
cb->len = bio->bi_iter.bi_size;
- comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
- if (!comp_bio)
- goto fail2;
+ comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte);
bio_set_op_attrs (comp_bio, REQ_OP_READ, 0);
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
refcount_set(&cb->pending_bios, 1);
for (pg_index = 0; pg_index < nr_pages; pg_index++) {
+ int submit = 0;
+
page = cb->compressed_pages[pg_index];
page->mapping = inode->i_mapping;
page->index = em_start >> PAGE_SHIFT;
if (comp_bio->bi_iter.bi_size)
- ret = tree->ops->merge_bio_hook(page, 0,
+ submit = tree->ops->merge_bio_hook(page, 0,
PAGE_SIZE,
comp_bio, 0);
- else
- ret = 0;
page->mapping = NULL;
- if (ret || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
+ if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
PAGE_SIZE) {
bio_get(comp_bio);
@@ -697,15 +641,13 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
if (ret) {
- comp_bio->bi_error = ret;
+ comp_bio->bi_status = ret;
bio_endio(comp_bio);
}
bio_put(comp_bio);
- comp_bio = compressed_bio_alloc(bdev, cur_disk_byte,
- GFP_NOFS);
- BUG_ON(!comp_bio);
+ comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte);
bio_set_op_attrs(comp_bio, REQ_OP_READ, 0);
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
@@ -726,7 +668,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
if (ret) {
- comp_bio->bi_error = ret;
+ comp_bio->bi_status = ret;
bio_endio(comp_bio);
}
@@ -801,6 +743,7 @@ static struct list_head *find_workspace(int type)
struct list_head *workspace;
int cpus = num_online_cpus();
int idx = type - 1;
+ unsigned nofs_flag;
struct list_head *idle_ws = &btrfs_comp_ws[idx].idle_ws;
spinlock_t *ws_lock = &btrfs_comp_ws[idx].ws_lock;
@@ -830,7 +773,15 @@ again:
atomic_inc(total_ws);
spin_unlock(ws_lock);
+ /*
+ * Allocation helpers call vmalloc that can't use GFP_NOFS, so we have
+ * to open a NOFS allocation scope around the call because we might get
+ * called from the restricted context of btrfs_compress_bio/btrfs_compress_pages
+ */
+ nofs_flag = memalloc_nofs_save();
workspace = btrfs_compress_op[idx]->alloc_workspace();
+ memalloc_nofs_restore(nofs_flag);
+
if (IS_ERR(workspace)) {
atomic_dec(total_ws);
wake_up(ws_wait);
@@ -961,19 +912,16 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
* be contiguous. They all correspond to the range of bytes covered by
* the compressed extent.
*/
-static int btrfs_decompress_bio(int type, struct page **pages_in,
- u64 disk_start, struct bio *orig_bio,
- size_t srclen)
+static int btrfs_decompress_bio(struct compressed_bio *cb)
{
struct list_head *workspace;
int ret;
+ int type = cb->compress_type;
workspace = find_workspace(type);
-
- ret = btrfs_compress_op[type-1]->decompress_bio(workspace, pages_in,
- disk_start, orig_bio,
- srclen);
+ ret = btrfs_compress_op[type - 1]->decompress_bio(workspace, cb);
free_workspace(type, workspace);
+
return ret;
}
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 39ec43ab8df1..87f6d3332163 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -34,6 +34,45 @@
/* Maximum size of data before compression */
#define BTRFS_MAX_UNCOMPRESSED (SZ_128K)
+struct compressed_bio {
+ /* number of bios pending for this compressed extent */
+ refcount_t pending_bios;
+
+ /* the pages with the compressed data on them */
+ struct page **compressed_pages;
+
+ /* inode that owns this data */
+ struct inode *inode;
+
+ /* starting offset in the inode for our pages */
+ u64 start;
+
+ /* number of bytes in the inode we're working on */
+ unsigned long len;
+
+ /* number of bytes on disk */
+ unsigned long compressed_len;
+
+ /* the compression algorithm for this bio */
+ int compress_type;
+
+ /* number of compressed pages in the array */
+ unsigned long nr_pages;
+
+ /* set to 1 when a bio, the checksum check or decompression fails */
+ int errors;
+ int mirror_num;
+
+ /* for reads, this is the bio we are copying the data into */
+ struct bio *orig_bio;
+
+ /*
+ * the start of a variable length array of checksums only
+ * used by reads
+ */
+ u32 sums;
+};
+
void btrfs_init_compress(void);
void btrfs_exit_compress(void);
@@ -48,12 +87,12 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
unsigned long total_out, u64 disk_start,
struct bio *bio);
-int btrfs_submit_compressed_write(struct inode *inode, u64 start,
+blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned long len, u64 disk_start,
unsigned long compressed_len,
struct page **compressed_pages,
unsigned long nr_pages);
-int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags);
enum btrfs_compression_type {
@@ -78,10 +117,7 @@ struct btrfs_compress_op {
unsigned long *total_out);
int (*decompress_bio)(struct list_head *workspace,
- struct page **pages_in,
- u64 disk_start,
- struct bio *orig_bio,
- size_t srclen);
+ struct compressed_bio *cb);
int (*decompress)(struct list_head *workspace,
unsigned char *data_in,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index a3a75f1de002..3f4daa9d6e2c 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -19,7 +19,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/rbtree.h>
-#include <linux/vmalloc.h>
+#include <linux/mm.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -3667,14 +3667,14 @@ static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info,
/* make room in the right data area */
data_end = leaf_data_end(fs_info, right);
memmove_extent_buffer(right,
- btrfs_leaf_data(right) + data_end - push_space,
- btrfs_leaf_data(right) + data_end,
+ BTRFS_LEAF_DATA_OFFSET + data_end - push_space,
+ BTRFS_LEAF_DATA_OFFSET + data_end,
BTRFS_LEAF_DATA_SIZE(fs_info) - data_end);
/* copy from the left data area */
- copy_extent_buffer(right, left, btrfs_leaf_data(right) +
+ copy_extent_buffer(right, left, BTRFS_LEAF_DATA_OFFSET +
BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
- btrfs_leaf_data(left) + leaf_data_end(fs_info, left),
+ BTRFS_LEAF_DATA_OFFSET + leaf_data_end(fs_info, left),
push_space);
memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
@@ -3888,9 +3888,9 @@ static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
push_space = BTRFS_LEAF_DATA_SIZE(fs_info) -
btrfs_item_offset_nr(right, push_items - 1);
- copy_extent_buffer(left, right, btrfs_leaf_data(left) +
+ copy_extent_buffer(left, right, BTRFS_LEAF_DATA_OFFSET +
leaf_data_end(fs_info, left) - push_space,
- btrfs_leaf_data(right) +
+ BTRFS_LEAF_DATA_OFFSET +
btrfs_item_offset_nr(right, push_items - 1),
push_space);
old_left_nritems = btrfs_header_nritems(left);
@@ -3917,9 +3917,9 @@ static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
if (push_items < right_nritems) {
push_space = btrfs_item_offset_nr(right, push_items - 1) -
leaf_data_end(fs_info, right);
- memmove_extent_buffer(right, btrfs_leaf_data(right) +
+ memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET +
BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
- btrfs_leaf_data(right) +
+ BTRFS_LEAF_DATA_OFFSET +
leaf_data_end(fs_info, right), push_space);
memmove_extent_buffer(right, btrfs_item_nr_offset(0),
@@ -4069,8 +4069,8 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
nritems * sizeof(struct btrfs_item));
copy_extent_buffer(right, l,
- btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(fs_info) -
- data_copy_size, btrfs_leaf_data(l) +
+ BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) -
+ data_copy_size, BTRFS_LEAF_DATA_OFFSET +
leaf_data_end(fs_info, l), data_copy_size);
rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_end_nr(l, mid);
@@ -4607,8 +4607,8 @@ void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
/* shift the data */
if (from_end) {
- memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
- data_end + size_diff, btrfs_leaf_data(leaf) +
+ memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
+ data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
data_end, old_data_start + new_size - data_end);
} else {
struct btrfs_disk_key disk_key;
@@ -4634,8 +4634,8 @@ void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
}
}
- memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
- data_end + size_diff, btrfs_leaf_data(leaf) +
+ memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
+ data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
data_end, old_data_start - data_end);
offset = btrfs_disk_key_offset(&disk_key);
@@ -4707,8 +4707,8 @@ void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
}
/* shift the data */
- memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
- data_end - data_size, btrfs_leaf_data(leaf) +
+ memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
+ data_end - data_size, BTRFS_LEAF_DATA_OFFSET +
data_end, old_data - data_end);
data_end = old_data;
@@ -4790,8 +4790,8 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
(nritems - slot) * sizeof(struct btrfs_item));
/* shift the data */
- memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
- data_end - total_data, btrfs_leaf_data(leaf) +
+ memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
+ data_end - total_data, BTRFS_LEAF_DATA_OFFSET +
data_end, old_data - data_end);
data_end = old_data;
}
@@ -4983,9 +4983,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (slot + nr != nritems) {
int data_end = leaf_data_end(fs_info, leaf);
- memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
+ memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
data_end + dsize,
- btrfs_leaf_data(leaf) + data_end,
+ BTRFS_LEAF_DATA_OFFSET + data_end,
last_off - data_end);
for (i = slot + nr; i < nritems; i++) {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4f8f75d9e839..3f3eb7b17cac 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -48,7 +48,6 @@ struct btrfs_trans_handle;
struct btrfs_transaction;
struct btrfs_pending_snapshot;
extern struct kmem_cache *btrfs_trans_handle_cachep;
-extern struct kmem_cache *btrfs_transaction_cachep;
extern struct kmem_cache *btrfs_bit_radix_cachep;
extern struct kmem_cache *btrfs_path_cachep;
extern struct kmem_cache *btrfs_free_space_cachep;
@@ -716,6 +715,10 @@ struct btrfs_delayed_root;
#define BTRFS_FS_BTREE_ERR 11
#define BTRFS_FS_LOG1_ERR 12
#define BTRFS_FS_LOG2_ERR 13
+#define BTRFS_FS_QUOTA_OVERRIDE 14
+/* Used to record internally whether fs has been frozen */
+#define BTRFS_FS_FROZEN 15
+
/*
* Indicate that a whole-filesystem exclusive operation is running
* (device replace, resize, device add/delete, balance)
@@ -748,8 +751,7 @@ struct btrfs_fs_info {
struct rb_root block_group_cache_tree;
/* keep track of unallocated space */
- spinlock_t free_chunk_lock;
- u64 free_chunk_space;
+ atomic64_t free_chunk_space;
struct extent_io_tree freed_extents[2];
struct extent_io_tree *pinned_extents;
@@ -797,17 +799,7 @@ struct btrfs_fs_info {
* so it is also safe.
*/
u64 max_inline;
- /*
- * Protected by ->chunk_mutex and sb->s_umount.
- *
- * The reason that we use two lock to protect it is because only
- * remount and mount operations can change it and these two operations
- * are under sb->s_umount, but the read side (chunk allocation) can not
- * acquire sb->s_umount or the deadlock would happen. So we use two
- * locks to protect it. On the write side, we must acquire two locks,
- * and on the read side, we just need acquire one of them.
- */
- u64 alloc_start;
+
struct btrfs_transaction *running_transaction;
wait_queue_head_t transaction_throttle;
wait_queue_head_t transaction_wait;
@@ -1107,9 +1099,6 @@ struct btrfs_fs_info {
*/
struct list_head pinned_chunks;
- /* Used to record internally whether fs has been frozen */
- int fs_frozen;
-
/* Cached block sizes */
u32 nodesize;
u32 sectorsize;
@@ -1277,21 +1266,20 @@ struct btrfs_root {
/* For qgroup metadata space reserve */
atomic64_t qgroup_meta_rsv;
};
+
static inline u32 btrfs_inode_sectorsize(const struct inode *inode)
{
return btrfs_sb(inode->i_sb)->sectorsize;
}
-static inline u32 __BTRFS_LEAF_DATA_SIZE(u32 blocksize)
-{
- return blocksize - sizeof(struct btrfs_header);
-}
-
static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info)
{
- return __BTRFS_LEAF_DATA_SIZE(info->nodesize);
+
+ return info->nodesize - sizeof(struct btrfs_header);
}
+#define BTRFS_LEAF_DATA_OFFSET offsetof(struct btrfs_leaf, items)
+
static inline u32 BTRFS_MAX_ITEM_SIZE(const struct btrfs_fs_info *info)
{
return BTRFS_LEAF_DATA_SIZE(info) - sizeof(struct btrfs_item);
@@ -1553,8 +1541,27 @@ static inline void btrfs_set_##name(type *s, u##bits val) \
s->member = cpu_to_le##bits(val); \
}
+
+static inline u64 btrfs_device_total_bytes(struct extent_buffer *eb,
+ struct btrfs_dev_item *s)
+{
+ BUILD_BUG_ON(sizeof(u64) !=
+ sizeof(((struct btrfs_dev_item *)0))->total_bytes);
+ return btrfs_get_64(eb, s, offsetof(struct btrfs_dev_item,
+ total_bytes));
+}
+static inline void btrfs_set_device_total_bytes(struct extent_buffer *eb,
+ struct btrfs_dev_item *s,
+ u64 val)
+{
+ BUILD_BUG_ON(sizeof(u64) !=
+ sizeof(((struct btrfs_dev_item *)0))->total_bytes);
+ WARN_ON(!IS_ALIGNED(val, eb->fs_info->sectorsize));
+ btrfs_set_64(eb, s, offsetof(struct btrfs_dev_item, total_bytes), val);
+}
+
+
BTRFS_SETGET_FUNCS(device_type, struct btrfs_dev_item, type, 64);
-BTRFS_SETGET_FUNCS(device_total_bytes, struct btrfs_dev_item, total_bytes, 64);
BTRFS_SETGET_FUNCS(device_bytes_used, struct btrfs_dev_item, bytes_used, 64);
BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32);
BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32);
@@ -2324,10 +2331,6 @@ static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
return btrfs_csum_sizes[t];
}
-static inline unsigned long btrfs_leaf_data(struct extent_buffer *l)
-{
- return offsetof(struct btrfs_leaf, items);
-}
/*
* The leaf data grows from end-to-front in the node.
@@ -2538,11 +2541,11 @@ BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cursor_right,
/* helper function to cast into the data area of the leaf. */
#define btrfs_item_ptr(leaf, slot, type) \
- ((type *)(btrfs_leaf_data(leaf) + \
+ ((type *)(BTRFS_LEAF_DATA_OFFSET + \
btrfs_item_offset_nr(leaf, slot)))
#define btrfs_item_ptr_offset(leaf, slot) \
- ((unsigned long)(btrfs_leaf_data(leaf) + \
+ ((unsigned long)(BTRFS_LEAF_DATA_OFFSET + \
btrfs_item_offset_nr(leaf, slot)))
static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
@@ -2680,7 +2683,9 @@ void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache);
void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *cache);
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
-u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
+u64 btrfs_data_alloc_profile(struct btrfs_fs_info *fs_info);
+u64 btrfs_metadata_alloc_profile(struct btrfs_fs_info *fs_info);
+u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
enum btrfs_reserve_flush_enum {
@@ -2703,9 +2708,13 @@ enum btrfs_flush_state {
COMMIT_TRANS = 6,
};
-int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len);
int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes);
-void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len);
+int btrfs_check_data_free_space(struct inode *inode,
+ struct extent_changeset **reserved, u64 start, u64 len);
+void btrfs_free_reserved_data_space(struct inode *inode,
+ struct extent_changeset *reserved, u64 start, u64 len);
+void btrfs_delalloc_release_space(struct inode *inode,
+ struct extent_changeset *reserved, u64 start, u64 len);
void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
u64 len);
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
@@ -2722,8 +2731,8 @@ void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes);
-int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len);
-void btrfs_delalloc_release_space(struct inode *inode, u64 start, u64 len);
+int btrfs_delalloc_reserve_space(struct inode *inode,
+ struct extent_changeset **reserved, u64 start, u64 len);
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
unsigned short type);
@@ -3031,12 +3040,14 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
const char *name, u16 name_len,
int mod);
int verify_dir_item(struct btrfs_fs_info *fs_info,
- struct extent_buffer *leaf,
+ struct extent_buffer *leaf, int slot,
struct btrfs_dir_item *dir_item);
struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
const char *name,
int name_len);
+bool btrfs_is_name_len_valid(struct extent_buffer *leaf, int slot,
+ unsigned long start, u16 name_len);
/* orphan.c */
int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
@@ -3078,8 +3089,8 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path,
struct btrfs_dio_private;
int btrfs_del_csums(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr, u64 len);
-int btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst);
-int btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst);
+blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
u64 logical_offset);
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -3094,7 +3105,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_ordered_sum *sums);
-int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
u64 file_start, int contig);
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit);
@@ -3171,6 +3182,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
size_t size, struct bio *bio,
unsigned long bio_flags);
+void btrfs_set_range_writeback(void *private_data, u64 start, u64 end);
int btrfs_page_mkwrite(struct vm_fault *vmf);
int btrfs_readpage(struct file *file, struct page *page);
void btrfs_evict_inode(struct inode *inode);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index be70d90dfee5..93ffa898df6d 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -470,7 +470,8 @@ add_tail:
static noinline void
update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_node *existing,
- struct btrfs_delayed_ref_node *update)
+ struct btrfs_delayed_ref_node *update,
+ int *old_ref_mod_ret)
{
struct btrfs_delayed_ref_head *existing_ref;
struct btrfs_delayed_ref_head *ref;
@@ -523,6 +524,8 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
* currently, for refs we just added we know we're a-ok.
*/
old_ref_mod = existing_ref->total_ref_mod;
+ if (old_ref_mod_ret)
+ *old_ref_mod_ret = old_ref_mod;
existing->ref_mod += update->ref_mod;
existing_ref->total_ref_mod += update->ref_mod;
@@ -550,7 +553,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_node *ref,
struct btrfs_qgroup_extent_record *qrecord,
u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
- int action, int is_data, int *qrecord_inserted_ret)
+ int action, int is_data, int *qrecord_inserted_ret,
+ int *old_ref_mod, int *new_ref_mod)
{
struct btrfs_delayed_ref_head *existing;
struct btrfs_delayed_ref_head *head_ref = NULL;
@@ -638,7 +642,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
if (existing) {
WARN_ON(ref_root && reserved && existing->qgroup_ref_root
&& existing->qgroup_reserved);
- update_existing_head_ref(delayed_refs, &existing->node, ref);
+ update_existing_head_ref(delayed_refs, &existing->node, ref,
+ old_ref_mod);
/*
* we've updated the existing ref, free the newly
* allocated ref
@@ -646,6 +651,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
head_ref = existing;
} else {
+ if (old_ref_mod)
+ *old_ref_mod = 0;
if (is_data && count_mod < 0)
delayed_refs->pending_csums += num_bytes;
delayed_refs->num_heads++;
@@ -655,6 +662,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
}
if (qrecord_inserted_ret)
*qrecord_inserted_ret = qrecord_inserted;
+ if (new_ref_mod)
+ *new_ref_mod = head_ref->total_ref_mod;
return head_ref;
}
@@ -778,7 +787,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
- struct btrfs_delayed_extent_op *extent_op)
+ struct btrfs_delayed_extent_op *extent_op,
+ int *old_ref_mod, int *new_ref_mod)
{
struct btrfs_delayed_tree_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
@@ -813,7 +823,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
*/
head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
bytenr, num_bytes, 0, 0, action, 0,
- &qrecord_inserted);
+ &qrecord_inserted, old_ref_mod,
+ new_ref_mod);
add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, level, action);
@@ -838,7 +849,8 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
- u64 owner, u64 offset, u64 reserved, int action)
+ u64 owner, u64 offset, u64 reserved, int action,
+ int *old_ref_mod, int *new_ref_mod)
{
struct btrfs_delayed_data_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
@@ -878,7 +890,8 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
*/
head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
bytenr, num_bytes, ref_root, reserved,
- action, 1, &qrecord_inserted);
+ action, 1, &qrecord_inserted,
+ old_ref_mod, new_ref_mod);
add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, owner, offset,
@@ -909,7 +922,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr,
num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
- extent_op->is_data, NULL);
+ extent_op->is_data, NULL, NULL, NULL);
spin_unlock(&delayed_refs->lock);
return 0;
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index c0264ff01b53..ce88e4ac5276 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -247,12 +247,14 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
- struct btrfs_delayed_extent_op *extent_op);
+ struct btrfs_delayed_extent_op *extent_op,
+ int *old_ref_mod, int *new_ref_mod);
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
- u64 owner, u64 offset, u64 reserved, int action);
+ u64 owner, u64 offset, u64 reserved, int action,
+ int *old_ref_mod, int *new_ref_mod);
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 5fe1ca8abc70..bee3edeea7a3 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -388,7 +388,7 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
if (ret)
btrfs_err(fs_info, "kobj add dev failed %d", ret);
- btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
+ btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
/* force writing the updated state information to disk */
trans = btrfs_start_transaction(root, 0);
@@ -507,7 +507,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return ret;
}
- btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
+ btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index c24d615e3d7f..41cb9196eaa8 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -395,8 +395,6 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info,
leaf = path->nodes[0];
dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
- if (verify_dir_item(fs_info, leaf, dir_item))
- return NULL;
total_len = btrfs_item_size_nr(leaf, path->slots[0]);
while (cur < total_len) {
@@ -405,6 +403,8 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info,
btrfs_dir_data_len(leaf, dir_item);
name_ptr = (unsigned long)(dir_item + 1);
+ if (verify_dir_item(fs_info, leaf, path->slots[0], dir_item))
+ return NULL;
if (btrfs_dir_name_len(leaf, dir_item) == name_len &&
memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)
return dir_item;
@@ -453,9 +453,11 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
int verify_dir_item(struct btrfs_fs_info *fs_info,
struct extent_buffer *leaf,
+ int slot,
struct btrfs_dir_item *dir_item)
{
u16 namelen = BTRFS_NAME_LEN;
+ int ret;
u8 type = btrfs_dir_type(leaf, dir_item);
if (type >= BTRFS_FT_MAX) {
@@ -472,6 +474,12 @@ int verify_dir_item(struct btrfs_fs_info *fs_info,
return 1;
}
+ namelen = btrfs_dir_name_len(leaf, dir_item);
+ ret = btrfs_is_name_len_valid(leaf, slot,
+ (unsigned long)(dir_item + 1), namelen);
+ if (!ret)
+ return 1;
+
/* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
if ((btrfs_dir_data_len(leaf, dir_item) +
btrfs_dir_name_len(leaf, dir_item)) >
@@ -484,3 +492,67 @@ int verify_dir_item(struct btrfs_fs_info *fs_info,
return 0;
}
+
+bool btrfs_is_name_len_valid(struct extent_buffer *leaf, int slot,
+ unsigned long start, u16 name_len)
+{
+ struct btrfs_fs_info *fs_info = leaf->fs_info;
+ struct btrfs_key key;
+ u32 read_start;
+ u32 read_end;
+ u32 item_start;
+ u32 item_end;
+ u32 size;
+ bool ret = true;
+
+ ASSERT(start > BTRFS_LEAF_DATA_OFFSET);
+
+ read_start = start - BTRFS_LEAF_DATA_OFFSET;
+ read_end = read_start + name_len;
+ item_start = btrfs_item_offset_nr(leaf, slot);
+ item_end = btrfs_item_end_nr(leaf, slot);
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+
+ switch (key.type) {
+ case BTRFS_DIR_ITEM_KEY:
+ case BTRFS_XATTR_ITEM_KEY:
+ case BTRFS_DIR_INDEX_KEY:
+ size = sizeof(struct btrfs_dir_item);
+ break;
+ case BTRFS_INODE_REF_KEY:
+ size = sizeof(struct btrfs_inode_ref);
+ break;
+ case BTRFS_INODE_EXTREF_KEY:
+ size = sizeof(struct btrfs_inode_extref);
+ break;
+ case BTRFS_ROOT_REF_KEY:
+ case BTRFS_ROOT_BACKREF_KEY:
+ size = sizeof(struct btrfs_root_ref);
+ break;
+ default:
+ ret = false;
+ goto out;
+ }
+
+ if (read_start < item_start) {
+ ret = false;
+ goto out;
+ }
+ if (read_end > item_end) {
+ ret = false;
+ goto out;
+ }
+
+ /* there shall be item(s) before name */
+ if (read_start - item_start < size) {
+ ret = false;
+ goto out;
+ }
+
+out:
+ if (!ret)
+ btrfs_crit(fs_info, "invalid dir item name len: %u",
+ (unsigned int)name_len);
+ return ret;
+}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5f678dcb20e6..086dcbadce09 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -87,9 +87,8 @@ struct btrfs_end_io_wq {
bio_end_io_t *end_io;
void *private;
struct btrfs_fs_info *info;
- int error;
+ blk_status_t status;
enum btrfs_wq_endio_type metadata;
- struct list_head list;
struct btrfs_work work;
};
@@ -118,9 +117,9 @@ void btrfs_end_io_wq_exit(void)
* just before they are sent down the IO stack.
*/
struct async_submit_bio {
- struct inode *inode;
+ void *private_data;
+ struct btrfs_fs_info *fs_info;
struct bio *bio;
- struct list_head list;
extent_submit_bio_hook_t *submit_bio_start;
extent_submit_bio_hook_t *submit_bio_done;
int mirror_num;
@@ -131,7 +130,7 @@ struct async_submit_bio {
*/
u64 bio_offset;
struct btrfs_work work;
- int error;
+ blk_status_t status;
};
/*
@@ -799,7 +798,7 @@ static void end_workqueue_bio(struct bio *bio)
btrfs_work_func_t func;
fs_info = end_io_wq->info;
- end_io_wq->error = bio->bi_error;
+ end_io_wq->status = bio->bi_status;
if (bio_op(bio) == REQ_OP_WRITE) {
if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
@@ -836,19 +835,19 @@ static void end_workqueue_bio(struct bio *bio)
btrfs_queue_work(wq, &end_io_wq->work);
}
-int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
+blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
enum btrfs_wq_endio_type metadata)
{
struct btrfs_end_io_wq *end_io_wq;
end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS);
if (!end_io_wq)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
end_io_wq->private = bio->bi_private;
end_io_wq->end_io = bio->bi_end_io;
end_io_wq->info = info;
- end_io_wq->error = 0;
+ end_io_wq->status = 0;
end_io_wq->bio = bio;
end_io_wq->metadata = metadata;
@@ -868,14 +867,14 @@ unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
static void run_one_async_start(struct btrfs_work *work)
{
struct async_submit_bio *async;
- int ret;
+ blk_status_t ret;
async = container_of(work, struct async_submit_bio, work);
- ret = async->submit_bio_start(async->inode, async->bio,
+ ret = async->submit_bio_start(async->private_data, async->bio,
async->mirror_num, async->bio_flags,
async->bio_offset);
if (ret)
- async->error = ret;
+ async->status = ret;
}
static void run_one_async_done(struct btrfs_work *work)
@@ -885,7 +884,7 @@ static void run_one_async_done(struct btrfs_work *work)
int limit;
async = container_of(work, struct async_submit_bio, work);
- fs_info = BTRFS_I(async->inode)->root->fs_info;
+ fs_info = async->fs_info;
limit = btrfs_async_submit_limit(fs_info);
limit = limit * 2 / 3;
@@ -898,13 +897,13 @@ static void run_one_async_done(struct btrfs_work *work)
wake_up(&fs_info->async_submit_wait);
/* If an error occurred we just want to clean up the bio and move on */
- if (async->error) {
- async->bio->bi_error = async->error;
+ if (async->status) {
+ async->bio->bi_status = async->status;
bio_endio(async->bio);
return;
}
- async->submit_bio_done(async->inode, async->bio, async->mirror_num,
+ async->submit_bio_done(async->private_data, async->bio, async->mirror_num,
async->bio_flags, async->bio_offset);
}
@@ -916,20 +915,20 @@ static void run_one_async_free(struct btrfs_work *work)
kfree(async);
}
-int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
- struct bio *bio, int mirror_num,
- unsigned long bio_flags,
- u64 bio_offset,
- extent_submit_bio_hook_t *submit_bio_start,
- extent_submit_bio_hook_t *submit_bio_done)
+blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
+ int mirror_num, unsigned long bio_flags,
+ u64 bio_offset, void *private_data,
+ extent_submit_bio_hook_t *submit_bio_start,
+ extent_submit_bio_hook_t *submit_bio_done)
{
struct async_submit_bio *async;
async = kmalloc(sizeof(*async), GFP_NOFS);
if (!async)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
- async->inode = inode;
+ async->private_data = private_data;
+ async->fs_info = fs_info;
async->bio = bio;
async->mirror_num = mirror_num;
async->submit_bio_start = submit_bio_start;
@@ -941,7 +940,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
async->bio_flags = bio_flags;
async->bio_offset = bio_offset;
- async->error = 0;
+ async->status = 0;
atomic_inc(&fs_info->nr_async_submits);
@@ -959,7 +958,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
return 0;
}
-static int btree_csum_one_bio(struct bio *bio)
+static blk_status_t btree_csum_one_bio(struct bio *bio)
{
struct bio_vec *bvec;
struct btrfs_root *root;
@@ -972,12 +971,12 @@ static int btree_csum_one_bio(struct bio *bio)
break;
}
- return ret;
+ return errno_to_blk_status(ret);
}
-static int __btree_submit_bio_start(struct inode *inode, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset)
+static blk_status_t __btree_submit_bio_start(void *private_data, struct bio *bio,
+ int mirror_num, unsigned long bio_flags,
+ u64 bio_offset)
{
/*
* when we're called for a write, we're already in the async
@@ -986,11 +985,12 @@ static int __btree_submit_bio_start(struct inode *inode, struct bio *bio,
return btree_csum_one_bio(bio);
}
-static int __btree_submit_bio_done(struct inode *inode, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset)
+static blk_status_t __btree_submit_bio_done(void *private_data, struct bio *bio,
+ int mirror_num, unsigned long bio_flags,
+ u64 bio_offset)
{
- int ret;
+ struct inode *inode = private_data;
+ blk_status_t ret;
/*
* when we're called for a write, we're already in the async
@@ -998,7 +998,7 @@ static int __btree_submit_bio_done(struct inode *inode, struct bio *bio,
*/
ret = btrfs_map_bio(btrfs_sb(inode->i_sb), bio, mirror_num, 1);
if (ret) {
- bio->bi_error = ret;
+ bio->bi_status = ret;
bio_endio(bio);
}
return ret;
@@ -1015,13 +1015,14 @@ static int check_async_write(unsigned long bio_flags)
return 1;
}
-static int btree_submit_bio_hook(struct inode *inode, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset)
+static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio,
+ int mirror_num, unsigned long bio_flags,
+ u64 bio_offset)
{
+ struct inode *inode = private_data;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
int async = check_async_write(bio_flags);
- int ret;
+ blk_status_t ret;
if (bio_op(bio) != REQ_OP_WRITE) {
/*
@@ -1043,8 +1044,8 @@ static int btree_submit_bio_hook(struct inode *inode, struct bio *bio,
* kthread helpers are used to submit writes so that
* checksumming can happen in parallel across all CPUs
*/
- ret = btrfs_wq_submit_bio(fs_info, inode, bio, mirror_num, 0,
- bio_offset,
+ ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, 0,
+ bio_offset, private_data,
__btree_submit_bio_start,
__btree_submit_bio_done);
}
@@ -1054,7 +1055,7 @@ static int btree_submit_bio_hook(struct inode *inode, struct bio *bio,
return 0;
out_w_error:
- bio->bi_error = ret;
+ bio->bi_status = ret;
bio_endio(bio);
return ret;
}
@@ -1222,10 +1223,10 @@ int btrfs_write_tree_block(struct extent_buffer *buf)
buf->start + buf->len - 1);
}
-int btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
+void btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
{
- return filemap_fdatawait_range(buf->pages[0]->mapping,
- buf->start, buf->start + buf->len - 1);
+ filemap_fdatawait_range(buf->pages[0]->mapping,
+ buf->start, buf->start + buf->len - 1);
}
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
@@ -1255,9 +1256,9 @@ void clean_tree_block(struct btrfs_fs_info *fs_info,
btrfs_assert_tree_locked(buf);
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
- __percpu_counter_add(&fs_info->dirty_metadata_bytes,
- -buf->len,
- fs_info->dirty_metadata_batch);
+ percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
+ -buf->len,
+ fs_info->dirty_metadata_batch);
/* ugh, clear_extent_buffer_dirty needs to lock the page */
btrfs_set_lock_blocking(buf);
clear_extent_buffer_dirty(buf);
@@ -1347,8 +1348,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
root->log_transid_committed = -1;
root->last_log_commit = 0;
if (!dummy)
- extent_io_tree_init(&root->dirty_log_pages,
- fs_info->btree_inode->i_mapping);
+ extent_io_tree_init(&root->dirty_log_pages, NULL);
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
@@ -1820,7 +1820,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
bio = end_io_wq->bio;
- bio->bi_error = end_io_wq->error;
+ bio->bi_status = end_io_wq->status;
bio->bi_private = end_io_wq->private;
bio->bi_end_io = end_io_wq->end_io;
kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
@@ -2309,7 +2309,7 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
inode->i_mapping->a_ops = &btree_aops;
RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
- extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping);
+ extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode);
BTRFS_I(inode)->io_tree.track_uptodate = 0;
extent_map_tree_init(&BTRFS_I(inode)->extent_tree);
@@ -2626,7 +2626,6 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->fs_roots_radix_lock);
spin_lock_init(&fs_info->delayed_iput_lock);
spin_lock_init(&fs_info->defrag_inodes_lock);
- spin_lock_init(&fs_info->free_chunk_lock);
spin_lock_init(&fs_info->tree_mod_seq_lock);
spin_lock_init(&fs_info->super_lock);
spin_lock_init(&fs_info->qgroup_op_lock);
@@ -2662,12 +2661,11 @@ int open_ctree(struct super_block *sb,
atomic_set(&fs_info->qgroup_op_seq, 0);
atomic_set(&fs_info->reada_works_cnt, 0);
atomic64_set(&fs_info->tree_mod_seq, 0);
- fs_info->fs_frozen = 0;
fs_info->sb = sb;
fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
fs_info->metadata_ratio = 0;
fs_info->defrag_inodes = RB_ROOT;
- fs_info->free_chunk_space = 0;
+ atomic64_set(&fs_info->free_chunk_space, 0);
fs_info->tree_mod_log = RB_ROOT;
fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */
@@ -2704,10 +2702,8 @@ int open_ctree(struct super_block *sb,
fs_info->block_group_cache_tree = RB_ROOT;
fs_info->first_logical_byte = (u64)-1;
- extent_io_tree_init(&fs_info->freed_extents[0],
- fs_info->btree_inode->i_mapping);
- extent_io_tree_init(&fs_info->freed_extents[1],
- fs_info->btree_inode->i_mapping);
+ extent_io_tree_init(&fs_info->freed_extents[0], NULL);
+ extent_io_tree_init(&fs_info->freed_extents[1], NULL);
fs_info->pinned_extents = &fs_info->freed_extents[0];
set_bit(BTRFS_FS_BARRIER, &fs_info->flags);
@@ -3485,65 +3481,61 @@ static int write_dev_supers(struct btrfs_device *device,
*/
static void btrfs_end_empty_barrier(struct bio *bio)
{
- if (bio->bi_private)
- complete(bio->bi_private);
- bio_put(bio);
+ complete(bio->bi_private);
}
/*
- * trigger flushes for one the devices. If you pass wait == 0, the flushes are
- * sent down. With wait == 1, it waits for the previous flush.
- *
- * any device where the flush fails with eopnotsupp are flagged as not-barrier
- * capable
+ * Submit a flush request to the device if it supports it. Error handling is
+ * done in the waiting counterpart.
*/
-static int write_dev_flush(struct btrfs_device *device, int wait)
+static void write_dev_flush(struct btrfs_device *device)
{
struct request_queue *q = bdev_get_queue(device->bdev);
- struct bio *bio;
- int ret = 0;
+ struct bio *bio = device->flush_bio;
if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
- return 0;
+ return;
- if (wait) {
- bio = device->flush_bio;
- if (!bio)
- return 0;
+ bio_reset(bio);
+ bio->bi_end_io = btrfs_end_empty_barrier;
+ bio->bi_bdev = device->bdev;
+ bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
+ init_completion(&device->flush_wait);
+ bio->bi_private = &device->flush_wait;
- wait_for_completion(&device->flush_wait);
+ submit_bio(bio);
+ device->flush_bio_sent = 1;
+}
- if (bio->bi_error) {
- ret = bio->bi_error;
- btrfs_dev_stat_inc_and_print(device,
- BTRFS_DEV_STAT_FLUSH_ERRS);
- }
+/*
+ * If the flush bio has been submitted by write_dev_flush, wait for it.
+ */
+static blk_status_t wait_dev_flush(struct btrfs_device *device)
+{
+ struct bio *bio = device->flush_bio;
- /* drop the reference from the wait == 0 run */
- bio_put(bio);
- device->flush_bio = NULL;
+ if (!device->flush_bio_sent)
+ return 0;
- return ret;
- }
+ device->flush_bio_sent = 0;
+ wait_for_completion_io(&device->flush_wait);
- /*
- * one reference for us, and we leave it for the
- * caller
- */
- device->flush_bio = NULL;
- bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
- if (!bio)
- return -ENOMEM;
+ return bio->bi_status;
+}
- bio->bi_end_io = btrfs_end_empty_barrier;
- bio->bi_bdev = device->bdev;
- bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
- init_completion(&device->flush_wait);
- bio->bi_private = &device->flush_wait;
- device->flush_bio = bio;
+static int check_barrier_error(struct btrfs_fs_devices *fsdevs)
+{
+ int dev_flush_error = 0;
+ struct btrfs_device *dev;
+
+ list_for_each_entry_rcu(dev, &fsdevs->devices, dev_list) {
+ if (!dev->bdev || dev->last_flush_error)
+ dev_flush_error++;
+ }
- bio_get(bio);
- btrfsic_submit_bio(bio);
+ if (dev_flush_error >
+ fsdevs->fs_info->num_tolerated_disk_barrier_failures)
+ return -EIO;
return 0;
}
@@ -3556,25 +3548,21 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
{
struct list_head *head;
struct btrfs_device *dev;
- int errors_send = 0;
int errors_wait = 0;
- int ret;
+ blk_status_t ret;
/* send down all the barriers */
head = &info->fs_devices->devices;
list_for_each_entry_rcu(dev, head, dev_list) {
if (dev->missing)
continue;
- if (!dev->bdev) {
- errors_send++;
+ if (!dev->bdev)
continue;
- }
if (!dev->in_fs_metadata || !dev->writeable)
continue;
- ret = write_dev_flush(dev, 0);
- if (ret)
- errors_send++;
+ write_dev_flush(dev);
+ dev->last_flush_error = 0;
}
/* wait for all the barriers */
@@ -3588,13 +3576,23 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
if (!dev->in_fs_metadata || !dev->writeable)
continue;
- ret = write_dev_flush(dev, 1);
- if (ret)
+ ret = wait_dev_flush(dev);
+ if (ret) {
+ dev->last_flush_error = ret;
+ btrfs_dev_stat_inc_and_print(dev,
+ BTRFS_DEV_STAT_FLUSH_ERRS);
errors_wait++;
+ }
+ }
+
+ if (errors_wait) {
+ /*
+ * At some point we need the status of all disks
+ * to arrive at the volume status. So error checking
+ * is being pushed to a separate loop.
+ */
+ return check_barrier_error(info->fs_devices);
}
- if (errors_send > info->num_tolerated_disk_barrier_failures ||
- errors_wait > info->num_tolerated_disk_barrier_failures)
- return -EIO;
return 0;
}
@@ -4049,9 +4047,9 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
buf->start, transid, fs_info->generation);
was_dirty = set_extent_buffer_dirty(buf);
if (!was_dirty)
- __percpu_counter_add(&fs_info->dirty_metadata_bytes,
- buf->len,
- fs_info->dirty_metadata_batch);
+ percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
+ buf->len,
+ fs_info->dirty_metadata_batch);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) {
btrfs_print_leaf(fs_info, buf);
@@ -4578,11 +4576,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
cur_trans->state =TRANS_STATE_COMPLETED;
wake_up(&cur_trans->commit_wait);
-
- /*
- memset(cur_trans, 0, sizeof(*cur_trans));
- kmem_cache_free(btrfs_transaction_cachep, cur_trans);
- */
}
static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
@@ -4638,6 +4631,12 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
return 0;
}
+static struct btrfs_fs_info *btree_fs_info(void *private_data)
+{
+ struct inode *inode = private_data;
+ return btrfs_sb(inode->i_sb);
+}
+
static const struct extent_io_ops btree_extent_io_ops = {
/* mandatory callbacks */
.submit_bio_hook = btree_submit_bio_hook,
@@ -4645,6 +4644,8 @@ static const struct extent_io_ops btree_extent_io_ops = {
/* note we're sharing with inode.c for the merge bio hook */
.merge_bio_hook = btrfs_merge_bio_hook,
.readpage_io_failed_hook = btree_io_failed_hook,
+ .set_range_writeback = btrfs_set_range_writeback,
+ .tree_fs_info = btree_fs_info,
/* optional callbacks */
};
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 21f1ceb85b76..0a634d3ffc16 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -118,16 +118,16 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
u32 btrfs_csum_data(const char *data, u32 seed, size_t len);
void btrfs_csum_final(u32 crc, u8 *result);
-int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
+blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
enum btrfs_wq_endio_type metadata);
-int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
- struct bio *bio, int mirror_num,
- unsigned long bio_flags, u64 bio_offset,
+blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
+ int mirror_num, unsigned long bio_flags,
+ u64 bio_offset, void *private_data,
extent_submit_bio_hook_t *submit_bio_start,
extent_submit_bio_hook_t *submit_bio_done);
unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
int btrfs_write_tree_block(struct extent_buffer *buf);
-int btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
+void btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 87144c9f9593..fa66980726c9 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -282,6 +282,11 @@ static int btrfs_get_name(struct dentry *parent, char *name,
name_len = btrfs_inode_ref_name_len(leaf, iref);
}
+ ret = btrfs_is_name_len_valid(leaf, path->slots[0], name_ptr, name_len);
+ if (!ret) {
+ btrfs_free_path(path);
+ return -EIO;
+ }
read_extent_buffer(leaf, name, name_ptr, name_len);
btrfs_free_path(path);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 33d979e9ea2a..375f8c728d91 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -97,10 +97,11 @@ static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
u64 num_bytes, int delalloc);
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
-static int __reserve_metadata_bytes(struct btrfs_root *root,
+static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
u64 orig_bytes,
- enum btrfs_reserve_flush_enum flush);
+ enum btrfs_reserve_flush_enum flush,
+ bool system_chunk);
static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
u64 num_bytes);
@@ -766,6 +767,26 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
return NULL;
}
+static void add_pinned_bytes(struct btrfs_fs_info *fs_info, s64 num_bytes,
+ u64 owner, u64 root_objectid)
+{
+ struct btrfs_space_info *space_info;
+ u64 flags;
+
+ if (owner < BTRFS_FIRST_FREE_OBJECTID) {
+ if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
+ flags = BTRFS_BLOCK_GROUP_SYSTEM;
+ else
+ flags = BTRFS_BLOCK_GROUP_METADATA;
+ } else {
+ flags = BTRFS_BLOCK_GROUP_DATA;
+ }
+
+ space_info = __find_space_info(fs_info, flags);
+ ASSERT(space_info);
+ percpu_counter_add(&space_info->total_bytes_pinned, num_bytes);
+}
+
/*
* after adding space to the filesystem, we need to clear the full flags
* on all the space infos.
@@ -2092,6 +2113,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner, u64 offset)
{
+ int old_ref_mod, new_ref_mod;
int ret;
BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
@@ -2099,15 +2121,21 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
if (owner < BTRFS_FIRST_FREE_OBJECTID) {
ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
- num_bytes,
- parent, root_objectid, (int)owner,
- BTRFS_ADD_DELAYED_REF, NULL);
+ num_bytes, parent,
+ root_objectid, (int)owner,
+ BTRFS_ADD_DELAYED_REF, NULL,
+ &old_ref_mod, &new_ref_mod);
} else {
ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
- num_bytes, parent, root_objectid,
- owner, offset, 0,
- BTRFS_ADD_DELAYED_REF);
+ num_bytes, parent,
+ root_objectid, owner, offset,
+ 0, BTRFS_ADD_DELAYED_REF,
+ &old_ref_mod, &new_ref_mod);
}
+
+ if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
+ add_pinned_bytes(fs_info, -num_bytes, owner, root_objectid);
+
return ret;
}
@@ -2411,6 +2439,16 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
head = btrfs_delayed_node_to_head(node);
trace_run_delayed_ref_head(fs_info, node, head, node->action);
+ if (head->total_ref_mod < 0) {
+ struct btrfs_block_group_cache *cache;
+
+ cache = btrfs_lookup_block_group(fs_info, node->bytenr);
+ ASSERT(cache);
+ percpu_counter_add(&cache->space_info->total_bytes_pinned,
+ -node->num_bytes);
+ btrfs_put_block_group(cache);
+ }
+
if (insert_reserved) {
btrfs_pin_extent(fs_info, node->bytenr,
node->num_bytes, 1);
@@ -3364,6 +3402,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct btrfs_root *root = fs_info->tree_root;
struct inode *inode = NULL;
+ struct extent_changeset *data_reserved = NULL;
u64 alloc_hint = 0;
int dcs = BTRFS_DC_ERROR;
u64 num_pages = 0;
@@ -3483,7 +3522,7 @@ again:
num_pages *= 16;
num_pages *= PAGE_SIZE;
- ret = btrfs_check_data_free_space(inode, 0, num_pages);
+ ret = btrfs_check_data_free_space(inode, &data_reserved, 0, num_pages);
if (ret)
goto out_put;
@@ -3514,6 +3553,7 @@ out:
block_group->disk_cache_state = dcs;
spin_unlock(&block_group->lock);
+ extent_changeset_free(data_reserved);
return ret;
}
@@ -3924,88 +3964,83 @@ static const char *alloc_name(u64 flags)
};
}
-static int update_space_info(struct btrfs_fs_info *info, u64 flags,
- u64 total_bytes, u64 bytes_used,
- u64 bytes_readonly,
- struct btrfs_space_info **space_info)
+static int create_space_info(struct btrfs_fs_info *info, u64 flags,
+ struct btrfs_space_info **new)
{
- struct btrfs_space_info *found;
+
+ struct btrfs_space_info *space_info;
int i;
- int factor;
int ret;
- if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10))
- factor = 2;
- else
- factor = 1;
-
- found = __find_space_info(info, flags);
- if (found) {
- spin_lock(&found->lock);
- found->total_bytes += total_bytes;
- found->disk_total += total_bytes * factor;
- found->bytes_used += bytes_used;
- found->disk_used += bytes_used * factor;
- found->bytes_readonly += bytes_readonly;
- if (total_bytes > 0)
- found->full = 0;
- space_info_add_new_bytes(info, found, total_bytes -
- bytes_used - bytes_readonly);
- spin_unlock(&found->lock);
- *space_info = found;
- return 0;
- }
- found = kzalloc(sizeof(*found), GFP_NOFS);
- if (!found)
+ space_info = kzalloc(sizeof(*space_info), GFP_NOFS);
+ if (!space_info)
return -ENOMEM;
- ret = percpu_counter_init(&found->total_bytes_pinned, 0, GFP_KERNEL);
+ ret = percpu_counter_init(&space_info->total_bytes_pinned, 0,
+ GFP_KERNEL);
if (ret) {
- kfree(found);
+ kfree(space_info);
return ret;
}
for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
- INIT_LIST_HEAD(&found->block_groups[i]);
- init_rwsem(&found->groups_sem);
- spin_lock_init(&found->lock);
- found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
- found->total_bytes = total_bytes;
- found->disk_total = total_bytes * factor;
- found->bytes_used = bytes_used;
- found->disk_used = bytes_used * factor;
- found->bytes_pinned = 0;
- found->bytes_reserved = 0;
- found->bytes_readonly = bytes_readonly;
- found->bytes_may_use = 0;
- found->full = 0;
- found->max_extent_size = 0;
- found->force_alloc = CHUNK_ALLOC_NO_FORCE;
- found->chunk_alloc = 0;
- found->flush = 0;
- init_waitqueue_head(&found->wait);
- INIT_LIST_HEAD(&found->ro_bgs);
- INIT_LIST_HEAD(&found->tickets);
- INIT_LIST_HEAD(&found->priority_tickets);
-
- ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
+ INIT_LIST_HEAD(&space_info->block_groups[i]);
+ init_rwsem(&space_info->groups_sem);
+ spin_lock_init(&space_info->lock);
+ space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
+ space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
+ init_waitqueue_head(&space_info->wait);
+ INIT_LIST_HEAD(&space_info->ro_bgs);
+ INIT_LIST_HEAD(&space_info->tickets);
+ INIT_LIST_HEAD(&space_info->priority_tickets);
+
+ ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype,
info->space_info_kobj, "%s",
- alloc_name(found->flags));
+ alloc_name(space_info->flags));
if (ret) {
- percpu_counter_destroy(&found->total_bytes_pinned);
- kfree(found);
+ percpu_counter_destroy(&space_info->total_bytes_pinned);
+ kfree(space_info);
return ret;
}
- *space_info = found;
- list_add_rcu(&found->list, &info->space_info);
+ *new = space_info;
+ list_add_rcu(&space_info->list, &info->space_info);
if (flags & BTRFS_BLOCK_GROUP_DATA)
- info->data_sinfo = found;
+ info->data_sinfo = space_info;
return ret;
}
+static void update_space_info(struct btrfs_fs_info *info, u64 flags,
+ u64 total_bytes, u64 bytes_used,
+ u64 bytes_readonly,
+ struct btrfs_space_info **space_info)
+{
+ struct btrfs_space_info *found;
+ int factor;
+
+ if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID10))
+ factor = 2;
+ else
+ factor = 1;
+
+ found = __find_space_info(info, flags);
+ ASSERT(found);
+ spin_lock(&found->lock);
+ found->total_bytes += total_bytes;
+ found->disk_total += total_bytes * factor;
+ found->bytes_used += bytes_used;
+ found->disk_used += bytes_used * factor;
+ found->bytes_readonly += bytes_readonly;
+ if (total_bytes > 0)
+ found->full = 0;
+ space_info_add_new_bytes(info, found, total_bytes -
+ bytes_used - bytes_readonly);
+ spin_unlock(&found->lock);
+ *space_info = found;
+}
+
static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
u64 extra_flags = chunk_to_extended(flags) &
@@ -4121,7 +4156,7 @@ static u64 get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags)
return btrfs_reduce_alloc_profile(fs_info, flags);
}
-u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
+static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
{
struct btrfs_fs_info *fs_info = root->fs_info;
u64 flags;
@@ -4138,6 +4173,21 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
return ret;
}
+u64 btrfs_data_alloc_profile(struct btrfs_fs_info *fs_info)
+{
+ return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_DATA);
+}
+
+u64 btrfs_metadata_alloc_profile(struct btrfs_fs_info *fs_info)
+{
+ return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_METADATA);
+}
+
+u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info)
+{
+ return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
+}
+
static u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
bool may_use_included)
{
@@ -4187,7 +4237,7 @@ again:
data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
spin_unlock(&data_sinfo->lock);
alloc:
- alloc_target = btrfs_get_alloc_profile(root, 1);
+ alloc_target = btrfs_data_alloc_profile(fs_info);
/*
* It is ugly that we don't call nolock join
* transaction for the free space inode case here.
@@ -4238,7 +4288,7 @@ commit_trans:
if (need_commit > 0) {
btrfs_start_delalloc_roots(fs_info, 0, -1);
- btrfs_wait_ordered_roots(fs_info, -1, 0,
+ btrfs_wait_ordered_roots(fs_info, U64_MAX, 0,
(u64)-1);
}
@@ -4278,12 +4328,8 @@ commit_trans:
return ret;
}
-/*
- * New check_data_free_space() with ability for precious data reservation
- * Will replace old btrfs_check_data_free_space(), but for patch split,
- * add a new function first and then replace it.
- */
-int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len)
+int btrfs_check_data_free_space(struct inode *inode,
+ struct extent_changeset **reserved, u64 start, u64 len)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
int ret;
@@ -4298,9 +4344,11 @@ int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len)
return ret;
/* Use new btrfs_qgroup_reserve_data to reserve precious data space. */
- ret = btrfs_qgroup_reserve_data(inode, start, len);
- if (ret)
+ ret = btrfs_qgroup_reserve_data(inode, reserved, start, len);
+ if (ret < 0)
btrfs_free_reserved_data_space_noquota(inode, start, len);
+ else
+ ret = 0;
return ret;
}
@@ -4341,7 +4389,8 @@ void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
* This one will handle the per-inode data rsv map for accurate reserved
* space framework.
*/
-void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len)
+void btrfs_free_reserved_data_space(struct inode *inode,
+ struct extent_changeset *reserved, u64 start, u64 len)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -4351,7 +4400,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len)
start = round_down(start, root->fs_info->sectorsize);
btrfs_free_reserved_data_space_noquota(inode, start, len);
- btrfs_qgroup_free_data(inode, start, len);
+ btrfs_qgroup_free_data(inode, reserved, start, len);
}
static void force_metadata_allocation(struct btrfs_fs_info *info)
@@ -4463,9 +4512,8 @@ void check_system_chunk(struct btrfs_trans_handle *trans,
}
if (left < thresh) {
- u64 flags;
+ u64 flags = btrfs_system_alloc_profile(fs_info);
- flags = btrfs_get_alloc_profile(fs_info->chunk_root, 0);
/*
* Ignore failure to create system chunk. We might end up not
* needing it, as we might not need to COW all nodes/leafs from
@@ -4506,10 +4554,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
space_info = __find_space_info(fs_info, flags);
if (!space_info) {
- ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
- BUG_ON(ret); /* -ENOMEM */
+ ret = create_space_info(fs_info, flags, &space_info);
+ if (ret)
+ return ret;
}
- BUG_ON(!space_info); /* Logic error */
again:
spin_lock(&space_info->lock);
@@ -4614,11 +4662,11 @@ out:
return ret;
}
-static int can_overcommit(struct btrfs_root *root,
+static int can_overcommit(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info, u64 bytes,
- enum btrfs_reserve_flush_enum flush)
+ enum btrfs_reserve_flush_enum flush,
+ bool system_chunk)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
u64 profile;
u64 space_size;
@@ -4629,7 +4677,11 @@ static int can_overcommit(struct btrfs_root *root,
if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
return 0;
- profile = btrfs_get_alloc_profile(root, 0);
+ if (system_chunk)
+ profile = btrfs_system_alloc_profile(fs_info);
+ else
+ profile = btrfs_metadata_alloc_profile(fs_info);
+
used = btrfs_space_info_used(space_info, false);
/*
@@ -4646,9 +4698,7 @@ static int can_overcommit(struct btrfs_root *root,
used += space_info->bytes_may_use;
- spin_lock(&fs_info->free_chunk_lock);
- avail = fs_info->free_chunk_space;
- spin_unlock(&fs_info->free_chunk_lock);
+ avail = atomic64_read(&fs_info->free_chunk_space);
/*
* If we have dup, raid1 or raid10 then only half of the free
@@ -4698,14 +4748,14 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_fs_info *fs_info,
}
}
-static inline int calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
+static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
u64 to_reclaim)
{
u64 bytes;
- int nr;
+ u64 nr;
bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
- nr = (int)div64_u64(to_reclaim, bytes);
+ nr = div64_u64(to_reclaim, bytes);
if (!nr)
nr = 1;
return nr;
@@ -4716,24 +4766,23 @@ static inline int calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
/*
* shrink metadata reservation for delalloc
*/
-static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
- bool wait_ordered)
+static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
+ u64 orig, bool wait_ordered)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_rsv *block_rsv;
struct btrfs_space_info *space_info;
struct btrfs_trans_handle *trans;
u64 delalloc_bytes;
u64 max_reclaim;
+ u64 items;
long time_left;
unsigned long nr_pages;
int loops;
- int items;
enum btrfs_reserve_flush_enum flush;
/* Calc the number of the pages we need flush for space reservation */
items = calc_reclaim_items_nr(fs_info, to_reclaim);
- to_reclaim = (u64)items * EXTENT_SIZE_PER_ITEM;
+ to_reclaim = items * EXTENT_SIZE_PER_ITEM;
trans = (struct btrfs_trans_handle *)current->journal_info;
block_rsv = &fs_info->delalloc_block_rsv;
@@ -4776,7 +4825,7 @@ skip_async:
else
flush = BTRFS_RESERVE_NO_FLUSH;
spin_lock(&space_info->lock);
- if (can_overcommit(root, space_info, orig, flush)) {
+ if (can_overcommit(fs_info, space_info, orig, flush, false)) {
spin_unlock(&space_info->lock);
break;
}
@@ -4838,7 +4887,7 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
spin_lock(&delayed_rsv->lock);
if (percpu_counter_compare(&space_info->total_bytes_pinned,
- bytes - delayed_rsv->size) >= 0) {
+ bytes - delayed_rsv->size) < 0) {
spin_unlock(&delayed_rsv->lock);
return -ENOSPC;
}
@@ -4886,7 +4935,7 @@ static int flush_space(struct btrfs_fs_info *fs_info,
break;
case FLUSH_DELALLOC:
case FLUSH_DELALLOC_WAIT:
- shrink_delalloc(root, num_bytes * 2, orig_bytes,
+ shrink_delalloc(fs_info, num_bytes * 2, orig_bytes,
state == FLUSH_DELALLOC_WAIT);
break;
case ALLOC_CHUNK:
@@ -4896,7 +4945,7 @@ static int flush_space(struct btrfs_fs_info *fs_info,
break;
}
ret = do_chunk_alloc(trans, fs_info,
- btrfs_get_alloc_profile(root, 0),
+ btrfs_metadata_alloc_profile(fs_info),
CHUNK_ALLOC_NO_FORCE);
btrfs_end_transaction(trans);
if (ret > 0 || ret == -ENOSPC)
@@ -4917,8 +4966,9 @@ static int flush_space(struct btrfs_fs_info *fs_info,
}
static inline u64
-btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
- struct btrfs_space_info *space_info)
+btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info,
+ bool system_chunk)
{
struct reserve_ticket *ticket;
u64 used;
@@ -4933,14 +4983,14 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
return to_reclaim;
to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
- if (can_overcommit(root, space_info, to_reclaim,
- BTRFS_RESERVE_FLUSH_ALL))
+ if (can_overcommit(fs_info, space_info, to_reclaim,
+ BTRFS_RESERVE_FLUSH_ALL, system_chunk))
return 0;
- used = space_info->bytes_used + space_info->bytes_reserved +
- space_info->bytes_pinned + space_info->bytes_readonly +
- space_info->bytes_may_use;
- if (can_overcommit(root, space_info, SZ_1M, BTRFS_RESERVE_FLUSH_ALL))
+ used = btrfs_space_info_used(space_info, true);
+
+ if (can_overcommit(fs_info, space_info, SZ_1M,
+ BTRFS_RESERVE_FLUSH_ALL, system_chunk))
expected = div_factor_fine(space_info->total_bytes, 95);
else
expected = div_factor_fine(space_info->total_bytes, 90);
@@ -4954,17 +5004,18 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
return to_reclaim;
}
-static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
- struct btrfs_root *root, u64 used)
+static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info,
+ u64 used, bool system_chunk)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
u64 thresh = div_factor_fine(space_info->total_bytes, 98);
/* If we're just plain full then async reclaim just slows us down. */
if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
return 0;
- if (!btrfs_calc_reclaim_metadata_size(root, space_info))
+ if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info,
+ system_chunk))
return 0;
return (used >= thresh && !btrfs_fs_closing(fs_info) &&
@@ -5001,8 +5052,8 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
spin_lock(&space_info->lock);
- to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
- space_info);
+ to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
+ false);
if (!to_reclaim) {
space_info->flush = 0;
spin_unlock(&space_info->lock);
@@ -5024,8 +5075,9 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
spin_unlock(&space_info->lock);
return;
}
- to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
- space_info);
+ to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info,
+ space_info,
+ false);
ticket = list_first_entry(&space_info->tickets,
struct reserve_ticket, list);
if (last_tickets_id == space_info->tickets_id) {
@@ -5063,8 +5115,8 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
int flush_state = FLUSH_DELAYED_ITEMS_NR;
spin_lock(&space_info->lock);
- to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->extent_root,
- space_info);
+ to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
+ false);
if (!to_reclaim) {
spin_unlock(&space_info->lock);
return;
@@ -5143,12 +5195,12 @@ static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
* regain reservations will be made and this will fail if there is not enough
* space already.
*/
-static int __reserve_metadata_bytes(struct btrfs_root *root,
+static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
u64 orig_bytes,
- enum btrfs_reserve_flush_enum flush)
+ enum btrfs_reserve_flush_enum flush,
+ bool system_chunk)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct reserve_ticket ticket;
u64 used;
int ret = 0;
@@ -5170,7 +5222,8 @@ static int __reserve_metadata_bytes(struct btrfs_root *root,
trace_btrfs_space_reservation(fs_info, "space_info",
space_info->flags, orig_bytes, 1);
ret = 0;
- } else if (can_overcommit(root, space_info, orig_bytes, flush)) {
+ } else if (can_overcommit(fs_info, space_info, orig_bytes, flush,
+ system_chunk)) {
space_info->bytes_may_use += orig_bytes;
trace_btrfs_space_reservation(fs_info, "space_info",
space_info->flags, orig_bytes, 1);
@@ -5197,7 +5250,7 @@ static int __reserve_metadata_bytes(struct btrfs_root *root,
orig_bytes, flush,
"enospc");
queue_work(system_unbound_wq,
- &root->fs_info->async_reclaim_work);
+ &fs_info->async_reclaim_work);
}
} else {
list_add_tail(&ticket.list,
@@ -5211,7 +5264,8 @@ static int __reserve_metadata_bytes(struct btrfs_root *root,
* the async reclaim as we will panic.
*/
if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) &&
- need_do_async_reclaim(space_info, root, used) &&
+ need_do_async_reclaim(fs_info, space_info,
+ used, system_chunk) &&
!work_busy(&fs_info->async_reclaim_work)) {
trace_btrfs_trigger_flush(fs_info, space_info->flags,
orig_bytes, flush, "preempt");
@@ -5269,9 +5323,10 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
int ret;
+ bool system_chunk = (root == fs_info->chunk_root);
- ret = __reserve_metadata_bytes(root, block_rsv->space_info, orig_bytes,
- flush);
+ ret = __reserve_metadata_bytes(fs_info, block_rsv->space_info,
+ orig_bytes, flush, system_chunk);
if (ret == -ENOSPC &&
unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
if (block_rsv != global_rsv &&
@@ -5380,9 +5435,7 @@ static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
* overcommit, and if we can't then we just need to free up our space
* and not satisfy any requests.
*/
- used = space_info->bytes_used + space_info->bytes_reserved +
- space_info->bytes_pinned + space_info->bytes_readonly +
- space_info->bytes_may_use;
+ used = btrfs_space_info_used(space_info, true);
if (used - num_bytes >= space_info->total_bytes)
check_overcommit = true;
again:
@@ -5394,8 +5447,7 @@ again:
* adding the ticket space would be a double count.
*/
if (check_overcommit &&
- !can_overcommit(fs_info->extent_root, space_info, 0,
- flush))
+ !can_overcommit(fs_info, space_info, 0, flush, false))
break;
if (num_bytes >= ticket->bytes) {
list_del_init(&ticket->list);
@@ -6124,6 +6176,8 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
* @inode: inode we're writing to
* @start: start range we are writing to
* @len: how long the range we are writing to
+ * @reserved: mandatory parameter, record actually reserved qgroup ranges of
+ * current reservation.
*
* This will do the following things
*
@@ -6141,16 +6195,17 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
* Return 0 for success
* Return <0 for error(-ENOSPC or -EQUOT)
*/
-int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len)
+int btrfs_delalloc_reserve_space(struct inode *inode,
+ struct extent_changeset **reserved, u64 start, u64 len)
{
int ret;
- ret = btrfs_check_data_free_space(inode, start, len);
+ ret = btrfs_check_data_free_space(inode, reserved, start, len);
if (ret < 0)
return ret;
ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len);
if (ret < 0)
- btrfs_free_reserved_data_space(inode, start, len);
+ btrfs_free_reserved_data_space(inode, *reserved, start, len);
return ret;
}
@@ -6169,10 +6224,11 @@ int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len)
* list if there are no delalloc bytes left.
* Also it will handle the qgroup reserved space.
*/
-void btrfs_delalloc_release_space(struct inode *inode, u64 start, u64 len)
+void btrfs_delalloc_release_space(struct inode *inode,
+ struct extent_changeset *reserved, u64 start, u64 len)
{
btrfs_delalloc_release_metadata(BTRFS_I(inode), len);
- btrfs_free_reserved_data_space(inode, start, len);
+ btrfs_free_reserved_data_space(inode, reserved, start, len);
}
static int update_block_group(struct btrfs_trans_handle *trans,
@@ -6248,6 +6304,8 @@ static int update_block_group(struct btrfs_trans_handle *trans,
trace_btrfs_space_reservation(info, "pinned",
cache->space_info->flags,
num_bytes, 1);
+ percpu_counter_add(&cache->space_info->total_bytes_pinned,
+ num_bytes);
set_extent_dirty(info->pinned_extents,
bytenr, bytenr + num_bytes - 1,
GFP_NOFS | __GFP_NOFAIL);
@@ -6324,6 +6382,7 @@ static int pin_down_extent(struct btrfs_fs_info *fs_info,
trace_btrfs_space_reservation(fs_info, "pinned",
cache->space_info->flags, num_bytes, 1);
+ percpu_counter_add(&cache->space_info->total_bytes_pinned, num_bytes);
set_extent_dirty(fs_info->pinned_extents, bytenr,
bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
return 0;
@@ -6794,27 +6853,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
return 0;
}
-static void add_pinned_bytes(struct btrfs_fs_info *fs_info, u64 num_bytes,
- u64 owner, u64 root_objectid)
-{
- struct btrfs_space_info *space_info;
- u64 flags;
-
- if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
- flags = BTRFS_BLOCK_GROUP_SYSTEM;
- else
- flags = BTRFS_BLOCK_GROUP_METADATA;
- } else {
- flags = BTRFS_BLOCK_GROUP_DATA;
- }
-
- space_info = __find_space_info(fs_info, flags);
- BUG_ON(!space_info); /* Logic bug */
- percpu_counter_add(&space_info->total_bytes_pinned, num_bytes);
-}
-
-
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *info,
struct btrfs_delayed_ref_node *node, u64 parent,
@@ -7037,8 +7075,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
goto out;
}
}
- add_pinned_bytes(info, -num_bytes, owner_objectid,
- root_objectid);
} else {
if (found_extent) {
BUG_ON(is_data && refs_to_drop !=
@@ -7170,19 +7206,19 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
int ret;
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
- ret = btrfs_add_delayed_tree_ref(fs_info, trans,
- buf->start, buf->len,
- parent,
+ int old_ref_mod, new_ref_mod;
+
+ ret = btrfs_add_delayed_tree_ref(fs_info, trans, buf->start,
+ buf->len, parent,
root->root_key.objectid,
btrfs_header_level(buf),
- BTRFS_DROP_DELAYED_REF, NULL);
+ BTRFS_DROP_DELAYED_REF, NULL,
+ &old_ref_mod, &new_ref_mod);
BUG_ON(ret); /* -ENOMEM */
+ pin = old_ref_mod >= 0 && new_ref_mod < 0;
}
- if (!last_ref)
- return;
-
- if (btrfs_header_generation(buf) == trans->transid) {
+ if (last_ref && btrfs_header_generation(buf) == trans->transid) {
struct btrfs_block_group_cache *cache;
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
@@ -7191,6 +7227,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
goto out;
}
+ pin = 0;
cache = btrfs_lookup_block_group(fs_info, buf->start);
if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
@@ -7206,18 +7243,19 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
btrfs_free_reserved_bytes(cache, buf->len, 0);
btrfs_put_block_group(cache);
trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);
- pin = 0;
}
out:
if (pin)
add_pinned_bytes(fs_info, buf->len, btrfs_header_level(buf),
root->root_key.objectid);
- /*
- * Deleting the buffer, clear the corrupt flag since it doesn't matter
- * anymore.
- */
- clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
+ if (last_ref) {
+ /*
+ * Deleting the buffer, clear the corrupt flag since it doesn't
+ * matter anymore.
+ */
+ clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
+ }
}
/* Can return -ENOMEM */
@@ -7226,12 +7264,12 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
u64 owner, u64 offset)
{
+ int old_ref_mod, new_ref_mod;
int ret;
if (btrfs_is_testing(fs_info))
return 0;
- add_pinned_bytes(fs_info, num_bytes, owner, root_objectid);
/*
* tree log blocks never actually go into the extent allocation
@@ -7241,19 +7279,25 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
/* unlocks the pinned mutex */
btrfs_pin_extent(fs_info, bytenr, num_bytes, 1);
+ old_ref_mod = new_ref_mod = 0;
ret = 0;
} else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
- num_bytes,
- parent, root_objectid, (int)owner,
- BTRFS_DROP_DELAYED_REF, NULL);
+ num_bytes, parent,
+ root_objectid, (int)owner,
+ BTRFS_DROP_DELAYED_REF, NULL,
+ &old_ref_mod, &new_ref_mod);
} else {
ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
- num_bytes,
- parent, root_objectid, owner,
- offset, 0,
- BTRFS_DROP_DELAYED_REF);
+ num_bytes, parent,
+ root_objectid, owner, offset,
+ 0, BTRFS_DROP_DELAYED_REF,
+ &old_ref_mod, &new_ref_mod);
}
+
+ if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
+ add_pinned_bytes(fs_info, num_bytes, owner, root_objectid);
+
return ret;
}
@@ -7956,7 +8000,7 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
u64 flags;
int ret;
- flags = btrfs_get_alloc_profile(root, is_data);
+ flags = get_alloc_profile_by_root(root, is_data);
again:
WARN_ON(num_bytes < fs_info->sectorsize);
ret = find_free_extent(fs_info, ram_bytes, num_bytes, empty_size,
@@ -8200,9 +8244,9 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
ret = btrfs_add_delayed_data_ref(fs_info, trans, ins->objectid,
- ins->offset, 0,
- root_objectid, owner, offset,
- ram_bytes, BTRFS_ADD_DELAYED_EXTENT);
+ ins->offset, 0, root_objectid, owner,
+ offset, ram_bytes,
+ BTRFS_ADD_DELAYED_EXTENT, NULL, NULL);
return ret;
}
@@ -8422,11 +8466,11 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
extent_op->is_data = false;
extent_op->level = level;
- ret = btrfs_add_delayed_tree_ref(fs_info, trans,
- ins.objectid, ins.offset,
- parent, root_objectid, level,
+ ret = btrfs_add_delayed_tree_ref(fs_info, trans, ins.objectid,
+ ins.offset, parent,
+ root_objectid, level,
BTRFS_ADD_DELAYED_EXTENT,
- extent_op);
+ extent_op, NULL, NULL);
if (ret)
goto out_free_delayed;
}
@@ -10059,19 +10103,9 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
}
trace_btrfs_add_block_group(info, cache, 0);
- ret = update_space_info(info, cache->flags, found_key.offset,
- btrfs_block_group_used(&cache->item),
- cache->bytes_super, &space_info);
- if (ret) {
- btrfs_remove_free_space_cache(cache);
- spin_lock(&info->block_group_cache_lock);
- rb_erase(&cache->cache_node,
- &info->block_group_cache_tree);
- RB_CLEAR_NODE(&cache->cache_node);
- spin_unlock(&info->block_group_cache_lock);
- btrfs_put_block_group(cache);
- goto error;
- }
+ update_space_info(info, cache->flags, found_key.offset,
+ btrfs_block_group_used(&cache->item),
+ cache->bytes_super, &space_info);
cache->space_info = space_info;
@@ -10203,16 +10237,19 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
}
#endif
/*
- * Call to ensure the corresponding space_info object is created and
- * assigned to our block group, but don't update its counters just yet.
- * We want our bg to be added to the rbtree with its ->space_info set.
+ * Ensure the corresponding space_info object is created and
+ * assigned to our block group. We want our bg to be added to the rbtree
+ * with its ->space_info set.
*/
- ret = update_space_info(fs_info, cache->flags, 0, 0, 0,
- &cache->space_info);
- if (ret) {
- btrfs_remove_free_space_cache(cache);
- btrfs_put_block_group(cache);
- return ret;
+ cache->space_info = __find_space_info(fs_info, cache->flags);
+ if (!cache->space_info) {
+ ret = create_space_info(fs_info, cache->flags,
+ &cache->space_info);
+ if (ret) {
+ btrfs_remove_free_space_cache(cache);
+ btrfs_put_block_group(cache);
+ return ret;
+ }
}
ret = btrfs_add_block_group_cache(fs_info, cache);
@@ -10227,18 +10264,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
* the rbtree, update the space info's counters.
*/
trace_btrfs_add_block_group(fs_info, cache, 1);
- ret = update_space_info(fs_info, cache->flags, size, bytes_used,
+ update_space_info(fs_info, cache->flags, size, bytes_used,
cache->bytes_super, &cache->space_info);
- if (ret) {
- btrfs_remove_free_space_cache(cache);
- spin_lock(&fs_info->block_group_cache_lock);
- rb_erase(&cache->cache_node,
- &fs_info->block_group_cache_tree);
- RB_CLEAR_NODE(&cache->cache_node);
- spin_unlock(&fs_info->block_group_cache_lock);
- btrfs_put_block_group(cache);
- return ret;
- }
update_global_block_rsv(fs_info);
__link_block_group(cache->space_info, cache);
@@ -10786,21 +10813,21 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
mixed = 1;
flags = BTRFS_BLOCK_GROUP_SYSTEM;
- ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
+ ret = create_space_info(fs_info, flags, &space_info);
if (ret)
goto out;
if (mixed) {
flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
- ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
+ ret = create_space_info(fs_info, flags, &space_info);
} else {
flags = BTRFS_BLOCK_GROUP_METADATA;
- ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
+ ret = create_space_info(fs_info, flags, &space_info);
if (ret)
goto out;
flags = BTRFS_BLOCK_GROUP_DATA;
- ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
+ ret = create_space_info(fs_info, flags, &space_info);
}
out:
return ret;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d3619e010005..556484cf5d93 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -87,19 +87,9 @@ void btrfs_leak_debug_check(void)
static inline void __btrfs_debug_check_extent_io_range(const char *caller,
struct extent_io_tree *tree, u64 start, u64 end)
{
- struct inode *inode;
- u64 isize;
-
- if (!tree->mapping)
- return;
-
- inode = tree->mapping->host;
- isize = i_size_read(inode);
- if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
- btrfs_debug_rl(BTRFS_I(inode)->root->fs_info,
- "%s: ino %llu isize %llu odd range [%llu,%llu]",
- caller, btrfs_ino(BTRFS_I(inode)), isize, start, end);
- }
+ if (tree->ops && tree->ops->check_extent_io_range)
+ tree->ops->check_extent_io_range(tree->private_data, caller,
+ start, end);
}
#else
#define btrfs_leak_debug_add(new, head) do {} while (0)
@@ -154,9 +144,9 @@ static noinline void flush_write_bio(void *data);
static inline struct btrfs_fs_info *
tree_fs_info(struct extent_io_tree *tree)
{
- if (!tree->mapping)
- return NULL;
- return btrfs_sb(tree->mapping->host->i_sb);
+ if (tree->ops)
+ return tree->ops->tree_fs_info(tree->private_data);
+ return NULL;
}
int __init extent_io_init(void)
@@ -174,7 +164,8 @@ int __init extent_io_init(void)
goto free_state_cache;
btrfs_bioset = bioset_create(BIO_POOL_SIZE,
- offsetof(struct btrfs_io_bio, bio));
+ offsetof(struct btrfs_io_bio, bio),
+ BIOSET_NEED_BVECS);
if (!btrfs_bioset)
goto free_buffer_cache;
@@ -213,13 +204,13 @@ void extent_io_exit(void)
}
void extent_io_tree_init(struct extent_io_tree *tree,
- struct address_space *mapping)
+ void *private_data)
{
tree->state = RB_ROOT;
tree->ops = NULL;
tree->dirty_bytes = 0;
spin_lock_init(&tree->lock);
- tree->mapping = mapping;
+ tree->private_data = private_data;
}
static struct extent_state *alloc_extent_state(gfp_t mask)
@@ -369,8 +360,7 @@ static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
struct extent_state *other)
{
if (tree->ops && tree->ops->merge_extent_hook)
- tree->ops->merge_extent_hook(tree->mapping->host, new,
- other);
+ tree->ops->merge_extent_hook(tree->private_data, new, other);
}
/*
@@ -421,15 +411,14 @@ static void set_state_cb(struct extent_io_tree *tree,
struct extent_state *state, unsigned *bits)
{
if (tree->ops && tree->ops->set_bit_hook)
- tree->ops->set_bit_hook(tree->mapping->host, state, bits);
+ tree->ops->set_bit_hook(tree->private_data, state, bits);
}
static void clear_state_cb(struct extent_io_tree *tree,
struct extent_state *state, unsigned *bits)
{
if (tree->ops && tree->ops->clear_bit_hook)
- tree->ops->clear_bit_hook(BTRFS_I(tree->mapping->host),
- state, bits);
+ tree->ops->clear_bit_hook(tree->private_data, state, bits);
}
static void set_state_bits(struct extent_io_tree *tree,
@@ -478,7 +467,7 @@ static void split_cb(struct extent_io_tree *tree, struct extent_state *orig,
u64 split)
{
if (tree->ops && tree->ops->split_extent_hook)
- tree->ops->split_extent_hook(tree->mapping->host, orig, split);
+ tree->ops->split_extent_hook(tree->private_data, orig, split);
}
/*
@@ -1402,17 +1391,7 @@ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
*/
static void set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
{
- unsigned long index = start >> PAGE_SHIFT;
- unsigned long end_index = end >> PAGE_SHIFT;
- struct page *page;
-
- while (index <= end_index) {
- page = find_get_page(tree->mapping, index);
- BUG_ON(!page); /* Pages should be in the extent_io_tree */
- set_page_writeback(page);
- put_page(page);
- index++;
- }
+ tree->ops->set_range_writeback(tree->private_data, start, end);
}
/* find the first state struct with 'bits' set after 'start', and
@@ -1961,11 +1940,12 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
SetPageUptodate(page);
}
-int free_io_failure(struct btrfs_inode *inode, struct io_failure_record *rec)
+int free_io_failure(struct extent_io_tree *failure_tree,
+ struct extent_io_tree *io_tree,
+ struct io_failure_record *rec)
{
int ret;
int err = 0;
- struct extent_io_tree *failure_tree = &inode->io_failure_tree;
set_state_failrec(failure_tree, rec->start, NULL);
ret = clear_extent_bits(failure_tree, rec->start,
@@ -1974,7 +1954,7 @@ int free_io_failure(struct btrfs_inode *inode, struct io_failure_record *rec)
if (ret)
err = ret;
- ret = clear_extent_bits(&inode->io_tree, rec->start,
+ ret = clear_extent_bits(io_tree, rec->start,
rec->start + rec->len - 1,
EXTENT_DAMAGED);
if (ret && !err)
@@ -1994,11 +1974,10 @@ int free_io_failure(struct btrfs_inode *inode, struct io_failure_record *rec)
* currently, there can be no more than two copies of every data bit. thus,
* exactly one rewrite is required.
*/
-int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length,
- u64 logical, struct page *page,
- unsigned int pg_offset, int mirror_num)
+int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
+ u64 length, u64 logical, struct page *page,
+ unsigned int pg_offset, int mirror_num)
{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct bio *bio;
struct btrfs_device *dev;
u64 map_length = 0;
@@ -2009,9 +1988,7 @@ int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length,
ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
BUG_ON(!mirror_num);
- bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
- if (!bio)
- return -EIO;
+ bio = btrfs_io_bio_alloc(1);
bio->bi_iter.bi_size = 0;
map_length = length;
@@ -2070,7 +2047,7 @@ int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length,
btrfs_info_rl_in_rcu(fs_info,
"read error corrected: ino %llu off %llu (dev %s sector %llu)",
- btrfs_ino(inode), start,
+ ino, start,
rcu_str_deref(dev->name), sector);
btrfs_bio_counter_dec(fs_info);
bio_put(bio);
@@ -2090,8 +2067,7 @@ int repair_eb_io_failure(struct btrfs_fs_info *fs_info,
for (i = 0; i < num_pages; i++) {
struct page *p = eb->pages[i];
- ret = repair_io_failure(BTRFS_I(fs_info->btree_inode), start,
- PAGE_SIZE, start, p,
+ ret = repair_io_failure(fs_info, 0, start, PAGE_SIZE, start, p,
start - page_offset(p), mirror_num);
if (ret)
break;
@@ -2105,24 +2081,24 @@ int repair_eb_io_failure(struct btrfs_fs_info *fs_info,
* each time an IO finishes, we do a fast check in the IO failure tree
* to see if we need to process or clean up an io_failure_record
*/
-int clean_io_failure(struct btrfs_inode *inode, u64 start, struct page *page,
- unsigned int pg_offset)
+int clean_io_failure(struct btrfs_fs_info *fs_info,
+ struct extent_io_tree *failure_tree,
+ struct extent_io_tree *io_tree, u64 start,
+ struct page *page, u64 ino, unsigned int pg_offset)
{
u64 private;
struct io_failure_record *failrec;
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct extent_state *state;
int num_copies;
int ret;
private = 0;
- ret = count_range_bits(&inode->io_failure_tree, &private,
- (u64)-1, 1, EXTENT_DIRTY, 0);
+ ret = count_range_bits(failure_tree, &private, (u64)-1, 1,
+ EXTENT_DIRTY, 0);
if (!ret)
return 0;
- ret = get_state_failrec(&inode->io_failure_tree, start,
- &failrec);
+ ret = get_state_failrec(failure_tree, start, &failrec);
if (ret)
return 0;
@@ -2138,25 +2114,25 @@ int clean_io_failure(struct btrfs_inode *inode, u64 start, struct page *page,
if (fs_info->sb->s_flags & MS_RDONLY)
goto out;
- spin_lock(&inode->io_tree.lock);
- state = find_first_extent_bit_state(&inode->io_tree,
+ spin_lock(&io_tree->lock);
+ state = find_first_extent_bit_state(io_tree,
failrec->start,
EXTENT_LOCKED);
- spin_unlock(&inode->io_tree.lock);
+ spin_unlock(&io_tree->lock);
if (state && state->start <= failrec->start &&
state->end >= failrec->start + failrec->len - 1) {
num_copies = btrfs_num_copies(fs_info, failrec->logical,
failrec->len);
if (num_copies > 1) {
- repair_io_failure(inode, start, failrec->len,
- failrec->logical, page,
- pg_offset, failrec->failed_mirror);
+ repair_io_failure(fs_info, ino, start, failrec->len,
+ failrec->logical, page, pg_offset,
+ failrec->failed_mirror);
}
}
out:
- free_io_failure(inode, failrec);
+ free_io_failure(failure_tree, io_tree, failrec);
return 0;
}
@@ -2356,10 +2332,7 @@ struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
struct btrfs_io_bio *btrfs_failed_bio;
struct btrfs_io_bio *btrfs_bio;
- bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
- if (!bio)
- return NULL;
-
+ bio = btrfs_io_bio_alloc(1);
bio->bi_end_io = endio_func;
bio->bi_iter.bi_sector = failrec->logical >> 9;
bio->bi_bdev = fs_info->fs_devices->latest_bdev;
@@ -2397,8 +2370,10 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
struct io_failure_record *failrec;
struct inode *inode = page->mapping->host;
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
+ struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
struct bio *bio;
int read_mode = 0;
+ blk_status_t status;
int ret;
BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
@@ -2409,7 +2384,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
ret = btrfs_check_repairable(inode, failed_bio, failrec, failed_mirror);
if (!ret) {
- free_io_failure(BTRFS_I(inode), failrec);
+ free_io_failure(failure_tree, tree, failrec);
return -EIO;
}
@@ -2422,7 +2397,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
(int)phy_offset, failed_bio->bi_end_io,
NULL);
if (!bio) {
- free_io_failure(BTRFS_I(inode), failrec);
+ free_io_failure(failure_tree, tree, failrec);
return -EIO;
}
bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
@@ -2431,11 +2406,12 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
"Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
read_mode, failrec->this_mirror, failrec->in_validation);
- ret = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror,
+ status = tree->ops->submit_bio_hook(tree->private_data, bio, failrec->this_mirror,
failrec->bio_flags, 0);
- if (ret) {
- free_io_failure(BTRFS_I(inode), failrec);
+ if (status) {
+ free_io_failure(failure_tree, tree, failrec);
bio_put(bio);
+ ret = blk_status_to_errno(status);
}
return ret;
@@ -2474,6 +2450,7 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
*/
static void end_bio_extent_writepage(struct bio *bio)
{
+ int error = blk_status_to_errno(bio->bi_status);
struct bio_vec *bvec;
u64 start;
u64 end;
@@ -2503,7 +2480,7 @@ static void end_bio_extent_writepage(struct bio *bio)
start = page_offset(page);
end = start + bvec->bv_offset + bvec->bv_len - 1;
- end_extent_writepage(page, bio->bi_error, start, end);
+ end_extent_writepage(page, error, start, end);
end_page_writeback(page);
}
@@ -2536,9 +2513,9 @@ endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
static void end_bio_extent_readpage(struct bio *bio)
{
struct bio_vec *bvec;
- int uptodate = !bio->bi_error;
+ int uptodate = !bio->bi_status;
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
- struct extent_io_tree *tree;
+ struct extent_io_tree *tree, *failure_tree;
u64 offset = 0;
u64 start;
u64 end;
@@ -2556,9 +2533,10 @@ static void end_bio_extent_readpage(struct bio *bio)
btrfs_debug(fs_info,
"end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
- (u64)bio->bi_iter.bi_sector, bio->bi_error,
+ (u64)bio->bi_iter.bi_sector, bio->bi_status,
io_bio->mirror_num);
tree = &BTRFS_I(inode)->io_tree;
+ failure_tree = &BTRFS_I(inode)->io_failure_tree;
/* We always issue full-page reads, but if some block
* in a page fails to read, blk_update_request() will
@@ -2588,8 +2566,10 @@ static void end_bio_extent_readpage(struct bio *bio)
if (ret)
uptodate = 0;
else
- clean_io_failure(BTRFS_I(inode), start,
- page, 0);
+ clean_io_failure(BTRFS_I(inode)->root->fs_info,
+ failure_tree, tree, start,
+ page,
+ btrfs_ino(BTRFS_I(inode)), 0);
}
if (likely(uptodate))
@@ -2615,7 +2595,7 @@ static void end_bio_extent_readpage(struct bio *bio)
ret = bio_readpage_error(bio, offset, page,
start, end, mirror);
if (ret == 0) {
- uptodate = !bio->bi_error;
+ uptodate = !bio->bi_status;
offset += len;
continue;
}
@@ -2673,77 +2653,80 @@ readpage_ok:
endio_readpage_release_extent(tree, extent_start, extent_len,
uptodate);
if (io_bio->end_io)
- io_bio->end_io(io_bio, bio->bi_error);
+ io_bio->end_io(io_bio, blk_status_to_errno(bio->bi_status));
bio_put(bio);
}
/*
- * this allocates from the btrfs_bioset. We're returning a bio right now
- * but you can call btrfs_io_bio for the appropriate container_of magic
+ * Initialize the members up to but not including 'bio'. Use after allocating a
+ * new bio by bio_alloc_bioset as it does not initialize the bytes outside of
+ * 'bio' because use of __GFP_ZERO is not supported.
*/
-struct bio *
-btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
- gfp_t gfp_flags)
+static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio)
{
- struct btrfs_io_bio *btrfs_bio;
- struct bio *bio;
-
- bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
+ memset(btrfs_bio, 0, offsetof(struct btrfs_io_bio, bio));
+}
- if (bio == NULL && (current->flags & PF_MEMALLOC)) {
- while (!bio && (nr_vecs /= 2)) {
- bio = bio_alloc_bioset(gfp_flags,
- nr_vecs, btrfs_bioset);
- }
- }
+/*
+ * The following helpers allocate a bio. As it's backed by a bioset, it'll
+ * never fail. We're returning a bio right now but you can call btrfs_io_bio
+ * for the appropriate container_of magic
+ */
+struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte)
+{
+ struct bio *bio;
- if (bio) {
- bio->bi_bdev = bdev;
- bio->bi_iter.bi_sector = first_sector;
- btrfs_bio = btrfs_io_bio(bio);
- btrfs_bio->csum = NULL;
- btrfs_bio->csum_allocated = NULL;
- btrfs_bio->end_io = NULL;
- }
+ bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, btrfs_bioset);
+ bio->bi_bdev = bdev;
+ bio->bi_iter.bi_sector = first_byte >> 9;
+ btrfs_io_bio_init(btrfs_io_bio(bio));
return bio;
}
-struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
+struct bio *btrfs_bio_clone(struct bio *bio)
{
struct btrfs_io_bio *btrfs_bio;
struct bio *new;
- new = bio_clone_bioset(bio, gfp_mask, btrfs_bioset);
- if (new) {
- btrfs_bio = btrfs_io_bio(new);
- btrfs_bio->csum = NULL;
- btrfs_bio->csum_allocated = NULL;
- btrfs_bio->end_io = NULL;
- }
+ /* Bio allocation backed by a bioset does not fail */
+ new = bio_clone_fast(bio, GFP_NOFS, btrfs_bioset);
+ btrfs_bio = btrfs_io_bio(new);
+ btrfs_io_bio_init(btrfs_bio);
+ btrfs_bio->iter = bio->bi_iter;
return new;
}
-/* this also allocates from the btrfs_bioset */
-struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
+struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs)
{
- struct btrfs_io_bio *btrfs_bio;
struct bio *bio;
- bio = bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
- if (bio) {
- btrfs_bio = btrfs_io_bio(bio);
- btrfs_bio->csum = NULL;
- btrfs_bio->csum_allocated = NULL;
- btrfs_bio->end_io = NULL;
- }
+ /* Bio allocation backed by a bioset does not fail */
+ bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, btrfs_bioset);
+ btrfs_io_bio_init(btrfs_io_bio(bio));
return bio;
}
+struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
+{
+ struct bio *bio;
+ struct btrfs_io_bio *btrfs_bio;
+
+ /* this will never fail when it's backed by a bioset */
+ bio = bio_clone_fast(orig, GFP_NOFS, btrfs_bioset);
+ ASSERT(bio);
+
+ btrfs_bio = btrfs_io_bio(bio);
+ btrfs_io_bio_init(btrfs_bio);
+
+ bio_trim(bio, offset >> 9, size >> 9);
+ btrfs_bio->iter = bio->bi_iter;
+ return bio;
+}
static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
unsigned long bio_flags)
{
- int ret = 0;
+ blk_status_t ret = 0;
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
struct page *page = bvec->bv_page;
struct extent_io_tree *tree = bio->bi_private;
@@ -2755,13 +2738,13 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
bio_get(bio);
if (tree->ops)
- ret = tree->ops->submit_bio_hook(page->mapping->host, bio,
+ ret = tree->ops->submit_bio_hook(tree->private_data, bio,
mirror_num, bio_flags, start);
else
btrfsic_submit_bio(bio);
bio_put(bio);
- return ret;
+ return blk_status_to_errno(ret);
}
static int merge_bio(struct extent_io_tree *tree, struct page *page,
@@ -2818,14 +2801,11 @@ static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree,
}
}
- bio = btrfs_bio_alloc(bdev, sector, BIO_MAX_PAGES,
- GFP_NOFS | __GFP_HIGH);
- if (!bio)
- return -ENOMEM;
-
+ bio = btrfs_bio_alloc(bdev, sector << 9);
bio_add_page(bio, page, page_size, offset);
bio->bi_end_io = end_io_func;
bio->bi_private = tree;
+ bio->bi_write_hint = page->mapping->host->i_write_hint;
bio_set_op_attrs(bio, op, op_flags);
if (wbc) {
wbc_init_bio(wbc, bio);
@@ -3597,9 +3577,9 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
spin_unlock(&eb->refs_lock);
btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
- __percpu_counter_add(&fs_info->dirty_metadata_bytes,
- -eb->len,
- fs_info->dirty_metadata_batch);
+ percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
+ -eb->len,
+ fs_info->dirty_metadata_batch);
ret = 1;
} else {
spin_unlock(&eb->refs_lock);
@@ -3707,7 +3687,7 @@ static void end_bio_extent_buffer_writepage(struct bio *bio)
BUG_ON(!eb);
done = atomic_dec_and_test(&eb->io_pages);
- if (bio->bi_error ||
+ if (bio->bi_status ||
test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
ClearPageUptodate(page);
set_btree_ioerr(page);
@@ -3757,7 +3737,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
* header 0 1 2 .. N ... data_N .. data_2 data_1 data_0
*/
start = btrfs_item_nr_offset(nritems);
- end = btrfs_leaf_data(eb) + leaf_data_end(fs_info, eb);
+ end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(fs_info, eb);
memzero_extent_buffer(eb, start, end - start);
}
@@ -4463,29 +4443,25 @@ try_submit_last:
}
/*
- * Sanity check for fiemap cache
+ * Emit last fiemap cache
*
- * All fiemap cache should be submitted by emit_fiemap_extent()
- * Iteration should be terminated either by last fiemap extent or
- * fieinfo->fi_extents_max.
- * So no cached fiemap should exist.
+ * The last fiemap cache may still be cached in the following case:
+ * 0 4k 8k
+ * |<- Fiemap range ->|
+ * |<------------ First extent ----------->|
+ *
+ * In this case, the first extent range will be cached but not emitted.
+ * So we must emit it before ending extent_fiemap().
*/
-static int check_fiemap_cache(struct btrfs_fs_info *fs_info,
- struct fiemap_extent_info *fieinfo,
- struct fiemap_cache *cache)
+static int emit_last_fiemap_cache(struct btrfs_fs_info *fs_info,
+ struct fiemap_extent_info *fieinfo,
+ struct fiemap_cache *cache)
{
int ret;
if (!cache->cached)
return 0;
- /* Small and recoverbale problem, only to info developer */
-#ifdef CONFIG_BTRFS_DEBUG
- WARN_ON(1);
-#endif
- btrfs_warn(fs_info,
- "unhandled fiemap cache detected: offset=%llu phys=%llu len=%llu flags=0x%x",
- cache->offset, cache->phys, cache->len, cache->flags);
ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
cache->len, cache->flags);
cache->cached = false;
@@ -4701,7 +4677,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
}
out_free:
if (!ret)
- ret = check_fiemap_cache(root->fs_info, fieinfo, &cache);
+ ret = emit_last_fiemap_cache(root->fs_info, fieinfo, &cache);
free_extent_map(em);
out:
btrfs_free_path(path);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 1eafa2f0ede3..3fb8513bf02e 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -92,7 +92,7 @@ struct btrfs_inode;
struct btrfs_io_bio;
struct io_failure_record;
-typedef int (extent_submit_bio_hook_t)(struct inode *inode, struct bio *bio,
+typedef blk_status_t (extent_submit_bio_hook_t)(void *private_data, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset);
struct extent_io_ops {
@@ -108,32 +108,36 @@ struct extent_io_ops {
size_t size, struct bio *bio,
unsigned long bio_flags);
int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
+ struct btrfs_fs_info *(*tree_fs_info)(void *private_data);
+ void (*set_range_writeback)(void *private_data, u64 start, u64 end);
/*
* Optional hooks, called if the pointer is not NULL
*/
- int (*fill_delalloc)(struct inode *inode, struct page *locked_page,
+ int (*fill_delalloc)(void *private_data, struct page *locked_page,
u64 start, u64 end, int *page_started,
unsigned long *nr_written);
int (*writepage_start_hook)(struct page *page, u64 start, u64 end);
void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state, int uptodate);
- void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
+ void (*set_bit_hook)(void *private_data, struct extent_state *state,
unsigned *bits);
- void (*clear_bit_hook)(struct btrfs_inode *inode,
+ void (*clear_bit_hook)(void *private_data,
struct extent_state *state,
unsigned *bits);
- void (*merge_extent_hook)(struct inode *inode,
+ void (*merge_extent_hook)(void *private_data,
struct extent_state *new,
struct extent_state *other);
- void (*split_extent_hook)(struct inode *inode,
+ void (*split_extent_hook)(void *private_data,
struct extent_state *orig, u64 split);
+ void (*check_extent_io_range)(void *private_data, const char *caller,
+ u64 start, u64 end);
};
struct extent_io_tree {
struct rb_root state;
- struct address_space *mapping;
+ void *private_data;
u64 dirty_bytes;
int track_uptodate;
spinlock_t lock;
@@ -205,12 +209,46 @@ struct extent_buffer {
*/
struct extent_changeset {
/* How many bytes are set/cleared in this operation */
- u64 bytes_changed;
+ unsigned int bytes_changed;
/* Changed ranges */
struct ulist range_changed;
};
+static inline void extent_changeset_init(struct extent_changeset *changeset)
+{
+ changeset->bytes_changed = 0;
+ ulist_init(&changeset->range_changed);
+}
+
+static inline struct extent_changeset *extent_changeset_alloc(void)
+{
+ struct extent_changeset *ret;
+
+ ret = kmalloc(sizeof(*ret), GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ extent_changeset_init(ret);
+ return ret;
+}
+
+static inline void extent_changeset_release(struct extent_changeset *changeset)
+{
+ if (!changeset)
+ return;
+ changeset->bytes_changed = 0;
+ ulist_release(&changeset->range_changed);
+}
+
+static inline void extent_changeset_free(struct extent_changeset *changeset)
+{
+ if (!changeset)
+ return;
+ extent_changeset_release(changeset);
+ kfree(changeset);
+}
+
static inline void extent_set_compress_type(unsigned long *bio_flags,
int compress_type)
{
@@ -230,8 +268,7 @@ typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode,
u64 start, u64 len,
int create);
-void extent_io_tree_init(struct extent_io_tree *tree,
- struct address_space *mapping);
+void extent_io_tree_init(struct extent_io_tree *tree, void *private_data);
int try_release_extent_mapping(struct extent_map_tree *map,
struct extent_io_tree *tree, struct page *page,
gfp_t mask);
@@ -459,20 +496,21 @@ void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
u64 delalloc_end, struct page *locked_page,
unsigned bits_to_clear,
unsigned long page_ops);
-struct bio *
-btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
- gfp_t gfp_flags);
-struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs);
-struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask);
+struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte);
+struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs);
+struct bio *btrfs_bio_clone(struct bio *bio);
+struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size);
struct btrfs_fs_info;
struct btrfs_inode;
-int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length,
- u64 logical, struct page *page,
- unsigned int pg_offset, int mirror_num);
-int clean_io_failure(struct btrfs_inode *inode, u64 start,
- struct page *page, unsigned int pg_offset);
+int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
+ u64 length, u64 logical, struct page *page,
+ unsigned int pg_offset, int mirror_num);
+int clean_io_failure(struct btrfs_fs_info *fs_info,
+ struct extent_io_tree *failure_tree,
+ struct extent_io_tree *io_tree, u64 start,
+ struct page *page, u64 ino, unsigned int pg_offset);
void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
int repair_eb_io_failure(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int mirror_num);
@@ -507,7 +545,9 @@ struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
struct io_failure_record *failrec,
struct page *page, int pg_offset, int icsum,
bio_end_io_t *endio_func, void *data);
-int free_io_failure(struct btrfs_inode *inode, struct io_failure_record *rec);
+int free_io_failure(struct extent_io_tree *failure_tree,
+ struct extent_io_tree *io_tree,
+ struct io_failure_record *rec);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
noinline u64 find_lock_delalloc_range(struct inode *inode,
struct extent_io_tree *tree,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 64fcb31d7163..fdcb41002623 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -160,11 +160,12 @@ static void btrfs_io_bio_endio_readpage(struct btrfs_io_bio *bio, int err)
kfree(bio->csum_allocated);
}
-static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
+static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
u64 logical_offset, u32 *dst, int dio)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct bio_vec *bvec;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio);
struct btrfs_csum_item *item = NULL;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
@@ -177,12 +178,12 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
u64 page_bytes_left;
u32 diff;
int nblocks;
- int count = 0, i;
+ int count = 0;
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
path = btrfs_alloc_path();
if (!path)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
if (!dst) {
@@ -191,7 +192,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
csum_size, GFP_NOFS);
if (!btrfs_bio->csum_allocated) {
btrfs_free_path(path);
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
}
btrfs_bio->csum = btrfs_bio->csum_allocated;
btrfs_bio->end_io = btrfs_io_bio_endio_readpage;
@@ -206,8 +207,6 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
if (bio->bi_iter.bi_size > PAGE_SIZE * 8)
path->reada = READA_FORWARD;
- WARN_ON(bio->bi_vcnt <= 0);
-
/*
* the free space stuff is only read when it hasn't been
* updated in the current transaction. So, we can safely
@@ -223,13 +222,13 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
if (dio)
offset = logical_offset;
- bio_for_each_segment_all(bvec, bio, i) {
- page_bytes_left = bvec->bv_len;
+ bio_for_each_segment(bvec, bio, iter) {
+ page_bytes_left = bvec.bv_len;
if (count)
goto next;
if (!dio)
- offset = page_offset(bvec->bv_page) + bvec->bv_offset;
+ offset = page_offset(bvec.bv_page) + bvec.bv_offset;
count = btrfs_find_ordered_sum(inode, offset, disk_bytenr,
(u32 *)csum, nblocks);
if (count)
@@ -303,12 +302,12 @@ next:
return 0;
}
-int btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst)
+blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst)
{
return __btrfs_lookup_bio_sums(inode, bio, 0, dst, 0);
}
-int btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset)
+blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset)
{
return __btrfs_lookup_bio_sums(inode, bio, offset, NULL, 1);
}
@@ -433,26 +432,26 @@ fail:
return ret;
}
-int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
u64 file_start, int contig)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_ordered_sum *sums;
struct btrfs_ordered_extent *ordered = NULL;
char *data;
- struct bio_vec *bvec;
+ struct bvec_iter iter;
+ struct bio_vec bvec;
int index;
int nr_sectors;
- int i, j;
unsigned long total_bytes = 0;
unsigned long this_sum_bytes = 0;
+ int i;
u64 offset;
- WARN_ON(bio->bi_vcnt <= 0);
sums = kzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
GFP_NOFS);
if (!sums)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
sums->len = bio->bi_iter.bi_size;
INIT_LIST_HEAD(&sums->list);
@@ -465,19 +464,19 @@ int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
sums->bytenr = (u64)bio->bi_iter.bi_sector << 9;
index = 0;
- bio_for_each_segment_all(bvec, bio, j) {
+ bio_for_each_segment(bvec, bio, iter) {
if (!contig)
- offset = page_offset(bvec->bv_page) + bvec->bv_offset;
+ offset = page_offset(bvec.bv_page) + bvec.bv_offset;
if (!ordered) {
ordered = btrfs_lookup_ordered_extent(inode, offset);
BUG_ON(!ordered); /* Logic error */
}
- data = kmap_atomic(bvec->bv_page);
+ data = kmap_atomic(bvec.bv_page);
nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info,
- bvec->bv_len + fs_info->sectorsize
+ bvec.bv_len + fs_info->sectorsize
- 1);
for (i = 0; i < nr_sectors; i++) {
@@ -504,12 +503,12 @@ int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
+ total_bytes;
index = 0;
- data = kmap_atomic(bvec->bv_page);
+ data = kmap_atomic(bvec.bv_page);
}
sums->sums[index] = ~(u32)0;
sums->sums[index]
- = btrfs_csum_data(data + bvec->bv_offset
+ = btrfs_csum_data(data + bvec.bv_offset
+ (i * fs_info->sectorsize),
sums->sums[index],
fs_info->sectorsize);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index da1096eb1a40..9e75d8a39aac 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1581,6 +1581,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
struct btrfs_root *root = BTRFS_I(inode)->root;
struct page **pages = NULL;
struct extent_state *cached_state = NULL;
+ struct extent_changeset *data_reserved = NULL;
u64 release_bytes = 0;
u64 lockstart;
u64 lockend;
@@ -1628,7 +1629,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
reserve_bytes = round_up(write_bytes + sector_offset,
fs_info->sectorsize);
- ret = btrfs_check_data_free_space(inode, pos, write_bytes);
+ extent_changeset_release(data_reserved);
+ ret = btrfs_check_data_free_space(inode, &data_reserved, pos,
+ write_bytes);
if (ret < 0) {
if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
BTRFS_INODE_PREALLOC)) &&
@@ -1657,8 +1660,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
reserve_bytes);
if (ret) {
if (!only_release_metadata)
- btrfs_free_reserved_data_space(inode, pos,
- write_bytes);
+ btrfs_free_reserved_data_space(inode,
+ data_reserved, pos,
+ write_bytes);
else
btrfs_end_write_no_snapshoting(root);
break;
@@ -1740,8 +1744,9 @@ again:
__pos = round_down(pos,
fs_info->sectorsize) +
(dirty_pages << PAGE_SHIFT);
- btrfs_delalloc_release_space(inode, __pos,
- release_bytes);
+ btrfs_delalloc_release_space(inode,
+ data_reserved, __pos,
+ release_bytes);
}
}
@@ -1796,12 +1801,13 @@ again:
btrfs_delalloc_release_metadata(BTRFS_I(inode),
release_bytes);
} else {
- btrfs_delalloc_release_space(inode,
- round_down(pos, fs_info->sectorsize),
- release_bytes);
+ btrfs_delalloc_release_space(inode, data_reserved,
+ round_down(pos, fs_info->sectorsize),
+ release_bytes);
}
}
+ extent_changeset_free(data_reserved);
return num_written ? num_written : ret;
}
@@ -1876,17 +1882,36 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
ssize_t err;
loff_t pos;
- size_t count;
+ size_t count = iov_iter_count(from);
loff_t oldsize;
int clean_page = 0;
- inode_lock(inode);
+ if (!inode_trylock(inode)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ inode_lock(inode);
+ }
+
err = generic_write_checks(iocb, from);
if (err <= 0) {
inode_unlock(inode);
return err;
}
+ pos = iocb->ki_pos;
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ /*
+ * We will allocate space in case nodatacow is not set,
+ * so bail
+ */
+ if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
+ BTRFS_INODE_PREALLOC)) ||
+ check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) {
+ inode_unlock(inode);
+ return -EAGAIN;
+ }
+ }
+
current->backing_dev_info = inode_to_bdi(inode);
err = file_remove_privs(file);
if (err) {
@@ -1914,8 +1939,6 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
*/
update_time_for_write(inode);
- pos = iocb->ki_pos;
- count = iov_iter_count(from);
start_pos = round_down(pos, fs_info->sectorsize);
oldsize = i_size_read(inode);
if (start_pos > oldsize) {
@@ -2011,7 +2034,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
struct btrfs_log_ctx ctx;
- int ret = 0;
+ int ret = 0, err;
bool full_sync = 0;
u64 len;
@@ -2030,7 +2053,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
*/
ret = start_ordered_ops(inode, start, end);
if (ret)
- return ret;
+ goto out;
inode_lock(inode);
atomic_inc(&root->log_batch);
@@ -2135,10 +2158,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
* An ordered extent might have started before and completed
* already with io errors, in which case the inode was not
* updated and we end up here. So check the inode's mapping
- * flags for any errors that might have happened while doing
- * writeback of file data.
+ * for any errors that might have happened since we last
+ * checked called fsync.
*/
- ret = filemap_check_errors(inode->i_mapping);
+ ret = filemap_check_wb_err(inode->i_mapping, file->f_wb_err);
inode_unlock(inode);
goto out;
}
@@ -2227,6 +2250,9 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
ret = btrfs_end_transaction(trans);
}
out:
+ err = file_check_and_advance_wb_err(file);
+ if (!ret)
+ ret = err;
return ret > 0 ? -EIO : ret;
}
@@ -2390,10 +2416,13 @@ out:
*/
static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct extent_map *em;
int ret = 0;
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, *start, *len, 0);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0,
+ round_down(*start, fs_info->sectorsize),
+ round_up(*len, fs_info->sectorsize), 0);
if (IS_ERR(em))
return PTR_ERR(em);
@@ -2769,6 +2798,7 @@ static long btrfs_fallocate(struct file *file, int mode,
{
struct inode *inode = file_inode(file);
struct extent_state *cached_state = NULL;
+ struct extent_changeset *data_reserved = NULL;
struct falloc_range *range;
struct falloc_range *tmp;
struct list_head reserve_list;
@@ -2898,8 +2928,8 @@ static long btrfs_fallocate(struct file *file, int mode,
free_extent_map(em);
break;
}
- ret = btrfs_qgroup_reserve_data(inode, cur_offset,
- last_byte - cur_offset);
+ ret = btrfs_qgroup_reserve_data(inode, &data_reserved,
+ cur_offset, last_byte - cur_offset);
if (ret < 0) {
free_extent_map(em);
break;
@@ -2910,8 +2940,8 @@ static long btrfs_fallocate(struct file *file, int mode,
* range, free reserved data space first, otherwise
* it'll result in false ENOSPC error.
*/
- btrfs_free_reserved_data_space(inode, cur_offset,
- last_byte - cur_offset);
+ btrfs_free_reserved_data_space(inode, data_reserved,
+ cur_offset, last_byte - cur_offset);
}
free_extent_map(em);
cur_offset = last_byte;
@@ -2930,8 +2960,9 @@ static long btrfs_fallocate(struct file *file, int mode,
range->len, i_blocksize(inode),
offset + len, &alloc_hint);
else
- btrfs_free_reserved_data_space(inode, range->start,
- range->len);
+ btrfs_free_reserved_data_space(inode,
+ data_reserved, range->start,
+ range->len);
list_del(&range->list);
kfree(range);
}
@@ -2969,8 +3000,9 @@ out:
inode_unlock(inode);
/* Let go of our reservation. */
if (ret != 0)
- btrfs_free_reserved_data_space(inode, alloc_start,
- alloc_end - cur_offset);
+ btrfs_free_reserved_data_space(inode, data_reserved,
+ alloc_start, alloc_end - cur_offset);
+ extent_changeset_free(data_reserved);
return ret;
}
@@ -3071,13 +3103,19 @@ out:
return offset;
}
+static int btrfs_file_open(struct inode *inode, struct file *filp)
+{
+ filp->f_mode |= FMODE_AIO_NOWAIT;
+ return generic_file_open(inode, filp);
+}
+
const struct file_operations btrfs_file_operations = {
.llseek = btrfs_file_llseek,
.read_iter = generic_file_read_iter,
.splice_read = generic_file_splice_read,
.write_iter = btrfs_file_write_iter,
.mmap = btrfs_file_mmap,
- .open = generic_file_open,
+ .open = btrfs_file_open,
.release = btrfs_release_file,
.fsync = btrfs_sync_file,
.fallocate = btrfs_fallocate,
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index fc0bd8406758..a5e34de06c2f 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -17,7 +17,7 @@
*/
#include <linux/kernel.h>
-#include <linux/vmalloc.h>
+#include <linux/sched/mm.h>
#include "ctree.h"
#include "disk-io.h"
#include "locking.h"
@@ -153,21 +153,21 @@ static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
static u8 *alloc_bitmap(u32 bitmap_size)
{
- void *mem;
+ u8 *ret;
+ unsigned int nofs_flag;
/*
- * The allocation size varies, observed numbers were < 4K up to 16K.
- * Using vmalloc unconditionally would be too heavy, we'll try
- * contiguous allocations first.
+ * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
+ * into the filesystem as the free space bitmap can be modified in the
+ * critical section of a transaction commit.
+ *
+ * TODO: push the memalloc_nofs_{save,restore}() to the caller where we
+ * know that recursion is unsafe.
*/
- if (bitmap_size <= PAGE_SIZE)
- return kzalloc(bitmap_size, GFP_NOFS);
-
- mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_NOWARN);
- if (mem)
- return mem;
-
- return __vmalloc(bitmap_size, GFP_NOFS | __GFP_ZERO, PAGE_KERNEL);
+ nofs_flag = memalloc_nofs_save();
+ ret = kvzalloc(bitmap_size, GFP_KERNEL);
+ memalloc_nofs_restore(nofs_flag);
+ return ret;
}
int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
@@ -1188,11 +1188,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
- ret = btrfs_commit_transaction(trans);
- if (ret)
- return ret;
-
- return 0;
+ return btrfs_commit_transaction(trans);
abort:
clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
@@ -1277,11 +1273,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
free_extent_buffer(free_space_root->commit_root);
kfree(free_space_root);
- ret = btrfs_commit_transaction(trans);
- if (ret)
- return ret;
-
- return 0;
+ return btrfs_commit_transaction(trans);
abort:
btrfs_abort_transaction(trans, ret);
diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c
index a97fdc156a03..baacc1866861 100644
--- a/fs/btrfs/hash.c
+++ b/fs/btrfs/hash.c
@@ -38,6 +38,7 @@ u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length)
{
SHASH_DESC_ON_STACK(shash, tfm);
u32 *ctx = (u32 *)shash_desc_ctx(shash);
+ u32 retval;
int err;
shash->tfm = tfm;
@@ -47,5 +48,7 @@ u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length)
err = crypto_shash_update(shash, address, length);
BUG_ON(err);
- return *ctx;
+ retval = *ctx;
+ barrier_data(ctx);
+ return retval;
}
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 5c6c20ec64d8..d02019747d00 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -400,6 +400,7 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
struct btrfs_path *path;
struct inode *inode;
struct btrfs_block_rsv *rsv;
+ struct extent_changeset *data_reserved = NULL;
u64 num_bytes;
u64 alloc_hint = 0;
int ret;
@@ -492,7 +493,7 @@ again:
/* Just to make sure we have enough space */
prealloc += 8 * PAGE_SIZE;
- ret = btrfs_delalloc_reserve_space(inode, 0, prealloc);
+ ret = btrfs_delalloc_reserve_space(inode, &data_reserved, 0, prealloc);
if (ret)
goto out_put;
@@ -516,6 +517,7 @@ out:
trans->bytes_reserved = num_bytes;
btrfs_free_path(path);
+ extent_changeset_free(data_reserved);
return ret;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ef3c98c527c1..06dea7c89bbd 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -86,7 +86,6 @@ static const struct extent_io_ops btrfs_extent_io_ops;
static struct kmem_cache *btrfs_inode_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
-struct kmem_cache *btrfs_transaction_cachep;
struct kmem_cache *btrfs_path_cachep;
struct kmem_cache *btrfs_free_space_cachep;
@@ -178,7 +177,6 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
char *kaddr;
unsigned long ptr;
struct btrfs_file_extent_item *ei;
- int err = 0;
int ret;
size_t cur_size = size;
unsigned long offset;
@@ -200,10 +198,8 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, root, path, &key,
datasize);
- if (ret) {
- err = ret;
+ if (ret)
goto fail;
- }
}
leaf = path->nodes[0];
ei = btrfs_item_ptr(leaf, path->slots[0],
@@ -258,9 +254,8 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
BTRFS_I(inode)->disk_i_size = inode->i_size;
ret = btrfs_update_inode(trans, root, inode);
- return ret;
fail:
- return err;
+ return ret;
}
@@ -350,7 +345,7 @@ out:
* And at reserve time, it's always aligned to page size, so
* just free one page here.
*/
- btrfs_qgroup_free_data(inode, 0, PAGE_SIZE);
+ btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE);
btrfs_free_path(path);
btrfs_end_transaction(trans);
return ret;
@@ -608,12 +603,11 @@ cont:
/*
* one last check to make sure the compression is really a
- * win, compare the page count read with the blocks on disk
+ * win, compare the page count read with the blocks on disk,
+ * compression must free at least one sector size
*/
total_in = ALIGN(total_in, PAGE_SIZE);
- if (total_compressed >= total_in) {
- will_compress = 0;
- } else {
+ if (total_compressed + blocksize <= total_in) {
num_bytes = total_in;
*num_added += 1;
@@ -842,13 +836,12 @@ retry:
NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
PAGE_SET_WRITEBACK);
- ret = btrfs_submit_compressed_write(inode,
+ if (btrfs_submit_compressed_write(inode,
async_extent->start,
async_extent->ram_size,
ins.objectid,
ins.offset, async_extent->pages,
- async_extent->nr_pages);
- if (ret) {
+ async_extent->nr_pages)) {
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
struct page *p = async_extent->pages[0];
const u64 start = async_extent->start;
@@ -1569,10 +1562,11 @@ static inline int need_force_cow(struct inode *inode, u64 start, u64 end)
/*
* extent_io.c call back to do delayed allocation processing
*/
-static int run_delalloc_range(struct inode *inode, struct page *locked_page,
+static int run_delalloc_range(void *private_data, struct page *locked_page,
u64 start, u64 end, int *page_started,
unsigned long *nr_written)
{
+ struct inode *inode = private_data;
int ret;
int force_cow = need_force_cow(inode, start, end);
@@ -1596,9 +1590,10 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
return ret;
}
-static void btrfs_split_extent_hook(struct inode *inode,
+static void btrfs_split_extent_hook(void *private_data,
struct extent_state *orig, u64 split)
{
+ struct inode *inode = private_data;
u64 size;
/* not delalloc, ignore it */
@@ -1633,10 +1628,11 @@ static void btrfs_split_extent_hook(struct inode *inode,
* extents, such as when we are doing sequential writes, so we can properly
* account for the metadata space we'll need.
*/
-static void btrfs_merge_extent_hook(struct inode *inode,
+static void btrfs_merge_extent_hook(void *private_data,
struct extent_state *new,
struct extent_state *other)
{
+ struct inode *inode = private_data;
u64 new_size, old_size;
u32 num_extents;
@@ -1736,9 +1732,10 @@ static void btrfs_del_delalloc_inode(struct btrfs_root *root,
* bytes in this file, and to maintain the list of inodes that
* have pending delalloc work to be done.
*/
-static void btrfs_set_bit_hook(struct inode *inode,
+static void btrfs_set_bit_hook(void *private_data,
struct extent_state *state, unsigned *bits)
{
+ struct inode *inode = private_data;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -1766,8 +1763,8 @@ static void btrfs_set_bit_hook(struct inode *inode,
if (btrfs_is_testing(fs_info))
return;
- __percpu_counter_add(&fs_info->delalloc_bytes, len,
- fs_info->delalloc_batch);
+ percpu_counter_add_batch(&fs_info->delalloc_bytes, len,
+ fs_info->delalloc_batch);
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->delalloc_bytes += len;
if (*bits & EXTENT_DEFRAG)
@@ -1790,10 +1787,11 @@ static void btrfs_set_bit_hook(struct inode *inode,
/*
* extent_io.c clear_bit_hook, see set_bit_hook for why
*/
-static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
+static void btrfs_clear_bit_hook(void *private_data,
struct extent_state *state,
unsigned *bits)
{
+ struct btrfs_inode *inode = BTRFS_I((struct inode *)private_data);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
u64 len = state->end + 1 - state->start;
u32 num_extents = count_max_extents(len);
@@ -1840,8 +1838,8 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
&inode->vfs_inode,
state->start, len);
- __percpu_counter_add(&fs_info->delalloc_bytes, -len,
- fs_info->delalloc_batch);
+ percpu_counter_add_batch(&fs_info->delalloc_bytes, -len,
+ fs_info->delalloc_batch);
spin_lock(&inode->lock);
inode->delalloc_bytes -= len;
if (do_list && inode->delalloc_bytes == 0 &&
@@ -1901,11 +1899,12 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
* At IO completion time the cums attached on the ordered extent record
* are inserted into the btree
*/
-static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
+static blk_status_t __btrfs_submit_bio_start(void *private_data, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
- int ret = 0;
+ struct inode *inode = private_data;
+ blk_status_t ret = 0;
ret = btrfs_csum_one_bio(inode, bio, 0, 0);
BUG_ON(ret); /* -ENOMEM */
@@ -1920,16 +1919,17 @@ static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
* At IO completion time the cums attached on the ordered extent record
* are inserted into the btree
*/
-static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio,
+static blk_status_t __btrfs_submit_bio_done(void *private_data, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
+ struct inode *inode = private_data;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- int ret;
+ blk_status_t ret;
ret = btrfs_map_bio(fs_info, bio, mirror_num, 1);
if (ret) {
- bio->bi_error = ret;
+ bio->bi_status = ret;
bio_endio(bio);
}
return ret;
@@ -1939,14 +1939,15 @@ static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio,
* extent_io.c submission hook. This does the right thing for csum calculation
* on write, or reading the csums from the tree before a read
*/
-static int btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset)
+static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio,
+ int mirror_num, unsigned long bio_flags,
+ u64 bio_offset)
{
+ struct inode *inode = private_data;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
- int ret = 0;
+ blk_status_t ret = 0;
int skip_sum;
int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
@@ -1976,8 +1977,8 @@ static int btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
goto mapit;
/* we're doing a write, do the async checksumming */
- ret = btrfs_wq_submit_bio(fs_info, inode, bio, mirror_num,
- bio_flags, bio_offset,
+ ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags,
+ bio_offset, inode,
__btrfs_submit_bio_start,
__btrfs_submit_bio_done);
goto out;
@@ -1991,8 +1992,8 @@ mapit:
ret = btrfs_map_bio(fs_info, bio, mirror_num, 0);
out:
- if (ret < 0) {
- bio->bi_error = ret;
+ if (ret) {
+ bio->bi_status = ret;
bio_endio(bio);
}
return ret;
@@ -2035,6 +2036,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
struct btrfs_writepage_fixup *fixup;
struct btrfs_ordered_extent *ordered;
struct extent_state *cached_state = NULL;
+ struct extent_changeset *data_reserved = NULL;
struct page *page;
struct inode *inode;
u64 page_start;
@@ -2072,7 +2074,7 @@ again:
goto again;
}
- ret = btrfs_delalloc_reserve_space(inode, page_start,
+ ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
PAGE_SIZE);
if (ret) {
mapping_set_error(page->mapping, ret);
@@ -2092,6 +2094,7 @@ out_page:
unlock_page(page);
put_page(page);
kfree(fixup);
+ extent_changeset_free(data_reserved);
}
/*
@@ -2143,6 +2146,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_key ins;
+ u64 qg_released;
int extent_inserted = 0;
int ret;
@@ -2198,13 +2202,17 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
ins.objectid = disk_bytenr;
ins.offset = disk_num_bytes;
ins.type = BTRFS_EXTENT_ITEM_KEY;
- ret = btrfs_alloc_reserved_file_extent(trans, root->root_key.objectid,
- btrfs_ino(BTRFS_I(inode)), file_pos, ram_bytes, &ins);
+
/*
* Release the reserved range from inode dirty range map, as it is
* already moved into delayed_ref_head
*/
- btrfs_qgroup_release_data(inode, file_pos, ram_bytes);
+ ret = btrfs_qgroup_release_data(inode, file_pos, ram_bytes);
+ if (ret < 0)
+ goto out;
+ qg_released = ret;
+ ret = btrfs_alloc_reserved_file_extent(trans, root->root_key.objectid,
+ btrfs_ino(BTRFS_I(inode)), file_pos, qg_released, &ins);
out:
btrfs_free_path(path);
@@ -2926,7 +2934,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
* space for NOCOW range.
* As NOCOW won't cause a new delayed ref, just free the space
*/
- btrfs_qgroup_free_data(inode, ordered_extent->file_offset,
+ btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset,
ordered_extent->len);
btrfs_ordered_update_i_size(inode, 0, ordered_extent);
if (nolock)
@@ -4762,6 +4770,7 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_ordered_extent *ordered;
struct extent_state *cached_state = NULL;
+ struct extent_changeset *data_reserved = NULL;
char *kaddr;
u32 blocksize = fs_info->sectorsize;
pgoff_t index = from >> PAGE_SHIFT;
@@ -4776,7 +4785,7 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
(!len || ((len & (blocksize - 1)) == 0)))
goto out;
- ret = btrfs_delalloc_reserve_space(inode,
+ ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
round_down(from, blocksize), blocksize);
if (ret)
goto out;
@@ -4784,7 +4793,7 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
again:
page = find_or_create_page(mapping, index, mask);
if (!page) {
- btrfs_delalloc_release_space(inode,
+ btrfs_delalloc_release_space(inode, data_reserved,
round_down(from, blocksize),
blocksize);
ret = -ENOMEM;
@@ -4856,11 +4865,12 @@ again:
out_unlock:
if (ret)
- btrfs_delalloc_release_space(inode, block_start,
+ btrfs_delalloc_release_space(inode, data_reserved, block_start,
blocksize);
unlock_page(page);
put_page(page);
out:
+ extent_changeset_free(data_reserved);
return ret;
}
@@ -5255,7 +5265,7 @@ static void evict_inode_truncate_pages(struct inode *inode)
* Note, end is the bytenr of last byte, so we need + 1 here.
*/
if (state->state & EXTENT_DELALLOC)
- btrfs_qgroup_free_data(inode, start, end - start + 1);
+ btrfs_qgroup_free_data(inode, NULL, start, end - start + 1);
clear_extent_bit(io_tree, start, end,
EXTENT_LOCKED | EXTENT_DIRTY |
@@ -5868,7 +5878,6 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_item *item;
struct btrfs_dir_item *di;
struct btrfs_key key;
struct btrfs_key found_key;
@@ -5919,7 +5928,6 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
continue;
}
- item = btrfs_item_nr(slot);
btrfs_item_key_to_cpu(leaf, &found_key, slot);
if (found_key.objectid != key.objectid)
@@ -5934,7 +5942,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
ctx->pos = found_key.offset;
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
- if (verify_dir_item(fs_info, leaf, di))
+ if (verify_dir_item(fs_info, leaf, slot, di))
goto next;
name_len = btrfs_dir_name_len(leaf, di);
@@ -7480,7 +7488,7 @@ out:
bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end)
{
struct radix_tree_root *root = &inode->i_mapping->page_tree;
- int found = false;
+ bool found = false;
void **pagep = NULL;
struct page *page = NULL;
unsigned long start_idx;
@@ -7978,9 +7986,12 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio,
bio_end_io_t *repair_endio, void *repair_arg)
{
struct io_failure_record *failrec;
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
struct bio *bio;
int isector;
int read_mode = 0;
+ int segs;
int ret;
BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
@@ -7992,13 +8003,13 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio,
ret = btrfs_check_dio_repairable(inode, failed_bio, failrec,
failed_mirror);
if (!ret) {
- free_io_failure(BTRFS_I(inode), failrec);
+ free_io_failure(failure_tree, io_tree, failrec);
return -EIO;
}
- if ((failed_bio->bi_vcnt > 1)
- || (failed_bio->bi_io_vec->bv_len
- > btrfs_inode_sectorsize(inode)))
+ segs = bio_segments(failed_bio);
+ if (segs > 1 ||
+ (failed_bio->bi_io_vec->bv_len > btrfs_inode_sectorsize(inode)))
read_mode |= REQ_FAILFAST_DEV;
isector = start - btrfs_io_bio(failed_bio)->logical;
@@ -8006,7 +8017,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio,
bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
pgoff, isector, repair_endio, repair_arg);
if (!bio) {
- free_io_failure(BTRFS_I(inode), failrec);
+ free_io_failure(failure_tree, io_tree, failrec);
return -EIO;
}
bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
@@ -8017,7 +8028,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio,
ret = submit_dio_repair_bio(inode, bio, failrec->this_mirror);
if (ret) {
- free_io_failure(BTRFS_I(inode), failrec);
+ free_io_failure(failure_tree, io_tree, failrec);
bio_put(bio);
}
@@ -8034,19 +8045,24 @@ struct btrfs_retry_complete {
static void btrfs_retry_endio_nocsum(struct bio *bio)
{
struct btrfs_retry_complete *done = bio->bi_private;
+ struct inode *inode = done->inode;
struct bio_vec *bvec;
+ struct extent_io_tree *io_tree, *failure_tree;
int i;
- if (bio->bi_error)
+ if (bio->bi_status)
goto end;
ASSERT(bio->bi_vcnt == 1);
- ASSERT(bio->bi_io_vec->bv_len == btrfs_inode_sectorsize(done->inode));
+ io_tree = &BTRFS_I(inode)->io_tree;
+ failure_tree = &BTRFS_I(inode)->io_failure_tree;
+ ASSERT(bio->bi_io_vec->bv_len == btrfs_inode_sectorsize(inode));
done->uptodate = 1;
bio_for_each_segment_all(bvec, bio, i)
- clean_io_failure(BTRFS_I(done->inode), done->start,
- bvec->bv_page, 0);
+ clean_io_failure(BTRFS_I(inode)->root->fs_info, failure_tree,
+ io_tree, done->start, bvec->bv_page,
+ btrfs_ino(BTRFS_I(inode)), 0);
end:
complete(&done->done);
bio_put(bio);
@@ -8056,36 +8072,40 @@ static int __btrfs_correct_data_nocsum(struct inode *inode,
struct btrfs_io_bio *io_bio)
{
struct btrfs_fs_info *fs_info;
- struct bio_vec *bvec;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
struct btrfs_retry_complete done;
u64 start;
unsigned int pgoff;
u32 sectorsize;
int nr_sectors;
- int i;
int ret;
+ int err = 0;
fs_info = BTRFS_I(inode)->root->fs_info;
sectorsize = fs_info->sectorsize;
start = io_bio->logical;
done.inode = inode;
+ io_bio->bio.bi_iter = io_bio->iter;
- bio_for_each_segment_all(bvec, &io_bio->bio, i) {
- nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);
- pgoff = bvec->bv_offset;
+ bio_for_each_segment(bvec, &io_bio->bio, iter) {
+ nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len);
+ pgoff = bvec.bv_offset;
next_block_or_try_again:
done.uptodate = 0;
done.start = start;
init_completion(&done.done);
- ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page,
+ ret = dio_read_error(inode, &io_bio->bio, bvec.bv_page,
pgoff, start, start + sectorsize - 1,
io_bio->mirror_num,
btrfs_retry_endio_nocsum, &done);
- if (ret)
- return ret;
+ if (ret) {
+ err = ret;
+ goto next;
+ }
wait_for_completion(&done.done);
@@ -8094,6 +8114,7 @@ next_block_or_try_again:
goto next_block_or_try_again;
}
+next:
start += sectorsize;
nr_sectors--;
@@ -8104,19 +8125,21 @@ next_block_or_try_again:
}
}
- return 0;
+ return err;
}
static void btrfs_retry_endio(struct bio *bio)
{
struct btrfs_retry_complete *done = bio->bi_private;
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
+ struct extent_io_tree *io_tree, *failure_tree;
+ struct inode *inode = done->inode;
struct bio_vec *bvec;
int uptodate;
int ret;
int i;
- if (bio->bi_error)
+ if (bio->bi_status)
goto end;
uptodate = 1;
@@ -8124,13 +8147,19 @@ static void btrfs_retry_endio(struct bio *bio)
ASSERT(bio->bi_vcnt == 1);
ASSERT(bio->bi_io_vec->bv_len == btrfs_inode_sectorsize(done->inode));
+ io_tree = &BTRFS_I(inode)->io_tree;
+ failure_tree = &BTRFS_I(inode)->io_failure_tree;
+
bio_for_each_segment_all(bvec, bio, i) {
- ret = __readpage_endio_check(done->inode, io_bio, i,
- bvec->bv_page, bvec->bv_offset,
- done->start, bvec->bv_len);
+ ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page,
+ bvec->bv_offset, done->start,
+ bvec->bv_len);
if (!ret)
- clean_io_failure(BTRFS_I(done->inode), done->start,
- bvec->bv_page, bvec->bv_offset);
+ clean_io_failure(BTRFS_I(inode)->root->fs_info,
+ failure_tree, io_tree, done->start,
+ bvec->bv_page,
+ btrfs_ino(BTRFS_I(inode)),
+ bvec->bv_offset);
else
uptodate = 0;
}
@@ -8141,11 +8170,12 @@ end:
bio_put(bio);
}
-static int __btrfs_subio_endio_read(struct inode *inode,
- struct btrfs_io_bio *io_bio, int err)
+static blk_status_t __btrfs_subio_endio_read(struct inode *inode,
+ struct btrfs_io_bio *io_bio, blk_status_t err)
{
struct btrfs_fs_info *fs_info;
- struct bio_vec *bvec;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
struct btrfs_retry_complete done;
u64 start;
u64 offset = 0;
@@ -8153,7 +8183,7 @@ static int __btrfs_subio_endio_read(struct inode *inode,
int nr_sectors;
unsigned int pgoff;
int csum_pos;
- int i;
+ bool uptodate = (err == 0);
int ret;
fs_info = BTRFS_I(inode)->root->fs_info;
@@ -8162,29 +8192,31 @@ static int __btrfs_subio_endio_read(struct inode *inode,
err = 0;
start = io_bio->logical;
done.inode = inode;
+ io_bio->bio.bi_iter = io_bio->iter;
- bio_for_each_segment_all(bvec, &io_bio->bio, i) {
- nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);
+ bio_for_each_segment(bvec, &io_bio->bio, iter) {
+ nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len);
- pgoff = bvec->bv_offset;
+ pgoff = bvec.bv_offset;
next_block:
- csum_pos = BTRFS_BYTES_TO_BLKS(fs_info, offset);
- ret = __readpage_endio_check(inode, io_bio, csum_pos,
- bvec->bv_page, pgoff, start,
- sectorsize);
- if (likely(!ret))
- goto next;
+ if (uptodate) {
+ csum_pos = BTRFS_BYTES_TO_BLKS(fs_info, offset);
+ ret = __readpage_endio_check(inode, io_bio, csum_pos,
+ bvec.bv_page, pgoff, start, sectorsize);
+ if (likely(!ret))
+ goto next;
+ }
try_again:
done.uptodate = 0;
done.start = start;
init_completion(&done.done);
- ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page,
+ ret = dio_read_error(inode, &io_bio->bio, bvec.bv_page,
pgoff, start, start + sectorsize - 1,
io_bio->mirror_num,
btrfs_retry_endio, &done);
if (ret) {
- err = ret;
+ err = errno_to_blk_status(ret);
goto next;
}
@@ -8211,8 +8243,8 @@ next:
return err;
}
-static int btrfs_subio_endio_read(struct inode *inode,
- struct btrfs_io_bio *io_bio, int err)
+static blk_status_t btrfs_subio_endio_read(struct inode *inode,
+ struct btrfs_io_bio *io_bio, blk_status_t err)
{
bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
@@ -8232,10 +8264,13 @@ static void btrfs_endio_direct_read(struct bio *bio)
struct inode *inode = dip->inode;
struct bio *dio_bio;
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
- int err = bio->bi_error;
+ blk_status_t err = bio->bi_status;
- if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
+ if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) {
err = btrfs_subio_endio_read(inode, io_bio, err);
+ if (!err)
+ bio->bi_status = 0;
+ }
unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
dip->logical_offset + dip->bytes - 1);
@@ -8243,11 +8278,11 @@ static void btrfs_endio_direct_read(struct bio *bio)
kfree(dip);
- dio_bio->bi_error = bio->bi_error;
- dio_end_io(dio_bio, bio->bi_error);
+ dio_bio->bi_status = bio->bi_status;
+ dio_end_io(dio_bio);
if (io_bio->end_io)
- io_bio->end_io(io_bio, err);
+ io_bio->end_io(io_bio, blk_status_to_errno(err));
bio_put(bio);
}
@@ -8299,20 +8334,21 @@ static void btrfs_endio_direct_write(struct bio *bio)
struct bio *dio_bio = dip->dio_bio;
__endio_write_update_ordered(dip->inode, dip->logical_offset,
- dip->bytes, !bio->bi_error);
+ dip->bytes, !bio->bi_status);
kfree(dip);
- dio_bio->bi_error = bio->bi_error;
- dio_end_io(dio_bio, bio->bi_error);
+ dio_bio->bi_status = bio->bi_status;
+ dio_end_io(dio_bio);
bio_put(bio);
}
-static int __btrfs_submit_bio_start_direct_io(struct inode *inode,
+static blk_status_t __btrfs_submit_bio_start_direct_io(void *private_data,
struct bio *bio, int mirror_num,
unsigned long bio_flags, u64 offset)
{
- int ret;
+ struct inode *inode = private_data;
+ blk_status_t ret;
ret = btrfs_csum_one_bio(inode, bio, offset, 1);
BUG_ON(ret); /* -ENOMEM */
return 0;
@@ -8321,7 +8357,7 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode,
static void btrfs_end_dio_bio(struct bio *bio)
{
struct btrfs_dio_private *dip = bio->bi_private;
- int err = bio->bi_error;
+ blk_status_t err = bio->bi_status;
if (err)
btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
@@ -8351,31 +8387,21 @@ static void btrfs_end_dio_bio(struct bio *bio)
if (dip->errors) {
bio_io_error(dip->orig_bio);
} else {
- dip->dio_bio->bi_error = 0;
+ dip->dio_bio->bi_status = 0;
bio_endio(dip->orig_bio);
}
out:
bio_put(bio);
}
-static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
- u64 first_sector, gfp_t gfp_flags)
-{
- struct bio *bio;
- bio = btrfs_bio_alloc(bdev, first_sector, BIO_MAX_PAGES, gfp_flags);
- if (bio)
- bio_associate_current(bio);
- return bio;
-}
-
-static inline int btrfs_lookup_and_bind_dio_csum(struct inode *inode,
+static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode,
struct btrfs_dio_private *dip,
struct bio *bio,
u64 file_offset)
{
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio);
- int ret;
+ blk_status_t ret;
/*
* We load all the csum data we need when we submit
@@ -8406,7 +8432,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_dio_private *dip = bio->bi_private;
bool write = bio_op(bio) == REQ_OP_WRITE;
- int ret;
+ blk_status_t ret;
if (async_submit)
async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
@@ -8423,8 +8449,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
goto map;
if (write && async_submit) {
- ret = btrfs_wq_submit_bio(fs_info, inode, bio, 0, 0,
- file_offset,
+ ret = btrfs_wq_submit_bio(fs_info, bio, 0, 0,
+ file_offset, inode,
__btrfs_submit_bio_start_direct_io,
__btrfs_submit_bio_done);
goto err;
@@ -8454,103 +8480,83 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip,
{
struct inode *inode = dip->inode;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_root *root = BTRFS_I(inode)->root;
struct bio *bio;
struct bio *orig_bio = dip->orig_bio;
- struct bio_vec *bvec;
u64 start_sector = orig_bio->bi_iter.bi_sector;
u64 file_offset = dip->logical_offset;
- u64 submit_len = 0;
u64 map_length;
- u32 blocksize = fs_info->sectorsize;
int async_submit = 0;
- int nr_sectors;
+ u64 submit_len;
+ int clone_offset = 0;
+ int clone_len;
int ret;
- int i, j;
map_length = orig_bio->bi_iter.bi_size;
+ submit_len = map_length;
ret = btrfs_map_block(fs_info, btrfs_op(orig_bio), start_sector << 9,
&map_length, NULL, 0);
if (ret)
return -EIO;
- if (map_length >= orig_bio->bi_iter.bi_size) {
+ if (map_length >= submit_len) {
bio = orig_bio;
dip->flags |= BTRFS_DIO_ORIG_BIO_SUBMITTED;
goto submit;
}
/* async crcs make it difficult to collect full stripe writes. */
- if (btrfs_get_alloc_profile(root, 1) & BTRFS_BLOCK_GROUP_RAID56_MASK)
+ if (btrfs_data_alloc_profile(fs_info) & BTRFS_BLOCK_GROUP_RAID56_MASK)
async_submit = 0;
else
async_submit = 1;
- bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
- if (!bio)
- return -ENOMEM;
-
- bio->bi_opf = orig_bio->bi_opf;
- bio->bi_private = dip;
- bio->bi_end_io = btrfs_end_dio_bio;
- btrfs_io_bio(bio)->logical = file_offset;
+ /* bio split */
+ ASSERT(map_length <= INT_MAX);
atomic_inc(&dip->pending_bios);
+ do {
+ clone_len = min_t(int, submit_len, map_length);
- bio_for_each_segment_all(bvec, orig_bio, j) {
- nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);
- i = 0;
-next_block:
- if (unlikely(map_length < submit_len + blocksize ||
- bio_add_page(bio, bvec->bv_page, blocksize,
- bvec->bv_offset + (i * blocksize)) < blocksize)) {
- /*
- * inc the count before we submit the bio so
- * we know the end IO handler won't happen before
- * we inc the count. Otherwise, the dip might get freed
- * before we're done setting it up
- */
- atomic_inc(&dip->pending_bios);
- ret = __btrfs_submit_dio_bio(bio, inode,
- file_offset, skip_sum,
- async_submit);
- if (ret) {
- bio_put(bio);
- atomic_dec(&dip->pending_bios);
- goto out_err;
- }
-
- start_sector += submit_len >> 9;
- file_offset += submit_len;
+ /*
+ * This will never fail as it's passing GFP_NOFS and
+ * the allocation is backed by btrfs_bioset.
+ */
+ bio = btrfs_bio_clone_partial(orig_bio, clone_offset,
+ clone_len);
+ bio->bi_private = dip;
+ bio->bi_end_io = btrfs_end_dio_bio;
+ btrfs_io_bio(bio)->logical = file_offset;
+
+ ASSERT(submit_len >= clone_len);
+ submit_len -= clone_len;
+ if (submit_len == 0)
+ break;
- submit_len = 0;
+ /*
+ * Increase the count before we submit the bio so we know
+ * the end IO handler won't happen before we increase the
+ * count. Otherwise, the dip might get freed before we're
+ * done setting it up.
+ */
+ atomic_inc(&dip->pending_bios);
- bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev,
- start_sector, GFP_NOFS);
- if (!bio)
- goto out_err;
- bio->bi_opf = orig_bio->bi_opf;
- bio->bi_private = dip;
- bio->bi_end_io = btrfs_end_dio_bio;
- btrfs_io_bio(bio)->logical = file_offset;
+ ret = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum,
+ async_submit);
+ if (ret) {
+ bio_put(bio);
+ atomic_dec(&dip->pending_bios);
+ goto out_err;
+ }
- map_length = orig_bio->bi_iter.bi_size;
- ret = btrfs_map_block(fs_info, btrfs_op(orig_bio),
- start_sector << 9,
- &map_length, NULL, 0);
- if (ret) {
- bio_put(bio);
- goto out_err;
- }
+ clone_offset += clone_len;
+ start_sector += clone_len >> 9;
+ file_offset += clone_len;
- goto next_block;
- } else {
- submit_len += blocksize;
- if (--nr_sectors) {
- i++;
- goto next_block;
- }
- }
- }
+ map_length = submit_len;
+ ret = btrfs_map_block(fs_info, btrfs_op(orig_bio),
+ start_sector << 9, &map_length, NULL, 0);
+ if (ret)
+ goto out_err;
+ } while (submit_len > 0);
submit:
ret = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum,
@@ -8577,19 +8583,15 @@ static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
loff_t file_offset)
{
struct btrfs_dio_private *dip = NULL;
- struct bio *io_bio = NULL;
- struct btrfs_io_bio *btrfs_bio;
+ struct bio *bio = NULL;
+ struct btrfs_io_bio *io_bio;
int skip_sum;
bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
int ret = 0;
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
- io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
- if (!io_bio) {
- ret = -ENOMEM;
- goto free_ordered;
- }
+ bio = btrfs_bio_clone(dio_bio);
dip = kzalloc(sizeof(*dip), GFP_NOFS);
if (!dip) {
@@ -8602,17 +8604,17 @@ static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
dip->logical_offset = file_offset;
dip->bytes = dio_bio->bi_iter.bi_size;
dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9;
- io_bio->bi_private = dip;
- dip->orig_bio = io_bio;
+ bio->bi_private = dip;
+ dip->orig_bio = bio;
dip->dio_bio = dio_bio;
atomic_set(&dip->pending_bios, 0);
- btrfs_bio = btrfs_io_bio(io_bio);
- btrfs_bio->logical = file_offset;
+ io_bio = btrfs_io_bio(bio);
+ io_bio->logical = file_offset;
if (write) {
- io_bio->bi_end_io = btrfs_endio_direct_write;
+ bio->bi_end_io = btrfs_endio_direct_write;
} else {
- io_bio->bi_end_io = btrfs_endio_direct_read;
+ bio->bi_end_io = btrfs_endio_direct_read;
dip->subio_endio = btrfs_subio_endio_read;
}
@@ -8635,8 +8637,8 @@ static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
if (!ret)
return;
- if (btrfs_bio->end_io)
- btrfs_bio->end_io(btrfs_bio, ret);
+ if (io_bio->end_io)
+ io_bio->end_io(io_bio, ret);
free_ordered:
/*
@@ -8648,16 +8650,15 @@ free_ordered:
* same as btrfs_endio_direct_[write|read] because we can't call these
* callbacks - they require an allocated dip and a clone of dio_bio.
*/
- if (io_bio && dip) {
- io_bio->bi_error = -EIO;
- bio_endio(io_bio);
+ if (bio && dip) {
+ bio_io_error(bio);
/*
- * The end io callbacks free our dip, do the final put on io_bio
+ * The end io callbacks free our dip, do the final put on bio
* and all the cleanup and final put for dio_bio (through
* dio_end_io()).
*/
dip = NULL;
- io_bio = NULL;
+ bio = NULL;
} else {
if (write)
__endio_write_update_ordered(inode,
@@ -8668,15 +8669,15 @@ free_ordered:
unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
file_offset + dio_bio->bi_iter.bi_size - 1);
- dio_bio->bi_error = -EIO;
+ dio_bio->bi_status = BLK_STS_IOERR;
/*
* Releases and cleans up our dio_bio, no need to bio_put()
* nor bio_endio()/bio_io_error() against dio_bio.
*/
- dio_end_io(dio_bio, ret);
+ dio_end_io(dio_bio);
}
- if (io_bio)
- bio_put(io_bio);
+ if (bio)
+ bio_put(bio);
kfree(dip);
}
@@ -8720,6 +8721,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
struct inode *inode = file->f_mapping->host;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_dio_data dio_data = { 0 };
+ struct extent_changeset *data_reserved = NULL;
loff_t offset = iocb->ki_pos;
size_t count = 0;
int flags = 0;
@@ -8755,8 +8757,12 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
dio_data.overwrite = 1;
inode_unlock(inode);
relock = true;
+ } else if (iocb->ki_flags & IOCB_NOWAIT) {
+ ret = -EAGAIN;
+ goto out;
}
- ret = btrfs_delalloc_reserve_space(inode, offset, count);
+ ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
+ offset, count);
if (ret)
goto out;
dio_data.outstanding_extents = count_max_extents(count);
@@ -8788,8 +8794,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
current->journal_info = NULL;
if (ret < 0 && ret != -EIOCBQUEUED) {
if (dio_data.reserve)
- btrfs_delalloc_release_space(inode, offset,
- dio_data.reserve);
+ btrfs_delalloc_release_space(inode, data_reserved,
+ offset, dio_data.reserve);
/*
* On error we might have left some ordered extents
* without submitting corresponding bios for them, so
@@ -8804,8 +8810,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
dio_data.unsubmitted_oe_range_start,
false);
} else if (ret >= 0 && (size_t)ret < count)
- btrfs_delalloc_release_space(inode, offset,
- count - (size_t)ret);
+ btrfs_delalloc_release_space(inode, data_reserved,
+ offset, count - (size_t)ret);
}
out:
if (wakeup)
@@ -8813,6 +8819,7 @@ out:
if (relock)
inode_lock(inode);
+ extent_changeset_free(data_reserved);
return ret;
}
@@ -9003,7 +9010,7 @@ again:
* free the entire extent.
*/
if (PageDirty(page))
- btrfs_qgroup_free_data(inode, page_start, PAGE_SIZE);
+ btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
if (!inode_evicting) {
clear_extent_bit(tree, page_start, page_end,
EXTENT_LOCKED | EXTENT_DIRTY |
@@ -9045,6 +9052,7 @@ int btrfs_page_mkwrite(struct vm_fault *vmf)
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_ordered_extent *ordered;
struct extent_state *cached_state = NULL;
+ struct extent_changeset *data_reserved = NULL;
char *kaddr;
unsigned long zero_start;
loff_t size;
@@ -9070,7 +9078,7 @@ int btrfs_page_mkwrite(struct vm_fault *vmf)
* end up waiting indefinitely to get a lock on the page currently
* being processed by btrfs_page_mkwrite() function.
*/
- ret = btrfs_delalloc_reserve_space(inode, page_start,
+ ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
reserved_space);
if (!ret) {
ret = file_update_time(vmf->vma->vm_file);
@@ -9124,8 +9132,8 @@ again:
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
- btrfs_delalloc_release_space(inode, page_start,
- PAGE_SIZE - reserved_space);
+ btrfs_delalloc_release_space(inode, data_reserved,
+ page_start, PAGE_SIZE - reserved_space);
}
}
@@ -9176,13 +9184,16 @@ again:
out_unlock:
if (!ret) {
sb_end_pagefault(inode->i_sb);
+ extent_changeset_free(data_reserved);
return VM_FAULT_LOCKED;
}
unlock_page(page);
out:
- btrfs_delalloc_release_space(inode, page_start, reserved_space);
+ btrfs_delalloc_release_space(inode, data_reserved, page_start,
+ reserved_space);
out_noreserve:
sb_end_pagefault(inode->i_sb);
+ extent_changeset_free(data_reserved);
return ret;
}
@@ -9404,8 +9415,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
inode = &ei->vfs_inode;
extent_map_tree_init(&ei->extent_tree);
- extent_io_tree_init(&ei->io_tree, &inode->i_data);
- extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
+ extent_io_tree_init(&ei->io_tree, inode);
+ extent_io_tree_init(&ei->io_failure_tree, inode);
ei->io_tree.track_uptodate = 1;
ei->io_failure_tree.track_uptodate = 1;
atomic_set(&ei->sync_writers, 0);
@@ -9514,7 +9525,6 @@ void btrfs_destroy_cachep(void)
rcu_barrier();
kmem_cache_destroy(btrfs_inode_cachep);
kmem_cache_destroy(btrfs_trans_handle_cachep);
- kmem_cache_destroy(btrfs_transaction_cachep);
kmem_cache_destroy(btrfs_path_cachep);
kmem_cache_destroy(btrfs_free_space_cachep);
}
@@ -9534,12 +9544,6 @@ int btrfs_init_cachep(void)
if (!btrfs_trans_handle_cachep)
goto fail;
- btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction",
- sizeof(struct btrfs_transaction), 0,
- SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL);
- if (!btrfs_transaction_cachep)
- goto fail;
-
btrfs_path_cachep = kmem_cache_create("btrfs_path",
sizeof(struct btrfs_path), 0,
SLAB_MEM_SPREAD, NULL);
@@ -9564,6 +9568,24 @@ static int btrfs_getattr(const struct path *path, struct kstat *stat,
u64 delalloc_bytes;
struct inode *inode = d_inode(path->dentry);
u32 blocksize = inode->i_sb->s_blocksize;
+ u32 bi_flags = BTRFS_I(inode)->flags;
+
+ stat->result_mask |= STATX_BTIME;
+ stat->btime.tv_sec = BTRFS_I(inode)->i_otime.tv_sec;
+ stat->btime.tv_nsec = BTRFS_I(inode)->i_otime.tv_nsec;
+ if (bi_flags & BTRFS_INODE_APPEND)
+ stat->attributes |= STATX_ATTR_APPEND;
+ if (bi_flags & BTRFS_INODE_COMPRESS)
+ stat->attributes |= STATX_ATTR_COMPRESSED;
+ if (bi_flags & BTRFS_INODE_IMMUTABLE)
+ stat->attributes |= STATX_ATTR_IMMUTABLE;
+ if (bi_flags & BTRFS_INODE_NODUMP)
+ stat->attributes |= STATX_ATTR_NODUMP;
+
+ stat->attributes_mask |= (STATX_ATTR_APPEND |
+ STATX_ATTR_COMPRESSED |
+ STATX_ATTR_IMMUTABLE |
+ STATX_ATTR_NODUMP);
generic_fillattr(inode, stat);
stat->dev = BTRFS_I(inode)->root->anon_dev;
@@ -10538,7 +10560,7 @@ next:
btrfs_end_transaction(trans);
}
if (cur_offset < end)
- btrfs_free_reserved_data_space(inode, cur_offset,
+ btrfs_free_reserved_data_space(inode, NULL, cur_offset,
end - cur_offset + 1);
return ret;
}
@@ -10659,6 +10681,42 @@ static int btrfs_readpage_io_failed_hook(struct page *page, int failed_mirror)
return -EAGAIN;
}
+static struct btrfs_fs_info *iotree_fs_info(void *private_data) /* extent_io_ops.tree_fs_info callback */
+{
+ struct inode *inode = private_data; /* private_data is treated as a struct inode */
+ return btrfs_sb(inode->i_sb); /* fs_info is looked up via the inode's super block */
+}
+
+static void btrfs_check_extent_io_range(void *private_data, const char *caller,
+ u64 start, u64 end) /* extent_io_ops.check_extent_io_range callback */
+{
+ struct inode *inode = private_data; /* private_data is treated as a struct inode */
+ u64 isize; /* i_size of @inode, read below */
+
+ isize = i_size_read(inode);
+ if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { /* @end is even and not the file's last byte: log it */
+ btrfs_debug_rl(BTRFS_I(inode)->root->fs_info,
+ "%s: ino %llu isize %llu odd range [%llu,%llu]",
+ caller, btrfs_ino(BTRFS_I(inode)), isize, start, end);
+ }
+}
+
+void btrfs_set_range_writeback(void *private_data, u64 start, u64 end) /* extent_io_ops.set_range_writeback callback */
+{
+ struct inode *inode = private_data; /* private_data is treated as a struct inode */
+ unsigned long index = start >> PAGE_SHIFT; /* first page index of the range */
+ unsigned long end_index = end >> PAGE_SHIFT; /* last page index (inclusive) */
+ struct page *page;
+
+ while (index <= end_index) { /* set writeback on every page of [start, end] */
+ page = find_get_page(inode->i_mapping, index);
+ ASSERT(page); /* Pages should be in the extent_io_tree */
+ set_page_writeback(page);
+ put_page(page); /* drop the reference taken by find_get_page() */
+ index++;
+ }
+}
+
static const struct inode_operations btrfs_dir_inode_operations = {
.getattr = btrfs_getattr,
.lookup = btrfs_lookup,
@@ -10702,6 +10760,8 @@ static const struct extent_io_ops btrfs_extent_io_ops = {
.readpage_end_io_hook = btrfs_readpage_end_io_hook,
.merge_bio_hook = btrfs_merge_bio_hook,
.readpage_io_failed_hook = btrfs_readpage_io_failed_hook,
+ .tree_fs_info = iotree_fs_info,
+ .set_range_writeback = btrfs_set_range_writeback,
/* optional callbacks */
.fill_delalloc = run_delalloc_range,
@@ -10711,6 +10771,7 @@ static const struct extent_io_ops btrfs_extent_io_ops = {
.clear_bit_hook = btrfs_clear_bit_hook,
.merge_extent_hook = btrfs_merge_extent_hook,
.split_extent_hook = btrfs_split_extent_hook,
+ .check_extent_io_range = btrfs_check_extent_io_range,
};
/*
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index e176375f374f..fa1b78cf25f6 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -37,7 +37,7 @@
#include <linux/bit_spinlock.h>
#include <linux/security.h>
#include <linux/xattr.h>
-#include <linux/vmalloc.h>
+#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/uuid.h>
@@ -689,7 +689,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
if (ret)
goto dec_and_free;
- btrfs_wait_ordered_extents(root, -1, 0, (u64)-1);
+ btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
btrfs_init_block_rsv(&pending_snapshot->block_rsv,
BTRFS_BLOCK_RSV_TEMP);
@@ -1127,6 +1127,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
struct btrfs_ordered_extent *ordered;
struct extent_state *cached_state = NULL;
struct extent_io_tree *tree;
+ struct extent_changeset *data_reserved = NULL;
gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
file_end = (isize - 1) >> PAGE_SHIFT;
@@ -1135,7 +1136,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1);
- ret = btrfs_delalloc_reserve_space(inode,
+ ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
start_index << PAGE_SHIFT,
page_cnt << PAGE_SHIFT);
if (ret)
@@ -1226,7 +1227,7 @@ again:
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
- btrfs_delalloc_release_space(inode,
+ btrfs_delalloc_release_space(inode, data_reserved,
start_index << PAGE_SHIFT,
(page_cnt - i_done) << PAGE_SHIFT);
}
@@ -1247,15 +1248,17 @@ again:
unlock_page(pages[i]);
put_page(pages[i]);
}
+ extent_changeset_free(data_reserved);
return i_done;
out:
for (i = 0; i < i_done; i++) {
unlock_page(pages[i]);
put_page(pages[i]);
}
- btrfs_delalloc_release_space(inode,
+ btrfs_delalloc_release_space(inode, data_reserved,
start_index << PAGE_SHIFT,
page_cnt << PAGE_SHIFT);
+ extent_changeset_free(data_reserved);
return ret;
}
@@ -4588,7 +4591,7 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
out:
btrfs_free_path(path);
- vfree(inodes);
+ kvfree(inodes);
kfree(loi);
return ret;
@@ -4897,7 +4900,6 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
goto out;
}
- /* FIXME: check if the IDs really exist */
if (sa->assign) {
ret = btrfs_add_qgroup_relation(trans, fs_info,
sa->src, sa->dst);
@@ -4956,7 +4958,6 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
goto out;
}
- /* FIXME: check if the IDs really exist */
if (sa->create) {
ret = btrfs_create_qgroup(trans, fs_info, sa->qgroupid);
} else {
@@ -5010,7 +5011,6 @@ static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg)
qgroupid = root->root_key.objectid;
}
- /* FIXME: check if the IDs really exist */
ret = btrfs_limit_qgroup(trans, fs_info, qgroupid, &sa->lim);
err = btrfs_end_transaction(trans);
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index f48c8c14dc14..d433e75d489a 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -18,13 +18,14 @@
#include <linux/kernel.h>
#include <linux/slab.h>
-#include <linux/vmalloc.h>
+#include <linux/mm.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/bio.h>
#include <linux/lzo.h>
+#include <linux/refcount.h>
#include "compression.h"
#define LZO_LEN 4
@@ -40,9 +41,9 @@ static void lzo_free_workspace(struct list_head *ws)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
- vfree(workspace->buf);
- vfree(workspace->cbuf);
- vfree(workspace->mem);
+ kvfree(workspace->buf);
+ kvfree(workspace->cbuf);
+ kvfree(workspace->mem);
kfree(workspace);
}
@@ -50,13 +51,13 @@ static struct list_head *lzo_alloc_workspace(void)
{
struct workspace *workspace;
- workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
+ workspace = kzalloc(sizeof(*workspace), GFP_KERNEL);
if (!workspace)
return ERR_PTR(-ENOMEM);
- workspace->mem = vmalloc(LZO1X_MEM_COMPRESS);
- workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_SIZE));
- workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_SIZE));
+ workspace->mem = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
+ workspace->buf = kvmalloc(lzo1x_worst_compress(PAGE_SIZE), GFP_KERNEL);
+ workspace->cbuf = kvmalloc(lzo1x_worst_compress(PAGE_SIZE), GFP_KERNEL);
if (!workspace->mem || !workspace->buf || !workspace->cbuf)
goto fail;
@@ -141,7 +142,7 @@ static int lzo_compress_pages(struct list_head *ws,
ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf,
&out_len, workspace->mem);
if (ret != LZO_E_OK) {
- pr_debug("BTRFS: deflate in loop returned %d\n",
+ pr_debug("BTRFS: lzo in loop returned %d\n",
ret);
ret = -EIO;
goto out;
@@ -229,8 +230,10 @@ static int lzo_compress_pages(struct list_head *ws,
in_len = min(bytes_left, PAGE_SIZE);
}
- if (tot_out > tot_in)
+ if (tot_out >= tot_in) {
+ ret = -E2BIG;
goto out;
+ }
/* store the size of all chunks of compressed data */
cpage_out = kmap(pages[0]);
@@ -254,16 +257,13 @@ out:
return ret;
}
-static int lzo_decompress_bio(struct list_head *ws,
- struct page **pages_in,
- u64 disk_start,
- struct bio *orig_bio,
- size_t srclen)
+static int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
int ret = 0, ret2;
char *data_in;
unsigned long page_in_index = 0;
+ size_t srclen = cb->compressed_len;
unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
unsigned long buf_start;
unsigned long buf_offset = 0;
@@ -278,6 +278,9 @@ static int lzo_decompress_bio(struct list_head *ws,
unsigned long tot_len;
char *buf;
bool may_late_unmap, need_unmap;
+ struct page **pages_in = cb->compressed_pages;
+ u64 disk_start = cb->start;
+ struct bio *orig_bio = cb->orig_bio;
data_in = kmap(pages_in[0]);
tot_len = read_compress_length(data_in);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 7b40e2e7292a..a3aca495e33e 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -663,7 +663,7 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
* wait for all the ordered extents in a root. This is done when balancing
* space between drives.
*/
-int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
+u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
const u64 range_start, const u64 range_len)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -671,7 +671,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
LIST_HEAD(skipped);
LIST_HEAD(works);
struct btrfs_ordered_extent *ordered, *next;
- int count = 0;
+ u64 count = 0;
const u64 range_end = range_start + range_len;
mutex_lock(&root->ordered_extent_mutex);
@@ -701,7 +701,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
cond_resched();
spin_lock(&root->ordered_extent_lock);
- if (nr != -1)
+ if (nr != U64_MAX)
nr--;
count++;
}
@@ -720,13 +720,13 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
return count;
}
-int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
- const u64 range_start, const u64 range_len)
+u64 btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
+ const u64 range_start, const u64 range_len)
{
struct btrfs_root *root;
struct list_head splice;
- int done;
- int total_done = 0;
+ u64 total_done = 0;
+ u64 done;
INIT_LIST_HEAD(&splice);
@@ -748,9 +748,8 @@ int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
total_done += done;
spin_lock(&fs_info->ordered_root_lock);
- if (nr != -1) {
+ if (nr != U64_MAX) {
nr -= done;
- WARN_ON(nr < 0);
}
}
list_splice_tail(&splice, &fs_info->ordered_roots);
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index e0c1d5b8d859..56c4c0ee6381 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -200,9 +200,9 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered);
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
u32 *sum, int len);
-int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
+u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
const u64 range_start, const u64 range_len);
-int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
+u64 btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
const u64 range_start, const u64 range_len);
void btrfs_get_logged_extents(struct btrfs_inode *inode,
struct list_head *logged_list,
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index cdafbf92ef0c..fcae61e175f3 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -261,8 +261,11 @@ void btrfs_print_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *l)
case BTRFS_BLOCK_GROUP_ITEM_KEY:
bi = btrfs_item_ptr(l, i,
struct btrfs_block_group_item);
- pr_info("\t\tblock group used %llu\n",
- btrfs_disk_block_group_used(l, bi));
+ pr_info(
+ "\t\tblock group used %llu chunk_objectid %llu flags %llu\n",
+ btrfs_disk_block_group_used(l, bi),
+ btrfs_disk_block_group_chunk_objectid(l, bi),
+ btrfs_disk_block_group_flags(l, bi));
break;
case BTRFS_CHUNK_ITEM_KEY:
print_chunk(l, btrfs_item_ptr(l, i,
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index d6cb155ef7a1..4b23ae5d0e5c 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -164,6 +164,7 @@ static int iterate_object_props(struct btrfs_root *root,
size_t),
void *ctx)
{
+ struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
char *name_buf = NULL;
char *value_buf = NULL;
@@ -214,6 +215,12 @@ static int iterate_object_props(struct btrfs_root *root,
name_ptr = (unsigned long)(di + 1);
data_ptr = name_ptr + name_len;
+ if (verify_dir_item(fs_info, leaf,
+ path->slots[0], di)) {
+ ret = -EIO;
+ goto out;
+ }
+
if (name_len <= XATTR_BTRFS_PREFIX_LEN ||
memcmp_extent_buffer(leaf, XATTR_BTRFS_PREFIX,
name_ptr,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index deffbeb74a0b..4ce351efe281 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1406,38 +1406,6 @@ out:
return ret;
}
-int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
-{
- struct btrfs_qgroup_extent_record *record;
- struct btrfs_delayed_ref_root *delayed_refs;
- struct rb_node *node;
- u64 qgroup_to_skip;
- int ret = 0;
-
- delayed_refs = &trans->transaction->delayed_refs;
- qgroup_to_skip = delayed_refs->qgroup_to_skip;
-
- /*
- * No need to do lock, since this function will only be called in
- * btrfs_commit_transaction().
- */
- node = rb_first(&delayed_refs->dirty_extent_root);
- while (node) {
- record = rb_entry(node, struct btrfs_qgroup_extent_record,
- node);
- if (WARN_ON(!record->old_roots))
- ret = btrfs_find_all_roots(NULL, fs_info,
- record->bytenr, 0, &record->old_roots);
- if (ret < 0)
- break;
- if (qgroup_to_skip)
- ulist_del(record->old_roots, qgroup_to_skip, 0);
- node = rb_next(node);
- }
- return ret;
-}
-
int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_qgroup_extent_record *record)
@@ -1559,6 +1527,7 @@ int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
if (ret)
return ret;
}
+ cond_resched();
return 0;
}
@@ -1918,6 +1887,35 @@ static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
return 0;
}
+/*
+ * Check whether @roots could be a list of fs/subvolume tree roots.
+ *
+ * Return 0 if the first entry of @roots is not a fs/subvol tree (is_fstree()).
+ * Return 1 if it is, or if @roots is NULL or empty (an empty list may still
+ * belong to fs/subvol trees).
+ */
+static int maybe_fs_roots(struct ulist *roots)
+{
+ struct ulist_node *unode;
+ struct ulist_iterator uiter;
+
+ /* NULL or empty list: still possible for fs roots */
+ if (!roots || roots->nnodes == 0)
+ return 1;
+
+ ULIST_ITER_INIT(&uiter);
+ unode = ulist_next(roots, &uiter);
+ if (!unode)
+ return 1;
+
+ /*
+ * Only the first entry is checked: if it is a fs tree root, the extent
+ * must belong to fs/subvol trees; if it is a non-fs tree, the extent
+ * won't be shared with fs/subvol trees.
+ */
+ return is_fstree(unode->val);
+}
+
int
btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
@@ -1934,10 +1932,20 @@ btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
return 0;
- if (new_roots)
+ if (new_roots) {
+ if (!maybe_fs_roots(new_roots))
+ goto out_free;
nr_new_roots = new_roots->nnodes;
- if (old_roots)
+ }
+ if (old_roots) {
+ if (!maybe_fs_roots(old_roots))
+ goto out_free;
nr_old_roots = old_roots->nnodes;
+ }
+
+ /* Quick exit, either not fs tree roots, or won't affect any qgroup */
+ if (nr_old_roots == 0 && nr_new_roots == 0)
+ goto out_free;
BUG_ON(!fs_info->quota_root);
@@ -2017,6 +2025,19 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
if (!ret) {
/*
+ * Old roots should be searched when inserting qgroup
+ * extent record
+ */
+ if (WARN_ON(!record->old_roots)) {
+ /* Search commit root to find old_roots */
+ ret = btrfs_find_all_roots(NULL, fs_info,
+ record->bytenr, 0,
+ &record->old_roots);
+ if (ret < 0)
+ goto cleanup;
+ }
+
+ /*
* Use SEQ_LAST as time_seq to do special search, which
* doesn't lock tree or delayed_refs and search current
* root. It's safe inside commit_transaction().
@@ -2025,8 +2046,11 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
record->bytenr, SEQ_LAST, &new_roots);
if (ret < 0)
goto cleanup;
- if (qgroup_to_skip)
+ if (qgroup_to_skip) {
ulist_del(new_roots, qgroup_to_skip, 0);
+ ulist_del(record->old_roots, qgroup_to_skip,
+ 0);
+ }
ret = btrfs_qgroup_account_extent(trans, fs_info,
record->bytenr, record->num_bytes,
record->old_roots, new_roots);
@@ -2338,6 +2362,11 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
if (num_bytes == 0)
return 0;
+
+ if (test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags) &&
+ capable(CAP_SYS_RESOURCE))
+ enforce = false;
+
retry:
spin_lock(&fs_info->qgroup_lock);
quota_root = fs_info->quota_root;
@@ -2376,7 +2405,7 @@ retry:
ret = btrfs_start_delalloc_inodes(root, 0);
if (ret)
return ret;
- btrfs_wait_ordered_extents(root, -1, 0, (u64)-1);
+ btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
@@ -2806,55 +2835,130 @@ btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
* Return <0 for error (including -EQUOT)
*
* NOTE: this function may sleep for memory allocation.
+ * If btrfs_qgroup_reserve_data() is called multiple times with the
+ * same @reserved, the caller must ensure that on error it is OK
+ * to free *ALL* of the reserved space.
*/
-int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len)
+int btrfs_qgroup_reserve_data(struct inode *inode,
+ struct extent_changeset **reserved_ret, u64 start,
+ u64 len)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_changeset changeset;
struct ulist_node *unode;
struct ulist_iterator uiter;
+ struct extent_changeset *reserved;
+ u64 orig_reserved;
+ u64 to_reserve;
int ret;
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) ||
!is_fstree(root->objectid) || len == 0)
return 0;
- changeset.bytes_changed = 0;
- ulist_init(&changeset.range_changed);
+ /* @reserved parameter is mandatory for qgroup */
+ if (WARN_ON(!reserved_ret))
+ return -EINVAL;
+ if (!*reserved_ret) {
+ *reserved_ret = extent_changeset_alloc();
+ if (!*reserved_ret)
+ return -ENOMEM;
+ }
+ reserved = *reserved_ret;
+ /* Record already reserved space */
+ orig_reserved = reserved->bytes_changed;
ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
- start + len -1, EXTENT_QGROUP_RESERVED, &changeset);
+ start + len -1, EXTENT_QGROUP_RESERVED, reserved);
+
+ /* Newly reserved space */
+ to_reserve = reserved->bytes_changed - orig_reserved;
trace_btrfs_qgroup_reserve_data(inode, start, len,
- changeset.bytes_changed,
- QGROUP_RESERVE);
+ to_reserve, QGROUP_RESERVE);
if (ret < 0)
goto cleanup;
- ret = qgroup_reserve(root, changeset.bytes_changed, true);
+ ret = qgroup_reserve(root, to_reserve, true);
if (ret < 0)
goto cleanup;
- ulist_release(&changeset.range_changed);
return ret;
cleanup:
- /* cleanup already reserved ranges */
+ /* cleanup *ALL* already reserved ranges */
ULIST_ITER_INIT(&uiter);
- while ((unode = ulist_next(&changeset.range_changed, &uiter)))
+ while ((unode = ulist_next(&reserved->range_changed, &uiter)))
clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val,
unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL,
GFP_NOFS);
- ulist_release(&changeset.range_changed);
+ extent_changeset_release(reserved);
+ return ret;
+}
+
+/*
+ * Free the qgroup-reserved ranges recorded in @reserved that overlap
+ * [@start, @start + @len), normally in an error path. Returns bytes freed or <0.
+ */
+static int qgroup_free_reserved_data(struct inode *inode,
+ struct extent_changeset *reserved, u64 start, u64 len)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct ulist_node *unode;
+ struct ulist_iterator uiter;
+ struct extent_changeset changeset;
+ int freed = 0;
+ int ret;
+
+ extent_changeset_init(&changeset);
+ len = round_up(start + len, root->fs_info->sectorsize); /* NOTE(review): len now holds an aligned end offset, not a length - confirm */
+ start = round_down(start, root->fs_info->sectorsize);
+
+ ULIST_ITER_INIT(&uiter);
+ while ((unode = ulist_next(&reserved->range_changed, &uiter))) {
+ u64 range_start = unode->val;
+ /* unode->aux is the inclusive end */
+ u64 range_len = unode->aux - range_start + 1;
+ u64 free_start;
+ u64 free_len;
+
+ extent_changeset_release(&changeset);
+
+ /* Only free the part of this range inside [start, start + len) */
+ if (range_start >= start + len ||
+ range_start + range_len <= start)
+ continue;
+ free_start = max(range_start, start);
+ free_len = min(start + len, range_start + range_len) -
+ free_start;
+ /*
+ * TODO: also update @reserved to reflect what was freed here. No rush:
+ * as long as we free according to EXTENT_QGROUP_RESERVED in the
+ * io_tree (where btrfs_qgroup_reserve_data() sets it), we won't double free.
+ */
+ ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree,
+ free_start, free_start + free_len - 1,
+ EXTENT_QGROUP_RESERVED, &changeset);
+ if (ret < 0)
+ goto out;
+ freed += changeset.bytes_changed; /* bytes whose bit was actually cleared */
+ }
+ btrfs_qgroup_free_refroot(root->fs_info, root->objectid, freed);
+ ret = freed;
+out:
+ extent_changeset_release(&changeset);
return ret;
}
-static int __btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len,
- int free)
+static int __btrfs_qgroup_release_data(struct inode *inode,
+ struct extent_changeset *reserved, u64 start, u64 len,
+ int free)
{
struct extent_changeset changeset;
int trace_op = QGROUP_RELEASE;
int ret;
- changeset.bytes_changed = 0;
- ulist_init(&changeset.range_changed);
+ /* In release case, we shouldn't have @reserved */
+ WARN_ON(!free && reserved);
+ if (free && reserved)
+ return qgroup_free_reserved_data(inode, reserved, start, len);
+ extent_changeset_init(&changeset);
ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
start + len -1, EXTENT_QGROUP_RESERVED, &changeset);
if (ret < 0)
@@ -2868,8 +2972,9 @@ static int __btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len,
btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
BTRFS_I(inode)->root->objectid,
changeset.bytes_changed);
+ ret = changeset.bytes_changed;
out:
- ulist_release(&changeset.range_changed);
+ extent_changeset_release(&changeset);
return ret;
}
@@ -2878,14 +2983,17 @@ out:
*
* Should be called when a range of pages get invalidated before reaching disk.
* Or for error cleanup case.
+ * If @reserved is given, only the reserved ranges within [@start, @start + @len)
+ * will be freed.
*
* For data written to disk, use btrfs_qgroup_release_data().
*
* NOTE: This function may sleep for memory allocation.
*/
-int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len)
+int btrfs_qgroup_free_data(struct inode *inode,
+ struct extent_changeset *reserved, u64 start, u64 len)
{
- return __btrfs_qgroup_release_data(inode, start, len, 1);
+ return __btrfs_qgroup_release_data(inode, reserved, start, len, 1);
}
/*
@@ -2905,7 +3013,7 @@ int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len)
*/
int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len)
{
- return __btrfs_qgroup_release_data(inode, start, len, 0);
+ return __btrfs_qgroup_release_data(inode, NULL, start, len, 0);
}
int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
@@ -2969,8 +3077,7 @@ void btrfs_qgroup_check_reserved_leak(struct inode *inode)
struct ulist_iterator iter;
int ret;
- changeset.bytes_changed = 0;
- ulist_init(&changeset.range_changed);
+ extent_changeset_init(&changeset);
ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
EXTENT_QGROUP_RESERVED, &changeset);
@@ -2987,5 +3094,5 @@ void btrfs_qgroup_check_reserved_leak(struct inode *inode)
changeset.bytes_changed);
}
- ulist_release(&changeset.range_changed);
+ extent_changeset_release(&changeset);
}
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index fe04d3f295c6..d9984e87cddf 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -134,8 +134,7 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
struct btrfs_delayed_extent_op;
-int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info);
+
/*
* Inform qgroup to trace one dirty extent, its info is recorded in @record.
* So qgroup can account it at transaction committing time.
@@ -243,9 +242,11 @@ int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
#endif
/* New io_tree based accurate qgroup reserve API */
-int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len);
+int btrfs_qgroup_reserve_data(struct inode *inode,
+ struct extent_changeset **reserved, u64 start, u64 len);
int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len);
-int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len);
+int btrfs_qgroup_free_data(struct inode *inode,
+ struct extent_changeset *reserved, u64 start, u64 len);
int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
bool enforce);
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index d8ea0eb76325..6f845d219cd6 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -31,7 +31,7 @@
#include <linux/hash.h>
#include <linux/list_sort.h>
#include <linux/raid/xor.h>
-#include <linux/vmalloc.h>
+#include <linux/mm.h>
#include <asm/div64.h>
#include "ctree.h"
#include "extent_map.h"
@@ -218,12 +218,9 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
* of a failing mount.
*/
table_size = sizeof(*table) + sizeof(*h) * num_entries;
- table = kzalloc(table_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
- if (!table) {
- table = vzalloc(table_size);
- if (!table)
- return -ENOMEM;
- }
+ table = kvzalloc(table_size, GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
spin_lock_init(&table->cache_lock);
INIT_LIST_HEAD(&table->stripe_cache);
@@ -871,7 +868,7 @@ static void free_raid_bio(struct btrfs_raid_bio *rbio)
* this frees the rbio and runs through all the bios in the
* bio_list and calls end_io on them
*/
-static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
+static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
{
struct bio *cur = bio_list_get(&rbio->bio_list);
struct bio *next;
@@ -884,7 +881,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
while (cur) {
next = cur->bi_next;
cur->bi_next = NULL;
- cur->bi_error = err;
+ cur->bi_status = err;
bio_endio(cur);
cur = next;
}
@@ -897,7 +894,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
static void raid_write_end_io(struct bio *bio)
{
struct btrfs_raid_bio *rbio = bio->bi_private;
- int err = bio->bi_error;
+ blk_status_t err = bio->bi_status;
int max_errors;
if (err)
@@ -914,7 +911,7 @@ static void raid_write_end_io(struct bio *bio)
max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
0 : rbio->bbio->max_errors;
if (atomic_read(&rbio->error) > max_errors)
- err = -EIO;
+ err = BLK_STS_IOERR;
rbio_orig_end_io(rbio, err);
}
@@ -1092,7 +1089,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
* devices or if they are not contiguous
*/
if (last_end == disk_start && stripe->dev->bdev &&
- !last->bi_error &&
+ !last->bi_status &&
last->bi_bdev == stripe->dev->bdev) {
ret = bio_add_page(last, page, PAGE_SIZE, 0);
if (ret == PAGE_SIZE)
@@ -1101,10 +1098,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
}
/* put a new bio on the list */
- bio = btrfs_io_bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1);
- if (!bio)
- return -ENOMEM;
-
+ bio = btrfs_io_bio_alloc(bio_max_len >> PAGE_SHIFT ?: 1);
bio->bi_iter.bi_size = 0;
bio->bi_bdev = stripe->dev->bdev;
bio->bi_iter.bi_sector = disk_start >> 9;
@@ -1448,7 +1442,7 @@ static void raid_rmw_end_io(struct bio *bio)
{
struct btrfs_raid_bio *rbio = bio->bi_private;
- if (bio->bi_error)
+ if (bio->bi_status)
fail_bio_stripe(rbio, bio);
else
set_bio_pages_uptodate(bio);
@@ -1991,7 +1985,7 @@ static void raid_recover_end_io(struct bio *bio)
* we only read stripe pages off the disk, set them
* up to date if there were no errors
*/
- if (bio->bi_error)
+ if (bio->bi_status)
fail_bio_stripe(rbio, bio);
else
set_bio_pages_uptodate(bio);
@@ -2530,7 +2524,7 @@ static void raid56_parity_scrub_end_io(struct bio *bio)
{
struct btrfs_raid_bio *rbio = bio->bi_private;
- if (bio->bi_error)
+ if (bio->bi_status)
fail_bio_stripe(rbio, bio);
else
set_bio_pages_uptodate(bio);
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index a17e775a4a89..ab852b8e3e37 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -66,7 +66,6 @@ struct reada_extctl {
struct reada_extent {
u64 logical;
struct btrfs_key top;
- int err;
struct list_head extctl;
int refcnt;
spinlock_t lock;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index d60df51959f7..65661d1aae4e 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3093,11 +3093,12 @@ int prealloc_file_extent_cluster(struct inode *inode,
u64 prealloc_start = cluster->start - offset;
u64 prealloc_end = cluster->end - offset;
u64 cur_offset;
+ struct extent_changeset *data_reserved = NULL;
BUG_ON(cluster->start != cluster->boundary[0]);
inode_lock(inode);
- ret = btrfs_check_data_free_space(inode, prealloc_start,
+ ret = btrfs_check_data_free_space(inode, &data_reserved, prealloc_start,
prealloc_end + 1 - prealloc_start);
if (ret)
goto out;
@@ -3113,8 +3114,8 @@ int prealloc_file_extent_cluster(struct inode *inode,
lock_extent(&BTRFS_I(inode)->io_tree, start, end);
num_bytes = end + 1 - start;
if (cur_offset < start)
- btrfs_free_reserved_data_space(inode, cur_offset,
- start - cur_offset);
+ btrfs_free_reserved_data_space(inode, data_reserved,
+ cur_offset, start - cur_offset);
ret = btrfs_prealloc_file_range(inode, 0, start,
num_bytes, num_bytes,
end + 1, &alloc_hint);
@@ -3125,10 +3126,11 @@ int prealloc_file_extent_cluster(struct inode *inode,
nr++;
}
if (cur_offset < prealloc_end)
- btrfs_free_reserved_data_space(inode, cur_offset,
- prealloc_end + 1 - cur_offset);
+ btrfs_free_reserved_data_space(inode, data_reserved,
+ cur_offset, prealloc_end + 1 - cur_offset);
out:
inode_unlock(inode);
+ extent_changeset_free(data_reserved);
return ret;
}
@@ -4269,8 +4271,7 @@ static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
INIT_LIST_HEAD(&rc->reloc_roots);
backref_cache_init(&rc->backref_cache);
mapping_tree_init(&rc->reloc_root_tree);
- extent_io_tree_init(&rc->processed_blocks,
- fs_info->btree_inode->i_mapping);
+ extent_io_tree_init(&rc->processed_blocks, NULL);
return rc;
}
@@ -4372,7 +4373,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
btrfs_wait_block_group_reservations(rc->block_group);
btrfs_wait_nocow_writers(rc->block_group);
- btrfs_wait_ordered_roots(fs_info, -1,
+ btrfs_wait_ordered_roots(fs_info, U64_MAX,
rc->block_group->key.objectid,
rc->block_group->key.offset);
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 7d6bc308bf43..460db0cb2d07 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -390,6 +390,13 @@ again:
WARN_ON(btrfs_root_ref_dirid(leaf, ref) != dirid);
WARN_ON(btrfs_root_ref_name_len(leaf, ref) != name_len);
ptr = (unsigned long)(ref + 1);
+ ret = btrfs_is_name_len_valid(leaf, path->slots[0], ptr,
+ name_len);
+ if (!ret) {
+ err = -EIO;
+ goto out;
+ }
+
WARN_ON(memcmp_extent_buffer(leaf, name, ptr, name_len));
*sequence = btrfs_root_ref_sequence(leaf, ref);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index c7b45eb2403d..6f1e4c984b94 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -18,6 +18,7 @@
#include <linux/blkdev.h>
#include <linux/ratelimit.h>
+#include <linux/sched/mm.h>
#include "ctree.h"
#include "volumes.h"
#include "disk-io.h"
@@ -95,7 +96,7 @@ struct scrub_bio {
struct scrub_ctx *sctx;
struct btrfs_device *dev;
struct bio *bio;
- int err;
+ blk_status_t status;
u64 logical;
u64 physical;
#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
@@ -161,14 +162,6 @@ struct scrub_parity {
unsigned long bitmap[0];
};
-struct scrub_wr_ctx {
- struct scrub_bio *wr_curr_bio;
- struct btrfs_device *tgtdev;
- int pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */
- atomic_t flush_all_writes;
- struct mutex wr_lock;
-};
-
struct scrub_ctx {
struct scrub_bio *bios[SCRUB_BIOS_PER_SCTX];
struct btrfs_fs_info *fs_info;
@@ -183,11 +176,14 @@ struct scrub_ctx {
atomic_t cancel_req;
int readonly;
int pages_per_rd_bio;
- u32 sectorsize;
- u32 nodesize;
int is_dev_replace;
- struct scrub_wr_ctx wr_ctx;
+
+ struct scrub_bio *wr_curr_bio;
+ struct mutex wr_lock;
+ int pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */
+ atomic_t flush_all_writes;
+ struct btrfs_device *wr_tgtdev;
/*
* statistics
@@ -289,10 +285,6 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
u64 *extent_physical,
struct btrfs_device **extent_dev,
int *extent_mirror_num);
-static int scrub_setup_wr_ctx(struct scrub_wr_ctx *wr_ctx,
- struct btrfs_device *dev,
- int is_dev_replace);
-static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx);
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
struct scrub_page *spage);
static void scrub_wr_submit(struct scrub_ctx *sctx);
@@ -643,8 +635,6 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
if (!sctx)
return;
- scrub_free_wr_ctx(&sctx->wr_ctx);
-
/* this can happen when scrub is cancelled */
if (sctx->curr != -1) {
struct scrub_bio *sbio = sctx->bios[sctx->curr];
@@ -664,6 +654,7 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
kfree(sbio);
}
+ kfree(sctx->wr_curr_bio);
scrub_free_csums(sctx);
kfree(sctx);
}
@@ -680,7 +671,6 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
struct scrub_ctx *sctx;
int i;
struct btrfs_fs_info *fs_info = dev->fs_info;
- int ret;
sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
if (!sctx)
@@ -710,8 +700,6 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
sctx->bios[i]->next_free = -1;
}
sctx->first_free = 0;
- sctx->nodesize = fs_info->nodesize;
- sctx->sectorsize = fs_info->sectorsize;
atomic_set(&sctx->bios_in_flight, 0);
atomic_set(&sctx->workers_pending, 0);
atomic_set(&sctx->cancel_req, 0);
@@ -722,12 +710,16 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
spin_lock_init(&sctx->stat_lock);
init_waitqueue_head(&sctx->list_wait);
- ret = scrub_setup_wr_ctx(&sctx->wr_ctx,
- fs_info->dev_replace.tgtdev, is_dev_replace);
- if (ret) {
- scrub_free_ctx(sctx);
- return ERR_PTR(ret);
+ WARN_ON(sctx->wr_curr_bio != NULL);
+ mutex_init(&sctx->wr_lock);
+ sctx->wr_curr_bio = NULL;
+ if (is_dev_replace) {
+ WARN_ON(!fs_info->dev_replace.tgtdev);
+ sctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO;
+ sctx->wr_tgtdev = fs_info->dev_replace.tgtdev;
+ atomic_set(&sctx->flush_all_writes, 0);
}
+
return sctx;
nomem:
@@ -742,6 +734,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
u32 nlink;
int ret;
int i;
+ unsigned nofs_flag;
struct extent_buffer *eb;
struct btrfs_inode_item *inode_item;
struct scrub_warning *swarn = warn_ctx;
@@ -780,7 +773,14 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
nlink = btrfs_inode_nlink(eb, inode_item);
btrfs_release_path(swarn->path);
+ /*
+ * init_ipath might indirectly call vmalloc, or use GFP_KERNEL. Scrub
+ * uses GFP_NOFS in this context, so we keep it consistent but it does
+ * not seem to be strictly necessary.
+ */
+ nofs_flag = memalloc_nofs_save();
ipath = init_ipath(4096, local_root, swarn->path);
+ memalloc_nofs_restore(nofs_flag);
if (IS_ERR(ipath)) {
ret = PTR_ERR(ipath);
ipath = NULL;
@@ -954,7 +954,7 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
ret = -EIO;
goto out;
}
- ret = repair_io_failure(BTRFS_I(inode), offset, PAGE_SIZE,
+ ret = repair_io_failure(fs_info, inum, offset, PAGE_SIZE,
fixup->logical, page,
offset - page_offset(page),
fixup->mirror_num);
@@ -1668,14 +1668,14 @@ leave_nomem:
struct scrub_bio_ret {
struct completion event;
- int error;
+ blk_status_t status;
};
static void scrub_bio_wait_endio(struct bio *bio)
{
struct scrub_bio_ret *ret = bio->bi_private;
- ret->error = bio->bi_error;
+ ret->status = bio->bi_status;
complete(&ret->event);
}
@@ -1693,7 +1693,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
int ret;
init_completion(&done.event);
- done.error = 0;
+ done.status = 0;
bio->bi_iter.bi_sector = page->logical >> 9;
bio->bi_private = &done;
bio->bi_end_io = scrub_bio_wait_endio;
@@ -1705,7 +1705,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
return ret;
wait_for_completion(&done.event);
- if (done.error)
+ if (done.status)
return -EIO;
return 0;
@@ -1737,12 +1737,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
}
WARN_ON(!page->page);
- bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
- if (!bio) {
- page->io_error = 1;
- sblock->no_io_error_seen = 0;
- continue;
- }
+ bio = btrfs_io_bio_alloc(1);
bio->bi_bdev = page->dev->bdev;
bio_add_page(bio, page->page, PAGE_SIZE, 0);
@@ -1830,9 +1825,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
return -EIO;
}
- bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
- if (!bio)
- return -EIO;
+ bio = btrfs_io_bio_alloc(1);
bio->bi_bdev = page_bad->dev->bdev;
bio->bi_iter.bi_sector = page_bad->physical >> 9;
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
@@ -1898,37 +1891,31 @@ static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
struct scrub_page *spage)
{
- struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx;
struct scrub_bio *sbio;
int ret;
- mutex_lock(&wr_ctx->wr_lock);
+ mutex_lock(&sctx->wr_lock);
again:
- if (!wr_ctx->wr_curr_bio) {
- wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio),
+ if (!sctx->wr_curr_bio) {
+ sctx->wr_curr_bio = kzalloc(sizeof(*sctx->wr_curr_bio),
GFP_KERNEL);
- if (!wr_ctx->wr_curr_bio) {
- mutex_unlock(&wr_ctx->wr_lock);
+ if (!sctx->wr_curr_bio) {
+ mutex_unlock(&sctx->wr_lock);
return -ENOMEM;
}
- wr_ctx->wr_curr_bio->sctx = sctx;
- wr_ctx->wr_curr_bio->page_count = 0;
+ sctx->wr_curr_bio->sctx = sctx;
+ sctx->wr_curr_bio->page_count = 0;
}
- sbio = wr_ctx->wr_curr_bio;
+ sbio = sctx->wr_curr_bio;
if (sbio->page_count == 0) {
struct bio *bio;
sbio->physical = spage->physical_for_dev_replace;
sbio->logical = spage->logical;
- sbio->dev = wr_ctx->tgtdev;
+ sbio->dev = sctx->wr_tgtdev;
bio = sbio->bio;
if (!bio) {
- bio = btrfs_io_bio_alloc(GFP_KERNEL,
- wr_ctx->pages_per_wr_bio);
- if (!bio) {
- mutex_unlock(&wr_ctx->wr_lock);
- return -ENOMEM;
- }
+ bio = btrfs_io_bio_alloc(sctx->pages_per_wr_bio);
sbio->bio = bio;
}
@@ -1937,7 +1924,7 @@ again:
bio->bi_bdev = sbio->dev->bdev;
bio->bi_iter.bi_sector = sbio->physical >> 9;
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
- sbio->err = 0;
+ sbio->status = 0;
} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
spage->physical_for_dev_replace ||
sbio->logical + sbio->page_count * PAGE_SIZE !=
@@ -1951,7 +1938,7 @@ again:
if (sbio->page_count < 1) {
bio_put(sbio->bio);
sbio->bio = NULL;
- mutex_unlock(&wr_ctx->wr_lock);
+ mutex_unlock(&sctx->wr_lock);
return -EIO;
}
scrub_wr_submit(sctx);
@@ -1961,23 +1948,22 @@ again:
sbio->pagev[sbio->page_count] = spage;
scrub_page_get(spage);
sbio->page_count++;
- if (sbio->page_count == wr_ctx->pages_per_wr_bio)
+ if (sbio->page_count == sctx->pages_per_wr_bio)
scrub_wr_submit(sctx);
- mutex_unlock(&wr_ctx->wr_lock);
+ mutex_unlock(&sctx->wr_lock);
return 0;
}
static void scrub_wr_submit(struct scrub_ctx *sctx)
{
- struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx;
struct scrub_bio *sbio;
- if (!wr_ctx->wr_curr_bio)
+ if (!sctx->wr_curr_bio)
return;
- sbio = wr_ctx->wr_curr_bio;
- wr_ctx->wr_curr_bio = NULL;
+ sbio = sctx->wr_curr_bio;
+ sctx->wr_curr_bio = NULL;
WARN_ON(!sbio->bio->bi_bdev);
scrub_pending_bio_inc(sctx);
/* process all writes in a single worker thread. Then the block layer
@@ -1992,7 +1978,7 @@ static void scrub_wr_bio_end_io(struct bio *bio)
struct scrub_bio *sbio = bio->bi_private;
struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
- sbio->err = bio->bi_error;
+ sbio->status = bio->bi_status;
sbio->bio = bio;
btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
@@ -2007,7 +1993,7 @@ static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
int i;
WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
- if (sbio->err) {
+ if (sbio->status) {
struct btrfs_dev_replace *dev_replace =
&sbio->sctx->fs_info->dev_replace;
@@ -2081,7 +2067,7 @@ static int scrub_checksum_data(struct scrub_block *sblock)
page = sblock->pagev[0]->page;
buffer = kmap_atomic(page);
- len = sctx->sectorsize;
+ len = sctx->fs_info->sectorsize;
index = 0;
for (;;) {
u64 l = min_t(u64, len, PAGE_SIZE);
@@ -2146,7 +2132,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
BTRFS_UUID_SIZE))
sblock->header_error = 1;
- len = sctx->nodesize - BTRFS_CSUM_SIZE;
+ len = sctx->fs_info->nodesize - BTRFS_CSUM_SIZE;
mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
index = 0;
@@ -2329,10 +2315,7 @@ again:
sbio->dev = spage->dev;
bio = sbio->bio;
if (!bio) {
- bio = btrfs_io_bio_alloc(GFP_KERNEL,
- sctx->pages_per_rd_bio);
- if (!bio)
- return -ENOMEM;
+ bio = btrfs_io_bio_alloc(sctx->pages_per_rd_bio);
sbio->bio = bio;
}
@@ -2341,7 +2324,7 @@ again:
bio->bi_bdev = sbio->dev->bdev;
bio->bi_iter.bi_sector = sbio->physical >> 9;
bio_set_op_attrs(bio, REQ_OP_READ, 0);
- sbio->err = 0;
+ sbio->status = 0;
} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
spage->physical ||
sbio->logical + sbio->page_count * PAGE_SIZE !=
@@ -2377,7 +2360,7 @@ static void scrub_missing_raid56_end_io(struct bio *bio)
struct scrub_block *sblock = bio->bi_private;
struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
- if (bio->bi_error)
+ if (bio->bi_status)
sblock->no_io_error_seen = 0;
bio_put(bio);
@@ -2420,10 +2403,10 @@ static void scrub_missing_raid56_worker(struct btrfs_work *work)
scrub_block_put(sblock);
if (sctx->is_dev_replace &&
- atomic_read(&sctx->wr_ctx.flush_all_writes)) {
- mutex_lock(&sctx->wr_ctx.wr_lock);
+ atomic_read(&sctx->flush_all_writes)) {
+ mutex_lock(&sctx->wr_lock);
scrub_wr_submit(sctx);
- mutex_unlock(&sctx->wr_ctx.wr_lock);
+ mutex_unlock(&sctx->wr_lock);
}
scrub_pending_bio_dec(sctx);
@@ -2458,10 +2441,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
goto bbio_out;
}
- bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
- if (!bio)
- goto bbio_out;
-
+ bio = btrfs_io_bio_alloc(0);
bio->bi_iter.bi_sector = logical >> 9;
bio->bi_private = sblock;
bio->bi_end_io = scrub_missing_raid56_end_io;
@@ -2588,7 +2568,7 @@ static void scrub_bio_end_io(struct bio *bio)
struct scrub_bio *sbio = bio->bi_private;
struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
- sbio->err = bio->bi_error;
+ sbio->status = bio->bi_status;
sbio->bio = bio;
btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
@@ -2601,7 +2581,7 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work)
int i;
BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
- if (sbio->err) {
+ if (sbio->status) {
for (i = 0; i < sbio->page_count; i++) {
struct scrub_page *spage = sbio->pagev[i];
@@ -2628,10 +2608,10 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work)
spin_unlock(&sctx->list_lock);
if (sctx->is_dev_replace &&
- atomic_read(&sctx->wr_ctx.flush_all_writes)) {
- mutex_lock(&sctx->wr_ctx.wr_lock);
+ atomic_read(&sctx->flush_all_writes)) {
+ mutex_lock(&sctx->wr_lock);
scrub_wr_submit(sctx);
- mutex_unlock(&sctx->wr_ctx.wr_lock);
+ mutex_unlock(&sctx->wr_lock);
}
scrub_pending_bio_dec(sctx);
@@ -2726,8 +2706,8 @@ static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
if (!sum)
return 0;
- index = ((u32)(logical - sum->bytenr)) / sctx->sectorsize;
- num_sectors = sum->len / sctx->sectorsize;
+ index = ((u32)(logical - sum->bytenr)) / sctx->fs_info->sectorsize;
+ num_sectors = sum->len / sctx->fs_info->sectorsize;
memcpy(csum, sum->sums + index, sctx->csum_size);
if (index == num_sectors - 1) {
list_del(&sum->list);
@@ -2746,19 +2726,19 @@ static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len,
u32 blocksize;
if (flags & BTRFS_EXTENT_FLAG_DATA) {
- blocksize = sctx->sectorsize;
+ blocksize = sctx->fs_info->sectorsize;
spin_lock(&sctx->stat_lock);
sctx->stat.data_extents_scrubbed++;
sctx->stat.data_bytes_scrubbed += len;
spin_unlock(&sctx->stat_lock);
} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
- blocksize = sctx->nodesize;
+ blocksize = sctx->fs_info->nodesize;
spin_lock(&sctx->stat_lock);
sctx->stat.tree_extents_scrubbed++;
sctx->stat.tree_bytes_scrubbed += len;
spin_unlock(&sctx->stat_lock);
} else {
- blocksize = sctx->sectorsize;
+ blocksize = sctx->fs_info->sectorsize;
WARN_ON(1);
}
@@ -2892,11 +2872,11 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity,
}
if (flags & BTRFS_EXTENT_FLAG_DATA) {
- blocksize = sctx->sectorsize;
+ blocksize = sctx->fs_info->sectorsize;
} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
- blocksize = sctx->nodesize;
+ blocksize = sctx->fs_info->nodesize;
} else {
- blocksize = sctx->sectorsize;
+ blocksize = sctx->fs_info->sectorsize;
WARN_ON(1);
}
@@ -3004,7 +2984,7 @@ static void scrub_parity_bio_endio(struct bio *bio)
struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
struct btrfs_fs_info *fs_info = sparity->sctx->fs_info;
- if (bio->bi_error)
+ if (bio->bi_status)
bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
sparity->nsectors);
@@ -3037,10 +3017,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
if (ret || !bbio || !bbio->raid_map)
goto bbio_out;
- bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
- if (!bio)
- goto bbio_out;
-
+ bio = btrfs_io_bio_alloc(0);
bio->bi_iter.bi_sector = sparity->logic_start >> 9;
bio->bi_private = sparity;
bio->bi_end_io = scrub_parity_bio_endio;
@@ -3305,9 +3282,9 @@ out:
logic_end - logic_start);
scrub_parity_put(sparity);
scrub_submit(sctx);
- mutex_lock(&sctx->wr_ctx.wr_lock);
+ mutex_lock(&sctx->wr_lock);
scrub_wr_submit(sctx);
- mutex_unlock(&sctx->wr_ctx.wr_lock);
+ mutex_unlock(&sctx->wr_lock);
btrfs_release_path(path);
return ret < 0 ? ret : 0;
@@ -3463,14 +3440,14 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
*/
if (atomic_read(&fs_info->scrub_pause_req)) {
/* push queued extents */
- atomic_set(&sctx->wr_ctx.flush_all_writes, 1);
+ atomic_set(&sctx->flush_all_writes, 1);
scrub_submit(sctx);
- mutex_lock(&sctx->wr_ctx.wr_lock);
+ mutex_lock(&sctx->wr_lock);
scrub_wr_submit(sctx);
- mutex_unlock(&sctx->wr_ctx.wr_lock);
+ mutex_unlock(&sctx->wr_lock);
wait_event(sctx->list_wait,
atomic_read(&sctx->bios_in_flight) == 0);
- atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
+ atomic_set(&sctx->flush_all_writes, 0);
scrub_blocked_if_needed(fs_info);
}
@@ -3677,9 +3654,9 @@ skip:
out:
/* push queued extents */
scrub_submit(sctx);
- mutex_lock(&sctx->wr_ctx.wr_lock);
+ mutex_lock(&sctx->wr_lock);
scrub_wr_submit(sctx);
- mutex_unlock(&sctx->wr_ctx.wr_lock);
+ mutex_unlock(&sctx->wr_lock);
blk_finish_plug(&plug);
btrfs_free_path(path);
@@ -3859,7 +3836,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
*/
btrfs_wait_block_group_reservations(cache);
btrfs_wait_nocow_writers(cache);
- ret = btrfs_wait_ordered_roots(fs_info, -1,
+ ret = btrfs_wait_ordered_roots(fs_info, U64_MAX,
cache->key.objectid,
cache->key.offset);
if (ret > 0) {
@@ -3916,11 +3893,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
* write requests are really completed when bios_in_flight
* changes to 0.
*/
- atomic_set(&sctx->wr_ctx.flush_all_writes, 1);
+ atomic_set(&sctx->flush_all_writes, 1);
scrub_submit(sctx);
- mutex_lock(&sctx->wr_ctx.wr_lock);
+ mutex_lock(&sctx->wr_lock);
scrub_wr_submit(sctx);
- mutex_unlock(&sctx->wr_ctx.wr_lock);
+ mutex_unlock(&sctx->wr_lock);
wait_event(sctx->list_wait,
atomic_read(&sctx->bios_in_flight) == 0);
@@ -3934,7 +3911,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
*/
wait_event(sctx->list_wait,
atomic_read(&sctx->workers_pending) == 0);
- atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
+ atomic_set(&sctx->flush_all_writes, 0);
scrub_pause_off(fs_info);
@@ -4337,32 +4314,6 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
btrfs_put_bbio(bbio);
}
-static int scrub_setup_wr_ctx(struct scrub_wr_ctx *wr_ctx,
- struct btrfs_device *dev,
- int is_dev_replace)
-{
- WARN_ON(wr_ctx->wr_curr_bio != NULL);
-
- mutex_init(&wr_ctx->wr_lock);
- wr_ctx->wr_curr_bio = NULL;
- if (!is_dev_replace)
- return 0;
-
- WARN_ON(!dev->bdev);
- wr_ctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO;
- wr_ctx->tgtdev = dev;
- atomic_set(&wr_ctx->flush_all_writes, 0);
- return 0;
-}
-
-static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx)
-{
- mutex_lock(&wr_ctx->wr_lock);
- kfree(wr_ctx->wr_curr_bio);
- wr_ctx->wr_curr_bio = NULL;
- mutex_unlock(&wr_ctx->wr_lock);
-}
-
static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
int mirror_num, u64 physical_for_dev_replace)
{
@@ -4665,7 +4616,7 @@ static int write_page_nocow(struct scrub_ctx *sctx,
struct btrfs_device *dev;
int ret;
- dev = sctx->wr_ctx.tgtdev;
+ dev = sctx->wr_tgtdev;
if (!dev)
return -EIO;
if (!dev->bdev) {
@@ -4673,13 +4624,7 @@ static int write_page_nocow(struct scrub_ctx *sctx,
"scrub write_page_nocow(bdev == NULL) is unexpected");
return -EIO;
}
- bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
- if (!bio) {
- spin_lock(&sctx->stat_lock);
- sctx->stat.malloc_errors++;
- spin_unlock(&sctx->stat_lock);
- return -ENOMEM;
- }
+ bio = btrfs_io_bio_alloc(1);
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
bio->bi_bdev = dev->bdev;
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index fc496a6f842a..e937c10b8287 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1069,6 +1069,12 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
}
}
+ ret = btrfs_is_name_len_valid(eb, path->slots[0],
+ (unsigned long)(di + 1), name_len + data_len);
+ if (!ret) {
+ ret = -EIO;
+ goto out;
+ }
if (name_len + data_len > buf_len) {
buf_len = name_len + data_len;
if (is_vmalloc_addr(buf)) {
@@ -1083,7 +1089,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
buf = tmp;
}
if (!buf) {
- buf = vmalloc(buf_len);
+ buf = kvmalloc(buf_len, GFP_KERNEL);
if (!buf) {
ret = -ENOMEM;
goto out;
@@ -2769,15 +2775,20 @@ out:
struct recorded_ref {
struct list_head list;
- char *dir_path;
char *name;
struct fs_path *full_path;
u64 dir;
u64 dir_gen;
- int dir_path_len;
int name_len;
};
+static void set_ref_path(struct recorded_ref *ref, struct fs_path *path)
+{
+ ref->full_path = path;
+ ref->name = (char *)kbasename(ref->full_path->start);
+ ref->name_len = ref->full_path->end - ref->name;
+}
+
/*
* We need to process new refs before deleted refs, but compare_tree gives us
* everything mixed. So we first record all refs and later process them.
@@ -2794,17 +2805,7 @@ static int __record_ref(struct list_head *head, u64 dir,
ref->dir = dir;
ref->dir_gen = dir_gen;
- ref->full_path = path;
-
- ref->name = (char *)kbasename(ref->full_path->start);
- ref->name_len = ref->full_path->end - ref->name;
- ref->dir_path = ref->full_path->start;
- if (ref->name == ref->full_path->start)
- ref->dir_path_len = 0;
- else
- ref->dir_path_len = ref->full_path->end -
- ref->full_path->start - 1 - ref->name_len;
-
+ set_ref_path(ref, path);
list_add_tail(&ref->list, head);
return 0;
}
@@ -3546,9 +3547,17 @@ static int is_ancestor(struct btrfs_root *root,
struct fs_path *fs_path)
{
u64 ino = ino2;
+ bool free_path = false;
+ int ret = 0;
+
+ if (!fs_path) {
+ fs_path = fs_path_alloc();
+ if (!fs_path)
+ return -ENOMEM;
+ free_path = true;
+ }
while (ino > BTRFS_FIRST_FREE_OBJECTID) {
- int ret;
u64 parent;
u64 parent_gen;
@@ -3557,13 +3566,18 @@ static int is_ancestor(struct btrfs_root *root,
if (ret < 0) {
if (ret == -ENOENT && ino == ino2)
ret = 0;
- return ret;
+ goto out;
+ }
+ if (parent == ino1) {
+ ret = parent_gen == ino1_gen ? 1 : 0;
+ goto out;
}
- if (parent == ino1)
- return parent_gen == ino1_gen ? 1 : 0;
ino = parent;
}
- return 0;
+ out:
+ if (free_path)
+ fs_path_free(fs_path);
+ return ret;
}
static int wait_for_parent_move(struct send_ctx *sctx,
@@ -3686,6 +3700,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
int is_orphan = 0;
u64 last_dir_ino_rm = 0;
bool can_rename = true;
+ bool orphanized_ancestor = false;
btrfs_debug(fs_info, "process_recorded_refs %llu", sctx->cur_ino);
@@ -3837,9 +3852,16 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
* might contain the pre-orphanization name of
* ow_inode, which is no longer valid.
*/
- fs_path_reset(valid_path);
- ret = get_cur_path(sctx, sctx->cur_ino,
- sctx->cur_inode_gen, valid_path);
+ ret = is_ancestor(sctx->parent_root,
+ ow_inode, ow_gen,
+ sctx->cur_ino, NULL);
+ if (ret > 0) {
+ orphanized_ancestor = true;
+ fs_path_reset(valid_path);
+ ret = get_cur_path(sctx, sctx->cur_ino,
+ sctx->cur_inode_gen,
+ valid_path);
+ }
if (ret < 0)
goto out;
} else {
@@ -3960,6 +3982,43 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
if (ret < 0)
goto out;
if (!ret) {
+ /*
+ * If we orphanized any ancestor before, we need
+ * to recompute the full path for deleted names,
+ * since any such path was computed before we
+ * processed any references and orphanized any
+ * ancestor inode.
+ */
+ if (orphanized_ancestor) {
+ struct fs_path *new_path;
+
+ /*
+ * Our reference's name member points into
+ * its full_path member string, so we build
+ * the recomputed path in a new fs_path here.
+ */
+ new_path = fs_path_alloc();
+ if (!new_path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = get_cur_path(sctx, cur->dir,
+ cur->dir_gen,
+ new_path);
+ if (ret < 0) {
+ fs_path_free(new_path);
+ goto out;
+ }
+ ret = fs_path_add(new_path,
+ cur->name,
+ cur->name_len);
+ if (ret < 0) {
+ fs_path_free(new_path);
+ goto out;
+ }
+ fs_path_free(cur->full_path);
+ set_ref_path(cur, new_path);
+ }
ret = send_unlink(sctx, cur->full_path);
if (ret < 0)
goto out;
@@ -6397,13 +6456,10 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1);
- sctx->clone_roots = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN);
+ sctx->clone_roots = kzalloc(alloc_size, GFP_KERNEL);
if (!sctx->clone_roots) {
- sctx->clone_roots = vzalloc(alloc_size);
- if (!sctx->clone_roots) {
- ret = -ENOMEM;
- goto out;
- }
+ ret = -ENOMEM;
+ goto out;
}
alloc_size = arg->clone_sources_count * sizeof(*arg->clone_sources);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 4f1cdd5058f1..74e47794e63f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -601,18 +601,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
}
break;
case Opt_alloc_start:
- num = match_strdup(&args[0]);
- if (num) {
- mutex_lock(&info->chunk_mutex);
- info->alloc_start = memparse(num, NULL);
- mutex_unlock(&info->chunk_mutex);
- kfree(num);
- btrfs_info(info, "allocations start at %llu",
- info->alloc_start);
- } else {
- ret = -ENOMEM;
- goto out;
- }
+ btrfs_info(info,
+ "option alloc_start is obsolete, ignored");
break;
case Opt_acl:
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
@@ -1187,7 +1177,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
return 0;
}
- btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
+ btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
trans = btrfs_attach_transaction_barrier(root);
if (IS_ERR(trans)) {
@@ -1232,8 +1222,6 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
seq_puts(seq, ",nobarrier");
if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE)
seq_printf(seq, ",max_inline=%llu", info->max_inline);
- if (info->alloc_start != 0)
- seq_printf(seq, ",alloc_start=%llu", info->alloc_start);
if (info->thread_pool_size != min_t(unsigned long,
num_online_cpus() + 2, 8))
seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
@@ -1716,7 +1704,6 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
unsigned long old_opts = fs_info->mount_opt;
unsigned long old_compress_type = fs_info->compress_type;
u64 old_max_inline = fs_info->max_inline;
- u64 old_alloc_start = fs_info->alloc_start;
int old_thread_pool_size = fs_info->thread_pool_size;
unsigned int old_metadata_ratio = fs_info->metadata_ratio;
int ret;
@@ -1855,9 +1842,6 @@ restore:
fs_info->mount_opt = old_opts;
fs_info->compress_type = old_compress_type;
fs_info->max_inline = old_max_inline;
- mutex_lock(&fs_info->chunk_mutex);
- fs_info->alloc_start = old_alloc_start;
- mutex_unlock(&fs_info->chunk_mutex);
btrfs_resize_thread_pool(fs_info,
old_thread_pool_size, fs_info->thread_pool_size);
fs_info->metadata_ratio = old_metadata_ratio;
@@ -1898,18 +1882,15 @@ static inline void btrfs_descending_sort_devices(
static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
u64 *free_bytes)
{
- struct btrfs_root *root = fs_info->tree_root;
struct btrfs_device_info *devices_info;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_device *device;
u64 skip_space;
u64 type;
u64 avail_space;
- u64 used_space;
u64 min_stripe_size;
int min_stripes = 1, num_stripes = 1;
int i = 0, nr_devices;
- int ret;
/*
* We aren't under the device list lock, so this is racy-ish, but good
@@ -1927,12 +1908,12 @@ static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
}
devices_info = kmalloc_array(nr_devices, sizeof(*devices_info),
- GFP_NOFS);
+ GFP_KERNEL);
if (!devices_info)
return -ENOMEM;
/* calc min stripe number for data space allocation */
- type = btrfs_get_alloc_profile(root, 1);
+ type = btrfs_data_alloc_profile(fs_info);
if (type & BTRFS_BLOCK_GROUP_RAID0) {
min_stripes = 2;
num_stripes = nr_devices;
@@ -1949,8 +1930,6 @@ static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
else
min_stripe_size = BTRFS_STRIPE_LEN;
- if (fs_info->alloc_start)
- mutex_lock(&fs_devices->device_list_mutex);
rcu_read_lock();
list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
if (!device->in_fs_metadata || !device->bdev ||
@@ -1973,34 +1952,6 @@ static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
*/
skip_space = SZ_1M;
- /* user can set the offset in fs_info->alloc_start. */
- if (fs_info->alloc_start &&
- fs_info->alloc_start + BTRFS_STRIPE_LEN <=
- device->total_bytes) {
- rcu_read_unlock();
- skip_space = max(fs_info->alloc_start, skip_space);
-
- /*
- * btrfs can not use the free space in
- * [0, skip_space - 1], we must subtract it from the
- * total. In order to implement it, we account the used
- * space in this range first.
- */
- ret = btrfs_account_dev_extents_size(device, 0,
- skip_space - 1,
- &used_space);
- if (ret) {
- kfree(devices_info);
- mutex_unlock(&fs_devices->device_list_mutex);
- return ret;
- }
-
- rcu_read_lock();
-
- /* calc the free space in [0, skip_space - 1] */
- skip_space -= used_space;
- }
-
/*
* we can use the free space in [0, skip_space - 1], subtract
* it from the total.
@@ -2019,8 +1970,6 @@ static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
i++;
}
rcu_read_unlock();
- if (fs_info->alloc_start)
- mutex_unlock(&fs_devices->device_list_mutex);
nr_devices = i;
@@ -2057,10 +2006,9 @@ static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
* multiplier to scale the sizes.
*
* Unused device space usage is based on simulating the chunk allocator
- * algorithm that respects the device sizes, order of allocations and the
- * 'alloc_start' value, this is a close approximation of the actual use but
- * there are other factors that may change the result (like a new metadata
- * chunk).
+ * algorithm that respects the device sizes and order of allocations. This is
+ * a close approximation of the actual use but there are other factors that may
+ * change the result (like a new metadata chunk).
*
* If metadata is exhausted, f_bavail will be 0.
*/
@@ -2243,7 +2191,7 @@ static int btrfs_freeze(struct super_block *sb)
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_root *root = fs_info->tree_root;
- fs_info->fs_frozen = 1;
+ set_bit(BTRFS_FS_FROZEN, &fs_info->flags);
/*
* We don't need a barrier here, we'll wait for any transaction that
* could be in progress on other threads (and do delayed iputs that
@@ -2262,7 +2210,9 @@ static int btrfs_freeze(struct super_block *sb)
static int btrfs_unfreeze(struct super_block *sb)
{
- btrfs_sb(sb)->fs_frozen = 0;
+ struct btrfs_fs_info *fs_info = btrfs_sb(sb);
+
+ clear_bit(BTRFS_FS_FROZEN, &fs_info->flags);
return 0;
}
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 1f157fba8940..c2d5f3580b4c 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -447,11 +447,52 @@ static ssize_t btrfs_clone_alignment_show(struct kobject *kobj,
BTRFS_ATTR(clone_alignment, btrfs_clone_alignment_show);
+static ssize_t quota_override_show(struct kobject *kobj,
+ struct kobj_attribute *a, char *buf)
+{
+ struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+ int quota_override;
+
+ quota_override = test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags);
+ return snprintf(buf, PAGE_SIZE, "%d\n", quota_override);
+}
+
+static ssize_t quota_override_store(struct kobject *kobj,
+ struct kobj_attribute *a,
+ const char *buf, size_t len)
+{
+ struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+ unsigned long knob;
+ int err;
+
+ if (!fs_info)
+ return -EPERM;
+
+ if (!capable(CAP_SYS_RESOURCE))
+ return -EPERM;
+
+ err = kstrtoul(buf, 10, &knob);
+ if (err)
+ return err;
+ if (knob > 1)
+ return -EINVAL;
+
+ if (knob)
+ set_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags);
+ else
+ clear_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags);
+
+ return len;
+}
+
+BTRFS_ATTR_RW(quota_override, quota_override_show, quota_override_store);
+
static const struct attribute *btrfs_attrs[] = {
BTRFS_ATTR_PTR(label),
BTRFS_ATTR_PTR(nodesize),
BTRFS_ATTR_PTR(sectorsize),
BTRFS_ATTR_PTR(clone_alignment),
+ BTRFS_ATTR_PTR(quota_override),
NULL,
};
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 133753232a94..d06b1c931d05 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -87,7 +87,7 @@ static int test_find_delalloc(u32 sectorsize)
return -ENOMEM;
}
- extent_io_tree_init(&tmp, &inode->i_data);
+ extent_io_tree_init(&tmp, inode);
/*
* First go through and create and mark all of our pages dirty, we pin
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 2168654c90a1..f615d59b0489 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -93,7 +93,7 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
btrfs_put_block_group_trimming(cache);
btrfs_put_block_group(cache);
}
- kmem_cache_free(btrfs_transaction_cachep, transaction);
+ kfree(transaction);
}
}
@@ -228,7 +228,7 @@ loop:
*/
BUG_ON(type == TRANS_JOIN_NOLOCK);
- cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
+ cur_trans = kmalloc(sizeof(*cur_trans), GFP_NOFS);
if (!cur_trans)
return -ENOMEM;
@@ -238,11 +238,11 @@ loop:
* someone started a transaction after we unlocked. Make sure
* to redo the checks above
*/
- kmem_cache_free(btrfs_transaction_cachep, cur_trans);
+ kfree(cur_trans);
goto loop;
} else if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
spin_unlock(&fs_info->trans_lock);
- kmem_cache_free(btrfs_transaction_cachep, cur_trans);
+ kfree(cur_trans);
return -EROFS;
}
@@ -294,7 +294,7 @@ loop:
spin_lock_init(&cur_trans->dropped_roots_lock);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
extent_io_tree_init(&cur_trans->dirty_pages,
- fs_info->btree_inode->i_mapping);
+ fs_info->btree_inode);
fs_info->generation++;
cur_trans->transid = fs_info->generation;
fs_info->running_transaction = cur_trans;
@@ -1374,9 +1374,6 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
ret = commit_fs_roots(trans, fs_info);
if (ret)
goto out;
- ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
- if (ret < 0)
- goto out;
ret = btrfs_qgroup_account_extents(trans, fs_info);
if (ret < 0)
goto out;
@@ -1926,7 +1923,7 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
{
if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
- btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
+ btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
}
static inline void
@@ -2180,13 +2177,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
goto scrub_continue;
}
- ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
- if (ret) {
- mutex_unlock(&fs_info->tree_log_mutex);
- mutex_unlock(&fs_info->reloc_mutex);
- goto scrub_continue;
- }
-
/*
* Since fs roots are all committed, we can get a quite accurate
* new_roots. So let's do quota accounting.
@@ -2314,7 +2304,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
* it'll result in deadlock about SB_FREEZE_FS.
*/
if (current != fs_info->transaction_kthread &&
- current != fs_info->cleaner_kthread && !fs_info->fs_frozen)
+ current != fs_info->cleaner_kthread &&
+ !test_bit(BTRFS_FS_FROZEN, &fs_info->flags))
btrfs_run_delayed_iputs(fs_info);
return ret;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ccfe9fe7754a..f20ef211a73d 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1175,15 +1175,19 @@ next:
return 0;
}
-static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
- u32 *namelen, char **name, u64 *index,
- u64 *parent_objectid)
+static int extref_get_fields(struct extent_buffer *eb, int slot,
+ unsigned long ref_ptr, u32 *namelen, char **name,
+ u64 *index, u64 *parent_objectid)
{
struct btrfs_inode_extref *extref;
extref = (struct btrfs_inode_extref *)ref_ptr;
*namelen = btrfs_inode_extref_name_len(eb, extref);
+ if (!btrfs_is_name_len_valid(eb, slot, (unsigned long)&extref->name,
+ *namelen))
+ return -EIO;
+
*name = kmalloc(*namelen, GFP_NOFS);
if (*name == NULL)
return -ENOMEM;
@@ -1198,14 +1202,19 @@ static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
return 0;
}
-static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
- u32 *namelen, char **name, u64 *index)
+static int ref_get_fields(struct extent_buffer *eb, int slot,
+ unsigned long ref_ptr, u32 *namelen, char **name,
+ u64 *index)
{
struct btrfs_inode_ref *ref;
ref = (struct btrfs_inode_ref *)ref_ptr;
*namelen = btrfs_inode_ref_name_len(eb, ref);
+ if (!btrfs_is_name_len_valid(eb, slot, (unsigned long)(ref + 1),
+ *namelen))
+ return -EIO;
+
*name = kmalloc(*namelen, GFP_NOFS);
if (*name == NULL)
return -ENOMEM;
@@ -1280,8 +1289,8 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
while (ref_ptr < ref_end) {
if (log_ref_ver) {
- ret = extref_get_fields(eb, ref_ptr, &namelen, &name,
- &ref_index, &parent_objectid);
+ ret = extref_get_fields(eb, slot, ref_ptr, &namelen,
+ &name, &ref_index, &parent_objectid);
/*
* parent object can change from one array
* item to another.
@@ -1293,8 +1302,8 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
goto out;
}
} else {
- ret = ref_get_fields(eb, ref_ptr, &namelen, &name,
- &ref_index);
+ ret = ref_get_fields(eb, slot, ref_ptr, &namelen,
+ &name, &ref_index);
}
if (ret)
goto out;
@@ -1841,7 +1850,7 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
ptr_end = ptr + item_size;
while (ptr < ptr_end) {
di = (struct btrfs_dir_item *)ptr;
- if (verify_dir_item(fs_info, eb, di))
+ if (verify_dir_item(fs_info, eb, slot, di))
return -EIO;
name_len = btrfs_dir_name_len(eb, di);
ret = replay_one_name(trans, root, path, eb, di, key);
@@ -2017,7 +2026,7 @@ again:
ptr_end = ptr + item_size;
while (ptr < ptr_end) {
di = (struct btrfs_dir_item *)ptr;
- if (verify_dir_item(fs_info, eb, di)) {
+ if (verify_dir_item(fs_info, eb, slot, di)) {
ret = -EIO;
goto out;
}
@@ -2102,6 +2111,7 @@ static int replay_xattr_deletes(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
const u64 ino)
{
+ struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_key search_key;
struct btrfs_path *log_path;
int i;
@@ -2143,6 +2153,12 @@ process_leaf:
u32 this_len = sizeof(*di) + name_len + data_len;
char *name;
+ ret = verify_dir_item(fs_info, path->nodes[0],
+ path->slots[0], di);
+ if (ret) {
+ ret = -EIO;
+ goto out;
+ }
name = kmalloc(name_len, GFP_NOFS);
if (!name) {
ret = -ENOMEM;
@@ -4546,6 +4562,12 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
this_len = sizeof(*extref) + this_name_len;
}
+ ret = btrfs_is_name_len_valid(eb, slot, name_ptr,
+ this_name_len);
+ if (!ret) {
+ ret = -EIO;
+ goto out;
+ }
if (this_name_len > name_len) {
char *new_name;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 017b67daa3bb..5eb7217738ed 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -242,6 +242,17 @@ static struct btrfs_device *__alloc_device(void)
if (!dev)
return ERR_PTR(-ENOMEM);
+ /*
+ * Preallocate a bio that's always going to be used for flushing device
+ * barriers and matches the device lifespan
+ */
+ dev->flush_bio = bio_alloc_bioset(GFP_KERNEL, 0, NULL);
+ if (!dev->flush_bio) {
+ kfree(dev);
+ return ERR_PTR(-ENOMEM);
+ }
+ bio_get(dev->flush_bio);
+
INIT_LIST_HEAD(&dev->dev_list);
INIT_LIST_HEAD(&dev->dev_alloc_list);
INIT_LIST_HEAD(&dev->resized_list);
@@ -838,6 +849,7 @@ static void __free_device(struct work_struct *work)
device = container_of(work, struct btrfs_device, rcu_work);
rcu_string_free(device->name);
+ bio_put(device->flush_bio);
kfree(device);
}
@@ -1353,15 +1365,13 @@ int find_free_dev_extent_start(struct btrfs_transaction *transaction,
int ret;
int slot;
struct extent_buffer *l;
- u64 min_search_start;
/*
* We don't want to overwrite the superblock on the drive nor any area
* used by the boot loader (grub for example), so we make sure to start
* at an offset of at least 1MB.
*/
- min_search_start = max(fs_info->alloc_start, 1024ull * 1024);
- search_start = max(search_start, min_search_start);
+ search_start = max_t(u64, search_start, SZ_1M);
path = btrfs_alloc_path();
if (!path)
@@ -2387,7 +2397,8 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
device->io_width = fs_info->sectorsize;
device->io_align = fs_info->sectorsize;
device->sector_size = fs_info->sectorsize;
- device->total_bytes = i_size_read(bdev->bd_inode);
+ device->total_bytes = round_down(i_size_read(bdev->bd_inode),
+ fs_info->sectorsize);
device->disk_total_bytes = device->total_bytes;
device->commit_total_bytes = device->total_bytes;
device->fs_info = fs_info;
@@ -2417,16 +2428,14 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
fs_info->fs_devices->total_devices++;
fs_info->fs_devices->total_rw_bytes += device->total_bytes;
- spin_lock(&fs_info->free_chunk_lock);
- fs_info->free_chunk_space += device->total_bytes;
- spin_unlock(&fs_info->free_chunk_lock);
+ atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
if (!blk_queue_nonrot(q))
fs_info->fs_devices->rotating = 1;
tmp = btrfs_super_total_bytes(fs_info->super_copy);
btrfs_set_super_total_bytes(fs_info->super_copy,
- tmp + device->total_bytes);
+ round_down(tmp + device->total_bytes, fs_info->sectorsize));
tmp = btrfs_super_num_devices(fs_info->super_copy);
btrfs_set_super_num_devices(fs_info->super_copy, tmp + 1);
@@ -2574,7 +2583,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
goto error;
}
- name = rcu_string_strdup(device_path, GFP_NOFS);
+ name = rcu_string_strdup(device_path, GFP_KERNEL);
if (!name) {
kfree(device);
ret = -ENOMEM;
@@ -2689,6 +2698,8 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
if (!device->writeable)
return -EACCES;
+ new_size = round_down(new_size, fs_info->sectorsize);
+
mutex_lock(&fs_info->chunk_mutex);
old_total = btrfs_super_total_bytes(super_copy);
diff = new_size - device->total_bytes;
@@ -2701,7 +2712,8 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
fs_devices = fs_info->fs_devices;
- btrfs_set_super_total_bytes(super_copy, old_total + diff);
+ btrfs_set_super_total_bytes(super_copy,
+ round_down(old_total + diff, fs_info->sectorsize));
device->fs_devices->total_rw_bytes += diff;
btrfs_device_set_total_bytes(device, new_size);
@@ -2874,9 +2886,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
mutex_lock(&fs_info->chunk_mutex);
btrfs_device_set_bytes_used(device,
device->bytes_used - dev_extent_len);
- spin_lock(&fs_info->free_chunk_lock);
- fs_info->free_chunk_space += dev_extent_len;
- spin_unlock(&fs_info->free_chunk_lock);
+ atomic64_add(dev_extent_len, &fs_info->free_chunk_space);
btrfs_clear_space_info_full(fs_info);
mutex_unlock(&fs_info->chunk_mutex);
}
@@ -4393,7 +4403,10 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
struct btrfs_super_block *super_copy = fs_info->super_copy;
u64 old_total = btrfs_super_total_bytes(super_copy);
u64 old_size = btrfs_device_get_total_bytes(device);
- u64 diff = old_size - new_size;
+ u64 diff;
+
+ new_size = round_down(new_size, fs_info->sectorsize);
+ diff = old_size - new_size;
if (device->is_tgtdev_for_dev_replace)
return -EINVAL;
@@ -4409,9 +4422,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
btrfs_device_set_total_bytes(device, new_size);
if (device->writeable) {
device->fs_devices->total_rw_bytes -= diff;
- spin_lock(&fs_info->free_chunk_lock);
- fs_info->free_chunk_space -= diff;
- spin_unlock(&fs_info->free_chunk_lock);
+ atomic64_sub(diff, &fs_info->free_chunk_space);
}
mutex_unlock(&fs_info->chunk_mutex);
@@ -4522,7 +4533,8 @@ again:
&fs_info->fs_devices->resized_devices);
WARN_ON(diff > old_total);
- btrfs_set_super_total_bytes(super_copy, old_total - diff);
+ btrfs_set_super_total_bytes(super_copy,
+ round_down(old_total - diff, fs_info->sectorsize));
mutex_unlock(&fs_info->chunk_mutex);
/* Now btrfs_update_device() will change the on-disk size. */
@@ -4535,9 +4547,7 @@ done:
btrfs_device_set_total_bytes(device, old_size);
if (device->writeable)
device->fs_devices->total_rw_bytes += diff;
- spin_lock(&fs_info->free_chunk_lock);
- fs_info->free_chunk_space += diff;
- spin_unlock(&fs_info->free_chunk_lock);
+ atomic64_add(diff, &fs_info->free_chunk_space);
mutex_unlock(&fs_info->chunk_mutex);
}
return ret;
@@ -4882,9 +4892,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
btrfs_device_set_bytes_used(map->stripes[i].dev, num_bytes);
}
- spin_lock(&info->free_chunk_lock);
- info->free_chunk_space -= (stripe_size * map->num_stripes);
- spin_unlock(&info->free_chunk_lock);
+ atomic64_sub(stripe_size * map->num_stripes, &info->free_chunk_space);
free_extent_map(em);
check_raid56_incompat_flag(info, type);
@@ -5029,20 +5037,19 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
- struct btrfs_root *extent_root = fs_info->extent_root;
u64 chunk_offset;
u64 sys_chunk_offset;
u64 alloc_profile;
int ret;
chunk_offset = find_next_chunk(fs_info);
- alloc_profile = btrfs_get_alloc_profile(extent_root, 0);
+ alloc_profile = btrfs_metadata_alloc_profile(fs_info);
ret = __btrfs_alloc_chunk(trans, chunk_offset, alloc_profile);
if (ret)
return ret;
sys_chunk_offset = find_next_chunk(fs_info);
- alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0);
+ alloc_profile = btrfs_system_alloc_profile(fs_info);
ret = __btrfs_alloc_chunk(trans, sys_chunk_offset, alloc_profile);
return ret;
}
@@ -6042,9 +6049,10 @@ static void btrfs_end_bio(struct bio *bio)
struct btrfs_bio *bbio = bio->bi_private;
int is_orig_bio = 0;
- if (bio->bi_error) {
+ if (bio->bi_status) {
atomic_inc(&bbio->error);
- if (bio->bi_error == -EIO || bio->bi_error == -EREMOTEIO) {
+ if (bio->bi_status == BLK_STS_IOERR ||
+ bio->bi_status == BLK_STS_TARGET) {
unsigned int stripe_index =
btrfs_io_bio(bio)->stripe_index;
struct btrfs_device *dev;
@@ -6082,13 +6090,13 @@ static void btrfs_end_bio(struct bio *bio)
* beyond the tolerance of the btrfs bio
*/
if (atomic_read(&bbio->error) > bbio->max_errors) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
} else {
/*
* this bio is actually up to date, we didn't
* go over the max number of errors
*/
- bio->bi_error = 0;
+ bio->bi_status = 0;
}
btrfs_end_bbio(bbio, bio);
@@ -6199,7 +6207,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
bio->bi_iter.bi_sector = logical >> 9;
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
btrfs_end_bbio(bbio, bio);
}
}
@@ -6266,10 +6274,9 @@ int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
continue;
}
- if (dev_nr < total_devs - 1) {
- bio = btrfs_bio_clone(first_bio, GFP_NOFS);
- BUG_ON(!bio); /* -ENOMEM */
- } else
+ if (dev_nr < total_devs - 1)
+ bio = btrfs_bio_clone(first_bio);
+ else
bio = first_bio;
submit_stripe_bio(bbio, bio, bbio->stripes[dev_nr].physical,
@@ -6684,10 +6691,8 @@ static int read_one_dev(struct btrfs_fs_info *fs_info,
device->in_fs_metadata = 1;
if (device->writeable && !device->is_tgtdev_for_dev_replace) {
device->fs_devices->total_rw_bytes += device->total_bytes;
- spin_lock(&fs_info->free_chunk_lock);
- fs_info->free_chunk_space += device->total_bytes -
- device->bytes_used;
- spin_unlock(&fs_info->free_chunk_lock);
+ atomic64_add(device->total_bytes - device->bytes_used,
+ &fs_info->free_chunk_space);
}
ret = 0;
return ret;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index c7d0fbc915ca..6f45fd60d15a 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -74,6 +74,8 @@ struct btrfs_device {
int missing;
int can_discard;
int is_tgtdev_for_dev_replace;
+ int last_flush_error;
+ int flush_bio_sent;
#ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED
seqcount_t data_seqcount;
@@ -279,6 +281,11 @@ struct btrfs_io_bio {
u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
u8 *csum_allocated;
btrfs_io_bio_end_io_t *end_io;
+ struct bvec_iter iter;
+ /*
+ * This member must come last, bio_alloc_bioset will allocate enough
+ * bytes for entire btrfs_io_bio but relies on bio being last.
+ */
struct bio bio;
};
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index b3cbf80c5acf..2c7e53f9ff1b 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -336,7 +336,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
u32 this_len = sizeof(*di) + name_len + data_len;
unsigned long name_ptr = (unsigned long)(di + 1);
- if (verify_dir_item(fs_info, leaf, di)) {
+ if (verify_dir_item(fs_info, leaf, slot, di)) {
ret = -EIO;
goto err;
}
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 135b10823c6d..c248f9286366 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -24,12 +24,13 @@
#include <linux/slab.h>
#include <linux/zlib.h>
#include <linux/zutil.h>
-#include <linux/vmalloc.h>
+#include <linux/mm.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/bio.h>
+#include <linux/refcount.h>
#include "compression.h"
struct workspace {
@@ -42,7 +43,7 @@ static void zlib_free_workspace(struct list_head *ws)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
- vfree(workspace->strm.workspace);
+ kvfree(workspace->strm.workspace);
kfree(workspace->buf);
kfree(workspace);
}
@@ -52,14 +53,14 @@ static struct list_head *zlib_alloc_workspace(void)
struct workspace *workspace;
int workspacesize;
- workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
+ workspace = kzalloc(sizeof(*workspace), GFP_KERNEL);
if (!workspace)
return ERR_PTR(-ENOMEM);
workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
zlib_inflate_workspacesize());
- workspace->strm.workspace = vmalloc(workspacesize);
- workspace->buf = kmalloc(PAGE_SIZE, GFP_NOFS);
+ workspace->strm.workspace = kvmalloc(workspacesize, GFP_KERNEL);
+ workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!workspace->strm.workspace || !workspace->buf)
goto fail;
@@ -211,10 +212,7 @@ out:
return ret;
}
-static int zlib_decompress_bio(struct list_head *ws, struct page **pages_in,
- u64 disk_start,
- struct bio *orig_bio,
- size_t srclen)
+static int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
int ret = 0, ret2;
@@ -222,8 +220,12 @@ static int zlib_decompress_bio(struct list_head *ws, struct page **pages_in,
char *data_in;
size_t total_out = 0;
unsigned long page_in_index = 0;
+ size_t srclen = cb->compressed_len;
unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
unsigned long buf_start;
+ struct page **pages_in = cb->compressed_pages;
+ u64 disk_start = cb->start;
+ struct bio *orig_bio = cb->orig_bio;
data_in = kmap(pages_in[page_in_index]);
workspace->strm.next_in = data_in;
diff --git a/fs/buffer.c b/fs/buffer.c
index 161be58c5cb0..ea0e05ec2916 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -49,7 +49,7 @@
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
- struct writeback_control *wbc);
+ enum rw_hint hint, struct writeback_control *wbc);
#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
@@ -178,7 +178,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
set_buffer_uptodate(bh);
} else {
buffer_io_error(bh, ", lost sync page write");
- set_buffer_write_io_error(bh);
+ mark_buffer_write_io_error(bh);
clear_buffer_uptodate(bh);
}
unlock_buffer(bh);
@@ -352,8 +352,7 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
set_buffer_uptodate(bh);
} else {
buffer_io_error(bh, ", lost async page write");
- mapping_set_error(page->mapping, -EIO);
- set_buffer_write_io_error(bh);
+ mark_buffer_write_io_error(bh);
clear_buffer_uptodate(bh);
SetPageError(page);
}
@@ -481,8 +480,6 @@ static void __remove_assoc_queue(struct buffer_head *bh)
{
list_del_init(&bh->b_assoc_buffers);
WARN_ON(!bh->b_assoc_map);
- if (buffer_write_io_error(bh))
- set_bit(AS_EIO, &bh->b_assoc_map->flags);
bh->b_assoc_map = NULL;
}
@@ -1181,6 +1178,17 @@ void mark_buffer_dirty(struct buffer_head *bh)
}
EXPORT_SYMBOL(mark_buffer_dirty);
+void mark_buffer_write_io_error(struct buffer_head *bh)
+{
+ set_buffer_write_io_error(bh);
+ /*
+  * Besides flagging the buffer itself, report -EIO to the mapping of the
+  * page holding the buffer and to the buffer's associated mapping
+  * (b_assoc_map), whenever each of them is present.
+  *
+  * FIXME: do we need to set this in both places?
+  */
+ if (bh->b_page && bh->b_page->mapping)
+ mapping_set_error(bh->b_page->mapping, -EIO);
+ if (bh->b_assoc_map)
+ mapping_set_error(bh->b_assoc_map, -EIO);
+}
+EXPORT_SYMBOL(mark_buffer_write_io_error);
+
/*
* Decrement a buffer_head's reference count. If all buffers against a page
* have zero reference count, are clean and unlocked, and if the page is clean
@@ -1829,7 +1837,8 @@ int __block_write_full_page(struct inode *inode, struct page *page,
do {
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
- submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc);
+ submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
+ inode->i_write_hint, wbc);
nr_underway++;
}
bh = next;
@@ -1883,7 +1892,8 @@ recover:
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
clear_buffer_dirty(bh);
- submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc);
+ submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
+ inode->i_write_hint, wbc);
nr_underway++;
}
bh = next;
@@ -3021,11 +3031,11 @@ EXPORT_SYMBOL(block_write_full_page);
sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
get_block_t *get_block)
{
- struct buffer_head tmp;
struct inode *inode = mapping->host;
- tmp.b_state = 0;
- tmp.b_blocknr = 0;
- tmp.b_size = i_blocksize(inode);
+ struct buffer_head tmp = {
+ .b_size = i_blocksize(inode),
+ };
+
get_block(inode, block, &tmp, 0);
return tmp.b_blocknr;
}
@@ -3038,7 +3048,7 @@ static void end_bio_bh_io_sync(struct bio *bio)
if (unlikely(bio_flagged(bio, BIO_QUIET)))
set_bit(BH_Quiet, &bh->b_state);
- bh->b_end_io(bh, !bio->bi_error);
+ bh->b_end_io(bh, !bio->bi_status);
bio_put(bio);
}
@@ -3091,7 +3101,7 @@ void guard_bio_eod(int op, struct bio *bio)
}
static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
- struct writeback_control *wbc)
+ enum rw_hint write_hint, struct writeback_control *wbc)
{
struct bio *bio;
@@ -3120,6 +3130,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio->bi_bdev = bh->b_bdev;
+ bio->bi_write_hint = write_hint;
bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
BUG_ON(bio->bi_iter.bi_size != bh->b_size);
@@ -3142,7 +3153,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
int submit_bh(int op, int op_flags, struct buffer_head *bh)
{
- return submit_bh_wbc(op, op_flags, bh, NULL);
+ return submit_bh_wbc(op, op_flags, bh, 0, NULL);
}
EXPORT_SYMBOL(submit_bh);
@@ -3279,8 +3290,6 @@ drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
bh = head;
do {
- if (buffer_write_io_error(bh) && page->mapping)
- mapping_set_error(page->mapping, -EIO);
if (buffer_busy(bh))
goto failed;
bh = bh->b_this_page;
@@ -3492,6 +3501,130 @@ int bh_submit_read(struct buffer_head *bh)
}
EXPORT_SYMBOL(bh_submit_read);
+/*
+ * Seek for SEEK_DATA / SEEK_HOLE within @page, starting at @lastoff.
+ *
+ * The page's buffer heads are walked in order, skipping every buffer that
+ * ends at or before @lastoff. A buffer counts as data when it is unwritten
+ * or uptodate and as a hole otherwise; the walk stops at the first remaining
+ * buffer whose state matches @whence (SEEK_DATA selects data, any other
+ * value selects holes). If @lastoff lies before the start of the page it is
+ * first moved up to the page's file offset.
+ *
+ * Returns the offset within the file on success, and -ENOENT otherwise.
+ */
+static loff_t
+page_seek_hole_data(struct page *page, loff_t lastoff, int whence)
+{
+ loff_t offset = page_offset(page);
+ struct buffer_head *bh, *head;
+ bool seek_data = whence == SEEK_DATA;
+
+ if (lastoff < offset)
+ lastoff = offset;
+
+ bh = head = page_buffers(page);
+ do {
+ offset += bh->b_size;
+ if (lastoff >= offset)
+ continue;
+
+ /*
+ * Unwritten extents that have data in the page cache covering
+ * them can be identified by the BH_Unwritten state flag.
+ * Pages with multiple buffers might have a mix of holes, data
+ * and unwritten extents - any buffer with valid data in it
+ * should have BH_Uptodate flag set on it.
+ */
+
+ if ((buffer_unwritten(bh) || buffer_uptodate(bh)) == seek_data)
+ return lastoff;
+
+ lastoff = offset;
+ } while ((bh = bh->b_this_page) != head);
+ return -ENOENT;
+}
+
+/*
+ * Seek for SEEK_DATA / SEEK_HOLE in the page cache.
+ *
+ * Within unwritten extents, the page cache determines which parts are holes
+ * and which are data: unwritten and uptodate buffer heads count as data;
+ * everything else counts as a hole.
+ *
+ * @inode:  inode whose page cache (inode->i_mapping) is searched
+ * @offset: file offset at which the search starts
+ * @length: number of bytes to search; a value <= 0 yields -ENOENT
+ * @whence: SEEK_DATA or SEEK_HOLE
+ *
+ * Pages are looked up in batches and each one is locked while its buffers
+ * are inspected; pages that no longer belong to @inode's mapping or that
+ * have no buffers are stepped over. A candidate offset is only reported if
+ * it lies before @offset + @length.
+ *
+ * Returns the resulting offset on success, and -ENOENT otherwise.
+ */
+loff_t
+page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
+ int whence)
+{
+ pgoff_t index = offset >> PAGE_SHIFT;
+ pgoff_t end = DIV_ROUND_UP(offset + length, PAGE_SIZE);
+ loff_t lastoff = offset;
+ struct pagevec pvec;
+
+ if (length <= 0)
+ return -ENOENT;
+
+ pagevec_init(&pvec, 0);
+
+ do {
+ unsigned want, nr_pages, i;
+
+ want = min_t(unsigned, end - index, PAGEVEC_SIZE);
+ nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, want);
+ if (nr_pages == 0)
+ break;
+
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page = pvec.pages[i];
+
+ /*
+ * At this point, the page may be truncated or
+ * invalidated (changing page->mapping to NULL), or
+ * even swizzled back from swapper_space to tmpfs file
+ * mapping. However, page->index will not change
+ * because we have a reference on the page.
+ *
+ * If current page offset is beyond where we've ended,
+ * we've found a hole.
+ */
+ if (whence == SEEK_HOLE &&
+ lastoff < page_offset(page))
+ goto check_range;
+
+ /* Searching done if the page index is out of range. */
+ if (page->index >= end)
+ goto not_found;
+
+ lock_page(page);
+ if (likely(page->mapping == inode->i_mapping) &&
+ page_has_buffers(page)) {
+ lastoff = page_seek_hole_data(page, lastoff, whence);
+ if (lastoff >= 0) {
+ unlock_page(page);
+ goto check_range;
+ }
+ }
+ unlock_page(page);
+ lastoff = page_offset(page) + PAGE_SIZE;
+ }
+
+ /* Searching done if fewer pages returned than wanted. */
+ if (nr_pages < want)
+ break;
+
+ index = pvec.pages[i - 1]->index + 1;
+ pagevec_release(&pvec);
+ } while (index < end);
+
+ /* When no page at lastoff and we are not done, we found a hole. */
+ if (whence != SEEK_HOLE)
+ goto not_found;
+
+check_range:
+ if (lastoff < offset + length)
+ goto out;
+not_found:
+ lastoff = -ENOENT;
+out:
+ pagevec_release(&pvec);
+ return lastoff;
+}
+
void __init buffer_init(void)
{
unsigned long nrpages;
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 9bf90bcc56ac..bb3a02ca9da4 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -18,7 +18,7 @@
#include <linux/fscache-cache.h>
#include <linux/timer.h>
-#include <linux/wait.h>
+#include <linux/wait_bit.h>
#include <linux/cred.h>
#include <linux/workqueue.h>
#include <linux/security.h>
@@ -97,7 +97,7 @@ struct cachefiles_cache {
* backing file read tracking
*/
struct cachefiles_one_read {
- wait_queue_t monitor; /* link into monitored waitqueue */
+ wait_queue_entry_t monitor; /* link into monitored waitqueue */
struct page *back_page; /* backing file page we're waiting for */
struct page *netfs_page; /* netfs page we're going to fill */
struct fscache_retrieval *op; /* retrieval op covering this */
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 41df8a27d7eb..3978b324cbca 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -204,7 +204,7 @@ wait_for_old_object:
wait_queue_head_t *wq;
signed long timeout = 60 * HZ;
- wait_queue_t wait;
+ wait_queue_entry_t wait;
bool requeue;
/* if the object we're waiting for is queued for processing,
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index afbdc418966d..18d7aa61ef0f 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -21,7 +21,7 @@
* - we use this to detect read completion of backing pages
* - the caller holds the waitqueue lock
*/
-static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
+static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode,
int sync, void *_key)
{
struct cachefiles_one_read *monitor =
@@ -48,7 +48,7 @@ static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
}
/* remove from the waitqueue */
- list_del(&wait->task_list);
+ list_del(&wait->entry);
/* move onto the action list and queue for FS-Cache thread pool */
ASSERT(monitor->op);
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 987044bca1c2..59cb307b15fb 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -131,6 +131,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
}
if (new_mode != old_mode) {
+ newattrs.ia_ctime = current_time(inode);
newattrs.ia_mode = new_mode;
newattrs.ia_valid = ATTR_MODE;
ret = __ceph_setattr(inode, &newattrs);
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index e8f11fa565c5..7df550c13d7f 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -91,6 +91,10 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
ceph_mdsc_put_request(req);
if (!inode)
return ERR_PTR(-ESTALE);
+ if (inode->i_nlink == 0) {
+ iput(inode);
+ return ERR_PTR(-ESTALE);
+ }
}
return d_obtain_alias(inode);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index dcce79b84406..4de6cdddf059 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -2022,7 +2022,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
attr->ia_size > inode->i_size) {
i_size_write(inode, attr->ia_size);
inode->i_blocks = calc_inode_blocks(attr->ia_size);
- inode->i_ctime = attr->ia_ctime;
ci->i_reported_size = attr->ia_size;
dirtied |= CEPH_CAP_FILE_EXCL;
} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
@@ -2044,7 +2043,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec,
only ? "ctime only" : "ignored");
- inode->i_ctime = attr->ia_ctime;
if (only) {
/*
* if kernel wants to dirty ctime but nothing else,
@@ -2067,7 +2065,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
if (dirtied) {
inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied,
&prealloc_cf);
- inode->i_ctime = current_time(inode);
+ inode->i_ctime = attr->ia_ctime;
}
release &= issued;
@@ -2085,6 +2083,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
req->r_inode_drop = release;
req->r_args.setattr.mask = cpu_to_le32(mask);
req->r_num_caps = 1;
+ req->r_stamp = attr->ia_ctime;
err = ceph_mdsc_do_request(mdsc, NULL, req);
}
dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f38e56fa9712..0c05df44cc6c 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1687,7 +1687,6 @@ struct ceph_mds_request *
ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
{
struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS);
- struct timespec ts;
if (!req)
return ERR_PTR(-ENOMEM);
@@ -1706,8 +1705,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
init_completion(&req->r_safe_completion);
INIT_LIST_HEAD(&req->r_unsafe_item);
- ktime_get_real_ts(&ts);
- req->r_stamp = timespec_trunc(ts, mdsc->fsc->sb->s_time_gran);
+ req->r_stamp = timespec_trunc(current_kernel_time(), mdsc->fsc->sb->s_time_gran);
req->r_op = op;
req->r_direct_mode = mode;
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 034f00f21390..afeefe79c25e 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -146,6 +146,15 @@ config CIFS_DEBUG2
option can be turned off unless you are debugging
cifs problems. If unsure, say N.
+config CIFS_DEBUG_DUMP_KEYS
+ bool "Dump encryption keys for offline decryption (Unsafe)"
+ depends on CIFS_DEBUG && CIFS_SMB2
+ help
+ Enabling this will dump the encryption and decryption keys
+ used to communicate on an encrypted share connection on the
+ console. This allows Wireshark to decrypt and dissect
+ encrypted network captures. Enable this carefully.
+
config CIFS_DFS_UPCALL
bool "DFS feature support"
depends on CIFS && KEYS
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index a0b3e7d1be48..e0445e2075b2 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -79,6 +79,10 @@ convert_sfu_char(const __u16 src_char, char *target)
static bool
convert_sfm_char(const __u16 src_char, char *target)
{
+ if (src_char >= 0xF001 && src_char <= 0xF01F) {
+ *target = src_char - 0xF000;
+ return true;
+ }
switch (src_char) {
case SFM_COLON:
*target = ':';
@@ -417,6 +421,10 @@ static __le16 convert_to_sfm_char(char src_char, bool end_of_string)
{
__le16 dest_char;
+ if (src_char >= 0x01 && src_char <= 0x1F) {
+ dest_char = cpu_to_le16(src_char + 0xF000);
+ return dest_char;
+ }
switch (src_char) {
case ':':
dest_char = cpu_to_le16(SFM_COLON);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0fd081bd2a2f..bc09df6b473a 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2234,14 +2234,16 @@ cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
set_page_writeback(page);
retry_write:
rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
- if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
- goto retry_write;
- else if (rc == -EAGAIN)
+ if (rc == -EAGAIN) {
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ goto retry_write;
redirty_page_for_writepage(wbc, page);
- else if (rc != 0)
+ } else if (rc != 0) {
SetPageError(page);
- else
+ mapping_set_error(page->mapping, rc);
+ } else {
SetPageUptodate(page);
+ }
end_page_writeback(page);
put_page(page);
free_xid(xid);
@@ -2810,12 +2812,12 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from)
struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
ssize_t rc;
+ inode_lock(inode);
/*
* We need to hold the sem to be sure nobody modifies lock list
* with a brlock that prevents writing.
*/
down_read(&cinode->lock_sem);
- inode_lock(inode);
rc = generic_write_checks(iocb, from);
if (rc <= 0)
@@ -2828,11 +2830,11 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from)
else
rc = -EACCES;
out:
+ up_read(&cinode->lock_sem);
inode_unlock(inode);
if (rc > 0)
rc = generic_write_sync(iocb, rc);
- up_read(&cinode->lock_sem);
return rc;
}
@@ -3271,7 +3273,7 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
if (!is_sync_kiocb(iocb))
ctx->iocb = iocb;
- if (to->type & ITER_IOVEC)
+ if (to->type == ITER_IOVEC)
ctx->should_dirty = true;
rc = setup_aio_ctx_iter(ctx, to, READ);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 4d1fcd76d022..a8693632235f 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -24,6 +24,7 @@
#include <linux/pagemap.h>
#include <linux/freezer.h>
#include <linux/sched/signal.h>
+#include <linux/wait_bit.h>
#include <asm/div64.h>
#include "cifsfs.h"
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index b08531977daa..3b147dc6af63 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -810,7 +810,7 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw)
if (!pages) {
pages = vmalloc(max_pages * sizeof(struct page *));
- if (!bv) {
+ if (!pages) {
kvfree(bv);
return -ENOMEM;
}
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 27bc360c7ffd..a723df3e0197 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -849,8 +849,13 @@ cifs_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_fid *fid, __u16 search_flags,
struct cifs_search_info *srch_inf)
{
- return CIFSFindFirst(xid, tcon, path, cifs_sb,
- &fid->netfid, search_flags, srch_inf, true);
+ int rc;
+
+ rc = CIFSFindFirst(xid, tcon, path, cifs_sb,
+ &fid->netfid, search_flags, srch_inf, true);
+ if (rc)
+ cifs_dbg(FYI, "find first failed=%d\n", rc);
+ return rc;
}
static int
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index c58691834eb2..ccbb397debbc 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -982,7 +982,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
kfree(utf16_path);
if (rc) {
- cifs_dbg(VFS, "open dir failed\n");
+ cifs_dbg(FYI, "open dir failed rc=%d\n", rc);
return rc;
}
@@ -992,7 +992,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_query_directory(xid, tcon, fid->persistent_fid,
fid->volatile_fid, 0, srch_inf);
if (rc) {
- cifs_dbg(VFS, "query directory failed\n");
+ cifs_dbg(FYI, "query directory failed rc=%d\n", rc);
SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
}
return rc;
@@ -1288,6 +1288,108 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
return rc;
}
+#ifdef CONFIG_CIFS_ACL
+/*
+ * Query the security descriptor of an already-open file, identified by the
+ * persistent/volatile handle pair in @cifsfid.
+ *
+ * Returns the descriptor filled in by SMB2_query_acl(), with the length it
+ * reported stored in *@pacllen. On failure returns an ERR_PTR() carrying
+ * either the tlink lookup error or the query's return code.
+ */
+static struct cifs_ntsd *
+get_smb2_acl_by_fid(struct cifs_sb_info *cifs_sb,
+		const struct cifs_fid *cifsfid, u32 *pacllen)
+{
+	struct tcon_link *link;
+	struct cifs_ntsd *sd = NULL;
+	unsigned int xid;
+	int err;
+
+	link = cifs_sb_tlink(cifs_sb);
+	if (IS_ERR(link))
+		return ERR_CAST(link);
+
+	xid = get_xid();
+	cifs_dbg(FYI, "trying to get acl\n");
+
+	err = SMB2_query_acl(xid, tlink_tcon(link), cifsfid->persistent_fid,
+			     cifsfid->volatile_fid, (void **)&sd, pacllen);
+
+	/* The request is done: release the xid, then drop the tlink. */
+	free_xid(xid);
+	cifs_put_tlink(link);
+
+	cifs_dbg(FYI, "%s: rc = %d ACL len %d\n", __func__, err, *pacllen);
+
+	return err ? ERR_PTR(err) : sd;
+}
+
+/*
+ * Fetch the security descriptor of @path: open it with READ_CONTROL access,
+ * query the ACL on the resulting handle, then close the handle again.
+ *
+ * Returns the descriptor filled in by SMB2_query_acl(), with its length in
+ * *@pacllen, or an ERR_PTR() on failure (tlink lookup error, -ENOMEM when
+ * the path cannot be converted, or the open/query return code).
+ */
+static struct cifs_ntsd *
+get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb,
+		const char *path, u32 *pacllen)
+{
+	struct cifs_ntsd *pntsd = NULL;
+	u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
+	unsigned int xid;
+	int rc;
+	struct cifs_tcon *tcon;
+	struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
+	struct cifs_fid fid;
+	struct cifs_open_parms oparms;
+	__le16 *utf16_path;
+
+	cifs_dbg(FYI, "get smb3 acl for path %s\n", path);
+	if (IS_ERR(tlink))
+		return ERR_CAST(tlink);
+
+	tcon = tlink_tcon(tlink);
+	xid = get_xid();
+
+	utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
+	if (!utf16_path) {
+		/*
+		 * Undo what was acquired above. Returning directly here
+		 * used to leak both the tlink and the xid.
+		 */
+		cifs_put_tlink(tlink);
+		free_xid(xid);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	if (backup_cred(cifs_sb))
+		oparms.create_options = CREATE_OPEN_BACKUP_INTENT;
+	else
+		oparms.create_options = 0;
+
+	oparms.tcon = tcon;
+	oparms.desired_access = READ_CONTROL;
+	oparms.disposition = FILE_OPEN;
+	oparms.fid = &fid;
+	oparms.reconnect = false;
+
+	rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
+	/* The converted path is only needed for the open request. */
+	kfree(utf16_path);
+	if (!rc) {
+		rc = SMB2_query_acl(xid, tcon, fid.persistent_fid,
+				    fid.volatile_fid, (void **)&pntsd, pacllen);
+		/* Close the handle whether or not the query succeeded. */
+		SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+	}
+
+	cifs_put_tlink(tlink);
+	free_xid(xid);
+
+	cifs_dbg(FYI, "%s: rc = %d ACL len %d\n", __func__, rc, *pacllen);
+	if (rc)
+		return ERR_PTR(rc);
+	return pntsd;
+}
+
+/*
+ * Retrieve an ACL from the server.
+ *
+ * When @inode is given and find_readable_file() hands back an open file for
+ * it, the query is made on that file's handle. Otherwise the ACL is looked
+ * up by @path.
+ */
+static struct cifs_ntsd *
+get_smb2_acl(struct cifs_sb_info *cifs_sb,
+		struct inode *inode, const char *path,
+		u32 *pacllen)
+{
+	struct cifsFileInfo *cfile;
+	struct cifs_ntsd *sd;
+
+	cfile = inode ? find_readable_file(CIFS_I(inode), true) : NULL;
+	if (cfile) {
+		sd = get_smb2_acl_by_fid(cifs_sb, &cfile->fid, pacllen);
+		/* Done with the file returned by find_readable_file(). */
+		cifsFileInfo_put(cfile);
+		return sd;
+	}
+
+	return get_smb2_acl_by_path(cifs_sb, path, pacllen);
+}
+#endif
+
static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
loff_t offset, loff_t len, bool keep_size)
{
@@ -1809,7 +1911,8 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
sg = init_sg(rqst, sign);
if (!sg) {
- cifs_dbg(VFS, "%s: Failed to init sg %d", __func__, rc);
+ cifs_dbg(VFS, "%s: Failed to init sg", __func__);
+ rc = -ENOMEM;
goto free_req;
}
@@ -1817,6 +1920,7 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
iv = kzalloc(iv_len, GFP_KERNEL);
if (!iv) {
cifs_dbg(VFS, "%s: Failed to alloc IV", __func__);
+ rc = -ENOMEM;
goto free_sg;
}
iv[0] = 3;
@@ -2391,6 +2495,11 @@ struct smb_version_operations smb20_operations = {
.dir_needs_close = smb2_dir_needs_close,
.get_dfs_refer = smb2_get_dfs_refer,
.select_sectype = smb2_select_sectype,
+#ifdef CONFIG_CIFS_ACL
+ .get_acl = get_smb2_acl,
+ .get_acl_by_fid = get_smb2_acl_by_fid,
+/* .set_acl = set_smb3_acl, */
+#endif /* CIFS_ACL */
};
struct smb_version_operations smb21_operations = {
@@ -2475,6 +2584,11 @@ struct smb_version_operations smb21_operations = {
.enum_snapshots = smb3_enum_snapshots,
.get_dfs_refer = smb2_get_dfs_refer,
.select_sectype = smb2_select_sectype,
+#ifdef CONFIG_CIFS_ACL
+ .get_acl = get_smb2_acl,
+ .get_acl_by_fid = get_smb2_acl_by_fid,
+/* .set_acl = set_smb3_acl, */
+#endif /* CIFS_ACL */
};
struct smb_version_operations smb30_operations = {
@@ -2569,6 +2683,11 @@ struct smb_version_operations smb30_operations = {
.receive_transform = smb3_receive_transform,
.get_dfs_refer = smb2_get_dfs_refer,
.select_sectype = smb2_select_sectype,
+#ifdef CONFIG_CIFS_ACL
+ .get_acl = get_smb2_acl,
+ .get_acl_by_fid = get_smb2_acl_by_fid,
+/* .set_acl = set_smb3_acl, */
+#endif /* CIFS_ACL */
};
#ifdef CONFIG_CIFS_SMB311
@@ -2751,7 +2870,7 @@ struct smb_version_values smb302_values = {
struct smb_version_values smb311_values = {
.version_string = SMB311_VERSION_STRING,
.protocol_id = SMB311_PROT_ID,
- .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES,
+ .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
.large_lock_type = 0,
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index e4afdaae743f..4938e8b6d32f 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -2081,8 +2081,9 @@ validate_and_copy_buf(unsigned int offset, unsigned int buffer_length,
static int
query_info(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid, u8 info_class,
- size_t output_len, size_t min_len, void *data)
+ u64 persistent_fid, u64 volatile_fid, u8 info_class, u8 info_type,
+ u32 additional_info, size_t output_len, size_t min_len, void **data,
+ u32 *dlen)
{
struct smb2_query_info_req *req;
struct smb2_query_info_rsp *rsp = NULL;
@@ -2108,10 +2109,11 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
if (encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
- req->InfoType = SMB2_O_INFO_FILE;
+ req->InfoType = info_type;
req->FileInfoClass = info_class;
req->PersistentFileId = persistent_fid;
req->VolatileFileId = volatile_fid;
+ req->AdditionalInformation = cpu_to_le32(additional_info);
/* 4 for rfc1002 length field and 1 for Buffer */
req->InputBufferOffset =
cpu_to_le16(sizeof(struct smb2_query_info_req) - 1 - 4);
@@ -2130,24 +2132,51 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
goto qinf_exit;
}
+	if (dlen) {
+		/* Report the reply's payload length back to the caller. */
+		*dlen = le32_to_cpu(rsp->OutputBufferLength);
+		if (!*data) {
+			/* No caller buffer: allocate one sized to the reply. */
+			*data = kmalloc(*dlen, GFP_KERNEL);
+			if (!*data) {
+				/*
+				 * Nothing on this path has set rc to an error
+				 * (presumably it is still 0 from the completed
+				 * request), so set it here; otherwise the
+				 * caller is told the query succeeded while
+				 * *data is NULL.
+				 */
+				rc = -ENOMEM;
+				cifs_dbg(VFS,
+					"Error %d allocating memory for acl\n",
+					rc);
+				*dlen = 0;
+				goto qinf_exit;
+			}
+		}
+	}
+
rc = validate_and_copy_buf(le16_to_cpu(rsp->OutputBufferOffset),
le32_to_cpu(rsp->OutputBufferLength),
- &rsp->hdr, min_len, data);
+ &rsp->hdr, min_len, *data);
qinf_exit:
free_rsp_buf(resp_buftype, rsp);
return rc;
}
+/*
+ * Issue a FILE_ALL_INFORMATION query on an open handle and have the reply
+ * copied into the caller-supplied @data buffer.
+ */
+int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
+	u64 persistent_fid, u64 volatile_fid, struct smb2_file_all_info *data)
+{
+	const size_t min_len = sizeof(struct smb2_file_all_info);
+	const size_t max_len = min_len + PATH_MAX * 2;
+	void *buf = data;
+
+	/*
+	 * dlen is NULL, so query_info() does not allocate and only reads the
+	 * buffer pointer; passing a local copy of it is equivalent.
+	 */
+	return query_info(xid, tcon, persistent_fid, volatile_fid,
+			  FILE_ALL_INFORMATION, SMB2_O_INFO_FILE, 0,
+			  max_len, min_len, &buf, NULL);
+}
+
int
-SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
+SMB2_query_acl(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid,
- struct smb2_file_all_info *data)
+ void **data, u32 *plen)
{
+ __u32 additional_info = OWNER_SECINFO | GROUP_SECINFO | DACL_SECINFO;
+ *plen = 0;
+
return query_info(xid, tcon, persistent_fid, volatile_fid,
- FILE_ALL_INFORMATION,
- sizeof(struct smb2_file_all_info) + PATH_MAX * 2,
- sizeof(struct smb2_file_all_info), data);
+ 0, SMB2_O_INFO_SECURITY, additional_info,
+ SMB2_MAX_BUFFER_SIZE,
+ sizeof(struct smb2_file_all_info), data, plen);
}
int
@@ -2155,9 +2184,10 @@ SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, __le64 *uniqueid)
{
return query_info(xid, tcon, persistent_fid, volatile_fid,
- FILE_INTERNAL_INFORMATION,
+ FILE_INTERNAL_INFORMATION, SMB2_O_INFO_FILE, 0,
+ sizeof(struct smb2_file_internal_info),
sizeof(struct smb2_file_internal_info),
- sizeof(struct smb2_file_internal_info), uniqueid);
+ (void **)&uniqueid, NULL);
}
/*
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 6853454fc871..3595cd755147 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -135,6 +135,9 @@ extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon,
extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id,
struct smb2_file_all_info *data);
+extern int SMB2_query_acl(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_file_id, u64 volatile_file_id,
+ void **data, unsigned int *plen);
extern int SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid,
__le64 *uniqueid);
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index c69ec96e92ac..67367cf1f8cd 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -335,9 +335,31 @@ generate_smb3signingkey(struct cifs_ses *ses,
if (rc)
return rc;
- return generate_key(ses, ptriplet->decryption.label,
- ptriplet->decryption.context,
- ses->smb3decryptionkey, SMB3_SIGN_KEY_SIZE);
+ rc = generate_key(ses, ptriplet->decryption.label,
+ ptriplet->decryption.context,
+ ses->smb3decryptionkey, SMB3_SIGN_KEY_SIZE);
+
+ if (rc)
+ return rc;
+
+#ifdef CONFIG_CIFS_DEBUG_DUMP_KEYS
+ cifs_dbg(VFS, "%s: dumping generated AES session keys\n", __func__);
+ /*
+ * The session id is opaque in terms of endianness, so we can't
+ * print it as a long long. we dump it as we got it on the wire
+ */
+ cifs_dbg(VFS, "Session Id %*ph\n", (int)sizeof(ses->Suid),
+ &ses->Suid);
+ cifs_dbg(VFS, "Session Key %*ph\n",
+ SMB2_NTLMV2_SESSKEY_SIZE, ses->auth_key.response);
+ cifs_dbg(VFS, "Signing Key %*ph\n",
+ SMB3_SIGN_KEY_SIZE, ses->smb3signingkey);
+ cifs_dbg(VFS, "ServerIn Key %*ph\n",
+ SMB3_SIGN_KEY_SIZE, ses->smb3encryptionkey);
+ cifs_dbg(VFS, "ServerOut Key %*ph\n",
+ SMB3_SIGN_KEY_SIZE, ses->smb3decryptionkey);
+#endif
+ return rc;
}
int
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 47a125ece11e..7efbab013957 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -536,11 +536,14 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
list_add_tail(&mid->qhead, &server->pending_mid_q);
spin_unlock(&GlobalMid_Lock);
-
+ /*
+ * Need to store the time in mid before calling I/O. For call_async,
+ * I/O response may come back and free the mid entry on another thread.
+ */
+ cifs_save_when_sent(mid);
cifs_in_send_inc(server);
rc = smb_send_rqst(server, rqst, flags);
cifs_in_send_dec(server);
- cifs_save_when_sent(mid);
if (rc < 0) {
server->sequence_number -= 2;
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 3cb5c9e2d4e7..de50e749ff05 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -188,8 +188,6 @@ static int cifs_creation_time_get(struct dentry *dentry, struct inode *inode,
pcreatetime = (__u64 *)value;
*pcreatetime = CIFS_I(inode)->createtime;
return sizeof(__u64);
-
- return rc;
}
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 9d956cd6d46f..363402fcb3ed 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -34,7 +34,7 @@ coda_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
- return vfs_iter_read(cfi->cfi_container, to, &iocb->ki_pos);
+ return vfs_iter_read(cfi->cfi_container, to, &iocb->ki_pos, 0);
}
static ssize_t
@@ -51,7 +51,7 @@ coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
host_file = cfi->cfi_container;
file_start_write(host_file);
inode_lock(coda_inode);
- ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos);
+ ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos, 0);
coda_inode->i_size = file_inode(host_file)->i_size;
coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
coda_inode->i_mtime = coda_inode->i_ctime = current_time(coda_inode);
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 6116d5275a3e..2dd4a7af7dd7 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -739,23 +739,22 @@ static int do_i2c_smbus_ioctl(struct file *file,
unsigned int cmd, struct i2c_smbus_ioctl_data32 __user *udata)
{
struct i2c_smbus_ioctl_data __user *tdata;
- compat_caddr_t datap;
+ union {
+ /* beginnings of those have identical layouts */
+ struct i2c_smbus_ioctl_data32 data32;
+ struct i2c_smbus_ioctl_data data;
+ } v;
tdata = compat_alloc_user_space(sizeof(*tdata));
if (tdata == NULL)
return -ENOMEM;
- if (!access_ok(VERIFY_WRITE, tdata, sizeof(*tdata)))
- return -EFAULT;
- if (!access_ok(VERIFY_READ, udata, sizeof(*udata)))
+ memset(&v, 0, sizeof(v));
+ if (copy_from_user(&v.data32, udata, sizeof(v.data32)))
return -EFAULT;
+ v.data.data = compat_ptr(v.data32.data);
- if (__copy_in_user(&tdata->read_write, &udata->read_write, 2 * sizeof(u8)))
- return -EFAULT;
- if (__copy_in_user(&tdata->size, &udata->size, 2 * sizeof(u32)))
- return -EFAULT;
- if (__get_user(datap, &udata->data) ||
- __put_user(compat_ptr(datap), &tdata->data))
+ if (copy_to_user(tdata, &v.data, sizeof(v.data)))
return -EFAULT;
return do_ioctl(file, cmd, (unsigned long)tdata);
@@ -866,8 +865,6 @@ COMPATIBLE_IOCTL(TIOCGDEV)
COMPATIBLE_IOCTL(TIOCCBRK)
COMPATIBLE_IOCTL(TIOCGSID)
COMPATIBLE_IOCTL(TIOCGICOUNT)
-COMPATIBLE_IOCTL(TIOCGPKT)
-COMPATIBLE_IOCTL(TIOCGPTLCK)
COMPATIBLE_IOCTL(TIOCGEXCL)
/* Little t */
COMPATIBLE_IOCTL(TIOCGETD)
@@ -883,16 +880,12 @@ COMPATIBLE_IOCTL(TIOCMGET)
COMPATIBLE_IOCTL(TIOCMBIC)
COMPATIBLE_IOCTL(TIOCMBIS)
COMPATIBLE_IOCTL(TIOCMSET)
-COMPATIBLE_IOCTL(TIOCPKT)
COMPATIBLE_IOCTL(TIOCNOTTY)
COMPATIBLE_IOCTL(TIOCSTI)
COMPATIBLE_IOCTL(TIOCOUTQ)
COMPATIBLE_IOCTL(TIOCSPGRP)
COMPATIBLE_IOCTL(TIOCGPGRP)
-COMPATIBLE_IOCTL(TIOCGPTN)
-COMPATIBLE_IOCTL(TIOCSPTLCK)
COMPATIBLE_IOCTL(TIOCSERGETLSR)
-COMPATIBLE_IOCTL(TIOCSIG)
#ifdef TIOCSRS485
COMPATIBLE_IOCTL(TIOCSRS485)
#endif
diff --git a/fs/configfs/item.c b/fs/configfs/item.c
index 8b2a994042dd..a66f6624d899 100644
--- a/fs/configfs/item.c
+++ b/fs/configfs/item.c
@@ -138,6 +138,14 @@ struct config_item *config_item_get(struct config_item *item)
}
EXPORT_SYMBOL(config_item_get);
+/*
+ * Conditional counterpart of config_item_get(): returns @item if
+ * kref_get_unless_zero() succeeds on its ci_kref, and NULL when @item is
+ * NULL or that call fails.
+ */
+struct config_item *config_item_get_unless_zero(struct config_item *item)
+{
+	if (!item)
+		return NULL;
+	if (!kref_get_unless_zero(&item->ci_kref))
+		return NULL;
+	return item;
+}
+EXPORT_SYMBOL(config_item_get_unless_zero);
+
static void config_item_cleanup(struct config_item *item)
{
struct config_item_type *t = item->ci_type;
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index a6ab012a2c6a..c8aabba502f6 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -83,14 +83,13 @@ static int create_link(struct config_item *parent_item,
ret = -ENOMEM;
sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL);
if (sl) {
- sl->sl_target = config_item_get(item);
spin_lock(&configfs_dirent_lock);
if (target_sd->s_type & CONFIGFS_USET_DROPPING) {
spin_unlock(&configfs_dirent_lock);
- config_item_put(item);
kfree(sl);
return -ENOENT;
}
+ sl->sl_target = config_item_get(item);
list_add(&sl->sl_list, &target_sd->s_links);
spin_unlock(&configfs_dirent_lock);
ret = configfs_create_link(sl, parent_item->ci_dentry,
diff --git a/fs/crypto/Kconfig b/fs/crypto/Kconfig
index 08b46e6e3995..02b7d91c9231 100644
--- a/fs/crypto/Kconfig
+++ b/fs/crypto/Kconfig
@@ -7,6 +7,7 @@ config FS_ENCRYPTION
select CRYPTO_XTS
select CRYPTO_CTS
select CRYPTO_CTR
+ select CRYPTO_SHA256
select KEYS
help
Enable encryption of files and directories. This
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index a409a84f1bca..6181e9526860 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -129,7 +129,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
goto errout;
}
err = submit_bio_wait(bio);
- if ((err == 0) && bio->bi_error)
+ if (err == 0 && bio->bi_status)
err = -EIO;
bio_put(bio);
if (err)
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 6d6eca394d4d..c7835df7e7b8 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -26,6 +26,7 @@
#include <linux/ratelimit.h>
#include <linux/dcache.h>
#include <linux/namei.h>
+#include <crypto/aes.h>
#include "fscrypt_private.h"
static unsigned int num_prealloc_crypto_pages = 32;
@@ -147,8 +148,8 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
{
struct {
__le64 index;
- u8 padding[FS_XTS_TWEAK_SIZE - sizeof(__le64)];
- } xts_tweak;
+ u8 padding[FS_IV_SIZE - sizeof(__le64)];
+ } iv;
struct skcipher_request *req = NULL;
DECLARE_FS_COMPLETION_RESULT(ecr);
struct scatterlist dst, src;
@@ -158,6 +159,16 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
BUG_ON(len == 0);
+ BUILD_BUG_ON(sizeof(iv) != FS_IV_SIZE);
+ BUILD_BUG_ON(AES_BLOCK_SIZE != FS_IV_SIZE);
+ iv.index = cpu_to_le64(lblk_num);
+ memset(iv.padding, 0, sizeof(iv.padding));
+
+ if (ci->ci_essiv_tfm != NULL) {
+ crypto_cipher_encrypt_one(ci->ci_essiv_tfm, (u8 *)&iv,
+ (u8 *)&iv);
+ }
+
req = skcipher_request_alloc(tfm, gfp_flags);
if (!req) {
printk_ratelimited(KERN_ERR
@@ -170,15 +181,11 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
page_crypt_complete, &ecr);
- BUILD_BUG_ON(sizeof(xts_tweak) != FS_XTS_TWEAK_SIZE);
- xts_tweak.index = cpu_to_le64(lblk_num);
- memset(xts_tweak.padding, 0, sizeof(xts_tweak.padding));
-
sg_init_table(&dst, 1);
sg_set_page(&dst, dest_page, len, offs);
sg_init_table(&src, 1);
sg_set_page(&src, src_page, len, offs);
- skcipher_request_set_crypt(req, &src, &dst, len, &xts_tweak);
+ skcipher_request_set_crypt(req, &src, &dst, len, &iv);
if (rw == FS_DECRYPT)
res = crypto_skcipher_decrypt(req);
else
@@ -477,6 +484,8 @@ static void __exit fscrypt_exit(void)
destroy_workqueue(fscrypt_read_workqueue);
kmem_cache_destroy(fscrypt_ctx_cachep);
kmem_cache_destroy(fscrypt_info_cachep);
+
+ fscrypt_essiv_cleanup();
}
module_exit(fscrypt_exit);
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index d1bb02b1ee58..ad9f814fdead 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -453,12 +453,3 @@ errout:
return ret;
}
EXPORT_SYMBOL(fscrypt_setup_filename);
-
-void fscrypt_free_filename(struct fscrypt_name *fname)
-{
- kfree(fname->crypto_buf.name);
- fname->crypto_buf.name = NULL;
- fname->usr_fname = NULL;
- fname->disk_name.name = NULL;
-}
-EXPORT_SYMBOL(fscrypt_free_filename);
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 1e1f8a361b75..a1d5021c31ef 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -12,10 +12,13 @@
#define _FSCRYPT_PRIVATE_H
#include <linux/fscrypt_supp.h>
+#include <crypto/hash.h>
/* Encryption parameters */
-#define FS_XTS_TWEAK_SIZE 16
+#define FS_IV_SIZE 16
#define FS_AES_128_ECB_KEY_SIZE 16
+#define FS_AES_128_CBC_KEY_SIZE 16
+#define FS_AES_128_CTS_KEY_SIZE 16
#define FS_AES_256_GCM_KEY_SIZE 32
#define FS_AES_256_CBC_KEY_SIZE 32
#define FS_AES_256_CTS_KEY_SIZE 32
@@ -54,6 +57,7 @@ struct fscrypt_info {
u8 ci_filename_mode;
u8 ci_flags;
struct crypto_skcipher *ci_ctfm;
+ struct crypto_cipher *ci_essiv_tfm;
u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE];
};
@@ -87,4 +91,7 @@ extern int fscrypt_do_page_crypto(const struct inode *inode,
extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx,
gfp_t gfp_flags);
+/* keyinfo.c */
+extern void __exit fscrypt_essiv_cleanup(void);
+
#endif /* _FSCRYPT_PRIVATE_H */
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 179e578b875b..018c588c7ac3 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -10,8 +10,13 @@
#include <keys/user-type.h>
#include <linux/scatterlist.h>
+#include <linux/ratelimit.h>
+#include <crypto/aes.h>
+#include <crypto/sha.h>
#include "fscrypt_private.h"
+static struct crypto_shash *essiv_hash_tfm;
+
static void derive_crypt_complete(struct crypto_async_request *req, int rc)
{
struct fscrypt_completion_result *ecr = req->data;
@@ -27,13 +32,13 @@ static void derive_crypt_complete(struct crypto_async_request *req, int rc)
* derive_key_aes() - Derive a key using AES-128-ECB
* @deriving_key: Encryption key used for derivation.
* @source_key: Source key to which to apply derivation.
- * @derived_key: Derived key.
+ * @derived_raw_key: Derived raw key.
*
* Return: Zero on success; non-zero otherwise.
*/
static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE],
- u8 source_key[FS_AES_256_XTS_KEY_SIZE],
- u8 derived_key[FS_AES_256_XTS_KEY_SIZE])
+ const struct fscrypt_key *source_key,
+ u8 derived_raw_key[FS_MAX_KEY_SIZE])
{
int res = 0;
struct skcipher_request *req = NULL;
@@ -60,10 +65,10 @@ static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE],
if (res < 0)
goto out;
- sg_init_one(&src_sg, source_key, FS_AES_256_XTS_KEY_SIZE);
- sg_init_one(&dst_sg, derived_key, FS_AES_256_XTS_KEY_SIZE);
- skcipher_request_set_crypt(req, &src_sg, &dst_sg,
- FS_AES_256_XTS_KEY_SIZE, NULL);
+ sg_init_one(&src_sg, source_key->raw, source_key->size);
+ sg_init_one(&dst_sg, derived_raw_key, source_key->size);
+ skcipher_request_set_crypt(req, &src_sg, &dst_sg, source_key->size,
+ NULL);
res = crypto_skcipher_encrypt(req);
if (res == -EINPROGRESS || res == -EBUSY) {
wait_for_completion(&ecr.completion);
@@ -77,7 +82,7 @@ out:
static int validate_user_key(struct fscrypt_info *crypt_info,
struct fscrypt_context *ctx, u8 *raw_key,
- const char *prefix)
+ const char *prefix, int min_keysize)
{
char *description;
struct key *keyring_key;
@@ -111,50 +116,60 @@ static int validate_user_key(struct fscrypt_info *crypt_info,
master_key = (struct fscrypt_key *)ukp->data;
BUILD_BUG_ON(FS_AES_128_ECB_KEY_SIZE != FS_KEY_DERIVATION_NONCE_SIZE);
- if (master_key->size != FS_AES_256_XTS_KEY_SIZE) {
+ if (master_key->size < min_keysize || master_key->size > FS_MAX_KEY_SIZE
+ || master_key->size % AES_BLOCK_SIZE != 0) {
printk_once(KERN_WARNING
"%s: key size incorrect: %d\n",
__func__, master_key->size);
res = -ENOKEY;
goto out;
}
- res = derive_key_aes(ctx->nonce, master_key->raw, raw_key);
+ res = derive_key_aes(ctx->nonce, master_key, raw_key);
out:
up_read(&keyring_key->sem);
key_put(keyring_key);
return res;
}
+/*
+ * Cipher algorithm name and key size for each encryption mode, indexed by
+ * the FS_ENCRYPTION_MODE_* value.
+ */
+static const struct {
+	const char *cipher_str;
+	int keysize;
+} available_modes[] = {
+	[FS_ENCRYPTION_MODE_AES_256_XTS] = {
+		.cipher_str = "xts(aes)",
+		.keysize = FS_AES_256_XTS_KEY_SIZE,
+	},
+	[FS_ENCRYPTION_MODE_AES_256_CTS] = {
+		.cipher_str = "cts(cbc(aes))",
+		.keysize = FS_AES_256_CTS_KEY_SIZE,
+	},
+	[FS_ENCRYPTION_MODE_AES_128_CBC] = {
+		.cipher_str = "cbc(aes)",
+		.keysize = FS_AES_128_CBC_KEY_SIZE,
+	},
+	[FS_ENCRYPTION_MODE_AES_128_CTS] = {
+		.cipher_str = "cts(cbc(aes))",
+		.keysize = FS_AES_128_CTS_KEY_SIZE,
+	},
+};
+
static int determine_cipher_type(struct fscrypt_info *ci, struct inode *inode,
const char **cipher_str_ret, int *keysize_ret)
{
- if (S_ISREG(inode->i_mode)) {
- if (ci->ci_data_mode == FS_ENCRYPTION_MODE_AES_256_XTS) {
- *cipher_str_ret = "xts(aes)";
- *keysize_ret = FS_AES_256_XTS_KEY_SIZE;
- return 0;
- }
- pr_warn_once("fscrypto: unsupported contents encryption mode "
- "%d for inode %lu\n",
- ci->ci_data_mode, inode->i_ino);
- return -ENOKEY;
+ u32 mode;
+
+ if (!fscrypt_valid_enc_modes(ci->ci_data_mode, ci->ci_filename_mode)) {
+ pr_warn_ratelimited("fscrypt: inode %lu uses unsupported encryption modes (contents mode %d, filenames mode %d)\n",
+ inode->i_ino,
+ ci->ci_data_mode, ci->ci_filename_mode);
+ return -EINVAL;
}
- if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) {
- if (ci->ci_filename_mode == FS_ENCRYPTION_MODE_AES_256_CTS) {
- *cipher_str_ret = "cts(cbc(aes))";
- *keysize_ret = FS_AES_256_CTS_KEY_SIZE;
- return 0;
- }
- pr_warn_once("fscrypto: unsupported filenames encryption mode "
- "%d for inode %lu\n",
- ci->ci_filename_mode, inode->i_ino);
- return -ENOKEY;
+ if (S_ISREG(inode->i_mode)) {
+ mode = ci->ci_data_mode;
+ } else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) {
+ mode = ci->ci_filename_mode;
+ } else {
+ WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %lu, which is not encryptable (file type %d)\n",
+ inode->i_ino, (inode->i_mode & S_IFMT));
+ return -EINVAL;
}
- pr_warn_once("fscrypto: unsupported file type %d for inode %lu\n",
- (inode->i_mode & S_IFMT), inode->i_ino);
- return -ENOKEY;
+ *cipher_str_ret = available_modes[mode].cipher_str;
+ *keysize_ret = available_modes[mode].keysize;
+ return 0;
}
static void put_crypt_info(struct fscrypt_info *ci)
@@ -163,9 +178,76 @@ static void put_crypt_info(struct fscrypt_info *ci)
return;
crypto_free_skcipher(ci->ci_ctfm);
+ crypto_free_cipher(ci->ci_essiv_tfm);
kmem_cache_free(fscrypt_info_cachep, ci);
}
+/*
+ * Return the file-wide SHA-256 transform, allocating it on first use.
+ * Returns an ERR_PTR() if the allocation fails.
+ */
+static struct crypto_shash *essiv_get_hash_tfm(void)
+{
+	struct crypto_shash *tfm = READ_ONCE(essiv_hash_tfm);
+	struct crypto_shash *installed;
+
+	if (likely(tfm))
+		return tfm;
+
+	tfm = crypto_alloc_shash("sha256", 0, 0);
+	if (IS_ERR(tfm)) {
+		pr_warn_ratelimited("fscrypt: error allocating SHA-256 transform: %ld\n",
+				    PTR_ERR(tfm));
+		return tfm;
+	}
+
+	/* Install ours only if the slot is still empty. */
+	installed = cmpxchg(&essiv_hash_tfm, NULL, tfm);
+	if (!installed)
+		return tfm;
+
+	/* A transform was already installed: drop ours and use that one. */
+	crypto_free_shash(tfm);
+	return installed;
+}
+
+/*
+ * Hash @keysize bytes of @key with SHA-256 and write the digest to @salt.
+ * Returns 0 on success or a negative errno.
+ */
+static int derive_essiv_salt(const u8 *key, int keysize, u8 *salt)
+{
+	struct crypto_shash *tfm = essiv_get_hash_tfm();
+
+	if (IS_ERR(tfm))
+		return PTR_ERR(tfm);
+
+	{
+		SHASH_DESC_ON_STACK(desc, tfm);
+
+		desc->tfm = tfm;
+		desc->flags = 0;
+		return crypto_shash_digest(desc, key, keysize, salt);
+	}
+}
+
+/*
+ * Set up the ESSIV cipher for @ci: an "aes" transform keyed with the
+ * SHA-256 digest of @raw_key.
+ *
+ * The transform is stored in @ci as soon as it is allocated, so on a later
+ * failure it stays attached to @ci rather than being freed here.
+ */
+static int init_essiv_generator(struct fscrypt_info *ci, const u8 *raw_key,
+				int keysize)
+{
+	u8 salt[SHA256_DIGEST_SIZE];
+	struct crypto_cipher *tfm;
+	int err;
+
+	tfm = crypto_alloc_cipher("aes", 0, 0);
+	if (IS_ERR(tfm))
+		return PTR_ERR(tfm);
+	ci->ci_essiv_tfm = tfm;
+
+	err = derive_essiv_salt(raw_key, keysize, salt);
+
+	/*
+	 * Using SHA256 to derive the salt/key will result in AES-256 being
+	 * used for IV generation. File contents encryption will still use the
+	 * configured keysize (AES-128) nevertheless.
+	 */
+	if (!err)
+		err = crypto_cipher_setkey(tfm, salt, sizeof(salt));
+
+	/* Wipe the derived salt on both the success and the error path. */
+	memzero_explicit(salt, sizeof(salt));
+	return err;
+}
+
+/* Free the SHA-256 transform held in essiv_hash_tfm; called from fscrypt_exit(). */
+void __exit fscrypt_essiv_cleanup(void)
+{
+	struct crypto_shash *tfm = essiv_hash_tfm;
+
+	crypto_free_shash(tfm);
+}
+
int fscrypt_get_encryption_info(struct inode *inode)
{
struct fscrypt_info *crypt_info;
@@ -212,6 +294,7 @@ int fscrypt_get_encryption_info(struct inode *inode)
crypt_info->ci_data_mode = ctx.contents_encryption_mode;
crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
crypt_info->ci_ctfm = NULL;
+ crypt_info->ci_essiv_tfm = NULL;
memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
sizeof(crypt_info->ci_master_key));
@@ -228,10 +311,12 @@ int fscrypt_get_encryption_info(struct inode *inode)
if (!raw_key)
goto out;
- res = validate_user_key(crypt_info, &ctx, raw_key, FS_KEY_DESC_PREFIX);
+ res = validate_user_key(crypt_info, &ctx, raw_key, FS_KEY_DESC_PREFIX,
+ keysize);
if (res && inode->i_sb->s_cop->key_prefix) {
int res2 = validate_user_key(crypt_info, &ctx, raw_key,
- inode->i_sb->s_cop->key_prefix);
+ inode->i_sb->s_cop->key_prefix,
+ keysize);
if (res2) {
if (res2 == -ENOKEY)
res = -ENOKEY;
@@ -243,18 +328,30 @@ int fscrypt_get_encryption_info(struct inode *inode)
ctfm = crypto_alloc_skcipher(cipher_str, 0, 0);
if (!ctfm || IS_ERR(ctfm)) {
res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
- printk(KERN_DEBUG
- "%s: error %d (inode %u) allocating crypto tfm\n",
- __func__, res, (unsigned) inode->i_ino);
+ pr_debug("%s: error %d (inode %lu) allocating crypto tfm\n",
+ __func__, res, inode->i_ino);
goto out;
}
crypt_info->ci_ctfm = ctfm;
crypto_skcipher_clear_flags(ctfm, ~0);
crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_REQ_WEAK_KEY);
+ /*
+ * if the provided key is longer than keysize, we use the first
+ * keysize bytes of the derived key only
+ */
res = crypto_skcipher_setkey(ctfm, raw_key, keysize);
if (res)
goto out;
+ if (S_ISREG(inode->i_mode) &&
+ crypt_info->ci_data_mode == FS_ENCRYPTION_MODE_AES_128_CBC) {
+ res = init_essiv_generator(crypt_info, raw_key, keysize);
+ if (res) {
+ pr_debug("%s: error %d (inode %lu) allocating essiv tfm\n",
+ __func__, res, inode->i_ino);
+ goto out;
+ }
+ }
if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) == NULL)
crypt_info = NULL;
out:
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index 210976e7a269..ce07a86200f3 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -38,12 +38,8 @@ static int create_encryption_context_from_policy(struct inode *inode,
memcpy(ctx.master_key_descriptor, policy->master_key_descriptor,
FS_KEY_DESCRIPTOR_SIZE);
- if (!fscrypt_valid_contents_enc_mode(
- policy->contents_encryption_mode))
- return -EINVAL;
-
- if (!fscrypt_valid_filenames_enc_mode(
- policy->filenames_encryption_mode))
+ if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode,
+ policy->filenames_encryption_mode))
return -EINVAL;
if (policy->flags & ~FS_POLICY_FLAGS_VALID)
@@ -260,6 +256,7 @@ int fscrypt_inherit_context(struct inode *parent, struct inode *child,
memcpy(ctx.master_key_descriptor, ci->ci_master_key,
FS_KEY_DESCRIPTOR_SIZE);
get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
+ BUILD_BUG_ON(sizeof(ctx) != FSCRYPT_SET_CONTEXT_MAX_SIZE);
res = parent->i_sb->s_cop->set_context(child, &ctx,
sizeof(ctx), fs_data);
if (res)
diff --git a/fs/dax.c b/fs/dax.c
index 2a6889b3585f..306c2b603fb8 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -25,7 +25,6 @@
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/pagevec.h>
-#include <linux/pmem.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/uio.h>
@@ -84,7 +83,7 @@ struct exceptional_entry_key {
};
struct wait_exceptional_entry_queue {
- wait_queue_t wait;
+ wait_queue_entry_t wait;
struct exceptional_entry_key key;
};
@@ -108,7 +107,7 @@ static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping,
return wait_table + hash;
}
-static int wake_exceptional_entry_func(wait_queue_t *wait, unsigned int mode,
+static int wake_exceptional_entry_func(wait_queue_entry_t *wait, unsigned int mode,
int sync, void *keyp)
{
struct exceptional_entry_key *key = keyp;
@@ -784,7 +783,7 @@ static int dax_writeback_one(struct block_device *bdev,
}
dax_mapping_entry_mkclean(mapping, index, pfn_t_to_pfn(pfn));
- wb_cache_pmem(kaddr, size);
+ dax_flush(dax_dev, pgoff, kaddr, size);
/*
* After we have flushed the cache, we can clear the dirty tag. There
* cannot be new dirty data in the pfn after the flush has completed as
@@ -856,9 +855,12 @@ int dax_writeback_mapping_range(struct address_space *mapping,
ret = dax_writeback_one(bdev, dax_dev, mapping,
indices[i], pvec.pages[i]);
- if (ret < 0)
+ if (ret < 0) {
+ mapping_set_error(mapping, ret);
goto out;
+ }
}
+ start_index = indices[pvec.nr - 1] + 1;
}
out:
put_dax(dax_dev);
@@ -975,7 +977,8 @@ int __dax_zero_page_range(struct block_device *bdev,
dax_read_unlock(id);
return rc;
}
- clear_pmem(kaddr + offset, size);
+ memset(kaddr + offset, 0, size);
+ dax_flush(dax_dev, pgoff, kaddr + offset, size);
dax_read_unlock(id);
}
return 0;
@@ -1054,7 +1057,8 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
map_len = end - pos;
if (iov_iter_rw(iter) == WRITE)
- map_len = copy_from_iter_pmem(kaddr, map_len, iter);
+ map_len = dax_copy_from_iter(dax_dev, pgoff, kaddr,
+ map_len, iter);
else
map_len = copy_to_iter(kaddr, map_len, iter);
if (map_len <= 0) {
@@ -1212,7 +1216,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
case IOMAP_MAPPED:
if (iomap.flags & IOMAP_F_NEW) {
count_vm_event(PGMAJFAULT);
- mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
+ count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
major = VM_FAULT_MAJOR;
}
error = dax_insert_mapping(mapping, iomap.bdev, iomap.dax_dev,
diff --git a/fs/dcache.c b/fs/dcache.c
index cddf39777835..7ece68d0d4db 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -277,6 +277,33 @@ static inline int dname_external(const struct dentry *dentry)
return dentry->d_name.name != dentry->d_iname;
}
+void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ if (unlikely(dname_external(dentry))) {
+ struct external_name *p = external_name(dentry);
+ atomic_inc(&p->u.count);
+ spin_unlock(&dentry->d_lock);
+ name->name = p->name;
+ } else {
+ memcpy(name->inline_name, dentry->d_iname, DNAME_INLINE_LEN);
+ spin_unlock(&dentry->d_lock);
+ name->name = name->inline_name;
+ }
+}
+EXPORT_SYMBOL(take_dentry_name_snapshot);
+
+void release_dentry_name_snapshot(struct name_snapshot *name)
+{
+ if (unlikely(name->name != name->inline_name)) {
+ struct external_name *p;
+ p = container_of(name->name, struct external_name, name[0]);
+ if (unlikely(atomic_dec_and_test(&p->u.count)))
+ kfree_rcu(p, u.head);
+ }
+}
+EXPORT_SYMBOL(release_dentry_name_snapshot);
+
static inline void __d_set_inode_and_type(struct dentry *dentry,
struct inode *inode,
unsigned type_flags)
@@ -1494,7 +1521,7 @@ static void check_and_drop(void *_data)
{
struct detach_data *data = _data;
- if (!data->mountpoint && !data->select.found)
+ if (!data->mountpoint && list_empty(&data->select.dispose))
__d_drop(data->select.start);
}
@@ -1536,17 +1563,15 @@ void d_invalidate(struct dentry *dentry)
d_walk(dentry, &data, detach_and_collect, check_and_drop);
- if (data.select.found)
+ if (!list_empty(&data.select.dispose))
shrink_dentry_list(&data.select.dispose);
+ else if (!data.mountpoint)
+ return;
if (data.mountpoint) {
detach_mounts(data.mountpoint);
dput(data.mountpoint);
}
-
- if (!data.mountpoint && !data.select.found)
- break;
-
cond_resched();
}
}
@@ -3548,8 +3573,6 @@ __setup("dhash_entries=", set_dhash_entries);
static void __init dcache_init_early(void)
{
- unsigned int loop;
-
/* If hashes are distributed across NUMA nodes, defer
* hash allocation until vmalloc space is available.
*/
@@ -3561,24 +3584,19 @@ static void __init dcache_init_early(void)
sizeof(struct hlist_bl_head),
dhash_entries,
13,
- HASH_EARLY,
+ HASH_EARLY | HASH_ZERO,
&d_hash_shift,
&d_hash_mask,
0,
0);
-
- for (loop = 0; loop < (1U << d_hash_shift); loop++)
- INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
}
static void __init dcache_init(void)
{
- unsigned int loop;
-
- /*
+ /*
* A constructor could be added for stable state like the lists,
* but it is probably not worth it because of the cache nature
- * of the dcache.
+ * of the dcache.
*/
dentry_cache = KMEM_CACHE(dentry,
SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT);
@@ -3592,14 +3610,11 @@ static void __init dcache_init(void)
sizeof(struct hlist_bl_head),
dhash_entries,
13,
- 0,
+ HASH_ZERO,
&d_hash_shift,
&d_hash_mask,
0,
0);
-
- for (loop = 0; loop < (1U << d_hash_shift); loop++)
- INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
}
/* SLAB cache for __getname() consumers */
@@ -3610,6 +3625,11 @@ EXPORT_SYMBOL(d_genocide);
void __init vfs_caches_init_early(void)
{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in_lookup_hashtable); i++)
+ INIT_HLIST_BL_HEAD(&in_lookup_hashtable[i]);
+
dcache_init_early();
inode_init_early();
}
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 354e2ab62031..6dabc4a10396 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -9,7 +9,7 @@
* 2 as published by the Free Software Foundation.
*
* debugfs is for people to use instead of /proc or /sys.
- * See Documentation/DocBook/filesystems for more details.
+ * See Documentation/filesystems/ for more details.
*
*/
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index e892ae7d89f8..a0e4e2f7e0be 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -9,7 +9,7 @@
* 2 as published by the Free Software Foundation.
*
* debugfs is for people to use instead of /proc or /sys.
- * See Documentation/DocBook/kernel-api for more details.
+ * See ./Documentation/core-api/kernel-api.rst for more details.
*
*/
@@ -766,7 +766,7 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
{
int error;
struct dentry *dentry = NULL, *trap;
- const char *old_name;
+ struct name_snapshot old_name;
trap = lock_rename(new_dir, old_dir);
/* Source or destination directories don't exist? */
@@ -781,19 +781,19 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
if (IS_ERR(dentry) || dentry == trap || d_really_is_positive(dentry))
goto exit;
- old_name = fsnotify_oldname_init(old_dentry->d_name.name);
+ take_dentry_name_snapshot(&old_name, old_dentry);
error = simple_rename(d_inode(old_dir), old_dentry, d_inode(new_dir),
dentry, 0);
if (error) {
- fsnotify_oldname_free(old_name);
+ release_dentry_name_snapshot(&old_name);
goto exit;
}
d_move(old_dentry, dentry);
- fsnotify_move(d_inode(old_dir), d_inode(new_dir), old_name,
+ fsnotify_move(d_inode(old_dir), d_inode(new_dir), old_name.name,
d_is_dir(old_dentry),
NULL, old_dentry);
- fsnotify_oldname_free(old_name);
+ release_dentry_name_snapshot(&old_name);
unlock_rename(new_dir, old_dir);
dput(dentry);
return old_dentry;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index a04ebea77de8..08cf27811e5a 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -294,7 +294,7 @@ static void dio_aio_complete_work(struct work_struct *work)
dio_complete(dio, 0, true);
}
-static int dio_bio_complete(struct dio *dio, struct bio *bio);
+static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio);
/*
* Asynchronous IO callback.
@@ -348,13 +348,12 @@ static void dio_bio_end_io(struct bio *bio)
/**
* dio_end_io - handle the end io action for the given bio
* @bio: The direct io bio thats being completed
- * @error: Error if there was one
*
* This is meant to be called by any filesystem that uses their own dio_submit_t
* so that the DIO specific endio actions are dealt with after the filesystem
* has done it's completion work.
*/
-void dio_end_io(struct bio *bio, int error)
+void dio_end_io(struct bio *bio)
{
struct dio *dio = bio->bi_private;
@@ -386,6 +385,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
else
bio->bi_end_io = dio_bio_end_io;
+ bio->bi_write_hint = dio->iocb->ki_hint;
+
sdio->bio = bio;
sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
}
@@ -474,17 +475,20 @@ static struct bio *dio_await_one(struct dio *dio)
/*
* Process one completed BIO. No locks are held.
*/
-static int dio_bio_complete(struct dio *dio, struct bio *bio)
+static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio)
{
struct bio_vec *bvec;
unsigned i;
- int err;
+ blk_status_t err = bio->bi_status;
- if (bio->bi_error)
- dio->io_error = -EIO;
+ if (err) {
+ if (err == BLK_STS_AGAIN && (bio->bi_opf & REQ_NOWAIT))
+ dio->io_error = -EAGAIN;
+ else
+ dio->io_error = -EIO;
+ }
if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) {
- err = bio->bi_error;
bio_check_pages_dirty(bio); /* transfers ownership */
} else {
bio_for_each_segment_all(bvec, bio, i) {
@@ -495,7 +499,6 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
set_page_dirty_lock(page);
put_page(page);
}
- err = bio->bi_error;
bio_put(bio);
}
return err;
@@ -539,7 +542,7 @@ static inline int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
bio = dio->bio_list;
dio->bio_list = bio->bi_private;
spin_unlock_irqrestore(&dio->bio_lock, flags);
- ret2 = dio_bio_complete(dio, bio);
+ ret2 = blk_status_to_errno(dio_bio_complete(dio, bio));
if (ret == 0)
ret = ret2;
}
@@ -1197,6 +1200,8 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
if (iov_iter_rw(iter) == WRITE) {
dio->op = REQ_OP_WRITE;
dio->op_flags = REQ_SYNC | REQ_IDLE;
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ dio->op_flags |= REQ_NOWAIT;
} else {
dio->op = REQ_OP_READ;
}
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 68b9fffcb2c8..2fb4eadaa118 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -191,7 +191,7 @@ static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
* This is used to atomically remove a wait queue entry from the eventfd wait
* queue head, and read/reset the counter value.
*/
-int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
+int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
__u64 *cnt)
{
unsigned long flags;
@@ -215,8 +215,8 @@ EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
*
* Returns %0 if successful, or the following error codes:
*
- * -EAGAIN : The operation would have blocked but @no_wait was non-zero.
- * -ERESTARTSYS : A signal interrupted the wait operation.
+ * - -EAGAIN : The operation would have blocked but @no_wait was non-zero.
+ * - -ERESTARTSYS : A signal interrupted the wait operation.
*
* If @no_wait is zero, the function might sleep until the eventfd internal
* counter becomes greater than zero.
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 5420767c9b68..b1c8e23ddf65 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -244,7 +244,7 @@ struct eppoll_entry {
* Wait queue item that will be linked to the target file wait
* queue head.
*/
- wait_queue_t wait;
+ wait_queue_entry_t wait;
/* The wait queue head that linked the "wait" wait queue item */
wait_queue_head_t *whead;
@@ -347,13 +347,13 @@ static inline int ep_is_linked(struct list_head *p)
return !list_empty(p);
}
-static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_t *p)
+static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_entry_t *p)
{
return container_of(p, struct eppoll_entry, wait);
}
/* Get the "struct epitem" from a wait queue pointer */
-static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
+static inline struct epitem *ep_item_from_wait(wait_queue_entry_t *p)
{
return container_of(p, struct eppoll_entry, wait)->base;
}
@@ -1078,7 +1078,7 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
* mechanism. It is called by the stored file descriptors when they
* have events to report.
*/
-static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
int pwake = 0;
unsigned long flags;
@@ -1094,7 +1094,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
* can't use __remove_wait_queue(). whead->lock is held by
* the caller.
*/
- list_del_init(&wait->task_list);
+ list_del_init(&wait->entry);
}
spin_lock_irqsave(&ep->lock, flags);
@@ -1699,7 +1699,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
int res = 0, eavail, timed_out = 0;
unsigned long flags;
u64 slack = 0;
- wait_queue_t wait;
+ wait_queue_entry_t wait;
ktime_t expires, *to = NULL;
if (timeout > 0) {
diff --git a/fs/exec.c b/fs/exec.c
index 72934df68471..62175cbcc801 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -220,7 +220,24 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
if (write) {
unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
- struct rlimit *rlim;
+ unsigned long ptr_size, limit;
+
+ /*
+ * Since the stack will hold pointers to the strings, we
+ * must account for them as well.
+ *
+ * The size calculation is the entire vma while each arg page is
+ * built, so each time we get here it's calculating how far it
+ * is currently (rather than each call being just the newly
+ * added size from the arg page). As a result, we need to
+ * always add the entire size of the pointers, so that on the
+ * last call to get_arg_page() we'll actually have the entire
+ * correct size.
+ */
+ ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
+ if (ptr_size > ULONG_MAX - size)
+ goto fail;
+ size += ptr_size;
acct_arg_size(bprm, size / PAGE_SIZE);
@@ -232,20 +249,24 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
return page;
/*
- * Limit to 1/4-th the stack size for the argv+env strings.
+ * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM
+ * (whichever is smaller) for the argv+env strings.
* This ensures that:
* - the remaining binfmt code will not run out of stack space,
* - the program will have a reasonable amount of stack left
* to work from.
*/
- rlim = current->signal->rlim;
- if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) {
- put_page(page);
- return NULL;
- }
+ limit = _STK_LIM / 4 * 3;
+ limit = min(limit, rlimit(RLIMIT_STACK) / 4);
+ if (size > limit)
+ goto fail;
}
return page;
+
+fail:
+ put_page(page);
+ return NULL;
}
static void put_arg_page(struct page *page)
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index 8eeb694332fe..98233a97b7b8 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -72,7 +72,7 @@ static int exofs_commit_chunk(struct page *page, loff_t pos, unsigned len)
set_page_dirty(page);
if (IS_DIRSYNC(dir))
- err = write_one_page(page, 1);
+ err = write_one_page(page);
else
unlock_page(page);
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index d9650c9508e4..e2709695b177 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -100,7 +100,7 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len)
}
if (IS_DIRSYNC(dir)) {
- err = write_one_page(page, 1);
+ err = write_one_page(page);
if (!err)
err = sync_inode_metadata(dir, 1);
} else {
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 03f5ce1d3dbe..23ebb92484c6 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -113,7 +113,7 @@ struct ext2_sb_info {
* of the mount options.
*/
spinlock_t s_lock;
- struct mb_cache *s_mb_cache;
+ struct mb_cache *s_ea_block_cache;
};
static inline spinlock_t *
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index b21891a6bfca..d34d32bdc944 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -174,15 +174,12 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
int ret;
struct super_block *sb = file->f_mapping->host->i_sb;
- struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
ret = generic_file_fsync(file, start, end, datasync);
- if (ret == -EIO || test_and_clear_bit(AS_EIO, &mapping->flags)) {
+ if (ret == -EIO)
/* We don't really know where the IO error happened... */
ext2_error(sb, __func__,
"detected IO error when writing metadata buffers");
- ret = -EIO;
- }
return ret;
}
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 9c2028b50e5c..7b1bc9059863 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -147,9 +147,9 @@ static void ext2_put_super (struct super_block * sb)
ext2_quota_off_umount(sb);
- if (sbi->s_mb_cache) {
- ext2_xattr_destroy_cache(sbi->s_mb_cache);
- sbi->s_mb_cache = NULL;
+ if (sbi->s_ea_block_cache) {
+ ext2_xattr_destroy_cache(sbi->s_ea_block_cache);
+ sbi->s_ea_block_cache = NULL;
}
if (!(sb->s_flags & MS_RDONLY)) {
struct ext2_super_block *es = sbi->s_es;
@@ -1131,9 +1131,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
}
#ifdef CONFIG_EXT2_FS_XATTR
- sbi->s_mb_cache = ext2_xattr_create_cache();
- if (!sbi->s_mb_cache) {
- ext2_msg(sb, KERN_ERR, "Failed to create an mb_cache");
+ sbi->s_ea_block_cache = ext2_xattr_create_cache();
+ if (!sbi->s_ea_block_cache) {
+ ext2_msg(sb, KERN_ERR, "Failed to create ea_block_cache");
goto failed_mount3;
}
#endif
@@ -1182,8 +1182,8 @@ cantfind_ext2:
sb->s_id);
goto failed_mount;
failed_mount3:
- if (sbi->s_mb_cache)
- ext2_xattr_destroy_cache(sbi->s_mb_cache);
+ if (sbi->s_ea_block_cache)
+ ext2_xattr_destroy_cache(sbi->s_ea_block_cache);
percpu_counter_destroy(&sbi->s_freeblocks_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index fbdb8f171893..1b9b1268d418 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -121,6 +121,8 @@ const struct xattr_handler *ext2_xattr_handlers[] = {
NULL
};
+#define EA_BLOCK_CACHE(inode) (EXT2_SB(inode->i_sb)->s_ea_block_cache)
+
static inline const struct xattr_handler *
ext2_xattr_handler(int name_index)
{
@@ -150,7 +152,7 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name,
size_t name_len, size;
char *end;
int error;
- struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
name_index, name, buffer, (long)buffer_size);
@@ -195,7 +197,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
goto found;
entry = next;
}
- if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
+ if (ext2_xattr_cache_insert(ea_block_cache, bh))
ea_idebug(inode, "cache insert failed");
error = -ENODATA;
goto cleanup;
@@ -208,7 +210,7 @@ found:
le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
goto bad_block;
- if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
+ if (ext2_xattr_cache_insert(ea_block_cache, bh))
ea_idebug(inode, "cache insert failed");
if (buffer) {
error = -ERANGE;
@@ -246,7 +248,7 @@ ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
char *end;
size_t rest = buffer_size;
int error;
- struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
ea_idebug(inode, "buffer=%p, buffer_size=%ld",
buffer, (long)buffer_size);
@@ -281,7 +283,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
goto bad_block;
entry = next;
}
- if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
+ if (ext2_xattr_cache_insert(ea_block_cache, bh))
ea_idebug(inode, "cache insert failed");
/* list the attribute names */
@@ -493,8 +495,8 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
* This must happen under buffer lock for
* ext2_xattr_set2() to reliably detect modified block
*/
- mb_cache_entry_delete_block(EXT2_SB(sb)->s_mb_cache,
- hash, bh->b_blocknr);
+ mb_cache_entry_delete(EA_BLOCK_CACHE(inode), hash,
+ bh->b_blocknr);
/* keep the buffer locked while modifying it. */
} else {
@@ -627,7 +629,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
struct super_block *sb = inode->i_sb;
struct buffer_head *new_bh = NULL;
int error;
- struct mb_cache *ext2_mb_cache = EXT2_SB(sb)->s_mb_cache;
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
if (header) {
new_bh = ext2_xattr_cache_find(inode, header);
@@ -655,7 +657,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
don't need to change the reference count. */
new_bh = old_bh;
get_bh(new_bh);
- ext2_xattr_cache_insert(ext2_mb_cache, new_bh);
+ ext2_xattr_cache_insert(ea_block_cache, new_bh);
} else {
/* We need to allocate a new block */
ext2_fsblk_t goal = ext2_group_first_block_no(sb,
@@ -676,7 +678,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
memcpy(new_bh->b_data, header, new_bh->b_size);
set_buffer_uptodate(new_bh);
unlock_buffer(new_bh);
- ext2_xattr_cache_insert(ext2_mb_cache, new_bh);
+ ext2_xattr_cache_insert(ea_block_cache, new_bh);
ext2_xattr_update_super_block(sb);
}
@@ -721,8 +723,8 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
* This must happen under buffer lock for
* ext2_xattr_set2() to reliably detect freed block
*/
- mb_cache_entry_delete_block(ext2_mb_cache,
- hash, old_bh->b_blocknr);
+ mb_cache_entry_delete(ea_block_cache, hash,
+ old_bh->b_blocknr);
/* Free the old block. */
ea_bdebug(old_bh, "freeing");
ext2_free_blocks(inode, old_bh->b_blocknr, 1);
@@ -795,8 +797,8 @@ ext2_xattr_delete_inode(struct inode *inode)
* This must happen under buffer lock for ext2_xattr_set2() to
* reliably detect freed block
*/
- mb_cache_entry_delete_block(EXT2_SB(inode->i_sb)->s_mb_cache,
- hash, bh->b_blocknr);
+ mb_cache_entry_delete(EA_BLOCK_CACHE(inode), hash,
+ bh->b_blocknr);
ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
get_bh(bh);
bforget(bh);
@@ -897,21 +899,21 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
{
__u32 hash = le32_to_cpu(header->h_hash);
struct mb_cache_entry *ce;
- struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
if (!header->h_hash)
return NULL; /* never share */
ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
again:
- ce = mb_cache_entry_find_first(ext2_mb_cache, hash);
+ ce = mb_cache_entry_find_first(ea_block_cache, hash);
while (ce) {
struct buffer_head *bh;
- bh = sb_bread(inode->i_sb, ce->e_block);
+ bh = sb_bread(inode->i_sb, ce->e_value);
if (!bh) {
ext2_error(inode->i_sb, "ext2_xattr_cache_find",
"inode %ld: block %ld read error",
- inode->i_ino, (unsigned long) ce->e_block);
+ inode->i_ino, (unsigned long) ce->e_value);
} else {
lock_buffer(bh);
/*
@@ -924,27 +926,27 @@ again:
* entry is still hashed is reliable.
*/
if (hlist_bl_unhashed(&ce->e_hash_list)) {
- mb_cache_entry_put(ext2_mb_cache, ce);
+ mb_cache_entry_put(ea_block_cache, ce);
unlock_buffer(bh);
brelse(bh);
goto again;
} else if (le32_to_cpu(HDR(bh)->h_refcount) >
EXT2_XATTR_REFCOUNT_MAX) {
ea_idebug(inode, "block %ld refcount %d>%d",
- (unsigned long) ce->e_block,
+ (unsigned long) ce->e_value,
le32_to_cpu(HDR(bh)->h_refcount),
EXT2_XATTR_REFCOUNT_MAX);
} else if (!ext2_xattr_cmp(header, HDR(bh))) {
ea_bdebug(bh, "b_count=%d",
atomic_read(&(bh->b_count)));
- mb_cache_entry_touch(ext2_mb_cache, ce);
- mb_cache_entry_put(ext2_mb_cache, ce);
+ mb_cache_entry_touch(ea_block_cache, ce);
+ mb_cache_entry_put(ea_block_cache, ce);
return bh;
}
unlock_buffer(bh);
brelse(bh);
}
- ce = mb_cache_entry_find_next(ext2_mb_cache, ce);
+ ce = mb_cache_entry_find_next(ea_block_cache, ce);
}
return NULL;
}
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 3ec0e46de95f..09441ae07a5b 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -183,7 +183,7 @@ ext4_get_acl(struct inode *inode, int type)
*/
static int
__ext4_set_acl(handle_t *handle, struct inode *inode, int type,
- struct posix_acl *acl)
+ struct posix_acl *acl, int xattr_flags)
{
int name_index;
void *value = NULL;
@@ -218,7 +218,7 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type,
}
error = ext4_xattr_set_handle(handle, inode, name_index, "",
- value, size, 0);
+ value, size, xattr_flags);
kfree(value);
if (!error)
@@ -231,18 +231,23 @@ int
ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
handle_t *handle;
- int error, retries = 0;
+ int error, credits, retries = 0;
+ size_t acl_size = acl ? ext4_acl_size(acl->a_count) : 0;
error = dquot_initialize(inode);
if (error)
return error;
retry:
- handle = ext4_journal_start(inode, EXT4_HT_XATTR,
- ext4_jbd2_credits_xattr(inode));
+ error = ext4_xattr_set_credits(inode, acl_size, false /* is_create */,
+ &credits);
+ if (error)
+ return error;
+
+ handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
if (IS_ERR(handle))
return PTR_ERR(handle);
- error = __ext4_set_acl(handle, inode, type, acl);
+ error = __ext4_set_acl(handle, inode, type, acl, 0 /* xattr_flags */);
ext4_journal_stop(handle);
if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry;
@@ -267,13 +272,13 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
if (default_acl) {
error = __ext4_set_acl(handle, inode, ACL_TYPE_DEFAULT,
- default_acl);
+ default_acl, XATTR_CREATE);
posix_acl_release(default_acl);
}
if (acl) {
if (!error)
error = __ext4_set_acl(handle, inode, ACL_TYPE_ACCESS,
- acl);
+ acl, XATTR_CREATE);
posix_acl_release(acl);
}
return error;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 32191548abed..9ebde0cd632e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1114,6 +1114,7 @@ struct ext4_inode_info {
/*
* Mount flags set via mount options or defaults
*/
+#define EXT4_MOUNT_NO_MBCACHE 0x00001 /* Do not use mbcache */
#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */
#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
@@ -1444,6 +1445,8 @@ struct ext4_sb_info {
unsigned int *s_mb_maxs;
unsigned int s_group_info_size;
unsigned int s_mb_free_pending;
+ struct list_head s_freed_data_list; /* List of blocks to be freed
+ after commit completed */
/* tunables */
unsigned long s_stripe;
@@ -1516,7 +1519,8 @@ struct ext4_sb_info {
struct list_head s_es_list; /* List of inodes with reclaimable extents */
long s_es_nr_inode;
struct ext4_es_stats s_es_stats;
- struct mb_cache *s_mb_cache;
+ struct mb_cache *s_ea_block_cache;
+ struct mb_cache *s_ea_inode_cache;
spinlock_t s_es_lock ____cacheline_aligned_in_smp;
/* Ratelimit ext4 messages. */
@@ -1797,10 +1801,12 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
EXT4_FEATURE_INCOMPAT_EXTENTS| \
EXT4_FEATURE_INCOMPAT_64BIT| \
EXT4_FEATURE_INCOMPAT_FLEX_BG| \
+ EXT4_FEATURE_INCOMPAT_EA_INODE| \
EXT4_FEATURE_INCOMPAT_MMP | \
EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
EXT4_FEATURE_INCOMPAT_ENCRYPT | \
- EXT4_FEATURE_INCOMPAT_CSUM_SEED)
+ EXT4_FEATURE_INCOMPAT_CSUM_SEED | \
+ EXT4_FEATURE_INCOMPAT_LARGEDIR)
#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
@@ -2098,6 +2104,12 @@ static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc)
return (struct ext4_inode *) (iloc->bh->b_data + iloc->offset);
}
+static inline bool ext4_is_quota_file(struct inode *inode)
+{
+ return IS_NOQUOTA(inode) &&
+ !(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL);
+}
+
/*
* This structure is stuffed into the struct file's private_data field
* for directories. It is where we put information so that we can do
@@ -2126,6 +2138,16 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
*/
#define ERR_BAD_DX_DIR (-(MAX_ERRNO - 1))
+/* htree levels for ext4 */
+#define EXT4_HTREE_LEVEL_COMPAT 2
+#define EXT4_HTREE_LEVEL 3
+
+static inline int ext4_dir_htree_level(struct super_block *sb)
+{
+ return ext4_has_feature_largedir(sb) ?
+ EXT4_HTREE_LEVEL : EXT4_HTREE_LEVEL_COMPAT;
+}
+
/*
* Timeout and state flag for lazy initialization inode thread.
*/
@@ -2389,16 +2411,17 @@ extern int ext4fs_dirhash(const char *name, int len, struct
/* ialloc.c */
extern struct inode *__ext4_new_inode(handle_t *, struct inode *, umode_t,
const struct qstr *qstr, __u32 goal,
- uid_t *owner, int handle_type,
- unsigned int line_no, int nblocks);
+ uid_t *owner, __u32 i_flags,
+ int handle_type, unsigned int line_no,
+ int nblocks);
-#define ext4_new_inode(handle, dir, mode, qstr, goal, owner) \
+#define ext4_new_inode(handle, dir, mode, qstr, goal, owner, i_flags) \
__ext4_new_inode((handle), (dir), (mode), (qstr), (goal), (owner), \
- 0, 0, 0)
+ i_flags, 0, 0, 0)
#define ext4_new_inode_start_handle(dir, mode, qstr, goal, owner, \
type, nblocks) \
__ext4_new_inode(NULL, (dir), (mode), (qstr), (goal), (owner), \
- (type), __LINE__, (nblocks))
+ 0, (type), __LINE__, (nblocks))
extern void ext4_free_inode(handle_t *, struct inode *);
@@ -2433,6 +2456,7 @@ extern int ext4_mb_add_groupinfo(struct super_block *sb,
extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
ext4_fsblk_t block, unsigned long count);
extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
+extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid);
/* inode.c */
int ext4_inode_is_fast_symlink(struct inode *inode);
@@ -2704,19 +2728,20 @@ extern void ext4_group_desc_csum_set(struct super_block *sb, __u32 group,
extern int ext4_register_li_request(struct super_block *sb,
ext4_group_t first_not_zeroed);
-static inline int ext4_has_group_desc_csum(struct super_block *sb)
-{
- return ext4_has_feature_gdt_csum(sb) ||
- EXT4_SB(sb)->s_chksum_driver != NULL;
-}
-
static inline int ext4_has_metadata_csum(struct super_block *sb)
{
WARN_ON_ONCE(ext4_has_feature_metadata_csum(sb) &&
!EXT4_SB(sb)->s_chksum_driver);
- return (EXT4_SB(sb)->s_chksum_driver != NULL);
+ return ext4_has_feature_metadata_csum(sb) &&
+ (EXT4_SB(sb)->s_chksum_driver != NULL);
}
+
+static inline int ext4_has_group_desc_csum(struct super_block *sb)
+{
+ return ext4_has_feature_gdt_csum(sb) || ext4_has_metadata_csum(sb);
+}
+
static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
{
return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |
@@ -2756,13 +2781,15 @@ static inline void ext4_r_blocks_count_set(struct ext4_super_block *es,
es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
}
-static inline loff_t ext4_isize(struct ext4_inode *raw_inode)
+static inline loff_t ext4_isize(struct super_block *sb,
+ struct ext4_inode *raw_inode)
{
- if (S_ISREG(le16_to_cpu(raw_inode->i_mode)))
+ if (ext4_has_feature_largedir(sb) ||
+ S_ISREG(le16_to_cpu(raw_inode->i_mode)))
return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
le32_to_cpu(raw_inode->i_size_lo);
- else
- return (loff_t) le32_to_cpu(raw_inode->i_size_lo);
+
+ return (loff_t) le32_to_cpu(raw_inode->i_size_lo);
}
static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index f97611171023..dabad1bc8617 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -77,7 +77,14 @@
#define EXT4_RESERVE_TRANS_BLOCKS 12U
-#define EXT4_INDEX_EXTRA_TRANS_BLOCKS 8
+/*
+ * Number of credits needed if we need to insert an entry into a
+ * directory. For each new index block, we need 4 blocks (old index
+ * block, new index block, bitmap block, bg summary). For normal
+ * htree directories there are 2 levels; if the largedir feature
+ * enabled it's 3 levels.
+ */
+#define EXT4_INDEX_EXTRA_TRANS_BLOCKS 12U
#ifdef CONFIG_QUOTA
/* Amount of blocks needed for quota update - we know that the structure was
@@ -104,20 +111,6 @@
#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
-static inline int ext4_jbd2_credits_xattr(struct inode *inode)
-{
- int credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb);
-
- /*
- * In case of inline data, we may push out the data to a block,
- * so we need to reserve credits for this eventuality
- */
- if (ext4_has_inline_data(inode))
- credits += ext4_writepage_trans_blocks(inode) + 1;
- return credits;
-}
-
-
/*
* Ext4 handle operation types -- for logging purposes
*/
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3e36508610b7..e0a8425ff74d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2488,7 +2488,8 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int extents)
static inline int get_default_free_blocks_flags(struct inode *inode)
{
- if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
+ ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE))
return EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
else if (ext4_should_journal_data(inode))
return EXT4_FREE_BLOCKS_FORGET;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 02ce7e7bbdf5..58294c9a7e1d 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -37,7 +37,11 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret;
- inode_lock_shared(inode);
+ if (!inode_trylock_shared(inode)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ inode_lock_shared(inode);
+ }
/*
* Recheck under inode lock - at this point we are sure it cannot
* change anymore
@@ -179,7 +183,11 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret;
- inode_lock(inode);
+ if (!inode_trylock(inode)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ inode_lock(inode);
+ }
ret = ext4_write_checks(iocb, from);
if (ret <= 0)
goto out;
@@ -216,7 +224,12 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
return ext4_dax_write_iter(iocb, from);
#endif
- inode_lock(inode);
+ if (!inode_trylock(inode)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ inode_lock(inode);
+ }
+
ret = ext4_write_checks(iocb, from);
if (ret <= 0)
goto out;
@@ -235,9 +248,15 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
iocb->private = &overwrite;
/* Check whether we do a DIO overwrite or not */
- if (o_direct && ext4_should_dioread_nolock(inode) && !unaligned_aio &&
- ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from)))
- overwrite = 1;
+ if (o_direct && !unaligned_aio) {
+ if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) {
+ if (ext4_should_dioread_nolock(inode))
+ overwrite = 1;
+ } else if (iocb->ki_flags & IOCB_NOWAIT) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ }
ret = __generic_file_write_iter(iocb, from);
inode_unlock(inode);
@@ -345,13 +364,6 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
- if (ext4_encrypted_inode(inode)) {
- int err = fscrypt_get_encryption_info(inode);
- if (err)
- return 0;
- if (!fscrypt_has_encryption_key(inode))
- return -ENOKEY;
- }
file_accessed(file);
if (IS_DAX(file_inode(file))) {
vma->vm_ops = &ext4_dax_vm_ops;
@@ -435,6 +447,10 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
if (ret < 0)
return ret;
}
+
+ /* Set the flags to support nowait AIO */
+ filp->f_mode |= FMODE_AIO_NOWAIT;
+
return dquot_file_open(inode, filp);
}
diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c
index b19436098837..7ec340898598 100644
--- a/fs/ext4/fsmap.c
+++ b/fs/ext4/fsmap.c
@@ -480,6 +480,7 @@ static int ext4_getfsmap_datadev(struct super_block *sb,
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t start_fsb;
ext4_fsblk_t end_fsb;
+ ext4_fsblk_t bofs;
ext4_fsblk_t eofs;
ext4_group_t start_ag;
ext4_group_t end_ag;
@@ -487,9 +488,12 @@ static int ext4_getfsmap_datadev(struct super_block *sb,
ext4_grpblk_t last_cluster;
int error = 0;
+ bofs = le32_to_cpu(sbi->s_es->s_first_data_block);
eofs = ext4_blocks_count(sbi->s_es);
if (keys[0].fmr_physical >= eofs)
return 0;
+ else if (keys[0].fmr_physical < bofs)
+ keys[0].fmr_physical = bofs;
if (keys[1].fmr_physical >= eofs)
keys[1].fmr_physical = eofs - 1;
start_fsb = keys[0].fmr_physical;
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 9d549608fd30..aae2c3971cef 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -124,7 +124,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
goto out;
}
- ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ ret = file_write_and_wait_range(file, start, end);
if (ret)
return ret;
/*
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 98ac2f1f23b3..507bfb3344d4 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -294,7 +294,6 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
* as writing the quota to disk may need the lock as well.
*/
dquot_initialize(inode);
- ext4_xattr_delete_inode(handle, inode);
dquot_free_inode(inode);
dquot_drop(inode);
@@ -743,8 +742,9 @@ out:
*/
struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
umode_t mode, const struct qstr *qstr,
- __u32 goal, uid_t *owner, int handle_type,
- unsigned int line_no, int nblocks)
+ __u32 goal, uid_t *owner, __u32 i_flags,
+ int handle_type, unsigned int line_no,
+ int nblocks)
{
struct super_block *sb;
struct buffer_head *inode_bitmap_bh = NULL;
@@ -766,30 +766,69 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
if (!dir || !dir->i_nlink)
return ERR_PTR(-EPERM);
- if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
+ sb = dir->i_sb;
+ sbi = EXT4_SB(sb);
+
+ if (unlikely(ext4_forced_shutdown(sbi)))
return ERR_PTR(-EIO);
- if ((ext4_encrypted_inode(dir) ||
- DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) &&
- (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
+ if ((ext4_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) &&
+ (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) &&
+ !(i_flags & EXT4_EA_INODE_FL)) {
err = fscrypt_get_encryption_info(dir);
if (err)
return ERR_PTR(err);
if (!fscrypt_has_encryption_key(dir))
return ERR_PTR(-ENOKEY);
- if (!handle)
- nblocks += EXT4_DATA_TRANS_BLOCKS(dir->i_sb);
encrypt = 1;
}
- sb = dir->i_sb;
+ if (!handle && sbi->s_journal && !(i_flags & EXT4_EA_INODE_FL)) {
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
+ struct posix_acl *p = get_acl(dir, ACL_TYPE_DEFAULT);
+
+ if (p) {
+ int acl_size = p->a_count * sizeof(ext4_acl_entry);
+
+ nblocks += (S_ISDIR(mode) ? 2 : 1) *
+ __ext4_xattr_set_credits(sb, NULL /* inode */,
+ NULL /* block_bh */, acl_size,
+ true /* is_create */);
+ posix_acl_release(p);
+ }
+#endif
+
+#ifdef CONFIG_SECURITY
+ {
+ int num_security_xattrs = 1;
+
+#ifdef CONFIG_INTEGRITY
+ num_security_xattrs++;
+#endif
+ /*
+ * We assume that security xattrs are never
+ * more than 1k. In practice they are under
+ * 128 bytes.
+ */
+ nblocks += num_security_xattrs *
+ __ext4_xattr_set_credits(sb, NULL /* inode */,
+ NULL /* block_bh */, 1024,
+ true /* is_create */);
+ }
+#endif
+ if (encrypt)
+ nblocks += __ext4_xattr_set_credits(sb,
+ NULL /* inode */, NULL /* block_bh */,
+ FSCRYPT_SET_CONTEXT_MAX_SIZE,
+ true /* is_create */);
+ }
+
ngroups = ext4_get_groups_count(sb);
trace_ext4_request_inode(dir, mode);
inode = new_inode(sb);
if (!inode)
return ERR_PTR(-ENOMEM);
ei = EXT4_I(inode);
- sbi = EXT4_SB(sb);
/*
* Initialize owners and quota early so that we don't have to account
@@ -1053,6 +1092,7 @@ got:
/* Don't inherit extent flag from directory, amongst others. */
ei->i_flags =
ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);
+ ei->i_flags |= i_flags;
ei->i_file_acl = 0;
ei->i_dtime = 0;
ei->i_block_group = group;
@@ -1109,13 +1149,15 @@ got:
goto fail_free_drop;
}
- err = ext4_init_acl(handle, inode, dir);
- if (err)
- goto fail_free_drop;
+ if (!(ei->i_flags & EXT4_EA_INODE_FL)) {
+ err = ext4_init_acl(handle, inode, dir);
+ if (err)
+ goto fail_free_drop;
- err = ext4_init_security(handle, inode, dir, qstr);
- if (err)
- goto fail_free_drop;
+ err = ext4_init_security(handle, inode, dir, qstr);
+ if (err)
+ goto fail_free_drop;
+ }
if (ext4_has_feature_extents(sb)) {
/* set extent flag only for directory, file and normal symlink*/
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index bc15c2c17633..7ffa290cbb8e 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -829,7 +829,8 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
int flags = EXT4_FREE_BLOCKS_VALIDATED;
int err;
- if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
+ ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE))
flags |= EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_METADATA;
else if (ext4_should_journal_data(inode))
flags |= EXT4_FREE_BLOCKS_FORGET;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 8d141c0c8ff9..28c5c3abddb3 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -61,7 +61,7 @@ static int get_max_inline_xattr_value_size(struct inode *inode,
/* Compute min_offs. */
for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
- if (!entry->e_value_block && entry->e_value_size) {
+ if (!entry->e_value_inum && entry->e_value_size) {
size_t offs = le16_to_cpu(entry->e_value_offs);
if (offs < min_offs)
min_offs = offs;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5cf82d03968c..3c600f02673f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -144,16 +144,12 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
/*
* Test whether an inode is a fast symlink.
+ * A fast symlink has its symlink data stored in ext4_inode_info->i_data.
*/
int ext4_inode_is_fast_symlink(struct inode *inode)
{
- int ea_blocks = EXT4_I(inode)->i_file_acl ?
- EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0;
-
- if (ext4_has_inline_data(inode))
- return 0;
-
- return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
+ return S_ISLNK(inode->i_mode) && inode->i_size &&
+ (inode->i_size < EXT4_N_BLOCKS * 4);
}
/*
@@ -189,6 +185,8 @@ void ext4_evict_inode(struct inode *inode)
{
handle_t *handle;
int err;
+ int extra_credits = 3;
+ struct ext4_xattr_inode_array *ea_inode_array = NULL;
trace_ext4_evict_inode(inode);
@@ -213,7 +211,8 @@ void ext4_evict_inode(struct inode *inode)
*/
if (inode->i_ino != EXT4_JOURNAL_INO &&
ext4_should_journal_data(inode) &&
- (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
+ (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
+ inode->i_data.nrpages) {
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
@@ -238,8 +237,12 @@ void ext4_evict_inode(struct inode *inode)
* protection against it
*/
sb_start_intwrite(inode->i_sb);
+
+ if (!IS_NOQUOTA(inode))
+ extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb);
+
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
- ext4_blocks_for_truncate(inode)+3);
+ ext4_blocks_for_truncate(inode)+extra_credits);
if (IS_ERR(handle)) {
ext4_std_error(inode->i_sb, PTR_ERR(handle));
/*
@@ -254,6 +257,16 @@ void ext4_evict_inode(struct inode *inode)
if (IS_SYNC(inode))
ext4_handle_sync(handle);
+
+ /*
+ * Set inode->i_size to 0 before calling ext4_truncate(). We need
+ * special handling of symlinks here because i_size is used to
+ * determine whether ext4_inode_info->i_data contains symlink data or
+ * block mappings. Setting i_size to 0 will remove its fast symlink
+ * status. Erase i_data so that it becomes a valid empty block map.
+ */
+ if (ext4_inode_is_fast_symlink(inode))
+ memset(EXT4_I(inode)->i_data, 0, sizeof(EXT4_I(inode)->i_data));
inode->i_size = 0;
err = ext4_mark_inode_dirty(handle, inode);
if (err) {
@@ -271,25 +284,17 @@ void ext4_evict_inode(struct inode *inode)
}
}
- /*
- * ext4_ext_truncate() doesn't reserve any slop when it
- * restarts journal transactions; therefore there may not be
- * enough credits left in the handle to remove the inode from
- * the orphan list and set the dtime field.
- */
- if (!ext4_handle_has_enough_credits(handle, 3)) {
- err = ext4_journal_extend(handle, 3);
- if (err > 0)
- err = ext4_journal_restart(handle, 3);
- if (err != 0) {
- ext4_warning(inode->i_sb,
- "couldn't extend journal (err %d)", err);
- stop_handle:
- ext4_journal_stop(handle);
- ext4_orphan_del(NULL, inode);
- sb_end_intwrite(inode->i_sb);
- goto no_delete;
- }
+ /* Remove xattr references. */
+ err = ext4_xattr_delete_inode(handle, inode, &ea_inode_array,
+ extra_credits);
+ if (err) {
+ ext4_warning(inode->i_sb, "xattr delete (err %d)", err);
+stop_handle:
+ ext4_journal_stop(handle);
+ ext4_orphan_del(NULL, inode);
+ sb_end_intwrite(inode->i_sb);
+ ext4_xattr_inode_array_free(ea_inode_array);
+ goto no_delete;
}
/*
@@ -317,6 +322,7 @@ void ext4_evict_inode(struct inode *inode)
ext4_free_inode(handle, inode);
ext4_journal_stop(handle);
sb_end_intwrite(inode->i_sb);
+ ext4_xattr_inode_array_free(ea_inode_array);
return;
no_delete:
ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
@@ -710,7 +716,7 @@ out_sem:
if (map->m_flags & EXT4_MAP_NEW &&
!(map->m_flags & EXT4_MAP_UNWRITTEN) &&
!(flags & EXT4_GET_BLOCKS_ZERO) &&
- !IS_NOQUOTA(inode) &&
+ !ext4_is_quota_file(inode) &&
ext4_should_order_data(inode)) {
if (flags & EXT4_GET_BLOCKS_IO_SUBMIT)
ret = ext4_jbd2_inode_add_wait(handle, inode);
@@ -4712,7 +4718,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
if (ext4_has_feature_64bit(sb))
ei->i_file_acl |=
((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
- inode->i_size = ext4_isize(raw_inode);
+ inode->i_size = ext4_isize(sb, raw_inode);
if ((size = i_size_read(inode)) < 0) {
EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
ret = -EFSCORRUPTED;
@@ -4846,6 +4852,15 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
}
brelse(iloc.bh);
ext4_set_inode_flags(inode);
+
+ if (ei->i_flags & EXT4_EA_INODE_FL) {
+ ext4_xattr_inode_set_class(inode);
+
+ inode_lock(inode);
+ inode->i_flags |= S_NOQUOTA;
+ inode_unlock(inode);
+ }
+
unlock_new_inode(inode);
return inode;
@@ -5037,7 +5052,7 @@ static int ext4_do_update_inode(handle_t *handle,
raw_inode->i_file_acl_high =
cpu_to_le16(ei->i_file_acl >> 32);
raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
- if (ei->i_disksize != ext4_isize(raw_inode)) {
+ if (ei->i_disksize != ext4_isize(inode->i_sb, raw_inode)) {
ext4_isize_set(raw_inode, ei->i_disksize);
need_datasync = 1;
}
@@ -5287,7 +5302,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
error = PTR_ERR(handle);
goto err_out;
}
+
+ /* dquot_transfer() calls back ext4_get_inode_usage() which
+ * counts xattr inode references.
+ */
+ down_read(&EXT4_I(inode)->xattr_sem);
error = dquot_transfer(inode, attr);
+ up_read(&EXT4_I(inode)->xattr_sem);
+
if (error) {
ext4_journal_stop(handle);
return error;
@@ -5307,6 +5329,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
loff_t oldsize = inode->i_size;
int shrink = (attr->ia_size <= inode->i_size);
+ if (ext4_encrypted_inode(inode)) {
+ error = fscrypt_get_encryption_info(inode);
+ if (error)
+ return error;
+ if (!fscrypt_has_encryption_key(inode))
+ return -ENOKEY;
+ }
+
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 0c21e22acd74..42b3a73143cf 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -218,7 +218,7 @@ static int ext4_ioctl_setflags(struct inode *inode,
unsigned int jflag;
/* Is it quota file? Do not allow user to mess with it */
- if (IS_NOQUOTA(inode))
+ if (ext4_is_quota_file(inode))
goto flags_out;
oldflags = ei->i_flags;
@@ -342,7 +342,7 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
err = -EPERM;
inode_lock(inode);
/* Is it quota file? Do not allow user to mess with it */
- if (IS_NOQUOTA(inode))
+ if (ext4_is_quota_file(inode))
goto out_unlock;
err = ext4_get_inode_loc(inode, &iloc);
@@ -373,7 +373,13 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
if (!IS_ERR(transfer_to[PRJQUOTA])) {
+
+ /* __dquot_transfer() calls back ext4_get_inode_usage() which
+ * counts xattr inode references.
+ */
+ down_read(&EXT4_I(inode)->xattr_sem);
err = __dquot_transfer(inode, transfer_to);
+ up_read(&EXT4_I(inode)->xattr_sem);
dqput(transfer_to[PRJQUOTA]);
if (err)
goto out_dirty;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b7928cddd539..581e357e8406 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -367,8 +367,6 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
ext4_group_t group);
-static void ext4_free_data_callback(struct super_block *sb,
- struct ext4_journal_cb_entry *jce, int rc);
static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
{
@@ -2639,6 +2637,7 @@ int ext4_mb_init(struct super_block *sb)
spin_lock_init(&sbi->s_md_lock);
spin_lock_init(&sbi->s_bal_lock);
sbi->s_mb_free_pending = 0;
+ INIT_LIST_HEAD(&sbi->s_freed_data_list);
sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
@@ -2782,7 +2781,8 @@ int ext4_mb_release(struct super_block *sb)
}
static inline int ext4_issue_discard(struct super_block *sb,
- ext4_group_t block_group, ext4_grpblk_t cluster, int count)
+ ext4_group_t block_group, ext4_grpblk_t cluster, int count,
+ struct bio **biop)
{
ext4_fsblk_t discard_block;
@@ -2791,18 +2791,18 @@ static inline int ext4_issue_discard(struct super_block *sb,
count = EXT4_C2B(EXT4_SB(sb), count);
trace_ext4_discard_blocks(sb,
(unsigned long long) discard_block, count);
- return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
+ if (biop) {
+ return __blkdev_issue_discard(sb->s_bdev,
+ (sector_t)discard_block << (sb->s_blocksize_bits - 9),
+ (sector_t)count << (sb->s_blocksize_bits - 9),
+ GFP_NOFS, 0, biop);
+ } else
+ return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
}
-/*
- * This function is called by the jbd2 layer once the commit has finished,
- * so we know we can free the blocks that were released with that commit.
- */
-static void ext4_free_data_callback(struct super_block *sb,
- struct ext4_journal_cb_entry *jce,
- int rc)
+static void ext4_free_data_in_buddy(struct super_block *sb,
+ struct ext4_free_data *entry)
{
- struct ext4_free_data *entry = (struct ext4_free_data *)jce;
struct ext4_buddy e4b;
struct ext4_group_info *db;
int err, count = 0, count2 = 0;
@@ -2810,18 +2810,6 @@ static void ext4_free_data_callback(struct super_block *sb,
mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
entry->efd_count, entry->efd_group, entry);
- if (test_opt(sb, DISCARD)) {
- err = ext4_issue_discard(sb, entry->efd_group,
- entry->efd_start_cluster,
- entry->efd_count);
- if (err && err != -EOPNOTSUPP)
- ext4_msg(sb, KERN_WARNING, "discard request in"
- " group:%d block:%d count:%d failed"
- " with %d", entry->efd_group,
- entry->efd_start_cluster,
- entry->efd_count, err);
- }
-
err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
/* we expect to find existing buddy because it's pinned */
BUG_ON(err != 0);
@@ -2862,6 +2850,56 @@ static void ext4_free_data_callback(struct super_block *sb,
mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
}
+/*
+ * This function is called by the jbd2 layer once the commit has finished,
+ * so we know we can free the blocks that were released with that commit.
+ */
+void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_free_data *entry, *tmp;
+ struct bio *discard_bio = NULL;
+ struct list_head freed_data_list;
+ struct list_head *cut_pos = NULL;
+ int err;
+
+ INIT_LIST_HEAD(&freed_data_list);
+
+ spin_lock(&sbi->s_md_lock);
+ list_for_each_entry(entry, &sbi->s_freed_data_list, efd_list) {
+ if (entry->efd_tid != commit_tid)
+ break;
+ cut_pos = &entry->efd_list;
+ }
+ if (cut_pos)
+ list_cut_position(&freed_data_list, &sbi->s_freed_data_list,
+ cut_pos);
+ spin_unlock(&sbi->s_md_lock);
+
+ if (test_opt(sb, DISCARD)) {
+ list_for_each_entry(entry, &freed_data_list, efd_list) {
+ err = ext4_issue_discard(sb, entry->efd_group,
+ entry->efd_start_cluster,
+ entry->efd_count,
+ &discard_bio);
+ if (err && err != -EOPNOTSUPP) {
+ ext4_msg(sb, KERN_WARNING, "discard request in"
+ " group:%d block:%d count:%d failed"
+ " with %d", entry->efd_group,
+ entry->efd_start_cluster,
+ entry->efd_count, err);
+ } else if (err == -EOPNOTSUPP)
+ break;
+ }
+
+ if (discard_bio)
+ submit_bio_wait(discard_bio);
+ }
+
+ list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
+ ext4_free_data_in_buddy(sb, entry);
+}
+
int __init ext4_init_mballoc(void)
{
ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
@@ -3529,7 +3567,7 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_set_bits(bitmap, start, len);
preallocated += len;
}
- mb_debug(1, "prellocated %u for group %u\n", preallocated, group);
+ mb_debug(1, "preallocated %u for group %u\n", preallocated, group);
}
static void ext4_mb_pa_callback(struct rcu_head *head)
@@ -4464,7 +4502,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
trace_ext4_request_blocks(ar);
/* Allow to use superuser reservation for quota file */
- if (IS_NOQUOTA(ar->inode))
+ if (ext4_is_quota_file(ar->inode))
ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
@@ -4583,14 +4621,28 @@ out:
* are contiguous, AND the extents were freed by the same transaction,
* AND the blocks are associated with the same group.
*/
-static int can_merge(struct ext4_free_data *entry1,
- struct ext4_free_data *entry2)
+static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi,
+ struct ext4_free_data *entry,
+ struct ext4_free_data *new_entry,
+ struct rb_root *entry_rb_root)
{
- if ((entry1->efd_tid == entry2->efd_tid) &&
- (entry1->efd_group == entry2->efd_group) &&
- ((entry1->efd_start_cluster + entry1->efd_count) == entry2->efd_start_cluster))
- return 1;
- return 0;
+ if ((entry->efd_tid != new_entry->efd_tid) ||
+ (entry->efd_group != new_entry->efd_group))
+ return;
+ if (entry->efd_start_cluster + entry->efd_count ==
+ new_entry->efd_start_cluster) {
+ new_entry->efd_start_cluster = entry->efd_start_cluster;
+ new_entry->efd_count += entry->efd_count;
+ } else if (new_entry->efd_start_cluster + new_entry->efd_count ==
+ entry->efd_start_cluster) {
+ new_entry->efd_count += entry->efd_count;
+ } else
+ return;
+ spin_lock(&sbi->s_md_lock);
+ list_del(&entry->efd_list);
+ spin_unlock(&sbi->s_md_lock);
+ rb_erase(&entry->efd_node, entry_rb_root);
+ kmem_cache_free(ext4_free_data_cachep, entry);
}
static noinline_for_stack int
@@ -4646,29 +4698,19 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
node = rb_prev(new_node);
if (node) {
entry = rb_entry(node, struct ext4_free_data, efd_node);
- if (can_merge(entry, new_entry) &&
- ext4_journal_callback_try_del(handle, &entry->efd_jce)) {
- new_entry->efd_start_cluster = entry->efd_start_cluster;
- new_entry->efd_count += entry->efd_count;
- rb_erase(node, &(db->bb_free_root));
- kmem_cache_free(ext4_free_data_cachep, entry);
- }
+ ext4_try_merge_freed_extent(sbi, entry, new_entry,
+ &(db->bb_free_root));
}
node = rb_next(new_node);
if (node) {
entry = rb_entry(node, struct ext4_free_data, efd_node);
- if (can_merge(new_entry, entry) &&
- ext4_journal_callback_try_del(handle, &entry->efd_jce)) {
- new_entry->efd_count += entry->efd_count;
- rb_erase(node, &(db->bb_free_root));
- kmem_cache_free(ext4_free_data_cachep, entry);
- }
+ ext4_try_merge_freed_extent(sbi, entry, new_entry,
+ &(db->bb_free_root));
}
- /* Add the extent to transaction's private list */
- new_entry->efd_jce.jce_func = ext4_free_data_callback;
+
spin_lock(&sbi->s_md_lock);
- _ext4_journal_callback_add(handle, &new_entry->efd_jce);
+ list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list);
sbi->s_mb_free_pending += clusters;
spin_unlock(&sbi->s_md_lock);
return 0;
@@ -4871,7 +4913,8 @@ do_more:
* them with group lock_held
*/
if (test_opt(sb, DISCARD)) {
- err = ext4_issue_discard(sb, block_group, bit, count);
+ err = ext4_issue_discard(sb, block_group, bit, count,
+ NULL);
if (err && err != -EOPNOTSUPP)
ext4_msg(sb, KERN_WARNING, "discard request in"
" group:%d block:%d count:%lu failed"
@@ -5094,7 +5137,7 @@ __acquires(bitlock)
*/
mb_mark_used(e4b, &ex);
ext4_unlock_group(sb, group);
- ret = ext4_issue_discard(sb, group, start, count);
+ ret = ext4_issue_discard(sb, group, start, count, NULL);
ext4_lock_group(sb, group);
mb_free_blocks(NULL, e4b, start, ex.fe_len);
return ret;
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 2bed62084a8c..009300ee1561 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -78,10 +78,8 @@ do { \
struct ext4_free_data {
- /* MUST be the first member */
- struct ext4_journal_cb_entry efd_jce;
-
- /* ext4_free_data private data starts from here */
+ /* this links the free block information from sb_info */
+ struct list_head efd_list;
/* this links the free block information from group_info */
struct rb_node efd_node;
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 364ea4d4a943..cf5181b62df1 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -475,7 +475,7 @@ int ext4_ext_migrate(struct inode *inode)
owner[0] = i_uid_read(inode);
owner[1] = i_gid_read(inode);
tmp_inode = ext4_new_inode(handle, d_inode(inode->i_sb->s_root),
- S_IFREG, NULL, goal, owner);
+ S_IFREG, NULL, goal, owner, 0);
if (IS_ERR(tmp_inode)) {
retval = PTR_ERR(tmp_inode);
ext4_journal_stop(handle);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index c992ef2c2f94..9bb36909ec92 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -484,7 +484,7 @@ mext_check_arguments(struct inode *orig_inode,
return -EBUSY;
}
- if (IS_NOQUOTA(orig_inode) || IS_NOQUOTA(donor_inode)) {
+ if (ext4_is_quota_file(orig_inode) && ext4_is_quota_file(donor_inode)) {
ext4_debug("ext4 move extent: The argument files should "
"not be quota files [ino:orig %lu, donor %lu]\n",
orig_inode->i_ino, donor_inode->i_ino);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 404256caf9cf..13f0cadb1238 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -513,7 +513,7 @@ ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
{
- return le32_to_cpu(entry->block) & 0x00ffffff;
+ return le32_to_cpu(entry->block) & 0x0fffffff;
}
static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
@@ -739,6 +739,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR);
u32 hash;
+ memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0]));
frame->bh = ext4_read_dirblock(dir, 0, INDEX);
if (IS_ERR(frame->bh))
return (struct dx_frame *) frame->bh;
@@ -768,9 +769,15 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
}
indirect = root->info.indirect_levels;
- if (indirect > 1) {
- ext4_warning_inode(dir, "Unimplemented hash depth: %#06x",
- root->info.indirect_levels);
+ if (indirect >= ext4_dir_htree_level(dir->i_sb)) {
+ ext4_warning(dir->i_sb,
+ "Directory (ino: %lu) htree depth %#06x exceed"
+ "supported value", dir->i_ino,
+ ext4_dir_htree_level(dir->i_sb));
+ if (ext4_dir_htree_level(dir->i_sb) < EXT4_HTREE_LEVEL) {
+ ext4_warning(dir->i_sb, "Enable large directory "
+ "feature to access it");
+ }
goto fail;
}
@@ -859,12 +866,19 @@ fail:
static void dx_release(struct dx_frame *frames)
{
+ struct dx_root_info *info;
+ int i;
+
if (frames[0].bh == NULL)
return;
- if (((struct dx_root *)frames[0].bh->b_data)->info.indirect_levels)
- brelse(frames[1].bh);
- brelse(frames[0].bh);
+ info = &((struct dx_root *)frames[0].bh->b_data)->info;
+ for (i = 0; i <= info->indirect_levels; i++) {
+ if (frames[i].bh == NULL)
+ break;
+ brelse(frames[i].bh);
+ frames[i].bh = NULL;
+ }
}
/*
@@ -1050,7 +1064,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
{
struct dx_hash_info hinfo;
struct ext4_dir_entry_2 *de;
- struct dx_frame frames[2], *frame;
+ struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
struct inode *dir;
ext4_lblk_t block;
int count = 0;
@@ -1428,11 +1442,11 @@ restart:
goto next;
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
- /* read error, skip block & hope for the best */
EXT4_ERROR_INODE(dir, "reading directory lblock %lu",
(unsigned long) block);
brelse(bh);
- goto next;
+ ret = ERR_PTR(-EIO);
+ goto cleanup_and_exit;
}
if (!buffer_verified(bh) &&
!is_dx_internal_node(dir, block,
@@ -1442,7 +1456,8 @@ restart:
EXT4_ERROR_INODE(dir, "checksumming directory "
"block %lu", (unsigned long)block);
brelse(bh);
- goto next;
+ ret = ERR_PTR(-EFSBADCRC);
+ goto cleanup_and_exit;
}
set_buffer_verified(bh);
i = search_dirblock(bh, dir, &fname,
@@ -1485,7 +1500,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
struct ext4_dir_entry_2 **res_dir)
{
struct super_block * sb = dir->i_sb;
- struct dx_frame frames[2], *frame;
+ struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
struct buffer_head *bh;
ext4_lblk_t block;
int retval;
@@ -1889,7 +1904,7 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
*/
dir->i_mtime = dir->i_ctime = current_time(dir);
ext4_update_dx_flag(dir);
- dir->i_version++;
+ inode_inc_iversion(dir);
ext4_mark_inode_dirty(handle, dir);
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
err = ext4_handle_dirty_dirent_node(handle, dir, bh);
@@ -1908,7 +1923,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
{
struct buffer_head *bh2;
struct dx_root *root;
- struct dx_frame frames[2], *frame;
+ struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
struct dx_entry *entries;
struct ext4_dir_entry_2 *de, *de2;
struct ext4_dir_entry_tail *t;
@@ -2127,13 +2142,16 @@ out:
static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
struct inode *dir, struct inode *inode)
{
- struct dx_frame frames[2], *frame;
+ struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
struct dx_entry *entries, *at;
struct buffer_head *bh;
struct super_block *sb = dir->i_sb;
struct ext4_dir_entry_2 *de;
+ int restart;
int err;
+again:
+ restart = 0;
frame = dx_probe(fname, dir, NULL, frames);
if (IS_ERR(frame))
return PTR_ERR(frame);
@@ -2155,24 +2173,44 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
if (err != -ENOSPC)
goto cleanup;
+ err = 0;
/* Block full, should compress but for now just split */
dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
dx_get_count(entries), dx_get_limit(entries)));
/* Need to split index? */
if (dx_get_count(entries) == dx_get_limit(entries)) {
ext4_lblk_t newblock;
- unsigned icount = dx_get_count(entries);
- int levels = frame - frames;
+ int levels = frame - frames + 1;
+ unsigned int icount;
+ int add_level = 1;
struct dx_entry *entries2;
struct dx_node *node2;
struct buffer_head *bh2;
- if (levels && (dx_get_count(frames->entries) ==
- dx_get_limit(frames->entries))) {
- ext4_warning_inode(dir, "Directory index full!");
+ while (frame > frames) {
+ if (dx_get_count((frame - 1)->entries) <
+ dx_get_limit((frame - 1)->entries)) {
+ add_level = 0;
+ break;
+ }
+ frame--; /* split higher index block */
+ at = frame->at;
+ entries = frame->entries;
+ restart = 1;
+ }
+ if (add_level && levels == ext4_dir_htree_level(sb)) {
+ ext4_warning(sb, "Directory (ino: %lu) index full, "
+ "reach max htree level :%d",
+ dir->i_ino, levels);
+ if (ext4_dir_htree_level(sb) < EXT4_HTREE_LEVEL) {
+ ext4_warning(sb, "Large directory feature is "
+ "not enabled on this "
+ "filesystem");
+ }
err = -ENOSPC;
goto cleanup;
}
+ icount = dx_get_count(entries);
bh2 = ext4_append(handle, dir, &newblock);
if (IS_ERR(bh2)) {
err = PTR_ERR(bh2);
@@ -2187,7 +2225,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
err = ext4_journal_get_write_access(handle, frame->bh);
if (err)
goto journal_error;
- if (levels) {
+ if (!add_level) {
unsigned icount1 = icount/2, icount2 = icount - icount1;
unsigned hash2 = dx_get_hash(entries + icount1);
dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
@@ -2195,7 +2233,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
err = ext4_journal_get_write_access(handle,
- frames[0].bh);
+ (frame - 1)->bh);
if (err)
goto journal_error;
@@ -2211,17 +2249,25 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
frame->entries = entries = entries2;
swap(frame->bh, bh2);
}
- dx_insert_block(frames + 0, hash2, newblock);
- dxtrace(dx_show_index("node", frames[1].entries));
+ dx_insert_block((frame - 1), hash2, newblock);
+ dxtrace(dx_show_index("node", frame->entries));
dxtrace(dx_show_index("node",
((struct dx_node *) bh2->b_data)->entries));
err = ext4_handle_dirty_dx_node(handle, dir, bh2);
if (err)
goto journal_error;
brelse (bh2);
+ err = ext4_handle_dirty_dx_node(handle, dir,
+ (frame - 1)->bh);
+ if (err)
+ goto journal_error;
+ if (restart) {
+ err = ext4_handle_dirty_dx_node(handle, dir,
+ frame->bh);
+ goto journal_error;
+ }
} else {
- dxtrace(printk(KERN_DEBUG
- "Creating second level index...\n"));
+ struct dx_root *dxroot;
memcpy((char *) entries2, (char *) entries,
icount * sizeof(struct dx_entry));
dx_set_limit(entries2, dx_node_limit(dir));
@@ -2229,22 +2275,18 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
/* Set up root */
dx_set_count(entries, 1);
dx_set_block(entries + 0, newblock);
- ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
-
- /* Add new access path frame */
- frame = frames + 1;
- frame->at = at = at - entries + entries2;
- frame->entries = entries = entries2;
- frame->bh = bh2;
- err = ext4_journal_get_write_access(handle,
- frame->bh);
+ dxroot = (struct dx_root *)frames[0].bh->b_data;
+ dxroot->info.indirect_levels += 1;
+ dxtrace(printk(KERN_DEBUG
+ "Creating %d level index...\n",
+ dxroot->info.indirect_levels));
+ err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
if (err)
goto journal_error;
- }
- err = ext4_handle_dirty_dx_node(handle, dir, frames[0].bh);
- if (err) {
- ext4_std_error(inode->i_sb, err);
- goto cleanup;
+ err = ext4_handle_dirty_dx_node(handle, dir, bh2);
+ brelse(bh2);
+ restart = 1;
+ goto journal_error;
}
}
de = do_split(handle, dir, &bh, frame, &fname->hinfo);
@@ -2256,10 +2298,15 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
goto cleanup;
journal_error:
- ext4_std_error(dir->i_sb, err);
+ ext4_std_error(dir->i_sb, err); /* this is a no-op if err == 0 */
cleanup:
brelse(bh);
dx_release(frames);
+ /* If @restart is set, the htree path has been changed, so we need to
+ * repeat dx_probe() to find a valid htree path.
+ */
+ if (restart && err == 0)
+ goto again;
return err;
}
@@ -2296,7 +2343,7 @@ int ext4_generic_delete_entry(handle_t *handle,
blocksize);
else
de->inode = 0;
- dir->i_version++;
+ inode_inc_iversion(dir);
return 0;
}
i += ext4_rec_len_from_disk(de->rec_len, blocksize);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 1a82138ba739..c2fce4478cca 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -85,7 +85,7 @@ static void ext4_finish_bio(struct bio *bio)
}
#endif
- if (bio->bi_error) {
+ if (bio->bi_status) {
SetPageError(page);
mapping_set_error(page->mapping, -EIO);
}
@@ -104,7 +104,7 @@ static void ext4_finish_bio(struct bio *bio)
continue;
}
clear_buffer_async_write(bh);
- if (bio->bi_error)
+ if (bio->bi_status)
buffer_io_error(bh);
} while ((bh = bh->b_this_page) != head);
bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
@@ -303,24 +303,25 @@ static void ext4_end_bio(struct bio *bio)
bdevname(bio->bi_bdev, b),
(long long) bio->bi_iter.bi_sector,
(unsigned) bio_sectors(bio),
- bio->bi_error)) {
+ bio->bi_status)) {
ext4_finish_bio(bio);
bio_put(bio);
return;
}
bio->bi_end_io = NULL;
- if (bio->bi_error) {
+ if (bio->bi_status) {
struct inode *inode = io_end->inode;
ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
"(offset %llu size %ld starting block %llu)",
- bio->bi_error, inode->i_ino,
+ bio->bi_status, inode->i_ino,
(unsigned long long) io_end->offset,
(long) io_end->size,
(unsigned long long)
bi_sector >> (inode->i_blkbits - 9));
- mapping_set_error(inode->i_mapping, bio->bi_error);
+ mapping_set_error(inode->i_mapping,
+ blk_status_to_errno(bio->bi_status));
}
if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
@@ -349,6 +350,7 @@ void ext4_io_submit(struct ext4_io_submit *io)
if (bio) {
int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ?
REQ_SYNC : 0;
+ io->io_bio->bi_write_hint = io->io_end->inode->i_write_hint;
bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags);
submit_bio(io->io_bio);
}
@@ -396,6 +398,7 @@ submit_and_retry:
ret = io_submit_init_bio(io, bh);
if (ret)
return ret;
+ io->io_bio->bi_write_hint = inode->i_write_hint;
}
ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
if (ret != bh->b_size)
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index a81b829d56de..40a5497b0f60 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -73,7 +73,7 @@ static void mpage_end_io(struct bio *bio)
int i;
if (ext4_bio_encrypted(bio)) {
- if (bio->bi_error) {
+ if (bio->bi_status) {
fscrypt_release_ctx(bio->bi_private);
} else {
fscrypt_decrypt_bio_pages(bio->bi_private, bio);
@@ -83,7 +83,7 @@ static void mpage_end_io(struct bio *bio)
bio_for_each_segment_all(bv, bio, i) {
struct page *page = bv->bv_page;
- if (!bio->bi_error) {
+ if (!bio->bi_status) {
SetPageUptodate(page);
} else {
ClearPageUptodate(page);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9006cb5857b8..0886fe82e9c4 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -373,6 +373,9 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
struct ext4_journal_cb_entry *jce;
BUG_ON(txn->t_state == T_FINISHED);
+
+ ext4_process_freed_data(sb, txn->t_tid);
+
spin_lock(&sbi->s_md_lock);
while (!list_empty(&txn->t_private_list)) {
jce = list_entry(txn->t_private_list.next,
@@ -927,9 +930,13 @@ static void ext4_put_super(struct super_block *sb)
invalidate_bdev(sbi->journal_bdev);
ext4_blkdev_remove(sbi);
}
- if (sbi->s_mb_cache) {
- ext4_xattr_destroy_cache(sbi->s_mb_cache);
- sbi->s_mb_cache = NULL;
+ if (sbi->s_ea_inode_cache) {
+ ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
+ sbi->s_ea_inode_cache = NULL;
+ }
+ if (sbi->s_ea_block_cache) {
+ ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
+ sbi->s_ea_block_cache = NULL;
}
if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk);
@@ -1143,7 +1150,16 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
void *fs_data)
{
handle_t *handle = fs_data;
- int res, res2, retries = 0;
+ int res, res2, credits, retries = 0;
+
+ /*
+ * Encrypting the root directory is not allowed because e2fsck expects
+ * lost+found to exist and be unencrypted, and encrypting the root
+ * directory would imply encrypting the lost+found directory as well as
+ * the filename "lost+found" itself.
+ */
+ if (inode->i_ino == EXT4_ROOT_INO)
+ return -EPERM;
res = ext4_convert_inline_data(inode);
if (res)
@@ -1178,8 +1194,12 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
if (res)
return res;
retry:
- handle = ext4_journal_start(inode, EXT4_HT_MISC,
- ext4_jbd2_credits_xattr(inode));
+ res = ext4_xattr_set_credits(inode, len, false /* is_create */,
+ &credits);
+ if (res)
+ return res;
+
+ handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
if (IS_ERR(handle))
return PTR_ERR(handle);
@@ -1203,7 +1223,7 @@ retry:
return res;
}
-static int ext4_dummy_context(struct inode *inode)
+static bool ext4_dummy_context(struct inode *inode)
{
return DUMMY_ENCRYPTION_ENABLED(EXT4_SB(inode->i_sb));
}
@@ -1256,16 +1276,17 @@ static struct dquot **ext4_get_dquots(struct inode *inode)
}
static const struct dquot_operations ext4_quota_operations = {
- .get_reserved_space = ext4_get_reserved_space,
- .write_dquot = ext4_write_dquot,
- .acquire_dquot = ext4_acquire_dquot,
- .release_dquot = ext4_release_dquot,
- .mark_dirty = ext4_mark_dquot_dirty,
- .write_info = ext4_write_info,
- .alloc_dquot = dquot_alloc,
- .destroy_dquot = dquot_destroy,
- .get_projid = ext4_get_projid,
- .get_next_id = ext4_get_next_id,
+ .get_reserved_space = ext4_get_reserved_space,
+ .write_dquot = ext4_write_dquot,
+ .acquire_dquot = ext4_acquire_dquot,
+ .release_dquot = ext4_release_dquot,
+ .mark_dirty = ext4_mark_dquot_dirty,
+ .write_info = ext4_write_info,
+ .alloc_dquot = dquot_alloc,
+ .destroy_dquot = dquot_destroy,
+ .get_projid = ext4_get_projid,
+ .get_inode_usage = ext4_get_inode_usage,
+ .get_next_id = ext4_get_next_id,
};
static const struct quotactl_ops ext4_qctl_operations = {
@@ -1328,7 +1349,7 @@ enum {
Opt_inode_readahead_blks, Opt_journal_ioprio,
Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
- Opt_max_dir_size_kb, Opt_nojournal_checksum,
+ Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
};
static const match_table_t tokens = {
@@ -1411,6 +1432,8 @@ static const match_table_t tokens = {
{Opt_noinit_itable, "noinit_itable"},
{Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
{Opt_test_dummy_encryption, "test_dummy_encryption"},
+ {Opt_nombcache, "nombcache"},
+ {Opt_nombcache, "no_mbcache"}, /* for backward compatibility */
{Opt_removed, "check=none"}, /* mount option from ext2/3 */
{Opt_removed, "nocheck"}, /* mount option from ext2/3 */
{Opt_removed, "reservation"}, /* mount option from ext2/3 */
@@ -1618,6 +1641,7 @@ static const struct mount_opts {
{Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
{Opt_max_dir_size_kb, 0, MOPT_GTE0},
{Opt_test_dummy_encryption, 0, MOPT_GTE0},
+ {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
{Opt_err, 0, 0}
};
@@ -3445,7 +3469,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
/* Load the checksum driver */
- if (ext4_has_feature_metadata_csum(sb)) {
+ if (ext4_has_feature_metadata_csum(sb) ||
+ ext4_has_feature_ea_inode(sb)) {
sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
if (IS_ERR(sbi->s_chksum_driver)) {
ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
@@ -3467,7 +3492,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
/* Precompute checksum seed for all metadata */
if (ext4_has_feature_csum_seed(sb))
sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
- else if (ext4_has_metadata_csum(sb))
+ else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb))
sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
sizeof(es->s_uuid));
@@ -3597,6 +3622,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"The Hurd can't support 64-bit file systems");
goto failed_mount;
}
+
+ /*
+ * ea_inode feature uses l_i_version field which is not
+ * available in HURD_COMPAT mode.
+ */
+ if (ext4_has_feature_ea_inode(sb)) {
+ ext4_msg(sb, KERN_ERR,
+ "ea_inode feature is not supported for Hurd");
+ goto failed_mount;
+ }
}
if (IS_EXT2_SB(sb)) {
@@ -4061,10 +4096,22 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
no_journal:
- sbi->s_mb_cache = ext4_xattr_create_cache();
- if (!sbi->s_mb_cache) {
- ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
- goto failed_mount_wq;
+ if (!test_opt(sb, NO_MBCACHE)) {
+ sbi->s_ea_block_cache = ext4_xattr_create_cache();
+ if (!sbi->s_ea_block_cache) {
+ ext4_msg(sb, KERN_ERR,
+ "Failed to create ea_block_cache");
+ goto failed_mount_wq;
+ }
+
+ if (ext4_has_feature_ea_inode(sb)) {
+ sbi->s_ea_inode_cache = ext4_xattr_create_cache();
+ if (!sbi->s_ea_inode_cache) {
+ ext4_msg(sb, KERN_ERR,
+ "Failed to create ea_inode_cache");
+ goto failed_mount_wq;
+ }
+ }
}
if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) &&
@@ -4296,9 +4343,13 @@ failed_mount4:
if (EXT4_SB(sb)->rsv_conversion_wq)
destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
failed_mount_wq:
- if (sbi->s_mb_cache) {
- ext4_xattr_destroy_cache(sbi->s_mb_cache);
- sbi->s_mb_cache = NULL;
+ if (sbi->s_ea_inode_cache) {
+ ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
+ sbi->s_ea_inode_cache = NULL;
+ }
+ if (sbi->s_ea_block_cache) {
+ ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
+ sbi->s_ea_block_cache = NULL;
}
if (sbi->s_journal) {
jbd2_journal_destroy(sbi->s_journal);
@@ -4957,6 +5008,12 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
}
+ if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) {
+ ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount");
+ err = -EINVAL;
+ goto restore_opts;
+ }
+
if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
"dax flag with busy inodes while remounting");
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index d74dc5f81a04..48c7a7d55ed3 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -100,7 +100,7 @@ static ssize_t reserved_clusters_store(struct ext4_attr *a,
int ret;
ret = kstrtoull(skip_spaces(buf), 0, &val);
- if (!ret || val >= clusters)
+ if (ret || val >= clusters)
return -EINVAL;
atomic64_set(&sbi->s_resv_clusters, val);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 5d3c2536641c..cff4f41ced61 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -72,12 +72,14 @@
# define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
#endif
-static void ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *);
-static struct buffer_head *ext4_xattr_cache_find(struct inode *,
- struct ext4_xattr_header *,
- struct mb_cache_entry **);
-static void ext4_xattr_rehash(struct ext4_xattr_header *,
- struct ext4_xattr_entry *);
+static void ext4_xattr_block_cache_insert(struct mb_cache *,
+ struct buffer_head *);
+static struct buffer_head *
+ext4_xattr_block_cache_find(struct inode *, struct ext4_xattr_header *,
+ struct mb_cache_entry **);
+static __le32 ext4_xattr_hash_entry(char *name, size_t name_len, __le32 *value,
+ size_t value_count);
+static void ext4_xattr_rehash(struct ext4_xattr_header *);
static const struct xattr_handler * const ext4_xattr_handler_map[] = {
[EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler,
@@ -104,8 +106,22 @@ const struct xattr_handler *ext4_xattr_handlers[] = {
NULL
};
-#define EXT4_GET_MB_CACHE(inode) (((struct ext4_sb_info *) \
- inode->i_sb->s_fs_info)->s_mb_cache)
+#define EA_BLOCK_CACHE(inode) (((struct ext4_sb_info *) \
+ inode->i_sb->s_fs_info)->s_ea_block_cache)
+
+#define EA_INODE_CACHE(inode) (((struct ext4_sb_info *) \
+ inode->i_sb->s_fs_info)->s_ea_inode_cache)
+
+static int
+ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
+ struct inode *inode);
+
+#ifdef CONFIG_LOCKDEP
+void ext4_xattr_inode_set_class(struct inode *ea_inode)
+{
+ lockdep_set_subclass(&ea_inode->i_rwsem, 1);
+}
+#endif
static __le32 ext4_xattr_block_csum(struct inode *inode,
sector_t block_nr,
@@ -177,9 +193,8 @@ ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end,
/* Check the values */
while (!IS_LAST_ENTRY(entry)) {
- if (entry->e_value_block != 0)
- return -EFSCORRUPTED;
- if (entry->e_value_size != 0) {
+ if (entry->e_value_size != 0 &&
+ entry->e_value_inum == 0) {
u16 offs = le16_to_cpu(entry->e_value_offs);
u32 size = le32_to_cpu(entry->e_value_size);
void *value;
@@ -269,6 +284,185 @@ ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index,
return cmp ? -ENODATA : 0;
}
+static u32
+ext4_xattr_inode_hash(struct ext4_sb_info *sbi, const void *buffer, size_t size)
+{
+ return ext4_chksum(sbi, sbi->s_csum_seed, buffer, size);
+}
+
+static u64 ext4_xattr_inode_get_ref(struct inode *ea_inode)
+{
+ return ((u64)ea_inode->i_ctime.tv_sec << 32) |
+ ((u32)ea_inode->i_version);
+}
+
+static void ext4_xattr_inode_set_ref(struct inode *ea_inode, u64 ref_count)
+{
+ ea_inode->i_ctime.tv_sec = (u32)(ref_count >> 32);
+ ea_inode->i_version = (u32)ref_count;
+}
+
+static u32 ext4_xattr_inode_get_hash(struct inode *ea_inode)
+{
+ return (u32)ea_inode->i_atime.tv_sec;
+}
+
+static void ext4_xattr_inode_set_hash(struct inode *ea_inode, u32 hash)
+{
+ ea_inode->i_atime.tv_sec = hash;
+}
+
+/*
+ * Read the EA value from an inode.
+ */
+static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t size)
+{
+ unsigned long block = 0;
+ struct buffer_head *bh;
+ int blocksize = ea_inode->i_sb->s_blocksize;
+ size_t csize, copied = 0;
+ void *copy_pos = buf;
+
+ while (copied < size) {
+ csize = (size - copied) > blocksize ? blocksize : size - copied;
+ bh = ext4_bread(NULL, ea_inode, block, 0);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
+ if (!bh)
+ return -EFSCORRUPTED;
+
+ memcpy(copy_pos, bh->b_data, csize);
+ brelse(bh);
+
+ copy_pos += csize;
+ block += 1;
+ copied += csize;
+ }
+ return 0;
+}
+
+static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
+ struct inode **ea_inode)
+{
+ struct inode *inode;
+ int err;
+
+ inode = ext4_iget(parent->i_sb, ea_ino);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ ext4_error(parent->i_sb,
+ "error while reading EA inode %lu err=%d", ea_ino,
+ err);
+ return err;
+ }
+
+ if (is_bad_inode(inode)) {
+ ext4_error(parent->i_sb,
+ "error while reading EA inode %lu is_bad_inode",
+ ea_ino);
+ err = -EIO;
+ goto error;
+ }
+
+ if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
+ ext4_error(parent->i_sb,
+ "EA inode %lu does not have EXT4_EA_INODE_FL flag",
+ ea_ino);
+ err = -EINVAL;
+ goto error;
+ }
+
+ *ea_inode = inode;
+ return 0;
+error:
+ iput(inode);
+ return err;
+}
+
+static int
+ext4_xattr_inode_verify_hashes(struct inode *ea_inode,
+ struct ext4_xattr_entry *entry, void *buffer,
+ size_t size)
+{
+ u32 hash;
+
+ /* Verify stored hash matches calculated hash. */
+ hash = ext4_xattr_inode_hash(EXT4_SB(ea_inode->i_sb), buffer, size);
+ if (hash != ext4_xattr_inode_get_hash(ea_inode))
+ return -EFSCORRUPTED;
+
+ if (entry) {
+ __le32 e_hash, tmp_data;
+
+ /* Verify entry hash. */
+ tmp_data = cpu_to_le32(hash);
+ e_hash = ext4_xattr_hash_entry(entry->e_name, entry->e_name_len,
+ &tmp_data, 1);
+ if (e_hash != entry->e_hash)
+ return -EFSCORRUPTED;
+ }
+ return 0;
+}
+
+#define EXT4_XATTR_INODE_GET_PARENT(inode) ((__u32)(inode)->i_mtime.tv_sec)
+
+/*
+ * Read xattr value from the EA inode.
+ */
+static int
+ext4_xattr_inode_get(struct inode *inode, struct ext4_xattr_entry *entry,
+ void *buffer, size_t size)
+{
+ struct mb_cache *ea_inode_cache = EA_INODE_CACHE(inode);
+ struct inode *ea_inode;
+ int err;
+
+ err = ext4_xattr_inode_iget(inode, le32_to_cpu(entry->e_value_inum),
+ &ea_inode);
+ if (err) {
+ ea_inode = NULL;
+ goto out;
+ }
+
+ if (i_size_read(ea_inode) != size) {
+ ext4_warning_inode(ea_inode,
+ "ea_inode file size=%llu entry size=%zu",
+ i_size_read(ea_inode), size);
+ err = -EFSCORRUPTED;
+ goto out;
+ }
+
+ err = ext4_xattr_inode_read(ea_inode, buffer, size);
+ if (err)
+ goto out;
+
+ err = ext4_xattr_inode_verify_hashes(ea_inode, entry, buffer, size);
+ /*
+ * Compatibility check for old Lustre ea_inode implementation. Old
+ * version does not have hash validation, but it has a backpointer
+ * from ea_inode to the parent inode.
+ */
+ if (err == -EFSCORRUPTED) {
+ if (EXT4_XATTR_INODE_GET_PARENT(ea_inode) != inode->i_ino ||
+ ea_inode->i_generation != inode->i_generation) {
+ ext4_warning_inode(ea_inode,
+ "EA inode hash validation failed");
+ goto out;
+ }
+ ea_inode_cache = NULL; /* Do not add ea_inode to the cache. */
+ err = 0; /* Backpointer matched: accept the legacy ea_inode. */
+ } else if (err)
+ goto out;
+
+ if (ea_inode_cache)
+ mb_cache_entry_create(ea_inode_cache, GFP_NOFS,
+ ext4_xattr_inode_get_hash(ea_inode),
+ ea_inode->i_ino, true /* reusable */);
+out:
+ iput(ea_inode);
+ return err;
+}
+
static int
ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
void *buffer, size_t buffer_size)
@@ -277,7 +471,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
struct ext4_xattr_entry *entry;
size_t size;
int error;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
name_index, name, buffer, (long)buffer_size);
@@ -298,7 +492,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
error = -EFSCORRUPTED;
goto cleanup;
}
- ext4_xattr_cache_insert(ext4_mb_cache, bh);
+ ext4_xattr_block_cache_insert(ea_block_cache, bh);
entry = BFIRST(bh);
error = ext4_xattr_find_entry(&entry, name_index, name, 1);
if (error)
@@ -308,8 +502,15 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
error = -ERANGE;
if (size > buffer_size)
goto cleanup;
- memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
- size);
+ if (entry->e_value_inum) {
+ error = ext4_xattr_inode_get(inode, entry, buffer,
+ size);
+ if (error)
+ goto cleanup;
+ } else {
+ memcpy(buffer, bh->b_data +
+ le16_to_cpu(entry->e_value_offs), size);
+ }
}
error = size;
@@ -350,8 +551,15 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
error = -ERANGE;
if (size > buffer_size)
goto cleanup;
- memcpy(buffer, (void *)IFIRST(header) +
- le16_to_cpu(entry->e_value_offs), size);
+ if (entry->e_value_inum) {
+ error = ext4_xattr_inode_get(inode, entry, buffer,
+ size);
+ if (error)
+ goto cleanup;
+ } else {
+ memcpy(buffer, (void *)IFIRST(header) +
+ le16_to_cpu(entry->e_value_offs), size);
+ }
}
error = size;
@@ -428,7 +636,6 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
struct inode *inode = d_inode(dentry);
struct buffer_head *bh = NULL;
int error;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
ea_idebug(inode, "buffer=%p, buffer_size=%ld",
buffer, (long)buffer_size);
@@ -450,7 +657,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
error = -EFSCORRUPTED;
goto cleanup;
}
- ext4_xattr_cache_insert(ext4_mb_cache, bh);
+ ext4_xattr_block_cache_insert(EA_BLOCK_CACHE(inode), bh);
error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
cleanup:
@@ -539,15 +746,445 @@ static void ext4_xattr_update_super_block(handle_t *handle,
}
}
+int ext4_get_inode_usage(struct inode *inode, qsize_t *usage)
+{
+ struct ext4_iloc iloc = { .bh = NULL };
+ struct buffer_head *bh = NULL;
+ struct ext4_inode *raw_inode;
+ struct ext4_xattr_ibody_header *header;
+ struct ext4_xattr_entry *entry;
+ qsize_t ea_inode_refs = 0;
+ void *end;
+ int ret;
+
+ lockdep_assert_held_read(&EXT4_I(inode)->xattr_sem);
+
+ if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
+ ret = ext4_get_inode_loc(inode, &iloc);
+ if (ret)
+ goto out;
+ raw_inode = ext4_raw_inode(&iloc);
+ header = IHDR(inode, raw_inode);
+ end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
+ ret = xattr_check_inode(inode, header, end);
+ if (ret)
+ goto out;
+
+ for (entry = IFIRST(header); !IS_LAST_ENTRY(entry);
+ entry = EXT4_XATTR_NEXT(entry))
+ if (entry->e_value_inum)
+ ea_inode_refs++;
+ }
+
+ if (EXT4_I(inode)->i_file_acl) {
+ bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
+ if (!bh) {
+ ret = -EIO;
+ goto out;
+ }
+
+ if (ext4_xattr_check_block(inode, bh)) {
+ ret = -EFSCORRUPTED;
+ goto out;
+ }
+
+ for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry);
+ entry = EXT4_XATTR_NEXT(entry))
+ if (entry->e_value_inum)
+ ea_inode_refs++;
+ }
+ *usage = ea_inode_refs + 1;
+ ret = 0;
+out:
+ brelse(iloc.bh);
+ brelse(bh);
+ return ret;
+}
+
+static inline size_t round_up_cluster(struct inode *inode, size_t length)
+{
+ struct super_block *sb = inode->i_sb;
+ size_t cluster_size = 1 << (EXT4_SB(sb)->s_cluster_bits +
+ inode->i_blkbits);
+ size_t mask = ~(cluster_size - 1);
+
+ return (length + cluster_size - 1) & mask;
+}
+
+static int ext4_xattr_inode_alloc_quota(struct inode *inode, size_t len)
+{
+ int err;
+
+ err = dquot_alloc_inode(inode);
+ if (err)
+ return err;
+ err = dquot_alloc_space_nodirty(inode, round_up_cluster(inode, len));
+ if (err)
+ dquot_free_inode(inode);
+ return err;
+}
+
+static void ext4_xattr_inode_free_quota(struct inode *inode, size_t len)
+{
+ dquot_free_space_nodirty(inode, round_up_cluster(inode, len));
+ dquot_free_inode(inode);
+}
+
+int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
+ struct buffer_head *block_bh, size_t value_len,
+ bool is_create)
+{
+ int credits;
+ int blocks;
+
+ /*
+ * 1) Owner inode update
+ * 2) Ref count update on old xattr block
+ * 3) new xattr block
+ * 4) block bitmap update for new xattr block
+ * 5) group descriptor for new xattr block
+ * 6) block bitmap update for old xattr block
+ * 7) group descriptor for old block
+ *
+ * 6 & 7 can happen if we have two racing threads T_a and T_b
+ * which are each trying to set an xattr on inodes I_a and I_b
+ * which were both initially sharing an xattr block.
+ */
+ credits = 7;
+
+ /* Quota updates. */
+ credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(sb);
+
+ /*
+ * In case of inline data, we may push out the data to a block,
+ * so we need to reserve credits for this eventuality
+ */
+ if (inode && ext4_has_inline_data(inode))
+ credits += ext4_writepage_trans_blocks(inode) + 1;
+
+ /* We are done if ea_inode feature is not enabled. */
+ if (!ext4_has_feature_ea_inode(sb))
+ return credits;
+
+ /* New ea_inode, inode map, block bitmap, group descriptor. */
+ credits += 4;
+
+ /* Data blocks. */
+ blocks = (value_len + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
+
+ /* Indirection block or one level of extent tree. */
+ blocks += 1;
+
+ /* Block bitmap and group descriptor updates for each block. */
+ credits += blocks * 2;
+
+ /* Blocks themselves. */
+ credits += blocks;
+
+ if (!is_create) {
+ /* Dereference ea_inode holding old xattr value.
+ * Old ea_inode, inode map, block bitmap, group descriptor.
+ */
+ credits += 4;
+
+ /* Data blocks for old ea_inode. */
+ blocks = XATTR_SIZE_MAX >> sb->s_blocksize_bits;
+
+ /* Indirection block or one level of extent tree for old
+ * ea_inode.
+ */
+ blocks += 1;
+
+ /* Block bitmap and group descriptor updates for each block. */
+ credits += blocks * 2;
+ }
+
+ /* We may need to clone the existing xattr block in which case we need
+ * to increment ref counts for existing ea_inodes referenced by it.
+ */
+ if (block_bh) {
+ struct ext4_xattr_entry *entry = BFIRST(block_bh);
+
+ for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry))
+ if (entry->e_value_inum)
+ /* Ref count update on ea_inode. */
+ credits += 1;
+ }
+ return credits;
+}
+
+static int ext4_xattr_ensure_credits(handle_t *handle, struct inode *inode,
+ int credits, struct buffer_head *bh,
+ bool dirty, bool block_csum)
+{
+ int error;
+
+ if (!ext4_handle_valid(handle))
+ return 0;
+
+ if (handle->h_buffer_credits >= credits)
+ return 0;
+
+ error = ext4_journal_extend(handle, credits - handle->h_buffer_credits);
+ if (!error)
+ return 0;
+ if (error < 0) {
+ ext4_warning(inode->i_sb, "Extend journal (error %d)", error);
+ return error;
+ }
+
+ if (bh && dirty) {
+ if (block_csum)
+ ext4_xattr_block_csum_set(inode, bh);
+ error = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (error) {
+ ext4_warning(inode->i_sb, "Handle metadata (error %d)",
+ error);
+ return error;
+ }
+ }
+
+ error = ext4_journal_restart(handle, credits);
+ if (error) {
+ ext4_warning(inode->i_sb, "Restart journal (error %d)", error);
+ return error;
+ }
+
+ if (bh) {
+ error = ext4_journal_get_write_access(handle, bh);
+ if (error) {
+ ext4_warning(inode->i_sb,
+ "Get write access failed (error %d)",
+ error);
+ return error;
+ }
+ }
+ return 0;
+}
+
+static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
+ int ref_change)
+{
+ struct mb_cache *ea_inode_cache = EA_INODE_CACHE(ea_inode);
+ struct ext4_iloc iloc;
+ s64 ref_count;
+ u32 hash;
+ int ret;
+
+ inode_lock(ea_inode);
+
+ ret = ext4_reserve_inode_write(handle, ea_inode, &iloc);
+ if (ret) {
+ iloc.bh = NULL;
+ goto out;
+ }
+
+ ref_count = ext4_xattr_inode_get_ref(ea_inode);
+ ref_count += ref_change;
+ ext4_xattr_inode_set_ref(ea_inode, ref_count);
+
+ if (ref_change > 0) {
+ WARN_ONCE(ref_count <= 0, "EA inode %lu ref_count=%lld",
+ ea_inode->i_ino, ref_count);
+
+ if (ref_count == 1) {
+ WARN_ONCE(ea_inode->i_nlink, "EA inode %lu i_nlink=%u",
+ ea_inode->i_ino, ea_inode->i_nlink);
+
+ set_nlink(ea_inode, 1);
+ ext4_orphan_del(handle, ea_inode);
+
+ if (ea_inode_cache) {
+ hash = ext4_xattr_inode_get_hash(ea_inode);
+ mb_cache_entry_create(ea_inode_cache,
+ GFP_NOFS, hash,
+ ea_inode->i_ino,
+ true /* reusable */);
+ }
+ }
+ } else {
+ WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld",
+ ea_inode->i_ino, ref_count);
+
+ if (ref_count == 0) {
+ WARN_ONCE(ea_inode->i_nlink != 1,
+ "EA inode %lu i_nlink=%u",
+ ea_inode->i_ino, ea_inode->i_nlink);
+
+ clear_nlink(ea_inode);
+ ext4_orphan_add(handle, ea_inode);
+
+ if (ea_inode_cache) {
+ hash = ext4_xattr_inode_get_hash(ea_inode);
+ mb_cache_entry_delete(ea_inode_cache, hash,
+ ea_inode->i_ino);
+ }
+ }
+ }
+
+ ret = ext4_mark_iloc_dirty(handle, ea_inode, &iloc);
+ iloc.bh = NULL;
+ if (ret)
+ ext4_warning_inode(ea_inode,
+ "ext4_mark_iloc_dirty() failed ret=%d", ret);
+out:
+ brelse(iloc.bh);
+ inode_unlock(ea_inode);
+ return ret;
+}
+
+static int ext4_xattr_inode_inc_ref(handle_t *handle, struct inode *ea_inode)
+{
+ return ext4_xattr_inode_update_ref(handle, ea_inode, 1);
+}
+
+static int ext4_xattr_inode_dec_ref(handle_t *handle, struct inode *ea_inode)
+{
+ return ext4_xattr_inode_update_ref(handle, ea_inode, -1);
+}
+
+static int ext4_xattr_inode_inc_ref_all(handle_t *handle, struct inode *parent,
+ struct ext4_xattr_entry *first)
+{
+ struct inode *ea_inode;
+ struct ext4_xattr_entry *entry;
+ struct ext4_xattr_entry *failed_entry;
+ unsigned int ea_ino;
+ int err, saved_err;
+
+ for (entry = first; !IS_LAST_ENTRY(entry);
+ entry = EXT4_XATTR_NEXT(entry)) {
+ if (!entry->e_value_inum)
+ continue;
+ ea_ino = le32_to_cpu(entry->e_value_inum);
+ err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
+ if (err)
+ goto cleanup;
+ err = ext4_xattr_inode_inc_ref(handle, ea_inode);
+ if (err) {
+ ext4_warning_inode(ea_inode, "inc ref error %d", err);
+ iput(ea_inode);
+ goto cleanup;
+ }
+ iput(ea_inode);
+ }
+ return 0;
+
+cleanup:
+ saved_err = err;
+ failed_entry = entry;
+
+ for (entry = first; entry != failed_entry;
+ entry = EXT4_XATTR_NEXT(entry)) {
+ if (!entry->e_value_inum)
+ continue;
+ ea_ino = le32_to_cpu(entry->e_value_inum);
+ err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
+ if (err) {
+ ext4_warning(parent->i_sb,
+ "cleanup ea_ino %u iget error %d", ea_ino,
+ err);
+ continue;
+ }
+ err = ext4_xattr_inode_dec_ref(handle, ea_inode);
+ if (err)
+ ext4_warning_inode(ea_inode, "cleanup dec ref error %d",
+ err);
+ iput(ea_inode);
+ }
+ return saved_err;
+}
+
+static void
+ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
+ struct buffer_head *bh,
+ struct ext4_xattr_entry *first, bool block_csum,
+ struct ext4_xattr_inode_array **ea_inode_array,
+ int extra_credits, bool skip_quota)
+{
+ struct inode *ea_inode;
+ struct ext4_xattr_entry *entry;
+ bool dirty = false;
+ unsigned int ea_ino;
+ int err;
+ int credits;
+
+ /* One credit for dec ref on ea_inode, one for orphan list addition. */
+ credits = 2 + extra_credits;
+
+ for (entry = first; !IS_LAST_ENTRY(entry);
+ entry = EXT4_XATTR_NEXT(entry)) {
+ if (!entry->e_value_inum)
+ continue;
+ ea_ino = le32_to_cpu(entry->e_value_inum);
+ err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
+ if (err)
+ continue;
+
+ err = ext4_expand_inode_array(ea_inode_array, ea_inode);
+ if (err) {
+ ext4_warning_inode(ea_inode,
+ "Expand inode array err=%d", err);
+ iput(ea_inode);
+ continue;
+ }
+
+ err = ext4_xattr_ensure_credits(handle, parent, credits, bh,
+ dirty, block_csum);
+ if (err) {
+ ext4_warning_inode(ea_inode, "Ensure credits err=%d",
+ err);
+ continue;
+ }
+
+ err = ext4_xattr_inode_dec_ref(handle, ea_inode);
+ if (err) {
+ ext4_warning_inode(ea_inode, "ea_inode dec ref err=%d",
+ err);
+ continue;
+ }
+
+ if (!skip_quota)
+ ext4_xattr_inode_free_quota(parent,
+ le32_to_cpu(entry->e_value_size));
+
+ /*
+ * Forget about ea_inode within the same transaction that
+ * decrements the ref count. This avoids duplicate decrements in
+ * case the rest of the work spills over to subsequent
+ * transactions.
+ */
+ entry->e_value_inum = 0;
+ entry->e_value_size = 0;
+
+ dirty = true;
+ }
+
+ if (dirty) {
+ /*
+ * Note that we are deliberately skipping csum calculation for
+ * the final update because we do not expect any journal
+ * restarts until xattr block is freed.
+ */
+
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (err)
+ ext4_warning_inode(parent,
+ "handle dirty metadata err=%d", err);
+ }
+}
+
/*
* Release the xattr block BH: If the reference count is > 1, decrement it;
* otherwise free the block.
*/
static void
ext4_xattr_release_block(handle_t *handle, struct inode *inode,
- struct buffer_head *bh)
+ struct buffer_head *bh,
+ struct ext4_xattr_inode_array **ea_inode_array,
+ int extra_credits)
{
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
u32 hash, ref;
int error = 0;
@@ -565,9 +1202,19 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
* This must happen under buffer lock for
* ext4_xattr_block_set() to reliably detect freed block
*/
- mb_cache_entry_delete_block(ext4_mb_cache, hash, bh->b_blocknr);
+ if (ea_block_cache)
+ mb_cache_entry_delete(ea_block_cache, hash,
+ bh->b_blocknr);
get_bh(bh);
unlock_buffer(bh);
+
+ if (ext4_has_feature_ea_inode(inode->i_sb))
+ ext4_xattr_inode_dec_ref_all(handle, inode, bh,
+ BFIRST(bh),
+ true /* block_csum */,
+ ea_inode_array,
+ extra_credits,
+ true /* skip_quota */);
ext4_free_blocks(handle, inode, bh, 0, 1,
EXT4_FREE_BLOCKS_METADATA |
EXT4_FREE_BLOCKS_FORGET);
@@ -577,11 +1224,13 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
if (ref == EXT4_XATTR_REFCOUNT_MAX - 1) {
struct mb_cache_entry *ce;
- ce = mb_cache_entry_get(ext4_mb_cache, hash,
- bh->b_blocknr);
- if (ce) {
- ce->e_reusable = 1;
- mb_cache_entry_put(ext4_mb_cache, ce);
+ if (ea_block_cache) {
+ ce = mb_cache_entry_get(ea_block_cache, hash,
+ bh->b_blocknr);
+ if (ce) {
+ ce->e_reusable = 1;
+ mb_cache_entry_put(ea_block_cache, ce);
+ }
}
}
@@ -620,7 +1269,7 @@ static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last,
size_t *min_offs, void *base, int *total)
{
for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
- if (last->e_value_size) {
+ if (!last->e_value_inum && last->e_value_size) {
size_t offs = le16_to_cpu(last->e_value_offs);
if (offs < *min_offs)
*min_offs = offs;
@@ -631,113 +1280,454 @@ static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last,
return (*min_offs - ((void *)last - base) - sizeof(__u32));
}
-static int
-ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
+/*
+ * Write the value of the EA in an inode.
+ *
+ * @handle:   journal handle the block allocations and buffer updates run under
+ * @ea_inode: inode that receives the value
+ * @buf:      value to store
+ * @bufsize:  number of bytes in @buf
+ *
+ * The function first asks ext4_map_blocks() to create enough blocks to hold
+ * @bufsize bytes (retrying on -ENOSPC for as long as
+ * ext4_should_retry_alloc() allows), then copies @buf into those blocks one
+ * block at a time and finally sets i_size and i_disksize of @ea_inode to the
+ * number of bytes written.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int ext4_xattr_inode_write(handle_t *handle, struct inode *ea_inode,
+				  const void *buf, int bufsize)
+{
+	struct buffer_head *bh = NULL;
+	unsigned long block = 0;
+	int blocksize = ea_inode->i_sb->s_blocksize;
+	int max_blocks = (bufsize + blocksize - 1) >> ea_inode->i_blkbits;
+	int csize, wsize = 0;
+	int ret = 0;
+	int retries = 0;
+
+retry:
+	/* Map (allocate) blocks until the whole value is covered. */
+	while (ret >= 0 && ret < max_blocks) {
+		struct ext4_map_blocks map;
+		map.m_lblk = block += ret;
+		map.m_len = max_blocks -= ret;
+
+		ret = ext4_map_blocks(handle, ea_inode, &map,
+				      EXT4_GET_BLOCKS_CREATE);
+		if (ret <= 0) {
+			ext4_mark_inode_dirty(handle, ea_inode);
+			if (ret == -ENOSPC &&
+			    ext4_should_retry_alloc(ea_inode->i_sb, &retries)) {
+				ret = 0;
+				goto retry;
+			}
+			break;
+		}
+	}
+
+	if (ret < 0)
+		return ret;
+
+	/* Copy the value into the mapped blocks, one block per iteration. */
+	block = 0;
+	while (wsize < bufsize) {
+		/* Drop the buffer of the previous iteration (NULL is fine). */
+		brelse(bh);
+		csize = (bufsize - wsize) > blocksize ? blocksize :
+								bufsize - wsize;
+		bh = ext4_getblk(handle, ea_inode, block, 0);
+		if (IS_ERR(bh))
+			return PTR_ERR(bh);
+		if (!bh) {
+			/*
+			 * The lookup is done without the create flag, so a
+			 * NULL buffer means the block that should have been
+			 * mapped above is missing. Fail instead of
+			 * dereferencing a NULL pointer below.
+			 */
+			WARN_ON_ONCE(1);
+			EXT4_ERROR_INODE(ea_inode,
+					 "ext4_getblk() return bh = NULL");
+			return -EFSCORRUPTED;
+		}
+		ret = ext4_journal_get_write_access(handle, bh);
+		if (ret)
+			goto out;
+
+		memcpy(bh->b_data, buf, csize);
+		set_buffer_uptodate(bh);
+		ext4_handle_dirty_metadata(handle, ea_inode, bh);
+
+		buf += csize;
+		wsize += csize;
+		block += 1;
+	}
+
+	inode_lock(ea_inode);
+	i_size_write(ea_inode, wsize);
+	ext4_update_i_disksize(ea_inode, wsize);
+	inode_unlock(ea_inode);
+
+	ext4_mark_inode_dirty(handle, ea_inode);
+
+out:
+	brelse(bh);
+
+	return ret;
+}
+
+/*
+ * Allocate and set up a fresh inode that will hold the value of a large EA.
+ *
+ * The new inode starts with a reference count of 1 and records @hash as the
+ * hash of its value. Returns the inode, or an ERR_PTR() on failure.
+ */
+static struct inode *ext4_xattr_inode_create(handle_t *handle,
+					     struct inode *inode, u32 hash)
+{
+	struct inode *root = inode->i_sb->s_root->d_inode;
+	uid_t ids[2] = { i_uid_read(inode), i_gid_read(inode) };
+	struct inode *ea_inode;
+	int rc;
+
+	/*
+	 * Use the inode number following the parent as the goal so that the
+	 * EA inode lands in the same group, or one close to it.
+	 */
+	ea_inode = ext4_new_inode(handle, root, S_IFREG | 0600, NULL,
+				  inode->i_ino + 1, ids, EXT4_EA_INODE_FL);
+	if (IS_ERR(ea_inode))
+		return ea_inode;
+
+	ea_inode->i_op = &ext4_file_inode_operations;
+	ea_inode->i_fop = &ext4_file_operations;
+	ext4_set_aops(ea_inode);
+	ext4_xattr_inode_set_class(ea_inode);
+	unlock_new_inode(ea_inode);
+	ext4_xattr_inode_set_ref(ea_inode, 1);
+	ext4_xattr_inode_set_hash(ea_inode, hash);
+
+	rc = ext4_mark_inode_dirty(handle, ea_inode);
+	if (rc)
+		goto fail;
+	rc = ext4_inode_attach_jinode(ea_inode);
+	if (rc)
+		goto fail;
+
+	/*
+	 * An xattr inode may be shared between several parents, so quota is
+	 * charged by the callers rather than against this inode.
+	 */
+	dquot_free_inode(ea_inode);
+	dquot_drop(ea_inode);
+	inode_lock(ea_inode);
+	ea_inode->i_flags |= S_NOQUOTA;
+	inode_unlock(ea_inode);
+
+	return ea_inode;
+
+fail:
+	iput(ea_inode);
+	return ERR_PTR(rc);
+}
+
+/*
+ * Look in the EA inode cache for an existing xattr inode whose contents are
+ * byte-for-byte equal to @value (@value_len bytes, hashing to @hash).
+ *
+ * Returns the matching inode, or NULL when there is no cache, no match, or
+ * the comparison buffer cannot be allocated.
+ */
+static struct inode *
+ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
+			    size_t value_len, u32 hash)
+{
+	struct mb_cache *cache = EA_INODE_CACHE(inode);
+	struct mb_cache_entry *ce;
+	struct inode *candidate;
+	struct inode *match = NULL;
+	void *scratch;
+
+	if (!cache)
+		return NULL;
+
+	ce = mb_cache_entry_find_first(cache, hash);
+	if (!ce)
+		return NULL;
+
+	/* Buffer the candidate values are read into for comparison. */
+	scratch = ext4_kvmalloc(value_len, GFP_NOFS);
+	if (!scratch) {
+		mb_cache_entry_put(cache, ce);
+		return NULL;
+	}
+
+	/* Walk every cache entry that carries the same hash. */
+	for (; ce; ce = mb_cache_entry_find_next(cache, ce)) {
+		candidate = ext4_iget(inode->i_sb, ce->e_value);
+		if (IS_ERR(candidate))
+			continue;
+
+		if (!is_bad_inode(candidate) &&
+		    (EXT4_I(candidate)->i_flags & EXT4_EA_INODE_FL) &&
+		    i_size_read(candidate) == value_len &&
+		    !ext4_xattr_inode_read(candidate, scratch, value_len) &&
+		    !ext4_xattr_inode_verify_hashes(candidate, NULL, scratch,
+						    value_len) &&
+		    !memcmp(value, scratch, value_len)) {
+			mb_cache_entry_touch(cache, ce);
+			mb_cache_entry_put(cache, ce);
+			match = candidate;
+			break;
+		}
+
+		iput(candidate);
+	}
+
+	kvfree(scratch);
+	return match;
+}
+
+/*
+ * Obtain an xattr inode holding @value for @inode.
+ *
+ * An existing inode with identical contents is reused (its reference count
+ * is incremented); otherwise a new inode is created, filled with the value
+ * and entered into the EA inode cache when one exists.
+ *
+ * On success 0 is returned and *@ret_inode points to the inode; on failure a
+ * negative error code is returned and *@ret_inode is left untouched.
+ */
+static int ext4_xattr_inode_lookup_create(handle_t *handle, struct inode *inode,
+					  const void *value, size_t value_len,
+					  struct inode **ret_inode)
+{
+	struct inode *ea_inode;
+	struct mb_cache *cache;
+	u32 hash;
+	int err;
+
+	hash = ext4_xattr_inode_hash(EXT4_SB(inode->i_sb), value, value_len);
+	ea_inode = ext4_xattr_inode_cache_find(inode, value, value_len, hash);
+	if (!ea_inode) {
+		/* Nothing to share: create an inode for the EA value. */
+		ea_inode = ext4_xattr_inode_create(handle, inode, hash);
+		if (IS_ERR(ea_inode))
+			return PTR_ERR(ea_inode);
+
+		err = ext4_xattr_inode_write(handle, ea_inode, value,
+					     value_len);
+		if (err) {
+			ext4_xattr_inode_dec_ref(handle, ea_inode);
+			goto drop;
+		}
+
+		cache = EA_INODE_CACHE(inode);
+		if (cache)
+			mb_cache_entry_create(cache, GFP_NOFS, hash,
+					      ea_inode->i_ino,
+					      true /* reusable */);
+	} else {
+		/* Found an identical value: take another reference on it. */
+		err = ext4_xattr_inode_inc_ref(handle, ea_inode);
+		if (err)
+			goto drop;
+	}
+
+	*ret_inode = ea_inode;
+	return 0;
+
+drop:
+	iput(ea_inode);
+	return err;
+}
+
+/*
+ * Reserve min(block_size/8, 1024) bytes for xattr entries/names if ea_inode
+ * feature is enabled.
+ *
+ * The macro evaluates to one eighth of the block size of @inode, capped at
+ * 1024. ext4_xattr_set_entry() compares the offset at which a new in-block
+ * value would start against this amount and fails with -ENOSPC when the
+ * value would reach into the reserved area.
+ */
+#define EXT4_XATTR_BLOCK_RESERVE(inode) min(i_blocksize(inode)/8, 1024U)
+
+static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
+ struct ext4_xattr_search *s,
+ handle_t *handle, struct inode *inode,
+ bool is_block)
{
struct ext4_xattr_entry *last;
- size_t free, min_offs = s->end - s->base, name_len = strlen(i->name);
+ struct ext4_xattr_entry *here = s->here;
+ size_t min_offs = s->end - s->base, name_len = strlen(i->name);
+ int in_inode = i->in_inode;
+ struct inode *old_ea_inode = NULL;
+ struct inode *new_ea_inode = NULL;
+ size_t old_size, new_size;
+ int ret;
+
+ /* Space used by old and new values. */
+ old_size = (!s->not_found && !here->e_value_inum) ?
+ EXT4_XATTR_SIZE(le32_to_cpu(here->e_value_size)) : 0;
+ new_size = (i->value && !in_inode) ? EXT4_XATTR_SIZE(i->value_len) : 0;
+
+ /*
+ * Optimization for the simple case when old and new values have the
+ * same padded sizes. Not applicable if external inodes are involved.
+ */
+ if (new_size && new_size == old_size) {
+ size_t offs = le16_to_cpu(here->e_value_offs);
+ void *val = s->base + offs;
+
+ here->e_value_size = cpu_to_le32(i->value_len);
+ if (i->value == EXT4_ZERO_XATTR_VALUE) {
+ memset(val, 0, new_size);
+ } else {
+ memcpy(val, i->value, i->value_len);
+ /* Clear padding bytes. */
+ memset(val + i->value_len, 0, new_size - i->value_len);
+ }
+ return 0;
+ }
/* Compute min_offs and last. */
last = s->first;
for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
- if (last->e_value_size) {
+ if (!last->e_value_inum && last->e_value_size) {
size_t offs = le16_to_cpu(last->e_value_offs);
if (offs < min_offs)
min_offs = offs;
}
}
- free = min_offs - ((void *)last - s->base) - sizeof(__u32);
- if (!s->not_found) {
- if (s->here->e_value_size) {
- size_t size = le32_to_cpu(s->here->e_value_size);
- free += EXT4_XATTR_SIZE(size);
- }
- free += EXT4_XATTR_LEN(name_len);
- }
+
+ /* Check whether we have enough space. */
if (i->value) {
- if (free < EXT4_XATTR_LEN(name_len) +
- EXT4_XATTR_SIZE(i->value_len))
- return -ENOSPC;
+ size_t free;
+
+ free = min_offs - ((void *)last - s->base) - sizeof(__u32);
+ if (!s->not_found)
+ free += EXT4_XATTR_LEN(name_len) + old_size;
+
+ if (free < EXT4_XATTR_LEN(name_len) + new_size) {
+ ret = -ENOSPC;
+ goto out;
+ }
+
+ /*
+ * If storing the value in an external inode is an option,
+ * reserve space for xattr entries/names in the external
+ * attribute block so that a long value does not occupy the
+ * whole space and prevent further entries being added.
+ */
+ if (ext4_has_feature_ea_inode(inode->i_sb) &&
+ new_size && is_block &&
+ (min_offs + old_size - new_size) <
+ EXT4_XATTR_BLOCK_RESERVE(inode)) {
+ ret = -ENOSPC;
+ goto out;
+ }
}
- if (i->value && s->not_found) {
- /* Insert the new name. */
- size_t size = EXT4_XATTR_LEN(name_len);
- size_t rest = (void *)last - (void *)s->here + sizeof(__u32);
- memmove((void *)s->here + size, s->here, rest);
- memset(s->here, 0, size);
- s->here->e_name_index = i->name_index;
- s->here->e_name_len = name_len;
- memcpy(s->here->e_name, i->name, name_len);
- } else {
- if (s->here->e_value_size) {
- void *first_val = s->base + min_offs;
- size_t offs = le16_to_cpu(s->here->e_value_offs);
- void *val = s->base + offs;
- size_t size = EXT4_XATTR_SIZE(
- le32_to_cpu(s->here->e_value_size));
-
- if (i->value && size == EXT4_XATTR_SIZE(i->value_len)) {
- /* The old and the new value have the same
- size. Just replace. */
- s->here->e_value_size =
- cpu_to_le32(i->value_len);
- if (i->value == EXT4_ZERO_XATTR_VALUE) {
- memset(val, 0, size);
- } else {
- /* Clear pad bytes first. */
- memset(val + size - EXT4_XATTR_PAD, 0,
- EXT4_XATTR_PAD);
- memcpy(val, i->value, i->value_len);
- }
- return 0;
- }
+ /*
+ * Getting access to old and new ea inodes is subject to failures.
+ * Finish that work before doing any modifications to the xattr data.
+ */
+ if (!s->not_found && here->e_value_inum) {
+ ret = ext4_xattr_inode_iget(inode,
+ le32_to_cpu(here->e_value_inum),
+ &old_ea_inode);
+ if (ret) {
+ old_ea_inode = NULL;
+ goto out;
+ }
+ }
+ if (i->value && in_inode) {
+ WARN_ON_ONCE(!i->value_len);
+
+ ret = ext4_xattr_inode_alloc_quota(inode, i->value_len);
+ if (ret)
+ goto out;
+
+ ret = ext4_xattr_inode_lookup_create(handle, inode, i->value,
+ i->value_len,
+ &new_ea_inode);
+ if (ret) {
+ new_ea_inode = NULL;
+ ext4_xattr_inode_free_quota(inode, i->value_len);
+ goto out;
+ }
+ }
- /* Remove the old value. */
- memmove(first_val + size, first_val, val - first_val);
- memset(first_val, 0, size);
- s->here->e_value_size = 0;
- s->here->e_value_offs = 0;
- min_offs += size;
-
- /* Adjust all value offsets. */
- last = s->first;
- while (!IS_LAST_ENTRY(last)) {
- size_t o = le16_to_cpu(last->e_value_offs);
- if (last->e_value_size && o < offs)
- last->e_value_offs =
- cpu_to_le16(o + size);
- last = EXT4_XATTR_NEXT(last);
+ if (old_ea_inode) {
+ /* We are ready to release ref count on the old_ea_inode. */
+ ret = ext4_xattr_inode_dec_ref(handle, old_ea_inode);
+ if (ret) {
+ /* Release newly required ref count on new_ea_inode. */
+ if (new_ea_inode) {
+ int err;
+
+ err = ext4_xattr_inode_dec_ref(handle,
+ new_ea_inode);
+ if (err)
+ ext4_warning_inode(new_ea_inode,
+ "dec ref new_ea_inode err=%d",
+ err);
+ ext4_xattr_inode_free_quota(inode,
+ i->value_len);
}
+ goto out;
}
- if (!i->value) {
- /* Remove the old name. */
- size_t size = EXT4_XATTR_LEN(name_len);
- last = ENTRY((void *)last - size);
- memmove(s->here, (void *)s->here + size,
- (void *)last - (void *)s->here + sizeof(__u32));
- memset(last, 0, size);
+
+ ext4_xattr_inode_free_quota(inode,
+ le32_to_cpu(here->e_value_size));
+ }
+
+ /* No failures allowed past this point. */
+
+ if (!s->not_found && here->e_value_offs) {
+ /* Remove the old value. */
+ void *first_val = s->base + min_offs;
+ size_t offs = le16_to_cpu(here->e_value_offs);
+ void *val = s->base + offs;
+
+ memmove(first_val + old_size, first_val, val - first_val);
+ memset(first_val, 0, old_size);
+ min_offs += old_size;
+
+ /* Adjust all value offsets. */
+ last = s->first;
+ while (!IS_LAST_ENTRY(last)) {
+ size_t o = le16_to_cpu(last->e_value_offs);
+
+ if (!last->e_value_inum &&
+ last->e_value_size && o < offs)
+ last->e_value_offs = cpu_to_le16(o + old_size);
+ last = EXT4_XATTR_NEXT(last);
}
}
+ if (!i->value) {
+ /* Remove old name. */
+ size_t size = EXT4_XATTR_LEN(name_len);
+
+ last = ENTRY((void *)last - size);
+ memmove(here, (void *)here + size,
+ (void *)last - (void *)here + sizeof(__u32));
+ memset(last, 0, size);
+ } else if (s->not_found) {
+ /* Insert new name. */
+ size_t size = EXT4_XATTR_LEN(name_len);
+ size_t rest = (void *)last - (void *)here + sizeof(__u32);
+
+ memmove((void *)here + size, here, rest);
+ memset(here, 0, size);
+ here->e_name_index = i->name_index;
+ here->e_name_len = name_len;
+ memcpy(here->e_name, i->name, name_len);
+ } else {
+ /* This is an update, reset value info. */
+ here->e_value_inum = 0;
+ here->e_value_offs = 0;
+ here->e_value_size = 0;
+ }
+
if (i->value) {
- /* Insert the new value. */
- s->here->e_value_size = cpu_to_le32(i->value_len);
- if (i->value_len) {
- size_t size = EXT4_XATTR_SIZE(i->value_len);
- void *val = s->base + min_offs - size;
- s->here->e_value_offs = cpu_to_le16(min_offs - size);
+ /* Insert new value. */
+ if (in_inode) {
+ here->e_value_inum = cpu_to_le32(new_ea_inode->i_ino);
+ } else if (i->value_len) {
+ void *val = s->base + min_offs - new_size;
+
+ here->e_value_offs = cpu_to_le16(min_offs - new_size);
if (i->value == EXT4_ZERO_XATTR_VALUE) {
- memset(val, 0, size);
+ memset(val, 0, new_size);
} else {
- /* Clear the pad bytes first. */
- memset(val + size - EXT4_XATTR_PAD, 0,
- EXT4_XATTR_PAD);
memcpy(val, i->value, i->value_len);
+ /* Clear padding bytes. */
+ memset(val + i->value_len, 0,
+ new_size - i->value_len);
}
}
+ here->e_value_size = cpu_to_le32(i->value_len);
}
- return 0;
+
+ if (i->value) {
+ __le32 hash = 0;
+
+ /* Entry hash calculation. */
+ if (in_inode) {
+ __le32 crc32c_hash;
+
+ /*
+ * Feed crc32c hash instead of the raw value for entry
+ * hash calculation. This is to avoid walking
+ * potentially long value buffer again.
+ */
+ crc32c_hash = cpu_to_le32(
+ ext4_xattr_inode_get_hash(new_ea_inode));
+ hash = ext4_xattr_hash_entry(here->e_name,
+ here->e_name_len,
+ &crc32c_hash, 1);
+ } else if (is_block) {
+ __le32 *value = s->base + min_offs - new_size;
+
+ hash = ext4_xattr_hash_entry(here->e_name,
+ here->e_name_len, value,
+ new_size >> 2);
+ }
+ here->e_hash = hash;
+ }
+
+ if (is_block)
+ ext4_xattr_rehash((struct ext4_xattr_header *)s->base);
+
+ ret = 0;
+out:
+ iput(old_ea_inode);
+ iput(new_ea_inode);
+ return ret;
}
struct ext4_xattr_block_find {
@@ -794,15 +1784,16 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
{
struct super_block *sb = inode->i_sb;
struct buffer_head *new_bh = NULL;
- struct ext4_xattr_search *s = &bs->s;
+ struct ext4_xattr_search s_copy = bs->s;
+ struct ext4_xattr_search *s = &s_copy;
struct mb_cache_entry *ce = NULL;
int error = 0;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
+ struct inode *ea_inode = NULL;
+ size_t old_ea_inode_size = 0;
#define header(x) ((struct ext4_xattr_header *)(x))
- if (i->value && i->value_len > sb->s_blocksize)
- return -ENOSPC;
if (s->base) {
BUFFER_TRACE(bs->bh, "get_write_access");
error = ext4_journal_get_write_access(handle, bs->bh);
@@ -818,17 +1809,15 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
* ext4_xattr_block_set() to reliably detect modified
* block
*/
- mb_cache_entry_delete_block(ext4_mb_cache, hash,
- bs->bh->b_blocknr);
+ if (ea_block_cache)
+ mb_cache_entry_delete(ea_block_cache, hash,
+ bs->bh->b_blocknr);
ea_bdebug(bs->bh, "modifying in-place");
- error = ext4_xattr_set_entry(i, s);
- if (!error) {
- if (!IS_LAST_ENTRY(s->first))
- ext4_xattr_rehash(header(s->base),
- s->here);
- ext4_xattr_cache_insert(ext4_mb_cache,
- bs->bh);
- }
+ error = ext4_xattr_set_entry(i, s, handle, inode,
+ true /* is_block */);
+ if (!error)
+ ext4_xattr_block_cache_insert(ea_block_cache,
+ bs->bh);
ext4_xattr_block_csum_set(inode, bs->bh);
unlock_buffer(bs->bh);
if (error == -EFSCORRUPTED)
@@ -854,6 +1843,24 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
header(s->base)->h_refcount = cpu_to_le32(1);
s->here = ENTRY(s->base + offset);
s->end = s->base + bs->bh->b_size;
+
+ /*
+ * If existing entry points to an xattr inode, we need
+ * to prevent ext4_xattr_set_entry() from decrementing
+ * ref count on it because the reference belongs to the
+ * original block. In this case, make the entry look
+ * like it has an empty value.
+ */
+ if (!s->not_found && s->here->e_value_inum) {
+ /*
+ * Defer quota free call for previous inode
+ * until success is guaranteed.
+ */
+ old_ea_inode_size = le32_to_cpu(
+ s->here->e_value_size);
+ s->here->e_value_inum = 0;
+ s->here->e_value_size = 0;
+ }
}
} else {
/* Allocate a buffer where we construct the new block. */
@@ -870,17 +1877,33 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
s->end = s->base + sb->s_blocksize;
}
- error = ext4_xattr_set_entry(i, s);
+ error = ext4_xattr_set_entry(i, s, handle, inode, true /* is_block */);
if (error == -EFSCORRUPTED)
goto bad_block;
if (error)
goto cleanup;
- if (!IS_LAST_ENTRY(s->first))
- ext4_xattr_rehash(header(s->base), s->here);
+
+ if (i->value && s->here->e_value_inum) {
+ unsigned int ea_ino;
+
+ /*
+ * A ref count on ea_inode has been taken as part of the call to
+ * ext4_xattr_set_entry() above. We would like to drop this
+ * extra ref but we have to wait until the xattr block is
+ * initialized and has its own ref count on the ea_inode.
+ */
+ ea_ino = le32_to_cpu(s->here->e_value_inum);
+ error = ext4_xattr_inode_iget(inode, ea_ino, &ea_inode);
+ if (error) {
+ ea_inode = NULL;
+ goto cleanup;
+ }
+ }
inserted:
if (!IS_LAST_ENTRY(s->first)) {
- new_bh = ext4_xattr_cache_find(inode, header(s->base), &ce);
+ new_bh = ext4_xattr_block_cache_find(inode, header(s->base),
+ &ce);
if (new_bh) {
/* We found an identical block in the cache. */
if (new_bh == bs->bh)
@@ -925,7 +1948,7 @@ inserted:
EXT4_C2B(EXT4_SB(sb),
1));
brelse(new_bh);
- mb_cache_entry_put(ext4_mb_cache, ce);
+ mb_cache_entry_put(ea_block_cache, ce);
ce = NULL;
new_bh = NULL;
goto inserted;
@@ -944,8 +1967,8 @@ inserted:
if (error)
goto cleanup_dquot;
}
- mb_cache_entry_touch(ext4_mb_cache, ce);
- mb_cache_entry_put(ext4_mb_cache, ce);
+ mb_cache_entry_touch(ea_block_cache, ce);
+ mb_cache_entry_put(ea_block_cache, ce);
ce = NULL;
} else if (bs->bh && s->base == bs->bh->b_data) {
/* We were modifying this block in-place. */
@@ -984,6 +2007,22 @@ getblk_failed:
EXT4_FREE_BLOCKS_METADATA);
goto cleanup;
}
+ error = ext4_xattr_inode_inc_ref_all(handle, inode,
+ ENTRY(header(s->base)+1));
+ if (error)
+ goto getblk_failed;
+ if (ea_inode) {
+ /* Drop the extra ref on ea_inode. */
+ error = ext4_xattr_inode_dec_ref(handle,
+ ea_inode);
+ if (error)
+ ext4_warning_inode(ea_inode,
+ "dec ref error=%d",
+ error);
+ iput(ea_inode);
+ ea_inode = NULL;
+ }
+
lock_buffer(new_bh);
error = ext4_journal_get_create_access(handle, new_bh);
if (error) {
@@ -995,7 +2034,7 @@ getblk_failed:
ext4_xattr_block_csum_set(inode, new_bh);
set_buffer_uptodate(new_bh);
unlock_buffer(new_bh);
- ext4_xattr_cache_insert(ext4_mb_cache, new_bh);
+ ext4_xattr_block_cache_insert(ea_block_cache, new_bh);
error = ext4_handle_dirty_metadata(handle, inode,
new_bh);
if (error)
@@ -1003,17 +2042,40 @@ getblk_failed:
}
}
+ if (old_ea_inode_size)
+ ext4_xattr_inode_free_quota(inode, old_ea_inode_size);
+
/* Update the inode. */
EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
/* Drop the previous xattr block. */
- if (bs->bh && bs->bh != new_bh)
- ext4_xattr_release_block(handle, inode, bs->bh);
+ if (bs->bh && bs->bh != new_bh) {
+ struct ext4_xattr_inode_array *ea_inode_array = NULL;
+
+ ext4_xattr_release_block(handle, inode, bs->bh,
+ &ea_inode_array,
+ 0 /* extra_credits */);
+ ext4_xattr_inode_array_free(ea_inode_array);
+ }
error = 0;
cleanup:
+ if (ea_inode) {
+ int error2;
+
+ error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
+ if (error2)
+ ext4_warning_inode(ea_inode, "dec ref error=%d",
+ error2);
+
+ /* If there was an error, revert the quota charge. */
+ if (error)
+ ext4_xattr_inode_free_quota(inode,
+ i_size_read(ea_inode));
+ iput(ea_inode);
+ }
if (ce)
- mb_cache_entry_put(ext4_mb_cache, ce);
+ mb_cache_entry_put(ea_block_cache, ce);
brelse(new_bh);
if (!(bs->bh && s->base == bs->bh->b_data))
kfree(s->base);
@@ -1070,7 +2132,7 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
if (EXT4_I(inode)->i_extra_isize == 0)
return -ENOSPC;
- error = ext4_xattr_set_entry(i, s);
+ error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
if (error) {
if (error == -ENOSPC &&
ext4_has_inline_data(inode)) {
@@ -1082,7 +2144,8 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
error = ext4_xattr_ibody_find(inode, i, is);
if (error)
return error;
- error = ext4_xattr_set_entry(i, s);
+ error = ext4_xattr_set_entry(i, s, handle, inode,
+ false /* is_block */);
}
if (error)
return error;
@@ -1098,7 +2161,7 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
return 0;
}
-static int ext4_xattr_ibody_set(struct inode *inode,
+static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
struct ext4_xattr_info *i,
struct ext4_xattr_ibody_find *is)
{
@@ -1108,7 +2171,7 @@ static int ext4_xattr_ibody_set(struct inode *inode,
if (EXT4_I(inode)->i_extra_isize == 0)
return -ENOSPC;
- error = ext4_xattr_set_entry(i, s);
+ error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
if (error)
return error;
header = IHDR(inode, ext4_raw_inode(&is->iloc));
@@ -1127,12 +2190,31 @@ static int ext4_xattr_value_same(struct ext4_xattr_search *s,
{
void *value;
+ /* When e_value_inum is set the value is stored externally. */
+ if (s->here->e_value_inum)
+ return 0;
if (le32_to_cpu(s->here->e_value_size) != i->value_len)
return 0;
value = ((void *)s->base) + le16_to_cpu(s->here->e_value_offs);
return !memcmp(value, i->value, i->value_len);
}
+/*
+ * Read and validate the external xattr block of @inode.
+ *
+ * Returns NULL when the inode has no xattr block (i_file_acl is 0),
+ * ERR_PTR(-EIO) when the block cannot be read, the error reported by
+ * ext4_xattr_check_block() wrapped in ERR_PTR() when validation fails, and
+ * otherwise the buffer head. The caller must release a returned buffer head
+ * with brelse().
+ */
+static struct buffer_head *ext4_xattr_get_block(struct inode *inode)
+{
+	struct buffer_head *bh;
+	int error;
+
+	if (!EXT4_I(inode)->i_file_acl)
+		return NULL;
+	bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
+	if (!bh)
+		return ERR_PTR(-EIO);
+	error = ext4_xattr_check_block(inode, bh);
+	if (error) {
+		/* Drop the reference taken by sb_bread(); nobody else will. */
+		brelse(bh);
+		return ERR_PTR(error);
+	}
+	return bh;
+}
+
/*
* ext4_xattr_set_handle()
*
@@ -1155,7 +2237,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
.name = name,
.value = value,
.value_len = value_len,
-
+ .in_inode = 0,
};
struct ext4_xattr_ibody_find is = {
.s = { .not_found = -ENODATA, },
@@ -1173,6 +2255,28 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
ext4_write_lock_xattr(inode, &no_expand);
+ /* Check journal credits under write lock. */
+ if (ext4_handle_valid(handle)) {
+ struct buffer_head *bh;
+ int credits;
+
+ bh = ext4_xattr_get_block(inode);
+ if (IS_ERR(bh)) {
+ error = PTR_ERR(bh);
+ goto cleanup;
+ }
+
+ credits = __ext4_xattr_set_credits(inode->i_sb, inode, bh,
+ value_len,
+ flags & XATTR_CREATE);
+ brelse(bh);
+
+ if (!ext4_handle_has_enough_credits(handle, credits)) {
+ error = -ENOSPC;
+ goto cleanup;
+ }
+ }
+
error = ext4_reserve_inode_write(handle, inode, &is.iloc);
if (error)
goto cleanup;
@@ -1202,9 +2306,10 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
if (flags & XATTR_CREATE)
goto cleanup;
}
+
if (!value) {
if (!is.s.not_found)
- error = ext4_xattr_ibody_set(inode, &i, &is);
+ error = ext4_xattr_ibody_set(handle, inode, &i, &is);
else if (!bs.s.not_found)
error = ext4_xattr_block_set(handle, inode, &i, &bs);
} else {
@@ -1215,7 +2320,12 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
if (!bs.s.not_found && ext4_xattr_value_same(&bs.s, &i))
goto cleanup;
- error = ext4_xattr_ibody_set(inode, &i, &is);
+ if (ext4_has_feature_ea_inode(inode->i_sb) &&
+ (EXT4_XATTR_SIZE(i.value_len) >
+ EXT4_XATTR_MIN_LARGE_EA_SIZE(inode->i_sb->s_blocksize)))
+ i.in_inode = 1;
+retry_inode:
+ error = ext4_xattr_ibody_set(handle, inode, &i, &is);
if (!error && !bs.s.not_found) {
i.value = NULL;
error = ext4_xattr_block_set(handle, inode, &i, &bs);
@@ -1226,11 +2336,20 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
goto cleanup;
}
error = ext4_xattr_block_set(handle, inode, &i, &bs);
- if (error)
- goto cleanup;
- if (!is.s.not_found) {
+ if (!error && !is.s.not_found) {
i.value = NULL;
- error = ext4_xattr_ibody_set(inode, &i, &is);
+ error = ext4_xattr_ibody_set(handle, inode, &i,
+ &is);
+ } else if (error == -ENOSPC) {
+ /*
+ * Xattr does not fit in the block, store at
+ * external inode if possible.
+ */
+ if (ext4_has_feature_ea_inode(inode->i_sb) &&
+ !i.in_inode) {
+ i.in_inode = 1;
+ goto retry_inode;
+ }
}
}
}
@@ -1256,6 +2375,33 @@ cleanup:
return error;
}
+/*
+ * Compute the number of journal credits needed to set an xattr value of
+ * @value_len bytes on @inode and store it in *@credits.
+ *
+ * *@credits is 0 when the filesystem has no journal or when an error is
+ * returned. Returns 0 on success or the error from reading the xattr block.
+ */
+int ext4_xattr_set_credits(struct inode *inode, size_t value_len,
+			   bool is_create, int *credits)
+{
+	struct buffer_head *bh;
+	int err = 0;
+
+	*credits = 0;
+
+	if (!EXT4_SB(inode->i_sb)->s_journal)
+		return 0;
+
+	/* The xattr block is looked at under the read side of xattr_sem. */
+	down_read(&EXT4_I(inode)->xattr_sem);
+
+	bh = ext4_xattr_get_block(inode);
+	if (IS_ERR(bh)) {
+		err = PTR_ERR(bh);
+		goto unlock;
+	}
+
+	*credits = __ext4_xattr_set_credits(inode->i_sb, inode, bh,
+					    value_len, is_create);
+	brelse(bh);
+
+unlock:
+	up_read(&EXT4_I(inode)->xattr_sem);
+	return err;
+}
+
/*
* ext4_xattr_set()
*
@@ -1269,13 +2415,20 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
const void *value, size_t value_len, int flags)
{
handle_t *handle;
+ struct super_block *sb = inode->i_sb;
int error, retries = 0;
- int credits = ext4_jbd2_credits_xattr(inode);
+ int credits;
error = dquot_initialize(inode);
if (error)
return error;
+
retry:
+ error = ext4_xattr_set_credits(inode, value_len, flags & XATTR_CREATE,
+ &credits);
+ if (error)
+ return error;
+
handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
@@ -1286,7 +2439,7 @@ retry:
value, value_len, flags);
error2 = ext4_journal_stop(handle);
if (error == -ENOSPC &&
- ext4_should_retry_alloc(inode->i_sb, &retries))
+ ext4_should_retry_alloc(sb, &retries))
goto retry;
if (error == 0)
error = error2;
@@ -1311,7 +2464,7 @@ static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry,
/* Adjust the value offsets of the entries */
for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
- if (last->e_value_size) {
+ if (!last->e_value_inum && last->e_value_size) {
new_offs = le16_to_cpu(last->e_value_offs) +
value_offs_shift;
last->e_value_offs = cpu_to_le16(new_offs);
@@ -1331,18 +2484,16 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
struct ext4_xattr_ibody_find *is = NULL;
struct ext4_xattr_block_find *bs = NULL;
char *buffer = NULL, *b_entry_name = NULL;
- size_t value_offs, value_size;
+ size_t value_size = le32_to_cpu(entry->e_value_size);
struct ext4_xattr_info i = {
.value = NULL,
.value_len = 0,
.name_index = entry->e_name_index,
+ .in_inode = !!entry->e_value_inum,
};
struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode);
int error;
- value_offs = le16_to_cpu(entry->e_value_offs);
- value_size = le32_to_cpu(entry->e_value_size);
-
is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
buffer = kmalloc(value_size, GFP_NOFS);
@@ -1358,7 +2509,15 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
bs->bh = NULL;
/* Save the entry name and the entry value */
- memcpy(buffer, (void *)IFIRST(header) + value_offs, value_size);
+ if (entry->e_value_inum) {
+ error = ext4_xattr_inode_get(inode, entry, buffer, value_size);
+ if (error)
+ goto out;
+ } else {
+ size_t value_offs = le16_to_cpu(entry->e_value_offs);
+ memcpy(buffer, (void *)IFIRST(header) + value_offs, value_size);
+ }
+
memcpy(b_entry_name, entry->e_name, entry->e_name_len);
b_entry_name[entry->e_name_len] = '\0';
i.name = b_entry_name;
@@ -1372,11 +2531,10 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
goto out;
/* Remove the chosen entry from the inode */
- error = ext4_xattr_ibody_set(inode, &i, is);
+ error = ext4_xattr_ibody_set(handle, inode, &i, is);
if (error)
goto out;
- i.name = b_entry_name;
i.value = buffer;
i.value_len = value_size;
error = ext4_xattr_block_find(inode, &i, bs);
@@ -1420,9 +2578,10 @@ static int ext4_xattr_make_inode_space(handle_t *handle, struct inode *inode,
last = IFIRST(header);
/* Find the entry best suited to be pushed into EA block */
for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
- total_size =
- EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
- EXT4_XATTR_LEN(last->e_name_len);
+ total_size = EXT4_XATTR_LEN(last->e_name_len);
+ if (!last->e_value_inum)
+ total_size += EXT4_XATTR_SIZE(
+ le32_to_cpu(last->e_value_size));
if (total_size <= bfree &&
total_size < min_total_size) {
if (total_size + ifree < isize_diff) {
@@ -1441,8 +2600,10 @@ static int ext4_xattr_make_inode_space(handle_t *handle, struct inode *inode,
}
entry_size = EXT4_XATTR_LEN(entry->e_name_len);
- total_size = entry_size +
- EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size));
+ total_size = entry_size;
+ if (!entry->e_value_inum)
+ total_size += EXT4_XATTR_SIZE(
+ le32_to_cpu(entry->e_value_size));
error = ext4_xattr_move_to_block(handle, inode, raw_inode,
entry);
if (error)
@@ -1571,51 +2732,172 @@ cleanup:
return error;
}
+#define EIA_INCR 16 /* must be 2^n */
+#define EIA_MASK (EIA_INCR - 1)
+/*
+ * Append @inode to *@ea_inode_array so that it can be released later.
+ *
+ * The array is allocated on first use with room for EIA_MASK inodes and is
+ * replaced by a copy that is EIA_INCR slots larger each time it fills up.
+ * Returns 0 on success or -ENOMEM when an allocation fails, in which case
+ * the existing array is left as it was.
+ */
+static int
+ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
+			struct inode *inode)
+{
+	struct ext4_xattr_inode_array *array = *ea_inode_array;
+
+	if (!array) {
+		/*
+		 * Header plus EIA_MASK (15) slots, so that the allocation
+		 * fits into a power-of-two size.
+		 */
+		array = kmalloc(offsetof(struct ext4_xattr_inode_array,
+					 inodes[EIA_MASK]),
+				GFP_NOFS);
+		*ea_inode_array = array;
+		if (!array)
+			return -ENOMEM;
+		array->count = 0;
+	} else if ((array->count & EIA_MASK) == EIA_MASK) {
+		/* All 15 + n * 16 slots are in use: move to a larger array. */
+		struct ext4_xattr_inode_array *grown;
+		int used = array->count;
+
+		grown = kmalloc(offsetof(struct ext4_xattr_inode_array,
+					 inodes[used + EIA_INCR]),
+				GFP_NOFS);
+		if (!grown)
+			return -ENOMEM;
+		memcpy(grown, array,
+		       offsetof(struct ext4_xattr_inode_array, inodes[used]));
+		kfree(array);
+		array = grown;
+		*ea_inode_array = grown;
+	}
+
+	array->inodes[array->count++] = inode;
+	return 0;
+}
/*
* ext4_xattr_delete_inode()
*
- * Free extended attribute resources associated with this inode. This
- * is called immediately before an inode is freed. We have exclusive
- * access to the inode.
+ * Free extended attribute resources associated with this inode. Traverse
+ * all entries and decrement reference on any xattr inodes associated with this
+ * inode. This is called immediately before an inode is freed. We have exclusive
+ * access to the inode. If an orphan inode is deleted it will also release its
+ * references on xattr block and xattr inodes.
*/
-void
-ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
+int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
+ struct ext4_xattr_inode_array **ea_inode_array,
+ int extra_credits)
{
struct buffer_head *bh = NULL;
+ struct ext4_xattr_ibody_header *header;
+ struct ext4_iloc iloc = { .bh = NULL };
+ struct ext4_xattr_entry *entry;
+ int error;
- if (!EXT4_I(inode)->i_file_acl)
- goto cleanup;
- bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
- if (!bh) {
- EXT4_ERROR_INODE(inode, "block %llu read error",
- EXT4_I(inode)->i_file_acl);
+ error = ext4_xattr_ensure_credits(handle, inode, extra_credits,
+ NULL /* bh */,
+ false /* dirty */,
+ false /* block_csum */);
+ if (error) {
+ EXT4_ERROR_INODE(inode, "ensure credits (error %d)", error);
goto cleanup;
}
- if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
- BHDR(bh)->h_blocks != cpu_to_le32(1)) {
- EXT4_ERROR_INODE(inode, "bad block %llu",
- EXT4_I(inode)->i_file_acl);
- goto cleanup;
+
+ if (ext4_has_feature_ea_inode(inode->i_sb) &&
+ ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
+
+ error = ext4_get_inode_loc(inode, &iloc);
+ if (error) {
+ EXT4_ERROR_INODE(inode, "inode loc (error %d)", error);
+ goto cleanup;
+ }
+
+ error = ext4_journal_get_write_access(handle, iloc.bh);
+ if (error) {
+ EXT4_ERROR_INODE(inode, "write access (error %d)",
+ error);
+ goto cleanup;
+ }
+
+ header = IHDR(inode, ext4_raw_inode(&iloc));
+ if (header->h_magic == cpu_to_le32(EXT4_XATTR_MAGIC))
+ ext4_xattr_inode_dec_ref_all(handle, inode, iloc.bh,
+ IFIRST(header),
+ false /* block_csum */,
+ ea_inode_array,
+ extra_credits,
+ false /* skip_quota */);
}
- ext4_xattr_release_block(handle, inode, bh);
- EXT4_I(inode)->i_file_acl = 0;
+ if (EXT4_I(inode)->i_file_acl) {
+ bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
+ if (!bh) {
+ EXT4_ERROR_INODE(inode, "block %llu read error",
+ EXT4_I(inode)->i_file_acl);
+ error = -EIO;
+ goto cleanup;
+ }
+ error = ext4_xattr_check_block(inode, bh);
+ if (error) {
+ EXT4_ERROR_INODE(inode, "bad block %llu (error %d)",
+ EXT4_I(inode)->i_file_acl, error);
+ goto cleanup;
+ }
+
+ if (ext4_has_feature_ea_inode(inode->i_sb)) {
+ for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry);
+ entry = EXT4_XATTR_NEXT(entry))
+ if (entry->e_value_inum)
+ ext4_xattr_inode_free_quota(inode,
+ le32_to_cpu(entry->e_value_size));
+
+ }
+
+ ext4_xattr_release_block(handle, inode, bh, ea_inode_array,
+ extra_credits);
+ /*
+ * Update i_file_acl value in the same transaction that releases
+ * block.
+ */
+ EXT4_I(inode)->i_file_acl = 0;
+ error = ext4_mark_inode_dirty(handle, inode);
+ if (error) {
+ EXT4_ERROR_INODE(inode, "mark inode dirty (error %d)",
+ error);
+ goto cleanup;
+ }
+ }
+ error = 0;
cleanup:
+ brelse(iloc.bh);
brelse(bh);
+ return error;
+}
+
+void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *ea_inode_array)
+{
+ int idx;
+
+ if (ea_inode_array == NULL)
+ return;
+
+ for (idx = 0; idx < ea_inode_array->count; ++idx)
+ iput(ea_inode_array->inodes[idx]);
+ kfree(ea_inode_array);
}
/*
- * ext4_xattr_cache_insert()
+ * ext4_xattr_block_cache_insert()
*
- * Create a new entry in the extended attribute cache, and insert
+ * Create a new entry in the extended attribute block cache, and insert
* it unless such an entry is already in the cache.
*
* Returns 0, or a negative error number on failure.
*/
static void
-ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
+ext4_xattr_block_cache_insert(struct mb_cache *ea_block_cache,
+ struct buffer_head *bh)
{
struct ext4_xattr_header *header = BHDR(bh);
__u32 hash = le32_to_cpu(header->h_hash);
@@ -1623,7 +2905,9 @@ ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
EXT4_XATTR_REFCOUNT_MAX;
int error;
- error = mb_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash,
+ if (!ea_block_cache)
+ return;
+ error = mb_cache_entry_create(ea_block_cache, GFP_NOFS, hash,
bh->b_blocknr, reusable);
if (error) {
if (error == -EBUSY)
@@ -1655,11 +2939,11 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1,
entry1->e_name_index != entry2->e_name_index ||
entry1->e_name_len != entry2->e_name_len ||
entry1->e_value_size != entry2->e_value_size ||
+ entry1->e_value_inum != entry2->e_value_inum ||
memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
return 1;
- if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
- return -EFSCORRUPTED;
- if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
+ if (!entry1->e_value_inum &&
+ memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
(char *)header2 + le16_to_cpu(entry2->e_value_offs),
le32_to_cpu(entry1->e_value_size)))
return 1;
@@ -1673,7 +2957,7 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1,
}
/*
- * ext4_xattr_cache_find()
+ * ext4_xattr_block_cache_find()
*
* Find an identical extended attribute block.
*
@@ -1681,30 +2965,33 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1,
* not found or an error occurred.
*/
static struct buffer_head *
-ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
- struct mb_cache_entry **pce)
+ext4_xattr_block_cache_find(struct inode *inode,
+ struct ext4_xattr_header *header,
+ struct mb_cache_entry **pce)
{
__u32 hash = le32_to_cpu(header->h_hash);
struct mb_cache_entry *ce;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
+ if (!ea_block_cache)
+ return NULL;
if (!header->h_hash)
return NULL; /* never share */
ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
- ce = mb_cache_entry_find_first(ext4_mb_cache, hash);
+ ce = mb_cache_entry_find_first(ea_block_cache, hash);
while (ce) {
struct buffer_head *bh;
- bh = sb_bread(inode->i_sb, ce->e_block);
+ bh = sb_bread(inode->i_sb, ce->e_value);
if (!bh) {
EXT4_ERROR_INODE(inode, "block %lu read error",
- (unsigned long) ce->e_block);
+ (unsigned long)ce->e_value);
} else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
*pce = ce;
return bh;
}
brelse(bh);
- ce = mb_cache_entry_find_next(ext4_mb_cache, ce);
+ ce = mb_cache_entry_find_next(ea_block_cache, ce);
}
return NULL;
}
@@ -1717,30 +3004,22 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
*
* Compute the hash of an extended attribute.
*/
-static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
- struct ext4_xattr_entry *entry)
+static __le32 ext4_xattr_hash_entry(char *name, size_t name_len, __le32 *value,
+ size_t value_count)
{
__u32 hash = 0;
- char *name = entry->e_name;
- int n;
- for (n = 0; n < entry->e_name_len; n++) {
+ while (name_len--) {
hash = (hash << NAME_HASH_SHIFT) ^
(hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
*name++;
}
-
- if (entry->e_value_size != 0) {
- __le32 *value = (__le32 *)((char *)header +
- le16_to_cpu(entry->e_value_offs));
- for (n = (le32_to_cpu(entry->e_value_size) +
- EXT4_XATTR_ROUND) >> EXT4_XATTR_PAD_BITS; n; n--) {
- hash = (hash << VALUE_HASH_SHIFT) ^
- (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
- le32_to_cpu(*value++);
- }
+ while (value_count--) {
+ hash = (hash << VALUE_HASH_SHIFT) ^
+ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
+ le32_to_cpu(*value++);
}
- entry->e_hash = cpu_to_le32(hash);
+ return cpu_to_le32(hash);
}
#undef NAME_HASH_SHIFT
@@ -1753,13 +3032,11 @@ static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
*
* Re-compute the extended attribute hash value after an entry has changed.
*/
-static void ext4_xattr_rehash(struct ext4_xattr_header *header,
- struct ext4_xattr_entry *entry)
+static void ext4_xattr_rehash(struct ext4_xattr_header *header)
{
struct ext4_xattr_entry *here;
__u32 hash = 0;
- ext4_xattr_hash_entry(header, entry);
here = ENTRY(header+1);
while (!IS_LAST_ENTRY(here)) {
if (!here->e_hash) {
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 099c8b670ef5..0d2dde1fa87a 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -44,7 +44,7 @@ struct ext4_xattr_entry {
__u8 e_name_len; /* length of name */
__u8 e_name_index; /* attribute name index */
__le16 e_value_offs; /* offset in disk block of value */
- __le32 e_value_block; /* disk block attribute is stored on (n/i) */
+ __le32 e_value_inum; /* inode in which the value is stored */
__le32 e_value_size; /* size of attribute value */
__le32 e_hash; /* hash value of name and value */
char e_name[0]; /* attribute name */
@@ -69,6 +69,13 @@ struct ext4_xattr_entry {
EXT4_I(inode)->i_extra_isize))
#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
+/*
+ * The minimum size of an EA value when you start storing it in an external inode:
+ * size of block - size of header - size of 1 entry - 4 null bytes
+*/
+#define EXT4_XATTR_MIN_LARGE_EA_SIZE(b) \
+ ((b) - EXT4_XATTR_LEN(3) - sizeof(struct ext4_xattr_header) - 4)
+
#define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data))
#define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr))
#define BFIRST(bh) ENTRY(BHDR(bh)+1)
@@ -77,10 +84,11 @@ struct ext4_xattr_entry {
#define EXT4_ZERO_XATTR_VALUE ((void *)-1)
struct ext4_xattr_info {
- int name_index;
const char *name;
const void *value;
size_t value_len;
+ int name_index;
+ int in_inode;
};
struct ext4_xattr_search {
@@ -96,6 +104,11 @@ struct ext4_xattr_ibody_find {
struct ext4_iloc iloc;
};
+struct ext4_xattr_inode_array {
+ unsigned int count; /* # of used items in the array */
+ struct inode *inodes[0];
+};
+
extern const struct xattr_handler ext4_xattr_user_handler;
extern const struct xattr_handler ext4_xattr_trusted_handler;
extern const struct xattr_handler ext4_xattr_security_handler;
@@ -139,8 +152,16 @@ extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
+extern int ext4_xattr_set_credits(struct inode *inode, size_t value_len,
+ bool is_create, int *credits);
+extern int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
+ struct buffer_head *block_bh, size_t value_len,
+ bool is_create);
-extern void ext4_xattr_delete_inode(handle_t *, struct inode *);
+extern int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
+ struct ext4_xattr_inode_array **array,
+ int extra_credits);
+extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array);
extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle);
@@ -169,3 +190,11 @@ static inline int ext4_init_security(handle_t *handle, struct inode *inode,
return 0;
}
#endif
+
+#ifdef CONFIG_LOCKDEP
+extern void ext4_xattr_inode_set_class(struct inode *ea_inode);
+#else
+static inline void ext4_xattr_inode_set_class(struct inode *ea_inode) { }
+#endif
+
+extern int ext4_get_inode_usage(struct inode *inode, qsize_t *usage);
diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile
index ca949ea7c02f..a0dc559b1b47 100644
--- a/fs/f2fs/Makefile
+++ b/fs/f2fs/Makefile
@@ -2,7 +2,7 @@ obj-$(CONFIG_F2FS_FS) += f2fs.o
f2fs-y := dir.o file.o inode.o namei.o hash.o super.o inline.o
f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o
-f2fs-y += shrinker.o extent_cache.o
+f2fs-y += shrinker.o extent_cache.o sysfs.o
f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 8f487692c21f..a140c5e3dc54 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -233,7 +233,7 @@ static int __f2fs_set_acl(struct inode *inode, int type,
value = f2fs_acl_to_disk(F2FS_I_SB(inode), acl, &size);
if (IS_ERR(value)) {
clear_inode_flag(inode, FI_ACL_MODE);
- return (int)PTR_ERR(value);
+ return PTR_ERR(value);
}
}
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index ea9c317b5916..56bbf592e487 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -31,7 +31,7 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
set_ckpt_flags(sbi, CP_ERROR_FLAG);
sbi->sb->s_flags |= MS_RDONLY;
if (!end_io)
- f2fs_flush_merged_bios(sbi);
+ f2fs_flush_merged_writes(sbi);
}
/*
@@ -162,6 +162,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
.op = REQ_OP_READ,
.op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD,
.encrypted_page = NULL,
+ .in_list = false,
};
struct blk_plug plug;
@@ -207,12 +208,10 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
}
fio.page = page;
- fio.old_blkaddr = fio.new_blkaddr;
- f2fs_submit_page_mbio(&fio);
+ f2fs_submit_page_bio(&fio);
f2fs_put_page(page, 0);
}
out:
- f2fs_submit_merged_bio(sbi, META, READ);
blk_finish_plug(&plug);
return blkno - start;
}
@@ -249,13 +248,13 @@ static int f2fs_write_meta_page(struct page *page,
dec_page_count(sbi, F2FS_DIRTY_META);
if (wbc->for_reclaim)
- f2fs_submit_merged_bio_cond(sbi, page->mapping->host,
- 0, page->index, META, WRITE);
+ f2fs_submit_merged_write_cond(sbi, page->mapping->host,
+ 0, page->index, META);
unlock_page(page);
if (unlikely(f2fs_cp_error(sbi)))
- f2fs_submit_merged_bio(sbi, META, WRITE);
+ f2fs_submit_merged_write(sbi, META);
return 0;
@@ -270,6 +269,9 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
long diff, written;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+ goto skip_write;
+
/* collect a number of dirty meta pages and write together */
if (wbc->for_kupdate ||
get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
@@ -358,7 +360,7 @@ continue_unlock:
}
stop:
if (nwritten)
- f2fs_submit_merged_bio(sbi, type, WRITE);
+ f2fs_submit_merged_write(sbi, type);
blk_finish_plug(&plug);
@@ -906,7 +908,7 @@ retry:
* We should submit bio, since it exists several
* wribacking dentry pages in the freeing inode.
*/
- f2fs_submit_merged_bio(sbi, DATA, WRITE);
+ f2fs_submit_merged_write(sbi, DATA);
cond_resched();
}
goto retry;
@@ -1051,8 +1053,9 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+ unsigned long flags;
- spin_lock(&sbi->cp_lock);
+ spin_lock_irqsave(&sbi->cp_lock, flags);
if ((cpc->reason & CP_UMOUNT) &&
le32_to_cpu(ckpt->cp_pack_total_block_count) >
@@ -1083,14 +1086,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* set this flag to activate crc|cp_ver for recovery */
__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
- spin_unlock(&sbi->cp_lock);
+ spin_unlock_irqrestore(&sbi->cp_lock, flags);
}
static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct f2fs_nm_info *nm_i = NM_I(sbi);
- unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
+ unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num, flags;
block_t start_blk;
unsigned int data_sum_blocks, orphan_blocks;
__u32 crc32 = 0;
@@ -1132,12 +1135,12 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* 2 cp + n data seg summary + orphan inode blocks */
data_sum_blocks = npages_for_summary_flush(sbi, false);
- spin_lock(&sbi->cp_lock);
+ spin_lock_irqsave(&sbi->cp_lock, flags);
if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
__set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
else
__clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
- spin_unlock(&sbi->cp_lock);
+ spin_unlock_irqrestore(&sbi->cp_lock, flags);
orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
@@ -1295,7 +1298,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
- f2fs_flush_merged_bios(sbi);
+ f2fs_flush_merged_writes(sbi);
/* this is the case of multiple fstrims without any changes */
if (cpc->reason & CP_DISCARD) {
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7c0f6bdf817d..87c1f4150c64 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -58,12 +58,12 @@ static void f2fs_read_end_io(struct bio *bio)
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) {
f2fs_show_injection_info(FAULT_IO);
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
}
#endif
if (f2fs_bio_encrypted(bio)) {
- if (bio->bi_error) {
+ if (bio->bi_status) {
fscrypt_release_ctx(bio->bi_private);
} else {
fscrypt_decrypt_bio_pages(bio->bi_private, bio);
@@ -74,7 +74,7 @@ static void f2fs_read_end_io(struct bio *bio)
bio_for_each_segment_all(bvec, bio, i) {
struct page *page = bvec->bv_page;
- if (!bio->bi_error) {
+ if (!bio->bi_status) {
if (!PageUptodate(page))
SetPageUptodate(page);
} else {
@@ -102,14 +102,14 @@ static void f2fs_write_end_io(struct bio *bio)
unlock_page(page);
mempool_free(page, sbi->write_io_dummy);
- if (unlikely(bio->bi_error))
+ if (unlikely(bio->bi_status))
f2fs_stop_checkpoint(sbi, true);
continue;
}
fscrypt_pullback_bio_page(&page, true);
- if (unlikely(bio->bi_error)) {
+ if (unlikely(bio->bi_status)) {
mapping_set_error(page->mapping, -EIO);
f2fs_stop_checkpoint(sbi, true);
}
@@ -282,29 +282,32 @@ static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
nid_t ino, pgoff_t idx, enum page_type type)
{
enum page_type btype = PAGE_TYPE_OF_BIO(type);
- struct f2fs_bio_info *io = &sbi->write_io[btype];
- bool ret;
+ enum temp_type temp;
+ struct f2fs_bio_info *io;
+ bool ret = false;
+
+ for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
+ io = sbi->write_io[btype] + temp;
+
+ down_read(&io->io_rwsem);
+ ret = __has_merged_page(io, inode, ino, idx);
+ up_read(&io->io_rwsem);
- down_read(&io->io_rwsem);
- ret = __has_merged_page(io, inode, ino, idx);
- up_read(&io->io_rwsem);
+ /* TODO: use HOT temp only for meta pages now. */
+ if (ret || btype == META)
+ break;
+ }
return ret;
}
-static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
- struct inode *inode, nid_t ino, pgoff_t idx,
- enum page_type type, int rw)
+static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
+ enum page_type type, enum temp_type temp)
{
enum page_type btype = PAGE_TYPE_OF_BIO(type);
- struct f2fs_bio_info *io;
-
- io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];
+ struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
down_write(&io->io_rwsem);
- if (!__has_merged_page(io, inode, ino, idx))
- goto out;
-
/* change META to META_FLUSH in the checkpoint procedure */
if (type >= META_FLUSH) {
io->fio.type = META_FLUSH;
@@ -314,29 +317,45 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
}
__submit_merged_bio(io);
-out:
up_write(&io->io_rwsem);
}
-void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type,
- int rw)
+static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
+ struct inode *inode, nid_t ino, pgoff_t idx,
+ enum page_type type, bool force)
{
- __f2fs_submit_merged_bio(sbi, NULL, 0, 0, type, rw);
+ enum temp_type temp;
+
+ if (!force && !has_merged_page(sbi, inode, ino, idx, type))
+ return;
+
+ for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
+
+ __f2fs_submit_merged_write(sbi, type, temp);
+
+ /* TODO: use HOT temp only for meta pages now. */
+ if (type >= META)
+ break;
+ }
}
-void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi,
+void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
+{
+ __submit_merged_write_cond(sbi, NULL, 0, 0, type, true);
+}
+
+void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
struct inode *inode, nid_t ino, pgoff_t idx,
- enum page_type type, int rw)
+ enum page_type type)
{
- if (has_merged_page(sbi, inode, ino, idx, type))
- __f2fs_submit_merged_bio(sbi, inode, ino, idx, type, rw);
+ __submit_merged_write_cond(sbi, inode, ino, idx, type, false);
}
-void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi)
+void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
{
- f2fs_submit_merged_bio(sbi, DATA, WRITE);
- f2fs_submit_merged_bio(sbi, NODE, WRITE);
- f2fs_submit_merged_bio(sbi, META, WRITE);
+ f2fs_submit_merged_write(sbi, DATA);
+ f2fs_submit_merged_write(sbi, NODE);
+ f2fs_submit_merged_write(sbi, META);
}
/*
@@ -368,16 +387,29 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
return 0;
}
-int f2fs_submit_page_mbio(struct f2fs_io_info *fio)
+int f2fs_submit_page_write(struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = fio->sbi;
enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
- struct f2fs_bio_info *io;
- bool is_read = is_read_io(fio->op);
+ struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
struct page *bio_page;
int err = 0;
- io = is_read ? &sbi->read_io : &sbi->write_io[btype];
+ f2fs_bug_on(sbi, is_read_io(fio->op));
+
+ down_write(&io->io_rwsem);
+next:
+ if (fio->in_list) {
+ spin_lock(&io->io_lock);
+ if (list_empty(&io->io_list)) {
+ spin_unlock(&io->io_lock);
+ goto out_fail;
+ }
+ fio = list_first_entry(&io->io_list,
+ struct f2fs_io_info, list);
+ list_del(&fio->list);
+ spin_unlock(&io->io_lock);
+ }
if (fio->old_blkaddr != NEW_ADDR)
verify_block_addr(sbi, fio->old_blkaddr);
@@ -388,10 +420,7 @@ int f2fs_submit_page_mbio(struct f2fs_io_info *fio)
/* set submitted = 1 as a return value */
fio->submitted = 1;
- if (!is_read)
- inc_page_count(sbi, WB_DATA_TYPE(bio_page));
-
- down_write(&io->io_rwsem);
+ inc_page_count(sbi, WB_DATA_TYPE(bio_page));
if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
(io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) ||
@@ -402,26 +431,28 @@ alloc_new:
if ((fio->type == DATA || fio->type == NODE) &&
fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
err = -EAGAIN;
- if (!is_read)
- dec_page_count(sbi, WB_DATA_TYPE(bio_page));
+ dec_page_count(sbi, WB_DATA_TYPE(bio_page));
goto out_fail;
}
io->bio = __bio_alloc(sbi, fio->new_blkaddr,
- BIO_MAX_PAGES, is_read);
+ BIO_MAX_PAGES, false);
io->fio = *fio;
}
- if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) <
- PAGE_SIZE) {
+ if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
__submit_merged_bio(io);
goto alloc_new;
}
io->last_block_in_bio = fio->new_blkaddr;
f2fs_trace_ios(fio, 0);
+
+ trace_f2fs_submit_page_write(fio->page, fio);
+
+ if (fio->in_list)
+ goto next;
out_fail:
up_write(&io->io_rwsem);
- trace_f2fs_submit_page_mbio(fio->page, fio);
return err;
}
@@ -460,14 +491,15 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
+ int err;
if (!count)
return 0;
if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
return -EPERM;
- if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count)))
- return -ENOSPC;
+ if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
+ return err;
trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
dn->ofs_in_node, count);
@@ -718,6 +750,7 @@ static int __allocate_data_block(struct dnode_of_data *dn)
struct node_info ni;
pgoff_t fofs;
blkcnt_t count = 1;
+ int err;
if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
return -EPERM;
@@ -726,15 +759,15 @@ static int __allocate_data_block(struct dnode_of_data *dn)
if (dn->data_blkaddr == NEW_ADDR)
goto alloc;
- if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count)))
- return -ENOSPC;
+ if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
+ return err;
alloc:
get_node_info(sbi, dn->nid, &ni);
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
- &sum, CURSEG_WARM_DATA);
+ &sum, CURSEG_WARM_DATA, NULL, false);
set_data_blkaddr(dn);
/* update i_size */
@@ -1321,7 +1354,7 @@ retry_encrypt:
/* flush pending IOs and wait for a while in the ENOMEM case */
if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
- f2fs_flush_merged_bios(fio->sbi);
+ f2fs_flush_merged_writes(fio->sbi);
congestion_wait(BLK_RW_ASYNC, HZ/50);
gfp_flags |= __GFP_NOFAIL;
goto retry_encrypt;
@@ -1368,13 +1401,14 @@ int do_write_data_page(struct f2fs_io_info *fio)
if (valid_ipu_blkaddr(fio)) {
ipu_force = true;
- fio->need_lock = false;
+ fio->need_lock = LOCK_DONE;
goto got_it;
}
}
- if (fio->need_lock)
- f2fs_lock_op(fio->sbi);
+ /* Avoid deadlock between page->lock and f2fs_lock_op */
+ if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
+ return -EAGAIN;
err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
if (err)
@@ -1388,19 +1422,18 @@ int do_write_data_page(struct f2fs_io_info *fio)
goto out_writepage;
}
got_it:
- err = encrypt_one_page(fio);
- if (err)
- goto out_writepage;
-
- set_page_writeback(page);
-
/*
* If current allocation needs SSR,
* it had better in-place writes for updated data.
*/
if (ipu_force || (valid_ipu_blkaddr(fio) && need_inplace_update(fio))) {
+ err = encrypt_one_page(fio);
+ if (err)
+ goto out_writepage;
+
+ set_page_writeback(page);
f2fs_put_dnode(&dn);
- if (fio->need_lock)
+ if (fio->need_lock == LOCK_REQ)
f2fs_unlock_op(fio->sbi);
err = rewrite_data_page(fio);
trace_f2fs_do_write_data_page(fio->page, IPU);
@@ -1408,6 +1441,20 @@ got_it:
return err;
}
+ if (fio->need_lock == LOCK_RETRY) {
+ if (!f2fs_trylock_op(fio->sbi)) {
+ err = -EAGAIN;
+ goto out_writepage;
+ }
+ fio->need_lock = LOCK_REQ;
+ }
+
+ err = encrypt_one_page(fio);
+ if (err)
+ goto out_writepage;
+
+ set_page_writeback(page);
+
/* LFS mode write path */
write_data_page(&dn, fio);
trace_f2fs_do_write_data_page(page, OPU);
@@ -1417,7 +1464,7 @@ got_it:
out_writepage:
f2fs_put_dnode(&dn);
out:
- if (fio->need_lock)
+ if (fio->need_lock == LOCK_REQ)
f2fs_unlock_op(fio->sbi);
return err;
}
@@ -1443,11 +1490,14 @@ static int __write_data_page(struct page *page, bool *submitted,
.page = page,
.encrypted_page = NULL,
.submitted = false,
- .need_lock = true,
+ .need_lock = LOCK_RETRY,
};
trace_f2fs_writepage(page, DATA);
+ if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+ goto redirty_out;
+
if (page->index < end_index)
goto write;
@@ -1461,8 +1511,6 @@ static int __write_data_page(struct page *page, bool *submitted,
zero_user_segment(page, offset, PAGE_SIZE);
write:
- if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
- goto redirty_out;
if (f2fs_is_drop_cache(inode))
goto out;
/* we should not write 0'th page having journal header */
@@ -1479,7 +1527,7 @@ write:
/* Dentry blocks are controlled by checkpoint */
if (S_ISDIR(inode->i_mode)) {
- fio.need_lock = false;
+ fio.need_lock = LOCK_DONE;
err = do_write_data_page(&fio);
goto done;
}
@@ -1498,8 +1546,13 @@ write:
goto out;
}
- if (err == -EAGAIN)
+ if (err == -EAGAIN) {
err = do_write_data_page(&fio);
+ if (err == -EAGAIN) {
+ fio.need_lock = LOCK_REQ;
+ err = do_write_data_page(&fio);
+ }
+ }
if (F2FS_I(inode)->last_disk_size < psize)
F2FS_I(inode)->last_disk_size = psize;
@@ -1513,8 +1566,7 @@ out:
ClearPageUptodate(page);
if (wbc->for_reclaim) {
- f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index,
- DATA, WRITE);
+ f2fs_submit_merged_write_cond(sbi, inode, 0, page->index, DATA);
clear_inode_flag(inode, FI_HOT_DATA);
remove_dirty_inode(inode);
submitted = NULL;
@@ -1525,7 +1577,7 @@ out:
f2fs_balance_fs(sbi, need_balance_fs);
if (unlikely(f2fs_cp_error(sbi))) {
- f2fs_submit_merged_bio(sbi, DATA, WRITE);
+ f2fs_submit_merged_write(sbi, DATA);
submitted = NULL;
}
@@ -1618,7 +1670,7 @@ retry:
}
done_index = page->index;
-
+retry_write:
lock_page(page);
if (unlikely(page->mapping != mapping)) {
@@ -1654,6 +1706,15 @@ continue_unlock:
unlock_page(page);
ret = 0;
continue;
+ } else if (ret == -EAGAIN) {
+ ret = 0;
+ if (wbc->sync_mode == WB_SYNC_ALL) {
+ cond_resched();
+ congestion_wait(BLK_RW_ASYNC,
+ HZ/50);
+ goto retry_write;
+ }
+ continue;
}
done_index = page->index + 1;
done = 1;
@@ -1684,8 +1745,8 @@ continue_unlock:
mapping->writeback_index = done_index;
if (last_idx != ULONG_MAX)
- f2fs_submit_merged_bio_cond(F2FS_M_SB(mapping), mapping->host,
- 0, last_idx, DATA, WRITE);
+ f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
+ 0, last_idx, DATA);
return ret;
}
@@ -1706,6 +1767,10 @@ static int f2fs_write_data_pages(struct address_space *mapping,
if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
return 0;
+ /* during POR, we don't need to trigger writepage at all. */
+ if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+ goto skip_write;
+
if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
available_free_memory(sbi, DIRTY_DENTS))
@@ -1715,10 +1780,6 @@ static int f2fs_write_data_pages(struct address_space *mapping,
if (is_inode_flag_set(inode, FI_DO_DEFRAG))
goto skip_write;
- /* during POR, we don't need to trigger writepage at all. */
- if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
- goto skip_write;
-
trace_f2fs_writepages(mapping->host, wbc, DATA);
/* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
@@ -1753,8 +1814,10 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
loff_t i_size = i_size_read(inode);
if (to > i_size) {
+ down_write(&F2FS_I(inode)->i_mmap_sem);
truncate_pagecache(inode, i_size);
truncate_blocks(inode, i_size, true);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
}
}
@@ -2152,8 +2215,12 @@ int f2fs_migrate_page(struct address_space *mapping,
BUG_ON(PageWriteback(page));
/* migrating an atomic written page is safe with the inmem_lock hold */
- if (atomic_written && !mutex_trylock(&fi->inmem_lock))
- return -EAGAIN;
+ if (atomic_written) {
+ if (mode != MIGRATE_SYNC)
+ return -EBUSY;
+ if (!mutex_trylock(&fi->inmem_lock))
+ return -EAGAIN;
+ }
/*
* A reference is expected if PagePrivate set when move mapping,
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 94756f55a97e..37f9c7f55605 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -415,7 +415,8 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
* We lost i_pino from now on.
*/
if (is_inode_flag_set(inode, FI_INC_LINK)) {
- file_lost_pino(inode);
+ if (!S_ISDIR(inode->i_mode))
+ file_lost_pino(inode);
/*
* If link the tmpfile to alias through linkat path,
* we should remove this inode from orphan list.
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 2f98d7039701..ff2352a0ed15 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -320,7 +320,7 @@ static void __drop_largest_extent(struct inode *inode,
}
/* return true, if inode page is changed */
-bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
+static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et;
@@ -358,6 +358,16 @@ out:
return false;
}
+bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
+{
+ bool ret = __f2fs_init_extent_tree(inode, i_ext);
+
+ if (!F2FS_I(inode)->extent_tree)
+ set_inode_flag(inode, FI_NO_EXTENT);
+
+ return ret;
+}
+
static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
struct extent_info *ei)
{
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 2185c7a040a1..94a88b233e98 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -22,6 +22,7 @@
#include <linux/vmalloc.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
+#include <linux/quotaops.h>
#ifdef CONFIG_F2FS_FS_ENCRYPTION
#include <linux/fscrypt_supp.h>
#else
@@ -88,6 +89,8 @@ extern char *fault_name[FAULT_MAX];
#define F2FS_MOUNT_FAULT_INJECTION 0x00010000
#define F2FS_MOUNT_ADAPTIVE 0x00020000
#define F2FS_MOUNT_LFS 0x00040000
+#define F2FS_MOUNT_USRQUOTA 0x00080000
+#define F2FS_MOUNT_GRPQUOTA 0x00100000
#define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -303,6 +306,8 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
struct f2fs_move_range)
#define F2FS_IOC_FLUSH_DEVICE _IOW(F2FS_IOCTL_MAGIC, 10, \
struct f2fs_flush_device)
+#define F2FS_IOC_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11, \
+ struct f2fs_gc_range)
#define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY
#define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY
@@ -327,6 +332,12 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
#define F2FS_IOC32_GETVERSION FS_IOC32_GETVERSION
#endif
+/*
+ * Argument block for F2FS_IOC_GARBAGE_COLLECT_RANGE.
+ * NOTE(review): a u32 followed by a u64 leaves implicit padding whose size
+ * depends on the ABI's u64 alignment - confirm the layout matches for
+ * compat (32-bit) callers.
+ */
+struct f2fs_gc_range {
+ u32 sync; /* non-zero: wait for gc_mutex and pass sync to f2fs_gc() */
+ u64 start; /* first block address; checked against MAIN_BLKADDR() */
+ u64 len; /* added to start to form the end address */
+};
+
struct f2fs_defragment {
u64 start;
u64 len;
@@ -513,12 +524,19 @@ struct f2fs_inode_info {
nid_t i_xattr_nid; /* node id that contains xattrs */
loff_t last_disk_size; /* lastly written file size */
+#ifdef CONFIG_QUOTA
+ struct dquot *i_dquot[MAXQUOTAS];
+
+ /* quota space reservation, managed internally by quota code */
+ qsize_t i_reserved_quota;
+#endif
struct list_head dirty_list; /* dirty list for dirs and files */
struct list_head gdirty_list; /* linked in global dirty list */
struct list_head inmem_pages; /* inmemory pages managed by f2fs */
struct mutex inmem_lock; /* lock for inmemory pages */
struct extent_tree *extent_tree; /* cached extent_tree entry */
struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */
+ struct rw_semaphore i_mmap_sem;
};
static inline void get_extent_info(struct extent_info *ext,
@@ -792,17 +810,33 @@ enum page_type {
OPU,
};
+/* Temperature class recorded in f2fs_io_info.temp for an IO. */
+enum temp_type {
+ HOT = 0, /* must be zero for meta bio */
+ WARM,
+ COLD,
+ NR_TEMP_TYPE, /* number of classes; used as an array dimension */
+};
+
+/*
+ * Values for f2fs_io_info.need_lock, which indicates whether cp_rwsem
+ * must be locked for the IO.
+ * NOTE(review): the exact meaning of LOCK_DONE and LOCK_RETRY is set by
+ * the users of need_lock - confirm against the write path.
+ */
+enum need_lock_type {
+ LOCK_REQ = 0, /* used where need_lock was previously 'true' */
+ LOCK_DONE,
+ LOCK_RETRY,
+};
+
struct f2fs_io_info {
struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */
enum page_type type; /* contains DATA/NODE/META/META_FLUSH */
+ enum temp_type temp; /* contains HOT/WARM/COLD */
int op; /* contains REQ_OP_ */
int op_flags; /* req_flag_bits */
block_t new_blkaddr; /* new block address to be written */
block_t old_blkaddr; /* old block address before Cow */
struct page *page; /* page to be written */
struct page *encrypted_page; /* encrypted page */
+ struct list_head list; /* serialize IOs */
bool submitted; /* indicate IO submission */
- bool need_lock; /* indicate we need to lock cp_rwsem */
+ int need_lock; /* indicate we need to lock cp_rwsem */
+ bool in_list; /* indicate fio is in io_list */
};
#define is_read_io(rw) ((rw) == READ)
@@ -812,6 +846,8 @@ struct f2fs_bio_info {
sector_t last_block_in_bio; /* last block number */
struct f2fs_io_info fio; /* store buffered io info. */
struct rw_semaphore io_rwsem; /* blocking op for bio */
+ spinlock_t io_lock; /* serialize DATA/NODE IOs */
+ struct list_head io_list; /* track fios */
};
#define FDEV(i) (sbi->devs[i])
@@ -879,9 +915,9 @@ struct f2fs_sb_info {
struct f2fs_sm_info *sm_info; /* segment manager */
/* for bio operations */
- struct f2fs_bio_info read_io; /* for read bios */
- struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */
- struct mutex wio_mutex[NODE + 1]; /* bio ordering for NODE/DATA */
+ struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */
+ struct mutex wio_mutex[NR_PAGE_TYPE - 1][NR_TEMP_TYPE];
+ /* bio ordering for NODE/DATA */
int write_io_size_bits; /* Write IO size bits */
mempool_t *write_io_dummy; /* Dummy pages */
@@ -939,6 +975,8 @@ struct f2fs_sb_info {
block_t total_valid_block_count; /* # of valid blocks */
block_t discard_blks; /* discard command candidats */
block_t last_valid_block_count; /* for recovery */
+ block_t reserved_blocks; /* configurable reserved blocks */
+
u32 s_next_generation; /* for NFS support */
/* # of pages, see count_type */
@@ -1078,6 +1116,7 @@ static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address,
{
SHASH_DESC_ON_STACK(shash, sbi->s_chksum_driver);
u32 *ctx = (u32 *)shash_desc_ctx(shash);
+ u32 retval;
int err;
shash->tfm = sbi->s_chksum_driver;
@@ -1087,7 +1126,9 @@ static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address,
err = crypto_shash_update(shash, address, length);
BUG_ON(err);
- return *ctx;
+ retval = *ctx;
+ barrier_data(ctx);
+ return retval;
}
static inline bool f2fs_crc_valid(struct f2fs_sb_info *sbi, __u32 blk_crc,
@@ -1225,9 +1266,11 @@ static inline void __set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
static inline void set_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
{
- spin_lock(&sbi->cp_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&sbi->cp_lock, flags);
__set_ckpt_flags(F2FS_CKPT(sbi), f);
- spin_unlock(&sbi->cp_lock);
+ spin_unlock_irqrestore(&sbi->cp_lock, flags);
}
static inline void __clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
@@ -1241,22 +1284,26 @@ static inline void __clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f
static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
{
- spin_lock(&sbi->cp_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&sbi->cp_lock, flags);
__clear_ckpt_flags(F2FS_CKPT(sbi), f);
- spin_unlock(&sbi->cp_lock);
+ spin_unlock_irqrestore(&sbi->cp_lock, flags);
}
static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock)
{
+ unsigned long flags;
+
set_sbi_flag(sbi, SBI_NEED_FSCK);
if (lock)
- spin_lock(&sbi->cp_lock);
+ spin_lock_irqsave(&sbi->cp_lock, flags);
__clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG);
kfree(NM_I(sbi)->nat_bits);
NM_I(sbi)->nat_bits = NULL;
if (lock)
- spin_unlock(&sbi->cp_lock);
+ spin_unlock_irqrestore(&sbi->cp_lock, flags);
}
static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi,
@@ -1272,6 +1319,11 @@ static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
down_read(&sbi->cp_rwsem);
}
+/*
+ * Non-blocking variant of f2fs_lock_op(): attempts to take cp_rwsem for
+ * read and returns the result of down_read_trylock().
+ */
+static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi)
+{
+	int locked = down_read_trylock(&sbi->cp_rwsem);
+
+	return locked;
+}
+
static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi)
{
up_read(&sbi->cp_rwsem);
@@ -1321,17 +1373,14 @@ static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
return 0;
}
-#define F2FS_DEFAULT_ALLOCATED_BLOCKS 1
-
/*
* Check whether the inode has blocks or not
*/
static inline int F2FS_HAS_BLOCKS(struct inode *inode)
{
- if (F2FS_I(inode)->i_xattr_nid)
- return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1;
- else
- return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS;
+ block_t xattr_block = F2FS_I(inode)->i_xattr_nid ? 1 : 0;
+
+ return (inode->i_blocks >> F2FS_LOG_SECTORS_PER_BLOCK) > xattr_block;
}
static inline bool f2fs_has_xattr_block(unsigned int ofs)
@@ -1339,16 +1388,23 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs)
return ofs == XATTR_NODE_OFFSET;
}
-static inline void f2fs_i_blocks_write(struct inode *, blkcnt_t, bool);
-static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
+static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool);
+static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
struct inode *inode, blkcnt_t *count)
{
- blkcnt_t diff;
+ blkcnt_t diff = 0, release = 0;
+ block_t avail_user_block_count;
+ int ret;
+
+ ret = dquot_reserve_block(inode, *count);
+ if (ret)
+ return ret;
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_BLOCK)) {
f2fs_show_injection_info(FAULT_BLOCK);
- return false;
+ release = *count;
+ goto enospc;
}
#endif
/*
@@ -1359,32 +1415,42 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
spin_lock(&sbi->stat_lock);
sbi->total_valid_block_count += (block_t)(*count);
- if (unlikely(sbi->total_valid_block_count > sbi->user_block_count)) {
- diff = sbi->total_valid_block_count - sbi->user_block_count;
+ avail_user_block_count = sbi->user_block_count - sbi->reserved_blocks;
+ if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
+ diff = sbi->total_valid_block_count - avail_user_block_count;
*count -= diff;
- sbi->total_valid_block_count = sbi->user_block_count;
+ release = diff;
+ sbi->total_valid_block_count = avail_user_block_count;
if (!*count) {
spin_unlock(&sbi->stat_lock);
percpu_counter_sub(&sbi->alloc_valid_block_count, diff);
- return false;
+ goto enospc;
}
}
spin_unlock(&sbi->stat_lock);
- f2fs_i_blocks_write(inode, *count, true);
- return true;
+ if (release)
+ dquot_release_reservation_block(inode, release);
+ f2fs_i_blocks_write(inode, *count, true, true);
+ return 0;
+
+enospc:
+ dquot_release_reservation_block(inode, release);
+ return -ENOSPC;
}
static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
struct inode *inode,
- blkcnt_t count)
+ block_t count)
{
+ blkcnt_t sectors = count << F2FS_LOG_SECTORS_PER_BLOCK;
+
spin_lock(&sbi->stat_lock);
f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count);
- f2fs_bug_on(sbi, inode->i_blocks < count);
+ f2fs_bug_on(sbi, inode->i_blocks < sectors);
sbi->total_valid_block_count -= (block_t)count;
spin_unlock(&sbi->stat_lock);
- f2fs_i_blocks_write(inode, count, false);
+ f2fs_i_blocks_write(inode, count, false, true);
}
static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
@@ -1513,51 +1579,70 @@ static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi)
return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum);
}
-static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi,
- struct inode *inode)
+static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
+ struct inode *inode, bool is_inode)
{
block_t valid_block_count;
unsigned int valid_node_count;
+ bool quota = inode && !is_inode;
+
+ if (quota) {
+ int ret = dquot_reserve_block(inode, 1);
+ if (ret)
+ return ret;
+ }
spin_lock(&sbi->stat_lock);
valid_block_count = sbi->total_valid_block_count + 1;
- if (unlikely(valid_block_count > sbi->user_block_count)) {
+ if (unlikely(valid_block_count + sbi->reserved_blocks >
+ sbi->user_block_count)) {
spin_unlock(&sbi->stat_lock);
- return false;
+ goto enospc;
}
valid_node_count = sbi->total_valid_node_count + 1;
if (unlikely(valid_node_count > sbi->total_node_count)) {
spin_unlock(&sbi->stat_lock);
- return false;
+ goto enospc;
}
- if (inode)
- f2fs_i_blocks_write(inode, 1, true);
-
sbi->total_valid_node_count++;
sbi->total_valid_block_count++;
spin_unlock(&sbi->stat_lock);
+ if (inode) {
+ if (is_inode)
+ f2fs_mark_inode_dirty_sync(inode, true);
+ else
+ f2fs_i_blocks_write(inode, 1, true, true);
+ }
+
percpu_counter_inc(&sbi->alloc_valid_block_count);
- return true;
+ return 0;
+
+enospc:
+ if (quota)
+ dquot_release_reservation_block(inode, 1);
+ return -ENOSPC;
}
static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
- struct inode *inode)
+ struct inode *inode, bool is_inode)
{
spin_lock(&sbi->stat_lock);
f2fs_bug_on(sbi, !sbi->total_valid_block_count);
f2fs_bug_on(sbi, !sbi->total_valid_node_count);
- f2fs_bug_on(sbi, !inode->i_blocks);
+ f2fs_bug_on(sbi, !is_inode && !inode->i_blocks);
- f2fs_i_blocks_write(inode, 1, false);
sbi->total_valid_node_count--;
sbi->total_valid_block_count--;
spin_unlock(&sbi->stat_lock);
+
+ if (!is_inode)
+ f2fs_i_blocks_write(inode, 1, false, true);
}
static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)
@@ -1832,13 +1917,21 @@ static inline void f2fs_i_links_write(struct inode *inode, bool inc)
}
static inline void f2fs_i_blocks_write(struct inode *inode,
- blkcnt_t diff, bool add)
+ block_t diff, bool add, bool claim)
{
bool clean = !is_inode_flag_set(inode, FI_DIRTY_INODE);
bool recover = is_inode_flag_set(inode, FI_AUTO_RECOVER);
- inode->i_blocks = add ? inode->i_blocks + diff :
- inode->i_blocks - diff;
+ /* add = 1, claim = 1 should be dquot_reserve_block in pair */
+ if (add) {
+ if (claim)
+ dquot_claim_block(inode, diff);
+ else
+ dquot_alloc_block_nofail(inode, diff);
+ } else {
+ dquot_free_block(inode, diff);
+ }
+
f2fs_mark_inode_dirty_sync(inode, true);
if (clean || recover)
set_inode_flag(inode, FI_AUTO_RECOVER);
@@ -2233,6 +2326,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free);
void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new);
+void stop_discard_thread(struct f2fs_sb_info *sbi);
void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc);
void release_discard_addrs(struct f2fs_sb_info *sbi);
@@ -2255,7 +2349,8 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
bool recover_newaddr);
void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
block_t old_blkaddr, block_t *new_blkaddr,
- struct f2fs_summary *sum, int type);
+ struct f2fs_summary *sum, int type,
+ struct f2fs_io_info *fio, bool add_list);
void f2fs_wait_on_page_writeback(struct page *page,
enum page_type type, bool ordered);
void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi,
@@ -2305,14 +2400,13 @@ void destroy_checkpoint_caches(void);
/*
* data.c
*/
-void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type,
- int rw);
-void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi,
+void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
+void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
struct inode *inode, nid_t ino, pgoff_t idx,
- enum page_type type, int rw);
-void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi);
+ enum page_type type);
+void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi);
int f2fs_submit_page_bio(struct f2fs_io_info *fio);
-int f2fs_submit_page_mbio(struct f2fs_io_info *fio);
+int f2fs_submit_page_write(struct f2fs_io_info *fio);
struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
block_t blk_addr, struct bio *bio);
int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr);
@@ -2630,6 +2724,14 @@ int __init create_extent_cache(void);
void destroy_extent_cache(void);
/*
+ * sysfs.c
+ */
+int __init f2fs_register_sysfs(void);
+void f2fs_unregister_sysfs(void);
+int f2fs_init_sysfs(struct f2fs_sb_info *sbi);
+void f2fs_exit_sysfs(struct f2fs_sb_info *sbi);
+
+/*
* crypto support
*/
static inline bool f2fs_encrypted_inode(struct inode *inode)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 61af721329fa..a0e6d2c65a9e 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -33,6 +33,18 @@
#include "trace.h"
#include <trace/events/f2fs.h>
+/*
+ * .fault handler: runs the generic filemap_fault() with the inode's
+ * i_mmap_sem held for read. Paths in this file that truncate or punch
+ * the page cache take the same semaphore for write.
+ */
+static int f2fs_filemap_fault(struct vm_fault *vmf)
+{
+	struct f2fs_inode_info *fi = F2FS_I(file_inode(vmf->vma->vm_file));
+	int ret;
+
+	down_read(&fi->i_mmap_sem);
+	ret = filemap_fault(vmf);
+	up_read(&fi->i_mmap_sem);
+
+	return ret;
+}
+
static int f2fs_vm_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
@@ -59,13 +71,14 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf)
f2fs_balance_fs(sbi, dn.node_changed);
file_update_time(vmf->vma->vm_file);
+ down_read(&F2FS_I(inode)->i_mmap_sem);
lock_page(page);
if (unlikely(page->mapping != inode->i_mapping ||
page_offset(page) > i_size_read(inode) ||
!PageUptodate(page))) {
unlock_page(page);
err = -EFAULT;
- goto out;
+ goto out_sem;
}
/*
@@ -94,6 +107,8 @@ mapped:
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr);
+out_sem:
+ up_read(&F2FS_I(inode)->i_mmap_sem);
out:
sb_end_pagefault(inode->i_sb);
f2fs_update_time(sbi, REQ_TIME);
@@ -101,7 +116,7 @@ out:
}
static const struct vm_operations_struct f2fs_file_vm_ops = {
- .fault = filemap_fault,
+ .fault = f2fs_filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = f2fs_vm_page_mkwrite,
};
@@ -415,14 +430,6 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
struct inode *inode = file_inode(file);
int err;
- if (f2fs_encrypted_inode(inode)) {
- err = fscrypt_get_encryption_info(inode);
- if (err)
- return 0;
- if (!f2fs_encrypted_inode(inode))
- return -ENOKEY;
- }
-
/* we don't need to use inline_data strictly */
err = f2fs_convert_inline_inode(inode);
if (err)
@@ -435,11 +442,10 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
static int f2fs_file_open(struct inode *inode, struct file *filp)
{
- int ret = generic_file_open(inode, filp);
struct dentry *dir;
- if (!ret && f2fs_encrypted_inode(inode)) {
- ret = fscrypt_get_encryption_info(inode);
+ if (f2fs_encrypted_inode(inode)) {
+ int ret = fscrypt_get_encryption_info(inode);
if (ret)
return -EACCES;
if (!fscrypt_has_encryption_key(inode))
@@ -452,7 +458,7 @@ static int f2fs_file_open(struct inode *inode, struct file *filp)
return -EPERM;
}
dput(dir);
- return ret;
+ return dquot_file_open(inode, filp);
}
int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
@@ -527,8 +533,10 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
truncate_out:
f2fs_wait_on_page_writeback(page, DATA, true);
zero_user(page, offset, PAGE_SIZE - offset);
- if (!cache_only || !f2fs_encrypted_inode(inode) ||
- !S_ISREG(inode->i_mode))
+
+ /* An encrypted inode should have a key and truncate the last page. */
+ f2fs_bug_on(F2FS_I_SB(inode), cache_only && f2fs_encrypted_inode(inode));
+ if (!cache_only)
set_page_dirty(page);
f2fs_put_page(page, 1);
return 0;
@@ -633,11 +641,31 @@ int f2fs_truncate(struct inode *inode)
}
int f2fs_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags)
+ u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ unsigned int flags;
+
+ flags = fi->i_flags & FS_FL_USER_VISIBLE;
+ if (flags & FS_APPEND_FL)
+ stat->attributes |= STATX_ATTR_APPEND;
+ if (flags & FS_COMPR_FL)
+ stat->attributes |= STATX_ATTR_COMPRESSED;
+ if (f2fs_encrypted_inode(inode))
+ stat->attributes |= STATX_ATTR_ENCRYPTED;
+ if (flags & FS_IMMUTABLE_FL)
+ stat->attributes |= STATX_ATTR_IMMUTABLE;
+ if (flags & FS_NODUMP_FL)
+ stat->attributes |= STATX_ATTR_NODUMP;
+
+ stat->attributes_mask |= (STATX_ATTR_APPEND |
+ STATX_ATTR_COMPRESSED |
+ STATX_ATTR_ENCRYPTED |
+ STATX_ATTR_IMMUTABLE |
+ STATX_ATTR_NODUMP);
+
generic_fillattr(inode, stat);
- stat->blocks <<= 3;
return 0;
}
@@ -681,14 +709,34 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
if (err)
return err;
+ if (is_quota_modification(inode, attr)) {
+ err = dquot_initialize(inode);
+ if (err)
+ return err;
+ }
+ if ((attr->ia_valid & ATTR_UID &&
+ !uid_eq(attr->ia_uid, inode->i_uid)) ||
+ (attr->ia_valid & ATTR_GID &&
+ !gid_eq(attr->ia_gid, inode->i_gid))) {
+ err = dquot_transfer(inode, attr);
+ if (err)
+ return err;
+ }
+
if (attr->ia_valid & ATTR_SIZE) {
- if (f2fs_encrypted_inode(inode) &&
- fscrypt_get_encryption_info(inode))
- return -EACCES;
+ if (f2fs_encrypted_inode(inode)) {
+ err = fscrypt_get_encryption_info(inode);
+ if (err)
+ return err;
+ if (!fscrypt_has_encryption_key(inode))
+ return -ENOKEY;
+ }
if (attr->ia_size <= i_size_read(inode)) {
+ down_write(&F2FS_I(inode)->i_mmap_sem);
truncate_setsize(inode, attr->ia_size);
err = f2fs_truncate(inode);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
if (err)
return err;
} else {
@@ -696,7 +744,9 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
* do not trim all blocks after i_size if target size is
* larger than i_size.
*/
+ down_write(&F2FS_I(inode)->i_mmap_sem);
truncate_setsize(inode, attr->ia_size);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
/* should convert inline inode here */
if (!f2fs_may_inline_data(inode)) {
@@ -839,12 +889,14 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
blk_start = (loff_t)pg_start << PAGE_SHIFT;
blk_end = (loff_t)pg_end << PAGE_SHIFT;
+ down_write(&F2FS_I(inode)->i_mmap_sem);
truncate_inode_pages_range(mapping, blk_start,
blk_end - 1);
f2fs_lock_op(sbi);
ret = truncate_hole(inode, pg_start, pg_end);
f2fs_unlock_op(sbi);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
}
}
@@ -957,9 +1009,9 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
if (do_replace[i]) {
f2fs_i_blocks_write(src_inode,
- 1, false);
+ 1, false, false);
f2fs_i_blocks_write(dst_inode,
- 1, true);
+ 1, true, false);
f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
blkaddr[i], ni.version, true, false);
@@ -1083,16 +1135,17 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
pg_start = offset >> PAGE_SHIFT;
pg_end = (offset + len) >> PAGE_SHIFT;
+ down_write(&F2FS_I(inode)->i_mmap_sem);
/* write out all dirty pages from offset */
ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
if (ret)
- return ret;
+ goto out;
truncate_pagecache(inode, offset);
ret = f2fs_do_collapse(inode, pg_start, pg_end);
if (ret)
- return ret;
+ goto out;
/* write out all moved pages, if possible */
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
@@ -1105,6 +1158,8 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
if (!ret)
f2fs_i_size_write(inode, new_size);
+out:
+ up_write(&F2FS_I(inode)->i_mmap_sem);
return ret;
}
@@ -1169,9 +1224,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
if (ret)
return ret;
+ down_write(&F2FS_I(inode)->i_mmap_sem);
ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
if (ret)
- return ret;
+ goto out_sem;
truncate_pagecache_range(inode, offset, offset + len - 1);
@@ -1185,7 +1241,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = fill_zero(inode, pg_start, off_start,
off_end - off_start);
if (ret)
- return ret;
+ goto out_sem;
new_size = max_t(loff_t, new_size, offset + len);
} else {
@@ -1193,7 +1249,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = fill_zero(inode, pg_start++, off_start,
PAGE_SIZE - off_start);
if (ret)
- return ret;
+ goto out_sem;
new_size = max_t(loff_t, new_size,
(loff_t)pg_start << PAGE_SHIFT);
@@ -1242,6 +1298,8 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
out:
if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size)
f2fs_i_size_write(inode, new_size);
+out_sem:
+ up_write(&F2FS_I(inode)->i_mmap_sem);
return ret;
}
@@ -1271,14 +1329,15 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
f2fs_balance_fs(sbi, true);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
ret = truncate_blocks(inode, i_size_read(inode), true);
if (ret)
- return ret;
+ goto out;
/* write out all dirty pages from offset */
ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
if (ret)
- return ret;
+ goto out;
truncate_pagecache(inode, offset);
@@ -1307,6 +1366,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
if (!ret)
f2fs_i_size_write(inode, new_size);
+out:
+ up_write(&F2FS_I(inode)->i_mmap_sem);
return ret;
}
@@ -1475,6 +1536,13 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
inode_lock(inode);
+ /* Is it quota file? Do not allow user to mess with it */
+ if (IS_NOQUOTA(inode)) {
+ inode_unlock(inode);
+ ret = -EPERM;
+ goto unlock_out;
+ }
+
flags = f2fs_mask_flags(inode->i_mode, flags);
oldflags = fi->i_flags;
@@ -1493,7 +1561,8 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
inode->i_ctime = current_time(inode);
f2fs_set_inode_flags(inode);
-
+ f2fs_mark_inode_dirty_sync(inode, false);
+unlock_out:
inode_unlock(inode);
out:
mnt_drop_write_file(filp);
@@ -1862,6 +1931,50 @@ out:
return ret;
}
+/*
+ * F2FS_IOC_GARBAGE_COLLECT_RANGE handler: copies a struct f2fs_gc_range
+ * from user space and calls f2fs_gc() repeatedly, advancing range.start
+ * by blocks_per_seg each time until it passes start + len.
+ *
+ * Returns -EPERM without CAP_SYS_ADMIN, -EFAULT if the argument cannot be
+ * copied in, -EROFS on a read-only filesystem, -EINVAL if the range wraps
+ * or lies outside the MAIN_BLKADDR()..MAX_BLKADDR() bounds, -EBUSY if a
+ * non-sync request finds gc_mutex contended, otherwise the result of the
+ * last f2fs_gc() call.
+ *
+ * NOTE(review): gc_mutex is taken here but never released in this
+ * function; presumably f2fs_gc() drops it before returning - confirm.
+ */
+static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
+{
+	struct inode *inode = file_inode(filp);
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct f2fs_gc_range range;
+	u64 end;
+	int ret;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg,
+							sizeof(range)))
+		return -EFAULT;
+
+	if (f2fs_readonly(sbi->sb))
+		return -EROFS;
+
+	ret = mnt_want_write_file(filp);
+	if (ret)
+		return ret;
+
+	end = range.start + range.len;
+	/* reject ranges that wrap around u64 as well as out-of-area ones */
+	if (end < range.start || range.start < MAIN_BLKADDR(sbi) ||
+					end >= MAX_BLKADDR(sbi)) {
+		/* write access is already held: leave through 'out' to drop it */
+		ret = -EINVAL;
+		goto out;
+	}
+do_more:
+	if (!range.sync) {
+		if (!mutex_trylock(&sbi->gc_mutex)) {
+			ret = -EBUSY;
+			goto out;
+		}
+	} else {
+		mutex_lock(&sbi->gc_mutex);
+	}
+
+	ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start));
+	range.start += sbi->blocks_per_seg;
+	if (range.start <= end)
+		goto do_more;
+out:
+	mnt_drop_write_file(filp);
+	return ret;
+}
+
static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -2306,6 +2419,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return f2fs_ioc_get_encryption_pwsalt(filp, arg);
case F2FS_IOC_GARBAGE_COLLECT:
return f2fs_ioc_gc(filp, arg);
+ case F2FS_IOC_GARBAGE_COLLECT_RANGE:
+ return f2fs_ioc_gc_range(filp, arg);
case F2FS_IOC_WRITE_CHECKPOINT:
return f2fs_ioc_write_checkpoint(filp, arg);
case F2FS_IOC_DEFRAGMENT:
@@ -2326,11 +2441,6 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct blk_plug plug;
ssize_t ret;
- if (f2fs_encrypted_inode(inode) &&
- !fscrypt_has_encryption_key(inode) &&
- fscrypt_get_encryption_info(inode))
- return -EACCES;
-
inode_lock(inode);
ret = generic_write_checks(iocb, from);
if (ret > 0) {
@@ -2379,6 +2489,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case F2FS_IOC_GET_ENCRYPTION_PWSALT:
case F2FS_IOC_GET_ENCRYPTION_POLICY:
case F2FS_IOC_GARBAGE_COLLECT:
+ case F2FS_IOC_GARBAGE_COLLECT_RANGE:
case F2FS_IOC_WRITE_CHECKPOINT:
case F2FS_IOC_DEFRAGMENT:
case F2FS_IOC_MOVE_RANGE:
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 026522107ca3..fa3d2e2df8e7 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -32,13 +32,14 @@ static int gc_thread_func(void *data)
wait_ms = gc_th->min_sleep_time;
+ set_freezable();
do {
+ wait_event_interruptible_timeout(*wq,
+ kthread_should_stop() || freezing(current),
+ msecs_to_jiffies(wait_ms));
+
if (try_to_freeze())
continue;
- else
- wait_event_interruptible_timeout(*wq,
- kthread_should_stop(),
- msecs_to_jiffies(wait_ms));
if (kthread_should_stop())
break;
@@ -258,11 +259,20 @@ static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi,
valid_blocks * 2 : valid_blocks;
}
+/*
+ * Cost of a segment when alloc_mode is SSR: the larger of its
+ * ckpt_valid_blocks and valid_blocks counters.
+ */
+static unsigned int get_ssr_cost(struct f2fs_sb_info *sbi,
+						unsigned int segno)
+{
+	struct seg_entry *entry = get_seg_entry(sbi, segno);
+
+	if (entry->ckpt_valid_blocks > entry->valid_blocks)
+		return entry->ckpt_valid_blocks;
+
+	return entry->valid_blocks;
+}
+
static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
unsigned int segno, struct victim_sel_policy *p)
{
if (p->alloc_mode == SSR)
- return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
+ return get_ssr_cost(sbi, segno);
/* alloc_mode == LFS */
if (p->gc_mode == GC_GREEDY)
@@ -586,9 +596,11 @@ static void move_encrypted_block(struct inode *inode, block_t bidx,
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(inode),
.type = DATA,
+ .temp = COLD,
.op = REQ_OP_READ,
.op_flags = 0,
.encrypted_page = NULL,
+ .in_list = false,
};
struct dnode_of_data dn;
struct f2fs_summary sum;
@@ -632,7 +644,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx,
fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
- &sum, CURSEG_COLD_DATA);
+ &sum, CURSEG_COLD_DATA, NULL, false);
fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi), newaddr,
FGP_LOCK | FGP_CREAT, GFP_NOFS);
@@ -670,7 +682,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx,
fio.op = REQ_OP_WRITE;
fio.op_flags = REQ_SYNC;
fio.new_blkaddr = newaddr;
- f2fs_submit_page_mbio(&fio);
+ f2fs_submit_page_write(&fio);
f2fs_update_data_blkaddr(&dn, newaddr);
set_inode_flag(inode, FI_APPEND_WRITE);
@@ -712,12 +724,13 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(inode),
.type = DATA,
+ .temp = COLD,
.op = REQ_OP_WRITE,
.op_flags = REQ_SYNC,
.old_blkaddr = NULL_ADDR,
.page = page,
.encrypted_page = NULL,
- .need_lock = true,
+ .need_lock = LOCK_REQ,
};
bool is_dirty = PageDirty(page);
int err;
@@ -936,8 +949,8 @@ next:
}
if (gc_type == FG_GC)
- f2fs_submit_merged_bio(sbi,
- (type == SUM_TYPE_NODE) ? NODE : DATA, WRITE);
+ f2fs_submit_merged_write(sbi,
+ (type == SUM_TYPE_NODE) ? NODE : DATA);
blk_finish_plug(&plug);
@@ -955,7 +968,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
{
int gc_type = sync ? FG_GC : BG_GC;
int sec_freed = 0;
- int ret = -EINVAL;
+ int ret;
struct cp_control cpc;
unsigned int init_segno = segno;
struct gc_inode_list gc_list = {
@@ -965,8 +978,10 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
cpc.reason = __get_cp_reason(sbi);
gc_more:
- if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
+ if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) {
+ ret = -EINVAL;
goto stop;
+ }
if (unlikely(f2fs_cp_error(sbi))) {
ret = -EIO;
goto stop;
@@ -987,6 +1002,7 @@ gc_more:
gc_type = FG_GC;
}
+ ret = -EINVAL;
/* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
if (gc_type == BG_GC && !background)
goto stop;
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index e4c527c4e7d0..e0fd4376e6fb 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -316,12 +316,12 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
int make_empty_inline_dir(struct inode *inode, struct inode *parent,
struct page *ipage)
{
- struct f2fs_inline_dentry *dentry_blk;
+ struct f2fs_inline_dentry *inline_dentry;
struct f2fs_dentry_ptr d;
- dentry_blk = inline_data_addr(ipage);
+ inline_dentry = inline_data_addr(ipage);
- make_dentry_ptr_inline(NULL, &d, dentry_blk);
+ make_dentry_ptr_inline(NULL, &d, inline_dentry);
do_make_empty_dir(inode, parent, &d);
set_page_dirty(ipage);
@@ -500,7 +500,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
struct page *ipage;
unsigned int bit_pos;
f2fs_hash_t name_hash;
- struct f2fs_inline_dentry *dentry_blk = NULL;
+ struct f2fs_inline_dentry *inline_dentry = NULL;
struct f2fs_dentry_ptr d;
int slots = GET_DENTRY_SLOTS(new_name->len);
struct page *page = NULL;
@@ -510,11 +510,11 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
if (IS_ERR(ipage))
return PTR_ERR(ipage);
- dentry_blk = inline_data_addr(ipage);
- bit_pos = room_for_filename(&dentry_blk->dentry_bitmap,
+ inline_dentry = inline_data_addr(ipage);
+ bit_pos = room_for_filename(&inline_dentry->dentry_bitmap,
slots, NR_INLINE_DENTRY);
if (bit_pos >= NR_INLINE_DENTRY) {
- err = f2fs_convert_inline_dir(dir, ipage, dentry_blk);
+ err = f2fs_convert_inline_dir(dir, ipage, inline_dentry);
if (err)
return err;
err = -EAGAIN;
@@ -534,7 +534,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
f2fs_wait_on_page_writeback(ipage, NODE, true);
name_hash = f2fs_dentry_hash(new_name, NULL);
- make_dentry_ptr_inline(NULL, &d, dentry_blk);
+ make_dentry_ptr_inline(NULL, &d, inline_dentry);
f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos);
set_page_dirty(ipage);
@@ -586,14 +586,14 @@ bool f2fs_empty_inline_dir(struct inode *dir)
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct page *ipage;
unsigned int bit_pos = 2;
- struct f2fs_inline_dentry *dentry_blk;
+ struct f2fs_inline_dentry *inline_dentry;
ipage = get_node_page(sbi, dir->i_ino);
if (IS_ERR(ipage))
return false;
- dentry_blk = inline_data_addr(ipage);
- bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
+ inline_dentry = inline_data_addr(ipage);
+ bit_pos = find_next_bit_le(&inline_dentry->dentry_bitmap,
NR_INLINE_DENTRY,
bit_pos);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 518f49643092..6cd312a17c69 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -16,6 +16,7 @@
#include "f2fs.h"
#include "node.h"
+#include "segment.h"
#include <trace/events/f2fs.h>
@@ -44,7 +45,6 @@ void f2fs_set_inode_flags(struct inode *inode)
new_fl |= S_DIRSYNC;
inode_set_flags(inode, new_fl,
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
- f2fs_mark_inode_dirty_sync(inode, false);
}
static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@@ -130,7 +130,7 @@ static int do_read_inode(struct inode *inode)
i_gid_write(inode, le32_to_cpu(ri->i_gid));
set_nlink(inode, le32_to_cpu(ri->i_links));
inode->i_size = le64_to_cpu(ri->i_size);
- inode->i_blocks = le64_to_cpu(ri->i_blocks);
+ inode->i_blocks = SECTOR_FROM_BLOCK(le64_to_cpu(ri->i_blocks) - 1);
inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime);
inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime);
@@ -226,6 +226,7 @@ make_now:
ret = -EIO;
goto bad_inode;
}
+ f2fs_set_inode_flags(inode);
unlock_new_inode(inode);
trace_f2fs_iget(inode);
return inode;
@@ -267,7 +268,7 @@ int update_inode(struct inode *inode, struct page *node_page)
ri->i_gid = cpu_to_le32(i_gid_read(inode));
ri->i_links = cpu_to_le32(inode->i_nlink);
ri->i_size = cpu_to_le64(i_size_read(inode));
- ri->i_blocks = cpu_to_le64(inode->i_blocks);
+ ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(inode->i_blocks) + 1);
if (et) {
read_lock(&et->lock);
@@ -372,6 +373,8 @@ void f2fs_evict_inode(struct inode *inode)
if (inode->i_nlink || is_bad_inode(inode))
goto no_delete;
+ dquot_initialize(inode);
+
remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
@@ -404,8 +407,11 @@ retry:
if (err)
update_inode_page(inode);
+ dquot_free_inode(inode);
sb_end_intwrite(inode->i_sb);
no_delete:
+ dquot_drop(inode);
+
stat_dec_inline_xattr(inode);
stat_dec_inline_dir(inode);
stat_dec_inline_inode(inode);
@@ -425,9 +431,10 @@ no_delete:
if (is_inode_flag_set(inode, FI_FREE_NID)) {
alloc_nid_failed(sbi, inode->i_ino);
clear_inode_flag(inode, FI_FREE_NID);
+ } else {
+ f2fs_bug_on(sbi, err &&
+ !exist_written_data(sbi, inode->i_ino, ORPHAN_INO));
}
- f2fs_bug_on(sbi, err &&
- !exist_written_data(sbi, inode->i_ino, ORPHAN_INO));
out_clear:
fscrypt_put_encryption_info(inode, NULL);
clear_inode(inode);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index c31b40e5f9cf..760d85223c81 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -15,6 +15,7 @@
#include <linux/ctype.h>
#include <linux/dcache.h>
#include <linux/namei.h>
+#include <linux/quotaops.h>
#include "f2fs.h"
#include "node.h"
@@ -42,6 +43,8 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
}
f2fs_unlock_op(sbi);
+ nid_free = true;
+
inode_init_owner(inode, dir, mode);
inode->i_ino = ino;
@@ -52,10 +55,17 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
err = insert_inode_locked(inode);
if (err) {
err = -EINVAL;
- nid_free = true;
goto fail;
}
+ err = dquot_initialize(inode);
+ if (err)
+ goto fail_drop;
+
+ err = dquot_alloc_inode(inode);
+ if (err)
+ goto fail_drop;
+
/* If the directory encrypted, then we should encrypt the inode. */
if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode))
f2fs_set_encrypted_inode(inode);
@@ -85,6 +95,16 @@ fail:
set_inode_flag(inode, FI_FREE_NID);
iput(inode);
return ERR_PTR(err);
+fail_drop:
+ trace_f2fs_new_inode(inode, err);
+ dquot_drop(inode);
+ inode->i_flags |= S_NOQUOTA;
+ if (nid_free)
+ set_inode_flag(inode, FI_FREE_NID);
+ clear_nlink(inode);
+ unlock_new_inode(inode);
+ iput(inode);
+ return ERR_PTR(err);
}
static int is_multimedia_file(const unsigned char *s, const char *sub)
@@ -136,6 +156,10 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
nid_t ino = 0;
int err;
+ err = dquot_initialize(dir);
+ if (err)
+ return err;
+
inode = f2fs_new_inode(dir, mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -180,6 +204,10 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
!fscrypt_has_permitted_context(dir, inode))
return -EPERM;
+ err = dquot_initialize(dir);
+ if (err)
+ return err;
+
f2fs_balance_fs(sbi, true);
inode->i_ctime = current_time(inode);
@@ -347,6 +375,10 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
trace_f2fs_unlink_enter(dir, dentry);
+ err = dquot_initialize(dir);
+ if (err)
+ return err;
+
de = f2fs_find_entry(dir, &dentry->d_name, &page);
if (!de) {
if (IS_ERR(page))
@@ -413,6 +445,10 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
if (disk_link.len > dir->i_sb->s_blocksize)
return -ENAMETOOLONG;
+ err = dquot_initialize(dir);
+ if (err)
+ return err;
+
inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -500,6 +536,10 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
struct inode *inode;
int err;
+ err = dquot_initialize(dir);
+ if (err)
+ return err;
+
inode = f2fs_new_inode(dir, S_IFDIR | mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -548,6 +588,10 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
struct inode *inode;
int err = 0;
+ err = dquot_initialize(dir);
+ if (err)
+ return err;
+
inode = f2fs_new_inode(dir, mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -583,6 +627,10 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
struct inode *inode;
int err;
+ err = dquot_initialize(dir);
+ if (err)
+ return err;
+
inode = f2fs_new_inode(dir, mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -676,6 +724,14 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out;
}
+ err = dquot_initialize(old_dir);
+ if (err)
+ goto out;
+
+ err = dquot_initialize(new_dir);
+ if (err)
+ goto out;
+
old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_entry) {
if (IS_ERR(old_page))
@@ -772,7 +828,10 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
down_write(&F2FS_I(old_inode)->i_sem);
- file_lost_pino(old_inode);
+ if (!old_dir_entry || whiteout)
+ file_lost_pino(old_inode);
+ else
+ F2FS_I(old_inode)->i_pino = new_dir->i_ino;
up_write(&F2FS_I(old_inode)->i_sem);
old_inode->i_ctime = current_time(old_inode);
@@ -853,6 +912,14 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
!fscrypt_has_permitted_context(old_dir, new_inode)))
return -EPERM;
+ err = dquot_initialize(old_dir);
+ if (err)
+ goto out;
+
+ err = dquot_initialize(new_dir);
+ if (err)
+ goto out;
+
old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_entry) {
if (IS_ERR(old_page))
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 4547c5c5cd98..d53fe620939e 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -158,9 +158,6 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
struct nat_entry_set *head;
- if (get_nat_flag(ne, IS_DIRTY))
- return;
-
head = radix_tree_lookup(&nm_i->nat_set_root, set);
if (!head) {
head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_NOFS);
@@ -171,10 +168,18 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
head->entry_cnt = 0;
f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head);
}
- list_move_tail(&ne->list, &head->entry_list);
+
+ if (get_nat_flag(ne, IS_DIRTY))
+ goto refresh_list;
+
nm_i->dirty_nat_cnt++;
head->entry_cnt++;
set_nat_flag(ne, IS_DIRTY, true);
+refresh_list:
+ if (nat_get_blkaddr(ne) == NEW_ADDR)
+ list_del_init(&ne->list);
+ else
+ list_move_tail(&ne->list, &head->entry_list);
}
static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
@@ -673,15 +678,11 @@ static void truncate_node(struct dnode_of_data *dn)
struct node_info ni;
get_node_info(sbi, dn->nid, &ni);
- if (dn->inode->i_blocks == 0) {
- f2fs_bug_on(sbi, ni.blk_addr != NULL_ADDR);
- goto invalidate;
- }
f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
/* Deallocate node address */
invalidate_blocks(sbi, ni.blk_addr);
- dec_valid_node_count(sbi, dn->inode);
+ dec_valid_node_count(sbi, dn->inode, dn->nid == dn->inode->i_ino);
set_node_addr(sbi, &ni, NULL_ADDR, false);
if (dn->nid == dn->inode->i_ino) {
@@ -689,7 +690,7 @@ static void truncate_node(struct dnode_of_data *dn)
dec_valid_inode_count(sbi);
f2fs_inode_synced(dn->inode);
}
-invalidate:
+
clear_node_page_dirty(dn->node_page);
set_sbi_flag(sbi, SBI_IS_DIRTY);
@@ -1006,7 +1007,7 @@ int remove_inode_page(struct inode *inode)
/* 0 is possible, after f2fs_new_inode() has failed */
f2fs_bug_on(F2FS_I_SB(inode),
- inode->i_blocks != 0 && inode->i_blocks != 1);
+ inode->i_blocks != 0 && inode->i_blocks != 8);
/* will put inode & node pages */
truncate_node(&dn);
@@ -1039,10 +1040,9 @@ struct page *new_node_page(struct dnode_of_data *dn,
if (!page)
return ERR_PTR(-ENOMEM);
- if (unlikely(!inc_valid_node_count(sbi, dn->inode))) {
- err = -ENOSPC;
+ if (unlikely((err = inc_valid_node_count(sbi, dn->inode, !ofs))))
goto fail;
- }
+
#ifdef CONFIG_F2FS_CHECK_FS
get_node_info(sbi, dn->nid, &new_ni);
f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR);
@@ -1152,6 +1152,7 @@ repeat:
f2fs_put_page(page, 1);
return ERR_PTR(err);
} else if (err == LOCKED_PAGE) {
+ err = 0;
goto page_hit;
}
@@ -1165,15 +1166,22 @@ repeat:
goto repeat;
}
- if (unlikely(!PageUptodate(page)))
+ if (unlikely(!PageUptodate(page))) {
+ err = -EIO;
goto out_err;
+ }
page_hit:
if(unlikely(nid != nid_of_node(page))) {
- f2fs_bug_on(sbi, 1);
+ f2fs_msg(sbi->sb, KERN_WARNING, "inconsistent node block, "
+ "nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
+ nid, nid_of_node(page), ino_of_node(page),
+ ofs_of_node(page), cpver_of_node(page),
+ next_blkaddr_of_node(page));
ClearPageUptodate(page);
+ err = -EINVAL;
out_err:
f2fs_put_page(page, 1);
- return ERR_PTR(-EIO);
+ return ERR_PTR(err);
}
return page;
}
@@ -1373,15 +1381,15 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
up_read(&sbi->node_write);
if (wbc->for_reclaim) {
- f2fs_submit_merged_bio_cond(sbi, page->mapping->host, 0,
- page->index, NODE, WRITE);
+ f2fs_submit_merged_write_cond(sbi, page->mapping->host, 0,
+ page->index, NODE);
submitted = NULL;
}
unlock_page(page);
if (unlikely(f2fs_cp_error(sbi))) {
- f2fs_submit_merged_bio(sbi, NODE, WRITE);
+ f2fs_submit_merged_write(sbi, NODE);
submitted = NULL;
}
if (submitted)
@@ -1518,8 +1526,7 @@ continue_unlock:
}
out:
if (last_idx != ULONG_MAX)
- f2fs_submit_merged_bio_cond(sbi, NULL, ino, last_idx,
- NODE, WRITE);
+ f2fs_submit_merged_write_cond(sbi, NULL, ino, last_idx, NODE);
return ret ? -EIO: 0;
}
@@ -1625,7 +1632,7 @@ continue_unlock:
}
out:
if (nwritten)
- f2fs_submit_merged_bio(sbi, NODE, WRITE);
+ f2fs_submit_merged_write(sbi, NODE);
return ret;
}
@@ -1675,6 +1682,9 @@ static int f2fs_write_node_pages(struct address_space *mapping,
struct blk_plug plug;
long diff;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+ goto skip_write;
+
/* balancing f2fs's metadata in background */
f2fs_balance_fs_bg(sbi);
@@ -2192,14 +2202,14 @@ int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
get_node_info(sbi, prev_xnid, &ni);
f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
invalidate_blocks(sbi, ni.blk_addr);
- dec_valid_node_count(sbi, inode);
+ dec_valid_node_count(sbi, inode, false);
set_node_addr(sbi, &ni, NULL_ADDR, false);
recover_xnid:
/* 2: update xattr nid in inode */
remove_free_nid(sbi, new_xnid);
f2fs_i_xnid_write(inode, new_xnid);
- if (unlikely(!inc_valid_node_count(sbi, inode)))
+ if (unlikely(inc_valid_node_count(sbi, inode, false)))
f2fs_bug_on(sbi, 1);
update_inode_page(inode);
@@ -2257,7 +2267,7 @@ retry:
new_ni = old_ni;
new_ni.ino = ino;
- if (unlikely(!inc_valid_node_count(sbi, NULL)))
+ if (unlikely(inc_valid_node_count(sbi, NULL, true)))
WARN_ON(1);
set_node_addr(sbi, &new_ni, NEW_ADDR, false);
inc_valid_inode_count(sbi);
@@ -2424,8 +2434,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
nid_t nid = nat_get_nid(ne);
int offset;
- if (nat_get_blkaddr(ne) == NEW_ADDR)
- continue;
+ f2fs_bug_on(sbi, nat_get_blkaddr(ne) == NEW_ADDR);
if (to_journal) {
offset = lookup_journal_in_cursum(journal,
@@ -2553,7 +2562,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
return 0;
}
-inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
+static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
unsigned int i = 0;
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 558048e33cf9..bb53e9955ff2 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -224,11 +224,7 @@ static inline pgoff_t next_nat_addr(struct f2fs_sb_info *sbi,
struct f2fs_nm_info *nm_i = NM_I(sbi);
block_addr -= nm_i->nat_blkaddr;
- if ((block_addr >> sbi->log_blocks_per_seg) % 2)
- block_addr -= sbi->blocks_per_seg;
- else
- block_addr += sbi->blocks_per_seg;
-
+ block_addr ^= 1 << sbi->log_blocks_per_seg;
return block_addr + nm_i->nat_blkaddr;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 96845854e7ee..f964b68718c1 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -16,6 +16,7 @@
#include <linux/kthread.h>
#include <linux/swap.h>
#include <linux/timer.h>
+#include <linux/freezer.h>
#include "f2fs.h"
#include "segment.h"
@@ -312,7 +313,7 @@ static int __commit_inmem_pages(struct inode *inode,
fio.page = page;
fio.old_blkaddr = NULL_ADDR;
fio.encrypted_page = NULL;
- fio.need_lock = false,
+ fio.need_lock = LOCK_DONE;
err = do_write_data_page(&fio);
if (err) {
unlock_page(page);
@@ -328,8 +329,7 @@ static int __commit_inmem_pages(struct inode *inode,
}
if (last_idx != ULONG_MAX)
- f2fs_submit_merged_bio_cond(sbi, inode, 0, last_idx,
- DATA, WRITE);
+ f2fs_submit_merged_write_cond(sbi, inode, 0, last_idx, DATA);
if (!err)
__revoke_inmem_pages(inode, revoke_list, false, false);
@@ -555,6 +555,8 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
if (SM_I(sbi)->fcc_info) {
fcc = SM_I(sbi)->fcc_info;
+ if (fcc->f2fs_issue_flush)
+ return err;
goto init_thread;
}
@@ -566,6 +568,9 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
init_waitqueue_head(&fcc->flush_wait_queue);
init_llist_head(&fcc->issue_list);
SM_I(sbi)->fcc_info = fcc;
+ if (!test_opt(sbi, FLUSH_MERGE))
+ return err;
+
init_thread:
fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
@@ -736,12 +741,15 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ f2fs_bug_on(sbi, dc->ref);
+
if (dc->error == -EOPNOTSUPP)
dc->error = 0;
if (dc->error)
f2fs_msg(sbi->sb, KERN_INFO,
- "Issue discard failed, ret: %d", dc->error);
+ "Issue discard(%u, %u, %u) failed, ret: %d",
+ dc->lstart, dc->start, dc->len, dc->error);
__detach_discard_cmd(dcc, dc);
}
@@ -749,12 +757,36 @@ static void f2fs_submit_discard_endio(struct bio *bio)
{
struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
- dc->error = bio->bi_error;
+ dc->error = blk_status_to_errno(bio->bi_status);
dc->state = D_DONE;
- complete(&dc->wait);
+ complete_all(&dc->wait);
bio_put(bio);
}
+void __check_sit_bitmap(struct f2fs_sb_info *sbi,
+ block_t start, block_t end)
+{
+#ifdef CONFIG_F2FS_CHECK_FS
+ struct seg_entry *sentry;
+ unsigned int segno;
+ block_t blk = start;
+ unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
+ unsigned long *map;
+
+ while (blk < end) {
+ segno = GET_SEGNO(sbi, blk);
+ sentry = get_seg_entry(sbi, segno);
+ offset = GET_BLKOFF_FROM_SEG0(sbi, blk);
+
+ size = min((unsigned long)(end - blk), max_blocks);
+ map = (unsigned long *)(sentry->cur_valid_map);
+ offset = __find_rev_next_bit(map, size, offset);
+ f2fs_bug_on(sbi, offset != size);
+ blk += size;
+ }
+#endif
+}
+
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_cmd *dc)
@@ -782,6 +814,7 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
bio->bi_opf |= REQ_SYNC;
submit_bio(bio);
list_move_tail(&dc->list, &dcc->wait_list);
+ __check_sit_bitmap(sbi, dc->start, dc->start + dc->len);
}
} else {
__remove_discard_cmd(sbi, dc);
@@ -838,7 +871,6 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
dc->len = blkaddr - dc->lstart;
dcc->undiscard_blks += dc->len;
__relocate_discard_cmd(dcc, dc);
- f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
modified = true;
}
@@ -848,16 +880,12 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
di.start + blkaddr + 1 - di.lstart,
di.lstart + di.len - 1 - blkaddr,
NULL, NULL);
- f2fs_bug_on(sbi,
- !__check_rb_tree_consistence(sbi, &dcc->root));
} else {
dc->lstart++;
dc->len--;
dc->start++;
dcc->undiscard_blks += dc->len;
__relocate_discard_cmd(dcc, dc);
- f2fs_bug_on(sbi,
- !__check_rb_tree_consistence(sbi, &dcc->root));
}
}
}
@@ -918,8 +946,6 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
prev_dc->di.len += di.len;
dcc->undiscard_blks += di.len;
__relocate_discard_cmd(dcc, prev_dc);
- f2fs_bug_on(sbi,
- !__check_rb_tree_consistence(sbi, &dcc->root));
di = prev_dc->di;
tdc = prev_dc;
merged = true;
@@ -935,16 +961,12 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
__relocate_discard_cmd(dcc, next_dc);
if (tdc)
__remove_discard_cmd(sbi, tdc);
- f2fs_bug_on(sbi,
- !__check_rb_tree_consistence(sbi, &dcc->root));
merged = true;
}
if (!merged) {
__insert_discard_tree(sbi, bdev, di.lstart, di.start,
di.len, NULL, NULL);
- f2fs_bug_on(sbi,
- !__check_rb_tree_consistence(sbi, &dcc->root));
}
next:
prev_dc = next_dc;
@@ -983,6 +1005,8 @@ static void __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond)
int i, iter = 0;
mutex_lock(&dcc->cmd_lock);
+ f2fs_bug_on(sbi,
+ !__check_rb_tree_consistence(sbi, &dcc->root));
blk_start_plug(&plug);
for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
pend_list = &dcc->pend_list[i];
@@ -1000,22 +1024,47 @@ out:
mutex_unlock(&dcc->cmd_lock);
}
+static void __wait_one_discard_bio(struct f2fs_sb_info *sbi,
+ struct discard_cmd *dc)
+{
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+
+ wait_for_completion_io(&dc->wait);
+ mutex_lock(&dcc->cmd_lock);
+ f2fs_bug_on(sbi, dc->state != D_DONE);
+ dc->ref--;
+ if (!dc->ref)
+ __remove_discard_cmd(sbi, dc);
+ mutex_unlock(&dcc->cmd_lock);
+}
+
static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct list_head *wait_list = &(dcc->wait_list);
struct discard_cmd *dc, *tmp;
+ bool need_wait;
+
+next:
+ need_wait = false;
mutex_lock(&dcc->cmd_lock);
list_for_each_entry_safe(dc, tmp, wait_list, list) {
- if (!wait_cond || dc->state == D_DONE) {
- if (dc->ref)
- continue;
+ if (!wait_cond || (dc->state == D_DONE && !dc->ref)) {
wait_for_completion_io(&dc->wait);
__remove_discard_cmd(sbi, dc);
+ } else {
+ dc->ref++;
+ need_wait = true;
+ break;
}
}
mutex_unlock(&dcc->cmd_lock);
+
+ if (need_wait) {
+ __wait_one_discard_bio(sbi, dc);
+ goto next;
+ }
}
/* This should be covered by global mutex, &sit_i->sentry_lock */
@@ -1037,14 +1086,19 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
}
mutex_unlock(&dcc->cmd_lock);
- if (need_wait) {
- wait_for_completion_io(&dc->wait);
- mutex_lock(&dcc->cmd_lock);
- f2fs_bug_on(sbi, dc->state != D_DONE);
- dc->ref--;
- if (!dc->ref)
- __remove_discard_cmd(sbi, dc);
- mutex_unlock(&dcc->cmd_lock);
+ if (need_wait)
+ __wait_one_discard_bio(sbi, dc);
+}
+
+void stop_discard_thread(struct f2fs_sb_info *sbi)
+{
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+
+ if (dcc && dcc->f2fs_issue_discard) {
+ struct task_struct *discard_thread = dcc->f2fs_issue_discard;
+
+ dcc->f2fs_issue_discard = NULL;
+ kthread_stop(discard_thread);
}
}
@@ -1060,18 +1114,24 @@ static int issue_discard_thread(void *data)
struct f2fs_sb_info *sbi = data;
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
wait_queue_head_t *q = &dcc->discard_wait_queue;
-repeat:
- if (kthread_should_stop())
- return 0;
- __issue_discard_cmd(sbi, true);
- __wait_discard_cmd(sbi, true);
+ set_freezable();
- congestion_wait(BLK_RW_SYNC, HZ/50);
+ do {
+ wait_event_interruptible(*q, kthread_should_stop() ||
+ freezing(current) ||
+ atomic_read(&dcc->discard_cmd_cnt));
+ if (try_to_freeze())
+ continue;
+ if (kthread_should_stop())
+ return 0;
- wait_event_interruptible(*q, kthread_should_stop() ||
- atomic_read(&dcc->discard_cmd_cnt));
- goto repeat;
+ __issue_discard_cmd(sbi, true);
+ __wait_discard_cmd(sbi, true);
+
+ congestion_wait(BLK_RW_SYNC, HZ/50);
+ } while (!kthread_should_stop());
+ return 0;
}
#ifdef CONFIG_BLK_DEV_ZONED
@@ -1322,7 +1382,8 @@ find_next:
sbi->blocks_per_seg, cur_pos);
len = next_pos - cur_pos;
- if (force && len < cpc->trim_minlen)
+ if (f2fs_sb_mounted_blkzoned(sbi->sb) ||
+ (force && len < cpc->trim_minlen))
goto skip;
f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
@@ -1398,12 +1459,7 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
if (!dcc)
return;
- if (dcc->f2fs_issue_discard) {
- struct task_struct *discard_thread = dcc->f2fs_issue_discard;
-
- dcc->f2fs_issue_discard = NULL;
- kthread_stop(discard_thread);
- }
+ stop_discard_thread(sbi);
kfree(dcc);
SM_I(sbi)->dcc_info = NULL;
@@ -2040,66 +2096,80 @@ static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
return false;
}
-static int __get_segment_type_2(struct page *page, enum page_type p_type)
+static int __get_segment_type_2(struct f2fs_io_info *fio)
{
- if (p_type == DATA)
+ if (fio->type == DATA)
return CURSEG_HOT_DATA;
else
return CURSEG_HOT_NODE;
}
-static int __get_segment_type_4(struct page *page, enum page_type p_type)
+static int __get_segment_type_4(struct f2fs_io_info *fio)
{
- if (p_type == DATA) {
- struct inode *inode = page->mapping->host;
+ if (fio->type == DATA) {
+ struct inode *inode = fio->page->mapping->host;
if (S_ISDIR(inode->i_mode))
return CURSEG_HOT_DATA;
else
return CURSEG_COLD_DATA;
} else {
- if (IS_DNODE(page) && is_cold_node(page))
+ if (IS_DNODE(fio->page) && is_cold_node(fio->page))
return CURSEG_WARM_NODE;
else
return CURSEG_COLD_NODE;
}
}
-static int __get_segment_type_6(struct page *page, enum page_type p_type)
+static int __get_segment_type_6(struct f2fs_io_info *fio)
{
- if (p_type == DATA) {
- struct inode *inode = page->mapping->host;
+ if (fio->type == DATA) {
+ struct inode *inode = fio->page->mapping->host;
- if (is_cold_data(page) || file_is_cold(inode))
+ if (is_cold_data(fio->page) || file_is_cold(inode))
return CURSEG_COLD_DATA;
if (is_inode_flag_set(inode, FI_HOT_DATA))
return CURSEG_HOT_DATA;
return CURSEG_WARM_DATA;
} else {
- if (IS_DNODE(page))
- return is_cold_node(page) ? CURSEG_WARM_NODE :
+ if (IS_DNODE(fio->page))
+ return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
CURSEG_HOT_NODE;
return CURSEG_COLD_NODE;
}
}
-static int __get_segment_type(struct page *page, enum page_type p_type)
+static int __get_segment_type(struct f2fs_io_info *fio)
{
- switch (F2FS_P_SB(page)->active_logs) {
+ int type = 0;
+
+ switch (fio->sbi->active_logs) {
case 2:
- return __get_segment_type_2(page, p_type);
+ type = __get_segment_type_2(fio);
+ break;
case 4:
- return __get_segment_type_4(page, p_type);
+ type = __get_segment_type_4(fio);
+ break;
+ case 6:
+ type = __get_segment_type_6(fio);
+ break;
+ default:
+ f2fs_bug_on(fio->sbi, true);
}
- /* NR_CURSEG_TYPE(6) logs by default */
- f2fs_bug_on(F2FS_P_SB(page),
- F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE);
- return __get_segment_type_6(page, p_type);
+
+ if (IS_HOT(type))
+ fio->temp = HOT;
+ else if (IS_WARM(type))
+ fio->temp = WARM;
+ else
+ fio->temp = COLD;
+ return type;
}
void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
block_t old_blkaddr, block_t *new_blkaddr,
- struct f2fs_summary *sum, int type)
+ struct f2fs_summary *sum, int type,
+ struct f2fs_io_info *fio, bool add_list)
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -2135,29 +2205,35 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
if (page && IS_NODESEG(type))
fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
+ if (add_list) {
+ struct f2fs_bio_info *io;
+
+ INIT_LIST_HEAD(&fio->list);
+ fio->in_list = true;
+ io = sbi->write_io[fio->type] + fio->temp;
+ spin_lock(&io->io_lock);
+ list_add_tail(&fio->list, &io->io_list);
+ spin_unlock(&io->io_lock);
+ }
+
mutex_unlock(&curseg->curseg_mutex);
}
static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
{
- int type = __get_segment_type(fio->page, fio->type);
+ int type = __get_segment_type(fio);
int err;
- if (fio->type == NODE || fio->type == DATA)
- mutex_lock(&fio->sbi->wio_mutex[fio->type]);
reallocate:
allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
- &fio->new_blkaddr, sum, type);
+ &fio->new_blkaddr, sum, type, fio, true);
/* writeout dirty page into bdev */
- err = f2fs_submit_page_mbio(fio);
+ err = f2fs_submit_page_write(fio);
if (err == -EAGAIN) {
fio->old_blkaddr = fio->new_blkaddr;
goto reallocate;
}
-
- if (fio->type == NODE || fio->type == DATA)
- mutex_unlock(&fio->sbi->wio_mutex[fio->type]);
}
void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
@@ -2171,13 +2247,14 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
.new_blkaddr = page->index,
.page = page,
.encrypted_page = NULL,
+ .in_list = false,
};
if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
fio.op_flags &= ~REQ_META;
set_page_writeback(page);
- f2fs_submit_page_mbio(&fio);
+ f2fs_submit_page_write(&fio);
}
void write_node_page(unsigned int nid, struct f2fs_io_info *fio)
@@ -2296,8 +2373,8 @@ void f2fs_wait_on_page_writeback(struct page *page,
if (PageWriteback(page)) {
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
- f2fs_submit_merged_bio_cond(sbi, page->mapping->host,
- 0, page->index, type, WRITE);
+ f2fs_submit_merged_write_cond(sbi, page->mapping->host,
+ 0, page->index, type);
if (ordered)
wait_on_page_writeback(page);
else
@@ -2455,6 +2532,8 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
{
+ struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
+ struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
int type = CURSEG_HOT_DATA;
int err;
@@ -2481,6 +2560,11 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
return err;
}
+ /* sanity check for summary blocks */
+ if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
+ sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES)
+ return -EINVAL;
+
return 0;
}
@@ -3203,7 +3287,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
INIT_LIST_HEAD(&sm_info->sit_entry_set);
- if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
+ if (!f2fs_readonly(sbi->sb)) {
err = create_flush_cmd_control(sbi);
if (err)
return err;
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 010f336a7573..6b871b492fd5 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -27,6 +27,10 @@
#define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA)
#define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE)
+#define IS_HOT(t) ((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA)
+#define IS_WARM(t) ((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA)
+#define IS_COLD(t) ((t) == CURSEG_COLD_NODE || (t) == CURSEG_COLD_DATA)
+
#define IS_CURSEG(sbi, seg) \
(((seg) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 0b89b0b7b9f7..32e4c025e97e 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -22,6 +22,7 @@
#include <linux/random.h>
#include <linux/exportfs.h>
#include <linux/blkdev.h>
+#include <linux/quotaops.h>
#include <linux/f2fs_fs.h>
#include <linux/sysfs.h>
@@ -35,9 +36,7 @@
#define CREATE_TRACE_POINTS
#include <trace/events/f2fs.h>
-static struct proc_dir_entry *f2fs_proc_root;
static struct kmem_cache *f2fs_inode_cachep;
-static struct kset *f2fs_kset;
#ifdef CONFIG_F2FS_FAULT_INJECTION
@@ -108,6 +107,8 @@ enum {
Opt_fault_injection,
Opt_lazytime,
Opt_nolazytime,
+ Opt_usrquota,
+ Opt_grpquota,
Opt_err,
};
@@ -143,212 +144,11 @@ static match_table_t f2fs_tokens = {
{Opt_fault_injection, "fault_injection=%u"},
{Opt_lazytime, "lazytime"},
{Opt_nolazytime, "nolazytime"},
+ {Opt_usrquota, "usrquota"},
+ {Opt_grpquota, "grpquota"},
{Opt_err, NULL},
};
-/* Sysfs support for f2fs */
-enum {
- GC_THREAD, /* struct f2fs_gc_thread */
- SM_INFO, /* struct f2fs_sm_info */
- DCC_INFO, /* struct discard_cmd_control */
- NM_INFO, /* struct f2fs_nm_info */
- F2FS_SBI, /* struct f2fs_sb_info */
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- FAULT_INFO_RATE, /* struct f2fs_fault_info */
- FAULT_INFO_TYPE, /* struct f2fs_fault_info */
-#endif
-};
-
-struct f2fs_attr {
- struct attribute attr;
- ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *);
- ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *,
- const char *, size_t);
- int struct_type;
- int offset;
-};
-
-static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
-{
- if (struct_type == GC_THREAD)
- return (unsigned char *)sbi->gc_thread;
- else if (struct_type == SM_INFO)
- return (unsigned char *)SM_I(sbi);
- else if (struct_type == DCC_INFO)
- return (unsigned char *)SM_I(sbi)->dcc_info;
- else if (struct_type == NM_INFO)
- return (unsigned char *)NM_I(sbi);
- else if (struct_type == F2FS_SBI)
- return (unsigned char *)sbi;
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- else if (struct_type == FAULT_INFO_RATE ||
- struct_type == FAULT_INFO_TYPE)
- return (unsigned char *)&sbi->fault_info;
-#endif
- return NULL;
-}
-
-static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
- struct f2fs_sb_info *sbi, char *buf)
-{
- struct super_block *sb = sbi->sb;
-
- if (!sb->s_bdev->bd_part)
- return snprintf(buf, PAGE_SIZE, "0\n");
-
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)(sbi->kbytes_written +
- BD_PART_WRITTEN(sbi)));
-}
-
-static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
- struct f2fs_sb_info *sbi, char *buf)
-{
- unsigned char *ptr = NULL;
- unsigned int *ui;
-
- ptr = __struct_ptr(sbi, a->struct_type);
- if (!ptr)
- return -EINVAL;
-
- ui = (unsigned int *)(ptr + a->offset);
-
- return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
-}
-
-static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
- struct f2fs_sb_info *sbi,
- const char *buf, size_t count)
-{
- unsigned char *ptr;
- unsigned long t;
- unsigned int *ui;
- ssize_t ret;
-
- ptr = __struct_ptr(sbi, a->struct_type);
- if (!ptr)
- return -EINVAL;
-
- ui = (unsigned int *)(ptr + a->offset);
-
- ret = kstrtoul(skip_spaces(buf), 0, &t);
- if (ret < 0)
- return ret;
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX))
- return -EINVAL;
-#endif
- *ui = t;
- return count;
-}
-
-static ssize_t f2fs_attr_show(struct kobject *kobj,
- struct attribute *attr, char *buf)
-{
- struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
- s_kobj);
- struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
-
- return a->show ? a->show(a, sbi, buf) : 0;
-}
-
-static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr,
- const char *buf, size_t len)
-{
- struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
- s_kobj);
- struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
-
- return a->store ? a->store(a, sbi, buf, len) : 0;
-}
-
-static void f2fs_sb_release(struct kobject *kobj)
-{
- struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
- s_kobj);
- complete(&sbi->s_kobj_unregister);
-}
-
-#define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \
-static struct f2fs_attr f2fs_attr_##_name = { \
- .attr = {.name = __stringify(_name), .mode = _mode }, \
- .show = _show, \
- .store = _store, \
- .struct_type = _struct_type, \
- .offset = _offset \
-}
-
-#define F2FS_RW_ATTR(struct_type, struct_name, name, elname) \
- F2FS_ATTR_OFFSET(struct_type, name, 0644, \
- f2fs_sbi_show, f2fs_sbi_store, \
- offsetof(struct struct_name, elname))
-
-#define F2FS_GENERAL_RO_ATTR(name) \
-static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL)
-
-F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
-F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
-F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
-F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
-F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
-F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
-F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
-F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
-#ifdef CONFIG_F2FS_FAULT_INJECTION
-F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate);
-F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
-#endif
-F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
-
-#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
-static struct attribute *f2fs_attrs[] = {
- ATTR_LIST(gc_min_sleep_time),
- ATTR_LIST(gc_max_sleep_time),
- ATTR_LIST(gc_no_gc_sleep_time),
- ATTR_LIST(gc_idle),
- ATTR_LIST(reclaim_segments),
- ATTR_LIST(max_small_discards),
- ATTR_LIST(batched_trim_sections),
- ATTR_LIST(ipu_policy),
- ATTR_LIST(min_ipu_util),
- ATTR_LIST(min_fsync_blocks),
- ATTR_LIST(min_hot_blocks),
- ATTR_LIST(max_victim_search),
- ATTR_LIST(dir_level),
- ATTR_LIST(ram_thresh),
- ATTR_LIST(ra_nid_pages),
- ATTR_LIST(dirty_nats_ratio),
- ATTR_LIST(cp_interval),
- ATTR_LIST(idle_interval),
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- ATTR_LIST(inject_rate),
- ATTR_LIST(inject_type),
-#endif
- ATTR_LIST(lifetime_write_kbytes),
- NULL,
-};
-
-static const struct sysfs_ops f2fs_attr_ops = {
- .show = f2fs_attr_show,
- .store = f2fs_attr_store,
-};
-
-static struct kobj_type f2fs_ktype = {
- .default_attrs = f2fs_attrs,
- .sysfs_ops = &f2fs_attr_ops,
- .release = f2fs_sb_release,
-};
-
void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...)
{
struct va_format vaf;
@@ -585,6 +385,20 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_nolazytime:
sb->s_flags &= ~MS_LAZYTIME;
break;
+#ifdef CONFIG_QUOTA
+ case Opt_usrquota:
+ set_opt(sbi, USRQUOTA);
+ break;
+ case Opt_grpquota:
+ set_opt(sbi, GRPQUOTA);
+ break;
+#else
+ case Opt_usrquota:
+ case Opt_grpquota:
+ f2fs_msg(sb, KERN_INFO,
+ "quota operations not supported");
+ break;
+#endif
default:
f2fs_msg(sb, KERN_ERR,
"Unrecognized mount option \"%s\" or missing value",
@@ -624,7 +438,12 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
mutex_init(&fi->inmem_lock);
init_rwsem(&fi->dio_rwsem[READ]);
init_rwsem(&fi->dio_rwsem[WRITE]);
+ init_rwsem(&fi->i_mmap_sem);
+#ifdef CONFIG_QUOTA
+ memset(&fi->i_dquot, 0, sizeof(fi->i_dquot));
+ fi->i_reserved_quota = 0;
+#endif
/* Will be used by directory only */
fi->i_dir_level = F2FS_SB(sb)->dir_level;
return &fi->vfs_inode;
@@ -765,18 +584,13 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
kfree(sbi->devs);
}
+static void f2fs_quota_off_umount(struct super_block *sb);
static void f2fs_put_super(struct super_block *sb)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ int i;
- if (sbi->s_proc) {
- remove_proc_entry("segment_info", sbi->s_proc);
- remove_proc_entry("segment_bits", sbi->s_proc);
- remove_proc_entry(sb->s_id, f2fs_proc_root);
- }
- kobject_del(&sbi->s_kobj);
-
- stop_gc_thread(sbi);
+ f2fs_quota_off_umount(sb);
/* prevent remaining shrinker jobs */
mutex_lock(&sbi->umount_mutex);
@@ -797,7 +611,7 @@ static void f2fs_put_super(struct super_block *sb)
/* be sure to wait for any on-going discard commands */
f2fs_wait_discard_bios(sbi);
- if (!sbi->discard_blks) {
+ if (f2fs_discard_en(sbi) && !sbi->discard_blks) {
struct cp_control cpc = {
.reason = CP_UMOUNT | CP_TRIMMED,
};
@@ -817,7 +631,7 @@ static void f2fs_put_super(struct super_block *sb)
mutex_unlock(&sbi->umount_mutex);
/* our cp_error case, we can wait for any writeback page */
- f2fs_flush_merged_bios(sbi);
+ f2fs_flush_merged_writes(sbi);
iput(sbi->node_inode);
iput(sbi->meta_inode);
@@ -827,8 +641,8 @@ static void f2fs_put_super(struct super_block *sb)
destroy_segment_manager(sbi);
kfree(sbi->ckpt);
- kobject_put(&sbi->s_kobj);
- wait_for_completion(&sbi->s_kobj_unregister);
+
+ f2fs_exit_sysfs(sbi);
sb->s_fs_info = NULL;
if (sbi->s_chksum_driver)
@@ -838,6 +652,8 @@ static void f2fs_put_super(struct super_block *sb)
destroy_device_list(sbi);
mempool_destroy(sbi->write_io_dummy);
destroy_percpu_info(sbi);
+ for (i = 0; i < NR_PAGE_TYPE; i++)
+ kfree(sbi->write_io[i]);
kfree(sbi);
}
@@ -888,6 +704,7 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
struct f2fs_sb_info *sbi = F2FS_SB(sb);
u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
block_t total_count, user_block_count, start_count, ovp_count;
+ u64 avail_node_count;
total_count = le64_to_cpu(sbi->raw_super->block_count);
user_block_count = sbi->user_block_count;
@@ -898,11 +715,19 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_blocks = total_count - start_count;
buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count;
- buf->f_bavail = user_block_count - valid_user_blocks(sbi);
+ buf->f_bavail = user_block_count - valid_user_blocks(sbi) -
+ sbi->reserved_blocks;
- buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
- buf->f_ffree = min(buf->f_files - valid_node_count(sbi),
- buf->f_bavail);
+ avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
+
+ if (avail_node_count > user_block_count) {
+ buf->f_files = user_block_count;
+ buf->f_ffree = buf->f_bavail;
+ } else {
+ buf->f_files = avail_node_count;
+ buf->f_ffree = min(avail_node_count - valid_node_count(sbi),
+ buf->f_bavail);
+ }
buf->f_namelen = F2FS_NAME_LEN;
buf->f_fsid.val[0] = (u32)id;
@@ -980,79 +805,19 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (test_opt(sbi, FAULT_INJECTION))
- seq_puts(seq, ",fault_injection");
+ seq_printf(seq, ",fault_injection=%u",
+ sbi->fault_info.inject_rate);
+#endif
+#ifdef CONFIG_QUOTA
+ if (test_opt(sbi, USRQUOTA))
+ seq_puts(seq, ",usrquota");
+ if (test_opt(sbi, GRPQUOTA))
+ seq_puts(seq, ",grpquota");
#endif
return 0;
}
-static int segment_info_seq_show(struct seq_file *seq, void *offset)
-{
- struct super_block *sb = seq->private;
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
- unsigned int total_segs =
- le32_to_cpu(sbi->raw_super->segment_count_main);
- int i;
-
- seq_puts(seq, "format: segment_type|valid_blocks\n"
- "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
-
- for (i = 0; i < total_segs; i++) {
- struct seg_entry *se = get_seg_entry(sbi, i);
-
- if ((i % 10) == 0)
- seq_printf(seq, "%-10d", i);
- seq_printf(seq, "%d|%-3u", se->type,
- get_valid_blocks(sbi, i, false));
- if ((i % 10) == 9 || i == (total_segs - 1))
- seq_putc(seq, '\n');
- else
- seq_putc(seq, ' ');
- }
-
- return 0;
-}
-
-static int segment_bits_seq_show(struct seq_file *seq, void *offset)
-{
- struct super_block *sb = seq->private;
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
- unsigned int total_segs =
- le32_to_cpu(sbi->raw_super->segment_count_main);
- int i, j;
-
- seq_puts(seq, "format: segment_type|valid_blocks|bitmaps\n"
- "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
-
- for (i = 0; i < total_segs; i++) {
- struct seg_entry *se = get_seg_entry(sbi, i);
-
- seq_printf(seq, "%-10d", i);
- seq_printf(seq, "%d|%-3u|", se->type,
- get_valid_blocks(sbi, i, false));
- for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++)
- seq_printf(seq, " %.2x", se->cur_valid_map[j]);
- seq_putc(seq, '\n');
- }
- return 0;
-}
-
-#define F2FS_PROC_FILE_DEF(_name) \
-static int _name##_open_fs(struct inode *inode, struct file *file) \
-{ \
- return single_open(file, _name##_seq_show, PDE_DATA(inode)); \
-} \
- \
-static const struct file_operations f2fs_seq_##_name##_fops = { \
- .open = _name##_open_fs, \
- .read = seq_read, \
- .llseek = seq_lseek, \
- .release = single_release, \
-};
-
-F2FS_PROC_FILE_DEF(segment_info);
-F2FS_PROC_FILE_DEF(segment_bits);
-
static void default_options(struct f2fs_sb_info *sbi)
{
/* init some FS parameters */
@@ -1089,6 +854,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct f2fs_mount_info org_mount_opt;
+ unsigned long old_sb_flags;
int err, active_logs;
bool need_restart_gc = false;
bool need_stop_gc = false;
@@ -1102,6 +868,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
* need to restore them.
*/
org_mount_opt = sbi->mount_opt;
+ old_sb_flags = sb->s_flags;
active_logs = sbi->active_logs;
/* recover superblocks we couldn't write due to previous RO mount */
@@ -1113,7 +880,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
clear_sbi_flag(sbi, SBI_NEED_SB_WRITE);
}
- sbi->mount_opt.opt = 0;
default_options(sbi);
/* parse mount options */
@@ -1128,6 +894,16 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
if (f2fs_readonly(sb) && (*flags & MS_RDONLY))
goto skip;
+ if (!f2fs_readonly(sb) && (*flags & MS_RDONLY)) {
+ err = dquot_suspend(sb, -1);
+ if (err < 0)
+ goto restore_opts;
+ } else {
+ /* dquot_resume needs RW */
+ sb->s_flags &= ~MS_RDONLY;
+ dquot_resume(sb, -1);
+ }
+
/* disallow enable/disable extent_cache dynamically */
if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) {
err = -EINVAL;
@@ -1192,12 +968,237 @@ restore_gc:
restore_opts:
sbi->mount_opt = org_mount_opt;
sbi->active_logs = active_logs;
+ sb->s_flags = old_sb_flags;
#ifdef CONFIG_F2FS_FAULT_INJECTION
sbi->fault_info = ffi;
#endif
return err;
}
+#ifdef CONFIG_QUOTA
+/* Read data from quotafile */
+static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data,
+ size_t len, loff_t off)
+{
+ struct inode *inode = sb_dqopt(sb)->files[type];
+ struct address_space *mapping = inode->i_mapping;
+ block_t blkidx = F2FS_BYTES_TO_BLK(off);
+ int offset = off & (sb->s_blocksize - 1);
+ int tocopy;
+ size_t toread;
+ loff_t i_size = i_size_read(inode);
+ struct page *page;
+ char *kaddr;
+
+ if (off > i_size)
+ return 0;
+
+ if (off + len > i_size)
+ len = i_size - off;
+ toread = len;
+ while (toread > 0) {
+ tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread);
+repeat:
+ page = read_mapping_page(mapping, blkidx, NULL);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
+ lock_page(page);
+
+ if (unlikely(page->mapping != mapping)) {
+ f2fs_put_page(page, 1);
+ goto repeat;
+ }
+ if (unlikely(!PageUptodate(page))) {
+ f2fs_put_page(page, 1);
+ return -EIO;
+ }
+
+ kaddr = kmap_atomic(page);
+ memcpy(data, kaddr + offset, tocopy);
+ kunmap_atomic(kaddr);
+ f2fs_put_page(page, 1);
+
+ offset = 0;
+ toread -= tocopy;
+ data += tocopy;
+ blkidx++;
+ }
+ return len;
+}
+
+/* Write to quotafile */
+static ssize_t f2fs_quota_write(struct super_block *sb, int type,
+ const char *data, size_t len, loff_t off)
+{
+ struct inode *inode = sb_dqopt(sb)->files[type];
+ struct address_space *mapping = inode->i_mapping;
+ const struct address_space_operations *a_ops = mapping->a_ops;
+ int offset = off & (sb->s_blocksize - 1);
+ size_t towrite = len;
+ struct page *page;
+ char *kaddr;
+ int err = 0;
+ int tocopy;
+
+ while (towrite > 0) {
+ tocopy = min_t(unsigned long, sb->s_blocksize - offset,
+ towrite);
+
+ err = a_ops->write_begin(NULL, mapping, off, tocopy, 0,
+ &page, NULL);
+ if (unlikely(err))
+ break;
+
+ kaddr = kmap_atomic(page);
+ memcpy(kaddr + offset, data, tocopy);
+ kunmap_atomic(kaddr);
+ flush_dcache_page(page);
+
+ a_ops->write_end(NULL, mapping, off, tocopy, tocopy,
+ page, NULL);
+ offset = 0;
+ towrite -= tocopy;
+ off += tocopy;
+ data += tocopy;
+ cond_resched();
+ }
+
+ if (len == towrite)
+ return err;
+ inode->i_version++;
+ inode->i_mtime = inode->i_ctime = current_time(inode);
+ f2fs_mark_inode_dirty_sync(inode, false);
+ return len - towrite;
+}
+
+static struct dquot **f2fs_get_dquots(struct inode *inode)
+{
+ return F2FS_I(inode)->i_dquot;
+}
+
+static qsize_t *f2fs_get_reserved_space(struct inode *inode)
+{
+ return &F2FS_I(inode)->i_reserved_quota;
+}
+
+static int f2fs_quota_sync(struct super_block *sb, int type)
+{
+ struct quota_info *dqopt = sb_dqopt(sb);
+ int cnt;
+ int ret;
+
+ ret = dquot_writeback_dquots(sb, type);
+ if (ret)
+ return ret;
+
+ /*
+ * Now when everything is written we can discard the pagecache so
+ * that userspace sees the changes.
+ */
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ if (type != -1 && cnt != type)
+ continue;
+ if (!sb_has_quota_active(sb, cnt))
+ continue;
+
+ ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping);
+ if (ret)
+ return ret;
+
+ inode_lock(dqopt->files[cnt]);
+ truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
+ inode_unlock(dqopt->files[cnt]);
+ }
+ return 0;
+}
+
+static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
+ const struct path *path)
+{
+ struct inode *inode;
+ int err;
+
+ err = f2fs_quota_sync(sb, -1);
+ if (err)
+ return err;
+
+ err = dquot_quota_on(sb, type, format_id, path);
+ if (err)
+ return err;
+
+ inode = d_inode(path->dentry);
+
+ inode_lock(inode);
+ F2FS_I(inode)->i_flags |= FS_NOATIME_FL | FS_IMMUTABLE_FL;
+ inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
+ S_NOATIME | S_IMMUTABLE);
+ inode_unlock(inode);
+ f2fs_mark_inode_dirty_sync(inode, false);
+
+ return 0;
+}
+
+static int f2fs_quota_off(struct super_block *sb, int type)
+{
+ struct inode *inode = sb_dqopt(sb)->files[type];
+ int err;
+
+ if (!inode || !igrab(inode))
+ return dquot_quota_off(sb, type);
+
+ f2fs_quota_sync(sb, -1);
+
+ err = dquot_quota_off(sb, type);
+ if (err)
+ goto out_put;
+
+ inode_lock(inode);
+ F2FS_I(inode)->i_flags &= ~(FS_NOATIME_FL | FS_IMMUTABLE_FL);
+ inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
+ inode_unlock(inode);
+ f2fs_mark_inode_dirty_sync(inode, false);
+out_put:
+ iput(inode);
+ return err;
+}
+
+static void f2fs_quota_off_umount(struct super_block *sb)
+{
+ int type;
+
+ for (type = 0; type < MAXQUOTAS; type++)
+ f2fs_quota_off(sb, type);
+}
+
+static const struct dquot_operations f2fs_quota_operations = {
+ .get_reserved_space = f2fs_get_reserved_space,
+ .write_dquot = dquot_commit,
+ .acquire_dquot = dquot_acquire,
+ .release_dquot = dquot_release,
+ .mark_dirty = dquot_mark_dquot_dirty,
+ .write_info = dquot_commit_info,
+ .alloc_dquot = dquot_alloc,
+ .destroy_dquot = dquot_destroy,
+ .get_next_id = dquot_get_next_id,
+};
+
+static const struct quotactl_ops f2fs_quotactl_ops = {
+ .quota_on = f2fs_quota_on,
+ .quota_off = f2fs_quota_off,
+ .quota_sync = f2fs_quota_sync,
+ .get_state = dquot_get_state,
+ .set_info = dquot_set_dqinfo,
+ .get_dqblk = dquot_get_dqblk,
+ .set_dqblk = dquot_set_dqblk,
+ .get_nextdqblk = dquot_get_next_dqblk,
+};
+#else
+static inline void f2fs_quota_off_umount(struct super_block *sb)
+{
+}
+#endif
+
static struct super_operations f2fs_sops = {
.alloc_inode = f2fs_alloc_inode,
.drop_inode = f2fs_drop_inode,
@@ -1205,6 +1206,11 @@ static struct super_operations f2fs_sops = {
.write_inode = f2fs_write_inode,
.dirty_inode = f2fs_dirty_inode,
.show_options = f2fs_show_options,
+#ifdef CONFIG_QUOTA
+ .quota_read = f2fs_quota_read,
+ .quota_write = f2fs_quota_write,
+ .get_dquots = f2fs_get_dquots,
+#endif
.evict_inode = f2fs_evict_inode,
.put_super = f2fs_put_super,
.sync_fs = f2fs_sync_fs,
@@ -1521,6 +1527,8 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
unsigned int ovp_segments, reserved_segments;
+ unsigned int main_segs, blocks_per_seg;
+ int i;
total = le32_to_cpu(raw_super->segment_count);
fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
@@ -1542,6 +1550,20 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
return 1;
}
+ main_segs = le32_to_cpu(raw_super->segment_count_main);
+ blocks_per_seg = sbi->blocks_per_seg;
+
+ for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
+ if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs ||
+ le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg)
+ return 1;
+ }
+ for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
+ if (le32_to_cpu(ckpt->cur_data_segno[i]) >= main_segs ||
+ le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg)
+ return 1;
+ }
+
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
return 1;
@@ -1552,7 +1574,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
static void init_sb_info(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = sbi->raw_super;
- int i;
+ int i, j;
sbi->log_sectors_per_block =
le32_to_cpu(raw_super->log_sectors_per_block);
@@ -1584,8 +1606,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
INIT_LIST_HEAD(&sbi->s_list);
mutex_init(&sbi->umount_mutex);
- mutex_init(&sbi->wio_mutex[NODE]);
- mutex_init(&sbi->wio_mutex[DATA]);
+ for (i = 0; i < NR_PAGE_TYPE - 1; i++)
+ for (j = HOT; j < NR_TEMP_TYPE; j++)
+ mutex_init(&sbi->wio_mutex[i][j]);
spin_lock_init(&sbi->cp_lock);
}
@@ -1908,6 +1931,7 @@ try_onemore:
if (f2fs_sb_mounted_blkzoned(sb)) {
f2fs_msg(sb, KERN_ERR,
"Zoned block device support is not enabled\n");
+ err = -EOPNOTSUPP;
goto free_sb_buf;
}
#endif
@@ -1929,6 +1953,12 @@ try_onemore:
sb->s_max_links = F2FS_LINK_MAX;
get_random_bytes(&sbi->s_next_generation, sizeof(u32));
+#ifdef CONFIG_QUOTA
+ sb->dq_op = &f2fs_quota_operations;
+ sb->s_qcop = &f2fs_quotactl_ops;
+ sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
+#endif
+
sb->s_op = &f2fs_sops;
sb->s_cop = &f2fs_cryptops;
sb->s_xattr = f2fs_xattr_handlers;
@@ -1950,13 +1980,24 @@ try_onemore:
set_sbi_flag(sbi, SBI_POR_DOING);
spin_lock_init(&sbi->stat_lock);
- init_rwsem(&sbi->read_io.io_rwsem);
- sbi->read_io.sbi = sbi;
- sbi->read_io.bio = NULL;
for (i = 0; i < NR_PAGE_TYPE; i++) {
- init_rwsem(&sbi->write_io[i].io_rwsem);
- sbi->write_io[i].sbi = sbi;
- sbi->write_io[i].bio = NULL;
+ int n = (i == META) ? 1: NR_TEMP_TYPE;
+ int j;
+
+ sbi->write_io[i] = kmalloc(n * sizeof(struct f2fs_bio_info),
+ GFP_KERNEL);
+ if (!sbi->write_io[i]) {
+ err = -ENOMEM;
+ goto free_options;
+ }
+
+ for (j = HOT; j < n; j++) {
+ init_rwsem(&sbi->write_io[i][j].io_rwsem);
+ sbi->write_io[i][j].sbi = sbi;
+ sbi->write_io[i][j].bio = NULL;
+ spin_lock_init(&sbi->write_io[i][j].io_lock);
+ INIT_LIST_HEAD(&sbi->write_io[i][j].io_list);
+ }
}
init_rwsem(&sbi->cp_rwsem);
@@ -1970,8 +2011,10 @@ try_onemore:
if (F2FS_IO_SIZE(sbi) > 1) {
sbi->write_io_dummy =
mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
- if (!sbi->write_io_dummy)
+ if (!sbi->write_io_dummy) {
+ err = -ENOMEM;
goto free_options;
+ }
}
/* get an inode for meta space */
@@ -2003,6 +2046,7 @@ try_onemore:
sbi->total_valid_block_count =
le64_to_cpu(sbi->ckpt->valid_block_count);
sbi->last_valid_block_count = sbi->total_valid_block_count;
+ sbi->reserved_blocks = 0;
for (i = 0; i < NR_INODE_TYPE; i++) {
INIT_LIST_HEAD(&sbi->inode_list[i]);
@@ -2078,22 +2122,9 @@ try_onemore:
goto free_root_inode;
}
- if (f2fs_proc_root)
- sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
-
- if (sbi->s_proc) {
- proc_create_data("segment_info", S_IRUGO, sbi->s_proc,
- &f2fs_seq_segment_info_fops, sb);
- proc_create_data("segment_bits", S_IRUGO, sbi->s_proc,
- &f2fs_seq_segment_bits_fops, sb);
- }
-
- sbi->s_kobj.kset = f2fs_kset;
- init_completion(&sbi->s_kobj_unregister);
- err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL,
- "%s", sb->s_id);
+ err = f2fs_init_sysfs(sbi);
if (err)
- goto free_proc;
+ goto free_root_inode;
/* recover fsynced data */
if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
@@ -2104,7 +2135,7 @@ try_onemore:
if (bdev_read_only(sb->s_bdev) &&
!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
err = -EROFS;
- goto free_kobj;
+ goto free_sysfs;
}
if (need_fsck)
@@ -2118,7 +2149,7 @@ try_onemore:
need_fsck = true;
f2fs_msg(sb, KERN_ERR,
"Cannot recover all fsync data errno=%d", err);
- goto free_kobj;
+ goto free_sysfs;
}
} else {
err = recover_fsync_data(sbi, true);
@@ -2127,7 +2158,7 @@ try_onemore:
err = -EINVAL;
f2fs_msg(sb, KERN_ERR,
"Need to recover fsync data");
- goto free_kobj;
+ goto free_sysfs;
}
}
skip_recovery:
@@ -2142,7 +2173,7 @@ skip_recovery:
/* After POR, we can run background GC thread.*/
err = start_gc_thread(sbi);
if (err)
- goto free_kobj;
+ goto free_sysfs;
}
kfree(options);
@@ -2160,17 +2191,9 @@ skip_recovery:
f2fs_update_time(sbi, REQ_TIME);
return 0;
-free_kobj:
+free_sysfs:
f2fs_sync_inode_meta(sbi);
- kobject_del(&sbi->s_kobj);
- kobject_put(&sbi->s_kobj);
- wait_for_completion(&sbi->s_kobj_unregister);
-free_proc:
- if (sbi->s_proc) {
- remove_proc_entry("segment_info", sbi->s_proc);
- remove_proc_entry("segment_bits", sbi->s_proc);
- remove_proc_entry(sb->s_id, f2fs_proc_root);
- }
+ f2fs_exit_sysfs(sbi);
free_root_inode:
dput(sb->s_root);
sb->s_root = NULL;
@@ -2202,6 +2225,8 @@ free_meta_inode:
free_io_dummy:
mempool_destroy(sbi->write_io_dummy);
free_options:
+ for (i = 0; i < NR_PAGE_TYPE; i++)
+ kfree(sbi->write_io[i]);
destroy_percpu_info(sbi);
kfree(options);
free_sb_buf:
@@ -2228,8 +2253,11 @@ static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
static void kill_f2fs_super(struct super_block *sb)
{
- if (sb->s_root)
+ if (sb->s_root) {
set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE);
+ stop_gc_thread(F2FS_SB(sb));
+ stop_discard_thread(F2FS_SB(sb));
+ }
kill_block_super(sb);
}
@@ -2283,30 +2311,26 @@ static int __init init_f2fs_fs(void)
err = create_extent_cache();
if (err)
goto free_checkpoint_caches;
- f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj);
- if (!f2fs_kset) {
- err = -ENOMEM;
+ err = f2fs_register_sysfs();
+ if (err)
goto free_extent_cache;
- }
err = register_shrinker(&f2fs_shrinker_info);
if (err)
- goto free_kset;
-
+ goto free_sysfs;
err = register_filesystem(&f2fs_fs_type);
if (err)
goto free_shrinker;
err = f2fs_create_root_stats();
if (err)
goto free_filesystem;
- f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
return 0;
free_filesystem:
unregister_filesystem(&f2fs_fs_type);
free_shrinker:
unregister_shrinker(&f2fs_shrinker_info);
-free_kset:
- kset_unregister(f2fs_kset);
+free_sysfs:
+ f2fs_unregister_sysfs();
free_extent_cache:
destroy_extent_cache();
free_checkpoint_caches:
@@ -2323,11 +2347,10 @@ fail:
static void __exit exit_f2fs_fs(void)
{
- remove_proc_entry("fs/f2fs", NULL);
f2fs_destroy_root_stats();
unregister_filesystem(&f2fs_fs_type);
unregister_shrinker(&f2fs_shrinker_info);
- kset_unregister(f2fs_kset);
+ f2fs_unregister_sysfs();
destroy_extent_cache();
destroy_checkpoint_caches();
destroy_segment_manager_caches();
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
new file mode 100644
index 000000000000..9adc202fcd6f
--- /dev/null
+++ b/fs/f2fs/sysfs.c
@@ -0,0 +1,364 @@
+/*
+ * f2fs sysfs interface
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ * Copyright (c) 2017 Chao Yu <chao@kernel.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/proc_fs.h>
+#include <linux/f2fs_fs.h>
+
+#include "f2fs.h"
+#include "segment.h"
+#include "gc.h"
+
+static struct proc_dir_entry *f2fs_proc_root;
+static struct kset *f2fs_kset;
+
+/* Sysfs support for f2fs */
+enum {
+ GC_THREAD, /* struct f2fs_gc_thread */
+ SM_INFO, /* struct f2fs_sm_info */
+ DCC_INFO, /* struct discard_cmd_control */
+ NM_INFO, /* struct f2fs_nm_info */
+ F2FS_SBI, /* struct f2fs_sb_info */
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ FAULT_INFO_RATE, /* struct f2fs_fault_info */
+ FAULT_INFO_TYPE, /* struct f2fs_fault_info */
+#endif
+ RESERVED_BLOCKS,
+};
+
+struct f2fs_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *);
+ ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *,
+ const char *, size_t);
+ int struct_type;
+ int offset;
+};
+
+static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
+{
+ if (struct_type == GC_THREAD)
+ return (unsigned char *)sbi->gc_thread;
+ else if (struct_type == SM_INFO)
+ return (unsigned char *)SM_I(sbi);
+ else if (struct_type == DCC_INFO)
+ return (unsigned char *)SM_I(sbi)->dcc_info;
+ else if (struct_type == NM_INFO)
+ return (unsigned char *)NM_I(sbi);
+ else if (struct_type == F2FS_SBI || struct_type == RESERVED_BLOCKS)
+ return (unsigned char *)sbi;
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ else if (struct_type == FAULT_INFO_RATE ||
+ struct_type == FAULT_INFO_TYPE)
+ return (unsigned char *)&sbi->fault_info;
+#endif
+ return NULL;
+}
+
+static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ struct super_block *sb = sbi->sb;
+
+ if (!sb->s_bdev->bd_part)
+ return snprintf(buf, PAGE_SIZE, "0\n");
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)(sbi->kbytes_written +
+ BD_PART_WRITTEN(sbi)));
+}
+
+static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ unsigned char *ptr = NULL;
+ unsigned int *ui;
+
+ ptr = __struct_ptr(sbi, a->struct_type);
+ if (!ptr)
+ return -EINVAL;
+
+ ui = (unsigned int *)(ptr + a->offset);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
+}
+
+static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi,
+ const char *buf, size_t count)
+{
+ unsigned char *ptr;
+ unsigned long t;
+ unsigned int *ui;
+ ssize_t ret;
+
+ ptr = __struct_ptr(sbi, a->struct_type);
+ if (!ptr)
+ return -EINVAL;
+
+ ui = (unsigned int *)(ptr + a->offset);
+
+ ret = kstrtoul(skip_spaces(buf), 0, &t);
+ if (ret < 0)
+ return ret;
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX))
+ return -EINVAL;
+#endif
+ if (a->struct_type == RESERVED_BLOCKS) {
+ spin_lock(&sbi->stat_lock);
+ if ((unsigned long)sbi->total_valid_block_count + t >
+ (unsigned long)sbi->user_block_count) {
+ spin_unlock(&sbi->stat_lock);
+ return -EINVAL;
+ }
+ *ui = t;
+ spin_unlock(&sbi->stat_lock);
+ return count;
+ }
+ *ui = t;
+ return count;
+}
+
+static ssize_t f2fs_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
+ s_kobj);
+ struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
+
+ return a->show ? a->show(a, sbi, buf) : 0;
+}
+
+static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t len)
+{
+ struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
+ s_kobj);
+ struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
+
+ return a->store ? a->store(a, sbi, buf, len) : 0;
+}
+
+static void f2fs_sb_release(struct kobject *kobj)
+{
+ struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
+ s_kobj);
+ complete(&sbi->s_kobj_unregister);
+}
+
+#define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \
+static struct f2fs_attr f2fs_attr_##_name = { \
+ .attr = {.name = __stringify(_name), .mode = _mode }, \
+ .show = _show, \
+ .store = _store, \
+ .struct_type = _struct_type, \
+ .offset = _offset \
+}
+
+#define F2FS_RW_ATTR(struct_type, struct_name, name, elname) \
+ F2FS_ATTR_OFFSET(struct_type, name, 0644, \
+ f2fs_sbi_show, f2fs_sbi_store, \
+ offsetof(struct struct_name, elname))
+
+#define F2FS_GENERAL_RO_ATTR(name) \
+static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL)
+
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
+F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
+F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
+F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
+F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
+F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate);
+F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
+#endif
+F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
+
+#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
+static struct attribute *f2fs_attrs[] = {
+ ATTR_LIST(gc_min_sleep_time),
+ ATTR_LIST(gc_max_sleep_time),
+ ATTR_LIST(gc_no_gc_sleep_time),
+ ATTR_LIST(gc_idle),
+ ATTR_LIST(reclaim_segments),
+ ATTR_LIST(max_small_discards),
+ ATTR_LIST(batched_trim_sections),
+ ATTR_LIST(ipu_policy),
+ ATTR_LIST(min_ipu_util),
+ ATTR_LIST(min_fsync_blocks),
+ ATTR_LIST(min_hot_blocks),
+ ATTR_LIST(max_victim_search),
+ ATTR_LIST(dir_level),
+ ATTR_LIST(ram_thresh),
+ ATTR_LIST(ra_nid_pages),
+ ATTR_LIST(dirty_nats_ratio),
+ ATTR_LIST(cp_interval),
+ ATTR_LIST(idle_interval),
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ ATTR_LIST(inject_rate),
+ ATTR_LIST(inject_type),
+#endif
+ ATTR_LIST(lifetime_write_kbytes),
+ ATTR_LIST(reserved_blocks),
+ NULL,
+};
+
+static const struct sysfs_ops f2fs_attr_ops = {
+ .show = f2fs_attr_show,
+ .store = f2fs_attr_store,
+};
+
+static struct kobj_type f2fs_ktype = {
+ .default_attrs = f2fs_attrs,
+ .sysfs_ops = &f2fs_attr_ops,
+ .release = f2fs_sb_release,
+};
+
+static int segment_info_seq_show(struct seq_file *seq, void *offset)
+{
+ struct super_block *sb = seq->private;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ unsigned int total_segs =
+ le32_to_cpu(sbi->raw_super->segment_count_main);
+ int i;
+
+ seq_puts(seq, "format: segment_type|valid_blocks\n"
+ "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
+
+ for (i = 0; i < total_segs; i++) {
+ struct seg_entry *se = get_seg_entry(sbi, i);
+
+ if ((i % 10) == 0)
+ seq_printf(seq, "%-10d", i);
+ seq_printf(seq, "%d|%-3u", se->type,
+ get_valid_blocks(sbi, i, false));
+ if ((i % 10) == 9 || i == (total_segs - 1))
+ seq_putc(seq, '\n');
+ else
+ seq_putc(seq, ' ');
+ }
+
+ return 0;
+}
+
+static int segment_bits_seq_show(struct seq_file *seq, void *offset)
+{
+ struct super_block *sb = seq->private;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ unsigned int total_segs =
+ le32_to_cpu(sbi->raw_super->segment_count_main);
+ int i, j;
+
+ seq_puts(seq, "format: segment_type|valid_blocks|bitmaps\n"
+ "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
+
+ for (i = 0; i < total_segs; i++) {
+ struct seg_entry *se = get_seg_entry(sbi, i);
+
+ seq_printf(seq, "%-10d", i);
+ seq_printf(seq, "%d|%-3u|", se->type,
+ get_valid_blocks(sbi, i, false));
+ for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++)
+ seq_printf(seq, " %.2x", se->cur_valid_map[j]);
+ seq_putc(seq, '\n');
+ }
+ return 0;
+}
+
+#define F2FS_PROC_FILE_DEF(_name) \
+static int _name##_open_fs(struct inode *inode, struct file *file) \
+{ \
+ return single_open(file, _name##_seq_show, PDE_DATA(inode)); \
+} \
+ \
+static const struct file_operations f2fs_seq_##_name##_fops = { \
+ .open = _name##_open_fs, \
+ .read = seq_read, \
+ .llseek = seq_lseek, \
+ .release = single_release, \
+};
+
+F2FS_PROC_FILE_DEF(segment_info);
+F2FS_PROC_FILE_DEF(segment_bits);
+
+int __init f2fs_register_sysfs(void)
+{
+ f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
+
+ f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj);
+ if (!f2fs_kset)
+ return -ENOMEM;
+ return 0;
+}
+
+void f2fs_unregister_sysfs(void)
+{
+ kset_unregister(f2fs_kset);
+ remove_proc_entry("fs/f2fs", NULL);
+}
+
+int f2fs_init_sysfs(struct f2fs_sb_info *sbi)
+{
+ struct super_block *sb = sbi->sb;
+ int err;
+
+ if (f2fs_proc_root)
+ sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
+
+ if (sbi->s_proc) {
+ proc_create_data("segment_info", S_IRUGO, sbi->s_proc,
+ &f2fs_seq_segment_info_fops, sb);
+ proc_create_data("segment_bits", S_IRUGO, sbi->s_proc,
+ &f2fs_seq_segment_bits_fops, sb);
+ }
+
+ sbi->s_kobj.kset = f2fs_kset;
+ init_completion(&sbi->s_kobj_unregister);
+ err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL,
+ "%s", sb->s_id);
+ if (err)
+ goto err_out;
+ return 0;
+err_out:
+ if (sbi->s_proc) {
+ remove_proc_entry("segment_info", sbi->s_proc);
+ remove_proc_entry("segment_bits", sbi->s_proc);
+ remove_proc_entry(sb->s_id, f2fs_proc_root);
+ }
+ return err;
+}
+
+void f2fs_exit_sysfs(struct f2fs_sb_info *sbi)
+{
+ kobject_del(&sbi->s_kobj);
+ kobject_put(&sbi->s_kobj);
+ wait_for_completion(&sbi->s_kobj_unregister);
+
+ if (sbi->s_proc) {
+ remove_proc_entry("segment_info", sbi->s_proc);
+ remove_proc_entry("segment_bits", sbi->s_proc);
+ remove_proc_entry(sbi->sb->s_id, f2fs_proc_root);
+ }
+}
diff --git a/fs/fcntl.c b/fs/fcntl.c
index f4e7267d117f..3b01b646e528 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -109,20 +109,34 @@ void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
}
EXPORT_SYMBOL(__f_setown);
-void f_setown(struct file *filp, unsigned long arg, int force)
+int f_setown(struct file *filp, unsigned long arg, int force)
{
enum pid_type type;
- struct pid *pid;
- int who = arg;
+ struct pid *pid = NULL;
+ int who = arg, ret = 0;
+
type = PIDTYPE_PID;
if (who < 0) {
+ /* avoid overflow below */
+ if (who == INT_MIN)
+ return -EINVAL;
+
type = PIDTYPE_PGID;
who = -who;
}
+
rcu_read_lock();
- pid = find_vpid(who);
- __f_setown(filp, pid, type, force);
+ if (who) {
+ pid = find_vpid(who);
+ if (!pid)
+ ret = -ESRCH;
+ }
+
+ if (!ret)
+ __f_setown(filp, pid, type, force);
rcu_read_unlock();
+
+ return ret;
}
EXPORT_SYMBOL(f_setown);
@@ -243,9 +257,72 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
}
#endif
+/* Return true if @hint is one of the write-lifetime hints accepted by fcntl. */
+static bool rw_hint_valid(enum rw_hint hint)
+{
+	return hint == RWF_WRITE_LIFE_NOT_SET ||
+	       hint == RWH_WRITE_LIFE_NONE ||
+	       hint == RWH_WRITE_LIFE_SHORT ||
+	       hint == RWH_WRITE_LIFE_MEDIUM ||
+	       hint == RWH_WRITE_LIFE_LONG ||
+	       hint == RWH_WRITE_LIFE_EXTREME;
+}
+
+
+/*
+ * Get or set a write-lifetime hint.
+ *
+ * @file: open file the fcntl was issued on
+ * @cmd:  F_GET_FILE_RW_HINT / F_SET_FILE_RW_HINT act on file->f_write_hint,
+ *        F_GET_RW_HINT / F_SET_RW_HINT act on the inode's i_write_hint
+ * @arg:  user pointer to a u64 that receives (GET) or supplies (SET) the hint
+ *
+ * Returns 0 on success, -EFAULT if the user buffer cannot be accessed and
+ * -EINVAL for an unknown command or an invalid hint value.
+ */
+static long fcntl_rw_hint(struct file *file, unsigned int cmd,
+			  unsigned long arg)
+{
+	struct inode *inode = file_inode(file);
+	u64 __user *argp = (u64 __user *)arg;
+	enum rw_hint hint;
+	u64 h;
+
+	switch (cmd) {
+	case F_GET_FILE_RW_HINT:
+		h = file_write_hint(file);
+		if (copy_to_user(argp, &h, sizeof(*argp)))
+			return -EFAULT;
+		return 0;
+	case F_SET_FILE_RW_HINT:
+		if (copy_from_user(&h, argp, sizeof(h)))
+			return -EFAULT;
+		hint = (enum rw_hint) h;
+		/*
+		 * The round-trip comparison rejects 64-bit values that only
+		 * look like a valid hint once narrowed to the enum.
+		 */
+		if ((u64)hint != h || !rw_hint_valid(hint))
+			return -EINVAL;
+
+		spin_lock(&file->f_lock);
+		file->f_write_hint = hint;
+		spin_unlock(&file->f_lock);
+		return 0;
+	case F_GET_RW_HINT:
+		h = inode->i_write_hint;
+		if (copy_to_user(argp, &h, sizeof(*argp)))
+			return -EFAULT;
+		return 0;
+	case F_SET_RW_HINT:
+		if (copy_from_user(&h, argp, sizeof(h)))
+			return -EFAULT;
+		hint = (enum rw_hint) h;
+		/* Same narrowing check as for F_SET_FILE_RW_HINT. */
+		if ((u64)hint != h || !rw_hint_valid(hint))
+			return -EINVAL;
+
+		inode_lock(inode);
+		inode->i_write_hint = hint;
+		inode_unlock(inode);
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
struct file *filp)
{
+ void __user *argp = (void __user *)arg;
+ struct flock flock;
long err = -EINVAL;
switch (cmd) {
@@ -273,7 +350,11 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
case F_OFD_GETLK:
#endif
case F_GETLK:
- err = fcntl_getlk(filp, cmd, (struct flock __user *) arg);
+ if (copy_from_user(&flock, argp, sizeof(flock)))
+ return -EFAULT;
+ err = fcntl_getlk(filp, cmd, &flock);
+ if (!err && copy_to_user(argp, &flock, sizeof(flock)))
+ return -EFAULT;
break;
#if BITS_PER_LONG != 32
/* 32-bit arches must use fcntl64() */
@@ -283,7 +364,9 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
/* Fallthrough */
case F_SETLK:
case F_SETLKW:
- err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg);
+ if (copy_from_user(&flock, argp, sizeof(flock)))
+ return -EFAULT;
+ err = fcntl_setlk(fd, filp, cmd, &flock);
break;
case F_GETOWN:
/*
@@ -297,8 +380,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
force_successful_syscall_return();
break;
case F_SETOWN:
- f_setown(filp, arg, 1);
- err = 0;
+ err = f_setown(filp, arg, 1);
break;
case F_GETOWN_EX:
err = f_getown_ex(filp, arg);
@@ -337,6 +419,12 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
case F_GET_SEALS:
err = shmem_fcntl(filp, cmd, arg);
break;
+ case F_GET_RW_HINT:
+ case F_SET_RW_HINT:
+ case F_GET_FILE_RW_HINT:
+ case F_SET_FILE_RW_HINT:
+ err = fcntl_rw_hint(filp, cmd, arg);
+ break;
default:
break;
}
@@ -383,7 +471,9 @@ out:
SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
unsigned long, arg)
{
+ void __user *argp = (void __user *)arg;
struct fd f = fdget_raw(fd);
+ struct flock64 flock;
long err = -EBADF;
if (!f.file)
@@ -401,14 +491,21 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
switch (cmd) {
case F_GETLK64:
case F_OFD_GETLK:
- err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg);
+ err = -EFAULT;
+ if (copy_from_user(&flock, argp, sizeof(flock)))
+ break;
+ err = fcntl_getlk64(f.file, cmd, &flock);
+ if (!err && copy_to_user(argp, &flock, sizeof(flock)))
+ err = -EFAULT;
break;
case F_SETLK64:
case F_SETLKW64:
case F_OFD_SETLK:
case F_OFD_SETLKW:
- err = fcntl_setlk64(fd, f.file, cmd,
- (struct flock64 __user *) arg);
+ err = -EFAULT;
+ if (copy_from_user(&flock, argp, sizeof(flock)))
+ break;
+ err = fcntl_setlk64(fd, f.file, cmd, &flock);
break;
default:
err = do_fcntl(fd, cmd, arg, f.file);
@@ -422,57 +519,56 @@ out:
#endif
#ifdef CONFIG_COMPAT
-static int get_compat_flock(struct flock *kfl, struct compat_flock __user *ufl)
-{
- if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)) ||
- __get_user(kfl->l_type, &ufl->l_type) ||
- __get_user(kfl->l_whence, &ufl->l_whence) ||
- __get_user(kfl->l_start, &ufl->l_start) ||
- __get_user(kfl->l_len, &ufl->l_len) ||
- __get_user(kfl->l_pid, &ufl->l_pid))
+/*
+ * Copy l_type, l_whence, l_start, l_len and l_pid from @src to @dst, which
+ * may be different flock-like structures (native or compat).
+ *
+ * careful - don't use anywhere else: both arguments are evaluated once per
+ * field, and the macro is #undef'ed right after the compat helpers.  It is
+ * wrapped in do { } while (0) so that it expands to a single statement.
+ */
+#define copy_flock_fields(dst, src)			\
+	do {						\
+		(dst)->l_type = (src)->l_type;		\
+		(dst)->l_whence = (src)->l_whence;	\
+		(dst)->l_start = (src)->l_start;	\
+		(dst)->l_len = (src)->l_len;		\
+		(dst)->l_pid = (src)->l_pid;		\
+	} while (0)
+
+static int get_compat_flock(struct flock *kfl, const struct compat_flock __user *ufl)
+{
+ struct compat_flock fl;
+
+ if (copy_from_user(&fl, ufl, sizeof(struct compat_flock)))
return -EFAULT;
+ copy_flock_fields(kfl, &fl);
return 0;
}
-static int put_compat_flock(struct flock *kfl, struct compat_flock __user *ufl)
+static int get_compat_flock64(struct flock *kfl, const struct compat_flock64 __user *ufl)
{
- if (!access_ok(VERIFY_WRITE, ufl, sizeof(*ufl)) ||
- __put_user(kfl->l_type, &ufl->l_type) ||
- __put_user(kfl->l_whence, &ufl->l_whence) ||
- __put_user(kfl->l_start, &ufl->l_start) ||
- __put_user(kfl->l_len, &ufl->l_len) ||
- __put_user(kfl->l_pid, &ufl->l_pid))
+ struct compat_flock64 fl;
+
+ if (copy_from_user(&fl, ufl, sizeof(struct compat_flock64)))
return -EFAULT;
+ copy_flock_fields(kfl, &fl);
return 0;
}
-#ifndef HAVE_ARCH_GET_COMPAT_FLOCK64
-static int get_compat_flock64(struct flock *kfl, struct compat_flock64 __user *ufl)
+static int put_compat_flock(const struct flock *kfl, struct compat_flock __user *ufl)
{
- if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)) ||
- __get_user(kfl->l_type, &ufl->l_type) ||
- __get_user(kfl->l_whence, &ufl->l_whence) ||
- __get_user(kfl->l_start, &ufl->l_start) ||
- __get_user(kfl->l_len, &ufl->l_len) ||
- __get_user(kfl->l_pid, &ufl->l_pid))
+ struct compat_flock fl;
+
+ memset(&fl, 0, sizeof(struct compat_flock));
+ copy_flock_fields(&fl, kfl);
+ if (copy_to_user(ufl, &fl, sizeof(struct compat_flock)))
return -EFAULT;
return 0;
}
-#endif
-#ifndef HAVE_ARCH_PUT_COMPAT_FLOCK64
-static int put_compat_flock64(struct flock *kfl, struct compat_flock64 __user *ufl)
+static int put_compat_flock64(const struct flock *kfl, struct compat_flock64 __user *ufl)
{
- if (!access_ok(VERIFY_WRITE, ufl, sizeof(*ufl)) ||
- __put_user(kfl->l_type, &ufl->l_type) ||
- __put_user(kfl->l_whence, &ufl->l_whence) ||
- __put_user(kfl->l_start, &ufl->l_start) ||
- __put_user(kfl->l_len, &ufl->l_len) ||
- __put_user(kfl->l_pid, &ufl->l_pid))
+ struct compat_flock64 fl;
+
+ memset(&fl, 0, sizeof(struct compat_flock64));
+ copy_flock_fields(&fl, kfl);
+ if (copy_to_user(ufl, &fl, sizeof(struct compat_flock64)))
return -EFAULT;
return 0;
}
-#endif
+#undef copy_flock_fields
static unsigned int
convert_fcntl_cmd(unsigned int cmd)
@@ -489,76 +585,92 @@ convert_fcntl_cmd(unsigned int cmd)
return cmd;
}
+/*
+ * Make a successful GETLK result fit the compat structure.
+ *
+ * An l_start beyond COMPAT_OFF_T_MAX cannot be represented, so the call
+ * fails with -EOVERFLOW.  An oversized l_len is clamped to COMPAT_OFF_T_MAX
+ * instead, so the caller still sees the part of the conflicting lock that
+ * can make sense to it.
+ *
+ * Returns 0 on success (possibly after clamping l_len) or -EOVERFLOW.
+ */
+static int fixup_compat_flock(struct flock *flock)
+{
+	if (flock->l_start > COMPAT_OFF_T_MAX)
+		return -EOVERFLOW;
+
+	flock->l_len = flock->l_len > COMPAT_OFF_T_MAX ?
+			COMPAT_OFF_T_MAX : flock->l_len;
+	return 0;
+}
+
+/*
+ * Compat fcntl64(): translate compat_flock / compat_flock64 arguments to a
+ * native struct flock, run the lock operation, and hand everything else to
+ * do_fcntl().  Every exit after fdget_raw() succeeded goes through out_put
+ * so that the fd reference is dropped.
+ */
COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
compat_ulong_t, arg)
{
- mm_segment_t old_fs;
- struct flock f;
- long ret;
- unsigned int conv_cmd;
+ struct fd f = fdget_raw(fd);
+ struct flock flock;
+ long err = -EBADF;
+
+ if (!f.file)
+ return err;
+
+ if (unlikely(f.file->f_mode & FMODE_PATH)) {
+ if (!check_fcntl_cmd(cmd))
+ goto out_put;
+ }
+
+ err = security_file_fcntl(f.file, cmd, arg);
+ if (err)
+ goto out_put;
switch (cmd) {
case F_GETLK:
+ err = get_compat_flock(&flock, compat_ptr(arg));
+ if (err)
+ break;
+ err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
+ if (err)
+ break;
+ err = fixup_compat_flock(&flock);
+ if (err)
+ break; /* not "return": the fd reference must still be put */
+ err = put_compat_flock(&flock, compat_ptr(arg));
+ break;
+ case F_GETLK64:
+ case F_OFD_GETLK:
+ err = get_compat_flock64(&flock, compat_ptr(arg));
+ if (err)
+ break;
+ err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
+ if (err)
+ break;
+ /* 64-bit layout: bound by COMPAT_LOFF_T_MAX, as the replaced code did */
+ if (flock.l_start > COMPAT_LOFF_T_MAX) {
+ err = -EOVERFLOW;
+ break;
+ }
+ if (flock.l_len > COMPAT_LOFF_T_MAX)
+ flock.l_len = COMPAT_LOFF_T_MAX;
+ err = put_compat_flock64(&flock, compat_ptr(arg));
+ break;
case F_SETLK:
case F_SETLKW:
- ret = get_compat_flock(&f, compat_ptr(arg));
- if (ret != 0)
+ err = get_compat_flock(&flock, compat_ptr(arg));
+ if (err)
break;
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- ret = sys_fcntl(fd, cmd, (unsigned long)&f);
- set_fs(old_fs);
- if (cmd == F_GETLK && ret == 0) {
- /* GETLK was successful and we need to return the data...
- * but it needs to fit in the compat structure.
- * l_start shouldn't be too big, unless the original
- * start + end is greater than COMPAT_OFF_T_MAX, in which
- * case the app was asking for trouble, so we return
- * -EOVERFLOW in that case.
- * l_len could be too big, in which case we just truncate it,
- * and only allow the app to see that part of the conflicting
- * lock that might make sense to it anyway
- */
-
- if (f.l_start > COMPAT_OFF_T_MAX)
- ret = -EOVERFLOW;
- if (f.l_len > COMPAT_OFF_T_MAX)
- f.l_len = COMPAT_OFF_T_MAX;
- if (ret == 0)
- ret = put_compat_flock(&f, compat_ptr(arg));
- }
+ err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
break;
-
- case F_GETLK64:
case F_SETLK64:
case F_SETLKW64:
- case F_OFD_GETLK:
case F_OFD_SETLK:
case F_OFD_SETLKW:
- ret = get_compat_flock64(&f, compat_ptr(arg));
- if (ret != 0)
+ err = get_compat_flock64(&flock, compat_ptr(arg));
+ if (err)
break;
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- conv_cmd = convert_fcntl_cmd(cmd);
- ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f);
- set_fs(old_fs);
- if ((conv_cmd == F_GETLK || conv_cmd == F_OFD_GETLK) && ret == 0) {
- /* need to return lock information - see above for commentary */
- if (f.l_start > COMPAT_LOFF_T_MAX)
- ret = -EOVERFLOW;
- if (f.l_len > COMPAT_LOFF_T_MAX)
- f.l_len = COMPAT_LOFF_T_MAX;
- if (ret == 0)
- ret = put_compat_flock64(&f, compat_ptr(arg));
- }
+ err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
break;
-
default:
- ret = sys_fcntl(fd, cmd, arg);
+ err = do_fcntl(fd, cmd, arg, f.file);
break;
}
- return ret;
+out_put:
+ fdput(f);
+ return err;
}
COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
diff --git a/fs/file.c b/fs/file.c
index 1c2972e3a405..1fc7fbbb4510 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -30,21 +30,6 @@ unsigned int sysctl_nr_open_min = BITS_PER_LONG;
unsigned int sysctl_nr_open_max =
__const_min(INT_MAX, ~(size_t)0/sizeof(void *)) & -BITS_PER_LONG;
-static void *alloc_fdmem(size_t size)
-{
- /*
- * Very large allocations can stress page reclaim, so fall back to
- * vmalloc() if the allocation size will be considered "large" by the VM.
- */
- if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
- void *data = kmalloc(size, GFP_KERNEL_ACCOUNT |
- __GFP_NOWARN | __GFP_NORETRY);
- if (data != NULL)
- return data;
- }
- return __vmalloc(size, GFP_KERNEL_ACCOUNT, PAGE_KERNEL);
-}
-
static void __free_fdtable(struct fdtable *fdt)
{
kvfree(fdt->fd);
@@ -131,13 +116,14 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
if (!fdt)
goto out;
fdt->max_fds = nr;
- data = alloc_fdmem(nr * sizeof(struct file *));
+ data = kvmalloc_array(nr, sizeof(struct file *), GFP_KERNEL_ACCOUNT);
if (!data)
goto out_fdt;
fdt->fd = data;
- data = alloc_fdmem(max_t(size_t,
- 2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), L1_CACHE_BYTES));
+ data = kvmalloc(max_t(size_t,
+ 2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), L1_CACHE_BYTES),
+ GFP_KERNEL_ACCOUNT);
if (!data)
goto out_arr;
fdt->open_fds = data;
diff --git a/fs/file_table.c b/fs/file_table.c
index 954d510b765a..72e861a35a7f 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -168,6 +168,7 @@ struct file *alloc_file(const struct path *path, fmode_t mode,
file->f_path = *path;
file->f_inode = path->dentry->d_inode;
file->f_mapping = path->dentry->d_inode->i_mapping;
+ file->f_wb_err = filemap_sample_wb_err(file->f_mapping);
if ((mode & FMODE_READ) &&
likely(fop->read || fop->read_iter))
mode |= FMODE_CAN_READ;
diff --git a/fs/filesystems.c b/fs/filesystems.c
index cac75547d35c..8b99955e3504 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -275,8 +275,10 @@ struct file_system_type *get_fs_type(const char *name)
int len = dot ? dot - name : strlen(name);
fs = __get_fs_type(name, len);
- if (!fs && (request_module("fs-%.*s", len, name) == 0))
+ if (!fs && (request_module("fs-%.*s", len, name) == 0)) {
fs = __get_fs_type(name, len);
+ WARN_ONCE(!fs, "request_module fs-%.*s succeeded, but still no fs?\n", len, name);
+ }
if (dot && fs && !(fs->fs_flags & FS_HAS_SUBTYPE)) {
put_filesystem(fs);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 63ee2940775c..8b426f83909f 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -2052,11 +2052,13 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode)
}
/**
- * __mark_inode_dirty - internal function
- * @inode: inode to mark
- * @flags: what kind of dirty (i.e. I_DIRTY_SYNC)
- * Mark an inode as dirty. Callers should use mark_inode_dirty or
- * mark_inode_dirty_sync.
+ * __mark_inode_dirty - internal function
+ *
+ * @inode: inode to mark
+ * @flags: what kind of dirty (i.e. I_DIRTY_SYNC)
+ *
+ * Mark an inode as dirty. Callers should use mark_inode_dirty or
+ * mark_inode_dirty_sync.
*
* Put the inode on the super block's dirty list.
*
diff --git a/fs/fs_pin.c b/fs/fs_pin.c
index 611b5408f6ec..e747b3d720ee 100644
--- a/fs/fs_pin.c
+++ b/fs/fs_pin.c
@@ -34,7 +34,7 @@ void pin_insert(struct fs_pin *pin, struct vfsmount *m)
void pin_kill(struct fs_pin *p)
{
- wait_queue_t wait;
+ wait_queue_entry_t wait;
if (!p) {
rcu_read_unlock();
@@ -61,7 +61,7 @@ void pin_kill(struct fs_pin *p)
rcu_read_unlock();
schedule();
rcu_read_lock();
- if (likely(list_empty(&wait.task_list)))
+ if (likely(list_empty(&wait.entry)))
break;
/* OK, we know p couldn't have been freed yet */
spin_lock_irq(&p->wait.lock);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 4d810be532dd..9fa3aef9a5b3 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -970,7 +970,7 @@ more_rgrps:
continue;
bn = be64_to_cpu(*p);
if (gfs2_holder_initialized(rd_gh)) {
- rgd = (struct gfs2_rgrpd *)rd_gh->gh_gl->gl_object;
+ rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
gfs2_assert_withdraw(sdp,
gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
} else {
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 79113219be5f..db427658ccd9 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1444,7 +1444,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx,
"g.offset (%u)\n",
(unsigned long long)bh->b_blocknr,
entries2, g.offset);
-
+ gfs2_consist_inode(ip);
error = -EIO;
goto out_free;
}
@@ -1612,6 +1612,7 @@ int gfs2_dir_read(struct inode *inode, struct dir_context *ctx,
(unsigned long long)dip->i_no_addr,
dip->i_entries,
g.offset);
+ gfs2_consist_inode(dip);
error = -EIO;
goto out;
}
@@ -2031,8 +2032,8 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE);
for (x = 0; x < rlist.rl_rgrps; x++) {
- struct gfs2_rgrpd *rgd;
- rgd = rlist.rl_ghs[x].gh_gl->gl_object;
+ struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(rlist.rl_ghs[x].gh_gl);
+
rg_blocks += rgd->rd_length;
}
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 959a19ced4d5..c38ab6c81898 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -80,9 +80,9 @@ static struct rhashtable_params ht_parms = {
static struct rhashtable gl_hash_table;
-void gfs2_glock_free(struct gfs2_glock *gl)
+static void gfs2_glock_dealloc(struct rcu_head *rcu)
{
- struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+ struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
if (gl->gl_ops->go_flags & GLOF_ASPACE) {
kmem_cache_free(gfs2_glock_aspace_cachep, gl);
@@ -90,6 +90,13 @@ void gfs2_glock_free(struct gfs2_glock *gl)
kfree(gl->gl_lksb.sb_lvbptr);
kmem_cache_free(gfs2_glock_cachep, gl);
}
+}
+
+void gfs2_glock_free(struct gfs2_glock *gl)
+{
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+
+ call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
if (atomic_dec_and_test(&sdp->sd_glock_disposal))
wake_up(&sdp->sd_glock_wait);
}
@@ -152,20 +159,34 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
spin_unlock(&lru_lock);
}
-/**
- * gfs2_glock_put() - Decrement reference count on glock
- * @gl: The glock to put
- *
+/*
+ * Enqueue the glock on the work queue. Passes one glock reference on to the
+ * work queue.
*/
+static void __gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay)
+{
+	if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay))
+		return;
+
+	/*
+	 * The work was already queued, so the reference passed in is not
+	 * needed.  We hold the lockref spinlock, and the queued work
+	 * (glock_work_func) takes that spinlock before dropping its glock
+	 * reference(s), so it cannot have dropped them in the meantime;
+	 * hence at least two references must still exist here.
+	 */
+	GLOCK_BUG_ON(gl, gl->gl_lockref.count < 2);
+	gl->gl_lockref.count--;
+}
-void gfs2_glock_put(struct gfs2_glock *gl)
+/* Take the lockref spinlock around a call to __gfs2_glock_queue_work(). */
+static void gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay)
+{
+	spinlock_t *lock = &gl->gl_lockref.lock;
+
+	spin_lock(lock);
+	__gfs2_glock_queue_work(gl, delay);
+	spin_unlock(lock);
+}
+
+static void __gfs2_glock_put(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct address_space *mapping = gfs2_glock2aspace(gl);
- if (lockref_put_or_lock(&gl->gl_lockref))
- return;
-
lockref_mark_dead(&gl->gl_lockref);
gfs2_glock_remove_from_lru(gl);
@@ -178,6 +199,20 @@ void gfs2_glock_put(struct gfs2_glock *gl)
}
/**
+ * gfs2_glock_put() - Decrement reference count on glock
+ * @gl: The glock to put
+ *
+ */
+
+void gfs2_glock_put(struct gfs2_glock *gl)
+{
+	/*
+	 * A nonzero return means the reference was dropped and nothing more
+	 * is needed; otherwise fall through to the final-put handling in
+	 * __gfs2_glock_put().
+	 */
+	if (!lockref_put_or_lock(&gl->gl_lockref))
+		__gfs2_glock_put(gl);
+}
+
+/**
* may_grant - check if its ok to grant a new lock
* @gl: The glock
* @gh: The lock request which we wish to grant
@@ -482,8 +517,7 @@ __acquires(&gl->gl_lockref.lock)
target == LM_ST_UNLOCKED &&
test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags)) {
finish_xmote(gl, target);
- if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
- gfs2_glock_put(gl);
+ gfs2_glock_queue_work(gl, 0);
}
else if (ret) {
pr_err("lm_lock ret %d\n", ret);
@@ -492,8 +526,7 @@ __acquires(&gl->gl_lockref.lock)
}
} else { /* lock_nolock */
finish_xmote(gl, target);
- if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
- gfs2_glock_put(gl);
+ gfs2_glock_queue_work(gl, 0);
}
spin_lock(&gl->gl_lockref.lock);
@@ -565,8 +598,7 @@ out_sched:
clear_bit(GLF_LOCK, &gl->gl_flags);
smp_mb__after_atomic();
gl->gl_lockref.count++;
- if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
- gl->gl_lockref.count--;
+ __gfs2_glock_queue_work(gl, 0);
return;
out_unlock:
@@ -601,11 +633,11 @@ static void glock_work_func(struct work_struct *work)
{
unsigned long delay = 0;
struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
- int drop_ref = 0;
+ unsigned int drop_refs = 1;
if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) {
finish_xmote(gl, gl->gl_reply);
- drop_ref = 1;
+ drop_refs++;
}
spin_lock(&gl->gl_lockref.lock);
if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
@@ -623,17 +655,25 @@ static void glock_work_func(struct work_struct *work)
}
}
run_queue(gl, 0);
- spin_unlock(&gl->gl_lockref.lock);
- if (!delay)
- gfs2_glock_put(gl);
- else {
+ if (delay) {
+ /* Keep one glock reference for the work we requeue. */
+ drop_refs--;
if (gl->gl_name.ln_type != LM_TYPE_INODE)
delay = 0;
- if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
- gfs2_glock_put(gl);
+ __gfs2_glock_queue_work(gl, delay);
}
- if (drop_ref)
- gfs2_glock_put(gl);
+
+ /*
+ * Drop the remaining glock references manually here. (Mind that
+ * __gfs2_glock_queue_work depends on the lockref spinlock being held
+ * here as well.)
+ */
+ gl->gl_lockref.count -= drop_refs;
+ if (!gl->gl_lockref.count) {
+ __gfs2_glock_put(gl);
+ return;
+ }
+ spin_unlock(&gl->gl_lockref.lock);
}
/**
@@ -986,8 +1026,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh)
test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))) {
set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
gl->gl_lockref.count++;
- if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
- gl->gl_lockref.count--;
+ __gfs2_glock_queue_work(gl, 0);
}
run_queue(gl, 1);
spin_unlock(&gl->gl_lockref.lock);
@@ -1047,17 +1086,15 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
gfs2_glock_add_to_lru(gl);
trace_gfs2_glock_queue(gh, 0);
+ if (unlikely(!fast_path)) {
+ gl->gl_lockref.count++;
+ if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
+ !test_bit(GLF_DEMOTE, &gl->gl_flags) &&
+ gl->gl_name.ln_type == LM_TYPE_INODE)
+ delay = gl->gl_hold_time;
+ __gfs2_glock_queue_work(gl, delay);
+ }
spin_unlock(&gl->gl_lockref.lock);
- if (likely(fast_path))
- return;
-
- gfs2_glock_hold(gl);
- if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
- !test_bit(GLF_DEMOTE, &gl->gl_flags) &&
- gl->gl_name.ln_type == LM_TYPE_INODE)
- delay = gl->gl_hold_time;
- if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
- gfs2_glock_put(gl);
}
void gfs2_glock_dq_wait(struct gfs2_holder *gh)
@@ -1233,9 +1270,8 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
spin_lock(&gl->gl_lockref.lock);
handle_callback(gl, state, delay, true);
+ __gfs2_glock_queue_work(gl, delay);
spin_unlock(&gl->gl_lockref.lock);
- if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
- gfs2_glock_put(gl);
}
/**
@@ -1294,10 +1330,8 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
gl->gl_lockref.count++;
set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
+ __gfs2_glock_queue_work(gl, 0);
spin_unlock(&gl->gl_lockref.lock);
-
- if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
- gfs2_glock_put(gl);
}
static int glock_cmp(void *priv, struct list_head *a, struct list_head *b)
@@ -1355,8 +1389,7 @@ add_back_to_lru:
if (demote_ok(gl))
handle_callback(gl, LM_ST_UNLOCKED, 0, false);
WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags));
- if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
- gl->gl_lockref.count--;
+ __gfs2_glock_queue_work(gl, 0);
spin_unlock(&gl->gl_lockref.lock);
cond_resched_lock(&lru_lock);
}
@@ -1462,13 +1495,12 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
static void thaw_glock(struct gfs2_glock *gl)
{
- if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))
- goto out;
- set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
- if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) {
-out:
+ if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) {
gfs2_glock_put(gl);
+ return;
}
+ set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
+ gfs2_glock_queue_work(gl, 0);
}
/**
@@ -1484,9 +1516,8 @@ static void clear_glock(struct gfs2_glock *gl)
spin_lock(&gl->gl_lockref.lock);
if (gl->gl_state != LM_ST_UNLOCKED)
handle_callback(gl, LM_ST_UNLOCKED, 0, false);
+ __gfs2_glock_queue_work(gl, 0);
spin_unlock(&gl->gl_lockref.lock);
- if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
- gfs2_glock_put(gl);
}
/**
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index ab1ef322f7a5..9ad4a6ac6c84 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -257,4 +257,11 @@ static inline bool gfs2_holder_initialized(struct gfs2_holder *gh)
return gh->gh_gl;
}
+/* Store @object in gl->gl_object while holding the glock's lockref spinlock. */
+static inline void glock_set_object(struct gfs2_glock *gl, void *object)
+{
+	spinlock_t *lock = &gl->gl_lockref.lock;
+
+	spin_lock(lock);
+	gl->gl_object = object;
+	spin_unlock(lock);
+}
+
#endif /* __GLOCK_DOT_H__ */
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 5db59d444838..5e69636d4dd3 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -137,7 +137,7 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
*
* Called when demoting or unlocking an EX glock. We must flush
* to disk all dirty buffers/pages relating to this glock, and must not
- * not return to caller to demote/unlock the glock until I/O is complete.
+ * return to caller to demote/unlock the glock until I/O is complete.
*/
static void rgrp_go_sync(struct gfs2_glock *gl)
@@ -184,7 +184,7 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct address_space *mapping = &sdp->sd_aspace;
- struct gfs2_rgrpd *rgd = gl->gl_object;
+ struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
if (rgd)
gfs2_rgrp_brelse(rgd);
@@ -197,6 +197,38 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
}
+/*
+ * Read the inode stored in gl->gl_object under the lockref spinlock and, if
+ * there is one, set GIF_GLOP_PENDING in its i_flags before the lock is
+ * released.  Returns the inode, or NULL if no object is attached.
+ */
+static struct gfs2_inode *gfs2_glock2inode(struct gfs2_glock *gl)
+{
+	spinlock_t *lock = &gl->gl_lockref.lock;
+	struct gfs2_inode *inode;
+
+	spin_lock(lock);
+	inode = gl->gl_object;
+	if (inode != NULL)
+		set_bit(GIF_GLOP_PENDING, &inode->i_flags);
+	spin_unlock(lock);
+
+	return inode;
+}
+
+/*
+ * Read gl->gl_object under the lockref spinlock and return it as a
+ * struct gfs2_rgrpd pointer (NULL if nothing is attached).
+ */
+struct gfs2_rgrpd *gfs2_glock2rgrp(struct gfs2_glock *gl)
+{
+	spinlock_t *lock = &gl->gl_lockref.lock;
+	struct gfs2_rgrpd *rgrp;
+
+	spin_lock(lock);
+	rgrp = gl->gl_object;
+	spin_unlock(lock);
+	return rgrp;
+}
+
+/*
+ * Clear GIF_GLOP_PENDING on @ip and wake anyone waiting on that bit.
+ * A NULL @ip is accepted and ignored.
+ */
+static void gfs2_clear_glop_pending(struct gfs2_inode *ip)
+{
+	if (ip) {
+		clear_bit_unlock(GIF_GLOP_PENDING, &ip->i_flags);
+		wake_up_bit(&ip->i_flags, GIF_GLOP_PENDING);
+	}
+}
+
/**
* inode_go_sync - Sync the dirty data and/or metadata for an inode glock
* @gl: the glock protecting the inode
@@ -205,25 +237,24 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
static void inode_go_sync(struct gfs2_glock *gl)
{
- struct gfs2_inode *ip = gl->gl_object;
+ struct gfs2_inode *ip = gfs2_glock2inode(gl);
+ int isreg = ip && S_ISREG(ip->i_inode.i_mode);
struct address_space *metamapping = gfs2_glock2aspace(gl);
int error;
- if (ip && !S_ISREG(ip->i_inode.i_mode))
- ip = NULL;
- if (ip) {
+ if (isreg) {
if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0);
inode_dio_wait(&ip->i_inode);
}
if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
- return;
+ goto out;
GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE);
gfs2_log_flush(gl->gl_name.ln_sbd, gl, NORMAL_FLUSH);
filemap_fdatawrite(metamapping);
- if (ip) {
+ if (isreg) {
struct address_space *mapping = ip->i_inode.i_mapping;
filemap_fdatawrite(mapping);
error = filemap_fdatawait(mapping);
@@ -238,6 +269,9 @@ static void inode_go_sync(struct gfs2_glock *gl)
*/
smp_mb__before_atomic();
clear_bit(GLF_DIRTY, &gl->gl_flags);
+
+out:
+ gfs2_clear_glop_pending(ip);
}
/**
@@ -253,7 +287,7 @@ static void inode_go_sync(struct gfs2_glock *gl)
static void inode_go_inval(struct gfs2_glock *gl, int flags)
{
- struct gfs2_inode *ip = gl->gl_object;
+ struct gfs2_inode *ip = gfs2_glock2inode(gl);
gfs2_assert_withdraw(gl->gl_name.ln_sbd, !atomic_read(&gl->gl_ail_count));
@@ -274,6 +308,8 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
}
if (ip && S_ISREG(ip->i_inode.i_mode))
truncate_inode_pages(ip->i_inode.i_mapping, 0);
+
+ gfs2_clear_glop_pending(ip);
}
/**
@@ -541,7 +577,7 @@ static int freeze_go_demote_ok(const struct gfs2_glock *gl)
*/
static void iopen_go_callback(struct gfs2_glock *gl, bool remote)
{
- struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object;
+ struct gfs2_inode *ip = gl->gl_object;
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
if (!remote || (sdp->sd_vfs->s_flags & MS_RDONLY))
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index b7cf65d13561..73fce76e67ee 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -336,7 +336,6 @@ enum {
};
struct gfs2_glock {
- struct hlist_bl_node gl_list;
unsigned long gl_flags; /* GLF_... */
struct lm_lockname gl_name;
@@ -374,6 +373,7 @@ struct gfs2_glock {
loff_t end;
} gl_vm;
};
+ struct rcu_head gl_rcu;
struct rhash_head gl_node;
};
@@ -386,6 +386,7 @@ enum {
GIF_SW_PAGED = 3,
GIF_ORDERED = 4,
GIF_FREE_VFS_INODE = 5,
+ GIF_GLOP_PENDING = 6,
};
struct gfs2_inode {
@@ -815,13 +816,11 @@ struct gfs2_sbd {
atomic_t sd_log_in_flight;
struct bio *sd_log_bio;
wait_queue_head_t sd_log_flush_wait;
- int sd_log_error;
atomic_t sd_reserving_log;
wait_queue_head_t sd_reserving_log_wait;
unsigned int sd_log_flush_head;
- u64 sd_log_flush_wrapped;
spinlock_t sd_ail_lock;
struct list_head sd_ail1_list;
@@ -858,5 +857,7 @@ static inline void gfs2_sbstats_inc(const struct gfs2_glock *gl, int which)
preempt_enable();
}
+extern struct gfs2_rgrpd *gfs2_glock2rgrp(struct gfs2_glock *gl);
+
#endif /* __INCORE_DOT_H__ */
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 9f605ea4810c..acca501f8110 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -144,7 +144,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
if (unlikely(error))
goto fail;
- ip->i_gl->gl_object = ip;
+ flush_delayed_work(&ip->i_gl->gl_work);
+ glock_set_object(ip->i_gl, ip);
error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
if (unlikely(error))
@@ -173,8 +174,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
if (unlikely(error))
goto fail_put;
-
- ip->i_iopen_gh.gh_gl->gl_object = ip;
+ flush_delayed_work(&ip->i_iopen_gh.gh_gl->gl_work);
+ glock_set_object(ip->i_iopen_gh.gh_gl, ip);
gfs2_glock_put(io_gl);
io_gl = NULL;
@@ -201,14 +202,14 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
fail_refresh:
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
- ip->i_iopen_gh.gh_gl->gl_object = NULL;
+ glock_set_object(ip->i_iopen_gh.gh_gl, NULL);
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
fail_put:
if (io_gl)
gfs2_glock_put(io_gl);
if (gfs2_holder_initialized(&i_gh))
gfs2_glock_dq_uninit(&i_gh);
- ip->i_gl->gl_object = NULL;
+ glock_set_object(ip->i_gl, NULL);
fail:
iget_failed(inode);
return ERR_PTR(error);
@@ -607,6 +608,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
if (error)
goto fail;
+ gfs2_holder_mark_uninitialized(ghs + 1);
error = create_ok(dip, name, mode);
if (error)
@@ -705,7 +707,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (error)
goto fail_free_inode;
- ip->i_gl->gl_object = ip;
+ glock_set_object(ip->i_gl, ip);
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
if (error)
goto fail_free_inode;
@@ -731,7 +733,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (error)
goto fail_gunlock2;
- ip->i_iopen_gh.gh_gl->gl_object = ip;
+ glock_set_object(ip->i_iopen_gh.gh_gl, ip);
gfs2_glock_put(io_gl);
gfs2_set_iop(inode);
insert_inode_hash(inode);
@@ -778,7 +780,6 @@ fail_gunlock3:
fail_gunlock2:
if (io_gl)
clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags);
- gfs2_glock_dq_uninit(ghs + 1);
fail_free_inode:
if (ip->i_gl)
gfs2_glock_put(ip->i_gl);
@@ -799,6 +800,8 @@ fail_gunlock:
&GFS2_I(inode)->i_flags);
iput(inode);
}
+ if (gfs2_holder_initialized(ghs + 1))
+ gfs2_glock_dq_uninit(ghs + 1);
fail:
return error;
}
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index d2955daf17a4..9a624f694400 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -722,7 +722,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
sdp->sd_log_flush_head = sdp->sd_log_head;
- sdp->sd_log_flush_wrapped = 0;
tr = sdp->sd_log_tr;
if (tr) {
sdp->sd_log_tr = NULL;
@@ -775,7 +774,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
}
atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
trace_gfs2_log_blocks(sdp, -1);
- sdp->sd_log_flush_wrapped = 0;
log_write_header(sdp, 0);
sdp->sd_log_head = sdp->sd_log_flush_head;
}
@@ -880,7 +878,6 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));
sdp->sd_log_flush_head = sdp->sd_log_head;
- sdp->sd_log_flush_wrapped = 0;
log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index b1f9144b42c7..3010f9edd177 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -71,7 +71,7 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
{
struct gfs2_glock *gl = bd->bd_gl;
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- struct gfs2_rgrpd *rgd = gl->gl_object;
+ struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
struct gfs2_bitmap *bi = rgd->rd_bits + index;
@@ -134,10 +134,8 @@ static void gfs2_log_incr_head(struct gfs2_sbd *sdp)
BUG_ON((sdp->sd_log_flush_head == sdp->sd_log_tail) &&
(sdp->sd_log_flush_head != sdp->sd_log_head));
- if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) {
+ if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks)
sdp->sd_log_flush_head = 0;
- sdp->sd_log_flush_wrapped = 1;
- }
}
static u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
@@ -170,7 +168,7 @@ static u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
*/
static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
- int error)
+ blk_status_t error)
{
struct buffer_head *bh, *next;
struct page *page = bvec->bv_page;
@@ -182,7 +180,7 @@ static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
bh = bh->b_this_page;
do {
if (error)
- set_buffer_write_io_error(bh);
+ mark_buffer_write_io_error(bh);
unlock_buffer(bh);
next = bh->b_this_page;
size -= bh->b_size;
@@ -209,15 +207,13 @@ static void gfs2_end_log_write(struct bio *bio)
struct page *page;
int i;
- if (bio->bi_error) {
- sdp->sd_log_error = bio->bi_error;
- fs_err(sdp, "Error %d writing to log\n", bio->bi_error);
- }
+ if (bio->bi_status)
+ fs_err(sdp, "Error %d writing to log\n", bio->bi_status);
bio_for_each_segment_all(bvec, bio, i) {
page = bvec->bv_page;
if (page_has_buffers(page))
- gfs2_end_log_write_bh(sdp, bvec, bio->bi_error);
+ gfs2_end_log_write_bh(sdp, bvec, bio->bi_status);
else
mempool_free(page, gfs2_page_pool);
}
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 67d1fc4668f7..0a89e6f7a314 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -52,7 +52,6 @@ static void gfs2_init_glock_once(void *foo)
{
struct gfs2_glock *gl = foo;
- INIT_HLIST_BL_NODE(&gl->gl_list);
spin_lock_init(&gl->gl_lockref.lock);
INIT_LIST_HEAD(&gl->gl_holders);
INIT_LIST_HEAD(&gl->gl_lru);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 663ffc135ef3..fabe1614f879 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -201,7 +201,7 @@ static void gfs2_meta_read_endio(struct bio *bio)
do {
struct buffer_head *next = bh->b_this_page;
len -= bh->b_size;
- bh->b_end_io(bh, !bio->bi_error);
+ bh->b_end_io(bh, !bio->bi_status);
bh = next;
} while (bh && len);
}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index b92135c202c2..e76058d34b74 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -176,10 +176,10 @@ static void end_bio_io_page(struct bio *bio)
{
struct page *page = bio->bi_private;
- if (!bio->bi_error)
+ if (!bio->bi_status)
SetPageUptodate(page);
else
- pr_warn("error %d reading superblock\n", bio->bi_error);
+ pr_warn("error %d reading superblock\n", bio->bi_status);
unlock_page(page);
}
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 83c9909ff14a..836e38ba5d0a 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -705,9 +705,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
rb_erase(n, &sdp->sd_rindex_tree);
if (gl) {
- spin_lock(&gl->gl_lockref.lock);
- gl->gl_object = NULL;
- spin_unlock(&gl->gl_lockref.lock);
+ glock_set_object(gl, NULL);
gfs2_glock_add_to_lru(gl);
gfs2_glock_put(gl);
}
@@ -917,7 +915,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
error = rgd_insert(rgd);
spin_unlock(&sdp->sd_rindex_spin);
if (!error) {
- rgd->rd_gl->gl_object = rgd;
+ glock_set_object(rgd->rd_gl, rgd);
rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_MASK;
rgd->rd_gl->gl_vm.end = PAGE_ALIGN((rgd->rd_addr +
rgd->rd_length) * bsize) - 1;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 29b0473f6e74..fdedec379b78 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1105,9 +1105,12 @@ static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host
gfs2_holder_uninit(gh);
error = err;
} else {
- if (!error)
- error = statfs_slow_fill(
- gh->gh_gl->gl_object, sc);
+ if (!error) {
+ struct gfs2_rgrpd *rgd =
+ gfs2_glock2rgrp(gh->gh_gl);
+
+ error = statfs_slow_fill(rgd, sc);
+ }
gfs2_glock_dq_uninit(gh);
}
}
@@ -1535,6 +1538,12 @@ static void gfs2_evict_inode(struct inode *inode)
if (inode->i_nlink || (sb->s_flags & MS_RDONLY))
goto out;
+ if (test_bit(GIF_ALLOC_FAILED, &ip->i_flags)) {
+ BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));
+ gfs2_holder_mark_uninitialized(&gh);
+ goto alloc_failed;
+ }
+
/* Must not read inode block until block type has been verified */
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh);
if (unlikely(error)) {
@@ -1543,11 +1552,9 @@ static void gfs2_evict_inode(struct inode *inode)
goto out;
}
- if (!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)) {
- error = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED);
- if (error)
- goto out_truncate;
- }
+ error = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED);
+ if (error)
+ goto out_truncate;
if (test_bit(GIF_INVALID, &ip->i_flags)) {
error = gfs2_inode_refresh(ip);
@@ -1555,6 +1562,7 @@ static void gfs2_evict_inode(struct inode *inode)
goto out_truncate;
}
+alloc_failed:
if (gfs2_holder_initialized(&ip->i_iopen_gh) &&
test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
@@ -1621,7 +1629,8 @@ out_unlock:
}
gfs2_holder_uninit(&ip->i_iopen_gh);
}
- gfs2_glock_dq_uninit(&gh);
+ if (gfs2_holder_initialized(&gh))
+ gfs2_glock_dq_uninit(&gh);
if (error && error != GLR_TRYFAILED && error != -EROFS)
fs_warn(sdp, "gfs2_evict_inode: %d\n", error);
out:
@@ -1631,13 +1640,13 @@ out:
gfs2_ordered_del_inode(ip);
clear_inode(inode);
gfs2_dir_hash_inval(ip);
- ip->i_gl->gl_object = NULL;
- flush_delayed_work(&ip->i_gl->gl_work);
+ glock_set_object(ip->i_gl, NULL);
+ wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
gfs2_glock_add_to_lru(ip->i_gl);
gfs2_glock_put(ip->i_gl);
ip->i_gl = NULL;
if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
- ip->i_iopen_gh.gh_gl->gl_object = NULL;
+ glock_set_object(ip->i_iopen_gh.gh_gl, NULL);
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
}
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index e77bc52b468f..ca1f97ff898c 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -626,12 +626,12 @@ static struct attribute *tune_attrs[] = {
NULL,
};
-static struct attribute_group tune_group = {
+static const struct attribute_group tune_group = {
.name = "tune",
.attrs = tune_attrs,
};
-static struct attribute_group lock_module_group = {
+static const struct attribute_group lock_module_group = {
.name = "lock_module",
.attrs = lock_module_attrs,
};
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index d87721aeb575..54179554c7d2 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -1327,8 +1327,8 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE);
for (x = 0; x < rlist.rl_rgrps; x++) {
- struct gfs2_rgrpd *rgd;
- rgd = rlist.rl_ghs[x].gh_gl->gl_object;
+ struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(rlist.rl_ghs[x].gh_gl);
+
rg_blocks += rgd->rd_length;
}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index dde861387a40..d44f5456eb9b 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -200,7 +200,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
addr = ALIGN(addr, huge_page_size(h));
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
diff --git a/fs/inode.c b/fs/inode.c
index db5914783a71..50370599e371 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -146,6 +146,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
i_gid_write(inode, 0);
atomic_set(&inode->i_writecount, 0);
inode->i_size = 0;
+ inode->i_write_hint = WRITE_LIFE_NOT_SET;
inode->i_blocks = 0;
inode->i_bytes = 0;
inode->i_generation = 0;
@@ -1891,11 +1892,11 @@ static void __wait_on_freeing_inode(struct inode *inode)
wait_queue_head_t *wq;
DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
wq = bit_waitqueue(&inode->i_state, __I_NEW);
- prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_hash_lock);
schedule();
- finish_wait(wq, &wait.wait);
+ finish_wait(wq, &wait.wq_entry);
spin_lock(&inode_hash_lock);
}
@@ -1914,8 +1915,6 @@ __setup("ihash_entries=", set_ihash_entries);
*/
void __init inode_init_early(void)
{
- unsigned int loop;
-
/* If hashes are distributed across NUMA nodes, defer
* hash allocation until vmalloc space is available.
*/
@@ -1927,20 +1926,15 @@ void __init inode_init_early(void)
sizeof(struct hlist_head),
ihash_entries,
14,
- HASH_EARLY,
+ HASH_EARLY | HASH_ZERO,
&i_hash_shift,
&i_hash_mask,
0,
0);
-
- for (loop = 0; loop < (1U << i_hash_shift); loop++)
- INIT_HLIST_HEAD(&inode_hashtable[loop]);
}
void __init inode_init(void)
{
- unsigned int loop;
-
/* inode slab cache */
inode_cachep = kmem_cache_create("inode_cache",
sizeof(struct inode),
@@ -1958,14 +1952,11 @@ void __init inode_init(void)
sizeof(struct hlist_head),
ihash_entries,
14,
- 0,
+ HASH_ZERO,
&i_hash_shift,
&i_hash_mask,
0,
0);
-
- for (loop = 0; loop < (1U << i_hash_shift); loop++)
- INIT_HLIST_HEAD(&inode_hashtable[loop]);
}
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
@@ -2023,7 +2014,7 @@ bool inode_owner_or_capable(const struct inode *inode)
return true;
ns = current_user_ns();
- if (ns_capable(ns, CAP_FOWNER) && kuid_has_mapping(ns, inode->i_uid))
+ if (kuid_has_mapping(ns, inode->i_uid) && ns_capable(ns, CAP_FOWNER))
return true;
return false;
}
@@ -2038,11 +2029,11 @@ static void __inode_dio_wait(struct inode *inode)
DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
do {
- prepare_to_wait(wq, &q.wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE);
if (atomic_read(&inode->i_dio_count))
schedule();
} while (atomic_read(&inode->i_dio_count));
- finish_wait(wq, &q.wait);
+ finish_wait(wq, &q.wq_entry);
}
/**
diff --git a/fs/iomap.c b/fs/iomap.c
index 4b10892967a5..173222863aca 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -584,6 +584,100 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
}
EXPORT_SYMBOL_GPL(iomap_fiemap);
+static loff_t
+iomap_seek_hole_actor(struct inode *inode, loff_t offset, loff_t length,
+ void *data, struct iomap *iomap)
+{
+ switch (iomap->type) {
+ case IOMAP_UNWRITTEN:
+ offset = page_cache_seek_hole_data(inode, offset, length,
+ SEEK_HOLE);
+ if (offset < 0)
+ return length;
+ /* fall through */
+ case IOMAP_HOLE:
+ *(loff_t *)data = offset;
+ return 0;
+ default:
+ return length;
+ }
+}
+
+loff_t
+iomap_seek_hole(struct inode *inode, loff_t offset, const struct iomap_ops *ops)
+{
+ loff_t size = i_size_read(inode);
+ loff_t length = size - offset;
+ loff_t ret;
+
+ /* Nothing to be found beyond the end of the file. */
+ if (offset >= size)
+ return -ENXIO;
+
+ while (length > 0) {
+ ret = iomap_apply(inode, offset, length, IOMAP_REPORT, ops,
+ &offset, iomap_seek_hole_actor);
+ if (ret < 0)
+ return ret;
+ if (ret == 0)
+ break;
+
+ offset += ret;
+ length -= ret;
+ }
+
+ return offset;
+}
+EXPORT_SYMBOL_GPL(iomap_seek_hole);
+
+static loff_t
+iomap_seek_data_actor(struct inode *inode, loff_t offset, loff_t length,
+ void *data, struct iomap *iomap)
+{
+ switch (iomap->type) {
+ case IOMAP_HOLE:
+ return length;
+ case IOMAP_UNWRITTEN:
+ offset = page_cache_seek_hole_data(inode, offset, length,
+ SEEK_DATA);
+ if (offset < 0)
+ return length;
+ /*FALLTHRU*/
+ default:
+ *(loff_t *)data = offset;
+ return 0;
+ }
+}
+
+loff_t
+iomap_seek_data(struct inode *inode, loff_t offset, const struct iomap_ops *ops)
+{
+ loff_t size = i_size_read(inode);
+ loff_t length = size - offset;
+ loff_t ret;
+
+ /* Nothing to be found beyond the end of the file. */
+ if (offset >= size)
+ return -ENXIO;
+
+ while (length > 0) {
+ ret = iomap_apply(inode, offset, length, IOMAP_REPORT, ops,
+ &offset, iomap_seek_data_actor);
+ if (ret < 0)
+ return ret;
+ if (ret == 0)
+ break;
+
+ offset += ret;
+ length -= ret;
+ }
+
+ if (length <= 0)
+ return -ENXIO;
+ return offset;
+}
+EXPORT_SYMBOL_GPL(iomap_seek_data);
+
/*
* Private flags for iomap_dio, must not overlap with the public ones in
* iomap.h:
@@ -672,8 +766,8 @@ static void iomap_dio_bio_end_io(struct bio *bio)
struct iomap_dio *dio = bio->bi_private;
bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
- if (bio->bi_error)
- iomap_dio_set_error(dio, bio->bi_error);
+ if (bio->bi_status)
+ iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
if (atomic_dec_and_test(&dio->ref)) {
if (is_sync_kiocb(dio->iocb)) {
@@ -793,6 +887,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
bio->bi_bdev = iomap->bdev;
bio->bi_iter.bi_sector =
iomap->blkno + ((pos - iomap->offset) >> 9);
+ bio->bi_write_hint = dio->iocb->ki_hint;
bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io;
@@ -881,6 +976,14 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
flags |= IOMAP_WRITE;
}
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (filemap_range_has_page(mapping, start, end)) {
+ ret = -EAGAIN;
+ goto out_free_dio;
+ }
+ flags |= IOMAP_NOWAIT;
+ }
+
ret = filemap_write_and_wait_range(mapping, start, end);
if (ret)
goto out_free_dio;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index b6b194ec1b4f..3c1c31321d9b 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -263,18 +263,10 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
continue;
jinode->i_flags |= JI_COMMIT_RUNNING;
spin_unlock(&journal->j_list_lock);
- err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
- if (err) {
- /*
- * Because AS_EIO is cleared by
- * filemap_fdatawait_range(), set it again so
- * that user process can get -EIO from fsync().
- */
- mapping_set_error(jinode->i_vfs_inode->i_mapping, -EIO);
-
- if (!ret)
- ret = err;
- }
+ err = filemap_fdatawait_keep_errors(
+ jinode->i_vfs_inode->i_mapping);
+ if (!ret)
+ ret = err;
spin_lock(&journal->j_list_lock);
jinode->i_flags &= ~JI_COMMIT_RUNNING;
smp_mb();
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index ebad34266bcf..7d5ef3bf3f3e 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2579,10 +2579,10 @@ restart:
wait_queue_head_t *wq;
DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
- prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
spin_unlock(&journal->j_list_lock);
schedule();
- finish_wait(wq, &wait.wait);
+ finish_wait(wq, &wait.wq_entry);
goto restart;
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 2d30a6da7013..8b08044b3120 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -409,25 +409,6 @@ static handle_t *new_handle(int nblocks)
return handle;
}
-/**
- * handle_t *jbd2_journal_start() - Obtain a new handle.
- * @journal: Journal to start transaction on.
- * @nblocks: number of block buffer we might modify
- *
- * We make sure that the transaction can guarantee at least nblocks of
- * modified buffers in the log. We block until the log can guarantee
- * that much space. Additionally, if rsv_blocks > 0, we also create another
- * handle with rsv_blocks reserved blocks in the journal. This handle is
- * is stored in h_rsv_handle. It is not attached to any particular transaction
- * and thus doesn't block transaction commit. If the caller uses this reserved
- * handle, it has to set h_rsv_handle to NULL as otherwise jbd2_journal_stop()
- * on the parent handle will dispose the reserved one. Reserved handle has to
- * be converted to a normal handle using jbd2_journal_start_reserved() before
- * it can be used.
- *
- * Return a pointer to a newly allocated handle, or an ERR_PTR() value
- * on failure.
- */
handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks,
gfp_t gfp_mask, unsigned int type,
unsigned int line_no)
@@ -478,6 +459,25 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks,
EXPORT_SYMBOL(jbd2__journal_start);
+/**
+ * handle_t *jbd2_journal_start() - Obtain a new handle.
+ * @journal: Journal to start transaction on.
+ * @nblocks: number of block buffer we might modify
+ *
+ * We make sure that the transaction can guarantee at least nblocks of
+ * modified buffers in the log. We block until the log can guarantee
+ * that much space. Additionally, if rsv_blocks > 0, we also create another
+ * handle with rsv_blocks reserved blocks in the journal. This handle is
+ * is stored in h_rsv_handle. It is not attached to any particular transaction
+ * and thus doesn't block transaction commit. If the caller uses this reserved
+ * handle, it has to set h_rsv_handle to NULL as otherwise jbd2_journal_stop()
+ * on the parent handle will dispose the reserved one. Reserved handle has to
+ * be converted to a normal handle using jbd2_journal_start_reserved() before
+ * it can be used.
+ *
+ * Return a pointer to a newly allocated handle, or an ERR_PTR() value
+ * on failure.
+ */
handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
{
return jbd2__journal_start(journal, nblocks, 0, GFP_NOFS, 0, 0);
@@ -1072,10 +1072,10 @@ out:
* @handle: transaction to add buffer modifications to
* @bh: bh to be used for metadata writes
*
- * Returns an error code or 0 on success.
+ * Returns: error code or 0 on success.
*
* In full data journalling mode the buffer may be of type BJ_AsyncData,
- * because we're write()ing a buffer which is also part of a shared mapping.
+ * because we're ``write()ing`` a buffer which is also part of a shared mapping.
*/
int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index bb1da1feafeb..a21f0e9eecd4 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2205,7 +2205,7 @@ static void lbmIODone(struct bio *bio)
bp->l_flag |= lbmDONE;
- if (bio->bi_error) {
+ if (bio->bi_status) {
bp->l_flag |= lbmERROR;
jfs_err("lbmIODone: I/O error in JFS log");
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 489aaa1403e5..65120a471729 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -280,7 +280,7 @@ static void metapage_read_end_io(struct bio *bio)
{
struct page *page = bio->bi_private;
- if (bio->bi_error) {
+ if (bio->bi_status) {
printk(KERN_ERR "metapage_read_end_io: I/O error\n");
SetPageError(page);
}
@@ -337,7 +337,7 @@ static void metapage_write_end_io(struct bio *bio)
BUG_ON(!PagePrivate(page));
- if (bio->bi_error) {
+ if (bio->bi_status) {
printk(KERN_ERR "metapage_write_end_io: I/O error\n");
SetPageError(page);
}
@@ -664,6 +664,7 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
INCREMENT(mpStat.pagealloc);
mp = alloc_metapage(GFP_NOFS);
mp->page = page;
+ mp->sb = inode->i_sb;
mp->flag = 0;
mp->xflag = COMMIT_PAGE;
mp->count = 1;
@@ -711,7 +712,8 @@ void force_metapage(struct metapage *mp)
get_page(page);
lock_page(page);
set_page_dirty(page);
- write_one_page(page, 1);
+ if (write_one_page(page))
+ jfs_error(mp->sb, "write_one_page() failed\n");
clear_bit(META_forcewrite, &mp->flag);
put_page(page);
}
@@ -756,7 +758,8 @@ void release_metapage(struct metapage * mp)
set_page_dirty(page);
if (test_bit(META_sync, &mp->flag)) {
clear_bit(META_sync, &mp->flag);
- write_one_page(page, 1);
+ if (write_one_page(page))
+ jfs_error(mp->sb, "write_one_page() failed\n");
lock_page(page); /* write_one_page unlocks the page */
}
} else if (mp->lsn) /* discard_metapage doesn't remove it */
diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h
index a869fb4a20d6..8b0ee514eb84 100644
--- a/fs/jfs/jfs_metapage.h
+++ b/fs/jfs/jfs_metapage.h
@@ -38,6 +38,7 @@ struct metapage {
/* implementation */
struct page *page;
+ struct super_block *sb;
unsigned int logical_size;
/* Journal management */
diff --git a/fs/libfs.c b/fs/libfs.c
index a04395334bb1..3aabe553fc45 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -974,7 +974,7 @@ int __generic_file_fsync(struct file *file, loff_t start, loff_t end,
int err;
int ret;
- err = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ err = file_write_and_wait_range(file, start, end);
if (err)
return err;
@@ -991,6 +991,10 @@ int __generic_file_fsync(struct file *file, loff_t start, loff_t end,
out:
inode_unlock(inode);
+ /* check and advance again to catch errors after syncing out buffers */
+ err = file_check_and_advance_wb_err(file);
+ if (ret == 0)
+ ret = err;
return ret;
}
EXPORT_SYMBOL(__generic_file_fsync);
diff --git a/fs/locks.c b/fs/locks.c
index af2031a1fcff..afefeb4ad6de 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1858,8 +1858,8 @@ EXPORT_SYMBOL(generic_setlease);
*
* Call this to establish a lease on the file. The "lease" argument is not
* used for F_UNLCK requests and may be NULL. For commands that set or alter
- * an existing lease, the (*lease)->fl_lmops->lm_break operation must be set;
- * if not, this function will return -ENOLCK (and generate a scary-looking
+ * an existing lease, the ``(*lease)->fl_lmops->lm_break`` operation must be
+ * set; if not, this function will return -ENOLCK (and generate a scary-looking
* stack trace).
*
* The "priv" pointer is passed directly to the lm_setup function as-is. It
@@ -1972,15 +1972,13 @@ EXPORT_SYMBOL(locks_lock_inode_wait);
* @cmd: the type of lock to apply.
*
* Apply a %FL_FLOCK style lock to an open file descriptor.
- * The @cmd can be one of
+ * The @cmd can be one of:
*
- * %LOCK_SH -- a shared lock.
- *
- * %LOCK_EX -- an exclusive lock.
- *
- * %LOCK_UN -- remove an existing lock.
- *
- * %LOCK_MAND -- a `mandatory' flock. This exists to emulate Windows Share Modes.
+ * - %LOCK_SH -- a shared lock.
+ * - %LOCK_EX -- an exclusive lock.
+ * - %LOCK_UN -- remove an existing lock.
+ * - %LOCK_MAND -- a 'mandatory' flock.
+ * This exists to emulate Windows Share Modes.
*
* %LOCK_MAND can be combined with %LOCK_READ or %LOCK_WRITE to allow other
* processes read and write access respectively.
@@ -2086,26 +2084,22 @@ static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
/* Report the first existing lock that would conflict with l.
* This implements the F_GETLK command of fcntl().
*/
-int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
+int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock *flock)
{
struct file_lock file_lock;
- struct flock flock;
int error;
- error = -EFAULT;
- if (copy_from_user(&flock, l, sizeof(flock)))
- goto out;
error = -EINVAL;
- if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK))
+ if (flock->l_type != F_RDLCK && flock->l_type != F_WRLCK)
goto out;
- error = flock_to_posix_lock(filp, &file_lock, &flock);
+ error = flock_to_posix_lock(filp, &file_lock, flock);
if (error)
goto out;
if (cmd == F_OFD_GETLK) {
error = -EINVAL;
- if (flock.l_pid != 0)
+ if (flock->l_pid != 0)
goto out;
cmd = F_GETLK;
@@ -2117,15 +2111,12 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
if (error)
goto out;
- flock.l_type = file_lock.fl_type;
+ flock->l_type = file_lock.fl_type;
if (file_lock.fl_type != F_UNLCK) {
- error = posix_lock_to_flock(&flock, &file_lock);
+ error = posix_lock_to_flock(flock, &file_lock);
if (error)
goto rel_priv;
}
- error = -EFAULT;
- if (!copy_to_user(l, &flock, sizeof(flock)))
- error = 0;
rel_priv:
locks_release_private(&file_lock);
out:
@@ -2218,26 +2209,16 @@ check_fmode_for_setlk(struct file_lock *fl)
* This implements both the F_SETLK and F_SETLKW commands of fcntl().
*/
int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
- struct flock __user *l)
+ struct flock *flock)
{
struct file_lock *file_lock = locks_alloc_lock();
- struct flock flock;
- struct inode *inode;
+ struct inode *inode = locks_inode(filp);
struct file *f;
int error;
if (file_lock == NULL)
return -ENOLCK;
- inode = locks_inode(filp);
-
- /*
- * This might block, so we do it before checking the inode.
- */
- error = -EFAULT;
- if (copy_from_user(&flock, l, sizeof(flock)))
- goto out;
-
/* Don't allow mandatory locks on files that may be memory mapped
* and shared.
*/
@@ -2246,7 +2227,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
goto out;
}
- error = flock_to_posix_lock(filp, file_lock, &flock);
+ error = flock_to_posix_lock(filp, file_lock, flock);
if (error)
goto out;
@@ -2261,7 +2242,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
switch (cmd) {
case F_OFD_SETLK:
error = -EINVAL;
- if (flock.l_pid != 0)
+ if (flock->l_pid != 0)
goto out;
cmd = F_SETLK;
@@ -2270,7 +2251,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
break;
case F_OFD_SETLKW:
error = -EINVAL;
- if (flock.l_pid != 0)
+ if (flock->l_pid != 0)
goto out;
cmd = F_SETLKW;
@@ -2315,26 +2296,22 @@ out:
/* Report the first existing lock that would conflict with l.
* This implements the F_GETLK command of fcntl().
*/
-int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
+int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 *flock)
{
struct file_lock file_lock;
- struct flock64 flock;
int error;
- error = -EFAULT;
- if (copy_from_user(&flock, l, sizeof(flock)))
- goto out;
error = -EINVAL;
- if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK))
+ if (flock->l_type != F_RDLCK && flock->l_type != F_WRLCK)
goto out;
- error = flock64_to_posix_lock(filp, &file_lock, &flock);
+ error = flock64_to_posix_lock(filp, &file_lock, flock);
if (error)
goto out;
if (cmd == F_OFD_GETLK) {
error = -EINVAL;
- if (flock.l_pid != 0)
+ if (flock->l_pid != 0)
goto out;
cmd = F_GETLK64;
@@ -2346,13 +2323,9 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
if (error)
goto out;
- flock.l_type = file_lock.fl_type;
+ flock->l_type = file_lock.fl_type;
if (file_lock.fl_type != F_UNLCK)
- posix_lock_to_flock64(&flock, &file_lock);
-
- error = -EFAULT;
- if (!copy_to_user(l, &flock, sizeof(flock)))
- error = 0;
+ posix_lock_to_flock64(flock, &file_lock);
locks_release_private(&file_lock);
out:
@@ -2363,26 +2336,16 @@ out:
* This implements both the F_SETLK and F_SETLKW commands of fcntl().
*/
int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
- struct flock64 __user *l)
+ struct flock64 *flock)
{
struct file_lock *file_lock = locks_alloc_lock();
- struct flock64 flock;
- struct inode *inode;
+ struct inode *inode = locks_inode(filp);
struct file *f;
int error;
if (file_lock == NULL)
return -ENOLCK;
- /*
- * This might block, so we do it before checking the inode.
- */
- error = -EFAULT;
- if (copy_from_user(&flock, l, sizeof(flock)))
- goto out;
-
- inode = locks_inode(filp);
-
/* Don't allow mandatory locks on files that may be memory mapped
* and shared.
*/
@@ -2391,7 +2354,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
goto out;
}
- error = flock64_to_posix_lock(filp, file_lock, &flock);
+ error = flock64_to_posix_lock(filp, file_lock, flock);
if (error)
goto out;
@@ -2406,7 +2369,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
switch (cmd) {
case F_OFD_SETLK:
error = -EINVAL;
- if (flock.l_pid != 0)
+ if (flock->l_pid != 0)
goto out;
cmd = F_SETLK64;
@@ -2415,7 +2378,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
break;
case F_OFD_SETLKW:
error = -EINVAL;
- if (flock.l_pid != 0)
+ if (flock->l_pid != 0)
goto out;
cmd = F_SETLKW64;
diff --git a/fs/mbcache.c b/fs/mbcache.c
index b19be429d655..d818fd236787 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -10,13 +10,14 @@
/*
* Mbcache is a simple key-value store. Keys need not be unique, however
* key-value pairs are expected to be unique (we use this fact in
- * mb_cache_entry_delete_block()).
+ * mb_cache_entry_delete()).
*
* Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
- * They use hash of a block contents as a key and block number as a value.
- * That's why keys need not be unique (different xattr blocks may end up having
- * the same hash). However block number always uniquely identifies a cache
- * entry.
+ * Ext4 also uses it for deduplication of xattr values stored in inodes.
+ * They use hash of data as a key and provide a value that may represent a
+ * block or inode number. That's why keys need not be unique (hash of different
+ * data may be the same). However user provided value always uniquely
+ * identifies a cache entry.
*
* We provide functions for creation and removal of entries, search by key,
* and a special "delete entry with given key-value pair" operation. Fixed
@@ -62,15 +63,15 @@ static inline struct hlist_bl_head *mb_cache_entry_head(struct mb_cache *cache,
* @cache - cache where the entry should be created
* @mask - gfp mask with which the entry should be allocated
* @key - key of the entry
- * @block - block that contains data
- * @reusable - is the block reusable by other inodes?
+ * @value - value of the entry
+ * @reusable - is the entry reusable by others?
*
- * Creates entry in @cache with key @key and records that data is stored in
- * block @block. The function returns -EBUSY if entry with the same key
- * and for the same block already exists in cache. Otherwise 0 is returned.
+ * Creates entry in @cache with key @key and value @value. The function returns
+ * -EBUSY if entry with the same key and value already exists in cache.
+ * Otherwise 0 is returned.
*/
int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
- sector_t block, bool reusable)
+ u64 value, bool reusable)
{
struct mb_cache_entry *entry, *dup;
struct hlist_bl_node *dup_node;
@@ -91,12 +92,12 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
/* One ref for hash, one ref returned */
atomic_set(&entry->e_refcnt, 1);
entry->e_key = key;
- entry->e_block = block;
+ entry->e_value = value;
entry->e_reusable = reusable;
head = mb_cache_entry_head(cache, key);
hlist_bl_lock(head);
hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
- if (dup->e_key == key && dup->e_block == block) {
+ if (dup->e_key == key && dup->e_value == value) {
hlist_bl_unlock(head);
kmem_cache_free(mb_entry_cache, entry);
return -EBUSY;
@@ -187,13 +188,13 @@ struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache,
EXPORT_SYMBOL(mb_cache_entry_find_next);
/*
- * mb_cache_entry_get - get a cache entry by block number (and key)
+ * mb_cache_entry_get - get a cache entry by value (and key)
* @cache - cache we work with
- * @key - key of block number @block
- * @block - block number
+ * @key - key
+ * @value - value
*/
struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
- sector_t block)
+ u64 value)
{
struct hlist_bl_node *node;
struct hlist_bl_head *head;
@@ -202,7 +203,7 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
head = mb_cache_entry_head(cache, key);
hlist_bl_lock(head);
hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
- if (entry->e_key == key && entry->e_block == block) {
+ if (entry->e_key == key && entry->e_value == value) {
atomic_inc(&entry->e_refcnt);
goto out;
}
@@ -214,15 +215,14 @@ out:
}
EXPORT_SYMBOL(mb_cache_entry_get);
-/* mb_cache_entry_delete_block - remove information about block from cache
+/* mb_cache_entry_delete - remove a cache entry
* @cache - cache we work with
- * @key - key of block @block
- * @block - block number
+ * @key - key
+ * @value - value
*
- * Remove entry from cache @cache with key @key with data stored in @block.
+ * Remove entry from cache @cache with key @key and value @value.
*/
-void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key,
- sector_t block)
+void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value)
{
struct hlist_bl_node *node;
struct hlist_bl_head *head;
@@ -231,7 +231,7 @@ void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key,
head = mb_cache_entry_head(cache, key);
hlist_bl_lock(head);
hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
- if (entry->e_key == key && entry->e_block == block) {
+ if (entry->e_key == key && entry->e_value == value) {
/* We keep hash list reference to keep entry alive */
hlist_bl_del_init(&entry->e_hash_list);
hlist_bl_unlock(head);
@@ -248,7 +248,7 @@ void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key,
}
hlist_bl_unlock(head);
}
-EXPORT_SYMBOL(mb_cache_entry_delete_block);
+EXPORT_SYMBOL(mb_cache_entry_delete);
/* mb_cache_entry_touch - cache entry got used
* @cache - cache the entry belongs to
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index 7edc9b395700..baa9721f1299 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -57,7 +57,7 @@ static int dir_commit_chunk(struct page *page, loff_t pos, unsigned len)
mark_inode_dirty(dir);
}
if (IS_DIRSYNC(dir))
- err = write_one_page(page, 1);
+ err = write_one_page(page);
else
unlock_page(page);
return err;
diff --git a/fs/minix/itree_common.c b/fs/minix/itree_common.c
index 4c57c9af6946..2d1ca08870f7 100644
--- a/fs/minix/itree_common.c
+++ b/fs/minix/itree_common.c
@@ -142,7 +142,7 @@ changed:
return -EAGAIN;
}
-static inline int get_block(struct inode * inode, sector_t block,
+static int get_block(struct inode * inode, sector_t block,
struct buffer_head *bh, int create)
{
int err = -EIO;
diff --git a/fs/mount.h b/fs/mount.h
index bf1fda6eed8f..de45d9e76748 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -58,6 +58,7 @@ struct mount {
struct mnt_namespace *mnt_ns; /* containing namespace */
struct mountpoint *mnt_mp; /* where is it mounted */
struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */
+ struct list_head mnt_umounting; /* list entry for umount propagation */
#ifdef CONFIG_FSNOTIFY
struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks;
__u32 mnt_fsnotify_mask;
diff --git a/fs/mpage.c b/fs/mpage.c
index baff8f820c29..2e4c41ccb5c9 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -50,7 +50,8 @@ static void mpage_end_io(struct bio *bio)
bio_for_each_segment_all(bv, bio, i) {
struct page *page = bv->bv_page;
- page_endio(page, op_is_write(bio_op(bio)), bio->bi_error);
+ page_endio(page, op_is_write(bio_op(bio)),
+ blk_status_to_errno(bio->bi_status));
}
bio_put(bio);
@@ -344,6 +345,7 @@ confused:
*
* So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be
* submitted in the following order:
+ *
* 12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16
*
* because the indirect block has to be read to get the mappings of blocks
@@ -614,6 +616,7 @@ alloc_new:
goto confused;
wbc_init_bio(wbc, bio);
+ bio->bi_write_hint = inode->i_write_hint;
}
/*
diff --git a/fs/namei.c b/fs/namei.c
index 6571a5f5112e..e0b46eb0e212 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1008,7 +1008,7 @@ static int may_linkat(struct path *link)
/* Source inode owner (or CAP_FOWNER) can hardlink all they like,
* otherwise, it must be a safe source.
*/
- if (inode_owner_or_capable(inode) || safe_hardlink_source(inode))
+ if (safe_hardlink_source(inode) || inode_owner_or_capable(inode))
return 0;
audit_log_link_denied("linkat", link);
@@ -4332,6 +4332,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
* The worst of all namespace operations - renaming directory. "Perverted"
* doesn't even start to describe it. Somebody in UCB had a heck of a trip...
* Problems:
+ *
* a) we can get into loop creation.
* b) race potential - two innocent renames can create a loop together.
* That's where 4.4 screws up. Current fix: serialization on
@@ -4362,11 +4363,11 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
{
int error;
bool is_dir = d_is_dir(old_dentry);
- const unsigned char *old_name;
struct inode *source = old_dentry->d_inode;
struct inode *target = new_dentry->d_inode;
bool new_is_dir = false;
unsigned max_links = new_dir->i_sb->s_max_links;
+ struct name_snapshot old_name;
if (source == target)
return 0;
@@ -4413,7 +4414,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (error)
return error;
- old_name = fsnotify_oldname_init(old_dentry->d_name.name);
+ take_dentry_name_snapshot(&old_name, old_dentry);
dget(new_dentry);
if (!is_dir || (flags & RENAME_EXCHANGE))
lock_two_nondirectories(source, target);
@@ -4468,14 +4469,14 @@ out:
inode_unlock(target);
dput(new_dentry);
if (!error) {
- fsnotify_move(old_dir, new_dir, old_name, is_dir,
+ fsnotify_move(old_dir, new_dir, old_name.name, is_dir,
!(flags & RENAME_EXCHANGE) ? target : NULL, old_dentry);
if (flags & RENAME_EXCHANGE) {
fsnotify_move(new_dir, old_dir, old_dentry->d_name.name,
new_is_dir, NULL, new_dentry);
}
}
- fsnotify_oldname_free(old_name);
+ release_dentry_name_snapshot(&old_name);
return error;
}
diff --git a/fs/namespace.c b/fs/namespace.c
index 8bd3e4d448b9..81f934b5d571 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -236,6 +236,7 @@ static struct mount *alloc_vfsmnt(const char *name)
INIT_LIST_HEAD(&mnt->mnt_slave_list);
INIT_LIST_HEAD(&mnt->mnt_slave);
INIT_HLIST_NODE(&mnt->mnt_mp_list);
+ INIT_LIST_HEAD(&mnt->mnt_umounting);
init_fs_pin(&mnt->mnt_umount, drop_mountpoint);
}
return mnt;
@@ -3238,7 +3239,6 @@ static void __init init_mount_tree(void)
void __init mnt_init(void)
{
- unsigned u;
int err;
mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
@@ -3247,22 +3247,17 @@ void __init mnt_init(void)
mount_hashtable = alloc_large_system_hash("Mount-cache",
sizeof(struct hlist_head),
mhash_entries, 19,
- 0,
+ HASH_ZERO,
&m_hash_shift, &m_hash_mask, 0, 0);
mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache",
sizeof(struct hlist_head),
mphash_entries, 19,
- 0,
+ HASH_ZERO,
&mp_hash_shift, &mp_hash_mask, 0, 0);
if (!mount_hashtable || !mountpoint_hashtable)
panic("Failed to allocate mount hash table\n");
- for (u = 0; u <= m_hash_mask; u++)
- INIT_HLIST_HEAD(&mount_hashtable[u]);
- for (u = 0; u <= mp_hash_mask; u++)
- INIT_HLIST_HEAD(&mountpoint_hashtable[u]);
-
kernfs_init();
err = sysfs_init();
@@ -3488,6 +3483,8 @@ static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
return err;
}
+ put_mnt_ns(old_mnt_ns);
+
/* Update the pwd and root */
set_fs_pwd(fs, &root);
set_fs_root(fs, &root);
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index 0c3905e0542e..6719c0be674d 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -89,7 +89,7 @@ static int ncp_file_mmap_fault(struct vm_fault *vmf)
* -- nyc
*/
count_vm_event(PGMAJFAULT);
- mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
+ count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
return VM_FAULT_MAJOR;
}
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 0ca370d23ddb..d8863a804b15 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -188,7 +188,7 @@ static void bl_end_io_read(struct bio *bio)
{
struct parallel_io *par = bio->bi_private;
- if (bio->bi_error) {
+ if (bio->bi_status) {
struct nfs_pgio_header *header = par->data;
if (!header->pnfs_error)
@@ -319,7 +319,7 @@ static void bl_end_io_write(struct bio *bio)
struct parallel_io *par = bio->bi_private;
struct nfs_pgio_header *header = par->data;
- if (bio->bi_error) {
+ if (bio->bi_status) {
if (!header->pnfs_error)
header->pnfs_error = -EIO;
pnfs_set_lo_fail(header->lseg);
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index c14758e08d73..390ac9c39c59 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -753,7 +753,6 @@ static void nfs4_callback_free_slot(struct nfs4_session *session,
* A single slot, so highest used slotid is either 0 or -1
*/
nfs4_free_slot(tbl, slot);
- nfs4_slot_tbl_drain_complete(tbl);
spin_unlock(&tbl->slot_tbl_lock);
}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 32ccd7754f8a..2ac00bf4ecf1 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1946,29 +1946,6 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
}
EXPORT_SYMBOL_GPL(nfs_link);
-static void
-nfs_complete_rename(struct rpc_task *task, struct nfs_renamedata *data)
-{
- struct dentry *old_dentry = data->old_dentry;
- struct dentry *new_dentry = data->new_dentry;
- struct inode *old_inode = d_inode(old_dentry);
- struct inode *new_inode = d_inode(new_dentry);
-
- nfs_mark_for_revalidate(old_inode);
-
- switch (task->tk_status) {
- case 0:
- if (new_inode != NULL)
- nfs_drop_nlink(new_inode);
- d_move(old_dentry, new_dentry);
- nfs_set_verifier(new_dentry,
- nfs_save_change_attribute(data->new_dir));
- break;
- case -ENOENT:
- nfs_dentry_handle_enoent(old_dentry);
- }
-}
-
/*
* RENAME
* FIXME: Some nfsds, like the Linux user space nfsd, may generate a
@@ -1999,7 +1976,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
{
struct inode *old_inode = d_inode(old_dentry);
struct inode *new_inode = d_inode(new_dentry);
- struct dentry *dentry = NULL;
+ struct dentry *dentry = NULL, *rehash = NULL;
struct rpc_task *task;
int error = -EBUSY;
@@ -2022,8 +1999,10 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
* To prevent any new references to the target during the
* rename, we unhash the dentry in advance.
*/
- if (!d_unhashed(new_dentry))
+ if (!d_unhashed(new_dentry)) {
d_drop(new_dentry);
+ rehash = new_dentry;
+ }
if (d_count(new_dentry) > 2) {
int err;
@@ -2040,6 +2019,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out;
new_dentry = dentry;
+ rehash = NULL;
new_inode = NULL;
}
}
@@ -2048,8 +2028,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_inode != NULL)
NFS_PROTO(new_inode)->return_delegation(new_inode);
- task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
- nfs_complete_rename);
+ task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
if (IS_ERR(task)) {
error = PTR_ERR(task);
goto out;
@@ -2059,9 +2038,27 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (error == 0)
error = task->tk_status;
rpc_put_task(task);
+ nfs_mark_for_revalidate(old_inode);
out:
+ if (rehash)
+ d_rehash(rehash);
trace_nfs_rename_exit(old_dir, old_dentry,
new_dir, new_dentry, error);
+ if (!error) {
+ if (new_inode != NULL)
+ nfs_drop_nlink(new_inode);
+ /*
+ * The d_move() should be here instead of in an async RPC completion
+ * handler because we need the proper locks to move the dentry. If
+ * we're interrupted by a signal, the async RPC completion handler
+ * should mark the directories for revalidation.
+ */
+ d_move(old_dentry, new_dentry);
+ nfs_set_verifier(new_dentry,
+ nfs_save_change_attribute(new_dir));
+ } else if (error == -ENOENT)
+ nfs_dentry_handle_enoent(old_dentry);
+
/* new dentry created? */
if (dentry)
dput(dentry);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 3e24392f2caa..8701d7617964 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -7,6 +7,7 @@
#include <linux/security.h>
#include <linux/crc32.h>
#include <linux/nfs_page.h>
+#include <linux/wait_bit.h>
#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c08c46a3b8cd..98b0b662af09 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2589,7 +2589,8 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata,
/* Except MODE, it seems harmless of setting twice. */
if (opendata->o_arg.createmode != NFS4_CREATE_EXCLUSIVE &&
- attrset[1] & FATTR4_WORD1_MODE)
+ (attrset[1] & FATTR4_WORD1_MODE ||
+ attrset[2] & FATTR4_WORD2_MODE_UMASK))
sattr->ia_valid &= ~ATTR_MODE;
if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL)
@@ -6372,7 +6373,7 @@ struct nfs4_lock_waiter {
};
static int
-nfs4_wake_lock_waiter(wait_queue_t *wait, unsigned int mode, int flags, void *key)
+nfs4_wake_lock_waiter(wait_queue_entry_t *wait, unsigned int mode, int flags, void *key)
{
int ret;
struct cb_notify_lock_args *cbnl = key;
@@ -6415,7 +6416,7 @@ nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
.inode = state->inode,
.owner = &owner,
.notified = false };
- wait_queue_t wait;
+ wait_queue_entry_t wait;
/* Don't bother with waitqueue if we don't expect a callback */
if (!test_bit(NFS_STATE_MAY_NOTIFY_LOCK, &state->flags))
@@ -8416,6 +8417,7 @@ static void nfs4_layoutget_release(void *calldata)
size_t max_pages = max_response_pages(server);
dprintk("--> %s\n", __func__);
+ nfs4_sequence_free_slot(&lgp->res.seq_res);
nfs4_free_pages(lgp->args.layout.pages, max_pages);
pnfs_put_layout_hdr(NFS_I(inode)->layout);
put_nfs_open_context(lgp->args.ctx);
@@ -8490,7 +8492,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags)
/* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
if (status == 0 && lgp->res.layoutp->len)
lseg = pnfs_layout_process(lgp);
- nfs4_sequence_free_slot(&lgp->res.seq_res);
rpc_put_task(task);
dprintk("<-- %s status=%d\n", __func__, status);
if (status)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index b34de036501b..cbf82b0d4467 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -2134,6 +2134,8 @@ again:
put_rpccred(cred);
switch (status) {
case 0:
+ case -EINTR:
+ case -ERESTARTSYS:
break;
case -ETIMEDOUT:
if (clnt->cl_softrtry)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index eceb4eabb064..c5334c0e23a1 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2545,10 +2545,25 @@ EXPORT_SYMBOL_GPL(nfs_set_sb_security);
int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot,
struct nfs_mount_info *mount_info)
{
+ int error;
+ unsigned long kflags = 0, kflags_out = 0;
+
/* clone any lsm security options from the parent to the new sb */
if (d_inode(mntroot)->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops)
return -ESTALE;
- return security_sb_clone_mnt_opts(mount_info->cloned->sb, s);
+
+ if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL)
+ kflags |= SECURITY_LSM_NATIVE_LABELS;
+
+ error = security_sb_clone_mnt_opts(mount_info->cloned->sb, s, kflags,
+ &kflags_out);
+ if (error)
+ return error;
+
+ if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL &&
+ !(kflags_out & SECURITY_LSM_NATIVE_LABELS))
+ NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL;
+ return 0;
}
EXPORT_SYMBOL_GPL(nfs_clone_sb_security);
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index fb5213afc854..c862c2489df0 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -219,6 +219,9 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
u8 *buf, *d, type, assoc;
int error;
+ if (WARN_ON_ONCE(!blk_queue_scsi_passthrough(q)))
+ return -EINVAL;
+
buf = kzalloc(bufflen, GFP_KERNEL);
if (!buf)
return -ENOMEM;
@@ -229,7 +232,6 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
goto out_free_buf;
}
req = scsi_req(rq);
- scsi_req_init(rq);
error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL);
if (error)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 2be32955d7f2..38d0383dc7f9 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -911,24 +911,13 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp,
__be32 nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen,
unsigned long *count)
{
- mm_segment_t oldfs;
+ struct iov_iter iter;
int host_err;
- oldfs = get_fs();
- set_fs(KERNEL_DS);
- host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset, 0);
- set_fs(oldfs);
- return nfsd_finish_read(file, count, host_err);
-}
+ iov_iter_kvec(&iter, READ | ITER_KVEC, vec, vlen, *count);
+ host_err = vfs_iter_read(file, &iter, &offset, 0);
-static __be32
-nfsd_vfs_read(struct svc_rqst *rqstp, struct file *file,
- loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
-{
- if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
- return nfsd_splice_read(rqstp, file, offset, count);
- else
- return nfsd_readv(file, offset, vec, vlen, count);
+ return nfsd_finish_read(file, count, host_err);
}
/*
@@ -974,7 +963,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
unsigned long *cnt, int stable)
{
struct svc_export *exp;
- mm_segment_t oldfs;
+ struct iov_iter iter;
__be32 err = 0;
int host_err;
int use_wgather;
@@ -1000,10 +989,8 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
if (stable && !use_wgather)
flags |= RWF_SYNC;
- /* Write the data. */
- oldfs = get_fs(); set_fs(KERNEL_DS);
- host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos, flags);
- set_fs(oldfs);
+ iov_iter_kvec(&iter, WRITE | ITER_KVEC, vec, vlen, *cnt);
+ host_err = vfs_iter_write(file, &iter, &pos, flags);
if (host_err < 0)
goto out_nfserr;
*cnt = host_err;
@@ -1044,7 +1031,12 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
ra = nfsd_init_raparms(file);
trace_read_opened(rqstp, fhp, offset, vlen);
- err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count);
+
+ if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
+ err = nfsd_splice_read(rqstp, file, offset, count);
+ else
+ err = nfsd_readv(file, offset, vec, vlen, count);
+
trace_read_io_done(rqstp, fhp, offset, vlen);
if (ra)
@@ -1464,41 +1456,34 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
__be32
nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
{
- mm_segment_t oldfs;
__be32 err;
- int host_err;
+ const char *link;
struct path path;
+ DEFINE_DELAYED_CALL(done);
+ int len;
err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP);
- if (err)
- goto out;
+ if (unlikely(err))
+ return err;
path.mnt = fhp->fh_export->ex_path.mnt;
path.dentry = fhp->fh_dentry;
- err = nfserr_inval;
- if (!d_is_symlink(path.dentry))
- goto out;
+ if (unlikely(!d_is_symlink(path.dentry)))
+ return nfserr_inval;
touch_atime(&path);
- /* N.B. Why does this call need a get_fs()??
- * Remove the set_fs and watch the fireworks:-) --okir
- */
- oldfs = get_fs(); set_fs(KERNEL_DS);
- host_err = vfs_readlink(path.dentry, (char __user *)buf, *lenp);
- set_fs(oldfs);
-
- if (host_err < 0)
- goto out_nfserr;
- *lenp = host_err;
- err = 0;
-out:
- return err;
+ link = vfs_get_link(path.dentry, &done);
+ if (IS_ERR(link))
+ return nfserrno(PTR_ERR(link));
-out_nfserr:
- err = nfserrno(host_err);
- goto out;
+ len = strlen(link);
+ if (len < *lenp)
+ *lenp = len;
+ memcpy(buf, link, *lenp);
+ do_delayed_call(&done);
+ return 0;
}
/*
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 6f87b2ac1aeb..e73c86d9855c 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -338,7 +338,7 @@ static void nilfs_end_bio_write(struct bio *bio)
{
struct nilfs_segment_buffer *segbuf = bio->bi_private;
- if (bio->bi_error)
+ if (bio->bi_status)
atomic_inc(&segbuf->sb_err);
bio_put(bio);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index febed1217b3f..70ded52dc1dd 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2161,7 +2161,7 @@ void nilfs_flush_segment(struct super_block *sb, ino_t ino)
}
struct nilfs_segctor_wait_request {
- wait_queue_t wq;
+ wait_queue_entry_t wq;
__u32 seq;
int err;
atomic_t done;
@@ -2206,8 +2206,7 @@ static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
unsigned long flags;
spin_lock_irqsave(&sci->sc_wait_request.lock, flags);
- list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.task_list,
- wq.task_list) {
+ list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.head, wq.entry) {
if (!atomic_read(&wrq->done) &&
nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) {
wrq->err = err;
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 01a9f0f007d4..0c4583b61717 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -161,16 +161,20 @@ int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask
if (unlikely(!fsnotify_inode_watches_children(p_inode)))
__fsnotify_update_child_dentry_flags(p_inode);
else if (p_inode->i_fsnotify_mask & mask) {
+ struct name_snapshot name;
+
/* we are notifying a parent so come up with the new mask which
* specifies these are events which came from a child. */
mask |= FS_EVENT_ON_CHILD;
+ take_dentry_name_snapshot(&name, dentry);
if (path)
ret = fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH,
- dentry->d_name.name, 0);
+ name.name, 0);
else
ret = fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
- dentry->d_name.name, 0);
+ name.name, 0);
+ release_dentry_name_snapshot(&name);
}
dput(parent);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 0da0332725aa..ffe003982d95 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -516,9 +516,9 @@ static void o2hb_bio_end_io(struct bio *bio)
{
struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
- if (bio->bi_error) {
- mlog(ML_ERROR, "IO Error %d\n", bio->bi_error);
- wc->wc_error = bio->bi_error;
+ if (bio->bi_status) {
+ mlog(ML_ERROR, "IO Error %d\n", bio->bi_status);
+ wc->wc_error = blk_status_to_errno(bio->bi_status);
}
o2hb_bio_wait_dec(wc, 1);
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index 564c504d6efd..74a21f6695c8 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -426,6 +426,7 @@ static int sc_fop_release(struct inode *inode, struct file *file)
struct o2net_sock_container *dummy_sc = sd->dbg_sock;
o2net_debug_del_sc(dummy_sc);
+ kfree(dummy_sc);
return seq_release_private(inode, file);
}
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 3b7c937a36b5..4689940a953c 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2591,6 +2591,10 @@ void ocfs2_inode_unlock_tracker(struct inode *inode,
struct ocfs2_lock_res *lockres;
lockres = &OCFS2_I(inode)->ip_inode_lockres;
+ /* had_lock means that the current process already took the cluster
+ * lock previously. If had_lock is 1, we have nothing to do here, and
+ * it will get unlocked where we got the lock.
+ */
if (!had_lock) {
ocfs2_remove_holder(lockres, oh);
ocfs2_inode_unlock(inode, ex);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 382401d3e88f..1a1e0078ab38 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -136,7 +136,7 @@ struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno)
struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
int sysfile_type)
{
- int rc = 0;
+ int rc = -ESTALE;
struct inode *inode = NULL;
struct super_block *sb = osb->sb;
struct ocfs2_find_inode_args args;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 44d178b8d1aa..5bb4a89f9045 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -25,6 +25,8 @@
#ifndef _OCFS2_FS_H
#define _OCFS2_FS_H
+#include <linux/magic.h>
+
/* Version */
#define OCFS2_MAJOR_REV_LEVEL 0
#define OCFS2_MINOR_REV_LEVEL 90
@@ -56,9 +58,6 @@
#define OCFS2_MIN_BLOCKSIZE 512
#define OCFS2_MAX_BLOCKSIZE OCFS2_MIN_CLUSTERSIZE
-/* Filesystem magic number */
-#define OCFS2_SUPER_MAGIC 0x7461636f
-
/* Object signatures */
#define OCFS2_SUPER_BLOCK_SIGNATURE "OCFSV2"
#define OCFS2_INODE_SIGNATURE "INODE01"
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 820359096c7a..d6c350ba25b9 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -631,7 +631,7 @@ static struct attribute *ocfs2_attrs[] = {
NULL,
};
-static struct attribute_group ocfs2_attr_group = {
+static const struct attribute_group ocfs2_attr_group = {
.attrs = ocfs2_attrs,
};
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 3c5384d9b3a5..f70c3778d600 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1328,20 +1328,21 @@ static int ocfs2_xattr_get(struct inode *inode,
void *buffer,
size_t buffer_size)
{
- int ret;
+ int ret, had_lock;
struct buffer_head *di_bh = NULL;
+ struct ocfs2_lock_holder oh;
- ret = ocfs2_inode_lock(inode, &di_bh, 0);
- if (ret < 0) {
- mlog_errno(ret);
- return ret;
+ had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh);
+ if (had_lock < 0) {
+ mlog_errno(had_lock);
+ return had_lock;
}
down_read(&OCFS2_I(inode)->ip_xattr_sem);
ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
name, buffer, buffer_size);
up_read(&OCFS2_I(inode)->ip_xattr_sem);
- ocfs2_inode_unlock(inode, 0);
+ ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);
brelse(di_bh);
@@ -3537,11 +3538,12 @@ int ocfs2_xattr_set(struct inode *inode,
{
struct buffer_head *di_bh = NULL;
struct ocfs2_dinode *di;
- int ret, credits, ref_meta = 0, ref_credits = 0;
+ int ret, credits, had_lock, ref_meta = 0, ref_credits = 0;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct inode *tl_inode = osb->osb_tl_inode;
struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, };
struct ocfs2_refcount_tree *ref_tree = NULL;
+ struct ocfs2_lock_holder oh;
struct ocfs2_xattr_info xi = {
.xi_name_index = name_index,
@@ -3572,8 +3574,9 @@ int ocfs2_xattr_set(struct inode *inode,
return -ENOMEM;
}
- ret = ocfs2_inode_lock(inode, &di_bh, 1);
- if (ret < 0) {
+ had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh);
+ if (had_lock < 0) {
+ ret = had_lock;
mlog_errno(ret);
goto cleanup_nolock;
}
@@ -3670,7 +3673,7 @@ cleanup:
if (ret)
mlog_errno(ret);
}
- ocfs2_inode_unlock(inode, 1);
+ ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
cleanup_nolock:
brelse(di_bh);
brelse(xbs.xattr_bh);
diff --git a/fs/open.c b/fs/open.c
index cd0c5be8d012..35bb784763a4 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -707,6 +707,9 @@ static int do_dentry_open(struct file *f,
f->f_inode = inode;
f->f_mapping = inode->i_mapping;
+ /* Ensure that we skip any errors that predate opening of the file */
+ f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
+
if (unlikely(f->f_flags & O_PATH)) {
f->f_mode = FMODE_PATH;
f->f_op = &empty_fops;
@@ -759,6 +762,7 @@ static int do_dentry_open(struct file *f,
likely(f->f_op->write || f->f_op->write_iter))
f->f_mode |= FMODE_CAN_WRITE;
+ f->f_write_hint = WRITE_LIFE_NOT_SET;
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c
index 83b506020718..038d67545d9f 100644
--- a/fs/orangefs/orangefs-bufmap.c
+++ b/fs/orangefs/orangefs-bufmap.c
@@ -46,8 +46,8 @@ static void run_down(struct slot_map *m)
spin_lock(&m->q.lock);
if (m->c != -1) {
for (;;) {
- if (likely(list_empty(&wait.task_list)))
- __add_wait_queue_tail(&m->q, &wait);
+ if (likely(list_empty(&wait.entry)))
+ __add_wait_queue_entry_tail(&m->q, &wait);
set_current_state(TASK_UNINTERRUPTIBLE);
if (m->c == -1)
@@ -84,8 +84,8 @@ static int wait_for_free(struct slot_map *m)
do {
long n = left, t;
- if (likely(list_empty(&wait.task_list)))
- __add_wait_queue_tail_exclusive(&m->q, &wait);
+ if (likely(list_empty(&wait.entry)))
+ __add_wait_queue_entry_tail_exclusive(&m->q, &wait);
set_current_state(TASK_INTERRUPTIBLE);
if (m->c > 0)
@@ -108,8 +108,8 @@ static int wait_for_free(struct slot_map *m)
left = -EINTR;
} while (left > 0);
- if (!list_empty(&wait.task_list))
- list_del(&wait.task_list);
+ if (!list_empty(&wait.entry))
+ list_del(&wait.entry);
else if (left <= 0 && waitqueue_active(&m->q))
__wake_up_locked_key(&m->q, TASK_INTERRUPTIBLE, NULL);
__set_current_state(TASK_RUNNING);
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 33fe6ca929f7..e5869f91b3ab 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -329,15 +329,9 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
.link = link
};
- upper = lookup_one_len(dentry->d_name.name, upperdir,
- dentry->d_name.len);
- err = PTR_ERR(upper);
- if (IS_ERR(upper))
- goto out;
-
err = security_inode_copy_up(dentry, &new_creds);
if (err < 0)
- goto out1;
+ goto out;
if (new_creds)
old_creds = override_creds(new_creds);
@@ -361,7 +355,7 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
}
if (err)
- goto out2;
+ goto out;
if (S_ISREG(stat->mode)) {
struct path upperpath;
@@ -397,10 +391,23 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
/*
* Store identifier of lower inode in upper inode xattr to
* allow lookup of the copy up origin inode.
+ *
+ * Don't set origin when we are breaking the association with a lower
+ * hard link.
*/
- err = ovl_set_origin(dentry, lowerpath->dentry, temp);
- if (err)
+ if (S_ISDIR(stat->mode) || stat->nlink == 1) {
+ err = ovl_set_origin(dentry, lowerpath->dentry, temp);
+ if (err)
+ goto out_cleanup;
+ }
+
+ upper = lookup_one_len(dentry->d_name.name, upperdir,
+ dentry->d_name.len);
+ if (IS_ERR(upper)) {
+ err = PTR_ERR(upper);
+ upper = NULL;
goto out_cleanup;
+ }
if (tmpfile)
err = ovl_do_link(temp, udir, upper, true);
@@ -415,17 +422,15 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
/* Restore timestamps on parent (best effort) */
ovl_set_timestamps(upperdir, pstat);
-out2:
+out:
dput(temp);
-out1:
dput(upper);
-out:
return err;
out_cleanup:
if (!tmpfile)
ovl_cleanup(wdir, temp);
- goto out2;
+ goto out;
}
/*
diff --git a/fs/pnode.c b/fs/pnode.c
index 5bc7896d122a..53d411a371ce 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -24,6 +24,11 @@ static inline struct mount *first_slave(struct mount *p)
return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave);
}
+static inline struct mount *last_slave(struct mount *p)
+{
+ return list_entry(p->mnt_slave_list.prev, struct mount, mnt_slave);
+}
+
static inline struct mount *next_slave(struct mount *p)
{
return list_entry(p->mnt_slave.next, struct mount, mnt_slave);
@@ -162,6 +167,19 @@ static struct mount *propagation_next(struct mount *m,
}
}
+static struct mount *skip_propagation_subtree(struct mount *m,
+ struct mount *origin)
+{
+ /*
+ * Advance m such that propagation_next will not return
+ * the slaves of m.
+ */
+ if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
+ m = last_slave(m);
+
+ return m;
+}
+
static struct mount *next_group(struct mount *m, struct mount *origin)
{
while (1) {
@@ -413,65 +431,104 @@ void propagate_mount_unlock(struct mount *mnt)
}
}
-/*
- * Mark all mounts that the MNT_LOCKED logic will allow to be unmounted.
- */
-static void mark_umount_candidates(struct mount *mnt)
+static void umount_one(struct mount *mnt, struct list_head *to_umount)
{
- struct mount *parent = mnt->mnt_parent;
- struct mount *m;
-
- BUG_ON(parent == mnt);
-
- for (m = propagation_next(parent, parent); m;
- m = propagation_next(m, parent)) {
- struct mount *child = __lookup_mnt(&m->mnt,
- mnt->mnt_mountpoint);
- if (!child || (child->mnt.mnt_flags & MNT_UMOUNT))
- continue;
- if (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m)) {
- SET_MNT_MARK(child);
- }
- }
+ CLEAR_MNT_MARK(mnt);
+ mnt->mnt.mnt_flags |= MNT_UMOUNT;
+ list_del_init(&mnt->mnt_child);
+ list_del_init(&mnt->mnt_umounting);
+ list_move_tail(&mnt->mnt_list, to_umount);
}
/*
* NOTE: unmounting 'mnt' naturally propagates to all other mounts its
* parent propagates to.
*/
-static void __propagate_umount(struct mount *mnt)
+static bool __propagate_umount(struct mount *mnt,
+ struct list_head *to_umount,
+ struct list_head *to_restore)
{
- struct mount *parent = mnt->mnt_parent;
- struct mount *m;
+ bool progress = false;
+ struct mount *child;
- BUG_ON(parent == mnt);
+ /*
+ * The state of the parent won't change if this mount is
+ * already unmounted or marked as without children.
+ */
+ if (mnt->mnt.mnt_flags & (MNT_UMOUNT | MNT_MARKED))
+ goto out;
- for (m = propagation_next(parent, parent); m;
- m = propagation_next(m, parent)) {
- struct mount *topper;
- struct mount *child = __lookup_mnt(&m->mnt,
- mnt->mnt_mountpoint);
- /*
- * umount the child only if the child has no children
- * and the child is marked safe to unmount.
- */
- if (!child || !IS_MNT_MARKED(child))
+ /* Verify topper is the only grandchild that has not been
+ * speculatively unmounted.
+ */
+ list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
+ if (child->mnt_mountpoint == mnt->mnt.mnt_root)
continue;
- CLEAR_MNT_MARK(child);
+ if (!list_empty(&child->mnt_umounting) && IS_MNT_MARKED(child))
+ continue;
+ /* Found a mounted child */
+ goto children;
+ }
- /* If there is exactly one mount covering all of child
- * replace child with that mount.
- */
- topper = find_topper(child);
- if (topper)
- mnt_change_mountpoint(child->mnt_parent, child->mnt_mp,
- topper);
+ /* Mark mounts that can be unmounted if not locked */
+ SET_MNT_MARK(mnt);
+ progress = true;
+
+ /* If a mount is without children and not locked umount it. */
+ if (!IS_MNT_LOCKED(mnt)) {
+ umount_one(mnt, to_umount);
+ } else {
+children:
+ list_move_tail(&mnt->mnt_umounting, to_restore);
+ }
+out:
+ return progress;
+}
+
+static void umount_list(struct list_head *to_umount,
+ struct list_head *to_restore)
+{
+ struct mount *mnt, *child, *tmp;
+ list_for_each_entry(mnt, to_umount, mnt_list) {
+ list_for_each_entry_safe(child, tmp, &mnt->mnt_mounts, mnt_child) {
+ /* topper? */
+ if (child->mnt_mountpoint == mnt->mnt.mnt_root)
+ list_move_tail(&child->mnt_umounting, to_restore);
+ else
+ umount_one(child, to_umount);
+ }
+ }
+}
- if (list_empty(&child->mnt_mounts)) {
- list_del_init(&child->mnt_child);
- child->mnt.mnt_flags |= MNT_UMOUNT;
- list_move_tail(&child->mnt_list, &mnt->mnt_list);
+static void restore_mounts(struct list_head *to_restore)
+{
+ /* Restore mounts to a clean working state */
+ while (!list_empty(to_restore)) {
+ struct mount *mnt, *parent;
+ struct mountpoint *mp;
+
+ mnt = list_first_entry(to_restore, struct mount, mnt_umounting);
+ CLEAR_MNT_MARK(mnt);
+ list_del_init(&mnt->mnt_umounting);
+
+ /* Should this mount be reparented? */
+ mp = mnt->mnt_mp;
+ parent = mnt->mnt_parent;
+ while (parent->mnt.mnt_flags & MNT_UMOUNT) {
+ mp = parent->mnt_mp;
+ parent = parent->mnt_parent;
}
+ if (parent != mnt->mnt_parent)
+ mnt_change_mountpoint(parent, mp, mnt);
+ }
+}
+
+static void cleanup_umount_visitations(struct list_head *visited)
+{
+ while (!list_empty(visited)) {
+ struct mount *mnt =
+ list_first_entry(visited, struct mount, mnt_umounting);
+ list_del_init(&mnt->mnt_umounting);
}
}
@@ -485,11 +542,68 @@ static void __propagate_umount(struct mount *mnt)
int propagate_umount(struct list_head *list)
{
struct mount *mnt;
+ LIST_HEAD(to_restore);
+ LIST_HEAD(to_umount);
+ LIST_HEAD(visited);
+
+ /* Find candidates for unmounting */
+ list_for_each_entry_reverse(mnt, list, mnt_list) {
+ struct mount *parent = mnt->mnt_parent;
+ struct mount *m;
+
+ /*
+ * If this mount has already been visited it is known that its
+ * entire peer group and all of their slaves in the propagation
+ * tree for the mountpoint have already been visited and there is
+ * no need to visit them again.
+ */
+ if (!list_empty(&mnt->mnt_umounting))
+ continue;
+
+ list_add_tail(&mnt->mnt_umounting, &visited);
+ for (m = propagation_next(parent, parent); m;
+ m = propagation_next(m, parent)) {
+ struct mount *child = __lookup_mnt(&m->mnt,
+ mnt->mnt_mountpoint);
+ if (!child)
+ continue;
+
+ if (!list_empty(&child->mnt_umounting)) {
+ /*
+ * If the child has already been visited it is
+ * known that its entire peer group and all of
+ * their slaves in the propagation tree for the
+ * mountpoint have already been visited and there
+ * is no need to visit this subtree again.
+ */
+ m = skip_propagation_subtree(m, parent);
+ continue;
+ } else if (child->mnt.mnt_flags & MNT_UMOUNT) {
+ /*
+ * We have come across a partially unmounted
+ * mount in list that has not been visited yet.
+ * Remember it has been visited and continue
+ * about our merry way.
+ */
+ list_add_tail(&child->mnt_umounting, &visited);
+ continue;
+ }
+
+ /* Check the child and parents while progress is made */
+ while (__propagate_umount(child,
+ &to_umount, &to_restore)) {
+ /* Is the parent a umount candidate? */
+ child = child->mnt_parent;
+ if (list_empty(&child->mnt_umounting))
+ break;
+ }
+ }
+ }
- list_for_each_entry_reverse(mnt, list, mnt_list)
- mark_umount_candidates(mnt);
+ umount_list(&to_umount, &to_restore);
+ restore_mounts(&to_restore);
+ cleanup_umount_visitations(&visited);
+ list_splice_tail(&to_umount, list);
- list_for_each_entry(mnt, list, mnt_list)
- __propagate_umount(mnt);
return 0;
}
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 4ee55274f155..45629f4b5402 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -504,7 +504,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
if (&m->list == &kclist_head) {
if (clear_user(buffer, tsz))
return -EFAULT;
- } else if (is_vmalloc_or_module_addr((void *)start)) {
+ } else if (m->type == KCORE_VMALLOC) {
vread(buf, (char *)start, tsz);
/* we have to zero-fill user buffer even if no read */
if (copy_to_user(buffer, buf, tsz))
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index f0c8b33d99b1..520802da059c 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -300,11 +300,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
/* We don't show the stack guard page in /proc/maps */
start = vma->vm_start;
- if (stack_guard_page_start(vma, start))
- start += PAGE_SIZE;
end = vma->vm_end;
- if (stack_guard_page_end(vma, end))
- end -= PAGE_SIZE;
seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 792a4e5f9226..4d02c3b65061 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -349,48 +349,48 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record)
switch (record->type) {
case PSTORE_TYPE_DMESG:
- scnprintf(name, sizeof(name), "dmesg-%s-%lld%s",
+ scnprintf(name, sizeof(name), "dmesg-%s-%llu%s",
record->psi->name, record->id,
record->compressed ? ".enc.z" : "");
break;
case PSTORE_TYPE_CONSOLE:
- scnprintf(name, sizeof(name), "console-%s-%lld",
+ scnprintf(name, sizeof(name), "console-%s-%llu",
record->psi->name, record->id);
break;
case PSTORE_TYPE_FTRACE:
- scnprintf(name, sizeof(name), "ftrace-%s-%lld",
+ scnprintf(name, sizeof(name), "ftrace-%s-%llu",
record->psi->name, record->id);
break;
case PSTORE_TYPE_MCE:
- scnprintf(name, sizeof(name), "mce-%s-%lld",
+ scnprintf(name, sizeof(name), "mce-%s-%llu",
record->psi->name, record->id);
break;
case PSTORE_TYPE_PPC_RTAS:
- scnprintf(name, sizeof(name), "rtas-%s-%lld",
+ scnprintf(name, sizeof(name), "rtas-%s-%llu",
record->psi->name, record->id);
break;
case PSTORE_TYPE_PPC_OF:
- scnprintf(name, sizeof(name), "powerpc-ofw-%s-%lld",
+ scnprintf(name, sizeof(name), "powerpc-ofw-%s-%llu",
record->psi->name, record->id);
break;
case PSTORE_TYPE_PPC_COMMON:
- scnprintf(name, sizeof(name), "powerpc-common-%s-%lld",
+ scnprintf(name, sizeof(name), "powerpc-common-%s-%llu",
record->psi->name, record->id);
break;
case PSTORE_TYPE_PMSG:
- scnprintf(name, sizeof(name), "pmsg-%s-%lld",
+ scnprintf(name, sizeof(name), "pmsg-%s-%llu",
record->psi->name, record->id);
break;
case PSTORE_TYPE_PPC_OPAL:
- scnprintf(name, sizeof(name), "powerpc-opal-%s-%lld",
+ scnprintf(name, sizeof(name), "powerpc-opal-%s-%llu",
record->psi->name, record->id);
break;
case PSTORE_TYPE_UNKNOWN:
- scnprintf(name, sizeof(name), "unknown-%s-%lld",
+ scnprintf(name, sizeof(name), "unknown-%s-%llu",
record->psi->name, record->id);
break;
default:
- scnprintf(name, sizeof(name), "type%d-%s-%lld",
+ scnprintf(name, sizeof(name), "type%d-%s-%llu",
record->type, record->psi->name, record->id);
break;
}
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
index c416e653dc4f..58051265626f 100644
--- a/fs/pstore/internal.h
+++ b/fs/pstore/internal.h
@@ -30,5 +30,7 @@ extern void pstore_get_backend_records(struct pstore_info *psi,
extern int pstore_mkfile(struct dentry *root,
struct pstore_record *record);
extern bool pstore_is_mounted(void);
+extern void pstore_record_init(struct pstore_record *record,
+ struct pstore_info *psi);
#endif
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index d468eec9b8a6..1b6e0ff6bff5 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -474,6 +474,20 @@ static size_t copy_kmsg_to_buffer(int hsize, size_t len)
return total_len;
}
+void pstore_record_init(struct pstore_record *record,
+ struct pstore_info *psinfo)
+{
+ memset(record, 0, sizeof(*record));
+
+ record->psi = psinfo;
+
+ /* Report zeroed timestamp if called before timekeeping has resumed. */
+ if (__getnstimeofday(&record->time)) {
+ record->time.tv_sec = 0;
+ record->time.tv_nsec = 0;
+ }
+}
+
/*
* callback from kmsg_dump. (s2,l2) has the most recently
* written bytes, older bytes are in (s1,l1). Save as much
@@ -509,15 +523,14 @@ static void pstore_dump(struct kmsg_dumper *dumper,
int header_size;
int zipped_len = -1;
size_t dump_size;
- struct pstore_record record = {
- .type = PSTORE_TYPE_DMESG,
- .count = oopscount,
- .reason = reason,
- .part = part,
- .compressed = false,
- .buf = psinfo->buf,
- .psi = psinfo,
- };
+ struct pstore_record record;
+
+ pstore_record_init(&record, psinfo);
+ record.type = PSTORE_TYPE_DMESG;
+ record.count = oopscount;
+ record.reason = reason;
+ record.part = part;
+ record.buf = psinfo->buf;
if (big_oops_buf && is_locked) {
dst = big_oops_buf;
@@ -587,12 +600,12 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c)
const char *e = s + c;
while (s < e) {
- struct pstore_record record = {
- .type = PSTORE_TYPE_CONSOLE,
- .psi = psinfo,
- };
+ struct pstore_record record;
unsigned long flags;
+ pstore_record_init(&record, psinfo);
+ record.type = PSTORE_TYPE_CONSOLE;
+
if (c > psinfo->bufsize)
c = psinfo->bufsize;
@@ -640,19 +653,16 @@ static int pstore_write_user_compat(struct pstore_record *record,
if (record->buf)
return -EINVAL;
- record->buf = kmalloc(record->size, GFP_KERNEL);
- if (!record->buf)
- return -ENOMEM;
-
- if (unlikely(copy_from_user(record->buf, buf, record->size))) {
- ret = -EFAULT;
+ record->buf = memdup_user(buf, record->size);
+ if (unlikely(IS_ERR(record->buf))) {
+ ret = PTR_ERR(record->buf);
goto out;
}
ret = record->psi->write(record);
-out:
kfree(record->buf);
+out:
record->buf = NULL;
return unlikely(ret < 0) ? ret : record->size;
@@ -770,8 +780,11 @@ static void decompress_record(struct pstore_record *record)
int unzipped_len;
char *decompressed;
+ if (!record->compressed)
+ return;
+
/* Only PSTORE_TYPE_DMESG support compression. */
- if (!record->compressed || record->type != PSTORE_TYPE_DMESG) {
+ if (record->type != PSTORE_TYPE_DMESG) {
pr_warn("ignored compressed record type %d\n", record->type);
return;
}
@@ -819,6 +832,7 @@ void pstore_get_backend_records(struct pstore_info *psi,
struct dentry *root, int quiet)
{
int failed = 0;
+ unsigned int stop_loop = 65536;
if (!psi || !root)
return;
@@ -832,7 +846,7 @@ void pstore_get_backend_records(struct pstore_info *psi,
* may reallocate record.buf. On success, pstore_mkfile() will keep
* the record.buf, so free it only on failure.
*/
- for (;;) {
+ for (; stop_loop; stop_loop--) {
struct pstore_record *record;
int rc;
@@ -841,13 +855,15 @@ void pstore_get_backend_records(struct pstore_info *psi,
pr_err("out of memory creating record\n");
break;
}
- record->psi = psi;
+ pstore_record_init(record, psi);
record->size = psi->read(record);
/* No more records left in backend? */
- if (record->size <= 0)
+ if (record->size <= 0) {
+ kfree(record);
break;
+ }
decompress_record(record);
rc = pstore_mkfile(root, record);
@@ -865,8 +881,11 @@ out:
mutex_unlock(&psi->read_mutex);
if (failed)
- pr_warn("failed to load %d record(s) from '%s'\n",
+ pr_warn("failed to create %d record(s) from '%s'\n",
failed, psi->name);
+ if (!stop_loop)
+ pr_err("looping? Too many records seen from '%s'\n",
+ psi->name);
}
static void pstore_dowork(struct work_struct *work)
diff --git a/fs/pstore/pmsg.c b/fs/pstore/pmsg.c
index 209755e0d7c8..24db02de1787 100644
--- a/fs/pstore/pmsg.c
+++ b/fs/pstore/pmsg.c
@@ -22,16 +22,16 @@ static DEFINE_MUTEX(pmsg_lock);
static ssize_t write_pmsg(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
- struct pstore_record record = {
- .type = PSTORE_TYPE_PMSG,
- .size = count,
- .psi = psinfo,
- };
+ struct pstore_record record;
int ret;
if (!count)
return 0;
+ pstore_record_init(&record, psinfo);
+ record.type = PSTORE_TYPE_PMSG;
+ record.size = count;
+
/* check outside lock, page in any data. write_user also checks */
if (!access_ok(VERIFY_READ, buf, count))
return -EFAULT;
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 5cb022c8cd33..7125b398d312 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -27,7 +27,6 @@
#include <linux/module.h>
#include <linux/version.h>
#include <linux/pstore.h>
-#include <linux/time.h>
#include <linux/io.h>
#include <linux/ioport.h>
#include <linux/platform_device.h>
@@ -356,20 +355,15 @@ out:
}
static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz,
- bool compressed)
+ struct pstore_record *record)
{
char *hdr;
- struct timespec timestamp;
size_t len;
- /* Report zeroed timestamp if called before timekeeping has resumed. */
- if (__getnstimeofday(&timestamp)) {
- timestamp.tv_sec = 0;
- timestamp.tv_nsec = 0;
- }
hdr = kasprintf(GFP_ATOMIC, RAMOOPS_KERNMSG_HDR "%lu.%lu-%c\n",
- (long)timestamp.tv_sec, (long)(timestamp.tv_nsec / 1000),
- compressed ? 'C' : 'D');
+ record->time.tv_sec,
+ record->time.tv_nsec / 1000,
+ record->compressed ? 'C' : 'D');
WARN_ON_ONCE(!hdr);
len = hdr ? strlen(hdr) : 0;
persistent_ram_write(prz, hdr, len);
@@ -440,7 +434,7 @@ static int notrace ramoops_pstore_write(struct pstore_record *record)
prz = cxt->dprzs[cxt->dump_write_cnt];
/* Build header and append record contents. */
- hlen = ramoops_write_kmsg_hdr(prz, record->compressed);
+ hlen = ramoops_write_kmsg_hdr(prz, record);
size = record->size;
if (size + hlen > prz->buffer_size)
size = prz->buffer_size - hlen;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 48813aeaab80..53a17496c5c5 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1910,6 +1910,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
{
qsize_t space, cur_space;
qsize_t rsv_space = 0;
+ qsize_t inode_usage = 1;
struct dquot *transfer_from[MAXQUOTAS] = {};
int cnt, ret = 0;
char is_valid[MAXQUOTAS] = {};
@@ -1919,6 +1920,13 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
if (IS_NOQUOTA(inode))
return 0;
+
+ if (inode->i_sb->dq_op->get_inode_usage) {
+ ret = inode->i_sb->dq_op->get_inode_usage(inode, &inode_usage);
+ if (ret)
+ return ret;
+ }
+
/* Initialize the arrays */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
warn_to[cnt].w_type = QUOTA_NL_NOWARN;
@@ -1946,7 +1954,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
continue;
is_valid[cnt] = 1;
transfer_from[cnt] = i_dquot(inode)[cnt];
- ret = check_idq(transfer_to[cnt], 1, &warn_to[cnt]);
+ ret = check_idq(transfer_to[cnt], inode_usage, &warn_to[cnt]);
if (ret)
goto over_quota;
ret = check_bdq(transfer_to[cnt], space, 0, &warn_to[cnt]);
@@ -1963,7 +1971,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
/* Due to IO error we might not have transfer_from[] structure */
if (transfer_from[cnt]) {
int wtype;
- wtype = info_idq_free(transfer_from[cnt], 1);
+ wtype = info_idq_free(transfer_from[cnt], inode_usage);
if (wtype != QUOTA_NL_NOWARN)
prepare_warning(&warn_from_inodes[cnt],
transfer_from[cnt], wtype);
@@ -1971,13 +1979,13 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
if (wtype != QUOTA_NL_NOWARN)
prepare_warning(&warn_from_space[cnt],
transfer_from[cnt], wtype);
- dquot_decr_inodes(transfer_from[cnt], 1);
+ dquot_decr_inodes(transfer_from[cnt], inode_usage);
dquot_decr_space(transfer_from[cnt], cur_space);
dquot_free_reserved_space(transfer_from[cnt],
rsv_space);
}
- dquot_incr_inodes(transfer_to[cnt], 1);
+ dquot_incr_inodes(transfer_to[cnt], inode_usage);
dquot_incr_space(transfer_to[cnt], cur_space);
dquot_resv_space(transfer_to[cnt], rsv_space);
diff --git a/fs/read_write.c b/fs/read_write.c
index 47c1d4484df9..0cc7033aa413 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -356,46 +356,6 @@ out_putf:
}
#endif
-ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos)
-{
- struct kiocb kiocb;
- ssize_t ret;
-
- if (!file->f_op->read_iter)
- return -EINVAL;
-
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = *ppos;
-
- iter->type |= READ;
- ret = call_read_iter(file, &kiocb, iter);
- BUG_ON(ret == -EIOCBQUEUED);
- if (ret > 0)
- *ppos = kiocb.ki_pos;
- return ret;
-}
-EXPORT_SYMBOL(vfs_iter_read);
-
-ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos)
-{
- struct kiocb kiocb;
- ssize_t ret;
-
- if (!file->f_op->write_iter)
- return -EINVAL;
-
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = *ppos;
-
- iter->type |= WRITE;
- ret = call_write_iter(file, &kiocb, iter);
- BUG_ON(ret == -EIOCBQUEUED);
- if (ret > 0)
- *ppos = kiocb.ki_pos;
- return ret;
-}
-EXPORT_SYMBOL(vfs_iter_write);
-
int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count)
{
struct inode *inode;
@@ -678,16 +638,10 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
struct kiocb kiocb;
ssize_t ret;
- if (flags & ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC))
- return -EOPNOTSUPP;
-
init_sync_kiocb(&kiocb, filp);
- if (flags & RWF_HIPRI)
- kiocb.ki_flags |= IOCB_HIPRI;
- if (flags & RWF_DSYNC)
- kiocb.ki_flags |= IOCB_DSYNC;
- if (flags & RWF_SYNC)
- kiocb.ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
+ ret = kiocb_set_rw_flags(&kiocb, flags);
+ if (ret)
+ return ret;
kiocb.ki_pos = *ppos;
if (type == READ)
@@ -916,86 +870,114 @@ out:
}
#endif
-static ssize_t __do_readv_writev(int type, struct file *file,
- struct iov_iter *iter, loff_t *pos, int flags)
+static ssize_t do_iter_read(struct file *file, struct iov_iter *iter,
+ loff_t *pos, int flags)
{
size_t tot_len;
ssize_t ret = 0;
+ if (!(file->f_mode & FMODE_READ))
+ return -EBADF;
+ if (!(file->f_mode & FMODE_CAN_READ))
+ return -EINVAL;
+
tot_len = iov_iter_count(iter);
if (!tot_len)
goto out;
- ret = rw_verify_area(type, file, pos, tot_len);
+ ret = rw_verify_area(READ, file, pos, tot_len);
if (ret < 0)
- goto out;
-
- if (type != READ)
- file_start_write(file);
+ return ret;
- if ((type == READ && file->f_op->read_iter) ||
- (type == WRITE && file->f_op->write_iter))
- ret = do_iter_readv_writev(file, iter, pos, type, flags);
+ if (file->f_op->read_iter)
+ ret = do_iter_readv_writev(file, iter, pos, READ, flags);
else
- ret = do_loop_readv_writev(file, iter, pos, type, flags);
-
- if (type != READ)
- file_end_write(file);
-
+ ret = do_loop_readv_writev(file, iter, pos, READ, flags);
out:
- if ((ret + (type == READ)) > 0) {
- if (type == READ)
- fsnotify_access(file);
- else
- fsnotify_modify(file);
- }
+ if (ret >= 0)
+ fsnotify_access(file);
return ret;
}
-static ssize_t do_readv_writev(int type, struct file *file,
- const struct iovec __user *uvector,
- unsigned long nr_segs, loff_t *pos,
- int flags)
+ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
+ int flags)
{
- struct iovec iovstack[UIO_FASTIOV];
- struct iovec *iov = iovstack;
- struct iov_iter iter;
- ssize_t ret;
+ if (!file->f_op->read_iter)
+ return -EINVAL;
+ return do_iter_read(file, iter, ppos, flags);
+}
+EXPORT_SYMBOL(vfs_iter_read);
+
+static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
+ loff_t *pos, int flags)
+{
+ size_t tot_len;
+ ssize_t ret = 0;
- ret = import_iovec(type, uvector, nr_segs,
- ARRAY_SIZE(iovstack), &iov, &iter);
+ if (!(file->f_mode & FMODE_WRITE))
+ return -EBADF;
+ if (!(file->f_mode & FMODE_CAN_WRITE))
+ return -EINVAL;
+
+ tot_len = iov_iter_count(iter);
+ if (!tot_len)
+ return 0;
+ ret = rw_verify_area(WRITE, file, pos, tot_len);
if (ret < 0)
return ret;
- ret = __do_readv_writev(type, file, &iter, pos, flags);
- kfree(iov);
-
+ if (file->f_op->write_iter)
+ ret = do_iter_readv_writev(file, iter, pos, WRITE, flags);
+ else
+ ret = do_loop_readv_writev(file, iter, pos, WRITE, flags);
+ if (ret > 0)
+ fsnotify_modify(file);
return ret;
}
+ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
+ int flags)
+{
+ if (!file->f_op->write_iter)
+ return -EINVAL;
+ return do_iter_write(file, iter, ppos, flags);
+}
+EXPORT_SYMBOL(vfs_iter_write);
+
ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
unsigned long vlen, loff_t *pos, int flags)
{
- if (!(file->f_mode & FMODE_READ))
- return -EBADF;
- if (!(file->f_mode & FMODE_CAN_READ))
- return -EINVAL;
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov = iovstack;
+ struct iov_iter iter;
+ ssize_t ret;
- return do_readv_writev(READ, file, vec, vlen, pos, flags);
-}
+ ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
+ if (ret >= 0) {
+ ret = do_iter_read(file, &iter, pos, flags);
+ kfree(iov);
+ }
+ return ret;
+}
EXPORT_SYMBOL(vfs_readv);
ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
unsigned long vlen, loff_t *pos, int flags)
{
- if (!(file->f_mode & FMODE_WRITE))
- return -EBADF;
- if (!(file->f_mode & FMODE_CAN_WRITE))
- return -EINVAL;
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov = iovstack;
+ struct iov_iter iter;
+ ssize_t ret;
- return do_readv_writev(WRITE, file, vec, vlen, pos, flags);
+ ret = import_iovec(WRITE, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
+ if (ret >= 0) {
+ file_start_write(file);
+ ret = do_iter_write(file, &iter, pos, flags);
+ file_end_write(file);
+ kfree(iov);
+ }
+ return ret;
}
-
EXPORT_SYMBOL(vfs_writev);
static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec,
@@ -1143,44 +1125,20 @@ SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
}
#ifdef CONFIG_COMPAT
-
-static ssize_t compat_do_readv_writev(int type, struct file *file,
- const struct compat_iovec __user *uvector,
- unsigned long nr_segs, loff_t *pos,
- int flags)
+static size_t compat_readv(struct file *file,
+ const struct compat_iovec __user *vec,
+ unsigned long vlen, loff_t *pos, int flags)
{
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
struct iov_iter iter;
ssize_t ret;
- ret = compat_import_iovec(type, uvector, nr_segs,
- UIO_FASTIOV, &iov, &iter);
- if (ret < 0)
- return ret;
-
- ret = __do_readv_writev(type, file, &iter, pos, flags);
- kfree(iov);
-
- return ret;
-}
-
-static size_t compat_readv(struct file *file,
- const struct compat_iovec __user *vec,
- unsigned long vlen, loff_t *pos, int flags)
-{
- ssize_t ret = -EBADF;
-
- if (!(file->f_mode & FMODE_READ))
- goto out;
-
- ret = -EINVAL;
- if (!(file->f_mode & FMODE_CAN_READ))
- goto out;
-
- ret = compat_do_readv_writev(READ, file, vec, vlen, pos, flags);
-
-out:
+ ret = compat_import_iovec(READ, vec, vlen, UIO_FASTIOV, &iov, &iter);
+ if (ret >= 0) {
+ ret = do_iter_read(file, &iter, pos, flags);
+ kfree(iov);
+ }
if (ret > 0)
add_rchar(current, ret);
inc_syscr(current);
@@ -1276,18 +1234,18 @@ static size_t compat_writev(struct file *file,
const struct compat_iovec __user *vec,
unsigned long vlen, loff_t *pos, int flags)
{
- ssize_t ret = -EBADF;
-
- if (!(file->f_mode & FMODE_WRITE))
- goto out;
-
- ret = -EINVAL;
- if (!(file->f_mode & FMODE_CAN_WRITE))
- goto out;
-
- ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, 0);
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov = iovstack;
+ struct iov_iter iter;
+ ssize_t ret;
-out:
+ ret = compat_import_iovec(WRITE, vec, vlen, UIO_FASTIOV, &iov, &iter);
+ if (ret >= 0) {
+ file_start_write(file);
+ ret = do_iter_write(file, &iter, pos, flags);
+ file_end_write(file);
+ kfree(iov);
+ }
if (ret > 0)
add_wchar(current, ret);
inc_syscw(current);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 39bb1e838d8d..a11d773e5ff3 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2956,7 +2956,7 @@ void reiserfs_wait_on_write_block(struct super_block *s)
static void queue_log_writer(struct super_block *s)
{
- wait_queue_t wait;
+ wait_queue_entry_t wait;
struct reiserfs_journal *journal = SB_JOURNAL(s);
set_bit(J_WRITERS_QUEUED, &journal->j_state);
diff --git a/fs/select.c b/fs/select.c
index d6c652a31e99..9d5f15ed87fe 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -180,7 +180,7 @@ static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
return table->entry++;
}
-static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int __pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
struct poll_wqueues *pwq = wait->private;
DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);
@@ -206,7 +206,7 @@ static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
return default_wake_function(&dummy_wait, mode, sync, key);
}
-static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
struct poll_table_entry *entry;
@@ -1161,59 +1161,25 @@ static
int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
unsigned long *fdset)
{
- nr = DIV_ROUND_UP(nr, __COMPAT_NFDBITS);
if (ufdset) {
- unsigned long odd;
-
- if (!access_ok(VERIFY_WRITE, ufdset, nr*sizeof(compat_ulong_t)))
- return -EFAULT;
-
- odd = nr & 1UL;
- nr &= ~1UL;
- while (nr) {
- unsigned long h, l;
- if (__get_user(l, ufdset) || __get_user(h, ufdset+1))
- return -EFAULT;
- ufdset += 2;
- *fdset++ = h << 32 | l;
- nr -= 2;
- }
- if (odd && __get_user(*fdset, ufdset))
- return -EFAULT;
+ return compat_get_bitmap(fdset, ufdset, nr);
} else {
/* Tricky, must clear full unsigned long in the
- * kernel fdset at the end, this makes sure that
+ * kernel fdset at the end, ALIGN makes sure that
* actually happens.
*/
- memset(fdset, 0, ((nr + 1) & ~1)*sizeof(compat_ulong_t));
+ memset(fdset, 0, ALIGN(nr, BITS_PER_LONG));
+ return 0;
}
- return 0;
}
static
int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
unsigned long *fdset)
{
- unsigned long odd;
- nr = DIV_ROUND_UP(nr, __COMPAT_NFDBITS);
-
if (!ufdset)
return 0;
-
- odd = nr & 1UL;
- nr &= ~1UL;
- while (nr) {
- unsigned long h, l;
- l = *fdset++;
- h = l >> 32;
- if (__put_user(l, ufdset) || __put_user(h, ufdset+1))
- return -EFAULT;
- ufdset += 2;
- nr -= 2;
- }
- if (odd && __put_user(*fdset, ufdset))
- return -EFAULT;
- return 0;
+ return compat_put_bitmap(ufdset, fdset, nr);
}
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 7e3d71109f51..593b022ac11b 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -43,7 +43,7 @@ void signalfd_cleanup(struct sighand_struct *sighand)
if (likely(!waitqueue_active(wqh)))
return;
- /* wait_queue_t->func(POLLFREE) should do remove_wait_queue() */
+ /* wait_queue_entry_t->func(POLLFREE) should do remove_wait_queue() */
wake_up_poll(wqh, POLLHUP | POLLFREE);
}
diff --git a/fs/splice.c b/fs/splice.c
index 540c4a44756c..ae41201d0325 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -762,7 +762,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
iov_iter_bvec(&from, ITER_BVEC | WRITE, array, n,
sd.total_len - left);
- ret = vfs_iter_write(out, &from, &sd.pos);
+ ret = vfs_iter_write(out, &from, &sd.pos, 0);
if (ret <= 0)
break;
diff --git a/fs/statfs.c b/fs/statfs.c
index 4e4623c7a126..fab9b6a3c116 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -38,6 +38,8 @@ static int flags_by_sb(int s_flags)
flags |= ST_SYNCHRONOUS;
if (s_flags & MS_MANDLOCK)
flags |= ST_MANDLOCK;
+ if (s_flags & MS_RDONLY)
+ flags |= ST_RDONLY;
return flags;
}
@@ -244,6 +246,7 @@ SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf)
#ifdef CONFIG_COMPAT
static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *kbuf)
{
+ struct compat_statfs buf;
if (sizeof ubuf->f_blocks == 4) {
if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail |
kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL)
@@ -257,20 +260,20 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
&& (kbuf->f_ffree & 0xffffffff00000000ULL))
return -EOVERFLOW;
}
- if (!access_ok(VERIFY_WRITE, ubuf, sizeof(*ubuf)) ||
- __put_user(kbuf->f_type, &ubuf->f_type) ||
- __put_user(kbuf->f_bsize, &ubuf->f_bsize) ||
- __put_user(kbuf->f_blocks, &ubuf->f_blocks) ||
- __put_user(kbuf->f_bfree, &ubuf->f_bfree) ||
- __put_user(kbuf->f_bavail, &ubuf->f_bavail) ||
- __put_user(kbuf->f_files, &ubuf->f_files) ||
- __put_user(kbuf->f_ffree, &ubuf->f_ffree) ||
- __put_user(kbuf->f_namelen, &ubuf->f_namelen) ||
- __put_user(kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]) ||
- __put_user(kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]) ||
- __put_user(kbuf->f_frsize, &ubuf->f_frsize) ||
- __put_user(kbuf->f_flags, &ubuf->f_flags) ||
- __clear_user(ubuf->f_spare, sizeof(ubuf->f_spare)))
+ memset(&buf, 0, sizeof(struct compat_statfs));
+ buf.f_type = kbuf->f_type;
+ buf.f_bsize = kbuf->f_bsize;
+ buf.f_blocks = kbuf->f_blocks;
+ buf.f_bfree = kbuf->f_bfree;
+ buf.f_bavail = kbuf->f_bavail;
+ buf.f_files = kbuf->f_files;
+ buf.f_ffree = kbuf->f_ffree;
+ buf.f_namelen = kbuf->f_namelen;
+ buf.f_fsid.val[0] = kbuf->f_fsid.val[0];
+ buf.f_fsid.val[1] = kbuf->f_fsid.val[1];
+ buf.f_frsize = kbuf->f_frsize;
+ buf.f_flags = kbuf->f_flags;
+ if (copy_to_user(ubuf, &buf, sizeof(struct compat_statfs)))
return -EFAULT;
return 0;
}
@@ -299,6 +302,7 @@ COMPAT_SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct compat_statfs __user *,
static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstatfs *kbuf)
{
+ struct compat_statfs64 buf;
if (sizeof(ubuf->f_bsize) == 4) {
if ((kbuf->f_type | kbuf->f_bsize | kbuf->f_namelen |
kbuf->f_frsize | kbuf->f_flags) & 0xffffffff00000000ULL)
@@ -312,20 +316,20 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
&& (kbuf->f_ffree & 0xffffffff00000000ULL))
return -EOVERFLOW;
}
- if (!access_ok(VERIFY_WRITE, ubuf, sizeof(*ubuf)) ||
- __put_user(kbuf->f_type, &ubuf->f_type) ||
- __put_user(kbuf->f_bsize, &ubuf->f_bsize) ||
- __put_user(kbuf->f_blocks, &ubuf->f_blocks) ||
- __put_user(kbuf->f_bfree, &ubuf->f_bfree) ||
- __put_user(kbuf->f_bavail, &ubuf->f_bavail) ||
- __put_user(kbuf->f_files, &ubuf->f_files) ||
- __put_user(kbuf->f_ffree, &ubuf->f_ffree) ||
- __put_user(kbuf->f_namelen, &ubuf->f_namelen) ||
- __put_user(kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]) ||
- __put_user(kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]) ||
- __put_user(kbuf->f_frsize, &ubuf->f_frsize) ||
- __put_user(kbuf->f_flags, &ubuf->f_flags) ||
- __clear_user(ubuf->f_spare, sizeof(ubuf->f_spare)))
+ memset(&buf, 0, sizeof(struct compat_statfs64));
+ buf.f_type = kbuf->f_type;
+ buf.f_bsize = kbuf->f_bsize;
+ buf.f_blocks = kbuf->f_blocks;
+ buf.f_bfree = kbuf->f_bfree;
+ buf.f_bavail = kbuf->f_bavail;
+ buf.f_files = kbuf->f_files;
+ buf.f_ffree = kbuf->f_ffree;
+ buf.f_namelen = kbuf->f_namelen;
+ buf.f_fsid.val[0] = kbuf->f_fsid.val[0];
+ buf.f_fsid.val[1] = kbuf->f_fsid.val[1];
+ buf.f_frsize = kbuf->f_frsize;
+ buf.f_flags = kbuf->f_flags;
+ if (copy_to_user(ubuf, &buf, sizeof(struct compat_statfs64)))
return -EFAULT;
return 0;
}
diff --git a/fs/sync.c b/fs/sync.c
index 11ba023434b1..2a54c1f22035 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -192,7 +192,7 @@ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
spin_unlock(&inode->i_lock);
mark_inode_dirty_sync(inode);
}
- return call_fsync(file, start, end, datasync);
+ return file->f_op->fsync(file, start, end, datasync);
}
EXPORT_SYMBOL(vfs_fsync_range);
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 5bdae85ceef7..f5191cb2c947 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -45,7 +45,7 @@ static int dir_commit_chunk(struct page *page, loff_t pos, unsigned len)
mark_inode_dirty(dir);
}
if (IS_DIRSYNC(dir))
- err = write_one_page(page, 1);
+ err = write_one_page(page);
else
unlock_page(page);
return err;
diff --git a/fs/timerfd.c b/fs/timerfd.c
index c543cdb5f8ed..ece0c02d7e63 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -169,7 +169,7 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
}
static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
- const struct itimerspec *ktmr)
+ const struct itimerspec64 *ktmr)
{
enum hrtimer_mode htmode;
ktime_t texp;
@@ -178,10 +178,10 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
htmode = (flags & TFD_TIMER_ABSTIME) ?
HRTIMER_MODE_ABS: HRTIMER_MODE_REL;
- texp = timespec_to_ktime(ktmr->it_value);
+ texp = timespec64_to_ktime(ktmr->it_value);
ctx->expired = 0;
ctx->ticks = 0;
- ctx->tintv = timespec_to_ktime(ktmr->it_interval);
+ ctx->tintv = timespec64_to_ktime(ktmr->it_interval);
if (isalarm(ctx)) {
alarm_init(&ctx->t.alarm,
@@ -432,16 +432,15 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
}
static int do_timerfd_settime(int ufd, int flags,
- const struct itimerspec *new,
- struct itimerspec *old)
+ const struct itimerspec64 *new,
+ struct itimerspec64 *old)
{
struct fd f;
struct timerfd_ctx *ctx;
int ret;
if ((flags & ~TFD_SETTIME_FLAGS) ||
- !timespec_valid(&new->it_value) ||
- !timespec_valid(&new->it_interval))
+ !itimerspec64_valid(new))
return -EINVAL;
ret = timerfd_fget(ufd, &f);
@@ -487,8 +486,8 @@ static int do_timerfd_settime(int ufd, int flags,
hrtimer_forward_now(&ctx->t.tmr, ctx->tintv);
}
- old->it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
- old->it_interval = ktime_to_timespec(ctx->tintv);
+ old->it_value = ktime_to_timespec64(timerfd_get_remaining(ctx));
+ old->it_interval = ktime_to_timespec64(ctx->tintv);
/*
* Re-program the timer to the new value ...
@@ -500,7 +499,7 @@ static int do_timerfd_settime(int ufd, int flags,
return ret;
}
-static int do_timerfd_gettime(int ufd, struct itimerspec *t)
+static int do_timerfd_gettime(int ufd, struct itimerspec64 *t)
{
struct fd f;
struct timerfd_ctx *ctx;
@@ -525,8 +524,8 @@ static int do_timerfd_gettime(int ufd, struct itimerspec *t)
hrtimer_restart(&ctx->t.tmr);
}
}
- t->it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
- t->it_interval = ktime_to_timespec(ctx->tintv);
+ t->it_value = ktime_to_timespec64(timerfd_get_remaining(ctx));
+ t->it_interval = ktime_to_timespec64(ctx->tintv);
spin_unlock_irq(&ctx->wqh.lock);
fdput(f);
return 0;
@@ -536,15 +535,15 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
const struct itimerspec __user *, utmr,
struct itimerspec __user *, otmr)
{
- struct itimerspec new, old;
+ struct itimerspec64 new, old;
int ret;
- if (copy_from_user(&new, utmr, sizeof(new)))
+ if (get_itimerspec64(&new, utmr))
return -EFAULT;
ret = do_timerfd_settime(ufd, flags, &new, &old);
if (ret)
return ret;
- if (otmr && copy_to_user(otmr, &old, sizeof(old)))
+ if (otmr && put_itimerspec64(&old, otmr))
return -EFAULT;
return ret;
@@ -552,11 +551,11 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
{
- struct itimerspec kotmr;
+ struct itimerspec64 kotmr;
int ret = do_timerfd_gettime(ufd, &kotmr);
if (ret)
return ret;
- return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0;
+ return put_itimerspec64(&kotmr, otmr) ? -EFAULT : 0;
}
#ifdef CONFIG_COMPAT
@@ -564,15 +563,15 @@ COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
const struct compat_itimerspec __user *, utmr,
struct compat_itimerspec __user *, otmr)
{
- struct itimerspec new, old;
+ struct itimerspec64 new, old;
int ret;
- if (get_compat_itimerspec(&new, utmr))
+ if (get_compat_itimerspec64(&new, utmr))
return -EFAULT;
ret = do_timerfd_settime(ufd, flags, &new, &old);
if (ret)
return ret;
- if (otmr && put_compat_itimerspec(otmr, &old))
+ if (otmr && put_compat_itimerspec64(&old, otmr))
return -EFAULT;
return ret;
}
@@ -580,10 +579,10 @@ COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
COMPAT_SYSCALL_DEFINE2(timerfd_gettime, int, ufd,
struct compat_itimerspec __user *, otmr)
{
- struct itimerspec kotmr;
+ struct itimerspec64 kotmr;
int ret = do_timerfd_gettime(ufd, &kotmr);
if (ret)
return ret;
- return put_compat_itimerspec(otmr, &kotmr) ? -EFAULT: 0;
+ return put_compat_itimerspec64(&kotmr, otmr) ? -EFAULT : 0;
}
#endif
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index d642cc0a8271..f80be4c5df9d 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -400,10 +400,12 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
/*
* There is not enough space for user on the device
*/
- if (!capable(CAP_SYS_RESOURCE) && ufs_freespace(uspi, UFS_MINFREE) <= 0) {
- mutex_unlock(&UFS_SB(sb)->s_lock);
- UFSD("EXIT (FAILED)\n");
- return 0;
+ if (unlikely(ufs_freefrags(uspi) <= uspi->s_root_blocks)) {
+ if (!capable(CAP_SYS_RESOURCE)) {
+ mutex_unlock(&UFS_SB(sb)->s_lock);
+ UFSD("EXIT (FAILED)\n");
+ return 0;
+ }
}
if (goal >= uspi->s_size)
@@ -421,12 +423,12 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
if (result) {
ufs_clear_frags(inode, result + oldcount,
newcount - oldcount, locked_page != NULL);
+ *err = 0;
write_seqlock(&UFS_I(inode)->meta_lock);
ufs_cpu_to_data_ptr(sb, p, result);
- write_sequnlock(&UFS_I(inode)->meta_lock);
- *err = 0;
UFS_I(inode)->i_lastfrag =
max(UFS_I(inode)->i_lastfrag, fragment + count);
+ write_sequnlock(&UFS_I(inode)->meta_lock);
}
mutex_unlock(&UFS_SB(sb)->s_lock);
UFSD("EXIT, result %llu\n", (unsigned long long)result);
@@ -439,8 +441,10 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
result = ufs_add_fragments(inode, tmp, oldcount, newcount);
if (result) {
*err = 0;
+ read_seqlock_excl(&UFS_I(inode)->meta_lock);
UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
fragment + count);
+ read_sequnlock_excl(&UFS_I(inode)->meta_lock);
ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
locked_page != NULL);
mutex_unlock(&UFS_SB(sb)->s_lock);
@@ -451,39 +455,29 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
/*
* allocate new block and move data
*/
- switch (fs32_to_cpu(sb, usb1->fs_optim)) {
- case UFS_OPTSPACE:
+ if (fs32_to_cpu(sb, usb1->fs_optim) == UFS_OPTSPACE) {
request = newcount;
- if (uspi->s_minfree < 5 || uspi->cs_total.cs_nffree
- > uspi->s_dsize * uspi->s_minfree / (2 * 100))
- break;
- usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
- break;
- default:
- usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
-
- case UFS_OPTTIME:
+ if (uspi->cs_total.cs_nffree < uspi->s_space_to_time)
+ usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
+ } else {
request = uspi->s_fpb;
- if (uspi->cs_total.cs_nffree < uspi->s_dsize *
- (uspi->s_minfree - 2) / 100)
- break;
- usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
- break;
+ if (uspi->cs_total.cs_nffree > uspi->s_time_to_space)
+ usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTSPACE);
}
result = ufs_alloc_fragments (inode, cgno, goal, request, err);
if (result) {
ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
locked_page != NULL);
+ mutex_unlock(&UFS_SB(sb)->s_lock);
ufs_change_blocknr(inode, fragment - oldcount, oldcount,
uspi->s_sbbase + tmp,
uspi->s_sbbase + result, locked_page);
+ *err = 0;
write_seqlock(&UFS_I(inode)->meta_lock);
ufs_cpu_to_data_ptr(sb, p, result);
- write_sequnlock(&UFS_I(inode)->meta_lock);
- *err = 0;
UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
fragment + count);
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ write_sequnlock(&UFS_I(inode)->meta_lock);
if (newcount < request)
ufs_free_fragments (inode, result + newcount, request - newcount);
ufs_free_fragments (inode, tmp, oldcount);
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index de01b8f2aa78..48609f1d9580 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -53,7 +53,7 @@ static int ufs_commit_chunk(struct page *page, loff_t pos, unsigned len)
mark_inode_dirty(dir);
}
if (IS_DIRSYNC(dir))
- err = write_one_page(page, 1);
+ err = write_one_page(page);
else
unlock_page(page);
return err;
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index da553ffec85b..f36d6a53687d 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -401,13 +401,20 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff
u64 phys64 = 0;
unsigned frag = fragment & uspi->s_fpbmask;
- if (!create) {
- phys64 = ufs_frag_map(inode, offsets, depth);
- if (phys64)
- map_bh(bh_result, sb, phys64 + frag);
- return 0;
- }
+ phys64 = ufs_frag_map(inode, offsets, depth);
+ if (!create)
+ goto done;
+ if (phys64) {
+ if (fragment >= UFS_NDIR_FRAGMENT)
+ goto done;
+ read_seqlock_excl(&UFS_I(inode)->meta_lock);
+ if (fragment < UFS_I(inode)->i_lastfrag) {
+ read_sequnlock_excl(&UFS_I(inode)->meta_lock);
+ goto done;
+ }
+ read_sequnlock_excl(&UFS_I(inode)->meta_lock);
+ }
/* This code entered only while writing ....? */
mutex_lock(&UFS_I(inode)->truncate_mutex);
@@ -451,6 +458,11 @@ out:
}
mutex_unlock(&UFS_I(inode)->truncate_mutex);
return err;
+
+done:
+ if (phys64)
+ map_bh(bh_result, sb, phys64 + frag);
+ return 0;
}
static int ufs_writepage(struct page *page, struct writeback_control *wbc)
@@ -554,10 +566,8 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
*/
inode->i_mode = mode = fs16_to_cpu(sb, ufs_inode->ui_mode);
set_nlink(inode, fs16_to_cpu(sb, ufs_inode->ui_nlink));
- if (inode->i_nlink == 0) {
- ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino);
- return -1;
- }
+ if (inode->i_nlink == 0)
+ return -ESTALE;
/*
* Linux now has 32-bit uid and gid, so we can support EFT.
@@ -566,9 +576,9 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
i_gid_write(inode, ufs_get_inode_gid(sb, ufs_inode));
inode->i_size = fs64_to_cpu(sb, ufs_inode->ui_size);
- inode->i_atime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec);
- inode->i_ctime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_ctime.tv_sec);
- inode->i_mtime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_mtime.tv_sec);
+ inode->i_atime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec);
+ inode->i_ctime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_ctime.tv_sec);
+ inode->i_mtime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_mtime.tv_sec);
inode->i_mtime.tv_nsec = 0;
inode->i_atime.tv_nsec = 0;
inode->i_ctime.tv_nsec = 0;
@@ -602,10 +612,8 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
*/
inode->i_mode = mode = fs16_to_cpu(sb, ufs2_inode->ui_mode);
set_nlink(inode, fs16_to_cpu(sb, ufs2_inode->ui_nlink));
- if (inode->i_nlink == 0) {
- ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino);
- return -1;
- }
+ if (inode->i_nlink == 0)
+ return -ESTALE;
/*
* Linux now has 32-bit uid and gid, so we can support EFT.
@@ -645,7 +653,7 @@ struct inode *ufs_iget(struct super_block *sb, unsigned long ino)
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
struct buffer_head * bh;
struct inode *inode;
- int err;
+ int err = -EIO;
UFSD("ENTER, ino %lu\n", ino);
@@ -680,9 +688,10 @@ struct inode *ufs_iget(struct super_block *sb, unsigned long ino)
err = ufs1_read_inode(inode,
ufs_inode + ufs_inotofsbo(inode->i_ino));
}
-
+ brelse(bh);
if (err)
goto bad_inode;
+
inode->i_version++;
ufsi->i_lastfrag =
(inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift;
@@ -691,15 +700,13 @@ struct inode *ufs_iget(struct super_block *sb, unsigned long ino)
ufs_set_inode_ops(inode);
- brelse(bh);
-
UFSD("EXIT\n");
unlock_new_inode(inode);
return inode;
bad_inode:
iget_failed(inode);
- return ERR_PTR(-EIO);
+ return ERR_PTR(err);
}
static void ufs1_update_inode(struct inode *inode, struct ufs_inode *ufs_inode)
@@ -874,7 +881,6 @@ static inline void free_data(struct to_free *ctx, u64 from, unsigned count)
ctx->to = from + count;
}
-#define DIRECT_BLOCK ((inode->i_size + uspi->s_bsize - 1) >> uspi->s_bshift)
#define DIRECT_FRAGMENT ((inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift)
static void ufs_trunc_direct(struct inode *inode)
@@ -1112,19 +1118,24 @@ static void ufs_truncate_blocks(struct inode *inode)
struct super_block *sb = inode->i_sb;
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
unsigned offsets[4];
- int depth = ufs_block_to_path(inode, DIRECT_BLOCK, offsets);
+ int depth;
int depth2;
unsigned i;
struct ufs_buffer_head *ubh[3];
void *p;
u64 block;
- if (!depth)
- return;
+ if (inode->i_size) {
+ sector_t last = (inode->i_size - 1) >> uspi->s_bshift;
+ depth = ufs_block_to_path(inode, last, offsets);
+ if (!depth)
+ return;
+ } else {
+ depth = 1;
+ }
- /* find the last non-zero in offsets[] */
for (depth2 = depth - 1; depth2; depth2--)
- if (offsets[depth2])
+ if (offsets[depth2] != uspi->s_apb - 1)
break;
mutex_lock(&ufsi->truncate_mutex);
@@ -1133,9 +1144,8 @@ static void ufs_truncate_blocks(struct inode *inode)
offsets[0] = UFS_IND_BLOCK;
} else {
/* get the blocks that should be partially emptied */
- p = ufs_get_direct_data_ptr(uspi, ufsi, offsets[0]);
+ p = ufs_get_direct_data_ptr(uspi, ufsi, offsets[0]++);
for (i = 0; i < depth2; i++) {
- offsets[i]++; /* next branch is fully freed */
block = ufs_data_ptr_to_cpu(sb, p);
if (!block)
break;
@@ -1146,7 +1156,7 @@ static void ufs_truncate_blocks(struct inode *inode)
write_sequnlock(&ufsi->meta_lock);
break;
}
- p = ubh_get_data_ptr(uspi, ubh[i], offsets[i + 1]);
+ p = ubh_get_data_ptr(uspi, ubh[i], offsets[i + 1]++);
}
while (i--)
free_branch_tail(inode, offsets[i + 1], ubh[i], depth - i - 1);
@@ -1161,7 +1171,9 @@ static void ufs_truncate_blocks(struct inode *inode)
free_full_branch(inode, block, i - UFS_IND_BLOCK + 1);
}
}
+ read_seqlock_excl(&ufsi->meta_lock);
ufsi->i_lastfrag = DIRECT_FRAGMENT;
+ read_sequnlock_excl(&ufsi->meta_lock);
mark_inode_dirty(inode);
mutex_unlock(&ufsi->truncate_mutex);
}
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 878cc6264f1a..0a4f58a5073c 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -480,7 +480,7 @@ static void ufs_setup_cstotal(struct super_block *sb)
usb3 = ubh_get_usb_third(uspi);
if ((mtype == UFS_MOUNT_UFSTYPE_44BSD &&
- (usb1->fs_flags & UFS_FLAGS_UPDATED)) ||
+ (usb2->fs_un.fs_u2.fs_maxbsize == usb1->fs_bsize)) ||
mtype == UFS_MOUNT_UFSTYPE_UFS2) {
/*we have statistic in different place, then usual*/
uspi->cs_total.cs_ndir = fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir);
@@ -596,9 +596,7 @@ static void ufs_put_cstotal(struct super_block *sb)
usb2 = ubh_get_usb_second(uspi);
usb3 = ubh_get_usb_third(uspi);
- if ((mtype == UFS_MOUNT_UFSTYPE_44BSD &&
- (usb1->fs_flags & UFS_FLAGS_UPDATED)) ||
- mtype == UFS_MOUNT_UFSTYPE_UFS2) {
+ if (mtype == UFS_MOUNT_UFSTYPE_UFS2) {
/*we have statistic in different place, then usual*/
usb2->fs_un.fs_u2.cs_ndir =
cpu_to_fs64(sb, uspi->cs_total.cs_ndir);
@@ -608,16 +606,26 @@ static void ufs_put_cstotal(struct super_block *sb)
cpu_to_fs64(sb, uspi->cs_total.cs_nifree);
usb3->fs_un1.fs_u2.cs_nffree =
cpu_to_fs64(sb, uspi->cs_total.cs_nffree);
- } else {
- usb1->fs_cstotal.cs_ndir =
- cpu_to_fs32(sb, uspi->cs_total.cs_ndir);
- usb1->fs_cstotal.cs_nbfree =
- cpu_to_fs32(sb, uspi->cs_total.cs_nbfree);
- usb1->fs_cstotal.cs_nifree =
- cpu_to_fs32(sb, uspi->cs_total.cs_nifree);
- usb1->fs_cstotal.cs_nffree =
- cpu_to_fs32(sb, uspi->cs_total.cs_nffree);
+ goto out;
+ }
+
+ if (mtype == UFS_MOUNT_UFSTYPE_44BSD &&
+ (usb2->fs_un.fs_u2.fs_maxbsize == usb1->fs_bsize)) {
+ /* store stats in both old and new places */
+ usb2->fs_un.fs_u2.cs_ndir =
+ cpu_to_fs64(sb, uspi->cs_total.cs_ndir);
+ usb2->fs_un.fs_u2.cs_nbfree =
+ cpu_to_fs64(sb, uspi->cs_total.cs_nbfree);
+ usb3->fs_un1.fs_u2.cs_nifree =
+ cpu_to_fs64(sb, uspi->cs_total.cs_nifree);
+ usb3->fs_un1.fs_u2.cs_nffree =
+ cpu_to_fs64(sb, uspi->cs_total.cs_nffree);
}
+ usb1->fs_cstotal.cs_ndir = cpu_to_fs32(sb, uspi->cs_total.cs_ndir);
+ usb1->fs_cstotal.cs_nbfree = cpu_to_fs32(sb, uspi->cs_total.cs_nbfree);
+ usb1->fs_cstotal.cs_nifree = cpu_to_fs32(sb, uspi->cs_total.cs_nifree);
+ usb1->fs_cstotal.cs_nffree = cpu_to_fs32(sb, uspi->cs_total.cs_nffree);
+out:
ubh_mark_buffer_dirty(USPI_UBH(uspi));
ufs_print_super_stuff(sb, usb1, usb2, usb3);
UFSD("EXIT\n");
@@ -996,6 +1004,13 @@ again:
flags |= UFS_ST_SUN;
}
+ if ((flags & UFS_ST_MASK) == UFS_ST_44BSD &&
+ uspi->s_postblformat == UFS_42POSTBLFMT) {
+ if (!silent)
+ pr_err("this is not a 44bsd filesystem");
+ goto failed;
+ }
+
/*
* Check ufs magic number
*/
@@ -1143,8 +1158,8 @@ magic_found:
uspi->s_cgmask = fs32_to_cpu(sb, usb1->fs_cgmask);
if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
- uspi->s_u2_size = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size);
- uspi->s_u2_dsize = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize);
+ uspi->s_size = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size);
+ uspi->s_dsize = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize);
} else {
uspi->s_size = fs32_to_cpu(sb, usb1->fs_size);
uspi->s_dsize = fs32_to_cpu(sb, usb1->fs_dsize);
@@ -1193,6 +1208,18 @@ magic_found:
uspi->s_postbloff = fs32_to_cpu(sb, usb3->fs_postbloff);
uspi->s_rotbloff = fs32_to_cpu(sb, usb3->fs_rotbloff);
+ uspi->s_root_blocks = mul_u64_u32_div(uspi->s_dsize,
+ uspi->s_minfree, 100);
+ if (uspi->s_minfree <= 5) {
+ uspi->s_time_to_space = ~0ULL;
+ uspi->s_space_to_time = 0;
+ usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTSPACE);
+ } else {
+ uspi->s_time_to_space = (uspi->s_root_blocks / 2) + 1;
+ uspi->s_space_to_time = mul_u64_u32_div(uspi->s_dsize,
+ uspi->s_minfree - 2, 100) - 1;
+ }
+
/*
* Compute another frequently used values
*/
@@ -1382,19 +1409,17 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf)
mutex_lock(&UFS_SB(sb)->s_lock);
usb3 = ubh_get_usb_third(uspi);
- if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
+ if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2)
buf->f_type = UFS2_MAGIC;
- buf->f_blocks = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize);
- } else {
+ else
buf->f_type = UFS_MAGIC;
- buf->f_blocks = uspi->s_dsize;
- }
- buf->f_bfree = ufs_blkstofrags(uspi->cs_total.cs_nbfree) +
- uspi->cs_total.cs_nffree;
+
+ buf->f_blocks = uspi->s_dsize;
+ buf->f_bfree = ufs_freefrags(uspi);
buf->f_ffree = uspi->cs_total.cs_nifree;
buf->f_bsize = sb->s_blocksize;
- buf->f_bavail = (buf->f_bfree > (((long)buf->f_blocks / 100) * uspi->s_minfree))
- ? (buf->f_bfree - (((long)buf->f_blocks / 100) * uspi->s_minfree)) : 0;
+ buf->f_bavail = (buf->f_bfree > uspi->s_root_blocks)
+ ? (buf->f_bfree - uspi->s_root_blocks) : 0;
buf->f_files = uspi->s_ncg * uspi->s_ipg;
buf->f_namelen = UFS_MAXNAMLEN;
buf->f_fsid.val[0] = (u32)id;
diff --git a/fs/ufs/ufs_fs.h b/fs/ufs/ufs_fs.h
index 0cbd5d340b67..150eef6f1233 100644
--- a/fs/ufs/ufs_fs.h
+++ b/fs/ufs/ufs_fs.h
@@ -733,10 +733,8 @@ struct ufs_sb_private_info {
__u32 s_dblkno; /* offset of first data after cg */
__u32 s_cgoffset; /* cylinder group offset in cylinder */
__u32 s_cgmask; /* used to calc mod fs_ntrak */
- __u32 s_size; /* number of blocks (fragments) in fs */
- __u32 s_dsize; /* number of data blocks in fs */
- __u64 s_u2_size; /* ufs2: number of blocks (fragments) in fs */
- __u64 s_u2_dsize; /*ufs2: number of data blocks in fs */
+ __u64 s_size; /* number of blocks (fragments) in fs */
+ __u64 s_dsize; /* number of data blocks in fs */
__u32 s_ncg; /* number of cylinder groups */
__u32 s_bsize; /* size of basic blocks */
__u32 s_fsize; /* size of fragments */
@@ -793,6 +791,9 @@ struct ufs_sb_private_info {
__u32 s_maxsymlinklen;/* upper limit on fast symlinks' size */
__s32 fs_magic; /* filesystem magic */
unsigned int s_dirblksize;
+ __u64 s_root_blocks;
+ __u64 s_time_to_space;
+ __u64 s_space_to_time;
};
/*
diff --git a/fs/ufs/util.c b/fs/ufs/util.c
index f41ad0a6106f..02497a492eb2 100644
--- a/fs/ufs/util.c
+++ b/fs/ufs/util.c
@@ -243,9 +243,8 @@ ufs_set_inode_dev(struct super_block *sb, struct ufs_inode_info *ufsi, dev_t dev
struct page *ufs_get_locked_page(struct address_space *mapping,
pgoff_t index)
{
- struct page *page;
-
- page = find_lock_page(mapping, index);
+ struct inode *inode = mapping->host;
+ struct page *page = find_lock_page(mapping, index);
if (!page) {
page = read_mapping_page(mapping, index, NULL);
@@ -253,7 +252,7 @@ struct page *ufs_get_locked_page(struct address_space *mapping,
printk(KERN_ERR "ufs_change_blocknr: "
"read_mapping_page error: ino %lu, index: %lu\n",
mapping->host->i_ino, index);
- goto out;
+ return page;
}
lock_page(page);
@@ -262,8 +261,7 @@ struct page *ufs_get_locked_page(struct address_space *mapping,
/* Truncate got there first */
unlock_page(page);
put_page(page);
- page = NULL;
- goto out;
+ return NULL;
}
if (!PageUptodate(page) || PageError(page)) {
@@ -272,11 +270,12 @@ struct page *ufs_get_locked_page(struct address_space *mapping,
printk(KERN_ERR "ufs_change_blocknr: "
"can not read page: ino %lu, index: %lu\n",
- mapping->host->i_ino, index);
+ inode->i_ino, index);
- page = ERR_PTR(-EIO);
+ return ERR_PTR(-EIO);
}
}
-out:
+ if (!page_has_buffers(page))
+ create_empty_buffers(page, 1 << inode->i_blkbits, 0);
return page;
}
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 398019fb1448..9fc7119a1551 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -350,16 +350,11 @@ static inline void *ubh_get_data_ptr(struct ufs_sb_private_info *uspi,
#define ubh_blkmap(ubh,begin,bit) \
((*ubh_get_addr(ubh, (begin) + ((bit) >> 3)) >> ((bit) & 7)) & (0xff >> (UFS_MAXFRAG - uspi->s_fpb)))
-/*
- * Determine the number of available frags given a
- * percentage to hold in reserve.
- */
static inline u64
-ufs_freespace(struct ufs_sb_private_info *uspi, int percentreserved)
+ufs_freefrags(struct ufs_sb_private_info *uspi)
{
return ufs_blkstofrags(uspi->cs_total.cs_nbfree) +
- uspi->cs_total.cs_nffree -
- (uspi->s_dsize * (percentreserved) / 100);
+ uspi->cs_total.cs_nffree;
}
/*
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index f7555fc25877..cadcd12a3d35 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -81,7 +81,7 @@ struct userfaultfd_unmap_ctx {
struct userfaultfd_wait_queue {
struct uffd_msg msg;
- wait_queue_t wq;
+ wait_queue_entry_t wq;
struct userfaultfd_ctx *ctx;
bool waken;
};
@@ -91,7 +91,7 @@ struct userfaultfd_wake_range {
unsigned long len;
};
-static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode,
+static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
int wake_flags, void *key)
{
struct userfaultfd_wake_range *range = key;
@@ -129,7 +129,7 @@ static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode,
* wouldn't be enough, the smp_mb__before_spinlock is
* enough to avoid an explicit smp_mb() here.
*/
- list_del_init(&wq->task_list);
+ list_del_init(&wq->entry);
out:
return ret;
}
@@ -214,6 +214,7 @@ static inline struct uffd_msg userfault_msg(unsigned long address,
* hugepmd ranges.
*/
static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
+ struct vm_area_struct *vma,
unsigned long address,
unsigned long flags,
unsigned long reason)
@@ -224,7 +225,7 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
- pte = huge_pte_offset(mm, address);
+ pte = huge_pte_offset(mm, address, vma_mmu_pagesize(vma));
if (!pte)
goto out;
@@ -243,6 +244,7 @@ out:
}
#else
static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
+ struct vm_area_struct *vma,
unsigned long address,
unsigned long flags,
unsigned long reason)
@@ -340,9 +342,28 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
bool must_wait, return_to_userland;
long blocking_state;
- BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
-
ret = VM_FAULT_SIGBUS;
+
+ /*
+ * We don't do userfault handling for the final child pid update.
+ *
+ * We also don't do userfault handling during
+ * coredumping. hugetlbfs has the special
+ * follow_hugetlb_page() to skip missing pages in the
+ * FOLL_DUMP case, anon memory also checks for FOLL_DUMP with
+ * the no_page_table() helper in follow_page_mask(), but the
+ * shmem_vm_ops->fault method is invoked even during
+ * coredumping without mmap_sem and it ends up here.
+ */
+ if (current->flags & (PF_EXITING|PF_DUMPCORE))
+ goto out;
+
+ /*
+ * Coredumping runs without mmap_sem so we can only check that
+ * the mmap_sem is held, if PF_DUMPCORE was not set.
+ */
+ WARN_ON_ONCE(!rwsem_is_locked(&mm->mmap_sem));
+
ctx = vmf->vma->vm_userfaultfd_ctx.ctx;
if (!ctx)
goto out;
@@ -361,12 +382,6 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
goto out;
/*
- * We don't do userfault handling for the final child pid update.
- */
- if (current->flags & PF_EXITING)
- goto out;
-
- /*
* Check that we can return VM_FAULT_RETRY.
*
* NOTE: it should become possible to return VM_FAULT_RETRY
@@ -435,7 +450,8 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
reason);
else
- must_wait = userfaultfd_huge_must_wait(ctx, vmf->address,
+ must_wait = userfaultfd_huge_must_wait(ctx, vmf->vma,
+ vmf->address,
vmf->flags, reason);
up_read(&mm->mmap_sem);
@@ -509,13 +525,13 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
* and it's fine not to block on the spinlock. The uwq on this
* kernel stack can be released after the list_del_init.
*/
- if (!list_empty_careful(&uwq.wq.task_list)) {
+ if (!list_empty_careful(&uwq.wq.entry)) {
spin_lock(&ctx->fault_pending_wqh.lock);
/*
* No need of list_del_init(), the uwq on the stack
* will be freed shortly anyway.
*/
- list_del(&uwq.wq.task_list);
+ list_del(&uwq.wq.entry);
spin_unlock(&ctx->fault_pending_wqh.lock);
}
@@ -847,7 +863,7 @@ wakeup:
static inline struct userfaultfd_wait_queue *find_userfault_in(
wait_queue_head_t *wqh)
{
- wait_queue_t *wq;
+ wait_queue_entry_t *wq;
struct userfaultfd_wait_queue *uwq;
VM_BUG_ON(!spin_is_locked(&wqh->lock));
@@ -856,7 +872,7 @@ static inline struct userfaultfd_wait_queue *find_userfault_in(
if (!waitqueue_active(wqh))
goto out;
/* walk in reverse to provide FIFO behavior to read userfaults */
- wq = list_last_entry(&wqh->task_list, typeof(*wq), task_list);
+ wq = list_last_entry(&wqh->head, typeof(*wq), entry);
uwq = container_of(wq, struct userfaultfd_wait_queue, wq);
out:
return uwq;
@@ -990,14 +1006,14 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
* changes __remove_wait_queue() to use
* list_del_init() in turn breaking the
* !list_empty_careful() check in
- * handle_userfault(). The uwq->wq.task_list
+ * handle_userfault(). The uwq->wq.head list
* must never be empty at any time during the
* refile, or the waitqueue could disappear
* from under us. The "wait_queue_head_t"
* parameter of __remove_wait_queue() is unused
* anyway.
*/
- list_del(&uwq->wq.task_list);
+ list_del(&uwq->wq.entry);
__add_wait_queue(&ctx->fault_wqh, &uwq->wq);
write_seqcount_end(&ctx->refile_seq);
@@ -1019,7 +1035,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
fork_nctx = (struct userfaultfd_ctx *)
(unsigned long)
uwq->msg.arg.reserved.reserved1;
- list_move(&uwq->wq.task_list, &fork_event);
+ list_move(&uwq->wq.entry, &fork_event);
spin_unlock(&ctx->event_wqh.lock);
ret = 0;
break;
@@ -1056,8 +1072,8 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
if (!list_empty(&fork_event)) {
uwq = list_first_entry(&fork_event,
typeof(*uwq),
- wq.task_list);
- list_del(&uwq->wq.task_list);
+ wq.entry);
+ list_del(&uwq->wq.entry);
__add_wait_queue(&ctx->event_wqh, &uwq->wq);
userfaultfd_event_complete(ctx, uwq);
}
@@ -1101,11 +1117,6 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf,
static void __wake_userfault(struct userfaultfd_ctx *ctx,
struct userfaultfd_wake_range *range)
{
- unsigned long start, end;
-
- start = range->start;
- end = range->start + range->len;
-
spin_lock(&ctx->fault_pending_wqh.lock);
/* wake all in the range and autoremove */
if (waitqueue_active(&ctx->fault_pending_wqh))
@@ -1734,17 +1745,17 @@ static long userfaultfd_ioctl(struct file *file, unsigned cmd,
static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
{
struct userfaultfd_ctx *ctx = f->private_data;
- wait_queue_t *wq;
+ wait_queue_entry_t *wq;
struct userfaultfd_wait_queue *uwq;
unsigned long pending = 0, total = 0;
spin_lock(&ctx->fault_pending_wqh.lock);
- list_for_each_entry(wq, &ctx->fault_pending_wqh.task_list, task_list) {
+ list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) {
uwq = container_of(wq, struct userfaultfd_wait_queue, wq);
pending++;
total++;
}
- list_for_each_entry(wq, &ctx->fault_wqh.task_list, task_list) {
+ list_for_each_entry(wq, &ctx->fault_wqh.head, entry) {
uwq = container_of(wq, struct userfaultfd_wait_queue, wq);
total++;
}
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 35faf128f36d..1b98cfa342ab 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -96,3 +96,16 @@ config XFS_DEBUG
not useful unless you are debugging a particular problem.
Say N unless you are an XFS developer, or you play one on TV.
+
+config XFS_ASSERT_FATAL
+ bool "XFS fatal asserts"
+ default y
+ depends on XFS_FS && XFS_DEBUG
+ help
+ Set the default DEBUG mode ASSERT failure behavior.
+
+ Say Y here to cause DEBUG mode ASSERT failures to result in fatal
+ errors that BUG() the kernel by default. If you say N, ASSERT failures
+ result in warnings.
+
+ This behavior can be modified at runtime via sysfs.
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index 33db69be4832..b008ff3250eb 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -111,8 +111,7 @@ xfs_ag_resv_critical(
/* Critically low if less than 10% or max btree height remains. */
return XFS_TEST_ERROR(avail < orig / 10 || avail < XFS_BTREE_MAXLEVELS,
- pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL,
- XFS_RANDOM_AG_RESV_CRITICAL);
+ pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL);
}
/*
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 7486401ccbd3..744dcaec34cc 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -606,7 +606,7 @@ const struct xfs_buf_ops xfs_agfl_buf_ops = {
/*
* Read in the allocation group free block array.
*/
-STATIC int /* error */
+int /* error */
xfs_alloc_read_agfl(
xfs_mount_t *mp, /* mount point structure */
xfs_trans_t *tp, /* transaction pointer */
@@ -2454,8 +2454,7 @@ xfs_agf_read_verify(
!xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
xfs_buf_ioerror(bp, -EFSBADCRC);
else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
- XFS_ERRTAG_ALLOC_READ_AGF,
- XFS_RANDOM_ALLOC_READ_AGF))
+ XFS_ERRTAG_ALLOC_READ_AGF))
xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error)
@@ -2842,8 +2841,7 @@ xfs_free_extent(
ASSERT(type != XFS_AG_RESV_AGFL);
if (XFS_TEST_ERROR(false, mp,
- XFS_ERRTAG_FREE_EXTENT,
- XFS_RANDOM_FREE_EXTENT))
+ XFS_ERRTAG_FREE_EXTENT))
return -EIO;
error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 77d9c27330ab..ef26edc2e938 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -213,6 +213,8 @@ xfs_alloc_get_rec(
int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno, int flags, struct xfs_buf **bpp);
+int xfs_alloc_read_agfl(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_agnumber_t agno, struct xfs_buf **bpp);
int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags);
int xfs_free_extent_fix_freelist(struct xfs_trans *tp, xfs_agnumber_t agno,
struct xfs_buf **agbp);
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index e1fcfe7f0a9a..cfde0a0f9706 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -253,7 +253,7 @@ xfs_allocbt_init_ptr_from_cur(
ptr->s = agf->agf_roots[cur->bc_btnum];
}
-STATIC __int64_t
+STATIC int64_t
xfs_bnobt_key_diff(
struct xfs_btree_cur *cur,
union xfs_btree_key *key)
@@ -261,42 +261,42 @@ xfs_bnobt_key_diff(
xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a;
xfs_alloc_key_t *kp = &key->alloc;
- return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
+ return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
}
-STATIC __int64_t
+STATIC int64_t
xfs_cntbt_key_diff(
struct xfs_btree_cur *cur,
union xfs_btree_key *key)
{
xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a;
xfs_alloc_key_t *kp = &key->alloc;
- __int64_t diff;
+ int64_t diff;
- diff = (__int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
+ diff = (int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
if (diff)
return diff;
- return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
+ return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
}
-STATIC __int64_t
+STATIC int64_t
xfs_bnobt_diff_two_keys(
struct xfs_btree_cur *cur,
union xfs_btree_key *k1,
union xfs_btree_key *k2)
{
- return (__int64_t)be32_to_cpu(k1->alloc.ar_startblock) -
+ return (int64_t)be32_to_cpu(k1->alloc.ar_startblock) -
be32_to_cpu(k2->alloc.ar_startblock);
}
-STATIC __int64_t
+STATIC int64_t
xfs_cntbt_diff_two_keys(
struct xfs_btree_cur *cur,
union xfs_btree_key *k1,
union xfs_btree_key *k2)
{
- __int64_t diff;
+ int64_t diff;
diff = be32_to_cpu(k1->alloc.ar_blockcount) -
be32_to_cpu(k2->alloc.ar_blockcount);
@@ -395,7 +395,6 @@ const struct xfs_buf_ops xfs_allocbt_buf_ops = {
};
-#if defined(DEBUG) || defined(XFS_WARN)
STATIC int
xfs_bnobt_keys_inorder(
struct xfs_btree_cur *cur,
@@ -442,7 +441,6 @@ xfs_cntbt_recs_inorder(
be32_to_cpu(r1->alloc.ar_startblock) <
be32_to_cpu(r2->alloc.ar_startblock));
}
-#endif /* DEBUG */
static const struct xfs_btree_ops xfs_bnobt_ops = {
.rec_len = sizeof(xfs_alloc_rec_t),
@@ -462,10 +460,8 @@ static const struct xfs_btree_ops xfs_bnobt_ops = {
.key_diff = xfs_bnobt_key_diff,
.buf_ops = &xfs_allocbt_buf_ops,
.diff_two_keys = xfs_bnobt_diff_two_keys,
-#if defined(DEBUG) || defined(XFS_WARN)
.keys_inorder = xfs_bnobt_keys_inorder,
.recs_inorder = xfs_bnobt_recs_inorder,
-#endif
};
static const struct xfs_btree_ops xfs_cntbt_ops = {
@@ -486,10 +482,8 @@ static const struct xfs_btree_ops xfs_cntbt_ops = {
.key_diff = xfs_cntbt_key_diff,
.buf_ops = &xfs_allocbt_buf_ops,
.diff_two_keys = xfs_cntbt_diff_two_keys,
-#if defined(DEBUG) || defined(XFS_WARN)
.keys_inorder = xfs_cntbt_keys_inorder,
.recs_inorder = xfs_cntbt_recs_inorder,
-#endif
};
/*
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 6622d46ddec3..ef8a1c75a467 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -114,6 +114,23 @@ xfs_inode_hasattr(
* Overall external interface routines.
*========================================================================*/
+/* Retrieve an extended attribute and its value. Must have iolock. */
+int
+xfs_attr_get_ilocked(
+ struct xfs_inode *ip,
+ struct xfs_da_args *args)
+{
+ if (!xfs_inode_hasattr(ip))
+ return -ENOATTR;
+ else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
+ return xfs_attr_shortform_getvalue(args);
+ else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))
+ return xfs_attr_leaf_get(args);
+ else
+ return xfs_attr_node_get(args);
+}
+
+/* Retrieve an extended attribute by name, and its value. */
int
xfs_attr_get(
struct xfs_inode *ip,
@@ -141,14 +158,7 @@ xfs_attr_get(
args.op_flags = XFS_DA_OP_OKNOENT;
lock_mode = xfs_ilock_attr_map_shared(ip);
- if (!xfs_inode_hasattr(ip))
- error = -ENOATTR;
- else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
- error = xfs_attr_shortform_getvalue(&args);
- else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))
- error = xfs_attr_leaf_get(&args);
- else
- error = xfs_attr_node_get(&args);
+ error = xfs_attr_get_ilocked(ip, &args);
xfs_iunlock(ip, lock_mode);
*valuelenp = args.valuelen;
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 2852521fc8ec..c6c15e5717e4 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -351,7 +351,7 @@ xfs_attr3_leaf_read(
err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
XFS_ATTR_FORK, &xfs_attr3_leaf_buf_ops);
- if (!err && tp)
+ if (!err && tp && *bpp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_ATTR_LEAF_BUF);
return err;
}
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index d52f525f5b2d..5236d8e45146 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -253,7 +253,7 @@ xfs_attr_rmtval_copyout(
xfs_ino_t ino,
int *offset,
int *valuelen,
- __uint8_t **dst)
+ uint8_t **dst)
{
char *src = bp->b_addr;
xfs_daddr_t bno = bp->b_bn;
@@ -301,7 +301,7 @@ xfs_attr_rmtval_copyin(
xfs_ino_t ino,
int *offset,
int *valuelen,
- __uint8_t **src)
+ uint8_t **src)
{
char *dst = bp->b_addr;
xfs_daddr_t bno = bp->b_bn;
@@ -355,7 +355,7 @@ xfs_attr_rmtval_get(
struct xfs_mount *mp = args->dp->i_mount;
struct xfs_buf *bp;
xfs_dablk_t lblkno = args->rmtblkno;
- __uint8_t *dst = args->value;
+ uint8_t *dst = args->value;
int valuelen;
int nmap;
int error;
@@ -386,7 +386,8 @@ xfs_attr_rmtval_get(
(map[i].br_startblock != HOLESTARTBLOCK));
dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
- error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
+ error = xfs_trans_read_buf(mp, args->trans,
+ mp->m_ddev_targp,
dblkno, dblkcnt, 0, &bp,
&xfs_attr3_rmt_buf_ops);
if (error)
@@ -395,7 +396,7 @@ xfs_attr_rmtval_get(
error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
&offset, &valuelen,
&dst);
- xfs_buf_relse(bp);
+ xfs_trans_brelse(args->trans, bp);
if (error)
return error;
@@ -421,7 +422,7 @@ xfs_attr_rmtval_set(
struct xfs_bmbt_irec map;
xfs_dablk_t lblkno;
xfs_fileoff_t lfileoff = 0;
- __uint8_t *src = args->value;
+ uint8_t *src = args->value;
int blkcnt;
int valuelen;
int nmap;
diff --git a/fs/xfs/libxfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h
index 90928bbe693c..afd684ae3136 100644
--- a/fs/xfs/libxfs/xfs_attr_sf.h
+++ b/fs/xfs/libxfs/xfs_attr_sf.h
@@ -31,10 +31,10 @@ typedef struct xfs_attr_sf_entry xfs_attr_sf_entry_t;
* We generate this then sort it, attr_list() must return things in hash-order.
*/
typedef struct xfs_attr_sf_sort {
- __uint8_t entno; /* entry number in original list */
- __uint8_t namelen; /* length of name value (no null) */
- __uint8_t valuelen; /* length of value */
- __uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */
+ uint8_t entno; /* entry number in original list */
+ uint8_t namelen; /* length of name value (no null) */
+ uint8_t valuelen; /* length of value */
+ uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */
xfs_dahash_t hash; /* this entry's hash value */
unsigned char *name; /* name value, pointer into buffer */
} xfs_attr_sf_sort_t;
@@ -42,7 +42,7 @@ typedef struct xfs_attr_sf_sort {
#define XFS_ATTR_SF_ENTSIZE_BYNAME(nlen,vlen) /* space name/value uses */ \
(((int)sizeof(xfs_attr_sf_entry_t)-1 + (nlen)+(vlen)))
#define XFS_ATTR_SF_ENTSIZE_MAX /* max space for name&value */ \
- ((1 << (NBBY*(int)sizeof(__uint8_t))) - 1)
+ ((1 << (NBBY*(int)sizeof(uint8_t))) - 1)
#define XFS_ATTR_SF_ENTSIZE(sfep) /* space an entry uses */ \
((int)sizeof(xfs_attr_sf_entry_t)-1 + (sfep)->namelen+(sfep)->valuelen)
#define XFS_ATTR_SF_NEXTENTRY(sfep) /* next entry in struct */ \
diff --git a/fs/xfs/libxfs/xfs_bit.h b/fs/xfs/libxfs/xfs_bit.h
index e1649c0d3e02..61c6b2025d0c 100644
--- a/fs/xfs/libxfs/xfs_bit.h
+++ b/fs/xfs/libxfs/xfs_bit.h
@@ -25,47 +25,47 @@
/*
* masks with n high/low bits set, 64-bit values
*/
-static inline __uint64_t xfs_mask64hi(int n)
+static inline uint64_t xfs_mask64hi(int n)
{
- return (__uint64_t)-1 << (64 - (n));
+ return (uint64_t)-1 << (64 - (n));
}
-static inline __uint32_t xfs_mask32lo(int n)
+static inline uint32_t xfs_mask32lo(int n)
{
- return ((__uint32_t)1 << (n)) - 1;
+ return ((uint32_t)1 << (n)) - 1;
}
-static inline __uint64_t xfs_mask64lo(int n)
+static inline uint64_t xfs_mask64lo(int n)
{
- return ((__uint64_t)1 << (n)) - 1;
+ return ((uint64_t)1 << (n)) - 1;
}
/* Get high bit set out of 32-bit argument, -1 if none set */
-static inline int xfs_highbit32(__uint32_t v)
+static inline int xfs_highbit32(uint32_t v)
{
return fls(v) - 1;
}
/* Get high bit set out of 64-bit argument, -1 if none set */
-static inline int xfs_highbit64(__uint64_t v)
+static inline int xfs_highbit64(uint64_t v)
{
return fls64(v) - 1;
}
/* Get low bit set out of 32-bit argument, -1 if none set */
-static inline int xfs_lowbit32(__uint32_t v)
+static inline int xfs_lowbit32(uint32_t v)
{
return ffs(v) - 1;
}
/* Get low bit set out of 64-bit argument, -1 if none set */
-static inline int xfs_lowbit64(__uint64_t v)
+static inline int xfs_lowbit64(uint64_t v)
{
- __uint32_t w = (__uint32_t)v;
+ uint32_t w = (uint32_t)v;
int n = 0;
if (w) { /* lower bits */
n = ffs(w);
} else { /* upper bits */
- w = (__uint32_t)(v >> 32);
+ w = (uint32_t)(v >> 32);
if (w) {
n = ffs(w);
if (n)
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index a7048eafa8e6..0a9880777c9c 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3992,7 +3992,7 @@ xfs_bmapi_read(
if (unlikely(XFS_TEST_ERROR(
(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
- mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+ mp, XFS_ERRTAG_BMAPIFORMAT))) {
XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp);
return -EFSCORRUPTED;
}
@@ -4473,7 +4473,7 @@ xfs_bmapi_write(
if (unlikely(XFS_TEST_ERROR(
(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
- mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+ mp, XFS_ERRTAG_BMAPIFORMAT))) {
XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp);
return -EFSCORRUPTED;
}
@@ -4694,7 +4694,7 @@ xfs_bmapi_remap(
if (unlikely(XFS_TEST_ERROR(
(XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
- mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+ mp, XFS_ERRTAG_BMAPIFORMAT))) {
XFS_ERROR_REPORT("xfs_bmapi_remap", XFS_ERRLEVEL_LOW, mp);
return -EFSCORRUPTED;
}
@@ -5434,6 +5434,7 @@ __xfs_bunmapi(
int whichfork; /* data or attribute fork */
xfs_fsblock_t sum;
xfs_filblks_t len = *rlen; /* length to unmap in file */
+ xfs_fileoff_t max_len;
trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);
@@ -5455,6 +5456,16 @@ __xfs_bunmapi(
ASSERT(len > 0);
ASSERT(nexts >= 0);
+ /*
+ * Guesstimate how many blocks we can unmap without running the risk of
+ * blowing out the transaction with a mix of EFIs and reflink
+ * adjustments.
+ */
+ if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
+ max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
+ else
+ max_len = len;
+
if (!(ifp->if_flags & XFS_IFEXTENTS) &&
(error = xfs_iread_extents(tp, ip, whichfork)))
return error;
@@ -5499,7 +5510,7 @@ __xfs_bunmapi(
extno = 0;
while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
- (nexts == 0 || extno < nexts)) {
+ (nexts == 0 || extno < nexts) && max_len > 0) {
/*
* Is the found extent after a hole in which bno lives?
* Just back up to the previous extent, if so.
@@ -5531,6 +5542,15 @@ __xfs_bunmapi(
}
if (del.br_startoff + del.br_blockcount > bno + 1)
del.br_blockcount = bno + 1 - del.br_startoff;
+
+ /* How much can we safely unmap? */
+ if (max_len < del.br_blockcount) {
+ del.br_startoff += del.br_blockcount - max_len;
+ if (!wasdel)
+ del.br_startblock += del.br_blockcount - max_len;
+ del.br_blockcount = max_len;
+ }
+
sum = del.br_startblock + del.br_blockcount;
if (isrt &&
(mod = do_mod(sum, mp->m_sb.sb_rextsize))) {
@@ -5707,6 +5727,7 @@ __xfs_bunmapi(
if (!isrt && wasdel)
xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, false);
+ max_len -= del.br_blockcount;
bno = del.br_startoff - 1;
nodelete:
/*
@@ -6077,7 +6098,7 @@ xfs_bmap_shift_extents(
if (unlikely(XFS_TEST_ERROR(
(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
- mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+ mp, XFS_ERRTAG_BMAPIFORMAT))) {
XFS_ERROR_REPORT("xfs_bmap_shift_extents",
XFS_ERRLEVEL_LOW, mp);
return -EFSCORRUPTED;
@@ -6229,7 +6250,7 @@ xfs_bmap_split_extent_at(
if (unlikely(XFS_TEST_ERROR(
(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
- mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+ mp, XFS_ERRTAG_BMAPIFORMAT))) {
XFS_ERROR_REPORT("xfs_bmap_split_extent_at",
XFS_ERRLEVEL_LOW, mp);
return -EFSCORRUPTED;
@@ -6472,33 +6493,33 @@ xfs_bmap_finish_one(
int whichfork,
xfs_fileoff_t startoff,
xfs_fsblock_t startblock,
- xfs_filblks_t blockcount,
+ xfs_filblks_t *blockcount,
xfs_exntst_t state)
{
- int error = 0, done;
+ xfs_fsblock_t firstfsb;
+ int error = 0;
trace_xfs_bmap_deferred(tp->t_mountp,
XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
- ip->i_ino, whichfork, startoff, blockcount, state);
+ ip->i_ino, whichfork, startoff, *blockcount, state);
if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK))
return -EFSCORRUPTED;
if (XFS_TEST_ERROR(false, tp->t_mountp,
- XFS_ERRTAG_BMAP_FINISH_ONE,
- XFS_RANDOM_BMAP_FINISH_ONE))
+ XFS_ERRTAG_BMAP_FINISH_ONE))
return -EIO;
switch (type) {
case XFS_BMAP_MAP:
- error = xfs_bmapi_remap(tp, ip, startoff, blockcount,
+ error = xfs_bmapi_remap(tp, ip, startoff, *blockcount,
startblock, dfops);
+ *blockcount = 0;
break;
case XFS_BMAP_UNMAP:
- error = xfs_bunmapi(tp, ip, startoff, blockcount,
- XFS_BMAPI_REMAP, 1, &startblock, dfops, &done);
- ASSERT(done);
+ error = __xfs_bunmapi(tp, ip, startoff, blockcount,
+ XFS_BMAPI_REMAP, 1, &firstfsb, dfops);
break;
default:
ASSERT(0);
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index c35a14fa1527..851982a5dfbc 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -271,7 +271,7 @@ struct xfs_bmap_intent {
int xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_defer_ops *dfops,
struct xfs_inode *ip, enum xfs_bmap_intent_type type,
int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock,
- xfs_filblks_t blockcount, xfs_exntst_t state);
+ xfs_filblks_t *blockcount, xfs_exntst_t state);
int xfs_bmap_map_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
struct xfs_inode *ip, struct xfs_bmbt_irec *imap);
int xfs_bmap_unmap_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 6cba69aff077..85de22513014 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -94,8 +94,8 @@ xfs_bmdr_to_bmbt(
*/
STATIC void
__xfs_bmbt_get_all(
- __uint64_t l0,
- __uint64_t l1,
+ uint64_t l0,
+ uint64_t l1,
xfs_bmbt_irec_t *s)
{
int ext_flag;
@@ -573,6 +573,16 @@ xfs_bmbt_init_key_from_rec(
}
STATIC void
+xfs_bmbt_init_high_key_from_rec(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
+{
+ key->bmbt.br_startoff = cpu_to_be64(
+ xfs_bmbt_disk_get_startoff(&rec->bmbt) +
+ xfs_bmbt_disk_get_blockcount(&rec->bmbt) - 1);
+}
+
+STATIC void
xfs_bmbt_init_rec_from_cur(
struct xfs_btree_cur *cur,
union xfs_btree_rec *rec)
@@ -588,15 +598,25 @@ xfs_bmbt_init_ptr_from_cur(
ptr->l = 0;
}
-STATIC __int64_t
+STATIC int64_t
xfs_bmbt_key_diff(
struct xfs_btree_cur *cur,
union xfs_btree_key *key)
{
- return (__int64_t)be64_to_cpu(key->bmbt.br_startoff) -
+ return (int64_t)be64_to_cpu(key->bmbt.br_startoff) -
cur->bc_rec.b.br_startoff;
}
+STATIC int64_t
+xfs_bmbt_diff_two_keys(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2)
+{
+ return (int64_t)be64_to_cpu(k1->bmbt.br_startoff) -
+ be64_to_cpu(k2->bmbt.br_startoff);
+}
+
static bool
xfs_bmbt_verify(
struct xfs_buf *bp)
@@ -687,7 +707,6 @@ const struct xfs_buf_ops xfs_bmbt_buf_ops = {
};
-#if defined(DEBUG) || defined(XFS_WARN)
STATIC int
xfs_bmbt_keys_inorder(
struct xfs_btree_cur *cur,
@@ -708,7 +727,6 @@ xfs_bmbt_recs_inorder(
xfs_bmbt_disk_get_blockcount(&r1->bmbt) <=
xfs_bmbt_disk_get_startoff(&r2->bmbt);
}
-#endif /* DEBUG */
static const struct xfs_btree_ops xfs_bmbt_ops = {
.rec_len = sizeof(xfs_bmbt_rec_t),
@@ -722,14 +740,14 @@ static const struct xfs_btree_ops xfs_bmbt_ops = {
.get_minrecs = xfs_bmbt_get_minrecs,
.get_dmaxrecs = xfs_bmbt_get_dmaxrecs,
.init_key_from_rec = xfs_bmbt_init_key_from_rec,
+ .init_high_key_from_rec = xfs_bmbt_init_high_key_from_rec,
.init_rec_from_cur = xfs_bmbt_init_rec_from_cur,
.init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur,
.key_diff = xfs_bmbt_key_diff,
+ .diff_two_keys = xfs_bmbt_diff_two_keys,
.buf_ops = &xfs_bmbt_buf_ops,
-#if defined(DEBUG) || defined(XFS_WARN)
.keys_inorder = xfs_bmbt_keys_inorder,
.recs_inorder = xfs_bmbt_recs_inorder,
-#endif
};
/*
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 3a673ba201aa..4da85fff69ad 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -43,7 +43,7 @@ kmem_zone_t *xfs_btree_cur_zone;
/*
* Btree magic numbers.
*/
-static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
+static const uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
XFS_FIBT_MAGIC, 0 },
{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC,
@@ -51,12 +51,12 @@ static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
XFS_REFC_CRC_MAGIC }
};
-__uint32_t
+uint32_t
xfs_btree_magic(
int crc,
xfs_btnum_t btnum)
{
- __uint32_t magic = xfs_magics[crc][btnum];
+ uint32_t magic = xfs_magics[crc][btnum];
/* Ensure we asked for crc for crc-only magics. */
ASSERT(magic != 0);
@@ -101,8 +101,7 @@ xfs_btree_check_lblock(
be64_to_cpu(block->bb_u.l.bb_rightsib)));
if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
- XFS_ERRTAG_BTREE_CHECK_LBLOCK,
- XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
+ XFS_ERRTAG_BTREE_CHECK_LBLOCK))) {
if (bp)
trace_xfs_btree_corrupt(bp, _RET_IP_);
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
@@ -153,8 +152,7 @@ xfs_btree_check_sblock(
block->bb_u.s.bb_rightsib;
if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,
- XFS_ERRTAG_BTREE_CHECK_SBLOCK,
- XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
+ XFS_ERRTAG_BTREE_CHECK_SBLOCK))) {
if (bp)
trace_xfs_btree_corrupt(bp, _RET_IP_);
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
@@ -568,7 +566,7 @@ xfs_btree_ptr_offset(
/*
* Return a pointer to the n-th record in the btree block.
*/
-STATIC union xfs_btree_rec *
+union xfs_btree_rec *
xfs_btree_rec_addr(
struct xfs_btree_cur *cur,
int n,
@@ -581,7 +579,7 @@ xfs_btree_rec_addr(
/*
* Return a pointer to the n-th key in the btree block.
*/
-STATIC union xfs_btree_key *
+union xfs_btree_key *
xfs_btree_key_addr(
struct xfs_btree_cur *cur,
int n,
@@ -594,7 +592,7 @@ xfs_btree_key_addr(
/*
* Return a pointer to the n-th high key in the btree block.
*/
-STATIC union xfs_btree_key *
+union xfs_btree_key *
xfs_btree_high_key_addr(
struct xfs_btree_cur *cur,
int n,
@@ -607,7 +605,7 @@ xfs_btree_high_key_addr(
/*
* Return a pointer to the n-th block pointer in the btree block.
*/
-STATIC union xfs_btree_ptr *
+union xfs_btree_ptr *
xfs_btree_ptr_addr(
struct xfs_btree_cur *cur,
int n,
@@ -641,7 +639,7 @@ xfs_btree_get_iroot(
* Retrieve the block pointer from the cursor at the given level.
* This may be an inode btree root or from a buffer.
*/
-STATIC struct xfs_btree_block * /* generic btree block pointer */
+struct xfs_btree_block * /* generic btree block pointer */
xfs_btree_get_block(
struct xfs_btree_cur *cur, /* btree cursor */
int level, /* level in btree */
@@ -778,14 +776,14 @@ xfs_btree_lastrec(
*/
void
xfs_btree_offsets(
- __int64_t fields, /* bitmask of fields */
+ int64_t fields, /* bitmask of fields */
const short *offsets, /* table of field offsets */
int nbits, /* number of bits to inspect */
int *first, /* output: first byte offset */
int *last) /* output: last byte offset */
{
int i; /* current bit number */
- __int64_t imask; /* mask for current bit number */
+ int64_t imask; /* mask for current bit number */
ASSERT(fields != 0);
/*
@@ -1756,7 +1754,7 @@ error0:
return error;
}
-STATIC int
+int
xfs_btree_lookup_get_block(
struct xfs_btree_cur *cur, /* btree cursor */
int level, /* level in the btree */
@@ -1846,7 +1844,7 @@ xfs_btree_lookup(
int *stat) /* success/failure */
{
struct xfs_btree_block *block; /* current btree block */
- __int64_t diff; /* difference for the current key */
+ int64_t diff; /* difference for the current key */
int error; /* error return value */
int keyno; /* current key number */
int level; /* level in the btree */
@@ -4435,7 +4433,7 @@ xfs_btree_visit_blocks(
* recovery completion writes the changes to disk.
*/
struct xfs_btree_block_change_owner_info {
- __uint64_t new_owner;
+ uint64_t new_owner;
struct list_head *buffer_list;
};
@@ -4481,7 +4479,7 @@ xfs_btree_block_change_owner(
int
xfs_btree_change_owner(
struct xfs_btree_cur *cur,
- __uint64_t new_owner,
+ uint64_t new_owner,
struct list_head *buffer_list)
{
struct xfs_btree_block_change_owner_info bbcoi;
@@ -4585,7 +4583,7 @@ xfs_btree_simple_query_range(
{
union xfs_btree_rec *recp;
union xfs_btree_key rec_key;
- __int64_t diff;
+ int64_t diff;
int stat;
bool firstrec = true;
int error;
@@ -4682,8 +4680,8 @@ xfs_btree_overlapped_query_range(
union xfs_btree_key *hkp;
union xfs_btree_rec *recp;
struct xfs_btree_block *block;
- __int64_t ldiff;
- __int64_t hdiff;
+ int64_t ldiff;
+ int64_t hdiff;
int level;
struct xfs_buf *bp;
int i;
@@ -4849,12 +4847,14 @@ xfs_btree_query_all(
xfs_btree_query_range_fn fn,
void *priv)
{
- union xfs_btree_irec low_rec;
- union xfs_btree_irec high_rec;
+ union xfs_btree_key low_key;
+ union xfs_btree_key high_key;
+
+ memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
+ memset(&low_key, 0, sizeof(low_key));
+ memset(&high_key, 0xFF, sizeof(high_key));
- memset(&low_rec, 0, sizeof(low_rec));
- memset(&high_rec, 0xFF, sizeof(high_rec));
- return xfs_btree_query_range(cur, &low_rec, &high_rec, fn, priv);
+ return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv);
}
/*
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 27bed08261c5..9c95e965cfe5 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -76,7 +76,7 @@ union xfs_btree_rec {
#define XFS_BTNUM_RMAP ((xfs_btnum_t)XFS_BTNUM_RMAPi)
#define XFS_BTNUM_REFC ((xfs_btnum_t)XFS_BTNUM_REFCi)
-__uint32_t xfs_btree_magic(int crc, xfs_btnum_t btnum);
+uint32_t xfs_btree_magic(int crc, xfs_btnum_t btnum);
/*
* For logging record fields.
@@ -150,20 +150,19 @@ struct xfs_btree_ops {
union xfs_btree_rec *rec);
/* difference between key value and cursor value */
- __int64_t (*key_diff)(struct xfs_btree_cur *cur,
+ int64_t (*key_diff)(struct xfs_btree_cur *cur,
union xfs_btree_key *key);
/*
* Difference between key2 and key1 -- positive if key1 > key2,
* negative if key1 < key2, and zero if equal.
*/
- __int64_t (*diff_two_keys)(struct xfs_btree_cur *cur,
+ int64_t (*diff_two_keys)(struct xfs_btree_cur *cur,
union xfs_btree_key *key1,
union xfs_btree_key *key2);
const struct xfs_buf_ops *buf_ops;
-#if defined(DEBUG) || defined(XFS_WARN)
/* check that k1 is lower than k2 */
int (*keys_inorder)(struct xfs_btree_cur *cur,
union xfs_btree_key *k1,
@@ -173,7 +172,6 @@ struct xfs_btree_ops {
int (*recs_inorder)(struct xfs_btree_cur *cur,
union xfs_btree_rec *r1,
union xfs_btree_rec *r2);
-#endif
};
/*
@@ -213,11 +211,11 @@ typedef struct xfs_btree_cur
union xfs_btree_irec bc_rec; /* current insert/search record value */
struct xfs_buf *bc_bufs[XFS_BTREE_MAXLEVELS]; /* buf ptr per level */
int bc_ptrs[XFS_BTREE_MAXLEVELS]; /* key/record # */
- __uint8_t bc_ra[XFS_BTREE_MAXLEVELS]; /* readahead bits */
+ uint8_t bc_ra[XFS_BTREE_MAXLEVELS]; /* readahead bits */
#define XFS_BTCUR_LEFTRA 1 /* left sibling has been read-ahead */
#define XFS_BTCUR_RIGHTRA 2 /* right sibling has been read-ahead */
- __uint8_t bc_nlevels; /* number of levels in the tree */
- __uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */
+ uint8_t bc_nlevels; /* number of levels in the tree */
+ uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */
xfs_btnum_t bc_btnum; /* identifies which btree type */
int bc_statoff; /* offset of btre stats array */
union {
@@ -330,7 +328,7 @@ xfs_btree_islastblock(
*/
void
xfs_btree_offsets(
- __int64_t fields, /* bitmask of fields */
+ int64_t fields, /* bitmask of fields */
const short *offsets,/* table of field offsets */
int nbits, /* number of bits to inspect */
int *first, /* output: first byte offset */
@@ -408,7 +406,7 @@ int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *);
int xfs_btree_insert(struct xfs_btree_cur *, int *);
int xfs_btree_delete(struct xfs_btree_cur *, int *);
int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *);
-int xfs_btree_change_owner(struct xfs_btree_cur *cur, __uint64_t new_owner,
+int xfs_btree_change_owner(struct xfs_btree_cur *cur, uint64_t new_owner,
struct list_head *buffer_list);
/*
@@ -434,7 +432,7 @@ static inline int xfs_btree_get_numrecs(struct xfs_btree_block *block)
}
static inline void xfs_btree_set_numrecs(struct xfs_btree_block *block,
- __uint16_t numrecs)
+ uint16_t numrecs)
{
block->bb_numrecs = cpu_to_be16(numrecs);
}
@@ -506,4 +504,17 @@ int xfs_btree_visit_blocks(struct xfs_btree_cur *cur,
int xfs_btree_count_blocks(struct xfs_btree_cur *cur, xfs_extlen_t *blocks);
+union xfs_btree_rec *xfs_btree_rec_addr(struct xfs_btree_cur *cur, int n,
+ struct xfs_btree_block *block);
+union xfs_btree_key *xfs_btree_key_addr(struct xfs_btree_cur *cur, int n,
+ struct xfs_btree_block *block);
+union xfs_btree_key *xfs_btree_high_key_addr(struct xfs_btree_cur *cur, int n,
+ struct xfs_btree_block *block);
+union xfs_btree_ptr *xfs_btree_ptr_addr(struct xfs_btree_cur *cur, int n,
+ struct xfs_btree_block *block);
+int xfs_btree_lookup_get_block(struct xfs_btree_cur *cur, int level,
+ union xfs_btree_ptr *pp, struct xfs_btree_block **blkp);
+struct xfs_btree_block *xfs_btree_get_block(struct xfs_btree_cur *cur,
+ int level, struct xfs_buf **bpp);
+
#endif /* __XFS_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_cksum.h b/fs/xfs/libxfs/xfs_cksum.h
index a416c7cb23ea..8211f48b98e6 100644
--- a/fs/xfs/libxfs/xfs_cksum.h
+++ b/fs/xfs/libxfs/xfs_cksum.h
@@ -1,7 +1,7 @@
#ifndef _XFS_CKSUM_H
#define _XFS_CKSUM_H 1
-#define XFS_CRC_SEED (~(__uint32_t)0)
+#define XFS_CRC_SEED (~(uint32_t)0)
/*
* Calculate the intermediate checksum for a buffer that has the CRC field
@@ -9,11 +9,11 @@
* cksum_offset parameter. We do not modify the buffer during verification,
* hence we have to split the CRC calculation across the cksum_offset.
*/
-static inline __uint32_t
+static inline uint32_t
xfs_start_cksum_safe(char *buffer, size_t length, unsigned long cksum_offset)
{
- __uint32_t zero = 0;
- __uint32_t crc;
+ uint32_t zero = 0;
+ uint32_t crc;
/* Calculate CRC up to the checksum. */
crc = crc32c(XFS_CRC_SEED, buffer, cksum_offset);
@@ -30,7 +30,7 @@ xfs_start_cksum_safe(char *buffer, size_t length, unsigned long cksum_offset)
* Fast CRC method where the buffer is modified. Callers must have exclusive
* access to the buffer while the calculation takes place.
*/
-static inline __uint32_t
+static inline uint32_t
xfs_start_cksum_update(char *buffer, size_t length, unsigned long cksum_offset)
{
/* zero the CRC field */
@@ -48,7 +48,7 @@ xfs_start_cksum_update(char *buffer, size_t length, unsigned long cksum_offset)
* so that it is consistent on disk.
*/
static inline __le32
-xfs_end_cksum(__uint32_t crc)
+xfs_end_cksum(uint32_t crc)
{
return ~cpu_to_le32(crc);
}
@@ -62,7 +62,7 @@ xfs_end_cksum(__uint32_t crc)
static inline void
xfs_update_cksum(char *buffer, size_t length, unsigned long cksum_offset)
{
- __uint32_t crc = xfs_start_cksum_update(buffer, length, cksum_offset);
+ uint32_t crc = xfs_start_cksum_update(buffer, length, cksum_offset);
*(__le32 *)(buffer + cksum_offset) = xfs_end_cksum(crc);
}
@@ -73,7 +73,7 @@ xfs_update_cksum(char *buffer, size_t length, unsigned long cksum_offset)
static inline int
xfs_verify_cksum(char *buffer, size_t length, unsigned long cksum_offset)
{
- __uint32_t crc = xfs_start_cksum_safe(buffer, length, cksum_offset);
+ uint32_t crc = xfs_start_cksum_safe(buffer, length, cksum_offset);
return *(__le32 *)(buffer + cksum_offset) == xfs_end_cksum(crc);
}
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 1bdf2888295b..6d4335815c3f 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -263,7 +263,7 @@ xfs_da3_node_read(
err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
which_fork, &xfs_da3_node_buf_ops);
- if (!err && tp) {
+ if (!err && tp && *bpp) {
struct xfs_da_blkinfo *info = (*bpp)->b_addr;
int type;
@@ -1282,7 +1282,7 @@ xfs_da3_fixhashpath(
return;
break;
case XFS_DIR2_LEAFN_MAGIC:
- lasthash = xfs_dir2_leafn_lasthash(dp, blk->bp, &count);
+ lasthash = xfs_dir2_leaf_lasthash(dp, blk->bp, &count);
if (count == 0)
return;
break;
@@ -1502,8 +1502,8 @@ xfs_da3_node_lookup_int(
if (blk->magic == XFS_DIR2_LEAFN_MAGIC ||
blk->magic == XFS_DIR3_LEAFN_MAGIC) {
blk->magic = XFS_DIR2_LEAFN_MAGIC;
- blk->hashval = xfs_dir2_leafn_lasthash(args->dp,
- blk->bp, NULL);
+ blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
+ blk->bp, NULL);
break;
}
@@ -1929,8 +1929,8 @@ xfs_da3_path_shift(
blk->magic = XFS_DIR2_LEAFN_MAGIC;
ASSERT(level == path->active-1);
blk->index = 0;
- blk->hashval = xfs_dir2_leafn_lasthash(args->dp,
- blk->bp, NULL);
+ blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
+ blk->bp, NULL);
break;
default:
ASSERT(0);
@@ -1952,7 +1952,7 @@ xfs_da3_path_shift(
* This is implemented with some source-level loop unrolling.
*/
xfs_dahash_t
-xfs_da_hashname(const __uint8_t *name, int namelen)
+xfs_da_hashname(const uint8_t *name, int namelen)
{
xfs_dahash_t hash;
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index 4e29cb6a3627..ae6de17467f2 100644
--- a/fs/xfs/libxfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -60,10 +60,10 @@ enum xfs_dacmp {
*/
typedef struct xfs_da_args {
struct xfs_da_geometry *geo; /* da block geometry */
- const __uint8_t *name; /* string (maybe not NULL terminated) */
+ const uint8_t *name; /* string (maybe not NULL terminated) */
int namelen; /* length of string (maybe no NULL) */
- __uint8_t filetype; /* filetype of inode for directories */
- __uint8_t *value; /* set of bytes (maybe contain NULLs) */
+ uint8_t filetype; /* filetype of inode for directories */
+ uint8_t *value; /* set of bytes (maybe contain NULLs) */
int valuelen; /* length of value */
int flags; /* argument flags (eg: ATTR_NOCREATE) */
xfs_dahash_t hashval; /* hash value of name */
@@ -207,7 +207,7 @@ int xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno,
int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
struct xfs_buf *dead_buf);
-uint xfs_da_hashname(const __uint8_t *name_string, int name_length);
+uint xfs_da_hashname(const uint8_t *name_string, int name_length);
enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,
const unsigned char *name, int len);
diff --git a/fs/xfs/libxfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c
index f1e8d4dbb600..6d77d1a8498a 100644
--- a/fs/xfs/libxfs/xfs_da_format.c
+++ b/fs/xfs/libxfs/xfs_da_format.c
@@ -49,7 +49,7 @@ xfs_dir3_sf_entsize(
struct xfs_dir2_sf_hdr *hdr,
int len)
{
- return xfs_dir2_sf_entsize(hdr, len) + sizeof(__uint8_t);
+ return xfs_dir2_sf_entsize(hdr, len) + sizeof(uint8_t);
}
static struct xfs_dir2_sf_entry *
@@ -77,7 +77,7 @@ xfs_dir3_sf_nextentry(
* not necessary. For non-filetype enable directories, the type is always
* unknown and we never store the value.
*/
-static __uint8_t
+static uint8_t
xfs_dir2_sfe_get_ftype(
struct xfs_dir2_sf_entry *sfep)
{
@@ -87,16 +87,16 @@ xfs_dir2_sfe_get_ftype(
static void
xfs_dir2_sfe_put_ftype(
struct xfs_dir2_sf_entry *sfep,
- __uint8_t ftype)
+ uint8_t ftype)
{
ASSERT(ftype < XFS_DIR3_FT_MAX);
}
-static __uint8_t
+static uint8_t
xfs_dir3_sfe_get_ftype(
struct xfs_dir2_sf_entry *sfep)
{
- __uint8_t ftype;
+ uint8_t ftype;
ftype = sfep->name[sfep->namelen];
if (ftype >= XFS_DIR3_FT_MAX)
@@ -107,7 +107,7 @@ xfs_dir3_sfe_get_ftype(
static void
xfs_dir3_sfe_put_ftype(
struct xfs_dir2_sf_entry *sfep,
- __uint8_t ftype)
+ uint8_t ftype)
{
ASSERT(ftype < XFS_DIR3_FT_MAX);
@@ -124,7 +124,7 @@ xfs_dir3_sfe_put_ftype(
static xfs_ino_t
xfs_dir2_sf_get_ino(
struct xfs_dir2_sf_hdr *hdr,
- __uint8_t *from)
+ uint8_t *from)
{
if (hdr->i8count)
return get_unaligned_be64(from) & 0x00ffffffffffffffULL;
@@ -135,7 +135,7 @@ xfs_dir2_sf_get_ino(
static void
xfs_dir2_sf_put_ino(
struct xfs_dir2_sf_hdr *hdr,
- __uint8_t *to,
+ uint8_t *to,
xfs_ino_t ino)
{
ASSERT((ino & 0xff00000000000000ULL) == 0);
@@ -225,7 +225,7 @@ xfs_dir3_sfe_put_ino(
#define XFS_DIR3_DATA_ENTSIZE(n) \
round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) + \
- sizeof(xfs_dir2_data_off_t) + sizeof(__uint8_t)), \
+ sizeof(xfs_dir2_data_off_t) + sizeof(uint8_t)), \
XFS_DIR2_DATA_ALIGN)
static int
@@ -242,7 +242,7 @@ xfs_dir3_data_entsize(
return XFS_DIR3_DATA_ENTSIZE(n);
}
-static __uint8_t
+static uint8_t
xfs_dir2_data_get_ftype(
struct xfs_dir2_data_entry *dep)
{
@@ -252,16 +252,16 @@ xfs_dir2_data_get_ftype(
static void
xfs_dir2_data_put_ftype(
struct xfs_dir2_data_entry *dep,
- __uint8_t ftype)
+ uint8_t ftype)
{
ASSERT(ftype < XFS_DIR3_FT_MAX);
}
-static __uint8_t
+static uint8_t
xfs_dir3_data_get_ftype(
struct xfs_dir2_data_entry *dep)
{
- __uint8_t ftype = dep->name[dep->namelen];
+ uint8_t ftype = dep->name[dep->namelen];
if (ftype >= XFS_DIR3_FT_MAX)
return XFS_DIR3_FT_UNKNOWN;
@@ -271,7 +271,7 @@ xfs_dir3_data_get_ftype(
static void
xfs_dir3_data_put_ftype(
struct xfs_dir2_data_entry *dep,
- __uint8_t type)
+ uint8_t type)
{
ASSERT(type < XFS_DIR3_FT_MAX);
ASSERT(dep->namelen != 0);
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 9a492a9e19bd..3771edcb301d 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -111,11 +111,11 @@ struct xfs_da3_intnode {
* appropriate.
*/
struct xfs_da3_icnode_hdr {
- __uint32_t forw;
- __uint32_t back;
- __uint16_t magic;
- __uint16_t count;
- __uint16_t level;
+ uint32_t forw;
+ uint32_t back;
+ uint16_t magic;
+ uint16_t count;
+ uint16_t level;
};
/*
@@ -187,14 +187,14 @@ struct xfs_da3_icnode_hdr {
/*
* Byte offset in data block and shortform entry.
*/
-typedef __uint16_t xfs_dir2_data_off_t;
+typedef uint16_t xfs_dir2_data_off_t;
#define NULLDATAOFF 0xffffU
typedef uint xfs_dir2_data_aoff_t; /* argument form */
/*
* Offset in data space of a data entry.
*/
-typedef __uint32_t xfs_dir2_dataptr_t;
+typedef uint32_t xfs_dir2_dataptr_t;
#define XFS_DIR2_MAX_DATAPTR ((xfs_dir2_dataptr_t)0xffffffff)
#define XFS_DIR2_NULL_DATAPTR ((xfs_dir2_dataptr_t)0)
@@ -206,7 +206,7 @@ typedef xfs_off_t xfs_dir2_off_t;
/*
* Directory block number (logical dirblk in file)
*/
-typedef __uint32_t xfs_dir2_db_t;
+typedef uint32_t xfs_dir2_db_t;
#define XFS_INO32_SIZE 4
#define XFS_INO64_SIZE 8
@@ -226,9 +226,9 @@ typedef __uint32_t xfs_dir2_db_t;
* over them.
*/
typedef struct xfs_dir2_sf_hdr {
- __uint8_t count; /* count of entries */
- __uint8_t i8count; /* count of 8-byte inode #s */
- __uint8_t parent[8]; /* parent dir inode number */
+ uint8_t count; /* count of entries */
+ uint8_t i8count; /* count of 8-byte inode #s */
+ uint8_t parent[8]; /* parent dir inode number */
} __packed xfs_dir2_sf_hdr_t;
typedef struct xfs_dir2_sf_entry {
@@ -447,11 +447,11 @@ struct xfs_dir3_leaf_hdr {
};
struct xfs_dir3_icleaf_hdr {
- __uint32_t forw;
- __uint32_t back;
- __uint16_t magic;
- __uint16_t count;
- __uint16_t stale;
+ uint32_t forw;
+ uint32_t back;
+ uint16_t magic;
+ uint16_t count;
+ uint16_t stale;
};
/*
@@ -538,10 +538,10 @@ struct xfs_dir3_free {
* xfs_dir3_free_hdr_from_disk/xfs_dir3_free_hdr_to_disk.
*/
struct xfs_dir3_icfree_hdr {
- __uint32_t magic;
- __uint32_t firstdb;
- __uint32_t nvalid;
- __uint32_t nused;
+ uint32_t magic;
+ uint32_t firstdb;
+ uint32_t nvalid;
+ uint32_t nused;
};
@@ -632,10 +632,10 @@ typedef struct xfs_attr_shortform {
__u8 padding;
} hdr;
struct xfs_attr_sf_entry {
- __uint8_t namelen; /* actual length of name (no NULL) */
- __uint8_t valuelen; /* actual length of value (no NULL) */
- __uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */
- __uint8_t nameval[1]; /* name & value bytes concatenated */
+ uint8_t namelen; /* actual length of name (no NULL) */
+ uint8_t valuelen; /* actual length of value (no NULL) */
+ uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */
+ uint8_t nameval[1]; /* name & value bytes concatenated */
} list[1]; /* variable sized array */
} xfs_attr_shortform_t;
@@ -725,22 +725,22 @@ struct xfs_attr3_leafblock {
* incore, neutral version of the attribute leaf header
*/
struct xfs_attr3_icleaf_hdr {
- __uint32_t forw;
- __uint32_t back;
- __uint16_t magic;
- __uint16_t count;
- __uint16_t usedbytes;
+ uint32_t forw;
+ uint32_t back;
+ uint16_t magic;
+ uint16_t count;
+ uint16_t usedbytes;
/*
* firstused is 32-bit here instead of 16-bit like the on-disk variant
* to support maximum fsb size of 64k without overflow issues throughout
* the attr code. Instead, the overflow condition is handled on
* conversion to/from disk.
*/
- __uint32_t firstused;
+ uint32_t firstused;
__u8 holes;
struct {
- __uint16_t base;
- __uint16_t size;
+ uint16_t base;
+ uint16_t size;
} freemap[XFS_ATTR_LEAF_MAPSIZE];
};
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 2f389d366e93..ccf9783fd3f0 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -218,8 +218,7 @@ xfs_dir_ino_validate(
agblkno != 0 &&
ioff < (1 << mp->m_sb.sb_inopblog) &&
XFS_AGINO_TO_INO(mp, agno, agino) == ino;
- if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE,
- XFS_RANDOM_DIR_INO_VALIDATE))) {
+ if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE))) {
xfs_warn(mp, "Invalid inode number 0x%Lx",
(unsigned long long) ino);
XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp);
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index d6e6d9d16f6c..21c8f8bf94d5 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -47,9 +47,9 @@ struct xfs_dir_ops {
struct xfs_dir2_sf_entry *
(*sf_nextentry)(struct xfs_dir2_sf_hdr *hdr,
struct xfs_dir2_sf_entry *sfep);
- __uint8_t (*sf_get_ftype)(struct xfs_dir2_sf_entry *sfep);
+ uint8_t (*sf_get_ftype)(struct xfs_dir2_sf_entry *sfep);
void (*sf_put_ftype)(struct xfs_dir2_sf_entry *sfep,
- __uint8_t ftype);
+ uint8_t ftype);
xfs_ino_t (*sf_get_ino)(struct xfs_dir2_sf_hdr *hdr,
struct xfs_dir2_sf_entry *sfep);
void (*sf_put_ino)(struct xfs_dir2_sf_hdr *hdr,
@@ -60,9 +60,9 @@ struct xfs_dir_ops {
xfs_ino_t ino);
int (*data_entsize)(int len);
- __uint8_t (*data_get_ftype)(struct xfs_dir2_data_entry *dep);
+ uint8_t (*data_get_ftype)(struct xfs_dir2_data_entry *dep);
void (*data_put_ftype)(struct xfs_dir2_data_entry *dep,
- __uint8_t ftype);
+ uint8_t ftype);
__be16 * (*data_entry_tag_p)(struct xfs_dir2_data_entry *dep);
struct xfs_dir2_data_free *
(*data_bestfree_p)(struct xfs_dir2_data_hdr *hdr);
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index aa17cb788946..43c902f7a68d 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -139,7 +139,7 @@ xfs_dir3_block_read(
err = xfs_da_read_buf(tp, dp, mp->m_dir_geo->datablk, -1, bpp,
XFS_DATA_FORK, &xfs_dir3_block_buf_ops);
- if (!err && tp)
+ if (!err && tp && *bpp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF);
return err;
}
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index b887fb2a2bcf..27297a689d9c 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -145,7 +145,7 @@ xfs_dir3_leaf_check_int(
static bool
xfs_dir3_leaf_verify(
struct xfs_buf *bp,
- __uint16_t magic)
+ uint16_t magic)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_dir2_leaf *leaf = bp->b_addr;
@@ -154,7 +154,7 @@ xfs_dir3_leaf_verify(
if (xfs_sb_version_hascrc(&mp->m_sb)) {
struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
- __uint16_t magic3;
+ uint16_t magic3;
magic3 = (magic == XFS_DIR2_LEAF1_MAGIC) ? XFS_DIR3_LEAF1_MAGIC
: XFS_DIR3_LEAFN_MAGIC;
@@ -178,7 +178,7 @@ xfs_dir3_leaf_verify(
static void
__read_verify(
struct xfs_buf *bp,
- __uint16_t magic)
+ uint16_t magic)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
@@ -195,7 +195,7 @@ __read_verify(
static void
__write_verify(
struct xfs_buf *bp,
- __uint16_t magic)
+ uint16_t magic)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_buf_log_item *bip = bp->b_fspriv;
@@ -256,7 +256,7 @@ const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = {
.verify_write = xfs_dir3_leafn_write_verify,
};
-static int
+int
xfs_dir3_leaf_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
@@ -268,7 +268,7 @@ xfs_dir3_leaf_read(
err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
XFS_DATA_FORK, &xfs_dir3_leaf1_buf_ops);
- if (!err && tp)
+ if (!err && tp && *bpp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAF1_BUF);
return err;
}
@@ -285,7 +285,7 @@ xfs_dir3_leafn_read(
err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
XFS_DATA_FORK, &xfs_dir3_leafn_buf_ops);
- if (!err && tp)
+ if (!err && tp && *bpp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAFN_BUF);
return err;
}
@@ -299,7 +299,7 @@ xfs_dir3_leaf_init(
struct xfs_trans *tp,
struct xfs_buf *bp,
xfs_ino_t owner,
- __uint16_t type)
+ uint16_t type)
{
struct xfs_dir2_leaf *leaf = bp->b_addr;
@@ -343,7 +343,7 @@ xfs_dir3_leaf_get_buf(
xfs_da_args_t *args,
xfs_dir2_db_t bno,
struct xfs_buf **bpp,
- __uint16_t magic)
+ uint16_t magic)
{
struct xfs_inode *dp = args->dp;
struct xfs_trans *tp = args->trans;
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index bbd1238852b3..682e2bf370c7 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -528,7 +528,7 @@ xfs_dir2_free_hdr_check(
* Stale entries are ok.
*/
xfs_dahash_t /* hash value */
-xfs_dir2_leafn_lasthash(
+xfs_dir2_leaf_lasthash(
struct xfs_inode *dp,
struct xfs_buf *bp, /* leaf buffer */
int *count) /* count of entries in leaf */
@@ -540,7 +540,9 @@ xfs_dir2_leafn_lasthash(
dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
- leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
+ leafhdr.magic == XFS_DIR3_LEAFN_MAGIC ||
+ leafhdr.magic == XFS_DIR2_LEAF1_MAGIC ||
+ leafhdr.magic == XFS_DIR3_LEAF1_MAGIC);
if (count)
*count = leafhdr.count;
@@ -1405,8 +1407,8 @@ xfs_dir2_leafn_split(
/*
* Update last hashval in each block since we added the name.
*/
- oldblk->hashval = xfs_dir2_leafn_lasthash(dp, oldblk->bp, NULL);
- newblk->hashval = xfs_dir2_leafn_lasthash(dp, newblk->bp, NULL);
+ oldblk->hashval = xfs_dir2_leaf_lasthash(dp, oldblk->bp, NULL);
+ newblk->hashval = xfs_dir2_leaf_lasthash(dp, newblk->bp, NULL);
xfs_dir3_leaf_check(dp, oldblk->bp);
xfs_dir3_leaf_check(dp, newblk->bp);
return error;
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index 39f8604f764e..4badd26c47e6 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -58,6 +58,8 @@ extern int xfs_dir3_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
struct xfs_buf **bpp);
/* xfs_dir2_leaf.c */
+extern int xfs_dir3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
+ xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp);
extern int xfs_dir3_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp,
xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp);
extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
@@ -69,7 +71,7 @@ extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr,
struct xfs_dir2_leaf_entry *ents, int *indexp,
int *lowstalep, int *highstalep, int *lowlogp, int *highlogp);
extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno,
- struct xfs_buf **bpp, __uint16_t magic);
+ struct xfs_buf **bpp, uint16_t magic);
extern void xfs_dir3_leaf_log_ents(struct xfs_da_args *args,
struct xfs_buf *bp, int first, int last);
extern void xfs_dir3_leaf_log_header(struct xfs_da_args *args,
@@ -93,7 +95,7 @@ extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_inode *dp,
/* xfs_dir2_node.c */
extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
struct xfs_buf *lbp);
-extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_inode *dp,
+extern xfs_dahash_t xfs_dir2_leaf_lasthash(struct xfs_inode *dp,
struct xfs_buf *bp, int *count);
extern int xfs_dir2_leafn_lookup_int(struct xfs_buf *bp,
struct xfs_da_args *args, int *indexp,
@@ -128,7 +130,7 @@ extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
extern int xfs_dir2_sf_verify(struct xfs_inode *ip);
/* xfs_dir2_readdir.c */
-extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx,
- size_t bufsize);
+extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp,
+ struct dir_context *ctx, size_t bufsize);
#endif /* __XFS_DIR2_PRIV_H__ */
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index e84af093b2ab..be8b9755f66a 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -647,7 +647,7 @@ xfs_dir2_sf_verify(
int offset;
int size;
int error;
- __uint8_t filetype;
+ uint8_t filetype;
ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
/*
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index a1dccd8d96bc..23229f0c5b15 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -103,8 +103,8 @@ struct xfs_ifork;
* Must be padded to 64 bit alignment.
*/
typedef struct xfs_sb {
- __uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */
- __uint32_t sb_blocksize; /* logical block size, bytes */
+ uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */
+ uint32_t sb_blocksize; /* logical block size, bytes */
xfs_rfsblock_t sb_dblocks; /* number of data blocks */
xfs_rfsblock_t sb_rblocks; /* number of realtime blocks */
xfs_rtblock_t sb_rextents; /* number of realtime extents */
@@ -118,45 +118,45 @@ typedef struct xfs_sb {
xfs_agnumber_t sb_agcount; /* number of allocation groups */
xfs_extlen_t sb_rbmblocks; /* number of rt bitmap blocks */
xfs_extlen_t sb_logblocks; /* number of log blocks */
- __uint16_t sb_versionnum; /* header version == XFS_SB_VERSION */
- __uint16_t sb_sectsize; /* volume sector size, bytes */
- __uint16_t sb_inodesize; /* inode size, bytes */
- __uint16_t sb_inopblock; /* inodes per block */
+ uint16_t sb_versionnum; /* header version == XFS_SB_VERSION */
+ uint16_t sb_sectsize; /* volume sector size, bytes */
+ uint16_t sb_inodesize; /* inode size, bytes */
+ uint16_t sb_inopblock; /* inodes per block */
char sb_fname[12]; /* file system name */
- __uint8_t sb_blocklog; /* log2 of sb_blocksize */
- __uint8_t sb_sectlog; /* log2 of sb_sectsize */
- __uint8_t sb_inodelog; /* log2 of sb_inodesize */
- __uint8_t sb_inopblog; /* log2 of sb_inopblock */
- __uint8_t sb_agblklog; /* log2 of sb_agblocks (rounded up) */
- __uint8_t sb_rextslog; /* log2 of sb_rextents */
- __uint8_t sb_inprogress; /* mkfs is in progress, don't mount */
- __uint8_t sb_imax_pct; /* max % of fs for inode space */
+ uint8_t sb_blocklog; /* log2 of sb_blocksize */
+ uint8_t sb_sectlog; /* log2 of sb_sectsize */
+ uint8_t sb_inodelog; /* log2 of sb_inodesize */
+ uint8_t sb_inopblog; /* log2 of sb_inopblock */
+ uint8_t sb_agblklog; /* log2 of sb_agblocks (rounded up) */
+ uint8_t sb_rextslog; /* log2 of sb_rextents */
+ uint8_t sb_inprogress; /* mkfs is in progress, don't mount */
+ uint8_t sb_imax_pct; /* max % of fs for inode space */
/* statistics */
/*
* These fields must remain contiguous. If you really
* want to change their layout, make sure you fix the
* code in xfs_trans_apply_sb_deltas().
*/
- __uint64_t sb_icount; /* allocated inodes */
- __uint64_t sb_ifree; /* free inodes */
- __uint64_t sb_fdblocks; /* free data blocks */
- __uint64_t sb_frextents; /* free realtime extents */
+ uint64_t sb_icount; /* allocated inodes */
+ uint64_t sb_ifree; /* free inodes */
+ uint64_t sb_fdblocks; /* free data blocks */
+ uint64_t sb_frextents; /* free realtime extents */
/*
* End contiguous fields.
*/
xfs_ino_t sb_uquotino; /* user quota inode */
xfs_ino_t sb_gquotino; /* group quota inode */
- __uint16_t sb_qflags; /* quota flags */
- __uint8_t sb_flags; /* misc. flags */
- __uint8_t sb_shared_vn; /* shared version number */
+ uint16_t sb_qflags; /* quota flags */
+ uint8_t sb_flags; /* misc. flags */
+ uint8_t sb_shared_vn; /* shared version number */
xfs_extlen_t sb_inoalignmt; /* inode chunk alignment, fsblocks */
- __uint32_t sb_unit; /* stripe or raid unit */
- __uint32_t sb_width; /* stripe or raid width */
- __uint8_t sb_dirblklog; /* log2 of dir block size (fsbs) */
- __uint8_t sb_logsectlog; /* log2 of the log sector size */
- __uint16_t sb_logsectsize; /* sector size for the log, bytes */
- __uint32_t sb_logsunit; /* stripe unit size for the log */
- __uint32_t sb_features2; /* additional feature bits */
+ uint32_t sb_unit; /* stripe or raid unit */
+ uint32_t sb_width; /* stripe or raid width */
+ uint8_t sb_dirblklog; /* log2 of dir block size (fsbs) */
+ uint8_t sb_logsectlog; /* log2 of the log sector size */
+ uint16_t sb_logsectsize; /* sector size for the log, bytes */
+ uint32_t sb_logsunit; /* stripe unit size for the log */
+ uint32_t sb_features2; /* additional feature bits */
/*
* bad features2 field as a result of failing to pad the sb structure to
@@ -167,17 +167,17 @@ typedef struct xfs_sb {
* the value in sb_features2 when formatting the incore superblock to
* the disk buffer.
*/
- __uint32_t sb_bad_features2;
+ uint32_t sb_bad_features2;
/* version 5 superblock fields start here */
/* feature masks */
- __uint32_t sb_features_compat;
- __uint32_t sb_features_ro_compat;
- __uint32_t sb_features_incompat;
- __uint32_t sb_features_log_incompat;
+ uint32_t sb_features_compat;
+ uint32_t sb_features_ro_compat;
+ uint32_t sb_features_incompat;
+ uint32_t sb_features_log_incompat;
- __uint32_t sb_crc; /* superblock crc */
+ uint32_t sb_crc; /* superblock crc */
xfs_extlen_t sb_spino_align; /* sparse inode chunk alignment */
xfs_ino_t sb_pquotino; /* project quota inode */
@@ -449,7 +449,7 @@ static inline void xfs_sb_version_addprojid32bit(struct xfs_sb *sbp)
static inline bool
xfs_sb_has_compat_feature(
struct xfs_sb *sbp,
- __uint32_t feature)
+ uint32_t feature)
{
return (sbp->sb_features_compat & feature) != 0;
}
@@ -465,7 +465,7 @@ xfs_sb_has_compat_feature(
static inline bool
xfs_sb_has_ro_compat_feature(
struct xfs_sb *sbp,
- __uint32_t feature)
+ uint32_t feature)
{
return (sbp->sb_features_ro_compat & feature) != 0;
}
@@ -482,7 +482,7 @@ xfs_sb_has_ro_compat_feature(
static inline bool
xfs_sb_has_incompat_feature(
struct xfs_sb *sbp,
- __uint32_t feature)
+ uint32_t feature)
{
return (sbp->sb_features_incompat & feature) != 0;
}
@@ -492,7 +492,7 @@ xfs_sb_has_incompat_feature(
static inline bool
xfs_sb_has_incompat_log_feature(
struct xfs_sb *sbp,
- __uint32_t feature)
+ uint32_t feature)
{
return (sbp->sb_features_log_incompat & feature) != 0;
}
@@ -594,8 +594,8 @@ xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino)
*/
#define XFS_FSB_TO_B(mp,fsbno) ((xfs_fsize_t)(fsbno) << (mp)->m_sb.sb_blocklog)
#define XFS_B_TO_FSB(mp,b) \
- ((((__uint64_t)(b)) + (mp)->m_blockmask) >> (mp)->m_sb.sb_blocklog)
-#define XFS_B_TO_FSBT(mp,b) (((__uint64_t)(b)) >> (mp)->m_sb.sb_blocklog)
+ ((((uint64_t)(b)) + (mp)->m_blockmask) >> (mp)->m_sb.sb_blocklog)
+#define XFS_B_TO_FSBT(mp,b) (((uint64_t)(b)) >> (mp)->m_sb.sb_blocklog)
#define XFS_B_FSB_OFFSET(mp,b) ((b) & (mp)->m_blockmask)
/*
@@ -1072,7 +1072,7 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
* next agno_log bits - ag number
* high agno_log-agblklog-inopblog bits - 0
*/
-#define XFS_INO_MASK(k) (__uint32_t)((1ULL << (k)) - 1)
+#define XFS_INO_MASK(k) (uint32_t)((1ULL << (k)) - 1)
#define XFS_INO_OFFSET_BITS(mp) (mp)->m_sb.sb_inopblog
#define XFS_INO_AGBNO_BITS(mp) (mp)->m_sb.sb_agblklog
#define XFS_INO_AGINO_BITS(mp) (mp)->m_agino_log
@@ -1211,6 +1211,7 @@ struct xfs_dsymlink_hdr {
#define XFS_SYMLINK_CRC_OFF offsetof(struct xfs_dsymlink_hdr, sl_crc)
+#define XFS_SYMLINK_MAXLEN 1024
/*
* The maximum pathlen is 1024 bytes. Since the minimum file system
* blocksize is 512 bytes, we can get a max of 3 extents back from
@@ -1269,16 +1270,16 @@ typedef __be32 xfs_alloc_ptr_t;
#define XFS_FIBT_MAGIC 0x46494254 /* 'FIBT' */
#define XFS_FIBT_CRC_MAGIC 0x46494233 /* 'FIB3' */
-typedef __uint64_t xfs_inofree_t;
+typedef uint64_t xfs_inofree_t;
#define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t))
#define XFS_INODES_PER_CHUNK_LOG (XFS_NBBYLOG + 3)
#define XFS_INOBT_ALL_FREE ((xfs_inofree_t)-1)
#define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i))
#define XFS_INOBT_HOLEMASK_FULL 0 /* holemask for full chunk */
-#define XFS_INOBT_HOLEMASK_BITS (NBBY * sizeof(__uint16_t))
+#define XFS_INOBT_HOLEMASK_BITS (NBBY * sizeof(uint16_t))
#define XFS_INODES_PER_HOLEMASK_BIT \
- (XFS_INODES_PER_CHUNK / (NBBY * sizeof(__uint16_t)))
+ (XFS_INODES_PER_CHUNK / (NBBY * sizeof(uint16_t)))
static inline xfs_inofree_t xfs_inobt_maskn(int i, int n)
{
@@ -1312,9 +1313,9 @@ typedef struct xfs_inobt_rec {
typedef struct xfs_inobt_rec_incore {
xfs_agino_t ir_startino; /* starting inode number */
- __uint16_t ir_holemask; /* hole mask for sparse chunks */
- __uint8_t ir_count; /* total inode count */
- __uint8_t ir_freecount; /* count of free inodes (set bits) */
+ uint16_t ir_holemask; /* hole mask for sparse chunks */
+ uint8_t ir_count; /* total inode count */
+ uint8_t ir_freecount; /* count of free inodes (set bits) */
xfs_inofree_t ir_free; /* free inode mask */
} xfs_inobt_rec_incore_t;
@@ -1397,15 +1398,15 @@ struct xfs_rmap_rec {
* rm_offset:54-60 aren't used and should be zero
* rm_offset:0-53 is the block offset within the inode
*/
-#define XFS_RMAP_OFF_ATTR_FORK ((__uint64_t)1ULL << 63)
-#define XFS_RMAP_OFF_BMBT_BLOCK ((__uint64_t)1ULL << 62)
-#define XFS_RMAP_OFF_UNWRITTEN ((__uint64_t)1ULL << 61)
+#define XFS_RMAP_OFF_ATTR_FORK ((uint64_t)1ULL << 63)
+#define XFS_RMAP_OFF_BMBT_BLOCK ((uint64_t)1ULL << 62)
+#define XFS_RMAP_OFF_UNWRITTEN ((uint64_t)1ULL << 61)
-#define XFS_RMAP_LEN_MAX ((__uint32_t)~0U)
+#define XFS_RMAP_LEN_MAX ((uint32_t)~0U)
#define XFS_RMAP_OFF_FLAGS (XFS_RMAP_OFF_ATTR_FORK | \
XFS_RMAP_OFF_BMBT_BLOCK | \
XFS_RMAP_OFF_UNWRITTEN)
-#define XFS_RMAP_OFF_MASK ((__uint64_t)0x3FFFFFFFFFFFFFULL)
+#define XFS_RMAP_OFF_MASK ((uint64_t)0x3FFFFFFFFFFFFFULL)
#define XFS_RMAP_OFF(off) ((off) & XFS_RMAP_OFF_MASK)
@@ -1431,8 +1432,8 @@ struct xfs_rmap_rec {
struct xfs_rmap_irec {
xfs_agblock_t rm_startblock; /* extent start block */
xfs_extlen_t rm_blockcount; /* extent length */
- __uint64_t rm_owner; /* extent owner */
- __uint64_t rm_offset; /* offset within the owner */
+ uint64_t rm_owner; /* extent owner */
+ uint64_t rm_offset; /* offset within the owner */
unsigned int rm_flags; /* state flags */
};
@@ -1544,11 +1545,11 @@ typedef struct xfs_bmbt_rec {
__be64 l0, l1;
} xfs_bmbt_rec_t;
-typedef __uint64_t xfs_bmbt_rec_base_t; /* use this for casts */
+typedef uint64_t xfs_bmbt_rec_base_t; /* use this for casts */
typedef xfs_bmbt_rec_t xfs_bmdr_rec_t;
typedef struct xfs_bmbt_rec_host {
- __uint64_t l0, l1;
+ uint64_t l0, l1;
} xfs_bmbt_rec_host_t;
/*
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 095bdf049a3f..8c61f21535d4 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -302,10 +302,10 @@ typedef struct xfs_bstat {
* and using two 16bit values to hold new 32bit projid was choosen
* to retain compatibility with "old" filesystems).
*/
-static inline __uint32_t
+static inline uint32_t
bstat_get_projid(struct xfs_bstat *bs)
{
- return (__uint32_t)bs->bs_projid_hi << 16 | bs->bs_projid_lo;
+ return (uint32_t)bs->bs_projid_hi << 16 | bs->bs_projid_lo;
}
/*
@@ -446,19 +446,15 @@ typedef struct xfs_handle {
} xfs_handle_t;
#define ha_fsid ha_u._ha_fsid
-#define XFS_HSIZE(handle) (((char *) &(handle).ha_fid.fid_pad \
- - (char *) &(handle)) \
- + (handle).ha_fid.fid_len)
-
/*
* Structure passed to XFS_IOC_SWAPEXT
*/
typedef struct xfs_swapext
{
- __int64_t sx_version; /* version */
+ int64_t sx_version; /* version */
#define XFS_SX_VERSION 0
- __int64_t sx_fdtarget; /* fd of target file */
- __int64_t sx_fdtmp; /* fd of tmp file */
+ int64_t sx_fdtarget; /* fd of target file */
+ int64_t sx_fdtmp; /* fd of tmp file */
xfs_off_t sx_offset; /* offset into file */
xfs_off_t sx_length; /* leng from offset */
char sx_pad[16]; /* pad space, unused */
@@ -546,7 +542,7 @@ typedef struct xfs_swapext
#define XFS_IOC_ATTRLIST_BY_HANDLE _IOW ('X', 122, struct xfs_fsop_attrlist_handlereq)
#define XFS_IOC_ATTRMULTI_BY_HANDLE _IOW ('X', 123, struct xfs_fsop_attrmulti_handlereq)
#define XFS_IOC_FSGEOMETRY _IOR ('X', 124, struct xfs_fsop_geom)
-#define XFS_IOC_GOINGDOWN _IOR ('X', 125, __uint32_t)
+#define XFS_IOC_GOINGDOWN _IOR ('X', 125, uint32_t)
/* XFS_IOC_GETFSUUID ---------- deprecated 140 */
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index d41ade5d293e..ffd5a15d1bb6 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -46,7 +46,7 @@
/*
* Allocation group level functions.
*/
-static inline int
+int
xfs_ialloc_cluster_alignment(
struct xfs_mount *mp)
{
@@ -98,24 +98,15 @@ xfs_inobt_update(
return xfs_btree_update(cur, &rec);
}
-/*
- * Get the data from the pointed-to record.
- */
-int /* error */
-xfs_inobt_get_rec(
- struct xfs_btree_cur *cur, /* btree cursor */
- xfs_inobt_rec_incore_t *irec, /* btree record */
- int *stat) /* output: success/failure */
+/* Convert on-disk btree record to incore inobt record. */
+void
+xfs_inobt_btrec_to_irec(
+ struct xfs_mount *mp,
+ union xfs_btree_rec *rec,
+ struct xfs_inobt_rec_incore *irec)
{
- union xfs_btree_rec *rec;
- int error;
-
- error = xfs_btree_get_rec(cur, &rec, stat);
- if (error || *stat == 0)
- return error;
-
irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
- if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
+ if (xfs_sb_version_hassparseinodes(&mp->m_sb)) {
irec->ir_holemask = be16_to_cpu(rec->inobt.ir_u.sp.ir_holemask);
irec->ir_count = rec->inobt.ir_u.sp.ir_count;
irec->ir_freecount = rec->inobt.ir_u.sp.ir_freecount;
@@ -130,6 +121,25 @@ xfs_inobt_get_rec(
be32_to_cpu(rec->inobt.ir_u.f.ir_freecount);
}
irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
+}
+
+/*
+ * Get the data from the pointed-to record.
+ */
+int
+xfs_inobt_get_rec(
+ struct xfs_btree_cur *cur,
+ struct xfs_inobt_rec_incore *irec,
+ int *stat)
+{
+ union xfs_btree_rec *rec;
+ int error;
+
+ error = xfs_btree_get_rec(cur, &rec, stat);
+ if (error || *stat == 0)
+ return error;
+
+ xfs_inobt_btrec_to_irec(cur->bc_mp, rec, irec);
return 0;
}
@@ -140,9 +150,9 @@ xfs_inobt_get_rec(
STATIC int
xfs_inobt_insert_rec(
struct xfs_btree_cur *cur,
- __uint16_t holemask,
- __uint8_t count,
- __int32_t freecount,
+ uint16_t holemask,
+ uint8_t count,
+ int32_t freecount,
xfs_inofree_t free,
int *stat)
{
@@ -2542,8 +2552,7 @@ xfs_agi_read_verify(
!xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF))
xfs_buf_ioerror(bp, -EFSBADCRC);
else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp,
- XFS_ERRTAG_IALLOC_READ_AGI,
- XFS_RANDOM_IALLOC_READ_AGI))
+ XFS_ERRTAG_IALLOC_READ_AGI))
xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error)
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index 0bb89669fc07..b32cfb5aeb5b 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -168,5 +168,10 @@ int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp,
int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno, struct xfs_buf **bpp);
+union xfs_btree_rec;
+void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, union xfs_btree_rec *rec,
+ struct xfs_inobt_rec_incore *irec);
+
+int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
#endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 7c471881c9a6..317caba9faa6 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -175,6 +175,18 @@ xfs_inobt_init_key_from_rec(
}
STATIC void
+xfs_inobt_init_high_key_from_rec(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
+{
+ __u32 x;
+
+ x = be32_to_cpu(rec->inobt.ir_startino);
+ x += XFS_INODES_PER_CHUNK - 1;
+ key->inobt.ir_startino = cpu_to_be32(x);
+}
+
+STATIC void
xfs_inobt_init_rec_from_cur(
struct xfs_btree_cur *cur,
union xfs_btree_rec *rec)
@@ -219,15 +231,25 @@ xfs_finobt_init_ptr_from_cur(
ptr->s = agi->agi_free_root;
}
-STATIC __int64_t
+STATIC int64_t
xfs_inobt_key_diff(
struct xfs_btree_cur *cur,
union xfs_btree_key *key)
{
- return (__int64_t)be32_to_cpu(key->inobt.ir_startino) -
+ return (int64_t)be32_to_cpu(key->inobt.ir_startino) -
cur->bc_rec.i.ir_startino;
}
+STATIC int64_t
+xfs_inobt_diff_two_keys(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2)
+{
+ return (int64_t)be32_to_cpu(k1->inobt.ir_startino) -
+ be32_to_cpu(k2->inobt.ir_startino);
+}
+
static int
xfs_inobt_verify(
struct xfs_buf *bp)
@@ -302,7 +324,6 @@ const struct xfs_buf_ops xfs_inobt_buf_ops = {
.verify_write = xfs_inobt_write_verify,
};
-#if defined(DEBUG) || defined(XFS_WARN)
STATIC int
xfs_inobt_keys_inorder(
struct xfs_btree_cur *cur,
@@ -322,7 +343,6 @@ xfs_inobt_recs_inorder(
return be32_to_cpu(r1->inobt.ir_startino) + XFS_INODES_PER_CHUNK <=
be32_to_cpu(r2->inobt.ir_startino);
}
-#endif /* DEBUG */
static const struct xfs_btree_ops xfs_inobt_ops = {
.rec_len = sizeof(xfs_inobt_rec_t),
@@ -335,14 +355,14 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
.get_minrecs = xfs_inobt_get_minrecs,
.get_maxrecs = xfs_inobt_get_maxrecs,
.init_key_from_rec = xfs_inobt_init_key_from_rec,
+ .init_high_key_from_rec = xfs_inobt_init_high_key_from_rec,
.init_rec_from_cur = xfs_inobt_init_rec_from_cur,
.init_ptr_from_cur = xfs_inobt_init_ptr_from_cur,
.key_diff = xfs_inobt_key_diff,
.buf_ops = &xfs_inobt_buf_ops,
-#if defined(DEBUG) || defined(XFS_WARN)
+ .diff_two_keys = xfs_inobt_diff_two_keys,
.keys_inorder = xfs_inobt_keys_inorder,
.recs_inorder = xfs_inobt_recs_inorder,
-#endif
};
static const struct xfs_btree_ops xfs_finobt_ops = {
@@ -356,14 +376,14 @@ static const struct xfs_btree_ops xfs_finobt_ops = {
.get_minrecs = xfs_inobt_get_minrecs,
.get_maxrecs = xfs_inobt_get_maxrecs,
.init_key_from_rec = xfs_inobt_init_key_from_rec,
+ .init_high_key_from_rec = xfs_inobt_init_high_key_from_rec,
.init_rec_from_cur = xfs_inobt_init_rec_from_cur,
.init_ptr_from_cur = xfs_finobt_init_ptr_from_cur,
.key_diff = xfs_inobt_key_diff,
.buf_ops = &xfs_inobt_buf_ops,
-#if defined(DEBUG) || defined(XFS_WARN)
+ .diff_two_keys = xfs_inobt_diff_two_keys,
.keys_inorder = xfs_inobt_keys_inorder,
.recs_inorder = xfs_inobt_recs_inorder,
-#endif
};
/*
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 09c3d1aecef2..378f8fbc91a7 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -105,8 +105,7 @@ xfs_inode_buf_verify(
di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
xfs_dinode_good_version(mp, dip->di_version);
if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
- XFS_ERRTAG_ITOBP_INOTOBP,
- XFS_RANDOM_ITOBP_INOTOBP))) {
+ XFS_ERRTAG_ITOBP_INOTOBP))) {
if (readahead) {
bp->b_flags &= ~XBF_DONE;
xfs_buf_ioerror(bp, -EIO);
@@ -381,7 +380,7 @@ xfs_log_dinode_to_disk(
}
}
-static bool
+bool
xfs_dinode_verify(
struct xfs_mount *mp,
xfs_ino_t ino,
@@ -444,7 +443,7 @@ xfs_dinode_calc_crc(
struct xfs_mount *mp,
struct xfs_dinode *dip)
{
- __uint32_t crc;
+ uint32_t crc;
if (dip->di_version < 3)
return;
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 6848a0afbce7..a9c97a356c30 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -28,26 +28,26 @@ struct xfs_dinode;
* format specific structures at the appropriate time.
*/
struct xfs_icdinode {
- __int8_t di_version; /* inode version */
- __int8_t di_format; /* format of di_c data */
- __uint16_t di_flushiter; /* incremented on flush */
- __uint32_t di_uid; /* owner's user id */
- __uint32_t di_gid; /* owner's group id */
- __uint16_t di_projid_lo; /* lower part of owner's project id */
- __uint16_t di_projid_hi; /* higher part of owner's project id */
+ int8_t di_version; /* inode version */
+ int8_t di_format; /* format of di_c data */
+ uint16_t di_flushiter; /* incremented on flush */
+ uint32_t di_uid; /* owner's user id */
+ uint32_t di_gid; /* owner's group id */
+ uint16_t di_projid_lo; /* lower part of owner's project id */
+ uint16_t di_projid_hi; /* higher part of owner's project id */
xfs_fsize_t di_size; /* number of bytes in file */
xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */
xfs_extlen_t di_extsize; /* basic/minimum extent size for file */
xfs_extnum_t di_nextents; /* number of extents in data fork */
xfs_aextnum_t di_anextents; /* number of extents in attribute fork*/
- __uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */
- __int8_t di_aformat; /* format of attr fork's data */
- __uint32_t di_dmevmask; /* DMIG event mask */
- __uint16_t di_dmstate; /* DMIG state info */
- __uint16_t di_flags; /* random flags, XFS_DIFLAG_... */
+ uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */
+ int8_t di_aformat; /* format of attr fork's data */
+ uint32_t di_dmevmask; /* DMIG event mask */
+ uint16_t di_dmstate; /* DMIG state info */
+ uint16_t di_flags; /* random flags, XFS_DIFLAG_... */
- __uint64_t di_flags2; /* more random flags */
- __uint32_t di_cowextsize; /* basic cow extent size for file */
+ uint64_t di_flags2; /* more random flags */
+ uint32_t di_cowextsize; /* basic cow extent size for file */
xfs_ictimestamp_t di_crtime; /* time created */
};
@@ -82,4 +82,7 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
#define xfs_inobp_check(mp, bp)
#endif /* DEBUG */
+/* Defined in xfs_inode_buf.c; declared here so other files can call it. */
+bool xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino,
+ struct xfs_dinode *dip);
+
#endif /* __XFS_INODE_BUF_H__ */
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 7ae571f8e34a..8372e9bcd7b6 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -31,7 +31,7 @@ struct xfs_trans_res;
* through all the log items definitions and everything they encode into the
* log.
*/
-typedef __uint32_t xlog_tid_t;
+typedef uint32_t xlog_tid_t;
#define XLOG_MIN_ICLOGS 2
#define XLOG_MAX_ICLOGS 8
@@ -211,7 +211,7 @@ typedef struct xfs_log_iovec {
typedef struct xfs_trans_header {
uint th_magic; /* magic number */
uint th_type; /* transaction type */
- __int32_t th_tid; /* transaction id (unused) */
+ int32_t th_tid; /* transaction id (unused) */
uint th_num_items; /* num items logged by trans */
} xfs_trans_header_t;
@@ -265,52 +265,52 @@ typedef struct xfs_trans_header {
* must be added on to the end.
*/
typedef struct xfs_inode_log_format {
- __uint16_t ilf_type; /* inode log item type */
- __uint16_t ilf_size; /* size of this item */
- __uint32_t ilf_fields; /* flags for fields logged */
- __uint16_t ilf_asize; /* size of attr d/ext/root */
- __uint16_t ilf_dsize; /* size of data/ext/root */
- __uint64_t ilf_ino; /* inode number */
+ uint16_t ilf_type; /* inode log item type */
+ uint16_t ilf_size; /* size of this item */
+ uint32_t ilf_fields; /* flags for fields logged */
+ uint16_t ilf_asize; /* size of attr d/ext/root */
+ uint16_t ilf_dsize; /* size of data/ext/root */
+ uint64_t ilf_ino; /* inode number */
union {
- __uint32_t ilfu_rdev; /* rdev value for dev inode*/
+ uint32_t ilfu_rdev; /* rdev value for dev inode*/
uuid_t ilfu_uuid; /* mount point value */
} ilf_u;
- __int64_t ilf_blkno; /* blkno of inode buffer */
- __int32_t ilf_len; /* len of inode buffer */
- __int32_t ilf_boffset; /* off of inode in buffer */
+ int64_t ilf_blkno; /* blkno of inode buffer */
+ int32_t ilf_len; /* len of inode buffer */
+ int32_t ilf_boffset; /* off of inode in buffer */
} xfs_inode_log_format_t;
typedef struct xfs_inode_log_format_32 {
- __uint16_t ilf_type; /* inode log item type */
- __uint16_t ilf_size; /* size of this item */
- __uint32_t ilf_fields; /* flags for fields logged */
- __uint16_t ilf_asize; /* size of attr d/ext/root */
- __uint16_t ilf_dsize; /* size of data/ext/root */
- __uint64_t ilf_ino; /* inode number */
+ uint16_t ilf_type; /* inode log item type */
+ uint16_t ilf_size; /* size of this item */
+ uint32_t ilf_fields; /* flags for fields logged */
+ uint16_t ilf_asize; /* size of attr d/ext/root */
+ uint16_t ilf_dsize; /* size of data/ext/root */
+ uint64_t ilf_ino; /* inode number */
union {
- __uint32_t ilfu_rdev; /* rdev value for dev inode*/
+ uint32_t ilfu_rdev; /* rdev value for dev inode*/
uuid_t ilfu_uuid; /* mount point value */
} ilf_u;
- __int64_t ilf_blkno; /* blkno of inode buffer */
- __int32_t ilf_len; /* len of inode buffer */
- __int32_t ilf_boffset; /* off of inode in buffer */
+ int64_t ilf_blkno; /* blkno of inode buffer */
+ int32_t ilf_len; /* len of inode buffer */
+ int32_t ilf_boffset; /* off of inode in buffer */
} __attribute__((packed)) xfs_inode_log_format_32_t;
typedef struct xfs_inode_log_format_64 {
- __uint16_t ilf_type; /* inode log item type */
- __uint16_t ilf_size; /* size of this item */
- __uint32_t ilf_fields; /* flags for fields logged */
- __uint16_t ilf_asize; /* size of attr d/ext/root */
- __uint16_t ilf_dsize; /* size of data/ext/root */
- __uint32_t ilf_pad; /* pad for 64 bit boundary */
- __uint64_t ilf_ino; /* inode number */
+ uint16_t ilf_type; /* inode log item type */
+ uint16_t ilf_size; /* size of this item */
+ uint32_t ilf_fields; /* flags for fields logged */
+ uint16_t ilf_asize; /* size of attr d/ext/root */
+ uint16_t ilf_dsize; /* size of data/ext/root */
+ uint32_t ilf_pad; /* pad for 64 bit boundary */
+ uint64_t ilf_ino; /* inode number */
union {
- __uint32_t ilfu_rdev; /* rdev value for dev inode*/
+ uint32_t ilfu_rdev; /* rdev value for dev inode*/
uuid_t ilfu_uuid; /* mount point value */
} ilf_u;
- __int64_t ilf_blkno; /* blkno of inode buffer */
- __int32_t ilf_len; /* len of inode buffer */
- __int32_t ilf_boffset; /* off of inode in buffer */
+ int64_t ilf_blkno; /* blkno of inode buffer */
+ int32_t ilf_len; /* len of inode buffer */
+ int32_t ilf_boffset; /* off of inode in buffer */
} xfs_inode_log_format_64_t;
@@ -379,8 +379,8 @@ static inline int xfs_ilog_fdata(int w)
* information.
*/
typedef struct xfs_ictimestamp {
- __int32_t t_sec; /* timestamp seconds */
- __int32_t t_nsec; /* timestamp nanoseconds */
+ int32_t t_sec; /* timestamp seconds */
+ int32_t t_nsec; /* timestamp nanoseconds */
} xfs_ictimestamp_t;
/*
@@ -388,18 +388,18 @@ typedef struct xfs_ictimestamp {
* kept identical to struct xfs_dinode except for the endianness annotations.
*/
struct xfs_log_dinode {
- __uint16_t di_magic; /* inode magic # = XFS_DINODE_MAGIC */
- __uint16_t di_mode; /* mode and type of file */
- __int8_t di_version; /* inode version */
- __int8_t di_format; /* format of di_c data */
- __uint8_t di_pad3[2]; /* unused in v2/3 inodes */
- __uint32_t di_uid; /* owner's user id */
- __uint32_t di_gid; /* owner's group id */
- __uint32_t di_nlink; /* number of links to file */
- __uint16_t di_projid_lo; /* lower part of owner's project id */
- __uint16_t di_projid_hi; /* higher part of owner's project id */
- __uint8_t di_pad[6]; /* unused, zeroed space */
- __uint16_t di_flushiter; /* incremented on flush */
+ uint16_t di_magic; /* inode magic # = XFS_DINODE_MAGIC */
+ uint16_t di_mode; /* mode and type of file */
+ int8_t di_version; /* inode version */
+ int8_t di_format; /* format of di_c data */
+ uint8_t di_pad3[2]; /* unused in v2/3 inodes */
+ uint32_t di_uid; /* owner's user id */
+ uint32_t di_gid; /* owner's group id */
+ uint32_t di_nlink; /* number of links to file */
+ uint16_t di_projid_lo; /* lower part of owner's project id */
+ uint16_t di_projid_hi; /* higher part of owner's project id */
+ uint8_t di_pad[6]; /* unused, zeroed space */
+ uint16_t di_flushiter; /* incremented on flush */
xfs_ictimestamp_t di_atime; /* time last accessed */
xfs_ictimestamp_t di_mtime; /* time last modified */
xfs_ictimestamp_t di_ctime; /* time created/inode modified */
@@ -408,23 +408,23 @@ struct xfs_log_dinode {
xfs_extlen_t di_extsize; /* basic/minimum extent size for file */
xfs_extnum_t di_nextents; /* number of extents in data fork */
xfs_aextnum_t di_anextents; /* number of extents in attribute fork*/
- __uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */
- __int8_t di_aformat; /* format of attr fork's data */
- __uint32_t di_dmevmask; /* DMIG event mask */
- __uint16_t di_dmstate; /* DMIG state info */
- __uint16_t di_flags; /* random flags, XFS_DIFLAG_... */
- __uint32_t di_gen; /* generation number */
+ uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */
+ int8_t di_aformat; /* format of attr fork's data */
+ uint32_t di_dmevmask; /* DMIG event mask */
+ uint16_t di_dmstate; /* DMIG state info */
+ uint16_t di_flags; /* random flags, XFS_DIFLAG_... */
+ uint32_t di_gen; /* generation number */
/* di_next_unlinked is the only non-core field in the old dinode */
xfs_agino_t di_next_unlinked;/* agi unlinked list ptr */
/* start of the extended dinode, writable fields */
- __uint32_t di_crc; /* CRC of the inode */
- __uint64_t di_changecount; /* number of attribute changes */
+ uint32_t di_crc; /* CRC of the inode */
+ uint64_t di_changecount; /* number of attribute changes */
xfs_lsn_t di_lsn; /* flush sequence */
- __uint64_t di_flags2; /* more random flags */
- __uint32_t di_cowextsize; /* basic cow extent size for file */
- __uint8_t di_pad2[12]; /* more padding for future expansion */
+ uint64_t di_flags2; /* more random flags */
+ uint32_t di_cowextsize; /* basic cow extent size for file */
+ uint8_t di_pad2[12]; /* more padding for future expansion */
/* fields only written to during inode creation */
xfs_ictimestamp_t di_crtime; /* time created */
@@ -483,7 +483,7 @@ typedef struct xfs_buf_log_format {
unsigned short blf_size; /* size of this item */
unsigned short blf_flags; /* misc state */
unsigned short blf_len; /* number of blocks in this buf */
- __int64_t blf_blkno; /* starting blkno of this buf */
+ int64_t blf_blkno; /* starting blkno of this buf */
unsigned int blf_map_size; /* used size of data bitmap in words */
unsigned int blf_data_map[XFS_BLF_DATAMAP_SIZE]; /* dirty bitmap */
} xfs_buf_log_format_t;
@@ -533,7 +533,7 @@ xfs_blft_to_flags(struct xfs_buf_log_format *blf, enum xfs_blft type)
blf->blf_flags |= ((type << XFS_BLFT_SHIFT) & XFS_BLFT_MASK);
}
-static inline __uint16_t
+static inline uint16_t
xfs_blft_from_flags(struct xfs_buf_log_format *blf)
{
return (blf->blf_flags & XFS_BLFT_MASK) >> XFS_BLFT_SHIFT;
@@ -554,14 +554,14 @@ typedef struct xfs_extent {
* conversion routine.
*/
typedef struct xfs_extent_32 {
- __uint64_t ext_start;
- __uint32_t ext_len;
+ uint64_t ext_start;
+ uint32_t ext_len;
} __attribute__((packed)) xfs_extent_32_t;
typedef struct xfs_extent_64 {
- __uint64_t ext_start;
- __uint32_t ext_len;
- __uint32_t ext_pad;
+ uint64_t ext_start;
+ uint32_t ext_len;
+ uint32_t ext_pad;
} xfs_extent_64_t;
/*
@@ -570,26 +570,26 @@ typedef struct xfs_extent_64 {
* size is given by efi_nextents.
*/
typedef struct xfs_efi_log_format {
- __uint16_t efi_type; /* efi log item type */
- __uint16_t efi_size; /* size of this item */
- __uint32_t efi_nextents; /* # extents to free */
- __uint64_t efi_id; /* efi identifier */
+ uint16_t efi_type; /* efi log item type */
+ uint16_t efi_size; /* size of this item */
+ uint32_t efi_nextents; /* # extents to free */
+ uint64_t efi_id; /* efi identifier */
xfs_extent_t efi_extents[1]; /* array of extents to free */
} xfs_efi_log_format_t;
typedef struct xfs_efi_log_format_32 {
- __uint16_t efi_type; /* efi log item type */
- __uint16_t efi_size; /* size of this item */
- __uint32_t efi_nextents; /* # extents to free */
- __uint64_t efi_id; /* efi identifier */
+ uint16_t efi_type; /* efi log item type */
+ uint16_t efi_size; /* size of this item */
+ uint32_t efi_nextents; /* # extents to free */
+ uint64_t efi_id; /* efi identifier */
xfs_extent_32_t efi_extents[1]; /* array of extents to free */
} __attribute__((packed)) xfs_efi_log_format_32_t;
typedef struct xfs_efi_log_format_64 {
- __uint16_t efi_type; /* efi log item type */
- __uint16_t efi_size; /* size of this item */
- __uint32_t efi_nextents; /* # extents to free */
- __uint64_t efi_id; /* efi identifier */
+ uint16_t efi_type; /* efi log item type */
+ uint16_t efi_size; /* size of this item */
+ uint32_t efi_nextents; /* # extents to free */
+ uint64_t efi_id; /* efi identifier */
xfs_extent_64_t efi_extents[1]; /* array of extents to free */
} xfs_efi_log_format_64_t;
@@ -599,26 +599,26 @@ typedef struct xfs_efi_log_format_64 {
* size is given by efd_nextents;
*/
typedef struct xfs_efd_log_format {
- __uint16_t efd_type; /* efd log item type */
- __uint16_t efd_size; /* size of this item */
- __uint32_t efd_nextents; /* # of extents freed */
- __uint64_t efd_efi_id; /* id of corresponding efi */
+ uint16_t efd_type; /* efd log item type */
+ uint16_t efd_size; /* size of this item */
+ uint32_t efd_nextents; /* # of extents freed */
+ uint64_t efd_efi_id; /* id of corresponding efi */
xfs_extent_t efd_extents[1]; /* array of extents freed */
} xfs_efd_log_format_t;
typedef struct xfs_efd_log_format_32 {
- __uint16_t efd_type; /* efd log item type */
- __uint16_t efd_size; /* size of this item */
- __uint32_t efd_nextents; /* # of extents freed */
- __uint64_t efd_efi_id; /* id of corresponding efi */
+ uint16_t efd_type; /* efd log item type */
+ uint16_t efd_size; /* size of this item */
+ uint32_t efd_nextents; /* # of extents freed */
+ uint64_t efd_efi_id; /* id of corresponding efi */
xfs_extent_32_t efd_extents[1]; /* array of extents freed */
} __attribute__((packed)) xfs_efd_log_format_32_t;
typedef struct xfs_efd_log_format_64 {
- __uint16_t efd_type; /* efd log item type */
- __uint16_t efd_size; /* size of this item */
- __uint32_t efd_nextents; /* # of extents freed */
- __uint64_t efd_efi_id; /* id of corresponding efi */
+ uint16_t efd_type; /* efd log item type */
+ uint16_t efd_size; /* size of this item */
+ uint32_t efd_nextents; /* # of extents freed */
+ uint64_t efd_efi_id; /* id of corresponding efi */
xfs_extent_64_t efd_extents[1]; /* array of extents freed */
} xfs_efd_log_format_64_t;
@@ -626,11 +626,11 @@ typedef struct xfs_efd_log_format_64 {
* RUI/RUD (reverse mapping) log format definitions
*/
struct xfs_map_extent {
- __uint64_t me_owner;
- __uint64_t me_startblock;
- __uint64_t me_startoff;
- __uint32_t me_len;
- __uint32_t me_flags;
+ uint64_t me_owner;
+ uint64_t me_startblock;
+ uint64_t me_startoff;
+ uint32_t me_len;
+ uint32_t me_flags;
};
/* rmap me_flags: upper bits are flags, lower byte is type code */
@@ -659,10 +659,10 @@ struct xfs_map_extent {
* size is given by rui_nextents.
*/
struct xfs_rui_log_format {
- __uint16_t rui_type; /* rui log item type */
- __uint16_t rui_size; /* size of this item */
- __uint32_t rui_nextents; /* # extents to free */
- __uint64_t rui_id; /* rui identifier */
+ uint16_t rui_type; /* rui log item type */
+ uint16_t rui_size; /* size of this item */
+ uint32_t rui_nextents; /* # extents to free */
+ uint64_t rui_id; /* rui identifier */
struct xfs_map_extent rui_extents[]; /* array of extents to rmap */
};
@@ -680,19 +680,19 @@ xfs_rui_log_format_sizeof(
* size is given by rud_nextents;
*/
struct xfs_rud_log_format {
- __uint16_t rud_type; /* rud log item type */
- __uint16_t rud_size; /* size of this item */
- __uint32_t __pad;
- __uint64_t rud_rui_id; /* id of corresponding rui */
+ uint16_t rud_type; /* rud log item type */
+ uint16_t rud_size; /* size of this item */
+ uint32_t __pad;
+ uint64_t rud_rui_id; /* id of corresponding rui */
};
/*
* CUI/CUD (refcount update) log format definitions
*/
struct xfs_phys_extent {
- __uint64_t pe_startblock;
- __uint32_t pe_len;
- __uint32_t pe_flags;
+ uint64_t pe_startblock;
+ uint32_t pe_len;
+ uint32_t pe_flags;
};
/* refcount pe_flags: upper bits are flags, lower byte is type code */
@@ -707,10 +707,10 @@ struct xfs_phys_extent {
* size is given by cui_nextents.
*/
struct xfs_cui_log_format {
- __uint16_t cui_type; /* cui log item type */
- __uint16_t cui_size; /* size of this item */
- __uint32_t cui_nextents; /* # extents to free */
- __uint64_t cui_id; /* cui identifier */
+ uint16_t cui_type; /* cui log item type */
+ uint16_t cui_size; /* size of this item */
+ uint32_t cui_nextents; /* # extents to free */
+ uint64_t cui_id; /* cui identifier */
struct xfs_phys_extent cui_extents[]; /* array of extents */
};
@@ -728,10 +728,10 @@ xfs_cui_log_format_sizeof(
* size is given by cud_nextents;
*/
struct xfs_cud_log_format {
- __uint16_t cud_type; /* cud log item type */
- __uint16_t cud_size; /* size of this item */
- __uint32_t __pad;
- __uint64_t cud_cui_id; /* id of corresponding cui */
+ uint16_t cud_type; /* cud log item type */
+ uint16_t cud_size; /* size of this item */
+ uint32_t __pad;
+ uint64_t cud_cui_id; /* id of corresponding cui */
};
/*
@@ -755,10 +755,10 @@ struct xfs_cud_log_format {
* size is given by bui_nextents.
*/
struct xfs_bui_log_format {
- __uint16_t bui_type; /* bui log item type */
- __uint16_t bui_size; /* size of this item */
- __uint32_t bui_nextents; /* # extents to free */
- __uint64_t bui_id; /* bui identifier */
+ uint16_t bui_type; /* bui log item type */
+ uint16_t bui_size; /* size of this item */
+ uint32_t bui_nextents; /* # extents to free */
+ uint64_t bui_id; /* bui identifier */
struct xfs_map_extent bui_extents[]; /* array of extents to bmap */
};
@@ -776,10 +776,10 @@ xfs_bui_log_format_sizeof(
* size is given by bud_nextents;
*/
struct xfs_bud_log_format {
- __uint16_t bud_type; /* bud log item type */
- __uint16_t bud_size; /* size of this item */
- __uint32_t __pad;
- __uint64_t bud_bui_id; /* id of corresponding bui */
+ uint16_t bud_type; /* bud log item type */
+ uint16_t bud_size; /* size of this item */
+ uint32_t __pad;
+ uint64_t bud_bui_id; /* id of corresponding bui */
};
/*
@@ -789,12 +789,12 @@ struct xfs_bud_log_format {
* 32 bits : log_recovery code assumes that.
*/
typedef struct xfs_dq_logformat {
- __uint16_t qlf_type; /* dquot log item type */
- __uint16_t qlf_size; /* size of this item */
+ uint16_t qlf_type; /* dquot log item type */
+ uint16_t qlf_size; /* size of this item */
xfs_dqid_t qlf_id; /* usr/grp/proj id : 32 bits */
- __int64_t qlf_blkno; /* blkno of dquot buffer */
- __int32_t qlf_len; /* len of dquot buffer */
- __uint32_t qlf_boffset; /* off of dquot in buffer */
+ int64_t qlf_blkno; /* blkno of dquot buffer */
+ int32_t qlf_len; /* len of dquot buffer */
+ uint32_t qlf_boffset; /* off of dquot in buffer */
} xfs_dq_logformat_t;
/*
@@ -853,8 +853,8 @@ typedef struct xfs_qoff_logformat {
* decoding can be done correctly.
*/
struct xfs_icreate_log {
- __uint16_t icl_type; /* type of log format structure */
- __uint16_t icl_size; /* size of log format structure */
+ uint16_t icl_type; /* type of log format structure */
+ uint16_t icl_size; /* size of log format structure */
__be32 icl_ag; /* ag being allocated in */
__be32 icl_agbno; /* start block of inode range */
__be32 icl_count; /* number of inodes to initialise */
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 29a01ec89dd0..66948a9fd486 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -26,7 +26,7 @@
#define XLOG_RHASH_SIZE 16
#define XLOG_RHASH_SHIFT 2
#define XLOG_RHASH(tid) \
- ((((__uint32_t)tid)>>XLOG_RHASH_SHIFT) & (XLOG_RHASH_SIZE-1))
+ ((((uint32_t)tid)>>XLOG_RHASH_SHIFT) & (XLOG_RHASH_SIZE-1))
#define XLOG_MAX_REGIONS_IN_ITEM (XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK / 2 + 1)
diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index 8eed51275bb3..2834574cb6e7 100644
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -27,8 +27,8 @@
* they may need 64-bit accounting. Hence, 64-bit quota-counters,
* and quota-limits. This is a waste in the common case, but hey ...
*/
-typedef __uint64_t xfs_qcnt_t;
-typedef __uint16_t xfs_qwarncnt_t;
+typedef uint64_t xfs_qcnt_t;
+typedef uint16_t xfs_qwarncnt_t;
/*
* flags for q_flags field in the dquot.
@@ -136,6 +136,8 @@ typedef __uint16_t xfs_qwarncnt_t;
*/
#define XFS_QMOPT_INHERIT 0x1000000
+#define XFS_QMOPT_NOLOCK 0x2000000 /* don't ilock during dqget */
+
/*
* flags to xfs_trans_mod_dquot.
*/
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 82a38d86ebad..900ea231f9a3 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -784,14 +784,6 @@ xfs_refcount_merge_extents(
}
/*
- * While we're adjusting the refcounts records of an extent, we have
- * to keep an eye on the number of extents we're dirtying -- run too
- * many in a single transaction and we'll exceed the transaction's
- * reservation and crash the fs. Each record adds 12 bytes to the
- * log (plus any key updates) so we'll conservatively assume 24 bytes
- * per record. We must also leave space for btree splits on both ends
- * of the range and space for the CUD and a new CUI.
- *
* XXX: This is a pretty hand-wavy estimate. The penalty for guessing
* true incorrectly is a shutdown FS; the penalty for guessing false
* incorrectly is more transaction rolls than might be necessary.
@@ -813,8 +805,7 @@ xfs_refcount_still_have_space(
*/
if (cur->bc_private.a.priv.refc.nr_ops > 2 &&
XFS_TEST_ERROR(false, cur->bc_mp,
- XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE,
- XFS_RANDOM_REFCOUNT_CONTINUE_UPDATE))
+ XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
return false;
if (cur->bc_private.a.priv.refc.nr_ops == 0)
@@ -822,7 +813,7 @@ xfs_refcount_still_have_space(
else if (overhead > cur->bc_tp->t_log_res)
return false;
return cur->bc_tp->t_log_res - overhead >
- cur->bc_private.a.priv.refc.nr_ops * 32;
+ cur->bc_private.a.priv.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
}
/*
@@ -1076,8 +1067,7 @@ xfs_refcount_finish_one(
blockcount);
if (XFS_TEST_ERROR(false, mp,
- XFS_ERRTAG_REFCOUNT_FINISH_ONE,
- XFS_RANDOM_REFCOUNT_FINISH_ONE))
+ XFS_ERRTAG_REFCOUNT_FINISH_ONE))
return -EIO;
/*
diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h
index 098dc668ab2c..eafb9d1f3b37 100644
--- a/fs/xfs/libxfs/xfs_refcount.h
+++ b/fs/xfs/libxfs/xfs_refcount.h
@@ -67,4 +67,20 @@ extern int xfs_refcount_free_cow_extent(struct xfs_mount *mp,
extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp,
xfs_agnumber_t agno);
+/*
+ * While we're adjusting the refcounts records of an extent, we have
+ * to keep an eye on the number of extents we're dirtying -- run too
+ * many in a single transaction and we'll exceed the transaction's
+ * reservation and crash the fs. Each record adds 12 bytes to the
+ * log (plus any key updates) so we'll conservatively assume 32 bytes
+ * per record. We must also leave space for btree splits on both ends
+ * of the range and space for the CUD and a new CUI.
+ */
+#define XFS_REFCOUNT_ITEM_OVERHEAD 32
+
+/*
+ * Return three quarters of log_res divided by XFS_REFCOUNT_ITEM_OVERHEAD,
+ * using integer arithmetic.
+ *
+ * NOTE(review): presumably log_res is a transaction log reservation in
+ * bytes and the result bounds how much can be unmapped in one transaction
+ * without exhausting it -- confirm against the callers.
+ */
+static inline xfs_fileoff_t xfs_refcount_max_unmap(int log_res)
+{
+ return (log_res * 3 / 4) / XFS_REFCOUNT_ITEM_OVERHEAD;
+}
+
#endif /* __XFS_REFCOUNT_H__ */
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 50add5272807..3c59dd3d58d7 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -202,7 +202,7 @@ xfs_refcountbt_init_ptr_from_cur(
ptr->s = agf->agf_refcount_root;
}
-STATIC __int64_t
+STATIC int64_t
xfs_refcountbt_key_diff(
struct xfs_btree_cur *cur,
union xfs_btree_key *key)
@@ -210,16 +210,16 @@ xfs_refcountbt_key_diff(
struct xfs_refcount_irec *rec = &cur->bc_rec.rc;
struct xfs_refcount_key *kp = &key->refc;
- return (__int64_t)be32_to_cpu(kp->rc_startblock) - rec->rc_startblock;
+ return (int64_t)be32_to_cpu(kp->rc_startblock) - rec->rc_startblock;
}
-STATIC __int64_t
+STATIC int64_t
xfs_refcountbt_diff_two_keys(
struct xfs_btree_cur *cur,
union xfs_btree_key *k1,
union xfs_btree_key *k2)
{
- return (__int64_t)be32_to_cpu(k1->refc.rc_startblock) -
+ return (int64_t)be32_to_cpu(k1->refc.rc_startblock) -
be32_to_cpu(k2->refc.rc_startblock);
}
@@ -285,7 +285,6 @@ const struct xfs_buf_ops xfs_refcountbt_buf_ops = {
.verify_write = xfs_refcountbt_write_verify,
};
-#if defined(DEBUG) || defined(XFS_WARN)
STATIC int
xfs_refcountbt_keys_inorder(
struct xfs_btree_cur *cur,
@@ -306,7 +305,6 @@ xfs_refcountbt_recs_inorder(
be32_to_cpu(r1->refc.rc_blockcount) <=
be32_to_cpu(r2->refc.rc_startblock);
}
-#endif
static const struct xfs_btree_ops xfs_refcountbt_ops = {
.rec_len = sizeof(struct xfs_refcount_rec),
@@ -325,10 +323,8 @@ static const struct xfs_btree_ops xfs_refcountbt_ops = {
.key_diff = xfs_refcountbt_key_diff,
.buf_ops = &xfs_refcountbt_buf_ops,
.diff_two_keys = xfs_refcountbt_diff_two_keys,
-#if defined(DEBUG) || defined(XFS_WARN)
.keys_inorder = xfs_refcountbt_keys_inorder,
.recs_inorder = xfs_refcountbt_recs_inorder,
-#endif
};
/*
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 06cfb93c2ef9..55c88a732690 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -179,7 +179,8 @@ done:
return error;
}
-static int
+/* Convert an internal btree record to an rmap record. */
+int
xfs_rmap_btrec_to_irec(
union xfs_btree_rec *rec,
struct xfs_rmap_irec *irec)
@@ -2061,7 +2062,7 @@ int
xfs_rmap_finish_one(
struct xfs_trans *tp,
enum xfs_rmap_intent_type type,
- __uint64_t owner,
+ uint64_t owner,
int whichfork,
xfs_fileoff_t startoff,
xfs_fsblock_t startblock,
@@ -2086,8 +2087,7 @@ xfs_rmap_finish_one(
startoff, blockcount, state);
if (XFS_TEST_ERROR(false, mp,
- XFS_ERRTAG_RMAP_FINISH_ONE,
- XFS_RANDOM_RMAP_FINISH_ONE))
+ XFS_ERRTAG_RMAP_FINISH_ONE))
return -EIO;
/*
@@ -2182,7 +2182,7 @@ __xfs_rmap_add(
struct xfs_mount *mp,
struct xfs_defer_ops *dfops,
enum xfs_rmap_intent_type type,
- __uint64_t owner,
+ uint64_t owner,
int whichfork,
struct xfs_bmbt_irec *bmap)
{
@@ -2266,7 +2266,7 @@ xfs_rmap_alloc_extent(
xfs_agnumber_t agno,
xfs_agblock_t bno,
xfs_extlen_t len,
- __uint64_t owner)
+ uint64_t owner)
{
struct xfs_bmbt_irec bmap;
@@ -2290,7 +2290,7 @@ xfs_rmap_free_extent(
xfs_agnumber_t agno,
xfs_agblock_t bno,
xfs_extlen_t len,
- __uint64_t owner)
+ uint64_t owner)
{
struct xfs_bmbt_irec bmap;
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 98f908fea103..466ede637080 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -179,7 +179,7 @@ enum xfs_rmap_intent_type {
struct xfs_rmap_intent {
struct list_head ri_list;
enum xfs_rmap_intent_type ri_type;
- __uint64_t ri_owner;
+ uint64_t ri_owner;
int ri_whichfork;
struct xfs_bmbt_irec ri_bmap;
};
@@ -196,15 +196,15 @@ int xfs_rmap_convert_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
struct xfs_bmbt_irec *imap);
int xfs_rmap_alloc_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
- __uint64_t owner);
+ uint64_t owner);
int xfs_rmap_free_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
- __uint64_t owner);
+ uint64_t owner);
void xfs_rmap_finish_one_cleanup(struct xfs_trans *tp,
struct xfs_btree_cur *rcur, int error);
int xfs_rmap_finish_one(struct xfs_trans *tp, enum xfs_rmap_intent_type type,
- __uint64_t owner, int whichfork, xfs_fileoff_t startoff,
+ uint64_t owner, int whichfork, xfs_fileoff_t startoff,
xfs_fsblock_t startblock, xfs_filblks_t blockcount,
xfs_exntst_t state, struct xfs_btree_cur **pcur);
@@ -216,5 +216,8 @@ int xfs_rmap_lookup_le_range(struct xfs_btree_cur *cur, xfs_agblock_t bno,
struct xfs_rmap_irec *irec, int *stat);
int xfs_rmap_compare(const struct xfs_rmap_irec *a,
const struct xfs_rmap_irec *b);
+/* Forward declaration: the prototype below only needs a pointer to it. */
+union xfs_btree_rec;
+/* Convert an internal btree record to an rmap record. */
+int xfs_rmap_btrec_to_irec(union xfs_btree_rec *rec,
+ struct xfs_rmap_irec *irec);
#endif /* __XFS_RMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 74e5a54bc428..9d9c9192584c 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -199,7 +199,7 @@ xfs_rmapbt_init_high_key_from_rec(
union xfs_btree_key *key,
union xfs_btree_rec *rec)
{
- __uint64_t off;
+ uint64_t off;
int adj;
adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1;
@@ -241,7 +241,7 @@ xfs_rmapbt_init_ptr_from_cur(
ptr->s = agf->agf_roots[cur->bc_btnum];
}
-STATIC __int64_t
+STATIC int64_t
xfs_rmapbt_key_diff(
struct xfs_btree_cur *cur,
union xfs_btree_key *key)
@@ -249,9 +249,9 @@ xfs_rmapbt_key_diff(
struct xfs_rmap_irec *rec = &cur->bc_rec.r;
struct xfs_rmap_key *kp = &key->rmap;
__u64 x, y;
- __int64_t d;
+ int64_t d;
- d = (__int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
+ d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
if (d)
return d;
@@ -271,7 +271,7 @@ xfs_rmapbt_key_diff(
return 0;
}
-STATIC __int64_t
+STATIC int64_t
xfs_rmapbt_diff_two_keys(
struct xfs_btree_cur *cur,
union xfs_btree_key *k1,
@@ -279,10 +279,10 @@ xfs_rmapbt_diff_two_keys(
{
struct xfs_rmap_key *kp1 = &k1->rmap;
struct xfs_rmap_key *kp2 = &k2->rmap;
- __int64_t d;
+ int64_t d;
__u64 x, y;
- d = (__int64_t)be32_to_cpu(kp1->rm_startblock) -
+ d = (int64_t)be32_to_cpu(kp1->rm_startblock) -
be32_to_cpu(kp2->rm_startblock);
if (d)
return d;
@@ -377,17 +377,16 @@ const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
.verify_write = xfs_rmapbt_write_verify,
};
-#if defined(DEBUG) || defined(XFS_WARN)
STATIC int
xfs_rmapbt_keys_inorder(
struct xfs_btree_cur *cur,
union xfs_btree_key *k1,
union xfs_btree_key *k2)
{
- __uint32_t x;
- __uint32_t y;
- __uint64_t a;
- __uint64_t b;
+ uint32_t x;
+ uint32_t y;
+ uint64_t a;
+ uint64_t b;
x = be32_to_cpu(k1->rmap.rm_startblock);
y = be32_to_cpu(k2->rmap.rm_startblock);
@@ -414,10 +413,10 @@ xfs_rmapbt_recs_inorder(
union xfs_btree_rec *r1,
union xfs_btree_rec *r2)
{
- __uint32_t x;
- __uint32_t y;
- __uint64_t a;
- __uint64_t b;
+ uint32_t x;
+ uint32_t y;
+ uint64_t a;
+ uint64_t b;
x = be32_to_cpu(r1->rmap.rm_startblock);
y = be32_to_cpu(r2->rmap.rm_startblock);
@@ -437,7 +436,6 @@ xfs_rmapbt_recs_inorder(
return 1;
return 0;
}
-#endif /* DEBUG */
static const struct xfs_btree_ops xfs_rmapbt_ops = {
.rec_len = sizeof(struct xfs_rmap_rec),
@@ -456,10 +454,8 @@ static const struct xfs_btree_ops xfs_rmapbt_ops = {
.key_diff = xfs_rmapbt_key_diff,
.buf_ops = &xfs_rmapbt_buf_ops,
.diff_two_keys = xfs_rmapbt_diff_two_keys,
-#if defined(DEBUG) || defined(XFS_WARN)
.keys_inorder = xfs_rmapbt_keys_inorder,
.recs_inorder = xfs_rmapbt_recs_inorder,
-#endif
};
/*
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index e47b99e59f60..5d4e43ef4eea 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -70,7 +70,7 @@ const struct xfs_buf_ops xfs_rtbuf_ops = {
* Get a buffer for the bitmap or summary file block specified.
* The buffer is returned read and locked.
*/
-static int
+int
xfs_rtbuf_get(
xfs_mount_t *mp, /* file system mount structure */
xfs_trans_t *tp, /* transaction pointer */
@@ -1011,7 +1011,7 @@ xfs_rtfree_extent(
mp->m_sb.sb_rextents) {
if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM))
mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
- *(__uint64_t *)&VFS_I(mp->m_rbmip)->i_atime = 0;
+ *(uint64_t *)&VFS_I(mp->m_rbmip)->i_atime = 0;
xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
}
return 0;
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 584ec896a533..9b5aae2bcc0b 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -448,7 +448,7 @@ xfs_sb_quota_to_disk(
struct xfs_dsb *to,
struct xfs_sb *from)
{
- __uint16_t qflags = from->sb_qflags;
+ uint16_t qflags = from->sb_qflags;
to->sb_uquotino = cpu_to_be64(from->sb_uquotino);
if (xfs_sb_version_has_pquotino(from)) {
@@ -756,7 +756,7 @@ xfs_sb_mount_common(
mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2;
mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
- mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
+ mp->m_ialloc_inos = (int)MAX((uint16_t)XFS_INODES_PER_CHUNK,
sbp->sb_inopblock);
mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index 2e2c6716b623..c484877129a0 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -114,7 +114,7 @@ xfs_symlink_verify(
if (bp->b_bn != be64_to_cpu(dsl->sl_blkno))
return false;
if (be32_to_cpu(dsl->sl_offset) +
- be32_to_cpu(dsl->sl_bytes) >= MAXPATHLEN)
+ be32_to_cpu(dsl->sl_bytes) >= XFS_SYMLINK_MAXLEN)
return false;
if (dsl->sl_owner == 0)
return false;
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index b456cca1bfb2..6bd916bd35e2 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -477,14 +477,14 @@ xfs_calc_mkdir_reservation(
/*
* Making a new symlink is the same as creating a new file, but
* with the added blocks for remote symlink data which can be up to 1kB in
- * length (MAXPATHLEN).
+ * length (XFS_SYMLINK_MAXLEN).
*/
STATIC uint
xfs_calc_symlink_reservation(
struct xfs_mount *mp)
{
return xfs_calc_create_reservation(mp) +
- xfs_calc_buf_res(1, MAXPATHLEN);
+ xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN);
}
/*
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index 717909f2f7b7..0220159bd463 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -18,34 +18,34 @@
#ifndef __XFS_TYPES_H__
#define __XFS_TYPES_H__
-typedef __uint32_t prid_t; /* project ID */
+typedef uint32_t prid_t; /* project ID */
-typedef __uint32_t xfs_agblock_t; /* blockno in alloc. group */
-typedef __uint32_t xfs_agino_t; /* inode # within allocation grp */
-typedef __uint32_t xfs_extlen_t; /* extent length in blocks */
-typedef __uint32_t xfs_agnumber_t; /* allocation group number */
-typedef __int32_t xfs_extnum_t; /* # of extents in a file */
-typedef __int16_t xfs_aextnum_t; /* # extents in an attribute fork */
-typedef __int64_t xfs_fsize_t; /* bytes in a file */
-typedef __uint64_t xfs_ufsize_t; /* unsigned bytes in a file */
+typedef uint32_t xfs_agblock_t; /* blockno in alloc. group */
+typedef uint32_t xfs_agino_t; /* inode # within allocation grp */
+typedef uint32_t xfs_extlen_t; /* extent length in blocks */
+typedef uint32_t xfs_agnumber_t; /* allocation group number */
+typedef int32_t xfs_extnum_t; /* # of extents in a file */
+typedef int16_t xfs_aextnum_t; /* # extents in an attribute fork */
+typedef int64_t xfs_fsize_t; /* bytes in a file */
+typedef uint64_t xfs_ufsize_t; /* unsigned bytes in a file */
-typedef __int32_t xfs_suminfo_t; /* type of bitmap summary info */
-typedef __int32_t xfs_rtword_t; /* word type for bitmap manipulations */
+typedef int32_t xfs_suminfo_t; /* type of bitmap summary info */
+typedef int32_t xfs_rtword_t; /* word type for bitmap manipulations */
-typedef __int64_t xfs_lsn_t; /* log sequence number */
-typedef __int32_t xfs_tid_t; /* transaction identifier */
+typedef int64_t xfs_lsn_t; /* log sequence number */
+typedef int32_t xfs_tid_t; /* transaction identifier */
-typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */
-typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */
+typedef uint32_t xfs_dablk_t; /* dir/attr block number (in file) */
+typedef uint32_t xfs_dahash_t; /* dir/attr hash value */
-typedef __uint64_t xfs_fsblock_t; /* blockno in filesystem (agno|agbno) */
-typedef __uint64_t xfs_rfsblock_t; /* blockno in filesystem (raw) */
-typedef __uint64_t xfs_rtblock_t; /* extent (block) in realtime area */
-typedef __uint64_t xfs_fileoff_t; /* block number in a file */
-typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */
+typedef uint64_t xfs_fsblock_t; /* blockno in filesystem (agno|agbno) */
+typedef uint64_t xfs_rfsblock_t; /* blockno in filesystem (raw) */
+typedef uint64_t xfs_rtblock_t; /* extent (block) in realtime area */
+typedef uint64_t xfs_fileoff_t; /* block number in a file */
+typedef uint64_t xfs_filblks_t; /* number of blocks in a file */
-typedef __int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */
-typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */
+typedef int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */
+typedef int64_t xfs_sfiloff_t; /* signed block number in a file */
/*
* Null values for the types.
@@ -125,7 +125,7 @@ struct xfs_name {
* uid_t and gid_t are hard-coded to 32 bits in the inode.
* Hence, an 'id' in a dquot is 32 bits..
*/
-typedef __uint32_t xfs_dqid_t;
+typedef uint32_t xfs_dqid_t;
/*
* Constants for bit manipulations.
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index a742c47f7d5a..80cd0fd86783 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -24,6 +24,10 @@
#define XFS_BUF_LOCK_TRACKING 1
#endif
+#ifdef CONFIG_XFS_ASSERT_FATAL
+#define XFS_ASSERT_FATAL 1
+#endif
+
#ifdef CONFIG_XFS_WARN
#define XFS_WARN 1
#endif
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index b468e041f207..7034e17535de 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -170,8 +170,8 @@ xfs_get_acl(struct inode *inode, int type)
return acl;
}
-STATIC int
-__xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+int
+__xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
struct xfs_inode *ip = XFS_I(inode);
unsigned char *ea_name;
@@ -268,5 +268,5 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
}
set_acl:
- return __xfs_set_acl(inode, type, acl);
+ return __xfs_set_acl(inode, acl, type);
}
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 286fa89217f5..04327318ef67 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -24,6 +24,7 @@ struct posix_acl;
#ifdef CONFIG_XFS_POSIX_ACL
extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
extern int xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+extern int __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
#else
static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type)
{
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 09af0f7cd55e..6bf120bb1a17 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -276,7 +276,7 @@ xfs_end_io(
struct xfs_inode *ip = XFS_I(ioend->io_inode);
xfs_off_t offset = ioend->io_offset;
size_t size = ioend->io_size;
- int error = ioend->io_bio->bi_error;
+ int error;
/*
* Just clean up the in-memory structures if the fs has been shut down.
@@ -289,6 +289,7 @@ xfs_end_io(
/*
* Clean up any COW blocks on an I/O error.
*/
+ error = blk_status_to_errno(ioend->io_bio->bi_status);
if (unlikely(error)) {
switch (ioend->io_type) {
case XFS_IO_COW:
@@ -332,7 +333,7 @@ xfs_end_bio(
else if (ioend->io_append_trans)
queue_work(mp->m_data_workqueue, &ioend->io_work);
else
- xfs_destroy_ioend(ioend, bio->bi_error);
+ xfs_destroy_ioend(ioend, blk_status_to_errno(bio->bi_status));
}
STATIC int
@@ -500,11 +501,12 @@ xfs_submit_ioend(
* time.
*/
if (status) {
- ioend->io_bio->bi_error = status;
+ ioend->io_bio->bi_status = errno_to_blk_status(status);
bio_endio(ioend->io_bio);
return status;
}
+ ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
submit_bio(ioend->io_bio);
return 0;
}
@@ -564,6 +566,7 @@ xfs_chain_bio(
bio_chain(ioend->io_bio, new);
bio_get(ioend->io_bio); /* for xfs_destroy_ioend */
ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
+ ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
submit_bio(ioend->io_bio);
ioend->io_bio = new;
}
@@ -836,7 +839,7 @@ xfs_writepage_map(
struct inode *inode,
struct page *page,
loff_t offset,
- __uint64_t end_offset)
+ uint64_t end_offset)
{
LIST_HEAD(submit_list);
struct xfs_ioend *ioend, *next;
@@ -991,7 +994,7 @@ xfs_do_writepage(
struct xfs_writepage_ctx *wpc = data;
struct inode *inode = page->mapping->host;
loff_t offset;
- __uint64_t end_offset;
+ uint64_t end_offset;
pgoff_t end_index;
trace_xfs_writepage(inode, page, 0, 0);
@@ -1316,9 +1319,12 @@ xfs_vm_bmap(
* The swap code (ab-)uses ->bmap to get a block mapping and then
* bypasses the file system for actual I/O. We really can't allow
* that on reflinks inodes, so we have to skip out here. And yes,
- * 0 is the magic code for a bmap error..
+ * 0 is the magic code for a bmap error.
+ *
+ * Since we don't pass back blockdev info, we can't return bmap
+ * information for rt files either.
*/
- if (xfs_is_reflink_inode(ip))
+ if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
return 0;
filemap_write_and_wait(mapping);
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index d14691aa02b4..5d5a5e277f35 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -117,6 +117,7 @@ typedef void (*put_listent_func_t)(struct xfs_attr_list_context *, int,
unsigned char *, int, int);
typedef struct xfs_attr_list_context {
+ struct xfs_trans *tp;
struct xfs_inode *dp; /* inode */
struct attrlist_cursor_kern *cursor; /* position in list */
char *alist; /* output buffer */
@@ -140,8 +141,10 @@ typedef struct xfs_attr_list_context {
* Overall external interface routines.
*/
int xfs_attr_inactive(struct xfs_inode *dp);
+int xfs_attr_list_int_ilocked(struct xfs_attr_list_context *);
int xfs_attr_list_int(struct xfs_attr_list_context *);
int xfs_inode_hasattr(struct xfs_inode *ip);
+int xfs_attr_get_ilocked(struct xfs_inode *ip, struct xfs_da_args *args);
int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name,
unsigned char *value, int *valuelenp, int flags);
int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name,
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 97c45b6eb91e..545eca508d42 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -230,7 +230,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
*/
bp = NULL;
if (cursor->blkno > 0) {
- error = xfs_da3_node_read(NULL, dp, cursor->blkno, -1,
+ error = xfs_da3_node_read(context->tp, dp, cursor->blkno, -1,
&bp, XFS_ATTR_FORK);
if ((error != 0) && (error != -EFSCORRUPTED))
return error;
@@ -242,7 +242,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
case XFS_DA_NODE_MAGIC:
case XFS_DA3_NODE_MAGIC:
trace_xfs_attr_list_wrong_blk(context);
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
bp = NULL;
break;
case XFS_ATTR_LEAF_MAGIC:
@@ -254,18 +254,18 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
if (cursor->hashval > be32_to_cpu(
entries[leafhdr.count - 1].hashval)) {
trace_xfs_attr_list_wrong_blk(context);
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
bp = NULL;
} else if (cursor->hashval <= be32_to_cpu(
entries[0].hashval)) {
trace_xfs_attr_list_wrong_blk(context);
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
bp = NULL;
}
break;
default:
trace_xfs_attr_list_wrong_blk(context);
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
bp = NULL;
}
}
@@ -279,9 +279,9 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
if (bp == NULL) {
cursor->blkno = 0;
for (;;) {
- __uint16_t magic;
+ uint16_t magic;
- error = xfs_da3_node_read(NULL, dp,
+ error = xfs_da3_node_read(context->tp, dp,
cursor->blkno, -1, &bp,
XFS_ATTR_FORK);
if (error)
@@ -297,7 +297,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
XFS_ERRLEVEL_LOW,
context->dp->i_mount,
node);
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
return -EFSCORRUPTED;
}
@@ -313,10 +313,10 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
}
}
if (i == nodehdr.count) {
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
return 0;
}
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
}
}
ASSERT(bp != NULL);
@@ -333,12 +333,12 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
if (context->seen_enough || leafhdr.forw == 0)
break;
cursor->blkno = leafhdr.forw;
- xfs_trans_brelse(NULL, bp);
- error = xfs_attr3_leaf_read(NULL, dp, cursor->blkno, -1, &bp);
+ xfs_trans_brelse(context->tp, bp);
+ error = xfs_attr3_leaf_read(context->tp, dp, cursor->blkno, -1, &bp);
if (error)
return error;
}
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
return 0;
}
@@ -448,16 +448,34 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context)
trace_xfs_attr_leaf_list(context);
context->cursor->blkno = 0;
- error = xfs_attr3_leaf_read(NULL, context->dp, 0, -1, &bp);
+ error = xfs_attr3_leaf_read(context->tp, context->dp, 0, -1, &bp);
if (error)
return error;
xfs_attr3_leaf_list_int(bp, context);
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
return 0;
}
int
+xfs_attr_list_int_ilocked(
+ struct xfs_attr_list_context *context)
+{
+ struct xfs_inode *dp = context->dp;
+
+ /*
+ * Decide on what work routines to call based on the inode size.
+ */
+ if (!xfs_inode_hasattr(dp))
+ return 0;
+ else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
+ return xfs_attr_shortform_list(context);
+ else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
+ return xfs_attr_leaf_list(context);
+ return xfs_attr_node_list(context);
+}
+
+int
xfs_attr_list_int(
xfs_attr_list_context_t *context)
{
@@ -470,19 +488,8 @@ xfs_attr_list_int(
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
return -EIO;
- /*
- * Decide on what work routines to call based on the inode size.
- */
lock_mode = xfs_ilock_attr_map_shared(dp);
- if (!xfs_inode_hasattr(dp)) {
- error = 0;
- } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
- error = xfs_attr_shortform_list(context);
- } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
- error = xfs_attr_leaf_list(context);
- } else {
- error = xfs_attr_node_list(context);
- }
+ error = xfs_attr_list_int_ilocked(context);
xfs_iunlock(dp, lock_mode);
return error;
}
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index d419d23fa214..88073910fa5d 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -396,6 +396,7 @@ xfs_bui_recover(
struct xfs_map_extent *bmap;
xfs_fsblock_t startblock_fsb;
xfs_fsblock_t inode_fsb;
+ xfs_filblks_t count;
bool op_ok;
struct xfs_bud_log_item *budp;
enum xfs_bmap_intent_type type;
@@ -404,6 +405,7 @@ xfs_bui_recover(
struct xfs_trans *tp;
struct xfs_inode *ip = NULL;
struct xfs_defer_ops dfops;
+ struct xfs_bmbt_irec irec;
xfs_fsblock_t firstfsb;
ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags));
@@ -481,13 +483,24 @@ xfs_bui_recover(
}
xfs_trans_ijoin(tp, ip, 0);
+ count = bmap->me_len;
error = xfs_trans_log_finish_bmap_update(tp, budp, &dfops, type,
ip, whichfork, bmap->me_startoff,
- bmap->me_startblock, bmap->me_len,
- state);
+ bmap->me_startblock, &count, state);
if (error)
goto err_dfops;
+ if (count > 0) {
+ ASSERT(type == XFS_BMAP_UNMAP);
+ irec.br_startblock = bmap->me_startblock;
+ irec.br_blockcount = count;
+ irec.br_startoff = bmap->me_startoff;
+ irec.br_state = state;
+ error = xfs_bmap_unmap_extent(tp->t_mountp, &dfops, ip, &irec);
+ if (error)
+ goto err_dfops;
+ }
+
/* Finish transaction, free inodes. */
error = xfs_defer_finish(&tp, &dfops, NULL);
if (error)
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 9e3cc2146d5b..93e955262d07 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -219,20 +219,24 @@ xfs_bmap_eof(
*/
/*
- * Count leaf blocks given a range of extent records.
+ * Count leaf blocks given a range of extent records. Delayed allocation
+ * extents are not counted towards the totals.
*/
STATIC void
xfs_bmap_count_leaves(
- xfs_ifork_t *ifp,
- xfs_extnum_t idx,
- int numrecs,
- int *count)
+ struct xfs_ifork *ifp,
+ xfs_extnum_t *numrecs,
+ xfs_filblks_t *count)
{
- int b;
-
- for (b = 0; b < numrecs; b++) {
- xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b);
- *count += xfs_bmbt_get_blockcount(frp);
+ xfs_extnum_t i;
+ xfs_extnum_t nr_exts = xfs_iext_count(ifp);
+
+ for (i = 0; i < nr_exts; i++) {
+ xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, i);
+ if (!isnullstartblock(xfs_bmbt_get_startblock(frp))) {
+ (*numrecs)++;
+ *count += xfs_bmbt_get_blockcount(frp);
+ }
}
}
@@ -245,7 +249,7 @@ xfs_bmap_disk_count_leaves(
struct xfs_mount *mp,
struct xfs_btree_block *block,
int numrecs,
- int *count)
+ xfs_filblks_t *count)
{
int b;
xfs_bmbt_rec_t *frp;
@@ -260,17 +264,18 @@ xfs_bmap_disk_count_leaves(
* Recursively walks each level of a btree
* to count total fsblocks in use.
*/
-STATIC int /* error */
+STATIC int
xfs_bmap_count_tree(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_ifork_t *ifp, /* inode fork pointer */
- xfs_fsblock_t blockno, /* file system block number */
- int levelin, /* level in btree */
- int *count) /* Count of blocks */
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_ifork *ifp,
+ xfs_fsblock_t blockno,
+ int levelin,
+ xfs_extnum_t *nextents,
+ xfs_filblks_t *count)
{
int error;
- xfs_buf_t *bp, *nbp;
+ struct xfs_buf *bp, *nbp;
int level = levelin;
__be64 *pp;
xfs_fsblock_t bno = blockno;
@@ -303,8 +308,9 @@ xfs_bmap_count_tree(
/* Dive to the next level */
pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
bno = be64_to_cpu(*pp);
- if (unlikely((error =
- xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
+ error = xfs_bmap_count_tree(mp, tp, ifp, bno, level, nextents,
+ count);
+ if (error) {
xfs_trans_brelse(tp, bp);
XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
XFS_ERRLEVEL_LOW, mp);
@@ -316,6 +322,7 @@ xfs_bmap_count_tree(
for (;;) {
nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
numrecs = be16_to_cpu(block->bb_numrecs);
+ (*nextents) += numrecs;
xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
xfs_trans_brelse(tp, bp);
if (nextbno == NULLFSBLOCK)
@@ -334,46 +341,64 @@ xfs_bmap_count_tree(
}
/*
- * Count fsblocks of the given fork.
+ * Count fsblocks of the given fork. Delayed allocation extents are
+ * not counted towards the totals.
*/
-static int /* error */
+int
xfs_bmap_count_blocks(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_inode_t *ip, /* incore inode */
- int whichfork, /* data or attr fork */
- int *count) /* out: count of blocks */
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ int whichfork,
+ xfs_extnum_t *nextents,
+ xfs_filblks_t *count)
{
+ struct xfs_mount *mp; /* file system mount structure */
+ __be64 *pp; /* pointer to block address */
struct xfs_btree_block *block; /* current btree block */
+ struct xfs_ifork *ifp; /* fork structure */
xfs_fsblock_t bno; /* block # of "block" */
- xfs_ifork_t *ifp; /* fork structure */
int level; /* btree level, for checking */
- xfs_mount_t *mp; /* file system mount structure */
- __be64 *pp; /* pointer to block address */
+ int error;
bno = NULLFSBLOCK;
mp = ip->i_mount;
+ *nextents = 0;
+ *count = 0;
ifp = XFS_IFORK_PTR(ip, whichfork);
- if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) {
- xfs_bmap_count_leaves(ifp, 0, xfs_iext_count(ifp), count);
+ if (!ifp)
return 0;
- }
- /*
- * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
- */
- block = ifp->if_broot;
- level = be16_to_cpu(block->bb_level);
- ASSERT(level > 0);
- pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
- bno = be64_to_cpu(*pp);
- ASSERT(bno != NULLFSBLOCK);
- ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
- ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
-
- if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
- XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
- mp);
- return -EFSCORRUPTED;
+ switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+ case XFS_DINODE_FMT_EXTENTS:
+ xfs_bmap_count_leaves(ifp, nextents, count);
+ return 0;
+ case XFS_DINODE_FMT_BTREE:
+ if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+ error = xfs_iread_extents(tp, ip, whichfork);
+ if (error)
+ return error;
+ }
+
+ /*
+ * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
+ */
+ block = ifp->if_broot;
+ level = be16_to_cpu(block->bb_level);
+ ASSERT(level > 0);
+ pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
+ bno = be64_to_cpu(*pp);
+ ASSERT(bno != NULLFSBLOCK);
+ ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
+ ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
+
+ error = xfs_bmap_count_tree(mp, tp, ifp, bno, level,
+ nextents, count);
+ if (error) {
+ XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)",
+ XFS_ERRLEVEL_LOW, mp);
+ return -EFSCORRUPTED;
+ }
+ return 0;
}
return 0;
@@ -389,11 +414,11 @@ xfs_getbmapx_fix_eof_hole(
struct getbmapx *out, /* output structure */
int prealloced, /* this is a file with
* preallocated data space */
- __int64_t end, /* last block requested */
+ int64_t end, /* last block requested */
xfs_fsblock_t startblock,
bool moretocome)
{
- __int64_t fixlen;
+ int64_t fixlen;
xfs_mount_t *mp; /* file system mount point */
xfs_ifork_t *ifp; /* inode fork pointer */
xfs_extnum_t lastx; /* last extent pointer */
@@ -455,8 +480,8 @@ xfs_getbmap_adjust_shared(
agno = XFS_FSB_TO_AGNO(mp, map->br_startblock);
agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock);
- error = xfs_reflink_find_shared(mp, agno, agbno, map->br_blockcount,
- &ebno, &elen, true);
+ error = xfs_reflink_find_shared(mp, NULL, agno, agbno,
+ map->br_blockcount, &ebno, &elen, true);
if (error)
return error;
@@ -514,9 +539,9 @@ xfs_getbmap(
xfs_bmap_format_t formatter, /* format to user */
void *arg) /* formatter arg */
{
- __int64_t bmvend; /* last block requested */
+ int64_t bmvend; /* last block requested */
int error = 0; /* return value */
- __int64_t fixlen; /* length for -1 case */
+ int64_t fixlen; /* length for -1 case */
int i; /* extent number */
int lock; /* lock state */
xfs_bmbt_irec_t *map; /* buffer for user's data */
@@ -605,7 +630,7 @@ xfs_getbmap(
if (bmv->bmv_length == -1) {
fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
bmv->bmv_length =
- max_t(__int64_t, fixlen - bmv->bmv_offset, 0);
+ max_t(int64_t, fixlen - bmv->bmv_offset, 0);
} else if (bmv->bmv_length == 0) {
bmv->bmv_entries = 0;
return 0;
@@ -742,7 +767,7 @@ xfs_getbmap(
out[cur_ext].bmv_offset +
out[cur_ext].bmv_length;
bmv->bmv_length =
- max_t(__int64_t, 0, bmvend - bmv->bmv_offset);
+ max_t(int64_t, 0, bmvend - bmv->bmv_offset);
/*
* In case we don't want to return the hole,
@@ -1617,7 +1642,7 @@ xfs_swap_extents_check_format(
* extent format...
*/
if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
- if (XFS_IFORK_BOFF(ip) &&
+ if (XFS_IFORK_Q(ip) &&
XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip))
return -EINVAL;
if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=
@@ -1627,7 +1652,7 @@ xfs_swap_extents_check_format(
/* Reciprocal target->temp btree format checks */
if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
- if (XFS_IFORK_BOFF(tip) &&
+ if (XFS_IFORK_Q(tip) &&
XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip))
return -EINVAL;
if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=
@@ -1676,7 +1701,7 @@ xfs_swap_extent_rmap(
xfs_filblks_t ilen;
xfs_filblks_t rlen;
int nimaps;
- __uint64_t tip_flags2;
+ uint64_t tip_flags2;
/*
* If the source file has shared blocks, we must flag the donor
@@ -1789,10 +1814,11 @@ xfs_swap_extent_forks(
int *target_log_flags)
{
struct xfs_ifork tempifp, *ifp, *tifp;
- int aforkblks = 0;
- int taforkblks = 0;
+ xfs_filblks_t aforkblks = 0;
+ xfs_filblks_t taforkblks = 0;
+ xfs_extnum_t junk;
xfs_extnum_t nextents;
- __uint64_t tmp;
+ uint64_t tmp;
int error;
/*
@@ -1800,14 +1826,14 @@ xfs_swap_extent_forks(
*/
if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) &&
(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
- error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK,
+ error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &junk,
&aforkblks);
if (error)
return error;
}
if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) &&
(tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
- error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK,
+ error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, &junk,
&taforkblks);
if (error)
return error;
@@ -1850,15 +1876,15 @@ xfs_swap_extent_forks(
/*
* Fix the on-disk inode values
*/
- tmp = (__uint64_t)ip->i_d.di_nblocks;
+ tmp = (uint64_t)ip->i_d.di_nblocks;
ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks;
tip->i_d.di_nblocks = tmp + taforkblks - aforkblks;
- tmp = (__uint64_t) ip->i_d.di_nextents;
+ tmp = (uint64_t) ip->i_d.di_nextents;
ip->i_d.di_nextents = tip->i_d.di_nextents;
tip->i_d.di_nextents = tmp;
- tmp = (__uint64_t) ip->i_d.di_format;
+ tmp = (uint64_t) ip->i_d.di_format;
ip->i_d.di_format = tip->i_d.di_format;
tip->i_d.di_format = tmp;
@@ -1927,7 +1953,7 @@ xfs_swap_extents(
int error = 0;
int lock_flags;
struct xfs_ifork *cowfp;
- __uint64_t f;
+ uint64_t f;
int resblks;
/*
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 135d8267e284..0cede1043571 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -70,4 +70,8 @@ int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
xfs_daddr_t xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb);
+int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
+ int whichfork, xfs_extnum_t *nextents,
+ xfs_filblks_t *count);
+
#endif /* __XFS_BMAP_UTIL_H__ */
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 07b77b73b024..72f038492ba8 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -117,7 +117,7 @@ static inline void
__xfs_buf_ioacct_dec(
struct xfs_buf *bp)
{
- ASSERT(spin_is_locked(&bp->b_lock));
+ lockdep_assert_held(&bp->b_lock);
if (bp->b_state & XFS_BSTATE_IN_FLIGHT) {
bp->b_state &= ~XFS_BSTATE_IN_FLIGHT;
@@ -1194,7 +1194,7 @@ xfs_buf_ioerror_alert(
{
xfs_alert(bp->b_target->bt_mount,
"metadata I/O error: block 0x%llx (\"%s\") error %d numblks %d",
- (__uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length);
+ (uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length);
}
int
@@ -1227,8 +1227,11 @@ xfs_buf_bio_end_io(
* don't overwrite existing errors - otherwise we can lose errors on
* buffers that require multiple bios to complete.
*/
- if (bio->bi_error)
- cmpxchg(&bp->b_io_error, 0, bio->bi_error);
+ if (bio->bi_status) {
+ int error = blk_status_to_errno(bio->bi_status);
+
+ cmpxchg(&bp->b_io_error, 0, error);
+ }
if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
@@ -2047,6 +2050,66 @@ xfs_buf_delwri_submit(
return error;
}
+/*
+ * Push a single buffer on a delwri queue.
+ *
+ * The purpose of this function is to submit a single buffer of a delwri queue
+ * and return with the buffer still on the original queue. The waiting delwri
+ * buffer submission infrastructure guarantees transfer of the delwri queue
+ * buffer reference to a temporary wait list. We reuse this infrastructure to
+ * transfer the buffer back to the original queue.
+ *
+ * Note the buffer transitions from the queued state, to the submitted and wait
+ * listed state and back to the queued state during this call. The buffer
+ * locking and queue management logic between _delwri_pushbuf() and
+ * _delwri_queue() guarantee that the buffer cannot be queued to another list
+ * before returning.
+ */
+int
+xfs_buf_delwri_pushbuf(
+ struct xfs_buf *bp,
+ struct list_head *buffer_list)
+{
+ LIST_HEAD (submit_list);
+ int error;
+
+ ASSERT(bp->b_flags & _XBF_DELWRI_Q);
+
+ trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_);
+
+ /*
+ * Isolate the buffer to a new local list so we can submit it for I/O
+ * independently from the rest of the original list.
+ */
+ xfs_buf_lock(bp);
+ list_move(&bp->b_list, &submit_list);
+ xfs_buf_unlock(bp);
+
+ /*
+ * Delwri submission clears the DELWRI_Q buffer flag and returns with
+ * the buffer on the wait list with an associated reference. Rather than
+ * bounce the buffer from a local wait list back to the original list
+ * after I/O completion, reuse the original list as the wait list.
+ */
+ xfs_buf_delwri_submit_buffers(&submit_list, buffer_list);
+
+ /*
+ * The buffer is now under I/O and wait listed as during typical delwri
+ * submission. Lock the buffer to wait for I/O completion. Rather than
+ * remove the buffer from the wait list and release the reference, we
+ * want to return with the buffer queued to the original list. The
+ * buffer already sits on the original list with a wait list reference,
+ * however. If we let the queue inherit that wait list reference, all we
+ * need to do is reset the DELWRI_Q flag.
+ */
+ xfs_buf_lock(bp);
+ error = bp->b_error;
+ bp->b_flags |= _XBF_DELWRI_Q;
+ xfs_buf_unlock(bp);
+
+ return error;
+}
+
int __init
xfs_buf_init(void)
{
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 1508121f29f2..20721261dae5 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -332,6 +332,7 @@ extern void xfs_buf_delwri_cancel(struct list_head *);
extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
extern int xfs_buf_delwri_submit(struct list_head *);
extern int xfs_buf_delwri_submit_nowait(struct list_head *);
+extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *);
/* Buffer Daemon Setup Routines */
extern int xfs_buf_init(void);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 0306168af332..f6a8422e9562 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -636,20 +636,23 @@ xfs_buf_item_unlock(
/*
* Clean buffers, by definition, cannot be in the AIL. However, aborted
- * buffers may be dirty and hence in the AIL. Therefore if we are
- * aborting a buffer and we've just taken the last refernce away, we
- * have to check if it is in the AIL before freeing it. We need to free
- * it in this case, because an aborted transaction has already shut the
- * filesystem down and this is the last chance we will have to do so.
+ * buffers may be in the AIL regardless of dirty state. An aborted
+ * transaction that invalidates a buffer already in the AIL may have
+ * marked it stale and cleared the dirty state, for example.
+ *
+ * Therefore if we are aborting a buffer and we've just taken the last
+ * reference away, we have to check if it is in the AIL before freeing
+ * it. We need to free it in this case, because an aborted transaction
+ * has already shut the filesystem down and this is the last chance we
+ * will have to do so.
*/
if (atomic_dec_and_test(&bip->bli_refcount)) {
- if (clean)
- xfs_buf_item_relse(bp);
- else if (aborted) {
+ if (aborted) {
ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR);
xfs_buf_item_relse(bp);
- }
+ } else if (clean)
+ xfs_buf_item_relse(bp);
}
if (!(flags & XFS_BLI_HOLD))
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 20b7a5c6eb2f..ba2638d37031 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -44,7 +44,7 @@ static unsigned char xfs_dir3_filetype_table[] = {
static unsigned char
xfs_dir3_get_dtype(
struct xfs_mount *mp,
- __uint8_t filetype)
+ uint8_t filetype)
{
if (!xfs_sb_version_hasftype(&mp->m_sb))
return DT_UNKNOWN;
@@ -117,7 +117,7 @@ xfs_dir2_sf_getdents(
*/
sfep = xfs_dir2_sf_firstentry(sfp);
for (i = 0; i < sfp->count; i++) {
- __uint8_t filetype;
+ uint8_t filetype;
off = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
xfs_dir2_sf_get_offset(sfep));
@@ -170,7 +170,7 @@ xfs_dir2_block_getdents(
return 0;
lock_mode = xfs_ilock_data_map_shared(dp);
- error = xfs_dir3_block_read(NULL, dp, &bp);
+ error = xfs_dir3_block_read(args->trans, dp, &bp);
xfs_iunlock(dp, lock_mode);
if (error)
return error;
@@ -194,7 +194,7 @@ xfs_dir2_block_getdents(
* Each object is a real entry (dep) or an unused one (dup).
*/
while (ptr < endptr) {
- __uint8_t filetype;
+ uint8_t filetype;
dup = (xfs_dir2_data_unused_t *)ptr;
/*
@@ -228,7 +228,7 @@ xfs_dir2_block_getdents(
if (!dir_emit(ctx, (char *)dep->name, dep->namelen,
be64_to_cpu(dep->inumber),
xfs_dir3_get_dtype(dp->i_mount, filetype))) {
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(args->trans, bp);
return 0;
}
}
@@ -239,218 +239,104 @@ xfs_dir2_block_getdents(
*/
ctx->pos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk + 1, 0) &
0x7fffffff;
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(args->trans, bp);
return 0;
}
-struct xfs_dir2_leaf_map_info {
- xfs_extlen_t map_blocks; /* number of fsbs in map */
- xfs_dablk_t map_off; /* last mapped file offset */
- int map_size; /* total entries in *map */
- int map_valid; /* valid entries in *map */
- int nmap; /* mappings to ask xfs_bmapi */
- xfs_dir2_db_t curdb; /* db for current block */
- int ra_current; /* number of read-ahead blks */
- int ra_index; /* *map index for read-ahead */
- int ra_offset; /* map entry offset for ra */
- int ra_want; /* readahead count wanted */
- struct xfs_bmbt_irec map[]; /* map vector for blocks */
-};
-
+/*
+ * Read a directory block and initiate readahead for blocks beyond that.
+ * We maintain a sliding readahead window of the remaining space in the
+ * buffer rounded up to the nearest block.
+ */
STATIC int
xfs_dir2_leaf_readbuf(
struct xfs_da_args *args,
size_t bufsize,
- struct xfs_dir2_leaf_map_info *mip,
- xfs_dir2_off_t *curoff,
- struct xfs_buf **bpp,
- bool trim_map)
+ xfs_dir2_off_t *cur_off,
+ xfs_dablk_t *ra_blk,
+ struct xfs_buf **bpp)
{
struct xfs_inode *dp = args->dp;
struct xfs_buf *bp = NULL;
- struct xfs_bmbt_irec *map = mip->map;
+ struct xfs_da_geometry *geo = args->geo;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(dp, XFS_DATA_FORK);
+ struct xfs_bmbt_irec map;
struct blk_plug plug;
+ xfs_dir2_off_t new_off;
+ xfs_dablk_t next_ra;
+ xfs_dablk_t map_off;
+ xfs_dablk_t last_da;
+ xfs_extnum_t idx;
+ int ra_want;
int error = 0;
- int length;
- int i;
- int j;
- struct xfs_da_geometry *geo = args->geo;
-
- /*
- * If the caller just finished processing a buffer, it will tell us
- * we need to trim that block out of the mapping now it is done.
- */
- if (trim_map) {
- mip->map_blocks -= geo->fsbcount;
- /*
- * Loop to get rid of the extents for the
- * directory block.
- */
- for (i = geo->fsbcount; i > 0; ) {
- j = min_t(int, map->br_blockcount, i);
- map->br_blockcount -= j;
- map->br_startblock += j;
- map->br_startoff += j;
- /*
- * If mapping is done, pitch it from
- * the table.
- */
- if (!map->br_blockcount && --mip->map_valid)
- memmove(&map[0], &map[1],
- sizeof(map[0]) * mip->map_valid);
- i -= j;
- }
- }
- /*
- * Recalculate the readahead blocks wanted.
- */
- mip->ra_want = howmany(bufsize + geo->blksize, (1 << geo->fsblog)) - 1;
- ASSERT(mip->ra_want >= 0);
-
- /*
- * If we don't have as many as we want, and we haven't
- * run out of data blocks, get some more mappings.
- */
- if (1 + mip->ra_want > mip->map_blocks &&
- mip->map_off < xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET)) {
- /*
- * Get more bmaps, fill in after the ones
- * we already have in the table.
- */
- mip->nmap = mip->map_size - mip->map_valid;
- error = xfs_bmapi_read(dp, mip->map_off,
- xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET) -
- mip->map_off,
- &map[mip->map_valid], &mip->nmap, 0);
-
- /*
- * Don't know if we should ignore this or try to return an
- * error. The trouble with returning errors is that readdir
- * will just stop without actually passing the error through.
- */
+ if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+ error = xfs_iread_extents(args->trans, dp, XFS_DATA_FORK);
if (error)
- goto out; /* XXX */
-
- /*
- * If we got all the mappings we asked for, set the final map
- * offset based on the last bmap value received. Otherwise,
- * we've reached the end.
- */
- if (mip->nmap == mip->map_size - mip->map_valid) {
- i = mip->map_valid + mip->nmap - 1;
- mip->map_off = map[i].br_startoff + map[i].br_blockcount;
- } else
- mip->map_off = xfs_dir2_byte_to_da(geo,
- XFS_DIR2_LEAF_OFFSET);
-
- /*
- * Look for holes in the mapping, and eliminate them. Count up
- * the valid blocks.
- */
- for (i = mip->map_valid; i < mip->map_valid + mip->nmap; ) {
- if (map[i].br_startblock == HOLESTARTBLOCK) {
- mip->nmap--;
- length = mip->map_valid + mip->nmap - i;
- if (length)
- memmove(&map[i], &map[i + 1],
- sizeof(map[i]) * length);
- } else {
- mip->map_blocks += map[i].br_blockcount;
- i++;
- }
- }
- mip->map_valid += mip->nmap;
+ goto out;
}
/*
- * No valid mappings, so no more data blocks.
+ * Look for mapped directory blocks at or above the current offset.
+ * Truncate down to the nearest directory block to start the scanning
+ * operation.
*/
- if (!mip->map_valid) {
- *curoff = xfs_dir2_da_to_byte(geo, mip->map_off);
+ last_da = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET);
+ map_off = xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, *cur_off));
+ if (!xfs_iext_lookup_extent(dp, ifp, map_off, &idx, &map))
goto out;
- }
+ if (map.br_startoff >= last_da)
+ goto out;
+ xfs_trim_extent(&map, map_off, last_da - map_off);
- /*
- * Read the directory block starting at the first mapping.
- */
- mip->curdb = xfs_dir2_da_to_db(geo, map->br_startoff);
- error = xfs_dir3_data_read(NULL, dp, map->br_startoff,
- map->br_blockcount >= geo->fsbcount ?
- XFS_FSB_TO_DADDR(dp->i_mount, map->br_startblock) :
- -1, &bp);
- /*
- * Should just skip over the data block instead of giving up.
- */
+ /* Read the directory block of that first mapping. */
+ new_off = xfs_dir2_da_to_byte(geo, map.br_startoff);
+ if (new_off > *cur_off)
+ *cur_off = new_off;
+ error = xfs_dir3_data_read(args->trans, dp, map.br_startoff, -1, &bp);
if (error)
- goto out; /* XXX */
-
- /*
- * Adjust the current amount of read-ahead: we just read a block that
- * was previously ra.
- */
- if (mip->ra_current)
- mip->ra_current -= geo->fsbcount;
+ goto out;
/*
- * Do we need more readahead?
- * Each loop tries to process 1 full dir blk; last may be partial.
+ * Start readahead for the next bufsize's worth of dir data blocks.
+ * We may have already issued readahead for some of that range;
+ * ra_blk tracks the last block we tried to read(ahead).
*/
+ ra_want = howmany(bufsize + geo->blksize, (1 << geo->fsblog));
+ if (*ra_blk >= last_da)
+ goto out;
+ else if (*ra_blk == 0)
+ *ra_blk = map.br_startoff;
+ next_ra = map.br_startoff + geo->fsbcount;
+ if (next_ra >= last_da)
+ goto out_no_ra;
+ if (map.br_blockcount < geo->fsbcount &&
+ !xfs_iext_get_extent(ifp, ++idx, &map))
+ goto out_no_ra;
+ if (map.br_startoff >= last_da)
+ goto out_no_ra;
+ xfs_trim_extent(&map, next_ra, last_da - next_ra);
+
+ /* Start ra for each dir (not fs) block that has a mapping. */
blk_start_plug(&plug);
- for (mip->ra_index = mip->ra_offset = i = 0;
- mip->ra_want > mip->ra_current && i < mip->map_blocks;
- i += geo->fsbcount) {
- ASSERT(mip->ra_index < mip->map_valid);
- /*
- * Read-ahead a contiguous directory block.
- */
- if (i > mip->ra_current &&
- (map[mip->ra_index].br_blockcount - mip->ra_offset) >=
- geo->fsbcount) {
- xfs_dir3_data_readahead(dp,
- map[mip->ra_index].br_startoff + mip->ra_offset,
- XFS_FSB_TO_DADDR(dp->i_mount,
- map[mip->ra_index].br_startblock +
- mip->ra_offset));
- mip->ra_current = i;
- }
-
- /*
- * Read-ahead a non-contiguous directory block. This doesn't
- * use our mapping, but this is a very rare case.
- */
- else if (i > mip->ra_current) {
- xfs_dir3_data_readahead(dp,
- map[mip->ra_index].br_startoff +
- mip->ra_offset, -1);
- mip->ra_current = i;
- }
-
- /*
- * Advance offset through the mapping table, processing a full
- * dir block even if it is fragmented into several extents.
- * But stop if we have consumed all valid mappings, even if
- * it's not yet a full directory block.
- */
- for (j = 0;
- j < geo->fsbcount && mip->ra_index < mip->map_valid;
- j += length ) {
- /*
- * The rest of this extent but not more than a dir
- * block.
- */
- length = min_t(int, geo->fsbcount - j,
- map[mip->ra_index].br_blockcount -
- mip->ra_offset);
- mip->ra_offset += length;
-
- /*
- * Advance to the next mapping if this one is used up.
- */
- if (mip->ra_offset == map[mip->ra_index].br_blockcount) {
- mip->ra_offset = 0;
- mip->ra_index++;
+ while (ra_want > 0) {
+ next_ra = roundup((xfs_dablk_t)map.br_startoff, geo->fsbcount);
+ while (ra_want > 0 &&
+ next_ra < map.br_startoff + map.br_blockcount) {
+ if (next_ra >= last_da) {
+ *ra_blk = last_da;
+ break;
}
+ if (next_ra > *ra_blk) {
+ xfs_dir3_data_readahead(dp, next_ra, -2);
+ *ra_blk = next_ra;
+ }
+ ra_want -= geo->fsbcount;
+ next_ra += geo->fsbcount;
+ }
+ if (!xfs_iext_get_extent(ifp, ++idx, &map)) {
+ *ra_blk = last_da;
+ break;
}
}
blk_finish_plug(&plug);
@@ -458,6 +344,9 @@ xfs_dir2_leaf_readbuf(
out:
*bpp = bp;
return error;
+out_no_ra:
+ *ra_blk = last_da;
+ goto out;
}
/*
@@ -475,14 +364,14 @@ xfs_dir2_leaf_getdents(
xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_data_entry_t *dep; /* data entry */
xfs_dir2_data_unused_t *dup; /* unused entry */
- int error = 0; /* error return value */
- int length; /* temporary length value */
- int byteoff; /* offset in current block */
- xfs_dir2_off_t curoff; /* current overall offset */
- xfs_dir2_off_t newoff; /* new curoff after new blk */
char *ptr = NULL; /* pointer to current data */
- struct xfs_dir2_leaf_map_info *map_info;
struct xfs_da_geometry *geo = args->geo;
+ xfs_dablk_t rablk = 0; /* current readahead block */
+ xfs_dir2_off_t curoff; /* current overall offset */
+ int length; /* temporary length value */
+ int byteoff; /* offset in current block */
+ int lock_mode;
+ int error = 0; /* error return value */
/*
* If the offset is at or past the largest allowed value,
@@ -492,73 +381,35 @@ xfs_dir2_leaf_getdents(
return 0;
/*
- * Set up to bmap a number of blocks based on the caller's
- * buffer size, the directory block size, and the filesystem
- * block size.
- */
- length = howmany(bufsize + geo->blksize, (1 << geo->fsblog));
- map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) +
- (length * sizeof(struct xfs_bmbt_irec)),
- KM_SLEEP | KM_NOFS);
- map_info->map_size = length;
-
- /*
* Inside the loop we keep the main offset value as a byte offset
* in the directory file.
*/
curoff = xfs_dir2_dataptr_to_byte(ctx->pos);
/*
- * Force this conversion through db so we truncate the offset
- * down to get the start of the data block.
- */
- map_info->map_off = xfs_dir2_db_to_da(geo,
- xfs_dir2_byte_to_db(geo, curoff));
-
- /*
* Loop over directory entries until we reach the end offset.
* Get more blocks and readahead as necessary.
*/
while (curoff < XFS_DIR2_LEAF_OFFSET) {
- __uint8_t filetype;
+ uint8_t filetype;
/*
* If we have no buffer, or we're off the end of the
* current buffer, need to get another one.
*/
if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) {
- int lock_mode;
- bool trim_map = false;
-
if (bp) {
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(args->trans, bp);
bp = NULL;
- trim_map = true;
}
lock_mode = xfs_ilock_data_map_shared(dp);
- error = xfs_dir2_leaf_readbuf(args, bufsize, map_info,
- &curoff, &bp, trim_map);
+ error = xfs_dir2_leaf_readbuf(args, bufsize, &curoff,
+ &rablk, &bp);
xfs_iunlock(dp, lock_mode);
- if (error || !map_info->map_valid)
+ if (error || !bp)
break;
- /*
- * Having done a read, we need to set a new offset.
- */
- newoff = xfs_dir2_db_off_to_byte(geo,
- map_info->curdb, 0);
- /*
- * Start of the current block.
- */
- if (curoff < newoff)
- curoff = newoff;
- /*
- * Make sure we're in the right block.
- */
- else if (curoff > newoff)
- ASSERT(xfs_dir2_byte_to_db(geo, curoff) ==
- map_info->curdb);
hdr = bp->b_addr;
xfs_dir3_data_check(dp, bp);
/*
@@ -643,17 +494,22 @@ xfs_dir2_leaf_getdents(
ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff;
else
ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;
- kmem_free(map_info);
if (bp)
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(args->trans, bp);
return error;
}
/*
* Read a directory.
+ *
+ * If supplied, the transaction collects locked dir buffers to avoid
+ * nested buffer deadlocks. This function does not dirty the
+ * transaction. The caller should ensure that the inode is locked
+ * before calling this function.
*/
int
xfs_readdir(
+ struct xfs_trans *tp,
struct xfs_inode *dp,
struct dir_context *ctx,
size_t bufsize)
@@ -672,6 +528,7 @@ xfs_readdir(
args.dp = dp;
args.geo = dp->i_mount->m_dir_geo;
+ args.trans = tp;
if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
rval = xfs_dir2_sf_getdents(&args, ctx);
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 6a05d278da64..b2cde5426182 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -39,7 +39,7 @@ xfs_trim_extents(
xfs_daddr_t start,
xfs_daddr_t end,
xfs_daddr_t minlen,
- __uint64_t *blocks_trimmed)
+ uint64_t *blocks_trimmed)
{
struct block_device *bdev = mp->m_ddev_targp->bt_bdev;
struct xfs_btree_cur *cur;
@@ -166,7 +166,7 @@ xfs_ioc_trim(
struct fstrim_range range;
xfs_daddr_t start, end, minlen;
xfs_agnumber_t start_agno, end_agno, agno;
- __uint64_t blocks_trimmed = 0;
+ uint64_t blocks_trimmed = 0;
int error, last_error = 0;
if (!capable(CAP_SYS_ADMIN))
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 9d06cc30e875..f89f7b5241e6 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -276,7 +276,7 @@ xfs_qm_init_dquot_blk(
void
xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)
{
- __uint64_t space;
+ uint64_t space;
dqp->q_prealloc_hi_wmark = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
dqp->q_prealloc_lo_wmark = be64_to_cpu(dqp->q_core.d_blk_softlimit);
@@ -472,18 +472,23 @@ xfs_qm_dqtobp(
struct xfs_mount *mp = dqp->q_mount;
xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id);
struct xfs_trans *tp = (tpp ? *tpp : NULL);
- uint lock_mode;
+ uint lock_mode = 0;
quotip = xfs_quota_inode(dqp->q_mount, dqp->dq_flags);
dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
- lock_mode = xfs_ilock_data_map_shared(quotip);
+ ASSERT(!(flags & XFS_QMOPT_NOLOCK) ||
+ xfs_isilocked(quotip, XFS_ILOCK_SHARED) ||
+ xfs_isilocked(quotip, XFS_ILOCK_EXCL));
+ if (!(flags & XFS_QMOPT_NOLOCK))
+ lock_mode = xfs_ilock_data_map_shared(quotip);
if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
/*
* Return if this type of quotas is turned off while we
* didn't have the quota inode lock.
*/
- xfs_iunlock(quotip, lock_mode);
+ if (lock_mode)
+ xfs_iunlock(quotip, lock_mode);
return -ESRCH;
}
@@ -493,7 +498,8 @@ xfs_qm_dqtobp(
error = xfs_bmapi_read(quotip, dqp->q_fileoffset,
XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0);
- xfs_iunlock(quotip, lock_mode);
+ if (lock_mode)
+ xfs_iunlock(quotip, lock_mode);
if (error)
return error;
@@ -695,21 +701,18 @@ error0:
*/
static int
xfs_dq_get_next_id(
- xfs_mount_t *mp,
+ struct xfs_mount *mp,
uint type,
- xfs_dqid_t *id,
- loff_t eof)
+ xfs_dqid_t *id)
{
- struct xfs_inode *quotip;
+ struct xfs_inode *quotip = xfs_quota_inode(mp, type);
+ xfs_dqid_t next_id = *id + 1; /* simple advance */
+ uint lock_flags;
+ struct xfs_bmbt_irec got;
+ xfs_extnum_t idx;
xfs_fsblock_t start;
- loff_t offset;
- uint lock;
- xfs_dqid_t next_id;
int error = 0;
- /* Simple advance */
- next_id = *id + 1;
-
/* If we'd wrap past the max ID, stop */
if (next_id < *id)
return -ENOENT;
@@ -723,23 +726,25 @@ xfs_dq_get_next_id(
/* Nope, next_id is now past the current chunk, so find the next one */
start = (xfs_fsblock_t)next_id / mp->m_quotainfo->qi_dqperchunk;
- quotip = xfs_quota_inode(mp, type);
- lock = xfs_ilock_data_map_shared(quotip);
-
- offset = __xfs_seek_hole_data(VFS_I(quotip), XFS_FSB_TO_B(mp, start),
- eof, SEEK_DATA);
- if (offset < 0)
- error = offset;
+ lock_flags = xfs_ilock_data_map_shared(quotip);
+ if (!(quotip->i_df.if_flags & XFS_IFEXTENTS)) {
+ error = xfs_iread_extents(NULL, quotip, XFS_DATA_FORK);
+ if (error)
+ return error;
+ }
- xfs_iunlock(quotip, lock);
+ if (xfs_iext_lookup_extent(quotip, &quotip->i_df, start, &idx, &got)) {
+ /* contiguous chunk, bump startoff for the id calculation */
+ if (got.br_startoff < start)
+ got.br_startoff = start;
+ *id = got.br_startoff * mp->m_quotainfo->qi_dqperchunk;
+ } else {
+ error = -ENOENT;
+ }
- /* -ENXIO is essentially "no more data" */
- if (error)
- return (error == -ENXIO ? -ENOENT: error);
+ xfs_iunlock(quotip, lock_flags);
- /* Convert next data offset back to a quota id */
- *id = XFS_B_TO_FSB(mp, offset) * mp->m_quotainfo->qi_dqperchunk;
- return 0;
+ return error;
}
/*
@@ -762,7 +767,6 @@ xfs_qm_dqget(
struct xfs_quotainfo *qi = mp->m_quotainfo;
struct radix_tree_root *tree = xfs_dquot_tree(qi, type);
struct xfs_dquot *dqp;
- loff_t eof = 0;
int error;
ASSERT(XFS_IS_QUOTA_RUNNING(mp));
@@ -790,21 +794,6 @@ xfs_qm_dqget(
}
#endif
- /* Get the end of the quota file if we need it */
- if (flags & XFS_QMOPT_DQNEXT) {
- struct xfs_inode *quotip;
- xfs_fileoff_t last;
- uint lock_mode;
-
- quotip = xfs_quota_inode(mp, type);
- lock_mode = xfs_ilock_data_map_shared(quotip);
- error = xfs_bmap_last_offset(quotip, &last, XFS_DATA_FORK);
- xfs_iunlock(quotip, lock_mode);
- if (error)
- return error;
- eof = XFS_FSB_TO_B(mp, last);
- }
-
restart:
mutex_lock(&qi->qi_tree_lock);
dqp = radix_tree_lookup(tree, id);
@@ -823,7 +812,7 @@ restart:
if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
xfs_dqunlock(dqp);
mutex_unlock(&qi->qi_tree_lock);
- error = xfs_dq_get_next_id(mp, type, &id, eof);
+ error = xfs_dq_get_next_id(mp, type, &id);
if (error)
return error;
goto restart;
@@ -858,7 +847,7 @@ restart:
/* If we are asked to find next active id, keep looking */
if (error == -ENOENT && (flags & XFS_QMOPT_DQNEXT)) {
- error = xfs_dq_get_next_id(mp, type, &id, eof);
+ error = xfs_dq_get_next_id(mp, type, &id);
if (!error)
goto restart;
}
@@ -917,7 +906,7 @@ restart:
if (flags & XFS_QMOPT_DQNEXT) {
if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
xfs_qm_dqput(dqp);
- error = xfs_dq_get_next_id(mp, type, &id, eof);
+ error = xfs_dq_get_next_id(mp, type, &id);
if (error)
return error;
goto restart;
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index ed7ee4e8af73..2f4feb959bfb 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -22,103 +22,280 @@
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_error.h"
+#include "xfs_sysfs.h"
#ifdef DEBUG
-int xfs_etest[XFS_NUM_INJECT_ERROR];
-int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR];
-char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR];
-int xfs_error_test_active;
+static unsigned int xfs_errortag_random_default[] = {
+ XFS_RANDOM_DEFAULT,
+ XFS_RANDOM_IFLUSH_1,
+ XFS_RANDOM_IFLUSH_2,
+ XFS_RANDOM_IFLUSH_3,
+ XFS_RANDOM_IFLUSH_4,
+ XFS_RANDOM_IFLUSH_5,
+ XFS_RANDOM_IFLUSH_6,
+ XFS_RANDOM_DA_READ_BUF,
+ XFS_RANDOM_BTREE_CHECK_LBLOCK,
+ XFS_RANDOM_BTREE_CHECK_SBLOCK,
+ XFS_RANDOM_ALLOC_READ_AGF,
+ XFS_RANDOM_IALLOC_READ_AGI,
+ XFS_RANDOM_ITOBP_INOTOBP,
+ XFS_RANDOM_IUNLINK,
+ XFS_RANDOM_IUNLINK_REMOVE,
+ XFS_RANDOM_DIR_INO_VALIDATE,
+ XFS_RANDOM_BULKSTAT_READ_CHUNK,
+ XFS_RANDOM_IODONE_IOERR,
+ XFS_RANDOM_STRATREAD_IOERR,
+ XFS_RANDOM_STRATCMPL_IOERR,
+ XFS_RANDOM_DIOWRITE_IOERR,
+ XFS_RANDOM_BMAPIFORMAT,
+ XFS_RANDOM_FREE_EXTENT,
+ XFS_RANDOM_RMAP_FINISH_ONE,
+ XFS_RANDOM_REFCOUNT_CONTINUE_UPDATE,
+ XFS_RANDOM_REFCOUNT_FINISH_ONE,
+ XFS_RANDOM_BMAP_FINISH_ONE,
+ XFS_RANDOM_AG_RESV_CRITICAL,
+ XFS_RANDOM_DROP_WRITES,
+ XFS_RANDOM_LOG_BAD_CRC,
+};
-int
-xfs_error_test(int error_tag, int *fsidp, char *expression,
- int line, char *file, unsigned long randfactor)
+struct xfs_errortag_attr {
+ struct attribute attr;
+ unsigned int tag;
+};
+
+static inline struct xfs_errortag_attr *
+to_attr(struct attribute *attr)
{
- int i;
- int64_t fsid;
+ return container_of(attr, struct xfs_errortag_attr, attr);
+}
- if (prandom_u32() % randfactor)
- return 0;
+static inline struct xfs_mount *
+to_mp(struct kobject *kobject)
+{
+ struct xfs_kobj *kobj = to_kobj(kobject);
- memcpy(&fsid, fsidp, sizeof(xfs_fsid_t));
+ return container_of(kobj, struct xfs_mount, m_errortag_kobj);
+}
+
+STATIC ssize_t
+xfs_errortag_attr_store(
+ struct kobject *kobject,
+ struct attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct xfs_mount *mp = to_mp(kobject);
+ struct xfs_errortag_attr *xfs_attr = to_attr(attr);
+ int ret;
+ unsigned int val;
- for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) {
- if (xfs_etest[i] == error_tag && xfs_etest_fsid[i] == fsid) {
- xfs_warn(NULL,
- "Injecting error (%s) at file %s, line %d, on filesystem \"%s\"",
- expression, file, line, xfs_etest_fsname[i]);
- return 1;
- }
+ if (strcmp(buf, "default") == 0) {
+ val = xfs_errortag_random_default[xfs_attr->tag];
+ } else {
+ ret = kstrtouint(buf, 0, &val);
+ if (ret)
+ return ret;
}
- return 0;
+ ret = xfs_errortag_set(mp, xfs_attr->tag, val);
+ if (ret)
+ return ret;
+ return count;
}
+STATIC ssize_t
+xfs_errortag_attr_show(
+ struct kobject *kobject,
+ struct attribute *attr,
+ char *buf)
+{
+ struct xfs_mount *mp = to_mp(kobject);
+ struct xfs_errortag_attr *xfs_attr = to_attr(attr);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n",
+ xfs_errortag_get(mp, xfs_attr->tag));
+}
+
+static const struct sysfs_ops xfs_errortag_sysfs_ops = {
+ .show = xfs_errortag_attr_show,
+ .store = xfs_errortag_attr_store,
+};
+
+#define XFS_ERRORTAG_ATTR_RW(_name, _tag) \
+static struct xfs_errortag_attr xfs_errortag_attr_##_name = { \
+ .attr = {.name = __stringify(_name), \
+ .mode = VERIFY_OCTAL_PERMISSIONS(S_IWUSR | S_IRUGO) }, \
+ .tag = (_tag), \
+}
+
+#define XFS_ERRORTAG_ATTR_LIST(_name) &xfs_errortag_attr_##_name.attr
+
+XFS_ERRORTAG_ATTR_RW(noerror, XFS_ERRTAG_NOERROR);
+XFS_ERRORTAG_ATTR_RW(iflush1, XFS_ERRTAG_IFLUSH_1);
+XFS_ERRORTAG_ATTR_RW(iflush2, XFS_ERRTAG_IFLUSH_2);
+XFS_ERRORTAG_ATTR_RW(iflush3, XFS_ERRTAG_IFLUSH_3);
+XFS_ERRORTAG_ATTR_RW(iflush4, XFS_ERRTAG_IFLUSH_4);
+XFS_ERRORTAG_ATTR_RW(iflush5, XFS_ERRTAG_IFLUSH_5);
+XFS_ERRORTAG_ATTR_RW(iflush6, XFS_ERRTAG_IFLUSH_6);
+XFS_ERRORTAG_ATTR_RW(dareadbuf, XFS_ERRTAG_DA_READ_BUF);
+XFS_ERRORTAG_ATTR_RW(btree_chk_lblk, XFS_ERRTAG_BTREE_CHECK_LBLOCK);
+XFS_ERRORTAG_ATTR_RW(btree_chk_sblk, XFS_ERRTAG_BTREE_CHECK_SBLOCK);
+XFS_ERRORTAG_ATTR_RW(readagf, XFS_ERRTAG_ALLOC_READ_AGF);
+XFS_ERRORTAG_ATTR_RW(readagi, XFS_ERRTAG_IALLOC_READ_AGI);
+XFS_ERRORTAG_ATTR_RW(itobp, XFS_ERRTAG_ITOBP_INOTOBP);
+XFS_ERRORTAG_ATTR_RW(iunlink, XFS_ERRTAG_IUNLINK);
+XFS_ERRORTAG_ATTR_RW(iunlinkrm, XFS_ERRTAG_IUNLINK_REMOVE);
+XFS_ERRORTAG_ATTR_RW(dirinovalid, XFS_ERRTAG_DIR_INO_VALIDATE);
+XFS_ERRORTAG_ATTR_RW(bulkstat, XFS_ERRTAG_BULKSTAT_READ_CHUNK);
+XFS_ERRORTAG_ATTR_RW(logiodone, XFS_ERRTAG_IODONE_IOERR);
+XFS_ERRORTAG_ATTR_RW(stratread, XFS_ERRTAG_STRATREAD_IOERR);
+XFS_ERRORTAG_ATTR_RW(stratcmpl, XFS_ERRTAG_STRATCMPL_IOERR);
+XFS_ERRORTAG_ATTR_RW(diowrite, XFS_ERRTAG_DIOWRITE_IOERR);
+XFS_ERRORTAG_ATTR_RW(bmapifmt, XFS_ERRTAG_BMAPIFORMAT);
+XFS_ERRORTAG_ATTR_RW(free_extent, XFS_ERRTAG_FREE_EXTENT);
+XFS_ERRORTAG_ATTR_RW(rmap_finish_one, XFS_ERRTAG_RMAP_FINISH_ONE);
+XFS_ERRORTAG_ATTR_RW(refcount_continue_update, XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE);
+XFS_ERRORTAG_ATTR_RW(refcount_finish_one, XFS_ERRTAG_REFCOUNT_FINISH_ONE);
+XFS_ERRORTAG_ATTR_RW(bmap_finish_one, XFS_ERRTAG_BMAP_FINISH_ONE);
+XFS_ERRORTAG_ATTR_RW(ag_resv_critical, XFS_ERRTAG_AG_RESV_CRITICAL);
+XFS_ERRORTAG_ATTR_RW(drop_writes, XFS_ERRTAG_DROP_WRITES);
+XFS_ERRORTAG_ATTR_RW(log_bad_crc, XFS_ERRTAG_LOG_BAD_CRC);
+
+static struct attribute *xfs_errortag_attrs[] = {
+ XFS_ERRORTAG_ATTR_LIST(noerror),
+ XFS_ERRORTAG_ATTR_LIST(iflush1),
+ XFS_ERRORTAG_ATTR_LIST(iflush2),
+ XFS_ERRORTAG_ATTR_LIST(iflush3),
+ XFS_ERRORTAG_ATTR_LIST(iflush4),
+ XFS_ERRORTAG_ATTR_LIST(iflush5),
+ XFS_ERRORTAG_ATTR_LIST(iflush6),
+ XFS_ERRORTAG_ATTR_LIST(dareadbuf),
+ XFS_ERRORTAG_ATTR_LIST(btree_chk_lblk),
+ XFS_ERRORTAG_ATTR_LIST(btree_chk_sblk),
+ XFS_ERRORTAG_ATTR_LIST(readagf),
+ XFS_ERRORTAG_ATTR_LIST(readagi),
+ XFS_ERRORTAG_ATTR_LIST(itobp),
+ XFS_ERRORTAG_ATTR_LIST(iunlink),
+ XFS_ERRORTAG_ATTR_LIST(iunlinkrm),
+ XFS_ERRORTAG_ATTR_LIST(dirinovalid),
+ XFS_ERRORTAG_ATTR_LIST(bulkstat),
+ XFS_ERRORTAG_ATTR_LIST(logiodone),
+ XFS_ERRORTAG_ATTR_LIST(stratread),
+ XFS_ERRORTAG_ATTR_LIST(stratcmpl),
+ XFS_ERRORTAG_ATTR_LIST(diowrite),
+ XFS_ERRORTAG_ATTR_LIST(bmapifmt),
+ XFS_ERRORTAG_ATTR_LIST(free_extent),
+ XFS_ERRORTAG_ATTR_LIST(rmap_finish_one),
+ XFS_ERRORTAG_ATTR_LIST(refcount_continue_update),
+ XFS_ERRORTAG_ATTR_LIST(refcount_finish_one),
+ XFS_ERRORTAG_ATTR_LIST(bmap_finish_one),
+ XFS_ERRORTAG_ATTR_LIST(ag_resv_critical),
+ XFS_ERRORTAG_ATTR_LIST(drop_writes),
+ XFS_ERRORTAG_ATTR_LIST(log_bad_crc),
+ NULL,
+};
+
+struct kobj_type xfs_errortag_ktype = {
+ .release = xfs_sysfs_release,
+ .sysfs_ops = &xfs_errortag_sysfs_ops,
+ .default_attrs = xfs_errortag_attrs,
+};
+
int
-xfs_errortag_add(unsigned int error_tag, xfs_mount_t *mp)
+xfs_errortag_init(
+ struct xfs_mount *mp)
{
- int i;
- int len;
- int64_t fsid;
+ mp->m_errortag = kmem_zalloc(sizeof(unsigned int) * XFS_ERRTAG_MAX,
+ KM_SLEEP | KM_MAYFAIL);
+ if (!mp->m_errortag)
+ return -ENOMEM;
- if (error_tag >= XFS_ERRTAG_MAX)
- return -EINVAL;
+ return xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype,
+ &mp->m_kobj, "errortag");
+}
- memcpy(&fsid, mp->m_fixedfsid, sizeof(xfs_fsid_t));
+void
+xfs_errortag_del(
+ struct xfs_mount *mp)
+{
+ xfs_sysfs_del(&mp->m_errortag_kobj);
+ kmem_free(mp->m_errortag);
+}
- for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) {
- if (xfs_etest_fsid[i] == fsid && xfs_etest[i] == error_tag) {
- xfs_warn(mp, "error tag #%d on", error_tag);
- return 0;
- }
- }
+bool
+xfs_errortag_test(
+ struct xfs_mount *mp,
+ const char *expression,
+ const char *file,
+ int line,
+ unsigned int error_tag)
+{
+ unsigned int randfactor;
- for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) {
- if (xfs_etest[i] == 0) {
- xfs_warn(mp, "Turned on XFS error tag #%d",
- error_tag);
- xfs_etest[i] = error_tag;
- xfs_etest_fsid[i] = fsid;
- len = strlen(mp->m_fsname);
- xfs_etest_fsname[i] = kmem_alloc(len + 1, KM_SLEEP);
- strcpy(xfs_etest_fsname[i], mp->m_fsname);
- xfs_error_test_active++;
- return 0;
- }
- }
+ /*
+ * To be able to use error injection anywhere, we need to ensure the
+ * error injection mechanism is already initialized.
+ *
+ * Code paths like I/O completion can be called before the
+ * initialization is complete, but being able to inject errors in such
+ * places is still useful.
+ */
+ if (!mp->m_errortag)
+ return false;
- xfs_warn(mp, "error tag overflow, too many turned on");
+ ASSERT(error_tag < XFS_ERRTAG_MAX);
+ randfactor = mp->m_errortag[error_tag];
+ if (!randfactor || prandom_u32() % randfactor)
+ return false;
- return 1;
+ xfs_warn_ratelimited(mp,
+"Injecting error (%s) at file %s, line %d, on filesystem \"%s\"",
+ expression, file, line, mp->m_fsname);
+ return true;
}
int
-xfs_errortag_clearall(xfs_mount_t *mp, int loud)
+xfs_errortag_get(
+ struct xfs_mount *mp,
+ unsigned int error_tag)
{
- int64_t fsid;
- int cleared = 0;
- int i;
-
- memcpy(&fsid, mp->m_fixedfsid, sizeof(xfs_fsid_t));
-
-
- for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) {
- if ((fsid == 0LL || xfs_etest_fsid[i] == fsid) &&
- xfs_etest[i] != 0) {
- cleared = 1;
- xfs_warn(mp, "Clearing XFS error tag #%d",
- xfs_etest[i]);
- xfs_etest[i] = 0;
- xfs_etest_fsid[i] = 0LL;
- kmem_free(xfs_etest_fsname[i]);
- xfs_etest_fsname[i] = NULL;
- xfs_error_test_active--;
- }
- }
+ if (error_tag >= XFS_ERRTAG_MAX)
+ return -EINVAL;
+
+ return mp->m_errortag[error_tag];
+}
+
+int
+xfs_errortag_set(
+ struct xfs_mount *mp,
+ unsigned int error_tag,
+ unsigned int tag_value)
+{
+ if (error_tag >= XFS_ERRTAG_MAX)
+ return -EINVAL;
- if (loud || cleared)
- xfs_warn(mp, "Cleared all XFS error tags for filesystem");
+ mp->m_errortag[error_tag] = tag_value;
+ return 0;
+}
+int
+xfs_errortag_add(
+ struct xfs_mount *mp,
+ unsigned int error_tag)
+{
+ if (error_tag >= XFS_ERRTAG_MAX)
+ return -EINVAL;
+
+ return xfs_errortag_set(mp, error_tag,
+ xfs_errortag_random_default[error_tag]);
+}
+
+int
+xfs_errortag_clearall(
+ struct xfs_mount *mp)
+{
+ memset(mp->m_errortag, 0, sizeof(unsigned int) * XFS_ERRTAG_MAX);
return 0;
}
#endif /* DEBUG */
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 05f8666733a0..7577be5f09bc 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -96,7 +96,17 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
#define XFS_ERRTAG_REFCOUNT_FINISH_ONE 25
#define XFS_ERRTAG_BMAP_FINISH_ONE 26
#define XFS_ERRTAG_AG_RESV_CRITICAL 27
-#define XFS_ERRTAG_MAX 28
+/*
+ * DEBUG mode instrumentation to test and/or trigger delayed allocation
+ * block killing in the event of failed writes. When enabled, all
+ * buffered writes are silently dropped and handled as if they failed.
+ * All delalloc blocks in the range of the write (including pre-existing
+ * delalloc blocks!) are tossed as part of the write failure error
+ * handling sequence.
+ */
+#define XFS_ERRTAG_DROP_WRITES 28
+#define XFS_ERRTAG_LOG_BAD_CRC 29
+#define XFS_ERRTAG_MAX 30
/*
* Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
@@ -129,23 +139,29 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
#define XFS_RANDOM_REFCOUNT_FINISH_ONE 1
#define XFS_RANDOM_BMAP_FINISH_ONE 1
#define XFS_RANDOM_AG_RESV_CRITICAL 4
+#define XFS_RANDOM_DROP_WRITES 1
+#define XFS_RANDOM_LOG_BAD_CRC 1
#ifdef DEBUG
-extern int xfs_error_test_active;
-extern int xfs_error_test(int, int *, char *, int, char *, unsigned long);
-
-#define XFS_NUM_INJECT_ERROR 10
-#define XFS_TEST_ERROR(expr, mp, tag, rf) \
- ((expr) || (xfs_error_test_active && \
- xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \
- (rf))))
+extern int xfs_errortag_init(struct xfs_mount *mp);
+extern void xfs_errortag_del(struct xfs_mount *mp);
+extern bool xfs_errortag_test(struct xfs_mount *mp, const char *expression,
+ const char *file, int line, unsigned int error_tag);
+#define XFS_TEST_ERROR(expr, mp, tag) \
+ ((expr) || xfs_errortag_test((mp), #expr, __FILE__, __LINE__, (tag)))
-extern int xfs_errortag_add(unsigned int error_tag, struct xfs_mount *mp);
-extern int xfs_errortag_clearall(struct xfs_mount *mp, int loud);
+extern int xfs_errortag_get(struct xfs_mount *mp, unsigned int error_tag);
+extern int xfs_errortag_set(struct xfs_mount *mp, unsigned int error_tag,
+ unsigned int tag_value);
+extern int xfs_errortag_add(struct xfs_mount *mp, unsigned int error_tag);
+extern int xfs_errortag_clearall(struct xfs_mount *mp);
#else
-#define XFS_TEST_ERROR(expr, mp, tag, rf) (expr)
-#define xfs_errortag_add(tag, mp) (ENOSYS)
-#define xfs_errortag_clearall(mp, loud) (ENOSYS)
+#define xfs_errortag_init(mp) (0)
+#define xfs_errortag_del(mp)
+#define XFS_TEST_ERROR(expr, mp, tag) (expr)
+#define xfs_errortag_set(mp, tag, val) (ENOSYS)
+#define xfs_errortag_add(mp, tag) (ENOSYS)
+#define xfs_errortag_clearall(mp) (ENOSYS)
#endif /* DEBUG */
/*
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 5fb5a0958a14..c4893e226fd8 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -140,7 +140,7 @@ xfs_file_fsync(
trace_xfs_file_fsync(ip);
- error = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ error = file_write_and_wait_range(file, start, end);
if (error)
return error;
@@ -237,7 +237,11 @@ xfs_file_dax_read(
if (!count)
return 0; /* skip atime */
- xfs_ilock(ip, XFS_IOLOCK_SHARED);
+ if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
+ }
ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
@@ -541,7 +545,11 @@ xfs_file_dio_aio_write(
iolock = XFS_IOLOCK_SHARED;
}
- xfs_ilock(ip, iolock);
+ if (!xfs_ilock_nowait(ip, iolock)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ xfs_ilock(ip, iolock);
+ }
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
@@ -553,9 +561,15 @@ xfs_file_dio_aio_write(
* otherwise demote the lock if we had to take the exclusive lock
* for other reasons in xfs_file_aio_write_checks.
*/
- if (unaligned_io)
- inode_dio_wait(inode);
- else if (iolock == XFS_IOLOCK_EXCL) {
+ if (unaligned_io) {
+ /* If we are going to wait for other DIO to finish, bail */
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (atomic_read(&inode->i_dio_count))
+ return -EAGAIN;
+ } else {
+ inode_dio_wait(inode);
+ }
+ } else if (iolock == XFS_IOLOCK_EXCL) {
xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
iolock = XFS_IOLOCK_SHARED;
}
@@ -585,7 +599,12 @@ xfs_file_dax_write(
size_t count;
loff_t pos;
- xfs_ilock(ip, iolock);
+ if (!xfs_ilock_nowait(ip, iolock)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ xfs_ilock(ip, iolock);
+ }
+
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
goto out;
@@ -660,6 +679,7 @@ write_retry:
xfs_iunlock(ip, iolock);
eofb.eof_flags = XFS_EOF_FLAGS_SYNC;
xfs_icache_free_eofblocks(ip->i_mount, &eofb);
+ xfs_icache_free_cowblocks(ip->i_mount, &eofb);
goto write_retry;
}
@@ -892,6 +912,7 @@ xfs_file_open(
return -EFBIG;
if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
return -EIO;
+ file->f_mode |= FMODE_AIO_NOWAIT;
return 0;
}
@@ -950,362 +971,7 @@ xfs_file_readdir(
*/
bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);
- return xfs_readdir(ip, ctx, bufsize);
-}
-
-/*
- * This type is designed to indicate the type of offset we would like
- * to search from page cache for xfs_seek_hole_data().
- */
-enum {
- HOLE_OFF = 0,
- DATA_OFF,
-};
-
-/*
- * Lookup the desired type of offset from the given page.
- *
- * On success, return true and the offset argument will point to the
- * start of the region that was found. Otherwise this function will
- * return false and keep the offset argument unchanged.
- */
-STATIC bool
-xfs_lookup_buffer_offset(
- struct page *page,
- loff_t *offset,
- unsigned int type)
-{
- loff_t lastoff = page_offset(page);
- bool found = false;
- struct buffer_head *bh, *head;
-
- bh = head = page_buffers(page);
- do {
- /*
- * Unwritten extents that have data in the page
- * cache covering them can be identified by the
- * BH_Unwritten state flag. Pages with multiple
- * buffers might have a mix of holes, data and
- * unwritten extents - any buffer with valid
- * data in it should have BH_Uptodate flag set
- * on it.
- */
- if (buffer_unwritten(bh) ||
- buffer_uptodate(bh)) {
- if (type == DATA_OFF)
- found = true;
- } else {
- if (type == HOLE_OFF)
- found = true;
- }
-
- if (found) {
- *offset = lastoff;
- break;
- }
- lastoff += bh->b_size;
- } while ((bh = bh->b_this_page) != head);
-
- return found;
-}
-
-/*
- * This routine is called to find out and return a data or hole offset
- * from the page cache for unwritten extents according to the desired
- * type for xfs_seek_hole_data().
- *
- * The argument offset is used to tell where we start to search from the
- * page cache. Map is used to figure out the end points of the range to
- * lookup pages.
- *
- * Return true if the desired type of offset was found, and the argument
- * offset is filled with that address. Otherwise, return false and keep
- * offset unchanged.
- */
-STATIC bool
-xfs_find_get_desired_pgoff(
- struct inode *inode,
- struct xfs_bmbt_irec *map,
- unsigned int type,
- loff_t *offset)
-{
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- struct pagevec pvec;
- pgoff_t index;
- pgoff_t end;
- loff_t endoff;
- loff_t startoff = *offset;
- loff_t lastoff = startoff;
- bool found = false;
-
- pagevec_init(&pvec, 0);
-
- index = startoff >> PAGE_SHIFT;
- endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount);
- end = (endoff - 1) >> PAGE_SHIFT;
- do {
- int want;
- unsigned nr_pages;
- unsigned int i;
-
- want = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1;
- nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
- want);
- if (nr_pages == 0)
- break;
-
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
- loff_t b_offset;
-
- /*
- * At this point, the page may be truncated or
- * invalidated (changing page->mapping to NULL),
- * or even swizzled back from swapper_space to tmpfs
- * file mapping. However, page->index will not change
- * because we have a reference on the page.
- *
- * If current page offset is beyond where we've ended,
- * we've found a hole.
- */
- if (type == HOLE_OFF && lastoff < endoff &&
- lastoff < page_offset(pvec.pages[i])) {
- found = true;
- *offset = lastoff;
- goto out;
- }
- /* Searching done if the page index is out of range. */
- if (page->index > end)
- goto out;
-
- lock_page(page);
- /*
- * Page truncated or invalidated(page->mapping == NULL).
- * We can freely skip it and proceed to check the next
- * page.
- */
- if (unlikely(page->mapping != inode->i_mapping)) {
- unlock_page(page);
- continue;
- }
-
- if (!page_has_buffers(page)) {
- unlock_page(page);
- continue;
- }
-
- found = xfs_lookup_buffer_offset(page, &b_offset, type);
- if (found) {
- /*
- * The found offset may be less than the start
- * point to search if this is the first time to
- * come here.
- */
- *offset = max_t(loff_t, startoff, b_offset);
- unlock_page(page);
- goto out;
- }
-
- /*
- * We either searching data but nothing was found, or
- * searching hole but found a data buffer. In either
- * case, probably the next page contains the desired
- * things, update the last offset to it so.
- */
- lastoff = page_offset(page) + PAGE_SIZE;
- unlock_page(page);
- }
-
- /*
- * The number of returned pages less than our desired, search
- * done.
- */
- if (nr_pages < want)
- break;
-
- index = pvec.pages[i - 1]->index + 1;
- pagevec_release(&pvec);
- } while (index <= end);
-
- /* No page at lastoff and we are not done - we found a hole. */
- if (type == HOLE_OFF && lastoff < endoff) {
- *offset = lastoff;
- found = true;
- }
-out:
- pagevec_release(&pvec);
- return found;
-}
-
-/*
- * caller must lock inode with xfs_ilock_data_map_shared,
- * can we craft an appropriate ASSERT?
- *
- * end is because the VFS-level lseek interface is defined such that any
- * offset past i_size shall return -ENXIO, but we use this for quota code
- * which does not maintain i_size, and we want to SEEK_DATA past i_size.
- */
-loff_t
-__xfs_seek_hole_data(
- struct inode *inode,
- loff_t start,
- loff_t end,
- int whence)
-{
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- loff_t uninitialized_var(offset);
- xfs_fileoff_t fsbno;
- xfs_filblks_t lastbno;
- int error;
-
- if (start >= end) {
- error = -ENXIO;
- goto out_error;
- }
-
- /*
- * Try to read extents from the first block indicated
- * by fsbno to the end block of the file.
- */
- fsbno = XFS_B_TO_FSBT(mp, start);
- lastbno = XFS_B_TO_FSB(mp, end);
-
- for (;;) {
- struct xfs_bmbt_irec map[2];
- int nmap = 2;
- unsigned int i;
-
- error = xfs_bmapi_read(ip, fsbno, lastbno - fsbno, map, &nmap,
- XFS_BMAPI_ENTIRE);
- if (error)
- goto out_error;
-
- /* No extents at given offset, must be beyond EOF */
- if (nmap == 0) {
- error = -ENXIO;
- goto out_error;
- }
-
- for (i = 0; i < nmap; i++) {
- offset = max_t(loff_t, start,
- XFS_FSB_TO_B(mp, map[i].br_startoff));
-
- /* Landed in the hole we wanted? */
- if (whence == SEEK_HOLE &&
- map[i].br_startblock == HOLESTARTBLOCK)
- goto out;
-
- /* Landed in the data extent we wanted? */
- if (whence == SEEK_DATA &&
- (map[i].br_startblock == DELAYSTARTBLOCK ||
- (map[i].br_state == XFS_EXT_NORM &&
- !isnullstartblock(map[i].br_startblock))))
- goto out;
-
- /*
- * Landed in an unwritten extent, try to search
- * for hole or data from page cache.
- */
- if (map[i].br_state == XFS_EXT_UNWRITTEN) {
- if (xfs_find_get_desired_pgoff(inode, &map[i],
- whence == SEEK_HOLE ? HOLE_OFF : DATA_OFF,
- &offset))
- goto out;
- }
- }
-
- /*
- * We only received one extent out of the two requested. This
- * means we've hit EOF and didn't find what we are looking for.
- */
- if (nmap == 1) {
- /*
- * If we were looking for a hole, set offset to
- * the end of the file (i.e., there is an implicit
- * hole at the end of any file).
- */
- if (whence == SEEK_HOLE) {
- offset = end;
- break;
- }
- /*
- * If we were looking for data, it's nowhere to be found
- */
- ASSERT(whence == SEEK_DATA);
- error = -ENXIO;
- goto out_error;
- }
-
- ASSERT(i > 1);
-
- /*
- * Nothing was found, proceed to the next round of search
- * if the next reading offset is not at or beyond EOF.
- */
- fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
- start = XFS_FSB_TO_B(mp, fsbno);
- if (start >= end) {
- if (whence == SEEK_HOLE) {
- offset = end;
- break;
- }
- ASSERT(whence == SEEK_DATA);
- error = -ENXIO;
- goto out_error;
- }
- }
-
-out:
- /*
- * If at this point we have found the hole we wanted, the returned
- * offset may be bigger than the file size as it may be aligned to
- * page boundary for unwritten extents. We need to deal with this
- * situation in particular.
- */
- if (whence == SEEK_HOLE)
- offset = min_t(loff_t, offset, end);
-
- return offset;
-
-out_error:
- return error;
-}
-
-STATIC loff_t
-xfs_seek_hole_data(
- struct file *file,
- loff_t start,
- int whence)
-{
- struct inode *inode = file->f_mapping->host;
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- uint lock;
- loff_t offset, end;
- int error = 0;
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return -EIO;
-
- lock = xfs_ilock_data_map_shared(ip);
-
- end = i_size_read(inode);
- offset = __xfs_seek_hole_data(inode, start, end, whence);
- if (offset < 0) {
- error = offset;
- goto out_unlock;
- }
-
- offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
-
-out_unlock:
- xfs_iunlock(ip, lock);
-
- if (error)
- return error;
- return offset;
+ return xfs_readdir(NULL, ip, ctx, bufsize);
}
STATIC loff_t
@@ -1314,17 +980,25 @@ xfs_file_llseek(
loff_t offset,
int whence)
{
+ struct inode *inode = file->f_mapping->host;
+
+ if (XFS_FORCED_SHUTDOWN(XFS_I(inode)->i_mount))
+ return -EIO;
+
switch (whence) {
- case SEEK_END:
- case SEEK_CUR:
- case SEEK_SET:
+ default:
return generic_file_llseek(file, offset, whence);
case SEEK_HOLE:
+ offset = iomap_seek_hole(inode, offset, &xfs_iomap_ops);
+ break;
case SEEK_DATA:
- return xfs_seek_hole_data(file, offset, whence);
- default:
- return -EINVAL;
+ offset = iomap_seek_data(inode, offset, &xfs_iomap_ops);
+ break;
}
+
+ if (offset < 0)
+ return offset;
+ return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
}
/*
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 6ccaae9eb0ee..8f22fc579dbb 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -602,7 +602,7 @@ xfs_growfs_data_private(
if (nagimax)
mp->m_maxagi = nagimax;
if (mp->m_sb.sb_imax_pct) {
- __uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct;
+ uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct;
do_div(icount, 100);
mp->m_maxicount = icount << mp->m_sb.sb_inopblog;
} else
@@ -793,17 +793,17 @@ xfs_fs_counts(
int
xfs_reserve_blocks(
xfs_mount_t *mp,
- __uint64_t *inval,
+ uint64_t *inval,
xfs_fsop_resblks_t *outval)
{
- __int64_t lcounter, delta;
- __int64_t fdblks_delta = 0;
- __uint64_t request;
- __int64_t free;
+ int64_t lcounter, delta;
+ int64_t fdblks_delta = 0;
+ uint64_t request;
+ int64_t free;
int error = 0;
/* If inval is null, report current values and return */
- if (inval == (__uint64_t *)NULL) {
+ if (inval == (uint64_t *)NULL) {
if (!outval)
return -EINVAL;
outval->resblks = mp->m_resblks;
@@ -904,7 +904,7 @@ out:
int
xfs_fs_goingdown(
xfs_mount_t *mp,
- __uint32_t inflags)
+ uint32_t inflags)
{
switch (inflags) {
case XFS_FSOP_GOING_FLAGS_DEFAULT: {
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
index f34915898fea..2954c13a3acd 100644
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h
@@ -22,9 +22,9 @@ extern int xfs_fs_geometry(xfs_mount_t *mp, xfs_fsop_geom_t *geo, int nversion);
extern int xfs_growfs_data(xfs_mount_t *mp, xfs_growfs_data_t *in);
extern int xfs_growfs_log(xfs_mount_t *mp, xfs_growfs_log_t *in);
extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
-extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
+extern int xfs_reserve_blocks(xfs_mount_t *mp, uint64_t *inval,
xfs_fsop_resblks_t *outval);
-extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
+extern int xfs_fs_goingdown(xfs_mount_t *mp, uint32_t inflags);
extern int xfs_fs_reserve_ag_blocks(struct xfs_mount *mp);
extern int xfs_fs_unreserve_ag_blocks(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c
index 687a4b01fc53..3e1cc3001bcb 100644
--- a/fs/xfs/xfs_globals.c
+++ b/fs/xfs/xfs_globals.c
@@ -47,4 +47,9 @@ xfs_param_t xfs_params = {
struct xfs_globals xfs_globals = {
.log_recovery_delay = 0, /* no delay by default */
+#ifdef XFS_ASSERT_FATAL
+ .bug_on_assert = true, /* assert failures BUG() */
+#else
+ .bug_on_assert = false, /* assert failures WARN() */
+#endif
};
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index f61c84f8e31a..0a9e6985a0d0 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -66,7 +66,6 @@ xfs_inode_alloc(
XFS_STATS_INC(mp, vn_active);
ASSERT(atomic_read(&ip->i_pincount) == 0);
- ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(!xfs_isiflocked(ip));
ASSERT(ip->i_ino == 0);
@@ -190,7 +189,7 @@ xfs_perag_set_reclaim_tag(
{
struct xfs_mount *mp = pag->pag_mount;
- ASSERT(spin_is_locked(&pag->pag_ici_lock));
+ lockdep_assert_held(&pag->pag_ici_lock);
if (pag->pag_ici_reclaimable++)
return;
@@ -212,7 +211,7 @@ xfs_perag_clear_reclaim_tag(
{
struct xfs_mount *mp = pag->pag_mount;
- ASSERT(spin_is_locked(&pag->pag_ici_lock));
+ lockdep_assert_held(&pag->pag_ici_lock);
if (--pag->pag_ici_reclaimable)
return;
@@ -270,12 +269,12 @@ xfs_inew_wait(
DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_INEW_BIT);
do {
- prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
if (!xfs_iflags_test(ip, XFS_INEW))
break;
schedule();
} while (true);
- finish_wait(wq, &wait.wait);
+ finish_wait(wq, &wait.wq_entry);
}
/*
@@ -369,6 +368,11 @@ xfs_iget_cache_hit(
if (ip->i_flags & XFS_IRECLAIMABLE) {
trace_xfs_iget_reclaim(ip);
+ if (flags & XFS_IGET_INCORE) {
+ error = -EAGAIN;
+ goto out_error;
+ }
+
/*
* We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode
* from stomping over us while we recycle the inode. We can't
@@ -433,7 +437,8 @@ xfs_iget_cache_hit(
if (lock_flags != 0)
xfs_ilock(ip, lock_flags);
- xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE);
+ if (!(flags & XFS_IGET_INCORE))
+ xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE);
XFS_STATS_INC(mp, xs_ig_found);
return 0;
@@ -604,6 +609,10 @@ again:
goto out_error_or_again;
} else {
rcu_read_unlock();
+ if (flags & XFS_IGET_INCORE) {
+ error = -ENOENT;
+ goto out_error_or_again;
+ }
XFS_STATS_INC(mp, xs_ig_missed);
error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
@@ -624,7 +633,7 @@ again:
return 0;
out_error_or_again:
- if (error == -EAGAIN) {
+ if (!(flags & XFS_IGET_INCORE) && error == -EAGAIN) {
delay(1);
goto again;
}
@@ -633,6 +642,44 @@ out_error_or_again:
}
/*
+ * "Is this a cached inode that's also allocated?"
+ *
+ * Look up an inode by number in the given file system. If the inode is
+ * in cache and isn't in purgatory, return 1 if the inode is allocated
+ * and 0 if it is not. For all other cases (not in cache, being torn
+ * down, etc.), return a negative error code.
+ *
+ * The caller has to prevent inode allocation and freeing activity,
+ * presumably by locking the AGI buffer. This is to ensure that an
+ * inode cannot transition from allocated to freed until the caller is
+ * ready to allow that. If the inode is in an intermediate state (new,
+ * reclaimable, or being reclaimed), -EAGAIN will be returned; if the
+ * inode is not in the cache, -ENOENT will be returned. The caller must
+ * deal with these scenarios appropriately.
+ *
+ * This is a specialized use case for the online scrubber; if you're
+ * reading this, you probably want xfs_iget.
+ */
+int
+xfs_icache_inode_is_allocated(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ xfs_ino_t ino,
+ bool *inuse)
+{
+ struct xfs_inode *ip;
+ int error;
+
+ error = xfs_iget(mp, tp, ino, XFS_IGET_INCORE, 0, &ip);
+ if (error)
+ return error;
+
+ *inuse = !!(VFS_I(ip)->i_mode);
+ IRELE(ip);
+ return 0;
+}
+
+/*
* The inode lookup is done in batches to keep the amount of lock traffic and
* radix tree lookups to a minimum. The batch size is a trade off between
* lookup reduction and stack usage. This is in the reclaim path, so we can't
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 9183f77958ef..bff4d85e5498 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -47,6 +47,7 @@ struct xfs_eofblocks {
#define XFS_IGET_CREATE 0x1
#define XFS_IGET_UNTRUSTED 0x2
#define XFS_IGET_DONTCACHE 0x4
+#define XFS_IGET_INCORE 0x8 /* don't read from disk or reinit */
/*
* flags for AG inode iterator
@@ -126,4 +127,7 @@ xfs_fs_eofblocks_from_user(
return 0;
}
+int xfs_icache_inode_is_allocated(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_ino_t ino, bool *inuse);
+
#endif
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ec9826c56500..ceef77c0416a 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -622,17 +622,17 @@ __xfs_iflock(
DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
do {
- prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_wait_exclusive(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
if (xfs_isiflocked(ip))
io_schedule();
} while (!xfs_iflock_nowait(ip));
- finish_wait(wq, &wait.wait);
+ finish_wait(wq, &wait.wq_entry);
}
STATIC uint
_xfs_dic2xflags(
- __uint16_t di_flags,
+ uint16_t di_flags,
uint64_t di_flags2,
bool has_attr)
{
@@ -855,8 +855,8 @@ xfs_ialloc(
inode->i_version = 1;
ip->i_d.di_flags2 = 0;
ip->i_d.di_cowextsize = 0;
- ip->i_d.di_crtime.t_sec = (__int32_t)tv.tv_sec;
- ip->i_d.di_crtime.t_nsec = (__int32_t)tv.tv_nsec;
+ ip->i_d.di_crtime.t_sec = (int32_t)tv.tv_sec;
+ ip->i_d.di_crtime.t_nsec = (int32_t)tv.tv_nsec;
}
@@ -2486,11 +2486,11 @@ __xfs_iunpin_wait(
xfs_iunpin(ip);
do {
- prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
if (xfs_ipincount(ip))
io_schedule();
} while (xfs_ipincount(ip));
- finish_wait(wq, &wait.wait);
+ finish_wait(wq, &wait.wq_entry);
}
void
@@ -3489,7 +3489,7 @@ xfs_iflush_int(
dip = xfs_buf_offset(bp, ip->i_imap.im_boffset);
if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
- mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
+ mp, XFS_ERRTAG_IFLUSH_1)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: Bad inode %Lu magic number 0x%x, ptr 0x%p",
__func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
@@ -3499,7 +3499,7 @@ xfs_iflush_int(
if (XFS_TEST_ERROR(
(ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
(ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
- mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) {
+ mp, XFS_ERRTAG_IFLUSH_3)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: Bad regular inode %Lu, ptr 0x%p",
__func__, ip->i_ino, ip);
@@ -3510,7 +3510,7 @@ xfs_iflush_int(
(ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
(ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
(ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
- mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) {
+ mp, XFS_ERRTAG_IFLUSH_4)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: Bad directory inode %Lu, ptr 0x%p",
__func__, ip->i_ino, ip);
@@ -3518,8 +3518,7 @@ xfs_iflush_int(
}
}
if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents >
- ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5,
- XFS_RANDOM_IFLUSH_5)) {
+ ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: detected corrupt incore inode %Lu, "
"total extents = %d, nblocks = %Ld, ptr 0x%p",
@@ -3529,7 +3528,7 @@ xfs_iflush_int(
goto corrupt_out;
}
if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize,
- mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) {
+ mp, XFS_ERRTAG_IFLUSH_6)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p",
__func__, ip->i_ino, ip->i_d.di_forkoff, ip);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 10e89fcb49d7..0ee453de239a 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -192,8 +192,8 @@ static inline void
xfs_set_projid(struct xfs_inode *ip,
prid_t projid)
{
- ip->i_d.di_projid_hi = (__uint16_t) (projid >> 16);
- ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff);
+ ip->i_d.di_projid_hi = (uint16_t) (projid >> 16);
+ ip->i_d.di_projid_lo = (uint16_t) (projid & 0xffff);
}
static inline prid_t
@@ -445,9 +445,6 @@ int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
xfs_fsize_t isize, bool *did_zeroing);
int xfs_zero_range(struct xfs_inode *ip, xfs_off_t pos, xfs_off_t count,
bool *did_zero);
-loff_t __xfs_seek_hole_data(struct inode *inode, loff_t start,
- loff_t eof, int whence);
-
/* from xfs_iops.c */
extern void xfs_setup_inode(struct xfs_inode *ip);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 6190697603c9..9c0c7a920304 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -120,8 +120,7 @@ xfs_find_handle(
handle.ha_fid.fid_pad = 0;
handle.ha_fid.fid_gen = inode->i_generation;
handle.ha_fid.fid_ino = ip->i_ino;
-
- hsize = XFS_HSIZE(handle);
+ hsize = sizeof(xfs_handle_t);
}
error = -EFAULT;
@@ -444,8 +443,8 @@ xfs_attrmulti_attr_get(
struct inode *inode,
unsigned char *name,
unsigned char __user *ubuf,
- __uint32_t *len,
- __uint32_t flags)
+ uint32_t *len,
+ uint32_t flags)
{
unsigned char *kbuf;
int error = -EFAULT;
@@ -473,8 +472,8 @@ xfs_attrmulti_attr_set(
struct inode *inode,
unsigned char *name,
const unsigned char __user *ubuf,
- __uint32_t len,
- __uint32_t flags)
+ uint32_t len,
+ uint32_t flags)
{
unsigned char *kbuf;
int error;
@@ -499,7 +498,7 @@ int
xfs_attrmulti_attr_remove(
struct inode *inode,
unsigned char *name,
- __uint32_t flags)
+ uint32_t flags)
{
int error;
@@ -877,7 +876,7 @@ xfs_merge_ioc_xflags(
STATIC unsigned int
xfs_di2lxflags(
- __uint16_t di_flags)
+ uint16_t di_flags)
{
unsigned int flags = 0;
@@ -1288,7 +1287,7 @@ xfs_ioctl_setattr_check_projid(
struct fsxattr *fa)
{
/* Disallow 32bit project ids if projid32bit feature is not enabled. */
- if (fa->fsx_projid > (__uint16_t)-1 &&
+ if (fa->fsx_projid > (uint16_t)-1 &&
!xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
return -EINVAL;
@@ -1932,7 +1931,7 @@ xfs_file_ioctl(
case XFS_IOC_SET_RESBLKS: {
xfs_fsop_resblks_t inout;
- __uint64_t in;
+ uint64_t in;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -2018,12 +2017,12 @@ xfs_file_ioctl(
}
case XFS_IOC_GOINGDOWN: {
- __uint32_t in;
+ uint32_t in;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (get_user(in, (__uint32_t __user *)arg))
+ if (get_user(in, (uint32_t __user *)arg))
return -EFAULT;
return xfs_fs_goingdown(mp, in);
@@ -2038,14 +2037,14 @@ xfs_file_ioctl(
if (copy_from_user(&in, arg, sizeof(in)))
return -EFAULT;
- return xfs_errortag_add(in.errtag, mp);
+ return xfs_errortag_add(mp, in.errtag);
}
case XFS_IOC_ERROR_CLEARALL:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- return xfs_errortag_clearall(mp, 1);
+ return xfs_errortag_clearall(mp);
case XFS_IOC_FREE_EOFBLOCKS: {
struct xfs_fs_eofblocks eofb;
diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h
index 8b52881bfd90..e86c3ea137d2 100644
--- a/fs/xfs/xfs_ioctl.h
+++ b/fs/xfs/xfs_ioctl.h
@@ -48,22 +48,22 @@ xfs_attrmulti_attr_get(
struct inode *inode,
unsigned char *name,
unsigned char __user *ubuf,
- __uint32_t *len,
- __uint32_t flags);
+ uint32_t *len,
+ uint32_t flags);
extern int
xfs_attrmulti_attr_set(
struct inode *inode,
unsigned char *name,
const unsigned char __user *ubuf,
- __uint32_t len,
- __uint32_t flags);
+ uint32_t len,
+ uint32_t flags);
extern int
xfs_attrmulti_attr_remove(
struct inode *inode,
unsigned char *name,
- __uint32_t flags);
+ uint32_t flags);
extern struct dentry *
xfs_handle_to_dentry(
diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h
index b1bb45444df8..5492bcf6f442 100644
--- a/fs/xfs/xfs_ioctl32.h
+++ b/fs/xfs/xfs_ioctl32.h
@@ -112,9 +112,9 @@ typedef struct compat_xfs_fsop_handlereq {
/* The bstat field in the swapext struct needs translation */
typedef struct compat_xfs_swapext {
- __int64_t sx_version; /* version */
- __int64_t sx_fdtarget; /* fd of target file */
- __int64_t sx_fdtmp; /* fd of tmp file */
+ int64_t sx_version; /* version */
+ int64_t sx_fdtarget; /* fd of target file */
+ int64_t sx_fdtmp; /* fd of tmp file */
xfs_off_t sx_offset; /* offset into file */
xfs_off_t sx_length; /* leng from offset */
char sx_pad[16]; /* pad space, unused */
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 94e5bdf7304c..813394c62849 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -543,7 +543,7 @@ xfs_file_iomap_begin_delay(
if (unlikely(XFS_TEST_ERROR(
(XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
- mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+ mp, XFS_ERRTAG_BMAPIFORMAT))) {
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
error = -EFSCORRUPTED;
goto out_unlock;
@@ -995,6 +995,11 @@ xfs_file_iomap_begin(
lockmode = xfs_ilock_data_map_shared(ip);
}
+ if ((flags & IOMAP_NOWAIT) && !(ip->i_df.if_flags & XFS_IFEXTENTS)) {
+ error = -EAGAIN;
+ goto out_unlock;
+ }
+
ASSERT(offset <= mp->m_super->s_maxbytes);
if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
length = mp->m_super->s_maxbytes - offset;
@@ -1016,6 +1021,15 @@ xfs_file_iomap_begin(
if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
if (flags & IOMAP_DIRECT) {
+ /*
+ * A reflinked inode will result in CoW alloc.
+ * FIXME: It could still overwrite on unshared extents
+ * and not need allocation.
+ */
+ if (flags & IOMAP_NOWAIT) {
+ error = -EAGAIN;
+ goto out_unlock;
+ }
/* may drop and re-acquire the ilock */
error = xfs_reflink_allocate_cow(ip, &imap, &shared,
&lockmode);
@@ -1033,6 +1047,14 @@ xfs_file_iomap_begin(
if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
/*
+ * If nowait is set bail since we are going to make
+ * allocations.
+ */
+ if (flags & IOMAP_NOWAIT) {
+ error = -EAGAIN;
+ goto out_unlock;
+ }
+ /*
* We cap the maximum length we map here to MAX_WRITEBACK_PAGES
* pages to keep the chunks of work done where somewhat symmetric
* with the work writeback does. This is a completely arbitrary
@@ -1097,7 +1119,7 @@ xfs_file_iomap_end_delalloc(
* Behave as if the write failed if drop writes is enabled. Set the NEW
* flag to force delalloc cleanup.
*/
- if (xfs_mp_drop_writes(mp)) {
+ if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_DROP_WRITES)) {
iomap->flags |= IOMAP_F_NEW;
written = 0;
}
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index ebfc13350f9a..469c9fa4c178 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -190,12 +190,12 @@ xfs_generic_create(
#ifdef CONFIG_XFS_POSIX_ACL
if (default_acl) {
- error = xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
+ error = __xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
if (error)
goto out_cleanup_inode;
}
if (acl) {
- error = xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
+ error = __xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
if (error)
goto out_cleanup_inode;
}
@@ -460,7 +460,7 @@ xfs_vn_get_link(
if (!dentry)
return ERR_PTR(-ECHILD);
- link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
+ link = kmalloc(XFS_SYMLINK_MAXLEN+1, GFP_KERNEL);
if (!link)
goto out_err;
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 26d67ce3c18d..c393a2f6d8c3 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -31,7 +31,7 @@
#include "xfs_trace.h"
#include "xfs_icache.h"
-STATIC int
+int
xfs_internal_inum(
xfs_mount_t *mp,
xfs_ino_t ino)
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 6ea8b3912fa4..17e86e0541af 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -96,4 +96,6 @@ xfs_inumbers(
void __user *buffer, /* buffer with inode info */
inumbers_fmt_pf formatter);
+int xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino);
+
#endif /* __XFS_ITABLE_H__ */
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 2d167fe643ec..9301c5a6060b 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -24,14 +24,6 @@
/*
* Kernel specific type declarations for XFS
*/
-typedef signed char __int8_t;
-typedef unsigned char __uint8_t;
-typedef signed short int __int16_t;
-typedef unsigned short int __uint16_t;
-typedef signed int __int32_t;
-typedef unsigned int __uint32_t;
-typedef signed long long int __int64_t;
-typedef unsigned long long int __uint64_t;
typedef __s64 xfs_off_t; /* <file offset> type */
typedef unsigned long long xfs_ino_t; /* <inode> type */
@@ -151,7 +143,6 @@ typedef __u32 xfs_nlink_t;
#define __return_address __builtin_return_address(0)
#define XFS_PROJID_DEFAULT 0
-#define MAXPATHLEN 1024
#define MIN(a,b) (min(a,b))
#define MAX(a,b) (max(a,b))
@@ -186,22 +177,22 @@ extern struct xstats xfsstats;
* are converting to the init_user_ns. The uid is later mapped to a particular
* user namespace value when crossing the kernel/user boundary.
*/
-static inline __uint32_t xfs_kuid_to_uid(kuid_t uid)
+static inline uint32_t xfs_kuid_to_uid(kuid_t uid)
{
return from_kuid(&init_user_ns, uid);
}
-static inline kuid_t xfs_uid_to_kuid(__uint32_t uid)
+static inline kuid_t xfs_uid_to_kuid(uint32_t uid)
{
return make_kuid(&init_user_ns, uid);
}
-static inline __uint32_t xfs_kgid_to_gid(kgid_t gid)
+static inline uint32_t xfs_kgid_to_gid(kgid_t gid)
{
return from_kgid(&init_user_ns, gid);
}
-static inline kgid_t xfs_gid_to_kgid(__uint32_t gid)
+static inline kgid_t xfs_gid_to_kgid(uint32_t gid)
{
return make_kgid(&init_user_ns, gid);
}
@@ -231,14 +222,14 @@ static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
#define do_mod(a, b) xfs_do_mod(&(a), (b), sizeof(a))
-static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
+static inline uint64_t roundup_64(uint64_t x, uint32_t y)
{
x += y - 1;
do_div(x, y);
return x * y;
}
-static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
+static inline uint64_t howmany_64(uint64_t x, uint32_t y)
{
x += y - 1;
do_div(x, y);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 3731f13f63e9..0053bcf2b10a 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -434,7 +434,7 @@ xfs_log_reserve(
int unit_bytes,
int cnt,
struct xlog_ticket **ticp,
- __uint8_t client,
+ uint8_t client,
bool permanent)
{
struct xlog *log = mp->m_log;
@@ -825,9 +825,9 @@ xfs_log_unmount_write(xfs_mount_t *mp)
if (!error) {
/* the data section must be 32 bit size aligned */
struct {
- __uint16_t magic;
- __uint16_t pad1;
- __uint32_t pad2; /* may as well make it 64 bits */
+ uint16_t magic;
+ uint16_t pad1;
+ uint32_t pad2; /* may as well make it 64 bits */
} magic = {
.magic = XLOG_UNMOUNT_TYPE,
};
@@ -1189,8 +1189,7 @@ xlog_iodone(xfs_buf_t *bp)
* IOABORT state. The IOABORT state is only set in DEBUG mode to inject
* CRC errors into log recovery.
*/
- if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR,
- XFS_RANDOM_IODONE_IOERR) ||
+ if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR) ||
iclog->ic_state & XLOG_STATE_IOABORT) {
if (iclog->ic_state & XLOG_STATE_IOABORT)
iclog->ic_state &= ~XLOG_STATE_IOABORT;
@@ -1665,7 +1664,7 @@ xlog_cksum(
char *dp,
int size)
{
- __uint32_t crc;
+ uint32_t crc;
/* first generate the crc for the record header ... */
crc = xfs_start_cksum_update((char *)rhead,
@@ -1828,7 +1827,7 @@ xlog_sync(
*/
dptr = (char *)&iclog->ic_header + count;
for (i = 0; i < split; i += BBSIZE) {
- __uint32_t cycle = be32_to_cpu(*(__be32 *)dptr);
+ uint32_t cycle = be32_to_cpu(*(__be32 *)dptr);
if (++cycle == XLOG_HEADER_MAGIC_NUM)
cycle++;
*(__be32 *)dptr = cpu_to_be32(cycle);
@@ -1842,7 +1841,6 @@ xlog_sync(
/* calculcate the checksum */
iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header,
iclog->ic_datap, size);
-#ifdef DEBUG
/*
* Intentionally corrupt the log record CRC based on the error injection
* frequency, if defined. This facilitates testing log recovery in the
@@ -1850,15 +1848,13 @@ xlog_sync(
* write on I/O completion and shutdown the fs. The subsequent mount
* detects the bad CRC and attempts to recover.
*/
- if (log->l_badcrc_factor &&
- (prandom_u32() % log->l_badcrc_factor == 0)) {
+ if (XFS_TEST_ERROR(false, log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) {
iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA);
iclog->ic_state |= XLOG_STATE_IOABORT;
xfs_warn(log->l_mp,
"Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.",
be64_to_cpu(iclog->ic_header.h_lsn));
}
-#endif
bp->b_io_length = BTOBB(count);
bp->b_fspriv = iclog;
@@ -2024,7 +2020,7 @@ xlog_print_tic_res(
};
#undef REG_TYPE_STR
- xfs_warn(mp, "xlog_write: reservation summary:");
+ xfs_warn(mp, "ticket reservation summary:");
xfs_warn(mp, " unit res = %d bytes",
ticket->t_unit_res);
xfs_warn(mp, " current res = %d bytes",
@@ -2045,10 +2041,55 @@ xlog_print_tic_res(
"bad-rtype" : res_type_str[r_type]),
ticket->t_res_arr[i].r_len);
}
+}
+
+/*
+ * Print a summary of the transaction.
+ */
+void
+xlog_print_trans(
+ struct xfs_trans *tp)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_log_item_desc *lidp;
+
+ /* dump core transaction and ticket info */
+ xfs_warn(mp, "transaction summary:");
+ xfs_warn(mp, " flags = 0x%x", tp->t_flags);
+
+ xlog_print_tic_res(mp, tp->t_ticket);
- xfs_alert_tag(mp, XFS_PTAG_LOGRES,
- "xlog_write: reservation ran out. Need to up reservation");
- xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
+ /* dump each log item */
+ list_for_each_entry(lidp, &tp->t_items, lid_trans) {
+ struct xfs_log_item *lip = lidp->lid_item;
+ struct xfs_log_vec *lv = lip->li_lv;
+ struct xfs_log_iovec *vec;
+ int i;
+
+ xfs_warn(mp, "log item: ");
+ xfs_warn(mp, " type = 0x%x", lip->li_type);
+ xfs_warn(mp, " flags = 0x%x", lip->li_flags);
+ if (!lv)
+ continue;
+ xfs_warn(mp, " niovecs = %d", lv->lv_niovecs);
+ xfs_warn(mp, " size = %d", lv->lv_size);
+ xfs_warn(mp, " bytes = %d", lv->lv_bytes);
+ xfs_warn(mp, " buf len = %d", lv->lv_buf_len);
+
+ /* dump each iovec for the log item */
+ vec = lv->lv_iovecp;
+ for (i = 0; i < lv->lv_niovecs; i++) {
+ int dumplen = min(vec->i_len, 32);
+
+ xfs_warn(mp, " iovec[%d]", i);
+ xfs_warn(mp, " type = 0x%x", vec->i_type);
+ xfs_warn(mp, " len = %d", vec->i_len);
+ xfs_warn(mp, " first %d bytes of iovec[%d]:", dumplen, i);
+ xfs_hex_dump(vec->i_addr, dumplen);
+
+ vec++;
+ }
+ }
}
/*
@@ -2321,8 +2362,12 @@ xlog_write(
if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
ticket->t_curr_res -= sizeof(xlog_op_header_t);
- if (ticket->t_curr_res < 0)
+ if (ticket->t_curr_res < 0) {
+ xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
+ "ctx ticket reservation ran out. Need to up reservation");
xlog_print_tic_res(log->l_mp, ticket);
+ xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
+ }
index = 0;
lv = log_vector;
@@ -2363,8 +2408,8 @@ xlog_write(
}
reg = &vecp[index];
- ASSERT(reg->i_len % sizeof(__int32_t) == 0);
- ASSERT((unsigned long)ptr % sizeof(__int32_t) == 0);
+ ASSERT(reg->i_len % sizeof(int32_t) == 0);
+ ASSERT((unsigned long)ptr % sizeof(int32_t) == 0);
start_rec_copy = xlog_write_start_rec(ptr, ticket);
if (start_rec_copy) {
@@ -3143,7 +3188,7 @@ xlog_state_switch_iclogs(
/* Round up to next log-sunit */
if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
log->l_mp->m_sb.sb_logsunit > 1) {
- __uint32_t sunit_bb = BTOBB(log->l_mp->m_sb.sb_logsunit);
+ uint32_t sunit_bb = BTOBB(log->l_mp->m_sb.sb_logsunit);
log->l_curr_block = roundup(log->l_curr_block, sunit_bb);
}
@@ -3771,7 +3816,7 @@ xlog_verify_iclog(
xlog_in_core_2_t *xhdr;
void *base_ptr, *ptr, *p;
ptrdiff_t field_offset;
- __uint8_t clientid;
+ uint8_t clientid;
int len, i, j, k, op_len;
int idx;
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index cc5a9f1574e7..bf212772595c 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -159,7 +159,7 @@ int xfs_log_reserve(struct xfs_mount *mp,
int length,
int count,
struct xlog_ticket **ticket,
- __uint8_t clientid,
+ uint8_t clientid,
bool permanent);
int xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
void xfs_log_unmount(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 82f1cbcc4de1..fbe72b134bef 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -410,6 +410,7 @@ xlog_cil_insert_items(
int len = 0;
int diff_iovecs = 0;
int iclog_space;
+ int iovhdr_res = 0, split_res = 0, ctx_res = 0;
ASSERT(tp);
@@ -419,30 +420,11 @@ xlog_cil_insert_items(
*/
xlog_cil_insert_format_items(log, tp, &len, &diff_iovecs);
- /*
- * Now (re-)position everything modified at the tail of the CIL.
- * We do this here so we only need to take the CIL lock once during
- * the transaction commit.
- */
spin_lock(&cil->xc_cil_lock);
- list_for_each_entry(lidp, &tp->t_items, lid_trans) {
- struct xfs_log_item *lip = lidp->lid_item;
-
- /* Skip items which aren't dirty in this transaction. */
- if (!(lidp->lid_flags & XFS_LID_DIRTY))
- continue;
-
- /*
- * Only move the item if it isn't already at the tail. This is
- * to prevent a transient list_empty() state when reinserting
- * an item that is already the only item in the CIL.
- */
- if (!list_is_last(&lip->li_cil, &cil->xc_cil))
- list_move_tail(&lip->li_cil, &cil->xc_cil);
- }
/* account for space used by new iovec headers */
- len += diff_iovecs * sizeof(xlog_op_header_t);
+ iovhdr_res = diff_iovecs * sizeof(xlog_op_header_t);
+ len += iovhdr_res;
ctx->nvecs += diff_iovecs;
/* attach the transaction to the CIL if it has any busy extents */
@@ -457,28 +439,66 @@ xlog_cil_insert_items(
* during the transaction commit.
*/
if (ctx->ticket->t_curr_res == 0) {
- ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
- tp->t_ticket->t_curr_res -= ctx->ticket->t_unit_res;
+ ctx_res = ctx->ticket->t_unit_res;
+ ctx->ticket->t_curr_res = ctx_res;
+ tp->t_ticket->t_curr_res -= ctx_res;
}
/* do we need space for more log record headers? */
iclog_space = log->l_iclog_size - log->l_iclog_hsize;
if (len > 0 && (ctx->space_used / iclog_space !=
(ctx->space_used + len) / iclog_space)) {
- int hdrs;
-
- hdrs = (len + iclog_space - 1) / iclog_space;
+ split_res = (len + iclog_space - 1) / iclog_space;
/* need to take into account split region headers, too */
- hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
- ctx->ticket->t_unit_res += hdrs;
- ctx->ticket->t_curr_res += hdrs;
- tp->t_ticket->t_curr_res -= hdrs;
+ split_res *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
+ ctx->ticket->t_unit_res += split_res;
+ ctx->ticket->t_curr_res += split_res;
+ tp->t_ticket->t_curr_res -= split_res;
ASSERT(tp->t_ticket->t_curr_res >= len);
}
tp->t_ticket->t_curr_res -= len;
ctx->space_used += len;
+ /*
+ * If we've overrun the reservation, dump the tx details before we move
+ * the log items. Shutdown is imminent...
+ */
+ if (WARN_ON(tp->t_ticket->t_curr_res < 0)) {
+ xfs_warn(log->l_mp, "Transaction log reservation overrun:");
+ xfs_warn(log->l_mp,
+ " log items: %d bytes (iov hdrs: %d bytes)",
+ len, iovhdr_res);
+ xfs_warn(log->l_mp, " split region headers: %d bytes",
+ split_res);
+ xfs_warn(log->l_mp, " ctx ticket: %d bytes", ctx_res);
+ xlog_print_trans(tp);
+ }
+
+ /*
+ * Now (re-)position everything modified at the tail of the CIL.
+ * We do this here so we only need to take the CIL lock once during
+ * the transaction commit.
+ */
+ list_for_each_entry(lidp, &tp->t_items, lid_trans) {
+ struct xfs_log_item *lip = lidp->lid_item;
+
+ /* Skip items which aren't dirty in this transaction. */
+ if (!(lidp->lid_flags & XFS_LID_DIRTY))
+ continue;
+
+ /*
+ * Only move the item if it isn't already at the tail. This is
+ * to prevent a transient list_empty() state when reinserting
+ * an item that is already the only item in the CIL.
+ */
+ if (!list_is_last(&lip->li_cil, &cil->xc_cil))
+ list_move_tail(&lip->li_cil, &cil->xc_cil);
+ }
+
spin_unlock(&cil->xc_cil_lock);
+
+ if (tp->t_ticket->t_curr_res < 0)
+ xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
}
static void
@@ -973,6 +993,7 @@ xfs_log_commit_cil(
{
struct xlog *log = mp->m_log;
struct xfs_cil *cil = log->l_cilp;
+ xfs_lsn_t xc_commit_lsn;
/*
* Do all necessary memory allocation before we lock the CIL.
@@ -986,13 +1007,9 @@ xfs_log_commit_cil(
xlog_cil_insert_items(log, tp);
- /* check we didn't blow the reservation */
- if (tp->t_ticket->t_curr_res < 0)
- xlog_print_tic_res(mp, tp->t_ticket);
-
- tp->t_commit_lsn = cil->xc_ctx->sequence;
+ xc_commit_lsn = cil->xc_ctx->sequence;
if (commit_lsn)
- *commit_lsn = tp->t_commit_lsn;
+ *commit_lsn = xc_commit_lsn;
xfs_log_done(mp, tp->t_ticket, NULL, regrant);
xfs_trans_unreserve_and_mod_sb(tp);
@@ -1008,7 +1025,7 @@ xfs_log_commit_cil(
* the log items. This affects (at least) processing of stale buffers,
* inodes and EFIs.
*/
- xfs_trans_free_items(tp, tp->t_commit_lsn, false);
+ xfs_trans_free_items(tp, xc_commit_lsn, false);
xlog_cil_push_background(log);
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index c2604a5366f2..51bf7b827387 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -419,7 +419,7 @@ struct xlog {
};
#define XLOG_BUF_CANCEL_BUCKET(log, blkno) \
- ((log)->l_buf_cancel_table + ((__uint64_t)blkno % XLOG_BC_TABLE_SIZE))
+ ((log)->l_buf_cancel_table + ((uint64_t)blkno % XLOG_BC_TABLE_SIZE))
#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR)
@@ -456,6 +456,7 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
}
void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
+void xlog_print_trans(struct xfs_trans *);
int
xlog_write(
struct xlog *log,
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 8cec1e5505a4..9549188f5a36 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2230,9 +2230,9 @@ xlog_recover_get_buf_lsn(
struct xfs_mount *mp,
struct xfs_buf *bp)
{
- __uint32_t magic32;
- __uint16_t magic16;
- __uint16_t magicda;
+ uint32_t magic32;
+ uint16_t magic16;
+ uint16_t magicda;
void *blk = bp->b_addr;
uuid_t *uuid;
xfs_lsn_t lsn = -1;
@@ -2381,9 +2381,9 @@ xlog_recover_validate_buf_type(
xfs_lsn_t current_lsn)
{
struct xfs_da_blkinfo *info = bp->b_addr;
- __uint32_t magic32;
- __uint16_t magic16;
- __uint16_t magicda;
+ uint32_t magic32;
+ uint16_t magic16;
+ uint16_t magicda;
char *warnmsg = NULL;
/*
@@ -2852,7 +2852,7 @@ xlog_recover_buffer_pass2(
if (XFS_DINODE_MAGIC ==
be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
(BBTOB(bp->b_io_length) != MAX(log->l_mp->m_sb.sb_blocksize,
- (__uint32_t)log->l_mp->m_inode_cluster_size))) {
+ (uint32_t)log->l_mp->m_inode_cluster_size))) {
xfs_buf_stale(bp);
error = xfs_bwrite(bp);
} else {
@@ -3423,7 +3423,7 @@ xlog_recover_efd_pass2(
xfs_efd_log_format_t *efd_formatp;
xfs_efi_log_item_t *efip = NULL;
xfs_log_item_t *lip;
- __uint64_t efi_id;
+ uint64_t efi_id;
struct xfs_ail_cursor cur;
struct xfs_ail *ailp = log->l_ailp;
@@ -3519,7 +3519,7 @@ xlog_recover_rud_pass2(
struct xfs_rud_log_format *rud_formatp;
struct xfs_rui_log_item *ruip = NULL;
struct xfs_log_item *lip;
- __uint64_t rui_id;
+ uint64_t rui_id;
struct xfs_ail_cursor cur;
struct xfs_ail *ailp = log->l_ailp;
@@ -3635,7 +3635,7 @@ xlog_recover_cud_pass2(
struct xfs_cud_log_format *cud_formatp;
struct xfs_cui_log_item *cuip = NULL;
struct xfs_log_item *lip;
- __uint64_t cui_id;
+ uint64_t cui_id;
struct xfs_ail_cursor cur;
struct xfs_ail *ailp = log->l_ailp;
@@ -3754,7 +3754,7 @@ xlog_recover_bud_pass2(
struct xfs_bud_log_format *bud_formatp;
struct xfs_bui_log_item *buip = NULL;
struct xfs_log_item *lip;
- __uint64_t bui_id;
+ uint64_t bui_id;
struct xfs_ail_cursor cur;
struct xfs_ail *ailp = log->l_ailp;
@@ -4152,7 +4152,7 @@ xlog_recover_commit_trans(
#define XLOG_RECOVER_COMMIT_QUEUE_MAX 100
- hlist_del(&trans->r_list);
+ hlist_del_init(&trans->r_list);
error = xlog_recover_reorder_trans(log, trans, pass);
if (error)
@@ -4354,6 +4354,8 @@ xlog_recover_free_trans(
xlog_recover_item_t *item, *n;
int i;
+ hlist_del_init(&trans->r_list);
+
list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) {
/* Free the regions in the item. */
list_del(&item->ri_list);
@@ -5224,12 +5226,16 @@ xlog_do_recovery_pass(
int error2 = 0;
int bblks, split_bblks;
int hblks, split_hblks, wrapped_hblks;
+ int i;
struct hlist_head rhash[XLOG_RHASH_SIZE];
LIST_HEAD (buffer_list);
ASSERT(head_blk != tail_blk);
rhead_blk = 0;
+ for (i = 0; i < XLOG_RHASH_SIZE; i++)
+ INIT_HLIST_HEAD(&rhash[i]);
+
/*
* Read the header of the tail block and get the iclog buffer size from
* h_size. Use this to tell how many sectors make up the log header.
@@ -5466,6 +5472,19 @@ xlog_do_recovery_pass(
if (error && first_bad)
*first_bad = rhead_blk;
+ /*
+ * Transactions are freed at commit time but transactions without commit
+ * records on disk are never committed. Free any that may be left in the
+ * hash table.
+ */
+ for (i = 0; i < XLOG_RHASH_SIZE; i++) {
+ struct hlist_node *tmp;
+ struct xlog_recover *trans;
+
+ hlist_for_each_entry_safe(trans, tmp, &rhash[i], r_list)
+ xlog_recover_free_trans(trans);
+ }
+
return error ? error : error2;
}
@@ -5772,9 +5791,9 @@ xlog_recover_check_summary(
xfs_buf_t *agfbp;
xfs_buf_t *agibp;
xfs_agnumber_t agno;
- __uint64_t freeblks;
- __uint64_t itotal;
- __uint64_t ifree;
+ uint64_t freeblks;
+ uint64_t itotal;
+ uint64_t ifree;
int error;
mp = log->l_mp;
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c
index 11792d888e4e..e68bd1050eab 100644
--- a/fs/xfs/xfs_message.c
+++ b/fs/xfs/xfs_message.c
@@ -110,7 +110,10 @@ assfail(char *expr, char *file, int line)
{
xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d",
expr, file, line);
- BUG();
+ if (xfs_globals.bug_on_assert)
+ BUG();
+ else
+ WARN_ON(1);
}
void
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d249546da15e..40d4e8b4e193 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -173,7 +173,7 @@ xfs_free_perag(
int
xfs_sb_validate_fsb_count(
xfs_sb_t *sbp,
- __uint64_t nblocks)
+ uint64_t nblocks)
{
ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
ASSERT(sbp->sb_blocklog >= BBSHIFT);
@@ -435,7 +435,7 @@ STATIC void
xfs_set_maxicount(xfs_mount_t *mp)
{
xfs_sb_t *sbp = &(mp->m_sb);
- __uint64_t icount;
+ uint64_t icount;
if (sbp->sb_imax_pct) {
/*
@@ -501,7 +501,7 @@ xfs_set_low_space_thresholds(
int i;
for (i = 0; i < XFS_LOWSP_MAX; i++) {
- __uint64_t space = mp->m_sb.sb_dblocks;
+ uint64_t space = mp->m_sb.sb_dblocks;
do_div(space, 100);
mp->m_low_space[i] = space * (i + 1);
@@ -597,10 +597,10 @@ xfs_mount_reset_sbqflags(
return xfs_sync_sb(mp, false);
}
-__uint64_t
+uint64_t
xfs_default_resblks(xfs_mount_t *mp)
{
- __uint64_t resblks;
+ uint64_t resblks;
/*
* We default to 5% or 8192 fsbs of space reserved, whichever is
@@ -611,7 +611,7 @@ xfs_default_resblks(xfs_mount_t *mp)
*/
resblks = mp->m_sb.sb_dblocks;
do_div(resblks, 20);
- resblks = min_t(__uint64_t, resblks, 8192);
+ resblks = min_t(uint64_t, resblks, 8192);
return resblks;
}
@@ -631,7 +631,7 @@ xfs_mountfs(
{
struct xfs_sb *sbp = &(mp->m_sb);
struct xfs_inode *rip;
- __uint64_t resblks;
+ uint64_t resblks;
uint quotamount = 0;
uint quotaflags = 0;
int error = 0;
@@ -719,10 +719,13 @@ xfs_mountfs(
if (error)
goto out_del_stats;
+ error = xfs_errortag_init(mp);
+ if (error)
+ goto out_remove_error_sysfs;
error = xfs_uuid_mount(mp);
if (error)
- goto out_remove_error_sysfs;
+ goto out_remove_errortag;
/*
* Set the minimum read and write sizes
@@ -1044,6 +1047,8 @@ xfs_mountfs(
xfs_da_unmount(mp);
out_remove_uuid:
xfs_uuid_unmount(mp);
+ out_remove_errortag:
+ xfs_errortag_del(mp);
out_remove_error_sysfs:
xfs_error_sysfs_del(mp);
out_del_stats:
@@ -1062,7 +1067,7 @@ void
xfs_unmountfs(
struct xfs_mount *mp)
{
- __uint64_t resblks;
+ uint64_t resblks;
int error;
cancel_delayed_work_sync(&mp->m_eofblocks_work);
@@ -1147,10 +1152,11 @@ xfs_unmountfs(
xfs_uuid_unmount(mp);
#if defined(DEBUG)
- xfs_errortag_clearall(mp, 0);
+ xfs_errortag_clearall(mp);
#endif
xfs_free_perag(mp);
+ xfs_errortag_del(mp);
xfs_error_sysfs_del(mp);
xfs_sysfs_del(&mp->m_stats.xs_kobj);
xfs_sysfs_del(&mp->m_kobj);
@@ -1211,7 +1217,7 @@ xfs_mod_icount(
struct xfs_mount *mp,
int64_t delta)
{
- __percpu_counter_add(&mp->m_icount, delta, XFS_ICOUNT_BATCH);
+ percpu_counter_add_batch(&mp->m_icount, delta, XFS_ICOUNT_BATCH);
if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) {
ASSERT(0);
percpu_counter_add(&mp->m_icount, -delta);
@@ -1290,7 +1296,7 @@ xfs_mod_fdblocks(
else
batch = XFS_FDBLOCKS_BATCH;
- __percpu_counter_add(&mp->m_fdblocks, delta, batch);
+ percpu_counter_add_batch(&mp->m_fdblocks, delta, batch);
if (__percpu_counter_compare(&mp->m_fdblocks, mp->m_alloc_set_aside,
XFS_FDBLOCKS_BATCH) >= 0) {
/* we had space! */
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 9fa312a41c93..e0792d036be2 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -108,10 +108,10 @@ typedef struct xfs_mount {
xfs_buftarg_t *m_ddev_targp; /* saves taking the address */
xfs_buftarg_t *m_logdev_targp;/* ptr to log device */
xfs_buftarg_t *m_rtdev_targp; /* ptr to rt device */
- __uint8_t m_blkbit_log; /* blocklog + NBBY */
- __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */
- __uint8_t m_agno_log; /* log #ag's */
- __uint8_t m_agino_log; /* #bits for agino in inum */
+ uint8_t m_blkbit_log; /* blocklog + NBBY */
+ uint8_t m_blkbb_log; /* blocklog - BBSHIFT */
+ uint8_t m_agno_log; /* log #ag's */
+ uint8_t m_agino_log; /* #bits for agino in inum */
uint m_inode_cluster_size;/* min inode buf size */
uint m_blockmask; /* sb_blocksize-1 */
uint m_blockwsize; /* sb_blocksize in words */
@@ -139,7 +139,7 @@ typedef struct xfs_mount {
struct mutex m_growlock; /* growfs mutex */
int m_fixedfsid[2]; /* unchanged for life of FS */
uint m_dmevmask; /* DMI events for this FS */
- __uint64_t m_flags; /* global mount flags */
+ uint64_t m_flags; /* global mount flags */
bool m_inotbt_nores; /* no per-AG finobt resv. */
int m_ialloc_inos; /* inodes in inode allocation */
int m_ialloc_blks; /* blocks in inode allocation */
@@ -148,14 +148,14 @@ typedef struct xfs_mount {
int m_inoalign_mask;/* mask sb_inoalignmt if used */
uint m_qflags; /* quota status flags */
struct xfs_trans_resv m_resv; /* precomputed res values */
- __uint64_t m_maxicount; /* maximum inode count */
- __uint64_t m_resblks; /* total reserved blocks */
- __uint64_t m_resblks_avail;/* available reserved blocks */
- __uint64_t m_resblks_save; /* reserved blks @ remount,ro */
+ uint64_t m_maxicount; /* maximum inode count */
+ uint64_t m_resblks; /* total reserved blocks */
+ uint64_t m_resblks_avail;/* available reserved blocks */
+ uint64_t m_resblks_save; /* reserved blks @ remount,ro */
int m_dalign; /* stripe unit */
int m_swidth; /* stripe width */
int m_sinoalign; /* stripe unit inode alignment */
- __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
+ uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */
const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */
const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */
@@ -194,19 +194,17 @@ typedef struct xfs_mount {
* ever support shrinks it would have to be persisted in addition
* to various other kinds of pain inflicted on the pNFS server.
*/
- __uint32_t m_generation;
+ uint32_t m_generation;
bool m_fail_unmount;
#ifdef DEBUG
/*
- * DEBUG mode instrumentation to test and/or trigger delayed allocation
- * block killing in the event of failed writes. When enabled, all
- * buffered writes are silenty dropped and handled as if they failed.
- * All delalloc blocks in the range of the write (including pre-existing
- * delalloc blocks!) are tossed as part of the write failure error
- * handling sequence.
+ * Frequency with which errors are injected. Replaces xfs_etest; the
+ * value stored in here is the inverse of the frequency with which the
+ * error triggers. 1 = always, 2 = half the time, etc.
*/
- bool m_drop_writes;
+ unsigned int *m_errortag;
+ struct xfs_kobj m_errortag_kobj;
#endif
} xfs_mount_t;
@@ -325,20 +323,6 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
}
-#ifdef DEBUG
-static inline bool
-xfs_mp_drop_writes(struct xfs_mount *mp)
-{
- return mp->m_drop_writes;
-}
-#else
-static inline bool
-xfs_mp_drop_writes(struct xfs_mount *mp)
-{
- return 0;
-}
-#endif
-
/* per-AG block reservation data structures*/
enum xfs_ag_resv_type {
XFS_AG_RESV_NONE = 0,
@@ -367,12 +351,12 @@ typedef struct xfs_perag {
char pagi_init; /* this agi's entry is initialized */
char pagf_metadata; /* the agf is preferred to be metadata */
char pagi_inodeok; /* The agi is ok for inodes */
- __uint8_t pagf_levels[XFS_BTNUM_AGF];
+ uint8_t pagf_levels[XFS_BTNUM_AGF];
/* # of levels in bno & cnt btree */
- __uint32_t pagf_flcount; /* count of blocks in freelist */
+ uint32_t pagf_flcount; /* count of blocks in freelist */
xfs_extlen_t pagf_freeblks; /* total free blocks */
xfs_extlen_t pagf_longest; /* longest free space */
- __uint32_t pagf_btreeblks; /* # of blocks held in AGF btrees */
+ uint32_t pagf_btreeblks; /* # of blocks held in AGF btrees */
xfs_agino_t pagi_freecount; /* number of free inodes */
xfs_agino_t pagi_count; /* number of allocated inodes */
@@ -411,7 +395,7 @@ typedef struct xfs_perag {
struct xfs_ag_resv pag_agfl_resv;
/* reference count */
- __uint8_t pagf_refcount_level;
+ uint8_t pagf_refcount_level;
} xfs_perag_t;
static inline struct xfs_ag_resv *
@@ -434,7 +418,7 @@ void xfs_buf_hash_destroy(xfs_perag_t *pag);
extern void xfs_uuid_table_free(void);
extern int xfs_log_sbcount(xfs_mount_t *);
-extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
+extern uint64_t xfs_default_resblks(xfs_mount_t *mp);
extern int xfs_mountfs(xfs_mount_t *mp);
extern int xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount,
xfs_agnumber_t *maxagi);
@@ -450,7 +434,7 @@ extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
extern int xfs_readsb(xfs_mount_t *, int);
extern void xfs_freesb(xfs_mount_t *);
extern bool xfs_fs_writable(struct xfs_mount *mp, int level);
-extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
+extern int xfs_sb_validate_fsb_count(struct xfs_sb *, uint64_t);
extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 5fe6e70b88ef..6ce948c436d5 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -1247,6 +1247,7 @@ xfs_qm_flush_one(
struct xfs_dquot *dqp,
void *data)
{
+ struct xfs_mount *mp = dqp->q_mount;
struct list_head *buffer_list = data;
struct xfs_buf *bp = NULL;
int error = 0;
@@ -1257,7 +1258,32 @@ xfs_qm_flush_one(
if (!XFS_DQ_IS_DIRTY(dqp))
goto out_unlock;
- xfs_dqflock(dqp);
+ /*
+ * The only way the dquot is already flush locked by the time quotacheck
+ * gets here is if reclaim flushed it before the dqadjust walk dirtied
+ * it for the final time. Quotacheck collects all dquot bufs in the
+ * local delwri queue before dquots are dirtied, so reclaim can't have
+ * possibly queued it for I/O. The only way out is to push the buffer to
+ * cycle the flush lock.
+ */
+ if (!xfs_dqflock_nowait(dqp)) {
+ /* buf is pinned in-core by delwri list */
+ DEFINE_SINGLE_BUF_MAP(map, dqp->q_blkno,
+ mp->m_quotainfo->qi_dqchunklen);
+ bp = _xfs_buf_find(mp->m_ddev_targp, &map, 1, 0, NULL);
+ if (!bp) {
+ error = -EINVAL;
+ goto out_unlock;
+ }
+ xfs_buf_unlock(bp);
+
+ xfs_buf_delwri_pushbuf(bp, buffer_list);
+ xfs_buf_rele(bp);
+
+ error = -EAGAIN;
+ goto out_unlock;
+ }
+
error = xfs_qm_dqflush(dqp, &bp);
if (error)
goto out_unlock;
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index 3e52d5de7ae1..2be6d2735ca9 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -33,7 +33,7 @@ xfs_fill_statvfs_from_dquot(
struct kstatfs *statp,
struct xfs_dquot *dqp)
{
- __uint64_t limit;
+ uint64_t limit;
limit = dqp->q_core.d_blk_softlimit ?
be64_to_cpu(dqp->q_core.d_blk_softlimit) :
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index f82d79a8c694..de9493253edf 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -269,7 +269,6 @@ xfs_fs_get_nextdqblk(
/* ID may be different, so convert back what we got */
*qid = make_kqid(current_user_ns(), qid->type, id);
return 0;
-
}
STATIC int
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index ffe6fe7a7eb5..ab2270a87196 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -155,6 +155,7 @@
int
xfs_reflink_find_shared(
struct xfs_mount *mp,
+ struct xfs_trans *tp,
xfs_agnumber_t agno,
xfs_agblock_t agbno,
xfs_extlen_t aglen,
@@ -166,18 +167,18 @@ xfs_reflink_find_shared(
struct xfs_btree_cur *cur;
int error;
- error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+ error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
if (error)
return error;
- cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL);
+ cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL);
error = xfs_refcount_find_shared(cur, agbno, aglen, fbno, flen,
find_end_of_shared);
xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
- xfs_buf_relse(agbp);
+ xfs_trans_brelse(tp, agbp);
return error;
}
@@ -217,7 +218,7 @@ xfs_reflink_trim_around_shared(
agbno = XFS_FSB_TO_AGBNO(ip->i_mount, irec->br_startblock);
aglen = irec->br_blockcount;
- error = xfs_reflink_find_shared(ip->i_mount, agno, agbno,
+ error = xfs_reflink_find_shared(ip->i_mount, NULL, agno, agbno,
aglen, &fbno, &flen, true);
if (error)
return error;
@@ -1373,8 +1374,8 @@ xfs_reflink_dirty_extents(
agbno = XFS_FSB_TO_AGBNO(mp, map[1].br_startblock);
aglen = map[1].br_blockcount;
- error = xfs_reflink_find_shared(mp, agno, agbno, aglen,
- &rbno, &rlen, true);
+ error = xfs_reflink_find_shared(mp, NULL, agno, agbno,
+ aglen, &rbno, &rlen, true);
if (error)
goto out;
if (rbno == NULLAGBLOCK)
@@ -1405,57 +1406,73 @@ out:
return error;
}
-/* Clear the inode reflink flag if there are no shared extents. */
+/* Does this inode need the reflink flag? */
int
-xfs_reflink_clear_inode_flag(
- struct xfs_inode *ip,
- struct xfs_trans **tpp)
+xfs_reflink_inode_has_shared_extents(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ bool *has_shared)
{
- struct xfs_mount *mp = ip->i_mount;
- xfs_fileoff_t fbno;
- xfs_filblks_t end;
- xfs_agnumber_t agno;
- xfs_agblock_t agbno;
- xfs_extlen_t aglen;
- xfs_agblock_t rbno;
- xfs_extlen_t rlen;
- struct xfs_bmbt_irec map;
- int nmaps;
- int error = 0;
-
- ASSERT(xfs_is_reflink_inode(ip));
+ struct xfs_bmbt_irec got;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_ifork *ifp;
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+ xfs_extlen_t aglen;
+ xfs_agblock_t rbno;
+ xfs_extlen_t rlen;
+ xfs_extnum_t idx;
+ bool found;
+ int error;
- fbno = 0;
- end = XFS_B_TO_FSB(mp, i_size_read(VFS_I(ip)));
- while (end - fbno > 0) {
- nmaps = 1;
- /*
- * Look for extents in the file. Skip holes, delalloc, or
- * unwritten extents; they can't be reflinked.
- */
- error = xfs_bmapi_read(ip, fbno, end - fbno, &map, &nmaps, 0);
+ ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+ if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+ error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
if (error)
return error;
- if (nmaps == 0)
- break;
- if (!xfs_bmap_is_real_extent(&map))
- goto next;
+ }
- agno = XFS_FSB_TO_AGNO(mp, map.br_startblock);
- agbno = XFS_FSB_TO_AGBNO(mp, map.br_startblock);
- aglen = map.br_blockcount;
+ *has_shared = false;
+ found = xfs_iext_lookup_extent(ip, ifp, 0, &idx, &got);
+ while (found) {
+ if (isnullstartblock(got.br_startblock) ||
+ got.br_state != XFS_EXT_NORM)
+ goto next;
+ agno = XFS_FSB_TO_AGNO(mp, got.br_startblock);
+ agbno = XFS_FSB_TO_AGBNO(mp, got.br_startblock);
+ aglen = got.br_blockcount;
- error = xfs_reflink_find_shared(mp, agno, agbno, aglen,
+ error = xfs_reflink_find_shared(mp, tp, agno, agbno, aglen,
&rbno, &rlen, false);
if (error)
return error;
/* Is there still a shared block here? */
- if (rbno != NULLAGBLOCK)
+ if (rbno != NULLAGBLOCK) {
+ *has_shared = true;
return 0;
+ }
next:
- fbno = map.br_startoff + map.br_blockcount;
+ found = xfs_iext_get_extent(ifp, ++idx, &got);
}
+ return 0;
+}
+
+/* Clear the inode reflink flag if there are no shared extents. */
+int
+xfs_reflink_clear_inode_flag(
+ struct xfs_inode *ip,
+ struct xfs_trans **tpp)
+{
+ bool needs_flag;
+ int error = 0;
+
+ ASSERT(xfs_is_reflink_inode(ip));
+
+ error = xfs_reflink_inode_has_shared_extents(*tpp, ip, &needs_flag);
+ if (error || needs_flag)
+ return error;
+
/*
* We didn't find any shared blocks so turn off the reflink flag.
* First, get rid of any leftover CoW mappings.
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index d29a7967f029..701487bab468 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -20,9 +20,9 @@
#ifndef __XFS_REFLINK_H
#define __XFS_REFLINK_H 1
-extern int xfs_reflink_find_shared(struct xfs_mount *mp, xfs_agnumber_t agno,
- xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno,
- xfs_extlen_t *flen, bool find_maximal);
+extern int xfs_reflink_find_shared(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_extlen_t aglen,
+ xfs_agblock_t *fbno, xfs_extlen_t *flen, bool find_maximal);
extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed);
@@ -47,6 +47,8 @@ extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe);
+extern int xfs_reflink_inode_has_shared_extents(struct xfs_trans *tp,
+ struct xfs_inode *ip, bool *has_shared);
extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip,
struct xfs_trans **tpp);
extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset,
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index c57aa7f18087..91472193643b 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1256,13 +1256,13 @@ xfs_rtpick_extent(
{
xfs_rtblock_t b; /* result block */
int log2; /* log of sequence number */
- __uint64_t resid; /* residual after log removed */
- __uint64_t seq; /* sequence number of file creation */
- __uint64_t *seqp; /* pointer to seqno in inode */
+ uint64_t resid; /* residual after log removed */
+ uint64_t seq; /* sequence number of file creation */
+ uint64_t *seqp; /* pointer to seqno in inode */
ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
- seqp = (__uint64_t *)&VFS_I(mp->m_rbmip)->i_atime;
+ seqp = (uint64_t *)&VFS_I(mp->m_rbmip)->i_atime;
if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) {
mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
*seqp = 0;
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index f13133e6f19f..79defa722bf1 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -107,6 +107,8 @@ xfs_growfs_rt(
/*
* From xfs_rtbitmap.c
*/
+int xfs_rtbuf_get(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_rtblock_t block, int issum, struct xfs_buf **bpp);
int xfs_rtcheck_range(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_rtblock_t start, xfs_extlen_t len, int val,
xfs_rtblock_t *new, int *stat);
@@ -143,6 +145,7 @@ int xfs_rtalloc_query_all(struct xfs_trans *tp,
# define xfs_growfs_rt(mp,in) (ENOSYS)
# define xfs_rtalloc_query_range(t,l,h,f,p) (ENOSYS)
# define xfs_rtalloc_query_all(t,f,p) (ENOSYS)
+# define xfs_rtbuf_get(m,t,b,i,p) (ENOSYS)
static inline int /* error */
xfs_rtmount_init(
xfs_mount_t *mp) /* file system mount structure */
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index f11282c96887..056e12b421eb 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -33,9 +33,9 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
{
int i, j;
int len = 0;
- __uint64_t xs_xstrat_bytes = 0;
- __uint64_t xs_write_bytes = 0;
- __uint64_t xs_read_bytes = 0;
+ uint64_t xs_xstrat_bytes = 0;
+ uint64_t xs_write_bytes = 0;
+ uint64_t xs_read_bytes = 0;
static const struct xstats_entry {
char *desc;
@@ -100,7 +100,7 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
void xfs_stats_clearall(struct xfsstats __percpu *stats)
{
int c;
- __uint32_t vn_active;
+ uint32_t vn_active;
xfs_notice(NULL, "Clearing xfsstats");
for_each_possible_cpu(c) {
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index 375840f5a99a..f64d0ae345c4 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -54,125 +54,125 @@ enum {
*/
struct __xfsstats {
# define XFSSTAT_END_EXTENT_ALLOC 4
- __uint32_t xs_allocx;
- __uint32_t xs_allocb;
- __uint32_t xs_freex;
- __uint32_t xs_freeb;
+ uint32_t xs_allocx;
+ uint32_t xs_allocb;
+ uint32_t xs_freex;
+ uint32_t xs_freeb;
# define XFSSTAT_END_ALLOC_BTREE (XFSSTAT_END_EXTENT_ALLOC+4)
- __uint32_t xs_abt_lookup;
- __uint32_t xs_abt_compare;
- __uint32_t xs_abt_insrec;
- __uint32_t xs_abt_delrec;
+ uint32_t xs_abt_lookup;
+ uint32_t xs_abt_compare;
+ uint32_t xs_abt_insrec;
+ uint32_t xs_abt_delrec;
# define XFSSTAT_END_BLOCK_MAPPING (XFSSTAT_END_ALLOC_BTREE+7)
- __uint32_t xs_blk_mapr;
- __uint32_t xs_blk_mapw;
- __uint32_t xs_blk_unmap;
- __uint32_t xs_add_exlist;
- __uint32_t xs_del_exlist;
- __uint32_t xs_look_exlist;
- __uint32_t xs_cmp_exlist;
+ uint32_t xs_blk_mapr;
+ uint32_t xs_blk_mapw;
+ uint32_t xs_blk_unmap;
+ uint32_t xs_add_exlist;
+ uint32_t xs_del_exlist;
+ uint32_t xs_look_exlist;
+ uint32_t xs_cmp_exlist;
# define XFSSTAT_END_BLOCK_MAP_BTREE (XFSSTAT_END_BLOCK_MAPPING+4)
- __uint32_t xs_bmbt_lookup;
- __uint32_t xs_bmbt_compare;
- __uint32_t xs_bmbt_insrec;
- __uint32_t xs_bmbt_delrec;
+ uint32_t xs_bmbt_lookup;
+ uint32_t xs_bmbt_compare;
+ uint32_t xs_bmbt_insrec;
+ uint32_t xs_bmbt_delrec;
# define XFSSTAT_END_DIRECTORY_OPS (XFSSTAT_END_BLOCK_MAP_BTREE+4)
- __uint32_t xs_dir_lookup;
- __uint32_t xs_dir_create;
- __uint32_t xs_dir_remove;
- __uint32_t xs_dir_getdents;
+ uint32_t xs_dir_lookup;
+ uint32_t xs_dir_create;
+ uint32_t xs_dir_remove;
+ uint32_t xs_dir_getdents;
# define XFSSTAT_END_TRANSACTIONS (XFSSTAT_END_DIRECTORY_OPS+3)
- __uint32_t xs_trans_sync;
- __uint32_t xs_trans_async;
- __uint32_t xs_trans_empty;
+ uint32_t xs_trans_sync;
+ uint32_t xs_trans_async;
+ uint32_t xs_trans_empty;
# define XFSSTAT_END_INODE_OPS (XFSSTAT_END_TRANSACTIONS+7)
- __uint32_t xs_ig_attempts;
- __uint32_t xs_ig_found;
- __uint32_t xs_ig_frecycle;
- __uint32_t xs_ig_missed;
- __uint32_t xs_ig_dup;
- __uint32_t xs_ig_reclaims;
- __uint32_t xs_ig_attrchg;
+ uint32_t xs_ig_attempts;
+ uint32_t xs_ig_found;
+ uint32_t xs_ig_frecycle;
+ uint32_t xs_ig_missed;
+ uint32_t xs_ig_dup;
+ uint32_t xs_ig_reclaims;
+ uint32_t xs_ig_attrchg;
# define XFSSTAT_END_LOG_OPS (XFSSTAT_END_INODE_OPS+5)
- __uint32_t xs_log_writes;
- __uint32_t xs_log_blocks;
- __uint32_t xs_log_noiclogs;
- __uint32_t xs_log_force;
- __uint32_t xs_log_force_sleep;
+ uint32_t xs_log_writes;
+ uint32_t xs_log_blocks;
+ uint32_t xs_log_noiclogs;
+ uint32_t xs_log_force;
+ uint32_t xs_log_force_sleep;
# define XFSSTAT_END_TAIL_PUSHING (XFSSTAT_END_LOG_OPS+10)
- __uint32_t xs_try_logspace;
- __uint32_t xs_sleep_logspace;
- __uint32_t xs_push_ail;
- __uint32_t xs_push_ail_success;
- __uint32_t xs_push_ail_pushbuf;
- __uint32_t xs_push_ail_pinned;
- __uint32_t xs_push_ail_locked;
- __uint32_t xs_push_ail_flushing;
- __uint32_t xs_push_ail_restarts;
- __uint32_t xs_push_ail_flush;
+ uint32_t xs_try_logspace;
+ uint32_t xs_sleep_logspace;
+ uint32_t xs_push_ail;
+ uint32_t xs_push_ail_success;
+ uint32_t xs_push_ail_pushbuf;
+ uint32_t xs_push_ail_pinned;
+ uint32_t xs_push_ail_locked;
+ uint32_t xs_push_ail_flushing;
+ uint32_t xs_push_ail_restarts;
+ uint32_t xs_push_ail_flush;
# define XFSSTAT_END_WRITE_CONVERT (XFSSTAT_END_TAIL_PUSHING+2)
- __uint32_t xs_xstrat_quick;
- __uint32_t xs_xstrat_split;
+ uint32_t xs_xstrat_quick;
+ uint32_t xs_xstrat_split;
# define XFSSTAT_END_READ_WRITE_OPS (XFSSTAT_END_WRITE_CONVERT+2)
- __uint32_t xs_write_calls;
- __uint32_t xs_read_calls;
+ uint32_t xs_write_calls;
+ uint32_t xs_read_calls;
# define XFSSTAT_END_ATTRIBUTE_OPS (XFSSTAT_END_READ_WRITE_OPS+4)
- __uint32_t xs_attr_get;
- __uint32_t xs_attr_set;
- __uint32_t xs_attr_remove;
- __uint32_t xs_attr_list;
+ uint32_t xs_attr_get;
+ uint32_t xs_attr_set;
+ uint32_t xs_attr_remove;
+ uint32_t xs_attr_list;
# define XFSSTAT_END_INODE_CLUSTER (XFSSTAT_END_ATTRIBUTE_OPS+3)
- __uint32_t xs_iflush_count;
- __uint32_t xs_icluster_flushcnt;
- __uint32_t xs_icluster_flushinode;
+ uint32_t xs_iflush_count;
+ uint32_t xs_icluster_flushcnt;
+ uint32_t xs_icluster_flushinode;
# define XFSSTAT_END_VNODE_OPS (XFSSTAT_END_INODE_CLUSTER+8)
- __uint32_t vn_active; /* # vnodes not on free lists */
- __uint32_t vn_alloc; /* # times vn_alloc called */
- __uint32_t vn_get; /* # times vn_get called */
- __uint32_t vn_hold; /* # times vn_hold called */
- __uint32_t vn_rele; /* # times vn_rele called */
- __uint32_t vn_reclaim; /* # times vn_reclaim called */
- __uint32_t vn_remove; /* # times vn_remove called */
- __uint32_t vn_free; /* # times vn_free called */
+ uint32_t vn_active; /* # vnodes not on free lists */
+ uint32_t vn_alloc; /* # times vn_alloc called */
+ uint32_t vn_get; /* # times vn_get called */
+ uint32_t vn_hold; /* # times vn_hold called */
+ uint32_t vn_rele; /* # times vn_rele called */
+ uint32_t vn_reclaim; /* # times vn_reclaim called */
+ uint32_t vn_remove; /* # times vn_remove called */
+ uint32_t vn_free; /* # times vn_free called */
#define XFSSTAT_END_BUF (XFSSTAT_END_VNODE_OPS+9)
- __uint32_t xb_get;
- __uint32_t xb_create;
- __uint32_t xb_get_locked;
- __uint32_t xb_get_locked_waited;
- __uint32_t xb_busy_locked;
- __uint32_t xb_miss_locked;
- __uint32_t xb_page_retries;
- __uint32_t xb_page_found;
- __uint32_t xb_get_read;
+ uint32_t xb_get;
+ uint32_t xb_create;
+ uint32_t xb_get_locked;
+ uint32_t xb_get_locked_waited;
+ uint32_t xb_busy_locked;
+ uint32_t xb_miss_locked;
+ uint32_t xb_page_retries;
+ uint32_t xb_page_found;
+ uint32_t xb_get_read;
/* Version 2 btree counters */
#define XFSSTAT_END_ABTB_V2 (XFSSTAT_END_BUF + __XBTS_MAX)
- __uint32_t xs_abtb_2[__XBTS_MAX];
+ uint32_t xs_abtb_2[__XBTS_MAX];
#define XFSSTAT_END_ABTC_V2 (XFSSTAT_END_ABTB_V2 + __XBTS_MAX)
- __uint32_t xs_abtc_2[__XBTS_MAX];
+ uint32_t xs_abtc_2[__XBTS_MAX];
#define XFSSTAT_END_BMBT_V2 (XFSSTAT_END_ABTC_V2 + __XBTS_MAX)
- __uint32_t xs_bmbt_2[__XBTS_MAX];
+ uint32_t xs_bmbt_2[__XBTS_MAX];
#define XFSSTAT_END_IBT_V2 (XFSSTAT_END_BMBT_V2 + __XBTS_MAX)
- __uint32_t xs_ibt_2[__XBTS_MAX];
+ uint32_t xs_ibt_2[__XBTS_MAX];
#define XFSSTAT_END_FIBT_V2 (XFSSTAT_END_IBT_V2 + __XBTS_MAX)
- __uint32_t xs_fibt_2[__XBTS_MAX];
+ uint32_t xs_fibt_2[__XBTS_MAX];
#define XFSSTAT_END_RMAP_V2 (XFSSTAT_END_FIBT_V2 + __XBTS_MAX)
- __uint32_t xs_rmap_2[__XBTS_MAX];
+ uint32_t xs_rmap_2[__XBTS_MAX];
#define XFSSTAT_END_REFCOUNT (XFSSTAT_END_RMAP_V2 + __XBTS_MAX)
- __uint32_t xs_refcbt_2[__XBTS_MAX];
+ uint32_t xs_refcbt_2[__XBTS_MAX];
#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_REFCOUNT + 6)
- __uint32_t xs_qm_dqreclaims;
- __uint32_t xs_qm_dqreclaim_misses;
- __uint32_t xs_qm_dquot_dups;
- __uint32_t xs_qm_dqcachemisses;
- __uint32_t xs_qm_dqcachehits;
- __uint32_t xs_qm_dqwants;
+ uint32_t xs_qm_dqreclaims;
+ uint32_t xs_qm_dqreclaim_misses;
+ uint32_t xs_qm_dquot_dups;
+ uint32_t xs_qm_dqcachemisses;
+ uint32_t xs_qm_dqcachehits;
+ uint32_t xs_qm_dqwants;
#define XFSSTAT_END_QM (XFSSTAT_END_XQMSTAT+2)
- __uint32_t xs_qm_dquot;
- __uint32_t xs_qm_dquot_unused;
+ uint32_t xs_qm_dquot;
+ uint32_t xs_qm_dquot_unused;
/* Extra precision counters */
- __uint64_t xs_xstrat_bytes;
- __uint64_t xs_write_bytes;
- __uint64_t xs_read_bytes;
+ uint64_t xs_xstrat_bytes;
+ uint64_t xs_write_bytes;
+ uint64_t xs_read_bytes;
};
struct xfsstats {
@@ -186,7 +186,7 @@ struct xfsstats {
* simple wrapper for getting the array index of s struct member offset
*/
#define XFS_STATS_CALC_INDEX(member) \
- (offsetof(struct __xfsstats, member) / (int)sizeof(__uint32_t))
+ (offsetof(struct __xfsstats, member) / (int)sizeof(uint32_t))
int xfs_stats_format(struct xfsstats __percpu *stats, char *buf);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 455a575f101d..38aaacdbb8b3 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -196,7 +196,7 @@ xfs_parseargs(
int dsunit = 0;
int dswidth = 0;
int iosize = 0;
- __uint8_t iosizelog = 0;
+ uint8_t iosizelog = 0;
/*
* set up the mount name first so all the errors will refer to the
@@ -556,7 +556,7 @@ xfs_showargs(
return 0;
}
-static __uint64_t
+static uint64_t
xfs_max_file_offset(
unsigned int blockshift)
{
@@ -587,7 +587,7 @@ xfs_max_file_offset(
# endif
#endif
- return (((__uint64_t)pagefactor) << bitshift) - 1;
+ return (((uint64_t)pagefactor) << bitshift) - 1;
}
/*
@@ -622,7 +622,7 @@ xfs_set_inode_alloc(
* the max inode percentage. Used only for inode32.
*/
if (mp->m_maxicount) {
- __uint64_t icount;
+ uint64_t icount;
icount = sbp->sb_dblocks * sbp->sb_imax_pct;
do_div(icount, 100);
@@ -1088,12 +1088,12 @@ xfs_fs_statfs(
struct xfs_mount *mp = XFS_M(dentry->d_sb);
xfs_sb_t *sbp = &mp->m_sb;
struct xfs_inode *ip = XFS_I(d_inode(dentry));
- __uint64_t fakeinos, id;
- __uint64_t icount;
- __uint64_t ifree;
- __uint64_t fdblocks;
+ uint64_t fakeinos, id;
+ uint64_t icount;
+ uint64_t ifree;
+ uint64_t fdblocks;
xfs_extlen_t lsize;
- __int64_t ffree;
+ int64_t ffree;
statp->f_type = XFS_SB_MAGIC;
statp->f_namelen = MAXNAMELEN - 1;
@@ -1116,7 +1116,7 @@ xfs_fs_statfs(
statp->f_bavail = statp->f_bfree;
fakeinos = statp->f_bfree << sbp->sb_inopblog;
- statp->f_files = MIN(icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
+ statp->f_files = MIN(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
if (mp->m_maxicount)
statp->f_files = min_t(typeof(statp->f_files),
statp->f_files,
@@ -1129,7 +1129,7 @@ xfs_fs_statfs(
/* make sure statp->f_ffree does not underflow */
ffree = statp->f_files - (icount - ifree);
- statp->f_ffree = max_t(__int64_t, ffree, 0);
+ statp->f_ffree = max_t(int64_t, ffree, 0);
if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
@@ -1142,7 +1142,7 @@ xfs_fs_statfs(
STATIC void
xfs_save_resvblks(struct xfs_mount *mp)
{
- __uint64_t resblks = 0;
+ uint64_t resblks = 0;
mp->m_resblks_save = mp->m_resblks;
xfs_reserve_blocks(mp, &resblks, NULL);
@@ -1151,7 +1151,7 @@ xfs_save_resvblks(struct xfs_mount *mp)
STATIC void
xfs_restore_resvblks(struct xfs_mount *mp)
{
- __uint64_t resblks;
+ uint64_t resblks;
if (mp->m_resblks_save) {
resblks = mp->m_resblks_save;
@@ -1766,7 +1766,8 @@ STATIC int __init
xfs_init_zones(void)
{
xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE,
- offsetof(struct xfs_ioend, io_inline_bio));
+ offsetof(struct xfs_ioend, io_inline_bio),
+ BIOSET_NEED_BVECS);
if (!xfs_ioend_bioset)
goto out;
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index f2cb45ed1d54..12cd9cf7de41 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -43,8 +43,8 @@
#include "xfs_log.h"
/* ----- Kernel only functions below ----- */
-STATIC int
-xfs_readlink_bmap(
+int
+xfs_readlink_bmap_ilocked(
struct xfs_inode *ip,
char *link)
{
@@ -143,7 +143,7 @@ xfs_readlink(
if (!pathlen)
goto out;
- if (pathlen < 0 || pathlen > MAXPATHLEN) {
+ if (pathlen < 0 || pathlen > XFS_SYMLINK_MAXLEN) {
xfs_alert(mp, "%s: inode (%llu) bad symlink length (%lld)",
__func__, (unsigned long long) ip->i_ino,
(long long) pathlen);
@@ -153,7 +153,7 @@ xfs_readlink(
}
- error = xfs_readlink_bmap(ip, link);
+ error = xfs_readlink_bmap_ilocked(ip, link);
out:
xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -202,7 +202,7 @@ xfs_symlink(
* Check component lengths of the target path name.
*/
pathlen = strlen(target_path);
- if (pathlen >= MAXPATHLEN) /* total string too long */
+ if (pathlen >= XFS_SYMLINK_MAXLEN) /* total string too long */
return -ENAMETOOLONG;
udqp = gdqp = NULL;
@@ -559,7 +559,7 @@ xfs_inactive_symlink(
return 0;
}
- if (pathlen < 0 || pathlen > MAXPATHLEN) {
+ if (pathlen < 0 || pathlen > XFS_SYMLINK_MAXLEN) {
xfs_alert(mp, "%s: inode (0x%llx) bad symlink length (%d)",
__func__, (unsigned long long)ip->i_ino, pathlen);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_symlink.h b/fs/xfs/xfs_symlink.h
index e75245d09116..aeaee8923617 100644
--- a/fs/xfs/xfs_symlink.h
+++ b/fs/xfs/xfs_symlink.h
@@ -21,6 +21,7 @@
int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
const char *target_path, umode_t mode, struct xfs_inode **ipp);
+int xfs_readlink_bmap_ilocked(struct xfs_inode *ip, char *link);
int xfs_readlink(struct xfs_inode *ip, char *link);
int xfs_inactive_symlink(struct xfs_inode *ip);
diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h
index 984a3499cfe3..82afee005140 100644
--- a/fs/xfs/xfs_sysctl.h
+++ b/fs/xfs/xfs_sysctl.h
@@ -95,6 +95,7 @@ extern xfs_param_t xfs_params;
struct xfs_globals {
int log_recovery_delay; /* log recovery delay (secs) */
+ bool bug_on_assert; /* BUG() the kernel on assert failure */
};
extern struct xfs_globals xfs_globals;
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 80ac15fb9638..8b2ccc234f36 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -90,15 +90,25 @@ to_mp(struct kobject *kobject)
return container_of(kobj, struct xfs_mount, m_kobj);
}
+static struct attribute *xfs_mp_attrs[] = {
+ NULL,
+};
+
+struct kobj_type xfs_mp_ktype = {
+ .release = xfs_sysfs_release,
+ .sysfs_ops = &xfs_sysfs_ops,
+ .default_attrs = xfs_mp_attrs,
+};
+
#ifdef DEBUG
+/* debug */
STATIC ssize_t
-drop_writes_store(
+bug_on_assert_store(
struct kobject *kobject,
const char *buf,
size_t count)
{
- struct xfs_mount *mp = to_mp(kobject);
int ret;
int val;
@@ -107,9 +117,9 @@ drop_writes_store(
return ret;
if (val == 1)
- mp->m_drop_writes = true;
+ xfs_globals.bug_on_assert = true;
else if (val == 0)
- mp->m_drop_writes = false;
+ xfs_globals.bug_on_assert = false;
else
return -EINVAL;
@@ -117,33 +127,13 @@ drop_writes_store(
}
STATIC ssize_t
-drop_writes_show(
+bug_on_assert_show(
struct kobject *kobject,
char *buf)
{
- struct xfs_mount *mp = to_mp(kobject);
-
- return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_drop_writes ? 1 : 0);
+ return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.bug_on_assert ? 1 : 0);
}
-XFS_SYSFS_ATTR_RW(drop_writes);
-
-#endif /* DEBUG */
-
-static struct attribute *xfs_mp_attrs[] = {
-#ifdef DEBUG
- ATTR_LIST(drop_writes),
-#endif
- NULL,
-};
-
-struct kobj_type xfs_mp_ktype = {
- .release = xfs_sysfs_release,
- .sysfs_ops = &xfs_sysfs_ops,
- .default_attrs = xfs_mp_attrs,
-};
-
-#ifdef DEBUG
-/* debug */
+XFS_SYSFS_ATTR_RW(bug_on_assert);
STATIC ssize_t
log_recovery_delay_store(
@@ -176,6 +166,7 @@ log_recovery_delay_show(
XFS_SYSFS_ATTR_RW(log_recovery_delay);
static struct attribute *xfs_dbg_attrs[] = {
+ ATTR_LIST(bug_on_assert),
ATTR_LIST(log_recovery_delay),
NULL,
};
@@ -314,47 +305,11 @@ write_grant_head_show(
}
XFS_SYSFS_ATTR_RO(write_grant_head);
-#ifdef DEBUG
-STATIC ssize_t
-log_badcrc_factor_store(
- struct kobject *kobject,
- const char *buf,
- size_t count)
-{
- struct xlog *log = to_xlog(kobject);
- int ret;
- uint32_t val;
-
- ret = kstrtouint(buf, 0, &val);
- if (ret)
- return ret;
-
- log->l_badcrc_factor = val;
-
- return count;
-}
-
-STATIC ssize_t
-log_badcrc_factor_show(
- struct kobject *kobject,
- char *buf)
-{
- struct xlog *log = to_xlog(kobject);
-
- return snprintf(buf, PAGE_SIZE, "%d\n", log->l_badcrc_factor);
-}
-
-XFS_SYSFS_ATTR_RW(log_badcrc_factor);
-#endif /* DEBUG */
-
static struct attribute *xfs_log_attrs[] = {
ATTR_LIST(log_head_lsn),
ATTR_LIST(log_tail_lsn),
ATTR_LIST(reserve_grant_head),
ATTR_LIST(write_grant_head),
-#ifdef DEBUG
- ATTR_LIST(log_badcrc_factor),
-#endif
NULL,
};
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 7c5a16528d8b..bcc3cdf8e1c5 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -251,7 +251,7 @@ TRACE_EVENT(xfs_iext_insert,
__print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
(long)__entry->idx,
__entry->startoff,
- (__int64_t)__entry->startblock,
+ (int64_t)__entry->startblock,
__entry->blockcount,
__entry->state,
(char *)__entry->caller_ip)
@@ -295,7 +295,7 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
__print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
(long)__entry->idx,
__entry->startoff,
- (__int64_t)__entry->startblock,
+ (int64_t)__entry->startblock,
__entry->blockcount,
__entry->state,
(char *)__entry->caller_ip)
@@ -367,6 +367,7 @@ DEFINE_BUF_EVENT(xfs_buf_iowait_done);
DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
DEFINE_BUF_EVENT(xfs_buf_delwri_queued);
DEFINE_BUF_EVENT(xfs_buf_delwri_split);
+DEFINE_BUF_EVENT(xfs_buf_delwri_pushbuf);
DEFINE_BUF_EVENT(xfs_buf_get_uncached);
DEFINE_BUF_EVENT(xfs_buf_item_relse);
DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
@@ -1280,7 +1281,7 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
__entry->count,
__print_symbolic(__entry->type, XFS_IO_TYPES),
__entry->startoff,
- (__int64_t)__entry->startblock,
+ (int64_t)__entry->startblock,
__entry->blockcount)
)
@@ -1490,25 +1491,6 @@ TRACE_EVENT(xfs_extent_busy_trim,
__entry->tlen)
);
-TRACE_EVENT(xfs_trans_commit_lsn,
- TP_PROTO(struct xfs_trans *trans),
- TP_ARGS(trans),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(struct xfs_trans *, tp)
- __field(xfs_lsn_t, lsn)
- ),
- TP_fast_assign(
- __entry->dev = trans->t_mountp->m_super->s_dev;
- __entry->tp = trans;
- __entry->lsn = trans->t_commit_lsn;
- ),
- TP_printk("dev %d:%d trans 0x%p commit_lsn 0x%llx",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->tp,
- __entry->lsn)
-);
-
TRACE_EVENT(xfs_agf,
TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags,
unsigned long caller_ip),
@@ -2057,7 +2039,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,
TP_ARGS(log, buf_f),
TP_STRUCT__entry(
__field(dev_t, dev)
- __field(__int64_t, blkno)
+ __field(int64_t, blkno)
__field(unsigned short, len)
__field(unsigned short, flags)
__field(unsigned short, size)
@@ -2106,7 +2088,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,
__field(int, fields)
__field(unsigned short, asize)
__field(unsigned short, dsize)
- __field(__int64_t, blkno)
+ __field(int64_t, blkno)
__field(int, len)
__field(int, boffset)
),
@@ -3256,8 +3238,8 @@ DECLARE_EVENT_CLASS(xfs_fsmap_class,
__field(xfs_agnumber_t, agno)
__field(xfs_fsblock_t, bno)
__field(xfs_filblks_t, len)
- __field(__uint64_t, owner)
- __field(__uint64_t, offset)
+ __field(uint64_t, owner)
+ __field(uint64_t, offset)
__field(unsigned int, flags)
),
TP_fast_assign(
@@ -3297,9 +3279,9 @@ DECLARE_EVENT_CLASS(xfs_getfsmap_class,
__field(dev_t, keydev)
__field(xfs_daddr_t, block)
__field(xfs_daddr_t, len)
- __field(__uint64_t, owner)
- __field(__uint64_t, offset)
- __field(__uint64_t, flags)
+ __field(uint64_t, owner)
+ __field(uint64_t, offset)
+ __field(uint64_t, flags)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index a07acbf0bd8a..6bdad6f58934 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -105,10 +105,6 @@ typedef struct xfs_trans {
unsigned int t_rtx_res; /* # of rt extents resvd */
unsigned int t_rtx_res_used; /* # of resvd rt extents used */
struct xlog_ticket *t_ticket; /* log mgr ticket */
- xfs_lsn_t t_lsn; /* log seq num of start of
- * transaction. */
- xfs_lsn_t t_commit_lsn; /* log seq num of end of
- * transaction. */
struct xfs_mount *t_mountp; /* ptr to fs mount struct */
struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */
unsigned int t_flags; /* misc flags */
@@ -249,7 +245,7 @@ struct xfs_rud_log_item *xfs_trans_get_rud(struct xfs_trans *tp,
struct xfs_rui_log_item *ruip);
int xfs_trans_log_finish_rmap_update(struct xfs_trans *tp,
struct xfs_rud_log_item *rudp, enum xfs_rmap_intent_type type,
- __uint64_t owner, int whichfork, xfs_fileoff_t startoff,
+ uint64_t owner, int whichfork, xfs_fileoff_t startoff,
xfs_fsblock_t startblock, xfs_filblks_t blockcount,
xfs_exntst_t state, struct xfs_btree_cur **pcur);
@@ -275,6 +271,6 @@ int xfs_trans_log_finish_bmap_update(struct xfs_trans *tp,
struct xfs_bud_log_item *rudp, struct xfs_defer_ops *dfops,
enum xfs_bmap_intent_type type, struct xfs_inode *ip,
int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock,
- xfs_filblks_t blockcount, xfs_exntst_t state);
+ xfs_filblks_t *blockcount, xfs_exntst_t state);
#endif /* __XFS_TRANS_H__ */
diff --git a/fs/xfs/xfs_trans_bmap.c b/fs/xfs/xfs_trans_bmap.c
index 6408e7d7c08c..14543d93cd4b 100644
--- a/fs/xfs/xfs_trans_bmap.c
+++ b/fs/xfs/xfs_trans_bmap.c
@@ -63,7 +63,7 @@ xfs_trans_log_finish_bmap_update(
int whichfork,
xfs_fileoff_t startoff,
xfs_fsblock_t startblock,
- xfs_filblks_t blockcount,
+ xfs_filblks_t *blockcount,
xfs_exntst_t state)
{
int error;
@@ -196,16 +196,23 @@ xfs_bmap_update_finish_item(
void **state)
{
struct xfs_bmap_intent *bmap;
+ xfs_filblks_t count;
int error;
bmap = container_of(item, struct xfs_bmap_intent, bi_list);
+ count = bmap->bi_bmap.br_blockcount;
error = xfs_trans_log_finish_bmap_update(tp, done_item, dop,
bmap->bi_type,
bmap->bi_owner, bmap->bi_whichfork,
bmap->bi_bmap.br_startoff,
bmap->bi_bmap.br_startblock,
- bmap->bi_bmap.br_blockcount,
+ &count,
bmap->bi_bmap.br_state);
+ if (!error && count > 0) {
+ ASSERT(bmap->bi_type == XFS_BMAP_UNMAP);
+ bmap->bi_bmap.br_blockcount = count;
+ return -EAGAIN;
+ }
kmem_free(bmap);
return error;
}
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 8ee29ca132dc..86987d823d76 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -356,6 +356,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
xfs_buf_t *bp)
{
xfs_buf_log_item_t *bip;
+ int freed;
/*
* Default to a normal brelse() call if the tp is NULL.
@@ -419,16 +420,22 @@ xfs_trans_brelse(xfs_trans_t *tp,
/*
* Drop our reference to the buf log item.
*/
- atomic_dec(&bip->bli_refcount);
+ freed = atomic_dec_and_test(&bip->bli_refcount);
/*
- * If the buf item is not tracking data in the log, then
- * we must free it before releasing the buffer back to the
- * free pool. Before releasing the buffer to the free pool,
- * clear the transaction pointer in b_fsprivate2 to dissolve
- * its relation to this transaction.
+ * If the buf item is not tracking data in the log, then we must free it
+ * before releasing the buffer back to the free pool.
+ *
+ * If the fs has shutdown and we dropped the last reference, it may fall
+ * on us to release a (possibly dirty) bli if it never made it to the
+ * AIL (e.g., the aborted unpin already happened and didn't release it
+ * due to our reference). Since we're already shutdown and need xa_lock,
+ * just force remove from the AIL and release the bli here.
*/
- if (!xfs_buf_item_dirty(bip)) {
+ if (XFS_FORCED_SHUTDOWN(tp->t_mountp) && freed) {
+ xfs_trans_ail_remove(&bip->bli_item, SHUTDOWN_LOG_IO_ERROR);
+ xfs_buf_item_relse(bp);
+ } else if (!xfs_buf_item_dirty(bip)) {
/***
ASSERT(bp->b_pincount == 0);
***/
diff --git a/fs/xfs/xfs_trans_rmap.c b/fs/xfs/xfs_trans_rmap.c
index 9ead064b5e90..9b577beb43d7 100644
--- a/fs/xfs/xfs_trans_rmap.c
+++ b/fs/xfs/xfs_trans_rmap.c
@@ -96,7 +96,7 @@ xfs_trans_log_finish_rmap_update(
struct xfs_trans *tp,
struct xfs_rud_log_item *rudp,
enum xfs_rmap_intent_type type,
- __uint64_t owner,
+ uint64_t owner,
int whichfork,
xfs_fileoff_t startoff,
xfs_fsblock_t startblock,
OpenPOWER on IntegriCloud